{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6220839813374806, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006220839813374805, "grad_norm": 1.5990997552871704, "learning_rate": 4.995e-05, "log_odds_chosen": -0.5121189951896667, "log_odds_ratio": -1.175696849822998, "logits/chosen": -3.061245918273926, "logits/rejected": 0.8060249090194702, "logps/chosen": -1.5489490032196045, "logps/rejected": -1.0588122606277466, "loss": 0.9422, "nll_loss": 0.8245992660522461, "rewards/accuracies": 0.5, "rewards/chosen": -0.1548949033021927, "rewards/margins": -0.04901367798447609, "rewards/rejected": -0.1058812290430069, "step": 1 }, { "epoch": 0.001244167962674961, "grad_norm": 0.43756288290023804, "learning_rate": 4.99e-05, "log_odds_chosen": 0.13479183614253998, "log_odds_ratio": -0.6995198726654053, "logits/chosen": -0.5496019721031189, "logits/rejected": 0.916201114654541, "logps/chosen": -1.1331713199615479, "logps/rejected": -1.1910749673843384, "loss": 1.1757, "nll_loss": 1.105738639831543, "rewards/accuracies": 0.75, "rewards/chosen": -0.11331713944673538, "rewards/margins": 0.005790362134575844, "rewards/rejected": -0.11910750716924667, "step": 2 }, { "epoch": 0.0018662519440124418, "grad_norm": 0.6946638822555542, "learning_rate": 4.9850000000000006e-05, "log_odds_chosen": -0.7716032862663269, "log_odds_ratio": -1.2915821075439453, "logits/chosen": -1.1749107837677002, "logits/rejected": 0.48298171162605286, "logps/chosen": -1.713394284248352, "logps/rejected": -1.0794651508331299, "loss": 1.1266, "nll_loss": 0.9974524974822998, "rewards/accuracies": 0.25, "rewards/chosen": -0.17133943736553192, "rewards/margins": -0.06339291483163834, "rewards/rejected": -0.10794650763273239, "step": 3 }, { "epoch": 0.002488335925349922, "grad_norm": 0.3383781611919403, "learning_rate": 4.9800000000000004e-05, "log_odds_chosen": -1.0775129795074463, "log_odds_ratio": -1.6865262985229492, "logits/chosen": -0.8680384159088135, "logits/rejected": 1.1301361322402954, "logps/chosen": -2.1693308353424072, "logps/rejected": -1.1649184226989746, "loss": 1.0211, "nll_loss": 0.8524219393730164, "rewards/accuracies": 0.5, "rewards/chosen": -0.21693310141563416, "rewards/margins": -0.10044124722480774, "rewards/rejected": -0.11649185419082642, "step": 4 }, { "epoch": 0.003110419906687403, "grad_norm": 0.6052569150924683, "learning_rate": 4.975e-05, "log_odds_chosen": -0.2497221976518631, "log_odds_ratio": -0.9002423286437988, "logits/chosen": 2.113248348236084, "logits/rejected": 1.7459735870361328, "logps/chosen": -1.5321459770202637, "logps/rejected": -1.26905357837677, "loss": 1.3045, "nll_loss": 1.214469075202942, "rewards/accuracies": 0.5, "rewards/chosen": -0.15321458876132965, "rewards/margins": -0.02630922943353653, "rewards/rejected": -0.12690536677837372, "step": 5 }, { "epoch": 0.0037325038880248835, "grad_norm": 3.1706557273864746, "learning_rate": 4.97e-05, "log_odds_chosen": -1.2696902751922607, "log_odds_ratio": -1.8387222290039062, "logits/chosen": 0.5090468525886536, "logits/rejected": 1.799258828163147, "logps/chosen": -2.498434066772461, "logps/rejected": -1.3061614036560059, "loss": 1.159, "nll_loss": 0.9750999212265015, "rewards/accuracies": 0.375, "rewards/chosen": -0.24984338879585266, "rewards/margins": -0.11922725290060043, "rewards/rejected": -0.13061614334583282, "step": 6 }, { "epoch": 0.004354587869362364, "grad_norm": 0.22779366374015808, "learning_rate": 4.965e-05, "log_odds_chosen": -0.6782891154289246, "log_odds_ratio": -1.229583740234375, "logits/chosen": 0.6050777435302734, "logits/rejected": 1.8784853219985962, "logps/chosen": -1.7958674430847168, "logps/rejected": -1.2137447595596313, "loss": 0.9138, "nll_loss": 0.7908901572227478, "rewards/accuracies": 0.375, "rewards/chosen": -0.1795867532491684, "rewards/margins": -0.0582122765481472, "rewards/rejected": -0.1213744729757309, "step": 7 }, { "epoch": 0.004976671850699844, "grad_norm": 0.22942067682743073, "learning_rate": 4.96e-05, "log_odds_chosen": 0.006755713373422623, "log_odds_ratio": -0.7070448398590088, "logits/chosen": 2.0835607051849365, "logits/rejected": 2.2670180797576904, "logps/chosen": -1.0497729778289795, "logps/rejected": -1.0410140752792358, "loss": 1.0557, "nll_loss": 0.9849987626075745, "rewards/accuracies": 0.625, "rewards/chosen": -0.10497729480266571, "rewards/margins": -0.0008758809417486191, "rewards/rejected": -0.10410141199827194, "step": 8 }, { "epoch": 0.005598755832037325, "grad_norm": 1.5027852058410645, "learning_rate": 4.9550000000000005e-05, "log_odds_chosen": -1.2336622476577759, "log_odds_ratio": -1.7122440338134766, "logits/chosen": 1.264065146446228, "logits/rejected": 2.8031373023986816, "logps/chosen": -2.152813673019409, "logps/rejected": -1.0926671028137207, "loss": 1.0961, "nll_loss": 0.9248759746551514, "rewards/accuracies": 0.25, "rewards/chosen": -0.21528135240077972, "rewards/margins": -0.1060146614909172, "rewards/rejected": -0.10926671326160431, "step": 9 }, { "epoch": 0.006220839813374806, "grad_norm": 7.84841775894165, "learning_rate": 4.9500000000000004e-05, "log_odds_chosen": -0.36369776725769043, "log_odds_ratio": -1.0270187854766846, "logits/chosen": 1.8378279209136963, "logits/rejected": 2.67022705078125, "logps/chosen": -1.7057809829711914, "logps/rejected": -1.3585100173950195, "loss": 1.0536, "nll_loss": 0.9509172439575195, "rewards/accuracies": 0.375, "rewards/chosen": -0.17057809233665466, "rewards/margins": -0.034727081656455994, "rewards/rejected": -0.13585101068019867, "step": 10 }, { "epoch": 0.006842923794712286, "grad_norm": 0.20901276171207428, "learning_rate": 4.945e-05, "log_odds_chosen": -0.23809382319450378, "log_odds_ratio": -0.8853789567947388, "logits/chosen": 1.5684062242507935, "logits/rejected": 2.971144199371338, "logps/chosen": -1.1353232860565186, "logps/rejected": -0.9783434867858887, "loss": 0.9364, "nll_loss": 0.8478444814682007, "rewards/accuracies": 0.125, "rewards/chosen": -0.11353233456611633, "rewards/margins": -0.015697985887527466, "rewards/rejected": -0.09783434867858887, "step": 11 }, { "epoch": 0.007465007776049767, "grad_norm": 0.24611681699752808, "learning_rate": 4.94e-05, "log_odds_chosen": -0.17657308280467987, "log_odds_ratio": -0.8087456226348877, "logits/chosen": 3.477548122406006, "logits/rejected": 4.530710697174072, "logps/chosen": -1.1894445419311523, "logps/rejected": -1.0661442279815674, "loss": 1.1843, "nll_loss": 1.1034001111984253, "rewards/accuracies": 0.375, "rewards/chosen": -0.11894445866346359, "rewards/margins": -0.012330022640526295, "rewards/rejected": -0.10661444067955017, "step": 12 }, { "epoch": 0.008087091757387248, "grad_norm": 0.217342346906662, "learning_rate": 4.935e-05, "log_odds_chosen": -0.23449140787124634, "log_odds_ratio": -0.8244205713272095, "logits/chosen": 2.5574779510498047, "logits/rejected": 4.194855690002441, "logps/chosen": -1.2394225597381592, "logps/rejected": -1.0743162631988525, "loss": 1.1663, "nll_loss": 1.0838088989257812, "rewards/accuracies": 0.125, "rewards/chosen": -0.12394225597381592, "rewards/margins": -0.01651064306497574, "rewards/rejected": -0.10743162035942078, "step": 13 }, { "epoch": 0.008709175738724729, "grad_norm": 0.4070003032684326, "learning_rate": 4.93e-05, "log_odds_chosen": -1.379059910774231, "log_odds_ratio": -2.258007287979126, "logits/chosen": 1.8881428241729736, "logits/rejected": 2.1496148109436035, "logps/chosen": -2.8094940185546875, "logps/rejected": -1.301595687866211, "loss": 1.0798, "nll_loss": 0.8539876937866211, "rewards/accuracies": 0.625, "rewards/chosen": -0.2809494137763977, "rewards/margins": -0.15078984200954437, "rewards/rejected": -0.13015958666801453, "step": 14 }, { "epoch": 0.00933125972006221, "grad_norm": 0.23260748386383057, "learning_rate": 4.9250000000000004e-05, "log_odds_chosen": -0.16578805446624756, "log_odds_ratio": -0.7918254137039185, "logits/chosen": 2.4860520362854004, "logits/rejected": 3.8841402530670166, "logps/chosen": -1.3346906900405884, "logps/rejected": -1.2134218215942383, "loss": 1.1331, "nll_loss": 1.0539392232894897, "rewards/accuracies": 0.375, "rewards/chosen": -0.13346907496452332, "rewards/margins": -0.012126876041293144, "rewards/rejected": -0.12134218215942383, "step": 15 }, { "epoch": 0.009953343701399688, "grad_norm": 0.1825733780860901, "learning_rate": 4.92e-05, "log_odds_chosen": -0.3614625632762909, "log_odds_ratio": -0.9392529129981995, "logits/chosen": 2.31900691986084, "logits/rejected": 2.9353525638580322, "logps/chosen": -1.47523033618927, "logps/rejected": -1.185972809791565, "loss": 1.082, "nll_loss": 0.9881048202514648, "rewards/accuracies": 0.25, "rewards/chosen": -0.14752303063869476, "rewards/margins": -0.02892574481666088, "rewards/rejected": -0.11859728395938873, "step": 16 }, { "epoch": 0.010575427682737169, "grad_norm": 0.21975092589855194, "learning_rate": 4.915e-05, "log_odds_chosen": -0.3503631353378296, "log_odds_ratio": -0.8912189602851868, "logits/chosen": 1.604474425315857, "logits/rejected": 2.647216320037842, "logps/chosen": -1.483432650566101, "logps/rejected": -1.2270509004592896, "loss": 0.8812, "nll_loss": 0.7920456528663635, "rewards/accuracies": 0.125, "rewards/chosen": -0.1483432650566101, "rewards/margins": -0.02563817799091339, "rewards/rejected": -0.12270509451627731, "step": 17 }, { "epoch": 0.01119751166407465, "grad_norm": 0.7166666388511658, "learning_rate": 4.91e-05, "log_odds_chosen": 0.13804574310779572, "log_odds_ratio": -0.645270586013794, "logits/chosen": 2.1303985118865967, "logits/rejected": 3.5221128463745117, "logps/chosen": -1.138808012008667, "logps/rejected": -1.2420754432678223, "loss": 0.8695, "nll_loss": 0.8049733638763428, "rewards/accuracies": 0.625, "rewards/chosen": -0.11388080567121506, "rewards/margins": 0.010326748713850975, "rewards/rejected": -0.12420755624771118, "step": 18 }, { "epoch": 0.01181959564541213, "grad_norm": 0.3583829402923584, "learning_rate": 4.905e-05, "log_odds_chosen": -0.710419774055481, "log_odds_ratio": -1.2239866256713867, "logits/chosen": 2.4776275157928467, "logits/rejected": 3.7551608085632324, "logps/chosen": -1.6493169069290161, "logps/rejected": -1.0448060035705566, "loss": 0.945, "nll_loss": 0.8225875496864319, "rewards/accuracies": 0.25, "rewards/chosen": -0.16493168473243713, "rewards/margins": -0.06045108661055565, "rewards/rejected": -0.10448060184717178, "step": 19 }, { "epoch": 0.012441679626749611, "grad_norm": 0.2361656278371811, "learning_rate": 4.9e-05, "log_odds_chosen": -0.10248635709285736, "log_odds_ratio": -0.7523667216300964, "logits/chosen": 1.7299888134002686, "logits/rejected": 3.813870906829834, "logps/chosen": -1.0503981113433838, "logps/rejected": -0.9909194707870483, "loss": 0.8247, "nll_loss": 0.7494850158691406, "rewards/accuracies": 0.375, "rewards/chosen": -0.1050398200750351, "rewards/margins": -0.005947869271039963, "rewards/rejected": -0.09909194707870483, "step": 20 }, { "epoch": 0.013063763608087092, "grad_norm": 0.5069213509559631, "learning_rate": 4.8950000000000004e-05, "log_odds_chosen": -0.12599247694015503, "log_odds_ratio": -0.7684826254844666, "logits/chosen": 1.8015549182891846, "logits/rejected": 3.3546299934387207, "logps/chosen": -1.3068116903305054, "logps/rejected": -1.199318289756775, "loss": 0.9884, "nll_loss": 0.9115766286849976, "rewards/accuracies": 0.25, "rewards/chosen": -0.13068117201328278, "rewards/margins": -0.010749343782663345, "rewards/rejected": -0.11993182450532913, "step": 21 }, { "epoch": 0.013685847589424573, "grad_norm": 0.21723033487796783, "learning_rate": 4.89e-05, "log_odds_chosen": -0.027980871498584747, "log_odds_ratio": -0.7586711049079895, "logits/chosen": 2.1451902389526367, "logits/rejected": 3.524837017059326, "logps/chosen": -1.201343297958374, "logps/rejected": -1.1644256114959717, "loss": 0.9602, "nll_loss": 0.8842926025390625, "rewards/accuracies": 0.375, "rewards/chosen": -0.12013433873653412, "rewards/margins": -0.003691784106194973, "rewards/rejected": -0.11644256114959717, "step": 22 }, { "epoch": 0.014307931570762053, "grad_norm": 0.7032377123832703, "learning_rate": 4.885e-05, "log_odds_chosen": 0.06119448319077492, "log_odds_ratio": -0.6719111204147339, "logits/chosen": 2.8135011196136475, "logits/rejected": 4.099876880645752, "logps/chosen": -1.3067809343338013, "logps/rejected": -1.3609553575515747, "loss": 1.0529, "nll_loss": 0.9857478141784668, "rewards/accuracies": 0.5, "rewards/chosen": -0.13067808747291565, "rewards/margins": 0.005417431704699993, "rewards/rejected": -0.13609552383422852, "step": 23 }, { "epoch": 0.014930015552099534, "grad_norm": 0.16798748075962067, "learning_rate": 4.88e-05, "log_odds_chosen": -0.146559938788414, "log_odds_ratio": -0.7807488441467285, "logits/chosen": 2.9722163677215576, "logits/rejected": 4.2791242599487305, "logps/chosen": -1.1158338785171509, "logps/rejected": -1.0221004486083984, "loss": 1.0413, "nll_loss": 0.9632115960121155, "rewards/accuracies": 0.25, "rewards/chosen": -0.1115833967924118, "rewards/margins": -0.009373345412313938, "rewards/rejected": -0.10221004486083984, "step": 24 }, { "epoch": 0.015552099533437015, "grad_norm": 0.32012248039245605, "learning_rate": 4.875e-05, "log_odds_chosen": -0.18442490696907043, "log_odds_ratio": -0.9081165790557861, "logits/chosen": 2.4388880729675293, "logits/rejected": 3.846700668334961, "logps/chosen": -1.4206616878509521, "logps/rejected": -1.2228360176086426, "loss": 0.974, "nll_loss": 0.8832047581672668, "rewards/accuracies": 0.625, "rewards/chosen": -0.14206618070602417, "rewards/margins": -0.019782574847340584, "rewards/rejected": -0.12228360772132874, "step": 25 }, { "epoch": 0.016174183514774496, "grad_norm": 0.33273598551750183, "learning_rate": 4.87e-05, "log_odds_chosen": -0.6990683674812317, "log_odds_ratio": -1.3674430847167969, "logits/chosen": 3.5793888568878174, "logits/rejected": 3.8019113540649414, "logps/chosen": -1.9460030794143677, "logps/rejected": -1.294486403465271, "loss": 1.2106, "nll_loss": 1.0738792419433594, "rewards/accuracies": 0.5, "rewards/chosen": -0.19460032880306244, "rewards/margins": -0.06515169143676758, "rewards/rejected": -0.12944863736629486, "step": 26 }, { "epoch": 0.016796267496111975, "grad_norm": 3.539905071258545, "learning_rate": 4.8650000000000003e-05, "log_odds_chosen": 0.11518344283103943, "log_odds_ratio": -0.6598777770996094, "logits/chosen": 2.098487615585327, "logits/rejected": 1.9545716047286987, "logps/chosen": -1.1680437326431274, "logps/rejected": -1.252279281616211, "loss": 0.8025, "nll_loss": 0.7365168929100037, "rewards/accuracies": 0.5, "rewards/chosen": -0.11680437624454498, "rewards/margins": 0.008423548191785812, "rewards/rejected": -0.1252279132604599, "step": 27 }, { "epoch": 0.017418351477449457, "grad_norm": 3.020901679992676, "learning_rate": 4.86e-05, "log_odds_chosen": -0.5429134964942932, "log_odds_ratio": -1.1207876205444336, "logits/chosen": 2.6301114559173584, "logits/rejected": 3.8189711570739746, "logps/chosen": -1.7131836414337158, "logps/rejected": -1.2582485675811768, "loss": 1.0718, "nll_loss": 0.9597415924072266, "rewards/accuracies": 0.375, "rewards/chosen": -0.17131835222244263, "rewards/margins": -0.04549350216984749, "rewards/rejected": -0.12582485377788544, "step": 28 }, { "epoch": 0.018040435458786936, "grad_norm": 0.16388735175132751, "learning_rate": 4.855e-05, "log_odds_chosen": -0.20636707544326782, "log_odds_ratio": -0.8130278587341309, "logits/chosen": 2.4605510234832764, "logits/rejected": 3.5745866298675537, "logps/chosen": -1.1986327171325684, "logps/rejected": -1.058274269104004, "loss": 0.9229, "nll_loss": 0.8415518999099731, "rewards/accuracies": 0.25, "rewards/chosen": -0.11986327171325684, "rewards/margins": -0.014035841450095177, "rewards/rejected": -0.10582742094993591, "step": 29 }, { "epoch": 0.01866251944012442, "grad_norm": 0.19232743978500366, "learning_rate": 4.85e-05, "log_odds_chosen": -0.2556931674480438, "log_odds_ratio": -0.8336398601531982, "logits/chosen": 2.9968199729919434, "logits/rejected": 4.6838765144348145, "logps/chosen": -1.0448908805847168, "logps/rejected": -0.8920204043388367, "loss": 1.0125, "nll_loss": 0.9291301965713501, "rewards/accuracies": 0.125, "rewards/chosen": -0.10448908805847168, "rewards/margins": -0.015287039801478386, "rewards/rejected": -0.08920204639434814, "step": 30 }, { "epoch": 0.019284603421461897, "grad_norm": 0.15270353853702545, "learning_rate": 4.845e-05, "log_odds_chosen": -0.31180787086486816, "log_odds_ratio": -0.9033458828926086, "logits/chosen": 2.6127586364746094, "logits/rejected": 3.483029365539551, "logps/chosen": -1.1772806644439697, "logps/rejected": -0.9254850149154663, "loss": 0.8605, "nll_loss": 0.7702099084854126, "rewards/accuracies": 0.25, "rewards/chosen": -0.11772806942462921, "rewards/margins": -0.02517956867814064, "rewards/rejected": -0.09254850447177887, "step": 31 }, { "epoch": 0.019906687402799376, "grad_norm": 0.24742813408374786, "learning_rate": 4.8400000000000004e-05, "log_odds_chosen": -0.4165438413619995, "log_odds_ratio": -0.9416205883026123, "logits/chosen": 1.1703848838806152, "logits/rejected": 3.2198734283447266, "logps/chosen": -1.3125711679458618, "logps/rejected": -1.0129786729812622, "loss": 0.8494, "nll_loss": 0.7552543878555298, "rewards/accuracies": 0.125, "rewards/chosen": -0.13125711679458618, "rewards/margins": -0.02995925024151802, "rewards/rejected": -0.10129787027835846, "step": 32 }, { "epoch": 0.02052877138413686, "grad_norm": 0.20717312395572662, "learning_rate": 4.835e-05, "log_odds_chosen": -0.43756213784217834, "log_odds_ratio": -1.0350837707519531, "logits/chosen": 3.0430374145507812, "logits/rejected": 5.120755672454834, "logps/chosen": -1.3322926759719849, "logps/rejected": -0.9562426805496216, "loss": 1.0423, "nll_loss": 0.9388405084609985, "rewards/accuracies": 0.25, "rewards/chosen": -0.13322927057743073, "rewards/margins": -0.03760499879717827, "rewards/rejected": -0.09562426805496216, "step": 33 }, { "epoch": 0.021150855365474338, "grad_norm": 0.6029897928237915, "learning_rate": 4.83e-05, "log_odds_chosen": -0.041211143136024475, "log_odds_ratio": -0.7303495407104492, "logits/chosen": 2.8978190422058105, "logits/rejected": 4.314732551574707, "logps/chosen": -1.2479820251464844, "logps/rejected": -1.219968557357788, "loss": 0.9818, "nll_loss": 0.9087687134742737, "rewards/accuracies": 0.375, "rewards/chosen": -0.12479820847511292, "rewards/margins": -0.0028013568371534348, "rewards/rejected": -0.12199684977531433, "step": 34 }, { "epoch": 0.02177293934681182, "grad_norm": 0.2770039141178131, "learning_rate": 4.825e-05, "log_odds_chosen": -0.07565765082836151, "log_odds_ratio": -0.7700670957565308, "logits/chosen": 3.4843027591705322, "logits/rejected": 3.697402000427246, "logps/chosen": -1.2663030624389648, "logps/rejected": -1.226333498954773, "loss": 1.1064, "nll_loss": 1.0294300317764282, "rewards/accuracies": 0.25, "rewards/chosen": -0.12663030624389648, "rewards/margins": -0.003996945917606354, "rewards/rejected": -0.12263335287570953, "step": 35 }, { "epoch": 0.0223950233281493, "grad_norm": 0.19298230111598969, "learning_rate": 4.82e-05, "log_odds_chosen": 0.4019961357116699, "log_odds_ratio": -0.5303748846054077, "logits/chosen": 2.6493451595306396, "logits/rejected": 4.199905872344971, "logps/chosen": -0.8650291562080383, "logps/rejected": -1.1187388896942139, "loss": 0.9015, "nll_loss": 0.8484418392181396, "rewards/accuracies": 0.75, "rewards/chosen": -0.08650290966033936, "rewards/margins": 0.025370977818965912, "rewards/rejected": -0.11187389492988586, "step": 36 }, { "epoch": 0.023017107309486782, "grad_norm": 0.2592315673828125, "learning_rate": 4.815e-05, "log_odds_chosen": -0.47786006331443787, "log_odds_ratio": -1.1106913089752197, "logits/chosen": 1.911085605621338, "logits/rejected": 3.474100351333618, "logps/chosen": -1.5157802104949951, "logps/rejected": -1.1557257175445557, "loss": 1.0016, "nll_loss": 0.8905512094497681, "rewards/accuracies": 0.125, "rewards/chosen": -0.15157800912857056, "rewards/margins": -0.03600544109940529, "rewards/rejected": -0.11557257175445557, "step": 37 }, { "epoch": 0.02363919129082426, "grad_norm": 0.22197005152702332, "learning_rate": 4.8100000000000004e-05, "log_odds_chosen": -0.24612115323543549, "log_odds_ratio": -0.8575789928436279, "logits/chosen": 2.0853166580200195, "logits/rejected": 2.376945734024048, "logps/chosen": -1.2783766984939575, "logps/rejected": -1.0839169025421143, "loss": 0.8213, "nll_loss": 0.73553067445755, "rewards/accuracies": 0.25, "rewards/chosen": -0.127837672829628, "rewards/margins": -0.019445981830358505, "rewards/rejected": -0.10839168727397919, "step": 38 }, { "epoch": 0.024261275272161743, "grad_norm": 0.24557054042816162, "learning_rate": 4.805e-05, "log_odds_chosen": -0.18656456470489502, "log_odds_ratio": -0.8466726541519165, "logits/chosen": 2.951633930206299, "logits/rejected": 4.386725902557373, "logps/chosen": -1.0201852321624756, "logps/rejected": -0.9069477319717407, "loss": 0.9591, "nll_loss": 0.8744035959243774, "rewards/accuracies": 0.375, "rewards/chosen": -0.10201852768659592, "rewards/margins": -0.011323747225105762, "rewards/rejected": -0.09069477766752243, "step": 39 }, { "epoch": 0.024883359253499222, "grad_norm": 0.1822604238986969, "learning_rate": 4.8e-05, "log_odds_chosen": -0.039638370275497437, "log_odds_ratio": -0.7252129912376404, "logits/chosen": 1.7212930917739868, "logits/rejected": 3.5935091972351074, "logps/chosen": -1.2547193765640259, "logps/rejected": -1.2297290563583374, "loss": 0.8422, "nll_loss": 0.7696593999862671, "rewards/accuracies": 0.625, "rewards/chosen": -0.12547193467617035, "rewards/margins": -0.002499036490917206, "rewards/rejected": -0.12297290563583374, "step": 40 }, { "epoch": 0.0255054432348367, "grad_norm": 0.4303940236568451, "learning_rate": 4.795e-05, "log_odds_chosen": -0.5628086924552917, "log_odds_ratio": -1.1011394262313843, "logits/chosen": 1.804811954498291, "logits/rejected": 4.109908580780029, "logps/chosen": -1.3430132865905762, "logps/rejected": -0.910142183303833, "loss": 0.8828, "nll_loss": 0.772662878036499, "rewards/accuracies": 0.25, "rewards/chosen": -0.1343013346195221, "rewards/margins": -0.043287117034196854, "rewards/rejected": -0.09101422131061554, "step": 41 }, { "epoch": 0.026127527216174184, "grad_norm": 0.22790291905403137, "learning_rate": 4.79e-05, "log_odds_chosen": -0.08913028985261917, "log_odds_ratio": -0.7628570795059204, "logits/chosen": 1.8077365159988403, "logits/rejected": 2.038315773010254, "logps/chosen": -1.3737506866455078, "logps/rejected": -1.3219356536865234, "loss": 0.9207, "nll_loss": 0.8443830013275146, "rewards/accuracies": 0.375, "rewards/chosen": -0.13737505674362183, "rewards/margins": -0.005181487649679184, "rewards/rejected": -0.13219358026981354, "step": 42 }, { "epoch": 0.026749611197511663, "grad_norm": 0.18566672503948212, "learning_rate": 4.785e-05, "log_odds_chosen": 0.47161194682121277, "log_odds_ratio": -0.5659717917442322, "logits/chosen": 2.2776291370391846, "logits/rejected": 2.514277458190918, "logps/chosen": -1.042227029800415, "logps/rejected": -1.4109022617340088, "loss": 0.8949, "nll_loss": 0.8383415937423706, "rewards/accuracies": 0.625, "rewards/chosen": -0.10422270745038986, "rewards/margins": 0.036867525428533554, "rewards/rejected": -0.14109022915363312, "step": 43 }, { "epoch": 0.027371695178849145, "grad_norm": 0.35892772674560547, "learning_rate": 4.78e-05, "log_odds_chosen": -0.351146399974823, "log_odds_ratio": -1.1022428274154663, "logits/chosen": 1.4172916412353516, "logits/rejected": 3.157374143600464, "logps/chosen": -1.5696899890899658, "logps/rejected": -1.1946138143539429, "loss": 0.9, "nll_loss": 0.7897965908050537, "rewards/accuracies": 0.5, "rewards/chosen": -0.15696901082992554, "rewards/margins": -0.037507638335227966, "rewards/rejected": -0.11946137994527817, "step": 44 }, { "epoch": 0.027993779160186624, "grad_norm": 0.388747900724411, "learning_rate": 4.775e-05, "log_odds_chosen": 0.06426665186882019, "log_odds_ratio": -0.6822032928466797, "logits/chosen": 1.552047848701477, "logits/rejected": 2.8711278438568115, "logps/chosen": -1.121537208557129, "logps/rejected": -1.158513069152832, "loss": 0.8752, "nll_loss": 0.8070287108421326, "rewards/accuracies": 0.5, "rewards/chosen": -0.11215372383594513, "rewards/margins": 0.003697587177157402, "rewards/rejected": -0.11585131287574768, "step": 45 }, { "epoch": 0.028615863141524107, "grad_norm": 0.31807225942611694, "learning_rate": 4.77e-05, "log_odds_chosen": -0.01670071855187416, "log_odds_ratio": -0.7142594456672668, "logits/chosen": 2.114004135131836, "logits/rejected": 3.1321046352386475, "logps/chosen": -1.0298341512680054, "logps/rejected": -1.018587589263916, "loss": 0.8789, "nll_loss": 0.8074406385421753, "rewards/accuracies": 0.5, "rewards/chosen": -0.10298341512680054, "rewards/margins": -0.0011246586218476295, "rewards/rejected": -0.10185875743627548, "step": 46 }, { "epoch": 0.029237947122861586, "grad_norm": 0.18275466561317444, "learning_rate": 4.765e-05, "log_odds_chosen": -0.062389228492975235, "log_odds_ratio": -0.7514591217041016, "logits/chosen": 1.1435599327087402, "logits/rejected": 2.3499629497528076, "logps/chosen": -1.070279836654663, "logps/rejected": -1.0232110023498535, "loss": 0.7367, "nll_loss": 0.6615514755249023, "rewards/accuracies": 0.375, "rewards/chosen": -0.10702798515558243, "rewards/margins": -0.004706883803009987, "rewards/rejected": -0.102321095764637, "step": 47 }, { "epoch": 0.029860031104199068, "grad_norm": 0.283637136220932, "learning_rate": 4.76e-05, "log_odds_chosen": 0.04298657178878784, "log_odds_ratio": -0.7060101628303528, "logits/chosen": 0.9537409543991089, "logits/rejected": 2.76373291015625, "logps/chosen": -0.9863978624343872, "logps/rejected": -0.9677107334136963, "loss": 0.7097, "nll_loss": 0.6391026973724365, "rewards/accuracies": 0.625, "rewards/chosen": -0.09863978624343872, "rewards/margins": -0.001868714578449726, "rewards/rejected": -0.09677107632160187, "step": 48 }, { "epoch": 0.030482115085536547, "grad_norm": 0.2659762501716614, "learning_rate": 4.755e-05, "log_odds_chosen": -0.15763582289218903, "log_odds_ratio": -0.7842304706573486, "logits/chosen": 0.8293619751930237, "logits/rejected": 2.636796712875366, "logps/chosen": -1.15511953830719, "logps/rejected": -1.038378119468689, "loss": 0.7968, "nll_loss": 0.718331515789032, "rewards/accuracies": 0.375, "rewards/chosen": -0.115511953830719, "rewards/margins": -0.011674145236611366, "rewards/rejected": -0.10383781045675278, "step": 49 }, { "epoch": 0.03110419906687403, "grad_norm": 0.2557560205459595, "learning_rate": 4.75e-05, "log_odds_chosen": -0.03294600918889046, "log_odds_ratio": -0.7847276926040649, "logits/chosen": 1.1990070343017578, "logits/rejected": 3.6774067878723145, "logps/chosen": -1.2008081674575806, "logps/rejected": -1.1088212728500366, "loss": 0.8465, "nll_loss": 0.7680559158325195, "rewards/accuracies": 0.375, "rewards/chosen": -0.12008081376552582, "rewards/margins": -0.009198684245347977, "rewards/rejected": -0.11088212579488754, "step": 50 }, { "epoch": 0.031726283048211505, "grad_norm": 0.21976199746131897, "learning_rate": 4.745e-05, "log_odds_chosen": 0.3041134476661682, "log_odds_ratio": -0.580925464630127, "logits/chosen": 1.4744720458984375, "logits/rejected": 3.0478222370147705, "logps/chosen": -0.8667433261871338, "logps/rejected": -1.0165106058120728, "loss": 0.7666, "nll_loss": 0.7085108757019043, "rewards/accuracies": 0.875, "rewards/chosen": -0.08667434006929398, "rewards/margins": 0.01497671939432621, "rewards/rejected": -0.10165105760097504, "step": 51 }, { "epoch": 0.03234836702954899, "grad_norm": 0.2689627707004547, "learning_rate": 4.74e-05, "log_odds_chosen": -0.04352305829524994, "log_odds_ratio": -0.7418375015258789, "logits/chosen": 2.5920138359069824, "logits/rejected": 2.2302138805389404, "logps/chosen": -1.021632432937622, "logps/rejected": -0.9504865407943726, "loss": 1.0453, "nll_loss": 0.9711462259292603, "rewards/accuracies": 0.25, "rewards/chosen": -0.10216324031352997, "rewards/margins": -0.007114590145647526, "rewards/rejected": -0.09504866600036621, "step": 52 }, { "epoch": 0.03297045101088647, "grad_norm": 0.22366289794445038, "learning_rate": 4.735e-05, "log_odds_chosen": 0.04003528878092766, "log_odds_ratio": -0.6888149976730347, "logits/chosen": 3.182617664337158, "logits/rejected": 3.555081844329834, "logps/chosen": -0.9702656269073486, "logps/rejected": -0.9838634133338928, "loss": 1.0702, "nll_loss": 1.0013432502746582, "rewards/accuracies": 0.5, "rewards/chosen": -0.09702656418085098, "rewards/margins": 0.001359778456389904, "rewards/rejected": -0.09838633239269257, "step": 53 }, { "epoch": 0.03359253499222395, "grad_norm": 0.38575857877731323, "learning_rate": 4.73e-05, "log_odds_chosen": 0.22280624508857727, "log_odds_ratio": -0.6437617540359497, "logits/chosen": 1.0312542915344238, "logits/rejected": 3.3192896842956543, "logps/chosen": -1.1636738777160645, "logps/rejected": -1.2852468490600586, "loss": 0.7405, "nll_loss": 0.6761186122894287, "rewards/accuracies": 0.75, "rewards/chosen": -0.1163673847913742, "rewards/margins": 0.012157305143773556, "rewards/rejected": -0.12852469086647034, "step": 54 }, { "epoch": 0.03421461897356143, "grad_norm": 0.173259437084198, "learning_rate": 4.7249999999999997e-05, "log_odds_chosen": 0.047059908509254456, "log_odds_ratio": -0.7024832963943481, "logits/chosen": 2.349436044692993, "logits/rejected": 2.2064785957336426, "logps/chosen": -1.1089081764221191, "logps/rejected": -1.118696689605713, "loss": 0.998, "nll_loss": 0.9277853965759277, "rewards/accuracies": 0.5, "rewards/chosen": -0.11089081317186356, "rewards/margins": 0.0009788572788238525, "rewards/rejected": -0.11186967045068741, "step": 55 }, { "epoch": 0.034836702954898914, "grad_norm": 0.21381795406341553, "learning_rate": 4.72e-05, "log_odds_chosen": -0.1794767677783966, "log_odds_ratio": -0.7933696508407593, "logits/chosen": 1.327786922454834, "logits/rejected": 1.7987782955169678, "logps/chosen": -1.178837537765503, "logps/rejected": -1.0669177770614624, "loss": 0.867, "nll_loss": 0.7876596450805664, "rewards/accuracies": 0.375, "rewards/chosen": -0.11788375675678253, "rewards/margins": -0.011191976256668568, "rewards/rejected": -0.10669177770614624, "step": 56 }, { "epoch": 0.03545878693623639, "grad_norm": 0.2719959318637848, "learning_rate": 4.715e-05, "log_odds_chosen": -0.30999311804771423, "log_odds_ratio": -0.9616641402244568, "logits/chosen": 2.1510074138641357, "logits/rejected": 3.185220241546631, "logps/chosen": -1.4096884727478027, "logps/rejected": -1.1700233221054077, "loss": 0.9744, "nll_loss": 0.8782221078872681, "rewards/accuracies": 0.375, "rewards/chosen": -0.14096882939338684, "rewards/margins": -0.02396649867296219, "rewards/rejected": -0.11700233817100525, "step": 57 }, { "epoch": 0.03608087091757387, "grad_norm": 0.24494118988513947, "learning_rate": 4.71e-05, "log_odds_chosen": 0.19044572114944458, "log_odds_ratio": -0.6316022276878357, "logits/chosen": 2.780532121658325, "logits/rejected": 3.8303580284118652, "logps/chosen": -1.0032345056533813, "logps/rejected": -1.114229679107666, "loss": 0.9724, "nll_loss": 0.9092568755149841, "rewards/accuracies": 0.375, "rewards/chosen": -0.10032345354557037, "rewards/margins": 0.011099515482783318, "rewards/rejected": -0.11142296344041824, "step": 58 }, { "epoch": 0.03670295489891135, "grad_norm": 0.23869889974594116, "learning_rate": 4.705e-05, "log_odds_chosen": 0.14286461472511292, "log_odds_ratio": -0.6479381322860718, "logits/chosen": 1.359169363975525, "logits/rejected": 2.286386013031006, "logps/chosen": -1.03233003616333, "logps/rejected": -1.1210219860076904, "loss": 0.7819, "nll_loss": 0.7171339988708496, "rewards/accuracies": 0.75, "rewards/chosen": -0.10323300957679749, "rewards/margins": 0.00886919628828764, "rewards/rejected": -0.1121022030711174, "step": 59 }, { "epoch": 0.03732503888024884, "grad_norm": 0.40756744146347046, "learning_rate": 4.7e-05, "log_odds_chosen": 0.5899947285652161, "log_odds_ratio": -0.5545991659164429, "logits/chosen": 2.521620273590088, "logits/rejected": 3.599379062652588, "logps/chosen": -0.8197504281997681, "logps/rejected": -1.0699812173843384, "loss": 0.9495, "nll_loss": 0.8940035104751587, "rewards/accuracies": 0.625, "rewards/chosen": -0.0819750428199768, "rewards/margins": 0.025023072957992554, "rewards/rejected": -0.10699811577796936, "step": 60 }, { "epoch": 0.037947122861586316, "grad_norm": 0.40065282583236694, "learning_rate": 4.695e-05, "log_odds_chosen": 0.022990047931671143, "log_odds_ratio": -0.7364428639411926, "logits/chosen": 0.8582637906074524, "logits/rejected": 2.706080436706543, "logps/chosen": -1.1528658866882324, "logps/rejected": -1.143157958984375, "loss": 0.7133, "nll_loss": 0.6396671533584595, "rewards/accuracies": 0.5, "rewards/chosen": -0.11528658866882324, "rewards/margins": -0.0009707985445857048, "rewards/rejected": -0.11431579291820526, "step": 61 }, { "epoch": 0.038569206842923795, "grad_norm": 0.22253791987895966, "learning_rate": 4.69e-05, "log_odds_chosen": 0.4593227803707123, "log_odds_ratio": -0.5568712949752808, "logits/chosen": 0.7542824745178223, "logits/rejected": 0.9769484400749207, "logps/chosen": -0.932292103767395, "logps/rejected": -1.1556060314178467, "loss": 0.6361, "nll_loss": 0.5803820490837097, "rewards/accuracies": 0.5, "rewards/chosen": -0.09322921931743622, "rewards/margins": 0.022331394255161285, "rewards/rejected": -0.1155606061220169, "step": 62 }, { "epoch": 0.039191290824261274, "grad_norm": 0.4563582241535187, "learning_rate": 4.685000000000001e-05, "log_odds_chosen": 0.05771663784980774, "log_odds_ratio": -0.7888731956481934, "logits/chosen": 1.4716992378234863, "logits/rejected": 3.1634292602539062, "logps/chosen": -1.1427593231201172, "logps/rejected": -1.0027639865875244, "loss": 0.8393, "nll_loss": 0.7604351043701172, "rewards/accuracies": 0.375, "rewards/chosen": -0.11427594721317291, "rewards/margins": -0.013999542221426964, "rewards/rejected": -0.1002763956785202, "step": 63 }, { "epoch": 0.03981337480559875, "grad_norm": 0.5556821823120117, "learning_rate": 4.6800000000000006e-05, "log_odds_chosen": -0.21914222836494446, "log_odds_ratio": -0.8504500389099121, "logits/chosen": 1.2280817031860352, "logits/rejected": 3.1541242599487305, "logps/chosen": -1.2485235929489136, "logps/rejected": -1.1014806032180786, "loss": 0.7758, "nll_loss": 0.6907318830490112, "rewards/accuracies": 0.375, "rewards/chosen": -0.12485235929489136, "rewards/margins": -0.014704296365380287, "rewards/rejected": -0.11014805734157562, "step": 64 }, { "epoch": 0.04043545878693624, "grad_norm": 0.22284820675849915, "learning_rate": 4.6750000000000005e-05, "log_odds_chosen": -0.11895395815372467, "log_odds_ratio": -0.7598588466644287, "logits/chosen": 1.8724644184112549, "logits/rejected": 2.6201133728027344, "logps/chosen": -1.231781244277954, "logps/rejected": -1.1473100185394287, "loss": 0.8682, "nll_loss": 0.7921833992004395, "rewards/accuracies": 0.375, "rewards/chosen": -0.12317812442779541, "rewards/margins": -0.008447128348052502, "rewards/rejected": -0.11473099887371063, "step": 65 }, { "epoch": 0.04105754276827372, "grad_norm": 0.25803834199905396, "learning_rate": 4.6700000000000003e-05, "log_odds_chosen": -0.09246137738227844, "log_odds_ratio": -0.7877139449119568, "logits/chosen": 0.9774875640869141, "logits/rejected": 2.8366801738739014, "logps/chosen": -1.0853774547576904, "logps/rejected": -1.0088139772415161, "loss": 0.7878, "nll_loss": 0.7090435028076172, "rewards/accuracies": 0.375, "rewards/chosen": -0.10853774100542068, "rewards/margins": -0.007656336762011051, "rewards/rejected": -0.10088139772415161, "step": 66 }, { "epoch": 0.0416796267496112, "grad_norm": 0.15937106311321259, "learning_rate": 4.665e-05, "log_odds_chosen": -0.2035675346851349, "log_odds_ratio": -0.8312056064605713, "logits/chosen": 1.3091591596603394, "logits/rejected": 2.8286824226379395, "logps/chosen": -1.1093143224716187, "logps/rejected": -0.9541065096855164, "loss": 0.7537, "nll_loss": 0.6705965399742126, "rewards/accuracies": 0.5, "rewards/chosen": -0.11093142628669739, "rewards/margins": -0.015520783141255379, "rewards/rejected": -0.09541065990924835, "step": 67 }, { "epoch": 0.042301710730948676, "grad_norm": 0.25136131048202515, "learning_rate": 4.660000000000001e-05, "log_odds_chosen": -0.14508166909217834, "log_odds_ratio": -0.8028329014778137, "logits/chosen": 1.2004531621932983, "logits/rejected": 2.2064342498779297, "logps/chosen": -1.021368145942688, "logps/rejected": -0.9574182629585266, "loss": 0.6492, "nll_loss": 0.5688824653625488, "rewards/accuracies": 0.5, "rewards/chosen": -0.10213680565357208, "rewards/margins": -0.006394978612661362, "rewards/rejected": -0.09574183076620102, "step": 68 }, { "epoch": 0.04292379471228616, "grad_norm": 0.18070419132709503, "learning_rate": 4.655000000000001e-05, "log_odds_chosen": 0.4045894742012024, "log_odds_ratio": -0.6035602688789368, "logits/chosen": 2.1579999923706055, "logits/rejected": 2.2763824462890625, "logps/chosen": -0.9550964832305908, "logps/rejected": -1.1929335594177246, "loss": 0.8618, "nll_loss": 0.8014136552810669, "rewards/accuracies": 0.75, "rewards/chosen": -0.09550965577363968, "rewards/margins": 0.02378370799124241, "rewards/rejected": -0.11929336190223694, "step": 69 }, { "epoch": 0.04354587869362364, "grad_norm": 0.5423634052276611, "learning_rate": 4.6500000000000005e-05, "log_odds_chosen": -0.19840699434280396, "log_odds_ratio": -0.8418025374412537, "logits/chosen": 2.095249652862549, "logits/rejected": 2.98868465423584, "logps/chosen": -0.9458328485488892, "logps/rejected": -0.8142030239105225, "loss": 0.9194, "nll_loss": 0.8351881504058838, "rewards/accuracies": 0.375, "rewards/chosen": -0.09458328783512115, "rewards/margins": -0.013162979856133461, "rewards/rejected": -0.08142030984163284, "step": 70 }, { "epoch": 0.04416796267496112, "grad_norm": 0.2877205014228821, "learning_rate": 4.6450000000000004e-05, "log_odds_chosen": 0.0428520105779171, "log_odds_ratio": -0.7418846487998962, "logits/chosen": 0.8320183157920837, "logits/rejected": 2.5365350246429443, "logps/chosen": -0.9517344236373901, "logps/rejected": -0.9330209493637085, "loss": 0.7129, "nll_loss": 0.6387526392936707, "rewards/accuracies": 0.625, "rewards/chosen": -0.0951734408736229, "rewards/margins": -0.0018713511526584625, "rewards/rejected": -0.09330209344625473, "step": 71 }, { "epoch": 0.0447900466562986, "grad_norm": 0.22278496623039246, "learning_rate": 4.64e-05, "log_odds_chosen": -0.054201096296310425, "log_odds_ratio": -0.7669423818588257, "logits/chosen": 2.898837089538574, "logits/rejected": 3.8671226501464844, "logps/chosen": -1.125044345855713, "logps/rejected": -1.1063265800476074, "loss": 0.9359, "nll_loss": 0.8592216372489929, "rewards/accuracies": 0.5, "rewards/chosen": -0.11250443756580353, "rewards/margins": -0.0018717655912041664, "rewards/rejected": -0.11063267290592194, "step": 72 }, { "epoch": 0.04541213063763608, "grad_norm": 0.2126590758562088, "learning_rate": 4.635e-05, "log_odds_chosen": -0.2587714195251465, "log_odds_ratio": -0.891248345375061, "logits/chosen": 1.3153867721557617, "logits/rejected": 3.1547749042510986, "logps/chosen": -1.4425960779190063, "logps/rejected": -1.1883153915405273, "loss": 0.8521, "nll_loss": 0.7630225419998169, "rewards/accuracies": 0.5, "rewards/chosen": -0.14425960183143616, "rewards/margins": -0.025428064167499542, "rewards/rejected": -0.11883153021335602, "step": 73 }, { "epoch": 0.046034214618973564, "grad_norm": 0.42494454979896545, "learning_rate": 4.630000000000001e-05, "log_odds_chosen": 0.1402682065963745, "log_odds_ratio": -0.6622962951660156, "logits/chosen": 1.657547950744629, "logits/rejected": 1.9613828659057617, "logps/chosen": -0.766802191734314, "logps/rejected": -0.799024224281311, "loss": 0.7761, "nll_loss": 0.7098743915557861, "rewards/accuracies": 0.5, "rewards/chosen": -0.07668022811412811, "rewards/margins": 0.0032222005538642406, "rewards/rejected": -0.07990242540836334, "step": 74 }, { "epoch": 0.04665629860031104, "grad_norm": 0.17336860299110413, "learning_rate": 4.6250000000000006e-05, "log_odds_chosen": 0.1157303899526596, "log_odds_ratio": -0.6770206689834595, "logits/chosen": 2.7488303184509277, "logits/rejected": 2.7461235523223877, "logps/chosen": -0.833381175994873, "logps/rejected": -0.8952413201332092, "loss": 0.865, "nll_loss": 0.7972859144210815, "rewards/accuracies": 0.5, "rewards/chosen": -0.08333811908960342, "rewards/margins": 0.006186014041304588, "rewards/rejected": -0.08952413499355316, "step": 75 }, { "epoch": 0.04727838258164852, "grad_norm": 0.18598797917366028, "learning_rate": 4.6200000000000005e-05, "log_odds_chosen": -0.043106935918331146, "log_odds_ratio": -0.7398505210876465, "logits/chosen": 1.1851791143417358, "logits/rejected": 2.6704442501068115, "logps/chosen": -1.0668216943740845, "logps/rejected": -1.033825159072876, "loss": 0.7596, "nll_loss": 0.6856452226638794, "rewards/accuracies": 0.5, "rewards/chosen": -0.1066821739077568, "rewards/margins": -0.003299661912024021, "rewards/rejected": -0.10338252037763596, "step": 76 }, { "epoch": 0.047900466562986, "grad_norm": 0.1425790637731552, "learning_rate": 4.6150000000000004e-05, "log_odds_chosen": -0.12183598428964615, "log_odds_ratio": -0.7798743844032288, "logits/chosen": 0.8571373224258423, "logits/rejected": 2.4758567810058594, "logps/chosen": -1.1691839694976807, "logps/rejected": -1.0594091415405273, "loss": 0.6561, "nll_loss": 0.5780637860298157, "rewards/accuracies": 0.5, "rewards/chosen": -0.1169183999300003, "rewards/margins": -0.010977484285831451, "rewards/rejected": -0.10594092309474945, "step": 77 }, { "epoch": 0.04852255054432349, "grad_norm": 0.23847784101963043, "learning_rate": 4.61e-05, "log_odds_chosen": -0.0839817151427269, "log_odds_ratio": -0.7639693021774292, "logits/chosen": 3.320763349533081, "logits/rejected": 4.665729522705078, "logps/chosen": -0.9754164814949036, "logps/rejected": -0.9289852380752563, "loss": 0.9986, "nll_loss": 0.9221831560134888, "rewards/accuracies": 0.25, "rewards/chosen": -0.09754165261983871, "rewards/margins": -0.004643128253519535, "rewards/rejected": -0.09289852529764175, "step": 78 }, { "epoch": 0.049144634525660966, "grad_norm": 0.2742615342140198, "learning_rate": 4.605e-05, "log_odds_chosen": 0.17970319092273712, "log_odds_ratio": -0.6801419854164124, "logits/chosen": 2.0109176635742188, "logits/rejected": 3.4676363468170166, "logps/chosen": -1.0586036443710327, "logps/rejected": -1.1820811033248901, "loss": 0.8246, "nll_loss": 0.7566049695014954, "rewards/accuracies": 0.875, "rewards/chosen": -0.1058603823184967, "rewards/margins": 0.012347733601927757, "rewards/rejected": -0.11820811033248901, "step": 79 }, { "epoch": 0.049766718506998445, "grad_norm": 0.2062321901321411, "learning_rate": 4.600000000000001e-05, "log_odds_chosen": 0.0320289321243763, "log_odds_ratio": -0.6907176971435547, "logits/chosen": 1.7227623462677002, "logits/rejected": 2.875901937484741, "logps/chosen": -1.0828742980957031, "logps/rejected": -1.0998992919921875, "loss": 0.8051, "nll_loss": 0.7360435128211975, "rewards/accuracies": 0.5, "rewards/chosen": -0.10828742384910583, "rewards/margins": 0.0017025060951709747, "rewards/rejected": -0.1099899411201477, "step": 80 }, { "epoch": 0.050388802488335924, "grad_norm": 0.2916569113731384, "learning_rate": 4.5950000000000006e-05, "log_odds_chosen": -0.3863591253757477, "log_odds_ratio": -0.9683763384819031, "logits/chosen": 1.3633993864059448, "logits/rejected": 2.970306158065796, "logps/chosen": -1.1555265188217163, "logps/rejected": -0.8885267376899719, "loss": 0.7104, "nll_loss": 0.6135969161987305, "rewards/accuracies": 0.375, "rewards/chosen": -0.11555266380310059, "rewards/margins": -0.0266999751329422, "rewards/rejected": -0.08885267376899719, "step": 81 }, { "epoch": 0.0510108864696734, "grad_norm": 0.19114866852760315, "learning_rate": 4.5900000000000004e-05, "log_odds_chosen": 0.09835843741893768, "log_odds_ratio": -0.6535273194313049, "logits/chosen": 1.593051791191101, "logits/rejected": 2.7270560264587402, "logps/chosen": -0.9967777729034424, "logps/rejected": -1.0528018474578857, "loss": 0.7754, "nll_loss": 0.7100351452827454, "rewards/accuracies": 0.5, "rewards/chosen": -0.09967778623104095, "rewards/margins": 0.005602394696325064, "rewards/rejected": -0.10528017580509186, "step": 82 }, { "epoch": 0.05163297045101089, "grad_norm": 0.21949782967567444, "learning_rate": 4.585e-05, "log_odds_chosen": 0.2077902853488922, "log_odds_ratio": -0.6175616979598999, "logits/chosen": 2.0219221115112305, "logits/rejected": 3.6574249267578125, "logps/chosen": -0.9124993085861206, "logps/rejected": -1.0192480087280273, "loss": 0.8179, "nll_loss": 0.7561153769493103, "rewards/accuracies": 0.625, "rewards/chosen": -0.09124993532896042, "rewards/margins": 0.010674861259758472, "rewards/rejected": -0.10192479193210602, "step": 83 }, { "epoch": 0.05225505443234837, "grad_norm": 0.17727652192115784, "learning_rate": 4.58e-05, "log_odds_chosen": 0.38519418239593506, "log_odds_ratio": -0.5557951331138611, "logits/chosen": 0.8552349209785461, "logits/rejected": 2.699705123901367, "logps/chosen": -0.8921289443969727, "logps/rejected": -1.1134952306747437, "loss": 0.6496, "nll_loss": 0.5940346121788025, "rewards/accuracies": 0.75, "rewards/chosen": -0.08921289443969727, "rewards/margins": 0.022136623039841652, "rewards/rejected": -0.11134952306747437, "step": 84 }, { "epoch": 0.05287713841368585, "grad_norm": 0.2293669730424881, "learning_rate": 4.575e-05, "log_odds_chosen": -0.21045897901058197, "log_odds_ratio": -0.8564555048942566, "logits/chosen": 1.66201913356781, "logits/rejected": 2.8478963375091553, "logps/chosen": -1.3357698917388916, "logps/rejected": -1.1636067628860474, "loss": 0.8297, "nll_loss": 0.7440907955169678, "rewards/accuracies": 0.625, "rewards/chosen": -0.13357700407505035, "rewards/margins": -0.01721632108092308, "rewards/rejected": -0.11636068671941757, "step": 85 }, { "epoch": 0.053499222395023326, "grad_norm": 0.29000750184059143, "learning_rate": 4.5700000000000006e-05, "log_odds_chosen": -0.1497875154018402, "log_odds_ratio": -0.7847409248352051, "logits/chosen": 2.6281542778015137, "logits/rejected": 3.4142749309539795, "logps/chosen": -1.2299106121063232, "logps/rejected": -1.1186976432800293, "loss": 1.0464, "nll_loss": 0.9679691195487976, "rewards/accuracies": 0.5, "rewards/chosen": -0.12299105525016785, "rewards/margins": -0.011121302843093872, "rewards/rejected": -0.11186975985765457, "step": 86 }, { "epoch": 0.05412130637636081, "grad_norm": 0.26299771666526794, "learning_rate": 4.5650000000000005e-05, "log_odds_chosen": 0.5908235311508179, "log_odds_ratio": -0.49592533707618713, "logits/chosen": 1.5190047025680542, "logits/rejected": 2.7360827922821045, "logps/chosen": -0.7108929753303528, "logps/rejected": -0.9945129752159119, "loss": 0.7238, "nll_loss": 0.6742295026779175, "rewards/accuracies": 0.75, "rewards/chosen": -0.07108929753303528, "rewards/margins": 0.02836199663579464, "rewards/rejected": -0.09945128858089447, "step": 87 }, { "epoch": 0.05474339035769829, "grad_norm": 0.19910745322704315, "learning_rate": 4.5600000000000004e-05, "log_odds_chosen": -0.4192565679550171, "log_odds_ratio": -0.9752273559570312, "logits/chosen": 0.8375768065452576, "logits/rejected": 1.8594812154769897, "logps/chosen": -1.1285021305084229, "logps/rejected": -0.8286036252975464, "loss": 0.7274, "nll_loss": 0.6299166679382324, "rewards/accuracies": 0.375, "rewards/chosen": -0.11285021156072617, "rewards/margins": -0.029989851638674736, "rewards/rejected": -0.08286036550998688, "step": 88 }, { "epoch": 0.05536547433903577, "grad_norm": 0.43857982754707336, "learning_rate": 4.555e-05, "log_odds_chosen": -0.48293331265449524, "log_odds_ratio": -1.1548466682434082, "logits/chosen": 1.518347978591919, "logits/rejected": 2.750607490539551, "logps/chosen": -1.5658156871795654, "logps/rejected": -1.0724000930786133, "loss": 0.8034, "nll_loss": 0.6879459619522095, "rewards/accuracies": 0.625, "rewards/chosen": -0.1565815806388855, "rewards/margins": -0.049341559410095215, "rewards/rejected": -0.10724002122879028, "step": 89 }, { "epoch": 0.05598755832037325, "grad_norm": 0.26479724049568176, "learning_rate": 4.55e-05, "log_odds_chosen": 0.23310042917728424, "log_odds_ratio": -0.6027222275733948, "logits/chosen": 1.3731447458267212, "logits/rejected": 2.6510982513427734, "logps/chosen": -1.0542906522750854, "logps/rejected": -1.2222278118133545, "loss": 0.7583, "nll_loss": 0.6980407238006592, "rewards/accuracies": 0.75, "rewards/chosen": -0.10542906820774078, "rewards/margins": 0.016793712973594666, "rewards/rejected": -0.12222278118133545, "step": 90 }, { "epoch": 0.05660964230171073, "grad_norm": 0.24054040014743805, "learning_rate": 4.545000000000001e-05, "log_odds_chosen": 0.2605292499065399, "log_odds_ratio": -0.6213144063949585, "logits/chosen": 1.1814061403274536, "logits/rejected": 2.075345277786255, "logps/chosen": -1.1208800077438354, "logps/rejected": -1.2274812459945679, "loss": 0.7286, "nll_loss": 0.6664672493934631, "rewards/accuracies": 0.5, "rewards/chosen": -0.11208800971508026, "rewards/margins": 0.010660117492079735, "rewards/rejected": -0.12274813652038574, "step": 91 }, { "epoch": 0.05723172628304821, "grad_norm": 0.39278140664100647, "learning_rate": 4.5400000000000006e-05, "log_odds_chosen": -0.26259005069732666, "log_odds_ratio": -0.8627680540084839, "logits/chosen": 1.247246503829956, "logits/rejected": 2.8098278045654297, "logps/chosen": -1.082601547241211, "logps/rejected": -0.9376986622810364, "loss": 0.8206, "nll_loss": 0.7343120574951172, "rewards/accuracies": 0.25, "rewards/chosen": -0.1082601547241211, "rewards/margins": -0.014490286819636822, "rewards/rejected": -0.09376987814903259, "step": 92 }, { "epoch": 0.05785381026438569, "grad_norm": 0.1920693963766098, "learning_rate": 4.5350000000000005e-05, "log_odds_chosen": 0.21707671880722046, "log_odds_ratio": -0.6184121370315552, "logits/chosen": 2.694953441619873, "logits/rejected": 3.1188573837280273, "logps/chosen": -0.831054151058197, "logps/rejected": -0.9499989151954651, "loss": 0.9065, "nll_loss": 0.8446973562240601, "rewards/accuracies": 0.875, "rewards/chosen": -0.0831054076552391, "rewards/margins": 0.011894481256604195, "rewards/rejected": -0.09499989449977875, "step": 93 }, { "epoch": 0.05847589424572317, "grad_norm": 0.21006019413471222, "learning_rate": 4.53e-05, "log_odds_chosen": 0.05670143663883209, "log_odds_ratio": -0.6701164245605469, "logits/chosen": 1.0237641334533691, "logits/rejected": 2.528775215148926, "logps/chosen": -1.1318689584732056, "logps/rejected": -1.1793004274368286, "loss": 0.7016, "nll_loss": 0.6345678567886353, "rewards/accuracies": 0.5, "rewards/chosen": -0.11318689584732056, "rewards/margins": 0.004743154160678387, "rewards/rejected": -0.11793004721403122, "step": 94 }, { "epoch": 0.05909797822706065, "grad_norm": 0.2606309950351715, "learning_rate": 4.525e-05, "log_odds_chosen": -0.46452876925468445, "log_odds_ratio": -1.0579557418823242, "logits/chosen": 0.17954808473587036, "logits/rejected": 2.247058391571045, "logps/chosen": -1.6242289543151855, "logps/rejected": -1.2107869386672974, "loss": 0.6754, "nll_loss": 0.5696069598197937, "rewards/accuracies": 0.625, "rewards/chosen": -0.16242289543151855, "rewards/margins": -0.04134419932961464, "rewards/rejected": -0.12107868492603302, "step": 95 }, { "epoch": 0.059720062208398136, "grad_norm": 0.2298119217157364, "learning_rate": 4.52e-05, "log_odds_chosen": 0.015163015574216843, "log_odds_ratio": -0.6908687353134155, "logits/chosen": 2.02345871925354, "logits/rejected": 3.0907723903656006, "logps/chosen": -1.0270793437957764, "logps/rejected": -1.0349122285842896, "loss": 0.7404, "nll_loss": 0.6712822318077087, "rewards/accuracies": 0.625, "rewards/chosen": -0.10270794481039047, "rewards/margins": 0.0007832786068320274, "rewards/rejected": -0.10349121689796448, "step": 96 }, { "epoch": 0.060342146189735615, "grad_norm": 0.4934215247631073, "learning_rate": 4.5150000000000006e-05, "log_odds_chosen": -0.43015801906585693, "log_odds_ratio": -0.9842768311500549, "logits/chosen": 2.0014078617095947, "logits/rejected": 3.313342809677124, "logps/chosen": -1.313707709312439, "logps/rejected": -0.9843780994415283, "loss": 0.8658, "nll_loss": 0.7673825621604919, "rewards/accuracies": 0.375, "rewards/chosen": -0.13137076795101166, "rewards/margins": -0.032932963222265244, "rewards/rejected": -0.09843780100345612, "step": 97 }, { "epoch": 0.060964230171073094, "grad_norm": 0.20960745215415955, "learning_rate": 4.5100000000000005e-05, "log_odds_chosen": 0.02951214462518692, "log_odds_ratio": -0.774150550365448, "logits/chosen": 1.9913530349731445, "logits/rejected": 2.7125914096832275, "logps/chosen": -1.133882999420166, "logps/rejected": -1.1040964126586914, "loss": 0.9025, "nll_loss": 0.8251147866249084, "rewards/accuracies": 0.625, "rewards/chosen": -0.1133883148431778, "rewards/margins": -0.002978656440973282, "rewards/rejected": -0.11040964722633362, "step": 98 }, { "epoch": 0.06158631415241057, "grad_norm": 0.21473316848278046, "learning_rate": 4.5050000000000004e-05, "log_odds_chosen": -0.014744851738214493, "log_odds_ratio": -0.7248890399932861, "logits/chosen": 2.0002171993255615, "logits/rejected": 2.113471269607544, "logps/chosen": -0.9717570543289185, "logps/rejected": -0.9748814105987549, "loss": 0.7985, "nll_loss": 0.7260515093803406, "rewards/accuracies": 0.625, "rewards/chosen": -0.0971757099032402, "rewards/margins": 0.0003124335780739784, "rewards/rejected": -0.09748813509941101, "step": 99 }, { "epoch": 0.06220839813374806, "grad_norm": 0.21056877076625824, "learning_rate": 4.5e-05, "log_odds_chosen": -0.039973024278879166, "log_odds_ratio": -0.7171774506568909, "logits/chosen": 2.3132238388061523, "logits/rejected": 2.871912717819214, "logps/chosen": -1.0969483852386475, "logps/rejected": -1.065551996231079, "loss": 0.8489, "nll_loss": 0.7771739959716797, "rewards/accuracies": 0.625, "rewards/chosen": -0.10969482362270355, "rewards/margins": -0.0031396341510117054, "rewards/rejected": -0.10655518621206284, "step": 100 }, { "epoch": 0.06283048211508553, "grad_norm": 0.17404352128505707, "learning_rate": 4.495e-05, "log_odds_chosen": 0.03608079254627228, "log_odds_ratio": -0.7069602608680725, "logits/chosen": 1.436377763748169, "logits/rejected": 2.1634533405303955, "logps/chosen": -0.9186925292015076, "logps/rejected": -0.8996652960777283, "loss": 0.6925, "nll_loss": 0.621782124042511, "rewards/accuracies": 0.5, "rewards/chosen": -0.09186924993991852, "rewards/margins": -0.0019027264788746834, "rewards/rejected": -0.08996652066707611, "step": 101 }, { "epoch": 0.06345256609642301, "grad_norm": 0.3078821301460266, "learning_rate": 4.49e-05, "log_odds_chosen": -0.12548185884952545, "log_odds_ratio": -0.8313822746276855, "logits/chosen": 0.9674258232116699, "logits/rejected": 3.7763495445251465, "logps/chosen": -1.282372236251831, "logps/rejected": -1.1455416679382324, "loss": 0.7181, "nll_loss": 0.6350088715553284, "rewards/accuracies": 0.375, "rewards/chosen": -0.12823721766471863, "rewards/margins": -0.013683034107089043, "rewards/rejected": -0.11455416679382324, "step": 102 }, { "epoch": 0.0640746500777605, "grad_norm": 0.2155919075012207, "learning_rate": 4.4850000000000006e-05, "log_odds_chosen": 0.16065430641174316, "log_odds_ratio": -0.6278191804885864, "logits/chosen": 2.0724029541015625, "logits/rejected": 3.3654284477233887, "logps/chosen": -1.0031177997589111, "logps/rejected": -1.0878639221191406, "loss": 0.78, "nll_loss": 0.7172412276268005, "rewards/accuracies": 0.75, "rewards/chosen": -0.100311778485775, "rewards/margins": 0.00847461074590683, "rewards/rejected": -0.10878638923168182, "step": 103 }, { "epoch": 0.06469673405909798, "grad_norm": 0.26061123609542847, "learning_rate": 4.4800000000000005e-05, "log_odds_chosen": -0.028148103505373, "log_odds_ratio": -0.7147566080093384, "logits/chosen": 1.7463017702102661, "logits/rejected": 2.0959701538085938, "logps/chosen": -0.9808987379074097, "logps/rejected": -0.9746424555778503, "loss": 0.824, "nll_loss": 0.7525503039360046, "rewards/accuracies": 0.625, "rewards/chosen": -0.09808988124132156, "rewards/margins": -0.000625628512352705, "rewards/rejected": -0.09746424853801727, "step": 104 }, { "epoch": 0.06531881804043546, "grad_norm": 0.4242609739303589, "learning_rate": 4.4750000000000004e-05, "log_odds_chosen": -0.15638473629951477, "log_odds_ratio": -0.8672036528587341, "logits/chosen": 1.6652724742889404, "logits/rejected": 1.994666576385498, "logps/chosen": -1.2177703380584717, "logps/rejected": -1.057265043258667, "loss": 0.6882, "nll_loss": 0.6014548540115356, "rewards/accuracies": 0.5, "rewards/chosen": -0.12177702784538269, "rewards/margins": -0.016050517559051514, "rewards/rejected": -0.10572651028633118, "step": 105 }, { "epoch": 0.06594090202177294, "grad_norm": 0.3692471385002136, "learning_rate": 4.47e-05, "log_odds_chosen": 0.24592670798301697, "log_odds_ratio": -0.793420672416687, "logits/chosen": 2.528710126876831, "logits/rejected": 3.5555386543273926, "logps/chosen": -1.0099613666534424, "logps/rejected": -0.9727597236633301, "loss": 0.9496, "nll_loss": 0.870262622833252, "rewards/accuracies": 0.75, "rewards/chosen": -0.10099614411592484, "rewards/margins": -0.0037201670929789543, "rewards/rejected": -0.09727597236633301, "step": 106 }, { "epoch": 0.06656298600311042, "grad_norm": 0.1919865608215332, "learning_rate": 4.465e-05, "log_odds_chosen": 0.038264740258455276, "log_odds_ratio": -0.7012819647789001, "logits/chosen": 0.6481499671936035, "logits/rejected": 1.7020373344421387, "logps/chosen": -1.1631755828857422, "logps/rejected": -1.2091020345687866, "loss": 0.7014, "nll_loss": 0.6312416195869446, "rewards/accuracies": 0.375, "rewards/chosen": -0.11631755530834198, "rewards/margins": 0.004592650569975376, "rewards/rejected": -0.12091021239757538, "step": 107 }, { "epoch": 0.0671850699844479, "grad_norm": 0.2411794811487198, "learning_rate": 4.46e-05, "log_odds_chosen": -0.03066416084766388, "log_odds_ratio": -0.7151470184326172, "logits/chosen": 1.6738927364349365, "logits/rejected": 2.311450481414795, "logps/chosen": -1.1736071109771729, "logps/rejected": -1.1376099586486816, "loss": 0.8258, "nll_loss": 0.7543072700500488, "rewards/accuracies": 0.375, "rewards/chosen": -0.11736071854829788, "rewards/margins": -0.0035997098311781883, "rewards/rejected": -0.11376100778579712, "step": 108 }, { "epoch": 0.06780715396578538, "grad_norm": 0.19159743189811707, "learning_rate": 4.4550000000000005e-05, "log_odds_chosen": 0.06242550164461136, "log_odds_ratio": -0.6757259964942932, "logits/chosen": 2.0136990547180176, "logits/rejected": 1.885206699371338, "logps/chosen": -1.0233063697814941, "logps/rejected": -1.0701019763946533, "loss": 0.7497, "nll_loss": 0.6821112036705017, "rewards/accuracies": 0.5, "rewards/chosen": -0.10233063995838165, "rewards/margins": 0.004679564386606216, "rewards/rejected": -0.10701019316911697, "step": 109 }, { "epoch": 0.06842923794712286, "grad_norm": 0.23534443974494934, "learning_rate": 4.4500000000000004e-05, "log_odds_chosen": 0.31563812494277954, "log_odds_ratio": -0.5620191097259521, "logits/chosen": 2.677371025085449, "logits/rejected": 3.858182191848755, "logps/chosen": -0.9001976251602173, "logps/rejected": -1.078892469406128, "loss": 0.848, "nll_loss": 0.7917859554290771, "rewards/accuracies": 0.75, "rewards/chosen": -0.09001976251602173, "rewards/margins": 0.017869489267468452, "rewards/rejected": -0.10788924992084503, "step": 110 }, { "epoch": 0.06905132192846034, "grad_norm": 0.23337529599666595, "learning_rate": 4.445e-05, "log_odds_chosen": -0.18345850706100464, "log_odds_ratio": -0.8887477517127991, "logits/chosen": 1.6702814102172852, "logits/rejected": 3.8112714290618896, "logps/chosen": -1.1495945453643799, "logps/rejected": -0.9585939645767212, "loss": 0.7497, "nll_loss": 0.6608361005783081, "rewards/accuracies": 0.375, "rewards/chosen": -0.11495945602655411, "rewards/margins": -0.019100071862339973, "rewards/rejected": -0.09585939347743988, "step": 111 }, { "epoch": 0.06967340590979783, "grad_norm": 0.20986708998680115, "learning_rate": 4.44e-05, "log_odds_chosen": 0.09954327344894409, "log_odds_ratio": -0.6632246971130371, "logits/chosen": 1.9254231452941895, "logits/rejected": 2.9965901374816895, "logps/chosen": -0.9920103549957275, "logps/rejected": -1.0455927848815918, "loss": 0.7958, "nll_loss": 0.7294626235961914, "rewards/accuracies": 0.5, "rewards/chosen": -0.09920103847980499, "rewards/margins": 0.005358239635825157, "rewards/rejected": -0.1045592799782753, "step": 112 }, { "epoch": 0.07029548989113531, "grad_norm": 0.2572464346885681, "learning_rate": 4.435e-05, "log_odds_chosen": -0.011964142322540283, "log_odds_ratio": -0.7803436517715454, "logits/chosen": -0.40443724393844604, "logits/rejected": 1.6792998313903809, "logps/chosen": -1.0715501308441162, "logps/rejected": -0.9998044967651367, "loss": 0.5049, "nll_loss": 0.42683103680610657, "rewards/accuracies": 0.375, "rewards/chosen": -0.10715501755475998, "rewards/margins": -0.007174567319452763, "rewards/rejected": -0.09998045116662979, "step": 113 }, { "epoch": 0.07091757387247279, "grad_norm": 0.22227022051811218, "learning_rate": 4.43e-05, "log_odds_chosen": 0.5500046014785767, "log_odds_ratio": -0.5628729462623596, "logits/chosen": 2.092911958694458, "logits/rejected": 2.7500369548797607, "logps/chosen": -0.6779322028160095, "logps/rejected": -0.9072044491767883, "loss": 0.7892, "nll_loss": 0.7329133749008179, "rewards/accuracies": 0.75, "rewards/chosen": -0.06779322028160095, "rewards/margins": 0.02292722463607788, "rewards/rejected": -0.09072044491767883, "step": 114 }, { "epoch": 0.07153965785381027, "grad_norm": 0.2829152047634125, "learning_rate": 4.4250000000000005e-05, "log_odds_chosen": -0.08828212320804596, "log_odds_ratio": -0.7812116146087646, "logits/chosen": 0.9317520260810852, "logits/rejected": 2.114894390106201, "logps/chosen": -1.2550923824310303, "logps/rejected": -1.182921290397644, "loss": 0.6555, "nll_loss": 0.5774081945419312, "rewards/accuracies": 0.5, "rewards/chosen": -0.12550924718379974, "rewards/margins": -0.007217114791274071, "rewards/rejected": -0.11829212307929993, "step": 115 }, { "epoch": 0.07216174183514774, "grad_norm": 0.3006725013256073, "learning_rate": 4.4200000000000004e-05, "log_odds_chosen": 0.020487122237682343, "log_odds_ratio": -0.749638557434082, "logits/chosen": 1.9692126512527466, "logits/rejected": 2.8845958709716797, "logps/chosen": -1.1205189228057861, "logps/rejected": -1.1407134532928467, "loss": 0.8778, "nll_loss": 0.8028479814529419, "rewards/accuracies": 0.75, "rewards/chosen": -0.11205189675092697, "rewards/margins": 0.0020194537937641144, "rewards/rejected": -0.11407135426998138, "step": 116 }, { "epoch": 0.07278382581648522, "grad_norm": 0.2044357806444168, "learning_rate": 4.415e-05, "log_odds_chosen": 0.0019652023911476135, "log_odds_ratio": -0.7550162076950073, "logits/chosen": 1.0696661472320557, "logits/rejected": 2.325007677078247, "logps/chosen": -1.0620698928833008, "logps/rejected": -1.0414937734603882, "loss": 0.7055, "nll_loss": 0.6299588680267334, "rewards/accuracies": 0.375, "rewards/chosen": -0.1062069982290268, "rewards/margins": -0.0020576175302267075, "rewards/rejected": -0.10414938628673553, "step": 117 }, { "epoch": 0.0734059097978227, "grad_norm": 0.24016836285591125, "learning_rate": 4.41e-05, "log_odds_chosen": -0.1463804543018341, "log_odds_ratio": -0.8528624176979065, "logits/chosen": 0.5641318559646606, "logits/rejected": 1.8689111471176147, "logps/chosen": -1.148106336593628, "logps/rejected": -0.9835876226425171, "loss": 0.5974, "nll_loss": 0.5120998620986938, "rewards/accuracies": 0.5, "rewards/chosen": -0.11481063812971115, "rewards/margins": -0.016451874747872353, "rewards/rejected": -0.09835876524448395, "step": 118 }, { "epoch": 0.07402799377916018, "grad_norm": 0.2635195553302765, "learning_rate": 4.405e-05, "log_odds_chosen": 0.1805187165737152, "log_odds_ratio": -0.6187409162521362, "logits/chosen": 0.9240512251853943, "logits/rejected": 3.181490898132324, "logps/chosen": -1.1101839542388916, "logps/rejected": -1.2136919498443604, "loss": 0.6813, "nll_loss": 0.6194241046905518, "rewards/accuracies": 0.75, "rewards/chosen": -0.11101838946342468, "rewards/margins": 0.010350802913308144, "rewards/rejected": -0.12136919796466827, "step": 119 }, { "epoch": 0.07465007776049767, "grad_norm": 0.2084805965423584, "learning_rate": 4.4000000000000006e-05, "log_odds_chosen": -0.07975497096776962, "log_odds_ratio": -0.7737417817115784, "logits/chosen": 0.5016249418258667, "logits/rejected": 2.5767385959625244, "logps/chosen": -1.0363280773162842, "logps/rejected": -0.9799712896347046, "loss": 0.6462, "nll_loss": 0.5687938928604126, "rewards/accuracies": 0.5, "rewards/chosen": -0.10363280773162842, "rewards/margins": -0.005635684821754694, "rewards/rejected": -0.09799712896347046, "step": 120 }, { "epoch": 0.07527216174183515, "grad_norm": 0.22351621091365814, "learning_rate": 4.3950000000000004e-05, "log_odds_chosen": 0.28257864713668823, "log_odds_ratio": -0.569965124130249, "logits/chosen": 1.8498495817184448, "logits/rejected": 1.6400539875030518, "logps/chosen": -0.9594013690948486, "logps/rejected": -1.1362048387527466, "loss": 0.7609, "nll_loss": 0.7038699388504028, "rewards/accuracies": 0.875, "rewards/chosen": -0.09594013541936874, "rewards/margins": 0.017680345103144646, "rewards/rejected": -0.11362048983573914, "step": 121 }, { "epoch": 0.07589424572317263, "grad_norm": 0.21545082330703735, "learning_rate": 4.39e-05, "log_odds_chosen": 0.19825129210948944, "log_odds_ratio": -0.6359566450119019, "logits/chosen": 1.388837456703186, "logits/rejected": 1.8813964128494263, "logps/chosen": -0.8985767364501953, "logps/rejected": -0.9870655536651611, "loss": 0.7647, "nll_loss": 0.701100766658783, "rewards/accuracies": 0.75, "rewards/chosen": -0.08985768258571625, "rewards/margins": 0.00884888507425785, "rewards/rejected": -0.09870655834674835, "step": 122 }, { "epoch": 0.07651632970451011, "grad_norm": 0.25020650029182434, "learning_rate": 4.385e-05, "log_odds_chosen": 0.05013291537761688, "log_odds_ratio": -0.6812134981155396, "logits/chosen": 1.9867496490478516, "logits/rejected": 2.5251035690307617, "logps/chosen": -1.0619776248931885, "logps/rejected": -1.1009055376052856, "loss": 0.7372, "nll_loss": 0.669075071811676, "rewards/accuracies": 0.625, "rewards/chosen": -0.10619775950908661, "rewards/margins": 0.0038927989080548286, "rewards/rejected": -0.11009055376052856, "step": 123 }, { "epoch": 0.07713841368584759, "grad_norm": 0.2289746254682541, "learning_rate": 4.38e-05, "log_odds_chosen": 0.8278762102127075, "log_odds_ratio": -0.4634576439857483, "logits/chosen": 1.5844309329986572, "logits/rejected": 2.797327995300293, "logps/chosen": -0.75380539894104, "logps/rejected": -1.1942193508148193, "loss": 0.7341, "nll_loss": 0.687774658203125, "rewards/accuracies": 0.75, "rewards/chosen": -0.07538054138422012, "rewards/margins": 0.04404138773679733, "rewards/rejected": -0.11942192912101746, "step": 124 }, { "epoch": 0.07776049766718507, "grad_norm": 0.24135081470012665, "learning_rate": 4.375e-05, "log_odds_chosen": 0.6638460755348206, "log_odds_ratio": -0.5225129127502441, "logits/chosen": 1.0815123319625854, "logits/rejected": 1.9553213119506836, "logps/chosen": -0.7082183957099915, "logps/rejected": -1.012854814529419, "loss": 0.627, "nll_loss": 0.5747087597846985, "rewards/accuracies": 0.75, "rewards/chosen": -0.0708218514919281, "rewards/margins": 0.030463647097349167, "rewards/rejected": -0.10128549486398697, "step": 125 }, { "epoch": 0.07838258164852255, "grad_norm": 0.20711150765419006, "learning_rate": 4.3700000000000005e-05, "log_odds_chosen": 0.07271279394626617, "log_odds_ratio": -0.680426836013794, "logits/chosen": 1.3267732858657837, "logits/rejected": 2.416010618209839, "logps/chosen": -1.0107229948043823, "logps/rejected": -1.066054105758667, "loss": 0.7109, "nll_loss": 0.6428604125976562, "rewards/accuracies": 0.5, "rewards/chosen": -0.1010722890496254, "rewards/margins": 0.005533117335289717, "rewards/rejected": -0.1066054105758667, "step": 126 }, { "epoch": 0.07900466562986003, "grad_norm": 0.20425938069820404, "learning_rate": 4.3650000000000004e-05, "log_odds_chosen": 0.27624690532684326, "log_odds_ratio": -0.5898454189300537, "logits/chosen": 2.7942919731140137, "logits/rejected": 2.604480266571045, "logps/chosen": -0.8998833894729614, "logps/rejected": -1.0633008480072021, "loss": 0.9294, "nll_loss": 0.8704652786254883, "rewards/accuracies": 0.75, "rewards/chosen": -0.0899883359670639, "rewards/margins": 0.016341738402843475, "rewards/rejected": -0.10633008182048798, "step": 127 }, { "epoch": 0.0796267496111975, "grad_norm": 0.23229432106018066, "learning_rate": 4.36e-05, "log_odds_chosen": -0.17983496189117432, "log_odds_ratio": -0.8006916046142578, "logits/chosen": 1.8407796621322632, "logits/rejected": 3.0865976810455322, "logps/chosen": -0.9708831310272217, "logps/rejected": -0.877918004989624, "loss": 0.8562, "nll_loss": 0.776081919670105, "rewards/accuracies": 0.25, "rewards/chosen": -0.09708831459283829, "rewards/margins": -0.009296516887843609, "rewards/rejected": -0.0877918004989624, "step": 128 }, { "epoch": 0.080248833592535, "grad_norm": 0.2508530914783478, "learning_rate": 4.355e-05, "log_odds_chosen": 0.16054421663284302, "log_odds_ratio": -0.6215721368789673, "logits/chosen": 1.587475061416626, "logits/rejected": 1.081530213356018, "logps/chosen": -1.0525381565093994, "logps/rejected": -1.1599868535995483, "loss": 0.7807, "nll_loss": 0.718558669090271, "rewards/accuracies": 0.75, "rewards/chosen": -0.10525382310152054, "rewards/margins": 0.010744860395789146, "rewards/rejected": -0.11599868535995483, "step": 129 }, { "epoch": 0.08087091757387248, "grad_norm": 0.2501443028450012, "learning_rate": 4.35e-05, "log_odds_chosen": 0.1309620440006256, "log_odds_ratio": -0.8406453728675842, "logits/chosen": -0.24144795536994934, "logits/rejected": 2.991492748260498, "logps/chosen": -1.2649434804916382, "logps/rejected": -1.2791178226470947, "loss": 0.5402, "nll_loss": 0.4561711847782135, "rewards/accuracies": 0.625, "rewards/chosen": -0.12649434804916382, "rewards/margins": 0.0014174282550811768, "rewards/rejected": -0.127911776304245, "step": 130 }, { "epoch": 0.08149300155520996, "grad_norm": 0.1905927062034607, "learning_rate": 4.345e-05, "log_odds_chosen": 0.6442151069641113, "log_odds_ratio": -0.5027979016304016, "logits/chosen": 1.5362439155578613, "logits/rejected": 1.6745655536651611, "logps/chosen": -0.8681899309158325, "logps/rejected": -1.2771090269088745, "loss": 0.7633, "nll_loss": 0.7129961848258972, "rewards/accuracies": 0.625, "rewards/chosen": -0.08681898564100266, "rewards/margins": 0.040891923010349274, "rewards/rejected": -0.12771092355251312, "step": 131 }, { "epoch": 0.08211508553654744, "grad_norm": 0.1754845827817917, "learning_rate": 4.3400000000000005e-05, "log_odds_chosen": 0.23881648480892181, "log_odds_ratio": -0.6901636123657227, "logits/chosen": 1.53242027759552, "logits/rejected": 3.242114543914795, "logps/chosen": -0.8632210493087769, "logps/rejected": -0.9391394853591919, "loss": 0.7607, "nll_loss": 0.6916568279266357, "rewards/accuracies": 0.5, "rewards/chosen": -0.0863221138715744, "rewards/margins": 0.007591850124299526, "rewards/rejected": -0.0939139574766159, "step": 132 }, { "epoch": 0.08273716951788491, "grad_norm": 0.30056408047676086, "learning_rate": 4.335e-05, "log_odds_chosen": 0.33130699396133423, "log_odds_ratio": -0.738244354724884, "logits/chosen": 1.5825538635253906, "logits/rejected": 2.5602152347564697, "logps/chosen": -1.2096562385559082, "logps/rejected": -1.3360333442687988, "loss": 0.7298, "nll_loss": 0.6560216546058655, "rewards/accuracies": 0.5, "rewards/chosen": -0.1209656149148941, "rewards/margins": 0.012637722305953503, "rewards/rejected": -0.13360333442687988, "step": 133 }, { "epoch": 0.0833592534992224, "grad_norm": 0.253454327583313, "learning_rate": 4.33e-05, "log_odds_chosen": -0.36041635274887085, "log_odds_ratio": -0.9571161270141602, "logits/chosen": 1.5677220821380615, "logits/rejected": 2.3479645252227783, "logps/chosen": -1.132239580154419, "logps/rejected": -0.9019601345062256, "loss": 0.7929, "nll_loss": 0.6972097158432007, "rewards/accuracies": 0.375, "rewards/chosen": -0.11322395503520966, "rewards/margins": -0.023027939721941948, "rewards/rejected": -0.09019602090120316, "step": 134 }, { "epoch": 0.08398133748055987, "grad_norm": 0.21625332534313202, "learning_rate": 4.325e-05, "log_odds_chosen": 0.08987618237733841, "log_odds_ratio": -0.6680045127868652, "logits/chosen": 2.276296377182007, "logits/rejected": 2.9406542778015137, "logps/chosen": -0.8994870781898499, "logps/rejected": -0.9431521892547607, "loss": 0.8555, "nll_loss": 0.7886654138565063, "rewards/accuracies": 0.375, "rewards/chosen": -0.08994871377944946, "rewards/margins": 0.004366515204310417, "rewards/rejected": -0.09431522339582443, "step": 135 }, { "epoch": 0.08460342146189735, "grad_norm": 0.18930700421333313, "learning_rate": 4.32e-05, "log_odds_chosen": -0.0023861005902290344, "log_odds_ratio": -0.7080649137496948, "logits/chosen": 0.006744086742401123, "logits/rejected": 1.9157198667526245, "logps/chosen": -0.8520735502243042, "logps/rejected": -0.8123384714126587, "loss": 0.5836, "nll_loss": 0.5127810835838318, "rewards/accuracies": 0.375, "rewards/chosen": -0.08520735800266266, "rewards/margins": -0.003973505459725857, "rewards/rejected": -0.08123385906219482, "step": 136 }, { "epoch": 0.08522550544323483, "grad_norm": 0.22874124348163605, "learning_rate": 4.315e-05, "log_odds_chosen": 0.33399975299835205, "log_odds_ratio": -0.6337571144104004, "logits/chosen": 2.6366705894470215, "logits/rejected": 3.1834094524383545, "logps/chosen": -1.010011911392212, "logps/rejected": -1.225651741027832, "loss": 0.9296, "nll_loss": 0.8662106990814209, "rewards/accuracies": 0.5, "rewards/chosen": -0.10100119560956955, "rewards/margins": 0.02156398445367813, "rewards/rejected": -0.12256518006324768, "step": 137 }, { "epoch": 0.08584758942457232, "grad_norm": 0.2262139767408371, "learning_rate": 4.3100000000000004e-05, "log_odds_chosen": 0.2507573962211609, "log_odds_ratio": -0.6413716673851013, "logits/chosen": 2.071014642715454, "logits/rejected": 2.5810890197753906, "logps/chosen": -1.1221033334732056, "logps/rejected": -1.1979209184646606, "loss": 0.7838, "nll_loss": 0.7196545004844666, "rewards/accuracies": 0.375, "rewards/chosen": -0.11221033334732056, "rewards/margins": 0.007581758312880993, "rewards/rejected": -0.11979209631681442, "step": 138 }, { "epoch": 0.0864696734059098, "grad_norm": 0.21719491481781006, "learning_rate": 4.305e-05, "log_odds_chosen": 0.1720152199268341, "log_odds_ratio": -0.6350564360618591, "logits/chosen": 0.4697237014770508, "logits/rejected": 2.250985860824585, "logps/chosen": -1.0021406412124634, "logps/rejected": -1.1049684286117554, "loss": 0.5969, "nll_loss": 0.5334354043006897, "rewards/accuracies": 0.5, "rewards/chosen": -0.10021406412124634, "rewards/margins": 0.010282783769071102, "rewards/rejected": -0.11049684882164001, "step": 139 }, { "epoch": 0.08709175738724728, "grad_norm": 0.25784918665885925, "learning_rate": 4.3e-05, "log_odds_chosen": 0.1582861989736557, "log_odds_ratio": -0.6418213844299316, "logits/chosen": 0.9707194566726685, "logits/rejected": 2.337258815765381, "logps/chosen": -1.272529125213623, "logps/rejected": -1.3633763790130615, "loss": 0.7069, "nll_loss": 0.6427214741706848, "rewards/accuracies": 0.625, "rewards/chosen": -0.12725290656089783, "rewards/margins": 0.00908473040908575, "rewards/rejected": -0.13633763790130615, "step": 140 }, { "epoch": 0.08771384136858476, "grad_norm": 0.33133354783058167, "learning_rate": 4.295e-05, "log_odds_chosen": 0.1922331303358078, "log_odds_ratio": -0.6399127840995789, "logits/chosen": 1.1344259977340698, "logits/rejected": 2.2432358264923096, "logps/chosen": -1.0353434085845947, "logps/rejected": -1.1536777019500732, "loss": 0.7003, "nll_loss": 0.6363195180892944, "rewards/accuracies": 0.625, "rewards/chosen": -0.10353434085845947, "rewards/margins": 0.011833428405225277, "rewards/rejected": -0.11536777764558792, "step": 141 }, { "epoch": 0.08833592534992224, "grad_norm": 0.24132822453975677, "learning_rate": 4.29e-05, "log_odds_chosen": 0.1710241138935089, "log_odds_ratio": -0.6606246829032898, "logits/chosen": 1.6139814853668213, "logits/rejected": 2.8867573738098145, "logps/chosen": -0.8465310335159302, "logps/rejected": -0.9183309078216553, "loss": 0.7557, "nll_loss": 0.689607560634613, "rewards/accuracies": 0.625, "rewards/chosen": -0.0846531093120575, "rewards/margins": 0.007179984822869301, "rewards/rejected": -0.09183309227228165, "step": 142 }, { "epoch": 0.08895800933125972, "grad_norm": 0.3063518702983856, "learning_rate": 4.285e-05, "log_odds_chosen": 0.3355182707309723, "log_odds_ratio": -0.5703614950180054, "logits/chosen": 1.9923746585845947, "logits/rejected": 1.7918429374694824, "logps/chosen": -1.0820659399032593, "logps/rejected": -1.3053545951843262, "loss": 0.8471, "nll_loss": 0.7900703549385071, "rewards/accuracies": 0.625, "rewards/chosen": -0.1082065999507904, "rewards/margins": 0.022328879684209824, "rewards/rejected": -0.13053546845912933, "step": 143 }, { "epoch": 0.0895800933125972, "grad_norm": 0.24182361364364624, "learning_rate": 4.2800000000000004e-05, "log_odds_chosen": 0.3469311594963074, "log_odds_ratio": -0.6052234768867493, "logits/chosen": 2.344413995742798, "logits/rejected": 2.9837701320648193, "logps/chosen": -1.0931798219680786, "logps/rejected": -1.2991085052490234, "loss": 0.8078, "nll_loss": 0.7472943067550659, "rewards/accuracies": 0.625, "rewards/chosen": -0.10931797325611115, "rewards/margins": 0.020592868328094482, "rewards/rejected": -0.12991085648536682, "step": 144 }, { "epoch": 0.09020217729393468, "grad_norm": 0.20874236524105072, "learning_rate": 4.275e-05, "log_odds_chosen": 0.6926552057266235, "log_odds_ratio": -0.4596474766731262, "logits/chosen": 1.773850679397583, "logits/rejected": 3.3926727771759033, "logps/chosen": -0.5454069972038269, "logps/rejected": -0.8595741987228394, "loss": 0.8004, "nll_loss": 0.7544057965278625, "rewards/accuracies": 0.875, "rewards/chosen": -0.05454070121049881, "rewards/margins": 0.031416717916727066, "rewards/rejected": -0.08595742285251617, "step": 145 }, { "epoch": 0.09082426127527216, "grad_norm": 0.24432261288166046, "learning_rate": 4.27e-05, "log_odds_chosen": 0.3871600031852722, "log_odds_ratio": -0.5729288458824158, "logits/chosen": 1.1837117671966553, "logits/rejected": 2.0443482398986816, "logps/chosen": -0.9735139608383179, "logps/rejected": -1.1910879611968994, "loss": 0.6312, "nll_loss": 0.5739009380340576, "rewards/accuracies": 0.75, "rewards/chosen": -0.09735140204429626, "rewards/margins": 0.021757401525974274, "rewards/rejected": -0.11910881102085114, "step": 146 }, { "epoch": 0.09144634525660965, "grad_norm": 0.3139575719833374, "learning_rate": 4.265e-05, "log_odds_chosen": 0.4184523820877075, "log_odds_ratio": -0.5198309421539307, "logits/chosen": 0.5413355231285095, "logits/rejected": 1.8938310146331787, "logps/chosen": -0.8826829195022583, "logps/rejected": -1.14265775680542, "loss": 0.5939, "nll_loss": 0.5419252514839172, "rewards/accuracies": 1.0, "rewards/chosen": -0.08826829493045807, "rewards/margins": 0.025997478514909744, "rewards/rejected": -0.11426577717065811, "step": 147 }, { "epoch": 0.09206842923794713, "grad_norm": 0.20702946186065674, "learning_rate": 4.26e-05, "log_odds_chosen": 0.07650808244943619, "log_odds_ratio": -0.6737152338027954, "logits/chosen": 1.5741207599639893, "logits/rejected": 3.246458053588867, "logps/chosen": -1.008644938468933, "logps/rejected": -1.0792831182479858, "loss": 0.8092, "nll_loss": 0.7418737411499023, "rewards/accuracies": 0.625, "rewards/chosen": -0.10086449980735779, "rewards/margins": 0.007063813507556915, "rewards/rejected": -0.1079283133149147, "step": 148 }, { "epoch": 0.0926905132192846, "grad_norm": 0.20067504048347473, "learning_rate": 4.2550000000000004e-05, "log_odds_chosen": -0.02825966477394104, "log_odds_ratio": -0.7468290328979492, "logits/chosen": 1.176827311515808, "logits/rejected": 2.1293768882751465, "logps/chosen": -1.0007497072219849, "logps/rejected": -0.9864083528518677, "loss": 0.7599, "nll_loss": 0.6852264404296875, "rewards/accuracies": 0.5, "rewards/chosen": -0.10007497668266296, "rewards/margins": -0.0014341361820697784, "rewards/rejected": -0.09864082932472229, "step": 149 }, { "epoch": 0.09331259720062209, "grad_norm": 0.2831576466560364, "learning_rate": 4.25e-05, "log_odds_chosen": 0.007619678974151611, "log_odds_ratio": -0.7431962490081787, "logits/chosen": 1.9270395040512085, "logits/rejected": 1.999312162399292, "logps/chosen": -1.0627632141113281, "logps/rejected": -1.0206648111343384, "loss": 0.8259, "nll_loss": 0.7516275644302368, "rewards/accuracies": 0.625, "rewards/chosen": -0.10627631843090057, "rewards/margins": -0.0042098406702280045, "rewards/rejected": -0.10206647962331772, "step": 150 }, { "epoch": 0.09393468118195956, "grad_norm": 0.3491978049278259, "learning_rate": 4.245e-05, "log_odds_chosen": -0.12725840508937836, "log_odds_ratio": -0.7853566408157349, "logits/chosen": 2.8336124420166016, "logits/rejected": 1.3309614658355713, "logps/chosen": -1.260406732559204, "logps/rejected": -1.1789261102676392, "loss": 0.9395, "nll_loss": 0.8609429001808167, "rewards/accuracies": 0.5, "rewards/chosen": -0.12604066729545593, "rewards/margins": -0.00814807042479515, "rewards/rejected": -0.11789260059595108, "step": 151 }, { "epoch": 0.09455676516329704, "grad_norm": 0.22511498630046844, "learning_rate": 4.24e-05, "log_odds_chosen": 0.27719905972480774, "log_odds_ratio": -0.6086083054542542, "logits/chosen": 1.3456978797912598, "logits/rejected": 2.8321447372436523, "logps/chosen": -0.9333552122116089, "logps/rejected": -1.0779469013214111, "loss": 0.7306, "nll_loss": 0.6697627305984497, "rewards/accuracies": 0.75, "rewards/chosen": -0.09333551675081253, "rewards/margins": 0.014459175989031792, "rewards/rejected": -0.10779468715190887, "step": 152 }, { "epoch": 0.09517884914463452, "grad_norm": 0.25012776255607605, "learning_rate": 4.235e-05, "log_odds_chosen": 0.10945864021778107, "log_odds_ratio": -0.6662627458572388, "logits/chosen": 0.27826401591300964, "logits/rejected": 1.6450650691986084, "logps/chosen": -1.1210460662841797, "logps/rejected": -1.172456979751587, "loss": 0.6499, "nll_loss": 0.583318829536438, "rewards/accuracies": 0.625, "rewards/chosen": -0.11210459470748901, "rewards/margins": 0.005141102708876133, "rewards/rejected": -0.11724570393562317, "step": 153 }, { "epoch": 0.095800933125972, "grad_norm": 0.19437120854854584, "learning_rate": 4.23e-05, "log_odds_chosen": 0.4470350742340088, "log_odds_ratio": -0.5733773708343506, "logits/chosen": 1.7237200736999512, "logits/rejected": 2.95843768119812, "logps/chosen": -0.9336342215538025, "logps/rejected": -1.1649386882781982, "loss": 0.7838, "nll_loss": 0.7264678478240967, "rewards/accuracies": 0.75, "rewards/chosen": -0.0933634340763092, "rewards/margins": 0.02313043922185898, "rewards/rejected": -0.11649386584758759, "step": 154 }, { "epoch": 0.09642301710730948, "grad_norm": 0.22907698154449463, "learning_rate": 4.2250000000000004e-05, "log_odds_chosen": 0.1783168613910675, "log_odds_ratio": -0.7230756878852844, "logits/chosen": -0.021383345127105713, "logits/rejected": 1.2943320274353027, "logps/chosen": -0.8587887287139893, "logps/rejected": -0.9087654948234558, "loss": 0.6111, "nll_loss": 0.5387856960296631, "rewards/accuracies": 0.5, "rewards/chosen": -0.0858788713812828, "rewards/margins": 0.0049976771697402, "rewards/rejected": -0.09087655693292618, "step": 155 }, { "epoch": 0.09704510108864697, "grad_norm": 0.2803076505661011, "learning_rate": 4.22e-05, "log_odds_chosen": 0.3767983913421631, "log_odds_ratio": -0.5567384958267212, "logits/chosen": 1.33505117893219, "logits/rejected": 2.911482810974121, "logps/chosen": -0.9518997073173523, "logps/rejected": -1.1626166105270386, "loss": 0.7235, "nll_loss": 0.6678577661514282, "rewards/accuracies": 0.875, "rewards/chosen": -0.09518995881080627, "rewards/margins": 0.021071698516607285, "rewards/rejected": -0.11626166105270386, "step": 156 }, { "epoch": 0.09766718506998445, "grad_norm": 0.7590816020965576, "learning_rate": 4.215e-05, "log_odds_chosen": 0.3583412170410156, "log_odds_ratio": -0.6060498356819153, "logits/chosen": 1.9442944526672363, "logits/rejected": 2.415597915649414, "logps/chosen": -1.2141615152359009, "logps/rejected": -1.442068338394165, "loss": 0.8981, "nll_loss": 0.8374723792076111, "rewards/accuracies": 0.75, "rewards/chosen": -0.12141615152359009, "rewards/margins": 0.02279069647192955, "rewards/rejected": -0.14420685172080994, "step": 157 }, { "epoch": 0.09828926905132193, "grad_norm": 0.2292739897966385, "learning_rate": 4.21e-05, "log_odds_chosen": 0.2378174364566803, "log_odds_ratio": -0.6532160043716431, "logits/chosen": -0.0699785053730011, "logits/rejected": 2.0174026489257812, "logps/chosen": -0.974482536315918, "logps/rejected": -1.0714282989501953, "loss": 0.5066, "nll_loss": 0.4412919580936432, "rewards/accuracies": 0.625, "rewards/chosen": -0.09744825214147568, "rewards/margins": 0.009694581851363182, "rewards/rejected": -0.10714283585548401, "step": 158 }, { "epoch": 0.09891135303265941, "grad_norm": 0.22754822671413422, "learning_rate": 4.205e-05, "log_odds_chosen": 1.0634626150131226, "log_odds_ratio": -0.407301664352417, "logits/chosen": 1.4696998596191406, "logits/rejected": 2.612338066101074, "logps/chosen": -0.6319990754127502, "logps/rejected": -1.2708525657653809, "loss": 0.7594, "nll_loss": 0.7186335921287537, "rewards/accuracies": 0.875, "rewards/chosen": -0.06319990754127502, "rewards/margins": 0.06388533860445023, "rewards/rejected": -0.12708523869514465, "step": 159 }, { "epoch": 0.09953343701399689, "grad_norm": 0.2632899880409241, "learning_rate": 4.2e-05, "log_odds_chosen": 0.16408134996891022, "log_odds_ratio": -0.6438462734222412, "logits/chosen": 1.6441045999526978, "logits/rejected": 2.838496685028076, "logps/chosen": -0.9144777059555054, "logps/rejected": -0.9933640360832214, "loss": 0.6945, "nll_loss": 0.6300674080848694, "rewards/accuracies": 0.75, "rewards/chosen": -0.09144777059555054, "rewards/margins": 0.007888639345765114, "rewards/rejected": -0.0993364006280899, "step": 160 }, { "epoch": 0.10015552099533437, "grad_norm": 0.41054767370224, "learning_rate": 4.195e-05, "log_odds_chosen": 0.16628219187259674, "log_odds_ratio": -0.6762892007827759, "logits/chosen": 1.4416700601577759, "logits/rejected": 1.777522087097168, "logps/chosen": -1.003077507019043, "logps/rejected": -1.0760802030563354, "loss": 0.7156, "nll_loss": 0.647979736328125, "rewards/accuracies": 0.25, "rewards/chosen": -0.10030774772167206, "rewards/margins": 0.007300272583961487, "rewards/rejected": -0.10760802030563354, "step": 161 }, { "epoch": 0.10077760497667185, "grad_norm": 0.38517841696739197, "learning_rate": 4.19e-05, "log_odds_chosen": 0.19973157346248627, "log_odds_ratio": -0.6236205697059631, "logits/chosen": 2.4252243041992188, "logits/rejected": 3.675344944000244, "logps/chosen": -0.9389520287513733, "logps/rejected": -1.043189525604248, "loss": 0.8682, "nll_loss": 0.8058300614356995, "rewards/accuracies": 0.375, "rewards/chosen": -0.09389521181583405, "rewards/margins": 0.010423748753964901, "rewards/rejected": -0.10431894659996033, "step": 162 }, { "epoch": 0.10139968895800933, "grad_norm": 0.22616150975227356, "learning_rate": 4.185e-05, "log_odds_chosen": 0.1786232888698578, "log_odds_ratio": -0.6155363917350769, "logits/chosen": 0.7019532322883606, "logits/rejected": 1.6438217163085938, "logps/chosen": -1.0487325191497803, "logps/rejected": -1.1808518171310425, "loss": 0.7142, "nll_loss": 0.6526217460632324, "rewards/accuracies": 0.75, "rewards/chosen": -0.10487325489521027, "rewards/margins": 0.013211926445364952, "rewards/rejected": -0.11808518320322037, "step": 163 }, { "epoch": 0.1020217729393468, "grad_norm": 0.27632614970207214, "learning_rate": 4.18e-05, "log_odds_chosen": 0.1357310712337494, "log_odds_ratio": -0.6856433749198914, "logits/chosen": 1.608304500579834, "logits/rejected": 2.1849358081817627, "logps/chosen": -1.013383150100708, "logps/rejected": -1.0906598567962646, "loss": 0.7822, "nll_loss": 0.7136261463165283, "rewards/accuracies": 0.625, "rewards/chosen": -0.10133831202983856, "rewards/margins": 0.007727675139904022, "rewards/rejected": -0.10906599462032318, "step": 164 }, { "epoch": 0.1026438569206843, "grad_norm": 0.2569625675678253, "learning_rate": 4.175e-05, "log_odds_chosen": 0.1790873259305954, "log_odds_ratio": -0.6222453117370605, "logits/chosen": 0.2540859580039978, "logits/rejected": 1.5012887716293335, "logps/chosen": -0.9785174131393433, "logps/rejected": -1.096341609954834, "loss": 0.6326, "nll_loss": 0.5704231262207031, "rewards/accuracies": 0.625, "rewards/chosen": -0.09785175323486328, "rewards/margins": 0.01178241241723299, "rewards/rejected": -0.1096341609954834, "step": 165 }, { "epoch": 0.10326594090202178, "grad_norm": 0.29103732109069824, "learning_rate": 4.17e-05, "log_odds_chosen": 0.6809232234954834, "log_odds_ratio": -0.5375621914863586, "logits/chosen": 2.260108709335327, "logits/rejected": 3.4551193714141846, "logps/chosen": -0.7766050100326538, "logps/rejected": -1.0041743516921997, "loss": 0.9029, "nll_loss": 0.8491120338439941, "rewards/accuracies": 0.75, "rewards/chosen": -0.07766050100326538, "rewards/margins": 0.02275693416595459, "rewards/rejected": -0.10041744261980057, "step": 166 }, { "epoch": 0.10388802488335926, "grad_norm": 0.24556207656860352, "learning_rate": 4.165e-05, "log_odds_chosen": 0.17520564794540405, "log_odds_ratio": -0.6451351046562195, "logits/chosen": 0.37003058195114136, "logits/rejected": 3.1531291007995605, "logps/chosen": -0.95960533618927, "logps/rejected": -1.0630546808242798, "loss": 0.6518, "nll_loss": 0.5872541666030884, "rewards/accuracies": 0.5, "rewards/chosen": -0.09596052765846252, "rewards/margins": 0.010344942100346088, "rewards/rejected": -0.10630547255277634, "step": 167 }, { "epoch": 0.10451010886469674, "grad_norm": 0.43578729033470154, "learning_rate": 4.16e-05, "log_odds_chosen": 0.3509252965450287, "log_odds_ratio": -0.579054057598114, "logits/chosen": 1.795426368713379, "logits/rejected": 1.9425467252731323, "logps/chosen": -0.8844292163848877, "logps/rejected": -1.0651838779449463, "loss": 0.7989, "nll_loss": 0.7409669160842896, "rewards/accuracies": 0.625, "rewards/chosen": -0.08844292163848877, "rewards/margins": 0.018075458705425262, "rewards/rejected": -0.10651838779449463, "step": 168 }, { "epoch": 0.10513219284603421, "grad_norm": 0.21419963240623474, "learning_rate": 4.155e-05, "log_odds_chosen": 0.2651481032371521, "log_odds_ratio": -0.6336223483085632, "logits/chosen": -0.10004106163978577, "logits/rejected": 2.5327444076538086, "logps/chosen": -0.9215387105941772, "logps/rejected": -1.0058555603027344, "loss": 0.5136, "nll_loss": 0.45028623938560486, "rewards/accuracies": 0.375, "rewards/chosen": -0.0921538770198822, "rewards/margins": 0.008431695401668549, "rewards/rejected": -0.10058555752038956, "step": 169 }, { "epoch": 0.1057542768273717, "grad_norm": 0.26683712005615234, "learning_rate": 4.15e-05, "log_odds_chosen": 0.1297835409641266, "log_odds_ratio": -0.7755084037780762, "logits/chosen": 1.6993978023529053, "logits/rejected": 1.991716980934143, "logps/chosen": -1.0198416709899902, "logps/rejected": -0.9949245452880859, "loss": 0.7747, "nll_loss": 0.6971030831336975, "rewards/accuracies": 0.625, "rewards/chosen": -0.1019841730594635, "rewards/margins": -0.002491721883416176, "rewards/rejected": -0.09949245303869247, "step": 170 }, { "epoch": 0.10637636080870917, "grad_norm": 0.21372289955615997, "learning_rate": 4.145e-05, "log_odds_chosen": 0.11716372519731522, "log_odds_ratio": -0.7064924240112305, "logits/chosen": -0.41527295112609863, "logits/rejected": 0.6108665466308594, "logps/chosen": -0.9913889169692993, "logps/rejected": -1.063600778579712, "loss": 0.5345, "nll_loss": 0.46387529373168945, "rewards/accuracies": 0.625, "rewards/chosen": -0.09913888573646545, "rewards/margins": 0.007221193052828312, "rewards/rejected": -0.10636007785797119, "step": 171 }, { "epoch": 0.10699844479004665, "grad_norm": 0.34510332345962524, "learning_rate": 4.14e-05, "log_odds_chosen": -0.006949573755264282, "log_odds_ratio": -0.7512286901473999, "logits/chosen": 2.2331154346466064, "logits/rejected": 2.70103120803833, "logps/chosen": -0.9838117361068726, "logps/rejected": -0.9597645998001099, "loss": 0.8141, "nll_loss": 0.7389856576919556, "rewards/accuracies": 0.375, "rewards/chosen": -0.0983811691403389, "rewards/margins": -0.0024047140032052994, "rewards/rejected": -0.09597645699977875, "step": 172 }, { "epoch": 0.10762052877138413, "grad_norm": 0.24371562898159027, "learning_rate": 4.135e-05, "log_odds_chosen": -0.088199183344841, "log_odds_ratio": -0.7620775699615479, "logits/chosen": 1.896087408065796, "logits/rejected": 2.6639156341552734, "logps/chosen": -1.0846797227859497, "logps/rejected": -1.0300229787826538, "loss": 0.7958, "nll_loss": 0.7195842266082764, "rewards/accuracies": 0.375, "rewards/chosen": -0.1084679663181305, "rewards/margins": -0.005465677008032799, "rewards/rejected": -0.10300229489803314, "step": 173 }, { "epoch": 0.10824261275272162, "grad_norm": 0.2548561692237854, "learning_rate": 4.13e-05, "log_odds_chosen": 0.16646553575992584, "log_odds_ratio": -0.6395515203475952, "logits/chosen": 1.1434202194213867, "logits/rejected": 2.398165464401245, "logps/chosen": -1.1627336740493774, "logps/rejected": -1.2480618953704834, "loss": 0.7475, "nll_loss": 0.6835085153579712, "rewards/accuracies": 0.875, "rewards/chosen": -0.11627336591482162, "rewards/margins": 0.008532822132110596, "rewards/rejected": -0.12480619549751282, "step": 174 }, { "epoch": 0.1088646967340591, "grad_norm": 0.34918922185897827, "learning_rate": 4.125e-05, "log_odds_chosen": 0.6802692413330078, "log_odds_ratio": -0.6001302599906921, "logits/chosen": 2.011270046234131, "logits/rejected": 3.118522882461548, "logps/chosen": -1.0734280347824097, "logps/rejected": -1.3806785345077515, "loss": 0.822, "nll_loss": 0.762000560760498, "rewards/accuracies": 0.625, "rewards/chosen": -0.10734279453754425, "rewards/margins": 0.030725058168172836, "rewards/rejected": -0.13806785643100739, "step": 175 }, { "epoch": 0.10948678071539658, "grad_norm": 0.2557761073112488, "learning_rate": 4.12e-05, "log_odds_chosen": 0.3574141263961792, "log_odds_ratio": -0.5623692274093628, "logits/chosen": 0.9804760813713074, "logits/rejected": 1.5133546590805054, "logps/chosen": -1.0537047386169434, "logps/rejected": -1.304141879081726, "loss": 0.6899, "nll_loss": 0.6336163878440857, "rewards/accuracies": 0.625, "rewards/chosen": -0.10537047684192657, "rewards/margins": 0.02504371665418148, "rewards/rejected": -0.1304141879081726, "step": 176 }, { "epoch": 0.11010886469673406, "grad_norm": 0.2286410927772522, "learning_rate": 4.115e-05, "log_odds_chosen": -0.17244988679885864, "log_odds_ratio": -0.9307609796524048, "logits/chosen": 1.0503787994384766, "logits/rejected": 2.7753310203552246, "logps/chosen": -1.295305848121643, "logps/rejected": -1.0674867630004883, "loss": 0.8022, "nll_loss": 0.7091712951660156, "rewards/accuracies": 0.5, "rewards/chosen": -0.12953059375286102, "rewards/margins": -0.02278190851211548, "rewards/rejected": -0.10674867033958435, "step": 177 }, { "epoch": 0.11073094867807154, "grad_norm": 0.35464537143707275, "learning_rate": 4.11e-05, "log_odds_chosen": -0.1714506447315216, "log_odds_ratio": -0.927706241607666, "logits/chosen": 1.4732853174209595, "logits/rejected": 2.787705421447754, "logps/chosen": -1.286841869354248, "logps/rejected": -1.1253931522369385, "loss": 0.7488, "nll_loss": 0.6560234427452087, "rewards/accuracies": 0.625, "rewards/chosen": -0.12868419289588928, "rewards/margins": -0.016144881024956703, "rewards/rejected": -0.11253931373357773, "step": 178 }, { "epoch": 0.11135303265940902, "grad_norm": 0.46238580346107483, "learning_rate": 4.105e-05, "log_odds_chosen": 0.37182509899139404, "log_odds_ratio": -0.5696491599082947, "logits/chosen": 2.656465530395508, "logits/rejected": 3.799373149871826, "logps/chosen": -1.022263765335083, "logps/rejected": -1.18893563747406, "loss": 0.9302, "nll_loss": 0.8732700347900391, "rewards/accuracies": 0.75, "rewards/chosen": -0.1022263765335083, "rewards/margins": 0.01666719652712345, "rewards/rejected": -0.1188935711979866, "step": 179 }, { "epoch": 0.1119751166407465, "grad_norm": 0.41658157110214233, "learning_rate": 4.1e-05, "log_odds_chosen": 0.14363381266593933, "log_odds_ratio": -0.667683482170105, "logits/chosen": 1.3900699615478516, "logits/rejected": 2.4027140140533447, "logps/chosen": -1.093644380569458, "logps/rejected": -1.1805763244628906, "loss": 0.7966, "nll_loss": 0.7298657298088074, "rewards/accuracies": 0.75, "rewards/chosen": -0.10936443507671356, "rewards/margins": 0.008693188428878784, "rewards/rejected": -0.11805762350559235, "step": 180 }, { "epoch": 0.11259720062208398, "grad_norm": 0.2824760675430298, "learning_rate": 4.095e-05, "log_odds_chosen": 0.02471756935119629, "log_odds_ratio": -0.6913062334060669, "logits/chosen": 1.611011266708374, "logits/rejected": 2.775919198989868, "logps/chosen": -1.021841287612915, "logps/rejected": -1.0538395643234253, "loss": 0.7521, "nll_loss": 0.6830064058303833, "rewards/accuracies": 0.5, "rewards/chosen": -0.10218413919210434, "rewards/margins": 0.0031998255290091038, "rewards/rejected": -0.105383962392807, "step": 181 }, { "epoch": 0.11321928460342146, "grad_norm": 0.2572093605995178, "learning_rate": 4.09e-05, "log_odds_chosen": 0.49159225821495056, "log_odds_ratio": -0.4956834614276886, "logits/chosen": 0.8409824371337891, "logits/rejected": 2.636554002761841, "logps/chosen": -0.7410684823989868, "logps/rejected": -1.0188310146331787, "loss": 0.5459, "nll_loss": 0.4963690936565399, "rewards/accuracies": 0.75, "rewards/chosen": -0.0741068571805954, "rewards/margins": 0.027776243165135384, "rewards/rejected": -0.10188309103250504, "step": 182 }, { "epoch": 0.11384136858475895, "grad_norm": 0.20492465794086456, "learning_rate": 4.085e-05, "log_odds_chosen": 1.2914584875106812, "log_odds_ratio": -0.311604768037796, "logits/chosen": 2.0142695903778076, "logits/rejected": 2.8114659786224365, "logps/chosen": -0.6310855746269226, "logps/rejected": -1.169175386428833, "loss": 0.7257, "nll_loss": 0.6945566534996033, "rewards/accuracies": 1.0, "rewards/chosen": -0.06310856342315674, "rewards/margins": 0.05380897969007492, "rewards/rejected": -0.11691753566265106, "step": 183 }, { "epoch": 0.11446345256609643, "grad_norm": 0.2868058383464813, "learning_rate": 4.08e-05, "log_odds_chosen": 0.2746756076812744, "log_odds_ratio": -0.5826683640480042, "logits/chosen": 2.0338900089263916, "logits/rejected": 2.3741323947906494, "logps/chosen": -1.0554683208465576, "logps/rejected": -1.2263842821121216, "loss": 0.8424, "nll_loss": 0.784168004989624, "rewards/accuracies": 0.75, "rewards/chosen": -0.10554683953523636, "rewards/margins": 0.01709158718585968, "rewards/rejected": -0.12263843417167664, "step": 184 }, { "epoch": 0.1150855365474339, "grad_norm": 0.23621974885463715, "learning_rate": 4.075e-05, "log_odds_chosen": 0.32036083936691284, "log_odds_ratio": -0.5880539417266846, "logits/chosen": 1.523934006690979, "logits/rejected": 1.8056572675704956, "logps/chosen": -0.8745806217193604, "logps/rejected": -1.0699481964111328, "loss": 0.791, "nll_loss": 0.7322185635566711, "rewards/accuracies": 0.75, "rewards/chosen": -0.0874580666422844, "rewards/margins": 0.019536755979061127, "rewards/rejected": -0.10699482262134552, "step": 185 }, { "epoch": 0.11570762052877138, "grad_norm": 0.22687800228595734, "learning_rate": 4.07e-05, "log_odds_chosen": 0.19166047871112823, "log_odds_ratio": -0.6182323098182678, "logits/chosen": 1.2291524410247803, "logits/rejected": 2.7583565711975098, "logps/chosen": -0.8949711918830872, "logps/rejected": -0.9789303541183472, "loss": 0.6768, "nll_loss": 0.6149976849555969, "rewards/accuracies": 0.5, "rewards/chosen": -0.08949711918830872, "rewards/margins": 0.008395911194384098, "rewards/rejected": -0.09789302945137024, "step": 186 }, { "epoch": 0.11632970451010886, "grad_norm": 0.2710927128791809, "learning_rate": 4.065e-05, "log_odds_chosen": -0.290906697511673, "log_odds_ratio": -0.8772719502449036, "logits/chosen": 1.058547019958496, "logits/rejected": 2.3658413887023926, "logps/chosen": -1.139380693435669, "logps/rejected": -0.9223094582557678, "loss": 0.8327, "nll_loss": 0.7450096607208252, "rewards/accuracies": 0.375, "rewards/chosen": -0.11393808573484421, "rewards/margins": -0.02170712873339653, "rewards/rejected": -0.09223095327615738, "step": 187 }, { "epoch": 0.11695178849144634, "grad_norm": 0.25022706389427185, "learning_rate": 4.0600000000000004e-05, "log_odds_chosen": -0.1785784363746643, "log_odds_ratio": -0.9537197947502136, "logits/chosen": 0.9926282167434692, "logits/rejected": 2.011066436767578, "logps/chosen": -1.2994134426116943, "logps/rejected": -1.0407631397247314, "loss": 0.7163, "nll_loss": 0.6209733486175537, "rewards/accuracies": 0.625, "rewards/chosen": -0.12994134426116943, "rewards/margins": -0.025865033268928528, "rewards/rejected": -0.1040763109922409, "step": 188 }, { "epoch": 0.11757387247278382, "grad_norm": 0.4500291049480438, "learning_rate": 4.055e-05, "log_odds_chosen": 0.24771341681480408, "log_odds_ratio": -0.5874661207199097, "logits/chosen": 1.660165786743164, "logits/rejected": 2.7396130561828613, "logps/chosen": -0.8763042688369751, "logps/rejected": -1.037272572517395, "loss": 0.7822, "nll_loss": 0.7234908938407898, "rewards/accuracies": 0.875, "rewards/chosen": -0.08763042092323303, "rewards/margins": 0.01609683595597744, "rewards/rejected": -0.10372726619243622, "step": 189 }, { "epoch": 0.1181959564541213, "grad_norm": 0.3109215795993805, "learning_rate": 4.05e-05, "log_odds_chosen": -0.002947285771369934, "log_odds_ratio": -0.7154306769371033, "logits/chosen": 2.46467924118042, "logits/rejected": 2.4921913146972656, "logps/chosen": -1.175093650817871, "logps/rejected": -1.155705451965332, "loss": 0.9375, "nll_loss": 0.8659391403198242, "rewards/accuracies": 0.5, "rewards/chosen": -0.11750936508178711, "rewards/margins": -0.0019388198852539062, "rewards/rejected": -0.1155705451965332, "step": 190 }, { "epoch": 0.1188180404354588, "grad_norm": 0.2340676635503769, "learning_rate": 4.045000000000001e-05, "log_odds_chosen": 0.6892496347427368, "log_odds_ratio": -0.46823450922966003, "logits/chosen": 1.4322489500045776, "logits/rejected": 2.8000543117523193, "logps/chosen": -0.6895767450332642, "logps/rejected": -1.0397164821624756, "loss": 0.6969, "nll_loss": 0.6500332951545715, "rewards/accuracies": 0.75, "rewards/chosen": -0.06895767152309418, "rewards/margins": 0.03501397743821144, "rewards/rejected": -0.10397165268659592, "step": 191 }, { "epoch": 0.11944012441679627, "grad_norm": 0.24883458018302917, "learning_rate": 4.0400000000000006e-05, "log_odds_chosen": 0.26280373334884644, "log_odds_ratio": -0.6019066572189331, "logits/chosen": 0.9045218229293823, "logits/rejected": 2.5123941898345947, "logps/chosen": -1.074615716934204, "logps/rejected": -1.2176018953323364, "loss": 0.6199, "nll_loss": 0.5597118735313416, "rewards/accuracies": 0.5, "rewards/chosen": -0.10746156424283981, "rewards/margins": 0.014298615977168083, "rewards/rejected": -0.12176018953323364, "step": 192 }, { "epoch": 0.12006220839813375, "grad_norm": 0.7652886509895325, "learning_rate": 4.0350000000000005e-05, "log_odds_chosen": 0.2115470916032791, "log_odds_ratio": -0.6227182149887085, "logits/chosen": 1.9737762212753296, "logits/rejected": 3.5508880615234375, "logps/chosen": -0.902724027633667, "logps/rejected": -0.994047999382019, "loss": 0.85, "nll_loss": 0.7876854538917542, "rewards/accuracies": 0.75, "rewards/chosen": -0.09027239680290222, "rewards/margins": 0.009132396429777145, "rewards/rejected": -0.09940479695796967, "step": 193 }, { "epoch": 0.12068429237947123, "grad_norm": 0.25935572385787964, "learning_rate": 4.0300000000000004e-05, "log_odds_chosen": 0.6906004548072815, "log_odds_ratio": -0.42626866698265076, "logits/chosen": 0.13742883503437042, "logits/rejected": 1.888708472251892, "logps/chosen": -0.8336443901062012, "logps/rejected": -1.2578824758529663, "loss": 0.5188, "nll_loss": 0.47615402936935425, "rewards/accuracies": 1.0, "rewards/chosen": -0.08336444199085236, "rewards/margins": 0.042423803359270096, "rewards/rejected": -0.12578824162483215, "step": 194 }, { "epoch": 0.12130637636080871, "grad_norm": 4.610418796539307, "learning_rate": 4.025e-05, "log_odds_chosen": 0.5752971172332764, "log_odds_ratio": -0.5104885697364807, "logits/chosen": 1.1361490488052368, "logits/rejected": 1.9390631914138794, "logps/chosen": -0.9524904489517212, "logps/rejected": -1.3292269706726074, "loss": 0.7264, "nll_loss": 0.6753022074699402, "rewards/accuracies": 0.875, "rewards/chosen": -0.09524904191493988, "rewards/margins": 0.037673648446798325, "rewards/rejected": -0.1329226940870285, "step": 195 }, { "epoch": 0.12192846034214619, "grad_norm": 0.2897116243839264, "learning_rate": 4.02e-05, "log_odds_chosen": -0.022627107799053192, "log_odds_ratio": -0.7134356498718262, "logits/chosen": 1.5104551315307617, "logits/rejected": 2.7221500873565674, "logps/chosen": -0.9891096353530884, "logps/rejected": -0.9596316814422607, "loss": 0.7559, "nll_loss": 0.6845479011535645, "rewards/accuracies": 0.375, "rewards/chosen": -0.09891095757484436, "rewards/margins": -0.0029477900825440884, "rewards/rejected": -0.09596316516399384, "step": 196 }, { "epoch": 0.12255054432348367, "grad_norm": 0.37426719069480896, "learning_rate": 4.015000000000001e-05, "log_odds_chosen": -0.2981864809989929, "log_odds_ratio": -0.980148196220398, "logits/chosen": 1.6573596000671387, "logits/rejected": 1.4556148052215576, "logps/chosen": -1.2158352136611938, "logps/rejected": -1.0322885513305664, "loss": 0.7979, "nll_loss": 0.6998590230941772, "rewards/accuracies": 0.625, "rewards/chosen": -0.12158352136611938, "rewards/margins": -0.018354661762714386, "rewards/rejected": -0.1032288521528244, "step": 197 }, { "epoch": 0.12317262830482115, "grad_norm": 0.3264211118221283, "learning_rate": 4.0100000000000006e-05, "log_odds_chosen": 0.4851124882698059, "log_odds_ratio": -0.5224840641021729, "logits/chosen": 2.4400124549865723, "logits/rejected": 1.3729251623153687, "logps/chosen": -1.0999153852462769, "logps/rejected": -1.459439992904663, "loss": 0.8856, "nll_loss": 0.8333885669708252, "rewards/accuracies": 0.875, "rewards/chosen": -0.10999153554439545, "rewards/margins": 0.03595246374607086, "rewards/rejected": -0.1459439992904663, "step": 198 }, { "epoch": 0.12379471228615863, "grad_norm": 0.28378090262413025, "learning_rate": 4.0050000000000004e-05, "log_odds_chosen": 0.29335591197013855, "log_odds_ratio": -0.5916770100593567, "logits/chosen": 0.7340425252914429, "logits/rejected": 1.0187065601348877, "logps/chosen": -1.0739442110061646, "logps/rejected": -1.1977989673614502, "loss": 0.7556, "nll_loss": 0.6964027881622314, "rewards/accuracies": 0.75, "rewards/chosen": -0.10739442706108093, "rewards/margins": 0.012385478243231773, "rewards/rejected": -0.11977989971637726, "step": 199 }, { "epoch": 0.12441679626749612, "grad_norm": 0.7085748910903931, "learning_rate": 4e-05, "log_odds_chosen": -0.2727814018726349, "log_odds_ratio": -0.9787805676460266, "logits/chosen": 1.3356515169143677, "logits/rejected": 2.2699167728424072, "logps/chosen": -1.482822060585022, "logps/rejected": -1.2205945253372192, "loss": 0.8284, "nll_loss": 0.7305508852005005, "rewards/accuracies": 0.5, "rewards/chosen": -0.1482822149991989, "rewards/margins": -0.02622275799512863, "rewards/rejected": -0.12205944955348969, "step": 200 }, { "epoch": 0.12503888024883358, "grad_norm": 0.31775179505348206, "learning_rate": 3.995e-05, "log_odds_chosen": -0.02565819025039673, "log_odds_ratio": -0.7218791842460632, "logits/chosen": 1.1910040378570557, "logits/rejected": 2.9965484142303467, "logps/chosen": -1.1026508808135986, "logps/rejected": -1.0807952880859375, "loss": 0.7445, "nll_loss": 0.6723363399505615, "rewards/accuracies": 0.5, "rewards/chosen": -0.1102650910615921, "rewards/margins": -0.002185564488172531, "rewards/rejected": -0.10807952284812927, "step": 201 }, { "epoch": 0.12566096423017106, "grad_norm": 0.21708033978939056, "learning_rate": 3.99e-05, "log_odds_chosen": 0.4155348539352417, "log_odds_ratio": -0.6038289070129395, "logits/chosen": 0.3837054967880249, "logits/rejected": 1.7948483228683472, "logps/chosen": -1.0251164436340332, "logps/rejected": -1.305844783782959, "loss": 0.6265, "nll_loss": 0.5660817623138428, "rewards/accuracies": 0.625, "rewards/chosen": -0.10251164436340332, "rewards/margins": 0.028072843328118324, "rewards/rejected": -0.1305844783782959, "step": 202 }, { "epoch": 0.12628304821150854, "grad_norm": 1.0549097061157227, "learning_rate": 3.9850000000000006e-05, "log_odds_chosen": 0.11141453683376312, "log_odds_ratio": -0.6722193956375122, "logits/chosen": 1.9163165092468262, "logits/rejected": 2.140613555908203, "logps/chosen": -0.9174782633781433, "logps/rejected": -0.9806128144264221, "loss": 0.8362, "nll_loss": 0.7690044045448303, "rewards/accuracies": 0.5, "rewards/chosen": -0.09174783527851105, "rewards/margins": 0.00631345110014081, "rewards/rejected": -0.09806128591299057, "step": 203 }, { "epoch": 0.12690513219284602, "grad_norm": 0.2251633256673813, "learning_rate": 3.9800000000000005e-05, "log_odds_chosen": 0.6894951462745667, "log_odds_ratio": -0.4621826410293579, "logits/chosen": -0.8240604996681213, "logits/rejected": 2.079634189605713, "logps/chosen": -0.9713005423545837, "logps/rejected": -1.3288459777832031, "loss": 0.4771, "nll_loss": 0.43083181977272034, "rewards/accuracies": 1.0, "rewards/chosen": -0.09713006019592285, "rewards/margins": 0.03575454279780388, "rewards/rejected": -0.13288459181785583, "step": 204 }, { "epoch": 0.12752721617418353, "grad_norm": 0.40125712752342224, "learning_rate": 3.9750000000000004e-05, "log_odds_chosen": 0.03986138850450516, "log_odds_ratio": -0.8418188691139221, "logits/chosen": 2.9345784187316895, "logits/rejected": 3.9708542823791504, "logps/chosen": -1.3259272575378418, "logps/rejected": -1.2527263164520264, "loss": 0.9907, "nll_loss": 0.9065566062927246, "rewards/accuracies": 0.625, "rewards/chosen": -0.13259272277355194, "rewards/margins": -0.0073200855404138565, "rewards/rejected": -0.12527263164520264, "step": 205 }, { "epoch": 0.128149300155521, "grad_norm": 0.2802586257457733, "learning_rate": 3.97e-05, "log_odds_chosen": 0.718868613243103, "log_odds_ratio": -0.4649730622768402, "logits/chosen": 0.6591233611106873, "logits/rejected": 1.1603331565856934, "logps/chosen": -0.8362342715263367, "logps/rejected": -1.1428916454315186, "loss": 0.7012, "nll_loss": 0.6546894907951355, "rewards/accuracies": 0.875, "rewards/chosen": -0.08362342417240143, "rewards/margins": 0.030665744096040726, "rewards/rejected": -0.11428917199373245, "step": 206 }, { "epoch": 0.12877138413685849, "grad_norm": 0.3060365915298462, "learning_rate": 3.965e-05, "log_odds_chosen": 0.21121090650558472, "log_odds_ratio": -0.6019690036773682, "logits/chosen": 2.085531234741211, "logits/rejected": 3.4953126907348633, "logps/chosen": -0.9660547971725464, "logps/rejected": -1.1018478870391846, "loss": 0.7583, "nll_loss": 0.6981133222579956, "rewards/accuracies": 0.75, "rewards/chosen": -0.09660547971725464, "rewards/margins": 0.013579306192696095, "rewards/rejected": -0.11018478870391846, "step": 207 }, { "epoch": 0.12939346811819596, "grad_norm": 0.3177313208580017, "learning_rate": 3.960000000000001e-05, "log_odds_chosen": 0.4785788357257843, "log_odds_ratio": -0.5458232164382935, "logits/chosen": 2.354724645614624, "logits/rejected": 3.253253936767578, "logps/chosen": -1.0346095561981201, "logps/rejected": -1.3615843057632446, "loss": 0.7659, "nll_loss": 0.7113272547721863, "rewards/accuracies": 0.625, "rewards/chosen": -0.10346096754074097, "rewards/margins": 0.03269747272133827, "rewards/rejected": -0.13615843653678894, "step": 208 }, { "epoch": 0.13001555209953344, "grad_norm": 0.299040287733078, "learning_rate": 3.9550000000000006e-05, "log_odds_chosen": 0.5891073942184448, "log_odds_ratio": -0.5249563455581665, "logits/chosen": 1.062352180480957, "logits/rejected": 1.9807929992675781, "logps/chosen": -0.889495849609375, "logps/rejected": -1.2828330993652344, "loss": 0.6767, "nll_loss": 0.6242160201072693, "rewards/accuracies": 0.625, "rewards/chosen": -0.08894958347082138, "rewards/margins": 0.03933371603488922, "rewards/rejected": -0.1282833069562912, "step": 209 }, { "epoch": 0.13063763608087092, "grad_norm": 0.25538620352745056, "learning_rate": 3.9500000000000005e-05, "log_odds_chosen": 0.03600820153951645, "log_odds_ratio": -0.706552267074585, "logits/chosen": 0.718265175819397, "logits/rejected": 1.6833255290985107, "logps/chosen": -1.0343286991119385, "logps/rejected": -1.0732911825180054, "loss": 0.6782, "nll_loss": 0.6075564026832581, "rewards/accuracies": 0.5, "rewards/chosen": -0.10343287885189056, "rewards/margins": 0.0038962452672421932, "rewards/rejected": -0.1073291227221489, "step": 210 }, { "epoch": 0.1312597200622084, "grad_norm": 0.2507280111312866, "learning_rate": 3.9450000000000003e-05, "log_odds_chosen": 0.43304914236068726, "log_odds_ratio": -0.5274550914764404, "logits/chosen": 0.4559652507305145, "logits/rejected": 1.6874964237213135, "logps/chosen": -0.9379274845123291, "logps/rejected": -1.214919090270996, "loss": 0.644, "nll_loss": 0.5912134647369385, "rewards/accuracies": 0.875, "rewards/chosen": -0.09379275143146515, "rewards/margins": 0.027699150145053864, "rewards/rejected": -0.12149190157651901, "step": 211 }, { "epoch": 0.13188180404354588, "grad_norm": 0.23815755546092987, "learning_rate": 3.94e-05, "log_odds_chosen": 0.2283017784357071, "log_odds_ratio": -0.6008410453796387, "logits/chosen": 1.0329132080078125, "logits/rejected": 1.2031893730163574, "logps/chosen": -1.1192843914031982, "logps/rejected": -1.2944506406784058, "loss": 0.8059, "nll_loss": 0.7458454370498657, "rewards/accuracies": 0.625, "rewards/chosen": -0.11192844063043594, "rewards/margins": 0.01751662604510784, "rewards/rejected": -0.12944507598876953, "step": 212 }, { "epoch": 0.13250388802488336, "grad_norm": 0.3345191478729248, "learning_rate": 3.935e-05, "log_odds_chosen": 0.5041581392288208, "log_odds_ratio": -0.5905402302742004, "logits/chosen": 1.7287249565124512, "logits/rejected": 1.066641092300415, "logps/chosen": -1.1445698738098145, "logps/rejected": -1.4820538759231567, "loss": 0.7486, "nll_loss": 0.6895924210548401, "rewards/accuracies": 0.75, "rewards/chosen": -0.11445698887109756, "rewards/margins": 0.03374838829040527, "rewards/rejected": -0.14820538461208344, "step": 213 }, { "epoch": 0.13312597200622084, "grad_norm": 0.25710660219192505, "learning_rate": 3.9300000000000007e-05, "log_odds_chosen": -0.0680234357714653, "log_odds_ratio": -0.7658126354217529, "logits/chosen": 1.8002554178237915, "logits/rejected": 3.2470219135284424, "logps/chosen": -1.1491390466690063, "logps/rejected": -1.102350115776062, "loss": 0.8621, "nll_loss": 0.785519003868103, "rewards/accuracies": 0.375, "rewards/chosen": -0.11491391062736511, "rewards/margins": -0.004678888246417046, "rewards/rejected": -0.11023502051830292, "step": 214 }, { "epoch": 0.13374805598755832, "grad_norm": 0.356227308511734, "learning_rate": 3.9250000000000005e-05, "log_odds_chosen": -0.18295426666736603, "log_odds_ratio": -0.860011875629425, "logits/chosen": 1.2315165996551514, "logits/rejected": 2.66998291015625, "logps/chosen": -1.4245922565460205, "logps/rejected": -1.23429536819458, "loss": 0.7234, "nll_loss": 0.63743656873703, "rewards/accuracies": 0.5, "rewards/chosen": -0.1424592286348343, "rewards/margins": -0.019029691815376282, "rewards/rejected": -0.1234295442700386, "step": 215 }, { "epoch": 0.1343701399688958, "grad_norm": 0.2328234314918518, "learning_rate": 3.9200000000000004e-05, "log_odds_chosen": 0.3651959002017975, "log_odds_ratio": -0.5772303342819214, "logits/chosen": -0.4574909210205078, "logits/rejected": 0.5566681623458862, "logps/chosen": -0.9843193292617798, "logps/rejected": -1.1436246633529663, "loss": 0.5471, "nll_loss": 0.4893374741077423, "rewards/accuracies": 0.75, "rewards/chosen": -0.09843192994594574, "rewards/margins": 0.01593054085969925, "rewards/rejected": -0.11436247825622559, "step": 216 }, { "epoch": 0.13499222395023328, "grad_norm": 0.2810547947883606, "learning_rate": 3.915e-05, "log_odds_chosen": 0.579887330532074, "log_odds_ratio": -0.5066784620285034, "logits/chosen": 1.3967379331588745, "logits/rejected": 1.9853203296661377, "logps/chosen": -0.9148995280265808, "logps/rejected": -1.2805899381637573, "loss": 0.786, "nll_loss": 0.7353231310844421, "rewards/accuracies": 0.75, "rewards/chosen": -0.09148994833230972, "rewards/margins": 0.03656903654336929, "rewards/rejected": -0.1280589997768402, "step": 217 }, { "epoch": 0.13561430793157075, "grad_norm": 0.32773271203041077, "learning_rate": 3.91e-05, "log_odds_chosen": 1.0615527629852295, "log_odds_ratio": -0.4024091958999634, "logits/chosen": 1.4982575178146362, "logits/rejected": 2.0108492374420166, "logps/chosen": -0.6647369861602783, "logps/rejected": -1.3382258415222168, "loss": 0.6722, "nll_loss": 0.6319430470466614, "rewards/accuracies": 0.875, "rewards/chosen": -0.06647370010614395, "rewards/margins": 0.0673488974571228, "rewards/rejected": -0.13382260501384735, "step": 218 }, { "epoch": 0.13623639191290823, "grad_norm": 0.2670206129550934, "learning_rate": 3.905e-05, "log_odds_chosen": 0.33397001028060913, "log_odds_ratio": -0.5816282033920288, "logits/chosen": 0.1389634609222412, "logits/rejected": 1.4346809387207031, "logps/chosen": -1.0145219564437866, "logps/rejected": -1.2008839845657349, "loss": 0.5609, "nll_loss": 0.502739429473877, "rewards/accuracies": 0.875, "rewards/chosen": -0.10145218670368195, "rewards/margins": 0.01863621175289154, "rewards/rejected": -0.12008840590715408, "step": 219 }, { "epoch": 0.1368584758942457, "grad_norm": 14.104445457458496, "learning_rate": 3.9000000000000006e-05, "log_odds_chosen": -0.10747827589511871, "log_odds_ratio": -0.845554530620575, "logits/chosen": 1.2553915977478027, "logits/rejected": 2.3911399841308594, "logps/chosen": -1.141734004020691, "logps/rejected": -1.053473711013794, "loss": 1.0518, "nll_loss": 0.967269778251648, "rewards/accuracies": 0.125, "rewards/chosen": -0.11417339742183685, "rewards/margins": -0.00882603321224451, "rewards/rejected": -0.10534737259149551, "step": 220 }, { "epoch": 0.1374805598755832, "grad_norm": 0.25021231174468994, "learning_rate": 3.8950000000000005e-05, "log_odds_chosen": 0.47089827060699463, "log_odds_ratio": -0.5921870470046997, "logits/chosen": 2.0083541870117188, "logits/rejected": 3.1078169345855713, "logps/chosen": -0.9033266305923462, "logps/rejected": -1.06514310836792, "loss": 0.839, "nll_loss": 0.7797623872756958, "rewards/accuracies": 0.5, "rewards/chosen": -0.09033266454935074, "rewards/margins": 0.016181640326976776, "rewards/rejected": -0.10651430487632751, "step": 221 }, { "epoch": 0.13810264385692067, "grad_norm": 10.2030611038208, "learning_rate": 3.8900000000000004e-05, "log_odds_chosen": 0.5311214327812195, "log_odds_ratio": -0.5039387941360474, "logits/chosen": 0.4797963500022888, "logits/rejected": 2.54323410987854, "logps/chosen": -0.9488534331321716, "logps/rejected": -1.3131181001663208, "loss": 0.6864, "nll_loss": 0.6359692811965942, "rewards/accuracies": 0.75, "rewards/chosen": -0.09488534927368164, "rewards/margins": 0.03642646223306656, "rewards/rejected": -0.1313118040561676, "step": 222 }, { "epoch": 0.13872472783825818, "grad_norm": 2.635735511779785, "learning_rate": 3.885e-05, "log_odds_chosen": 0.4188484251499176, "log_odds_ratio": -0.5459132194519043, "logits/chosen": 1.5027981996536255, "logits/rejected": 2.377605676651001, "logps/chosen": -0.9653737545013428, "logps/rejected": -1.2187042236328125, "loss": 0.792, "nll_loss": 0.7373826503753662, "rewards/accuracies": 0.75, "rewards/chosen": -0.09653738141059875, "rewards/margins": 0.025333039462566376, "rewards/rejected": -0.12187041342258453, "step": 223 }, { "epoch": 0.13934681181959566, "grad_norm": 0.4424149692058563, "learning_rate": 3.88e-05, "log_odds_chosen": 0.8767533898353577, "log_odds_ratio": -0.5110883116722107, "logits/chosen": -1.5340461730957031, "logits/rejected": 1.1958626508712769, "logps/chosen": -0.9010230898857117, "logps/rejected": -1.3352127075195312, "loss": 0.4859, "nll_loss": 0.43478310108184814, "rewards/accuracies": 0.75, "rewards/chosen": -0.09010231494903564, "rewards/margins": 0.043418966233730316, "rewards/rejected": -0.13352127373218536, "step": 224 }, { "epoch": 0.13996889580093314, "grad_norm": 0.6511993408203125, "learning_rate": 3.875e-05, "log_odds_chosen": 0.3861117959022522, "log_odds_ratio": -0.5581840872764587, "logits/chosen": -0.923552930355072, "logits/rejected": 1.8610806465148926, "logps/chosen": -1.0786206722259521, "logps/rejected": -1.2818100452423096, "loss": 0.5934, "nll_loss": 0.537615180015564, "rewards/accuracies": 0.625, "rewards/chosen": -0.10786207020282745, "rewards/margins": 0.02031894586980343, "rewards/rejected": -0.12818101048469543, "step": 225 }, { "epoch": 0.14059097978227061, "grad_norm": 0.24201221764087677, "learning_rate": 3.8700000000000006e-05, "log_odds_chosen": 0.5091780424118042, "log_odds_ratio": -0.5338168144226074, "logits/chosen": 1.6636664867401123, "logits/rejected": 2.6963648796081543, "logps/chosen": -0.8673657178878784, "logps/rejected": -1.1037362813949585, "loss": 0.7649, "nll_loss": 0.7115618586540222, "rewards/accuracies": 0.875, "rewards/chosen": -0.08673657476902008, "rewards/margins": 0.023637056350708008, "rewards/rejected": -0.11037363111972809, "step": 226 }, { "epoch": 0.1412130637636081, "grad_norm": 0.25063690543174744, "learning_rate": 3.8650000000000004e-05, "log_odds_chosen": 0.5832358598709106, "log_odds_ratio": -0.5092697739601135, "logits/chosen": -0.03008924424648285, "logits/rejected": 0.8798378705978394, "logps/chosen": -0.8644341230392456, "logps/rejected": -1.1819239854812622, "loss": 0.6909, "nll_loss": 0.6399303674697876, "rewards/accuracies": 0.75, "rewards/chosen": -0.08644340932369232, "rewards/margins": 0.031748995184898376, "rewards/rejected": -0.1181924045085907, "step": 227 }, { "epoch": 0.14183514774494557, "grad_norm": 0.24055233597755432, "learning_rate": 3.86e-05, "log_odds_chosen": 1.0738061666488647, "log_odds_ratio": -0.3834771513938904, "logits/chosen": 1.553779125213623, "logits/rejected": 2.292296886444092, "logps/chosen": -0.8580217361450195, "logps/rejected": -1.567826509475708, "loss": 0.8033, "nll_loss": 0.7649025917053223, "rewards/accuracies": 0.875, "rewards/chosen": -0.08580217510461807, "rewards/margins": 0.07098047435283661, "rewards/rejected": -0.15678265690803528, "step": 228 }, { "epoch": 0.14245723172628305, "grad_norm": 0.25163745880126953, "learning_rate": 3.855e-05, "log_odds_chosen": 0.2982984483242035, "log_odds_ratio": -0.5847681164741516, "logits/chosen": 0.5120735764503479, "logits/rejected": 2.372066020965576, "logps/chosen": -0.9295814037322998, "logps/rejected": -1.0953707695007324, "loss": 0.6999, "nll_loss": 0.641383171081543, "rewards/accuracies": 0.625, "rewards/chosen": -0.09295813739299774, "rewards/margins": 0.01657894067466259, "rewards/rejected": -0.10953707993030548, "step": 229 }, { "epoch": 0.14307931570762053, "grad_norm": 0.23865918815135956, "learning_rate": 3.85e-05, "log_odds_chosen": 0.5459116697311401, "log_odds_ratio": -0.5268529653549194, "logits/chosen": 1.0045875310897827, "logits/rejected": 2.660670042037964, "logps/chosen": -0.8819479942321777, "logps/rejected": -1.2443805932998657, "loss": 0.6868, "nll_loss": 0.6341012120246887, "rewards/accuracies": 0.875, "rewards/chosen": -0.08819480240345001, "rewards/margins": 0.036243267357349396, "rewards/rejected": -0.12443806976079941, "step": 230 }, { "epoch": 0.143701399688958, "grad_norm": 0.26255959272384644, "learning_rate": 3.845e-05, "log_odds_chosen": -0.15134698152542114, "log_odds_ratio": -0.779100239276886, "logits/chosen": 0.3234993517398834, "logits/rejected": 1.570976734161377, "logps/chosen": -1.0752030611038208, "logps/rejected": -0.9749929904937744, "loss": 0.6795, "nll_loss": 0.601568341255188, "rewards/accuracies": 0.375, "rewards/chosen": -0.10752031207084656, "rewards/margins": -0.010021003894507885, "rewards/rejected": -0.0974992960691452, "step": 231 }, { "epoch": 0.1443234836702955, "grad_norm": 0.29671376943588257, "learning_rate": 3.8400000000000005e-05, "log_odds_chosen": 0.5076391696929932, "log_odds_ratio": -0.5193696618080139, "logits/chosen": 0.057988643646240234, "logits/rejected": 1.8262519836425781, "logps/chosen": -1.0712543725967407, "logps/rejected": -1.4361586570739746, "loss": 0.5648, "nll_loss": 0.5128502249717712, "rewards/accuracies": 0.75, "rewards/chosen": -0.10712544620037079, "rewards/margins": 0.036490414291620255, "rewards/rejected": -0.14361585676670074, "step": 232 }, { "epoch": 0.14494556765163297, "grad_norm": 0.2637360990047455, "learning_rate": 3.8350000000000004e-05, "log_odds_chosen": 0.7366672158241272, "log_odds_ratio": -0.46905070543289185, "logits/chosen": 1.854955792427063, "logits/rejected": 2.6175291538238525, "logps/chosen": -0.9328523874282837, "logps/rejected": -1.3441451787948608, "loss": 0.8956, "nll_loss": 0.8487153053283691, "rewards/accuracies": 0.875, "rewards/chosen": -0.09328524768352509, "rewards/margins": 0.04112928360700607, "rewards/rejected": -0.13441452383995056, "step": 233 }, { "epoch": 0.14556765163297045, "grad_norm": 0.49430274963378906, "learning_rate": 3.83e-05, "log_odds_chosen": -0.25712642073631287, "log_odds_ratio": -0.8602423667907715, "logits/chosen": 1.4578043222427368, "logits/rejected": 1.9011411666870117, "logps/chosen": -1.434701919555664, "logps/rejected": -1.2270002365112305, "loss": 0.7861, "nll_loss": 0.7000505924224854, "rewards/accuracies": 0.375, "rewards/chosen": -0.14347019791603088, "rewards/margins": -0.020770173519849777, "rewards/rejected": -0.12270002067089081, "step": 234 }, { "epoch": 0.14618973561430793, "grad_norm": 0.3046986758708954, "learning_rate": 3.825e-05, "log_odds_chosen": 0.23919163644313812, "log_odds_ratio": -0.627653181552887, "logits/chosen": 0.6866488456726074, "logits/rejected": 0.5810664296150208, "logps/chosen": -0.9853194952011108, "logps/rejected": -1.1365830898284912, "loss": 0.6872, "nll_loss": 0.6244243383407593, "rewards/accuracies": 0.5, "rewards/chosen": -0.09853194653987885, "rewards/margins": 0.015126367099583149, "rewards/rejected": -0.11365832388401031, "step": 235 }, { "epoch": 0.1468118195956454, "grad_norm": 0.25227728486061096, "learning_rate": 3.82e-05, "log_odds_chosen": 0.22255516052246094, "log_odds_ratio": -0.6572456359863281, "logits/chosen": 0.5956082940101624, "logits/rejected": 1.849258303642273, "logps/chosen": -0.964582085609436, "logps/rejected": -1.040386438369751, "loss": 0.7036, "nll_loss": 0.6379085779190063, "rewards/accuracies": 0.375, "rewards/chosen": -0.09645821154117584, "rewards/margins": 0.007580441422760487, "rewards/rejected": -0.10403865575790405, "step": 236 }, { "epoch": 0.14743390357698288, "grad_norm": 0.2732040584087372, "learning_rate": 3.8150000000000006e-05, "log_odds_chosen": 0.020949020981788635, "log_odds_ratio": -0.704890251159668, "logits/chosen": 0.8338685631752014, "logits/rejected": 2.4033875465393066, "logps/chosen": -1.0415844917297363, "logps/rejected": -1.0674909353256226, "loss": 0.7705, "nll_loss": 0.6999964714050293, "rewards/accuracies": 0.625, "rewards/chosen": -0.1041584387421608, "rewards/margins": 0.0025906474329531193, "rewards/rejected": -0.10674908757209778, "step": 237 }, { "epoch": 0.14805598755832036, "grad_norm": 0.3222413957118988, "learning_rate": 3.8100000000000005e-05, "log_odds_chosen": 0.291021466255188, "log_odds_ratio": -0.6350386738777161, "logits/chosen": 0.5952332019805908, "logits/rejected": 1.7890355587005615, "logps/chosen": -1.1999800205230713, "logps/rejected": -1.4598280191421509, "loss": 0.6637, "nll_loss": 0.6002064943313599, "rewards/accuracies": 0.625, "rewards/chosen": -0.1199980080127716, "rewards/margins": 0.02598479948937893, "rewards/rejected": -0.1459828019142151, "step": 238 }, { "epoch": 0.14867807153965784, "grad_norm": 0.29912009835243225, "learning_rate": 3.805e-05, "log_odds_chosen": 0.09837212413549423, "log_odds_ratio": -0.6721018552780151, "logits/chosen": 0.2817448377609253, "logits/rejected": 1.6711862087249756, "logps/chosen": -1.1941783428192139, "logps/rejected": -1.2375702857971191, "loss": 0.7284, "nll_loss": 0.6612299084663391, "rewards/accuracies": 0.5, "rewards/chosen": -0.11941783875226974, "rewards/margins": 0.004339195322245359, "rewards/rejected": -0.1237570270895958, "step": 239 }, { "epoch": 0.14930015552099535, "grad_norm": 0.22214223444461823, "learning_rate": 3.8e-05, "log_odds_chosen": 1.0463130474090576, "log_odds_ratio": -0.42685216665267944, "logits/chosen": -0.5133315324783325, "logits/rejected": 0.24643856287002563, "logps/chosen": -0.7221020460128784, "logps/rejected": -1.3742396831512451, "loss": 0.5453, "nll_loss": 0.5026174187660217, "rewards/accuracies": 0.75, "rewards/chosen": -0.07221020758152008, "rewards/margins": 0.06521373987197876, "rewards/rejected": -0.13742394745349884, "step": 240 }, { "epoch": 0.14992223950233283, "grad_norm": 0.3170778751373291, "learning_rate": 3.795e-05, "log_odds_chosen": 0.5840449929237366, "log_odds_ratio": -0.5006461143493652, "logits/chosen": 1.4450798034667969, "logits/rejected": 1.941690444946289, "logps/chosen": -1.015654444694519, "logps/rejected": -1.3106660842895508, "loss": 0.6944, "nll_loss": 0.644288182258606, "rewards/accuracies": 0.875, "rewards/chosen": -0.10156545042991638, "rewards/margins": 0.029501162469387054, "rewards/rejected": -0.13106660544872284, "step": 241 }, { "epoch": 0.1505443234836703, "grad_norm": 0.39447447657585144, "learning_rate": 3.79e-05, "log_odds_chosen": 0.01272672414779663, "log_odds_ratio": -0.7251988649368286, "logits/chosen": 1.618621826171875, "logits/rejected": 1.5481436252593994, "logps/chosen": -0.9903098940849304, "logps/rejected": -1.0202150344848633, "loss": 0.7432, "nll_loss": 0.6706516742706299, "rewards/accuracies": 0.25, "rewards/chosen": -0.099031001329422, "rewards/margins": 0.0029905084520578384, "rewards/rejected": -0.10202150046825409, "step": 242 }, { "epoch": 0.15116640746500778, "grad_norm": 0.24457427859306335, "learning_rate": 3.7850000000000005e-05, "log_odds_chosen": 0.30422642827033997, "log_odds_ratio": -0.5725257396697998, "logits/chosen": -0.17796938121318817, "logits/rejected": 1.5813944339752197, "logps/chosen": -1.0886629819869995, "logps/rejected": -1.2814066410064697, "loss": 0.6392, "nll_loss": 0.5819449424743652, "rewards/accuracies": 0.75, "rewards/chosen": -0.10886628925800323, "rewards/margins": 0.019274361431598663, "rewards/rejected": -0.1281406581401825, "step": 243 }, { "epoch": 0.15178849144634526, "grad_norm": 0.33944302797317505, "learning_rate": 3.7800000000000004e-05, "log_odds_chosen": 0.7016199231147766, "log_odds_ratio": -0.5355318784713745, "logits/chosen": 2.0018675327301025, "logits/rejected": 1.7284493446350098, "logps/chosen": -0.9196906685829163, "logps/rejected": -1.4196425676345825, "loss": 0.7753, "nll_loss": 0.7217522263526917, "rewards/accuracies": 0.5, "rewards/chosen": -0.0919690728187561, "rewards/margins": 0.049995195120573044, "rewards/rejected": -0.14196425676345825, "step": 244 }, { "epoch": 0.15241057542768274, "grad_norm": 0.23365607857704163, "learning_rate": 3.775e-05, "log_odds_chosen": 0.09530195593833923, "log_odds_ratio": -0.6637953519821167, "logits/chosen": 1.1631795167922974, "logits/rejected": 1.794580101966858, "logps/chosen": -1.027880072593689, "logps/rejected": -1.0927073955535889, "loss": 0.7318, "nll_loss": 0.6654148697853088, "rewards/accuracies": 0.375, "rewards/chosen": -0.10278801620006561, "rewards/margins": 0.0064827194437384605, "rewards/rejected": -0.10927073657512665, "step": 245 }, { "epoch": 0.15303265940902022, "grad_norm": 0.342902272939682, "learning_rate": 3.77e-05, "log_odds_chosen": 1.1789519786834717, "log_odds_ratio": -0.3561103940010071, "logits/chosen": 0.7682339549064636, "logits/rejected": 1.2601045370101929, "logps/chosen": -0.6911965608596802, "logps/rejected": -1.3815280199050903, "loss": 0.6737, "nll_loss": 0.6381384134292603, "rewards/accuracies": 0.875, "rewards/chosen": -0.0691196545958519, "rewards/margins": 0.06903316080570221, "rewards/rejected": -0.1381528079509735, "step": 246 }, { "epoch": 0.1536547433903577, "grad_norm": 0.40401729941368103, "learning_rate": 3.765e-05, "log_odds_chosen": 0.05288776755332947, "log_odds_ratio": -0.7165863513946533, "logits/chosen": 1.2672884464263916, "logits/rejected": 2.7773818969726562, "logps/chosen": -0.8994283676147461, "logps/rejected": -0.9454131722450256, "loss": 0.7608, "nll_loss": 0.6891713738441467, "rewards/accuracies": 0.5, "rewards/chosen": -0.08994284272193909, "rewards/margins": 0.004598475992679596, "rewards/rejected": -0.09454131126403809, "step": 247 }, { "epoch": 0.15427682737169518, "grad_norm": 0.37135761976242065, "learning_rate": 3.76e-05, "log_odds_chosen": 0.08654891699552536, "log_odds_ratio": -0.7375810146331787, "logits/chosen": 1.4379180669784546, "logits/rejected": 2.5666956901550293, "logps/chosen": -0.9742010831832886, "logps/rejected": -1.0682315826416016, "loss": 0.7763, "nll_loss": 0.7025222182273865, "rewards/accuracies": 0.625, "rewards/chosen": -0.0974201112985611, "rewards/margins": 0.009403041563928127, "rewards/rejected": -0.1068231463432312, "step": 248 }, { "epoch": 0.15489891135303266, "grad_norm": 0.43483659625053406, "learning_rate": 3.7550000000000005e-05, "log_odds_chosen": 0.4684327244758606, "log_odds_ratio": -0.6200071573257446, "logits/chosen": 1.2784299850463867, "logits/rejected": 2.3502230644226074, "logps/chosen": -0.9977530837059021, "logps/rejected": -1.2462825775146484, "loss": 0.8474, "nll_loss": 0.7854040265083313, "rewards/accuracies": 0.625, "rewards/chosen": -0.09977530688047409, "rewards/margins": 0.024852953851222992, "rewards/rejected": -0.12462826073169708, "step": 249 }, { "epoch": 0.15552099533437014, "grad_norm": 0.29905253648757935, "learning_rate": 3.7500000000000003e-05, "log_odds_chosen": 1.0909241437911987, "log_odds_ratio": -0.4056417644023895, "logits/chosen": 3.0120909214019775, "logits/rejected": 3.5190353393554688, "logps/chosen": -0.7593393325805664, "logps/rejected": -1.3986889123916626, "loss": 0.9261, "nll_loss": 0.8855429291725159, "rewards/accuracies": 0.875, "rewards/chosen": -0.07593393325805664, "rewards/margins": 0.06393495947122574, "rewards/rejected": -0.13986890017986298, "step": 250 }, { "epoch": 0.15614307931570762, "grad_norm": 0.27395838499069214, "learning_rate": 3.745e-05, "log_odds_chosen": 0.778897225856781, "log_odds_ratio": -0.4891205430030823, "logits/chosen": 1.333661437034607, "logits/rejected": 2.29944109916687, "logps/chosen": -0.8269086480140686, "logps/rejected": -1.2990074157714844, "loss": 0.7312, "nll_loss": 0.6823058128356934, "rewards/accuracies": 0.75, "rewards/chosen": -0.08269086480140686, "rewards/margins": 0.04720987379550934, "rewards/rejected": -0.1299007385969162, "step": 251 }, { "epoch": 0.1567651632970451, "grad_norm": 0.24890701472759247, "learning_rate": 3.74e-05, "log_odds_chosen": 0.9393471479415894, "log_odds_ratio": -0.5004830360412598, "logits/chosen": 1.544869065284729, "logits/rejected": 1.4583313465118408, "logps/chosen": -1.0949639081954956, "logps/rejected": -1.7828260660171509, "loss": 0.7394, "nll_loss": 0.689353883266449, "rewards/accuracies": 0.75, "rewards/chosen": -0.10949639976024628, "rewards/margins": 0.06878622621297836, "rewards/rejected": -0.17828263342380524, "step": 252 }, { "epoch": 0.15738724727838257, "grad_norm": 0.23351988196372986, "learning_rate": 3.735e-05, "log_odds_chosen": 0.4725225269794464, "log_odds_ratio": -0.5411194562911987, "logits/chosen": 0.5843527317047119, "logits/rejected": 1.433046579360962, "logps/chosen": -1.076469898223877, "logps/rejected": -1.3948270082473755, "loss": 0.6985, "nll_loss": 0.6443914175033569, "rewards/accuracies": 0.625, "rewards/chosen": -0.10764698684215546, "rewards/margins": 0.03183571621775627, "rewards/rejected": -0.13948270678520203, "step": 253 }, { "epoch": 0.15800933125972005, "grad_norm": 0.21208809316158295, "learning_rate": 3.73e-05, "log_odds_chosen": 0.7200300693511963, "log_odds_ratio": -0.4416605830192566, "logits/chosen": 0.6532263159751892, "logits/rejected": 2.229447364807129, "logps/chosen": -0.8342301249504089, "logps/rejected": -1.3484030961990356, "loss": 0.6562, "nll_loss": 0.6120399236679077, "rewards/accuracies": 1.0, "rewards/chosen": -0.08342301100492477, "rewards/margins": 0.05141729116439819, "rewards/rejected": -0.13484030961990356, "step": 254 }, { "epoch": 0.15863141524105753, "grad_norm": 0.5676325559616089, "learning_rate": 3.7250000000000004e-05, "log_odds_chosen": 0.865597128868103, "log_odds_ratio": -0.45989227294921875, "logits/chosen": 0.8128045797348022, "logits/rejected": 2.4521899223327637, "logps/chosen": -0.9576820135116577, "logps/rejected": -1.3842835426330566, "loss": 0.6939, "nll_loss": 0.6479240655899048, "rewards/accuracies": 0.875, "rewards/chosen": -0.09576819837093353, "rewards/margins": 0.04266016185283661, "rewards/rejected": -0.13842836022377014, "step": 255 }, { "epoch": 0.159253499222395, "grad_norm": 0.2591799199581146, "learning_rate": 3.72e-05, "log_odds_chosen": 0.2194564789533615, "log_odds_ratio": -0.6655052900314331, "logits/chosen": 1.342261791229248, "logits/rejected": 2.7451820373535156, "logps/chosen": -1.106384038925171, "logps/rejected": -1.2910670042037964, "loss": 0.8368, "nll_loss": 0.7702622413635254, "rewards/accuracies": 0.375, "rewards/chosen": -0.11063840985298157, "rewards/margins": 0.018468298017978668, "rewards/rejected": -0.12910671532154083, "step": 256 }, { "epoch": 0.1598755832037325, "grad_norm": 0.2928728759288788, "learning_rate": 3.715e-05, "log_odds_chosen": 0.38483601808547974, "log_odds_ratio": -0.5389297008514404, "logits/chosen": 0.9790713787078857, "logits/rejected": 1.9639866352081299, "logps/chosen": -0.9232936501502991, "logps/rejected": -1.1887941360473633, "loss": 0.6545, "nll_loss": 0.6006432771682739, "rewards/accuracies": 1.0, "rewards/chosen": -0.09232936054468155, "rewards/margins": 0.026550058275461197, "rewards/rejected": -0.11887942254543304, "step": 257 }, { "epoch": 0.16049766718507, "grad_norm": 0.3074938654899597, "learning_rate": 3.71e-05, "log_odds_chosen": -0.0491148866713047, "log_odds_ratio": -0.7412834763526917, "logits/chosen": 1.0462923049926758, "logits/rejected": 1.831377625465393, "logps/chosen": -1.1442368030548096, "logps/rejected": -1.0997331142425537, "loss": 0.7083, "nll_loss": 0.6341397166252136, "rewards/accuracies": 0.25, "rewards/chosen": -0.11442368477582932, "rewards/margins": -0.004450378008186817, "rewards/rejected": -0.10997331142425537, "step": 258 }, { "epoch": 0.16111975116640748, "grad_norm": 0.3968244791030884, "learning_rate": 3.705e-05, "log_odds_chosen": 0.7382861971855164, "log_odds_ratio": -0.4755243957042694, "logits/chosen": 0.9706071615219116, "logits/rejected": 1.7821481227874756, "logps/chosen": -0.8874275088310242, "logps/rejected": -1.2820873260498047, "loss": 0.691, "nll_loss": 0.643477201461792, "rewards/accuracies": 0.75, "rewards/chosen": -0.08874274790287018, "rewards/margins": 0.03946599364280701, "rewards/rejected": -0.12820874154567719, "step": 259 }, { "epoch": 0.16174183514774496, "grad_norm": 0.32076671719551086, "learning_rate": 3.7e-05, "log_odds_chosen": 0.11305558681488037, "log_odds_ratio": -0.7279486656188965, "logits/chosen": 0.5689743757247925, "logits/rejected": 0.621085524559021, "logps/chosen": -1.1365184783935547, "logps/rejected": -1.2172517776489258, "loss": 0.6489, "nll_loss": 0.5761117339134216, "rewards/accuracies": 0.5, "rewards/chosen": -0.11365185678005219, "rewards/margins": 0.008073337376117706, "rewards/rejected": -0.1217251867055893, "step": 260 }, { "epoch": 0.16236391912908243, "grad_norm": 0.33515191078186035, "learning_rate": 3.6950000000000004e-05, "log_odds_chosen": 0.40560805797576904, "log_odds_ratio": -0.543319821357727, "logits/chosen": 1.7440519332885742, "logits/rejected": 2.6206088066101074, "logps/chosen": -0.9863896369934082, "logps/rejected": -1.235954761505127, "loss": 0.7871, "nll_loss": 0.7327961325645447, "rewards/accuracies": 0.875, "rewards/chosen": -0.09863896667957306, "rewards/margins": 0.024956511333584785, "rewards/rejected": -0.12359548360109329, "step": 261 }, { "epoch": 0.1629860031104199, "grad_norm": 0.2292770892381668, "learning_rate": 3.69e-05, "log_odds_chosen": 0.2317352145910263, "log_odds_ratio": -0.5968304872512817, "logits/chosen": -0.9270865321159363, "logits/rejected": 1.1997534036636353, "logps/chosen": -0.9344391226768494, "logps/rejected": -1.0741225481033325, "loss": 0.4987, "nll_loss": 0.438981294631958, "rewards/accuracies": 0.75, "rewards/chosen": -0.09344391524791718, "rewards/margins": 0.013968334533274174, "rewards/rejected": -0.10741224884986877, "step": 262 }, { "epoch": 0.1636080870917574, "grad_norm": 0.31699907779693604, "learning_rate": 3.685e-05, "log_odds_chosen": 1.1996018886566162, "log_odds_ratio": -0.30720216035842896, "logits/chosen": 1.0490620136260986, "logits/rejected": 0.9187926054000854, "logps/chosen": -0.8102648258209229, "logps/rejected": -1.644590973854065, "loss": 0.6033, "nll_loss": 0.5725414752960205, "rewards/accuracies": 1.0, "rewards/chosen": -0.08102648705244064, "rewards/margins": 0.08343260735273361, "rewards/rejected": -0.16445909440517426, "step": 263 }, { "epoch": 0.16423017107309487, "grad_norm": 0.9323865175247192, "learning_rate": 3.68e-05, "log_odds_chosen": 0.5835751295089722, "log_odds_ratio": -0.5283180475234985, "logits/chosen": 1.8825874328613281, "logits/rejected": 1.0911766290664673, "logps/chosen": -1.128019094467163, "logps/rejected": -1.5025029182434082, "loss": 0.7552, "nll_loss": 0.7023845911026001, "rewards/accuracies": 0.875, "rewards/chosen": -0.11280190944671631, "rewards/margins": 0.03744838759303093, "rewards/rejected": -0.15025030076503754, "step": 264 }, { "epoch": 0.16485225505443235, "grad_norm": 0.32262593507766724, "learning_rate": 3.675e-05, "log_odds_chosen": 0.255434513092041, "log_odds_ratio": -0.5993427634239197, "logits/chosen": 0.18540018796920776, "logits/rejected": 2.5416390895843506, "logps/chosen": -0.8826195597648621, "logps/rejected": -1.0138144493103027, "loss": 0.6066, "nll_loss": 0.546635091304779, "rewards/accuracies": 0.75, "rewards/chosen": -0.08826196193695068, "rewards/margins": 0.013119479641318321, "rewards/rejected": -0.10138144344091415, "step": 265 }, { "epoch": 0.16547433903576983, "grad_norm": 0.3099498748779297, "learning_rate": 3.6700000000000004e-05, "log_odds_chosen": 0.4512448310852051, "log_odds_ratio": -0.556388258934021, "logits/chosen": -0.3677622079849243, "logits/rejected": 0.3967688977718353, "logps/chosen": -1.083742380142212, "logps/rejected": -1.4190504550933838, "loss": 0.5988, "nll_loss": 0.5431588292121887, "rewards/accuracies": 0.625, "rewards/chosen": -0.10837424546480179, "rewards/margins": 0.03353080153465271, "rewards/rejected": -0.1419050395488739, "step": 266 }, { "epoch": 0.1660964230171073, "grad_norm": 0.9130797982215881, "learning_rate": 3.665e-05, "log_odds_chosen": 0.501509428024292, "log_odds_ratio": -0.504950761795044, "logits/chosen": 1.868678092956543, "logits/rejected": 2.891838312149048, "logps/chosen": -1.104312539100647, "logps/rejected": -1.4863413572311401, "loss": 0.7864, "nll_loss": 0.7358621954917908, "rewards/accuracies": 0.75, "rewards/chosen": -0.1104312613606453, "rewards/margins": 0.03820287063717842, "rewards/rejected": -0.148634135723114, "step": 267 }, { "epoch": 0.1667185069984448, "grad_norm": 0.232061967253685, "learning_rate": 3.66e-05, "log_odds_chosen": 0.2719687521457672, "log_odds_ratio": -0.6023093461990356, "logits/chosen": 0.2932952642440796, "logits/rejected": 2.1686325073242188, "logps/chosen": -1.1295243501663208, "logps/rejected": -1.3326761722564697, "loss": 0.6743, "nll_loss": 0.6140591502189636, "rewards/accuracies": 0.5, "rewards/chosen": -0.11295244097709656, "rewards/margins": 0.02031518518924713, "rewards/rejected": -0.1332676261663437, "step": 268 }, { "epoch": 0.16734059097978227, "grad_norm": 1.3609570264816284, "learning_rate": 3.655e-05, "log_odds_chosen": -0.1332504153251648, "log_odds_ratio": -0.774581789970398, "logits/chosen": 0.7395839095115662, "logits/rejected": 1.5577596426010132, "logps/chosen": -1.1454954147338867, "logps/rejected": -1.0469775199890137, "loss": 0.7448, "nll_loss": 0.6673346757888794, "rewards/accuracies": 0.375, "rewards/chosen": -0.11454953998327255, "rewards/margins": -0.009851792827248573, "rewards/rejected": -0.10469775646924973, "step": 269 }, { "epoch": 0.16796267496111975, "grad_norm": 0.2783643901348114, "learning_rate": 3.65e-05, "log_odds_chosen": 0.3987334072589874, "log_odds_ratio": -0.617426872253418, "logits/chosen": 2.3885960578918457, "logits/rejected": 3.718479633331299, "logps/chosen": -1.0800246000289917, "logps/rejected": -1.39646315574646, "loss": 0.8736, "nll_loss": 0.8118734955787659, "rewards/accuracies": 0.625, "rewards/chosen": -0.10800246894359589, "rewards/margins": 0.03164386749267578, "rewards/rejected": -0.13964633643627167, "step": 270 }, { "epoch": 0.16858475894245722, "grad_norm": 0.3506051301956177, "learning_rate": 3.645e-05, "log_odds_chosen": 0.32452934980392456, "log_odds_ratio": -0.6055722236633301, "logits/chosen": 1.9042444229125977, "logits/rejected": 2.771538257598877, "logps/chosen": -0.8178438544273376, "logps/rejected": -1.0214296579360962, "loss": 0.8372, "nll_loss": 0.7766833901405334, "rewards/accuracies": 0.5, "rewards/chosen": -0.08178438246250153, "rewards/margins": 0.020358584821224213, "rewards/rejected": -0.10214296728372574, "step": 271 }, { "epoch": 0.1692068429237947, "grad_norm": 0.29172876477241516, "learning_rate": 3.6400000000000004e-05, "log_odds_chosen": 0.320917010307312, "log_odds_ratio": -0.6874103546142578, "logits/chosen": 1.4236152172088623, "logits/rejected": 2.6094887256622314, "logps/chosen": -0.9422950148582458, "logps/rejected": -1.0288571119308472, "loss": 0.8041, "nll_loss": 0.7353872060775757, "rewards/accuracies": 0.625, "rewards/chosen": -0.09422949701547623, "rewards/margins": 0.008656204678118229, "rewards/rejected": -0.10288571566343307, "step": 272 }, { "epoch": 0.16982892690513218, "grad_norm": 0.2542218565940857, "learning_rate": 3.635e-05, "log_odds_chosen": 0.542289137840271, "log_odds_ratio": -0.48945939540863037, "logits/chosen": 1.2661612033843994, "logits/rejected": 1.9925477504730225, "logps/chosen": -0.8164982199668884, "logps/rejected": -1.123006820678711, "loss": 0.8182, "nll_loss": 0.7692335247993469, "rewards/accuracies": 0.75, "rewards/chosen": -0.08164982497692108, "rewards/margins": 0.03065086528658867, "rewards/rejected": -0.11230067908763885, "step": 273 }, { "epoch": 0.17045101088646966, "grad_norm": 0.2937983274459839, "learning_rate": 3.63e-05, "log_odds_chosen": 1.1165152788162231, "log_odds_ratio": -0.4164004921913147, "logits/chosen": 1.1048343181610107, "logits/rejected": 2.457761764526367, "logps/chosen": -0.7275225520133972, "logps/rejected": -1.4913136959075928, "loss": 0.6468, "nll_loss": 0.6051377058029175, "rewards/accuracies": 0.75, "rewards/chosen": -0.07275225222110748, "rewards/margins": 0.07637912034988403, "rewards/rejected": -0.14913137257099152, "step": 274 }, { "epoch": 0.17107309486780714, "grad_norm": 0.3650302290916443, "learning_rate": 3.625e-05, "log_odds_chosen": 0.3353332281112671, "log_odds_ratio": -0.5689051151275635, "logits/chosen": 2.027620792388916, "logits/rejected": 3.2360668182373047, "logps/chosen": -0.9137417078018188, "logps/rejected": -1.1490848064422607, "loss": 0.7901, "nll_loss": 0.7331695556640625, "rewards/accuracies": 0.875, "rewards/chosen": -0.09137416630983353, "rewards/margins": 0.02353430539369583, "rewards/rejected": -0.11490847915410995, "step": 275 }, { "epoch": 0.17169517884914465, "grad_norm": 0.2370610237121582, "learning_rate": 3.62e-05, "log_odds_chosen": 0.3498604893684387, "log_odds_ratio": -0.6041584014892578, "logits/chosen": -0.5574699640274048, "logits/rejected": 1.5286118984222412, "logps/chosen": -1.06705641746521, "logps/rejected": -1.3181275129318237, "loss": 0.5466, "nll_loss": 0.48619967699050903, "rewards/accuracies": 0.75, "rewards/chosen": -0.10670565068721771, "rewards/margins": 0.025107108056545258, "rewards/rejected": -0.13181275129318237, "step": 276 }, { "epoch": 0.17231726283048213, "grad_norm": 0.2990007996559143, "learning_rate": 3.615e-05, "log_odds_chosen": 0.07544267177581787, "log_odds_ratio": -0.6616002917289734, "logits/chosen": 2.9491500854492188, "logits/rejected": 3.6665618419647217, "logps/chosen": -0.9842727184295654, "logps/rejected": -1.0366499423980713, "loss": 0.9659, "nll_loss": 0.8997125625610352, "rewards/accuracies": 0.5, "rewards/chosen": -0.09842726588249207, "rewards/margins": 0.005237726494669914, "rewards/rejected": -0.10366499423980713, "step": 277 }, { "epoch": 0.1729393468118196, "grad_norm": 1.6273510456085205, "learning_rate": 3.61e-05, "log_odds_chosen": 1.1774389743804932, "log_odds_ratio": -0.4116966128349304, "logits/chosen": 0.9792468547821045, "logits/rejected": 1.7449513673782349, "logps/chosen": -0.8007335066795349, "logps/rejected": -1.571602702140808, "loss": 0.7018, "nll_loss": 0.6606743335723877, "rewards/accuracies": 0.75, "rewards/chosen": -0.08007335662841797, "rewards/margins": 0.0770869255065918, "rewards/rejected": -0.15716028213500977, "step": 278 }, { "epoch": 0.17356143079315708, "grad_norm": 0.276890367269516, "learning_rate": 3.605e-05, "log_odds_chosen": 1.3592523336410522, "log_odds_ratio": -0.28409016132354736, "logits/chosen": 0.7802825570106506, "logits/rejected": 1.77823805809021, "logps/chosen": -0.6940028071403503, "logps/rejected": -1.5032212734222412, "loss": 0.5705, "nll_loss": 0.5420591831207275, "rewards/accuracies": 0.875, "rewards/chosen": -0.06940028071403503, "rewards/margins": 0.08092184364795685, "rewards/rejected": -0.15032212436199188, "step": 279 }, { "epoch": 0.17418351477449456, "grad_norm": 0.3467434048652649, "learning_rate": 3.6e-05, "log_odds_chosen": 0.3342052102088928, "log_odds_ratio": -0.5645737051963806, "logits/chosen": 1.7210997343063354, "logits/rejected": 2.7231311798095703, "logps/chosen": -0.9332855939865112, "logps/rejected": -1.1408545970916748, "loss": 0.7302, "nll_loss": 0.67376708984375, "rewards/accuracies": 0.75, "rewards/chosen": -0.093328557908535, "rewards/margins": 0.020756900310516357, "rewards/rejected": -0.11408545821905136, "step": 280 }, { "epoch": 0.17480559875583204, "grad_norm": 0.2782360017299652, "learning_rate": 3.595e-05, "log_odds_chosen": 0.4959554076194763, "log_odds_ratio": -0.597368597984314, "logits/chosen": 0.8248426914215088, "logits/rejected": 1.6470648050308228, "logps/chosen": -0.9373691082000732, "logps/rejected": -1.1336480379104614, "loss": 0.7087, "nll_loss": 0.6489871144294739, "rewards/accuracies": 0.5, "rewards/chosen": -0.0937369167804718, "rewards/margins": 0.0196278914809227, "rewards/rejected": -0.1133648082613945, "step": 281 }, { "epoch": 0.17542768273716952, "grad_norm": 0.270268052816391, "learning_rate": 3.59e-05, "log_odds_chosen": 0.804435133934021, "log_odds_ratio": -0.5560287237167358, "logits/chosen": 2.5741701126098633, "logits/rejected": 1.9791196584701538, "logps/chosen": -0.9681861400604248, "logps/rejected": -1.6530413627624512, "loss": 0.9065, "nll_loss": 0.8508476614952087, "rewards/accuracies": 0.625, "rewards/chosen": -0.09681861847639084, "rewards/margins": 0.06848552078008652, "rewards/rejected": -0.16530415415763855, "step": 282 }, { "epoch": 0.176049766718507, "grad_norm": 0.28208354115486145, "learning_rate": 3.585e-05, "log_odds_chosen": 0.49895477294921875, "log_odds_ratio": -0.6017277836799622, "logits/chosen": 0.2750604748725891, "logits/rejected": 2.2774534225463867, "logps/chosen": -1.0913801193237305, "logps/rejected": -1.2801364660263062, "loss": 0.6199, "nll_loss": 0.5597580671310425, "rewards/accuracies": 0.75, "rewards/chosen": -0.10913802683353424, "rewards/margins": 0.01887562870979309, "rewards/rejected": -0.12801367044448853, "step": 283 }, { "epoch": 0.17667185069984448, "grad_norm": 0.2498951107263565, "learning_rate": 3.58e-05, "log_odds_chosen": 0.6866430044174194, "log_odds_ratio": -0.5442928075790405, "logits/chosen": 0.35522377490997314, "logits/rejected": 0.7245559692382812, "logps/chosen": -0.9898964762687683, "logps/rejected": -1.554041862487793, "loss": 0.6184, "nll_loss": 0.5640102028846741, "rewards/accuracies": 0.625, "rewards/chosen": -0.09898965060710907, "rewards/margins": 0.05641452595591545, "rewards/rejected": -0.15540418028831482, "step": 284 }, { "epoch": 0.17729393468118196, "grad_norm": 0.29676172137260437, "learning_rate": 3.575e-05, "log_odds_chosen": 0.38468289375305176, "log_odds_ratio": -0.5942354798316956, "logits/chosen": -0.6643446087837219, "logits/rejected": 1.5239275693893433, "logps/chosen": -1.1168832778930664, "logps/rejected": -1.3112976551055908, "loss": 0.5597, "nll_loss": 0.5003250241279602, "rewards/accuracies": 0.625, "rewards/chosen": -0.11168833076953888, "rewards/margins": 0.01944144070148468, "rewards/rejected": -0.13112977147102356, "step": 285 }, { "epoch": 0.17791601866251944, "grad_norm": 0.9721925258636475, "learning_rate": 3.57e-05, "log_odds_chosen": 1.3216912746429443, "log_odds_ratio": -0.36708056926727295, "logits/chosen": 1.362522006034851, "logits/rejected": 2.3680434226989746, "logps/chosen": -0.9718787670135498, "logps/rejected": -2.045164108276367, "loss": 0.622, "nll_loss": 0.5853403806686401, "rewards/accuracies": 1.0, "rewards/chosen": -0.09718787670135498, "rewards/margins": 0.10732853412628174, "rewards/rejected": -0.20451641082763672, "step": 286 }, { "epoch": 0.17853810264385692, "grad_norm": 0.2858312427997589, "learning_rate": 3.565e-05, "log_odds_chosen": 1.0211000442504883, "log_odds_ratio": -0.3588743507862091, "logits/chosen": 1.6440815925598145, "logits/rejected": 2.406961679458618, "logps/chosen": -1.0506576299667358, "logps/rejected": -1.7641756534576416, "loss": 0.7087, "nll_loss": 0.6728169918060303, "rewards/accuracies": 1.0, "rewards/chosen": -0.10506576299667358, "rewards/margins": 0.07135181128978729, "rewards/rejected": -0.17641757428646088, "step": 287 }, { "epoch": 0.1791601866251944, "grad_norm": 0.27648454904556274, "learning_rate": 3.56e-05, "log_odds_chosen": 0.6506868600845337, "log_odds_ratio": -0.49529018998146057, "logits/chosen": 1.4622275829315186, "logits/rejected": 1.405707836151123, "logps/chosen": -0.974983811378479, "logps/rejected": -1.3737361431121826, "loss": 0.774, "nll_loss": 0.7245060205459595, "rewards/accuracies": 0.75, "rewards/chosen": -0.09749838709831238, "rewards/margins": 0.039875224232673645, "rewards/rejected": -0.13737361133098602, "step": 288 }, { "epoch": 0.17978227060653187, "grad_norm": 0.2890698313713074, "learning_rate": 3.555e-05, "log_odds_chosen": 0.971682071685791, "log_odds_ratio": -0.35939812660217285, "logits/chosen": 1.5366928577423096, "logits/rejected": 2.6480050086975098, "logps/chosen": -0.7586332559585571, "logps/rejected": -1.406290888786316, "loss": 0.6716, "nll_loss": 0.6356822848320007, "rewards/accuracies": 1.0, "rewards/chosen": -0.07586333155632019, "rewards/margins": 0.06476576626300812, "rewards/rejected": -0.1406290978193283, "step": 289 }, { "epoch": 0.18040435458786935, "grad_norm": 0.29232749342918396, "learning_rate": 3.55e-05, "log_odds_chosen": 1.324554443359375, "log_odds_ratio": -0.3050360679626465, "logits/chosen": 0.5814070701599121, "logits/rejected": 2.793435573577881, "logps/chosen": -0.7117588520050049, "logps/rejected": -1.6056010723114014, "loss": 0.5403, "nll_loss": 0.5098059177398682, "rewards/accuracies": 0.875, "rewards/chosen": -0.07117588818073273, "rewards/margins": 0.08938423544168472, "rewards/rejected": -0.16056011617183685, "step": 290 }, { "epoch": 0.18102643856920683, "grad_norm": 0.29234594106674194, "learning_rate": 3.545e-05, "log_odds_chosen": 1.1288540363311768, "log_odds_ratio": -0.3035117983818054, "logits/chosen": 1.6192704439163208, "logits/rejected": 2.7230443954467773, "logps/chosen": -0.7904139757156372, "logps/rejected": -1.5225837230682373, "loss": 0.6339, "nll_loss": 0.6035600900650024, "rewards/accuracies": 1.0, "rewards/chosen": -0.07904140651226044, "rewards/margins": 0.0732169896364212, "rewards/rejected": -0.15225839614868164, "step": 291 }, { "epoch": 0.1816485225505443, "grad_norm": 0.43359532952308655, "learning_rate": 3.54e-05, "log_odds_chosen": 0.20111624896526337, "log_odds_ratio": -0.6230050325393677, "logits/chosen": 0.7468219995498657, "logits/rejected": 2.083394765853882, "logps/chosen": -1.0547893047332764, "logps/rejected": -1.2056244611740112, "loss": 0.6486, "nll_loss": 0.5863381624221802, "rewards/accuracies": 0.5, "rewards/chosen": -0.1054789274930954, "rewards/margins": 0.015083517879247665, "rewards/rejected": -0.12056245654821396, "step": 292 }, { "epoch": 0.1822706065318818, "grad_norm": 0.3505414128303528, "learning_rate": 3.535e-05, "log_odds_chosen": 0.34149283170700073, "log_odds_ratio": -0.5823227167129517, "logits/chosen": 1.2648005485534668, "logits/rejected": 3.214367389678955, "logps/chosen": -0.9986240863800049, "logps/rejected": -1.2265021800994873, "loss": 0.7081, "nll_loss": 0.6498473882675171, "rewards/accuracies": 0.625, "rewards/chosen": -0.09986241161823273, "rewards/margins": 0.022787809371948242, "rewards/rejected": -0.12265022099018097, "step": 293 }, { "epoch": 0.1828926905132193, "grad_norm": 0.3010155260562897, "learning_rate": 3.53e-05, "log_odds_chosen": 0.9094794988632202, "log_odds_ratio": -0.41278043389320374, "logits/chosen": 1.4252872467041016, "logits/rejected": 2.675663471221924, "logps/chosen": -0.8446587324142456, "logps/rejected": -1.3711124658584595, "loss": 0.7532, "nll_loss": 0.7119126319885254, "rewards/accuracies": 0.75, "rewards/chosen": -0.0844658762216568, "rewards/margins": 0.05264536663889885, "rewards/rejected": -0.13711124658584595, "step": 294 }, { "epoch": 0.18351477449455678, "grad_norm": 0.263263463973999, "learning_rate": 3.525e-05, "log_odds_chosen": 0.30774155259132385, "log_odds_ratio": -0.6023176908493042, "logits/chosen": 1.2870378494262695, "logits/rejected": 1.972282886505127, "logps/chosen": -0.9613863825798035, "logps/rejected": -1.178411602973938, "loss": 0.7256, "nll_loss": 0.6653767824172974, "rewards/accuracies": 0.625, "rewards/chosen": -0.09613863378763199, "rewards/margins": 0.02170252427458763, "rewards/rejected": -0.11784116178750992, "step": 295 }, { "epoch": 0.18413685847589426, "grad_norm": 0.27597174048423767, "learning_rate": 3.52e-05, "log_odds_chosen": 0.4085116684436798, "log_odds_ratio": -0.6184495091438293, "logits/chosen": 0.3999248147010803, "logits/rejected": 2.451507329940796, "logps/chosen": -0.8906404376029968, "logps/rejected": -1.0293775796890259, "loss": 0.6227, "nll_loss": 0.5608530044555664, "rewards/accuracies": 0.625, "rewards/chosen": -0.08906404674053192, "rewards/margins": 0.013873708434402943, "rewards/rejected": -0.10293775051832199, "step": 296 }, { "epoch": 0.18475894245723173, "grad_norm": 0.41982316970825195, "learning_rate": 3.515e-05, "log_odds_chosen": -0.07312265038490295, "log_odds_ratio": -0.9710935354232788, "logits/chosen": 0.8743797540664673, "logits/rejected": 1.2161155939102173, "logps/chosen": -1.2751758098602295, "logps/rejected": -1.1848974227905273, "loss": 0.7096, "nll_loss": 0.6124569773674011, "rewards/accuracies": 0.5, "rewards/chosen": -0.12751758098602295, "rewards/margins": -0.009027836844325066, "rewards/rejected": -0.11848974972963333, "step": 297 }, { "epoch": 0.1853810264385692, "grad_norm": 0.2401142567396164, "learning_rate": 3.51e-05, "log_odds_chosen": 0.5721354484558105, "log_odds_ratio": -0.5739449858665466, "logits/chosen": 1.8936402797698975, "logits/rejected": 2.0619664192199707, "logps/chosen": -0.9201027154922485, "logps/rejected": -1.209338903427124, "loss": 0.7463, "nll_loss": 0.6889411807060242, "rewards/accuracies": 0.5, "rewards/chosen": -0.09201027452945709, "rewards/margins": 0.02892361581325531, "rewards/rejected": -0.1209338828921318, "step": 298 }, { "epoch": 0.1860031104199067, "grad_norm": 0.33103877305984497, "learning_rate": 3.505e-05, "log_odds_chosen": 0.36520734429359436, "log_odds_ratio": -0.5649385452270508, "logits/chosen": 1.2041155099868774, "logits/rejected": 1.5141119956970215, "logps/chosen": -1.1315367221832275, "logps/rejected": -1.3210612535476685, "loss": 0.775, "nll_loss": 0.7185065150260925, "rewards/accuracies": 0.875, "rewards/chosen": -0.11315366625785828, "rewards/margins": 0.018952451646327972, "rewards/rejected": -0.13210612535476685, "step": 299 }, { "epoch": 0.18662519440124417, "grad_norm": 0.38658761978149414, "learning_rate": 3.5e-05, "log_odds_chosen": 0.6449403166770935, "log_odds_ratio": -0.6450217366218567, "logits/chosen": 0.46619826555252075, "logits/rejected": 1.577237606048584, "logps/chosen": -1.1663486957550049, "logps/rejected": -1.5513371229171753, "loss": 0.6948, "nll_loss": 0.6303378343582153, "rewards/accuracies": 0.625, "rewards/chosen": -0.11663487553596497, "rewards/margins": 0.038498833775520325, "rewards/rejected": -0.1551337093114853, "step": 300 }, { "epoch": 0.18724727838258165, "grad_norm": 0.32990527153015137, "learning_rate": 3.495e-05, "log_odds_chosen": 0.9941543340682983, "log_odds_ratio": -0.5177374482154846, "logits/chosen": 0.8194706439971924, "logits/rejected": 2.0760574340820312, "logps/chosen": -0.925238311290741, "logps/rejected": -1.5417176485061646, "loss": 0.6898, "nll_loss": 0.6380496621131897, "rewards/accuracies": 0.75, "rewards/chosen": -0.09252384305000305, "rewards/margins": 0.061647929251194, "rewards/rejected": -0.15417176485061646, "step": 301 }, { "epoch": 0.18786936236391913, "grad_norm": 0.34704282879829407, "learning_rate": 3.49e-05, "log_odds_chosen": 0.5234130620956421, "log_odds_ratio": -0.4979130029678345, "logits/chosen": 2.157592296600342, "logits/rejected": 2.386396646499634, "logps/chosen": -0.96225506067276, "logps/rejected": -1.3348091840744019, "loss": 0.7822, "nll_loss": 0.732367217540741, "rewards/accuracies": 0.75, "rewards/chosen": -0.09622550010681152, "rewards/margins": 0.0372554175555706, "rewards/rejected": -0.13348092138767242, "step": 302 }, { "epoch": 0.1884914463452566, "grad_norm": 0.26417276263237, "learning_rate": 3.485e-05, "log_odds_chosen": 0.839657187461853, "log_odds_ratio": -0.41882774233818054, "logits/chosen": 0.9267981052398682, "logits/rejected": 0.5470160841941833, "logps/chosen": -0.984728217124939, "logps/rejected": -1.620476484298706, "loss": 0.5851, "nll_loss": 0.5432447791099548, "rewards/accuracies": 0.75, "rewards/chosen": -0.09847281873226166, "rewards/margins": 0.06357483565807343, "rewards/rejected": -0.16204765439033508, "step": 303 }, { "epoch": 0.1891135303265941, "grad_norm": 0.3210340440273285, "learning_rate": 3.48e-05, "log_odds_chosen": 0.43130385875701904, "log_odds_ratio": -0.547843873500824, "logits/chosen": 1.4130476713180542, "logits/rejected": 3.369215965270996, "logps/chosen": -0.9559162855148315, "logps/rejected": -1.1902281045913696, "loss": 0.702, "nll_loss": 0.6471742391586304, "rewards/accuracies": 0.75, "rewards/chosen": -0.09559163451194763, "rewards/margins": 0.023431185632944107, "rewards/rejected": -0.11902281641960144, "step": 304 }, { "epoch": 0.18973561430793157, "grad_norm": 0.3766573965549469, "learning_rate": 3.475e-05, "log_odds_chosen": 2.982311725616455, "log_odds_ratio": -0.23432503640651703, "logits/chosen": 2.01702618598938, "logits/rejected": 3.3692893981933594, "logps/chosen": -0.6424603462219238, "logps/rejected": -2.8637423515319824, "loss": 0.7736, "nll_loss": 0.7501252889633179, "rewards/accuracies": 0.875, "rewards/chosen": -0.0642460361123085, "rewards/margins": 0.22212818264961243, "rewards/rejected": -0.2863742411136627, "step": 305 }, { "epoch": 0.19035769828926905, "grad_norm": 0.3161027431488037, "learning_rate": 3.4699999999999996e-05, "log_odds_chosen": 0.4053652286529541, "log_odds_ratio": -0.5691996812820435, "logits/chosen": 1.4379727840423584, "logits/rejected": 1.3441359996795654, "logps/chosen": -0.9353913068771362, "logps/rejected": -1.1935211420059204, "loss": 0.7362, "nll_loss": 0.6792629361152649, "rewards/accuracies": 0.625, "rewards/chosen": -0.09353913366794586, "rewards/margins": 0.025812990963459015, "rewards/rejected": -0.11935213208198547, "step": 306 }, { "epoch": 0.19097978227060652, "grad_norm": 0.27438247203826904, "learning_rate": 3.465e-05, "log_odds_chosen": 0.4557202160358429, "log_odds_ratio": -0.5387417674064636, "logits/chosen": 1.537845492362976, "logits/rejected": 2.4677371978759766, "logps/chosen": -0.969843864440918, "logps/rejected": -1.2558746337890625, "loss": 0.7606, "nll_loss": 0.7067520022392273, "rewards/accuracies": 0.75, "rewards/chosen": -0.0969843864440918, "rewards/margins": 0.02860308811068535, "rewards/rejected": -0.12558747828006744, "step": 307 }, { "epoch": 0.191601866251944, "grad_norm": 0.2716653645038605, "learning_rate": 3.46e-05, "log_odds_chosen": 1.008226752281189, "log_odds_ratio": -0.4756288528442383, "logits/chosen": 0.08089017868041992, "logits/rejected": 1.4091609716415405, "logps/chosen": -0.9258524179458618, "logps/rejected": -1.6496083736419678, "loss": 0.4917, "nll_loss": 0.4441262483596802, "rewards/accuracies": 0.75, "rewards/chosen": -0.0925852507352829, "rewards/margins": 0.07237560302019119, "rewards/rejected": -0.1649608314037323, "step": 308 }, { "epoch": 0.19222395023328148, "grad_norm": 0.28455743193626404, "learning_rate": 3.455e-05, "log_odds_chosen": 1.0614575147628784, "log_odds_ratio": -0.35518938302993774, "logits/chosen": 0.6718518733978271, "logits/rejected": 1.3685194253921509, "logps/chosen": -0.678547739982605, "logps/rejected": -1.356400489807129, "loss": 0.5612, "nll_loss": 0.5256614685058594, "rewards/accuracies": 0.875, "rewards/chosen": -0.06785477697849274, "rewards/margins": 0.06778527796268463, "rewards/rejected": -0.13564005494117737, "step": 309 }, { "epoch": 0.19284603421461896, "grad_norm": 0.31106212735176086, "learning_rate": 3.45e-05, "log_odds_chosen": 0.7074335813522339, "log_odds_ratio": -0.5154873132705688, "logits/chosen": 1.195151448249817, "logits/rejected": 2.1725990772247314, "logps/chosen": -0.8819376230239868, "logps/rejected": -1.3819676637649536, "loss": 0.6628, "nll_loss": 0.6112890243530273, "rewards/accuracies": 0.75, "rewards/chosen": -0.08819375932216644, "rewards/margins": 0.05000301077961922, "rewards/rejected": -0.13819676637649536, "step": 310 }, { "epoch": 0.19346811819595647, "grad_norm": 0.2936580181121826, "learning_rate": 3.445e-05, "log_odds_chosen": 0.511105477809906, "log_odds_ratio": -0.5984890460968018, "logits/chosen": 0.3100643455982208, "logits/rejected": 1.2994273900985718, "logps/chosen": -1.3991050720214844, "logps/rejected": -1.8176642656326294, "loss": 0.5565, "nll_loss": 0.4966951906681061, "rewards/accuracies": 0.625, "rewards/chosen": -0.1399105191230774, "rewards/margins": 0.04185590147972107, "rewards/rejected": -0.18176642060279846, "step": 311 }, { "epoch": 0.19409020217729395, "grad_norm": 0.4400920271873474, "learning_rate": 3.4399999999999996e-05, "log_odds_chosen": 0.6868494749069214, "log_odds_ratio": -0.49262088537216187, "logits/chosen": 1.3997236490249634, "logits/rejected": 2.0570526123046875, "logps/chosen": -1.0203096866607666, "logps/rejected": -1.5456228256225586, "loss": 0.6983, "nll_loss": 0.6490747928619385, "rewards/accuracies": 0.625, "rewards/chosen": -0.10203097760677338, "rewards/margins": 0.052531301975250244, "rewards/rejected": -0.15456226468086243, "step": 312 }, { "epoch": 0.19471228615863143, "grad_norm": 0.4693183898925781, "learning_rate": 3.435e-05, "log_odds_chosen": 0.6722878813743591, "log_odds_ratio": -0.571422278881073, "logits/chosen": 1.9739662408828735, "logits/rejected": 3.177738904953003, "logps/chosen": -1.0663738250732422, "logps/rejected": -1.4175429344177246, "loss": 0.7562, "nll_loss": 0.6990476250648499, "rewards/accuracies": 0.625, "rewards/chosen": -0.1066373735666275, "rewards/margins": 0.03511692211031914, "rewards/rejected": -0.14175429940223694, "step": 313 }, { "epoch": 0.1953343701399689, "grad_norm": 0.27695855498313904, "learning_rate": 3.430000000000001e-05, "log_odds_chosen": 0.9974111318588257, "log_odds_ratio": -0.47927969694137573, "logits/chosen": 1.736506462097168, "logits/rejected": 1.5564743280410767, "logps/chosen": -1.0914074182510376, "logps/rejected": -1.9759117364883423, "loss": 0.6947, "nll_loss": 0.6467252373695374, "rewards/accuracies": 0.625, "rewards/chosen": -0.10914073884487152, "rewards/margins": 0.08845044672489166, "rewards/rejected": -0.19759118556976318, "step": 314 }, { "epoch": 0.19595645412130638, "grad_norm": 4.111656665802002, "learning_rate": 3.4250000000000006e-05, "log_odds_chosen": 1.0268677473068237, "log_odds_ratio": -0.38255006074905396, "logits/chosen": 0.9214022159576416, "logits/rejected": 1.5696971416473389, "logps/chosen": -0.7452452778816223, "logps/rejected": -1.4652537107467651, "loss": 0.6268, "nll_loss": 0.5885062217712402, "rewards/accuracies": 1.0, "rewards/chosen": -0.07452452182769775, "rewards/margins": 0.07200084626674652, "rewards/rejected": -0.14652536809444427, "step": 315 }, { "epoch": 0.19657853810264386, "grad_norm": 0.34326064586639404, "learning_rate": 3.4200000000000005e-05, "log_odds_chosen": 0.33381396532058716, "log_odds_ratio": -0.5759456157684326, "logits/chosen": 1.1284606456756592, "logits/rejected": 1.8769559860229492, "logps/chosen": -0.8769740462303162, "logps/rejected": -1.100290298461914, "loss": 0.7314, "nll_loss": 0.6737844944000244, "rewards/accuracies": 0.75, "rewards/chosen": -0.08769740909337997, "rewards/margins": 0.022331636399030685, "rewards/rejected": -0.11002904176712036, "step": 316 }, { "epoch": 0.19720062208398134, "grad_norm": 0.3009902238845825, "learning_rate": 3.415e-05, "log_odds_chosen": 0.6529784202575684, "log_odds_ratio": -0.4737992286682129, "logits/chosen": 1.0143990516662598, "logits/rejected": 1.9090029001235962, "logps/chosen": -1.226149320602417, "logps/rejected": -1.7656174898147583, "loss": 0.6483, "nll_loss": 0.6009653210639954, "rewards/accuracies": 0.875, "rewards/chosen": -0.12261494994163513, "rewards/margins": 0.05394680052995682, "rewards/rejected": -0.17656174302101135, "step": 317 }, { "epoch": 0.19782270606531882, "grad_norm": 0.29174575209617615, "learning_rate": 3.41e-05, "log_odds_chosen": 1.0072104930877686, "log_odds_ratio": -0.4887734353542328, "logits/chosen": 1.7796566486358643, "logits/rejected": 2.4973080158233643, "logps/chosen": -1.2199254035949707, "logps/rejected": -2.0509541034698486, "loss": 0.7258, "nll_loss": 0.676878035068512, "rewards/accuracies": 0.625, "rewards/chosen": -0.12199252843856812, "rewards/margins": 0.08310288190841675, "rewards/rejected": -0.20509541034698486, "step": 318 }, { "epoch": 0.1984447900466563, "grad_norm": 0.4274386465549469, "learning_rate": 3.405e-05, "log_odds_chosen": 0.33251264691352844, "log_odds_ratio": -0.6084076166152954, "logits/chosen": 1.3834333419799805, "logits/rejected": 1.8153676986694336, "logps/chosen": -1.1630151271820068, "logps/rejected": -1.408416748046875, "loss": 0.719, "nll_loss": 0.658169686794281, "rewards/accuracies": 0.5, "rewards/chosen": -0.1163015216588974, "rewards/margins": 0.024540148675441742, "rewards/rejected": -0.14084166288375854, "step": 319 }, { "epoch": 0.19906687402799378, "grad_norm": 0.33442965149879456, "learning_rate": 3.4000000000000007e-05, "log_odds_chosen": 0.6064150333404541, "log_odds_ratio": -0.5035816431045532, "logits/chosen": 0.972122848033905, "logits/rejected": 2.684668779373169, "logps/chosen": -1.0591764450073242, "logps/rejected": -1.486153483390808, "loss": 0.615, "nll_loss": 0.5646054744720459, "rewards/accuracies": 0.75, "rewards/chosen": -0.10591764003038406, "rewards/margins": 0.04269770532846451, "rewards/rejected": -0.14861536026000977, "step": 320 }, { "epoch": 0.19968895800933126, "grad_norm": 0.270923376083374, "learning_rate": 3.3950000000000005e-05, "log_odds_chosen": 1.028401494026184, "log_odds_ratio": -0.3738253712654114, "logits/chosen": 1.1716986894607544, "logits/rejected": 2.318563222885132, "logps/chosen": -0.6336233615875244, "logps/rejected": -1.1869080066680908, "loss": 0.5955, "nll_loss": 0.5581661462783813, "rewards/accuracies": 0.875, "rewards/chosen": -0.06336233764886856, "rewards/margins": 0.05532847344875336, "rewards/rejected": -0.11869080364704132, "step": 321 }, { "epoch": 0.20031104199066874, "grad_norm": 0.7050295472145081, "learning_rate": 3.3900000000000004e-05, "log_odds_chosen": 1.2485467195510864, "log_odds_ratio": -0.29749155044555664, "logits/chosen": 0.9530289173126221, "logits/rejected": 2.255811929702759, "logps/chosen": -0.6973420977592468, "logps/rejected": -1.3816578388214111, "loss": 0.577, "nll_loss": 0.5472236275672913, "rewards/accuracies": 1.0, "rewards/chosen": -0.06973421573638916, "rewards/margins": 0.06843157112598419, "rewards/rejected": -0.13816578686237335, "step": 322 }, { "epoch": 0.20093312597200622, "grad_norm": 0.2560308873653412, "learning_rate": 3.385e-05, "log_odds_chosen": 0.8242784738540649, "log_odds_ratio": -0.4802219867706299, "logits/chosen": 0.9842218160629272, "logits/rejected": 1.5221753120422363, "logps/chosen": -1.1165326833724976, "logps/rejected": -1.7994098663330078, "loss": 0.6561, "nll_loss": 0.6081154942512512, "rewards/accuracies": 0.75, "rewards/chosen": -0.11165326833724976, "rewards/margins": 0.06828770041465759, "rewards/rejected": -0.17994098365306854, "step": 323 }, { "epoch": 0.2015552099533437, "grad_norm": 0.2929477393627167, "learning_rate": 3.38e-05, "log_odds_chosen": 1.0364418029785156, "log_odds_ratio": -0.4716857671737671, "logits/chosen": 1.3951971530914307, "logits/rejected": 2.5507805347442627, "logps/chosen": -0.8110775947570801, "logps/rejected": -1.4586906433105469, "loss": 0.8055, "nll_loss": 0.7583492994308472, "rewards/accuracies": 0.625, "rewards/chosen": -0.08110776543617249, "rewards/margins": 0.06476131081581116, "rewards/rejected": -0.14586907625198364, "step": 324 }, { "epoch": 0.20217729393468117, "grad_norm": 0.25941890478134155, "learning_rate": 3.375000000000001e-05, "log_odds_chosen": 1.196507453918457, "log_odds_ratio": -0.31142622232437134, "logits/chosen": 1.1948047876358032, "logits/rejected": 1.6353682279586792, "logps/chosen": -0.9516217708587646, "logps/rejected": -1.8310502767562866, "loss": 0.6742, "nll_loss": 0.6430565118789673, "rewards/accuracies": 1.0, "rewards/chosen": -0.09516217559576035, "rewards/margins": 0.08794285356998444, "rewards/rejected": -0.18310505151748657, "step": 325 }, { "epoch": 0.20279937791601865, "grad_norm": 0.42425087094306946, "learning_rate": 3.3700000000000006e-05, "log_odds_chosen": 0.2692258954048157, "log_odds_ratio": -0.6452361345291138, "logits/chosen": 2.0312280654907227, "logits/rejected": 2.811347484588623, "logps/chosen": -1.1653238534927368, "logps/rejected": -1.2902796268463135, "loss": 0.8677, "nll_loss": 0.8031848669052124, "rewards/accuracies": 0.75, "rewards/chosen": -0.11653238534927368, "rewards/margins": 0.01249559223651886, "rewards/rejected": -0.12902797758579254, "step": 326 }, { "epoch": 0.20342146189735613, "grad_norm": 0.3335670232772827, "learning_rate": 3.3650000000000005e-05, "log_odds_chosen": 0.3461630642414093, "log_odds_ratio": -0.6145853996276855, "logits/chosen": 1.8230409622192383, "logits/rejected": 2.055349826812744, "logps/chosen": -1.0126659870147705, "logps/rejected": -1.273895025253296, "loss": 0.8458, "nll_loss": 0.7843534350395203, "rewards/accuracies": 0.5, "rewards/chosen": -0.10126660019159317, "rewards/margins": 0.02612290345132351, "rewards/rejected": -0.12738950550556183, "step": 327 }, { "epoch": 0.2040435458786936, "grad_norm": 0.32027310132980347, "learning_rate": 3.3600000000000004e-05, "log_odds_chosen": 1.1716110706329346, "log_odds_ratio": -0.3758586049079895, "logits/chosen": 1.397815465927124, "logits/rejected": 2.642426013946533, "logps/chosen": -0.8429104685783386, "logps/rejected": -1.7424283027648926, "loss": 0.5571, "nll_loss": 0.519532322883606, "rewards/accuracies": 1.0, "rewards/chosen": -0.08429105579853058, "rewards/margins": 0.0899517834186554, "rewards/rejected": -0.17424283921718597, "step": 328 }, { "epoch": 0.20466562986003112, "grad_norm": 0.25524646043777466, "learning_rate": 3.355e-05, "log_odds_chosen": 0.36870211362838745, "log_odds_ratio": -0.5927896499633789, "logits/chosen": 0.5124801397323608, "logits/rejected": 1.5821843147277832, "logps/chosen": -0.9563712477684021, "logps/rejected": -1.2237095832824707, "loss": 0.5892, "nll_loss": 0.5298933386802673, "rewards/accuracies": 0.5, "rewards/chosen": -0.09563712775707245, "rewards/margins": 0.026733826845884323, "rewards/rejected": -0.12237095832824707, "step": 329 }, { "epoch": 0.2052877138413686, "grad_norm": 0.3280141055583954, "learning_rate": 3.35e-05, "log_odds_chosen": 0.3331131637096405, "log_odds_ratio": -0.6164728403091431, "logits/chosen": 1.4830467700958252, "logits/rejected": 2.0828585624694824, "logps/chosen": -0.9265480637550354, "logps/rejected": -1.1410126686096191, "loss": 0.7481, "nll_loss": 0.6864792704582214, "rewards/accuracies": 0.5, "rewards/chosen": -0.09265480935573578, "rewards/margins": 0.021446455270051956, "rewards/rejected": -0.11410126835107803, "step": 330 }, { "epoch": 0.20590979782270608, "grad_norm": 0.3560031056404114, "learning_rate": 3.345000000000001e-05, "log_odds_chosen": 0.13016769289970398, "log_odds_ratio": -0.6902910470962524, "logits/chosen": 1.8078997135162354, "logits/rejected": 1.5569121837615967, "logps/chosen": -1.1119384765625, "logps/rejected": -1.1824815273284912, "loss": 0.8469, "nll_loss": 0.777821958065033, "rewards/accuracies": 0.5, "rewards/chosen": -0.11119385808706284, "rewards/margins": 0.007054307498037815, "rewards/rejected": -0.11824816465377808, "step": 331 }, { "epoch": 0.20653188180404355, "grad_norm": 0.28344908356666565, "learning_rate": 3.3400000000000005e-05, "log_odds_chosen": 0.8456354141235352, "log_odds_ratio": -0.4065042734146118, "logits/chosen": 1.0167481899261475, "logits/rejected": 2.0346903800964355, "logps/chosen": -0.7277791500091553, "logps/rejected": -1.2344926595687866, "loss": 0.5522, "nll_loss": 0.5115180015563965, "rewards/accuracies": 0.875, "rewards/chosen": -0.07277791202068329, "rewards/margins": 0.05067135766148567, "rewards/rejected": -0.12344926595687866, "step": 332 }, { "epoch": 0.20715396578538103, "grad_norm": 0.5055153965950012, "learning_rate": 3.3350000000000004e-05, "log_odds_chosen": 0.24805226922035217, "log_odds_ratio": -0.5933313369750977, "logits/chosen": 2.0232362747192383, "logits/rejected": 2.7429163455963135, "logps/chosen": -0.9573587775230408, "logps/rejected": -1.0880261659622192, "loss": 0.7547, "nll_loss": 0.6953402161598206, "rewards/accuracies": 0.75, "rewards/chosen": -0.09573587775230408, "rewards/margins": 0.013066737912595272, "rewards/rejected": -0.10880261659622192, "step": 333 }, { "epoch": 0.2077760497667185, "grad_norm": 0.3780578076839447, "learning_rate": 3.33e-05, "log_odds_chosen": 0.21228660643100739, "log_odds_ratio": -0.6047529578208923, "logits/chosen": 2.0565428733825684, "logits/rejected": 3.0869245529174805, "logps/chosen": -0.9381221532821655, "logps/rejected": -1.077812671661377, "loss": 0.7898, "nll_loss": 0.7292795181274414, "rewards/accuracies": 0.625, "rewards/chosen": -0.09381221234798431, "rewards/margins": 0.013969066552817822, "rewards/rejected": -0.10778127610683441, "step": 334 }, { "epoch": 0.208398133748056, "grad_norm": 0.26417893171310425, "learning_rate": 3.325e-05, "log_odds_chosen": 1.0664198398590088, "log_odds_ratio": -0.38333842158317566, "logits/chosen": 0.9955233335494995, "logits/rejected": 1.3207647800445557, "logps/chosen": -0.8093827366828918, "logps/rejected": -1.371497392654419, "loss": 0.6961, "nll_loss": 0.6577703952789307, "rewards/accuracies": 0.875, "rewards/chosen": -0.08093827962875366, "rewards/margins": 0.05621146783232689, "rewards/rejected": -0.13714975118637085, "step": 335 }, { "epoch": 0.20902021772939347, "grad_norm": 0.36560311913490295, "learning_rate": 3.32e-05, "log_odds_chosen": 0.31488385796546936, "log_odds_ratio": -0.590886652469635, "logits/chosen": 1.9713830947875977, "logits/rejected": 3.206526041030884, "logps/chosen": -0.9142054915428162, "logps/rejected": -1.1143088340759277, "loss": 0.845, "nll_loss": 0.7859491109848022, "rewards/accuracies": 0.625, "rewards/chosen": -0.09142055362462997, "rewards/margins": 0.020010333508253098, "rewards/rejected": -0.11143089830875397, "step": 336 }, { "epoch": 0.20964230171073095, "grad_norm": 0.2682207226753235, "learning_rate": 3.3150000000000006e-05, "log_odds_chosen": 0.976233959197998, "log_odds_ratio": -0.40072351694107056, "logits/chosen": 1.840961217880249, "logits/rejected": 2.919851064682007, "logps/chosen": -0.6725811958312988, "logps/rejected": -1.3152039051055908, "loss": 0.6882, "nll_loss": 0.6481640338897705, "rewards/accuracies": 0.75, "rewards/chosen": -0.06725811958312988, "rewards/margins": 0.0642622709274292, "rewards/rejected": -0.13152039051055908, "step": 337 }, { "epoch": 0.21026438569206843, "grad_norm": 0.4169570505619049, "learning_rate": 3.3100000000000005e-05, "log_odds_chosen": 1.2906016111373901, "log_odds_ratio": -0.3724203109741211, "logits/chosen": 1.817138433456421, "logits/rejected": 2.0239479541778564, "logps/chosen": -0.9505648612976074, "logps/rejected": -1.9772679805755615, "loss": 0.5799, "nll_loss": 0.5426954030990601, "rewards/accuracies": 0.875, "rewards/chosen": -0.09505648910999298, "rewards/margins": 0.10267031937837601, "rewards/rejected": -0.19772681593894958, "step": 338 }, { "epoch": 0.2108864696734059, "grad_norm": 0.3151361644268036, "learning_rate": 3.3050000000000004e-05, "log_odds_chosen": 1.0252983570098877, "log_odds_ratio": -0.4122687578201294, "logits/chosen": 2.1130666732788086, "logits/rejected": 2.364145278930664, "logps/chosen": -1.0088870525360107, "logps/rejected": -1.782043695449829, "loss": 0.8336, "nll_loss": 0.7923810482025146, "rewards/accuracies": 0.875, "rewards/chosen": -0.1008887067437172, "rewards/margins": 0.07731565833091736, "rewards/rejected": -0.17820435762405396, "step": 339 }, { "epoch": 0.2115085536547434, "grad_norm": 0.33638083934783936, "learning_rate": 3.3e-05, "log_odds_chosen": 0.8013219833374023, "log_odds_ratio": -0.43864017724990845, "logits/chosen": 1.7561466693878174, "logits/rejected": 2.6629652976989746, "logps/chosen": -0.8455312252044678, "logps/rejected": -1.3596203327178955, "loss": 0.6516, "nll_loss": 0.6077067852020264, "rewards/accuracies": 0.875, "rewards/chosen": -0.08455312252044678, "rewards/margins": 0.05140891298651695, "rewards/rejected": -0.13596203923225403, "step": 340 }, { "epoch": 0.21213063763608087, "grad_norm": 0.27090558409690857, "learning_rate": 3.295e-05, "log_odds_chosen": 0.4188057482242584, "log_odds_ratio": -0.5633822083473206, "logits/chosen": 1.1251304149627686, "logits/rejected": 2.424240827560425, "logps/chosen": -1.0555152893066406, "logps/rejected": -1.3421663045883179, "loss": 0.6382, "nll_loss": 0.5818169713020325, "rewards/accuracies": 0.75, "rewards/chosen": -0.10555154085159302, "rewards/margins": 0.028665099292993546, "rewards/rejected": -0.13421663641929626, "step": 341 }, { "epoch": 0.21275272161741834, "grad_norm": 0.24415026605129242, "learning_rate": 3.29e-05, "log_odds_chosen": 1.4160504341125488, "log_odds_ratio": -0.36501258611679077, "logits/chosen": 0.9225729703903198, "logits/rejected": 2.2199554443359375, "logps/chosen": -0.6697980165481567, "logps/rejected": -1.5039746761322021, "loss": 0.6941, "nll_loss": 0.6576475501060486, "rewards/accuracies": 0.625, "rewards/chosen": -0.0669798031449318, "rewards/margins": 0.08341766893863678, "rewards/rejected": -0.15039746463298798, "step": 342 }, { "epoch": 0.21337480559875582, "grad_norm": 0.34027838706970215, "learning_rate": 3.2850000000000006e-05, "log_odds_chosen": 0.43100878596305847, "log_odds_ratio": -0.5822449922561646, "logits/chosen": 1.3398222923278809, "logits/rejected": 2.844086170196533, "logps/chosen": -0.9215174913406372, "logps/rejected": -1.1033726930618286, "loss": 0.7517, "nll_loss": 0.693493127822876, "rewards/accuracies": 0.75, "rewards/chosen": -0.09215174615383148, "rewards/margins": 0.01818552240729332, "rewards/rejected": -0.1103372722864151, "step": 343 }, { "epoch": 0.2139968895800933, "grad_norm": 0.3031538128852844, "learning_rate": 3.2800000000000004e-05, "log_odds_chosen": 1.1786978244781494, "log_odds_ratio": -0.31301864981651306, "logits/chosen": 1.4277385473251343, "logits/rejected": 2.615865468978882, "logps/chosen": -0.7201449871063232, "logps/rejected": -1.503613829612732, "loss": 0.5573, "nll_loss": 0.5259483456611633, "rewards/accuracies": 1.0, "rewards/chosen": -0.07201449573040009, "rewards/margins": 0.07834688574075699, "rewards/rejected": -0.15036138892173767, "step": 344 }, { "epoch": 0.21461897356143078, "grad_norm": 0.2811865210533142, "learning_rate": 3.275e-05, "log_odds_chosen": 0.3644857704639435, "log_odds_ratio": -0.5817010998725891, "logits/chosen": 1.2433083057403564, "logits/rejected": 2.170772075653076, "logps/chosen": -0.9040166735649109, "logps/rejected": -1.057985544204712, "loss": 0.7894, "nll_loss": 0.7312281727790833, "rewards/accuracies": 0.375, "rewards/chosen": -0.09040166437625885, "rewards/margins": 0.015396892093122005, "rewards/rejected": -0.10579856485128403, "step": 345 }, { "epoch": 0.21524105754276826, "grad_norm": 0.3066243529319763, "learning_rate": 3.27e-05, "log_odds_chosen": 0.550826907157898, "log_odds_ratio": -0.5378211736679077, "logits/chosen": 0.5228650569915771, "logits/rejected": 1.609564185142517, "logps/chosen": -1.0457979440689087, "logps/rejected": -1.4700343608856201, "loss": 0.5974, "nll_loss": 0.5436267852783203, "rewards/accuracies": 0.75, "rewards/chosen": -0.10457979887723923, "rewards/margins": 0.04242362827062607, "rewards/rejected": -0.1470034271478653, "step": 346 }, { "epoch": 0.21586314152410577, "grad_norm": 0.3229999542236328, "learning_rate": 3.265e-05, "log_odds_chosen": 0.49356645345687866, "log_odds_ratio": -0.4990430474281311, "logits/chosen": 1.1496738195419312, "logits/rejected": 1.674940586090088, "logps/chosen": -1.011631965637207, "logps/rejected": -1.363969326019287, "loss": 0.6646, "nll_loss": 0.6147063970565796, "rewards/accuracies": 0.875, "rewards/chosen": -0.10116319358348846, "rewards/margins": 0.035233743488788605, "rewards/rejected": -0.13639694452285767, "step": 347 }, { "epoch": 0.21648522550544325, "grad_norm": 0.31860995292663574, "learning_rate": 3.26e-05, "log_odds_chosen": 2.2881035804748535, "log_odds_ratio": -0.2554280757904053, "logits/chosen": 0.6009454727172852, "logits/rejected": 0.508087158203125, "logps/chosen": -1.1216373443603516, "logps/rejected": -3.143105983734131, "loss": 0.5162, "nll_loss": 0.490666002035141, "rewards/accuracies": 1.0, "rewards/chosen": -0.1121637299656868, "rewards/margins": 0.20214685797691345, "rewards/rejected": -0.31431058049201965, "step": 348 }, { "epoch": 0.21710730948678073, "grad_norm": 0.643325686454773, "learning_rate": 3.2550000000000005e-05, "log_odds_chosen": 0.8634718656539917, "log_odds_ratio": -0.47730451822280884, "logits/chosen": 1.144991397857666, "logits/rejected": 2.048892021179199, "logps/chosen": -0.8258051872253418, "logps/rejected": -1.4491708278656006, "loss": 0.6755, "nll_loss": 0.6277719140052795, "rewards/accuracies": 0.75, "rewards/chosen": -0.08258052170276642, "rewards/margins": 0.06233656033873558, "rewards/rejected": -0.1449170708656311, "step": 349 }, { "epoch": 0.2177293934681182, "grad_norm": 0.29045066237449646, "learning_rate": 3.2500000000000004e-05, "log_odds_chosen": 1.0438034534454346, "log_odds_ratio": -0.38323289155960083, "logits/chosen": 0.4679931700229645, "logits/rejected": 2.0712461471557617, "logps/chosen": -1.0007789134979248, "logps/rejected": -1.753403902053833, "loss": 0.6007, "nll_loss": 0.5623734593391418, "rewards/accuracies": 0.875, "rewards/chosen": -0.10007788985967636, "rewards/margins": 0.07526249438524246, "rewards/rejected": -0.17534038424491882, "step": 350 }, { "epoch": 0.21835147744945568, "grad_norm": 2.7242562770843506, "learning_rate": 3.245e-05, "log_odds_chosen": 0.9390478730201721, "log_odds_ratio": -0.44669294357299805, "logits/chosen": 0.5468963384628296, "logits/rejected": 1.3738582134246826, "logps/chosen": -0.8493483066558838, "logps/rejected": -1.5678167343139648, "loss": 0.5128, "nll_loss": 0.4681122601032257, "rewards/accuracies": 0.75, "rewards/chosen": -0.08493483811616898, "rewards/margins": 0.07184682786464691, "rewards/rejected": -0.1567816585302353, "step": 351 }, { "epoch": 0.21897356143079316, "grad_norm": 0.3089011013507843, "learning_rate": 3.24e-05, "log_odds_chosen": 1.6514302492141724, "log_odds_ratio": -0.3678509593009949, "logits/chosen": 0.9059101343154907, "logits/rejected": 1.880787968635559, "logps/chosen": -0.7281700372695923, "logps/rejected": -1.8344961404800415, "loss": 0.5784, "nll_loss": 0.541600227355957, "rewards/accuracies": 0.75, "rewards/chosen": -0.07281699776649475, "rewards/margins": 0.11063261330127716, "rewards/rejected": -0.1834496110677719, "step": 352 }, { "epoch": 0.21959564541213064, "grad_norm": 0.41005030274391174, "learning_rate": 3.235e-05, "log_odds_chosen": 0.8523861169815063, "log_odds_ratio": -0.4098551273345947, "logits/chosen": 1.472335934638977, "logits/rejected": 2.3259546756744385, "logps/chosen": -1.0261791944503784, "logps/rejected": -1.6941561698913574, "loss": 0.6855, "nll_loss": 0.6445062756538391, "rewards/accuracies": 0.75, "rewards/chosen": -0.10261793434619904, "rewards/margins": 0.06679768860340118, "rewards/rejected": -0.16941560804843903, "step": 353 }, { "epoch": 0.22021772939346812, "grad_norm": 0.39794912934303284, "learning_rate": 3.2300000000000006e-05, "log_odds_chosen": 0.9575679302215576, "log_odds_ratio": -0.4112628400325775, "logits/chosen": 1.2577632665634155, "logits/rejected": 2.603459358215332, "logps/chosen": -1.028702735900879, "logps/rejected": -1.6863713264465332, "loss": 0.6172, "nll_loss": 0.5761150121688843, "rewards/accuracies": 0.75, "rewards/chosen": -0.10287026315927505, "rewards/margins": 0.06576685607433319, "rewards/rejected": -0.16863712668418884, "step": 354 }, { "epoch": 0.2208398133748056, "grad_norm": 0.6424588561058044, "learning_rate": 3.2250000000000005e-05, "log_odds_chosen": 2.402007818222046, "log_odds_ratio": -0.25134098529815674, "logits/chosen": 1.1088523864746094, "logits/rejected": 1.1565300226211548, "logps/chosen": -0.7431226968765259, "logps/rejected": -2.6517767906188965, "loss": 0.71, "nll_loss": 0.6848562955856323, "rewards/accuracies": 1.0, "rewards/chosen": -0.07431226223707199, "rewards/margins": 0.1908653974533081, "rewards/rejected": -0.2651776671409607, "step": 355 }, { "epoch": 0.22146189735614308, "grad_norm": 0.4050353169441223, "learning_rate": 3.2200000000000003e-05, "log_odds_chosen": 0.809384822845459, "log_odds_ratio": -0.4564736783504486, "logits/chosen": 2.147434711456299, "logits/rejected": 3.05849027633667, "logps/chosen": -1.0253255367279053, "logps/rejected": -1.654374361038208, "loss": 0.8009, "nll_loss": 0.7552969455718994, "rewards/accuracies": 0.625, "rewards/chosen": -0.10253255814313889, "rewards/margins": 0.06290488690137863, "rewards/rejected": -0.1654374599456787, "step": 356 }, { "epoch": 0.22208398133748056, "grad_norm": 0.6486241817474365, "learning_rate": 3.215e-05, "log_odds_chosen": 0.5740684866905212, "log_odds_ratio": -0.5048813819885254, "logits/chosen": 2.6620006561279297, "logits/rejected": 3.147725820541382, "logps/chosen": -0.9634166955947876, "logps/rejected": -1.3289614915847778, "loss": 0.8307, "nll_loss": 0.780228316783905, "rewards/accuracies": 0.75, "rewards/chosen": -0.09634166210889816, "rewards/margins": 0.0365544855594635, "rewards/rejected": -0.13289615511894226, "step": 357 }, { "epoch": 0.22270606531881804, "grad_norm": 0.3724525570869446, "learning_rate": 3.21e-05, "log_odds_chosen": 0.028345106169581413, "log_odds_ratio": -0.724675714969635, "logits/chosen": 2.111036777496338, "logits/rejected": 3.2740390300750732, "logps/chosen": -1.2557599544525146, "logps/rejected": -1.243468999862671, "loss": 0.7887, "nll_loss": 0.7161970734596252, "rewards/accuracies": 0.625, "rewards/chosen": -0.12557600438594818, "rewards/margins": -0.0012290971353650093, "rewards/rejected": -0.12434690445661545, "step": 358 }, { "epoch": 0.22332814930015552, "grad_norm": 0.26534923911094666, "learning_rate": 3.205e-05, "log_odds_chosen": 0.871075451374054, "log_odds_ratio": -0.4620305001735687, "logits/chosen": 0.9481121301651001, "logits/rejected": 1.3824822902679443, "logps/chosen": -0.7495682835578918, "logps/rejected": -1.0883417129516602, "loss": 0.7144, "nll_loss": 0.6682002544403076, "rewards/accuracies": 0.75, "rewards/chosen": -0.07495683431625366, "rewards/margins": 0.03387734666466713, "rewards/rejected": -0.1088341772556305, "step": 359 }, { "epoch": 0.223950233281493, "grad_norm": 0.31574657559394836, "learning_rate": 3.2000000000000005e-05, "log_odds_chosen": 0.45125481486320496, "log_odds_ratio": -0.6207079291343689, "logits/chosen": 0.23232275247573853, "logits/rejected": 1.9814536571502686, "logps/chosen": -1.2163420915603638, "logps/rejected": -1.5950084924697876, "loss": 0.6097, "nll_loss": 0.5476274490356445, "rewards/accuracies": 0.625, "rewards/chosen": -0.12163421511650085, "rewards/margins": 0.03786662966012955, "rewards/rejected": -0.159500852227211, "step": 360 }, { "epoch": 0.22457231726283047, "grad_norm": 0.35571300983428955, "learning_rate": 3.1950000000000004e-05, "log_odds_chosen": 0.4454658329486847, "log_odds_ratio": -0.5699670910835266, "logits/chosen": 1.5059998035430908, "logits/rejected": 1.0860791206359863, "logps/chosen": -1.0126605033874512, "logps/rejected": -1.4028286933898926, "loss": 0.6565, "nll_loss": 0.5994773507118225, "rewards/accuracies": 0.625, "rewards/chosen": -0.101266048848629, "rewards/margins": 0.03901681676506996, "rewards/rejected": -0.14028286933898926, "step": 361 }, { "epoch": 0.22519440124416795, "grad_norm": 0.28542983531951904, "learning_rate": 3.19e-05, "log_odds_chosen": 1.22482430934906, "log_odds_ratio": -0.3668474853038788, "logits/chosen": 1.2312403917312622, "logits/rejected": 2.7701077461242676, "logps/chosen": -0.6266268491744995, "logps/rejected": -1.2044650316238403, "loss": 0.6899, "nll_loss": 0.6532502770423889, "rewards/accuracies": 0.75, "rewards/chosen": -0.06266268342733383, "rewards/margins": 0.0577838197350502, "rewards/rejected": -0.12044650316238403, "step": 362 }, { "epoch": 0.22581648522550543, "grad_norm": 0.31290653347969055, "learning_rate": 3.185e-05, "log_odds_chosen": 0.8134715557098389, "log_odds_ratio": -0.3888576328754425, "logits/chosen": 1.2525287866592407, "logits/rejected": 2.074699640274048, "logps/chosen": -0.958530068397522, "logps/rejected": -1.5322929620742798, "loss": 0.589, "nll_loss": 0.5501154661178589, "rewards/accuracies": 1.0, "rewards/chosen": -0.09585300832986832, "rewards/margins": 0.05737629532814026, "rewards/rejected": -0.15322929620742798, "step": 363 }, { "epoch": 0.2264385692068429, "grad_norm": 0.31028440594673157, "learning_rate": 3.18e-05, "log_odds_chosen": 0.7677963972091675, "log_odds_ratio": -0.42222875356674194, "logits/chosen": 1.078382134437561, "logits/rejected": 2.6262946128845215, "logps/chosen": -0.8856097459793091, "logps/rejected": -1.4135570526123047, "loss": 0.6642, "nll_loss": 0.6219831705093384, "rewards/accuracies": 0.875, "rewards/chosen": -0.08856097608804703, "rewards/margins": 0.05279473215341568, "rewards/rejected": -0.1413557082414627, "step": 364 }, { "epoch": 0.22706065318818042, "grad_norm": 0.2672368288040161, "learning_rate": 3.175e-05, "log_odds_chosen": 1.25239098072052, "log_odds_ratio": -0.34830427169799805, "logits/chosen": 1.7153308391571045, "logits/rejected": 2.940908908843994, "logps/chosen": -0.7854593992233276, "logps/rejected": -1.5576868057250977, "loss": 0.7977, "nll_loss": 0.7629010081291199, "rewards/accuracies": 0.875, "rewards/chosen": -0.078545942902565, "rewards/margins": 0.07722274959087372, "rewards/rejected": -0.15576869249343872, "step": 365 }, { "epoch": 0.2276827371695179, "grad_norm": 0.28569233417510986, "learning_rate": 3.1700000000000005e-05, "log_odds_chosen": 0.839358925819397, "log_odds_ratio": -0.43827319145202637, "logits/chosen": 0.81386399269104, "logits/rejected": 3.110175371170044, "logps/chosen": -0.7708005905151367, "logps/rejected": -1.379541039466858, "loss": 0.5576, "nll_loss": 0.5137704610824585, "rewards/accuracies": 0.875, "rewards/chosen": -0.07708005607128143, "rewards/margins": 0.06087404862046242, "rewards/rejected": -0.13795410096645355, "step": 366 }, { "epoch": 0.22830482115085537, "grad_norm": 1.7108992338180542, "learning_rate": 3.1650000000000004e-05, "log_odds_chosen": 1.8122532367706299, "log_odds_ratio": -0.20352888107299805, "logits/chosen": 1.2920408248901367, "logits/rejected": 2.4468111991882324, "logps/chosen": -0.7317562103271484, "logps/rejected": -1.9532015323638916, "loss": 0.7003, "nll_loss": 0.6799642443656921, "rewards/accuracies": 1.0, "rewards/chosen": -0.07317563146352768, "rewards/margins": 0.12214451283216476, "rewards/rejected": -0.19532015919685364, "step": 367 }, { "epoch": 0.22892690513219285, "grad_norm": 0.5031489133834839, "learning_rate": 3.16e-05, "log_odds_chosen": -0.09820356965065002, "log_odds_ratio": -0.8625038266181946, "logits/chosen": 0.7859886884689331, "logits/rejected": 2.1682181358337402, "logps/chosen": -1.468904733657837, "logps/rejected": -1.2969123125076294, "loss": 0.6516, "nll_loss": 0.5653401613235474, "rewards/accuracies": 0.75, "rewards/chosen": -0.14689047634601593, "rewards/margins": -0.01719924435019493, "rewards/rejected": -0.1296912282705307, "step": 368 }, { "epoch": 0.22954898911353033, "grad_norm": 0.2742994427680969, "learning_rate": 3.155e-05, "log_odds_chosen": 1.5738685131072998, "log_odds_ratio": -0.3663029968738556, "logits/chosen": 0.23371529579162598, "logits/rejected": 2.0417320728302, "logps/chosen": -0.8390317559242249, "logps/rejected": -2.128387689590454, "loss": 0.5617, "nll_loss": 0.525061309337616, "rewards/accuracies": 1.0, "rewards/chosen": -0.08390317857265472, "rewards/margins": 0.12893559038639069, "rewards/rejected": -0.2128387689590454, "step": 369 }, { "epoch": 0.2301710730948678, "grad_norm": 0.2917932868003845, "learning_rate": 3.15e-05, "log_odds_chosen": 0.8865772485733032, "log_odds_ratio": -0.36870071291923523, "logits/chosen": 0.578902542591095, "logits/rejected": 1.5433131456375122, "logps/chosen": -1.0158836841583252, "logps/rejected": -1.5882716178894043, "loss": 0.6669, "nll_loss": 0.6300525665283203, "rewards/accuracies": 1.0, "rewards/chosen": -0.10158836096525192, "rewards/margins": 0.057238806039094925, "rewards/rejected": -0.15882715582847595, "step": 370 }, { "epoch": 0.2307931570762053, "grad_norm": 0.27657777070999146, "learning_rate": 3.145e-05, "log_odds_chosen": 0.8730921745300293, "log_odds_ratio": -0.41202032566070557, "logits/chosen": 0.9755443334579468, "logits/rejected": 1.815916895866394, "logps/chosen": -0.9039212465286255, "logps/rejected": -1.4282338619232178, "loss": 0.6118, "nll_loss": 0.5705785751342773, "rewards/accuracies": 0.875, "rewards/chosen": -0.09039213508367538, "rewards/margins": 0.05243125185370445, "rewards/rejected": -0.14282338321208954, "step": 371 }, { "epoch": 0.23141524105754277, "grad_norm": 0.3784266412258148, "learning_rate": 3.1400000000000004e-05, "log_odds_chosen": 1.1115049123764038, "log_odds_ratio": -0.4001220166683197, "logits/chosen": 1.2740614414215088, "logits/rejected": 3.4113335609436035, "logps/chosen": -0.7833281755447388, "logps/rejected": -1.5305798053741455, "loss": 0.6974, "nll_loss": 0.6573410034179688, "rewards/accuracies": 0.875, "rewards/chosen": -0.07833281904459, "rewards/margins": 0.07472515851259232, "rewards/rejected": -0.1530579775571823, "step": 372 }, { "epoch": 0.23203732503888025, "grad_norm": 0.49009525775909424, "learning_rate": 3.135e-05, "log_odds_chosen": 0.5858776569366455, "log_odds_ratio": -0.5308746099472046, "logits/chosen": 1.560257911682129, "logits/rejected": 2.5096850395202637, "logps/chosen": -0.7814056873321533, "logps/rejected": -1.2176673412322998, "loss": 0.7221, "nll_loss": 0.6690379977226257, "rewards/accuracies": 0.625, "rewards/chosen": -0.07814057171344757, "rewards/margins": 0.043626170605421066, "rewards/rejected": -0.12176674604415894, "step": 373 }, { "epoch": 0.23265940902021773, "grad_norm": 0.27680882811546326, "learning_rate": 3.13e-05, "log_odds_chosen": 0.5893234014511108, "log_odds_ratio": -0.5311247706413269, "logits/chosen": 1.2210685014724731, "logits/rejected": 1.663694143295288, "logps/chosen": -0.836790919303894, "logps/rejected": -1.2266275882720947, "loss": 0.647, "nll_loss": 0.5938542485237122, "rewards/accuracies": 0.625, "rewards/chosen": -0.08367909491062164, "rewards/margins": 0.03898368030786514, "rewards/rejected": -0.12266277521848679, "step": 374 }, { "epoch": 0.2332814930015552, "grad_norm": 0.3387400209903717, "learning_rate": 3.125e-05, "log_odds_chosen": 0.5931373834609985, "log_odds_ratio": -0.5173163414001465, "logits/chosen": 0.8472631573677063, "logits/rejected": 2.168799877166748, "logps/chosen": -1.1901843547821045, "logps/rejected": -1.6465400457382202, "loss": 0.6611, "nll_loss": 0.6094157695770264, "rewards/accuracies": 0.75, "rewards/chosen": -0.11901844292879105, "rewards/margins": 0.045635566115379333, "rewards/rejected": -0.16465400159358978, "step": 375 }, { "epoch": 0.23390357698289269, "grad_norm": 0.3112637996673584, "learning_rate": 3.12e-05, "log_odds_chosen": 0.626380443572998, "log_odds_ratio": -0.4817531108856201, "logits/chosen": 1.1131579875946045, "logits/rejected": 2.7692978382110596, "logps/chosen": -1.171850323677063, "logps/rejected": -1.6517088413238525, "loss": 0.6714, "nll_loss": 0.623209536075592, "rewards/accuracies": 0.875, "rewards/chosen": -0.11718503385782242, "rewards/margins": 0.047985851764678955, "rewards/rejected": -0.16517089307308197, "step": 376 }, { "epoch": 0.23452566096423016, "grad_norm": 0.2978714108467102, "learning_rate": 3.115e-05, "log_odds_chosen": 0.37127476930618286, "log_odds_ratio": -0.5513190031051636, "logits/chosen": 1.0451431274414062, "logits/rejected": 2.6317138671875, "logps/chosen": -1.1320627927780151, "logps/rejected": -1.4079375267028809, "loss": 0.7035, "nll_loss": 0.6483767032623291, "rewards/accuracies": 0.625, "rewards/chosen": -0.11320628225803375, "rewards/margins": 0.02758748084306717, "rewards/rejected": -0.14079375565052032, "step": 377 }, { "epoch": 0.23514774494556764, "grad_norm": 0.3199593424797058, "learning_rate": 3.1100000000000004e-05, "log_odds_chosen": 1.089561939239502, "log_odds_ratio": -0.34473738074302673, "logits/chosen": 1.0959789752960205, "logits/rejected": 1.9354054927825928, "logps/chosen": -0.8988323211669922, "logps/rejected": -1.6733230352401733, "loss": 0.5175, "nll_loss": 0.4830198884010315, "rewards/accuracies": 0.875, "rewards/chosen": -0.0898832380771637, "rewards/margins": 0.07744906842708588, "rewards/rejected": -0.16733232140541077, "step": 378 }, { "epoch": 0.23576982892690512, "grad_norm": 0.5537139177322388, "learning_rate": 3.105e-05, "log_odds_chosen": 0.8812462091445923, "log_odds_ratio": -0.6632511615753174, "logits/chosen": 2.529099464416504, "logits/rejected": 2.7828726768493652, "logps/chosen": -1.2979620695114136, "logps/rejected": -1.926892876625061, "loss": 0.7607, "nll_loss": 0.6943774819374084, "rewards/accuracies": 0.875, "rewards/chosen": -0.12979620695114136, "rewards/margins": 0.0628930851817131, "rewards/rejected": -0.19268929958343506, "step": 379 }, { "epoch": 0.2363919129082426, "grad_norm": 0.3323799669742584, "learning_rate": 3.1e-05, "log_odds_chosen": 0.7835572957992554, "log_odds_ratio": -0.4892599582672119, "logits/chosen": 0.6586530208587646, "logits/rejected": 1.666867971420288, "logps/chosen": -1.1222596168518066, "logps/rejected": -1.716899037361145, "loss": 0.5659, "nll_loss": 0.5169761776924133, "rewards/accuracies": 0.75, "rewards/chosen": -0.1122259572148323, "rewards/margins": 0.059463948011398315, "rewards/rejected": -0.17168991267681122, "step": 380 }, { "epoch": 0.23701399688958008, "grad_norm": 0.29903772473335266, "learning_rate": 3.095e-05, "log_odds_chosen": 0.5510812401771545, "log_odds_ratio": -0.4852423071861267, "logits/chosen": 1.5083937644958496, "logits/rejected": 2.799187183380127, "logps/chosen": -0.7832547426223755, "logps/rejected": -1.1330596208572388, "loss": 0.6428, "nll_loss": 0.5942538976669312, "rewards/accuracies": 0.875, "rewards/chosen": -0.07832548022270203, "rewards/margins": 0.03498048335313797, "rewards/rejected": -0.1133059710264206, "step": 381 }, { "epoch": 0.2376360808709176, "grad_norm": 0.38349026441574097, "learning_rate": 3.09e-05, "log_odds_chosen": 0.34245169162750244, "log_odds_ratio": -0.6288639307022095, "logits/chosen": 1.488581895828247, "logits/rejected": 1.2697405815124512, "logps/chosen": -1.0095301866531372, "logps/rejected": -1.2948238849639893, "loss": 0.7459, "nll_loss": 0.683020830154419, "rewards/accuracies": 0.625, "rewards/chosen": -0.10095302015542984, "rewards/margins": 0.028529373928904533, "rewards/rejected": -0.12948238849639893, "step": 382 }, { "epoch": 0.23825816485225507, "grad_norm": 0.3196040987968445, "learning_rate": 3.0850000000000004e-05, "log_odds_chosen": 0.7200406789779663, "log_odds_ratio": -0.48095566034317017, "logits/chosen": 2.4836840629577637, "logits/rejected": 1.727085828781128, "logps/chosen": -0.9393627047538757, "logps/rejected": -1.4800735712051392, "loss": 0.8215, "nll_loss": 0.7733873724937439, "rewards/accuracies": 0.75, "rewards/chosen": -0.09393627196550369, "rewards/margins": 0.0540710911154747, "rewards/rejected": -0.1480073630809784, "step": 383 }, { "epoch": 0.23888024883359255, "grad_norm": 0.3995237946510315, "learning_rate": 3.08e-05, "log_odds_chosen": 1.3195679187774658, "log_odds_ratio": -0.3445848822593689, "logits/chosen": 1.1876567602157593, "logits/rejected": 2.206796169281006, "logps/chosen": -0.69122314453125, "logps/rejected": -1.3169430494308472, "loss": 0.5767, "nll_loss": 0.5422327518463135, "rewards/accuracies": 0.875, "rewards/chosen": -0.069122314453125, "rewards/margins": 0.06257198750972748, "rewards/rejected": -0.13169430196285248, "step": 384 }, { "epoch": 0.23950233281493002, "grad_norm": 0.31453338265419006, "learning_rate": 3.075e-05, "log_odds_chosen": 0.7995316982269287, "log_odds_ratio": -0.4378565847873688, "logits/chosen": 3.7806599140167236, "logits/rejected": 3.5833585262298584, "logps/chosen": -0.8676797151565552, "logps/rejected": -1.3756786584854126, "loss": 1.0576, "nll_loss": 1.0137659311294556, "rewards/accuracies": 1.0, "rewards/chosen": -0.08676797151565552, "rewards/margins": 0.0507998913526535, "rewards/rejected": -0.13756787776947021, "step": 385 }, { "epoch": 0.2401244167962675, "grad_norm": 0.2684924900531769, "learning_rate": 3.07e-05, "log_odds_chosen": 1.4387656450271606, "log_odds_ratio": -0.25688570737838745, "logits/chosen": 1.3283145427703857, "logits/rejected": 2.276088237762451, "logps/chosen": -0.7973716259002686, "logps/rejected": -1.7727547883987427, "loss": 0.6458, "nll_loss": 0.6201030015945435, "rewards/accuracies": 0.875, "rewards/chosen": -0.07973716408014297, "rewards/margins": 0.09753831475973129, "rewards/rejected": -0.17727547883987427, "step": 386 }, { "epoch": 0.24074650077760498, "grad_norm": 0.3620300889015198, "learning_rate": 3.065e-05, "log_odds_chosen": 0.5435608625411987, "log_odds_ratio": -0.5255359411239624, "logits/chosen": 2.283266544342041, "logits/rejected": 2.0596556663513184, "logps/chosen": -1.0229352712631226, "logps/rejected": -1.451112151145935, "loss": 0.7554, "nll_loss": 0.7028760313987732, "rewards/accuracies": 0.625, "rewards/chosen": -0.10229352116584778, "rewards/margins": 0.04281768575310707, "rewards/rejected": -0.14511121809482574, "step": 387 }, { "epoch": 0.24136858475894246, "grad_norm": 0.34285151958465576, "learning_rate": 3.06e-05, "log_odds_chosen": 1.1580283641815186, "log_odds_ratio": -0.33749106526374817, "logits/chosen": 2.170480966567993, "logits/rejected": 3.085576295852661, "logps/chosen": -0.8325943350791931, "logps/rejected": -1.6810599565505981, "loss": 0.6661, "nll_loss": 0.6323240995407104, "rewards/accuracies": 0.875, "rewards/chosen": -0.08325943350791931, "rewards/margins": 0.08484657108783722, "rewards/rejected": -0.16810600459575653, "step": 388 }, { "epoch": 0.24199066874027994, "grad_norm": 0.31308212876319885, "learning_rate": 3.0550000000000004e-05, "log_odds_chosen": 1.1371029615402222, "log_odds_ratio": -0.41829434037208557, "logits/chosen": 0.6740908622741699, "logits/rejected": 1.5658845901489258, "logps/chosen": -0.9937759041786194, "logps/rejected": -1.626152753829956, "loss": 0.6029, "nll_loss": 0.5611146688461304, "rewards/accuracies": 0.75, "rewards/chosen": -0.0993775948882103, "rewards/margins": 0.06323768198490143, "rewards/rejected": -0.16261526942253113, "step": 389 }, { "epoch": 0.24261275272161742, "grad_norm": 0.29140704870224, "learning_rate": 3.05e-05, "log_odds_chosen": 0.5190244913101196, "log_odds_ratio": -0.5214281678199768, "logits/chosen": 2.445509910583496, "logits/rejected": 2.593230724334717, "logps/chosen": -1.0484591722488403, "logps/rejected": -1.3859442472457886, "loss": 0.8534, "nll_loss": 0.801255464553833, "rewards/accuracies": 0.75, "rewards/chosen": -0.10484592616558075, "rewards/margins": 0.03374850004911423, "rewards/rejected": -0.13859443366527557, "step": 390 }, { "epoch": 0.2432348367029549, "grad_norm": 0.3124774694442749, "learning_rate": 3.045e-05, "log_odds_chosen": 0.5714182257652283, "log_odds_ratio": -0.5035386085510254, "logits/chosen": 2.587074041366577, "logits/rejected": 3.1037638187408447, "logps/chosen": -1.0052751302719116, "logps/rejected": -1.411800503730774, "loss": 0.9459, "nll_loss": 0.8955031037330627, "rewards/accuracies": 0.625, "rewards/chosen": -0.10052751004695892, "rewards/margins": 0.04065253585577011, "rewards/rejected": -0.14118005335330963, "step": 391 }, { "epoch": 0.24385692068429238, "grad_norm": 0.2874357998371124, "learning_rate": 3.04e-05, "log_odds_chosen": 0.4438624083995819, "log_odds_ratio": -0.5330299139022827, "logits/chosen": 0.8436980247497559, "logits/rejected": 3.554211139678955, "logps/chosen": -1.0223864316940308, "logps/rejected": -1.3054369688034058, "loss": 0.5154, "nll_loss": 0.4620867967605591, "rewards/accuracies": 0.625, "rewards/chosen": -0.10223864763975143, "rewards/margins": 0.0283050537109375, "rewards/rejected": -0.13054370880126953, "step": 392 }, { "epoch": 0.24447900466562986, "grad_norm": 0.34963977336883545, "learning_rate": 3.035e-05, "log_odds_chosen": 0.7523168921470642, "log_odds_ratio": -0.4647229313850403, "logits/chosen": 1.5345008373260498, "logits/rejected": 1.5623347759246826, "logps/chosen": -1.007373332977295, "logps/rejected": -1.5803990364074707, "loss": 0.6779, "nll_loss": 0.6314302086830139, "rewards/accuracies": 0.75, "rewards/chosen": -0.10073733329772949, "rewards/margins": 0.057302575558423996, "rewards/rejected": -0.15803992748260498, "step": 393 }, { "epoch": 0.24510108864696734, "grad_norm": 0.4444126784801483, "learning_rate": 3.03e-05, "log_odds_chosen": 1.1722054481506348, "log_odds_ratio": -0.3408425450325012, "logits/chosen": 2.263749361038208, "logits/rejected": 2.531400203704834, "logps/chosen": -0.7516819834709167, "logps/rejected": -1.4847530126571655, "loss": 0.6886, "nll_loss": 0.654498815536499, "rewards/accuracies": 0.75, "rewards/chosen": -0.0751681998372078, "rewards/margins": 0.073307104408741, "rewards/rejected": -0.1484753042459488, "step": 394 }, { "epoch": 0.24572317262830481, "grad_norm": 0.2417965829372406, "learning_rate": 3.025e-05, "log_odds_chosen": 1.0758328437805176, "log_odds_ratio": -0.3839593231678009, "logits/chosen": 0.5677065253257751, "logits/rejected": 1.7079464197158813, "logps/chosen": -0.7603127956390381, "logps/rejected": -1.385190725326538, "loss": 0.5789, "nll_loss": 0.5405372977256775, "rewards/accuracies": 0.75, "rewards/chosen": -0.07603128254413605, "rewards/margins": 0.06248778849840164, "rewards/rejected": -0.1385190784931183, "step": 395 }, { "epoch": 0.2463452566096423, "grad_norm": 0.3106265962123871, "learning_rate": 3.02e-05, "log_odds_chosen": 0.9861092567443848, "log_odds_ratio": -0.4771953225135803, "logits/chosen": 0.5812429785728455, "logits/rejected": 2.854665756225586, "logps/chosen": -0.8859405517578125, "logps/rejected": -1.554978370666504, "loss": 0.5522, "nll_loss": 0.5044968724250793, "rewards/accuracies": 0.75, "rewards/chosen": -0.08859404921531677, "rewards/margins": 0.06690378487110138, "rewards/rejected": -0.15549783408641815, "step": 396 }, { "epoch": 0.24696734059097977, "grad_norm": 0.25298336148262024, "learning_rate": 3.015e-05, "log_odds_chosen": 0.6958006024360657, "log_odds_ratio": -0.4609626531600952, "logits/chosen": 0.29168179631233215, "logits/rejected": 1.9156887531280518, "logps/chosen": -0.8693374395370483, "logps/rejected": -1.3414729833602905, "loss": 0.5615, "nll_loss": 0.5154497027397156, "rewards/accuracies": 0.875, "rewards/chosen": -0.08693374693393707, "rewards/margins": 0.04721354693174362, "rewards/rejected": -0.1341472864151001, "step": 397 }, { "epoch": 0.24758942457231725, "grad_norm": 0.39282822608947754, "learning_rate": 3.01e-05, "log_odds_chosen": 0.480314701795578, "log_odds_ratio": -0.5577341914176941, "logits/chosen": 1.667938470840454, "logits/rejected": 2.5177690982818604, "logps/chosen": -1.1416622400283813, "logps/rejected": -1.504913330078125, "loss": 0.6952, "nll_loss": 0.6393857002258301, "rewards/accuracies": 0.75, "rewards/chosen": -0.11416622251272202, "rewards/margins": 0.036325111985206604, "rewards/rejected": -0.15049132704734802, "step": 398 }, { "epoch": 0.24821150855365473, "grad_norm": 0.6533320546150208, "learning_rate": 3.0050000000000002e-05, "log_odds_chosen": 1.0804705619812012, "log_odds_ratio": -0.4304221570491791, "logits/chosen": 1.3469984531402588, "logits/rejected": 3.310586452484131, "logps/chosen": -1.1268970966339111, "logps/rejected": -1.9855341911315918, "loss": 0.7102, "nll_loss": 0.6671367287635803, "rewards/accuracies": 0.75, "rewards/chosen": -0.11268971115350723, "rewards/margins": 0.08586370944976807, "rewards/rejected": -0.1985534131526947, "step": 399 }, { "epoch": 0.24883359253499224, "grad_norm": 0.3909144699573517, "learning_rate": 3e-05, "log_odds_chosen": 0.8222372531890869, "log_odds_ratio": -0.555374264717102, "logits/chosen": -0.3867577612400055, "logits/rejected": 2.40067720413208, "logps/chosen": -1.1070265769958496, "logps/rejected": -1.7690173387527466, "loss": 0.5574, "nll_loss": 0.5018182992935181, "rewards/accuracies": 0.625, "rewards/chosen": -0.11070266366004944, "rewards/margins": 0.06619907915592194, "rewards/rejected": -0.17690175771713257, "step": 400 }, { "epoch": 0.24945567651632972, "grad_norm": 0.4009895920753479, "learning_rate": 2.995e-05, "log_odds_chosen": 0.35318857431411743, "log_odds_ratio": -0.9164329767227173, "logits/chosen": 1.3482327461242676, "logits/rejected": 1.7844626903533936, "logps/chosen": -1.4168295860290527, "logps/rejected": -1.6120425462722778, "loss": 0.6499, "nll_loss": 0.5582701563835144, "rewards/accuracies": 0.625, "rewards/chosen": -0.1416829526424408, "rewards/margins": 0.019521303474903107, "rewards/rejected": -0.1612042635679245, "step": 401 }, { "epoch": 0.25007776049766717, "grad_norm": 0.33025988936424255, "learning_rate": 2.9900000000000002e-05, "log_odds_chosen": 1.3418104648590088, "log_odds_ratio": -0.3277786374092102, "logits/chosen": 1.4879672527313232, "logits/rejected": 2.3433167934417725, "logps/chosen": -0.9078612327575684, "logps/rejected": -1.8650798797607422, "loss": 0.6201, "nll_loss": 0.5872801542282104, "rewards/accuracies": 0.875, "rewards/chosen": -0.09078612923622131, "rewards/margins": 0.09572187811136246, "rewards/rejected": -0.18650799989700317, "step": 402 }, { "epoch": 0.2506998444790047, "grad_norm": 0.3194940686225891, "learning_rate": 2.985e-05, "log_odds_chosen": 0.8244792819023132, "log_odds_ratio": -0.42599987983703613, "logits/chosen": 0.572659969329834, "logits/rejected": 1.3702340126037598, "logps/chosen": -0.9174282550811768, "logps/rejected": -1.5245065689086914, "loss": 0.5852, "nll_loss": 0.5425539612770081, "rewards/accuracies": 0.75, "rewards/chosen": -0.09174282848834991, "rewards/margins": 0.06070783734321594, "rewards/rejected": -0.15245066583156586, "step": 403 }, { "epoch": 0.2513219284603421, "grad_norm": 0.406558096408844, "learning_rate": 2.98e-05, "log_odds_chosen": 1.7406198978424072, "log_odds_ratio": -0.6884851455688477, "logits/chosen": 1.4606916904449463, "logits/rejected": 1.6664860248565674, "logps/chosen": -1.0016249418258667, "logps/rejected": -2.2720675468444824, "loss": 0.7226, "nll_loss": 0.6537201404571533, "rewards/accuracies": 0.75, "rewards/chosen": -0.10016249865293503, "rewards/margins": 0.12704426050186157, "rewards/rejected": -0.2272067666053772, "step": 404 }, { "epoch": 0.25194401244167963, "grad_norm": 0.30637553334236145, "learning_rate": 2.975e-05, "log_odds_chosen": 0.4996417164802551, "log_odds_ratio": -0.5414650440216064, "logits/chosen": 1.5827730894088745, "logits/rejected": 2.8780758380889893, "logps/chosen": -0.9463892579078674, "logps/rejected": -1.2346899509429932, "loss": 0.7172, "nll_loss": 0.6631010174751282, "rewards/accuracies": 0.625, "rewards/chosen": -0.09463892877101898, "rewards/margins": 0.02883007377386093, "rewards/rejected": -0.12346899509429932, "step": 405 }, { "epoch": 0.2525660964230171, "grad_norm": 0.3793873190879822, "learning_rate": 2.97e-05, "log_odds_chosen": 0.015795554965734482, "log_odds_ratio": -0.7355694770812988, "logits/chosen": 0.8564848899841309, "logits/rejected": 3.023225784301758, "logps/chosen": -1.1393344402313232, "logps/rejected": -1.1146671772003174, "loss": 0.574, "nll_loss": 0.5004271268844604, "rewards/accuracies": 0.5, "rewards/chosen": -0.11393344402313232, "rewards/margins": -0.0024667298421263695, "rewards/rejected": -0.11146670579910278, "step": 406 }, { "epoch": 0.2531881804043546, "grad_norm": 0.31722235679626465, "learning_rate": 2.965e-05, "log_odds_chosen": 0.505951464176178, "log_odds_ratio": -0.5452799797058105, "logits/chosen": 2.190391778945923, "logits/rejected": 2.1522607803344727, "logps/chosen": -0.8337920904159546, "logps/rejected": -1.0933805704116821, "loss": 0.7712, "nll_loss": 0.716640055179596, "rewards/accuracies": 0.5, "rewards/chosen": -0.08337920904159546, "rewards/margins": 0.02595885656774044, "rewards/rejected": -0.10933806002140045, "step": 407 }, { "epoch": 0.25381026438569204, "grad_norm": 0.6297598481178284, "learning_rate": 2.96e-05, "log_odds_chosen": 0.40282291173934937, "log_odds_ratio": -0.8236533403396606, "logits/chosen": 2.029449701309204, "logits/rejected": 3.087233781814575, "logps/chosen": -1.7967513799667358, "logps/rejected": -2.068850040435791, "loss": 0.846, "nll_loss": 0.7636764645576477, "rewards/accuracies": 0.75, "rewards/chosen": -0.1796751469373703, "rewards/margins": 0.027209851890802383, "rewards/rejected": -0.20688499510288239, "step": 408 }, { "epoch": 0.25443234836702955, "grad_norm": 1.622485876083374, "learning_rate": 2.955e-05, "log_odds_chosen": 1.3102138042449951, "log_odds_ratio": -0.38241660594940186, "logits/chosen": 0.22542864084243774, "logits/rejected": 1.7303202152252197, "logps/chosen": -0.8405523300170898, "logps/rejected": -1.6514477729797363, "loss": 0.5552, "nll_loss": 0.517005980014801, "rewards/accuracies": 1.0, "rewards/chosen": -0.08405523747205734, "rewards/margins": 0.08108954131603241, "rewards/rejected": -0.16514477133750916, "step": 409 }, { "epoch": 0.25505443234836706, "grad_norm": 0.34497782588005066, "learning_rate": 2.95e-05, "log_odds_chosen": 1.3162719011306763, "log_odds_ratio": -0.27363449335098267, "logits/chosen": 0.6687554121017456, "logits/rejected": 1.8202764987945557, "logps/chosen": -0.6283218860626221, "logps/rejected": -1.4075915813446045, "loss": 0.6224, "nll_loss": 0.5950397253036499, "rewards/accuracies": 1.0, "rewards/chosen": -0.06283218413591385, "rewards/margins": 0.07792697846889496, "rewards/rejected": -0.1407591551542282, "step": 410 }, { "epoch": 0.2556765163297045, "grad_norm": 0.666723906993866, "learning_rate": 2.945e-05, "log_odds_chosen": 0.7361310124397278, "log_odds_ratio": -0.4306948482990265, "logits/chosen": 1.0494723320007324, "logits/rejected": 1.2754552364349365, "logps/chosen": -1.0213119983673096, "logps/rejected": -1.5761754512786865, "loss": 0.559, "nll_loss": 0.5158807039260864, "rewards/accuracies": 1.0, "rewards/chosen": -0.102131187915802, "rewards/margins": 0.05548635125160217, "rewards/rejected": -0.15761755406856537, "step": 411 }, { "epoch": 0.256298600311042, "grad_norm": 0.4294110834598541, "learning_rate": 2.94e-05, "log_odds_chosen": 0.5257182121276855, "log_odds_ratio": -0.5099442005157471, "logits/chosen": 3.1594438552856445, "logits/rejected": 1.7579195499420166, "logps/chosen": -0.9839767217636108, "logps/rejected": -1.3301349878311157, "loss": 1.0331, "nll_loss": 0.9820601344108582, "rewards/accuracies": 0.875, "rewards/chosen": -0.09839767217636108, "rewards/margins": 0.03461581841111183, "rewards/rejected": -0.13301348686218262, "step": 412 }, { "epoch": 0.25692068429237946, "grad_norm": 0.3066917955875397, "learning_rate": 2.935e-05, "log_odds_chosen": 0.9262756109237671, "log_odds_ratio": -0.36216309666633606, "logits/chosen": 0.2067386507987976, "logits/rejected": 0.22625547647476196, "logps/chosen": -1.0756921768188477, "logps/rejected": -1.7890634536743164, "loss": 0.529, "nll_loss": 0.4928092360496521, "rewards/accuracies": 0.875, "rewards/chosen": -0.10756923258304596, "rewards/margins": 0.07133711129426956, "rewards/rejected": -0.17890633642673492, "step": 413 }, { "epoch": 0.25754276827371697, "grad_norm": 0.32451072335243225, "learning_rate": 2.93e-05, "log_odds_chosen": 1.0704395771026611, "log_odds_ratio": -0.35353147983551025, "logits/chosen": 1.414031982421875, "logits/rejected": 1.678096890449524, "logps/chosen": -0.946209192276001, "logps/rejected": -1.6878459453582764, "loss": 0.695, "nll_loss": 0.6596387624740601, "rewards/accuracies": 0.75, "rewards/chosen": -0.09462092816829681, "rewards/margins": 0.07416368275880814, "rewards/rejected": -0.16878461837768555, "step": 414 }, { "epoch": 0.2581648522550544, "grad_norm": 0.30788925290107727, "learning_rate": 2.925e-05, "log_odds_chosen": 0.5195921659469604, "log_odds_ratio": -0.5618450045585632, "logits/chosen": 0.8189148306846619, "logits/rejected": 1.116391897201538, "logps/chosen": -0.992274284362793, "logps/rejected": -1.413496494293213, "loss": 0.6824, "nll_loss": 0.6262446641921997, "rewards/accuracies": 0.75, "rewards/chosen": -0.0992274284362793, "rewards/margins": 0.04212221875786781, "rewards/rejected": -0.1413496434688568, "step": 415 }, { "epoch": 0.25878693623639193, "grad_norm": 0.34574657678604126, "learning_rate": 2.9199999999999998e-05, "log_odds_chosen": 0.9739059209823608, "log_odds_ratio": -0.34965360164642334, "logits/chosen": 0.7820599675178528, "logits/rejected": 1.1054322719573975, "logps/chosen": -1.1314696073532104, "logps/rejected": -1.8759431838989258, "loss": 0.7164, "nll_loss": 0.6814556121826172, "rewards/accuracies": 0.875, "rewards/chosen": -0.11314696073532104, "rewards/margins": 0.07444734871387482, "rewards/rejected": -0.18759430944919586, "step": 416 }, { "epoch": 0.2594090202177294, "grad_norm": 0.5183652639389038, "learning_rate": 2.915e-05, "log_odds_chosen": 0.8738367557525635, "log_odds_ratio": -0.47149088978767395, "logits/chosen": -0.39875268936157227, "logits/rejected": 1.5387773513793945, "logps/chosen": -1.315935730934143, "logps/rejected": -1.958770513534546, "loss": 0.562, "nll_loss": 0.5148109197616577, "rewards/accuracies": 0.875, "rewards/chosen": -0.13159358501434326, "rewards/margins": 0.06428346782922745, "rewards/rejected": -0.1958770453929901, "step": 417 }, { "epoch": 0.2600311041990669, "grad_norm": 0.35823681950569153, "learning_rate": 2.91e-05, "log_odds_chosen": 0.8519835472106934, "log_odds_ratio": -0.39780083298683167, "logits/chosen": 2.6089086532592773, "logits/rejected": 2.7018566131591797, "logps/chosen": -0.9135912656784058, "logps/rejected": -1.5112643241882324, "loss": 0.8546, "nll_loss": 0.8147870302200317, "rewards/accuracies": 0.875, "rewards/chosen": -0.09135912358760834, "rewards/margins": 0.05976732447743416, "rewards/rejected": -0.1511264443397522, "step": 418 }, { "epoch": 0.26065318818040434, "grad_norm": 0.48351654410362244, "learning_rate": 2.9049999999999998e-05, "log_odds_chosen": 1.4411815404891968, "log_odds_ratio": -0.2508581578731537, "logits/chosen": 1.8507972955703735, "logits/rejected": 2.76145339012146, "logps/chosen": -1.100071668624878, "logps/rejected": -2.2368063926696777, "loss": 0.5349, "nll_loss": 0.5098253488540649, "rewards/accuracies": 1.0, "rewards/chosen": -0.11000718176364899, "rewards/margins": 0.11367346346378326, "rewards/rejected": -0.22368063032627106, "step": 419 }, { "epoch": 0.26127527216174184, "grad_norm": 0.33516889810562134, "learning_rate": 2.9e-05, "log_odds_chosen": 1.056069016456604, "log_odds_ratio": -0.4038728177547455, "logits/chosen": 1.260915756225586, "logits/rejected": 2.18727970123291, "logps/chosen": -0.9167536497116089, "logps/rejected": -1.6653428077697754, "loss": 0.5633, "nll_loss": 0.5229440927505493, "rewards/accuracies": 0.875, "rewards/chosen": -0.09167537093162537, "rewards/margins": 0.07485891878604889, "rewards/rejected": -0.16653428971767426, "step": 420 }, { "epoch": 0.2618973561430793, "grad_norm": 0.3093640208244324, "learning_rate": 2.895e-05, "log_odds_chosen": 0.41482430696487427, "log_odds_ratio": -0.5347127914428711, "logits/chosen": 2.2311692237854004, "logits/rejected": 2.891629219055176, "logps/chosen": -1.0031148195266724, "logps/rejected": -1.3123852014541626, "loss": 0.8781, "nll_loss": 0.8245925307273865, "rewards/accuracies": 0.75, "rewards/chosen": -0.10031148046255112, "rewards/margins": 0.030927037820219994, "rewards/rejected": -0.13123852014541626, "step": 421 }, { "epoch": 0.2625194401244168, "grad_norm": 0.3477174639701843, "learning_rate": 2.8899999999999998e-05, "log_odds_chosen": 1.0467660427093506, "log_odds_ratio": -0.34793394804000854, "logits/chosen": 2.9287118911743164, "logits/rejected": 3.672017812728882, "logps/chosen": -0.987645149230957, "logps/rejected": -1.7407939434051514, "loss": 0.8457, "nll_loss": 0.8109293580055237, "rewards/accuracies": 1.0, "rewards/chosen": -0.09876450896263123, "rewards/margins": 0.07531488686800003, "rewards/rejected": -0.17407938838005066, "step": 422 }, { "epoch": 0.26314152410575425, "grad_norm": 0.30915936827659607, "learning_rate": 2.885e-05, "log_odds_chosen": 0.5334398746490479, "log_odds_ratio": -0.6047460436820984, "logits/chosen": 1.0569583177566528, "logits/rejected": 2.308332681655884, "logps/chosen": -1.227040410041809, "logps/rejected": -1.577738642692566, "loss": 0.5431, "nll_loss": 0.48261669278144836, "rewards/accuracies": 0.5, "rewards/chosen": -0.12270405143499374, "rewards/margins": 0.03506981581449509, "rewards/rejected": -0.15777386724948883, "step": 423 }, { "epoch": 0.26376360808709176, "grad_norm": 0.2928369641304016, "learning_rate": 2.88e-05, "log_odds_chosen": 1.6705491542816162, "log_odds_ratio": -0.23528079688549042, "logits/chosen": 1.2303978204727173, "logits/rejected": 2.6390182971954346, "logps/chosen": -0.7747675180435181, "logps/rejected": -2.0271377563476562, "loss": 0.6079, "nll_loss": 0.5843520760536194, "rewards/accuracies": 1.0, "rewards/chosen": -0.07747675478458405, "rewards/margins": 0.12523701786994934, "rewards/rejected": -0.20271378755569458, "step": 424 }, { "epoch": 0.2643856920684292, "grad_norm": 0.31654441356658936, "learning_rate": 2.8749999999999997e-05, "log_odds_chosen": 1.7006933689117432, "log_odds_ratio": -0.2859562039375305, "logits/chosen": 1.474023461341858, "logits/rejected": 2.0397026538848877, "logps/chosen": -1.0018465518951416, "logps/rejected": -2.3935539722442627, "loss": 0.7213, "nll_loss": 0.6926729083061218, "rewards/accuracies": 0.875, "rewards/chosen": -0.10018465667963028, "rewards/margins": 0.13917075097560883, "rewards/rejected": -0.2393554002046585, "step": 425 }, { "epoch": 0.2650077760497667, "grad_norm": 1.583665370941162, "learning_rate": 2.87e-05, "log_odds_chosen": 0.9322236776351929, "log_odds_ratio": -0.6919344663619995, "logits/chosen": 2.5925381183624268, "logits/rejected": 3.1540775299072266, "logps/chosen": -1.319345474243164, "logps/rejected": -1.910394310951233, "loss": 0.8361, "nll_loss": 0.7669383883476257, "rewards/accuracies": 0.875, "rewards/chosen": -0.13193455338478088, "rewards/margins": 0.059104882180690765, "rewards/rejected": -0.19103942811489105, "step": 426 }, { "epoch": 0.2656298600311042, "grad_norm": 0.9140015244483948, "learning_rate": 2.865e-05, "log_odds_chosen": 0.03581523895263672, "log_odds_ratio": -0.8274832963943481, "logits/chosen": 1.621453046798706, "logits/rejected": 2.172755241394043, "logps/chosen": -1.689666748046875, "logps/rejected": -1.6490963697433472, "loss": 0.7067, "nll_loss": 0.6239080429077148, "rewards/accuracies": 0.5, "rewards/chosen": -0.16896668076515198, "rewards/margins": -0.004057048819959164, "rewards/rejected": -0.16490963101387024, "step": 427 }, { "epoch": 0.2662519440124417, "grad_norm": 0.32180896401405334, "learning_rate": 2.86e-05, "log_odds_chosen": 0.9865052103996277, "log_odds_ratio": -0.41136422753334045, "logits/chosen": 2.8309919834136963, "logits/rejected": 3.1370339393615723, "logps/chosen": -0.9552088379859924, "logps/rejected": -1.727388858795166, "loss": 0.8813, "nll_loss": 0.84019935131073, "rewards/accuracies": 0.75, "rewards/chosen": -0.09552087634801865, "rewards/margins": 0.07721800357103348, "rewards/rejected": -0.17273887991905212, "step": 428 }, { "epoch": 0.2668740279937792, "grad_norm": 0.4916917085647583, "learning_rate": 2.855e-05, "log_odds_chosen": 1.0642729997634888, "log_odds_ratio": -0.4081045091152191, "logits/chosen": 1.5744514465332031, "logits/rejected": 2.353875160217285, "logps/chosen": -1.2089930772781372, "logps/rejected": -2.0792760848999023, "loss": 0.7548, "nll_loss": 0.7139753103256226, "rewards/accuracies": 0.75, "rewards/chosen": -0.12089931219816208, "rewards/margins": 0.08702830970287323, "rewards/rejected": -0.2079276144504547, "step": 429 }, { "epoch": 0.26749611197511663, "grad_norm": 0.34557002782821655, "learning_rate": 2.8499999999999998e-05, "log_odds_chosen": 1.0231351852416992, "log_odds_ratio": -0.4096643626689911, "logits/chosen": 1.2953555583953857, "logits/rejected": 1.8278127908706665, "logps/chosen": -0.9621484279632568, "logps/rejected": -1.7493982315063477, "loss": 0.6828, "nll_loss": 0.6418822407722473, "rewards/accuracies": 0.75, "rewards/chosen": -0.09621484577655792, "rewards/margins": 0.07872497290372849, "rewards/rejected": -0.174939826130867, "step": 430 }, { "epoch": 0.26811819595645414, "grad_norm": 0.26518648862838745, "learning_rate": 2.845e-05, "log_odds_chosen": 0.6238207221031189, "log_odds_ratio": -0.5502378940582275, "logits/chosen": 0.34324905276298523, "logits/rejected": 1.6478018760681152, "logps/chosen": -0.7102484703063965, "logps/rejected": -1.1375188827514648, "loss": 0.5392, "nll_loss": 0.484172523021698, "rewards/accuracies": 0.75, "rewards/chosen": -0.07102484256029129, "rewards/margins": 0.04272705316543579, "rewards/rejected": -0.11375189572572708, "step": 431 }, { "epoch": 0.2687402799377916, "grad_norm": 0.5316938757896423, "learning_rate": 2.84e-05, "log_odds_chosen": 0.8510596752166748, "log_odds_ratio": -0.43761295080184937, "logits/chosen": 0.8524940013885498, "logits/rejected": 2.9505248069763184, "logps/chosen": -1.0910060405731201, "logps/rejected": -1.6920232772827148, "loss": 0.635, "nll_loss": 0.5912693738937378, "rewards/accuracies": 0.75, "rewards/chosen": -0.10910061001777649, "rewards/margins": 0.06010172516107559, "rewards/rejected": -0.16920232772827148, "step": 432 }, { "epoch": 0.2693623639191291, "grad_norm": 0.43368545174598694, "learning_rate": 2.8349999999999998e-05, "log_odds_chosen": 0.9525305032730103, "log_odds_ratio": -0.48337578773498535, "logits/chosen": 1.1380829811096191, "logits/rejected": 2.6026346683502197, "logps/chosen": -0.9431745409965515, "logps/rejected": -1.4990016222000122, "loss": 0.6376, "nll_loss": 0.5892944931983948, "rewards/accuracies": 0.875, "rewards/chosen": -0.09431745111942291, "rewards/margins": 0.05558270961046219, "rewards/rejected": -0.1499001681804657, "step": 433 }, { "epoch": 0.26998444790046655, "grad_norm": 0.42278578877449036, "learning_rate": 2.83e-05, "log_odds_chosen": 1.4672513008117676, "log_odds_ratio": -0.39815062284469604, "logits/chosen": 1.3916385173797607, "logits/rejected": 1.4025530815124512, "logps/chosen": -1.2792010307312012, "logps/rejected": -2.5439209938049316, "loss": 0.6265, "nll_loss": 0.5867272019386292, "rewards/accuracies": 0.875, "rewards/chosen": -0.12792009115219116, "rewards/margins": 0.12647199630737305, "rewards/rejected": -0.2543920874595642, "step": 434 }, { "epoch": 0.27060653188180406, "grad_norm": 0.3120807707309723, "learning_rate": 2.825e-05, "log_odds_chosen": 3.2113146781921387, "log_odds_ratio": -0.19626906514167786, "logits/chosen": 1.3311731815338135, "logits/rejected": 2.3478856086730957, "logps/chosen": -1.103269338607788, "logps/rejected": -3.982461929321289, "loss": 0.6981, "nll_loss": 0.6784613132476807, "rewards/accuracies": 1.0, "rewards/chosen": -0.11032693833112717, "rewards/margins": 0.2879192531108856, "rewards/rejected": -0.3982461988925934, "step": 435 }, { "epoch": 0.2712286158631415, "grad_norm": 0.38851398229599, "learning_rate": 2.8199999999999998e-05, "log_odds_chosen": 0.159065380692482, "log_odds_ratio": -0.6263766288757324, "logits/chosen": 2.574674606323242, "logits/rejected": 3.693694591522217, "logps/chosen": -1.2162128686904907, "logps/rejected": -1.3283966779708862, "loss": 0.8492, "nll_loss": 0.7865930199623108, "rewards/accuracies": 0.75, "rewards/chosen": -0.12162129580974579, "rewards/margins": 0.011218377389013767, "rewards/rejected": -0.13283966481685638, "step": 436 }, { "epoch": 0.271850699844479, "grad_norm": 0.2566005289554596, "learning_rate": 2.815e-05, "log_odds_chosen": 1.7677913904190063, "log_odds_ratio": -0.20369936525821686, "logits/chosen": 1.7614707946777344, "logits/rejected": 1.692598819732666, "logps/chosen": -0.8401159048080444, "logps/rejected": -2.08687162399292, "loss": 0.6384, "nll_loss": 0.6180017590522766, "rewards/accuracies": 0.875, "rewards/chosen": -0.08401159197092056, "rewards/margins": 0.12467555701732635, "rewards/rejected": -0.20868715643882751, "step": 437 }, { "epoch": 0.27247278382581647, "grad_norm": 0.3621348738670349, "learning_rate": 2.8100000000000005e-05, "log_odds_chosen": 1.5565158128738403, "log_odds_ratio": -0.3029978573322296, "logits/chosen": 1.124603271484375, "logits/rejected": 1.5136412382125854, "logps/chosen": -0.9071807861328125, "logps/rejected": -1.9489789009094238, "loss": 0.7149, "nll_loss": 0.684585452079773, "rewards/accuracies": 0.875, "rewards/chosen": -0.09071807563304901, "rewards/margins": 0.10417980700731277, "rewards/rejected": -0.19489789009094238, "step": 438 }, { "epoch": 0.273094867807154, "grad_norm": 0.26659613847732544, "learning_rate": 2.8050000000000004e-05, "log_odds_chosen": 1.9500977993011475, "log_odds_ratio": -0.4435105323791504, "logits/chosen": 0.07519307732582092, "logits/rejected": 1.447799563407898, "logps/chosen": -0.751785159111023, "logps/rejected": -2.4592161178588867, "loss": 0.5007, "nll_loss": 0.45637768507003784, "rewards/accuracies": 0.625, "rewards/chosen": -0.07517851889133453, "rewards/margins": 0.17074309289455414, "rewards/rejected": -0.24592161178588867, "step": 439 }, { "epoch": 0.2737169517884914, "grad_norm": 0.39172080159187317, "learning_rate": 2.8000000000000003e-05, "log_odds_chosen": 1.3814113140106201, "log_odds_ratio": -0.27988070249557495, "logits/chosen": 1.5671734809875488, "logits/rejected": 2.607694625854492, "logps/chosen": -1.35719633102417, "logps/rejected": -2.5071897506713867, "loss": 0.7241, "nll_loss": 0.6961413621902466, "rewards/accuracies": 1.0, "rewards/chosen": -0.1357196420431137, "rewards/margins": 0.11499933898448944, "rewards/rejected": -0.25071898102760315, "step": 440 }, { "epoch": 0.27433903576982893, "grad_norm": 0.333368718624115, "learning_rate": 2.7950000000000005e-05, "log_odds_chosen": 2.5234973430633545, "log_odds_ratio": -0.2566179037094116, "logits/chosen": 0.35800909996032715, "logits/rejected": 0.616550087928772, "logps/chosen": -0.9834619760513306, "logps/rejected": -3.113126516342163, "loss": 0.5216, "nll_loss": 0.49595892429351807, "rewards/accuracies": 0.875, "rewards/chosen": -0.09834619611501694, "rewards/margins": 0.2129664272069931, "rewards/rejected": -0.31131264567375183, "step": 441 }, { "epoch": 0.2749611197511664, "grad_norm": 0.4109332263469696, "learning_rate": 2.7900000000000004e-05, "log_odds_chosen": 1.012038230895996, "log_odds_ratio": -0.5099627375602722, "logits/chosen": 1.473235845565796, "logits/rejected": 3.1542019844055176, "logps/chosen": -1.083565592765808, "logps/rejected": -1.784134864807129, "loss": 0.6504, "nll_loss": 0.5994206070899963, "rewards/accuracies": 0.75, "rewards/chosen": -0.10835656523704529, "rewards/margins": 0.07005693018436432, "rewards/rejected": -0.1784134954214096, "step": 442 }, { "epoch": 0.2755832037325039, "grad_norm": 0.2587113678455353, "learning_rate": 2.7850000000000003e-05, "log_odds_chosen": 1.4472953081130981, "log_odds_ratio": -0.35538187623023987, "logits/chosen": 1.4479413032531738, "logits/rejected": 1.8385202884674072, "logps/chosen": -0.7116351127624512, "logps/rejected": -1.5945403575897217, "loss": 0.7645, "nll_loss": 0.7289303541183472, "rewards/accuracies": 0.75, "rewards/chosen": -0.07116350531578064, "rewards/margins": 0.08829053491353989, "rewards/rejected": -0.15945404767990112, "step": 443 }, { "epoch": 0.27620528771384134, "grad_norm": 0.34995758533477783, "learning_rate": 2.7800000000000005e-05, "log_odds_chosen": 1.9064420461654663, "log_odds_ratio": -0.24451762437820435, "logits/chosen": 2.46639347076416, "logits/rejected": 2.436615467071533, "logps/chosen": -0.9964478015899658, "logps/rejected": -2.5733470916748047, "loss": 0.8252, "nll_loss": 0.8007709980010986, "rewards/accuracies": 1.0, "rewards/chosen": -0.09964478015899658, "rewards/margins": 0.15768994390964508, "rewards/rejected": -0.25733470916748047, "step": 444 }, { "epoch": 0.27682737169517885, "grad_norm": 0.35382080078125, "learning_rate": 2.7750000000000004e-05, "log_odds_chosen": 1.689176082611084, "log_odds_ratio": -0.306673526763916, "logits/chosen": 2.0533225536346436, "logits/rejected": 2.4105100631713867, "logps/chosen": -0.6672767996788025, "logps/rejected": -1.78840172290802, "loss": 0.6612, "nll_loss": 0.6305685043334961, "rewards/accuracies": 1.0, "rewards/chosen": -0.06672768294811249, "rewards/margins": 0.11211249232292175, "rewards/rejected": -0.17884017527103424, "step": 445 }, { "epoch": 0.27744945567651635, "grad_norm": 13.6814603805542, "learning_rate": 2.7700000000000002e-05, "log_odds_chosen": 1.1355578899383545, "log_odds_ratio": -0.4099384546279907, "logits/chosen": 0.387063205242157, "logits/rejected": 1.498714566230774, "logps/chosen": -1.0867677927017212, "logps/rejected": -1.9361159801483154, "loss": 0.6092, "nll_loss": 0.5681792497634888, "rewards/accuracies": 0.875, "rewards/chosen": -0.10867678374052048, "rewards/margins": 0.08493481576442719, "rewards/rejected": -0.19361159205436707, "step": 446 }, { "epoch": 0.2780715396578538, "grad_norm": 10.337038040161133, "learning_rate": 2.7650000000000005e-05, "log_odds_chosen": 0.6619716286659241, "log_odds_ratio": -0.4907890260219574, "logits/chosen": 1.320190668106079, "logits/rejected": 1.9713841676712036, "logps/chosen": -0.9940346479415894, "logps/rejected": -1.3662760257720947, "loss": 0.6622, "nll_loss": 0.6130806803703308, "rewards/accuracies": 0.75, "rewards/chosen": -0.09940346330404282, "rewards/margins": 0.037224140018224716, "rewards/rejected": -0.13662761449813843, "step": 447 }, { "epoch": 0.2786936236391913, "grad_norm": 0.44021517038345337, "learning_rate": 2.7600000000000003e-05, "log_odds_chosen": 1.695500135421753, "log_odds_ratio": -0.2624688148498535, "logits/chosen": 1.6216803789138794, "logits/rejected": 1.5513224601745605, "logps/chosen": -0.8733179569244385, "logps/rejected": -2.162715435028076, "loss": 0.5369, "nll_loss": 0.5106138586997986, "rewards/accuracies": 0.875, "rewards/chosen": -0.08733180165290833, "rewards/margins": 0.12893976271152496, "rewards/rejected": -0.2162715643644333, "step": 448 }, { "epoch": 0.27931570762052876, "grad_norm": 0.32137152552604675, "learning_rate": 2.7550000000000002e-05, "log_odds_chosen": 0.7980210185050964, "log_odds_ratio": -0.4316975474357605, "logits/chosen": 0.15938672423362732, "logits/rejected": 1.6466835737228394, "logps/chosen": -0.824579656124115, "logps/rejected": -1.3830028772354126, "loss": 0.5517, "nll_loss": 0.5084915161132812, "rewards/accuracies": 0.875, "rewards/chosen": -0.08245796710252762, "rewards/margins": 0.055842325091362, "rewards/rejected": -0.13830029964447021, "step": 449 }, { "epoch": 0.27993779160186627, "grad_norm": 0.2964528501033783, "learning_rate": 2.7500000000000004e-05, "log_odds_chosen": 1.321192741394043, "log_odds_ratio": -0.3193834722042084, "logits/chosen": 0.9817930459976196, "logits/rejected": 1.3354594707489014, "logps/chosen": -0.9670354127883911, "logps/rejected": -1.8899989128112793, "loss": 0.4508, "nll_loss": 0.4188663959503174, "rewards/accuracies": 0.875, "rewards/chosen": -0.09670353680849075, "rewards/margins": 0.09229634702205658, "rewards/rejected": -0.18899987637996674, "step": 450 }, { "epoch": 0.2805598755832037, "grad_norm": 0.29384738206863403, "learning_rate": 2.7450000000000003e-05, "log_odds_chosen": 1.4085745811462402, "log_odds_ratio": -0.2883527874946594, "logits/chosen": 0.6253018379211426, "logits/rejected": 1.5829182863235474, "logps/chosen": -1.206660270690918, "logps/rejected": -2.262795925140381, "loss": 0.5474, "nll_loss": 0.5185524225234985, "rewards/accuracies": 1.0, "rewards/chosen": -0.1206660345196724, "rewards/margins": 0.10561355948448181, "rewards/rejected": -0.2262795865535736, "step": 451 }, { "epoch": 0.28118195956454123, "grad_norm": 0.30835622549057007, "learning_rate": 2.7400000000000002e-05, "log_odds_chosen": 0.7946129441261292, "log_odds_ratio": -0.40670210123062134, "logits/chosen": 0.3787962794303894, "logits/rejected": 1.9494355916976929, "logps/chosen": -0.9770557880401611, "logps/rejected": -1.5579826831817627, "loss": 0.531, "nll_loss": 0.4903002083301544, "rewards/accuracies": 0.875, "rewards/chosen": -0.09770557284355164, "rewards/margins": 0.05809270963072777, "rewards/rejected": -0.1557982712984085, "step": 452 }, { "epoch": 0.2818040435458787, "grad_norm": 0.400429368019104, "learning_rate": 2.7350000000000004e-05, "log_odds_chosen": 2.4994308948516846, "log_odds_ratio": -0.2668745219707489, "logits/chosen": 1.045554280281067, "logits/rejected": 2.1419525146484375, "logps/chosen": -1.122701644897461, "logps/rejected": -3.259110689163208, "loss": 0.7045, "nll_loss": 0.6777728796005249, "rewards/accuracies": 0.875, "rewards/chosen": -0.11227016150951385, "rewards/margins": 0.21364091336727142, "rewards/rejected": -0.32591110467910767, "step": 453 }, { "epoch": 0.2824261275272162, "grad_norm": 0.40336883068084717, "learning_rate": 2.7300000000000003e-05, "log_odds_chosen": 1.403609275817871, "log_odds_ratio": -0.2870773673057556, "logits/chosen": 0.7189739942550659, "logits/rejected": 2.4073359966278076, "logps/chosen": -0.8193572759628296, "logps/rejected": -1.7679626941680908, "loss": 0.5697, "nll_loss": 0.5410056114196777, "rewards/accuracies": 1.0, "rewards/chosen": -0.08193572610616684, "rewards/margins": 0.09486055374145508, "rewards/rejected": -0.17679627239704132, "step": 454 }, { "epoch": 0.28304821150855364, "grad_norm": 0.278027206659317, "learning_rate": 2.725e-05, "log_odds_chosen": 2.407792568206787, "log_odds_ratio": -0.10633019357919693, "logits/chosen": 0.24619188904762268, "logits/rejected": 1.2273669242858887, "logps/chosen": -0.9152811765670776, "logps/rejected": -2.8626348972320557, "loss": 0.4933, "nll_loss": 0.4826865792274475, "rewards/accuracies": 1.0, "rewards/chosen": -0.09152812510728836, "rewards/margins": 0.19473537802696228, "rewards/rejected": -0.28626352548599243, "step": 455 }, { "epoch": 0.28367029548989114, "grad_norm": 0.3393852710723877, "learning_rate": 2.7200000000000004e-05, "log_odds_chosen": 0.6036549806594849, "log_odds_ratio": -0.5104788541793823, "logits/chosen": 1.5058778524398804, "logits/rejected": 2.87321400642395, "logps/chosen": -1.0817363262176514, "logps/rejected": -1.4985532760620117, "loss": 0.7031, "nll_loss": 0.6520944237709045, "rewards/accuracies": 0.625, "rewards/chosen": -0.10817363858222961, "rewards/margins": 0.04168170318007469, "rewards/rejected": -0.1498553454875946, "step": 456 }, { "epoch": 0.2842923794712286, "grad_norm": 0.3408838212490082, "learning_rate": 2.7150000000000003e-05, "log_odds_chosen": 0.8742717504501343, "log_odds_ratio": -0.43518275022506714, "logits/chosen": 2.0727810859680176, "logits/rejected": 2.7927446365356445, "logps/chosen": -1.0099351406097412, "logps/rejected": -1.6576815843582153, "loss": 0.8319, "nll_loss": 0.7883896827697754, "rewards/accuracies": 0.75, "rewards/chosen": -0.10099352151155472, "rewards/margins": 0.06477463990449905, "rewards/rejected": -0.16576816141605377, "step": 457 }, { "epoch": 0.2849144634525661, "grad_norm": 0.47845590114593506, "learning_rate": 2.7100000000000005e-05, "log_odds_chosen": 0.9391593933105469, "log_odds_ratio": -0.6174224019050598, "logits/chosen": 2.785203456878662, "logits/rejected": 2.1296370029449463, "logps/chosen": -0.9823821187019348, "logps/rejected": -1.7206125259399414, "loss": 0.9131, "nll_loss": 0.8513160347938538, "rewards/accuracies": 0.625, "rewards/chosen": -0.09823821485042572, "rewards/margins": 0.07382305711507797, "rewards/rejected": -0.1720612645149231, "step": 458 }, { "epoch": 0.28553654743390355, "grad_norm": 1.0342097282409668, "learning_rate": 2.7050000000000004e-05, "log_odds_chosen": 2.006476879119873, "log_odds_ratio": -0.3376705050468445, "logits/chosen": 0.40670859813690186, "logits/rejected": 2.496821880340576, "logps/chosen": -0.828924834728241, "logps/rejected": -2.3141822814941406, "loss": 0.565, "nll_loss": 0.5311979651451111, "rewards/accuracies": 0.75, "rewards/chosen": -0.08289249241352081, "rewards/margins": 0.14852574467658997, "rewards/rejected": -0.23141823709011078, "step": 459 }, { "epoch": 0.28615863141524106, "grad_norm": 0.29952242970466614, "learning_rate": 2.7000000000000002e-05, "log_odds_chosen": 2.3515138626098633, "log_odds_ratio": -0.279621422290802, "logits/chosen": 0.9336141347885132, "logits/rejected": 1.1252137422561646, "logps/chosen": -0.8351043462753296, "logps/rejected": -2.546229600906372, "loss": 0.6218, "nll_loss": 0.5937999486923218, "rewards/accuracies": 0.875, "rewards/chosen": -0.08351044356822968, "rewards/margins": 0.17111250758171082, "rewards/rejected": -0.2546229362487793, "step": 460 }, { "epoch": 0.2867807153965785, "grad_norm": 0.3982970416545868, "learning_rate": 2.6950000000000005e-05, "log_odds_chosen": 1.8943520784378052, "log_odds_ratio": -0.33027487993240356, "logits/chosen": 1.9241790771484375, "logits/rejected": 2.623364210128784, "logps/chosen": -1.0681153535842896, "logps/rejected": -2.736832857131958, "loss": 0.715, "nll_loss": 0.6819977760314941, "rewards/accuracies": 0.875, "rewards/chosen": -0.1068115383386612, "rewards/margins": 0.16687177121639252, "rewards/rejected": -0.2736833095550537, "step": 461 }, { "epoch": 0.287402799377916, "grad_norm": 0.5346242785453796, "learning_rate": 2.6900000000000003e-05, "log_odds_chosen": 0.7686117887496948, "log_odds_ratio": -0.7439150810241699, "logits/chosen": 1.9250644445419312, "logits/rejected": 2.6550610065460205, "logps/chosen": -1.2357155084609985, "logps/rejected": -1.7963919639587402, "loss": 0.7496, "nll_loss": 0.6752496957778931, "rewards/accuracies": 0.5, "rewards/chosen": -0.12357156723737717, "rewards/margins": 0.056067660450935364, "rewards/rejected": -0.17963922023773193, "step": 462 }, { "epoch": 0.2880248833592535, "grad_norm": 0.39354756474494934, "learning_rate": 2.6850000000000002e-05, "log_odds_chosen": 1.5090014934539795, "log_odds_ratio": -0.3865567445755005, "logits/chosen": 1.0894285440444946, "logits/rejected": 1.4281337261199951, "logps/chosen": -1.1414062976837158, "logps/rejected": -2.367482900619507, "loss": 0.6988, "nll_loss": 0.6601517200469971, "rewards/accuracies": 0.875, "rewards/chosen": -0.11414062976837158, "rewards/margins": 0.12260768562555313, "rewards/rejected": -0.23674830794334412, "step": 463 }, { "epoch": 0.288646967340591, "grad_norm": 0.35714831948280334, "learning_rate": 2.6800000000000004e-05, "log_odds_chosen": 1.6752851009368896, "log_odds_ratio": -0.21526263654232025, "logits/chosen": 0.7329784631729126, "logits/rejected": 1.1709595918655396, "logps/chosen": -0.9770262241363525, "logps/rejected": -2.2166688442230225, "loss": 0.6376, "nll_loss": 0.6160634160041809, "rewards/accuracies": 1.0, "rewards/chosen": -0.09770262241363525, "rewards/margins": 0.12396427989006042, "rewards/rejected": -0.22166690230369568, "step": 464 }, { "epoch": 0.2892690513219285, "grad_norm": 0.3404373526573181, "learning_rate": 2.6750000000000003e-05, "log_odds_chosen": 1.9938693046569824, "log_odds_ratio": -0.2569563686847687, "logits/chosen": 2.7068655490875244, "logits/rejected": 2.8174753189086914, "logps/chosen": -0.9502182006835938, "logps/rejected": -2.4726524353027344, "loss": 0.9042, "nll_loss": 0.8784732818603516, "rewards/accuracies": 1.0, "rewards/chosen": -0.0950218141078949, "rewards/margins": 0.15224343538284302, "rewards/rejected": -0.24726524949073792, "step": 465 }, { "epoch": 0.28989113530326593, "grad_norm": 0.5440021753311157, "learning_rate": 2.6700000000000002e-05, "log_odds_chosen": 1.1913514137268066, "log_odds_ratio": -0.6887476444244385, "logits/chosen": 0.07142001390457153, "logits/rejected": 1.684450387954712, "logps/chosen": -1.438823938369751, "logps/rejected": -2.2212705612182617, "loss": 0.5957, "nll_loss": 0.5267887115478516, "rewards/accuracies": 0.75, "rewards/chosen": -0.1438823938369751, "rewards/margins": 0.07824468612670898, "rewards/rejected": -0.22212707996368408, "step": 466 }, { "epoch": 0.29051321928460344, "grad_norm": 0.2981872260570526, "learning_rate": 2.6650000000000004e-05, "log_odds_chosen": 0.7413637638092041, "log_odds_ratio": -0.46821674704551697, "logits/chosen": 0.9663263559341431, "logits/rejected": 2.2991671562194824, "logps/chosen": -0.8860517740249634, "logps/rejected": -1.3749059438705444, "loss": 0.6357, "nll_loss": 0.5888561606407166, "rewards/accuracies": 0.75, "rewards/chosen": -0.08860517293214798, "rewards/margins": 0.048885416239500046, "rewards/rejected": -0.13749060034751892, "step": 467 }, { "epoch": 0.2911353032659409, "grad_norm": 0.33800575137138367, "learning_rate": 2.6600000000000003e-05, "log_odds_chosen": 1.826517105102539, "log_odds_ratio": -0.31884247064590454, "logits/chosen": 0.21384793519973755, "logits/rejected": 1.3374475240707397, "logps/chosen": -0.8113455772399902, "logps/rejected": -2.257542610168457, "loss": 0.5751, "nll_loss": 0.5432652831077576, "rewards/accuracies": 1.0, "rewards/chosen": -0.08113455772399902, "rewards/margins": 0.14461968839168549, "rewards/rejected": -0.2257542759180069, "step": 468 }, { "epoch": 0.2917573872472784, "grad_norm": 0.9032492637634277, "learning_rate": 2.655e-05, "log_odds_chosen": 1.8913346529006958, "log_odds_ratio": -0.431749552488327, "logits/chosen": -0.06455998122692108, "logits/rejected": -0.07008242607116699, "logps/chosen": -0.9198107719421387, "logps/rejected": -2.359321355819702, "loss": 0.6571, "nll_loss": 0.613900899887085, "rewards/accuracies": 0.75, "rewards/chosen": -0.09198108315467834, "rewards/margins": 0.14395105838775635, "rewards/rejected": -0.2359321266412735, "step": 469 }, { "epoch": 0.29237947122861585, "grad_norm": 0.27374881505966187, "learning_rate": 2.6500000000000004e-05, "log_odds_chosen": 1.2873222827911377, "log_odds_ratio": -0.34891578555107117, "logits/chosen": -1.634531021118164, "logits/rejected": 1.292966604232788, "logps/chosen": -1.1532635688781738, "logps/rejected": -2.1369102001190186, "loss": 0.5003, "nll_loss": 0.4654574394226074, "rewards/accuracies": 0.75, "rewards/chosen": -0.11532635986804962, "rewards/margins": 0.09836466610431671, "rewards/rejected": -0.21369104087352753, "step": 470 }, { "epoch": 0.29300155520995336, "grad_norm": 0.5123220086097717, "learning_rate": 2.6450000000000003e-05, "log_odds_chosen": 1.2327231168746948, "log_odds_ratio": -0.3070361614227295, "logits/chosen": 1.1967523097991943, "logits/rejected": 2.225430488586426, "logps/chosen": -0.8559145927429199, "logps/rejected": -1.7130717039108276, "loss": 0.6756, "nll_loss": 0.6448498368263245, "rewards/accuracies": 1.0, "rewards/chosen": -0.08559145778417587, "rewards/margins": 0.08571571856737137, "rewards/rejected": -0.17130717635154724, "step": 471 }, { "epoch": 0.2936236391912908, "grad_norm": 0.3058389127254486, "learning_rate": 2.64e-05, "log_odds_chosen": 2.075676918029785, "log_odds_ratio": -0.3366090655326843, "logits/chosen": 1.349937915802002, "logits/rejected": 2.2221884727478027, "logps/chosen": -0.7084921598434448, "logps/rejected": -2.309854507446289, "loss": 0.6101, "nll_loss": 0.5764274597167969, "rewards/accuracies": 0.875, "rewards/chosen": -0.0708492174744606, "rewards/margins": 0.16013625264167786, "rewards/rejected": -0.23098547756671906, "step": 472 }, { "epoch": 0.2942457231726283, "grad_norm": 0.40857183933258057, "learning_rate": 2.6350000000000004e-05, "log_odds_chosen": 1.1024802923202515, "log_odds_ratio": -0.33238816261291504, "logits/chosen": -0.2755090296268463, "logits/rejected": 0.9011416435241699, "logps/chosen": -1.0526714324951172, "logps/rejected": -1.9224944114685059, "loss": 0.5391, "nll_loss": 0.5058131814002991, "rewards/accuracies": 0.875, "rewards/chosen": -0.10526715219020844, "rewards/margins": 0.08698229491710663, "rewards/rejected": -0.19224943220615387, "step": 473 }, { "epoch": 0.29486780715396577, "grad_norm": 0.31799551844596863, "learning_rate": 2.6300000000000002e-05, "log_odds_chosen": 1.5882513523101807, "log_odds_ratio": -0.3513425290584564, "logits/chosen": 0.6262343525886536, "logits/rejected": 0.8877112865447998, "logps/chosen": -0.9927375912666321, "logps/rejected": -2.278759241104126, "loss": 0.6255, "nll_loss": 0.5903184413909912, "rewards/accuracies": 0.625, "rewards/chosen": -0.09927376359701157, "rewards/margins": 0.12860216200351715, "rewards/rejected": -0.22787591814994812, "step": 474 }, { "epoch": 0.2954898911353033, "grad_norm": 0.30045124888420105, "learning_rate": 2.625e-05, "log_odds_chosen": 3.8556902408599854, "log_odds_ratio": -0.16180792450904846, "logits/chosen": 1.1640573740005493, "logits/rejected": 1.8254338502883911, "logps/chosen": -0.5255829095840454, "logps/rejected": -3.281663179397583, "loss": 0.646, "nll_loss": 0.6297725439071655, "rewards/accuracies": 0.875, "rewards/chosen": -0.0525582879781723, "rewards/margins": 0.27560803294181824, "rewards/rejected": -0.32816633582115173, "step": 475 }, { "epoch": 0.2961119751166407, "grad_norm": 0.42903876304626465, "learning_rate": 2.6200000000000003e-05, "log_odds_chosen": 1.0457801818847656, "log_odds_ratio": -0.3495453894138336, "logits/chosen": 1.600659728050232, "logits/rejected": 2.682332992553711, "logps/chosen": -0.9263206124305725, "logps/rejected": -1.708702564239502, "loss": 0.786, "nll_loss": 0.7510562539100647, "rewards/accuracies": 0.875, "rewards/chosen": -0.09263206273317337, "rewards/margins": 0.07823819667100906, "rewards/rejected": -0.17087027430534363, "step": 476 }, { "epoch": 0.29673405909797823, "grad_norm": 0.5805628895759583, "learning_rate": 2.6150000000000002e-05, "log_odds_chosen": 1.5545982122421265, "log_odds_ratio": -0.3727916479110718, "logits/chosen": 0.31747573614120483, "logits/rejected": 1.172074556350708, "logps/chosen": -1.09333074092865, "logps/rejected": -2.3875832557678223, "loss": 0.6095, "nll_loss": 0.572248101234436, "rewards/accuracies": 0.875, "rewards/chosen": -0.10933306813240051, "rewards/margins": 0.12942524254322052, "rewards/rejected": -0.23875831067562103, "step": 477 }, { "epoch": 0.2973561430793157, "grad_norm": 0.31039735674858093, "learning_rate": 2.61e-05, "log_odds_chosen": 1.6635842323303223, "log_odds_ratio": -0.3079639971256256, "logits/chosen": 2.197296142578125, "logits/rejected": 2.432086706161499, "logps/chosen": -0.9411294460296631, "logps/rejected": -2.21671199798584, "loss": 0.8491, "nll_loss": 0.8183450102806091, "rewards/accuracies": 0.875, "rewards/chosen": -0.09411294758319855, "rewards/margins": 0.12755824625492096, "rewards/rejected": -0.2216711938381195, "step": 478 }, { "epoch": 0.2979782270606532, "grad_norm": 0.3499162793159485, "learning_rate": 2.6050000000000003e-05, "log_odds_chosen": 2.1621012687683105, "log_odds_ratio": -0.23205527663230896, "logits/chosen": 2.0782124996185303, "logits/rejected": 3.038877248764038, "logps/chosen": -0.9419960379600525, "logps/rejected": -2.5322046279907227, "loss": 0.7844, "nll_loss": 0.7611782550811768, "rewards/accuracies": 0.875, "rewards/chosen": -0.09419961273670197, "rewards/margins": 0.15902084112167358, "rewards/rejected": -0.25322046875953674, "step": 479 }, { "epoch": 0.2986003110419907, "grad_norm": 0.32958048582077026, "learning_rate": 2.6000000000000002e-05, "log_odds_chosen": 1.6519575119018555, "log_odds_ratio": -0.24578575789928436, "logits/chosen": 1.0557212829589844, "logits/rejected": 2.017346143722534, "logps/chosen": -0.8723708987236023, "logps/rejected": -2.108044385910034, "loss": 0.5416, "nll_loss": 0.5170350074768066, "rewards/accuracies": 0.875, "rewards/chosen": -0.08723708987236023, "rewards/margins": 0.1235673651099205, "rewards/rejected": -0.21080444753170013, "step": 480 }, { "epoch": 0.29922239502332815, "grad_norm": 0.296722412109375, "learning_rate": 2.595e-05, "log_odds_chosen": 1.3938841819763184, "log_odds_ratio": -0.23713521659374237, "logits/chosen": 0.20026570558547974, "logits/rejected": 2.2107229232788086, "logps/chosen": -0.8148910999298096, "logps/rejected": -1.8060866594314575, "loss": 0.5741, "nll_loss": 0.5503672361373901, "rewards/accuracies": 1.0, "rewards/chosen": -0.08148910850286484, "rewards/margins": 0.09911955893039703, "rewards/rejected": -0.18060868978500366, "step": 481 }, { "epoch": 0.29984447900466565, "grad_norm": 0.3062569200992584, "learning_rate": 2.5900000000000003e-05, "log_odds_chosen": 2.3930747509002686, "log_odds_ratio": -0.28188061714172363, "logits/chosen": 1.990435242652893, "logits/rejected": 3.036588191986084, "logps/chosen": -0.8244961500167847, "logps/rejected": -2.826347827911377, "loss": 0.7671, "nll_loss": 0.738939642906189, "rewards/accuracies": 1.0, "rewards/chosen": -0.08244961500167847, "rewards/margins": 0.2001851499080658, "rewards/rejected": -0.28263476490974426, "step": 482 }, { "epoch": 0.3004665629860031, "grad_norm": 0.38664117455482483, "learning_rate": 2.585e-05, "log_odds_chosen": 1.1401886940002441, "log_odds_ratio": -0.37039393186569214, "logits/chosen": 1.838365912437439, "logits/rejected": 2.5238819122314453, "logps/chosen": -1.0199286937713623, "logps/rejected": -1.7830042839050293, "loss": 0.8311, "nll_loss": 0.7940711975097656, "rewards/accuracies": 0.75, "rewards/chosen": -0.10199287533760071, "rewards/margins": 0.07630756497383118, "rewards/rejected": -0.17830044031143188, "step": 483 }, { "epoch": 0.3010886469673406, "grad_norm": 0.5141417980194092, "learning_rate": 2.58e-05, "log_odds_chosen": 0.9313275814056396, "log_odds_ratio": -0.5068573951721191, "logits/chosen": -0.31626325845718384, "logits/rejected": 2.140399217605591, "logps/chosen": -1.3676080703735352, "logps/rejected": -2.118687152862549, "loss": 0.5733, "nll_loss": 0.5226230621337891, "rewards/accuracies": 0.625, "rewards/chosen": -0.13676080107688904, "rewards/margins": 0.07510790228843689, "rewards/rejected": -0.21186870336532593, "step": 484 }, { "epoch": 0.30171073094867806, "grad_norm": 0.3580343425273895, "learning_rate": 2.5750000000000002e-05, "log_odds_chosen": 1.8977057933807373, "log_odds_ratio": -0.39567553997039795, "logits/chosen": 1.289947748184204, "logits/rejected": 1.3862617015838623, "logps/chosen": -0.8238151669502258, "logps/rejected": -2.428439140319824, "loss": 0.6119, "nll_loss": 0.572327733039856, "rewards/accuracies": 0.625, "rewards/chosen": -0.08238151669502258, "rewards/margins": 0.1604623943567276, "rewards/rejected": -0.24284391105175018, "step": 485 }, { "epoch": 0.30233281493001557, "grad_norm": 0.4157785177230835, "learning_rate": 2.57e-05, "log_odds_chosen": 1.6333980560302734, "log_odds_ratio": -0.2904350161552429, "logits/chosen": 1.140645146369934, "logits/rejected": 1.8697725534439087, "logps/chosen": -0.847370982170105, "logps/rejected": -1.8554041385650635, "loss": 0.6347, "nll_loss": 0.6056182384490967, "rewards/accuracies": 0.875, "rewards/chosen": -0.08473709970712662, "rewards/margins": 0.10080333054065704, "rewards/rejected": -0.18554043769836426, "step": 486 }, { "epoch": 0.302954898911353, "grad_norm": 0.4197591543197632, "learning_rate": 2.5650000000000003e-05, "log_odds_chosen": 1.9002084732055664, "log_odds_ratio": -0.3634560704231262, "logits/chosen": 1.3721474409103394, "logits/rejected": 2.3936052322387695, "logps/chosen": -0.7132107615470886, "logps/rejected": -2.0016989707946777, "loss": 0.622, "nll_loss": 0.5856096744537354, "rewards/accuracies": 0.875, "rewards/chosen": -0.07132107764482498, "rewards/margins": 0.1288488358259201, "rewards/rejected": -0.2001699060201645, "step": 487 }, { "epoch": 0.30357698289269053, "grad_norm": 0.3078671395778656, "learning_rate": 2.5600000000000002e-05, "log_odds_chosen": 1.5222787857055664, "log_odds_ratio": -0.25322094559669495, "logits/chosen": 0.909111738204956, "logits/rejected": 1.0842841863632202, "logps/chosen": -0.8780727982521057, "logps/rejected": -1.9370026588439941, "loss": 0.6661, "nll_loss": 0.6408250331878662, "rewards/accuracies": 1.0, "rewards/chosen": -0.08780728280544281, "rewards/margins": 0.10589298605918884, "rewards/rejected": -0.19370028376579285, "step": 488 }, { "epoch": 0.304199066874028, "grad_norm": 0.3910052478313446, "learning_rate": 2.555e-05, "log_odds_chosen": 2.0691475868225098, "log_odds_ratio": -0.26565665006637573, "logits/chosen": -0.1676226258277893, "logits/rejected": 0.8520660400390625, "logps/chosen": -1.0638614892959595, "logps/rejected": -2.8081817626953125, "loss": 0.4561, "nll_loss": 0.42956626415252686, "rewards/accuracies": 1.0, "rewards/chosen": -0.10638613998889923, "rewards/margins": 0.17443202435970306, "rewards/rejected": -0.2808181643486023, "step": 489 }, { "epoch": 0.3048211508553655, "grad_norm": 0.4584553837776184, "learning_rate": 2.5500000000000003e-05, "log_odds_chosen": 2.8596675395965576, "log_odds_ratio": -0.1520201563835144, "logits/chosen": 1.6730536222457886, "logits/rejected": 1.3357551097869873, "logps/chosen": -1.114020586013794, "logps/rejected": -3.625382423400879, "loss": 0.673, "nll_loss": 0.6578033566474915, "rewards/accuracies": 0.875, "rewards/chosen": -0.11140205711126328, "rewards/margins": 0.2511361837387085, "rewards/rejected": -0.36253824830055237, "step": 490 }, { "epoch": 0.30544323483670294, "grad_norm": 1.8427069187164307, "learning_rate": 2.5450000000000002e-05, "log_odds_chosen": 1.788784146308899, "log_odds_ratio": -0.3101259469985962, "logits/chosen": -1.2556865215301514, "logits/rejected": 1.946107268333435, "logps/chosen": -1.0118483304977417, "logps/rejected": -2.407310962677002, "loss": 0.4638, "nll_loss": 0.4328010380268097, "rewards/accuracies": 0.875, "rewards/chosen": -0.10118483006954193, "rewards/margins": 0.13954627513885498, "rewards/rejected": -0.2407311201095581, "step": 491 }, { "epoch": 0.30606531881804044, "grad_norm": 0.2789076268672943, "learning_rate": 2.54e-05, "log_odds_chosen": 1.594778060913086, "log_odds_ratio": -0.2929396629333496, "logits/chosen": -1.0101150274276733, "logits/rejected": 1.347983956336975, "logps/chosen": -0.8147050142288208, "logps/rejected": -1.9583085775375366, "loss": 0.4648, "nll_loss": 0.4355263411998749, "rewards/accuracies": 1.0, "rewards/chosen": -0.08147049695253372, "rewards/margins": 0.11436036229133606, "rewards/rejected": -0.19583086669445038, "step": 492 }, { "epoch": 0.3066874027993779, "grad_norm": 0.33555591106414795, "learning_rate": 2.5350000000000003e-05, "log_odds_chosen": 3.407597541809082, "log_odds_ratio": -0.0761854499578476, "logits/chosen": 2.7496001720428467, "logits/rejected": 1.1246817111968994, "logps/chosen": -0.6887914538383484, "logps/rejected": -3.3821420669555664, "loss": 0.6723, "nll_loss": 0.6646910309791565, "rewards/accuracies": 1.0, "rewards/chosen": -0.0688791424036026, "rewards/margins": 0.2693350613117218, "rewards/rejected": -0.3382142186164856, "step": 493 }, { "epoch": 0.3073094867807154, "grad_norm": 0.45261499285697937, "learning_rate": 2.5300000000000002e-05, "log_odds_chosen": 2.10783052444458, "log_odds_ratio": -0.2936263680458069, "logits/chosen": 0.6424102783203125, "logits/rejected": 1.272334098815918, "logps/chosen": -1.630174994468689, "logps/rejected": -3.450590133666992, "loss": 0.5338, "nll_loss": 0.5044101476669312, "rewards/accuracies": 0.875, "rewards/chosen": -0.16301749646663666, "rewards/margins": 0.18204151093959808, "rewards/rejected": -0.34505900740623474, "step": 494 }, { "epoch": 0.30793157076205285, "grad_norm": 0.2947097420692444, "learning_rate": 2.525e-05, "log_odds_chosen": 1.4103057384490967, "log_odds_ratio": -0.363348126411438, "logits/chosen": 1.169882893562317, "logits/rejected": 3.214463233947754, "logps/chosen": -0.8717095851898193, "logps/rejected": -1.8850769996643066, "loss": 0.7606, "nll_loss": 0.7242623567581177, "rewards/accuracies": 0.75, "rewards/chosen": -0.08717096596956253, "rewards/margins": 0.10133674740791321, "rewards/rejected": -0.18850769102573395, "step": 495 }, { "epoch": 0.30855365474339036, "grad_norm": 0.4085759222507477, "learning_rate": 2.5200000000000003e-05, "log_odds_chosen": 1.9591169357299805, "log_odds_ratio": -0.2569133937358856, "logits/chosen": 0.9140021800994873, "logits/rejected": 1.3358986377716064, "logps/chosen": -0.8664543032646179, "logps/rejected": -2.4069149494171143, "loss": 0.6013, "nll_loss": 0.5756421685218811, "rewards/accuracies": 0.875, "rewards/chosen": -0.08664542436599731, "rewards/margins": 0.15404607355594635, "rewards/rejected": -0.24069149792194366, "step": 496 }, { "epoch": 0.3091757387247278, "grad_norm": 0.9854008555412292, "learning_rate": 2.515e-05, "log_odds_chosen": 2.951676368713379, "log_odds_ratio": -0.07785683870315552, "logits/chosen": -0.4764617085456848, "logits/rejected": 0.5835741758346558, "logps/chosen": -0.888968825340271, "logps/rejected": -3.347611427307129, "loss": 0.445, "nll_loss": 0.43723028898239136, "rewards/accuracies": 1.0, "rewards/chosen": -0.08889688551425934, "rewards/margins": 0.24586427211761475, "rewards/rejected": -0.3347611427307129, "step": 497 }, { "epoch": 0.3097978227060653, "grad_norm": 0.4051247239112854, "learning_rate": 2.51e-05, "log_odds_chosen": 1.4772777557373047, "log_odds_ratio": -0.3094131350517273, "logits/chosen": 0.31006258726119995, "logits/rejected": 0.590751588344574, "logps/chosen": -0.988946795463562, "logps/rejected": -2.1498780250549316, "loss": 0.5912, "nll_loss": 0.5602656602859497, "rewards/accuracies": 1.0, "rewards/chosen": -0.09889468550682068, "rewards/margins": 0.11609311401844025, "rewards/rejected": -0.21498778462409973, "step": 498 }, { "epoch": 0.3104199066874028, "grad_norm": 0.3615843951702118, "learning_rate": 2.5050000000000002e-05, "log_odds_chosen": 1.8403782844543457, "log_odds_ratio": -0.2444225549697876, "logits/chosen": 2.0023744106292725, "logits/rejected": 1.907917857170105, "logps/chosen": -0.7947826385498047, "logps/rejected": -2.229252576828003, "loss": 0.7428, "nll_loss": 0.7183516025543213, "rewards/accuracies": 0.875, "rewards/chosen": -0.07947827130556107, "rewards/margins": 0.14344698190689087, "rewards/rejected": -0.22292526066303253, "step": 499 }, { "epoch": 0.3110419906687403, "grad_norm": 0.4477740526199341, "learning_rate": 2.5e-05, "log_odds_chosen": 1.363769769668579, "log_odds_ratio": -0.4156811535358429, "logits/chosen": 1.2972413301467896, "logits/rejected": 1.1339985132217407, "logps/chosen": -1.0941433906555176, "logps/rejected": -2.226752281188965, "loss": 0.7844, "nll_loss": 0.7427895665168762, "rewards/accuracies": 0.875, "rewards/chosen": -0.10941435396671295, "rewards/margins": 0.11326086521148682, "rewards/rejected": -0.22267521917819977, "step": 500 }, { "epoch": 0.3116640746500778, "grad_norm": 1.0008854866027832, "learning_rate": 2.495e-05, "log_odds_chosen": 1.8221601247787476, "log_odds_ratio": -0.35362672805786133, "logits/chosen": 2.6204891204833984, "logits/rejected": 3.1090645790100098, "logps/chosen": -0.8226886987686157, "logps/rejected": -2.1769776344299316, "loss": 0.9722, "nll_loss": 0.9368324279785156, "rewards/accuracies": 0.75, "rewards/chosen": -0.0822688639163971, "rewards/margins": 0.13542889058589935, "rewards/rejected": -0.21769778430461884, "step": 501 }, { "epoch": 0.31228615863141523, "grad_norm": 1.1925100088119507, "learning_rate": 2.4900000000000002e-05, "log_odds_chosen": 2.48640775680542, "log_odds_ratio": -0.30636078119277954, "logits/chosen": 1.1644136905670166, "logits/rejected": 1.6417752504348755, "logps/chosen": -1.196340799331665, "logps/rejected": -3.419325828552246, "loss": 0.7578, "nll_loss": 0.7271352410316467, "rewards/accuracies": 0.75, "rewards/chosen": -0.11963406950235367, "rewards/margins": 0.2222985029220581, "rewards/rejected": -0.34193259477615356, "step": 502 }, { "epoch": 0.31290824261275274, "grad_norm": 0.7043509483337402, "learning_rate": 2.485e-05, "log_odds_chosen": 1.8432042598724365, "log_odds_ratio": -0.4298593997955322, "logits/chosen": 1.2005046606063843, "logits/rejected": 2.524587869644165, "logps/chosen": -1.1420859098434448, "logps/rejected": -2.6464970111846924, "loss": 0.7629, "nll_loss": 0.7198686599731445, "rewards/accuracies": 0.875, "rewards/chosen": -0.11420859396457672, "rewards/margins": 0.15044112503528595, "rewards/rejected": -0.2646496891975403, "step": 503 }, { "epoch": 0.3135303265940902, "grad_norm": 0.3517081141471863, "learning_rate": 2.48e-05, "log_odds_chosen": 1.7295416593551636, "log_odds_ratio": -0.34764620661735535, "logits/chosen": 2.7036778926849365, "logits/rejected": 2.9315719604492188, "logps/chosen": -0.7309411764144897, "logps/rejected": -1.7857908010482788, "loss": 0.9757, "nll_loss": 0.940963625907898, "rewards/accuracies": 0.875, "rewards/chosen": -0.07309411466121674, "rewards/margins": 0.1054849624633789, "rewards/rejected": -0.17857909202575684, "step": 504 }, { "epoch": 0.3141524105754277, "grad_norm": 0.5126684904098511, "learning_rate": 2.4750000000000002e-05, "log_odds_chosen": 1.807173728942871, "log_odds_ratio": -0.328085720539093, "logits/chosen": -0.5260485410690308, "logits/rejected": 1.1646230220794678, "logps/chosen": -0.7566425204277039, "logps/rejected": -2.0484488010406494, "loss": 0.5638, "nll_loss": 0.5310238003730774, "rewards/accuracies": 0.75, "rewards/chosen": -0.07566425204277039, "rewards/margins": 0.12918062508106232, "rewards/rejected": -0.2048448771238327, "step": 505 }, { "epoch": 0.31477449455676515, "grad_norm": 0.6230505108833313, "learning_rate": 2.47e-05, "log_odds_chosen": 1.132399320602417, "log_odds_ratio": -0.4643266499042511, "logits/chosen": -0.7182011008262634, "logits/rejected": 1.7023909091949463, "logps/chosen": -1.1046946048736572, "logps/rejected": -2.0132546424865723, "loss": 0.57, "nll_loss": 0.5235822200775146, "rewards/accuracies": 0.75, "rewards/chosen": -0.11046946048736572, "rewards/margins": 0.09085602313280106, "rewards/rejected": -0.20132547616958618, "step": 506 }, { "epoch": 0.31539657853810266, "grad_norm": 0.4481191039085388, "learning_rate": 2.465e-05, "log_odds_chosen": 1.0254859924316406, "log_odds_ratio": -0.45982372760772705, "logits/chosen": -0.05534517765045166, "logits/rejected": 0.7204030156135559, "logps/chosen": -1.1251353025436401, "logps/rejected": -1.9631757736206055, "loss": 0.5937, "nll_loss": 0.5477457642555237, "rewards/accuracies": 0.75, "rewards/chosen": -0.11251352727413177, "rewards/margins": 0.08380405604839325, "rewards/rejected": -0.19631758332252502, "step": 507 }, { "epoch": 0.3160186625194401, "grad_norm": 0.43889644742012024, "learning_rate": 2.46e-05, "log_odds_chosen": 1.6597342491149902, "log_odds_ratio": -0.36858415603637695, "logits/chosen": 0.8218276500701904, "logits/rejected": 2.8646650314331055, "logps/chosen": -0.9696469902992249, "logps/rejected": -2.277371883392334, "loss": 0.6888, "nll_loss": 0.6519296765327454, "rewards/accuracies": 0.875, "rewards/chosen": -0.09696470201015472, "rewards/margins": 0.13077248632907867, "rewards/rejected": -0.2277372032403946, "step": 508 }, { "epoch": 0.3166407465007776, "grad_norm": 0.2650894224643707, "learning_rate": 2.455e-05, "log_odds_chosen": 1.8257701396942139, "log_odds_ratio": -0.2810014486312866, "logits/chosen": -0.23144686222076416, "logits/rejected": 1.1131678819656372, "logps/chosen": -1.10640287399292, "logps/rejected": -2.5477852821350098, "loss": 0.4731, "nll_loss": 0.4450156092643738, "rewards/accuracies": 0.875, "rewards/chosen": -0.11064029484987259, "rewards/margins": 0.14413823187351227, "rewards/rejected": -0.25477853417396545, "step": 509 }, { "epoch": 0.31726283048211507, "grad_norm": 0.3267921209335327, "learning_rate": 2.45e-05, "log_odds_chosen": 3.1637048721313477, "log_odds_ratio": -0.12191061675548553, "logits/chosen": 0.48765379190444946, "logits/rejected": 1.0577715635299683, "logps/chosen": -0.9315196871757507, "logps/rejected": -3.6205620765686035, "loss": 0.6443, "nll_loss": 0.6321412920951843, "rewards/accuracies": 1.0, "rewards/chosen": -0.09315197169780731, "rewards/margins": 0.2689042389392853, "rewards/rejected": -0.3620561957359314, "step": 510 }, { "epoch": 0.3178849144634526, "grad_norm": 0.3435508608818054, "learning_rate": 2.445e-05, "log_odds_chosen": 1.2938909530639648, "log_odds_ratio": -0.27308449149131775, "logits/chosen": -0.5473048686981201, "logits/rejected": 1.6775987148284912, "logps/chosen": -0.9595645666122437, "logps/rejected": -1.921897292137146, "loss": 0.4999, "nll_loss": 0.47254717350006104, "rewards/accuracies": 1.0, "rewards/chosen": -0.0959564596414566, "rewards/margins": 0.09623328596353531, "rewards/rejected": -0.19218973815441132, "step": 511 }, { "epoch": 0.31850699844479, "grad_norm": 0.3145331144332886, "learning_rate": 2.44e-05, "log_odds_chosen": 1.6362143754959106, "log_odds_ratio": -0.24002549052238464, "logits/chosen": 0.3165720999240875, "logits/rejected": 0.3282266855239868, "logps/chosen": -0.7263202667236328, "logps/rejected": -1.7319252490997314, "loss": 0.5356, "nll_loss": 0.5115996599197388, "rewards/accuracies": 1.0, "rewards/chosen": -0.07263202965259552, "rewards/margins": 0.1005605086684227, "rewards/rejected": -0.17319253087043762, "step": 512 }, { "epoch": 0.31912908242612753, "grad_norm": 0.6429533958435059, "learning_rate": 2.435e-05, "log_odds_chosen": 2.0500099658966064, "log_odds_ratio": -0.2836396098136902, "logits/chosen": 0.73582923412323, "logits/rejected": 0.8022631406784058, "logps/chosen": -1.438249111175537, "logps/rejected": -2.903639793395996, "loss": 0.6947, "nll_loss": 0.6662985682487488, "rewards/accuracies": 0.875, "rewards/chosen": -0.14382490515708923, "rewards/margins": 0.14653907716274261, "rewards/rejected": -0.29036396741867065, "step": 513 }, { "epoch": 0.319751166407465, "grad_norm": 0.27727633714675903, "learning_rate": 2.43e-05, "log_odds_chosen": 1.874891996383667, "log_odds_ratio": -0.25810471177101135, "logits/chosen": -0.31271833181381226, "logits/rejected": 0.6087108850479126, "logps/chosen": -0.8699772357940674, "logps/rejected": -2.310192108154297, "loss": 0.5346, "nll_loss": 0.5087747573852539, "rewards/accuracies": 0.875, "rewards/chosen": -0.08699773252010345, "rewards/margins": 0.14402146637439728, "rewards/rejected": -0.23101919889450073, "step": 514 }, { "epoch": 0.3203732503888025, "grad_norm": 0.2998380661010742, "learning_rate": 2.425e-05, "log_odds_chosen": 1.8412083387374878, "log_odds_ratio": -0.2547343969345093, "logits/chosen": -0.4206013083457947, "logits/rejected": 0.5524145364761353, "logps/chosen": -0.7460927963256836, "logps/rejected": -2.051219940185547, "loss": 0.4801, "nll_loss": 0.45466262102127075, "rewards/accuracies": 1.0, "rewards/chosen": -0.07460928708314896, "rewards/margins": 0.13051271438598633, "rewards/rejected": -0.2051219940185547, "step": 515 }, { "epoch": 0.32099533437014, "grad_norm": 1.0635550022125244, "learning_rate": 2.4200000000000002e-05, "log_odds_chosen": 3.999274492263794, "log_odds_ratio": -0.14171874523162842, "logits/chosen": 0.9166915416717529, "logits/rejected": -0.4675154387950897, "logps/chosen": -0.9731952548027039, "logps/rejected": -4.343837738037109, "loss": 0.6554, "nll_loss": 0.641204833984375, "rewards/accuracies": 1.0, "rewards/chosen": -0.09731952100992203, "rewards/margins": 0.337064266204834, "rewards/rejected": -0.4343838095664978, "step": 516 }, { "epoch": 0.32161741835147745, "grad_norm": 0.31030628085136414, "learning_rate": 2.415e-05, "log_odds_chosen": 1.8920516967773438, "log_odds_ratio": -0.2721685767173767, "logits/chosen": -0.13164083659648895, "logits/rejected": 1.2151129245758057, "logps/chosen": -0.9168779850006104, "logps/rejected": -2.433690071105957, "loss": 0.5855, "nll_loss": 0.5582566261291504, "rewards/accuracies": 1.0, "rewards/chosen": -0.09168778359889984, "rewards/margins": 0.15168119966983795, "rewards/rejected": -0.2433689832687378, "step": 517 }, { "epoch": 0.32223950233281495, "grad_norm": 0.288943886756897, "learning_rate": 2.41e-05, "log_odds_chosen": 1.9927161931991577, "log_odds_ratio": -0.2383418083190918, "logits/chosen": 0.47148334980010986, "logits/rejected": 1.6919628381729126, "logps/chosen": -0.9311184883117676, "logps/rejected": -2.584421157836914, "loss": 0.6358, "nll_loss": 0.6119847297668457, "rewards/accuracies": 0.875, "rewards/chosen": -0.09311184287071228, "rewards/margins": 0.16533026099205017, "rewards/rejected": -0.25844210386276245, "step": 518 }, { "epoch": 0.3228615863141524, "grad_norm": 0.32276710867881775, "learning_rate": 2.4050000000000002e-05, "log_odds_chosen": 2.0655078887939453, "log_odds_ratio": -0.2558887302875519, "logits/chosen": 1.606288194656372, "logits/rejected": 2.212973117828369, "logps/chosen": -0.7719038128852844, "logps/rejected": -2.3773133754730225, "loss": 0.7828, "nll_loss": 0.7572447657585144, "rewards/accuracies": 0.75, "rewards/chosen": -0.07719038426876068, "rewards/margins": 0.16054093837738037, "rewards/rejected": -0.23773130774497986, "step": 519 }, { "epoch": 0.3234836702954899, "grad_norm": 0.38670703768730164, "learning_rate": 2.4e-05, "log_odds_chosen": 1.765141487121582, "log_odds_ratio": -0.28975823521614075, "logits/chosen": 2.2421889305114746, "logits/rejected": 2.0839686393737793, "logps/chosen": -0.9007455110549927, "logps/rejected": -2.3477883338928223, "loss": 0.7432, "nll_loss": 0.7142671346664429, "rewards/accuracies": 0.875, "rewards/chosen": -0.0900745540857315, "rewards/margins": 0.14470428228378296, "rewards/rejected": -0.23477882146835327, "step": 520 }, { "epoch": 0.32410575427682736, "grad_norm": 0.582175076007843, "learning_rate": 2.395e-05, "log_odds_chosen": 2.358574151992798, "log_odds_ratio": -0.3873441517353058, "logits/chosen": -0.06897962093353271, "logits/rejected": 1.1061187982559204, "logps/chosen": -1.0776824951171875, "logps/rejected": -3.145253896713257, "loss": 0.6335, "nll_loss": 0.5947161316871643, "rewards/accuracies": 0.75, "rewards/chosen": -0.10776825249195099, "rewards/margins": 0.20675715804100037, "rewards/rejected": -0.3145253658294678, "step": 521 }, { "epoch": 0.32472783825816487, "grad_norm": 0.31357330083847046, "learning_rate": 2.39e-05, "log_odds_chosen": 2.555722713470459, "log_odds_ratio": -0.2816966474056244, "logits/chosen": -0.15091806650161743, "logits/rejected": 0.5959459543228149, "logps/chosen": -0.9637516140937805, "logps/rejected": -3.1803767681121826, "loss": 0.6144, "nll_loss": 0.5862266421318054, "rewards/accuracies": 0.875, "rewards/chosen": -0.09637516736984253, "rewards/margins": 0.2216625064611435, "rewards/rejected": -0.3180376887321472, "step": 522 }, { "epoch": 0.3253499222395023, "grad_norm": 0.7683733701705933, "learning_rate": 2.385e-05, "log_odds_chosen": 3.3063204288482666, "log_odds_ratio": -0.09356483817100525, "logits/chosen": 2.2439417839050293, "logits/rejected": 1.6922907829284668, "logps/chosen": -0.9698910117149353, "logps/rejected": -3.716911792755127, "loss": 0.8029, "nll_loss": 0.7935055494308472, "rewards/accuracies": 1.0, "rewards/chosen": -0.09698909521102905, "rewards/margins": 0.2747020721435547, "rewards/rejected": -0.37169113755226135, "step": 523 }, { "epoch": 0.3259720062208398, "grad_norm": 0.39197778701782227, "learning_rate": 2.38e-05, "log_odds_chosen": 1.962248682975769, "log_odds_ratio": -0.18143852055072784, "logits/chosen": -0.4806569516658783, "logits/rejected": -0.31491222977638245, "logps/chosen": -1.0480461120605469, "logps/rejected": -2.6499183177948, "loss": 0.5054, "nll_loss": 0.4872513711452484, "rewards/accuracies": 1.0, "rewards/chosen": -0.10480460524559021, "rewards/margins": 0.160187229514122, "rewards/rejected": -0.2649918496608734, "step": 524 }, { "epoch": 0.3265940902021773, "grad_norm": 0.37640804052352905, "learning_rate": 2.375e-05, "log_odds_chosen": 2.661229133605957, "log_odds_ratio": -0.1665583997964859, "logits/chosen": 1.625669240951538, "logits/rejected": 1.2462046146392822, "logps/chosen": -0.9341843128204346, "logps/rejected": -3.1749377250671387, "loss": 0.6937, "nll_loss": 0.6770105361938477, "rewards/accuracies": 1.0, "rewards/chosen": -0.0934184342622757, "rewards/margins": 0.2240753471851349, "rewards/rejected": -0.3174937963485718, "step": 525 }, { "epoch": 0.3272161741835148, "grad_norm": 0.34807097911834717, "learning_rate": 2.37e-05, "log_odds_chosen": 0.8722881078720093, "log_odds_ratio": -0.4786973297595978, "logits/chosen": 1.2106951475143433, "logits/rejected": 1.6873623132705688, "logps/chosen": -1.1643617153167725, "logps/rejected": -1.9418911933898926, "loss": 0.8321, "nll_loss": 0.7842261791229248, "rewards/accuracies": 0.625, "rewards/chosen": -0.11643616855144501, "rewards/margins": 0.07775293290615082, "rewards/rejected": -0.19418910145759583, "step": 526 }, { "epoch": 0.32783825816485224, "grad_norm": 0.3314477801322937, "learning_rate": 2.365e-05, "log_odds_chosen": 1.965777039527893, "log_odds_ratio": -0.28736618161201477, "logits/chosen": 1.045924186706543, "logits/rejected": 1.1745761632919312, "logps/chosen": -0.6382919549942017, "logps/rejected": -2.006195068359375, "loss": 0.5883, "nll_loss": 0.5595134496688843, "rewards/accuracies": 0.875, "rewards/chosen": -0.0638291984796524, "rewards/margins": 0.13679032027721405, "rewards/rejected": -0.20061951875686646, "step": 527 }, { "epoch": 0.32846034214618974, "grad_norm": 0.3520694673061371, "learning_rate": 2.36e-05, "log_odds_chosen": 2.4457571506500244, "log_odds_ratio": -0.20794501900672913, "logits/chosen": 1.5410584211349487, "logits/rejected": 0.5601137280464172, "logps/chosen": -0.8247537016868591, "logps/rejected": -2.7022957801818848, "loss": 0.6541, "nll_loss": 0.6333186626434326, "rewards/accuracies": 1.0, "rewards/chosen": -0.08247537165880203, "rewards/margins": 0.18775421380996704, "rewards/rejected": -0.2702295780181885, "step": 528 }, { "epoch": 0.3290824261275272, "grad_norm": 0.320633202791214, "learning_rate": 2.355e-05, "log_odds_chosen": 1.7150052785873413, "log_odds_ratio": -0.28227710723876953, "logits/chosen": 0.22371336817741394, "logits/rejected": 1.3191059827804565, "logps/chosen": -1.1468290090560913, "logps/rejected": -2.596248149871826, "loss": 0.5774, "nll_loss": 0.5491545796394348, "rewards/accuracies": 1.0, "rewards/chosen": -0.11468289792537689, "rewards/margins": 0.14494192600250244, "rewards/rejected": -0.2596248388290405, "step": 529 }, { "epoch": 0.3297045101088647, "grad_norm": 0.5384267568588257, "learning_rate": 2.35e-05, "log_odds_chosen": 1.6268489360809326, "log_odds_ratio": -0.248253732919693, "logits/chosen": 2.6993472576141357, "logits/rejected": 2.4896597862243652, "logps/chosen": -0.9050356149673462, "logps/rejected": -2.147216558456421, "loss": 0.8078, "nll_loss": 0.7829287648200989, "rewards/accuracies": 1.0, "rewards/chosen": -0.09050355851650238, "rewards/margins": 0.12421809881925583, "rewards/rejected": -0.2147216498851776, "step": 530 }, { "epoch": 0.33032659409020215, "grad_norm": 1.307396650314331, "learning_rate": 2.345e-05, "log_odds_chosen": 2.3934273719787598, "log_odds_ratio": -0.22615107893943787, "logits/chosen": 2.7248733043670654, "logits/rejected": 2.540579319000244, "logps/chosen": -0.8816531300544739, "logps/rejected": -2.799293041229248, "loss": 0.8883, "nll_loss": 0.8656710386276245, "rewards/accuracies": 0.875, "rewards/chosen": -0.08816531300544739, "rewards/margins": 0.1917639970779419, "rewards/rejected": -0.27992933988571167, "step": 531 }, { "epoch": 0.33094867807153966, "grad_norm": 0.36137479543685913, "learning_rate": 2.3400000000000003e-05, "log_odds_chosen": 0.7194154262542725, "log_odds_ratio": -0.523330569267273, "logits/chosen": 0.8707399368286133, "logits/rejected": 1.3606274127960205, "logps/chosen": -1.3928475379943848, "logps/rejected": -1.9430372714996338, "loss": 0.7521, "nll_loss": 0.6998083591461182, "rewards/accuracies": 0.75, "rewards/chosen": -0.13928475975990295, "rewards/margins": 0.05501896142959595, "rewards/rejected": -0.1943037360906601, "step": 532 }, { "epoch": 0.33157076205287717, "grad_norm": 0.4101194143295288, "learning_rate": 2.3350000000000002e-05, "log_odds_chosen": 2.2328317165374756, "log_odds_ratio": -0.23609133064746857, "logits/chosen": 2.1293582916259766, "logits/rejected": 0.01358860731124878, "logps/chosen": -1.1123179197311401, "logps/rejected": -2.9739584922790527, "loss": 0.716, "nll_loss": 0.6923679113388062, "rewards/accuracies": 0.875, "rewards/chosen": -0.11123178899288177, "rewards/margins": 0.18616405129432678, "rewards/rejected": -0.29739582538604736, "step": 533 }, { "epoch": 0.3321928460342146, "grad_norm": 0.3048454523086548, "learning_rate": 2.3300000000000004e-05, "log_odds_chosen": 4.277897357940674, "log_odds_ratio": -0.17860309779644012, "logits/chosen": 1.040080189704895, "logits/rejected": 1.0091297626495361, "logps/chosen": -1.0271538496017456, "logps/rejected": -4.838980674743652, "loss": 0.6094, "nll_loss": 0.5915553569793701, "rewards/accuracies": 0.875, "rewards/chosen": -0.1027153879404068, "rewards/margins": 0.3811826705932617, "rewards/rejected": -0.4838980436325073, "step": 534 }, { "epoch": 0.3328149300155521, "grad_norm": 0.3863018751144409, "learning_rate": 2.3250000000000003e-05, "log_odds_chosen": 0.512001633644104, "log_odds_ratio": -0.5913516283035278, "logits/chosen": 2.4631636142730713, "logits/rejected": 2.423994302749634, "logps/chosen": -1.0720179080963135, "logps/rejected": -1.4990208148956299, "loss": 0.8055, "nll_loss": 0.7463449835777283, "rewards/accuracies": 0.75, "rewards/chosen": -0.10720179229974747, "rewards/margins": 0.04270029440522194, "rewards/rejected": -0.1499020755290985, "step": 535 }, { "epoch": 0.3334370139968896, "grad_norm": 0.3722380995750427, "learning_rate": 2.32e-05, "log_odds_chosen": 1.6009268760681152, "log_odds_ratio": -0.4362635016441345, "logits/chosen": 0.6439146995544434, "logits/rejected": 0.752334713935852, "logps/chosen": -0.9249628782272339, "logps/rejected": -2.262263774871826, "loss": 0.544, "nll_loss": 0.5003713965415955, "rewards/accuracies": 0.75, "rewards/chosen": -0.09249629080295563, "rewards/margins": 0.13373006880283356, "rewards/rejected": -0.22622635960578918, "step": 536 }, { "epoch": 0.3340590979782271, "grad_norm": 0.3637986481189728, "learning_rate": 2.3150000000000004e-05, "log_odds_chosen": 3.318232536315918, "log_odds_ratio": -0.2623625695705414, "logits/chosen": 1.6884994506835938, "logits/rejected": 1.948002576828003, "logps/chosen": -0.8119041323661804, "logps/rejected": -3.6660208702087402, "loss": 0.7499, "nll_loss": 0.7236235737800598, "rewards/accuracies": 0.75, "rewards/chosen": -0.08119041472673416, "rewards/margins": 0.28541168570518494, "rewards/rejected": -0.3666021227836609, "step": 537 }, { "epoch": 0.33468118195956453, "grad_norm": 0.3645683825016022, "learning_rate": 2.3100000000000002e-05, "log_odds_chosen": 2.602912664413452, "log_odds_ratio": -0.19379067420959473, "logits/chosen": 0.5529426336288452, "logits/rejected": 0.2645620107650757, "logps/chosen": -0.8854231834411621, "logps/rejected": -3.0381038188934326, "loss": 0.6065, "nll_loss": 0.5871433019638062, "rewards/accuracies": 0.875, "rewards/chosen": -0.08854231983423233, "rewards/margins": 0.215268075466156, "rewards/rejected": -0.3038104176521301, "step": 538 }, { "epoch": 0.33530326594090204, "grad_norm": 1.0917738676071167, "learning_rate": 2.305e-05, "log_odds_chosen": 2.948880672454834, "log_odds_ratio": -0.2544633746147156, "logits/chosen": -0.09750711917877197, "logits/rejected": -1.0221948623657227, "logps/chosen": -0.605278491973877, "logps/rejected": -2.81388783454895, "loss": 0.5105, "nll_loss": 0.48507392406463623, "rewards/accuracies": 0.75, "rewards/chosen": -0.06052785366773605, "rewards/margins": 0.22086089849472046, "rewards/rejected": -0.2813887894153595, "step": 539 }, { "epoch": 0.3359253499222395, "grad_norm": 0.30375197529792786, "learning_rate": 2.3000000000000003e-05, "log_odds_chosen": 2.7974700927734375, "log_odds_ratio": -0.29055845737457275, "logits/chosen": 0.4769752621650696, "logits/rejected": 0.44122734665870667, "logps/chosen": -0.7745877504348755, "logps/rejected": -3.094503164291382, "loss": 0.571, "nll_loss": 0.5419222712516785, "rewards/accuracies": 0.875, "rewards/chosen": -0.07745878398418427, "rewards/margins": 0.23199154436588287, "rewards/rejected": -0.30945032835006714, "step": 540 }, { "epoch": 0.336547433903577, "grad_norm": 0.6567568182945251, "learning_rate": 2.2950000000000002e-05, "log_odds_chosen": 2.1677944660186768, "log_odds_ratio": -0.57481849193573, "logits/chosen": -0.9570534229278564, "logits/rejected": -1.0735530853271484, "logps/chosen": -1.4620994329452515, "logps/rejected": -3.3756513595581055, "loss": 0.4604, "nll_loss": 0.40293216705322266, "rewards/accuracies": 0.875, "rewards/chosen": -0.1462099552154541, "rewards/margins": 0.19135519862174988, "rewards/rejected": -0.337565153837204, "step": 541 }, { "epoch": 0.33716951788491445, "grad_norm": 0.41450825333595276, "learning_rate": 2.29e-05, "log_odds_chosen": 1.4745512008666992, "log_odds_ratio": -0.3406660556793213, "logits/chosen": 0.29881247878074646, "logits/rejected": 0.5884536504745483, "logps/chosen": -0.9175814390182495, "logps/rejected": -2.129868507385254, "loss": 0.4939, "nll_loss": 0.45984673500061035, "rewards/accuracies": 0.875, "rewards/chosen": -0.09175814688205719, "rewards/margins": 0.12122871726751328, "rewards/rejected": -0.21298687160015106, "step": 542 }, { "epoch": 0.33779160186625196, "grad_norm": 0.33071574568748474, "learning_rate": 2.2850000000000003e-05, "log_odds_chosen": 1.4744378328323364, "log_odds_ratio": -0.3100065290927887, "logits/chosen": 1.2643396854400635, "logits/rejected": 1.169672966003418, "logps/chosen": -0.7786130309104919, "logps/rejected": -1.8644421100616455, "loss": 0.6003, "nll_loss": 0.5693123936653137, "rewards/accuracies": 1.0, "rewards/chosen": -0.07786130905151367, "rewards/margins": 0.10858289897441864, "rewards/rejected": -0.1864442080259323, "step": 543 }, { "epoch": 0.3384136858475894, "grad_norm": 0.33353278040885925, "learning_rate": 2.2800000000000002e-05, "log_odds_chosen": 1.95163893699646, "log_odds_ratio": -0.32277804613113403, "logits/chosen": 0.27112460136413574, "logits/rejected": 1.4154012203216553, "logps/chosen": -0.8759363889694214, "logps/rejected": -2.446195602416992, "loss": 0.6165, "nll_loss": 0.5842545032501221, "rewards/accuracies": 0.75, "rewards/chosen": -0.08759364485740662, "rewards/margins": 0.15702593326568604, "rewards/rejected": -0.24461957812309265, "step": 544 }, { "epoch": 0.3390357698289269, "grad_norm": 0.4259192645549774, "learning_rate": 2.275e-05, "log_odds_chosen": 2.227778911590576, "log_odds_ratio": -0.392104834318161, "logits/chosen": 0.6782884001731873, "logits/rejected": 1.1886241436004639, "logps/chosen": -0.9938414096832275, "logps/rejected": -3.0460071563720703, "loss": 0.6122, "nll_loss": 0.5729869604110718, "rewards/accuracies": 0.875, "rewards/chosen": -0.09938414394855499, "rewards/margins": 0.20521658658981323, "rewards/rejected": -0.3046007454395294, "step": 545 }, { "epoch": 0.33965785381026437, "grad_norm": 0.3001631796360016, "learning_rate": 2.2700000000000003e-05, "log_odds_chosen": 2.93178391456604, "log_odds_ratio": -0.1864704191684723, "logits/chosen": 1.5299491882324219, "logits/rejected": 2.5147063732147217, "logps/chosen": -0.8954837322235107, "logps/rejected": -3.2739996910095215, "loss": 0.7499, "nll_loss": 0.7312994599342346, "rewards/accuracies": 1.0, "rewards/chosen": -0.08954837173223495, "rewards/margins": 0.2378515899181366, "rewards/rejected": -0.32739996910095215, "step": 546 }, { "epoch": 0.34027993779160187, "grad_norm": 0.3409689664840698, "learning_rate": 2.265e-05, "log_odds_chosen": 2.394829273223877, "log_odds_ratio": -0.12511810660362244, "logits/chosen": 0.43690744042396545, "logits/rejected": 0.4180241823196411, "logps/chosen": -0.892842173576355, "logps/rejected": -2.846055746078491, "loss": 0.549, "nll_loss": 0.5364462733268738, "rewards/accuracies": 1.0, "rewards/chosen": -0.08928421884775162, "rewards/margins": 0.19532136619091034, "rewards/rejected": -0.28460556268692017, "step": 547 }, { "epoch": 0.3409020217729393, "grad_norm": 0.4478369355201721, "learning_rate": 2.26e-05, "log_odds_chosen": 2.7050700187683105, "log_odds_ratio": -0.22413116693496704, "logits/chosen": 0.4567830562591553, "logits/rejected": 0.9032511115074158, "logps/chosen": -1.3370281457901, "logps/rejected": -3.741605758666992, "loss": 0.561, "nll_loss": 0.5385462045669556, "rewards/accuracies": 0.875, "rewards/chosen": -0.1337028294801712, "rewards/margins": 0.24045775830745697, "rewards/rejected": -0.3741605877876282, "step": 548 }, { "epoch": 0.34152410575427683, "grad_norm": 0.35546326637268066, "learning_rate": 2.2550000000000003e-05, "log_odds_chosen": 4.098875045776367, "log_odds_ratio": -0.07504149526357651, "logits/chosen": 1.9520654678344727, "logits/rejected": 0.00759616494178772, "logps/chosen": -0.7652401924133301, "logps/rejected": -4.112580299377441, "loss": 0.5871, "nll_loss": 0.5796326398849487, "rewards/accuracies": 1.0, "rewards/chosen": -0.07652401924133301, "rewards/margins": 0.33473408222198486, "rewards/rejected": -0.4112580716609955, "step": 549 }, { "epoch": 0.3421461897356143, "grad_norm": 0.3201517164707184, "learning_rate": 2.25e-05, "log_odds_chosen": 4.117212772369385, "log_odds_ratio": -0.029010329395532608, "logits/chosen": 1.0014082193374634, "logits/rejected": 0.7812284231185913, "logps/chosen": -0.8319846987724304, "logps/rejected": -4.328241348266602, "loss": 0.5622, "nll_loss": 0.5592617988586426, "rewards/accuracies": 1.0, "rewards/chosen": -0.08319847285747528, "rewards/margins": 0.34962570667266846, "rewards/rejected": -0.43282419443130493, "step": 550 }, { "epoch": 0.3427682737169518, "grad_norm": 0.352874755859375, "learning_rate": 2.245e-05, "log_odds_chosen": 3.4196906089782715, "log_odds_ratio": -0.07896264642477036, "logits/chosen": 0.3399137854576111, "logits/rejected": 0.0779067724943161, "logps/chosen": -0.8405764698982239, "logps/rejected": -3.7050743103027344, "loss": 0.5314, "nll_loss": 0.5235511064529419, "rewards/accuracies": 1.0, "rewards/chosen": -0.08405765146017075, "rewards/margins": 0.2864498198032379, "rewards/rejected": -0.37050747871398926, "step": 551 }, { "epoch": 0.3433903576982893, "grad_norm": 0.3518848121166229, "learning_rate": 2.2400000000000002e-05, "log_odds_chosen": 2.2333786487579346, "log_odds_ratio": -0.20728513598442078, "logits/chosen": 1.2030657529830933, "logits/rejected": 1.240146517753601, "logps/chosen": -1.1116034984588623, "logps/rejected": -2.8557775020599365, "loss": 0.7377, "nll_loss": 0.7169494032859802, "rewards/accuracies": 1.0, "rewards/chosen": -0.11116035282611847, "rewards/margins": 0.1744173914194107, "rewards/rejected": -0.2855777442455292, "step": 552 }, { "epoch": 0.34401244167962675, "grad_norm": 0.3764328956604004, "learning_rate": 2.235e-05, "log_odds_chosen": 2.8303418159484863, "log_odds_ratio": -0.2849666178226471, "logits/chosen": 1.2564030885696411, "logits/rejected": 0.2557767927646637, "logps/chosen": -0.8771718740463257, "logps/rejected": -3.4054300785064697, "loss": 0.6693, "nll_loss": 0.6408088207244873, "rewards/accuracies": 0.75, "rewards/chosen": -0.08771719038486481, "rewards/margins": 0.2528257966041565, "rewards/rejected": -0.3405430018901825, "step": 553 }, { "epoch": 0.34463452566096425, "grad_norm": 0.32724928855895996, "learning_rate": 2.23e-05, "log_odds_chosen": 2.3714241981506348, "log_odds_ratio": -0.36799246072769165, "logits/chosen": 1.9873566627502441, "logits/rejected": 0.9282538890838623, "logps/chosen": -0.8043345212936401, "logps/rejected": -2.7831761837005615, "loss": 0.7771, "nll_loss": 0.7403146624565125, "rewards/accuracies": 0.75, "rewards/chosen": -0.0804334506392479, "rewards/margins": 0.19788417220115662, "rewards/rejected": -0.2783176302909851, "step": 554 }, { "epoch": 0.3452566096423017, "grad_norm": 0.3000686466693878, "learning_rate": 2.2250000000000002e-05, "log_odds_chosen": 3.208042860031128, "log_odds_ratio": -0.16007201373577118, "logits/chosen": 1.556321382522583, "logits/rejected": 0.668292760848999, "logps/chosen": -0.8184808492660522, "logps/rejected": -3.525345802307129, "loss": 0.5825, "nll_loss": 0.5665206909179688, "rewards/accuracies": 0.875, "rewards/chosen": -0.08184809237718582, "rewards/margins": 0.2706865072250366, "rewards/rejected": -0.35253459215164185, "step": 555 }, { "epoch": 0.3458786936236392, "grad_norm": 0.3622443377971649, "learning_rate": 2.22e-05, "log_odds_chosen": 5.820090293884277, "log_odds_ratio": -0.07336248457431793, "logits/chosen": 2.9645447731018066, "logits/rejected": 2.1063284873962402, "logps/chosen": -0.7918724417686462, "logps/rejected": -5.822584629058838, "loss": 0.7855, "nll_loss": 0.7781620621681213, "rewards/accuracies": 1.0, "rewards/chosen": -0.07918724417686462, "rewards/margins": 0.5030711889266968, "rewards/rejected": -0.5822584629058838, "step": 556 }, { "epoch": 0.34650077760497666, "grad_norm": 0.3985669016838074, "learning_rate": 2.215e-05, "log_odds_chosen": 3.1644110679626465, "log_odds_ratio": -0.12268514186143875, "logits/chosen": 0.2535923719406128, "logits/rejected": -1.1449623107910156, "logps/chosen": -1.1264760494232178, "logps/rejected": -3.9241061210632324, "loss": 0.5895, "nll_loss": 0.5772807598114014, "rewards/accuracies": 1.0, "rewards/chosen": -0.11264760792255402, "rewards/margins": 0.27976301312446594, "rewards/rejected": -0.39241063594818115, "step": 557 }, { "epoch": 0.34712286158631417, "grad_norm": 0.5123311877250671, "learning_rate": 2.2100000000000002e-05, "log_odds_chosen": 2.835562229156494, "log_odds_ratio": -0.34649112820625305, "logits/chosen": 0.7598134875297546, "logits/rejected": 0.006757020950317383, "logps/chosen": -1.0932013988494873, "logps/rejected": -3.476534128189087, "loss": 0.5802, "nll_loss": 0.5455945134162903, "rewards/accuracies": 0.875, "rewards/chosen": -0.10932014137506485, "rewards/margins": 0.23833326995372772, "rewards/rejected": -0.3476533889770508, "step": 558 }, { "epoch": 0.3477449455676516, "grad_norm": 0.3804253041744232, "learning_rate": 2.205e-05, "log_odds_chosen": 5.65109920501709, "log_odds_ratio": -0.11326686292886734, "logits/chosen": 2.597390651702881, "logits/rejected": 1.8537123203277588, "logps/chosen": -0.8809130787849426, "logps/rejected": -5.866379261016846, "loss": 0.9367, "nll_loss": 0.9254015684127808, "rewards/accuracies": 0.875, "rewards/chosen": -0.08809130638837814, "rewards/margins": 0.49854663014411926, "rewards/rejected": -0.5866379737854004, "step": 559 }, { "epoch": 0.3483670295489891, "grad_norm": 0.2974945604801178, "learning_rate": 2.2000000000000003e-05, "log_odds_chosen": 4.304051399230957, "log_odds_ratio": -0.08408652245998383, "logits/chosen": -0.3859021067619324, "logits/rejected": 0.04299017786979675, "logps/chosen": -1.005317211151123, "logps/rejected": -4.898586273193359, "loss": 0.5036, "nll_loss": 0.4951820969581604, "rewards/accuracies": 1.0, "rewards/chosen": -0.10053171962499619, "rewards/margins": 0.38932690024375916, "rewards/rejected": -0.48985862731933594, "step": 560 }, { "epoch": 0.3489891135303266, "grad_norm": 0.32148414850234985, "learning_rate": 2.195e-05, "log_odds_chosen": 4.430983543395996, "log_odds_ratio": -0.026275552809238434, "logits/chosen": 2.0504298210144043, "logits/rejected": 1.0362154245376587, "logps/chosen": -0.7540563344955444, "logps/rejected": -4.490864276885986, "loss": 0.6834, "nll_loss": 0.6807276606559753, "rewards/accuracies": 1.0, "rewards/chosen": -0.07540564239025116, "rewards/margins": 0.37368080019950867, "rewards/rejected": -0.44908642768859863, "step": 561 }, { "epoch": 0.3496111975116641, "grad_norm": 0.4179425835609436, "learning_rate": 2.19e-05, "log_odds_chosen": 4.381219387054443, "log_odds_ratio": -0.14103896915912628, "logits/chosen": 1.1080811023712158, "logits/rejected": -0.20480573177337646, "logps/chosen": -0.9135346412658691, "logps/rejected": -4.840203285217285, "loss": 0.5167, "nll_loss": 0.5026371479034424, "rewards/accuracies": 0.875, "rewards/chosen": -0.09135347604751587, "rewards/margins": 0.39266693592071533, "rewards/rejected": -0.4840203821659088, "step": 562 }, { "epoch": 0.35023328149300154, "grad_norm": 0.3320809602737427, "learning_rate": 2.1850000000000003e-05, "log_odds_chosen": 4.221715450286865, "log_odds_ratio": -0.04479138180613518, "logits/chosen": 1.4076143503189087, "logits/rejected": 0.8501245379447937, "logps/chosen": -0.8335117101669312, "logps/rejected": -4.350778102874756, "loss": 0.6814, "nll_loss": 0.6769353151321411, "rewards/accuracies": 1.0, "rewards/chosen": -0.08335117250680923, "rewards/margins": 0.3517266511917114, "rewards/rejected": -0.43507784605026245, "step": 563 }, { "epoch": 0.35085536547433904, "grad_norm": 2.018888473510742, "learning_rate": 2.18e-05, "log_odds_chosen": 2.925218105316162, "log_odds_ratio": -0.1921694278717041, "logits/chosen": 1.601369023323059, "logits/rejected": 0.20986300706863403, "logps/chosen": -0.9109419584274292, "logps/rejected": -3.379201889038086, "loss": 0.7414, "nll_loss": 0.7222239375114441, "rewards/accuracies": 0.875, "rewards/chosen": -0.09109419584274292, "rewards/margins": 0.24682600796222687, "rewards/rejected": -0.3379201889038086, "step": 564 }, { "epoch": 0.3514774494556765, "grad_norm": 0.30817800760269165, "learning_rate": 2.175e-05, "log_odds_chosen": 5.583270072937012, "log_odds_ratio": -0.07630279660224915, "logits/chosen": 0.5737905502319336, "logits/rejected": 0.908176839351654, "logps/chosen": -0.9042786955833435, "logps/rejected": -5.677913188934326, "loss": 0.6135, "nll_loss": 0.6059155464172363, "rewards/accuracies": 1.0, "rewards/chosen": -0.09042787551879883, "rewards/margins": 0.4773634374141693, "rewards/rejected": -0.5677912831306458, "step": 565 }, { "epoch": 0.352099533437014, "grad_norm": 0.2887404263019562, "learning_rate": 2.1700000000000002e-05, "log_odds_chosen": 4.915496349334717, "log_odds_ratio": -0.03920300304889679, "logits/chosen": -0.6548490524291992, "logits/rejected": -0.16563282907009125, "logps/chosen": -0.9077558517456055, "logps/rejected": -5.154465675354004, "loss": 0.4734, "nll_loss": 0.46949946880340576, "rewards/accuracies": 1.0, "rewards/chosen": -0.09077560156583786, "rewards/margins": 0.4246709644794464, "rewards/rejected": -0.5154465436935425, "step": 566 }, { "epoch": 0.35272161741835145, "grad_norm": 0.3343895673751831, "learning_rate": 2.165e-05, "log_odds_chosen": 5.319578170776367, "log_odds_ratio": -0.11831340938806534, "logits/chosen": 2.253048896789551, "logits/rejected": 1.2730216979980469, "logps/chosen": -0.8506143093109131, "logps/rejected": -5.664251327514648, "loss": 0.6425, "nll_loss": 0.6306980848312378, "rewards/accuracies": 0.875, "rewards/chosen": -0.08506143093109131, "rewards/margins": 0.4813637435436249, "rewards/rejected": -0.5664252042770386, "step": 567 }, { "epoch": 0.35334370139968896, "grad_norm": 0.26959332823753357, "learning_rate": 2.16e-05, "log_odds_chosen": 5.405831813812256, "log_odds_ratio": -0.015014220029115677, "logits/chosen": 2.161424160003662, "logits/rejected": 1.572087287902832, "logps/chosen": -0.7183213829994202, "logps/rejected": -5.414179801940918, "loss": 0.7563, "nll_loss": 0.7547677755355835, "rewards/accuracies": 1.0, "rewards/chosen": -0.07183213531970978, "rewards/margins": 0.4695858657360077, "rewards/rejected": -0.5414179563522339, "step": 568 }, { "epoch": 0.35396578538102647, "grad_norm": 6.484348297119141, "learning_rate": 2.1550000000000002e-05, "log_odds_chosen": 3.8986024856567383, "log_odds_ratio": -0.22238805890083313, "logits/chosen": 0.6533817052841187, "logits/rejected": 0.09037871658802032, "logps/chosen": -1.0746116638183594, "logps/rejected": -4.593562602996826, "loss": 0.6437, "nll_loss": 0.6214991807937622, "rewards/accuracies": 1.0, "rewards/chosen": -0.10746116936206818, "rewards/margins": 0.3518950939178467, "rewards/rejected": -0.45935627818107605, "step": 569 }, { "epoch": 0.3545878693623639, "grad_norm": 0.3435267210006714, "learning_rate": 2.15e-05, "log_odds_chosen": 4.144981861114502, "log_odds_ratio": -0.13062036037445068, "logits/chosen": 1.8960034847259521, "logits/rejected": 0.15083402395248413, "logps/chosen": -0.9747380018234253, "logps/rejected": -4.638625621795654, "loss": 0.7867, "nll_loss": 0.7736024856567383, "rewards/accuracies": 0.875, "rewards/chosen": -0.09747380763292313, "rewards/margins": 0.366388738155365, "rewards/rejected": -0.4638625383377075, "step": 570 }, { "epoch": 0.3552099533437014, "grad_norm": 0.5003693103790283, "learning_rate": 2.145e-05, "log_odds_chosen": 2.2045464515686035, "log_odds_ratio": -0.32526710629463196, "logits/chosen": 1.7149858474731445, "logits/rejected": 1.3935751914978027, "logps/chosen": -0.7401760816574097, "logps/rejected": -2.5005102157592773, "loss": 0.7001, "nll_loss": 0.6675523519515991, "rewards/accuracies": 0.875, "rewards/chosen": -0.07401760667562485, "rewards/margins": 0.17603342235088348, "rewards/rejected": -0.25005102157592773, "step": 571 }, { "epoch": 0.3558320373250389, "grad_norm": 0.3263773024082184, "learning_rate": 2.1400000000000002e-05, "log_odds_chosen": 3.654919147491455, "log_odds_ratio": -0.08848594129085541, "logits/chosen": -0.6415503621101379, "logits/rejected": 0.4079166054725647, "logps/chosen": -1.0272693634033203, "logps/rejected": -4.235315799713135, "loss": 0.4354, "nll_loss": 0.42651909589767456, "rewards/accuracies": 1.0, "rewards/chosen": -0.10272695124149323, "rewards/margins": 0.3208046555519104, "rewards/rejected": -0.42353159189224243, "step": 572 }, { "epoch": 0.3564541213063764, "grad_norm": 0.40876343846321106, "learning_rate": 2.135e-05, "log_odds_chosen": 2.0924434661865234, "log_odds_ratio": -0.37442755699157715, "logits/chosen": 1.6909630298614502, "logits/rejected": 2.1783745288848877, "logps/chosen": -0.7929383516311646, "logps/rejected": -2.4616189002990723, "loss": 0.7618, "nll_loss": 0.7243326306343079, "rewards/accuracies": 1.0, "rewards/chosen": -0.07929383963346481, "rewards/margins": 0.16686806082725525, "rewards/rejected": -0.24616187810897827, "step": 573 }, { "epoch": 0.35707620528771383, "grad_norm": 0.49694570899009705, "learning_rate": 2.13e-05, "log_odds_chosen": 1.9976987838745117, "log_odds_ratio": -0.37749671936035156, "logits/chosen": 0.9177133440971375, "logits/rejected": -0.533532440662384, "logps/chosen": -0.8589506149291992, "logps/rejected": -2.627117156982422, "loss": 0.5703, "nll_loss": 0.5325274467468262, "rewards/accuracies": 0.75, "rewards/chosen": -0.08589506149291992, "rewards/margins": 0.1768166571855545, "rewards/rejected": -0.26271170377731323, "step": 574 }, { "epoch": 0.35769828926905134, "grad_norm": 0.43489086627960205, "learning_rate": 2.125e-05, "log_odds_chosen": 3.7538657188415527, "log_odds_ratio": -0.17192378640174866, "logits/chosen": -0.6949158310890198, "logits/rejected": -0.29193541407585144, "logps/chosen": -0.8917832374572754, "logps/rejected": -4.247375965118408, "loss": 0.4755, "nll_loss": 0.45832446217536926, "rewards/accuracies": 1.0, "rewards/chosen": -0.08917832374572754, "rewards/margins": 0.33555927872657776, "rewards/rejected": -0.4247376024723053, "step": 575 }, { "epoch": 0.3583203732503888, "grad_norm": 0.3112671971321106, "learning_rate": 2.12e-05, "log_odds_chosen": 5.385041236877441, "log_odds_ratio": -0.10352785885334015, "logits/chosen": 0.3990916609764099, "logits/rejected": 0.9243254661560059, "logps/chosen": -1.1848556995391846, "logps/rejected": -5.890063285827637, "loss": 0.537, "nll_loss": 0.5266907215118408, "rewards/accuracies": 0.875, "rewards/chosen": -0.11848556995391846, "rewards/margins": 0.4705207943916321, "rewards/rejected": -0.5890063047409058, "step": 576 }, { "epoch": 0.3589424572317263, "grad_norm": 0.7652101516723633, "learning_rate": 2.115e-05, "log_odds_chosen": 3.0768189430236816, "log_odds_ratio": -0.24112540483474731, "logits/chosen": 1.8470680713653564, "logits/rejected": 2.1239311695098877, "logps/chosen": -0.721358060836792, "logps/rejected": -3.037296772003174, "loss": 0.7862, "nll_loss": 0.76209557056427, "rewards/accuracies": 0.75, "rewards/chosen": -0.0721358060836792, "rewards/margins": 0.23159386217594147, "rewards/rejected": -0.3037296533584595, "step": 577 }, { "epoch": 0.35956454121306375, "grad_norm": 0.39108505845069885, "learning_rate": 2.11e-05, "log_odds_chosen": 2.50784969329834, "log_odds_ratio": -0.16467276215553284, "logits/chosen": 0.8514099717140198, "logits/rejected": 0.698555052280426, "logps/chosen": -1.0424742698669434, "logps/rejected": -3.1801681518554688, "loss": 0.6768, "nll_loss": 0.6602964401245117, "rewards/accuracies": 0.875, "rewards/chosen": -0.10424742847681046, "rewards/margins": 0.21376937627792358, "rewards/rejected": -0.31801682710647583, "step": 578 }, { "epoch": 0.36018662519440126, "grad_norm": 0.3793575167655945, "learning_rate": 2.105e-05, "log_odds_chosen": 4.474118232727051, "log_odds_ratio": -0.22063733637332916, "logits/chosen": 1.5521801710128784, "logits/rejected": 1.0251915454864502, "logps/chosen": -0.9196445941925049, "logps/rejected": -5.048022747039795, "loss": 0.6981, "nll_loss": 0.6760702133178711, "rewards/accuracies": 0.875, "rewards/chosen": -0.09196445345878601, "rewards/margins": 0.41283780336380005, "rewards/rejected": -0.5048022866249084, "step": 579 }, { "epoch": 0.3608087091757387, "grad_norm": 0.3384208679199219, "learning_rate": 2.1e-05, "log_odds_chosen": 5.3531174659729, "log_odds_ratio": -0.14996495842933655, "logits/chosen": 1.6041481494903564, "logits/rejected": 1.5773200988769531, "logps/chosen": -1.218875765800476, "logps/rejected": -6.06456995010376, "loss": 0.8003, "nll_loss": 0.7853128910064697, "rewards/accuracies": 1.0, "rewards/chosen": -0.12188757956027985, "rewards/margins": 0.4845694303512573, "rewards/rejected": -0.606456995010376, "step": 580 }, { "epoch": 0.3614307931570762, "grad_norm": 0.4473244845867157, "learning_rate": 2.095e-05, "log_odds_chosen": 5.5421247482299805, "log_odds_ratio": -0.04228505492210388, "logits/chosen": 0.6503898501396179, "logits/rejected": -0.6624352931976318, "logps/chosen": -1.1132351160049438, "logps/rejected": -6.240670204162598, "loss": 0.6585, "nll_loss": 0.6542383432388306, "rewards/accuracies": 1.0, "rewards/chosen": -0.1113235130906105, "rewards/margins": 0.5127434730529785, "rewards/rejected": -0.6240670084953308, "step": 581 }, { "epoch": 0.36205287713841366, "grad_norm": 0.32590481638908386, "learning_rate": 2.09e-05, "log_odds_chosen": 3.7834081649780273, "log_odds_ratio": -0.29060444235801697, "logits/chosen": -0.28049537539482117, "logits/rejected": -0.4547344446182251, "logps/chosen": -1.0908867120742798, "logps/rejected": -4.53211784362793, "loss": 0.5202, "nll_loss": 0.49115025997161865, "rewards/accuracies": 0.875, "rewards/chosen": -0.10908866673707962, "rewards/margins": 0.34412315487861633, "rewards/rejected": -0.45321184396743774, "step": 582 }, { "epoch": 0.36267496111975117, "grad_norm": 0.6480214595794678, "learning_rate": 2.085e-05, "log_odds_chosen": 3.60688853263855, "log_odds_ratio": -0.2324177324771881, "logits/chosen": 0.307085782289505, "logits/rejected": -0.46215736865997314, "logps/chosen": -0.9965261816978455, "logps/rejected": -4.287740707397461, "loss": 0.6023, "nll_loss": 0.5790976285934448, "rewards/accuracies": 1.0, "rewards/chosen": -0.09965262562036514, "rewards/margins": 0.32912150025367737, "rewards/rejected": -0.4287740886211395, "step": 583 }, { "epoch": 0.3632970451010886, "grad_norm": 0.5480687022209167, "learning_rate": 2.08e-05, "log_odds_chosen": 4.9113874435424805, "log_odds_ratio": -0.12982912361621857, "logits/chosen": 1.9692302942276, "logits/rejected": 0.6301515102386475, "logps/chosen": -0.8638044595718384, "logps/rejected": -5.2282514572143555, "loss": 0.6303, "nll_loss": 0.617352306842804, "rewards/accuracies": 0.875, "rewards/chosen": -0.08638045191764832, "rewards/margins": 0.4364446699619293, "rewards/rejected": -0.5228251218795776, "step": 584 }, { "epoch": 0.36391912908242613, "grad_norm": 0.34628981351852417, "learning_rate": 2.075e-05, "log_odds_chosen": 3.9489634037017822, "log_odds_ratio": -0.22906804084777832, "logits/chosen": 0.8565244674682617, "logits/rejected": 1.0159627199172974, "logps/chosen": -0.9767001271247864, "logps/rejected": -4.592409133911133, "loss": 0.6459, "nll_loss": 0.6229780316352844, "rewards/accuracies": 0.75, "rewards/chosen": -0.09767001122236252, "rewards/margins": 0.3615708351135254, "rewards/rejected": -0.4592408537864685, "step": 585 }, { "epoch": 0.3645412130637636, "grad_norm": 0.2683575749397278, "learning_rate": 2.07e-05, "log_odds_chosen": 4.981381893157959, "log_odds_ratio": -0.20831695199012756, "logits/chosen": 0.9119428396224976, "logits/rejected": 0.23448503017425537, "logps/chosen": -1.1264652013778687, "logps/rejected": -5.7671332359313965, "loss": 0.6212, "nll_loss": 0.6003206968307495, "rewards/accuracies": 0.75, "rewards/chosen": -0.11264652013778687, "rewards/margins": 0.4640668034553528, "rewards/rejected": -0.5767133831977844, "step": 586 }, { "epoch": 0.3651632970451011, "grad_norm": 0.33639833331108093, "learning_rate": 2.065e-05, "log_odds_chosen": 4.010759353637695, "log_odds_ratio": -0.2678699791431427, "logits/chosen": 0.12189310789108276, "logits/rejected": 0.07927525043487549, "logps/chosen": -0.9886451363563538, "logps/rejected": -4.671942710876465, "loss": 0.4694, "nll_loss": 0.44262024760246277, "rewards/accuracies": 0.875, "rewards/chosen": -0.09886451065540314, "rewards/margins": 0.3683297634124756, "rewards/rejected": -0.4671942889690399, "step": 587 }, { "epoch": 0.3657853810264386, "grad_norm": 0.43022793531417847, "learning_rate": 2.06e-05, "log_odds_chosen": 2.4500086307525635, "log_odds_ratio": -0.44840744137763977, "logits/chosen": 2.032104730606079, "logits/rejected": 2.6225948333740234, "logps/chosen": -1.0749928951263428, "logps/rejected": -3.159858226776123, "loss": 0.7611, "nll_loss": 0.7162356376647949, "rewards/accuracies": 0.875, "rewards/chosen": -0.10749930143356323, "rewards/margins": 0.20848651230335236, "rewards/rejected": -0.3159858286380768, "step": 588 }, { "epoch": 0.36640746500777605, "grad_norm": 0.31727105379104614, "learning_rate": 2.055e-05, "log_odds_chosen": 4.297815322875977, "log_odds_ratio": -0.17429843544960022, "logits/chosen": 0.28740793466567993, "logits/rejected": 0.8092455863952637, "logps/chosen": -0.7892931699752808, "logps/rejected": -4.623042106628418, "loss": 0.5373, "nll_loss": 0.5198439359664917, "rewards/accuracies": 0.875, "rewards/chosen": -0.07892931997776031, "rewards/margins": 0.3833748996257782, "rewards/rejected": -0.4623042047023773, "step": 589 }, { "epoch": 0.36702954898911355, "grad_norm": 0.46551135182380676, "learning_rate": 2.05e-05, "log_odds_chosen": 3.207967519760132, "log_odds_ratio": -0.2668222188949585, "logits/chosen": 1.4202075004577637, "logits/rejected": 1.1370333433151245, "logps/chosen": -1.0534496307373047, "logps/rejected": -3.9848947525024414, "loss": 0.6709, "nll_loss": 0.644168496131897, "rewards/accuracies": 0.875, "rewards/chosen": -0.10534496605396271, "rewards/margins": 0.29314449429512024, "rewards/rejected": -0.39848947525024414, "step": 590 }, { "epoch": 0.367651632970451, "grad_norm": 2.5857887268066406, "learning_rate": 2.045e-05, "log_odds_chosen": 4.6939496994018555, "log_odds_ratio": -0.1030438169836998, "logits/chosen": 0.7330646514892578, "logits/rejected": 0.881181001663208, "logps/chosen": -1.2379714250564575, "logps/rejected": -5.404970169067383, "loss": 0.6535, "nll_loss": 0.6432254314422607, "rewards/accuracies": 1.0, "rewards/chosen": -0.12379714846611023, "rewards/margins": 0.4166998565196991, "rewards/rejected": -0.5404970049858093, "step": 591 }, { "epoch": 0.3682737169517885, "grad_norm": 0.3276950716972351, "learning_rate": 2.04e-05, "log_odds_chosen": 2.8923096656799316, "log_odds_ratio": -0.409442663192749, "logits/chosen": 0.031567931175231934, "logits/rejected": 0.7758247256278992, "logps/chosen": -0.711024284362793, "logps/rejected": -3.3380963802337646, "loss": 0.5007, "nll_loss": 0.45975714921951294, "rewards/accuracies": 0.75, "rewards/chosen": -0.07110242545604706, "rewards/margins": 0.2627072334289551, "rewards/rejected": -0.33380964398384094, "step": 592 }, { "epoch": 0.36889580093312596, "grad_norm": 0.29507437348365784, "learning_rate": 2.035e-05, "log_odds_chosen": 4.467270851135254, "log_odds_ratio": -0.2962123155593872, "logits/chosen": 0.7136313915252686, "logits/rejected": 0.936424195766449, "logps/chosen": -0.9805070161819458, "logps/rejected": -5.16839075088501, "loss": 0.6137, "nll_loss": 0.5840867757797241, "rewards/accuracies": 0.75, "rewards/chosen": -0.09805070608854294, "rewards/margins": 0.4187883734703064, "rewards/rejected": -0.5168390870094299, "step": 593 }, { "epoch": 0.36951788491446347, "grad_norm": 0.3454137146472931, "learning_rate": 2.0300000000000002e-05, "log_odds_chosen": 5.492619037628174, "log_odds_ratio": -0.05524108186364174, "logits/chosen": 1.6836175918579102, "logits/rejected": 0.8593066930770874, "logps/chosen": -0.8198999166488647, "logps/rejected": -5.759896755218506, "loss": 0.6107, "nll_loss": 0.6052013039588928, "rewards/accuracies": 1.0, "rewards/chosen": -0.08198998868465424, "rewards/margins": 0.4939996600151062, "rewards/rejected": -0.5759896636009216, "step": 594 }, { "epoch": 0.3701399688958009, "grad_norm": 1.686590313911438, "learning_rate": 2.025e-05, "log_odds_chosen": 4.503698825836182, "log_odds_ratio": -0.20807135105133057, "logits/chosen": 2.4879651069641113, "logits/rejected": 1.0714972019195557, "logps/chosen": -1.0486226081848145, "logps/rejected": -5.218942642211914, "loss": 0.8232, "nll_loss": 0.8023769855499268, "rewards/accuracies": 0.875, "rewards/chosen": -0.1048622578382492, "rewards/margins": 0.41703200340270996, "rewards/rejected": -0.5218942761421204, "step": 595 }, { "epoch": 0.3707620528771384, "grad_norm": 2.878927230834961, "learning_rate": 2.0200000000000003e-05, "log_odds_chosen": 2.9781720638275146, "log_odds_ratio": -0.39062219858169556, "logits/chosen": 0.059173583984375, "logits/rejected": 0.10687759518623352, "logps/chosen": -1.6145503520965576, "logps/rejected": -4.240401268005371, "loss": 0.8397, "nll_loss": 0.8006084561347961, "rewards/accuracies": 0.75, "rewards/chosen": -0.16145503520965576, "rewards/margins": 0.2625851035118103, "rewards/rejected": -0.4240401089191437, "step": 596 }, { "epoch": 0.3713841368584759, "grad_norm": 3.408799409866333, "learning_rate": 2.0150000000000002e-05, "log_odds_chosen": 4.163212299346924, "log_odds_ratio": -0.12928536534309387, "logits/chosen": 1.2136975526809692, "logits/rejected": 0.2280959188938141, "logps/chosen": -1.0870234966278076, "logps/rejected": -4.872440814971924, "loss": 0.7765, "nll_loss": 0.7635830640792847, "rewards/accuracies": 0.875, "rewards/chosen": -0.10870234668254852, "rewards/margins": 0.3785417675971985, "rewards/rejected": -0.4872441291809082, "step": 597 }, { "epoch": 0.3720062208398134, "grad_norm": 0.31707343459129333, "learning_rate": 2.01e-05, "log_odds_chosen": 6.093807220458984, "log_odds_ratio": -0.01401099655777216, "logits/chosen": 0.5640559792518616, "logits/rejected": 0.06941845268011093, "logps/chosen": -0.77422034740448, "logps/rejected": -6.081796646118164, "loss": 0.5637, "nll_loss": 0.5623044371604919, "rewards/accuracies": 1.0, "rewards/chosen": -0.07742203772068024, "rewards/margins": 0.5307576060295105, "rewards/rejected": -0.6081796288490295, "step": 598 }, { "epoch": 0.37262830482115084, "grad_norm": 0.2775684595108032, "learning_rate": 2.0050000000000003e-05, "log_odds_chosen": 2.811703681945801, "log_odds_ratio": -0.17024891078472137, "logits/chosen": -0.14262226223945618, "logits/rejected": -0.5278723239898682, "logps/chosen": -0.5868371725082397, "logps/rejected": -2.578371524810791, "loss": 0.523, "nll_loss": 0.5059659481048584, "rewards/accuracies": 0.875, "rewards/chosen": -0.05868372321128845, "rewards/margins": 0.19915342330932617, "rewards/rejected": -0.2578371465206146, "step": 599 }, { "epoch": 0.37325038880248834, "grad_norm": 0.3168306052684784, "learning_rate": 2e-05, "log_odds_chosen": 4.713489532470703, "log_odds_ratio": -0.11067051440477371, "logits/chosen": 1.329630970954895, "logits/rejected": 0.1642017960548401, "logps/chosen": -0.7641226053237915, "logps/rejected": -4.863486289978027, "loss": 0.611, "nll_loss": 0.5999280214309692, "rewards/accuracies": 1.0, "rewards/chosen": -0.07641226053237915, "rewards/margins": 0.4099363684654236, "rewards/rejected": -0.48634862899780273, "step": 600 }, { "epoch": 0.3738724727838258, "grad_norm": 0.286088764667511, "learning_rate": 1.995e-05, "log_odds_chosen": 1.7783844470977783, "log_odds_ratio": -0.32731106877326965, "logits/chosen": 0.7408541440963745, "logits/rejected": -0.08958357572555542, "logps/chosen": -0.768362820148468, "logps/rejected": -2.1349611282348633, "loss": 0.6597, "nll_loss": 0.6270171403884888, "rewards/accuracies": 0.875, "rewards/chosen": -0.07683628797531128, "rewards/margins": 0.13665983080863953, "rewards/rejected": -0.2134961038827896, "step": 601 }, { "epoch": 0.3744945567651633, "grad_norm": 0.3127974569797516, "learning_rate": 1.9900000000000003e-05, "log_odds_chosen": 2.3000106811523438, "log_odds_ratio": -0.3905388116836548, "logits/chosen": -0.8903562426567078, "logits/rejected": -0.00967445969581604, "logps/chosen": -0.9396561980247498, "logps/rejected": -3.0212459564208984, "loss": 0.5167, "nll_loss": 0.47764530777931213, "rewards/accuracies": 0.875, "rewards/chosen": -0.09396561980247498, "rewards/margins": 0.2081589549779892, "rewards/rejected": -0.30212458968162537, "step": 602 }, { "epoch": 0.37511664074650075, "grad_norm": 0.415966272354126, "learning_rate": 1.985e-05, "log_odds_chosen": 1.083411693572998, "log_odds_ratio": -0.5429335832595825, "logits/chosen": -0.10423839092254639, "logits/rejected": 2.1669833660125732, "logps/chosen": -1.113211750984192, "logps/rejected": -1.9851309061050415, "loss": 0.6267, "nll_loss": 0.5724228024482727, "rewards/accuracies": 0.75, "rewards/chosen": -0.11132118105888367, "rewards/margins": 0.08719191700220108, "rewards/rejected": -0.19851309061050415, "step": 603 }, { "epoch": 0.37573872472783826, "grad_norm": 3.7788166999816895, "learning_rate": 1.9800000000000004e-05, "log_odds_chosen": 2.944392204284668, "log_odds_ratio": -0.21253594756126404, "logits/chosen": 1.583858847618103, "logits/rejected": 0.9380096793174744, "logps/chosen": -0.8593783974647522, "logps/rejected": -3.203774929046631, "loss": 0.8276, "nll_loss": 0.8063594102859497, "rewards/accuracies": 0.875, "rewards/chosen": -0.08593783527612686, "rewards/margins": 0.2344396710395813, "rewards/rejected": -0.32037752866744995, "step": 604 }, { "epoch": 0.37636080870917576, "grad_norm": 0.37738439440727234, "learning_rate": 1.9750000000000002e-05, "log_odds_chosen": 1.2513070106506348, "log_odds_ratio": -0.47075262665748596, "logits/chosen": 1.0833340883255005, "logits/rejected": 0.25691819190979004, "logps/chosen": -0.9458091855049133, "logps/rejected": -2.022855043411255, "loss": 0.721, "nll_loss": 0.6739503145217896, "rewards/accuracies": 0.75, "rewards/chosen": -0.09458091855049133, "rewards/margins": 0.10770457983016968, "rewards/rejected": -0.2022855132818222, "step": 605 }, { "epoch": 0.3769828926905132, "grad_norm": 0.44008681178092957, "learning_rate": 1.97e-05, "log_odds_chosen": 1.6499419212341309, "log_odds_ratio": -0.28058454394340515, "logits/chosen": 1.6621906757354736, "logits/rejected": 0.8158775568008423, "logps/chosen": -1.0555553436279297, "logps/rejected": -2.435013771057129, "loss": 0.8268, "nll_loss": 0.798723578453064, "rewards/accuracies": 0.875, "rewards/chosen": -0.10555553436279297, "rewards/margins": 0.13794584572315216, "rewards/rejected": -0.24350138008594513, "step": 606 }, { "epoch": 0.3776049766718507, "grad_norm": 0.43245071172714233, "learning_rate": 1.9650000000000003e-05, "log_odds_chosen": 0.48218634724617004, "log_odds_ratio": -0.541495680809021, "logits/chosen": -0.4415658116340637, "logits/rejected": 0.35667887330055237, "logps/chosen": -1.1343573331832886, "logps/rejected": -1.5530166625976562, "loss": 0.5693, "nll_loss": 0.5151444673538208, "rewards/accuracies": 0.625, "rewards/chosen": -0.11343573778867722, "rewards/margins": 0.041865941137075424, "rewards/rejected": -0.15530166029930115, "step": 607 }, { "epoch": 0.3782270606531882, "grad_norm": 0.49462637305259705, "learning_rate": 1.9600000000000002e-05, "log_odds_chosen": 1.4209492206573486, "log_odds_ratio": -0.3335047662258148, "logits/chosen": 0.7791264057159424, "logits/rejected": 0.8404102325439453, "logps/chosen": -0.7444546222686768, "logps/rejected": -1.7486032247543335, "loss": 0.7176, "nll_loss": 0.6841996908187866, "rewards/accuracies": 0.875, "rewards/chosen": -0.0744454637169838, "rewards/margins": 0.10041484981775284, "rewards/rejected": -0.17486032843589783, "step": 608 }, { "epoch": 0.3788491446345257, "grad_norm": 0.5927086472511292, "learning_rate": 1.955e-05, "log_odds_chosen": 1.383941411972046, "log_odds_ratio": -0.28821372985839844, "logits/chosen": -0.8597515821456909, "logits/rejected": -0.6633905172348022, "logps/chosen": -0.9070121049880981, "logps/rejected": -1.9484810829162598, "loss": 0.5367, "nll_loss": 0.5078994035720825, "rewards/accuracies": 0.875, "rewards/chosen": -0.09070120751857758, "rewards/margins": 0.10414689779281616, "rewards/rejected": -0.19484810531139374, "step": 609 }, { "epoch": 0.37947122861586313, "grad_norm": 0.32356026768684387, "learning_rate": 1.9500000000000003e-05, "log_odds_chosen": 3.314235210418701, "log_odds_ratio": -0.2443312555551529, "logits/chosen": 1.1803479194641113, "logits/rejected": 1.4565016031265259, "logps/chosen": -0.8451287746429443, "logps/rejected": -3.459113597869873, "loss": 0.7266, "nll_loss": 0.7021805644035339, "rewards/accuracies": 0.75, "rewards/chosen": -0.0845128744840622, "rewards/margins": 0.2613984942436218, "rewards/rejected": -0.3459113836288452, "step": 610 }, { "epoch": 0.38009331259720064, "grad_norm": 0.4172796905040741, "learning_rate": 1.9450000000000002e-05, "log_odds_chosen": 1.747850775718689, "log_odds_ratio": -0.3201906681060791, "logits/chosen": 1.09573233127594, "logits/rejected": 1.502558946609497, "logps/chosen": -0.7899297475814819, "logps/rejected": -2.1978812217712402, "loss": 0.752, "nll_loss": 0.720018744468689, "rewards/accuracies": 1.0, "rewards/chosen": -0.07899297773838043, "rewards/margins": 0.14079514145851135, "rewards/rejected": -0.21978813409805298, "step": 611 }, { "epoch": 0.3807153965785381, "grad_norm": 0.30057263374328613, "learning_rate": 1.94e-05, "log_odds_chosen": 2.8357088565826416, "log_odds_ratio": -0.31256797909736633, "logits/chosen": -1.0876035690307617, "logits/rejected": -0.628466010093689, "logps/chosen": -0.811927080154419, "logps/rejected": -3.3203463554382324, "loss": 0.4024, "nll_loss": 0.3711714446544647, "rewards/accuracies": 0.625, "rewards/chosen": -0.08119270205497742, "rewards/margins": 0.2508419156074524, "rewards/rejected": -0.3320346474647522, "step": 612 }, { "epoch": 0.3813374805598756, "grad_norm": 0.5218482613563538, "learning_rate": 1.9350000000000003e-05, "log_odds_chosen": 3.0079641342163086, "log_odds_ratio": -0.14967142045497894, "logits/chosen": 1.8589670658111572, "logits/rejected": 1.5846030712127686, "logps/chosen": -1.0442512035369873, "logps/rejected": -3.6195616722106934, "loss": 0.7302, "nll_loss": 0.7151949405670166, "rewards/accuracies": 1.0, "rewards/chosen": -0.10442513227462769, "rewards/margins": 0.2575310468673706, "rewards/rejected": -0.3619561791419983, "step": 613 }, { "epoch": 0.38195956454121305, "grad_norm": 0.4662970006465912, "learning_rate": 1.93e-05, "log_odds_chosen": 3.828094005584717, "log_odds_ratio": -0.0793749988079071, "logits/chosen": 1.2202463150024414, "logits/rejected": 0.9613503217697144, "logps/chosen": -0.9680242538452148, "logps/rejected": -4.134416580200195, "loss": 0.6713, "nll_loss": 0.6633151769638062, "rewards/accuracies": 1.0, "rewards/chosen": -0.09680242836475372, "rewards/margins": 0.3166392147541046, "rewards/rejected": -0.41344162821769714, "step": 614 }, { "epoch": 0.38258164852255055, "grad_norm": 0.32331582903862, "learning_rate": 1.925e-05, "log_odds_chosen": 2.5637335777282715, "log_odds_ratio": -0.2143726497888565, "logits/chosen": 0.5455524921417236, "logits/rejected": 0.08359070122241974, "logps/chosen": -0.7310885190963745, "logps/rejected": -2.6874022483825684, "loss": 0.5022, "nll_loss": 0.48075175285339355, "rewards/accuracies": 0.875, "rewards/chosen": -0.07310885190963745, "rewards/margins": 0.19563135504722595, "rewards/rejected": -0.2687402069568634, "step": 615 }, { "epoch": 0.383203732503888, "grad_norm": 0.8497756719589233, "learning_rate": 1.9200000000000003e-05, "log_odds_chosen": 3.15248966217041, "log_odds_ratio": -0.3250788748264313, "logits/chosen": 2.0130248069763184, "logits/rejected": 1.5514307022094727, "logps/chosen": -1.110895037651062, "logps/rejected": -3.9910812377929688, "loss": 0.859, "nll_loss": 0.8265219926834106, "rewards/accuracies": 0.75, "rewards/chosen": -0.11108951270580292, "rewards/margins": 0.2880186140537262, "rewards/rejected": -0.3991081118583679, "step": 616 }, { "epoch": 0.3838258164852255, "grad_norm": 0.29061028361320496, "learning_rate": 1.915e-05, "log_odds_chosen": 3.13297963142395, "log_odds_ratio": -0.1648668348789215, "logits/chosen": 0.3452632427215576, "logits/rejected": 0.05708187818527222, "logps/chosen": -0.825201153755188, "logps/rejected": -3.448404312133789, "loss": 0.6881, "nll_loss": 0.6715787649154663, "rewards/accuracies": 0.875, "rewards/chosen": -0.08252011239528656, "rewards/margins": 0.262320339679718, "rewards/rejected": -0.34484046697616577, "step": 617 }, { "epoch": 0.38444790046656296, "grad_norm": 1.4165366888046265, "learning_rate": 1.91e-05, "log_odds_chosen": 2.9382688999176025, "log_odds_ratio": -0.19285082817077637, "logits/chosen": 1.947314739227295, "logits/rejected": 1.6154723167419434, "logps/chosen": -0.8680057525634766, "logps/rejected": -3.3975019454956055, "loss": 0.7199, "nll_loss": 0.700596034526825, "rewards/accuracies": 0.875, "rewards/chosen": -0.08680057525634766, "rewards/margins": 0.252949595451355, "rewards/rejected": -0.339750200510025, "step": 618 }, { "epoch": 0.38506998444790047, "grad_norm": 0.49961647391319275, "learning_rate": 1.9050000000000002e-05, "log_odds_chosen": 3.0872249603271484, "log_odds_ratio": -0.21634311974048615, "logits/chosen": 2.1546196937561035, "logits/rejected": 1.6376492977142334, "logps/chosen": -0.9351372122764587, "logps/rejected": -3.629427433013916, "loss": 0.7868, "nll_loss": 0.7651870250701904, "rewards/accuracies": 0.75, "rewards/chosen": -0.09351371973752975, "rewards/margins": 0.2694290280342102, "rewards/rejected": -0.36294275522232056, "step": 619 }, { "epoch": 0.3856920684292379, "grad_norm": 0.6055630445480347, "learning_rate": 1.9e-05, "log_odds_chosen": 4.04840087890625, "log_odds_ratio": -0.11545915901660919, "logits/chosen": 0.9643297791481018, "logits/rejected": 0.8798283338546753, "logps/chosen": -1.150406002998352, "logps/rejected": -4.413238048553467, "loss": 0.7236, "nll_loss": 0.7120473384857178, "rewards/accuracies": 0.875, "rewards/chosen": -0.1150406002998352, "rewards/margins": 0.32628321647644043, "rewards/rejected": -0.44132378697395325, "step": 620 }, { "epoch": 0.38631415241057543, "grad_norm": 0.40856286883354187, "learning_rate": 1.895e-05, "log_odds_chosen": 3.0728964805603027, "log_odds_ratio": -0.2111356258392334, "logits/chosen": 0.6732938885688782, "logits/rejected": 2.3867170810699463, "logps/chosen": -0.8632257580757141, "logps/rejected": -3.499908208847046, "loss": 0.5808, "nll_loss": 0.5596716403961182, "rewards/accuracies": 1.0, "rewards/chosen": -0.08632257580757141, "rewards/margins": 0.2636682391166687, "rewards/rejected": -0.3499908149242401, "step": 621 }, { "epoch": 0.38693623639191294, "grad_norm": 0.3176197111606598, "learning_rate": 1.8900000000000002e-05, "log_odds_chosen": 3.05029296875, "log_odds_ratio": -0.26527780294418335, "logits/chosen": 0.6581851243972778, "logits/rejected": 0.3060731887817383, "logps/chosen": -0.9692713618278503, "logps/rejected": -3.717808246612549, "loss": 0.5477, "nll_loss": 0.5212218165397644, "rewards/accuracies": 0.875, "rewards/chosen": -0.09692715108394623, "rewards/margins": 0.2748537063598633, "rewards/rejected": -0.3717808723449707, "step": 622 }, { "epoch": 0.3875583203732504, "grad_norm": 0.3400088846683502, "learning_rate": 1.885e-05, "log_odds_chosen": 4.018574237823486, "log_odds_ratio": -0.2551661729812622, "logits/chosen": 1.1399219036102295, "logits/rejected": -0.18498098850250244, "logps/chosen": -0.8985238075256348, "logps/rejected": -4.519594192504883, "loss": 0.6992, "nll_loss": 0.673705518245697, "rewards/accuracies": 0.75, "rewards/chosen": -0.08985237777233124, "rewards/margins": 0.3621070384979248, "rewards/rejected": -0.45195937156677246, "step": 623 }, { "epoch": 0.3881804043545879, "grad_norm": 0.43447425961494446, "learning_rate": 1.88e-05, "log_odds_chosen": 3.226219654083252, "log_odds_ratio": -0.14265930652618408, "logits/chosen": 0.49467071890830994, "logits/rejected": 0.27663129568099976, "logps/chosen": -0.838038444519043, "logps/rejected": -3.3186306953430176, "loss": 0.5229, "nll_loss": 0.5086257457733154, "rewards/accuracies": 0.875, "rewards/chosen": -0.08380384743213654, "rewards/margins": 0.2480592131614685, "rewards/rejected": -0.33186307549476624, "step": 624 }, { "epoch": 0.38880248833592534, "grad_norm": 0.3414386212825775, "learning_rate": 1.8750000000000002e-05, "log_odds_chosen": 3.874777317047119, "log_odds_ratio": -0.19898560643196106, "logits/chosen": 1.617907166481018, "logits/rejected": 0.7998380064964294, "logps/chosen": -1.1137770414352417, "logps/rejected": -4.6822614669799805, "loss": 0.6368, "nll_loss": 0.6169208288192749, "rewards/accuracies": 0.875, "rewards/chosen": -0.11137770861387253, "rewards/margins": 0.35684841871261597, "rewards/rejected": -0.4682261347770691, "step": 625 }, { "epoch": 0.38942457231726285, "grad_norm": 0.28912225365638733, "learning_rate": 1.87e-05, "log_odds_chosen": 4.579721927642822, "log_odds_ratio": -0.09703951328992844, "logits/chosen": 0.810788094997406, "logits/rejected": 0.904729962348938, "logps/chosen": -0.7196419835090637, "logps/rejected": -4.698285102844238, "loss": 0.5709, "nll_loss": 0.5611502528190613, "rewards/accuracies": 0.875, "rewards/chosen": -0.07196419686079025, "rewards/margins": 0.39786434173583984, "rewards/rejected": -0.4698285460472107, "step": 626 }, { "epoch": 0.3900466562986003, "grad_norm": 0.3980149030685425, "learning_rate": 1.865e-05, "log_odds_chosen": 3.308889865875244, "log_odds_ratio": -0.1399206817150116, "logits/chosen": 2.5614981651306152, "logits/rejected": 0.708278238773346, "logps/chosen": -0.9311361312866211, "logps/rejected": -3.834683895111084, "loss": 0.7021, "nll_loss": 0.6881444454193115, "rewards/accuracies": 1.0, "rewards/chosen": -0.09311362355947495, "rewards/margins": 0.29035475850105286, "rewards/rejected": -0.3834683895111084, "step": 627 }, { "epoch": 0.3906687402799378, "grad_norm": 3.1673495769500732, "learning_rate": 1.86e-05, "log_odds_chosen": 3.9142515659332275, "log_odds_ratio": -0.12769760191440582, "logits/chosen": 0.7221090197563171, "logits/rejected": 0.20109251141548157, "logps/chosen": -1.9082034826278687, "logps/rejected": -5.504971504211426, "loss": 0.8577, "nll_loss": 0.8449530601501465, "rewards/accuracies": 1.0, "rewards/chosen": -0.1908203661441803, "rewards/margins": 0.3596767783164978, "rewards/rejected": -0.5504971146583557, "step": 628 }, { "epoch": 0.39129082426127526, "grad_norm": 0.38298946619033813, "learning_rate": 1.855e-05, "log_odds_chosen": 4.3971781730651855, "log_odds_ratio": -0.14027798175811768, "logits/chosen": 0.8268332481384277, "logits/rejected": 2.3540313243865967, "logps/chosen": -1.0750876665115356, "logps/rejected": -4.887978553771973, "loss": 0.6025, "nll_loss": 0.5884767174720764, "rewards/accuracies": 1.0, "rewards/chosen": -0.10750877112150192, "rewards/margins": 0.3812890648841858, "rewards/rejected": -0.4887978136539459, "step": 629 }, { "epoch": 0.39191290824261277, "grad_norm": 2.4173014163970947, "learning_rate": 1.85e-05, "log_odds_chosen": 3.843627452850342, "log_odds_ratio": -0.27270084619522095, "logits/chosen": 2.5830585956573486, "logits/rejected": 2.2897543907165527, "logps/chosen": -1.134068250656128, "logps/rejected": -4.5426716804504395, "loss": 0.817, "nll_loss": 0.7896883487701416, "rewards/accuracies": 0.75, "rewards/chosen": -0.11340682208538055, "rewards/margins": 0.3408603370189667, "rewards/rejected": -0.45426714420318604, "step": 630 }, { "epoch": 0.3925349922239502, "grad_norm": 0.37030836939811707, "learning_rate": 1.845e-05, "log_odds_chosen": 4.004006385803223, "log_odds_ratio": -0.220392107963562, "logits/chosen": 1.8551138639450073, "logits/rejected": 1.160433053970337, "logps/chosen": -0.9171284437179565, "logps/rejected": -4.551581382751465, "loss": 0.63, "nll_loss": 0.6079891324043274, "rewards/accuracies": 0.75, "rewards/chosen": -0.0917128473520279, "rewards/margins": 0.3634452819824219, "rewards/rejected": -0.45515817403793335, "step": 631 }, { "epoch": 0.3931570762052877, "grad_norm": 0.36467447876930237, "learning_rate": 1.84e-05, "log_odds_chosen": 2.4729325771331787, "log_odds_ratio": -0.19554854929447174, "logits/chosen": 1.852522373199463, "logits/rejected": 1.3634965419769287, "logps/chosen": -1.3775891065597534, "logps/rejected": -3.4965128898620605, "loss": 0.692, "nll_loss": 0.6724200248718262, "rewards/accuracies": 0.875, "rewards/chosen": -0.13775891065597534, "rewards/margins": 0.21189242601394653, "rewards/rejected": -0.3496513068675995, "step": 632 }, { "epoch": 0.3937791601866252, "grad_norm": 0.535660982131958, "learning_rate": 1.8350000000000002e-05, "log_odds_chosen": 1.2777886390686035, "log_odds_ratio": -0.5359351634979248, "logits/chosen": 1.1757664680480957, "logits/rejected": 0.9977664947509766, "logps/chosen": -1.3026446104049683, "logps/rejected": -2.4279894828796387, "loss": 0.6938, "nll_loss": 0.6402309536933899, "rewards/accuracies": 0.75, "rewards/chosen": -0.13026446104049683, "rewards/margins": 0.11253447830677032, "rewards/rejected": -0.24279895424842834, "step": 633 }, { "epoch": 0.3944012441679627, "grad_norm": 0.40152236819267273, "learning_rate": 1.83e-05, "log_odds_chosen": 3.575596809387207, "log_odds_ratio": -0.08371242880821228, "logits/chosen": 2.7394649982452393, "logits/rejected": 1.1751067638397217, "logps/chosen": -0.9250873327255249, "logps/rejected": -3.953653335571289, "loss": 0.7159, "nll_loss": 0.7075378894805908, "rewards/accuracies": 1.0, "rewards/chosen": -0.0925087258219719, "rewards/margins": 0.3028566241264343, "rewards/rejected": -0.3953653573989868, "step": 634 }, { "epoch": 0.39502332814930013, "grad_norm": 0.3517719805240631, "learning_rate": 1.825e-05, "log_odds_chosen": 3.799075126647949, "log_odds_ratio": -0.07084185630083084, "logits/chosen": 0.9560929536819458, "logits/rejected": 0.8968772888183594, "logps/chosen": -0.9981387257575989, "logps/rejected": -4.241707801818848, "loss": 0.6964, "nll_loss": 0.6893182396888733, "rewards/accuracies": 1.0, "rewards/chosen": -0.09981386363506317, "rewards/margins": 0.32435691356658936, "rewards/rejected": -0.4241707921028137, "step": 635 }, { "epoch": 0.39564541213063764, "grad_norm": 0.31663310527801514, "learning_rate": 1.8200000000000002e-05, "log_odds_chosen": 4.332825183868408, "log_odds_ratio": -0.07596045732498169, "logits/chosen": 1.717186689376831, "logits/rejected": 0.9638761878013611, "logps/chosen": -0.711776614189148, "logps/rejected": -4.22796106338501, "loss": 0.5847, "nll_loss": 0.5770620107650757, "rewards/accuracies": 1.0, "rewards/chosen": -0.0711776614189148, "rewards/margins": 0.3516184091567993, "rewards/rejected": -0.4227961301803589, "step": 636 }, { "epoch": 0.3962674961119751, "grad_norm": 0.361131489276886, "learning_rate": 1.815e-05, "log_odds_chosen": 2.669607162475586, "log_odds_ratio": -0.34226924180984497, "logits/chosen": 2.9982523918151855, "logits/rejected": 2.0027904510498047, "logps/chosen": -0.8131262063980103, "logps/rejected": -3.100098133087158, "loss": 0.7247, "nll_loss": 0.6904410719871521, "rewards/accuracies": 0.75, "rewards/chosen": -0.0813126266002655, "rewards/margins": 0.22869719564914703, "rewards/rejected": -0.31000980734825134, "step": 637 }, { "epoch": 0.3968895800933126, "grad_norm": 0.3656984269618988, "learning_rate": 1.81e-05, "log_odds_chosen": 4.020977020263672, "log_odds_ratio": -0.2539737820625305, "logits/chosen": 2.4173789024353027, "logits/rejected": 1.9622955322265625, "logps/chosen": -0.8449468612670898, "logps/rejected": -4.402913570404053, "loss": 0.7507, "nll_loss": 0.7252607345581055, "rewards/accuracies": 0.875, "rewards/chosen": -0.0844946876168251, "rewards/margins": 0.3557966947555542, "rewards/rejected": -0.4402914047241211, "step": 638 }, { "epoch": 0.39751166407465005, "grad_norm": 0.9636780619621277, "learning_rate": 1.805e-05, "log_odds_chosen": 2.350464344024658, "log_odds_ratio": -0.4629105031490326, "logits/chosen": 1.6177942752838135, "logits/rejected": 0.7681326270103455, "logps/chosen": -1.3030866384506226, "logps/rejected": -3.3601512908935547, "loss": 0.7014, "nll_loss": 0.655118465423584, "rewards/accuracies": 0.875, "rewards/chosen": -0.13030865788459778, "rewards/margins": 0.20570647716522217, "rewards/rejected": -0.33601510524749756, "step": 639 }, { "epoch": 0.39813374805598756, "grad_norm": 0.34603214263916016, "learning_rate": 1.8e-05, "log_odds_chosen": 2.6793177127838135, "log_odds_ratio": -0.2908114194869995, "logits/chosen": 1.2206950187683105, "logits/rejected": 0.9481405019760132, "logps/chosen": -0.8355297446250916, "logps/rejected": -2.9877710342407227, "loss": 0.5837, "nll_loss": 0.5545707941055298, "rewards/accuracies": 0.75, "rewards/chosen": -0.08355297893285751, "rewards/margins": 0.21522416174411774, "rewards/rejected": -0.29877713322639465, "step": 640 }, { "epoch": 0.39875583203732506, "grad_norm": 0.40048831701278687, "learning_rate": 1.795e-05, "log_odds_chosen": 4.30087423324585, "log_odds_ratio": -0.019221052527427673, "logits/chosen": 3.0978634357452393, "logits/rejected": 1.65860915184021, "logps/chosen": -0.8779377341270447, "logps/rejected": -4.604584693908691, "loss": 0.7469, "nll_loss": 0.7449491620063782, "rewards/accuracies": 1.0, "rewards/chosen": -0.08779378235340118, "rewards/margins": 0.3726646900177002, "rewards/rejected": -0.4604584574699402, "step": 641 }, { "epoch": 0.3993779160186625, "grad_norm": 0.29520246386528015, "learning_rate": 1.79e-05, "log_odds_chosen": 3.081165075302124, "log_odds_ratio": -0.14847292006015778, "logits/chosen": 0.902208685874939, "logits/rejected": 0.8600935339927673, "logps/chosen": -1.0332772731781006, "logps/rejected": -3.71449875831604, "loss": 0.5638, "nll_loss": 0.5489675998687744, "rewards/accuracies": 1.0, "rewards/chosen": -0.10332773625850677, "rewards/margins": 0.26812219619750977, "rewards/rejected": -0.37144988775253296, "step": 642 }, { "epoch": 0.4, "grad_norm": 0.3169514834880829, "learning_rate": 1.785e-05, "log_odds_chosen": 4.029628753662109, "log_odds_ratio": -0.07181131094694138, "logits/chosen": 0.8769886493682861, "logits/rejected": 0.4741711914539337, "logps/chosen": -0.7680267095565796, "logps/rejected": -4.1113505363464355, "loss": 0.5882, "nll_loss": 0.5809819102287292, "rewards/accuracies": 1.0, "rewards/chosen": -0.07680267095565796, "rewards/margins": 0.3343323767185211, "rewards/rejected": -0.4111350476741791, "step": 643 }, { "epoch": 0.4006220839813375, "grad_norm": 0.37789186835289, "learning_rate": 1.78e-05, "log_odds_chosen": 5.709261417388916, "log_odds_ratio": -0.12950600683689117, "logits/chosen": 1.899143934249878, "logits/rejected": 1.7838191986083984, "logps/chosen": -0.7111093997955322, "logps/rejected": -5.467113494873047, "loss": 0.7045, "nll_loss": 0.691598117351532, "rewards/accuracies": 0.875, "rewards/chosen": -0.07111093401908875, "rewards/margins": 0.47560036182403564, "rewards/rejected": -0.5467113256454468, "step": 644 }, { "epoch": 0.401244167962675, "grad_norm": 0.311746209859848, "learning_rate": 1.775e-05, "log_odds_chosen": 3.939924716949463, "log_odds_ratio": -0.24164189398288727, "logits/chosen": 2.170393228530884, "logits/rejected": 1.6897330284118652, "logps/chosen": -0.8287167549133301, "logps/rejected": -4.303630352020264, "loss": 0.6971, "nll_loss": 0.6729692220687866, "rewards/accuracies": 0.75, "rewards/chosen": -0.08287167549133301, "rewards/margins": 0.3474913537502289, "rewards/rejected": -0.4303630292415619, "step": 645 }, { "epoch": 0.40186625194401243, "grad_norm": 0.30233389139175415, "learning_rate": 1.77e-05, "log_odds_chosen": 3.523589849472046, "log_odds_ratio": -0.2539083957672119, "logits/chosen": 0.255723237991333, "logits/rejected": 0.8434269428253174, "logps/chosen": -0.8574182391166687, "logps/rejected": -3.7108993530273438, "loss": 0.4845, "nll_loss": 0.4590851068496704, "rewards/accuracies": 0.875, "rewards/chosen": -0.08574181795120239, "rewards/margins": 0.285348117351532, "rewards/rejected": -0.3710899353027344, "step": 646 }, { "epoch": 0.40248833592534994, "grad_norm": 0.3650731146335602, "learning_rate": 1.765e-05, "log_odds_chosen": 1.8685635328292847, "log_odds_ratio": -0.4129212498664856, "logits/chosen": 1.030846118927002, "logits/rejected": 2.034632444381714, "logps/chosen": -0.8975147008895874, "logps/rejected": -2.4678120613098145, "loss": 0.6229, "nll_loss": 0.5816302299499512, "rewards/accuracies": 0.75, "rewards/chosen": -0.0897514745593071, "rewards/margins": 0.15702971816062927, "rewards/rejected": -0.24678120017051697, "step": 647 }, { "epoch": 0.4031104199066874, "grad_norm": 0.3364572823047638, "learning_rate": 1.76e-05, "log_odds_chosen": 3.6588826179504395, "log_odds_ratio": -0.10638586431741714, "logits/chosen": 1.2081890106201172, "logits/rejected": 1.002839207649231, "logps/chosen": -0.8798947930335999, "logps/rejected": -4.0094380378723145, "loss": 0.5447, "nll_loss": 0.5340352058410645, "rewards/accuracies": 1.0, "rewards/chosen": -0.08798947930335999, "rewards/margins": 0.312954306602478, "rewards/rejected": -0.4009438157081604, "step": 648 }, { "epoch": 0.4037325038880249, "grad_norm": 0.4246959984302521, "learning_rate": 1.755e-05, "log_odds_chosen": 5.4836812019348145, "log_odds_ratio": -0.11485705524682999, "logits/chosen": 1.9523580074310303, "logits/rejected": 2.006519079208374, "logps/chosen": -0.7004702091217041, "logps/rejected": -5.304394245147705, "loss": 0.7531, "nll_loss": 0.7416150569915771, "rewards/accuracies": 0.875, "rewards/chosen": -0.07004702091217041, "rewards/margins": 0.46039241552352905, "rewards/rejected": -0.5304394364356995, "step": 649 }, { "epoch": 0.40435458786936235, "grad_norm": 0.32653963565826416, "learning_rate": 1.75e-05, "log_odds_chosen": 4.108030319213867, "log_odds_ratio": -0.10118812322616577, "logits/chosen": 2.7480831146240234, "logits/rejected": 1.3352696895599365, "logps/chosen": -0.8290809392929077, "logps/rejected": -4.4214935302734375, "loss": 0.8378, "nll_loss": 0.8277164697647095, "rewards/accuracies": 1.0, "rewards/chosen": -0.08290809392929077, "rewards/margins": 0.3592412769794464, "rewards/rejected": -0.4421493411064148, "step": 650 }, { "epoch": 0.40497667185069985, "grad_norm": 0.4039763808250427, "learning_rate": 1.745e-05, "log_odds_chosen": 3.81673264503479, "log_odds_ratio": -0.19408871233463287, "logits/chosen": 1.0739060640335083, "logits/rejected": 0.9814785718917847, "logps/chosen": -0.716781497001648, "logps/rejected": -3.921405553817749, "loss": 0.5342, "nll_loss": 0.5147431492805481, "rewards/accuracies": 1.0, "rewards/chosen": -0.07167814671993256, "rewards/margins": 0.3204624354839325, "rewards/rejected": -0.39214056730270386, "step": 651 }, { "epoch": 0.4055987558320373, "grad_norm": 0.4498097598552704, "learning_rate": 1.74e-05, "log_odds_chosen": 4.934605598449707, "log_odds_ratio": -0.15549834072589874, "logits/chosen": 3.4097490310668945, "logits/rejected": 3.1473355293273926, "logps/chosen": -0.8442947268486023, "logps/rejected": -5.246354103088379, "loss": 0.8392, "nll_loss": 0.8236282467842102, "rewards/accuracies": 1.0, "rewards/chosen": -0.08442947268486023, "rewards/margins": 0.4402059018611908, "rewards/rejected": -0.524635374546051, "step": 652 }, { "epoch": 0.4062208398133748, "grad_norm": 0.3200288414955139, "learning_rate": 1.7349999999999998e-05, "log_odds_chosen": 2.3036115169525146, "log_odds_ratio": -0.26165878772735596, "logits/chosen": 0.7089138627052307, "logits/rejected": 0.37762632966041565, "logps/chosen": -0.7141548991203308, "logps/rejected": -2.460519313812256, "loss": 0.5317, "nll_loss": 0.5055035352706909, "rewards/accuracies": 0.875, "rewards/chosen": -0.0714154914021492, "rewards/margins": 0.17463642358779907, "rewards/rejected": -0.24605193734169006, "step": 653 }, { "epoch": 0.40684292379471226, "grad_norm": 0.36487001180648804, "learning_rate": 1.73e-05, "log_odds_chosen": 4.135533332824707, "log_odds_ratio": -0.21677449345588684, "logits/chosen": 0.40090829133987427, "logits/rejected": 0.37310242652893066, "logps/chosen": -1.2473537921905518, "logps/rejected": -5.138498306274414, "loss": 0.5796, "nll_loss": 0.557910680770874, "rewards/accuracies": 0.75, "rewards/chosen": -0.12473537027835846, "rewards/margins": 0.38911446928977966, "rewards/rejected": -0.5138498544692993, "step": 654 }, { "epoch": 0.40746500777604977, "grad_norm": 0.3971942961215973, "learning_rate": 1.725e-05, "log_odds_chosen": 5.033882141113281, "log_odds_ratio": -0.09416782855987549, "logits/chosen": 3.2653441429138184, "logits/rejected": 1.552402377128601, "logps/chosen": -0.847460925579071, "logps/rejected": -5.348739147186279, "loss": 0.8744, "nll_loss": 0.8649776577949524, "rewards/accuracies": 1.0, "rewards/chosen": -0.0847460925579071, "rewards/margins": 0.45012784004211426, "rewards/rejected": -0.5348739624023438, "step": 655 }, { "epoch": 0.4080870917573872, "grad_norm": 0.388738214969635, "learning_rate": 1.7199999999999998e-05, "log_odds_chosen": 5.930737495422363, "log_odds_ratio": -0.033261630684137344, "logits/chosen": 2.618645191192627, "logits/rejected": 1.6277352571487427, "logps/chosen": -1.0782268047332764, "logps/rejected": -6.42779541015625, "loss": 0.6678, "nll_loss": 0.6645084023475647, "rewards/accuracies": 1.0, "rewards/chosen": -0.10782268643379211, "rewards/margins": 0.5349568128585815, "rewards/rejected": -0.6427795886993408, "step": 656 }, { "epoch": 0.40870917573872473, "grad_norm": 0.6455793976783752, "learning_rate": 1.7150000000000004e-05, "log_odds_chosen": 4.305086612701416, "log_odds_ratio": -0.03871765732765198, "logits/chosen": 2.1431026458740234, "logits/rejected": -0.24009643495082855, "logps/chosen": -0.8816466927528381, "logps/rejected": -4.646897792816162, "loss": 0.6774, "nll_loss": 0.6735305786132812, "rewards/accuracies": 1.0, "rewards/chosen": -0.08816467225551605, "rewards/margins": 0.3765251040458679, "rewards/rejected": -0.46468979120254517, "step": 657 }, { "epoch": 0.40933125972006223, "grad_norm": 0.36170580983161926, "learning_rate": 1.7100000000000002e-05, "log_odds_chosen": 3.966200590133667, "log_odds_ratio": -0.18316572904586792, "logits/chosen": 2.2034897804260254, "logits/rejected": 2.678478240966797, "logps/chosen": -0.7432326674461365, "logps/rejected": -4.13386869430542, "loss": 0.6123, "nll_loss": 0.5939409136772156, "rewards/accuracies": 1.0, "rewards/chosen": -0.07432326674461365, "rewards/margins": 0.3390636146068573, "rewards/rejected": -0.41338688135147095, "step": 658 }, { "epoch": 0.4099533437013997, "grad_norm": 0.3560597896575928, "learning_rate": 1.705e-05, "log_odds_chosen": 3.1310007572174072, "log_odds_ratio": -0.12409868091344833, "logits/chosen": 1.4124412536621094, "logits/rejected": 0.49765628576278687, "logps/chosen": -0.9497761726379395, "logps/rejected": -3.5961623191833496, "loss": 0.6037, "nll_loss": 0.5912914872169495, "rewards/accuracies": 1.0, "rewards/chosen": -0.09497761726379395, "rewards/margins": 0.26463860273361206, "rewards/rejected": -0.359616219997406, "step": 659 }, { "epoch": 0.4105754276827372, "grad_norm": 2.7320940494537354, "learning_rate": 1.7000000000000003e-05, "log_odds_chosen": 4.352439880371094, "log_odds_ratio": -0.029573818668723106, "logits/chosen": 0.5563382506370544, "logits/rejected": -0.09724438190460205, "logps/chosen": -0.9204356670379639, "logps/rejected": -4.707180976867676, "loss": 0.5443, "nll_loss": 0.5413377285003662, "rewards/accuracies": 1.0, "rewards/chosen": -0.09204356372356415, "rewards/margins": 0.37867453694343567, "rewards/rejected": -0.4707180857658386, "step": 660 }, { "epoch": 0.41119751166407464, "grad_norm": 0.5051707029342651, "learning_rate": 1.6950000000000002e-05, "log_odds_chosen": 3.2970833778381348, "log_odds_ratio": -0.17525769770145416, "logits/chosen": 1.211416244506836, "logits/rejected": 1.6842365264892578, "logps/chosen": -1.3437680006027222, "logps/rejected": -4.288342475891113, "loss": 0.7476, "nll_loss": 0.730032205581665, "rewards/accuracies": 0.875, "rewards/chosen": -0.13437680900096893, "rewards/margins": 0.29445746541023254, "rewards/rejected": -0.4288342595100403, "step": 661 }, { "epoch": 0.41181959564541215, "grad_norm": 0.3857666552066803, "learning_rate": 1.69e-05, "log_odds_chosen": 3.65859055519104, "log_odds_ratio": -0.2163608968257904, "logits/chosen": 2.043635606765747, "logits/rejected": 1.3761276006698608, "logps/chosen": -0.7962802648544312, "logps/rejected": -3.883711338043213, "loss": 0.6613, "nll_loss": 0.6396494507789612, "rewards/accuracies": 0.875, "rewards/chosen": -0.07962802797555923, "rewards/margins": 0.3087431490421295, "rewards/rejected": -0.38837113976478577, "step": 662 }, { "epoch": 0.4124416796267496, "grad_norm": 3.1776084899902344, "learning_rate": 1.6850000000000003e-05, "log_odds_chosen": 2.944477081298828, "log_odds_ratio": -0.2576942443847656, "logits/chosen": 2.157097339630127, "logits/rejected": 1.435225009918213, "logps/chosen": -0.9808058738708496, "logps/rejected": -3.4601707458496094, "loss": 0.9127, "nll_loss": 0.8869736194610596, "rewards/accuracies": 0.875, "rewards/chosen": -0.0980805903673172, "rewards/margins": 0.24793650209903717, "rewards/rejected": -0.34601709246635437, "step": 663 }, { "epoch": 0.4130637636080871, "grad_norm": 0.39393720030784607, "learning_rate": 1.6800000000000002e-05, "log_odds_chosen": 3.360602378845215, "log_odds_ratio": -0.3380497097969055, "logits/chosen": 1.5823501348495483, "logits/rejected": 1.2890987396240234, "logps/chosen": -0.8942395448684692, "logps/rejected": -3.9563956260681152, "loss": 0.6122, "nll_loss": 0.5784330368041992, "rewards/accuracies": 0.75, "rewards/chosen": -0.08942395448684692, "rewards/margins": 0.3062155842781067, "rewards/rejected": -0.395639568567276, "step": 664 }, { "epoch": 0.41368584758942456, "grad_norm": 0.32575756311416626, "learning_rate": 1.675e-05, "log_odds_chosen": 6.160964012145996, "log_odds_ratio": -0.1419294774532318, "logits/chosen": 0.717900276184082, "logits/rejected": 0.69581139087677, "logps/chosen": -0.8181707262992859, "logps/rejected": -6.384195327758789, "loss": 0.5373, "nll_loss": 0.5230689644813538, "rewards/accuracies": 0.875, "rewards/chosen": -0.08181708306074142, "rewards/margins": 0.5566024780273438, "rewards/rejected": -0.6384195685386658, "step": 665 }, { "epoch": 0.41430793157076207, "grad_norm": 0.3093670606613159, "learning_rate": 1.6700000000000003e-05, "log_odds_chosen": 2.2433371543884277, "log_odds_ratio": -0.32789260149002075, "logits/chosen": 0.5118715763092041, "logits/rejected": 0.6324558258056641, "logps/chosen": -0.9613909721374512, "logps/rejected": -2.8688957691192627, "loss": 0.6066, "nll_loss": 0.5737725496292114, "rewards/accuracies": 0.875, "rewards/chosen": -0.0961390882730484, "rewards/margins": 0.19075046479701996, "rewards/rejected": -0.28688958287239075, "step": 666 }, { "epoch": 0.4149300155520995, "grad_norm": 0.8644431233406067, "learning_rate": 1.665e-05, "log_odds_chosen": 4.1721930503845215, "log_odds_ratio": -0.17341499030590057, "logits/chosen": 2.2626147270202637, "logits/rejected": 1.2522985935211182, "logps/chosen": -0.8507257103919983, "logps/rejected": -4.383547782897949, "loss": 0.7139, "nll_loss": 0.696591854095459, "rewards/accuracies": 0.875, "rewards/chosen": -0.08507256954908371, "rewards/margins": 0.3532821834087372, "rewards/rejected": -0.4383547604084015, "step": 667 }, { "epoch": 0.415552099533437, "grad_norm": 1.0994956493377686, "learning_rate": 1.66e-05, "log_odds_chosen": 1.5850971937179565, "log_odds_ratio": -0.5201008915901184, "logits/chosen": 0.19767147302627563, "logits/rejected": -0.6920922994613647, "logps/chosen": -1.2969383001327515, "logps/rejected": -2.6513142585754395, "loss": 0.5958, "nll_loss": 0.5437737703323364, "rewards/accuracies": 0.75, "rewards/chosen": -0.12969382107257843, "rewards/margins": 0.13543760776519775, "rewards/rejected": -0.265131413936615, "step": 668 }, { "epoch": 0.4161741835147745, "grad_norm": 0.3375812768936157, "learning_rate": 1.6550000000000002e-05, "log_odds_chosen": 3.9452292919158936, "log_odds_ratio": -0.091424360871315, "logits/chosen": 1.0888257026672363, "logits/rejected": 0.020370274782180786, "logps/chosen": -0.9099546074867249, "logps/rejected": -4.351152420043945, "loss": 0.537, "nll_loss": 0.5278565287590027, "rewards/accuracies": 1.0, "rewards/chosen": -0.09099546074867249, "rewards/margins": 0.34411975741386414, "rewards/rejected": -0.43511518836021423, "step": 669 }, { "epoch": 0.416796267496112, "grad_norm": 0.4032786190509796, "learning_rate": 1.65e-05, "log_odds_chosen": 3.414780616760254, "log_odds_ratio": -0.18640896677970886, "logits/chosen": 0.40261098742485046, "logits/rejected": 0.1558481752872467, "logps/chosen": -0.7581571936607361, "logps/rejected": -3.518937110900879, "loss": 0.5746, "nll_loss": 0.5559825301170349, "rewards/accuracies": 0.875, "rewards/chosen": -0.07581572234630585, "rewards/margins": 0.2760779857635498, "rewards/rejected": -0.35189372301101685, "step": 670 }, { "epoch": 0.41741835147744943, "grad_norm": 0.36787623167037964, "learning_rate": 1.645e-05, "log_odds_chosen": 4.076958656311035, "log_odds_ratio": -0.1996491253376007, "logits/chosen": 1.331395149230957, "logits/rejected": 0.43952685594558716, "logps/chosen": -0.7202173471450806, "logps/rejected": -4.184619903564453, "loss": 0.5901, "nll_loss": 0.570127010345459, "rewards/accuracies": 0.875, "rewards/chosen": -0.0720217376947403, "rewards/margins": 0.34644028544425964, "rewards/rejected": -0.41846203804016113, "step": 671 }, { "epoch": 0.41804043545878694, "grad_norm": 0.41560977697372437, "learning_rate": 1.6400000000000002e-05, "log_odds_chosen": 4.469395637512207, "log_odds_ratio": -0.1743958741426468, "logits/chosen": 1.108622431755066, "logits/rejected": 1.4150950908660889, "logps/chosen": -0.9114997982978821, "logps/rejected": -4.8220343589782715, "loss": 0.7003, "nll_loss": 0.6828351020812988, "rewards/accuracies": 0.875, "rewards/chosen": -0.09114998579025269, "rewards/margins": 0.3910534381866455, "rewards/rejected": -0.4822034537792206, "step": 672 }, { "epoch": 0.4186625194401244, "grad_norm": 0.2804428040981293, "learning_rate": 1.635e-05, "log_odds_chosen": 4.052908420562744, "log_odds_ratio": -0.12896524369716644, "logits/chosen": -1.030667781829834, "logits/rejected": 0.36360377073287964, "logps/chosen": -0.73125821352005, "logps/rejected": -4.111758232116699, "loss": 0.4006, "nll_loss": 0.38765519857406616, "rewards/accuracies": 1.0, "rewards/chosen": -0.07312582433223724, "rewards/margins": 0.3380500078201294, "rewards/rejected": -0.41117581725120544, "step": 673 }, { "epoch": 0.4192846034214619, "grad_norm": 0.37161964178085327, "learning_rate": 1.63e-05, "log_odds_chosen": 1.824882984161377, "log_odds_ratio": -0.35899919271469116, "logits/chosen": 0.13106179237365723, "logits/rejected": 0.015201985836029053, "logps/chosen": -0.9987883567810059, "logps/rejected": -2.579470157623291, "loss": 0.6135, "nll_loss": 0.5776486396789551, "rewards/accuracies": 0.75, "rewards/chosen": -0.09987884014844894, "rewards/margins": 0.1580681949853897, "rewards/rejected": -0.25794702768325806, "step": 674 }, { "epoch": 0.4199066874027994, "grad_norm": 0.3221202790737152, "learning_rate": 1.6250000000000002e-05, "log_odds_chosen": 4.007673263549805, "log_odds_ratio": -0.1417747437953949, "logits/chosen": 1.9651204347610474, "logits/rejected": 1.1200003623962402, "logps/chosen": -0.862080454826355, "logps/rejected": -4.311037063598633, "loss": 0.6915, "nll_loss": 0.6773675680160522, "rewards/accuracies": 0.875, "rewards/chosen": -0.0862080454826355, "rewards/margins": 0.3448956310749054, "rewards/rejected": -0.4311037063598633, "step": 675 }, { "epoch": 0.42052877138413686, "grad_norm": 0.4121679961681366, "learning_rate": 1.62e-05, "log_odds_chosen": 1.8610512018203735, "log_odds_ratio": -0.46205413341522217, "logits/chosen": 0.4040051996707916, "logits/rejected": 1.0429785251617432, "logps/chosen": -0.926953911781311, "logps/rejected": -2.516986131668091, "loss": 0.5862, "nll_loss": 0.5399788618087769, "rewards/accuracies": 0.75, "rewards/chosen": -0.09269539266824722, "rewards/margins": 0.15900322794914246, "rewards/rejected": -0.2516986131668091, "step": 676 }, { "epoch": 0.42115085536547436, "grad_norm": 0.4730220139026642, "learning_rate": 1.6150000000000003e-05, "log_odds_chosen": 1.888508915901184, "log_odds_ratio": -0.6478555798530579, "logits/chosen": 1.903955101966858, "logits/rejected": 1.5147833824157715, "logps/chosen": -1.0022650957107544, "logps/rejected": -2.6973609924316406, "loss": 0.7048, "nll_loss": 0.6400637626647949, "rewards/accuracies": 0.375, "rewards/chosen": -0.1002265140414238, "rewards/margins": 0.16950958967208862, "rewards/rejected": -0.26973608136177063, "step": 677 }, { "epoch": 0.4217729393468118, "grad_norm": 0.36020079255104065, "learning_rate": 1.6100000000000002e-05, "log_odds_chosen": 2.8405559062957764, "log_odds_ratio": -0.3089936077594757, "logits/chosen": 1.1266642808914185, "logits/rejected": 1.9728171825408936, "logps/chosen": -0.886470377445221, "logps/rejected": -3.355649948120117, "loss": 0.7891, "nll_loss": 0.7581912279129028, "rewards/accuracies": 0.75, "rewards/chosen": -0.0886470377445221, "rewards/margins": 0.2469179928302765, "rewards/rejected": -0.3355650305747986, "step": 678 }, { "epoch": 0.4223950233281493, "grad_norm": 0.30074650049209595, "learning_rate": 1.605e-05, "log_odds_chosen": 3.77069354057312, "log_odds_ratio": -0.19860349595546722, "logits/chosen": -0.12679839134216309, "logits/rejected": 1.3421680927276611, "logps/chosen": -0.6535059213638306, "logps/rejected": -3.7568020820617676, "loss": 0.614, "nll_loss": 0.5941286087036133, "rewards/accuracies": 0.875, "rewards/chosen": -0.06535059213638306, "rewards/margins": 0.3103296458721161, "rewards/rejected": -0.37568023800849915, "step": 679 }, { "epoch": 0.4230171073094868, "grad_norm": 0.3977271020412445, "learning_rate": 1.6000000000000003e-05, "log_odds_chosen": 2.453881025314331, "log_odds_ratio": -0.35164380073547363, "logits/chosen": -0.057597219944000244, "logits/rejected": 2.3313894271850586, "logps/chosen": -1.0195229053497314, "logps/rejected": -3.0189602375030518, "loss": 0.6306, "nll_loss": 0.5954169034957886, "rewards/accuracies": 0.75, "rewards/chosen": -0.10195229947566986, "rewards/margins": 0.19994372129440308, "rewards/rejected": -0.30189603567123413, "step": 680 }, { "epoch": 0.4236391912908243, "grad_norm": 0.3623410165309906, "learning_rate": 1.595e-05, "log_odds_chosen": 3.358813762664795, "log_odds_ratio": -0.17362789809703827, "logits/chosen": 1.127630352973938, "logits/rejected": 0.4902253746986389, "logps/chosen": -0.9184183478355408, "logps/rejected": -3.821197509765625, "loss": 0.6398, "nll_loss": 0.6224524974822998, "rewards/accuracies": 0.875, "rewards/chosen": -0.09184183925390244, "rewards/margins": 0.290277898311615, "rewards/rejected": -0.38211971521377563, "step": 681 }, { "epoch": 0.42426127527216173, "grad_norm": 0.41106173396110535, "learning_rate": 1.59e-05, "log_odds_chosen": 3.8026862144470215, "log_odds_ratio": -0.0973445326089859, "logits/chosen": 2.4839863777160645, "logits/rejected": 1.665204405784607, "logps/chosen": -0.588760256767273, "logps/rejected": -3.5553464889526367, "loss": 0.7788, "nll_loss": 0.7690457105636597, "rewards/accuracies": 1.0, "rewards/chosen": -0.058876026421785355, "rewards/margins": 0.29665863513946533, "rewards/rejected": -0.3555346429347992, "step": 682 }, { "epoch": 0.42488335925349924, "grad_norm": 0.829839289188385, "learning_rate": 1.5850000000000002e-05, "log_odds_chosen": 1.962416172027588, "log_odds_ratio": -0.36821892857551575, "logits/chosen": 0.22051328420639038, "logits/rejected": -0.3354063630104065, "logps/chosen": -0.9992512464523315, "logps/rejected": -2.68438720703125, "loss": 0.5613, "nll_loss": 0.5244842171669006, "rewards/accuracies": 0.625, "rewards/chosen": -0.09992511570453644, "rewards/margins": 0.16851362586021423, "rewards/rejected": -0.26843875646591187, "step": 683 }, { "epoch": 0.4255054432348367, "grad_norm": 0.4520435333251953, "learning_rate": 1.58e-05, "log_odds_chosen": 3.9583230018615723, "log_odds_ratio": -0.18740130960941315, "logits/chosen": -0.35055220127105713, "logits/rejected": -0.14263691008090973, "logps/chosen": -1.0594209432601929, "logps/rejected": -4.505016803741455, "loss": 0.4304, "nll_loss": 0.41164255142211914, "rewards/accuracies": 0.875, "rewards/chosen": -0.10594210028648376, "rewards/margins": 0.34455960988998413, "rewards/rejected": -0.4505016803741455, "step": 684 }, { "epoch": 0.4261275272161742, "grad_norm": 0.3600594699382782, "learning_rate": 1.575e-05, "log_odds_chosen": 3.882615089416504, "log_odds_ratio": -0.11261001229286194, "logits/chosen": 1.8874908685684204, "logits/rejected": 0.43688538670539856, "logps/chosen": -0.9325780868530273, "logps/rejected": -4.299278259277344, "loss": 0.5315, "nll_loss": 0.5202258825302124, "rewards/accuracies": 1.0, "rewards/chosen": -0.09325780719518661, "rewards/margins": 0.33667001128196716, "rewards/rejected": -0.4299277663230896, "step": 685 }, { "epoch": 0.42674961119751165, "grad_norm": 0.5007074475288391, "learning_rate": 1.5700000000000002e-05, "log_odds_chosen": 1.91114342212677, "log_odds_ratio": -0.26344698667526245, "logits/chosen": 1.0757803916931152, "logits/rejected": 1.6833606958389282, "logps/chosen": -0.8399538993835449, "logps/rejected": -2.355059862136841, "loss": 0.5938, "nll_loss": 0.5674468874931335, "rewards/accuracies": 1.0, "rewards/chosen": -0.08399539440870285, "rewards/margins": 0.15151061117649078, "rewards/rejected": -0.23550599813461304, "step": 686 }, { "epoch": 0.42737169517884915, "grad_norm": 0.3760165870189667, "learning_rate": 1.565e-05, "log_odds_chosen": 2.5150809288024902, "log_odds_ratio": -0.399432897567749, "logits/chosen": 0.41969525814056396, "logits/rejected": 0.30745965242385864, "logps/chosen": -0.9256432056427002, "logps/rejected": -3.213193416595459, "loss": 0.5689, "nll_loss": 0.528993546962738, "rewards/accuracies": 0.75, "rewards/chosen": -0.09256432205438614, "rewards/margins": 0.22875504195690155, "rewards/rejected": -0.3213193714618683, "step": 687 }, { "epoch": 0.4279937791601866, "grad_norm": 0.39373117685317993, "learning_rate": 1.56e-05, "log_odds_chosen": 3.7912368774414062, "log_odds_ratio": -0.1475895345211029, "logits/chosen": -0.4038998782634735, "logits/rejected": 0.29189857840538025, "logps/chosen": -0.9779609441757202, "logps/rejected": -4.346105575561523, "loss": 0.4229, "nll_loss": 0.4081049859523773, "rewards/accuracies": 1.0, "rewards/chosen": -0.09779609739780426, "rewards/margins": 0.3368144929409027, "rewards/rejected": -0.4346105754375458, "step": 688 }, { "epoch": 0.4286158631415241, "grad_norm": 0.5293461084365845, "learning_rate": 1.5550000000000002e-05, "log_odds_chosen": 3.5006730556488037, "log_odds_ratio": -0.12377304583787918, "logits/chosen": 0.7839882969856262, "logits/rejected": 1.3053977489471436, "logps/chosen": -0.5710941553115845, "logps/rejected": -3.120854377746582, "loss": 0.5796, "nll_loss": 0.5672070980072021, "rewards/accuracies": 1.0, "rewards/chosen": -0.057109422981739044, "rewards/margins": 0.2549760341644287, "rewards/rejected": -0.31208541989326477, "step": 689 }, { "epoch": 0.42923794712286156, "grad_norm": 0.44740065932273865, "learning_rate": 1.55e-05, "log_odds_chosen": 3.0434255599975586, "log_odds_ratio": -0.24693435430526733, "logits/chosen": 0.18788853287696838, "logits/rejected": 0.6065223217010498, "logps/chosen": -0.9421164393424988, "logps/rejected": -3.573080062866211, "loss": 0.5578, "nll_loss": 0.5330719351768494, "rewards/accuracies": 0.875, "rewards/chosen": -0.094211645424366, "rewards/margins": 0.26309633255004883, "rewards/rejected": -0.35730797052383423, "step": 690 }, { "epoch": 0.42986003110419907, "grad_norm": 0.35186102986335754, "learning_rate": 1.545e-05, "log_odds_chosen": 3.3561038970947266, "log_odds_ratio": -0.2130623757839203, "logits/chosen": 0.4123086929321289, "logits/rejected": 1.1552950143814087, "logps/chosen": -0.699867844581604, "logps/rejected": -3.4619529247283936, "loss": 0.5932, "nll_loss": 0.5718498229980469, "rewards/accuracies": 0.75, "rewards/chosen": -0.06998678296804428, "rewards/margins": 0.2762084901332855, "rewards/rejected": -0.3461953103542328, "step": 691 }, { "epoch": 0.4304821150855365, "grad_norm": 0.48770788311958313, "learning_rate": 1.54e-05, "log_odds_chosen": 3.8186118602752686, "log_odds_ratio": -0.34374508261680603, "logits/chosen": 0.5811923742294312, "logits/rejected": 1.4528242349624634, "logps/chosen": -0.7848690748214722, "logps/rejected": -4.019944190979004, "loss": 0.6222, "nll_loss": 0.5878376364707947, "rewards/accuracies": 0.75, "rewards/chosen": -0.07848691940307617, "rewards/margins": 0.32350754737854004, "rewards/rejected": -0.4019944667816162, "step": 692 }, { "epoch": 0.431104199066874, "grad_norm": 2.120375871658325, "learning_rate": 1.535e-05, "log_odds_chosen": 1.4963163137435913, "log_odds_ratio": -0.4591095447540283, "logits/chosen": 1.3761413097381592, "logits/rejected": 2.085134506225586, "logps/chosen": -1.22217857837677, "logps/rejected": -2.508049249649048, "loss": 0.8543, "nll_loss": 0.8084296584129333, "rewards/accuracies": 0.5, "rewards/chosen": -0.12221785634756088, "rewards/margins": 0.12858706712722778, "rewards/rejected": -0.25080496072769165, "step": 693 }, { "epoch": 0.43172628304821153, "grad_norm": 0.38951340317726135, "learning_rate": 1.53e-05, "log_odds_chosen": 5.393576622009277, "log_odds_ratio": -0.18696874380111694, "logits/chosen": 1.783607006072998, "logits/rejected": 0.4088016748428345, "logps/chosen": -0.8493199348449707, "logps/rejected": -5.712547302246094, "loss": 0.6466, "nll_loss": 0.627892017364502, "rewards/accuracies": 0.875, "rewards/chosen": -0.08493199944496155, "rewards/margins": 0.4863227605819702, "rewards/rejected": -0.5712547898292542, "step": 694 }, { "epoch": 0.432348367029549, "grad_norm": 0.5150628089904785, "learning_rate": 1.525e-05, "log_odds_chosen": 5.354584693908691, "log_odds_ratio": -0.12360195815563202, "logits/chosen": 1.867887020111084, "logits/rejected": 1.4280177354812622, "logps/chosen": -0.9081195592880249, "logps/rejected": -5.845778465270996, "loss": 0.7201, "nll_loss": 0.7077158093452454, "rewards/accuracies": 0.875, "rewards/chosen": -0.09081195294857025, "rewards/margins": 0.4937658905982971, "rewards/rejected": -0.5845778584480286, "step": 695 }, { "epoch": 0.4329704510108865, "grad_norm": 0.2805895507335663, "learning_rate": 1.52e-05, "log_odds_chosen": 5.2841339111328125, "log_odds_ratio": -0.008528401143848896, "logits/chosen": -0.00789671391248703, "logits/rejected": 0.17581342160701752, "logps/chosen": -0.8426008820533752, "logps/rejected": -5.343410491943359, "loss": 0.4871, "nll_loss": 0.4862731397151947, "rewards/accuracies": 1.0, "rewards/chosen": -0.08426009118556976, "rewards/margins": 0.4500809907913208, "rewards/rejected": -0.5343410968780518, "step": 696 }, { "epoch": 0.43359253499222394, "grad_norm": 0.9375795722007751, "learning_rate": 1.515e-05, "log_odds_chosen": 2.055760383605957, "log_odds_ratio": -0.3684552013874054, "logits/chosen": 2.628263235092163, "logits/rejected": 2.48541259765625, "logps/chosen": -0.9256740808486938, "logps/rejected": -2.7253613471984863, "loss": 0.7838, "nll_loss": 0.746984601020813, "rewards/accuracies": 0.875, "rewards/chosen": -0.09256740659475327, "rewards/margins": 0.1799687147140503, "rewards/rejected": -0.27253612875938416, "step": 697 }, { "epoch": 0.43421461897356145, "grad_norm": 0.43897396326065063, "learning_rate": 1.51e-05, "log_odds_chosen": 4.525207996368408, "log_odds_ratio": -0.1548263132572174, "logits/chosen": 0.5639280080795288, "logits/rejected": 0.028501048684120178, "logps/chosen": -0.9530531167984009, "logps/rejected": -5.0771989822387695, "loss": 0.5532, "nll_loss": 0.5376973152160645, "rewards/accuracies": 0.875, "rewards/chosen": -0.09530530869960785, "rewards/margins": 0.4124146103858948, "rewards/rejected": -0.5077199339866638, "step": 698 }, { "epoch": 0.4348367029548989, "grad_norm": 0.32845211029052734, "learning_rate": 1.505e-05, "log_odds_chosen": 3.9304654598236084, "log_odds_ratio": -0.1376427263021469, "logits/chosen": 1.2447631359100342, "logits/rejected": 0.8316696286201477, "logps/chosen": -0.6440844535827637, "logps/rejected": -3.911695957183838, "loss": 0.6137, "nll_loss": 0.5998940467834473, "rewards/accuracies": 1.0, "rewards/chosen": -0.06440845131874084, "rewards/margins": 0.3267611563205719, "rewards/rejected": -0.39116960763931274, "step": 699 }, { "epoch": 0.4354587869362364, "grad_norm": 0.32732221484184265, "learning_rate": 1.5e-05, "log_odds_chosen": 4.258359909057617, "log_odds_ratio": -0.09592651575803757, "logits/chosen": 0.23326629400253296, "logits/rejected": 1.4764102697372437, "logps/chosen": -0.8468846678733826, "logps/rejected": -4.427027225494385, "loss": 0.5165, "nll_loss": 0.5069240927696228, "rewards/accuracies": 1.0, "rewards/chosen": -0.0846884697675705, "rewards/margins": 0.358014315366745, "rewards/rejected": -0.4427027702331543, "step": 700 }, { "epoch": 0.43608087091757386, "grad_norm": 1.247667670249939, "learning_rate": 1.4950000000000001e-05, "log_odds_chosen": 4.05362606048584, "log_odds_ratio": -0.2478107213973999, "logits/chosen": 0.7657288312911987, "logits/rejected": 0.623124897480011, "logps/chosen": -1.4577549695968628, "logps/rejected": -5.168236255645752, "loss": 0.5482, "nll_loss": 0.5233848690986633, "rewards/accuracies": 0.875, "rewards/chosen": -0.14577549695968628, "rewards/margins": 0.37104812264442444, "rewards/rejected": -0.5168236494064331, "step": 701 }, { "epoch": 0.43670295489891137, "grad_norm": 0.40692177414894104, "learning_rate": 1.49e-05, "log_odds_chosen": 3.904751777648926, "log_odds_ratio": -0.15392708778381348, "logits/chosen": 1.5741872787475586, "logits/rejected": 1.567471981048584, "logps/chosen": -0.8535467982292175, "logps/rejected": -4.262635707855225, "loss": 0.6844, "nll_loss": 0.6690041422843933, "rewards/accuracies": 0.875, "rewards/chosen": -0.08535467833280563, "rewards/margins": 0.340908944606781, "rewards/rejected": -0.42626357078552246, "step": 702 }, { "epoch": 0.4373250388802488, "grad_norm": 0.39099812507629395, "learning_rate": 1.485e-05, "log_odds_chosen": 4.589461803436279, "log_odds_ratio": -0.050541818141937256, "logits/chosen": 1.9745683670043945, "logits/rejected": 2.1288652420043945, "logps/chosen": -1.3265926837921143, "logps/rejected": -5.420770645141602, "loss": 0.7475, "nll_loss": 0.7424065470695496, "rewards/accuracies": 1.0, "rewards/chosen": -0.13265925645828247, "rewards/margins": 0.4094178378582001, "rewards/rejected": -0.5420770645141602, "step": 703 }, { "epoch": 0.4379471228615863, "grad_norm": 0.3653831481933594, "learning_rate": 1.48e-05, "log_odds_chosen": 2.963881015777588, "log_odds_ratio": -0.27740052342414856, "logits/chosen": 0.6383364796638489, "logits/rejected": 0.5264080762863159, "logps/chosen": -0.9153085350990295, "logps/rejected": -3.5376999378204346, "loss": 0.6302, "nll_loss": 0.602420449256897, "rewards/accuracies": 0.75, "rewards/chosen": -0.09153085201978683, "rewards/margins": 0.26223915815353394, "rewards/rejected": -0.35377001762390137, "step": 704 }, { "epoch": 0.4385692068429238, "grad_norm": 4.076732635498047, "learning_rate": 1.475e-05, "log_odds_chosen": 5.495579242706299, "log_odds_ratio": -0.13295693695545197, "logits/chosen": 1.3471734523773193, "logits/rejected": 1.2759082317352295, "logps/chosen": -0.9006038308143616, "logps/rejected": -5.952462196350098, "loss": 0.6168, "nll_loss": 0.6035174131393433, "rewards/accuracies": 1.0, "rewards/chosen": -0.09006038308143616, "rewards/margins": 0.5051857829093933, "rewards/rejected": -0.5952461957931519, "step": 705 }, { "epoch": 0.4391912908242613, "grad_norm": 0.42146316170692444, "learning_rate": 1.47e-05, "log_odds_chosen": 3.6146352291107178, "log_odds_ratio": -0.1981172412633896, "logits/chosen": 1.6295064687728882, "logits/rejected": 1.07228684425354, "logps/chosen": -0.8335095643997192, "logps/rejected": -3.8007986545562744, "loss": 0.6058, "nll_loss": 0.5859972238540649, "rewards/accuracies": 0.75, "rewards/chosen": -0.08335095643997192, "rewards/margins": 0.2967289090156555, "rewards/rejected": -0.38007986545562744, "step": 706 }, { "epoch": 0.43981337480559873, "grad_norm": 0.3019298017024994, "learning_rate": 1.465e-05, "log_odds_chosen": 5.203786849975586, "log_odds_ratio": -0.020779546350240707, "logits/chosen": 1.577458381652832, "logits/rejected": 0.5964698791503906, "logps/chosen": -0.8217610716819763, "logps/rejected": -5.420132637023926, "loss": 0.5656, "nll_loss": 0.5635250210762024, "rewards/accuracies": 1.0, "rewards/chosen": -0.0821761041879654, "rewards/margins": 0.4598371088504791, "rewards/rejected": -0.5420132279396057, "step": 707 }, { "epoch": 0.44043545878693624, "grad_norm": 1.8788961172103882, "learning_rate": 1.4599999999999999e-05, "log_odds_chosen": 4.655858993530273, "log_odds_ratio": -0.05985920503735542, "logits/chosen": 1.0745199918746948, "logits/rejected": 1.0020854473114014, "logps/chosen": -1.2417633533477783, "logps/rejected": -5.395903587341309, "loss": 0.588, "nll_loss": 0.5820605158805847, "rewards/accuracies": 1.0, "rewards/chosen": -0.12417633831501007, "rewards/margins": 0.41541406512260437, "rewards/rejected": -0.5395903587341309, "step": 708 }, { "epoch": 0.4410575427682737, "grad_norm": 0.3622523546218872, "learning_rate": 1.455e-05, "log_odds_chosen": 5.548418045043945, "log_odds_ratio": -0.08194790780544281, "logits/chosen": 0.051224738359451294, "logits/rejected": -0.29287880659103394, "logps/chosen": -0.8937790989875793, "logps/rejected": -5.9226579666137695, "loss": 0.3898, "nll_loss": 0.38160476088523865, "rewards/accuracies": 1.0, "rewards/chosen": -0.08937790989875793, "rewards/margins": 0.5028879046440125, "rewards/rejected": -0.5922658443450928, "step": 709 }, { "epoch": 0.4416796267496112, "grad_norm": 0.34681272506713867, "learning_rate": 1.45e-05, "log_odds_chosen": 5.7191925048828125, "log_odds_ratio": -0.07530266791582108, "logits/chosen": 1.2113844156265259, "logits/rejected": -0.6352530717849731, "logps/chosen": -0.8490526676177979, "logps/rejected": -5.936434268951416, "loss": 0.5826, "nll_loss": 0.5750584006309509, "rewards/accuracies": 1.0, "rewards/chosen": -0.08490526676177979, "rewards/margins": 0.5087381601333618, "rewards/rejected": -0.5936433672904968, "step": 710 }, { "epoch": 0.4423017107309487, "grad_norm": 0.3714011311531067, "learning_rate": 1.4449999999999999e-05, "log_odds_chosen": 5.149613380432129, "log_odds_ratio": -0.008057689294219017, "logits/chosen": 1.679274559020996, "logits/rejected": -0.04641413688659668, "logps/chosen": -0.9280933141708374, "logps/rejected": -5.554920196533203, "loss": 0.6515, "nll_loss": 0.6507111191749573, "rewards/accuracies": 1.0, "rewards/chosen": -0.09280932694673538, "rewards/margins": 0.46268269419670105, "rewards/rejected": -0.5554920434951782, "step": 711 }, { "epoch": 0.44292379471228616, "grad_norm": 0.30056387186050415, "learning_rate": 1.44e-05, "log_odds_chosen": 4.624875545501709, "log_odds_ratio": -0.06813670694828033, "logits/chosen": 2.250699281692505, "logits/rejected": 2.0250535011291504, "logps/chosen": -0.6873379945755005, "logps/rejected": -4.537752628326416, "loss": 0.7692, "nll_loss": 0.7623446583747864, "rewards/accuracies": 1.0, "rewards/chosen": -0.06873380392789841, "rewards/margins": 0.3850414752960205, "rewards/rejected": -0.4537752866744995, "step": 712 }, { "epoch": 0.44354587869362366, "grad_norm": 0.33198800683021545, "learning_rate": 1.435e-05, "log_odds_chosen": 5.732954025268555, "log_odds_ratio": -0.033084020018577576, "logits/chosen": -0.4894905686378479, "logits/rejected": -0.5070227980613708, "logps/chosen": -1.1875081062316895, "logps/rejected": -6.428521156311035, "loss": 0.425, "nll_loss": 0.42172831296920776, "rewards/accuracies": 1.0, "rewards/chosen": -0.11875081807374954, "rewards/margins": 0.5241013765335083, "rewards/rejected": -0.6428521871566772, "step": 713 }, { "epoch": 0.4441679626749611, "grad_norm": 0.34545430541038513, "learning_rate": 1.43e-05, "log_odds_chosen": 4.477220058441162, "log_odds_ratio": -0.05571537837386131, "logits/chosen": 0.9135289788246155, "logits/rejected": 0.4658157229423523, "logps/chosen": -0.9963458776473999, "logps/rejected": -4.778862953186035, "loss": 0.5084, "nll_loss": 0.5028534531593323, "rewards/accuracies": 1.0, "rewards/chosen": -0.09963459521532059, "rewards/margins": 0.3782517611980438, "rewards/rejected": -0.4778863489627838, "step": 714 }, { "epoch": 0.4447900466562986, "grad_norm": 0.31467583775520325, "learning_rate": 1.4249999999999999e-05, "log_odds_chosen": 3.738637924194336, "log_odds_ratio": -0.11523061245679855, "logits/chosen": 0.8655556440353394, "logits/rejected": 0.12430325150489807, "logps/chosen": -0.8071171641349792, "logps/rejected": -4.038856506347656, "loss": 0.5292, "nll_loss": 0.517656147480011, "rewards/accuracies": 0.875, "rewards/chosen": -0.0807117149233818, "rewards/margins": 0.3231739401817322, "rewards/rejected": -0.4038856327533722, "step": 715 }, { "epoch": 0.4454121306376361, "grad_norm": 3.6782705783843994, "learning_rate": 1.42e-05, "log_odds_chosen": 4.408926010131836, "log_odds_ratio": -0.053505804389715195, "logits/chosen": 1.5419108867645264, "logits/rejected": 0.1588946431875229, "logps/chosen": -0.9761942625045776, "logps/rejected": -4.855075359344482, "loss": 0.5731, "nll_loss": 0.5677310228347778, "rewards/accuracies": 1.0, "rewards/chosen": -0.09761942923069, "rewards/margins": 0.3878880739212036, "rewards/rejected": -0.4855075478553772, "step": 716 }, { "epoch": 0.4460342146189736, "grad_norm": 0.3332521319389343, "learning_rate": 1.415e-05, "log_odds_chosen": 5.683034896850586, "log_odds_ratio": -0.03364837169647217, "logits/chosen": 0.9579274654388428, "logits/rejected": 0.7839161157608032, "logps/chosen": -1.0205984115600586, "logps/rejected": -5.810733795166016, "loss": 0.6103, "nll_loss": 0.6069284081459045, "rewards/accuracies": 1.0, "rewards/chosen": -0.10205984115600586, "rewards/margins": 0.4790135324001312, "rewards/rejected": -0.5810733437538147, "step": 717 }, { "epoch": 0.44665629860031103, "grad_norm": 0.45680540800094604, "learning_rate": 1.4099999999999999e-05, "log_odds_chosen": 2.5157294273376465, "log_odds_ratio": -0.25136426091194153, "logits/chosen": 1.3738585710525513, "logits/rejected": 1.645888328552246, "logps/chosen": -0.8006247282028198, "logps/rejected": -2.843050479888916, "loss": 0.6798, "nll_loss": 0.6546663045883179, "rewards/accuracies": 0.875, "rewards/chosen": -0.08006247133016586, "rewards/margins": 0.20424258708953857, "rewards/rejected": -0.28430506587028503, "step": 718 }, { "epoch": 0.44727838258164854, "grad_norm": 0.6165868043899536, "learning_rate": 1.4050000000000003e-05, "log_odds_chosen": 5.895066261291504, "log_odds_ratio": -0.00467541953548789, "logits/chosen": 1.104529619216919, "logits/rejected": 0.7458992004394531, "logps/chosen": -0.8854390382766724, "logps/rejected": -6.143112659454346, "loss": 0.6325, "nll_loss": 0.6319921612739563, "rewards/accuracies": 1.0, "rewards/chosen": -0.08854390680789948, "rewards/margins": 0.5257673263549805, "rewards/rejected": -0.6143112778663635, "step": 719 }, { "epoch": 0.447900466562986, "grad_norm": 0.5500882267951965, "learning_rate": 1.4000000000000001e-05, "log_odds_chosen": 3.6151845455169678, "log_odds_ratio": -0.15118198096752167, "logits/chosen": 2.462423086166382, "logits/rejected": 1.7982007265090942, "logps/chosen": -1.5363001823425293, "logps/rejected": -4.657369613647461, "loss": 0.8473, "nll_loss": 0.8321783542633057, "rewards/accuracies": 0.875, "rewards/chosen": -0.15363001823425293, "rewards/margins": 0.3121069371700287, "rewards/rejected": -0.4657369554042816, "step": 720 }, { "epoch": 0.4485225505443235, "grad_norm": 0.7661917805671692, "learning_rate": 1.3950000000000002e-05, "log_odds_chosen": 2.443437099456787, "log_odds_ratio": -0.3714645504951477, "logits/chosen": 1.7393274307250977, "logits/rejected": 0.9712520837783813, "logps/chosen": -1.2769322395324707, "logps/rejected": -3.5214457511901855, "loss": 0.851, "nll_loss": 0.8138446807861328, "rewards/accuracies": 0.75, "rewards/chosen": -0.12769320607185364, "rewards/margins": 0.22445137798786163, "rewards/rejected": -0.35214459896087646, "step": 721 }, { "epoch": 0.44914463452566095, "grad_norm": 0.3548874258995056, "learning_rate": 1.3900000000000002e-05, "log_odds_chosen": 3.8431100845336914, "log_odds_ratio": -0.2611660361289978, "logits/chosen": 1.9599686861038208, "logits/rejected": 1.1322951316833496, "logps/chosen": -0.877863347530365, "logps/rejected": -4.278283596038818, "loss": 0.7638, "nll_loss": 0.7376934289932251, "rewards/accuracies": 0.875, "rewards/chosen": -0.08778633922338486, "rewards/margins": 0.3400420546531677, "rewards/rejected": -0.4278283715248108, "step": 722 }, { "epoch": 0.44976671850699845, "grad_norm": 0.42921215295791626, "learning_rate": 1.3850000000000001e-05, "log_odds_chosen": 3.950148820877075, "log_odds_ratio": -0.16853103041648865, "logits/chosen": 2.9101808071136475, "logits/rejected": 2.3471872806549072, "logps/chosen": -0.8549723625183105, "logps/rejected": -4.339173316955566, "loss": 0.7838, "nll_loss": 0.7669782638549805, "rewards/accuracies": 0.875, "rewards/chosen": -0.08549723774194717, "rewards/margins": 0.3484201431274414, "rewards/rejected": -0.433917373418808, "step": 723 }, { "epoch": 0.4503888024883359, "grad_norm": 0.3380347192287445, "learning_rate": 1.3800000000000002e-05, "log_odds_chosen": 4.601840972900391, "log_odds_ratio": -0.11074228584766388, "logits/chosen": 2.156388282775879, "logits/rejected": 1.037245273590088, "logps/chosen": -0.9112977981567383, "logps/rejected": -5.017061233520508, "loss": 0.7269, "nll_loss": 0.7158026695251465, "rewards/accuracies": 0.875, "rewards/chosen": -0.09112977981567383, "rewards/margins": 0.41057640314102173, "rewards/rejected": -0.5017061829566956, "step": 724 }, { "epoch": 0.4510108864696734, "grad_norm": 0.38814446330070496, "learning_rate": 1.3750000000000002e-05, "log_odds_chosen": 3.3909547328948975, "log_odds_ratio": -0.21502827107906342, "logits/chosen": 1.6492749452590942, "logits/rejected": 0.4639023542404175, "logps/chosen": -1.0277436971664429, "logps/rejected": -4.112637519836426, "loss": 0.6731, "nll_loss": 0.6515559554100037, "rewards/accuracies": 0.875, "rewards/chosen": -0.10277437418699265, "rewards/margins": 0.3084893822669983, "rewards/rejected": -0.41126376390457153, "step": 725 }, { "epoch": 0.45163297045101086, "grad_norm": 2.843536138534546, "learning_rate": 1.3700000000000001e-05, "log_odds_chosen": 4.456535816192627, "log_odds_ratio": -0.08549236506223679, "logits/chosen": 0.24373212456703186, "logits/rejected": 0.3230777084827423, "logps/chosen": -0.9399199485778809, "logps/rejected": -4.7474799156188965, "loss": 0.5621, "nll_loss": 0.5535575747489929, "rewards/accuracies": 1.0, "rewards/chosen": -0.09399199485778809, "rewards/margins": 0.3807560205459595, "rewards/rejected": -0.47474801540374756, "step": 726 }, { "epoch": 0.45225505443234837, "grad_norm": 0.967526912689209, "learning_rate": 1.3650000000000001e-05, "log_odds_chosen": 2.7426035404205322, "log_odds_ratio": -0.29068922996520996, "logits/chosen": 2.4513349533081055, "logits/rejected": 1.7992994785308838, "logps/chosen": -0.7617477774620056, "logps/rejected": -3.0559778213500977, "loss": 0.7583, "nll_loss": 0.7292664051055908, "rewards/accuracies": 0.75, "rewards/chosen": -0.0761747807264328, "rewards/margins": 0.22942303121089935, "rewards/rejected": -0.30559781193733215, "step": 727 }, { "epoch": 0.4528771384136858, "grad_norm": 2.641636610031128, "learning_rate": 1.3600000000000002e-05, "log_odds_chosen": 3.6268856525421143, "log_odds_ratio": -0.170202374458313, "logits/chosen": 0.4291355013847351, "logits/rejected": -0.2707882225513458, "logps/chosen": -1.1543688774108887, "logps/rejected": -4.4368486404418945, "loss": 0.6416, "nll_loss": 0.6245691776275635, "rewards/accuracies": 1.0, "rewards/chosen": -0.11543689668178558, "rewards/margins": 0.32824793457984924, "rewards/rejected": -0.443684846162796, "step": 728 }, { "epoch": 0.4534992223950233, "grad_norm": 0.3928133547306061, "learning_rate": 1.3550000000000002e-05, "log_odds_chosen": 3.8950159549713135, "log_odds_ratio": -0.15199552476406097, "logits/chosen": 1.5332711935043335, "logits/rejected": 0.1663268804550171, "logps/chosen": -0.902508020401001, "logps/rejected": -4.069859981536865, "loss": 0.6854, "nll_loss": 0.6701560616493225, "rewards/accuracies": 0.875, "rewards/chosen": -0.09025079011917114, "rewards/margins": 0.3167352080345154, "rewards/rejected": -0.4069859981536865, "step": 729 }, { "epoch": 0.45412130637636083, "grad_norm": 0.30603450536727905, "learning_rate": 1.3500000000000001e-05, "log_odds_chosen": 4.912703514099121, "log_odds_ratio": -0.0980292409658432, "logits/chosen": 0.3904115855693817, "logits/rejected": -0.14418435096740723, "logps/chosen": -0.9671989679336548, "logps/rejected": -5.454221725463867, "loss": 0.4348, "nll_loss": 0.4250153601169586, "rewards/accuracies": 0.875, "rewards/chosen": -0.0967198982834816, "rewards/margins": 0.44870230555534363, "rewards/rejected": -0.5454221963882446, "step": 730 }, { "epoch": 0.4547433903576983, "grad_norm": 2.858635902404785, "learning_rate": 1.3450000000000002e-05, "log_odds_chosen": 3.114245891571045, "log_odds_ratio": -0.20222879946231842, "logits/chosen": -0.15233629941940308, "logits/rejected": 0.6302044987678528, "logps/chosen": -1.0312150716781616, "logps/rejected": -3.7412331104278564, "loss": 0.474, "nll_loss": 0.4537477493286133, "rewards/accuracies": 0.875, "rewards/chosen": -0.10312151163816452, "rewards/margins": 0.27100181579589844, "rewards/rejected": -0.37412333488464355, "step": 731 }, { "epoch": 0.4553654743390358, "grad_norm": 0.31833547353744507, "learning_rate": 1.3400000000000002e-05, "log_odds_chosen": 4.866644859313965, "log_odds_ratio": -0.10650589317083359, "logits/chosen": -0.30437642335891724, "logits/rejected": 0.5934653878211975, "logps/chosen": -0.6947398781776428, "logps/rejected": -4.713142395019531, "loss": 0.4473, "nll_loss": 0.436599999666214, "rewards/accuracies": 1.0, "rewards/chosen": -0.069473996758461, "rewards/margins": 0.4018402099609375, "rewards/rejected": -0.4713142216205597, "step": 732 }, { "epoch": 0.45598755832037324, "grad_norm": 0.37434032559394836, "learning_rate": 1.3350000000000001e-05, "log_odds_chosen": 4.45645809173584, "log_odds_ratio": -0.12092647701501846, "logits/chosen": 0.8990957736968994, "logits/rejected": 0.6855148077011108, "logps/chosen": -0.7242429256439209, "logps/rejected": -4.579753398895264, "loss": 0.5855, "nll_loss": 0.5733587741851807, "rewards/accuracies": 0.875, "rewards/chosen": -0.07242429256439209, "rewards/margins": 0.3855510354042053, "rewards/rejected": -0.4579753577709198, "step": 733 }, { "epoch": 0.45660964230171075, "grad_norm": 0.5148495435714722, "learning_rate": 1.3300000000000001e-05, "log_odds_chosen": 3.205353021621704, "log_odds_ratio": -0.24751462042331696, "logits/chosen": 1.291861653327942, "logits/rejected": 2.1259381771087646, "logps/chosen": -1.0250825881958008, "logps/rejected": -3.7852745056152344, "loss": 0.6724, "nll_loss": 0.6476327776908875, "rewards/accuracies": 0.875, "rewards/chosen": -0.10250826179981232, "rewards/margins": 0.27601921558380127, "rewards/rejected": -0.3785274624824524, "step": 734 }, { "epoch": 0.4572317262830482, "grad_norm": 0.4234316051006317, "learning_rate": 1.3250000000000002e-05, "log_odds_chosen": 4.532406806945801, "log_odds_ratio": -0.10275155305862427, "logits/chosen": 1.3938021659851074, "logits/rejected": 1.6723461151123047, "logps/chosen": -0.6356872916221619, "logps/rejected": -4.270766735076904, "loss": 0.7704, "nll_loss": 0.760124146938324, "rewards/accuracies": 1.0, "rewards/chosen": -0.06356872618198395, "rewards/margins": 0.3635079562664032, "rewards/rejected": -0.42707663774490356, "step": 735 }, { "epoch": 0.4578538102643857, "grad_norm": 0.5846631526947021, "learning_rate": 1.32e-05, "log_odds_chosen": 3.4026882648468018, "log_odds_ratio": -0.30543139576911926, "logits/chosen": 0.7343127131462097, "logits/rejected": 0.6888481378555298, "logps/chosen": -1.0733798742294312, "logps/rejected": -4.219671249389648, "loss": 0.5944, "nll_loss": 0.5638964176177979, "rewards/accuracies": 0.75, "rewards/chosen": -0.10733799636363983, "rewards/margins": 0.31462913751602173, "rewards/rejected": -0.42196714878082275, "step": 736 }, { "epoch": 0.45847589424572316, "grad_norm": 0.4446645975112915, "learning_rate": 1.3150000000000001e-05, "log_odds_chosen": 4.259887218475342, "log_odds_ratio": -0.12168325483798981, "logits/chosen": 2.147245407104492, "logits/rejected": 1.1088201999664307, "logps/chosen": -0.8293672204017639, "logps/rejected": -4.540325164794922, "loss": 0.6804, "nll_loss": 0.6682307720184326, "rewards/accuracies": 1.0, "rewards/chosen": -0.08293672651052475, "rewards/margins": 0.37109577655792236, "rewards/rejected": -0.4540324807167053, "step": 737 }, { "epoch": 0.45909797822706067, "grad_norm": 0.5487768650054932, "learning_rate": 1.3100000000000002e-05, "log_odds_chosen": 4.622917175292969, "log_odds_ratio": -0.19834764301776886, "logits/chosen": -0.7730895280838013, "logits/rejected": 0.3794826865196228, "logps/chosen": -1.0225191116333008, "logps/rejected": -5.2524261474609375, "loss": 0.3588, "nll_loss": 0.33894115686416626, "rewards/accuracies": 0.875, "rewards/chosen": -0.10225191712379456, "rewards/margins": 0.42299070954322815, "rewards/rejected": -0.5252426266670227, "step": 738 }, { "epoch": 0.4597200622083981, "grad_norm": 0.3344896733760834, "learning_rate": 1.305e-05, "log_odds_chosen": 2.9035658836364746, "log_odds_ratio": -0.22372430562973022, "logits/chosen": 0.3367067575454712, "logits/rejected": 0.7152668237686157, "logps/chosen": -0.7381393313407898, "logps/rejected": -3.179778575897217, "loss": 0.6071, "nll_loss": 0.5847198963165283, "rewards/accuracies": 1.0, "rewards/chosen": -0.07381393760442734, "rewards/margins": 0.24416393041610718, "rewards/rejected": -0.3179778456687927, "step": 739 }, { "epoch": 0.4603421461897356, "grad_norm": 0.5565928220748901, "learning_rate": 1.3000000000000001e-05, "log_odds_chosen": 2.2139623165130615, "log_odds_ratio": -0.3558502793312073, "logits/chosen": 1.9886784553527832, "logits/rejected": 1.4688613414764404, "logps/chosen": -0.8802056312561035, "logps/rejected": -2.744699001312256, "loss": 0.758, "nll_loss": 0.722421407699585, "rewards/accuracies": 0.75, "rewards/chosen": -0.08802057057619095, "rewards/margins": 0.18644936382770538, "rewards/rejected": -0.27446994185447693, "step": 740 }, { "epoch": 0.4609642301710731, "grad_norm": 0.3876553475856781, "learning_rate": 1.2950000000000001e-05, "log_odds_chosen": 4.107831001281738, "log_odds_ratio": -0.07306887209415436, "logits/chosen": -0.357515811920166, "logits/rejected": 0.5429799556732178, "logps/chosen": -0.9162774682044983, "logps/rejected": -4.474459171295166, "loss": 0.4091, "nll_loss": 0.40178635716438293, "rewards/accuracies": 1.0, "rewards/chosen": -0.09162774682044983, "rewards/margins": 0.3558181822299957, "rewards/rejected": -0.44744592905044556, "step": 741 }, { "epoch": 0.4615863141524106, "grad_norm": 0.3235586881637573, "learning_rate": 1.29e-05, "log_odds_chosen": 2.25211238861084, "log_odds_ratio": -0.3039625883102417, "logits/chosen": 0.7069388628005981, "logits/rejected": 2.00510835647583, "logps/chosen": -0.753617525100708, "logps/rejected": -2.387873649597168, "loss": 0.685, "nll_loss": 0.6546491384506226, "rewards/accuracies": 0.875, "rewards/chosen": -0.07536175847053528, "rewards/margins": 0.16342565417289734, "rewards/rejected": -0.23878741264343262, "step": 742 }, { "epoch": 0.46220839813374803, "grad_norm": 0.367152601480484, "learning_rate": 1.285e-05, "log_odds_chosen": 4.8678741455078125, "log_odds_ratio": -0.13750433921813965, "logits/chosen": 1.9285204410552979, "logits/rejected": 2.173274278640747, "logps/chosen": -0.8739117383956909, "logps/rejected": -5.209456443786621, "loss": 0.6599, "nll_loss": 0.6461677551269531, "rewards/accuracies": 1.0, "rewards/chosen": -0.08739117532968521, "rewards/margins": 0.433554470539093, "rewards/rejected": -0.52094566822052, "step": 743 }, { "epoch": 0.46283048211508554, "grad_norm": 0.6167296767234802, "learning_rate": 1.2800000000000001e-05, "log_odds_chosen": 4.1003737449646, "log_odds_ratio": -0.2411804348230362, "logits/chosen": 0.8901242613792419, "logits/rejected": 0.6706501841545105, "logps/chosen": -0.9060766696929932, "logps/rejected": -4.523157119750977, "loss": 0.6385, "nll_loss": 0.6144038438796997, "rewards/accuracies": 0.875, "rewards/chosen": -0.0906076729297638, "rewards/margins": 0.36170801520347595, "rewards/rejected": -0.45231568813323975, "step": 744 }, { "epoch": 0.463452566096423, "grad_norm": 0.4313476085662842, "learning_rate": 1.2750000000000002e-05, "log_odds_chosen": 5.386116981506348, "log_odds_ratio": -0.24166685342788696, "logits/chosen": 1.0413144826889038, "logits/rejected": 0.93155837059021, "logps/chosen": -0.8907713890075684, "logps/rejected": -5.863996982574463, "loss": 0.6041, "nll_loss": 0.5798915028572083, "rewards/accuracies": 0.75, "rewards/chosen": -0.08907714486122131, "rewards/margins": 0.49732255935668945, "rewards/rejected": -0.5863996744155884, "step": 745 }, { "epoch": 0.4640746500777605, "grad_norm": 0.2939267158508301, "learning_rate": 1.27e-05, "log_odds_chosen": 3.964977264404297, "log_odds_ratio": -0.10894638299942017, "logits/chosen": -0.354805588722229, "logits/rejected": 0.003582596778869629, "logps/chosen": -1.0284769535064697, "logps/rejected": -4.510259628295898, "loss": 0.4564, "nll_loss": 0.44550204277038574, "rewards/accuracies": 1.0, "rewards/chosen": -0.10284771025180817, "rewards/margins": 0.34817826747894287, "rewards/rejected": -0.45102596282958984, "step": 746 }, { "epoch": 0.464696734059098, "grad_norm": 0.34394949674606323, "learning_rate": 1.2650000000000001e-05, "log_odds_chosen": 4.815141677856445, "log_odds_ratio": -0.11680752038955688, "logits/chosen": 1.5240862369537354, "logits/rejected": 1.7166731357574463, "logps/chosen": -0.6423882246017456, "logps/rejected": -4.7596049308776855, "loss": 0.6477, "nll_loss": 0.6360157132148743, "rewards/accuracies": 1.0, "rewards/chosen": -0.06423883140087128, "rewards/margins": 0.41172167658805847, "rewards/rejected": -0.47596052289009094, "step": 747 }, { "epoch": 0.46531881804043546, "grad_norm": 0.3430861830711365, "learning_rate": 1.2600000000000001e-05, "log_odds_chosen": 3.8095812797546387, "log_odds_ratio": -0.20983168482780457, "logits/chosen": -0.6760037541389465, "logits/rejected": -0.1070479154586792, "logps/chosen": -0.6517633199691772, "logps/rejected": -3.8257155418395996, "loss": 0.5232, "nll_loss": 0.5022388696670532, "rewards/accuracies": 1.0, "rewards/chosen": -0.0651763305068016, "rewards/margins": 0.3173952102661133, "rewards/rejected": -0.38257157802581787, "step": 748 }, { "epoch": 0.46594090202177296, "grad_norm": 0.3617217242717743, "learning_rate": 1.255e-05, "log_odds_chosen": 3.6365814208984375, "log_odds_ratio": -0.1220984011888504, "logits/chosen": 2.1960654258728027, "logits/rejected": 1.0074217319488525, "logps/chosen": -0.78511643409729, "logps/rejected": -3.9075863361358643, "loss": 0.6951, "nll_loss": 0.6828843355178833, "rewards/accuracies": 1.0, "rewards/chosen": -0.07851164042949677, "rewards/margins": 0.31224697828292847, "rewards/rejected": -0.3907586336135864, "step": 749 }, { "epoch": 0.4665629860031104, "grad_norm": 0.5665165781974792, "learning_rate": 1.25e-05, "log_odds_chosen": 2.4751880168914795, "log_odds_ratio": -0.3891245722770691, "logits/chosen": 1.7859808206558228, "logits/rejected": 0.44065773487091064, "logps/chosen": -0.7846461534500122, "logps/rejected": -2.9159486293792725, "loss": 0.6791, "nll_loss": 0.640195369720459, "rewards/accuracies": 0.75, "rewards/chosen": -0.07846461236476898, "rewards/margins": 0.21313023567199707, "rewards/rejected": -0.29159486293792725, "step": 750 }, { "epoch": 0.4671850699844479, "grad_norm": 1.5871644020080566, "learning_rate": 1.2450000000000001e-05, "log_odds_chosen": 4.6841936111450195, "log_odds_ratio": -0.0442461296916008, "logits/chosen": 2.642913579940796, "logits/rejected": 1.204238772392273, "logps/chosen": -1.1520380973815918, "logps/rejected": -5.388706207275391, "loss": 0.802, "nll_loss": 0.7975590825080872, "rewards/accuracies": 1.0, "rewards/chosen": -0.11520381271839142, "rewards/margins": 0.4236668646335602, "rewards/rejected": -0.5388706922531128, "step": 751 }, { "epoch": 0.46780715396578537, "grad_norm": 0.423178493976593, "learning_rate": 1.24e-05, "log_odds_chosen": 3.278820514678955, "log_odds_ratio": -0.3369329571723938, "logits/chosen": 2.5465831756591797, "logits/rejected": 2.608609676361084, "logps/chosen": -0.9291982650756836, "logps/rejected": -3.932260751724243, "loss": 0.8509, "nll_loss": 0.817159116268158, "rewards/accuracies": 0.75, "rewards/chosen": -0.09291982650756836, "rewards/margins": 0.3003062605857849, "rewards/rejected": -0.39322608709335327, "step": 752 }, { "epoch": 0.4684292379471229, "grad_norm": 0.2830430269241333, "learning_rate": 1.235e-05, "log_odds_chosen": 3.9936811923980713, "log_odds_ratio": -0.26642856001853943, "logits/chosen": 0.5490342378616333, "logits/rejected": 1.9696109294891357, "logps/chosen": -1.0759899616241455, "logps/rejected": -4.762292861938477, "loss": 0.6052, "nll_loss": 0.5785794258117676, "rewards/accuracies": 0.75, "rewards/chosen": -0.10759899765253067, "rewards/margins": 0.3686302602291107, "rewards/rejected": -0.4762292504310608, "step": 753 }, { "epoch": 0.46905132192846033, "grad_norm": 0.37960949540138245, "learning_rate": 1.23e-05, "log_odds_chosen": 4.016284942626953, "log_odds_ratio": -0.3642813265323639, "logits/chosen": 0.7298337817192078, "logits/rejected": 1.9862333536148071, "logps/chosen": -0.7522934079170227, "logps/rejected": -4.1859636306762695, "loss": 0.5958, "nll_loss": 0.559417724609375, "rewards/accuracies": 0.75, "rewards/chosen": -0.07522933930158615, "rewards/margins": 0.3433670401573181, "rewards/rejected": -0.41859638690948486, "step": 754 }, { "epoch": 0.46967340590979784, "grad_norm": 0.3662182688713074, "learning_rate": 1.225e-05, "log_odds_chosen": 6.478982448577881, "log_odds_ratio": -0.08034564554691315, "logits/chosen": 1.0255630016326904, "logits/rejected": 0.4613604247570038, "logps/chosen": -0.8740299344062805, "logps/rejected": -6.873804092407227, "loss": 0.6471, "nll_loss": 0.6390318870544434, "rewards/accuracies": 1.0, "rewards/chosen": -0.08740299940109253, "rewards/margins": 0.5999773740768433, "rewards/rejected": -0.6873804330825806, "step": 755 }, { "epoch": 0.4702954898911353, "grad_norm": 1.2985732555389404, "learning_rate": 1.22e-05, "log_odds_chosen": 3.5273189544677734, "log_odds_ratio": -0.33595922589302063, "logits/chosen": -0.20280686020851135, "logits/rejected": 1.2919225692749023, "logps/chosen": -1.2622438669204712, "logps/rejected": -4.5340962409973145, "loss": 0.5605, "nll_loss": 0.5268831849098206, "rewards/accuracies": 0.625, "rewards/chosen": -0.12622438371181488, "rewards/margins": 0.3271852433681488, "rewards/rejected": -0.4534096419811249, "step": 756 }, { "epoch": 0.4709175738724728, "grad_norm": 2.0065884590148926, "learning_rate": 1.215e-05, "log_odds_chosen": 3.059215784072876, "log_odds_ratio": -0.27708762884140015, "logits/chosen": 0.45762962102890015, "logits/rejected": 0.717374324798584, "logps/chosen": -1.0753413438796997, "logps/rejected": -3.8107690811157227, "loss": 0.6642, "nll_loss": 0.6364901065826416, "rewards/accuracies": 0.75, "rewards/chosen": -0.10753414034843445, "rewards/margins": 0.27354276180267334, "rewards/rejected": -0.3810769021511078, "step": 757 }, { "epoch": 0.47153965785381025, "grad_norm": 5.8776164054870605, "learning_rate": 1.2100000000000001e-05, "log_odds_chosen": 4.531339168548584, "log_odds_ratio": -0.12009022384881973, "logits/chosen": 0.6293901801109314, "logits/rejected": 0.4070852994918823, "logps/chosen": -1.4273791313171387, "logps/rejected": -5.634232521057129, "loss": 0.901, "nll_loss": 0.888999879360199, "rewards/accuracies": 0.875, "rewards/chosen": -0.14273792505264282, "rewards/margins": 0.4206853210926056, "rewards/rejected": -0.563423216342926, "step": 758 }, { "epoch": 0.47216174183514775, "grad_norm": 0.2695918083190918, "learning_rate": 1.205e-05, "log_odds_chosen": 2.9982948303222656, "log_odds_ratio": -0.26615574955940247, "logits/chosen": -0.7575415372848511, "logits/rejected": 0.376910924911499, "logps/chosen": -1.1440861225128174, "logps/rejected": -3.7636454105377197, "loss": 0.47, "nll_loss": 0.4433676302433014, "rewards/accuracies": 0.875, "rewards/chosen": -0.11440862715244293, "rewards/margins": 0.2619559168815613, "rewards/rejected": -0.3763645589351654, "step": 759 }, { "epoch": 0.4727838258164852, "grad_norm": 0.376305490732193, "learning_rate": 1.2e-05, "log_odds_chosen": 3.4311892986297607, "log_odds_ratio": -0.199777290225029, "logits/chosen": 1.3470544815063477, "logits/rejected": 0.804937481880188, "logps/chosen": -0.9575113654136658, "logps/rejected": -4.017138481140137, "loss": 0.6356, "nll_loss": 0.6155799627304077, "rewards/accuracies": 1.0, "rewards/chosen": -0.09575113654136658, "rewards/margins": 0.3059626817703247, "rewards/rejected": -0.40171384811401367, "step": 760 }, { "epoch": 0.4734059097978227, "grad_norm": 0.3834342956542969, "learning_rate": 1.195e-05, "log_odds_chosen": 1.3307521343231201, "log_odds_ratio": -0.4671354591846466, "logits/chosen": 1.2574310302734375, "logits/rejected": 1.2822082042694092, "logps/chosen": -0.9174765944480896, "logps/rejected": -2.0750315189361572, "loss": 0.685, "nll_loss": 0.638303279876709, "rewards/accuracies": 0.875, "rewards/chosen": -0.09174765646457672, "rewards/margins": 0.11575548350811005, "rewards/rejected": -0.20750313997268677, "step": 761 }, { "epoch": 0.47402799377916016, "grad_norm": 0.6633325219154358, "learning_rate": 1.19e-05, "log_odds_chosen": 2.5100934505462646, "log_odds_ratio": -0.3733833432197571, "logits/chosen": 3.2173590660095215, "logits/rejected": 3.156155824661255, "logps/chosen": -0.7270236611366272, "logps/rejected": -2.6550979614257812, "loss": 0.8232, "nll_loss": 0.7858138084411621, "rewards/accuracies": 0.875, "rewards/chosen": -0.07270237058401108, "rewards/margins": 0.19280743598937988, "rewards/rejected": -0.26550978422164917, "step": 762 }, { "epoch": 0.47465007776049767, "grad_norm": 0.6370331048965454, "learning_rate": 1.185e-05, "log_odds_chosen": 4.554332256317139, "log_odds_ratio": -0.28026673197746277, "logits/chosen": 1.6826764345169067, "logits/rejected": 0.6190115213394165, "logps/chosen": -1.153404712677002, "logps/rejected": -5.1978960037231445, "loss": 0.6612, "nll_loss": 0.6331894397735596, "rewards/accuracies": 0.875, "rewards/chosen": -0.11534047871828079, "rewards/margins": 0.40444907546043396, "rewards/rejected": -0.5197895765304565, "step": 763 }, { "epoch": 0.4752721617418352, "grad_norm": 0.5234032869338989, "learning_rate": 1.18e-05, "log_odds_chosen": 2.557978391647339, "log_odds_ratio": -0.5858641266822815, "logits/chosen": 0.9482438564300537, "logits/rejected": 0.596887469291687, "logps/chosen": -1.0016982555389404, "logps/rejected": -3.3711018562316895, "loss": 0.632, "nll_loss": 0.5734348893165588, "rewards/accuracies": 0.5, "rewards/chosen": -0.1001698300242424, "rewards/margins": 0.2369403839111328, "rewards/rejected": -0.3371102213859558, "step": 764 }, { "epoch": 0.4758942457231726, "grad_norm": 0.39715540409088135, "learning_rate": 1.175e-05, "log_odds_chosen": 3.6863224506378174, "log_odds_ratio": -0.15334059298038483, "logits/chosen": 2.0271711349487305, "logits/rejected": 0.7028290033340454, "logps/chosen": -0.8889638781547546, "logps/rejected": -4.144840240478516, "loss": 0.7578, "nll_loss": 0.7424666881561279, "rewards/accuracies": 0.875, "rewards/chosen": -0.08889639377593994, "rewards/margins": 0.325587660074234, "rewards/rejected": -0.41448402404785156, "step": 765 }, { "epoch": 0.47651632970451013, "grad_norm": 0.3871302604675293, "learning_rate": 1.1700000000000001e-05, "log_odds_chosen": 2.7272534370422363, "log_odds_ratio": -0.3169942796230316, "logits/chosen": 1.3704383373260498, "logits/rejected": 0.95868980884552, "logps/chosen": -1.1230028867721558, "logps/rejected": -3.561866283416748, "loss": 0.694, "nll_loss": 0.6622994542121887, "rewards/accuracies": 0.625, "rewards/chosen": -0.11230029910802841, "rewards/margins": 0.24388636648654938, "rewards/rejected": -0.3561866581439972, "step": 766 }, { "epoch": 0.4771384136858476, "grad_norm": 0.41518551111221313, "learning_rate": 1.1650000000000002e-05, "log_odds_chosen": 2.6603739261627197, "log_odds_ratio": -0.24516430497169495, "logits/chosen": 2.105510950088501, "logits/rejected": 0.4677017629146576, "logps/chosen": -0.8573104739189148, "logps/rejected": -3.140277624130249, "loss": 0.6672, "nll_loss": 0.6426551342010498, "rewards/accuracies": 0.875, "rewards/chosen": -0.08573104441165924, "rewards/margins": 0.22829669713974, "rewards/rejected": -0.3140277564525604, "step": 767 }, { "epoch": 0.4777604976671851, "grad_norm": 0.4271754026412964, "learning_rate": 1.16e-05, "log_odds_chosen": 3.0503485202789307, "log_odds_ratio": -0.2287687361240387, "logits/chosen": 2.8986668586730957, "logits/rejected": 1.9720364809036255, "logps/chosen": -0.7971369624137878, "logps/rejected": -3.1767632961273193, "loss": 0.8597, "nll_loss": 0.8368232250213623, "rewards/accuracies": 1.0, "rewards/chosen": -0.07971370220184326, "rewards/margins": 0.23796263337135315, "rewards/rejected": -0.317676305770874, "step": 768 }, { "epoch": 0.47838258164852254, "grad_norm": 0.5015910863876343, "learning_rate": 1.1550000000000001e-05, "log_odds_chosen": 4.93192720413208, "log_odds_ratio": -0.18531693518161774, "logits/chosen": 2.360891580581665, "logits/rejected": 0.9528889656066895, "logps/chosen": -0.8603143692016602, "logps/rejected": -5.222134590148926, "loss": 0.7677, "nll_loss": 0.7491949200630188, "rewards/accuracies": 0.875, "rewards/chosen": -0.08603143692016602, "rewards/margins": 0.43618205189704895, "rewards/rejected": -0.5222134590148926, "step": 769 }, { "epoch": 0.47900466562986005, "grad_norm": 1.472103238105774, "learning_rate": 1.1500000000000002e-05, "log_odds_chosen": 2.148057699203491, "log_odds_ratio": -0.39299821853637695, "logits/chosen": 0.8005610108375549, "logits/rejected": -0.056345269083976746, "logps/chosen": -1.6142408847808838, "logps/rejected": -3.466493844985962, "loss": 0.8477, "nll_loss": 0.8084092736244202, "rewards/accuracies": 0.625, "rewards/chosen": -0.16142407059669495, "rewards/margins": 0.18522529304027557, "rewards/rejected": -0.3466493785381317, "step": 770 }, { "epoch": 0.4796267496111975, "grad_norm": 0.3125849664211273, "learning_rate": 1.145e-05, "log_odds_chosen": 4.567728519439697, "log_odds_ratio": -0.20986464619636536, "logits/chosen": 0.3897290825843811, "logits/rejected": 1.1200284957885742, "logps/chosen": -0.8692652583122253, "logps/rejected": -4.947718620300293, "loss": 0.5962, "nll_loss": 0.5752478837966919, "rewards/accuracies": 0.75, "rewards/chosen": -0.08692653477191925, "rewards/margins": 0.40784531831741333, "rewards/rejected": -0.4947718381881714, "step": 771 }, { "epoch": 0.480248833592535, "grad_norm": 1.0467990636825562, "learning_rate": 1.1400000000000001e-05, "log_odds_chosen": 3.529940128326416, "log_odds_ratio": -0.1735749989748001, "logits/chosen": 2.3256747722625732, "logits/rejected": 1.5398943424224854, "logps/chosen": -1.0427285432815552, "logps/rejected": -4.057622909545898, "loss": 0.7398, "nll_loss": 0.7224363684654236, "rewards/accuracies": 1.0, "rewards/chosen": -0.10427285730838776, "rewards/margins": 0.3014894723892212, "rewards/rejected": -0.40576231479644775, "step": 772 }, { "epoch": 0.48087091757387246, "grad_norm": 0.3493899703025818, "learning_rate": 1.1350000000000001e-05, "log_odds_chosen": 5.22404146194458, "log_odds_ratio": -0.024094879627227783, "logits/chosen": 1.574556589126587, "logits/rejected": -0.5061302185058594, "logps/chosen": -0.9601320028305054, "logps/rejected": -5.636563301086426, "loss": 0.4969, "nll_loss": 0.4944761395454407, "rewards/accuracies": 1.0, "rewards/chosen": -0.09601320326328278, "rewards/margins": 0.4676430821418762, "rewards/rejected": -0.5636562705039978, "step": 773 }, { "epoch": 0.48149300155520997, "grad_norm": 0.33069247007369995, "learning_rate": 1.13e-05, "log_odds_chosen": 5.7939653396606445, "log_odds_ratio": -0.047045718878507614, "logits/chosen": 2.868722438812256, "logits/rejected": 1.0096173286437988, "logps/chosen": -0.8514919877052307, "logps/rejected": -6.00200080871582, "loss": 0.8638, "nll_loss": 0.8591352701187134, "rewards/accuracies": 1.0, "rewards/chosen": -0.08514919877052307, "rewards/margins": 0.5150509476661682, "rewards/rejected": -0.6002000570297241, "step": 774 }, { "epoch": 0.4821150855365474, "grad_norm": 0.3597210645675659, "learning_rate": 1.125e-05, "log_odds_chosen": 2.536614179611206, "log_odds_ratio": -0.3833247125148773, "logits/chosen": 1.416181206703186, "logits/rejected": 2.180039405822754, "logps/chosen": -0.8901622891426086, "logps/rejected": -3.139539957046509, "loss": 0.6833, "nll_loss": 0.6449413299560547, "rewards/accuracies": 0.75, "rewards/chosen": -0.08901622891426086, "rewards/margins": 0.2249377965927124, "rewards/rejected": -0.31395402550697327, "step": 775 }, { "epoch": 0.4827371695178849, "grad_norm": 0.2865667939186096, "learning_rate": 1.1200000000000001e-05, "log_odds_chosen": 5.424200057983398, "log_odds_ratio": -0.09211785346269608, "logits/chosen": 1.124145269393921, "logits/rejected": -0.09277313947677612, "logps/chosen": -0.867756187915802, "logps/rejected": -5.792477607727051, "loss": 0.5922, "nll_loss": 0.583034098148346, "rewards/accuracies": 0.875, "rewards/chosen": -0.08677561581134796, "rewards/margins": 0.4924721419811249, "rewards/rejected": -0.579247772693634, "step": 776 }, { "epoch": 0.4833592534992224, "grad_norm": 0.27115461230278015, "learning_rate": 1.115e-05, "log_odds_chosen": 3.815460443496704, "log_odds_ratio": -0.20083631575107574, "logits/chosen": 0.29774150252342224, "logits/rejected": 0.6260338425636292, "logps/chosen": -0.8240159749984741, "logps/rejected": -4.16973876953125, "loss": 0.5038, "nll_loss": 0.48371002078056335, "rewards/accuracies": 0.875, "rewards/chosen": -0.0824015885591507, "rewards/margins": 0.3345722556114197, "rewards/rejected": -0.41697388887405396, "step": 777 }, { "epoch": 0.4839813374805599, "grad_norm": 0.38135629892349243, "learning_rate": 1.11e-05, "log_odds_chosen": 4.489835262298584, "log_odds_ratio": -0.26796337962150574, "logits/chosen": 1.5280147790908813, "logits/rejected": 1.375030755996704, "logps/chosen": -0.9886677265167236, "logps/rejected": -5.18032693862915, "loss": 0.6699, "nll_loss": 0.6430768966674805, "rewards/accuracies": 0.875, "rewards/chosen": -0.0988667756319046, "rewards/margins": 0.4191659092903137, "rewards/rejected": -0.5180326700210571, "step": 778 }, { "epoch": 0.48460342146189733, "grad_norm": 0.33285027742385864, "learning_rate": 1.1050000000000001e-05, "log_odds_chosen": 6.08347749710083, "log_odds_ratio": -0.008807210251688957, "logits/chosen": 0.8070425987243652, "logits/rejected": 0.062301695346832275, "logps/chosen": -0.9627847075462341, "logps/rejected": -6.553963661193848, "loss": 0.5153, "nll_loss": 0.5144031643867493, "rewards/accuracies": 1.0, "rewards/chosen": -0.09627847373485565, "rewards/margins": 0.5591179132461548, "rewards/rejected": -0.6553963422775269, "step": 779 }, { "epoch": 0.48522550544323484, "grad_norm": 0.28858453035354614, "learning_rate": 1.1000000000000001e-05, "log_odds_chosen": 4.537181377410889, "log_odds_ratio": -0.16321076452732086, "logits/chosen": 0.9505159258842468, "logits/rejected": 1.2651251554489136, "logps/chosen": -0.49642449617385864, "logps/rejected": -4.0449538230896, "loss": 0.5712, "nll_loss": 0.5549124479293823, "rewards/accuracies": 0.875, "rewards/chosen": -0.049642451107501984, "rewards/margins": 0.35485297441482544, "rewards/rejected": -0.4044954180717468, "step": 780 }, { "epoch": 0.4858475894245723, "grad_norm": 0.31122761964797974, "learning_rate": 1.095e-05, "log_odds_chosen": 3.597323179244995, "log_odds_ratio": -0.22623345255851746, "logits/chosen": -0.918379545211792, "logits/rejected": 0.03294803947210312, "logps/chosen": -0.9022977352142334, "logps/rejected": -4.053953647613525, "loss": 0.4282, "nll_loss": 0.40558505058288574, "rewards/accuracies": 0.875, "rewards/chosen": -0.09022977203130722, "rewards/margins": 0.31516557931900024, "rewards/rejected": -0.40539535880088806, "step": 781 }, { "epoch": 0.4864696734059098, "grad_norm": 0.3407188057899475, "learning_rate": 1.09e-05, "log_odds_chosen": 4.901708602905273, "log_odds_ratio": -0.08885428309440613, "logits/chosen": 1.8469200134277344, "logits/rejected": 0.9414504766464233, "logps/chosen": -0.7245330810546875, "logps/rejected": -4.952267169952393, "loss": 0.6573, "nll_loss": 0.6484310626983643, "rewards/accuracies": 1.0, "rewards/chosen": -0.07245330512523651, "rewards/margins": 0.42277342081069946, "rewards/rejected": -0.49522674083709717, "step": 782 }, { "epoch": 0.4870917573872473, "grad_norm": 0.41091054677963257, "learning_rate": 1.0850000000000001e-05, "log_odds_chosen": 4.862772464752197, "log_odds_ratio": -0.10557924956083298, "logits/chosen": 0.577217161655426, "logits/rejected": 0.0676531195640564, "logps/chosen": -0.8654823303222656, "logps/rejected": -5.2216901779174805, "loss": 0.5218, "nll_loss": 0.5112905502319336, "rewards/accuracies": 0.875, "rewards/chosen": -0.08654823154211044, "rewards/margins": 0.43562084436416626, "rewards/rejected": -0.5221690535545349, "step": 783 }, { "epoch": 0.48771384136858476, "grad_norm": 0.43482547998428345, "learning_rate": 1.08e-05, "log_odds_chosen": 2.8547940254211426, "log_odds_ratio": -0.5087026357650757, "logits/chosen": 1.314387559890747, "logits/rejected": 0.8139910697937012, "logps/chosen": -1.0577054023742676, "logps/rejected": -3.741490364074707, "loss": 0.6691, "nll_loss": 0.618266224861145, "rewards/accuracies": 0.625, "rewards/chosen": -0.105770543217659, "rewards/margins": 0.26837849617004395, "rewards/rejected": -0.37414902448654175, "step": 784 }, { "epoch": 0.48833592534992226, "grad_norm": 0.3116896152496338, "learning_rate": 1.075e-05, "log_odds_chosen": 4.188148498535156, "log_odds_ratio": -0.24271881580352783, "logits/chosen": 0.7602165937423706, "logits/rejected": 1.3262357711791992, "logps/chosen": -0.8295053243637085, "logps/rejected": -4.384270191192627, "loss": 0.6448, "nll_loss": 0.6205779910087585, "rewards/accuracies": 0.75, "rewards/chosen": -0.08295053988695145, "rewards/margins": 0.3554764986038208, "rewards/rejected": -0.43842706084251404, "step": 785 }, { "epoch": 0.4889580093312597, "grad_norm": 0.6962890028953552, "learning_rate": 1.0700000000000001e-05, "log_odds_chosen": 3.2535345554351807, "log_odds_ratio": -0.12973089516162872, "logits/chosen": 0.8944046497344971, "logits/rejected": -0.8863468170166016, "logps/chosen": -1.5091285705566406, "logps/rejected": -4.428877353668213, "loss": 0.5866, "nll_loss": 0.5735969543457031, "rewards/accuracies": 1.0, "rewards/chosen": -0.1509128361940384, "rewards/margins": 0.29197490215301514, "rewards/rejected": -0.4428877830505371, "step": 786 }, { "epoch": 0.4895800933125972, "grad_norm": 0.41683006286621094, "learning_rate": 1.065e-05, "log_odds_chosen": 3.8502464294433594, "log_odds_ratio": -0.18103164434432983, "logits/chosen": 1.4110684394836426, "logits/rejected": 0.5764380097389221, "logps/chosen": -0.7307590246200562, "logps/rejected": -4.002379417419434, "loss": 0.4647, "nll_loss": 0.446548193693161, "rewards/accuracies": 0.875, "rewards/chosen": -0.07307590544223785, "rewards/margins": 0.3271620273590088, "rewards/rejected": -0.40023791790008545, "step": 787 }, { "epoch": 0.49020217729393467, "grad_norm": 0.3538471460342407, "learning_rate": 1.06e-05, "log_odds_chosen": 4.150043964385986, "log_odds_ratio": -0.12563171982765198, "logits/chosen": 1.96152925491333, "logits/rejected": 0.12847623229026794, "logps/chosen": -0.7252289056777954, "logps/rejected": -4.287262439727783, "loss": 0.621, "nll_loss": 0.6084175109863281, "rewards/accuracies": 0.875, "rewards/chosen": -0.07252289354801178, "rewards/margins": 0.3562033474445343, "rewards/rejected": -0.4287262260913849, "step": 788 }, { "epoch": 0.4908242612752722, "grad_norm": 0.3300181031227112, "learning_rate": 1.055e-05, "log_odds_chosen": 3.877500057220459, "log_odds_ratio": -0.13669435679912567, "logits/chosen": 2.18703031539917, "logits/rejected": 2.082003116607666, "logps/chosen": -0.8657932877540588, "logps/rejected": -4.306628704071045, "loss": 0.7181, "nll_loss": 0.7044727206230164, "rewards/accuracies": 0.875, "rewards/chosen": -0.0865793377161026, "rewards/margins": 0.3440835475921631, "rewards/rejected": -0.4306628704071045, "step": 789 }, { "epoch": 0.49144634525660963, "grad_norm": 0.43428757786750793, "learning_rate": 1.05e-05, "log_odds_chosen": 2.6995761394500732, "log_odds_ratio": -0.38942408561706543, "logits/chosen": 2.258654832839966, "logits/rejected": 2.685004234313965, "logps/chosen": -0.883932888507843, "logps/rejected": -3.241953134536743, "loss": 0.7016, "nll_loss": 0.6626549363136292, "rewards/accuracies": 0.625, "rewards/chosen": -0.08839329332113266, "rewards/margins": 0.23580200970172882, "rewards/rejected": -0.3241952955722809, "step": 790 }, { "epoch": 0.49206842923794714, "grad_norm": 0.31180664896965027, "learning_rate": 1.045e-05, "log_odds_chosen": 5.281048774719238, "log_odds_ratio": -0.00905335508286953, "logits/chosen": 1.120410680770874, "logits/rejected": 0.44220006465911865, "logps/chosen": -0.8353912830352783, "logps/rejected": -5.5110182762146, "loss": 0.4997, "nll_loss": 0.4988272786140442, "rewards/accuracies": 1.0, "rewards/chosen": -0.08353912830352783, "rewards/margins": 0.467562735080719, "rewards/rejected": -0.551101803779602, "step": 791 }, { "epoch": 0.4926905132192846, "grad_norm": 0.4472506642341614, "learning_rate": 1.04e-05, "log_odds_chosen": 3.6429500579833984, "log_odds_ratio": -0.186533123254776, "logits/chosen": 1.0584020614624023, "logits/rejected": 1.7685458660125732, "logps/chosen": -0.9058350920677185, "logps/rejected": -4.094951629638672, "loss": 0.6775, "nll_loss": 0.658807635307312, "rewards/accuracies": 0.875, "rewards/chosen": -0.09058351069688797, "rewards/margins": 0.3189116418361664, "rewards/rejected": -0.40949517488479614, "step": 792 }, { "epoch": 0.4933125972006221, "grad_norm": 0.5813391208648682, "learning_rate": 1.035e-05, "log_odds_chosen": 4.276451110839844, "log_odds_ratio": -0.13493305444717407, "logits/chosen": 0.2429172396659851, "logits/rejected": -0.4983159899711609, "logps/chosen": -1.4180018901824951, "logps/rejected": -5.342185020446777, "loss": 0.5255, "nll_loss": 0.5120512843132019, "rewards/accuracies": 0.875, "rewards/chosen": -0.141800194978714, "rewards/margins": 0.3924183249473572, "rewards/rejected": -0.5342184901237488, "step": 793 }, { "epoch": 0.49393468118195955, "grad_norm": 0.368551105260849, "learning_rate": 1.03e-05, "log_odds_chosen": 3.7925801277160645, "log_odds_ratio": -0.21110907196998596, "logits/chosen": 2.3769233226776123, "logits/rejected": 0.7364627122879028, "logps/chosen": -0.8370175361633301, "logps/rejected": -4.201289176940918, "loss": 0.656, "nll_loss": 0.634913444519043, "rewards/accuracies": 0.875, "rewards/chosen": -0.08370175957679749, "rewards/margins": 0.33642715215682983, "rewards/rejected": -0.42012888193130493, "step": 794 }, { "epoch": 0.49455676516329705, "grad_norm": 0.34030696749687195, "learning_rate": 1.025e-05, "log_odds_chosen": 3.466846227645874, "log_odds_ratio": -0.19665193557739258, "logits/chosen": 1.3573992252349854, "logits/rejected": 0.5698047280311584, "logps/chosen": -0.7727445363998413, "logps/rejected": -3.6376049518585205, "loss": 0.558, "nll_loss": 0.5383586287498474, "rewards/accuracies": 0.875, "rewards/chosen": -0.07727445662021637, "rewards/margins": 0.28648605942726135, "rewards/rejected": -0.36376050114631653, "step": 795 }, { "epoch": 0.4951788491446345, "grad_norm": 2.465184211730957, "learning_rate": 1.02e-05, "log_odds_chosen": 3.1407623291015625, "log_odds_ratio": -0.2292679399251938, "logits/chosen": 0.6717162132263184, "logits/rejected": 0.7924606800079346, "logps/chosen": -1.6437124013900757, "logps/rejected": -4.459639549255371, "loss": 0.7352, "nll_loss": 0.7122251987457275, "rewards/accuracies": 0.875, "rewards/chosen": -0.16437125205993652, "rewards/margins": 0.2815927267074585, "rewards/rejected": -0.44596394896507263, "step": 796 }, { "epoch": 0.495800933125972, "grad_norm": 0.9437676668167114, "learning_rate": 1.0150000000000001e-05, "log_odds_chosen": 4.749540328979492, "log_odds_ratio": -0.1930580735206604, "logits/chosen": 1.5148296356201172, "logits/rejected": 0.5548698902130127, "logps/chosen": -0.7817761898040771, "logps/rejected": -5.032675266265869, "loss": 0.6243, "nll_loss": 0.6050349473953247, "rewards/accuracies": 0.875, "rewards/chosen": -0.07817761600017548, "rewards/margins": 0.425089955329895, "rewards/rejected": -0.5032675266265869, "step": 797 }, { "epoch": 0.49642301710730946, "grad_norm": 0.7716901898384094, "learning_rate": 1.0100000000000002e-05, "log_odds_chosen": 4.870780944824219, "log_odds_ratio": -0.11819732189178467, "logits/chosen": 0.3446436822414398, "logits/rejected": -0.5686097145080566, "logps/chosen": -0.8253393173217773, "logps/rejected": -5.124600410461426, "loss": 0.4937, "nll_loss": 0.48190826177597046, "rewards/accuracies": 0.875, "rewards/chosen": -0.08253393322229385, "rewards/margins": 0.4299260973930359, "rewards/rejected": -0.5124600529670715, "step": 798 }, { "epoch": 0.49704510108864697, "grad_norm": 0.3625529706478119, "learning_rate": 1.005e-05, "log_odds_chosen": 5.230063438415527, "log_odds_ratio": -0.08902789652347565, "logits/chosen": 1.6205828189849854, "logits/rejected": 1.2859604358673096, "logps/chosen": -1.2735844850540161, "logps/rejected": -6.1855292320251465, "loss": 0.6788, "nll_loss": 0.6698963642120361, "rewards/accuracies": 1.0, "rewards/chosen": -0.12735846638679504, "rewards/margins": 0.491194486618042, "rewards/rejected": -0.6185529828071594, "step": 799 }, { "epoch": 0.4976671850699845, "grad_norm": 0.42168471217155457, "learning_rate": 1e-05, "log_odds_chosen": 2.8343665599823, "log_odds_ratio": -0.29494884610176086, "logits/chosen": 2.636620283126831, "logits/rejected": 1.8547230958938599, "logps/chosen": -0.8090919256210327, "logps/rejected": -3.2238035202026367, "loss": 0.7996, "nll_loss": 0.77005934715271, "rewards/accuracies": 0.75, "rewards/chosen": -0.08090918511152267, "rewards/margins": 0.24147115647792816, "rewards/rejected": -0.32238033413887024, "step": 800 }, { "epoch": 0.4982892690513219, "grad_norm": 0.37538668513298035, "learning_rate": 9.950000000000001e-06, "log_odds_chosen": 4.194415092468262, "log_odds_ratio": -0.22797462344169617, "logits/chosen": 2.708955764770508, "logits/rejected": 0.8517798781394958, "logps/chosen": -0.9027718305587769, "logps/rejected": -4.7235212326049805, "loss": 0.7717, "nll_loss": 0.7489374876022339, "rewards/accuracies": 0.75, "rewards/chosen": -0.09027719497680664, "rewards/margins": 0.38207489252090454, "rewards/rejected": -0.4723520874977112, "step": 801 }, { "epoch": 0.49891135303265943, "grad_norm": 0.3566712737083435, "learning_rate": 9.900000000000002e-06, "log_odds_chosen": 3.805783271789551, "log_odds_ratio": -0.34034591913223267, "logits/chosen": 1.3816561698913574, "logits/rejected": 0.4715338349342346, "logps/chosen": -0.8862382173538208, "logps/rejected": -4.359492301940918, "loss": 0.6453, "nll_loss": 0.6112942695617676, "rewards/accuracies": 0.625, "rewards/chosen": -0.08862382173538208, "rewards/margins": 0.3473253846168518, "rewards/rejected": -0.4359492361545563, "step": 802 }, { "epoch": 0.4995334370139969, "grad_norm": 0.3383307158946991, "learning_rate": 9.85e-06, "log_odds_chosen": 3.2576406002044678, "log_odds_ratio": -0.11606215685606003, "logits/chosen": 0.8672167062759399, "logits/rejected": 0.9878823757171631, "logps/chosen": -0.6832941770553589, "logps/rejected": -3.2288990020751953, "loss": 0.6006, "nll_loss": 0.5890172719955444, "rewards/accuracies": 1.0, "rewards/chosen": -0.06832942366600037, "rewards/margins": 0.2545604705810547, "rewards/rejected": -0.32288989424705505, "step": 803 }, { "epoch": 0.5001555209953343, "grad_norm": 1.342377781867981, "learning_rate": 9.800000000000001e-06, "log_odds_chosen": 3.6162753105163574, "log_odds_ratio": -0.22811348736286163, "logits/chosen": 0.25409284234046936, "logits/rejected": 0.6416158676147461, "logps/chosen": -0.8462536334991455, "logps/rejected": -4.046585559844971, "loss": 0.5652, "nll_loss": 0.5423653721809387, "rewards/accuracies": 0.875, "rewards/chosen": -0.08462535589933395, "rewards/margins": 0.3200331926345825, "rewards/rejected": -0.40465855598449707, "step": 804 }, { "epoch": 0.5007776049766719, "grad_norm": 0.38830915093421936, "learning_rate": 9.750000000000002e-06, "log_odds_chosen": 3.037473201751709, "log_odds_ratio": -0.4166441559791565, "logits/chosen": 2.476818561553955, "logits/rejected": 1.7838196754455566, "logps/chosen": -1.0396037101745605, "logps/rejected": -3.9347009658813477, "loss": 0.7205, "nll_loss": 0.6788171529769897, "rewards/accuracies": 0.625, "rewards/chosen": -0.10396037995815277, "rewards/margins": 0.28950971364974976, "rewards/rejected": -0.3934701085090637, "step": 805 }, { "epoch": 0.5013996889580093, "grad_norm": 0.37463346123695374, "learning_rate": 9.7e-06, "log_odds_chosen": 4.786480903625488, "log_odds_ratio": -0.08557663857936859, "logits/chosen": 1.0099029541015625, "logits/rejected": -0.1891649067401886, "logps/chosen": -0.9944522976875305, "logps/rejected": -5.300948143005371, "loss": 0.5613, "nll_loss": 0.5527094602584839, "rewards/accuracies": 1.0, "rewards/chosen": -0.09944523125886917, "rewards/margins": 0.4306495785713196, "rewards/rejected": -0.5300948023796082, "step": 806 }, { "epoch": 0.5020217729393468, "grad_norm": 0.3566811680793762, "learning_rate": 9.65e-06, "log_odds_chosen": 3.241489887237549, "log_odds_ratio": -0.24942581355571747, "logits/chosen": 3.2233145236968994, "logits/rejected": 2.1009066104888916, "logps/chosen": -0.7947208881378174, "logps/rejected": -3.3181252479553223, "loss": 0.7944, "nll_loss": 0.7694109082221985, "rewards/accuracies": 0.75, "rewards/chosen": -0.07947209477424622, "rewards/margins": 0.2523404359817505, "rewards/rejected": -0.3318125009536743, "step": 807 }, { "epoch": 0.5026438569206843, "grad_norm": 1.0895134210586548, "learning_rate": 9.600000000000001e-06, "log_odds_chosen": 3.148625373840332, "log_odds_ratio": -0.2908478081226349, "logits/chosen": 0.9981393814086914, "logits/rejected": 0.09825985133647919, "logps/chosen": -1.0793797969818115, "logps/rejected": -3.959749698638916, "loss": 0.7648, "nll_loss": 0.7357277870178223, "rewards/accuracies": 0.75, "rewards/chosen": -0.10793796926736832, "rewards/margins": 0.2880370020866394, "rewards/rejected": -0.3959749639034271, "step": 808 }, { "epoch": 0.5032659409020218, "grad_norm": 0.4039660692214966, "learning_rate": 9.55e-06, "log_odds_chosen": 4.045183181762695, "log_odds_ratio": -0.23220917582511902, "logits/chosen": 1.330578088760376, "logits/rejected": 1.3192451000213623, "logps/chosen": -1.286431908607483, "logps/rejected": -5.05302095413208, "loss": 0.6188, "nll_loss": 0.5955660343170166, "rewards/accuracies": 0.75, "rewards/chosen": -0.128643199801445, "rewards/margins": 0.3766588866710663, "rewards/rejected": -0.5053020715713501, "step": 809 }, { "epoch": 0.5038880248833593, "grad_norm": 0.5143351554870605, "learning_rate": 9.5e-06, "log_odds_chosen": 4.110378265380859, "log_odds_ratio": -0.240739107131958, "logits/chosen": 2.9270503520965576, "logits/rejected": 2.2957873344421387, "logps/chosen": -1.0331082344055176, "logps/rejected": -4.818493366241455, "loss": 0.8335, "nll_loss": 0.8093845248222351, "rewards/accuracies": 0.875, "rewards/chosen": -0.10331083834171295, "rewards/margins": 0.37853848934173584, "rewards/rejected": -0.48184934258461, "step": 810 }, { "epoch": 0.5045101088646967, "grad_norm": 0.35940125584602356, "learning_rate": 9.450000000000001e-06, "log_odds_chosen": 2.423135757446289, "log_odds_ratio": -0.2942277789115906, "logits/chosen": 0.5553222894668579, "logits/rejected": 1.4473676681518555, "logps/chosen": -0.7806976437568665, "logps/rejected": -2.7081189155578613, "loss": 0.5079, "nll_loss": 0.47850099205970764, "rewards/accuracies": 0.875, "rewards/chosen": -0.078069768846035, "rewards/margins": 0.19274210929870605, "rewards/rejected": -0.27081188559532166, "step": 811 }, { "epoch": 0.5051321928460342, "grad_norm": 0.31486862897872925, "learning_rate": 9.4e-06, "log_odds_chosen": 4.782650470733643, "log_odds_ratio": -0.06745091825723648, "logits/chosen": 0.6869497299194336, "logits/rejected": 0.17597493529319763, "logps/chosen": -0.9358524680137634, "logps/rejected": -5.170880317687988, "loss": 0.4733, "nll_loss": 0.4665657877922058, "rewards/accuracies": 1.0, "rewards/chosen": -0.09358525276184082, "rewards/margins": 0.42350274324417114, "rewards/rejected": -0.517087996006012, "step": 812 }, { "epoch": 0.5057542768273717, "grad_norm": 0.2950216829776764, "learning_rate": 9.35e-06, "log_odds_chosen": 4.459681510925293, "log_odds_ratio": -0.0953890010714531, "logits/chosen": 0.6839005947113037, "logits/rejected": 0.27213621139526367, "logps/chosen": -0.9052809476852417, "logps/rejected": -4.865182399749756, "loss": 0.4605, "nll_loss": 0.45096686482429504, "rewards/accuracies": 1.0, "rewards/chosen": -0.09052809327840805, "rewards/margins": 0.39599016308784485, "rewards/rejected": -0.4865182638168335, "step": 813 }, { "epoch": 0.5063763608087092, "grad_norm": 0.3427290618419647, "learning_rate": 9.3e-06, "log_odds_chosen": 6.598719120025635, "log_odds_ratio": -0.007421756163239479, "logits/chosen": 1.0570212602615356, "logits/rejected": 0.9498794674873352, "logps/chosen": -0.9093166589736938, "logps/rejected": -6.913853645324707, "loss": 0.5848, "nll_loss": 0.5840796828269958, "rewards/accuracies": 1.0, "rewards/chosen": -0.09093166887760162, "rewards/margins": 0.6004536747932434, "rewards/rejected": -0.6913853883743286, "step": 814 }, { "epoch": 0.5069984447900466, "grad_norm": 0.2884136736392975, "learning_rate": 9.25e-06, "log_odds_chosen": 3.891439437866211, "log_odds_ratio": -0.12934939563274384, "logits/chosen": 0.9800029993057251, "logits/rejected": 1.1361968517303467, "logps/chosen": -1.0110560655593872, "logps/rejected": -4.505780220031738, "loss": 0.5232, "nll_loss": 0.5102529525756836, "rewards/accuracies": 0.875, "rewards/chosen": -0.10110560804605484, "rewards/margins": 0.3494724631309509, "rewards/rejected": -0.4505780339241028, "step": 815 }, { "epoch": 0.5076205287713841, "grad_norm": 0.35629749298095703, "learning_rate": 9.2e-06, "log_odds_chosen": 4.490930080413818, "log_odds_ratio": -0.10382703691720963, "logits/chosen": 1.9403718709945679, "logits/rejected": 0.49081113934516907, "logps/chosen": -0.7934112548828125, "logps/rejected": -4.741158485412598, "loss": 0.6434, "nll_loss": 0.6329823136329651, "rewards/accuracies": 0.875, "rewards/chosen": -0.07934112846851349, "rewards/margins": 0.39477473497390747, "rewards/rejected": -0.47411584854125977, "step": 816 }, { "epoch": 0.5082426127527216, "grad_norm": 0.4089803695678711, "learning_rate": 9.15e-06, "log_odds_chosen": 3.807797908782959, "log_odds_ratio": -0.22873249650001526, "logits/chosen": -0.5712364315986633, "logits/rejected": 0.023705005645751953, "logps/chosen": -1.6236436367034912, "logps/rejected": -5.201904296875, "loss": 0.4672, "nll_loss": 0.44429489970207214, "rewards/accuracies": 0.875, "rewards/chosen": -0.16236436367034912, "rewards/margins": 0.3578260838985443, "rewards/rejected": -0.5201904773712158, "step": 817 }, { "epoch": 0.5088646967340591, "grad_norm": 0.4340582489967346, "learning_rate": 9.100000000000001e-06, "log_odds_chosen": 4.368274211883545, "log_odds_ratio": -0.2061048150062561, "logits/chosen": 0.8786136507987976, "logits/rejected": 0.8603896498680115, "logps/chosen": -0.9838038086891174, "logps/rejected": -5.024988651275635, "loss": 0.5664, "nll_loss": 0.5458295941352844, "rewards/accuracies": 0.75, "rewards/chosen": -0.09838037937879562, "rewards/margins": 0.40411844849586487, "rewards/rejected": -0.5024988651275635, "step": 818 }, { "epoch": 0.5094867807153965, "grad_norm": 0.3766569197177887, "learning_rate": 9.05e-06, "log_odds_chosen": 5.19111442565918, "log_odds_ratio": -0.0431944876909256, "logits/chosen": 1.3828849792480469, "logits/rejected": -0.19894227385520935, "logps/chosen": -0.7179228067398071, "logps/rejected": -5.100705623626709, "loss": 0.5891, "nll_loss": 0.5847594738006592, "rewards/accuracies": 1.0, "rewards/chosen": -0.07179228961467743, "rewards/margins": 0.43827831745147705, "rewards/rejected": -0.5100706219673157, "step": 819 }, { "epoch": 0.5101088646967341, "grad_norm": 0.3517646789550781, "learning_rate": 9e-06, "log_odds_chosen": 3.4543983936309814, "log_odds_ratio": -0.10853774845600128, "logits/chosen": 1.693161964416504, "logits/rejected": 0.43125981092453003, "logps/chosen": -1.0501185655593872, "logps/rejected": -4.055387496948242, "loss": 0.636, "nll_loss": 0.6251487731933594, "rewards/accuracies": 1.0, "rewards/chosen": -0.10501186549663544, "rewards/margins": 0.300526887178421, "rewards/rejected": -0.40553876757621765, "step": 820 }, { "epoch": 0.5107309486780716, "grad_norm": 1.9642311334609985, "learning_rate": 8.95e-06, "log_odds_chosen": 4.056253433227539, "log_odds_ratio": -0.17738457024097443, "logits/chosen": 1.7935194969177246, "logits/rejected": 0.7376617193222046, "logps/chosen": -0.7888966798782349, "logps/rejected": -4.304933547973633, "loss": 0.6337, "nll_loss": 0.6160002946853638, "rewards/accuracies": 0.875, "rewards/chosen": -0.07888966053724289, "rewards/margins": 0.3516036868095398, "rewards/rejected": -0.4304933547973633, "step": 821 }, { "epoch": 0.511353032659409, "grad_norm": 0.2685078978538513, "learning_rate": 8.9e-06, "log_odds_chosen": 5.639597415924072, "log_odds_ratio": -0.029838988557457924, "logits/chosen": 0.07695133984088898, "logits/rejected": 0.47179538011550903, "logps/chosen": -0.8393157720565796, "logps/rejected": -5.901039123535156, "loss": 0.4426, "nll_loss": 0.43960070610046387, "rewards/accuracies": 1.0, "rewards/chosen": -0.0839315727353096, "rewards/margins": 0.5061724185943604, "rewards/rejected": -0.5901039838790894, "step": 822 }, { "epoch": 0.5119751166407465, "grad_norm": 0.351177841424942, "learning_rate": 8.85e-06, "log_odds_chosen": 5.065117359161377, "log_odds_ratio": -0.08009850978851318, "logits/chosen": 0.05210921913385391, "logits/rejected": -0.3567178249359131, "logps/chosen": -0.9725984930992126, "logps/rejected": -5.50432825088501, "loss": 0.4287, "nll_loss": 0.4206945300102234, "rewards/accuracies": 1.0, "rewards/chosen": -0.09725984930992126, "rewards/margins": 0.4531729817390442, "rewards/rejected": -0.5504328012466431, "step": 823 }, { "epoch": 0.512597200622084, "grad_norm": 0.589407205581665, "learning_rate": 8.8e-06, "log_odds_chosen": 3.0141940116882324, "log_odds_ratio": -0.38279733061790466, "logits/chosen": 2.124873638153076, "logits/rejected": 1.0183517932891846, "logps/chosen": -0.9143626093864441, "logps/rejected": -3.5896453857421875, "loss": 0.7332, "nll_loss": 0.6948947906494141, "rewards/accuracies": 0.625, "rewards/chosen": -0.09143626689910889, "rewards/margins": 0.2675282955169678, "rewards/rejected": -0.35896456241607666, "step": 824 }, { "epoch": 0.5132192846034215, "grad_norm": 0.32223331928253174, "learning_rate": 8.75e-06, "log_odds_chosen": 3.163451910018921, "log_odds_ratio": -0.27636605501174927, "logits/chosen": 1.7252213954925537, "logits/rejected": 0.4546215534210205, "logps/chosen": -0.8569576740264893, "logps/rejected": -3.6060609817504883, "loss": 0.7564, "nll_loss": 0.7287983298301697, "rewards/accuracies": 0.875, "rewards/chosen": -0.0856957733631134, "rewards/margins": 0.2749103307723999, "rewards/rejected": -0.3606061041355133, "step": 825 }, { "epoch": 0.5138413685847589, "grad_norm": 0.35858839750289917, "learning_rate": 8.7e-06, "log_odds_chosen": 2.8523874282836914, "log_odds_ratio": -0.14599651098251343, "logits/chosen": 1.4515256881713867, "logits/rejected": 1.5431532859802246, "logps/chosen": -0.8925541639328003, "logps/rejected": -3.3079447746276855, "loss": 0.6665, "nll_loss": 0.6518935561180115, "rewards/accuracies": 1.0, "rewards/chosen": -0.0892554223537445, "rewards/margins": 0.24153903126716614, "rewards/rejected": -0.33079448342323303, "step": 826 }, { "epoch": 0.5144634525660964, "grad_norm": 0.5025385618209839, "learning_rate": 8.65e-06, "log_odds_chosen": 2.716071367263794, "log_odds_ratio": -0.4175480008125305, "logits/chosen": 1.7826498746871948, "logits/rejected": 2.1285691261291504, "logps/chosen": -1.079801321029663, "logps/rejected": -3.5945372581481934, "loss": 0.6716, "nll_loss": 0.6298132538795471, "rewards/accuracies": 0.75, "rewards/chosen": -0.10798013210296631, "rewards/margins": 0.251473605632782, "rewards/rejected": -0.3594537377357483, "step": 827 }, { "epoch": 0.5150855365474339, "grad_norm": 0.3128713071346283, "learning_rate": 8.599999999999999e-06, "log_odds_chosen": 4.946896553039551, "log_odds_ratio": -0.09469583630561829, "logits/chosen": 1.2161049842834473, "logits/rejected": 1.1760735511779785, "logps/chosen": -0.8445440530776978, "logps/rejected": -5.290299892425537, "loss": 0.5957, "nll_loss": 0.5861875414848328, "rewards/accuracies": 1.0, "rewards/chosen": -0.08445440977811813, "rewards/margins": 0.44457557797431946, "rewards/rejected": -0.5290299654006958, "step": 828 }, { "epoch": 0.5157076205287714, "grad_norm": 0.7158114314079285, "learning_rate": 8.550000000000001e-06, "log_odds_chosen": 4.49724817276001, "log_odds_ratio": -0.17848162353038788, "logits/chosen": 1.4604125022888184, "logits/rejected": 0.44166508316993713, "logps/chosen": -0.7384613156318665, "logps/rejected": -4.72969913482666, "loss": 0.5113, "nll_loss": 0.49343129992485046, "rewards/accuracies": 1.0, "rewards/chosen": -0.07384613156318665, "rewards/margins": 0.3991238474845886, "rewards/rejected": -0.47296997904777527, "step": 829 }, { "epoch": 0.5163297045101088, "grad_norm": 0.3281008303165436, "learning_rate": 8.500000000000002e-06, "log_odds_chosen": 3.149600028991699, "log_odds_ratio": -0.3649888038635254, "logits/chosen": 1.408589243888855, "logits/rejected": 1.8521182537078857, "logps/chosen": -0.9668858051300049, "logps/rejected": -3.8920159339904785, "loss": 0.6461, "nll_loss": 0.609637975692749, "rewards/accuracies": 0.625, "rewards/chosen": -0.09668857604265213, "rewards/margins": 0.29251301288604736, "rewards/rejected": -0.3892015814781189, "step": 830 }, { "epoch": 0.5169517884914463, "grad_norm": 0.32905203104019165, "learning_rate": 8.45e-06, "log_odds_chosen": 4.028629302978516, "log_odds_ratio": -0.09469190984964371, "logits/chosen": -0.4233950972557068, "logits/rejected": 0.36360660195350647, "logps/chosen": -0.9033867120742798, "logps/rejected": -4.371126174926758, "loss": 0.4923, "nll_loss": 0.4828674793243408, "rewards/accuracies": 1.0, "rewards/chosen": -0.09033866971731186, "rewards/margins": 0.3467739224433899, "rewards/rejected": -0.43711256980895996, "step": 831 }, { "epoch": 0.5175738724727839, "grad_norm": 0.430584579706192, "learning_rate": 8.400000000000001e-06, "log_odds_chosen": 4.424380302429199, "log_odds_ratio": -0.1801256239414215, "logits/chosen": 2.5836946964263916, "logits/rejected": 2.416637659072876, "logps/chosen": -0.998272716999054, "logps/rejected": -4.759140968322754, "loss": 0.7411, "nll_loss": 0.7231161594390869, "rewards/accuracies": 0.875, "rewards/chosen": -0.09982727468013763, "rewards/margins": 0.3760868310928345, "rewards/rejected": -0.4759141206741333, "step": 832 }, { "epoch": 0.5181959564541213, "grad_norm": 0.3688509464263916, "learning_rate": 8.350000000000001e-06, "log_odds_chosen": 5.4708356857299805, "log_odds_ratio": -0.04741556942462921, "logits/chosen": 1.5119433403015137, "logits/rejected": -0.3896360397338867, "logps/chosen": -0.7715045213699341, "logps/rejected": -5.580893039703369, "loss": 0.5971, "nll_loss": 0.5923291444778442, "rewards/accuracies": 1.0, "rewards/chosen": -0.07715044915676117, "rewards/margins": 0.4809388816356659, "rewards/rejected": -0.5580893158912659, "step": 833 }, { "epoch": 0.5188180404354588, "grad_norm": 0.370304137468338, "learning_rate": 8.3e-06, "log_odds_chosen": 2.8266732692718506, "log_odds_ratio": -0.25435543060302734, "logits/chosen": 0.7392969131469727, "logits/rejected": 1.3636996746063232, "logps/chosen": -0.9792646765708923, "logps/rejected": -3.4305460453033447, "loss": 0.5873, "nll_loss": 0.5618682503700256, "rewards/accuracies": 0.875, "rewards/chosen": -0.09792646765708923, "rewards/margins": 0.24512813985347748, "rewards/rejected": -0.3430545926094055, "step": 834 }, { "epoch": 0.5194401244167963, "grad_norm": 1.1292908191680908, "learning_rate": 8.25e-06, "log_odds_chosen": 3.191591739654541, "log_odds_ratio": -0.16945341229438782, "logits/chosen": 2.268345832824707, "logits/rejected": 1.9634860754013062, "logps/chosen": -0.9450038075447083, "logps/rejected": -3.7623562812805176, "loss": 0.6896, "nll_loss": 0.6726588010787964, "rewards/accuracies": 0.875, "rewards/chosen": -0.09450038522481918, "rewards/margins": 0.28173527121543884, "rewards/rejected": -0.3762356638908386, "step": 835 }, { "epoch": 0.5200622083981338, "grad_norm": 0.5898036956787109, "learning_rate": 8.200000000000001e-06, "log_odds_chosen": 5.049922943115234, "log_odds_ratio": -0.03130911663174629, "logits/chosen": 1.6196269989013672, "logits/rejected": 0.1569942831993103, "logps/chosen": -0.920583963394165, "logps/rejected": -5.398545742034912, "loss": 0.6857, "nll_loss": 0.6825913190841675, "rewards/accuracies": 1.0, "rewards/chosen": -0.09205839782953262, "rewards/margins": 0.44779619574546814, "rewards/rejected": -0.5398545861244202, "step": 836 }, { "epoch": 0.5206842923794712, "grad_norm": 0.3221326172351837, "learning_rate": 8.15e-06, "log_odds_chosen": 5.945927619934082, "log_odds_ratio": -0.021535350009799004, "logits/chosen": 1.1049885749816895, "logits/rejected": 0.1460282951593399, "logps/chosen": -0.8474592566490173, "logps/rejected": -6.200389862060547, "loss": 0.5018, "nll_loss": 0.49965280294418335, "rewards/accuracies": 1.0, "rewards/chosen": -0.08474592864513397, "rewards/margins": 0.5352929830551147, "rewards/rejected": -0.6200389266014099, "step": 837 }, { "epoch": 0.5213063763608087, "grad_norm": 0.33995768427848816, "learning_rate": 8.1e-06, "log_odds_chosen": 3.2288389205932617, "log_odds_ratio": -0.27857640385627747, "logits/chosen": 1.4248796701431274, "logits/rejected": 1.4942407608032227, "logps/chosen": -0.7745346426963806, "logps/rejected": -3.5616447925567627, "loss": 0.6797, "nll_loss": 0.6518310308456421, "rewards/accuracies": 0.75, "rewards/chosen": -0.07745346426963806, "rewards/margins": 0.2787110209465027, "rewards/rejected": -0.35616451501846313, "step": 838 }, { "epoch": 0.5219284603421462, "grad_norm": 0.31470462679862976, "learning_rate": 8.050000000000001e-06, "log_odds_chosen": 7.015840530395508, "log_odds_ratio": -0.0050363121554255486, "logits/chosen": 1.4353834390640259, "logits/rejected": 1.1269909143447876, "logps/chosen": -0.8338403701782227, "logps/rejected": -7.1698222160339355, "loss": 0.5726, "nll_loss": 0.5720517635345459, "rewards/accuracies": 1.0, "rewards/chosen": -0.08338404446840286, "rewards/margins": 0.6335982084274292, "rewards/rejected": -0.7169821858406067, "step": 839 }, { "epoch": 0.5225505443234837, "grad_norm": 0.3146345317363739, "learning_rate": 8.000000000000001e-06, "log_odds_chosen": 3.7398879528045654, "log_odds_ratio": -0.19314292073249817, "logits/chosen": 1.4435521364212036, "logits/rejected": 2.414797782897949, "logps/chosen": -0.7224211096763611, "logps/rejected": -3.8994083404541016, "loss": 0.6447, "nll_loss": 0.6253674030303955, "rewards/accuracies": 0.875, "rewards/chosen": -0.07224211096763611, "rewards/margins": 0.31769871711730957, "rewards/rejected": -0.38994085788726807, "step": 840 }, { "epoch": 0.5231726283048211, "grad_norm": 0.3796578347682953, "learning_rate": 7.95e-06, "log_odds_chosen": 4.678387641906738, "log_odds_ratio": -0.32569360733032227, "logits/chosen": 2.938084602355957, "logits/rejected": 2.356147050857544, "logps/chosen": -0.7135329842567444, "logps/rejected": -4.978067398071289, "loss": 0.8559, "nll_loss": 0.8233269453048706, "rewards/accuracies": 0.625, "rewards/chosen": -0.07135330140590668, "rewards/margins": 0.42645344138145447, "rewards/rejected": -0.49780675768852234, "step": 841 }, { "epoch": 0.5237947122861586, "grad_norm": 0.3580376207828522, "learning_rate": 7.9e-06, "log_odds_chosen": 3.7918758392333984, "log_odds_ratio": -0.33086949586868286, "logits/chosen": 0.5326130986213684, "logits/rejected": 0.12543895840644836, "logps/chosen": -0.8650779724121094, "logps/rejected": -4.286233901977539, "loss": 0.5654, "nll_loss": 0.5323303937911987, "rewards/accuracies": 0.75, "rewards/chosen": -0.08650778979063034, "rewards/margins": 0.342115581035614, "rewards/rejected": -0.42862337827682495, "step": 842 }, { "epoch": 0.5244167962674962, "grad_norm": 0.3808853030204773, "learning_rate": 7.850000000000001e-06, "log_odds_chosen": 4.189432144165039, "log_odds_ratio": -0.27564239501953125, "logits/chosen": 2.1349778175354004, "logits/rejected": 1.1208148002624512, "logps/chosen": -0.9227831959724426, "logps/rejected": -4.730731010437012, "loss": 0.7277, "nll_loss": 0.700088381767273, "rewards/accuracies": 0.75, "rewards/chosen": -0.09227833151817322, "rewards/margins": 0.38079482316970825, "rewards/rejected": -0.47307315468788147, "step": 843 }, { "epoch": 0.5250388802488336, "grad_norm": 0.9150951504707336, "learning_rate": 7.8e-06, "log_odds_chosen": 3.935750722885132, "log_odds_ratio": -0.21879051625728607, "logits/chosen": 0.3282691538333893, "logits/rejected": 1.1714022159576416, "logps/chosen": -1.1815775632858276, "logps/rejected": -4.750007629394531, "loss": 0.6211, "nll_loss": 0.5991711616516113, "rewards/accuracies": 0.75, "rewards/chosen": -0.1181577518582344, "rewards/margins": 0.3568430542945862, "rewards/rejected": -0.47500079870224, "step": 844 }, { "epoch": 0.5256609642301711, "grad_norm": 0.43844273686408997, "learning_rate": 7.75e-06, "log_odds_chosen": 2.4059865474700928, "log_odds_ratio": -0.394694983959198, "logits/chosen": 1.3405787944793701, "logits/rejected": 1.228584885597229, "logps/chosen": -0.713738739490509, "logps/rejected": -2.4642791748046875, "loss": 0.6272, "nll_loss": 0.5876884460449219, "rewards/accuracies": 0.625, "rewards/chosen": -0.07137387245893478, "rewards/margins": 0.17505404353141785, "rewards/rejected": -0.24642793834209442, "step": 845 }, { "epoch": 0.5262830482115085, "grad_norm": 0.29125887155532837, "learning_rate": 7.7e-06, "log_odds_chosen": 8.201855659484863, "log_odds_ratio": -0.0014205931220203638, "logits/chosen": 0.5951703190803528, "logits/rejected": 0.41203880310058594, "logps/chosen": -0.6095064878463745, "logps/rejected": -7.787240982055664, "loss": 0.5115, "nll_loss": 0.5113813877105713, "rewards/accuracies": 1.0, "rewards/chosen": -0.06095064431428909, "rewards/margins": 0.7177734375, "rewards/rejected": -0.7787241339683533, "step": 846 }, { "epoch": 0.5269051321928461, "grad_norm": 1.37794029712677, "learning_rate": 7.65e-06, "log_odds_chosen": 5.424922943115234, "log_odds_ratio": -0.04665739834308624, "logits/chosen": 0.7469074726104736, "logits/rejected": 0.22907613217830658, "logps/chosen": -1.2817590236663818, "logps/rejected": -6.238846778869629, "loss": 0.5371, "nll_loss": 0.532480776309967, "rewards/accuracies": 1.0, "rewards/chosen": -0.12817591428756714, "rewards/margins": 0.49570873379707336, "rewards/rejected": -0.6238846182823181, "step": 847 }, { "epoch": 0.5275272161741835, "grad_norm": 0.27533194422721863, "learning_rate": 7.6e-06, "log_odds_chosen": 6.979497909545898, "log_odds_ratio": -0.00903667975217104, "logits/chosen": 0.06894741952419281, "logits/rejected": 0.27600759267807007, "logps/chosen": -0.9300459623336792, "logps/rejected": -7.287872314453125, "loss": 0.5618, "nll_loss": 0.5608495473861694, "rewards/accuracies": 1.0, "rewards/chosen": -0.09300459921360016, "rewards/margins": 0.6357825994491577, "rewards/rejected": -0.7287871837615967, "step": 848 }, { "epoch": 0.528149300155521, "grad_norm": 0.2954027056694031, "learning_rate": 7.55e-06, "log_odds_chosen": 4.8234710693359375, "log_odds_ratio": -0.12771743535995483, "logits/chosen": 1.4827038049697876, "logits/rejected": 1.096189260482788, "logps/chosen": -0.776839554309845, "logps/rejected": -5.056708812713623, "loss": 0.5763, "nll_loss": 0.5634976029396057, "rewards/accuracies": 1.0, "rewards/chosen": -0.0776839554309845, "rewards/margins": 0.42798691987991333, "rewards/rejected": -0.5056709051132202, "step": 849 }, { "epoch": 0.5287713841368584, "grad_norm": 0.4383789300918579, "learning_rate": 7.5e-06, "log_odds_chosen": 4.074307918548584, "log_odds_ratio": -0.1638539880514145, "logits/chosen": 0.1360701024532318, "logits/rejected": 0.7712302207946777, "logps/chosen": -0.8641620874404907, "logps/rejected": -4.405178546905518, "loss": 0.4002, "nll_loss": 0.3837825059890747, "rewards/accuracies": 0.875, "rewards/chosen": -0.08641621470451355, "rewards/margins": 0.35410165786743164, "rewards/rejected": -0.4405178725719452, "step": 850 }, { "epoch": 0.529393468118196, "grad_norm": 0.3684031367301941, "learning_rate": 7.45e-06, "log_odds_chosen": 4.50360107421875, "log_odds_ratio": -0.21045728027820587, "logits/chosen": 2.8498353958129883, "logits/rejected": 1.271877646446228, "logps/chosen": -0.9192016124725342, "logps/rejected": -4.949028491973877, "loss": 0.7212, "nll_loss": 0.7001411318778992, "rewards/accuracies": 0.875, "rewards/chosen": -0.0919201597571373, "rewards/margins": 0.4029826819896698, "rewards/rejected": -0.4949028491973877, "step": 851 }, { "epoch": 0.5300155520995334, "grad_norm": 0.41067174077033997, "learning_rate": 7.4e-06, "log_odds_chosen": 3.9802048206329346, "log_odds_ratio": -0.298592209815979, "logits/chosen": 2.4309494495391846, "logits/rejected": 2.2804596424102783, "logps/chosen": -0.7731521129608154, "logps/rejected": -4.271833896636963, "loss": 0.7431, "nll_loss": 0.7132678627967834, "rewards/accuracies": 0.875, "rewards/chosen": -0.07731521129608154, "rewards/margins": 0.34986817836761475, "rewards/rejected": -0.42718344926834106, "step": 852 }, { "epoch": 0.5306376360808709, "grad_norm": 0.34988710284233093, "learning_rate": 7.35e-06, "log_odds_chosen": 3.954108715057373, "log_odds_ratio": -0.2007138431072235, "logits/chosen": 1.4486238956451416, "logits/rejected": 0.7966309785842896, "logps/chosen": -0.7078330516815186, "logps/rejected": -4.10945463180542, "loss": 0.5629, "nll_loss": 0.5428740978240967, "rewards/accuracies": 0.875, "rewards/chosen": -0.07078330963850021, "rewards/margins": 0.34016215801239014, "rewards/rejected": -0.41094547510147095, "step": 853 }, { "epoch": 0.5312597200622085, "grad_norm": 0.531642496585846, "learning_rate": 7.2999999999999996e-06, "log_odds_chosen": 5.208446025848389, "log_odds_ratio": -0.1282520890235901, "logits/chosen": 0.8819208145141602, "logits/rejected": 0.8269643783569336, "logps/chosen": -0.8965824842453003, "logps/rejected": -5.501850605010986, "loss": 0.5189, "nll_loss": 0.5060710906982422, "rewards/accuracies": 1.0, "rewards/chosen": -0.08965825289487839, "rewards/margins": 0.46052682399749756, "rewards/rejected": -0.5501850843429565, "step": 854 }, { "epoch": 0.5318818040435459, "grad_norm": 0.353059321641922, "learning_rate": 7.25e-06, "log_odds_chosen": 4.275794982910156, "log_odds_ratio": -0.1521713137626648, "logits/chosen": 2.3613967895507812, "logits/rejected": 0.609958827495575, "logps/chosen": -0.8156629204750061, "logps/rejected": -4.58781623840332, "loss": 0.593, "nll_loss": 0.577816367149353, "rewards/accuracies": 0.875, "rewards/chosen": -0.08156629651784897, "rewards/margins": 0.37721535563468933, "rewards/rejected": -0.4587816596031189, "step": 855 }, { "epoch": 0.5325038880248834, "grad_norm": 0.3639867305755615, "learning_rate": 7.2e-06, "log_odds_chosen": 4.824565410614014, "log_odds_ratio": -0.2290450930595398, "logits/chosen": 1.6250410079956055, "logits/rejected": 1.7094388008117676, "logps/chosen": -0.8250951766967773, "logps/rejected": -5.078400135040283, "loss": 0.6442, "nll_loss": 0.6213299036026001, "rewards/accuracies": 0.875, "rewards/chosen": -0.08250951766967773, "rewards/margins": 0.4253305196762085, "rewards/rejected": -0.5078400373458862, "step": 856 }, { "epoch": 0.5331259720062208, "grad_norm": 0.37084388732910156, "learning_rate": 7.15e-06, "log_odds_chosen": 4.704831123352051, "log_odds_ratio": -0.12719248235225677, "logits/chosen": 0.6303936243057251, "logits/rejected": 0.46867427229881287, "logps/chosen": -0.9530895948410034, "logps/rejected": -5.224120140075684, "loss": 0.5273, "nll_loss": 0.5145441293716431, "rewards/accuracies": 1.0, "rewards/chosen": -0.09530895948410034, "rewards/margins": 0.42710307240486145, "rewards/rejected": -0.5224120020866394, "step": 857 }, { "epoch": 0.5337480559875584, "grad_norm": 1.0703786611557007, "learning_rate": 7.1e-06, "log_odds_chosen": 4.3336992263793945, "log_odds_ratio": -0.3372271656990051, "logits/chosen": 0.3954066038131714, "logits/rejected": 0.7838265895843506, "logps/chosen": -0.9540671110153198, "logps/rejected": -4.953222274780273, "loss": 0.626, "nll_loss": 0.5922662615776062, "rewards/accuracies": 0.75, "rewards/chosen": -0.09540671110153198, "rewards/margins": 0.39991557598114014, "rewards/rejected": -0.4953222870826721, "step": 858 }, { "epoch": 0.5343701399688958, "grad_norm": 1.5287100076675415, "learning_rate": 7.049999999999999e-06, "log_odds_chosen": 5.106422424316406, "log_odds_ratio": -0.02494587004184723, "logits/chosen": 1.100020408630371, "logits/rejected": 0.40951472520828247, "logps/chosen": -1.1385383605957031, "logps/rejected": -5.838616371154785, "loss": 0.5596, "nll_loss": 0.5571079850196838, "rewards/accuracies": 1.0, "rewards/chosen": -0.11385384202003479, "rewards/margins": 0.47000783681869507, "rewards/rejected": -0.5838617086410522, "step": 859 }, { "epoch": 0.5349922239502333, "grad_norm": 0.37003448605537415, "learning_rate": 7.000000000000001e-06, "log_odds_chosen": 4.985795974731445, "log_odds_ratio": -0.15885290503501892, "logits/chosen": 1.3111166954040527, "logits/rejected": 1.0276559591293335, "logps/chosen": -0.8623284697532654, "logps/rejected": -5.377542495727539, "loss": 0.6302, "nll_loss": 0.6143087148666382, "rewards/accuracies": 0.875, "rewards/chosen": -0.08623284846544266, "rewards/margins": 0.4515213668346405, "rewards/rejected": -0.537754237651825, "step": 860 }, { "epoch": 0.5356143079315707, "grad_norm": 0.8909189105033875, "learning_rate": 6.950000000000001e-06, "log_odds_chosen": 2.3723697662353516, "log_odds_ratio": -0.35334068536758423, "logits/chosen": 1.6953346729278564, "logits/rejected": -0.16396528482437134, "logps/chosen": -0.9004030227661133, "logps/rejected": -2.9755284786224365, "loss": 0.6941, "nll_loss": 0.6587247848510742, "rewards/accuracies": 0.875, "rewards/chosen": -0.09004030376672745, "rewards/margins": 0.20751255750656128, "rewards/rejected": -0.29755285382270813, "step": 861 }, { "epoch": 0.5362363919129083, "grad_norm": 0.2798997461795807, "learning_rate": 6.900000000000001e-06, "log_odds_chosen": 4.707751274108887, "log_odds_ratio": -0.10399264097213745, "logits/chosen": 0.29543545842170715, "logits/rejected": -1.7914931774139404, "logps/chosen": -0.909212589263916, "logps/rejected": -5.118707180023193, "loss": 0.5727, "nll_loss": 0.5623191595077515, "rewards/accuracies": 0.875, "rewards/chosen": -0.09092126786708832, "rewards/margins": 0.4209495186805725, "rewards/rejected": -0.5118707418441772, "step": 862 }, { "epoch": 0.5368584758942457, "grad_norm": 0.2905239462852478, "learning_rate": 6.8500000000000005e-06, "log_odds_chosen": 4.615797519683838, "log_odds_ratio": -0.142097607254982, "logits/chosen": -0.19048839807510376, "logits/rejected": 0.7622358798980713, "logps/chosen": -0.7061002254486084, "logps/rejected": -4.774419784545898, "loss": 0.422, "nll_loss": 0.40780800580978394, "rewards/accuracies": 0.875, "rewards/chosen": -0.07061002403497696, "rewards/margins": 0.40683192014694214, "rewards/rejected": -0.4774419665336609, "step": 863 }, { "epoch": 0.5374805598755832, "grad_norm": 0.39981669187545776, "learning_rate": 6.800000000000001e-06, "log_odds_chosen": 3.8930208683013916, "log_odds_ratio": -0.18875019252300262, "logits/chosen": 1.3811320066452026, "logits/rejected": 0.9418545365333557, "logps/chosen": -1.095811128616333, "logps/rejected": -4.510628700256348, "loss": 0.6449, "nll_loss": 0.6260712742805481, "rewards/accuracies": 0.875, "rewards/chosen": -0.1095811203122139, "rewards/margins": 0.3414817154407501, "rewards/rejected": -0.4510628581047058, "step": 864 }, { "epoch": 0.5381026438569206, "grad_norm": 0.41308942437171936, "learning_rate": 6.750000000000001e-06, "log_odds_chosen": 3.0796897411346436, "log_odds_ratio": -0.2442033886909485, "logits/chosen": 0.9326578974723816, "logits/rejected": 0.1488349288702011, "logps/chosen": -1.112588882446289, "logps/rejected": -3.866307497024536, "loss": 0.5677, "nll_loss": 0.543321967124939, "rewards/accuracies": 0.875, "rewards/chosen": -0.11125887930393219, "rewards/margins": 0.27537187933921814, "rewards/rejected": -0.38663074374198914, "step": 865 }, { "epoch": 0.5387247278382582, "grad_norm": 0.34175509214401245, "learning_rate": 6.700000000000001e-06, "log_odds_chosen": 5.5318708419799805, "log_odds_ratio": -0.061005041003227234, "logits/chosen": 1.7314218282699585, "logits/rejected": 1.6747318506240845, "logps/chosen": -0.7393934726715088, "logps/rejected": -5.642154693603516, "loss": 0.6447, "nll_loss": 0.6386001706123352, "rewards/accuracies": 1.0, "rewards/chosen": -0.07393934577703476, "rewards/margins": 0.4902760982513428, "rewards/rejected": -0.5642154216766357, "step": 866 }, { "epoch": 0.5393468118195957, "grad_norm": 1.9305363893508911, "learning_rate": 6.650000000000001e-06, "log_odds_chosen": 5.084562301635742, "log_odds_ratio": -0.02670995518565178, "logits/chosen": 0.5640244483947754, "logits/rejected": -0.02540937066078186, "logps/chosen": -1.191550374031067, "logps/rejected": -5.851881504058838, "loss": 0.5294, "nll_loss": 0.5266839265823364, "rewards/accuracies": 1.0, "rewards/chosen": -0.11915504187345505, "rewards/margins": 0.46603310108184814, "rewards/rejected": -0.5851881504058838, "step": 867 }, { "epoch": 0.5399688958009331, "grad_norm": 0.6636577248573303, "learning_rate": 6.6e-06, "log_odds_chosen": 4.547132968902588, "log_odds_ratio": -0.21664825081825256, "logits/chosen": 1.5044053792953491, "logits/rejected": 0.3689953684806824, "logps/chosen": -0.8013274669647217, "logps/rejected": -4.848517417907715, "loss": 0.6273, "nll_loss": 0.6056653261184692, "rewards/accuracies": 0.75, "rewards/chosen": -0.08013273775577545, "rewards/margins": 0.4047189950942993, "rewards/rejected": -0.48485174775123596, "step": 868 }, { "epoch": 0.5405909797822706, "grad_norm": 0.3548082709312439, "learning_rate": 6.550000000000001e-06, "log_odds_chosen": 3.8713083267211914, "log_odds_ratio": -0.2565319836139679, "logits/chosen": 1.0006804466247559, "logits/rejected": -0.03945589065551758, "logps/chosen": -0.8375179171562195, "logps/rejected": -4.2133684158325195, "loss": 0.6634, "nll_loss": 0.6377798318862915, "rewards/accuracies": 0.75, "rewards/chosen": -0.08375179767608643, "rewards/margins": 0.33758509159088135, "rewards/rejected": -0.4213368892669678, "step": 869 }, { "epoch": 0.5412130637636081, "grad_norm": 0.3238200843334198, "learning_rate": 6.5000000000000004e-06, "log_odds_chosen": 5.353576183319092, "log_odds_ratio": -0.07162611186504364, "logits/chosen": 2.206803321838379, "logits/rejected": 1.6463418006896973, "logps/chosen": -0.7490416765213013, "logps/rejected": -5.4248809814453125, "loss": 0.6596, "nll_loss": 0.6524254083633423, "rewards/accuracies": 1.0, "rewards/chosen": -0.0749041736125946, "rewards/margins": 0.46758395433425903, "rewards/rejected": -0.5424880981445312, "step": 870 }, { "epoch": 0.5418351477449456, "grad_norm": 1.8776357173919678, "learning_rate": 6.45e-06, "log_odds_chosen": 4.40454626083374, "log_odds_ratio": -0.17675238847732544, "logits/chosen": 2.513120412826538, "logits/rejected": 1.0964940786361694, "logps/chosen": -1.0388731956481934, "logps/rejected": -5.051460266113281, "loss": 0.7226, "nll_loss": 0.7049591541290283, "rewards/accuracies": 0.875, "rewards/chosen": -0.10388731956481934, "rewards/margins": 0.4012587368488312, "rewards/rejected": -0.5051460266113281, "step": 871 }, { "epoch": 0.542457231726283, "grad_norm": 0.406896710395813, "learning_rate": 6.4000000000000006e-06, "log_odds_chosen": 6.131252288818359, "log_odds_ratio": -0.08929823338985443, "logits/chosen": 2.746368885040283, "logits/rejected": 1.540923833847046, "logps/chosen": -0.8624634742736816, "logps/rejected": -6.512821674346924, "loss": 0.6874, "nll_loss": 0.6784294843673706, "rewards/accuracies": 1.0, "rewards/chosen": -0.08624634891748428, "rewards/margins": 0.5650358200073242, "rewards/rejected": -0.6512821316719055, "step": 872 }, { "epoch": 0.5430793157076206, "grad_norm": 0.4132915437221527, "learning_rate": 6.35e-06, "log_odds_chosen": 5.865504741668701, "log_odds_ratio": -0.09850253164768219, "logits/chosen": 0.35298776626586914, "logits/rejected": 1.0819687843322754, "logps/chosen": -0.8687587976455688, "logps/rejected": -6.098135948181152, "loss": 0.4361, "nll_loss": 0.4262300133705139, "rewards/accuracies": 1.0, "rewards/chosen": -0.08687587827444077, "rewards/margins": 0.5229377150535583, "rewards/rejected": -0.6098135709762573, "step": 873 }, { "epoch": 0.543701399688958, "grad_norm": 0.40822353959083557, "learning_rate": 6.300000000000001e-06, "log_odds_chosen": 3.693990707397461, "log_odds_ratio": -0.26884984970092773, "logits/chosen": 1.2561935186386108, "logits/rejected": 0.1087363213300705, "logps/chosen": -0.9086207151412964, "logps/rejected": -4.258871078491211, "loss": 0.5151, "nll_loss": 0.48824068903923035, "rewards/accuracies": 0.75, "rewards/chosen": -0.09086208045482635, "rewards/margins": 0.33502498269081116, "rewards/rejected": -0.4258870780467987, "step": 874 }, { "epoch": 0.5443234836702955, "grad_norm": 0.3257782757282257, "learning_rate": 6.25e-06, "log_odds_chosen": 4.554495334625244, "log_odds_ratio": -0.14924365282058716, "logits/chosen": 1.54774808883667, "logits/rejected": 1.4602476358413696, "logps/chosen": -0.7244719862937927, "logps/rejected": -4.493233680725098, "loss": 0.6859, "nll_loss": 0.6709988117218018, "rewards/accuracies": 0.875, "rewards/chosen": -0.07244720309972763, "rewards/margins": 0.37687617540359497, "rewards/rejected": -0.4493233561515808, "step": 875 }, { "epoch": 0.5449455676516329, "grad_norm": 0.3226686120033264, "learning_rate": 6.2e-06, "log_odds_chosen": 3.7493808269500732, "log_odds_ratio": -0.15756797790527344, "logits/chosen": 1.012019395828247, "logits/rejected": 0.5823137760162354, "logps/chosen": -0.9175806045532227, "logps/rejected": -4.2481865882873535, "loss": 0.6132, "nll_loss": 0.5974055528640747, "rewards/accuracies": 0.875, "rewards/chosen": -0.09175807237625122, "rewards/margins": 0.333060622215271, "rewards/rejected": -0.4248186945915222, "step": 876 }, { "epoch": 0.5455676516329705, "grad_norm": 0.9020261764526367, "learning_rate": 6.15e-06, "log_odds_chosen": 4.887069225311279, "log_odds_ratio": -0.03150523826479912, "logits/chosen": 1.5572091341018677, "logits/rejected": 0.7369846105575562, "logps/chosen": -1.265540599822998, "logps/rejected": -5.7456254959106445, "loss": 0.7783, "nll_loss": 0.7751142978668213, "rewards/accuracies": 1.0, "rewards/chosen": -0.12655405700206757, "rewards/margins": 0.4480084776878357, "rewards/rejected": -0.5745625495910645, "step": 877 }, { "epoch": 0.546189735614308, "grad_norm": 0.33154386281967163, "learning_rate": 6.1e-06, "log_odds_chosen": 6.636990547180176, "log_odds_ratio": -0.007074399385601282, "logits/chosen": 0.43664056062698364, "logits/rejected": -0.06868959963321686, "logps/chosen": -1.0449284315109253, "logps/rejected": -7.1396708488464355, "loss": 0.4785, "nll_loss": 0.4777759909629822, "rewards/accuracies": 1.0, "rewards/chosen": -0.10449284315109253, "rewards/margins": 0.6094743013381958, "rewards/rejected": -0.7139671444892883, "step": 878 }, { "epoch": 0.5468118195956454, "grad_norm": 0.3189108073711395, "learning_rate": 6.0500000000000005e-06, "log_odds_chosen": 4.096932411193848, "log_odds_ratio": -0.1806538999080658, "logits/chosen": 1.3440558910369873, "logits/rejected": 0.4599352180957794, "logps/chosen": -0.9252468347549438, "logps/rejected": -4.582737445831299, "loss": 0.6193, "nll_loss": 0.6012023687362671, "rewards/accuracies": 0.875, "rewards/chosen": -0.09252467751502991, "rewards/margins": 0.3657490611076355, "rewards/rejected": -0.4582737684249878, "step": 879 }, { "epoch": 0.5474339035769828, "grad_norm": 0.3894885778427124, "learning_rate": 6e-06, "log_odds_chosen": 5.063629150390625, "log_odds_ratio": -0.17309612035751343, "logits/chosen": 1.391903042793274, "logits/rejected": 0.8823519349098206, "logps/chosen": -0.9142184257507324, "logps/rejected": -5.5893988609313965, "loss": 0.5591, "nll_loss": 0.5418129563331604, "rewards/accuracies": 0.875, "rewards/chosen": -0.09142184257507324, "rewards/margins": 0.46751803159713745, "rewards/rejected": -0.5589399337768555, "step": 880 }, { "epoch": 0.5480559875583204, "grad_norm": 0.31810715794563293, "learning_rate": 5.95e-06, "log_odds_chosen": 6.099935531616211, "log_odds_ratio": -0.04295491427183151, "logits/chosen": 0.7478195428848267, "logits/rejected": 0.5277884006500244, "logps/chosen": -0.9480465650558472, "logps/rejected": -6.505160331726074, "loss": 0.5633, "nll_loss": 0.559039831161499, "rewards/accuracies": 1.0, "rewards/chosen": -0.09480465203523636, "rewards/margins": 0.5557113885879517, "rewards/rejected": -0.6505160331726074, "step": 881 }, { "epoch": 0.5486780715396579, "grad_norm": 0.537359893321991, "learning_rate": 5.9e-06, "log_odds_chosen": 2.3679771423339844, "log_odds_ratio": -0.3640172481536865, "logits/chosen": 1.483041763305664, "logits/rejected": 1.1189666986465454, "logps/chosen": -0.8394365906715393, "logps/rejected": -2.901829957962036, "loss": 0.6892, "nll_loss": 0.652812123298645, "rewards/accuracies": 0.75, "rewards/chosen": -0.08394366502761841, "rewards/margins": 0.20623932778835297, "rewards/rejected": -0.2901829779148102, "step": 882 }, { "epoch": 0.5493001555209953, "grad_norm": 0.3169862627983093, "learning_rate": 5.850000000000001e-06, "log_odds_chosen": 3.7267801761627197, "log_odds_ratio": -0.34081006050109863, "logits/chosen": 1.571112871170044, "logits/rejected": 0.19689306616783142, "logps/chosen": -0.8036859035491943, "logps/rejected": -4.05713415145874, "loss": 0.6, "nll_loss": 0.5659441351890564, "rewards/accuracies": 0.625, "rewards/chosen": -0.08036859333515167, "rewards/margins": 0.32534483075141907, "rewards/rejected": -0.40571340918540955, "step": 883 }, { "epoch": 0.5499222395023328, "grad_norm": 0.4113629460334778, "learning_rate": 5.8e-06, "log_odds_chosen": 4.897263526916504, "log_odds_ratio": -0.14118805527687073, "logits/chosen": 2.258298635482788, "logits/rejected": 0.5916818380355835, "logps/chosen": -0.8462052345275879, "logps/rejected": -5.26324462890625, "loss": 0.6423, "nll_loss": 0.6282092332839966, "rewards/accuracies": 0.875, "rewards/chosen": -0.08462052792310715, "rewards/margins": 0.4417039155960083, "rewards/rejected": -0.526324450969696, "step": 884 }, { "epoch": 0.5505443234836703, "grad_norm": 0.42419642210006714, "learning_rate": 5.750000000000001e-06, "log_odds_chosen": 4.345916748046875, "log_odds_ratio": -0.2700275182723999, "logits/chosen": 2.6262309551239014, "logits/rejected": 1.3854570388793945, "logps/chosen": -1.1909689903259277, "logps/rejected": -5.231269836425781, "loss": 0.8041, "nll_loss": 0.7771395444869995, "rewards/accuracies": 0.75, "rewards/chosen": -0.11909689754247665, "rewards/margins": 0.4040301442146301, "rewards/rejected": -0.5231269598007202, "step": 885 }, { "epoch": 0.5511664074650078, "grad_norm": 0.358368843793869, "learning_rate": 5.7000000000000005e-06, "log_odds_chosen": 6.011335372924805, "log_odds_ratio": -0.003882630495354533, "logits/chosen": 0.656829297542572, "logits/rejected": 0.7573573589324951, "logps/chosen": -0.9341671466827393, "logps/rejected": -6.402599334716797, "loss": 0.5654, "nll_loss": 0.5649951696395874, "rewards/accuracies": 1.0, "rewards/chosen": -0.0934167206287384, "rewards/margins": 0.5468432307243347, "rewards/rejected": -0.6402599811553955, "step": 886 }, { "epoch": 0.5517884914463452, "grad_norm": 0.3700637221336365, "learning_rate": 5.65e-06, "log_odds_chosen": 4.019516468048096, "log_odds_ratio": -0.26321983337402344, "logits/chosen": 1.6410436630249023, "logits/rejected": 1.4497946500778198, "logps/chosen": -0.910016655921936, "logps/rejected": -4.60683012008667, "loss": 0.6428, "nll_loss": 0.616430938243866, "rewards/accuracies": 0.875, "rewards/chosen": -0.09100165963172913, "rewards/margins": 0.36968132853507996, "rewards/rejected": -0.4606829881668091, "step": 887 }, { "epoch": 0.5524105754276827, "grad_norm": 4.00311803817749, "learning_rate": 5.600000000000001e-06, "log_odds_chosen": 4.81396484375, "log_odds_ratio": -0.017167825251817703, "logits/chosen": 0.5925553441047668, "logits/rejected": -0.003340400755405426, "logps/chosen": -0.8698358535766602, "logps/rejected": -5.038300514221191, "loss": 0.582, "nll_loss": 0.5802885890007019, "rewards/accuracies": 1.0, "rewards/chosen": -0.0869835913181305, "rewards/margins": 0.41684651374816895, "rewards/rejected": -0.503830075263977, "step": 888 }, { "epoch": 0.5530326594090202, "grad_norm": 0.9824345111846924, "learning_rate": 5.55e-06, "log_odds_chosen": 2.4528815746307373, "log_odds_ratio": -0.7164135575294495, "logits/chosen": 1.6816127300262451, "logits/rejected": 1.278057336807251, "logps/chosen": -1.7241687774658203, "logps/rejected": -4.091108322143555, "loss": 0.7123, "nll_loss": 0.6406722068786621, "rewards/accuracies": 0.5, "rewards/chosen": -0.17241688072681427, "rewards/margins": 0.23669394850730896, "rewards/rejected": -0.40911081433296204, "step": 889 }, { "epoch": 0.5536547433903577, "grad_norm": 0.401115357875824, "learning_rate": 5.500000000000001e-06, "log_odds_chosen": 4.721898555755615, "log_odds_ratio": -0.049223363399505615, "logits/chosen": 1.1281979084014893, "logits/rejected": 0.8041211366653442, "logps/chosen": -1.3164769411087036, "logps/rejected": -5.681437015533447, "loss": 0.6608, "nll_loss": 0.6558688282966614, "rewards/accuracies": 1.0, "rewards/chosen": -0.13164770603179932, "rewards/margins": 0.4364960491657257, "rewards/rejected": -0.5681437849998474, "step": 890 }, { "epoch": 0.5542768273716951, "grad_norm": 0.41568583250045776, "learning_rate": 5.45e-06, "log_odds_chosen": 3.2344136238098145, "log_odds_ratio": -0.23986506462097168, "logits/chosen": 1.8116191625595093, "logits/rejected": 1.3638396263122559, "logps/chosen": -0.9154233932495117, "logps/rejected": -3.7720108032226562, "loss": 0.7533, "nll_loss": 0.7292731404304504, "rewards/accuracies": 0.875, "rewards/chosen": -0.0915423333644867, "rewards/margins": 0.2856587767601013, "rewards/rejected": -0.377201110124588, "step": 891 }, { "epoch": 0.5548989113530327, "grad_norm": 0.41260719299316406, "learning_rate": 5.4e-06, "log_odds_chosen": 4.115436553955078, "log_odds_ratio": -0.22443503141403198, "logits/chosen": 1.470842719078064, "logits/rejected": 0.9105685353279114, "logps/chosen": -0.9408708810806274, "logps/rejected": -4.714263916015625, "loss": 0.6892, "nll_loss": 0.666793704032898, "rewards/accuracies": 0.75, "rewards/chosen": -0.09408708661794662, "rewards/margins": 0.37733930349349976, "rewards/rejected": -0.4714263677597046, "step": 892 }, { "epoch": 0.5555209953343702, "grad_norm": 0.38643932342529297, "learning_rate": 5.3500000000000004e-06, "log_odds_chosen": 5.207808494567871, "log_odds_ratio": -0.17040032148361206, "logits/chosen": 1.891057014465332, "logits/rejected": -0.6716984510421753, "logps/chosen": -0.9740372896194458, "logps/rejected": -5.78117561340332, "loss": 0.632, "nll_loss": 0.6149402856826782, "rewards/accuracies": 0.875, "rewards/chosen": -0.09740372747182846, "rewards/margins": 0.4807139039039612, "rewards/rejected": -0.5781176090240479, "step": 893 }, { "epoch": 0.5561430793157076, "grad_norm": 0.6569875478744507, "learning_rate": 5.3e-06, "log_odds_chosen": 2.787360191345215, "log_odds_ratio": -0.31620126962661743, "logits/chosen": 2.0118942260742188, "logits/rejected": 1.3410391807556152, "logps/chosen": -0.7760646939277649, "logps/rejected": -3.01680850982666, "loss": 0.7015, "nll_loss": 0.6699241399765015, "rewards/accuracies": 0.75, "rewards/chosen": -0.07760646939277649, "rewards/margins": 0.2240743786096573, "rewards/rejected": -0.3016808331012726, "step": 894 }, { "epoch": 0.5567651632970451, "grad_norm": 1.307922601699829, "learning_rate": 5.25e-06, "log_odds_chosen": 3.8499703407287598, "log_odds_ratio": -0.13547715544700623, "logits/chosen": 1.6815910339355469, "logits/rejected": 0.4014458656311035, "logps/chosen": -0.8500081300735474, "logps/rejected": -4.165833473205566, "loss": 0.5871, "nll_loss": 0.5735604763031006, "rewards/accuracies": 0.875, "rewards/chosen": -0.08500081300735474, "rewards/margins": 0.33158254623413086, "rewards/rejected": -0.4165833592414856, "step": 895 }, { "epoch": 0.5573872472783826, "grad_norm": 0.37747126817703247, "learning_rate": 5.2e-06, "log_odds_chosen": 5.886293888092041, "log_odds_ratio": -0.07489710301160812, "logits/chosen": 0.7270099520683289, "logits/rejected": -0.3343905508518219, "logps/chosen": -0.8939152956008911, "logps/rejected": -6.278520584106445, "loss": 0.4426, "nll_loss": 0.4351494312286377, "rewards/accuracies": 1.0, "rewards/chosen": -0.08939152956008911, "rewards/margins": 0.5384604930877686, "rewards/rejected": -0.6278520822525024, "step": 896 }, { "epoch": 0.5580093312597201, "grad_norm": 0.3080809712409973, "learning_rate": 5.15e-06, "log_odds_chosen": 4.336005210876465, "log_odds_ratio": -0.20657199621200562, "logits/chosen": 1.8069411516189575, "logits/rejected": 0.9967038631439209, "logps/chosen": -0.8536757230758667, "logps/rejected": -4.759378433227539, "loss": 0.6298, "nll_loss": 0.6091340780258179, "rewards/accuracies": 0.875, "rewards/chosen": -0.08536757528781891, "rewards/margins": 0.3905702531337738, "rewards/rejected": -0.4759378433227539, "step": 897 }, { "epoch": 0.5586314152410575, "grad_norm": 0.4497907757759094, "learning_rate": 5.1e-06, "log_odds_chosen": 4.7341156005859375, "log_odds_ratio": -0.12223678082227707, "logits/chosen": 2.783708095550537, "logits/rejected": 1.9557205438613892, "logps/chosen": -0.8115973472595215, "logps/rejected": -4.9661407470703125, "loss": 0.7959, "nll_loss": 0.7836655378341675, "rewards/accuracies": 0.875, "rewards/chosen": -0.08115973323583603, "rewards/margins": 0.4154543876647949, "rewards/rejected": -0.49661409854888916, "step": 898 }, { "epoch": 0.559253499222395, "grad_norm": 0.3507234752178192, "learning_rate": 5.050000000000001e-06, "log_odds_chosen": 6.665820598602295, "log_odds_ratio": -0.01345449686050415, "logits/chosen": 1.5257761478424072, "logits/rejected": 0.8127526044845581, "logps/chosen": -1.231274962425232, "logps/rejected": -7.183798313140869, "loss": 0.6412, "nll_loss": 0.6399024128913879, "rewards/accuracies": 1.0, "rewards/chosen": -0.12312749028205872, "rewards/margins": 0.5952523946762085, "rewards/rejected": -0.7183798551559448, "step": 899 }, { "epoch": 0.5598755832037325, "grad_norm": 0.3265915811061859, "learning_rate": 5e-06, "log_odds_chosen": 5.529303550720215, "log_odds_ratio": -0.0950424000620842, "logits/chosen": 0.9178490042686462, "logits/rejected": 1.3479315042495728, "logps/chosen": -0.8234384059906006, "logps/rejected": -5.609393119812012, "loss": 0.6265, "nll_loss": 0.6170258522033691, "rewards/accuracies": 1.0, "rewards/chosen": -0.08234383910894394, "rewards/margins": 0.47859543561935425, "rewards/rejected": -0.5609393119812012, "step": 900 }, { "epoch": 0.56049766718507, "grad_norm": 0.4529457688331604, "learning_rate": 4.950000000000001e-06, "log_odds_chosen": 6.182548522949219, "log_odds_ratio": -0.10109666734933853, "logits/chosen": 1.3867193460464478, "logits/rejected": 0.8778035640716553, "logps/chosen": -0.8060503602027893, "logps/rejected": -6.155576705932617, "loss": 0.6428, "nll_loss": 0.6326842308044434, "rewards/accuracies": 1.0, "rewards/chosen": -0.08060503751039505, "rewards/margins": 0.5349526405334473, "rewards/rejected": -0.6155576705932617, "step": 901 }, { "epoch": 0.5611197511664074, "grad_norm": 0.35935789346694946, "learning_rate": 4.9000000000000005e-06, "log_odds_chosen": 4.441336631774902, "log_odds_ratio": -0.16026294231414795, "logits/chosen": 1.9076406955718994, "logits/rejected": 1.1897597312927246, "logps/chosen": -0.8750441074371338, "logps/rejected": -4.786342620849609, "loss": 0.7553, "nll_loss": 0.73931884765625, "rewards/accuracies": 1.0, "rewards/chosen": -0.08750440925359726, "rewards/margins": 0.39112988114356995, "rewards/rejected": -0.4786342680454254, "step": 902 }, { "epoch": 0.5617418351477449, "grad_norm": 0.29695722460746765, "learning_rate": 4.85e-06, "log_odds_chosen": 4.3331217765808105, "log_odds_ratio": -0.23966997861862183, "logits/chosen": -0.5572576522827148, "logits/rejected": 1.0803070068359375, "logps/chosen": -0.7401511669158936, "logps/rejected": -4.379242420196533, "loss": 0.4785, "nll_loss": 0.45448851585388184, "rewards/accuracies": 0.875, "rewards/chosen": -0.07401511818170547, "rewards/margins": 0.36390912532806396, "rewards/rejected": -0.43792423605918884, "step": 903 }, { "epoch": 0.5623639191290825, "grad_norm": 0.42112380266189575, "learning_rate": 4.800000000000001e-06, "log_odds_chosen": 5.218524932861328, "log_odds_ratio": -0.054378729313611984, "logits/chosen": 0.5482559204101562, "logits/rejected": 0.2777423858642578, "logps/chosen": -0.7422811985015869, "logps/rejected": -5.270057201385498, "loss": 0.5098, "nll_loss": 0.5044077038764954, "rewards/accuracies": 1.0, "rewards/chosen": -0.07422812283039093, "rewards/margins": 0.452777624130249, "rewards/rejected": -0.5270057320594788, "step": 904 }, { "epoch": 0.5629860031104199, "grad_norm": 3.2522900104522705, "learning_rate": 4.75e-06, "log_odds_chosen": 3.5051608085632324, "log_odds_ratio": -0.281528502702713, "logits/chosen": 1.1515018939971924, "logits/rejected": 0.7810160517692566, "logps/chosen": -0.8040460348129272, "logps/rejected": -3.8523013591766357, "loss": 0.6415, "nll_loss": 0.6133573651313782, "rewards/accuracies": 0.625, "rewards/chosen": -0.0804046094417572, "rewards/margins": 0.3048255145549774, "rewards/rejected": -0.3852301239967346, "step": 905 }, { "epoch": 0.5636080870917574, "grad_norm": 0.4139149785041809, "learning_rate": 4.7e-06, "log_odds_chosen": 4.3448076248168945, "log_odds_ratio": -0.19571033120155334, "logits/chosen": 0.6563786268234253, "logits/rejected": -0.061872243881225586, "logps/chosen": -0.9398016333580017, "logps/rejected": -4.782492637634277, "loss": 0.626, "nll_loss": 0.6064011454582214, "rewards/accuracies": 0.875, "rewards/chosen": -0.09398016333580017, "rewards/margins": 0.384269118309021, "rewards/rejected": -0.47824928164482117, "step": 906 }, { "epoch": 0.5642301710730949, "grad_norm": 0.3649228811264038, "learning_rate": 4.65e-06, "log_odds_chosen": 5.071795463562012, "log_odds_ratio": -0.13153302669525146, "logits/chosen": 1.0045881271362305, "logits/rejected": 0.6343420743942261, "logps/chosen": -0.9673879146575928, "logps/rejected": -5.603385925292969, "loss": 0.6296, "nll_loss": 0.6164366602897644, "rewards/accuracies": 0.875, "rewards/chosen": -0.0967387929558754, "rewards/margins": 0.4635998606681824, "rewards/rejected": -0.5603386163711548, "step": 907 }, { "epoch": 0.5648522550544324, "grad_norm": 0.334963858127594, "learning_rate": 4.6e-06, "log_odds_chosen": 4.234353065490723, "log_odds_ratio": -0.12835365533828735, "logits/chosen": 0.9635106325149536, "logits/rejected": -0.14736008644104004, "logps/chosen": -0.8886890411376953, "logps/rejected": -4.6457037925720215, "loss": 0.5793, "nll_loss": 0.5664956569671631, "rewards/accuracies": 0.875, "rewards/chosen": -0.08886890113353729, "rewards/margins": 0.3757014870643616, "rewards/rejected": -0.46457037329673767, "step": 908 }, { "epoch": 0.5654743390357698, "grad_norm": 5.453255653381348, "learning_rate": 4.5500000000000005e-06, "log_odds_chosen": 6.330126762390137, "log_odds_ratio": -0.08912979811429977, "logits/chosen": 1.8540780544281006, "logits/rejected": 1.8941954374313354, "logps/chosen": -1.4149324893951416, "logps/rejected": -7.361577033996582, "loss": 0.7808, "nll_loss": 0.7718974351882935, "rewards/accuracies": 1.0, "rewards/chosen": -0.14149326086044312, "rewards/margins": 0.5946645140647888, "rewards/rejected": -0.7361577153205872, "step": 909 }, { "epoch": 0.5660964230171073, "grad_norm": 0.3110488951206207, "learning_rate": 4.5e-06, "log_odds_chosen": 3.5994150638580322, "log_odds_ratio": -0.239741250872612, "logits/chosen": 1.7619941234588623, "logits/rejected": 1.8184112310409546, "logps/chosen": -0.855003833770752, "logps/rejected": -4.064135551452637, "loss": 0.6879, "nll_loss": 0.6639161705970764, "rewards/accuracies": 0.875, "rewards/chosen": -0.08550038188695908, "rewards/margins": 0.3209131360054016, "rewards/rejected": -0.40641355514526367, "step": 910 }, { "epoch": 0.5667185069984448, "grad_norm": 0.3939039707183838, "learning_rate": 4.45e-06, "log_odds_chosen": 3.505305290222168, "log_odds_ratio": -0.22142846882343292, "logits/chosen": 2.593437671661377, "logits/rejected": 2.5147769451141357, "logps/chosen": -0.9565043449401855, "logps/rejected": -4.017549514770508, "loss": 0.7414, "nll_loss": 0.7192279100418091, "rewards/accuracies": 0.75, "rewards/chosen": -0.09565043449401855, "rewards/margins": 0.30610448122024536, "rewards/rejected": -0.4017549157142639, "step": 911 }, { "epoch": 0.5673405909797823, "grad_norm": 0.3363257646560669, "learning_rate": 4.4e-06, "log_odds_chosen": 5.338538646697998, "log_odds_ratio": -0.13085848093032837, "logits/chosen": 1.9446805715560913, "logits/rejected": 1.3729445934295654, "logps/chosen": -0.9459335803985596, "logps/rejected": -5.780755043029785, "loss": 0.7294, "nll_loss": 0.7163442969322205, "rewards/accuracies": 0.875, "rewards/chosen": -0.0945933610200882, "rewards/margins": 0.48348212242126465, "rewards/rejected": -0.5780754685401917, "step": 912 }, { "epoch": 0.5679626749611197, "grad_norm": 0.45077571272850037, "learning_rate": 4.35e-06, "log_odds_chosen": 5.213942527770996, "log_odds_ratio": -0.09788724780082703, "logits/chosen": 2.052887201309204, "logits/rejected": 1.9616841077804565, "logps/chosen": -0.6288750767707825, "logps/rejected": -5.032301902770996, "loss": 0.6667, "nll_loss": 0.6569293737411499, "rewards/accuracies": 1.0, "rewards/chosen": -0.06288750469684601, "rewards/margins": 0.4403426945209503, "rewards/rejected": -0.5032302141189575, "step": 913 }, { "epoch": 0.5685847589424572, "grad_norm": 0.4367021322250366, "learning_rate": 4.2999999999999995e-06, "log_odds_chosen": 3.5740416049957275, "log_odds_ratio": -0.27110692858695984, "logits/chosen": 1.6953269243240356, "logits/rejected": 1.7087769508361816, "logps/chosen": -0.8259687423706055, "logps/rejected": -3.9957141876220703, "loss": 0.7026, "nll_loss": 0.6754826307296753, "rewards/accuracies": 0.875, "rewards/chosen": -0.08259688317775726, "rewards/margins": 0.31697455048561096, "rewards/rejected": -0.39957141876220703, "step": 914 }, { "epoch": 0.5692068429237948, "grad_norm": 0.3404788672924042, "learning_rate": 4.250000000000001e-06, "log_odds_chosen": 5.116745948791504, "log_odds_ratio": -0.16189473867416382, "logits/chosen": 0.7443652749061584, "logits/rejected": 0.514636754989624, "logps/chosen": -0.7846352458000183, "logps/rejected": -5.240664482116699, "loss": 0.5443, "nll_loss": 0.528134822845459, "rewards/accuracies": 1.0, "rewards/chosen": -0.07846352458000183, "rewards/margins": 0.4456029236316681, "rewards/rejected": -0.5240664482116699, "step": 915 }, { "epoch": 0.5698289269051322, "grad_norm": 0.27389395236968994, "learning_rate": 4.2000000000000004e-06, "log_odds_chosen": 6.271382808685303, "log_odds_ratio": -0.09461876004934311, "logits/chosen": 0.35306596755981445, "logits/rejected": 0.12613160908222198, "logps/chosen": -0.6129301190376282, "logps/rejected": -5.839328765869141, "loss": 0.4305, "nll_loss": 0.42103177309036255, "rewards/accuracies": 0.875, "rewards/chosen": -0.061293017119169235, "rewards/margins": 0.5226399302482605, "rewards/rejected": -0.5839329361915588, "step": 916 }, { "epoch": 0.5704510108864697, "grad_norm": 0.43340426683425903, "learning_rate": 4.15e-06, "log_odds_chosen": 5.669953346252441, "log_odds_ratio": -0.1194998174905777, "logits/chosen": 1.3494383096694946, "logits/rejected": 1.1074585914611816, "logps/chosen": -0.9059891700744629, "logps/rejected": -6.040713787078857, "loss": 0.5614, "nll_loss": 0.5494275093078613, "rewards/accuracies": 0.875, "rewards/chosen": -0.090598925948143, "rewards/margins": 0.5134724378585815, "rewards/rejected": -0.6040713787078857, "step": 917 }, { "epoch": 0.5710730948678071, "grad_norm": 0.28480619192123413, "learning_rate": 4.1000000000000006e-06, "log_odds_chosen": 4.810488700866699, "log_odds_ratio": -0.24605686962604523, "logits/chosen": 0.9984217286109924, "logits/rejected": 0.8857402205467224, "logps/chosen": -0.8175363540649414, "logps/rejected": -5.161768436431885, "loss": 0.5676, "nll_loss": 0.5429906845092773, "rewards/accuracies": 0.875, "rewards/chosen": -0.08175363391637802, "rewards/margins": 0.43442320823669434, "rewards/rejected": -0.5161768198013306, "step": 918 }, { "epoch": 0.5716951788491447, "grad_norm": 0.3191167414188385, "learning_rate": 4.05e-06, "log_odds_chosen": 5.344412803649902, "log_odds_ratio": -0.019217826426029205, "logits/chosen": 1.5908796787261963, "logits/rejected": 0.3335094451904297, "logps/chosen": -0.9169878363609314, "logps/rejected": -5.7179670333862305, "loss": 0.6339, "nll_loss": 0.6319629549980164, "rewards/accuracies": 1.0, "rewards/chosen": -0.0916987806558609, "rewards/margins": 0.4800979793071747, "rewards/rejected": -0.5717967748641968, "step": 919 }, { "epoch": 0.5723172628304821, "grad_norm": 0.44434309005737305, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 3.852116107940674, "log_odds_ratio": -0.28635814785957336, "logits/chosen": 0.21245375275611877, "logits/rejected": 1.163073182106018, "logps/chosen": -0.973721981048584, "logps/rejected": -4.449609756469727, "loss": 0.5692, "nll_loss": 0.5406039953231812, "rewards/accuracies": 0.875, "rewards/chosen": -0.09737220406532288, "rewards/margins": 0.34758880734443665, "rewards/rejected": -0.4449610114097595, "step": 920 }, { "epoch": 0.5729393468118196, "grad_norm": 0.34492459893226624, "learning_rate": 3.95e-06, "log_odds_chosen": 6.800656795501709, "log_odds_ratio": -0.003856105264276266, "logits/chosen": 0.9772761464118958, "logits/rejected": -0.33364957571029663, "logps/chosen": -1.0136868953704834, "logps/rejected": -7.332953929901123, "loss": 0.5389, "nll_loss": 0.5384761691093445, "rewards/accuracies": 1.0, "rewards/chosen": -0.10136869549751282, "rewards/margins": 0.6319266557693481, "rewards/rejected": -0.7332954406738281, "step": 921 }, { "epoch": 0.573561430793157, "grad_norm": 1.2370455265045166, "learning_rate": 3.9e-06, "log_odds_chosen": 4.8015666007995605, "log_odds_ratio": -0.21101605892181396, "logits/chosen": 0.5794129371643066, "logits/rejected": 1.2505478858947754, "logps/chosen": -0.8675190806388855, "logps/rejected": -5.256567478179932, "loss": 0.5551, "nll_loss": 0.5340374708175659, "rewards/accuracies": 0.75, "rewards/chosen": -0.08675190806388855, "rewards/margins": 0.43890485167503357, "rewards/rejected": -0.5256567597389221, "step": 922 }, { "epoch": 0.5741835147744946, "grad_norm": 0.4660135507583618, "learning_rate": 3.85e-06, "log_odds_chosen": 3.216860055923462, "log_odds_ratio": -0.39120519161224365, "logits/chosen": 1.5594065189361572, "logits/rejected": 1.4013652801513672, "logps/chosen": -0.7943279147148132, "logps/rejected": -3.688270092010498, "loss": 0.6174, "nll_loss": 0.5782856345176697, "rewards/accuracies": 0.625, "rewards/chosen": -0.07943278551101685, "rewards/margins": 0.28939422965049744, "rewards/rejected": -0.3688269853591919, "step": 923 }, { "epoch": 0.574805598755832, "grad_norm": 0.6995812654495239, "learning_rate": 3.8e-06, "log_odds_chosen": 3.838277816772461, "log_odds_ratio": -0.18953169882297516, "logits/chosen": 0.29223647713661194, "logits/rejected": -0.4534839391708374, "logps/chosen": -1.0429164171218872, "logps/rejected": -4.500563621520996, "loss": 0.512, "nll_loss": 0.4930779039859772, "rewards/accuracies": 0.875, "rewards/chosen": -0.1042916476726532, "rewards/margins": 0.34576472640037537, "rewards/rejected": -0.45005637407302856, "step": 924 }, { "epoch": 0.5754276827371695, "grad_norm": 0.5403011441230774, "learning_rate": 3.75e-06, "log_odds_chosen": 6.133852958679199, "log_odds_ratio": -0.15583519637584686, "logits/chosen": 2.0422708988189697, "logits/rejected": 1.8932280540466309, "logps/chosen": -1.2914113998413086, "logps/rejected": -7.070437431335449, "loss": 0.6696, "nll_loss": 0.654043972492218, "rewards/accuracies": 0.875, "rewards/chosen": -0.12914115190505981, "rewards/margins": 0.5779025554656982, "rewards/rejected": -0.7070437073707581, "step": 925 }, { "epoch": 0.576049766718507, "grad_norm": 0.3176095187664032, "learning_rate": 3.7e-06, "log_odds_chosen": 6.375709056854248, "log_odds_ratio": -0.09541305154561996, "logits/chosen": 1.2084145545959473, "logits/rejected": 0.8542147874832153, "logps/chosen": -0.7740020751953125, "logps/rejected": -6.466667175292969, "loss": 0.5507, "nll_loss": 0.5411289930343628, "rewards/accuracies": 1.0, "rewards/chosen": -0.07740020006895065, "rewards/margins": 0.5692665576934814, "rewards/rejected": -0.6466667652130127, "step": 926 }, { "epoch": 0.5766718506998445, "grad_norm": 0.2633552551269531, "learning_rate": 3.6499999999999998e-06, "log_odds_chosen": 6.904919624328613, "log_odds_ratio": -0.012354625388979912, "logits/chosen": 0.22586172819137573, "logits/rejected": -1.0335397720336914, "logps/chosen": -0.9746154546737671, "logps/rejected": -7.385928153991699, "loss": 0.4561, "nll_loss": 0.45485904812812805, "rewards/accuracies": 1.0, "rewards/chosen": -0.09746154397726059, "rewards/margins": 0.6411312818527222, "rewards/rejected": -0.7385928630828857, "step": 927 }, { "epoch": 0.577293934681182, "grad_norm": 0.3632732629776001, "learning_rate": 3.6e-06, "log_odds_chosen": 7.5079522132873535, "log_odds_ratio": -0.090024434030056, "logits/chosen": 1.7778668403625488, "logits/rejected": 1.4662402868270874, "logps/chosen": -0.7078216671943665, "logps/rejected": -7.575026035308838, "loss": 0.6268, "nll_loss": 0.6177496910095215, "rewards/accuracies": 1.0, "rewards/chosen": -0.07078216224908829, "rewards/margins": 0.6867204904556274, "rewards/rejected": -0.7575026154518127, "step": 928 }, { "epoch": 0.5779160186625194, "grad_norm": 0.29226717352867126, "learning_rate": 3.55e-06, "log_odds_chosen": 7.713617324829102, "log_odds_ratio": -0.0007500970386900008, "logits/chosen": -0.28721946477890015, "logits/rejected": -1.3594586849212646, "logps/chosen": -1.1380939483642578, "logps/rejected": -8.426959991455078, "loss": 0.445, "nll_loss": 0.4448773264884949, "rewards/accuracies": 1.0, "rewards/chosen": -0.11380939930677414, "rewards/margins": 0.728886604309082, "rewards/rejected": -0.8426960706710815, "step": 929 }, { "epoch": 0.578538102643857, "grad_norm": 2.12334942817688, "learning_rate": 3.5000000000000004e-06, "log_odds_chosen": 4.998147010803223, "log_odds_ratio": -0.11884446442127228, "logits/chosen": 3.089864492416382, "logits/rejected": 2.141456365585327, "logps/chosen": -1.126284122467041, "logps/rejected": -5.71080207824707, "loss": 0.9242, "nll_loss": 0.912270188331604, "rewards/accuracies": 0.875, "rewards/chosen": -0.11262841522693634, "rewards/margins": 0.4584518074989319, "rewards/rejected": -0.571080207824707, "step": 930 }, { "epoch": 0.5791601866251944, "grad_norm": 0.42409899830818176, "learning_rate": 3.4500000000000004e-06, "log_odds_chosen": 3.8276333808898926, "log_odds_ratio": -0.16574029624462128, "logits/chosen": 0.26170673966407776, "logits/rejected": 0.38364577293395996, "logps/chosen": -1.0315451622009277, "logps/rejected": -4.483916759490967, "loss": 0.5318, "nll_loss": 0.5152575969696045, "rewards/accuracies": 0.875, "rewards/chosen": -0.10315451771020889, "rewards/margins": 0.3452371656894684, "rewards/rejected": -0.4483916759490967, "step": 931 }, { "epoch": 0.5797822706065319, "grad_norm": 0.4062352776527405, "learning_rate": 3.4000000000000005e-06, "log_odds_chosen": 3.2291901111602783, "log_odds_ratio": -0.24742823839187622, "logits/chosen": 0.7725391387939453, "logits/rejected": 0.30735743045806885, "logps/chosen": -0.930127739906311, "logps/rejected": -3.832090377807617, "loss": 0.5666, "nll_loss": 0.541907012462616, "rewards/accuracies": 0.875, "rewards/chosen": -0.09301277250051498, "rewards/margins": 0.2901962697505951, "rewards/rejected": -0.3832090198993683, "step": 932 }, { "epoch": 0.5804043545878693, "grad_norm": 0.3979949355125427, "learning_rate": 3.3500000000000005e-06, "log_odds_chosen": 4.8410234451293945, "log_odds_ratio": -0.1899186670780182, "logits/chosen": 2.149868965148926, "logits/rejected": 1.030944585800171, "logps/chosen": -0.7170071601867676, "logps/rejected": -4.952804088592529, "loss": 0.5799, "nll_loss": 0.5609343647956848, "rewards/accuracies": 1.0, "rewards/chosen": -0.07170072197914124, "rewards/margins": 0.4235796630382538, "rewards/rejected": -0.495280385017395, "step": 933 }, { "epoch": 0.5810264385692069, "grad_norm": 0.3322451114654541, "learning_rate": 3.3e-06, "log_odds_chosen": 3.1167378425598145, "log_odds_ratio": -0.3341616094112396, "logits/chosen": 0.11869756132364273, "logits/rejected": 0.63154137134552, "logps/chosen": -0.9321353435516357, "logps/rejected": -3.7424397468566895, "loss": 0.5244, "nll_loss": 0.4910022020339966, "rewards/accuracies": 0.75, "rewards/chosen": -0.0932135358452797, "rewards/margins": 0.28103047609329224, "rewards/rejected": -0.37424400448799133, "step": 934 }, { "epoch": 0.5816485225505443, "grad_norm": 0.5389782786369324, "learning_rate": 3.2500000000000002e-06, "log_odds_chosen": 3.847651243209839, "log_odds_ratio": -0.259707510471344, "logits/chosen": 1.731050968170166, "logits/rejected": 1.2012372016906738, "logps/chosen": -0.8918728828430176, "logps/rejected": -4.2864990234375, "loss": 0.754, "nll_loss": 0.7280244827270508, "rewards/accuracies": 0.75, "rewards/chosen": -0.08918728679418564, "rewards/margins": 0.3394625782966614, "rewards/rejected": -0.42864990234375, "step": 935 }, { "epoch": 0.5822706065318818, "grad_norm": 0.31316882371902466, "learning_rate": 3.2000000000000003e-06, "log_odds_chosen": 6.7213544845581055, "log_odds_ratio": -0.0976264625787735, "logits/chosen": 1.4556970596313477, "logits/rejected": 0.45087191462516785, "logps/chosen": -0.7859498262405396, "logps/rejected": -6.838555335998535, "loss": 0.5728, "nll_loss": 0.5629971623420715, "rewards/accuracies": 0.875, "rewards/chosen": -0.07859498262405396, "rewards/margins": 0.6052606105804443, "rewards/rejected": -0.6838555335998535, "step": 936 }, { "epoch": 0.5828926905132192, "grad_norm": 0.29591137170791626, "learning_rate": 3.1500000000000003e-06, "log_odds_chosen": 3.940699577331543, "log_odds_ratio": -0.2765880823135376, "logits/chosen": 2.854828119277954, "logits/rejected": 1.7593179941177368, "logps/chosen": -0.7826642990112305, "logps/rejected": -4.19375467300415, "loss": 0.6859, "nll_loss": 0.6582664251327515, "rewards/accuracies": 0.75, "rewards/chosen": -0.0782664343714714, "rewards/margins": 0.341109037399292, "rewards/rejected": -0.4193755090236664, "step": 937 }, { "epoch": 0.5835147744945568, "grad_norm": 0.33533087372779846, "learning_rate": 3.1e-06, "log_odds_chosen": 5.005894660949707, "log_odds_ratio": -0.02236769162118435, "logits/chosen": 1.9137576818466187, "logits/rejected": 2.3711225986480713, "logps/chosen": -1.10660982131958, "logps/rejected": -5.497869491577148, "loss": 0.7255, "nll_loss": 0.7232633829116821, "rewards/accuracies": 1.0, "rewards/chosen": -0.11066099256277084, "rewards/margins": 0.4391259551048279, "rewards/rejected": -0.5497869253158569, "step": 938 }, { "epoch": 0.5841368584758942, "grad_norm": 0.36376431584358215, "learning_rate": 3.05e-06, "log_odds_chosen": 4.021162986755371, "log_odds_ratio": -0.3778960406780243, "logits/chosen": 1.345890760421753, "logits/rejected": 0.1843852400779724, "logps/chosen": -0.7622407674789429, "logps/rejected": -4.160394191741943, "loss": 0.6573, "nll_loss": 0.6194642186164856, "rewards/accuracies": 0.75, "rewards/chosen": -0.07622407376766205, "rewards/margins": 0.3398153483867645, "rewards/rejected": -0.4160394072532654, "step": 939 }, { "epoch": 0.5847589424572317, "grad_norm": 0.3828088939189911, "learning_rate": 3e-06, "log_odds_chosen": 5.327631950378418, "log_odds_ratio": -0.0874582976102829, "logits/chosen": 1.6233155727386475, "logits/rejected": 1.4965178966522217, "logps/chosen": -0.850585401058197, "logps/rejected": -5.66487979888916, "loss": 0.6942, "nll_loss": 0.6854044795036316, "rewards/accuracies": 1.0, "rewards/chosen": -0.0850585475564003, "rewards/margins": 0.48142948746681213, "rewards/rejected": -0.5664880275726318, "step": 940 }, { "epoch": 0.5853810264385692, "grad_norm": 0.5152103900909424, "learning_rate": 2.95e-06, "log_odds_chosen": 2.946143388748169, "log_odds_ratio": -0.3106565773487091, "logits/chosen": 1.6423062086105347, "logits/rejected": -0.19852420687675476, "logps/chosen": -0.9775319695472717, "logps/rejected": -3.6458029747009277, "loss": 0.6133, "nll_loss": 0.5822546482086182, "rewards/accuracies": 0.875, "rewards/chosen": -0.09775320440530777, "rewards/margins": 0.2668271064758301, "rewards/rejected": -0.36458033323287964, "step": 941 }, { "epoch": 0.5860031104199067, "grad_norm": 0.38813576102256775, "learning_rate": 2.9e-06, "log_odds_chosen": 6.363837242126465, "log_odds_ratio": -0.13211406767368317, "logits/chosen": 0.037667542695999146, "logits/rejected": 0.7557448744773865, "logps/chosen": -0.7755674123764038, "logps/rejected": -6.5638628005981445, "loss": 0.4901, "nll_loss": 0.4769030809402466, "rewards/accuracies": 0.875, "rewards/chosen": -0.07755675166845322, "rewards/margins": 0.5788295269012451, "rewards/rejected": -0.6563862562179565, "step": 942 }, { "epoch": 0.5866251944012442, "grad_norm": 0.5654268264770508, "learning_rate": 2.8500000000000002e-06, "log_odds_chosen": 4.809700012207031, "log_odds_ratio": -0.0758974701166153, "logits/chosen": 1.1682859659194946, "logits/rejected": 0.6298458576202393, "logps/chosen": -0.9121124744415283, "logps/rejected": -5.2214860916137695, "loss": 0.5784, "nll_loss": 0.5708357095718384, "rewards/accuracies": 1.0, "rewards/chosen": -0.09121125191450119, "rewards/margins": 0.4309373199939728, "rewards/rejected": -0.522148609161377, "step": 943 }, { "epoch": 0.5872472783825816, "grad_norm": 0.38585397601127625, "learning_rate": 2.8000000000000003e-06, "log_odds_chosen": 5.488147735595703, "log_odds_ratio": -0.2799147367477417, "logits/chosen": 2.2300140857696533, "logits/rejected": 1.4071381092071533, "logps/chosen": -0.9285607933998108, "logps/rejected": -6.072427749633789, "loss": 0.7492, "nll_loss": 0.7212356925010681, "rewards/accuracies": 0.75, "rewards/chosen": -0.09285607933998108, "rewards/margins": 0.5143867135047913, "rewards/rejected": -0.60724276304245, "step": 944 }, { "epoch": 0.5878693623639192, "grad_norm": 0.3419051170349121, "learning_rate": 2.7500000000000004e-06, "log_odds_chosen": 6.435153484344482, "log_odds_ratio": -0.0760195329785347, "logits/chosen": 0.642075777053833, "logits/rejected": -0.014084309339523315, "logps/chosen": -1.5039077997207642, "logps/rejected": -7.517624855041504, "loss": 0.3377, "nll_loss": 0.3301193416118622, "rewards/accuracies": 1.0, "rewards/chosen": -0.15039077401161194, "rewards/margins": 0.601371705532074, "rewards/rejected": -0.7517625093460083, "step": 945 }, { "epoch": 0.5884914463452566, "grad_norm": 0.9480850696563721, "learning_rate": 2.7e-06, "log_odds_chosen": 6.029423713684082, "log_odds_ratio": -0.18862660229206085, "logits/chosen": 1.9027588367462158, "logits/rejected": 1.7689592838287354, "logps/chosen": -0.8314551711082458, "logps/rejected": -6.228260517120361, "loss": 0.555, "nll_loss": 0.5361568927764893, "rewards/accuracies": 0.875, "rewards/chosen": -0.08314551413059235, "rewards/margins": 0.539680540561676, "rewards/rejected": -0.622826099395752, "step": 946 }, { "epoch": 0.5891135303265941, "grad_norm": 0.38399749994277954, "learning_rate": 2.65e-06, "log_odds_chosen": 5.510779857635498, "log_odds_ratio": -0.11834244430065155, "logits/chosen": 0.9599204063415527, "logits/rejected": 0.9103103280067444, "logps/chosen": -0.8163239359855652, "logps/rejected": -5.78401517868042, "loss": 0.4977, "nll_loss": 0.48582959175109863, "rewards/accuracies": 0.875, "rewards/chosen": -0.08163239061832428, "rewards/margins": 0.49676913022994995, "rewards/rejected": -0.578401505947113, "step": 947 }, { "epoch": 0.5897356143079315, "grad_norm": 0.25352492928504944, "learning_rate": 2.6e-06, "log_odds_chosen": 6.789501190185547, "log_odds_ratio": -0.05249481275677681, "logits/chosen": 0.444355309009552, "logits/rejected": 1.3890902996063232, "logps/chosen": -0.7576724886894226, "logps/rejected": -6.7853617668151855, "loss": 0.4709, "nll_loss": 0.465690553188324, "rewards/accuracies": 1.0, "rewards/chosen": -0.07576724886894226, "rewards/margins": 0.6027689576148987, "rewards/rejected": -0.6785361766815186, "step": 948 }, { "epoch": 0.5903576982892691, "grad_norm": 0.6953301429748535, "learning_rate": 2.55e-06, "log_odds_chosen": 4.289721488952637, "log_odds_ratio": -0.22087326645851135, "logits/chosen": 2.951247215270996, "logits/rejected": 1.5472073554992676, "logps/chosen": -0.7607182264328003, "logps/rejected": -4.448215007781982, "loss": 0.7362, "nll_loss": 0.7141574621200562, "rewards/accuracies": 0.875, "rewards/chosen": -0.07607182115316391, "rewards/margins": 0.3687496781349182, "rewards/rejected": -0.4448215365409851, "step": 949 }, { "epoch": 0.5909797822706065, "grad_norm": 0.3470051884651184, "learning_rate": 2.5e-06, "log_odds_chosen": 7.393134117126465, "log_odds_ratio": -0.011001640930771828, "logits/chosen": 2.181663990020752, "logits/rejected": 1.1673696041107178, "logps/chosen": -0.728095531463623, "logps/rejected": -7.325507164001465, "loss": 0.674, "nll_loss": 0.672852635383606, "rewards/accuracies": 1.0, "rewards/chosen": -0.07280955463647842, "rewards/margins": 0.6597411632537842, "rewards/rejected": -0.7325507402420044, "step": 950 }, { "epoch": 0.591601866251944, "grad_norm": 0.6204577088356018, "learning_rate": 2.4500000000000003e-06, "log_odds_chosen": 3.74992299079895, "log_odds_ratio": -0.24129045009613037, "logits/chosen": 1.1197046041488647, "logits/rejected": 0.7634965181350708, "logps/chosen": -1.1183922290802002, "logps/rejected": -4.537961959838867, "loss": 0.556, "nll_loss": 0.5318636894226074, "rewards/accuracies": 0.875, "rewards/chosen": -0.11183921992778778, "rewards/margins": 0.3419570028781891, "rewards/rejected": -0.45379623770713806, "step": 951 }, { "epoch": 0.5922239502332814, "grad_norm": 0.7573308944702148, "learning_rate": 2.4000000000000003e-06, "log_odds_chosen": 3.447063684463501, "log_odds_ratio": -0.21511992812156677, "logits/chosen": 1.8121840953826904, "logits/rejected": 0.5099284648895264, "logps/chosen": -0.9625261425971985, "logps/rejected": -4.040431022644043, "loss": 0.6615, "nll_loss": 0.6400207877159119, "rewards/accuracies": 0.875, "rewards/chosen": -0.09625261276960373, "rewards/margins": 0.30779051780700684, "rewards/rejected": -0.40404313802719116, "step": 952 }, { "epoch": 0.592846034214619, "grad_norm": 0.3285903036594391, "learning_rate": 2.35e-06, "log_odds_chosen": 6.717590808868408, "log_odds_ratio": -0.002201066818088293, "logits/chosen": 1.8004734516143799, "logits/rejected": 0.05097063630819321, "logps/chosen": -0.9343357086181641, "logps/rejected": -7.141386032104492, "loss": 0.5761, "nll_loss": 0.5759262442588806, "rewards/accuracies": 1.0, "rewards/chosen": -0.09343355894088745, "rewards/margins": 0.6207050085067749, "rewards/rejected": -0.7141385674476624, "step": 953 }, { "epoch": 0.5934681181959565, "grad_norm": 0.35679325461387634, "learning_rate": 2.3e-06, "log_odds_chosen": 5.247450828552246, "log_odds_ratio": -0.25419700145721436, "logits/chosen": 2.1967992782592773, "logits/rejected": 1.1378300189971924, "logps/chosen": -0.8751773834228516, "logps/rejected": -5.667536735534668, "loss": 0.7373, "nll_loss": 0.7118582725524902, "rewards/accuracies": 0.75, "rewards/chosen": -0.08751773089170456, "rewards/margins": 0.4792359173297882, "rewards/rejected": -0.566753625869751, "step": 954 }, { "epoch": 0.5940902021772939, "grad_norm": 0.24915580451488495, "learning_rate": 2.25e-06, "log_odds_chosen": 4.880882263183594, "log_odds_ratio": -0.20119339227676392, "logits/chosen": 0.1770033836364746, "logits/rejected": 0.6878756284713745, "logps/chosen": -0.9567283391952515, "logps/rejected": -5.4595465660095215, "loss": 0.4933, "nll_loss": 0.4731942415237427, "rewards/accuracies": 0.75, "rewards/chosen": -0.09567283093929291, "rewards/margins": 0.4502818286418915, "rewards/rejected": -0.5459545850753784, "step": 955 }, { "epoch": 0.5947122861586314, "grad_norm": 1.7730331420898438, "learning_rate": 2.2e-06, "log_odds_chosen": 5.833465099334717, "log_odds_ratio": -0.09857569634914398, "logits/chosen": 2.1895477771759033, "logits/rejected": 2.4121108055114746, "logps/chosen": -1.1158063411712646, "logps/rejected": -6.559569358825684, "loss": 0.7713, "nll_loss": 0.7614297866821289, "rewards/accuracies": 0.875, "rewards/chosen": -0.11158062517642975, "rewards/margins": 0.5443763732910156, "rewards/rejected": -0.6559569835662842, "step": 956 }, { "epoch": 0.5953343701399689, "grad_norm": 0.44484496116638184, "learning_rate": 2.1499999999999997e-06, "log_odds_chosen": 8.722646713256836, "log_odds_ratio": -0.003650566330179572, "logits/chosen": 2.142489433288574, "logits/rejected": 1.7523647546768188, "logps/chosen": -0.7479883432388306, "logps/rejected": -8.673405647277832, "loss": 0.7162, "nll_loss": 0.715814471244812, "rewards/accuracies": 1.0, "rewards/chosen": -0.0747988373041153, "rewards/margins": 0.7925417423248291, "rewards/rejected": -0.8673405647277832, "step": 957 }, { "epoch": 0.5959564541213064, "grad_norm": 0.38487762212753296, "learning_rate": 2.1000000000000002e-06, "log_odds_chosen": 4.871744155883789, "log_odds_ratio": -0.34087318181991577, "logits/chosen": 2.5661439895629883, "logits/rejected": 0.9334918260574341, "logps/chosen": -0.6973855495452881, "logps/rejected": -5.2056474685668945, "loss": 0.5733, "nll_loss": 0.5392202138900757, "rewards/accuracies": 0.75, "rewards/chosen": -0.06973855197429657, "rewards/margins": 0.4508262276649475, "rewards/rejected": -0.5205647349357605, "step": 958 }, { "epoch": 0.5965785381026438, "grad_norm": 0.3092884123325348, "learning_rate": 2.0500000000000003e-06, "log_odds_chosen": 7.264720916748047, "log_odds_ratio": -0.0031101834028959274, "logits/chosen": 1.385936975479126, "logits/rejected": 0.4052692949771881, "logps/chosen": -0.8156126737594604, "logps/rejected": -7.355830669403076, "loss": 0.5032, "nll_loss": 0.5028782486915588, "rewards/accuracies": 1.0, "rewards/chosen": -0.08156126737594604, "rewards/margins": 0.6540217995643616, "rewards/rejected": -0.7355830669403076, "step": 959 }, { "epoch": 0.5972006220839814, "grad_norm": 0.5304663777351379, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 5.928452491760254, "log_odds_ratio": -0.09689196944236755, "logits/chosen": 1.602445125579834, "logits/rejected": 0.8351498246192932, "logps/chosen": -1.78778076171875, "logps/rejected": -7.4334940910339355, "loss": 0.7022, "nll_loss": 0.6925529837608337, "rewards/accuracies": 0.875, "rewards/chosen": -0.17877808213233948, "rewards/margins": 0.5645713806152344, "rewards/rejected": -0.7433494329452515, "step": 960 }, { "epoch": 0.5978227060653188, "grad_norm": 0.4608170986175537, "learning_rate": 1.95e-06, "log_odds_chosen": 4.951751232147217, "log_odds_ratio": -0.16974444687366486, "logits/chosen": 0.48448848724365234, "logits/rejected": 0.6948592662811279, "logps/chosen": -0.8525481820106506, "logps/rejected": -5.083009719848633, "loss": 0.4455, "nll_loss": 0.428507924079895, "rewards/accuracies": 0.875, "rewards/chosen": -0.08525481820106506, "rewards/margins": 0.42304617166519165, "rewards/rejected": -0.5083009600639343, "step": 961 }, { "epoch": 0.5984447900466563, "grad_norm": 0.33842310309410095, "learning_rate": 1.9e-06, "log_odds_chosen": 6.945926666259766, "log_odds_ratio": -0.14915025234222412, "logits/chosen": 1.039058804512024, "logits/rejected": 1.184691309928894, "logps/chosen": -0.9467121958732605, "logps/rejected": -7.190241813659668, "loss": 0.5227, "nll_loss": 0.5077849626541138, "rewards/accuracies": 0.875, "rewards/chosen": -0.09467122703790665, "rewards/margins": 0.6243529915809631, "rewards/rejected": -0.7190241813659668, "step": 962 }, { "epoch": 0.5990668740279937, "grad_norm": 0.7515162229537964, "learning_rate": 1.85e-06, "log_odds_chosen": 4.541479110717773, "log_odds_ratio": -0.22119711339473724, "logits/chosen": 0.734094500541687, "logits/rejected": 0.07864078134298325, "logps/chosen": -0.8313609957695007, "logps/rejected": -4.912501811981201, "loss": 0.551, "nll_loss": 0.5288982391357422, "rewards/accuracies": 0.875, "rewards/chosen": -0.08313610404729843, "rewards/margins": 0.40811410546302795, "rewards/rejected": -0.4912501573562622, "step": 963 }, { "epoch": 0.5996889580093313, "grad_norm": 0.3785715401172638, "learning_rate": 1.8e-06, "log_odds_chosen": 6.697405815124512, "log_odds_ratio": -0.020956946536898613, "logits/chosen": 1.6168622970581055, "logits/rejected": 1.023451566696167, "logps/chosen": -0.6088523864746094, "logps/rejected": -6.411888599395752, "loss": 0.567, "nll_loss": 0.5649264454841614, "rewards/accuracies": 1.0, "rewards/chosen": -0.060885243117809296, "rewards/margins": 0.5803036689758301, "rewards/rejected": -0.6411888599395752, "step": 964 }, { "epoch": 0.6003110419906688, "grad_norm": 0.32801535725593567, "learning_rate": 1.7500000000000002e-06, "log_odds_chosen": 7.661484241485596, "log_odds_ratio": -0.010182654485106468, "logits/chosen": 2.53751277923584, "logits/rejected": 1.5586211681365967, "logps/chosen": -0.7776228189468384, "logps/rejected": -7.800995826721191, "loss": 0.6569, "nll_loss": 0.6558878421783447, "rewards/accuracies": 1.0, "rewards/chosen": -0.07776228338479996, "rewards/margins": 0.7023372650146484, "rewards/rejected": -0.7800995707511902, "step": 965 }, { "epoch": 0.6009331259720062, "grad_norm": 4.966485500335693, "learning_rate": 1.7000000000000002e-06, "log_odds_chosen": 4.641650676727295, "log_odds_ratio": -0.21231749653816223, "logits/chosen": 1.5193026065826416, "logits/rejected": 0.49554136395454407, "logps/chosen": -0.9282476305961609, "logps/rejected": -5.024073123931885, "loss": 0.68, "nll_loss": 0.6587827205657959, "rewards/accuracies": 0.875, "rewards/chosen": -0.09282475709915161, "rewards/margins": 0.40958255529403687, "rewards/rejected": -0.5024073123931885, "step": 966 }, { "epoch": 0.6015552099533437, "grad_norm": 0.49166029691696167, "learning_rate": 1.65e-06, "log_odds_chosen": 4.773428916931152, "log_odds_ratio": -0.17224228382110596, "logits/chosen": 1.6243770122528076, "logits/rejected": 1.5114920139312744, "logps/chosen": -0.9838494062423706, "logps/rejected": -5.343306541442871, "loss": 0.5384, "nll_loss": 0.5212177634239197, "rewards/accuracies": 0.875, "rewards/chosen": -0.09838493913412094, "rewards/margins": 0.43594568967819214, "rewards/rejected": -0.5343306064605713, "step": 967 }, { "epoch": 0.6021772939346812, "grad_norm": 0.3038322329521179, "learning_rate": 1.6000000000000001e-06, "log_odds_chosen": 6.636908054351807, "log_odds_ratio": -0.11172385513782501, "logits/chosen": -0.003827810287475586, "logits/rejected": 0.5097255706787109, "logps/chosen": -0.8371415138244629, "logps/rejected": -6.972446918487549, "loss": 0.4479, "nll_loss": 0.4367726743221283, "rewards/accuracies": 0.875, "rewards/chosen": -0.08371415734291077, "rewards/margins": 0.6135305166244507, "rewards/rejected": -0.6972447037696838, "step": 968 }, { "epoch": 0.6027993779160187, "grad_norm": 0.42850571870803833, "learning_rate": 1.55e-06, "log_odds_chosen": 4.930295944213867, "log_odds_ratio": -0.054657693952322006, "logits/chosen": 1.7624989748001099, "logits/rejected": 1.1231470108032227, "logps/chosen": -0.9003041982650757, "logps/rejected": -5.306840419769287, "loss": 0.6412, "nll_loss": 0.6357303857803345, "rewards/accuracies": 1.0, "rewards/chosen": -0.09003043174743652, "rewards/margins": 0.44065362215042114, "rewards/rejected": -0.5306840538978577, "step": 969 }, { "epoch": 0.6034214618973561, "grad_norm": 0.33446815609931946, "learning_rate": 1.5e-06, "log_odds_chosen": 5.287704944610596, "log_odds_ratio": -0.20487284660339355, "logits/chosen": 1.4688787460327148, "logits/rejected": 1.6766948699951172, "logps/chosen": -0.9011386036872864, "logps/rejected": -5.782619476318359, "loss": 0.6234, "nll_loss": 0.6029089689254761, "rewards/accuracies": 0.75, "rewards/chosen": -0.09011386334896088, "rewards/margins": 0.4881480932235718, "rewards/rejected": -0.5782619118690491, "step": 970 }, { "epoch": 0.6040435458786936, "grad_norm": 5.544436454772949, "learning_rate": 1.45e-06, "log_odds_chosen": 6.159671783447266, "log_odds_ratio": -0.40772995352745056, "logits/chosen": 1.4968130588531494, "logits/rejected": 2.0050668716430664, "logps/chosen": -0.9551042914390564, "logps/rejected": -6.332335472106934, "loss": 0.7724, "nll_loss": 0.7315971255302429, "rewards/accuracies": 0.75, "rewards/chosen": -0.09551043063402176, "rewards/margins": 0.5377230644226074, "rewards/rejected": -0.6332335472106934, "step": 971 }, { "epoch": 0.6046656298600311, "grad_norm": 0.6907927393913269, "learning_rate": 1.4000000000000001e-06, "log_odds_chosen": 4.74246883392334, "log_odds_ratio": -0.3178080916404724, "logits/chosen": 0.3425654172897339, "logits/rejected": 1.6831358671188354, "logps/chosen": -1.2201842069625854, "logps/rejected": -5.71635627746582, "loss": 0.6642, "nll_loss": 0.6323726177215576, "rewards/accuracies": 0.75, "rewards/chosen": -0.12201841175556183, "rewards/margins": 0.4496172368526459, "rewards/rejected": -0.5716356039047241, "step": 972 }, { "epoch": 0.6052877138413686, "grad_norm": 0.3379632830619812, "learning_rate": 1.35e-06, "log_odds_chosen": 7.221384525299072, "log_odds_ratio": -0.11525887250900269, "logits/chosen": 0.7215989828109741, "logits/rejected": 0.8484539985656738, "logps/chosen": -0.808670163154602, "logps/rejected": -7.426860809326172, "loss": 0.5458, "nll_loss": 0.5342952609062195, "rewards/accuracies": 0.875, "rewards/chosen": -0.08086702227592468, "rewards/margins": 0.6618191003799438, "rewards/rejected": -0.7426860332489014, "step": 973 }, { "epoch": 0.605909797822706, "grad_norm": 0.3536883294582367, "learning_rate": 1.3e-06, "log_odds_chosen": 5.5262041091918945, "log_odds_ratio": -0.06627706438302994, "logits/chosen": -0.3007934093475342, "logits/rejected": -0.25916212797164917, "logps/chosen": -0.7046493291854858, "logps/rejected": -5.4488701820373535, "loss": 0.3994, "nll_loss": 0.39276355504989624, "rewards/accuracies": 1.0, "rewards/chosen": -0.07046493142843246, "rewards/margins": 0.4744220972061157, "rewards/rejected": -0.5448870062828064, "step": 974 }, { "epoch": 0.6065318818040435, "grad_norm": 1.1498117446899414, "learning_rate": 1.25e-06, "log_odds_chosen": 4.309863090515137, "log_odds_ratio": -0.1867905706167221, "logits/chosen": -1.1799075603485107, "logits/rejected": -1.1014156341552734, "logps/chosen": -1.2807620763778687, "logps/rejected": -5.209968566894531, "loss": 0.5001, "nll_loss": 0.4814422130584717, "rewards/accuracies": 1.0, "rewards/chosen": -0.1280762106180191, "rewards/margins": 0.3929206430912018, "rewards/rejected": -0.5209968090057373, "step": 975 }, { "epoch": 0.6071539657853811, "grad_norm": 0.7061277627944946, "learning_rate": 1.2000000000000002e-06, "log_odds_chosen": 6.085260391235352, "log_odds_ratio": -0.16170816123485565, "logits/chosen": 1.1271741390228271, "logits/rejected": 1.259488821029663, "logps/chosen": -0.8908754587173462, "logps/rejected": -6.308341026306152, "loss": 0.697, "nll_loss": 0.6808428764343262, "rewards/accuracies": 0.875, "rewards/chosen": -0.08908754587173462, "rewards/margins": 0.5417464971542358, "rewards/rejected": -0.6308341026306152, "step": 976 }, { "epoch": 0.6077760497667185, "grad_norm": 0.4019508957862854, "learning_rate": 1.15e-06, "log_odds_chosen": 5.367095470428467, "log_odds_ratio": -0.06431747227907181, "logits/chosen": 2.3293800354003906, "logits/rejected": 1.6834604740142822, "logps/chosen": -0.7575269937515259, "logps/rejected": -5.442158222198486, "loss": 0.6577, "nll_loss": 0.6512343883514404, "rewards/accuracies": 1.0, "rewards/chosen": -0.07575269788503647, "rewards/margins": 0.46846315264701843, "rewards/rejected": -0.5442157983779907, "step": 977 }, { "epoch": 0.608398133748056, "grad_norm": 4.2344136238098145, "learning_rate": 1.1e-06, "log_odds_chosen": 5.4656572341918945, "log_odds_ratio": -0.1600266396999359, "logits/chosen": 1.5850176811218262, "logits/rejected": 1.394923448562622, "logps/chosen": -0.9550702571868896, "logps/rejected": -5.794343948364258, "loss": 0.7159, "nll_loss": 0.6998506188392639, "rewards/accuracies": 0.875, "rewards/chosen": -0.09550703316926956, "rewards/margins": 0.4839273989200592, "rewards/rejected": -0.5794344544410706, "step": 978 }, { "epoch": 0.6090202177293935, "grad_norm": 0.49649715423583984, "learning_rate": 1.0500000000000001e-06, "log_odds_chosen": 6.332185745239258, "log_odds_ratio": -0.06125331670045853, "logits/chosen": 1.4913352727890015, "logits/rejected": 1.0419901609420776, "logps/chosen": -1.0030255317687988, "logps/rejected": -6.77717399597168, "loss": 0.5802, "nll_loss": 0.5740865468978882, "rewards/accuracies": 1.0, "rewards/chosen": -0.1003025621175766, "rewards/margins": 0.577414870262146, "rewards/rejected": -0.6777174472808838, "step": 979 }, { "epoch": 0.609642301710731, "grad_norm": 0.5118820667266846, "learning_rate": 1.0000000000000002e-06, "log_odds_chosen": 5.745312213897705, "log_odds_ratio": -0.14600278437137604, "logits/chosen": -0.29222479462623596, "logits/rejected": 0.4791012704372406, "logps/chosen": -1.1164146661758423, "logps/rejected": -6.352675914764404, "loss": 0.5329, "nll_loss": 0.5183060765266418, "rewards/accuracies": 0.875, "rewards/chosen": -0.11164146661758423, "rewards/margins": 0.5236261487007141, "rewards/rejected": -0.6352676153182983, "step": 980 }, { "epoch": 0.6102643856920684, "grad_norm": 0.36166447401046753, "learning_rate": 9.5e-07, "log_odds_chosen": 4.485109329223633, "log_odds_ratio": -0.2791633903980255, "logits/chosen": 1.925029993057251, "logits/rejected": 1.1006145477294922, "logps/chosen": -1.005967617034912, "logps/rejected": -5.209171295166016, "loss": 0.6741, "nll_loss": 0.6461948156356812, "rewards/accuracies": 0.75, "rewards/chosen": -0.10059677064418793, "rewards/margins": 0.42032039165496826, "rewards/rejected": -0.5209171772003174, "step": 981 }, { "epoch": 0.6108864696734059, "grad_norm": 0.3162643313407898, "learning_rate": 9e-07, "log_odds_chosen": 6.980396747589111, "log_odds_ratio": -0.0031216649804264307, "logits/chosen": 0.3812088370323181, "logits/rejected": 0.4569092392921448, "logps/chosen": -1.1486375331878662, "logps/rejected": -7.6883745193481445, "loss": 0.5023, "nll_loss": 0.5020057559013367, "rewards/accuracies": 1.0, "rewards/chosen": -0.11486375331878662, "rewards/margins": 0.6539736986160278, "rewards/rejected": -0.7688374519348145, "step": 982 }, { "epoch": 0.6115085536547434, "grad_norm": 0.5131247043609619, "learning_rate": 8.500000000000001e-07, "log_odds_chosen": 6.256967544555664, "log_odds_ratio": -0.09810786694288254, "logits/chosen": 1.9399669170379639, "logits/rejected": 1.8002293109893799, "logps/chosen": -0.7033792734146118, "logps/rejected": -6.037574768066406, "loss": 0.7078, "nll_loss": 0.6979826092720032, "rewards/accuracies": 1.0, "rewards/chosen": -0.0703379288315773, "rewards/margins": 0.5334195494651794, "rewards/rejected": -0.6037575006484985, "step": 983 }, { "epoch": 0.6121306376360809, "grad_norm": 0.3207487165927887, "learning_rate": 8.000000000000001e-07, "log_odds_chosen": 6.562335968017578, "log_odds_ratio": -0.012317328713834286, "logits/chosen": 2.2676196098327637, "logits/rejected": 0.9253313541412354, "logps/chosen": -1.3998887538909912, "logps/rejected": -7.568476676940918, "loss": 0.6606, "nll_loss": 0.6593425869941711, "rewards/accuracies": 1.0, "rewards/chosen": -0.13998886942863464, "rewards/margins": 0.6168588399887085, "rewards/rejected": -0.756847620010376, "step": 984 }, { "epoch": 0.6127527216174183, "grad_norm": 1.0857791900634766, "learning_rate": 7.5e-07, "log_odds_chosen": 6.333230018615723, "log_odds_ratio": -0.10016593337059021, "logits/chosen": -0.028537094593048096, "logits/rejected": -0.48187828063964844, "logps/chosen": -1.0475420951843262, "logps/rejected": -6.9887003898620605, "loss": 0.4935, "nll_loss": 0.4835060238838196, "rewards/accuracies": 0.875, "rewards/chosen": -0.10475420206785202, "rewards/margins": 0.5941158533096313, "rewards/rejected": -0.698870062828064, "step": 985 }, { "epoch": 0.6133748055987558, "grad_norm": 0.2819894254207611, "learning_rate": 7.000000000000001e-07, "log_odds_chosen": 8.16779899597168, "log_odds_ratio": -0.0010539994109421968, "logits/chosen": 0.38561496138572693, "logits/rejected": 0.6741596460342407, "logps/chosen": -0.7265850305557251, "logps/rejected": -8.145223617553711, "loss": 0.5776, "nll_loss": 0.5774969458580017, "rewards/accuracies": 1.0, "rewards/chosen": -0.07265850901603699, "rewards/margins": 0.7418637871742249, "rewards/rejected": -0.8145222663879395, "step": 986 }, { "epoch": 0.6139968895800934, "grad_norm": 0.3129705488681793, "learning_rate": 6.5e-07, "log_odds_chosen": 6.294776439666748, "log_odds_ratio": -0.01629718951880932, "logits/chosen": 0.7048096656799316, "logits/rejected": 0.23704242706298828, "logps/chosen": -0.9289849400520325, "logps/rejected": -6.6981096267700195, "loss": 0.5134, "nll_loss": 0.5117835402488708, "rewards/accuracies": 1.0, "rewards/chosen": -0.09289849549531937, "rewards/margins": 0.5769124627113342, "rewards/rejected": -0.669810950756073, "step": 987 }, { "epoch": 0.6146189735614308, "grad_norm": 0.3183548152446747, "learning_rate": 6.000000000000001e-07, "log_odds_chosen": 6.734692573547363, "log_odds_ratio": -0.03981401398777962, "logits/chosen": 0.0914728045463562, "logits/rejected": -0.8617876172065735, "logps/chosen": -1.0856958627700806, "logps/rejected": -7.369524955749512, "loss": 0.4957, "nll_loss": 0.49168896675109863, "rewards/accuracies": 1.0, "rewards/chosen": -0.10856959223747253, "rewards/margins": 0.6283829212188721, "rewards/rejected": -0.736952543258667, "step": 988 }, { "epoch": 0.6152410575427683, "grad_norm": 0.6235529184341431, "learning_rate": 5.5e-07, "log_odds_chosen": 6.597626686096191, "log_odds_ratio": -0.0906713604927063, "logits/chosen": 3.484645128250122, "logits/rejected": 3.2487385272979736, "logps/chosen": -0.6023905873298645, "logps/rejected": -6.2580156326293945, "loss": 0.875, "nll_loss": 0.8659148812294006, "rewards/accuracies": 1.0, "rewards/chosen": -0.06023906171321869, "rewards/margins": 0.5655625462532043, "rewards/rejected": -0.6258015632629395, "step": 989 }, { "epoch": 0.6158631415241057, "grad_norm": 0.8194119334220886, "learning_rate": 5.000000000000001e-07, "log_odds_chosen": 4.316708087921143, "log_odds_ratio": -0.18914556503295898, "logits/chosen": 3.4032726287841797, "logits/rejected": 2.219083309173584, "logps/chosen": -0.8312450647354126, "logps/rejected": -4.661098957061768, "loss": 0.8722, "nll_loss": 0.8533015251159668, "rewards/accuracies": 1.0, "rewards/chosen": -0.08312450349330902, "rewards/margins": 0.3829854130744934, "rewards/rejected": -0.4661099314689636, "step": 990 }, { "epoch": 0.6164852255054433, "grad_norm": 1.9890536069869995, "learning_rate": 4.5e-07, "log_odds_chosen": 4.387207984924316, "log_odds_ratio": -0.2528379559516907, "logits/chosen": 2.1739017963409424, "logits/rejected": 2.1724283695220947, "logps/chosen": -1.1620302200317383, "logps/rejected": -5.209095478057861, "loss": 0.8117, "nll_loss": 0.7864143252372742, "rewards/accuracies": 0.75, "rewards/chosen": -0.11620301753282547, "rewards/margins": 0.40470650792121887, "rewards/rejected": -0.5209095478057861, "step": 991 }, { "epoch": 0.6171073094867807, "grad_norm": 0.3827112317085266, "learning_rate": 4.0000000000000003e-07, "log_odds_chosen": 3.7871646881103516, "log_odds_ratio": -0.2918343245983124, "logits/chosen": 0.2715454697608948, "logits/rejected": 1.047527551651001, "logps/chosen": -0.6410025358200073, "logps/rejected": -3.8774826526641846, "loss": 0.5016, "nll_loss": 0.47238293290138245, "rewards/accuracies": 0.875, "rewards/chosen": -0.06410025805234909, "rewards/margins": 0.32364803552627563, "rewards/rejected": -0.38774827122688293, "step": 992 }, { "epoch": 0.6177293934681182, "grad_norm": 0.40105417370796204, "learning_rate": 3.5000000000000004e-07, "log_odds_chosen": 3.676414966583252, "log_odds_ratio": -0.37624049186706543, "logits/chosen": 2.6079487800598145, "logits/rejected": 1.3168003559112549, "logps/chosen": -0.7987608313560486, "logps/rejected": -4.06448221206665, "loss": 0.784, "nll_loss": 0.7463983297348022, "rewards/accuracies": 0.625, "rewards/chosen": -0.07987608015537262, "rewards/margins": 0.32657214999198914, "rewards/rejected": -0.40644824504852295, "step": 993 }, { "epoch": 0.6183514774494556, "grad_norm": 0.5913990139961243, "learning_rate": 3.0000000000000004e-07, "log_odds_chosen": 5.702366828918457, "log_odds_ratio": -0.13208632171154022, "logits/chosen": 0.6971375346183777, "logits/rejected": 0.4381665587425232, "logps/chosen": -0.904596209526062, "logps/rejected": -6.130751609802246, "loss": 0.4285, "nll_loss": 0.415324866771698, "rewards/accuracies": 1.0, "rewards/chosen": -0.09045961499214172, "rewards/margins": 0.5226155519485474, "rewards/rejected": -0.6130751371383667, "step": 994 }, { "epoch": 0.6189735614307932, "grad_norm": 0.3188931941986084, "learning_rate": 2.5000000000000004e-07, "log_odds_chosen": 5.129542350769043, "log_odds_ratio": -0.12642808258533478, "logits/chosen": 0.5445621013641357, "logits/rejected": 0.43241095542907715, "logps/chosen": -1.0532197952270508, "logps/rejected": -5.681353569030762, "loss": 0.5615, "nll_loss": 0.5489034652709961, "rewards/accuracies": 0.875, "rewards/chosen": -0.1053219810128212, "rewards/margins": 0.4628134071826935, "rewards/rejected": -0.5681353807449341, "step": 995 }, { "epoch": 0.6195956454121306, "grad_norm": 0.467891126871109, "learning_rate": 2.0000000000000002e-07, "log_odds_chosen": 3.8318843841552734, "log_odds_ratio": -0.21675176918506622, "logits/chosen": 1.3111586570739746, "logits/rejected": 1.6427356004714966, "logps/chosen": -1.457216739654541, "logps/rejected": -5.035771369934082, "loss": 0.5957, "nll_loss": 0.5739951133728027, "rewards/accuracies": 0.875, "rewards/chosen": -0.1457216888666153, "rewards/margins": 0.3578554689884186, "rewards/rejected": -0.5035771727561951, "step": 996 }, { "epoch": 0.6202177293934681, "grad_norm": 0.4008631706237793, "learning_rate": 1.5000000000000002e-07, "log_odds_chosen": 3.40852952003479, "log_odds_ratio": -0.2698494493961334, "logits/chosen": 1.4783838987350464, "logits/rejected": 1.5272125005722046, "logps/chosen": -1.0040234327316284, "logps/rejected": -4.11934232711792, "loss": 0.6774, "nll_loss": 0.6503931283950806, "rewards/accuracies": 0.75, "rewards/chosen": -0.1004023477435112, "rewards/margins": 0.3115319013595581, "rewards/rejected": -0.4119342267513275, "step": 997 }, { "epoch": 0.6208398133748056, "grad_norm": 0.535465657711029, "learning_rate": 1.0000000000000001e-07, "log_odds_chosen": 6.407077789306641, "log_odds_ratio": -0.03647714480757713, "logits/chosen": 0.8850218653678894, "logits/rejected": 1.0664775371551514, "logps/chosen": -0.763054370880127, "logps/rejected": -6.34374475479126, "loss": 0.5953, "nll_loss": 0.5916392803192139, "rewards/accuracies": 1.0, "rewards/chosen": -0.07630544155836105, "rewards/margins": 0.5580690503120422, "rewards/rejected": -0.6343744993209839, "step": 998 }, { "epoch": 0.6214618973561431, "grad_norm": 0.29914966225624084, "learning_rate": 5.0000000000000004e-08, "log_odds_chosen": 6.531560897827148, "log_odds_ratio": -0.11589479446411133, "logits/chosen": 1.1404070854187012, "logits/rejected": 0.8704703450202942, "logps/chosen": -0.751317024230957, "logps/rejected": -6.46155309677124, "loss": 0.5341, "nll_loss": 0.5225302577018738, "rewards/accuracies": 0.875, "rewards/chosen": -0.07513170689344406, "rewards/margins": 0.5710236430168152, "rewards/rejected": -0.6461552977561951, "step": 999 }, { "epoch": 0.6220839813374806, "grad_norm": 0.3536701202392578, "learning_rate": 0.0, "log_odds_chosen": 6.4564714431762695, "log_odds_ratio": -0.04604131728410721, "logits/chosen": 2.8046364784240723, "logits/rejected": 0.9901584386825562, "logps/chosen": -1.0534753799438477, "logps/rejected": -7.014216899871826, "loss": 0.7159, "nll_loss": 0.7113260626792908, "rewards/accuracies": 1.0, "rewards/chosen": -0.10534752905368805, "rewards/margins": 0.596074104309082, "rewards/rejected": -0.7014216184616089, "step": 1000 } ], "logging_steps": 1, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }