diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,18033 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6220839813374806, + "eval_steps": 500, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0006220839813374805, + "grad_norm": 1.5990997552871704, + "learning_rate": 4.995e-05, + "log_odds_chosen": -0.5121189951896667, + "log_odds_ratio": -1.175696849822998, + "logits/chosen": -3.061245918273926, + "logits/rejected": 0.8060249090194702, + "logps/chosen": -1.5489490032196045, + "logps/rejected": -1.0588122606277466, + "loss": 0.9422, + "nll_loss": 0.8245992660522461, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1548949033021927, + "rewards/margins": -0.04901367798447609, + "rewards/rejected": -0.1058812290430069, + "step": 1 + }, + { + "epoch": 0.001244167962674961, + "grad_norm": 0.43756288290023804, + "learning_rate": 4.99e-05, + "log_odds_chosen": 0.13479183614253998, + "log_odds_ratio": -0.6995198726654053, + "logits/chosen": -0.5496019721031189, + "logits/rejected": 0.916201114654541, + "logps/chosen": -1.1331713199615479, + "logps/rejected": -1.1910749673843384, + "loss": 1.1757, + "nll_loss": 1.105738639831543, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11331713944673538, + "rewards/margins": 0.005790362134575844, + "rewards/rejected": -0.11910750716924667, + "step": 2 + }, + { + "epoch": 0.0018662519440124418, + "grad_norm": 0.6946638822555542, + "learning_rate": 4.9850000000000006e-05, + "log_odds_chosen": -0.7716032862663269, + "log_odds_ratio": -1.2915821075439453, + "logits/chosen": -1.1749107837677002, + "logits/rejected": 0.48298171162605286, + "logps/chosen": -1.713394284248352, + "logps/rejected": -1.0794651508331299, + "loss": 1.1266, + "nll_loss": 0.9974524974822998, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.17133943736553192, + "rewards/margins": -0.06339291483163834, + "rewards/rejected": -0.10794650763273239, + "step": 3 + }, + { + "epoch": 0.002488335925349922, + "grad_norm": 0.3383781611919403, + "learning_rate": 4.9800000000000004e-05, + "log_odds_chosen": -1.0775129795074463, + "log_odds_ratio": -1.6865262985229492, + "logits/chosen": -0.8680384159088135, + "logits/rejected": 1.1301361322402954, + "logps/chosen": -2.1693308353424072, + "logps/rejected": -1.1649184226989746, + "loss": 1.0211, + "nll_loss": 0.8524219393730164, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.21693310141563416, + "rewards/margins": -0.10044124722480774, + "rewards/rejected": -0.11649185419082642, + "step": 4 + }, + { + "epoch": 0.003110419906687403, + "grad_norm": 0.6052569150924683, + "learning_rate": 4.975e-05, + "log_odds_chosen": -0.2497221976518631, + "log_odds_ratio": -0.9002423286437988, + "logits/chosen": 2.113248348236084, + "logits/rejected": 1.7459735870361328, + "logps/chosen": -1.5321459770202637, + "logps/rejected": -1.26905357837677, + "loss": 1.3045, + "nll_loss": 1.214469075202942, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.15321458876132965, + "rewards/margins": -0.02630922943353653, + "rewards/rejected": -0.12690536677837372, + "step": 5 + }, + { + "epoch": 0.0037325038880248835, + "grad_norm": 3.1706557273864746, + "learning_rate": 4.97e-05, + "log_odds_chosen": -1.2696902751922607, + "log_odds_ratio": -1.8387222290039062, + "logits/chosen": 0.5090468525886536, + "logits/rejected": 1.799258828163147, + "logps/chosen": -2.498434066772461, + "logps/rejected": -1.3061614036560059, + "loss": 1.159, + "nll_loss": 0.9750999212265015, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.24984338879585266, + "rewards/margins": -0.11922725290060043, + "rewards/rejected": -0.13061614334583282, + "step": 6 + }, + { + "epoch": 0.004354587869362364, + "grad_norm": 0.22779366374015808, + "learning_rate": 4.965e-05, + "log_odds_chosen": -0.6782891154289246, + "log_odds_ratio": -1.229583740234375, + "logits/chosen": 0.6050777435302734, + "logits/rejected": 1.8784853219985962, + "logps/chosen": -1.7958674430847168, + "logps/rejected": -1.2137447595596313, + "loss": 0.9138, + "nll_loss": 0.7908901572227478, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.1795867532491684, + "rewards/margins": -0.0582122765481472, + "rewards/rejected": -0.1213744729757309, + "step": 7 + }, + { + "epoch": 0.004976671850699844, + "grad_norm": 0.22942067682743073, + "learning_rate": 4.96e-05, + "log_odds_chosen": 0.006755713373422623, + "log_odds_ratio": -0.7070448398590088, + "logits/chosen": 2.0835607051849365, + "logits/rejected": 2.2670180797576904, + "logps/chosen": -1.0497729778289795, + "logps/rejected": -1.0410140752792358, + "loss": 1.0557, + "nll_loss": 0.9849987626075745, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10497729480266571, + "rewards/margins": -0.0008758809417486191, + "rewards/rejected": -0.10410141199827194, + "step": 8 + }, + { + "epoch": 0.005598755832037325, + "grad_norm": 1.5027852058410645, + "learning_rate": 4.9550000000000005e-05, + "log_odds_chosen": -1.2336622476577759, + "log_odds_ratio": -1.7122440338134766, + "logits/chosen": 1.264065146446228, + "logits/rejected": 2.8031373023986816, + "logps/chosen": -2.152813673019409, + "logps/rejected": -1.0926671028137207, + "loss": 1.0961, + "nll_loss": 0.9248759746551514, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.21528135240077972, + "rewards/margins": -0.1060146614909172, + "rewards/rejected": -0.10926671326160431, + "step": 9 + }, + { + "epoch": 0.006220839813374806, + "grad_norm": 7.84841775894165, + "learning_rate": 4.9500000000000004e-05, + "log_odds_chosen": -0.36369776725769043, + "log_odds_ratio": -1.0270187854766846, + "logits/chosen": 1.8378279209136963, + "logits/rejected": 2.67022705078125, + "logps/chosen": -1.7057809829711914, + "logps/rejected": -1.3585100173950195, + "loss": 1.0536, + "nll_loss": 0.9509172439575195, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.17057809233665466, + "rewards/margins": -0.034727081656455994, + "rewards/rejected": -0.13585101068019867, + "step": 10 + }, + { + "epoch": 0.006842923794712286, + "grad_norm": 0.20901276171207428, + "learning_rate": 4.945e-05, + "log_odds_chosen": -0.23809382319450378, + "log_odds_ratio": -0.8853789567947388, + "logits/chosen": 1.5684062242507935, + "logits/rejected": 2.971144199371338, + "logps/chosen": -1.1353232860565186, + "logps/rejected": -0.9783434867858887, + "loss": 0.9364, + "nll_loss": 0.8478444814682007, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.11353233456611633, + "rewards/margins": -0.015697985887527466, + "rewards/rejected": -0.09783434867858887, + "step": 11 + }, + { + "epoch": 0.007465007776049767, + "grad_norm": 0.24611681699752808, + "learning_rate": 4.94e-05, + "log_odds_chosen": -0.17657308280467987, + "log_odds_ratio": -0.8087456226348877, + "logits/chosen": 3.477548122406006, + "logits/rejected": 4.530710697174072, + "logps/chosen": -1.1894445419311523, + "logps/rejected": -1.0661442279815674, + "loss": 1.1843, + "nll_loss": 1.1034001111984253, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.11894445866346359, + "rewards/margins": -0.012330022640526295, + "rewards/rejected": -0.10661444067955017, + "step": 12 + }, + { + "epoch": 0.008087091757387248, + "grad_norm": 0.217342346906662, + "learning_rate": 4.935e-05, + "log_odds_chosen": -0.23449140787124634, + "log_odds_ratio": -0.8244205713272095, + "logits/chosen": 2.5574779510498047, + "logits/rejected": 4.194855690002441, + "logps/chosen": -1.2394225597381592, + "logps/rejected": -1.0743162631988525, + "loss": 1.1663, + "nll_loss": 1.0838088989257812, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.12394225597381592, + "rewards/margins": -0.01651064306497574, + "rewards/rejected": -0.10743162035942078, + "step": 13 + }, + { + "epoch": 0.008709175738724729, + "grad_norm": 0.4070003032684326, + "learning_rate": 4.93e-05, + "log_odds_chosen": -1.379059910774231, + "log_odds_ratio": -2.258007287979126, + "logits/chosen": 1.8881428241729736, + "logits/rejected": 2.1496148109436035, + "logps/chosen": -2.8094940185546875, + "logps/rejected": -1.301595687866211, + "loss": 1.0798, + "nll_loss": 0.8539876937866211, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2809494137763977, + "rewards/margins": -0.15078984200954437, + "rewards/rejected": -0.13015958666801453, + "step": 14 + }, + { + "epoch": 0.00933125972006221, + "grad_norm": 0.23260748386383057, + "learning_rate": 4.9250000000000004e-05, + "log_odds_chosen": -0.16578805446624756, + "log_odds_ratio": -0.7918254137039185, + "logits/chosen": 2.4860520362854004, + "logits/rejected": 3.8841402530670166, + "logps/chosen": -1.3346906900405884, + "logps/rejected": -1.2134218215942383, + "loss": 1.1331, + "nll_loss": 1.0539392232894897, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.13346907496452332, + "rewards/margins": -0.012126876041293144, + "rewards/rejected": -0.12134218215942383, + "step": 15 + }, + { + "epoch": 0.009953343701399688, + "grad_norm": 0.1825733780860901, + "learning_rate": 4.92e-05, + "log_odds_chosen": -0.3614625632762909, + "log_odds_ratio": -0.9392529129981995, + "logits/chosen": 2.31900691986084, + "logits/rejected": 2.9353525638580322, + "logps/chosen": -1.47523033618927, + "logps/rejected": -1.185972809791565, + "loss": 1.082, + "nll_loss": 0.9881048202514648, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.14752303063869476, + "rewards/margins": -0.02892574481666088, + "rewards/rejected": -0.11859728395938873, + "step": 16 + }, + { + "epoch": 0.010575427682737169, + "grad_norm": 0.21975092589855194, + "learning_rate": 4.915e-05, + "log_odds_chosen": -0.3503631353378296, + "log_odds_ratio": -0.8912189602851868, + "logits/chosen": 1.604474425315857, + "logits/rejected": 2.647216320037842, + "logps/chosen": -1.483432650566101, + "logps/rejected": -1.2270509004592896, + "loss": 0.8812, + "nll_loss": 0.7920456528663635, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.1483432650566101, + "rewards/margins": -0.02563817799091339, + "rewards/rejected": -0.12270509451627731, + "step": 17 + }, + { + "epoch": 0.01119751166407465, + "grad_norm": 0.7166666388511658, + "learning_rate": 4.91e-05, + "log_odds_chosen": 0.13804574310779572, + "log_odds_ratio": -0.645270586013794, + "logits/chosen": 2.1303985118865967, + "logits/rejected": 3.5221128463745117, + "logps/chosen": -1.138808012008667, + "logps/rejected": -1.2420754432678223, + "loss": 0.8695, + "nll_loss": 0.8049733638763428, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11388080567121506, + "rewards/margins": 0.010326748713850975, + "rewards/rejected": -0.12420755624771118, + "step": 18 + }, + { + "epoch": 0.01181959564541213, + "grad_norm": 0.3583829402923584, + "learning_rate": 4.905e-05, + "log_odds_chosen": -0.710419774055481, + "log_odds_ratio": -1.2239866256713867, + "logits/chosen": 2.4776275157928467, + "logits/rejected": 3.7551608085632324, + "logps/chosen": -1.6493169069290161, + "logps/rejected": -1.0448060035705566, + "loss": 0.945, + "nll_loss": 0.8225875496864319, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.16493168473243713, + "rewards/margins": -0.06045108661055565, + "rewards/rejected": -0.10448060184717178, + "step": 19 + }, + { + "epoch": 0.012441679626749611, + "grad_norm": 0.2361656278371811, + "learning_rate": 4.9e-05, + "log_odds_chosen": -0.10248635709285736, + "log_odds_ratio": -0.7523667216300964, + "logits/chosen": 1.7299888134002686, + "logits/rejected": 3.813870906829834, + "logps/chosen": -1.0503981113433838, + "logps/rejected": -0.9909194707870483, + "loss": 0.8247, + "nll_loss": 0.7494850158691406, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.1050398200750351, + "rewards/margins": -0.005947869271039963, + "rewards/rejected": -0.09909194707870483, + "step": 20 + }, + { + "epoch": 0.013063763608087092, + "grad_norm": 0.5069213509559631, + "learning_rate": 4.8950000000000004e-05, + "log_odds_chosen": -0.12599247694015503, + "log_odds_ratio": -0.7684826254844666, + "logits/chosen": 1.8015549182891846, + "logits/rejected": 3.3546299934387207, + "logps/chosen": -1.3068116903305054, + "logps/rejected": -1.199318289756775, + "loss": 0.9884, + "nll_loss": 0.9115766286849976, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.13068117201328278, + "rewards/margins": -0.010749343782663345, + "rewards/rejected": -0.11993182450532913, + "step": 21 + }, + { + "epoch": 0.013685847589424573, + "grad_norm": 0.21723033487796783, + "learning_rate": 4.89e-05, + "log_odds_chosen": -0.027980871498584747, + "log_odds_ratio": -0.7586711049079895, + "logits/chosen": 2.1451902389526367, + "logits/rejected": 3.524837017059326, + "logps/chosen": -1.201343297958374, + "logps/rejected": -1.1644256114959717, + "loss": 0.9602, + "nll_loss": 0.8842926025390625, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.12013433873653412, + "rewards/margins": -0.003691784106194973, + "rewards/rejected": -0.11644256114959717, + "step": 22 + }, + { + "epoch": 0.014307931570762053, + "grad_norm": 0.7032377123832703, + "learning_rate": 4.885e-05, + "log_odds_chosen": 0.06119448319077492, + "log_odds_ratio": -0.6719111204147339, + "logits/chosen": 2.8135011196136475, + "logits/rejected": 4.099876880645752, + "logps/chosen": -1.3067809343338013, + "logps/rejected": -1.3609553575515747, + "loss": 1.0529, + "nll_loss": 0.9857478141784668, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.13067808747291565, + "rewards/margins": 0.005417431704699993, + "rewards/rejected": -0.13609552383422852, + "step": 23 + }, + { + "epoch": 0.014930015552099534, + "grad_norm": 0.16798748075962067, + "learning_rate": 4.88e-05, + "log_odds_chosen": -0.146559938788414, + "log_odds_ratio": -0.7807488441467285, + "logits/chosen": 2.9722163677215576, + "logits/rejected": 4.2791242599487305, + "logps/chosen": -1.1158338785171509, + "logps/rejected": -1.0221004486083984, + "loss": 1.0413, + "nll_loss": 0.9632115960121155, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.1115833967924118, + "rewards/margins": -0.009373345412313938, + "rewards/rejected": -0.10221004486083984, + "step": 24 + }, + { + "epoch": 0.015552099533437015, + "grad_norm": 0.32012248039245605, + "learning_rate": 4.875e-05, + "log_odds_chosen": -0.18442490696907043, + "log_odds_ratio": -0.9081165790557861, + "logits/chosen": 2.4388880729675293, + "logits/rejected": 3.846700668334961, + "logps/chosen": -1.4206616878509521, + "logps/rejected": -1.2228360176086426, + "loss": 0.974, + "nll_loss": 0.8832047581672668, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.14206618070602417, + "rewards/margins": -0.019782574847340584, + "rewards/rejected": -0.12228360772132874, + "step": 25 + }, + { + "epoch": 0.016174183514774496, + "grad_norm": 0.33273598551750183, + "learning_rate": 4.87e-05, + "log_odds_chosen": -0.6990683674812317, + "log_odds_ratio": -1.3674430847167969, + "logits/chosen": 3.5793888568878174, + "logits/rejected": 3.8019113540649414, + "logps/chosen": -1.9460030794143677, + "logps/rejected": -1.294486403465271, + "loss": 1.2106, + "nll_loss": 1.0738792419433594, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.19460032880306244, + "rewards/margins": -0.06515169143676758, + "rewards/rejected": -0.12944863736629486, + "step": 26 + }, + { + "epoch": 0.016796267496111975, + "grad_norm": 3.539905071258545, + "learning_rate": 4.8650000000000003e-05, + "log_odds_chosen": 0.11518344283103943, + "log_odds_ratio": -0.6598777770996094, + "logits/chosen": 2.098487615585327, + "logits/rejected": 1.9545716047286987, + "logps/chosen": -1.1680437326431274, + "logps/rejected": -1.252279281616211, + "loss": 0.8025, + "nll_loss": 0.7365168929100037, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11680437624454498, + "rewards/margins": 0.008423548191785812, + "rewards/rejected": -0.1252279132604599, + "step": 27 + }, + { + "epoch": 0.017418351477449457, + "grad_norm": 3.020901679992676, + "learning_rate": 4.86e-05, + "log_odds_chosen": -0.5429134964942932, + "log_odds_ratio": -1.1207876205444336, + "logits/chosen": 2.6301114559173584, + "logits/rejected": 3.8189711570739746, + "logps/chosen": -1.7131836414337158, + "logps/rejected": -1.2582485675811768, + "loss": 1.0718, + "nll_loss": 0.9597415924072266, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.17131835222244263, + "rewards/margins": -0.04549350216984749, + "rewards/rejected": -0.12582485377788544, + "step": 28 + }, + { + "epoch": 0.018040435458786936, + "grad_norm": 0.16388735175132751, + "learning_rate": 4.855e-05, + "log_odds_chosen": -0.20636707544326782, + "log_odds_ratio": -0.8130278587341309, + "logits/chosen": 2.4605510234832764, + "logits/rejected": 3.5745866298675537, + "logps/chosen": -1.1986327171325684, + "logps/rejected": -1.058274269104004, + "loss": 0.9229, + "nll_loss": 0.8415518999099731, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.11986327171325684, + "rewards/margins": -0.014035841450095177, + "rewards/rejected": -0.10582742094993591, + "step": 29 + }, + { + "epoch": 0.01866251944012442, + "grad_norm": 0.19232743978500366, + "learning_rate": 4.85e-05, + "log_odds_chosen": -0.2556931674480438, + "log_odds_ratio": -0.8336398601531982, + "logits/chosen": 2.9968199729919434, + "logits/rejected": 4.6838765144348145, + "logps/chosen": -1.0448908805847168, + "logps/rejected": -0.8920204043388367, + "loss": 1.0125, + "nll_loss": 0.9291301965713501, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.10448908805847168, + "rewards/margins": -0.015287039801478386, + "rewards/rejected": -0.08920204639434814, + "step": 30 + }, + { + "epoch": 0.019284603421461897, + "grad_norm": 0.15270353853702545, + "learning_rate": 4.845e-05, + "log_odds_chosen": -0.31180787086486816, + "log_odds_ratio": -0.9033458828926086, + "logits/chosen": 2.6127586364746094, + "logits/rejected": 3.483029365539551, + "logps/chosen": -1.1772806644439697, + "logps/rejected": -0.9254850149154663, + "loss": 0.8605, + "nll_loss": 0.7702099084854126, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.11772806942462921, + "rewards/margins": -0.02517956867814064, + "rewards/rejected": -0.09254850447177887, + "step": 31 + }, + { + "epoch": 0.019906687402799376, + "grad_norm": 0.24742813408374786, + "learning_rate": 4.8400000000000004e-05, + "log_odds_chosen": -0.4165438413619995, + "log_odds_ratio": -0.9416205883026123, + "logits/chosen": 1.1703848838806152, + "logits/rejected": 3.2198734283447266, + "logps/chosen": -1.3125711679458618, + "logps/rejected": -1.0129786729812622, + "loss": 0.8494, + "nll_loss": 0.7552543878555298, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.13125711679458618, + "rewards/margins": -0.02995925024151802, + "rewards/rejected": -0.10129787027835846, + "step": 32 + }, + { + "epoch": 0.02052877138413686, + "grad_norm": 0.20717312395572662, + "learning_rate": 4.835e-05, + "log_odds_chosen": -0.43756213784217834, + "log_odds_ratio": -1.0350837707519531, + "logits/chosen": 3.0430374145507812, + "logits/rejected": 5.120755672454834, + "logps/chosen": -1.3322926759719849, + "logps/rejected": -0.9562426805496216, + "loss": 1.0423, + "nll_loss": 0.9388405084609985, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.13322927057743073, + "rewards/margins": -0.03760499879717827, + "rewards/rejected": -0.09562426805496216, + "step": 33 + }, + { + "epoch": 0.021150855365474338, + "grad_norm": 0.6029897928237915, + "learning_rate": 4.83e-05, + "log_odds_chosen": -0.041211143136024475, + "log_odds_ratio": -0.7303495407104492, + "logits/chosen": 2.8978190422058105, + "logits/rejected": 4.314732551574707, + "logps/chosen": -1.2479820251464844, + "logps/rejected": -1.219968557357788, + "loss": 0.9818, + "nll_loss": 0.9087687134742737, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.12479820847511292, + "rewards/margins": -0.0028013568371534348, + "rewards/rejected": -0.12199684977531433, + "step": 34 + }, + { + "epoch": 0.02177293934681182, + "grad_norm": 0.2770039141178131, + "learning_rate": 4.825e-05, + "log_odds_chosen": -0.07565765082836151, + "log_odds_ratio": -0.7700670957565308, + "logits/chosen": 3.4843027591705322, + "logits/rejected": 3.697402000427246, + "logps/chosen": -1.2663030624389648, + "logps/rejected": -1.226333498954773, + "loss": 1.1064, + "nll_loss": 1.0294300317764282, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.12663030624389648, + "rewards/margins": -0.003996945917606354, + "rewards/rejected": -0.12263335287570953, + "step": 35 + }, + { + "epoch": 0.0223950233281493, + "grad_norm": 0.19298230111598969, + "learning_rate": 4.82e-05, + "log_odds_chosen": 0.4019961357116699, + "log_odds_ratio": -0.5303748846054077, + "logits/chosen": 2.6493451595306396, + "logits/rejected": 4.199905872344971, + "logps/chosen": -0.8650291562080383, + "logps/rejected": -1.1187388896942139, + "loss": 0.9015, + "nll_loss": 0.8484418392181396, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08650290966033936, + "rewards/margins": 0.025370977818965912, + "rewards/rejected": -0.11187389492988586, + "step": 36 + }, + { + "epoch": 0.023017107309486782, + "grad_norm": 0.2592315673828125, + "learning_rate": 4.815e-05, + "log_odds_chosen": -0.47786006331443787, + "log_odds_ratio": -1.1106913089752197, + "logits/chosen": 1.911085605621338, + "logits/rejected": 3.474100351333618, + "logps/chosen": -1.5157802104949951, + "logps/rejected": -1.1557257175445557, + "loss": 1.0016, + "nll_loss": 0.8905512094497681, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.15157800912857056, + "rewards/margins": -0.03600544109940529, + "rewards/rejected": -0.11557257175445557, + "step": 37 + }, + { + "epoch": 0.02363919129082426, + "grad_norm": 0.22197005152702332, + "learning_rate": 4.8100000000000004e-05, + "log_odds_chosen": -0.24612115323543549, + "log_odds_ratio": -0.8575789928436279, + "logits/chosen": 2.0853166580200195, + "logits/rejected": 2.376945734024048, + "logps/chosen": -1.2783766984939575, + "logps/rejected": -1.0839169025421143, + "loss": 0.8213, + "nll_loss": 0.73553067445755, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.127837672829628, + "rewards/margins": -0.019445981830358505, + "rewards/rejected": -0.10839168727397919, + "step": 38 + }, + { + "epoch": 0.024261275272161743, + "grad_norm": 0.24557054042816162, + "learning_rate": 4.805e-05, + "log_odds_chosen": -0.18656456470489502, + "log_odds_ratio": -0.8466726541519165, + "logits/chosen": 2.951633930206299, + "logits/rejected": 4.386725902557373, + "logps/chosen": -1.0201852321624756, + "logps/rejected": -0.9069477319717407, + "loss": 0.9591, + "nll_loss": 0.8744035959243774, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.10201852768659592, + "rewards/margins": -0.011323747225105762, + "rewards/rejected": -0.09069477766752243, + "step": 39 + }, + { + "epoch": 0.024883359253499222, + "grad_norm": 0.1822604238986969, + "learning_rate": 4.8e-05, + "log_odds_chosen": -0.039638370275497437, + "log_odds_ratio": -0.7252129912376404, + "logits/chosen": 1.7212930917739868, + "logits/rejected": 3.5935091972351074, + "logps/chosen": -1.2547193765640259, + "logps/rejected": -1.2297290563583374, + "loss": 0.8422, + "nll_loss": 0.7696593999862671, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12547193467617035, + "rewards/margins": -0.002499036490917206, + "rewards/rejected": -0.12297290563583374, + "step": 40 + }, + { + "epoch": 0.0255054432348367, + "grad_norm": 0.4303940236568451, + "learning_rate": 4.795e-05, + "log_odds_chosen": -0.5628086924552917, + "log_odds_ratio": -1.1011394262313843, + "logits/chosen": 1.804811954498291, + "logits/rejected": 4.109908580780029, + "logps/chosen": -1.3430132865905762, + "logps/rejected": -0.910142183303833, + "loss": 0.8828, + "nll_loss": 0.772662878036499, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.1343013346195221, + "rewards/margins": -0.043287117034196854, + "rewards/rejected": -0.09101422131061554, + "step": 41 + }, + { + "epoch": 0.026127527216174184, + "grad_norm": 0.22790291905403137, + "learning_rate": 4.79e-05, + "log_odds_chosen": -0.08913028985261917, + "log_odds_ratio": -0.7628570795059204, + "logits/chosen": 1.8077365159988403, + "logits/rejected": 2.038315773010254, + "logps/chosen": -1.3737506866455078, + "logps/rejected": -1.3219356536865234, + "loss": 0.9207, + "nll_loss": 0.8443830013275146, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.13737505674362183, + "rewards/margins": -0.005181487649679184, + "rewards/rejected": -0.13219358026981354, + "step": 42 + }, + { + "epoch": 0.026749611197511663, + "grad_norm": 0.18566672503948212, + "learning_rate": 4.785e-05, + "log_odds_chosen": 0.47161194682121277, + "log_odds_ratio": -0.5659717917442322, + "logits/chosen": 2.2776291370391846, + "logits/rejected": 2.514277458190918, + "logps/chosen": -1.042227029800415, + "logps/rejected": -1.4109022617340088, + "loss": 0.8949, + "nll_loss": 0.8383415937423706, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10422270745038986, + "rewards/margins": 0.036867525428533554, + "rewards/rejected": -0.14109022915363312, + "step": 43 + }, + { + "epoch": 0.027371695178849145, + "grad_norm": 0.35892772674560547, + "learning_rate": 4.78e-05, + "log_odds_chosen": -0.351146399974823, + "log_odds_ratio": -1.1022428274154663, + "logits/chosen": 1.4172916412353516, + "logits/rejected": 3.157374143600464, + "logps/chosen": -1.5696899890899658, + "logps/rejected": -1.1946138143539429, + "loss": 0.9, + "nll_loss": 0.7897965908050537, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.15696901082992554, + "rewards/margins": -0.037507638335227966, + "rewards/rejected": -0.11946137994527817, + "step": 44 + }, + { + "epoch": 0.027993779160186624, + "grad_norm": 0.388747900724411, + "learning_rate": 4.775e-05, + "log_odds_chosen": 0.06426665186882019, + "log_odds_ratio": -0.6822032928466797, + "logits/chosen": 1.552047848701477, + "logits/rejected": 2.8711278438568115, + "logps/chosen": -1.121537208557129, + "logps/rejected": -1.158513069152832, + "loss": 0.8752, + "nll_loss": 0.8070287108421326, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11215372383594513, + "rewards/margins": 0.003697587177157402, + "rewards/rejected": -0.11585131287574768, + "step": 45 + }, + { + "epoch": 0.028615863141524107, + "grad_norm": 0.31807225942611694, + "learning_rate": 4.77e-05, + "log_odds_chosen": -0.01670071855187416, + "log_odds_ratio": -0.7142594456672668, + "logits/chosen": 2.114004135131836, + "logits/rejected": 3.1321046352386475, + "logps/chosen": -1.0298341512680054, + "logps/rejected": -1.018587589263916, + "loss": 0.8789, + "nll_loss": 0.8074406385421753, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10298341512680054, + "rewards/margins": -0.0011246586218476295, + "rewards/rejected": -0.10185875743627548, + "step": 46 + }, + { + "epoch": 0.029237947122861586, + "grad_norm": 0.18275466561317444, + "learning_rate": 4.765e-05, + "log_odds_chosen": -0.062389228492975235, + "log_odds_ratio": -0.7514591217041016, + "logits/chosen": 1.1435599327087402, + "logits/rejected": 2.3499629497528076, + "logps/chosen": -1.070279836654663, + "logps/rejected": -1.0232110023498535, + "loss": 0.7367, + "nll_loss": 0.6615514755249023, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.10702798515558243, + "rewards/margins": -0.004706883803009987, + "rewards/rejected": -0.102321095764637, + "step": 47 + }, + { + "epoch": 0.029860031104199068, + "grad_norm": 0.283637136220932, + "learning_rate": 4.76e-05, + "log_odds_chosen": 0.04298657178878784, + "log_odds_ratio": -0.7060101628303528, + "logits/chosen": 0.9537409543991089, + "logits/rejected": 2.76373291015625, + "logps/chosen": -0.9863978624343872, + "logps/rejected": -0.9677107334136963, + "loss": 0.7097, + "nll_loss": 0.6391026973724365, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09863978624343872, + "rewards/margins": -0.001868714578449726, + "rewards/rejected": -0.09677107632160187, + "step": 48 + }, + { + "epoch": 0.030482115085536547, + "grad_norm": 0.2659762501716614, + "learning_rate": 4.755e-05, + "log_odds_chosen": -0.15763582289218903, + "log_odds_ratio": -0.7842304706573486, + "logits/chosen": 0.8293619751930237, + "logits/rejected": 2.636796712875366, + "logps/chosen": -1.15511953830719, + "logps/rejected": -1.038378119468689, + "loss": 0.7968, + "nll_loss": 0.718331515789032, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.115511953830719, + "rewards/margins": -0.011674145236611366, + "rewards/rejected": -0.10383781045675278, + "step": 49 + }, + { + "epoch": 0.03110419906687403, + "grad_norm": 0.2557560205459595, + "learning_rate": 4.75e-05, + "log_odds_chosen": -0.03294600918889046, + "log_odds_ratio": -0.7847276926040649, + "logits/chosen": 1.1990070343017578, + "logits/rejected": 3.6774067878723145, + "logps/chosen": -1.2008081674575806, + "logps/rejected": -1.1088212728500366, + "loss": 0.8465, + "nll_loss": 0.7680559158325195, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.12008081376552582, + "rewards/margins": -0.009198684245347977, + "rewards/rejected": -0.11088212579488754, + "step": 50 + }, + { + "epoch": 0.031726283048211505, + "grad_norm": 0.21976199746131897, + "learning_rate": 4.745e-05, + "log_odds_chosen": 0.3041134476661682, + "log_odds_ratio": -0.580925464630127, + "logits/chosen": 1.4744720458984375, + "logits/rejected": 3.0478222370147705, + "logps/chosen": -0.8667433261871338, + "logps/rejected": -1.0165106058120728, + "loss": 0.7666, + "nll_loss": 0.7085108757019043, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08667434006929398, + "rewards/margins": 0.01497671939432621, + "rewards/rejected": -0.10165105760097504, + "step": 51 + }, + { + "epoch": 0.03234836702954899, + "grad_norm": 0.2689627707004547, + "learning_rate": 4.74e-05, + "log_odds_chosen": -0.04352305829524994, + "log_odds_ratio": -0.7418375015258789, + "logits/chosen": 2.5920138359069824, + "logits/rejected": 2.2302138805389404, + "logps/chosen": -1.021632432937622, + "logps/rejected": -0.9504865407943726, + "loss": 1.0453, + "nll_loss": 0.9711462259292603, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.10216324031352997, + "rewards/margins": -0.007114590145647526, + "rewards/rejected": -0.09504866600036621, + "step": 52 + }, + { + "epoch": 0.03297045101088647, + "grad_norm": 0.22366289794445038, + "learning_rate": 4.735e-05, + "log_odds_chosen": 0.04003528878092766, + "log_odds_ratio": -0.6888149976730347, + "logits/chosen": 3.182617664337158, + "logits/rejected": 3.555081844329834, + "logps/chosen": -0.9702656269073486, + "logps/rejected": -0.9838634133338928, + "loss": 1.0702, + "nll_loss": 1.0013432502746582, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09702656418085098, + "rewards/margins": 0.001359778456389904, + "rewards/rejected": -0.09838633239269257, + "step": 53 + }, + { + "epoch": 0.03359253499222395, + "grad_norm": 0.38575857877731323, + "learning_rate": 4.73e-05, + "log_odds_chosen": 0.22280624508857727, + "log_odds_ratio": -0.6437617540359497, + "logits/chosen": 1.0312542915344238, + "logits/rejected": 3.3192896842956543, + "logps/chosen": -1.1636738777160645, + "logps/rejected": -1.2852468490600586, + "loss": 0.7405, + "nll_loss": 0.6761186122894287, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1163673847913742, + "rewards/margins": 0.012157305143773556, + "rewards/rejected": -0.12852469086647034, + "step": 54 + }, + { + "epoch": 0.03421461897356143, + "grad_norm": 0.173259437084198, + "learning_rate": 4.7249999999999997e-05, + "log_odds_chosen": 0.047059908509254456, + "log_odds_ratio": -0.7024832963943481, + "logits/chosen": 2.349436044692993, + "logits/rejected": 2.2064785957336426, + "logps/chosen": -1.1089081764221191, + "logps/rejected": -1.118696689605713, + "loss": 0.998, + "nll_loss": 0.9277853965759277, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11089081317186356, + "rewards/margins": 0.0009788572788238525, + "rewards/rejected": -0.11186967045068741, + "step": 55 + }, + { + "epoch": 0.034836702954898914, + "grad_norm": 0.21381795406341553, + "learning_rate": 4.72e-05, + "log_odds_chosen": -0.1794767677783966, + "log_odds_ratio": -0.7933696508407593, + "logits/chosen": 1.327786922454834, + "logits/rejected": 1.7987782955169678, + "logps/chosen": -1.178837537765503, + "logps/rejected": -1.0669177770614624, + "loss": 0.867, + "nll_loss": 0.7876596450805664, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.11788375675678253, + "rewards/margins": -0.011191976256668568, + "rewards/rejected": -0.10669177770614624, + "step": 56 + }, + { + "epoch": 0.03545878693623639, + "grad_norm": 0.2719959318637848, + "learning_rate": 4.715e-05, + "log_odds_chosen": -0.30999311804771423, + "log_odds_ratio": -0.9616641402244568, + "logits/chosen": 2.1510074138641357, + "logits/rejected": 3.185220241546631, + "logps/chosen": -1.4096884727478027, + "logps/rejected": -1.1700233221054077, + "loss": 0.9744, + "nll_loss": 0.8782221078872681, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.14096882939338684, + "rewards/margins": -0.02396649867296219, + "rewards/rejected": -0.11700233817100525, + "step": 57 + }, + { + "epoch": 0.03608087091757387, + "grad_norm": 0.24494118988513947, + "learning_rate": 4.71e-05, + "log_odds_chosen": 0.19044572114944458, + "log_odds_ratio": -0.6316022276878357, + "logits/chosen": 2.780532121658325, + "logits/rejected": 3.8303580284118652, + "logps/chosen": -1.0032345056533813, + "logps/rejected": -1.114229679107666, + "loss": 0.9724, + "nll_loss": 0.9092568755149841, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.10032345354557037, + "rewards/margins": 0.011099515482783318, + "rewards/rejected": -0.11142296344041824, + "step": 58 + }, + { + "epoch": 0.03670295489891135, + "grad_norm": 0.23869889974594116, + "learning_rate": 4.705e-05, + "log_odds_chosen": 0.14286461472511292, + "log_odds_ratio": -0.6479381322860718, + "logits/chosen": 1.359169363975525, + "logits/rejected": 2.286386013031006, + "logps/chosen": -1.03233003616333, + "logps/rejected": -1.1210219860076904, + "loss": 0.7819, + "nll_loss": 0.7171339988708496, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10323300957679749, + "rewards/margins": 0.00886919628828764, + "rewards/rejected": -0.1121022030711174, + "step": 59 + }, + { + "epoch": 0.03732503888024884, + "grad_norm": 0.40756744146347046, + "learning_rate": 4.7e-05, + "log_odds_chosen": 0.5899947285652161, + "log_odds_ratio": -0.5545991659164429, + "logits/chosen": 2.521620273590088, + "logits/rejected": 3.599379062652588, + "logps/chosen": -0.8197504281997681, + "logps/rejected": -1.0699812173843384, + "loss": 0.9495, + "nll_loss": 0.8940035104751587, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0819750428199768, + "rewards/margins": 0.025023072957992554, + "rewards/rejected": -0.10699811577796936, + "step": 60 + }, + { + "epoch": 0.037947122861586316, + "grad_norm": 0.40065282583236694, + "learning_rate": 4.695e-05, + "log_odds_chosen": 0.022990047931671143, + "log_odds_ratio": -0.7364428639411926, + "logits/chosen": 0.8582637906074524, + "logits/rejected": 2.706080436706543, + "logps/chosen": -1.1528658866882324, + "logps/rejected": -1.143157958984375, + "loss": 0.7133, + "nll_loss": 0.6396671533584595, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11528658866882324, + "rewards/margins": -0.0009707985445857048, + "rewards/rejected": -0.11431579291820526, + "step": 61 + }, + { + "epoch": 0.038569206842923795, + "grad_norm": 0.22253791987895966, + "learning_rate": 4.69e-05, + "log_odds_chosen": 0.4593227803707123, + "log_odds_ratio": -0.5568712949752808, + "logits/chosen": 0.7542824745178223, + "logits/rejected": 0.9769484400749207, + "logps/chosen": -0.932292103767395, + "logps/rejected": -1.1556060314178467, + "loss": 0.6361, + "nll_loss": 0.5803820490837097, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09322921931743622, + "rewards/margins": 0.022331394255161285, + "rewards/rejected": -0.1155606061220169, + "step": 62 + }, + { + "epoch": 0.039191290824261274, + "grad_norm": 0.4563582241535187, + "learning_rate": 4.685000000000001e-05, + "log_odds_chosen": 0.05771663784980774, + "log_odds_ratio": -0.7888731956481934, + "logits/chosen": 1.4716992378234863, + "logits/rejected": 3.1634292602539062, + "logps/chosen": -1.1427593231201172, + "logps/rejected": -1.0027639865875244, + "loss": 0.8393, + "nll_loss": 0.7604351043701172, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.11427594721317291, + "rewards/margins": -0.013999542221426964, + "rewards/rejected": -0.1002763956785202, + "step": 63 + }, + { + "epoch": 0.03981337480559875, + "grad_norm": 0.5556821823120117, + "learning_rate": 4.6800000000000006e-05, + "log_odds_chosen": -0.21914222836494446, + "log_odds_ratio": -0.8504500389099121, + "logits/chosen": 1.2280817031860352, + "logits/rejected": 3.1541242599487305, + "logps/chosen": -1.2485235929489136, + "logps/rejected": -1.1014806032180786, + "loss": 0.7758, + "nll_loss": 0.6907318830490112, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.12485235929489136, + "rewards/margins": -0.014704296365380287, + "rewards/rejected": -0.11014805734157562, + "step": 64 + }, + { + "epoch": 0.04043545878693624, + "grad_norm": 0.22284820675849915, + "learning_rate": 4.6750000000000005e-05, + "log_odds_chosen": -0.11895395815372467, + "log_odds_ratio": -0.7598588466644287, + "logits/chosen": 1.8724644184112549, + "logits/rejected": 2.6201133728027344, + "logps/chosen": -1.231781244277954, + "logps/rejected": -1.1473100185394287, + "loss": 0.8682, + "nll_loss": 0.7921833992004395, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.12317812442779541, + "rewards/margins": -0.008447128348052502, + "rewards/rejected": -0.11473099887371063, + "step": 65 + }, + { + "epoch": 0.04105754276827372, + "grad_norm": 0.25803834199905396, + "learning_rate": 4.6700000000000003e-05, + "log_odds_chosen": -0.09246137738227844, + "log_odds_ratio": -0.7877139449119568, + "logits/chosen": 0.9774875640869141, + "logits/rejected": 2.8366801738739014, + "logps/chosen": -1.0853774547576904, + "logps/rejected": -1.0088139772415161, + "loss": 0.7878, + "nll_loss": 0.7090435028076172, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.10853774100542068, + "rewards/margins": -0.007656336762011051, + "rewards/rejected": -0.10088139772415161, + "step": 66 + }, + { + "epoch": 0.0416796267496112, + "grad_norm": 0.15937106311321259, + "learning_rate": 4.665e-05, + "log_odds_chosen": -0.2035675346851349, + "log_odds_ratio": -0.8312056064605713, + "logits/chosen": 1.3091591596603394, + "logits/rejected": 2.8286824226379395, + "logps/chosen": -1.1093143224716187, + "logps/rejected": -0.9541065096855164, + "loss": 0.7537, + "nll_loss": 0.6705965399742126, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11093142628669739, + "rewards/margins": -0.015520783141255379, + "rewards/rejected": -0.09541065990924835, + "step": 67 + }, + { + "epoch": 0.042301710730948676, + "grad_norm": 0.25136131048202515, + "learning_rate": 4.660000000000001e-05, + "log_odds_chosen": -0.14508166909217834, + "log_odds_ratio": -0.8028329014778137, + "logits/chosen": 1.2004531621932983, + "logits/rejected": 2.2064342498779297, + "logps/chosen": -1.021368145942688, + "logps/rejected": -0.9574182629585266, + "loss": 0.6492, + "nll_loss": 0.5688824653625488, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10213680565357208, + "rewards/margins": -0.006394978612661362, + "rewards/rejected": -0.09574183076620102, + "step": 68 + }, + { + "epoch": 0.04292379471228616, + "grad_norm": 0.18070419132709503, + "learning_rate": 4.655000000000001e-05, + "log_odds_chosen": 0.4045894742012024, + "log_odds_ratio": -0.6035602688789368, + "logits/chosen": 2.1579999923706055, + "logits/rejected": 2.2763824462890625, + "logps/chosen": -0.9550964832305908, + "logps/rejected": -1.1929335594177246, + "loss": 0.8618, + "nll_loss": 0.8014136552810669, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09550965577363968, + "rewards/margins": 0.02378370799124241, + "rewards/rejected": -0.11929336190223694, + "step": 69 + }, + { + "epoch": 0.04354587869362364, + "grad_norm": 0.5423634052276611, + "learning_rate": 4.6500000000000005e-05, + "log_odds_chosen": -0.19840699434280396, + "log_odds_ratio": -0.8418025374412537, + "logits/chosen": 2.095249652862549, + "logits/rejected": 2.98868465423584, + "logps/chosen": -0.9458328485488892, + "logps/rejected": -0.8142030239105225, + "loss": 0.9194, + "nll_loss": 0.8351881504058838, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.09458328783512115, + "rewards/margins": -0.013162979856133461, + "rewards/rejected": -0.08142030984163284, + "step": 70 + }, + { + "epoch": 0.04416796267496112, + "grad_norm": 0.2877205014228821, + "learning_rate": 4.6450000000000004e-05, + "log_odds_chosen": 0.0428520105779171, + "log_odds_ratio": -0.7418846487998962, + "logits/chosen": 0.8320183157920837, + "logits/rejected": 2.5365350246429443, + "logps/chosen": -0.9517344236373901, + "logps/rejected": -0.9330209493637085, + "loss": 0.7129, + "nll_loss": 0.6387526392936707, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0951734408736229, + "rewards/margins": -0.0018713511526584625, + "rewards/rejected": -0.09330209344625473, + "step": 71 + }, + { + "epoch": 0.0447900466562986, + "grad_norm": 0.22278496623039246, + "learning_rate": 4.64e-05, + "log_odds_chosen": -0.054201096296310425, + "log_odds_ratio": -0.7669423818588257, + "logits/chosen": 2.898837089538574, + "logits/rejected": 3.8671226501464844, + "logps/chosen": -1.125044345855713, + "logps/rejected": -1.1063265800476074, + "loss": 0.9359, + "nll_loss": 0.8592216372489929, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11250443756580353, + "rewards/margins": -0.0018717655912041664, + "rewards/rejected": -0.11063267290592194, + "step": 72 + }, + { + "epoch": 0.04541213063763608, + "grad_norm": 0.2126590758562088, + "learning_rate": 4.635e-05, + "log_odds_chosen": -0.2587714195251465, + "log_odds_ratio": -0.891248345375061, + "logits/chosen": 1.3153867721557617, + "logits/rejected": 3.1547749042510986, + "logps/chosen": -1.4425960779190063, + "logps/rejected": -1.1883153915405273, + "loss": 0.8521, + "nll_loss": 0.7630225419998169, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.14425960183143616, + "rewards/margins": -0.025428064167499542, + "rewards/rejected": -0.11883153021335602, + "step": 73 + }, + { + "epoch": 0.046034214618973564, + "grad_norm": 0.42494454979896545, + "learning_rate": 4.630000000000001e-05, + "log_odds_chosen": 0.1402682065963745, + "log_odds_ratio": -0.6622962951660156, + "logits/chosen": 1.657547950744629, + "logits/rejected": 1.9613828659057617, + "logps/chosen": -0.766802191734314, + "logps/rejected": -0.799024224281311, + "loss": 0.7761, + "nll_loss": 0.7098743915557861, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.07668022811412811, + "rewards/margins": 0.0032222005538642406, + "rewards/rejected": -0.07990242540836334, + "step": 74 + }, + { + "epoch": 0.04665629860031104, + "grad_norm": 0.17336860299110413, + "learning_rate": 4.6250000000000006e-05, + "log_odds_chosen": 0.1157303899526596, + "log_odds_ratio": -0.6770206689834595, + "logits/chosen": 2.7488303184509277, + "logits/rejected": 2.7461235523223877, + "logps/chosen": -0.833381175994873, + "logps/rejected": -0.8952413201332092, + "loss": 0.865, + "nll_loss": 0.7972859144210815, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.08333811908960342, + "rewards/margins": 0.006186014041304588, + "rewards/rejected": -0.08952413499355316, + "step": 75 + }, + { + "epoch": 0.04727838258164852, + "grad_norm": 0.18598797917366028, + "learning_rate": 4.6200000000000005e-05, + "log_odds_chosen": -0.043106935918331146, + "log_odds_ratio": -0.7398505210876465, + "logits/chosen": 1.1851791143417358, + "logits/rejected": 2.6704442501068115, + "logps/chosen": -1.0668216943740845, + "logps/rejected": -1.033825159072876, + "loss": 0.7596, + "nll_loss": 0.6856452226638794, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1066821739077568, + "rewards/margins": -0.003299661912024021, + "rewards/rejected": -0.10338252037763596, + "step": 76 + }, + { + "epoch": 0.047900466562986, + "grad_norm": 0.1425790637731552, + "learning_rate": 4.6150000000000004e-05, + "log_odds_chosen": -0.12183598428964615, + "log_odds_ratio": -0.7798743844032288, + "logits/chosen": 0.8571373224258423, + "logits/rejected": 2.4758567810058594, + "logps/chosen": -1.1691839694976807, + "logps/rejected": -1.0594091415405273, + "loss": 0.6561, + "nll_loss": 0.5780637860298157, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1169183999300003, + "rewards/margins": -0.010977484285831451, + "rewards/rejected": -0.10594092309474945, + "step": 77 + }, + { + "epoch": 0.04852255054432349, + "grad_norm": 0.23847784101963043, + "learning_rate": 4.61e-05, + "log_odds_chosen": -0.0839817151427269, + "log_odds_ratio": -0.7639693021774292, + "logits/chosen": 3.320763349533081, + "logits/rejected": 4.665729522705078, + "logps/chosen": -0.9754164814949036, + "logps/rejected": -0.9289852380752563, + "loss": 0.9986, + "nll_loss": 0.9221831560134888, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.09754165261983871, + "rewards/margins": -0.004643128253519535, + "rewards/rejected": -0.09289852529764175, + "step": 78 + }, + { + "epoch": 0.049144634525660966, + "grad_norm": 0.2742615342140198, + "learning_rate": 4.605e-05, + "log_odds_chosen": 0.17970319092273712, + "log_odds_ratio": -0.6801419854164124, + "logits/chosen": 2.0109176635742188, + "logits/rejected": 3.4676363468170166, + "logps/chosen": -1.0586036443710327, + "logps/rejected": -1.1820811033248901, + "loss": 0.8246, + "nll_loss": 0.7566049695014954, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1058603823184967, + "rewards/margins": 0.012347733601927757, + "rewards/rejected": -0.11820811033248901, + "step": 79 + }, + { + "epoch": 0.049766718506998445, + "grad_norm": 0.2062321901321411, + "learning_rate": 4.600000000000001e-05, + "log_odds_chosen": 0.0320289321243763, + "log_odds_ratio": -0.6907176971435547, + "logits/chosen": 1.7227623462677002, + "logits/rejected": 2.875901937484741, + "logps/chosen": -1.0828742980957031, + "logps/rejected": -1.0998992919921875, + "loss": 0.8051, + "nll_loss": 0.7360435128211975, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10828742384910583, + "rewards/margins": 0.0017025060951709747, + "rewards/rejected": -0.1099899411201477, + "step": 80 + }, + { + "epoch": 0.050388802488335924, + "grad_norm": 0.2916569113731384, + "learning_rate": 4.5950000000000006e-05, + "log_odds_chosen": -0.3863591253757477, + "log_odds_ratio": -0.9683763384819031, + "logits/chosen": 1.3633993864059448, + "logits/rejected": 2.970306158065796, + "logps/chosen": -1.1555265188217163, + "logps/rejected": -0.8885267376899719, + "loss": 0.7104, + "nll_loss": 0.6135969161987305, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.11555266380310059, + "rewards/margins": -0.0266999751329422, + "rewards/rejected": -0.08885267376899719, + "step": 81 + }, + { + "epoch": 0.0510108864696734, + "grad_norm": 0.19114866852760315, + "learning_rate": 4.5900000000000004e-05, + "log_odds_chosen": 0.09835843741893768, + "log_odds_ratio": -0.6535273194313049, + "logits/chosen": 1.593051791191101, + "logits/rejected": 2.7270560264587402, + "logps/chosen": -0.9967777729034424, + "logps/rejected": -1.0528018474578857, + "loss": 0.7754, + "nll_loss": 0.7100351452827454, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09967778623104095, + "rewards/margins": 0.005602394696325064, + "rewards/rejected": -0.10528017580509186, + "step": 82 + }, + { + "epoch": 0.05163297045101089, + "grad_norm": 0.21949782967567444, + "learning_rate": 4.585e-05, + "log_odds_chosen": 0.2077902853488922, + "log_odds_ratio": -0.6175616979598999, + "logits/chosen": 2.0219221115112305, + "logits/rejected": 3.6574249267578125, + "logps/chosen": -0.9124993085861206, + "logps/rejected": -1.0192480087280273, + "loss": 0.8179, + "nll_loss": 0.7561153769493103, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09124993532896042, + "rewards/margins": 0.010674861259758472, + "rewards/rejected": -0.10192479193210602, + "step": 83 + }, + { + "epoch": 0.05225505443234837, + "grad_norm": 0.17727652192115784, + "learning_rate": 4.58e-05, + "log_odds_chosen": 0.38519418239593506, + "log_odds_ratio": -0.5557951331138611, + "logits/chosen": 0.8552349209785461, + "logits/rejected": 2.699705123901367, + "logps/chosen": -0.8921289443969727, + "logps/rejected": -1.1134952306747437, + "loss": 0.6496, + "nll_loss": 0.5940346121788025, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08921289443969727, + "rewards/margins": 0.022136623039841652, + "rewards/rejected": -0.11134952306747437, + "step": 84 + }, + { + "epoch": 0.05287713841368585, + "grad_norm": 0.2293669730424881, + "learning_rate": 4.575e-05, + "log_odds_chosen": -0.21045897901058197, + "log_odds_ratio": -0.8564555048942566, + "logits/chosen": 1.66201913356781, + "logits/rejected": 2.8478963375091553, + "logps/chosen": -1.3357698917388916, + "logps/rejected": -1.1636067628860474, + "loss": 0.8297, + "nll_loss": 0.7440907955169678, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13357700407505035, + "rewards/margins": -0.01721632108092308, + "rewards/rejected": -0.11636068671941757, + "step": 85 + }, + { + "epoch": 0.053499222395023326, + "grad_norm": 0.29000750184059143, + "learning_rate": 4.5700000000000006e-05, + "log_odds_chosen": -0.1497875154018402, + "log_odds_ratio": -0.7847409248352051, + "logits/chosen": 2.6281542778015137, + "logits/rejected": 3.4142749309539795, + "logps/chosen": -1.2299106121063232, + "logps/rejected": -1.1186976432800293, + "loss": 1.0464, + "nll_loss": 0.9679691195487976, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12299105525016785, + "rewards/margins": -0.011121302843093872, + "rewards/rejected": -0.11186975985765457, + "step": 86 + }, + { + "epoch": 0.05412130637636081, + "grad_norm": 0.26299771666526794, + "learning_rate": 4.5650000000000005e-05, + "log_odds_chosen": 0.5908235311508179, + "log_odds_ratio": -0.49592533707618713, + "logits/chosen": 1.5190047025680542, + "logits/rejected": 2.7360827922821045, + "logps/chosen": -0.7108929753303528, + "logps/rejected": -0.9945129752159119, + "loss": 0.7238, + "nll_loss": 0.6742295026779175, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07108929753303528, + "rewards/margins": 0.02836199663579464, + "rewards/rejected": -0.09945128858089447, + "step": 87 + }, + { + "epoch": 0.05474339035769829, + "grad_norm": 0.19910745322704315, + "learning_rate": 4.5600000000000004e-05, + "log_odds_chosen": -0.4192565679550171, + "log_odds_ratio": -0.9752273559570312, + "logits/chosen": 0.8375768065452576, + "logits/rejected": 1.8594812154769897, + "logps/chosen": -1.1285021305084229, + "logps/rejected": -0.8286036252975464, + "loss": 0.7274, + "nll_loss": 0.6299166679382324, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.11285021156072617, + "rewards/margins": -0.029989851638674736, + "rewards/rejected": -0.08286036550998688, + "step": 88 + }, + { + "epoch": 0.05536547433903577, + "grad_norm": 0.43857982754707336, + "learning_rate": 4.555e-05, + "log_odds_chosen": -0.48293331265449524, + "log_odds_ratio": -1.1548466682434082, + "logits/chosen": 1.518347978591919, + "logits/rejected": 2.750607490539551, + "logps/chosen": -1.5658156871795654, + "logps/rejected": -1.0724000930786133, + "loss": 0.8034, + "nll_loss": 0.6879459619522095, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1565815806388855, + "rewards/margins": -0.049341559410095215, + "rewards/rejected": -0.10724002122879028, + "step": 89 + }, + { + "epoch": 0.05598755832037325, + "grad_norm": 0.26479724049568176, + "learning_rate": 4.55e-05, + "log_odds_chosen": 0.23310042917728424, + "log_odds_ratio": -0.6027222275733948, + "logits/chosen": 1.3731447458267212, + "logits/rejected": 2.6510982513427734, + "logps/chosen": -1.0542906522750854, + "logps/rejected": -1.2222278118133545, + "loss": 0.7583, + "nll_loss": 0.6980407238006592, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10542906820774078, + "rewards/margins": 0.016793712973594666, + "rewards/rejected": -0.12222278118133545, + "step": 90 + }, + { + "epoch": 0.05660964230171073, + "grad_norm": 0.24054040014743805, + "learning_rate": 4.545000000000001e-05, + "log_odds_chosen": 0.2605292499065399, + "log_odds_ratio": -0.6213144063949585, + "logits/chosen": 1.1814061403274536, + "logits/rejected": 2.075345277786255, + "logps/chosen": -1.1208800077438354, + "logps/rejected": -1.2274812459945679, + "loss": 0.7286, + "nll_loss": 0.6664672493934631, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11208800971508026, + "rewards/margins": 0.010660117492079735, + "rewards/rejected": -0.12274813652038574, + "step": 91 + }, + { + "epoch": 0.05723172628304821, + "grad_norm": 0.39278140664100647, + "learning_rate": 4.5400000000000006e-05, + "log_odds_chosen": -0.26259005069732666, + "log_odds_ratio": -0.8627680540084839, + "logits/chosen": 1.247246503829956, + "logits/rejected": 2.8098278045654297, + "logps/chosen": -1.082601547241211, + "logps/rejected": -0.9376986622810364, + "loss": 0.8206, + "nll_loss": 0.7343120574951172, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.1082601547241211, + "rewards/margins": -0.014490286819636822, + "rewards/rejected": -0.09376987814903259, + "step": 92 + }, + { + "epoch": 0.05785381026438569, + "grad_norm": 0.1920693963766098, + "learning_rate": 4.5350000000000005e-05, + "log_odds_chosen": 0.21707671880722046, + "log_odds_ratio": -0.6184121370315552, + "logits/chosen": 2.694953441619873, + "logits/rejected": 3.1188573837280273, + "logps/chosen": -0.831054151058197, + "logps/rejected": -0.9499989151954651, + "loss": 0.9065, + "nll_loss": 0.8446973562240601, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0831054076552391, + "rewards/margins": 0.011894481256604195, + "rewards/rejected": -0.09499989449977875, + "step": 93 + }, + { + "epoch": 0.05847589424572317, + "grad_norm": 0.21006019413471222, + "learning_rate": 4.53e-05, + "log_odds_chosen": 0.05670143663883209, + "log_odds_ratio": -0.6701164245605469, + "logits/chosen": 1.0237641334533691, + "logits/rejected": 2.528775215148926, + "logps/chosen": -1.1318689584732056, + "logps/rejected": -1.1793004274368286, + "loss": 0.7016, + "nll_loss": 0.6345678567886353, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11318689584732056, + "rewards/margins": 0.004743154160678387, + "rewards/rejected": -0.11793004721403122, + "step": 94 + }, + { + "epoch": 0.05909797822706065, + "grad_norm": 0.2606309950351715, + "learning_rate": 4.525e-05, + "log_odds_chosen": -0.46452876925468445, + "log_odds_ratio": -1.0579557418823242, + "logits/chosen": 0.17954808473587036, + "logits/rejected": 2.247058391571045, + "logps/chosen": -1.6242289543151855, + "logps/rejected": -1.2107869386672974, + "loss": 0.6754, + "nll_loss": 0.5696069598197937, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.16242289543151855, + "rewards/margins": -0.04134419932961464, + "rewards/rejected": -0.12107868492603302, + "step": 95 + }, + { + "epoch": 0.059720062208398136, + "grad_norm": 0.2298119217157364, + "learning_rate": 4.52e-05, + "log_odds_chosen": 0.015163015574216843, + "log_odds_ratio": -0.6908687353134155, + "logits/chosen": 2.02345871925354, + "logits/rejected": 3.0907723903656006, + "logps/chosen": -1.0270793437957764, + "logps/rejected": -1.0349122285842896, + "loss": 0.7404, + "nll_loss": 0.6712822318077087, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10270794481039047, + "rewards/margins": 0.0007832786068320274, + "rewards/rejected": -0.10349121689796448, + "step": 96 + }, + { + "epoch": 0.060342146189735615, + "grad_norm": 0.4934215247631073, + "learning_rate": 4.5150000000000006e-05, + "log_odds_chosen": -0.43015801906585693, + "log_odds_ratio": -0.9842768311500549, + "logits/chosen": 2.0014078617095947, + "logits/rejected": 3.313342809677124, + "logps/chosen": -1.313707709312439, + "logps/rejected": -0.9843780994415283, + "loss": 0.8658, + "nll_loss": 0.7673825621604919, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.13137076795101166, + "rewards/margins": -0.032932963222265244, + "rewards/rejected": -0.09843780100345612, + "step": 97 + }, + { + "epoch": 0.060964230171073094, + "grad_norm": 0.20960745215415955, + "learning_rate": 4.5100000000000005e-05, + "log_odds_chosen": 0.02951214462518692, + "log_odds_ratio": -0.774150550365448, + "logits/chosen": 1.9913530349731445, + "logits/rejected": 2.7125914096832275, + "logps/chosen": -1.133882999420166, + "logps/rejected": -1.1040964126586914, + "loss": 0.9025, + "nll_loss": 0.8251147866249084, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1133883148431778, + "rewards/margins": -0.002978656440973282, + "rewards/rejected": -0.11040964722633362, + "step": 98 + }, + { + "epoch": 0.06158631415241057, + "grad_norm": 0.21473316848278046, + "learning_rate": 4.5050000000000004e-05, + "log_odds_chosen": -0.014744851738214493, + "log_odds_ratio": -0.7248890399932861, + "logits/chosen": 2.0002171993255615, + "logits/rejected": 2.113471269607544, + "logps/chosen": -0.9717570543289185, + "logps/rejected": -0.9748814105987549, + "loss": 0.7985, + "nll_loss": 0.7260515093803406, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0971757099032402, + "rewards/margins": 0.0003124335780739784, + "rewards/rejected": -0.09748813509941101, + "step": 99 + }, + { + "epoch": 0.06220839813374806, + "grad_norm": 0.21056877076625824, + "learning_rate": 4.5e-05, + "log_odds_chosen": -0.039973024278879166, + "log_odds_ratio": -0.7171774506568909, + "logits/chosen": 2.3132238388061523, + "logits/rejected": 2.871912717819214, + "logps/chosen": -1.0969483852386475, + "logps/rejected": -1.065551996231079, + "loss": 0.8489, + "nll_loss": 0.7771739959716797, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10969482362270355, + "rewards/margins": -0.0031396341510117054, + "rewards/rejected": -0.10655518621206284, + "step": 100 + }, + { + "epoch": 0.06283048211508553, + "grad_norm": 0.17404352128505707, + "learning_rate": 4.495e-05, + "log_odds_chosen": 0.03608079254627228, + "log_odds_ratio": -0.7069602608680725, + "logits/chosen": 1.436377763748169, + "logits/rejected": 2.1634533405303955, + "logps/chosen": -0.9186925292015076, + "logps/rejected": -0.8996652960777283, + "loss": 0.6925, + "nll_loss": 0.621782124042511, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09186924993991852, + "rewards/margins": -0.0019027264788746834, + "rewards/rejected": -0.08996652066707611, + "step": 101 + }, + { + "epoch": 0.06345256609642301, + "grad_norm": 0.3078821301460266, + "learning_rate": 4.49e-05, + "log_odds_chosen": -0.12548185884952545, + "log_odds_ratio": -0.8313822746276855, + "logits/chosen": 0.9674258232116699, + "logits/rejected": 3.7763495445251465, + "logps/chosen": -1.282372236251831, + "logps/rejected": -1.1455416679382324, + "loss": 0.7181, + "nll_loss": 0.6350088715553284, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.12823721766471863, + "rewards/margins": -0.013683034107089043, + "rewards/rejected": -0.11455416679382324, + "step": 102 + }, + { + "epoch": 0.0640746500777605, + "grad_norm": 0.2155919075012207, + "learning_rate": 4.4850000000000006e-05, + "log_odds_chosen": 0.16065430641174316, + "log_odds_ratio": -0.6278191804885864, + "logits/chosen": 2.0724029541015625, + "logits/rejected": 3.3654284477233887, + "logps/chosen": -1.0031177997589111, + "logps/rejected": -1.0878639221191406, + "loss": 0.78, + "nll_loss": 0.7172412276268005, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.100311778485775, + "rewards/margins": 0.00847461074590683, + "rewards/rejected": -0.10878638923168182, + "step": 103 + }, + { + "epoch": 0.06469673405909798, + "grad_norm": 0.26061123609542847, + "learning_rate": 4.4800000000000005e-05, + "log_odds_chosen": -0.028148103505373, + "log_odds_ratio": -0.7147566080093384, + "logits/chosen": 1.7463017702102661, + "logits/rejected": 2.0959701538085938, + "logps/chosen": -0.9808987379074097, + "logps/rejected": -0.9746424555778503, + "loss": 0.824, + "nll_loss": 0.7525503039360046, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09808988124132156, + "rewards/margins": -0.000625628512352705, + "rewards/rejected": -0.09746424853801727, + "step": 104 + }, + { + "epoch": 0.06531881804043546, + "grad_norm": 0.4242609739303589, + "learning_rate": 4.4750000000000004e-05, + "log_odds_chosen": -0.15638473629951477, + "log_odds_ratio": -0.8672036528587341, + "logits/chosen": 1.6652724742889404, + "logits/rejected": 1.994666576385498, + "logps/chosen": -1.2177703380584717, + "logps/rejected": -1.057265043258667, + "loss": 0.6882, + "nll_loss": 0.6014548540115356, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12177702784538269, + "rewards/margins": -0.016050517559051514, + "rewards/rejected": -0.10572651028633118, + "step": 105 + }, + { + "epoch": 0.06594090202177294, + "grad_norm": 0.3692471385002136, + "learning_rate": 4.47e-05, + "log_odds_chosen": 0.24592670798301697, + "log_odds_ratio": -0.793420672416687, + "logits/chosen": 2.528710126876831, + "logits/rejected": 3.5555386543273926, + "logps/chosen": -1.0099613666534424, + "logps/rejected": -0.9727597236633301, + "loss": 0.9496, + "nll_loss": 0.870262622833252, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10099614411592484, + "rewards/margins": -0.0037201670929789543, + "rewards/rejected": -0.09727597236633301, + "step": 106 + }, + { + "epoch": 0.06656298600311042, + "grad_norm": 0.1919865608215332, + "learning_rate": 4.465e-05, + "log_odds_chosen": 0.038264740258455276, + "log_odds_ratio": -0.7012819647789001, + "logits/chosen": 0.6481499671936035, + "logits/rejected": 1.7020373344421387, + "logps/chosen": -1.1631755828857422, + "logps/rejected": -1.2091020345687866, + "loss": 0.7014, + "nll_loss": 0.6312416195869446, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.11631755530834198, + "rewards/margins": 0.004592650569975376, + "rewards/rejected": -0.12091021239757538, + "step": 107 + }, + { + "epoch": 0.0671850699844479, + "grad_norm": 0.2411794811487198, + "learning_rate": 4.46e-05, + "log_odds_chosen": -0.03066416084766388, + "log_odds_ratio": -0.7151470184326172, + "logits/chosen": 1.6738927364349365, + "logits/rejected": 2.311450481414795, + "logps/chosen": -1.1736071109771729, + "logps/rejected": -1.1376099586486816, + "loss": 0.8258, + "nll_loss": 0.7543072700500488, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.11736071854829788, + "rewards/margins": -0.0035997098311781883, + "rewards/rejected": -0.11376100778579712, + "step": 108 + }, + { + "epoch": 0.06780715396578538, + "grad_norm": 0.19159743189811707, + "learning_rate": 4.4550000000000005e-05, + "log_odds_chosen": 0.06242550164461136, + "log_odds_ratio": -0.6757259964942932, + "logits/chosen": 2.0136990547180176, + "logits/rejected": 1.885206699371338, + "logps/chosen": -1.0233063697814941, + "logps/rejected": -1.0701019763946533, + "loss": 0.7497, + "nll_loss": 0.6821112036705017, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10233063995838165, + "rewards/margins": 0.004679564386606216, + "rewards/rejected": -0.10701019316911697, + "step": 109 + }, + { + "epoch": 0.06842923794712286, + "grad_norm": 0.23534443974494934, + "learning_rate": 4.4500000000000004e-05, + "log_odds_chosen": 0.31563812494277954, + "log_odds_ratio": -0.5620191097259521, + "logits/chosen": 2.677371025085449, + "logits/rejected": 3.858182191848755, + "logps/chosen": -0.9001976251602173, + "logps/rejected": -1.078892469406128, + "loss": 0.848, + "nll_loss": 0.7917859554290771, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09001976251602173, + "rewards/margins": 0.017869489267468452, + "rewards/rejected": -0.10788924992084503, + "step": 110 + }, + { + "epoch": 0.06905132192846034, + "grad_norm": 0.23337529599666595, + "learning_rate": 4.445e-05, + "log_odds_chosen": -0.18345850706100464, + "log_odds_ratio": -0.8887477517127991, + "logits/chosen": 1.6702814102172852, + "logits/rejected": 3.8112714290618896, + "logps/chosen": -1.1495945453643799, + "logps/rejected": -0.9585939645767212, + "loss": 0.7497, + "nll_loss": 0.6608361005783081, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.11495945602655411, + "rewards/margins": -0.019100071862339973, + "rewards/rejected": -0.09585939347743988, + "step": 111 + }, + { + "epoch": 0.06967340590979783, + "grad_norm": 0.20986708998680115, + "learning_rate": 4.44e-05, + "log_odds_chosen": 0.09954327344894409, + "log_odds_ratio": -0.6632246971130371, + "logits/chosen": 1.9254231452941895, + "logits/rejected": 2.9965901374816895, + "logps/chosen": -0.9920103549957275, + "logps/rejected": -1.0455927848815918, + "loss": 0.7958, + "nll_loss": 0.7294626235961914, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09920103847980499, + "rewards/margins": 0.005358239635825157, + "rewards/rejected": -0.1045592799782753, + "step": 112 + }, + { + "epoch": 0.07029548989113531, + "grad_norm": 0.2572464346885681, + "learning_rate": 4.435e-05, + "log_odds_chosen": -0.011964142322540283, + "log_odds_ratio": -0.7803436517715454, + "logits/chosen": -0.40443724393844604, + "logits/rejected": 1.6792998313903809, + "logps/chosen": -1.0715501308441162, + "logps/rejected": -0.9998044967651367, + "loss": 0.5049, + "nll_loss": 0.42683103680610657, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.10715501755475998, + "rewards/margins": -0.007174567319452763, + "rewards/rejected": -0.09998045116662979, + "step": 113 + }, + { + "epoch": 0.07091757387247279, + "grad_norm": 0.22227022051811218, + "learning_rate": 4.43e-05, + "log_odds_chosen": 0.5500046014785767, + "log_odds_ratio": -0.5628729462623596, + "logits/chosen": 2.092911958694458, + "logits/rejected": 2.7500369548797607, + "logps/chosen": -0.6779322028160095, + "logps/rejected": -0.9072044491767883, + "loss": 0.7892, + "nll_loss": 0.7329133749008179, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06779322028160095, + "rewards/margins": 0.02292722463607788, + "rewards/rejected": -0.09072044491767883, + "step": 114 + }, + { + "epoch": 0.07153965785381027, + "grad_norm": 0.2829152047634125, + "learning_rate": 4.4250000000000005e-05, + "log_odds_chosen": -0.08828212320804596, + "log_odds_ratio": -0.7812116146087646, + "logits/chosen": 0.9317520260810852, + "logits/rejected": 2.114894390106201, + "logps/chosen": -1.2550923824310303, + "logps/rejected": -1.182921290397644, + "loss": 0.6555, + "nll_loss": 0.5774081945419312, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12550924718379974, + "rewards/margins": -0.007217114791274071, + "rewards/rejected": -0.11829212307929993, + "step": 115 + }, + { + "epoch": 0.07216174183514774, + "grad_norm": 0.3006725013256073, + "learning_rate": 4.4200000000000004e-05, + "log_odds_chosen": 0.020487122237682343, + "log_odds_ratio": -0.749638557434082, + "logits/chosen": 1.9692126512527466, + "logits/rejected": 2.8845958709716797, + "logps/chosen": -1.1205189228057861, + "logps/rejected": -1.1407134532928467, + "loss": 0.8778, + "nll_loss": 0.8028479814529419, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11205189675092697, + "rewards/margins": 0.0020194537937641144, + "rewards/rejected": -0.11407135426998138, + "step": 116 + }, + { + "epoch": 0.07278382581648522, + "grad_norm": 0.2044357806444168, + "learning_rate": 4.415e-05, + "log_odds_chosen": 0.0019652023911476135, + "log_odds_ratio": -0.7550162076950073, + "logits/chosen": 1.0696661472320557, + "logits/rejected": 2.325007677078247, + "logps/chosen": -1.0620698928833008, + "logps/rejected": -1.0414937734603882, + "loss": 0.7055, + "nll_loss": 0.6299588680267334, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.1062069982290268, + "rewards/margins": -0.0020576175302267075, + "rewards/rejected": -0.10414938628673553, + "step": 117 + }, + { + "epoch": 0.0734059097978227, + "grad_norm": 0.24016836285591125, + "learning_rate": 4.41e-05, + "log_odds_chosen": -0.1463804543018341, + "log_odds_ratio": -0.8528624176979065, + "logits/chosen": 0.5641318559646606, + "logits/rejected": 1.8689111471176147, + "logps/chosen": -1.148106336593628, + "logps/rejected": -0.9835876226425171, + "loss": 0.5974, + "nll_loss": 0.5120998620986938, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11481063812971115, + "rewards/margins": -0.016451874747872353, + "rewards/rejected": -0.09835876524448395, + "step": 118 + }, + { + "epoch": 0.07402799377916018, + "grad_norm": 0.2635195553302765, + "learning_rate": 4.405e-05, + "log_odds_chosen": 0.1805187165737152, + "log_odds_ratio": -0.6187409162521362, + "logits/chosen": 0.9240512251853943, + "logits/rejected": 3.181490898132324, + "logps/chosen": -1.1101839542388916, + "logps/rejected": -1.2136919498443604, + "loss": 0.6813, + "nll_loss": 0.6194241046905518, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11101838946342468, + "rewards/margins": 0.010350802913308144, + "rewards/rejected": -0.12136919796466827, + "step": 119 + }, + { + "epoch": 0.07465007776049767, + "grad_norm": 0.2084805965423584, + "learning_rate": 4.4000000000000006e-05, + "log_odds_chosen": -0.07975497096776962, + "log_odds_ratio": -0.7737417817115784, + "logits/chosen": 0.5016249418258667, + "logits/rejected": 2.5767385959625244, + "logps/chosen": -1.0363280773162842, + "logps/rejected": -0.9799712896347046, + "loss": 0.6462, + "nll_loss": 0.5687938928604126, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10363280773162842, + "rewards/margins": -0.005635684821754694, + "rewards/rejected": -0.09799712896347046, + "step": 120 + }, + { + "epoch": 0.07527216174183515, + "grad_norm": 0.22351621091365814, + "learning_rate": 4.3950000000000004e-05, + "log_odds_chosen": 0.28257864713668823, + "log_odds_ratio": -0.569965124130249, + "logits/chosen": 1.8498495817184448, + "logits/rejected": 1.6400539875030518, + "logps/chosen": -0.9594013690948486, + "logps/rejected": -1.1362048387527466, + "loss": 0.7609, + "nll_loss": 0.7038699388504028, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09594013541936874, + "rewards/margins": 0.017680345103144646, + "rewards/rejected": -0.11362048983573914, + "step": 121 + }, + { + "epoch": 0.07589424572317263, + "grad_norm": 0.21545082330703735, + "learning_rate": 4.39e-05, + "log_odds_chosen": 0.19825129210948944, + "log_odds_ratio": -0.6359566450119019, + "logits/chosen": 1.388837456703186, + "logits/rejected": 1.8813964128494263, + "logps/chosen": -0.8985767364501953, + "logps/rejected": -0.9870655536651611, + "loss": 0.7647, + "nll_loss": 0.701100766658783, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08985768258571625, + "rewards/margins": 0.00884888507425785, + "rewards/rejected": -0.09870655834674835, + "step": 122 + }, + { + "epoch": 0.07651632970451011, + "grad_norm": 0.25020650029182434, + "learning_rate": 4.385e-05, + "log_odds_chosen": 0.05013291537761688, + "log_odds_ratio": -0.6812134981155396, + "logits/chosen": 1.9867496490478516, + "logits/rejected": 2.5251035690307617, + "logps/chosen": -1.0619776248931885, + "logps/rejected": -1.1009055376052856, + "loss": 0.7372, + "nll_loss": 0.669075071811676, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10619775950908661, + "rewards/margins": 0.0038927989080548286, + "rewards/rejected": -0.11009055376052856, + "step": 123 + }, + { + "epoch": 0.07713841368584759, + "grad_norm": 0.2289746254682541, + "learning_rate": 4.38e-05, + "log_odds_chosen": 0.8278762102127075, + "log_odds_ratio": -0.4634576439857483, + "logits/chosen": 1.5844309329986572, + "logits/rejected": 2.797327995300293, + "logps/chosen": -0.75380539894104, + "logps/rejected": -1.1942193508148193, + "loss": 0.7341, + "nll_loss": 0.687774658203125, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07538054138422012, + "rewards/margins": 0.04404138773679733, + "rewards/rejected": -0.11942192912101746, + "step": 124 + }, + { + "epoch": 0.07776049766718507, + "grad_norm": 0.24135081470012665, + "learning_rate": 4.375e-05, + "log_odds_chosen": 0.6638460755348206, + "log_odds_ratio": -0.5225129127502441, + "logits/chosen": 1.0815123319625854, + "logits/rejected": 1.9553213119506836, + "logps/chosen": -0.7082183957099915, + "logps/rejected": -1.012854814529419, + "loss": 0.627, + "nll_loss": 0.5747087597846985, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0708218514919281, + "rewards/margins": 0.030463647097349167, + "rewards/rejected": -0.10128549486398697, + "step": 125 + }, + { + "epoch": 0.07838258164852255, + "grad_norm": 0.20711150765419006, + "learning_rate": 4.3700000000000005e-05, + "log_odds_chosen": 0.07271279394626617, + "log_odds_ratio": -0.680426836013794, + "logits/chosen": 1.3267732858657837, + "logits/rejected": 2.416010618209839, + "logps/chosen": -1.0107229948043823, + "logps/rejected": -1.066054105758667, + "loss": 0.7109, + "nll_loss": 0.6428604125976562, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1010722890496254, + "rewards/margins": 0.005533117335289717, + "rewards/rejected": -0.1066054105758667, + "step": 126 + }, + { + "epoch": 0.07900466562986003, + "grad_norm": 0.20425938069820404, + "learning_rate": 4.3650000000000004e-05, + "log_odds_chosen": 0.27624690532684326, + "log_odds_ratio": -0.5898454189300537, + "logits/chosen": 2.7942919731140137, + "logits/rejected": 2.604480266571045, + "logps/chosen": -0.8998833894729614, + "logps/rejected": -1.0633008480072021, + "loss": 0.9294, + "nll_loss": 0.8704652786254883, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0899883359670639, + "rewards/margins": 0.016341738402843475, + "rewards/rejected": -0.10633008182048798, + "step": 127 + }, + { + "epoch": 0.0796267496111975, + "grad_norm": 0.23229432106018066, + "learning_rate": 4.36e-05, + "log_odds_chosen": -0.17983496189117432, + "log_odds_ratio": -0.8006916046142578, + "logits/chosen": 1.8407796621322632, + "logits/rejected": 3.0865976810455322, + "logps/chosen": -0.9708831310272217, + "logps/rejected": -0.877918004989624, + "loss": 0.8562, + "nll_loss": 0.776081919670105, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.09708831459283829, + "rewards/margins": -0.009296516887843609, + "rewards/rejected": -0.0877918004989624, + "step": 128 + }, + { + "epoch": 0.080248833592535, + "grad_norm": 0.2508530914783478, + "learning_rate": 4.355e-05, + "log_odds_chosen": 0.16054421663284302, + "log_odds_ratio": -0.6215721368789673, + "logits/chosen": 1.587475061416626, + "logits/rejected": 1.081530213356018, + "logps/chosen": -1.0525381565093994, + "logps/rejected": -1.1599868535995483, + "loss": 0.7807, + "nll_loss": 0.718558669090271, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10525382310152054, + "rewards/margins": 0.010744860395789146, + "rewards/rejected": -0.11599868535995483, + "step": 129 + }, + { + "epoch": 0.08087091757387248, + "grad_norm": 0.2501443028450012, + "learning_rate": 4.35e-05, + "log_odds_chosen": 0.1309620440006256, + "log_odds_ratio": -0.8406453728675842, + "logits/chosen": -0.24144795536994934, + "logits/rejected": 2.991492748260498, + "logps/chosen": -1.2649434804916382, + "logps/rejected": -1.2791178226470947, + "loss": 0.5402, + "nll_loss": 0.4561711847782135, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12649434804916382, + "rewards/margins": 0.0014174282550811768, + "rewards/rejected": -0.127911776304245, + "step": 130 + }, + { + "epoch": 0.08149300155520996, + "grad_norm": 0.1905927062034607, + "learning_rate": 4.345e-05, + "log_odds_chosen": 0.6442151069641113, + "log_odds_ratio": -0.5027979016304016, + "logits/chosen": 1.5362439155578613, + "logits/rejected": 1.6745655536651611, + "logps/chosen": -0.8681899309158325, + "logps/rejected": -1.2771090269088745, + "loss": 0.7633, + "nll_loss": 0.7129961848258972, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08681898564100266, + "rewards/margins": 0.040891923010349274, + "rewards/rejected": -0.12771092355251312, + "step": 131 + }, + { + "epoch": 0.08211508553654744, + "grad_norm": 0.1754845827817917, + "learning_rate": 4.3400000000000005e-05, + "log_odds_chosen": 0.23881648480892181, + "log_odds_ratio": -0.6901636123657227, + "logits/chosen": 1.53242027759552, + "logits/rejected": 3.242114543914795, + "logps/chosen": -0.8632210493087769, + "logps/rejected": -0.9391394853591919, + "loss": 0.7607, + "nll_loss": 0.6916568279266357, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0863221138715744, + "rewards/margins": 0.007591850124299526, + "rewards/rejected": -0.0939139574766159, + "step": 132 + }, + { + "epoch": 0.08273716951788491, + "grad_norm": 0.30056408047676086, + "learning_rate": 4.335e-05, + "log_odds_chosen": 0.33130699396133423, + "log_odds_ratio": -0.738244354724884, + "logits/chosen": 1.5825538635253906, + "logits/rejected": 2.5602152347564697, + "logps/chosen": -1.2096562385559082, + "logps/rejected": -1.3360333442687988, + "loss": 0.7298, + "nll_loss": 0.6560216546058655, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1209656149148941, + "rewards/margins": 0.012637722305953503, + "rewards/rejected": -0.13360333442687988, + "step": 133 + }, + { + "epoch": 0.0833592534992224, + "grad_norm": 0.253454327583313, + "learning_rate": 4.33e-05, + "log_odds_chosen": -0.36041635274887085, + "log_odds_ratio": -0.9571161270141602, + "logits/chosen": 1.5677220821380615, + "logits/rejected": 2.3479645252227783, + "logps/chosen": -1.132239580154419, + "logps/rejected": -0.9019601345062256, + "loss": 0.7929, + "nll_loss": 0.6972097158432007, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.11322395503520966, + "rewards/margins": -0.023027939721941948, + "rewards/rejected": -0.09019602090120316, + "step": 134 + }, + { + "epoch": 0.08398133748055987, + "grad_norm": 0.21625332534313202, + "learning_rate": 4.325e-05, + "log_odds_chosen": 0.08987618237733841, + "log_odds_ratio": -0.6680045127868652, + "logits/chosen": 2.276296377182007, + "logits/rejected": 2.9406542778015137, + "logps/chosen": -0.8994870781898499, + "logps/rejected": -0.9431521892547607, + "loss": 0.8555, + "nll_loss": 0.7886654138565063, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.08994871377944946, + "rewards/margins": 0.004366515204310417, + "rewards/rejected": -0.09431522339582443, + "step": 135 + }, + { + "epoch": 0.08460342146189735, + "grad_norm": 0.18930700421333313, + "learning_rate": 4.32e-05, + "log_odds_chosen": -0.0023861005902290344, + "log_odds_ratio": -0.7080649137496948, + "logits/chosen": 0.006744086742401123, + "logits/rejected": 1.9157198667526245, + "logps/chosen": -0.8520735502243042, + "logps/rejected": -0.8123384714126587, + "loss": 0.5836, + "nll_loss": 0.5127810835838318, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.08520735800266266, + "rewards/margins": -0.003973505459725857, + "rewards/rejected": -0.08123385906219482, + "step": 136 + }, + { + "epoch": 0.08522550544323483, + "grad_norm": 0.22874124348163605, + "learning_rate": 4.315e-05, + "log_odds_chosen": 0.33399975299835205, + "log_odds_ratio": -0.6337571144104004, + "logits/chosen": 2.6366705894470215, + "logits/rejected": 3.1834094524383545, + "logps/chosen": -1.010011911392212, + "logps/rejected": -1.225651741027832, + "loss": 0.9296, + "nll_loss": 0.8662106990814209, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10100119560956955, + "rewards/margins": 0.02156398445367813, + "rewards/rejected": -0.12256518006324768, + "step": 137 + }, + { + "epoch": 0.08584758942457232, + "grad_norm": 0.2262139767408371, + "learning_rate": 4.3100000000000004e-05, + "log_odds_chosen": 0.2507573962211609, + "log_odds_ratio": -0.6413716673851013, + "logits/chosen": 2.071014642715454, + "logits/rejected": 2.5810890197753906, + "logps/chosen": -1.1221033334732056, + "logps/rejected": -1.1979209184646606, + "loss": 0.7838, + "nll_loss": 0.7196545004844666, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.11221033334732056, + "rewards/margins": 0.007581758312880993, + "rewards/rejected": -0.11979209631681442, + "step": 138 + }, + { + "epoch": 0.0864696734059098, + "grad_norm": 0.21719491481781006, + "learning_rate": 4.305e-05, + "log_odds_chosen": 0.1720152199268341, + "log_odds_ratio": -0.6350564360618591, + "logits/chosen": 0.4697237014770508, + "logits/rejected": 2.250985860824585, + "logps/chosen": -1.0021406412124634, + "logps/rejected": -1.1049684286117554, + "loss": 0.5969, + "nll_loss": 0.5334354043006897, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10021406412124634, + "rewards/margins": 0.010282783769071102, + "rewards/rejected": -0.11049684882164001, + "step": 139 + }, + { + "epoch": 0.08709175738724728, + "grad_norm": 0.25784918665885925, + "learning_rate": 4.3e-05, + "log_odds_chosen": 0.1582861989736557, + "log_odds_ratio": -0.6418213844299316, + "logits/chosen": 0.9707194566726685, + "logits/rejected": 2.337258815765381, + "logps/chosen": -1.272529125213623, + "logps/rejected": -1.3633763790130615, + "loss": 0.7069, + "nll_loss": 0.6427214741706848, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12725290656089783, + "rewards/margins": 0.00908473040908575, + "rewards/rejected": -0.13633763790130615, + "step": 140 + }, + { + "epoch": 0.08771384136858476, + "grad_norm": 0.33133354783058167, + "learning_rate": 4.295e-05, + "log_odds_chosen": 0.1922331303358078, + "log_odds_ratio": -0.6399127840995789, + "logits/chosen": 1.1344259977340698, + "logits/rejected": 2.2432358264923096, + "logps/chosen": -1.0353434085845947, + "logps/rejected": -1.1536777019500732, + "loss": 0.7003, + "nll_loss": 0.6363195180892944, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10353434085845947, + "rewards/margins": 0.011833428405225277, + "rewards/rejected": -0.11536777764558792, + "step": 141 + }, + { + "epoch": 0.08833592534992224, + "grad_norm": 0.24132822453975677, + "learning_rate": 4.29e-05, + "log_odds_chosen": 0.1710241138935089, + "log_odds_ratio": -0.6606246829032898, + "logits/chosen": 1.6139814853668213, + "logits/rejected": 2.8867573738098145, + "logps/chosen": -0.8465310335159302, + "logps/rejected": -0.9183309078216553, + "loss": 0.7557, + "nll_loss": 0.689607560634613, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0846531093120575, + "rewards/margins": 0.007179984822869301, + "rewards/rejected": -0.09183309227228165, + "step": 142 + }, + { + "epoch": 0.08895800933125972, + "grad_norm": 0.3063518702983856, + "learning_rate": 4.285e-05, + "log_odds_chosen": 0.3355182707309723, + "log_odds_ratio": -0.5703614950180054, + "logits/chosen": 1.9923746585845947, + "logits/rejected": 1.7918429374694824, + "logps/chosen": -1.0820659399032593, + "logps/rejected": -1.3053545951843262, + "loss": 0.8471, + "nll_loss": 0.7900703549385071, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1082065999507904, + "rewards/margins": 0.022328879684209824, + "rewards/rejected": -0.13053546845912933, + "step": 143 + }, + { + "epoch": 0.0895800933125972, + "grad_norm": 0.24182361364364624, + "learning_rate": 4.2800000000000004e-05, + "log_odds_chosen": 0.3469311594963074, + "log_odds_ratio": -0.6052234768867493, + "logits/chosen": 2.344413995742798, + "logits/rejected": 2.9837701320648193, + "logps/chosen": -1.0931798219680786, + "logps/rejected": -1.2991085052490234, + "loss": 0.8078, + "nll_loss": 0.7472943067550659, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10931797325611115, + "rewards/margins": 0.020592868328094482, + "rewards/rejected": -0.12991085648536682, + "step": 144 + }, + { + "epoch": 0.09020217729393468, + "grad_norm": 0.20874236524105072, + "learning_rate": 4.275e-05, + "log_odds_chosen": 0.6926552057266235, + "log_odds_ratio": -0.4596474766731262, + "logits/chosen": 1.773850679397583, + "logits/rejected": 3.3926727771759033, + "logps/chosen": -0.5454069972038269, + "logps/rejected": -0.8595741987228394, + "loss": 0.8004, + "nll_loss": 0.7544057965278625, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.05454070121049881, + "rewards/margins": 0.031416717916727066, + "rewards/rejected": -0.08595742285251617, + "step": 145 + }, + { + "epoch": 0.09082426127527216, + "grad_norm": 0.24432261288166046, + "learning_rate": 4.27e-05, + "log_odds_chosen": 0.3871600031852722, + "log_odds_ratio": -0.5729288458824158, + "logits/chosen": 1.1837117671966553, + "logits/rejected": 2.0443482398986816, + "logps/chosen": -0.9735139608383179, + "logps/rejected": -1.1910879611968994, + "loss": 0.6312, + "nll_loss": 0.5739009380340576, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09735140204429626, + "rewards/margins": 0.021757401525974274, + "rewards/rejected": -0.11910881102085114, + "step": 146 + }, + { + "epoch": 0.09144634525660965, + "grad_norm": 0.3139575719833374, + "learning_rate": 4.265e-05, + "log_odds_chosen": 0.4184523820877075, + "log_odds_ratio": -0.5198309421539307, + "logits/chosen": 0.5413355231285095, + "logits/rejected": 1.8938310146331787, + "logps/chosen": -0.8826829195022583, + "logps/rejected": -1.14265775680542, + "loss": 0.5939, + "nll_loss": 0.5419252514839172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08826829493045807, + "rewards/margins": 0.025997478514909744, + "rewards/rejected": -0.11426577717065811, + "step": 147 + }, + { + "epoch": 0.09206842923794713, + "grad_norm": 0.20702946186065674, + "learning_rate": 4.26e-05, + "log_odds_chosen": 0.07650808244943619, + "log_odds_ratio": -0.6737152338027954, + "logits/chosen": 1.5741207599639893, + "logits/rejected": 3.246458053588867, + "logps/chosen": -1.008644938468933, + "logps/rejected": -1.0792831182479858, + "loss": 0.8092, + "nll_loss": 0.7418737411499023, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10086449980735779, + "rewards/margins": 0.007063813507556915, + "rewards/rejected": -0.1079283133149147, + "step": 148 + }, + { + "epoch": 0.0926905132192846, + "grad_norm": 0.20067504048347473, + "learning_rate": 4.2550000000000004e-05, + "log_odds_chosen": -0.02825966477394104, + "log_odds_ratio": -0.7468290328979492, + "logits/chosen": 1.176827311515808, + "logits/rejected": 2.1293768882751465, + "logps/chosen": -1.0007497072219849, + "logps/rejected": -0.9864083528518677, + "loss": 0.7599, + "nll_loss": 0.6852264404296875, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10007497668266296, + "rewards/margins": -0.0014341361820697784, + "rewards/rejected": -0.09864082932472229, + "step": 149 + }, + { + "epoch": 0.09331259720062209, + "grad_norm": 0.2831576466560364, + "learning_rate": 4.25e-05, + "log_odds_chosen": 0.007619678974151611, + "log_odds_ratio": -0.7431962490081787, + "logits/chosen": 1.9270395040512085, + "logits/rejected": 1.999312162399292, + "logps/chosen": -1.0627632141113281, + "logps/rejected": -1.0206648111343384, + "loss": 0.8259, + "nll_loss": 0.7516275644302368, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10627631843090057, + "rewards/margins": -0.0042098406702280045, + "rewards/rejected": -0.10206647962331772, + "step": 150 + }, + { + "epoch": 0.09393468118195956, + "grad_norm": 0.3491978049278259, + "learning_rate": 4.245e-05, + "log_odds_chosen": -0.12725840508937836, + "log_odds_ratio": -0.7853566408157349, + "logits/chosen": 2.8336124420166016, + "logits/rejected": 1.3309614658355713, + "logps/chosen": -1.260406732559204, + "logps/rejected": -1.1789261102676392, + "loss": 0.9395, + "nll_loss": 0.8609429001808167, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12604066729545593, + "rewards/margins": -0.00814807042479515, + "rewards/rejected": -0.11789260059595108, + "step": 151 + }, + { + "epoch": 0.09455676516329704, + "grad_norm": 0.22511498630046844, + "learning_rate": 4.24e-05, + "log_odds_chosen": 0.27719905972480774, + "log_odds_ratio": -0.6086083054542542, + "logits/chosen": 1.3456978797912598, + "logits/rejected": 2.8321447372436523, + "logps/chosen": -0.9333552122116089, + "logps/rejected": -1.0779469013214111, + "loss": 0.7306, + "nll_loss": 0.6697627305984497, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09333551675081253, + "rewards/margins": 0.014459175989031792, + "rewards/rejected": -0.10779468715190887, + "step": 152 + }, + { + "epoch": 0.09517884914463452, + "grad_norm": 0.25012776255607605, + "learning_rate": 4.235e-05, + "log_odds_chosen": 0.10945864021778107, + "log_odds_ratio": -0.6662627458572388, + "logits/chosen": 0.27826401591300964, + "logits/rejected": 1.6450650691986084, + "logps/chosen": -1.1210460662841797, + "logps/rejected": -1.172456979751587, + "loss": 0.6499, + "nll_loss": 0.583318829536438, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11210459470748901, + "rewards/margins": 0.005141102708876133, + "rewards/rejected": -0.11724570393562317, + "step": 153 + }, + { + "epoch": 0.095800933125972, + "grad_norm": 0.19437120854854584, + "learning_rate": 4.23e-05, + "log_odds_chosen": 0.4470350742340088, + "log_odds_ratio": -0.5733773708343506, + "logits/chosen": 1.7237200736999512, + "logits/rejected": 2.95843768119812, + "logps/chosen": -0.9336342215538025, + "logps/rejected": -1.1649386882781982, + "loss": 0.7838, + "nll_loss": 0.7264678478240967, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0933634340763092, + "rewards/margins": 0.02313043922185898, + "rewards/rejected": -0.11649386584758759, + "step": 154 + }, + { + "epoch": 0.09642301710730948, + "grad_norm": 0.22907698154449463, + "learning_rate": 4.2250000000000004e-05, + "log_odds_chosen": 0.1783168613910675, + "log_odds_ratio": -0.7230756878852844, + "logits/chosen": -0.021383345127105713, + "logits/rejected": 1.2943320274353027, + "logps/chosen": -0.8587887287139893, + "logps/rejected": -0.9087654948234558, + "loss": 0.6111, + "nll_loss": 0.5387856960296631, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0858788713812828, + "rewards/margins": 0.0049976771697402, + "rewards/rejected": -0.09087655693292618, + "step": 155 + }, + { + "epoch": 0.09704510108864697, + "grad_norm": 0.2803076505661011, + "learning_rate": 4.22e-05, + "log_odds_chosen": 0.3767983913421631, + "log_odds_ratio": -0.5567384958267212, + "logits/chosen": 1.33505117893219, + "logits/rejected": 2.911482810974121, + "logps/chosen": -0.9518997073173523, + "logps/rejected": -1.1626166105270386, + "loss": 0.7235, + "nll_loss": 0.6678577661514282, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09518995881080627, + "rewards/margins": 0.021071698516607285, + "rewards/rejected": -0.11626166105270386, + "step": 156 + }, + { + "epoch": 0.09766718506998445, + "grad_norm": 0.7590816020965576, + "learning_rate": 4.215e-05, + "log_odds_chosen": 0.3583412170410156, + "log_odds_ratio": -0.6060498356819153, + "logits/chosen": 1.9442944526672363, + "logits/rejected": 2.415597915649414, + "logps/chosen": -1.2141615152359009, + "logps/rejected": -1.442068338394165, + "loss": 0.8981, + "nll_loss": 0.8374723792076111, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12141615152359009, + "rewards/margins": 0.02279069647192955, + "rewards/rejected": -0.14420685172080994, + "step": 157 + }, + { + "epoch": 0.09828926905132193, + "grad_norm": 0.2292739897966385, + "learning_rate": 4.21e-05, + "log_odds_chosen": 0.2378174364566803, + "log_odds_ratio": -0.6532160043716431, + "logits/chosen": -0.0699785053730011, + "logits/rejected": 2.0174026489257812, + "logps/chosen": -0.974482536315918, + "logps/rejected": -1.0714282989501953, + "loss": 0.5066, + "nll_loss": 0.4412919580936432, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09744825214147568, + "rewards/margins": 0.009694581851363182, + "rewards/rejected": -0.10714283585548401, + "step": 158 + }, + { + "epoch": 0.09891135303265941, + "grad_norm": 0.22754822671413422, + "learning_rate": 4.205e-05, + "log_odds_chosen": 1.0634626150131226, + "log_odds_ratio": -0.407301664352417, + "logits/chosen": 1.4696998596191406, + "logits/rejected": 2.612338066101074, + "logps/chosen": -0.6319990754127502, + "logps/rejected": -1.2708525657653809, + "loss": 0.7594, + "nll_loss": 0.7186335921287537, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.06319990754127502, + "rewards/margins": 0.06388533860445023, + "rewards/rejected": -0.12708523869514465, + "step": 159 + }, + { + "epoch": 0.09953343701399689, + "grad_norm": 0.2632899880409241, + "learning_rate": 4.2e-05, + "log_odds_chosen": 0.16408134996891022, + "log_odds_ratio": -0.6438462734222412, + "logits/chosen": 1.6441045999526978, + "logits/rejected": 2.838496685028076, + "logps/chosen": -0.9144777059555054, + "logps/rejected": -0.9933640360832214, + "loss": 0.6945, + "nll_loss": 0.6300674080848694, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09144777059555054, + "rewards/margins": 0.007888639345765114, + "rewards/rejected": -0.0993364006280899, + "step": 160 + }, + { + "epoch": 0.10015552099533437, + "grad_norm": 0.41054767370224, + "learning_rate": 4.195e-05, + "log_odds_chosen": 0.16628219187259674, + "log_odds_ratio": -0.6762892007827759, + "logits/chosen": 1.4416700601577759, + "logits/rejected": 1.777522087097168, + "logps/chosen": -1.003077507019043, + "logps/rejected": -1.0760802030563354, + "loss": 0.7156, + "nll_loss": 0.647979736328125, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.10030774772167206, + "rewards/margins": 0.007300272583961487, + "rewards/rejected": -0.10760802030563354, + "step": 161 + }, + { + "epoch": 0.10077760497667185, + "grad_norm": 0.38517841696739197, + "learning_rate": 4.19e-05, + "log_odds_chosen": 0.19973157346248627, + "log_odds_ratio": -0.6236205697059631, + "logits/chosen": 2.4252243041992188, + "logits/rejected": 3.675344944000244, + "logps/chosen": -0.9389520287513733, + "logps/rejected": -1.043189525604248, + "loss": 0.8682, + "nll_loss": 0.8058300614356995, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.09389521181583405, + "rewards/margins": 0.010423748753964901, + "rewards/rejected": -0.10431894659996033, + "step": 162 + }, + { + "epoch": 0.10139968895800933, + "grad_norm": 0.22616150975227356, + "learning_rate": 4.185e-05, + "log_odds_chosen": 0.1786232888698578, + "log_odds_ratio": -0.6155363917350769, + "logits/chosen": 0.7019532322883606, + "logits/rejected": 1.6438217163085938, + "logps/chosen": -1.0487325191497803, + "logps/rejected": -1.1808518171310425, + "loss": 0.7142, + "nll_loss": 0.6526217460632324, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10487325489521027, + "rewards/margins": 0.013211926445364952, + "rewards/rejected": -0.11808518320322037, + "step": 163 + }, + { + "epoch": 0.1020217729393468, + "grad_norm": 0.27632614970207214, + "learning_rate": 4.18e-05, + "log_odds_chosen": 0.1357310712337494, + "log_odds_ratio": -0.6856433749198914, + "logits/chosen": 1.608304500579834, + "logits/rejected": 2.1849358081817627, + "logps/chosen": -1.013383150100708, + "logps/rejected": -1.0906598567962646, + "loss": 0.7822, + "nll_loss": 0.7136261463165283, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10133831202983856, + "rewards/margins": 0.007727675139904022, + "rewards/rejected": -0.10906599462032318, + "step": 164 + }, + { + "epoch": 0.1026438569206843, + "grad_norm": 0.2569625675678253, + "learning_rate": 4.175e-05, + "log_odds_chosen": 0.1790873259305954, + "log_odds_ratio": -0.6222453117370605, + "logits/chosen": 0.2540859580039978, + "logits/rejected": 1.5012887716293335, + "logps/chosen": -0.9785174131393433, + "logps/rejected": -1.096341609954834, + "loss": 0.6326, + "nll_loss": 0.5704231262207031, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09785175323486328, + "rewards/margins": 0.01178241241723299, + "rewards/rejected": -0.1096341609954834, + "step": 165 + }, + { + "epoch": 0.10326594090202178, + "grad_norm": 0.29103732109069824, + "learning_rate": 4.17e-05, + "log_odds_chosen": 0.6809232234954834, + "log_odds_ratio": -0.5375621914863586, + "logits/chosen": 2.260108709335327, + "logits/rejected": 3.4551193714141846, + "logps/chosen": -0.7766050100326538, + "logps/rejected": -1.0041743516921997, + "loss": 0.9029, + "nll_loss": 0.8491120338439941, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07766050100326538, + "rewards/margins": 0.02275693416595459, + "rewards/rejected": -0.10041744261980057, + "step": 166 + }, + { + "epoch": 0.10388802488335926, + "grad_norm": 0.24556207656860352, + "learning_rate": 4.165e-05, + "log_odds_chosen": 0.17520564794540405, + "log_odds_ratio": -0.6451351046562195, + "logits/chosen": 0.37003058195114136, + "logits/rejected": 3.1531291007995605, + "logps/chosen": -0.95960533618927, + "logps/rejected": -1.0630546808242798, + "loss": 0.6518, + "nll_loss": 0.5872541666030884, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09596052765846252, + "rewards/margins": 0.010344942100346088, + "rewards/rejected": -0.10630547255277634, + "step": 167 + }, + { + "epoch": 0.10451010886469674, + "grad_norm": 0.43578729033470154, + "learning_rate": 4.16e-05, + "log_odds_chosen": 0.3509252965450287, + "log_odds_ratio": -0.579054057598114, + "logits/chosen": 1.795426368713379, + "logits/rejected": 1.9425467252731323, + "logps/chosen": -0.8844292163848877, + "logps/rejected": -1.0651838779449463, + "loss": 0.7989, + "nll_loss": 0.7409669160842896, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08844292163848877, + "rewards/margins": 0.018075458705425262, + "rewards/rejected": -0.10651838779449463, + "step": 168 + }, + { + "epoch": 0.10513219284603421, + "grad_norm": 0.21419963240623474, + "learning_rate": 4.155e-05, + "log_odds_chosen": 0.2651481032371521, + "log_odds_ratio": -0.6336223483085632, + "logits/chosen": -0.10004106163978577, + "logits/rejected": 2.5327444076538086, + "logps/chosen": -0.9215387105941772, + "logps/rejected": -1.0058555603027344, + "loss": 0.5136, + "nll_loss": 0.45028623938560486, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.0921538770198822, + "rewards/margins": 0.008431695401668549, + "rewards/rejected": -0.10058555752038956, + "step": 169 + }, + { + "epoch": 0.1057542768273717, + "grad_norm": 0.26683712005615234, + "learning_rate": 4.15e-05, + "log_odds_chosen": 0.1297835409641266, + "log_odds_ratio": -0.7755084037780762, + "logits/chosen": 1.6993978023529053, + "logits/rejected": 1.991716980934143, + "logps/chosen": -1.0198416709899902, + "logps/rejected": -0.9949245452880859, + "loss": 0.7747, + "nll_loss": 0.6971030831336975, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1019841730594635, + "rewards/margins": -0.002491721883416176, + "rewards/rejected": -0.09949245303869247, + "step": 170 + }, + { + "epoch": 0.10637636080870917, + "grad_norm": 0.21372289955615997, + "learning_rate": 4.145e-05, + "log_odds_chosen": 0.11716372519731522, + "log_odds_ratio": -0.7064924240112305, + "logits/chosen": -0.41527295112609863, + "logits/rejected": 0.6108665466308594, + "logps/chosen": -0.9913889169692993, + "logps/rejected": -1.063600778579712, + "loss": 0.5345, + "nll_loss": 0.46387529373168945, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09913888573646545, + "rewards/margins": 0.007221193052828312, + "rewards/rejected": -0.10636007785797119, + "step": 171 + }, + { + "epoch": 0.10699844479004665, + "grad_norm": 0.34510332345962524, + "learning_rate": 4.14e-05, + "log_odds_chosen": -0.006949573755264282, + "log_odds_ratio": -0.7512286901473999, + "logits/chosen": 2.2331154346466064, + "logits/rejected": 2.70103120803833, + "logps/chosen": -0.9838117361068726, + "logps/rejected": -0.9597645998001099, + "loss": 0.8141, + "nll_loss": 0.7389856576919556, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.0983811691403389, + "rewards/margins": -0.0024047140032052994, + "rewards/rejected": -0.09597645699977875, + "step": 172 + }, + { + "epoch": 0.10762052877138413, + "grad_norm": 0.24371562898159027, + "learning_rate": 4.135e-05, + "log_odds_chosen": -0.088199183344841, + "log_odds_ratio": -0.7620775699615479, + "logits/chosen": 1.896087408065796, + "logits/rejected": 2.6639156341552734, + "logps/chosen": -1.0846797227859497, + "logps/rejected": -1.0300229787826538, + "loss": 0.7958, + "nll_loss": 0.7195842266082764, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.1084679663181305, + "rewards/margins": -0.005465677008032799, + "rewards/rejected": -0.10300229489803314, + "step": 173 + }, + { + "epoch": 0.10824261275272162, + "grad_norm": 0.2548561692237854, + "learning_rate": 4.13e-05, + "log_odds_chosen": 0.16646553575992584, + "log_odds_ratio": -0.6395515203475952, + "logits/chosen": 1.1434202194213867, + "logits/rejected": 2.398165464401245, + "logps/chosen": -1.1627336740493774, + "logps/rejected": -1.2480618953704834, + "loss": 0.7475, + "nll_loss": 0.6835085153579712, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11627336591482162, + "rewards/margins": 0.008532822132110596, + "rewards/rejected": -0.12480619549751282, + "step": 174 + }, + { + "epoch": 0.1088646967340591, + "grad_norm": 0.34918922185897827, + "learning_rate": 4.125e-05, + "log_odds_chosen": 0.6802692413330078, + "log_odds_ratio": -0.6001302599906921, + "logits/chosen": 2.011270046234131, + "logits/rejected": 3.118522882461548, + "logps/chosen": -1.0734280347824097, + "logps/rejected": -1.3806785345077515, + "loss": 0.822, + "nll_loss": 0.762000560760498, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10734279453754425, + "rewards/margins": 0.030725058168172836, + "rewards/rejected": -0.13806785643100739, + "step": 175 + }, + { + "epoch": 0.10948678071539658, + "grad_norm": 0.2557761073112488, + "learning_rate": 4.12e-05, + "log_odds_chosen": 0.3574141263961792, + "log_odds_ratio": -0.5623692274093628, + "logits/chosen": 0.9804760813713074, + "logits/rejected": 1.5133546590805054, + "logps/chosen": -1.0537047386169434, + "logps/rejected": -1.304141879081726, + "loss": 0.6899, + "nll_loss": 0.6336163878440857, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10537047684192657, + "rewards/margins": 0.02504371665418148, + "rewards/rejected": -0.1304141879081726, + "step": 176 + }, + { + "epoch": 0.11010886469673406, + "grad_norm": 0.2286410927772522, + "learning_rate": 4.115e-05, + "log_odds_chosen": -0.17244988679885864, + "log_odds_ratio": -0.9307609796524048, + "logits/chosen": 1.0503787994384766, + "logits/rejected": 2.7753310203552246, + "logps/chosen": -1.295305848121643, + "logps/rejected": -1.0674867630004883, + "loss": 0.8022, + "nll_loss": 0.7091712951660156, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12953059375286102, + "rewards/margins": -0.02278190851211548, + "rewards/rejected": -0.10674867033958435, + "step": 177 + }, + { + "epoch": 0.11073094867807154, + "grad_norm": 0.35464537143707275, + "learning_rate": 4.11e-05, + "log_odds_chosen": -0.1714506447315216, + "log_odds_ratio": -0.927706241607666, + "logits/chosen": 1.4732853174209595, + "logits/rejected": 2.787705421447754, + "logps/chosen": -1.286841869354248, + "logps/rejected": -1.1253931522369385, + "loss": 0.7488, + "nll_loss": 0.6560234427452087, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12868419289588928, + "rewards/margins": -0.016144881024956703, + "rewards/rejected": -0.11253931373357773, + "step": 178 + }, + { + "epoch": 0.11135303265940902, + "grad_norm": 0.46238580346107483, + "learning_rate": 4.105e-05, + "log_odds_chosen": 0.37182509899139404, + "log_odds_ratio": -0.5696491599082947, + "logits/chosen": 2.656465530395508, + "logits/rejected": 3.799373149871826, + "logps/chosen": -1.022263765335083, + "logps/rejected": -1.18893563747406, + "loss": 0.9302, + "nll_loss": 0.8732700347900391, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1022263765335083, + "rewards/margins": 0.01666719652712345, + "rewards/rejected": -0.1188935711979866, + "step": 179 + }, + { + "epoch": 0.1119751166407465, + "grad_norm": 0.41658157110214233, + "learning_rate": 4.1e-05, + "log_odds_chosen": 0.14363381266593933, + "log_odds_ratio": -0.667683482170105, + "logits/chosen": 1.3900699615478516, + "logits/rejected": 2.4027140140533447, + "logps/chosen": -1.093644380569458, + "logps/rejected": -1.1805763244628906, + "loss": 0.7966, + "nll_loss": 0.7298657298088074, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10936443507671356, + "rewards/margins": 0.008693188428878784, + "rewards/rejected": -0.11805762350559235, + "step": 180 + }, + { + "epoch": 0.11259720062208398, + "grad_norm": 0.2824760675430298, + "learning_rate": 4.095e-05, + "log_odds_chosen": 0.02471756935119629, + "log_odds_ratio": -0.6913062334060669, + "logits/chosen": 1.611011266708374, + "logits/rejected": 2.775919198989868, + "logps/chosen": -1.021841287612915, + "logps/rejected": -1.0538395643234253, + "loss": 0.7521, + "nll_loss": 0.6830064058303833, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10218413919210434, + "rewards/margins": 0.0031998255290091038, + "rewards/rejected": -0.105383962392807, + "step": 181 + }, + { + "epoch": 0.11321928460342146, + "grad_norm": 0.2572093605995178, + "learning_rate": 4.09e-05, + "log_odds_chosen": 0.49159225821495056, + "log_odds_ratio": -0.4956834614276886, + "logits/chosen": 0.8409824371337891, + "logits/rejected": 2.636554002761841, + "logps/chosen": -0.7410684823989868, + "logps/rejected": -1.0188310146331787, + "loss": 0.5459, + "nll_loss": 0.4963690936565399, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0741068571805954, + "rewards/margins": 0.027776243165135384, + "rewards/rejected": -0.10188309103250504, + "step": 182 + }, + { + "epoch": 0.11384136858475895, + "grad_norm": 0.20492465794086456, + "learning_rate": 4.085e-05, + "log_odds_chosen": 1.2914584875106812, + "log_odds_ratio": -0.311604768037796, + "logits/chosen": 2.0142695903778076, + "logits/rejected": 2.8114659786224365, + "logps/chosen": -0.6310855746269226, + "logps/rejected": -1.169175386428833, + "loss": 0.7257, + "nll_loss": 0.6945566534996033, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06310856342315674, + "rewards/margins": 0.05380897969007492, + "rewards/rejected": -0.11691753566265106, + "step": 183 + }, + { + "epoch": 0.11446345256609643, + "grad_norm": 0.2868058383464813, + "learning_rate": 4.08e-05, + "log_odds_chosen": 0.2746756076812744, + "log_odds_ratio": -0.5826683640480042, + "logits/chosen": 2.0338900089263916, + "logits/rejected": 2.3741323947906494, + "logps/chosen": -1.0554683208465576, + "logps/rejected": -1.2263842821121216, + "loss": 0.8424, + "nll_loss": 0.784168004989624, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10554683953523636, + "rewards/margins": 0.01709158718585968, + "rewards/rejected": -0.12263843417167664, + "step": 184 + }, + { + "epoch": 0.1150855365474339, + "grad_norm": 0.23621974885463715, + "learning_rate": 4.075e-05, + "log_odds_chosen": 0.32036083936691284, + "log_odds_ratio": -0.5880539417266846, + "logits/chosen": 1.523934006690979, + "logits/rejected": 1.8056572675704956, + "logps/chosen": -0.8745806217193604, + "logps/rejected": -1.0699481964111328, + "loss": 0.791, + "nll_loss": 0.7322185635566711, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0874580666422844, + "rewards/margins": 0.019536755979061127, + "rewards/rejected": -0.10699482262134552, + "step": 185 + }, + { + "epoch": 0.11570762052877138, + "grad_norm": 0.22687800228595734, + "learning_rate": 4.07e-05, + "log_odds_chosen": 0.19166047871112823, + "log_odds_ratio": -0.6182323098182678, + "logits/chosen": 1.2291524410247803, + "logits/rejected": 2.7583565711975098, + "logps/chosen": -0.8949711918830872, + "logps/rejected": -0.9789303541183472, + "loss": 0.6768, + "nll_loss": 0.6149976849555969, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.08949711918830872, + "rewards/margins": 0.008395911194384098, + "rewards/rejected": -0.09789302945137024, + "step": 186 + }, + { + "epoch": 0.11632970451010886, + "grad_norm": 0.2710927128791809, + "learning_rate": 4.065e-05, + "log_odds_chosen": -0.290906697511673, + "log_odds_ratio": -0.8772719502449036, + "logits/chosen": 1.058547019958496, + "logits/rejected": 2.3658413887023926, + "logps/chosen": -1.139380693435669, + "logps/rejected": -0.9223094582557678, + "loss": 0.8327, + "nll_loss": 0.7450096607208252, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.11393808573484421, + "rewards/margins": -0.02170712873339653, + "rewards/rejected": -0.09223095327615738, + "step": 187 + }, + { + "epoch": 0.11695178849144634, + "grad_norm": 0.25022706389427185, + "learning_rate": 4.0600000000000004e-05, + "log_odds_chosen": -0.1785784363746643, + "log_odds_ratio": -0.9537197947502136, + "logits/chosen": 0.9926282167434692, + "logits/rejected": 2.011066436767578, + "logps/chosen": -1.2994134426116943, + "logps/rejected": -1.0407631397247314, + "loss": 0.7163, + "nll_loss": 0.6209733486175537, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12994134426116943, + "rewards/margins": -0.025865033268928528, + "rewards/rejected": -0.1040763109922409, + "step": 188 + }, + { + "epoch": 0.11757387247278382, + "grad_norm": 0.4500291049480438, + "learning_rate": 4.055e-05, + "log_odds_chosen": 0.24771341681480408, + "log_odds_ratio": -0.5874661207199097, + "logits/chosen": 1.660165786743164, + "logits/rejected": 2.7396130561828613, + "logps/chosen": -0.8763042688369751, + "logps/rejected": -1.037272572517395, + "loss": 0.7822, + "nll_loss": 0.7234908938407898, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08763042092323303, + "rewards/margins": 0.01609683595597744, + "rewards/rejected": -0.10372726619243622, + "step": 189 + }, + { + "epoch": 0.1181959564541213, + "grad_norm": 0.3109215795993805, + "learning_rate": 4.05e-05, + "log_odds_chosen": -0.002947285771369934, + "log_odds_ratio": -0.7154306769371033, + "logits/chosen": 2.46467924118042, + "logits/rejected": 2.4921913146972656, + "logps/chosen": -1.175093650817871, + "logps/rejected": -1.155705451965332, + "loss": 0.9375, + "nll_loss": 0.8659391403198242, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11750936508178711, + "rewards/margins": -0.0019388198852539062, + "rewards/rejected": -0.1155705451965332, + "step": 190 + }, + { + "epoch": 0.1188180404354588, + "grad_norm": 0.2340676635503769, + "learning_rate": 4.045000000000001e-05, + "log_odds_chosen": 0.6892496347427368, + "log_odds_ratio": -0.46823450922966003, + "logits/chosen": 1.4322489500045776, + "logits/rejected": 2.8000543117523193, + "logps/chosen": -0.6895767450332642, + "logps/rejected": -1.0397164821624756, + "loss": 0.6969, + "nll_loss": 0.6500332951545715, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06895767152309418, + "rewards/margins": 0.03501397743821144, + "rewards/rejected": -0.10397165268659592, + "step": 191 + }, + { + "epoch": 0.11944012441679627, + "grad_norm": 0.24883458018302917, + "learning_rate": 4.0400000000000006e-05, + "log_odds_chosen": 0.26280373334884644, + "log_odds_ratio": -0.6019066572189331, + "logits/chosen": 0.9045218229293823, + "logits/rejected": 2.5123941898345947, + "logps/chosen": -1.074615716934204, + "logps/rejected": -1.2176018953323364, + "loss": 0.6199, + "nll_loss": 0.5597118735313416, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10746156424283981, + "rewards/margins": 0.014298615977168083, + "rewards/rejected": -0.12176018953323364, + "step": 192 + }, + { + "epoch": 0.12006220839813375, + "grad_norm": 0.7652886509895325, + "learning_rate": 4.0350000000000005e-05, + "log_odds_chosen": 0.2115470916032791, + "log_odds_ratio": -0.6227182149887085, + "logits/chosen": 1.9737762212753296, + "logits/rejected": 3.5508880615234375, + "logps/chosen": -0.902724027633667, + "logps/rejected": -0.994047999382019, + "loss": 0.85, + "nll_loss": 0.7876854538917542, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09027239680290222, + "rewards/margins": 0.009132396429777145, + "rewards/rejected": -0.09940479695796967, + "step": 193 + }, + { + "epoch": 0.12068429237947123, + "grad_norm": 0.25935572385787964, + "learning_rate": 4.0300000000000004e-05, + "log_odds_chosen": 0.6906004548072815, + "log_odds_ratio": -0.42626866698265076, + "logits/chosen": 0.13742883503437042, + "logits/rejected": 1.888708472251892, + "logps/chosen": -0.8336443901062012, + "logps/rejected": -1.2578824758529663, + "loss": 0.5188, + "nll_loss": 0.47615402936935425, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08336444199085236, + "rewards/margins": 0.042423803359270096, + "rewards/rejected": -0.12578824162483215, + "step": 194 + }, + { + "epoch": 0.12130637636080871, + "grad_norm": 4.610418796539307, + "learning_rate": 4.025e-05, + "log_odds_chosen": 0.5752971172332764, + "log_odds_ratio": -0.5104885697364807, + "logits/chosen": 1.1361490488052368, + "logits/rejected": 1.9390631914138794, + "logps/chosen": -0.9524904489517212, + "logps/rejected": -1.3292269706726074, + "loss": 0.7264, + "nll_loss": 0.6753022074699402, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09524904191493988, + "rewards/margins": 0.037673648446798325, + "rewards/rejected": -0.1329226940870285, + "step": 195 + }, + { + "epoch": 0.12192846034214619, + "grad_norm": 0.2897116243839264, + "learning_rate": 4.02e-05, + "log_odds_chosen": -0.022627107799053192, + "log_odds_ratio": -0.7134356498718262, + "logits/chosen": 1.5104551315307617, + "logits/rejected": 2.7221500873565674, + "logps/chosen": -0.9891096353530884, + "logps/rejected": -0.9596316814422607, + "loss": 0.7559, + "nll_loss": 0.6845479011535645, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.09891095757484436, + "rewards/margins": -0.0029477900825440884, + "rewards/rejected": -0.09596316516399384, + "step": 196 + }, + { + "epoch": 0.12255054432348367, + "grad_norm": 0.37426719069480896, + "learning_rate": 4.015000000000001e-05, + "log_odds_chosen": -0.2981864809989929, + "log_odds_ratio": -0.980148196220398, + "logits/chosen": 1.6573596000671387, + "logits/rejected": 1.4556148052215576, + "logps/chosen": -1.2158352136611938, + "logps/rejected": -1.0322885513305664, + "loss": 0.7979, + "nll_loss": 0.6998590230941772, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12158352136611938, + "rewards/margins": -0.018354661762714386, + "rewards/rejected": -0.1032288521528244, + "step": 197 + }, + { + "epoch": 0.12317262830482115, + "grad_norm": 0.3264211118221283, + "learning_rate": 4.0100000000000006e-05, + "log_odds_chosen": 0.4851124882698059, + "log_odds_ratio": -0.5224840641021729, + "logits/chosen": 2.4400124549865723, + "logits/rejected": 1.3729251623153687, + "logps/chosen": -1.0999153852462769, + "logps/rejected": -1.459439992904663, + "loss": 0.8856, + "nll_loss": 0.8333885669708252, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10999153554439545, + "rewards/margins": 0.03595246374607086, + "rewards/rejected": -0.1459439992904663, + "step": 198 + }, + { + "epoch": 0.12379471228615863, + "grad_norm": 0.28378090262413025, + "learning_rate": 4.0050000000000004e-05, + "log_odds_chosen": 0.29335591197013855, + "log_odds_ratio": -0.5916770100593567, + "logits/chosen": 0.7340425252914429, + "logits/rejected": 1.0187065601348877, + "logps/chosen": -1.0739442110061646, + "logps/rejected": -1.1977989673614502, + "loss": 0.7556, + "nll_loss": 0.6964027881622314, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10739442706108093, + "rewards/margins": 0.012385478243231773, + "rewards/rejected": -0.11977989971637726, + "step": 199 + }, + { + "epoch": 0.12441679626749612, + "grad_norm": 0.7085748910903931, + "learning_rate": 4e-05, + "log_odds_chosen": -0.2727814018726349, + "log_odds_ratio": -0.9787805676460266, + "logits/chosen": 1.3356515169143677, + "logits/rejected": 2.2699167728424072, + "logps/chosen": -1.482822060585022, + "logps/rejected": -1.2205945253372192, + "loss": 0.8284, + "nll_loss": 0.7305508852005005, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1482822149991989, + "rewards/margins": -0.02622275799512863, + "rewards/rejected": -0.12205944955348969, + "step": 200 + }, + { + "epoch": 0.12503888024883358, + "grad_norm": 0.31775179505348206, + "learning_rate": 3.995e-05, + "log_odds_chosen": -0.02565819025039673, + "log_odds_ratio": -0.7218791842460632, + "logits/chosen": 1.1910040378570557, + "logits/rejected": 2.9965484142303467, + "logps/chosen": -1.1026508808135986, + "logps/rejected": -1.0807952880859375, + "loss": 0.7445, + "nll_loss": 0.6723363399505615, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1102650910615921, + "rewards/margins": -0.002185564488172531, + "rewards/rejected": -0.10807952284812927, + "step": 201 + }, + { + "epoch": 0.12566096423017106, + "grad_norm": 0.21708033978939056, + "learning_rate": 3.99e-05, + "log_odds_chosen": 0.4155348539352417, + "log_odds_ratio": -0.6038289070129395, + "logits/chosen": 0.3837054967880249, + "logits/rejected": 1.7948483228683472, + "logps/chosen": -1.0251164436340332, + "logps/rejected": -1.305844783782959, + "loss": 0.6265, + "nll_loss": 0.5660817623138428, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10251164436340332, + "rewards/margins": 0.028072843328118324, + "rewards/rejected": -0.1305844783782959, + "step": 202 + }, + { + "epoch": 0.12628304821150854, + "grad_norm": 1.0549097061157227, + "learning_rate": 3.9850000000000006e-05, + "log_odds_chosen": 0.11141453683376312, + "log_odds_ratio": -0.6722193956375122, + "logits/chosen": 1.9163165092468262, + "logits/rejected": 2.140613555908203, + "logps/chosen": -0.9174782633781433, + "logps/rejected": -0.9806128144264221, + "loss": 0.8362, + "nll_loss": 0.7690044045448303, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09174783527851105, + "rewards/margins": 0.00631345110014081, + "rewards/rejected": -0.09806128591299057, + "step": 203 + }, + { + "epoch": 0.12690513219284602, + "grad_norm": 0.2251633256673813, + "learning_rate": 3.9800000000000005e-05, + "log_odds_chosen": 0.6894951462745667, + "log_odds_ratio": -0.4621826410293579, + "logits/chosen": -0.8240604996681213, + "logits/rejected": 2.079634189605713, + "logps/chosen": -0.9713005423545837, + "logps/rejected": -1.3288459777832031, + "loss": 0.4771, + "nll_loss": 0.43083181977272034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09713006019592285, + "rewards/margins": 0.03575454279780388, + "rewards/rejected": -0.13288459181785583, + "step": 204 + }, + { + "epoch": 0.12752721617418353, + "grad_norm": 0.40125712752342224, + "learning_rate": 3.9750000000000004e-05, + "log_odds_chosen": 0.03986138850450516, + "log_odds_ratio": -0.8418188691139221, + "logits/chosen": 2.9345784187316895, + "logits/rejected": 3.9708542823791504, + "logps/chosen": -1.3259272575378418, + "logps/rejected": -1.2527263164520264, + "loss": 0.9907, + "nll_loss": 0.9065566062927246, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13259272277355194, + "rewards/margins": -0.0073200855404138565, + "rewards/rejected": -0.12527263164520264, + "step": 205 + }, + { + "epoch": 0.128149300155521, + "grad_norm": 0.2802586257457733, + "learning_rate": 3.97e-05, + "log_odds_chosen": 0.718868613243103, + "log_odds_ratio": -0.4649730622768402, + "logits/chosen": 0.6591233611106873, + "logits/rejected": 1.1603331565856934, + "logps/chosen": -0.8362342715263367, + "logps/rejected": -1.1428916454315186, + "loss": 0.7012, + "nll_loss": 0.6546894907951355, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08362342417240143, + "rewards/margins": 0.030665744096040726, + "rewards/rejected": -0.11428917199373245, + "step": 206 + }, + { + "epoch": 0.12877138413685849, + "grad_norm": 0.3060365915298462, + "learning_rate": 3.965e-05, + "log_odds_chosen": 0.21121090650558472, + "log_odds_ratio": -0.6019690036773682, + "logits/chosen": 2.085531234741211, + "logits/rejected": 3.4953126907348633, + "logps/chosen": -0.9660547971725464, + "logps/rejected": -1.1018478870391846, + "loss": 0.7583, + "nll_loss": 0.6981133222579956, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09660547971725464, + "rewards/margins": 0.013579306192696095, + "rewards/rejected": -0.11018478870391846, + "step": 207 + }, + { + "epoch": 0.12939346811819596, + "grad_norm": 0.3177313208580017, + "learning_rate": 3.960000000000001e-05, + "log_odds_chosen": 0.4785788357257843, + "log_odds_ratio": -0.5458232164382935, + "logits/chosen": 2.354724645614624, + "logits/rejected": 3.253253936767578, + "logps/chosen": -1.0346095561981201, + "logps/rejected": -1.3615843057632446, + "loss": 0.7659, + "nll_loss": 0.7113272547721863, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10346096754074097, + "rewards/margins": 0.03269747272133827, + "rewards/rejected": -0.13615843653678894, + "step": 208 + }, + { + "epoch": 0.13001555209953344, + "grad_norm": 0.299040287733078, + "learning_rate": 3.9550000000000006e-05, + "log_odds_chosen": 0.5891073942184448, + "log_odds_ratio": -0.5249563455581665, + "logits/chosen": 1.062352180480957, + "logits/rejected": 1.9807929992675781, + "logps/chosen": -0.889495849609375, + "logps/rejected": -1.2828330993652344, + "loss": 0.6767, + "nll_loss": 0.6242160201072693, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08894958347082138, + "rewards/margins": 0.03933371603488922, + "rewards/rejected": -0.1282833069562912, + "step": 209 + }, + { + "epoch": 0.13063763608087092, + "grad_norm": 0.25538620352745056, + "learning_rate": 3.9500000000000005e-05, + "log_odds_chosen": 0.03600820153951645, + "log_odds_ratio": -0.706552267074585, + "logits/chosen": 0.718265175819397, + "logits/rejected": 1.6833255290985107, + "logps/chosen": -1.0343286991119385, + "logps/rejected": -1.0732911825180054, + "loss": 0.6782, + "nll_loss": 0.6075564026832581, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10343287885189056, + "rewards/margins": 0.0038962452672421932, + "rewards/rejected": -0.1073291227221489, + "step": 210 + }, + { + "epoch": 0.1312597200622084, + "grad_norm": 0.2507280111312866, + "learning_rate": 3.9450000000000003e-05, + "log_odds_chosen": 0.43304914236068726, + "log_odds_ratio": -0.5274550914764404, + "logits/chosen": 0.4559652507305145, + "logits/rejected": 1.6874964237213135, + "logps/chosen": -0.9379274845123291, + "logps/rejected": -1.214919090270996, + "loss": 0.644, + "nll_loss": 0.5912134647369385, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09379275143146515, + "rewards/margins": 0.027699150145053864, + "rewards/rejected": -0.12149190157651901, + "step": 211 + }, + { + "epoch": 0.13188180404354588, + "grad_norm": 0.23815755546092987, + "learning_rate": 3.94e-05, + "log_odds_chosen": 0.2283017784357071, + "log_odds_ratio": -0.6008410453796387, + "logits/chosen": 1.0329132080078125, + "logits/rejected": 1.2031893730163574, + "logps/chosen": -1.1192843914031982, + "logps/rejected": -1.2944506406784058, + "loss": 0.8059, + "nll_loss": 0.7458454370498657, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11192844063043594, + "rewards/margins": 0.01751662604510784, + "rewards/rejected": -0.12944507598876953, + "step": 212 + }, + { + "epoch": 0.13250388802488336, + "grad_norm": 0.3345191478729248, + "learning_rate": 3.935e-05, + "log_odds_chosen": 0.5041581392288208, + "log_odds_ratio": -0.5905402302742004, + "logits/chosen": 1.7287249565124512, + "logits/rejected": 1.066641092300415, + "logps/chosen": -1.1445698738098145, + "logps/rejected": -1.4820538759231567, + "loss": 0.7486, + "nll_loss": 0.6895924210548401, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11445698887109756, + "rewards/margins": 0.03374838829040527, + "rewards/rejected": -0.14820538461208344, + "step": 213 + }, + { + "epoch": 0.13312597200622084, + "grad_norm": 0.25710660219192505, + "learning_rate": 3.9300000000000007e-05, + "log_odds_chosen": -0.0680234357714653, + "log_odds_ratio": -0.7658126354217529, + "logits/chosen": 1.8002554178237915, + "logits/rejected": 3.2470219135284424, + "logps/chosen": -1.1491390466690063, + "logps/rejected": -1.102350115776062, + "loss": 0.8621, + "nll_loss": 0.785519003868103, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.11491391062736511, + "rewards/margins": -0.004678888246417046, + "rewards/rejected": -0.11023502051830292, + "step": 214 + }, + { + "epoch": 0.13374805598755832, + "grad_norm": 0.356227308511734, + "learning_rate": 3.9250000000000005e-05, + "log_odds_chosen": -0.18295426666736603, + "log_odds_ratio": -0.860011875629425, + "logits/chosen": 1.2315165996551514, + "logits/rejected": 2.66998291015625, + "logps/chosen": -1.4245922565460205, + "logps/rejected": -1.23429536819458, + "loss": 0.7234, + "nll_loss": 0.63743656873703, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1424592286348343, + "rewards/margins": -0.019029691815376282, + "rewards/rejected": -0.1234295442700386, + "step": 215 + }, + { + "epoch": 0.1343701399688958, + "grad_norm": 0.2328234314918518, + "learning_rate": 3.9200000000000004e-05, + "log_odds_chosen": 0.3651959002017975, + "log_odds_ratio": -0.5772303342819214, + "logits/chosen": -0.4574909210205078, + "logits/rejected": 0.5566681623458862, + "logps/chosen": -0.9843193292617798, + "logps/rejected": -1.1436246633529663, + "loss": 0.5471, + "nll_loss": 0.4893374741077423, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09843192994594574, + "rewards/margins": 0.01593054085969925, + "rewards/rejected": -0.11436247825622559, + "step": 216 + }, + { + "epoch": 0.13499222395023328, + "grad_norm": 0.2810547947883606, + "learning_rate": 3.915e-05, + "log_odds_chosen": 0.579887330532074, + "log_odds_ratio": -0.5066784620285034, + "logits/chosen": 1.3967379331588745, + "logits/rejected": 1.9853203296661377, + "logps/chosen": -0.9148995280265808, + "logps/rejected": -1.2805899381637573, + "loss": 0.786, + "nll_loss": 0.7353231310844421, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09148994833230972, + "rewards/margins": 0.03656903654336929, + "rewards/rejected": -0.1280589997768402, + "step": 217 + }, + { + "epoch": 0.13561430793157075, + "grad_norm": 0.32773271203041077, + "learning_rate": 3.91e-05, + "log_odds_chosen": 1.0615527629852295, + "log_odds_ratio": -0.4024091958999634, + "logits/chosen": 1.4982575178146362, + "logits/rejected": 2.0108492374420166, + "logps/chosen": -0.6647369861602783, + "logps/rejected": -1.3382258415222168, + "loss": 0.6722, + "nll_loss": 0.6319430470466614, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.06647370010614395, + "rewards/margins": 0.0673488974571228, + "rewards/rejected": -0.13382260501384735, + "step": 218 + }, + { + "epoch": 0.13623639191290823, + "grad_norm": 0.2670206129550934, + "learning_rate": 3.905e-05, + "log_odds_chosen": 0.33397001028060913, + "log_odds_ratio": -0.5816282033920288, + "logits/chosen": 0.1389634609222412, + "logits/rejected": 1.4346809387207031, + "logps/chosen": -1.0145219564437866, + "logps/rejected": -1.2008839845657349, + "loss": 0.5609, + "nll_loss": 0.502739429473877, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10145218670368195, + "rewards/margins": 0.01863621175289154, + "rewards/rejected": -0.12008840590715408, + "step": 219 + }, + { + "epoch": 0.1368584758942457, + "grad_norm": 14.104445457458496, + "learning_rate": 3.9000000000000006e-05, + "log_odds_chosen": -0.10747827589511871, + "log_odds_ratio": -0.845554530620575, + "logits/chosen": 1.2553915977478027, + "logits/rejected": 2.3911399841308594, + "logps/chosen": -1.141734004020691, + "logps/rejected": -1.053473711013794, + "loss": 1.0518, + "nll_loss": 0.967269778251648, + "rewards/accuracies": 0.125, + "rewards/chosen": -0.11417339742183685, + "rewards/margins": -0.00882603321224451, + "rewards/rejected": -0.10534737259149551, + "step": 220 + }, + { + "epoch": 0.1374805598755832, + "grad_norm": 0.25021231174468994, + "learning_rate": 3.8950000000000005e-05, + "log_odds_chosen": 0.47089827060699463, + "log_odds_ratio": -0.5921870470046997, + "logits/chosen": 2.0083541870117188, + "logits/rejected": 3.1078169345855713, + "logps/chosen": -0.9033266305923462, + "logps/rejected": -1.06514310836792, + "loss": 0.839, + "nll_loss": 0.7797623872756958, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09033266454935074, + "rewards/margins": 0.016181640326976776, + "rewards/rejected": -0.10651430487632751, + "step": 221 + }, + { + "epoch": 0.13810264385692067, + "grad_norm": 10.2030611038208, + "learning_rate": 3.8900000000000004e-05, + "log_odds_chosen": 0.5311214327812195, + "log_odds_ratio": -0.5039387941360474, + "logits/chosen": 0.4797963500022888, + "logits/rejected": 2.54323410987854, + "logps/chosen": -0.9488534331321716, + "logps/rejected": -1.3131181001663208, + "loss": 0.6864, + "nll_loss": 0.6359692811965942, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09488534927368164, + "rewards/margins": 0.03642646223306656, + "rewards/rejected": -0.1313118040561676, + "step": 222 + }, + { + "epoch": 0.13872472783825818, + "grad_norm": 2.635735511779785, + "learning_rate": 3.885e-05, + "log_odds_chosen": 0.4188484251499176, + "log_odds_ratio": -0.5459132194519043, + "logits/chosen": 1.5027981996536255, + "logits/rejected": 2.377605676651001, + "logps/chosen": -0.9653737545013428, + "logps/rejected": -1.2187042236328125, + "loss": 0.792, + "nll_loss": 0.7373826503753662, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09653738141059875, + "rewards/margins": 0.025333039462566376, + "rewards/rejected": -0.12187041342258453, + "step": 223 + }, + { + "epoch": 0.13934681181959566, + "grad_norm": 0.4424149692058563, + "learning_rate": 3.88e-05, + "log_odds_chosen": 0.8767533898353577, + "log_odds_ratio": -0.5110883116722107, + "logits/chosen": -1.5340461730957031, + "logits/rejected": 1.1958626508712769, + "logps/chosen": -0.9010230898857117, + "logps/rejected": -1.3352127075195312, + "loss": 0.4859, + "nll_loss": 0.43478310108184814, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09010231494903564, + "rewards/margins": 0.043418966233730316, + "rewards/rejected": -0.13352127373218536, + "step": 224 + }, + { + "epoch": 0.13996889580093314, + "grad_norm": 0.6511993408203125, + "learning_rate": 3.875e-05, + "log_odds_chosen": 0.3861117959022522, + "log_odds_ratio": -0.5581840872764587, + "logits/chosen": -0.923552930355072, + "logits/rejected": 1.8610806465148926, + "logps/chosen": -1.0786206722259521, + "logps/rejected": -1.2818100452423096, + "loss": 0.5934, + "nll_loss": 0.537615180015564, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10786207020282745, + "rewards/margins": 0.02031894586980343, + "rewards/rejected": -0.12818101048469543, + "step": 225 + }, + { + "epoch": 0.14059097978227061, + "grad_norm": 0.24201221764087677, + "learning_rate": 3.8700000000000006e-05, + "log_odds_chosen": 0.5091780424118042, + "log_odds_ratio": -0.5338168144226074, + "logits/chosen": 1.6636664867401123, + "logits/rejected": 2.6963648796081543, + "logps/chosen": -0.8673657178878784, + "logps/rejected": -1.1037362813949585, + "loss": 0.7649, + "nll_loss": 0.7115618586540222, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08673657476902008, + "rewards/margins": 0.023637056350708008, + "rewards/rejected": -0.11037363111972809, + "step": 226 + }, + { + "epoch": 0.1412130637636081, + "grad_norm": 0.25063690543174744, + "learning_rate": 3.8650000000000004e-05, + "log_odds_chosen": 0.5832358598709106, + "log_odds_ratio": -0.5092697739601135, + "logits/chosen": -0.03008924424648285, + "logits/rejected": 0.8798378705978394, + "logps/chosen": -0.8644341230392456, + "logps/rejected": -1.1819239854812622, + "loss": 0.6909, + "nll_loss": 0.6399303674697876, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08644340932369232, + "rewards/margins": 0.031748995184898376, + "rewards/rejected": -0.1181924045085907, + "step": 227 + }, + { + "epoch": 0.14183514774494557, + "grad_norm": 0.24055233597755432, + "learning_rate": 3.86e-05, + "log_odds_chosen": 1.0738061666488647, + "log_odds_ratio": -0.3834771513938904, + "logits/chosen": 1.553779125213623, + "logits/rejected": 2.292296886444092, + "logps/chosen": -0.8580217361450195, + "logps/rejected": -1.567826509475708, + "loss": 0.8033, + "nll_loss": 0.7649025917053223, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08580217510461807, + "rewards/margins": 0.07098047435283661, + "rewards/rejected": -0.15678265690803528, + "step": 228 + }, + { + "epoch": 0.14245723172628305, + "grad_norm": 0.25163745880126953, + "learning_rate": 3.855e-05, + "log_odds_chosen": 0.2982984483242035, + "log_odds_ratio": -0.5847681164741516, + "logits/chosen": 0.5120735764503479, + "logits/rejected": 2.372066020965576, + "logps/chosen": -0.9295814037322998, + "logps/rejected": -1.0953707695007324, + "loss": 0.6999, + "nll_loss": 0.641383171081543, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09295813739299774, + "rewards/margins": 0.01657894067466259, + "rewards/rejected": -0.10953707993030548, + "step": 229 + }, + { + "epoch": 0.14307931570762053, + "grad_norm": 0.23865918815135956, + "learning_rate": 3.85e-05, + "log_odds_chosen": 0.5459116697311401, + "log_odds_ratio": -0.5268529653549194, + "logits/chosen": 1.0045875310897827, + "logits/rejected": 2.660670042037964, + "logps/chosen": -0.8819479942321777, + "logps/rejected": -1.2443805932998657, + "loss": 0.6868, + "nll_loss": 0.6341012120246887, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08819480240345001, + "rewards/margins": 0.036243267357349396, + "rewards/rejected": -0.12443806976079941, + "step": 230 + }, + { + "epoch": 0.143701399688958, + "grad_norm": 0.26255959272384644, + "learning_rate": 3.845e-05, + "log_odds_chosen": -0.15134698152542114, + "log_odds_ratio": -0.779100239276886, + "logits/chosen": 0.3234993517398834, + "logits/rejected": 1.570976734161377, + "logps/chosen": -1.0752030611038208, + "logps/rejected": -0.9749929904937744, + "loss": 0.6795, + "nll_loss": 0.601568341255188, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.10752031207084656, + "rewards/margins": -0.010021003894507885, + "rewards/rejected": -0.0974992960691452, + "step": 231 + }, + { + "epoch": 0.1443234836702955, + "grad_norm": 0.29671376943588257, + "learning_rate": 3.8400000000000005e-05, + "log_odds_chosen": 0.5076391696929932, + "log_odds_ratio": -0.5193696618080139, + "logits/chosen": 0.057988643646240234, + "logits/rejected": 1.8262519836425781, + "logps/chosen": -1.0712543725967407, + "logps/rejected": -1.4361586570739746, + "loss": 0.5648, + "nll_loss": 0.5128502249717712, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10712544620037079, + "rewards/margins": 0.036490414291620255, + "rewards/rejected": -0.14361585676670074, + "step": 232 + }, + { + "epoch": 0.14494556765163297, + "grad_norm": 0.2637360990047455, + "learning_rate": 3.8350000000000004e-05, + "log_odds_chosen": 0.7366672158241272, + "log_odds_ratio": -0.46905070543289185, + "logits/chosen": 1.854955792427063, + "logits/rejected": 2.6175291538238525, + "logps/chosen": -0.9328523874282837, + "logps/rejected": -1.3441451787948608, + "loss": 0.8956, + "nll_loss": 0.8487153053283691, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09328524768352509, + "rewards/margins": 0.04112928360700607, + "rewards/rejected": -0.13441452383995056, + "step": 233 + }, + { + "epoch": 0.14556765163297045, + "grad_norm": 0.49430274963378906, + "learning_rate": 3.83e-05, + "log_odds_chosen": -0.25712642073631287, + "log_odds_ratio": -0.8602423667907715, + "logits/chosen": 1.4578043222427368, + "logits/rejected": 1.9011411666870117, + "logps/chosen": -1.434701919555664, + "logps/rejected": -1.2270002365112305, + "loss": 0.7861, + "nll_loss": 0.7000505924224854, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.14347019791603088, + "rewards/margins": -0.020770173519849777, + "rewards/rejected": -0.12270002067089081, + "step": 234 + }, + { + "epoch": 0.14618973561430793, + "grad_norm": 0.3046986758708954, + "learning_rate": 3.825e-05, + "log_odds_chosen": 0.23919163644313812, + "log_odds_ratio": -0.627653181552887, + "logits/chosen": 0.6866488456726074, + "logits/rejected": 0.5810664296150208, + "logps/chosen": -0.9853194952011108, + "logps/rejected": -1.1365830898284912, + "loss": 0.6872, + "nll_loss": 0.6244243383407593, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09853194653987885, + "rewards/margins": 0.015126367099583149, + "rewards/rejected": -0.11365832388401031, + "step": 235 + }, + { + "epoch": 0.1468118195956454, + "grad_norm": 0.25227728486061096, + "learning_rate": 3.82e-05, + "log_odds_chosen": 0.22255516052246094, + "log_odds_ratio": -0.6572456359863281, + "logits/chosen": 0.5956082940101624, + "logits/rejected": 1.849258303642273, + "logps/chosen": -0.964582085609436, + "logps/rejected": -1.040386438369751, + "loss": 0.7036, + "nll_loss": 0.6379085779190063, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.09645821154117584, + "rewards/margins": 0.007580441422760487, + "rewards/rejected": -0.10403865575790405, + "step": 236 + }, + { + "epoch": 0.14743390357698288, + "grad_norm": 0.2732040584087372, + "learning_rate": 3.8150000000000006e-05, + "log_odds_chosen": 0.020949020981788635, + "log_odds_ratio": -0.704890251159668, + "logits/chosen": 0.8338685631752014, + "logits/rejected": 2.4033875465393066, + "logps/chosen": -1.0415844917297363, + "logps/rejected": -1.0674909353256226, + "loss": 0.7705, + "nll_loss": 0.6999964714050293, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1041584387421608, + "rewards/margins": 0.0025906474329531193, + "rewards/rejected": -0.10674908757209778, + "step": 237 + }, + { + "epoch": 0.14805598755832036, + "grad_norm": 0.3222413957118988, + "learning_rate": 3.8100000000000005e-05, + "log_odds_chosen": 0.291021466255188, + "log_odds_ratio": -0.6350386738777161, + "logits/chosen": 0.5952332019805908, + "logits/rejected": 1.7890355587005615, + "logps/chosen": -1.1999800205230713, + "logps/rejected": -1.4598280191421509, + "loss": 0.6637, + "nll_loss": 0.6002064943313599, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1199980080127716, + "rewards/margins": 0.02598479948937893, + "rewards/rejected": -0.1459828019142151, + "step": 238 + }, + { + "epoch": 0.14867807153965784, + "grad_norm": 0.29912009835243225, + "learning_rate": 3.805e-05, + "log_odds_chosen": 0.09837212413549423, + "log_odds_ratio": -0.6721018552780151, + "logits/chosen": 0.2817448377609253, + "logits/rejected": 1.6711862087249756, + "logps/chosen": -1.1941783428192139, + "logps/rejected": -1.2375702857971191, + "loss": 0.7284, + "nll_loss": 0.6612299084663391, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11941783875226974, + "rewards/margins": 0.004339195322245359, + "rewards/rejected": -0.1237570270895958, + "step": 239 + }, + { + "epoch": 0.14930015552099535, + "grad_norm": 0.22214223444461823, + "learning_rate": 3.8e-05, + "log_odds_chosen": 1.0463130474090576, + "log_odds_ratio": -0.42685216665267944, + "logits/chosen": -0.5133315324783325, + "logits/rejected": 0.24643856287002563, + "logps/chosen": -0.7221020460128784, + "logps/rejected": -1.3742396831512451, + "loss": 0.5453, + "nll_loss": 0.5026174187660217, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07221020758152008, + "rewards/margins": 0.06521373987197876, + "rewards/rejected": -0.13742394745349884, + "step": 240 + }, + { + "epoch": 0.14992223950233283, + "grad_norm": 0.3170778751373291, + "learning_rate": 3.795e-05, + "log_odds_chosen": 0.5840449929237366, + "log_odds_ratio": -0.5006461143493652, + "logits/chosen": 1.4450798034667969, + "logits/rejected": 1.941690444946289, + "logps/chosen": -1.015654444694519, + "logps/rejected": -1.3106660842895508, + "loss": 0.6944, + "nll_loss": 0.644288182258606, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10156545042991638, + "rewards/margins": 0.029501162469387054, + "rewards/rejected": -0.13106660544872284, + "step": 241 + }, + { + "epoch": 0.1505443234836703, + "grad_norm": 0.39447447657585144, + "learning_rate": 3.79e-05, + "log_odds_chosen": 0.01272672414779663, + "log_odds_ratio": -0.7251988649368286, + "logits/chosen": 1.618621826171875, + "logits/rejected": 1.5481436252593994, + "logps/chosen": -0.9903098940849304, + "logps/rejected": -1.0202150344848633, + "loss": 0.7432, + "nll_loss": 0.6706516742706299, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.099031001329422, + "rewards/margins": 0.0029905084520578384, + "rewards/rejected": -0.10202150046825409, + "step": 242 + }, + { + "epoch": 0.15116640746500778, + "grad_norm": 0.24457427859306335, + "learning_rate": 3.7850000000000005e-05, + "log_odds_chosen": 0.30422642827033997, + "log_odds_ratio": -0.5725257396697998, + "logits/chosen": -0.17796938121318817, + "logits/rejected": 1.5813944339752197, + "logps/chosen": -1.0886629819869995, + "logps/rejected": -1.2814066410064697, + "loss": 0.6392, + "nll_loss": 0.5819449424743652, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10886628925800323, + "rewards/margins": 0.019274361431598663, + "rewards/rejected": -0.1281406581401825, + "step": 243 + }, + { + "epoch": 0.15178849144634526, + "grad_norm": 0.33944302797317505, + "learning_rate": 3.7800000000000004e-05, + "log_odds_chosen": 0.7016199231147766, + "log_odds_ratio": -0.5355318784713745, + "logits/chosen": 2.0018675327301025, + "logits/rejected": 1.7284493446350098, + "logps/chosen": -0.9196906685829163, + "logps/rejected": -1.4196425676345825, + "loss": 0.7753, + "nll_loss": 0.7217522263526917, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0919690728187561, + "rewards/margins": 0.049995195120573044, + "rewards/rejected": -0.14196425676345825, + "step": 244 + }, + { + "epoch": 0.15241057542768274, + "grad_norm": 0.23365607857704163, + "learning_rate": 3.775e-05, + "log_odds_chosen": 0.09530195593833923, + "log_odds_ratio": -0.6637953519821167, + "logits/chosen": 1.1631795167922974, + "logits/rejected": 1.794580101966858, + "logps/chosen": -1.027880072593689, + "logps/rejected": -1.0927073955535889, + "loss": 0.7318, + "nll_loss": 0.6654148697853088, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.10278801620006561, + "rewards/margins": 0.0064827194437384605, + "rewards/rejected": -0.10927073657512665, + "step": 245 + }, + { + "epoch": 0.15303265940902022, + "grad_norm": 0.342902272939682, + "learning_rate": 3.77e-05, + "log_odds_chosen": 1.1789519786834717, + "log_odds_ratio": -0.3561103940010071, + "logits/chosen": 0.7682339549064636, + "logits/rejected": 1.2601045370101929, + "logps/chosen": -0.6911965608596802, + "logps/rejected": -1.3815280199050903, + "loss": 0.6737, + "nll_loss": 0.6381384134292603, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0691196545958519, + "rewards/margins": 0.06903316080570221, + "rewards/rejected": -0.1381528079509735, + "step": 246 + }, + { + "epoch": 0.1536547433903577, + "grad_norm": 0.40401729941368103, + "learning_rate": 3.765e-05, + "log_odds_chosen": 0.05288776755332947, + "log_odds_ratio": -0.7165863513946533, + "logits/chosen": 1.2672884464263916, + "logits/rejected": 2.7773818969726562, + "logps/chosen": -0.8994283676147461, + "logps/rejected": -0.9454131722450256, + "loss": 0.7608, + "nll_loss": 0.6891713738441467, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.08994284272193909, + "rewards/margins": 0.004598475992679596, + "rewards/rejected": -0.09454131126403809, + "step": 247 + }, + { + "epoch": 0.15427682737169518, + "grad_norm": 0.37135761976242065, + "learning_rate": 3.76e-05, + "log_odds_chosen": 0.08654891699552536, + "log_odds_ratio": -0.7375810146331787, + "logits/chosen": 1.4379180669784546, + "logits/rejected": 2.5666956901550293, + "logps/chosen": -0.9742010831832886, + "logps/rejected": -1.0682315826416016, + "loss": 0.7763, + "nll_loss": 0.7025222182273865, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0974201112985611, + "rewards/margins": 0.009403041563928127, + "rewards/rejected": -0.1068231463432312, + "step": 248 + }, + { + "epoch": 0.15489891135303266, + "grad_norm": 0.43483659625053406, + "learning_rate": 3.7550000000000005e-05, + "log_odds_chosen": 0.4684327244758606, + "log_odds_ratio": -0.6200071573257446, + "logits/chosen": 1.2784299850463867, + "logits/rejected": 2.3502230644226074, + "logps/chosen": -0.9977530837059021, + "logps/rejected": -1.2462825775146484, + "loss": 0.8474, + "nll_loss": 0.7854040265083313, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09977530688047409, + "rewards/margins": 0.024852953851222992, + "rewards/rejected": -0.12462826073169708, + "step": 249 + }, + { + "epoch": 0.15552099533437014, + "grad_norm": 0.29905253648757935, + "learning_rate": 3.7500000000000003e-05, + "log_odds_chosen": 1.0909241437911987, + "log_odds_ratio": -0.4056417644023895, + "logits/chosen": 3.0120909214019775, + "logits/rejected": 3.5190353393554688, + "logps/chosen": -0.7593393325805664, + "logps/rejected": -1.3986889123916626, + "loss": 0.9261, + "nll_loss": 0.8855429291725159, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07593393325805664, + "rewards/margins": 0.06393495947122574, + "rewards/rejected": -0.13986890017986298, + "step": 250 + }, + { + "epoch": 0.15614307931570762, + "grad_norm": 0.27395838499069214, + "learning_rate": 3.745e-05, + "log_odds_chosen": 0.778897225856781, + "log_odds_ratio": -0.4891205430030823, + "logits/chosen": 1.333661437034607, + "logits/rejected": 2.29944109916687, + "logps/chosen": -0.8269086480140686, + "logps/rejected": -1.2990074157714844, + "loss": 0.7312, + "nll_loss": 0.6823058128356934, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08269086480140686, + "rewards/margins": 0.04720987379550934, + "rewards/rejected": -0.1299007385969162, + "step": 251 + }, + { + "epoch": 0.1567651632970451, + "grad_norm": 0.24890701472759247, + "learning_rate": 3.74e-05, + "log_odds_chosen": 0.9393471479415894, + "log_odds_ratio": -0.5004830360412598, + "logits/chosen": 1.544869065284729, + "logits/rejected": 1.4583313465118408, + "logps/chosen": -1.0949639081954956, + "logps/rejected": -1.7828260660171509, + "loss": 0.7394, + "nll_loss": 0.689353883266449, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10949639976024628, + "rewards/margins": 0.06878622621297836, + "rewards/rejected": -0.17828263342380524, + "step": 252 + }, + { + "epoch": 0.15738724727838257, + "grad_norm": 0.23351988196372986, + "learning_rate": 3.735e-05, + "log_odds_chosen": 0.4725225269794464, + "log_odds_ratio": -0.5411194562911987, + "logits/chosen": 0.5843527317047119, + "logits/rejected": 1.433046579360962, + "logps/chosen": -1.076469898223877, + "logps/rejected": -1.3948270082473755, + "loss": 0.6985, + "nll_loss": 0.6443914175033569, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10764698684215546, + "rewards/margins": 0.03183571621775627, + "rewards/rejected": -0.13948270678520203, + "step": 253 + }, + { + "epoch": 0.15800933125972005, + "grad_norm": 0.21208809316158295, + "learning_rate": 3.73e-05, + "log_odds_chosen": 0.7200300693511963, + "log_odds_ratio": -0.4416605830192566, + "logits/chosen": 0.6532263159751892, + "logits/rejected": 2.229447364807129, + "logps/chosen": -0.8342301249504089, + "logps/rejected": -1.3484030961990356, + "loss": 0.6562, + "nll_loss": 0.6120399236679077, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08342301100492477, + "rewards/margins": 0.05141729116439819, + "rewards/rejected": -0.13484030961990356, + "step": 254 + }, + { + "epoch": 0.15863141524105753, + "grad_norm": 0.5676325559616089, + "learning_rate": 3.7250000000000004e-05, + "log_odds_chosen": 0.865597128868103, + "log_odds_ratio": -0.45989227294921875, + "logits/chosen": 0.8128045797348022, + "logits/rejected": 2.4521899223327637, + "logps/chosen": -0.9576820135116577, + "logps/rejected": -1.3842835426330566, + "loss": 0.6939, + "nll_loss": 0.6479240655899048, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09576819837093353, + "rewards/margins": 0.04266016185283661, + "rewards/rejected": -0.13842836022377014, + "step": 255 + }, + { + "epoch": 0.159253499222395, + "grad_norm": 0.2591799199581146, + "learning_rate": 3.72e-05, + "log_odds_chosen": 0.2194564789533615, + "log_odds_ratio": -0.6655052900314331, + "logits/chosen": 1.342261791229248, + "logits/rejected": 2.7451820373535156, + "logps/chosen": -1.106384038925171, + "logps/rejected": -1.2910670042037964, + "loss": 0.8368, + "nll_loss": 0.7702622413635254, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.11063840985298157, + "rewards/margins": 0.018468298017978668, + "rewards/rejected": -0.12910671532154083, + "step": 256 + }, + { + "epoch": 0.1598755832037325, + "grad_norm": 0.2928728759288788, + "learning_rate": 3.715e-05, + "log_odds_chosen": 0.38483601808547974, + "log_odds_ratio": -0.5389297008514404, + "logits/chosen": 0.9790713787078857, + "logits/rejected": 1.9639866352081299, + "logps/chosen": -0.9232936501502991, + "logps/rejected": -1.1887941360473633, + "loss": 0.6545, + "nll_loss": 0.6006432771682739, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09232936054468155, + "rewards/margins": 0.026550058275461197, + "rewards/rejected": -0.11887942254543304, + "step": 257 + }, + { + "epoch": 0.16049766718507, + "grad_norm": 0.3074938654899597, + "learning_rate": 3.71e-05, + "log_odds_chosen": -0.0491148866713047, + "log_odds_ratio": -0.7412834763526917, + "logits/chosen": 1.0462923049926758, + "logits/rejected": 1.831377625465393, + "logps/chosen": -1.1442368030548096, + "logps/rejected": -1.0997331142425537, + "loss": 0.7083, + "nll_loss": 0.6341397166252136, + "rewards/accuracies": 0.25, + "rewards/chosen": -0.11442368477582932, + "rewards/margins": -0.004450378008186817, + "rewards/rejected": -0.10997331142425537, + "step": 258 + }, + { + "epoch": 0.16111975116640748, + "grad_norm": 0.3968244791030884, + "learning_rate": 3.705e-05, + "log_odds_chosen": 0.7382861971855164, + "log_odds_ratio": -0.4755243957042694, + "logits/chosen": 0.9706071615219116, + "logits/rejected": 1.7821481227874756, + "logps/chosen": -0.8874275088310242, + "logps/rejected": -1.2820873260498047, + "loss": 0.691, + "nll_loss": 0.643477201461792, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08874274790287018, + "rewards/margins": 0.03946599364280701, + "rewards/rejected": -0.12820874154567719, + "step": 259 + }, + { + "epoch": 0.16174183514774496, + "grad_norm": 0.32076671719551086, + "learning_rate": 3.7e-05, + "log_odds_chosen": 0.11305558681488037, + "log_odds_ratio": -0.7279486656188965, + "logits/chosen": 0.5689743757247925, + "logits/rejected": 0.621085524559021, + "logps/chosen": -1.1365184783935547, + "logps/rejected": -1.2172517776489258, + "loss": 0.6489, + "nll_loss": 0.5761117339134216, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11365185678005219, + "rewards/margins": 0.008073337376117706, + "rewards/rejected": -0.1217251867055893, + "step": 260 + }, + { + "epoch": 0.16236391912908243, + "grad_norm": 0.33515191078186035, + "learning_rate": 3.6950000000000004e-05, + "log_odds_chosen": 0.40560805797576904, + "log_odds_ratio": -0.543319821357727, + "logits/chosen": 1.7440519332885742, + "logits/rejected": 2.6206088066101074, + "logps/chosen": -0.9863896369934082, + "logps/rejected": -1.235954761505127, + "loss": 0.7871, + "nll_loss": 0.7327961325645447, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09863896667957306, + "rewards/margins": 0.024956511333584785, + "rewards/rejected": -0.12359548360109329, + "step": 261 + }, + { + "epoch": 0.1629860031104199, + "grad_norm": 0.2292770892381668, + "learning_rate": 3.69e-05, + "log_odds_chosen": 0.2317352145910263, + "log_odds_ratio": -0.5968304872512817, + "logits/chosen": -0.9270865321159363, + "logits/rejected": 1.1997534036636353, + "logps/chosen": -0.9344391226768494, + "logps/rejected": -1.0741225481033325, + "loss": 0.4987, + "nll_loss": 0.438981294631958, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09344391524791718, + "rewards/margins": 0.013968334533274174, + "rewards/rejected": -0.10741224884986877, + "step": 262 + }, + { + "epoch": 0.1636080870917574, + "grad_norm": 0.31699907779693604, + "learning_rate": 3.685e-05, + "log_odds_chosen": 1.1996018886566162, + "log_odds_ratio": -0.30720216035842896, + "logits/chosen": 1.0490620136260986, + "logits/rejected": 0.9187926054000854, + "logps/chosen": -0.8102648258209229, + "logps/rejected": -1.644590973854065, + "loss": 0.6033, + "nll_loss": 0.5725414752960205, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08102648705244064, + "rewards/margins": 0.08343260735273361, + "rewards/rejected": -0.16445909440517426, + "step": 263 + }, + { + "epoch": 0.16423017107309487, + "grad_norm": 0.9323865175247192, + "learning_rate": 3.68e-05, + "log_odds_chosen": 0.5835751295089722, + "log_odds_ratio": -0.5283180475234985, + "logits/chosen": 1.8825874328613281, + "logits/rejected": 1.0911766290664673, + "logps/chosen": -1.128019094467163, + "logps/rejected": -1.5025029182434082, + "loss": 0.7552, + "nll_loss": 0.7023845911026001, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11280190944671631, + "rewards/margins": 0.03744838759303093, + "rewards/rejected": -0.15025030076503754, + "step": 264 + }, + { + "epoch": 0.16485225505443235, + "grad_norm": 0.32262593507766724, + "learning_rate": 3.675e-05, + "log_odds_chosen": 0.255434513092041, + "log_odds_ratio": -0.5993427634239197, + "logits/chosen": 0.18540018796920776, + "logits/rejected": 2.5416390895843506, + "logps/chosen": -0.8826195597648621, + "logps/rejected": -1.0138144493103027, + "loss": 0.6066, + "nll_loss": 0.546635091304779, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08826196193695068, + "rewards/margins": 0.013119479641318321, + "rewards/rejected": -0.10138144344091415, + "step": 265 + }, + { + "epoch": 0.16547433903576983, + "grad_norm": 0.3099498748779297, + "learning_rate": 3.6700000000000004e-05, + "log_odds_chosen": 0.4512448310852051, + "log_odds_ratio": -0.556388258934021, + "logits/chosen": -0.3677622079849243, + "logits/rejected": 0.3967688977718353, + "logps/chosen": -1.083742380142212, + "logps/rejected": -1.4190504550933838, + "loss": 0.5988, + "nll_loss": 0.5431588292121887, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10837424546480179, + "rewards/margins": 0.03353080153465271, + "rewards/rejected": -0.1419050395488739, + "step": 266 + }, + { + "epoch": 0.1660964230171073, + "grad_norm": 0.9130797982215881, + "learning_rate": 3.665e-05, + "log_odds_chosen": 0.501509428024292, + "log_odds_ratio": -0.504950761795044, + "logits/chosen": 1.868678092956543, + "logits/rejected": 2.891838312149048, + "logps/chosen": -1.104312539100647, + "logps/rejected": -1.4863413572311401, + "loss": 0.7864, + "nll_loss": 0.7358621954917908, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1104312613606453, + "rewards/margins": 0.03820287063717842, + "rewards/rejected": -0.148634135723114, + "step": 267 + }, + { + "epoch": 0.1667185069984448, + "grad_norm": 0.232061967253685, + "learning_rate": 3.66e-05, + "log_odds_chosen": 0.2719687521457672, + "log_odds_ratio": -0.6023093461990356, + "logits/chosen": 0.2932952642440796, + "logits/rejected": 2.1686325073242188, + "logps/chosen": -1.1295243501663208, + "logps/rejected": -1.3326761722564697, + "loss": 0.6743, + "nll_loss": 0.6140591502189636, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11295244097709656, + "rewards/margins": 0.02031518518924713, + "rewards/rejected": -0.1332676261663437, + "step": 268 + }, + { + "epoch": 0.16734059097978227, + "grad_norm": 1.3609570264816284, + "learning_rate": 3.655e-05, + "log_odds_chosen": -0.1332504153251648, + "log_odds_ratio": -0.774581789970398, + "logits/chosen": 0.7395839095115662, + "logits/rejected": 1.5577596426010132, + "logps/chosen": -1.1454954147338867, + "logps/rejected": -1.0469775199890137, + "loss": 0.7448, + "nll_loss": 0.6673346757888794, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.11454953998327255, + "rewards/margins": -0.009851792827248573, + "rewards/rejected": -0.10469775646924973, + "step": 269 + }, + { + "epoch": 0.16796267496111975, + "grad_norm": 0.2783643901348114, + "learning_rate": 3.65e-05, + "log_odds_chosen": 0.3987334072589874, + "log_odds_ratio": -0.617426872253418, + "logits/chosen": 2.3885960578918457, + "logits/rejected": 3.718479633331299, + "logps/chosen": -1.0800246000289917, + "logps/rejected": -1.39646315574646, + "loss": 0.8736, + "nll_loss": 0.8118734955787659, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10800246894359589, + "rewards/margins": 0.03164386749267578, + "rewards/rejected": -0.13964633643627167, + "step": 270 + }, + { + "epoch": 0.16858475894245722, + "grad_norm": 0.3506051301956177, + "learning_rate": 3.645e-05, + "log_odds_chosen": 0.32452934980392456, + "log_odds_ratio": -0.6055722236633301, + "logits/chosen": 1.9042444229125977, + "logits/rejected": 2.771538257598877, + "logps/chosen": -0.8178438544273376, + "logps/rejected": -1.0214296579360962, + "loss": 0.8372, + "nll_loss": 0.7766833901405334, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.08178438246250153, + "rewards/margins": 0.020358584821224213, + "rewards/rejected": -0.10214296728372574, + "step": 271 + }, + { + "epoch": 0.1692068429237947, + "grad_norm": 0.29172876477241516, + "learning_rate": 3.6400000000000004e-05, + "log_odds_chosen": 0.320917010307312, + "log_odds_ratio": -0.6874103546142578, + "logits/chosen": 1.4236152172088623, + "logits/rejected": 2.6094887256622314, + "logps/chosen": -0.9422950148582458, + "logps/rejected": -1.0288571119308472, + "loss": 0.8041, + "nll_loss": 0.7353872060775757, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09422949701547623, + "rewards/margins": 0.008656204678118229, + "rewards/rejected": -0.10288571566343307, + "step": 272 + }, + { + "epoch": 0.16982892690513218, + "grad_norm": 0.2542218565940857, + "learning_rate": 3.635e-05, + "log_odds_chosen": 0.542289137840271, + "log_odds_ratio": -0.48945939540863037, + "logits/chosen": 1.2661612033843994, + "logits/rejected": 1.9925477504730225, + "logps/chosen": -0.8164982199668884, + "logps/rejected": -1.123006820678711, + "loss": 0.8182, + "nll_loss": 0.7692335247993469, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08164982497692108, + "rewards/margins": 0.03065086528658867, + "rewards/rejected": -0.11230067908763885, + "step": 273 + }, + { + "epoch": 0.17045101088646966, + "grad_norm": 0.2937983274459839, + "learning_rate": 3.63e-05, + "log_odds_chosen": 1.1165152788162231, + "log_odds_ratio": -0.4164004921913147, + "logits/chosen": 1.1048343181610107, + "logits/rejected": 2.457761764526367, + "logps/chosen": -0.7275225520133972, + "logps/rejected": -1.4913136959075928, + "loss": 0.6468, + "nll_loss": 0.6051377058029175, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07275225222110748, + "rewards/margins": 0.07637912034988403, + "rewards/rejected": -0.14913137257099152, + "step": 274 + }, + { + "epoch": 0.17107309486780714, + "grad_norm": 0.3650302290916443, + "learning_rate": 3.625e-05, + "log_odds_chosen": 0.3353332281112671, + "log_odds_ratio": -0.5689051151275635, + "logits/chosen": 2.027620792388916, + "logits/rejected": 3.2360668182373047, + "logps/chosen": -0.9137417078018188, + "logps/rejected": -1.1490848064422607, + "loss": 0.7901, + "nll_loss": 0.7331695556640625, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09137416630983353, + "rewards/margins": 0.02353430539369583, + "rewards/rejected": -0.11490847915410995, + "step": 275 + }, + { + "epoch": 0.17169517884914465, + "grad_norm": 0.2370610237121582, + "learning_rate": 3.62e-05, + "log_odds_chosen": 0.3498604893684387, + "log_odds_ratio": -0.6041584014892578, + "logits/chosen": -0.5574699640274048, + "logits/rejected": 1.5286118984222412, + "logps/chosen": -1.06705641746521, + "logps/rejected": -1.3181275129318237, + "loss": 0.5466, + "nll_loss": 0.48619967699050903, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10670565068721771, + "rewards/margins": 0.025107108056545258, + "rewards/rejected": -0.13181275129318237, + "step": 276 + }, + { + "epoch": 0.17231726283048213, + "grad_norm": 0.2990007996559143, + "learning_rate": 3.615e-05, + "log_odds_chosen": 0.07544267177581787, + "log_odds_ratio": -0.6616002917289734, + "logits/chosen": 2.9491500854492188, + "logits/rejected": 3.6665618419647217, + "logps/chosen": -0.9842727184295654, + "logps/rejected": -1.0366499423980713, + "loss": 0.9659, + "nll_loss": 0.8997125625610352, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09842726588249207, + "rewards/margins": 0.005237726494669914, + "rewards/rejected": -0.10366499423980713, + "step": 277 + }, + { + "epoch": 0.1729393468118196, + "grad_norm": 1.6273510456085205, + "learning_rate": 3.61e-05, + "log_odds_chosen": 1.1774389743804932, + "log_odds_ratio": -0.4116966128349304, + "logits/chosen": 0.9792468547821045, + "logits/rejected": 1.7449513673782349, + "logps/chosen": -0.8007335066795349, + "logps/rejected": -1.571602702140808, + "loss": 0.7018, + "nll_loss": 0.6606743335723877, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08007335662841797, + "rewards/margins": 0.0770869255065918, + "rewards/rejected": -0.15716028213500977, + "step": 278 + }, + { + "epoch": 0.17356143079315708, + "grad_norm": 0.276890367269516, + "learning_rate": 3.605e-05, + "log_odds_chosen": 1.3592523336410522, + "log_odds_ratio": -0.28409016132354736, + "logits/chosen": 0.7802825570106506, + "logits/rejected": 1.77823805809021, + "logps/chosen": -0.6940028071403503, + "logps/rejected": -1.5032212734222412, + "loss": 0.5705, + "nll_loss": 0.5420591831207275, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.06940028071403503, + "rewards/margins": 0.08092184364795685, + "rewards/rejected": -0.15032212436199188, + "step": 279 + }, + { + "epoch": 0.17418351477449456, + "grad_norm": 0.3467434048652649, + "learning_rate": 3.6e-05, + "log_odds_chosen": 0.3342052102088928, + "log_odds_ratio": -0.5645737051963806, + "logits/chosen": 1.7210997343063354, + "logits/rejected": 2.7231311798095703, + "logps/chosen": -0.9332855939865112, + "logps/rejected": -1.1408545970916748, + "loss": 0.7302, + "nll_loss": 0.67376708984375, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.093328557908535, + "rewards/margins": 0.020756900310516357, + "rewards/rejected": -0.11408545821905136, + "step": 280 + }, + { + "epoch": 0.17480559875583204, + "grad_norm": 0.2782360017299652, + "learning_rate": 3.595e-05, + "log_odds_chosen": 0.4959554076194763, + "log_odds_ratio": -0.597368597984314, + "logits/chosen": 0.8248426914215088, + "logits/rejected": 1.6470648050308228, + "logps/chosen": -0.9373691082000732, + "logps/rejected": -1.1336480379104614, + "loss": 0.7087, + "nll_loss": 0.6489871144294739, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.0937369167804718, + "rewards/margins": 0.0196278914809227, + "rewards/rejected": -0.1133648082613945, + "step": 281 + }, + { + "epoch": 0.17542768273716952, + "grad_norm": 0.270268052816391, + "learning_rate": 3.59e-05, + "log_odds_chosen": 0.804435133934021, + "log_odds_ratio": -0.5560287237167358, + "logits/chosen": 2.5741701126098633, + "logits/rejected": 1.9791196584701538, + "logps/chosen": -0.9681861400604248, + "logps/rejected": -1.6530413627624512, + "loss": 0.9065, + "nll_loss": 0.8508476614952087, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09681861847639084, + "rewards/margins": 0.06848552078008652, + "rewards/rejected": -0.16530415415763855, + "step": 282 + }, + { + "epoch": 0.176049766718507, + "grad_norm": 0.28208354115486145, + "learning_rate": 3.585e-05, + "log_odds_chosen": 0.49895477294921875, + "log_odds_ratio": -0.6017277836799622, + "logits/chosen": 0.2750604748725891, + "logits/rejected": 2.2774534225463867, + "logps/chosen": -1.0913801193237305, + "logps/rejected": -1.2801364660263062, + "loss": 0.6199, + "nll_loss": 0.5597580671310425, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10913802683353424, + "rewards/margins": 0.01887562870979309, + "rewards/rejected": -0.12801367044448853, + "step": 283 + }, + { + "epoch": 0.17667185069984448, + "grad_norm": 0.2498951107263565, + "learning_rate": 3.58e-05, + "log_odds_chosen": 0.6866430044174194, + "log_odds_ratio": -0.5442928075790405, + "logits/chosen": 0.35522377490997314, + "logits/rejected": 0.7245559692382812, + "logps/chosen": -0.9898964762687683, + "logps/rejected": -1.554041862487793, + "loss": 0.6184, + "nll_loss": 0.5640102028846741, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09898965060710907, + "rewards/margins": 0.05641452595591545, + "rewards/rejected": -0.15540418028831482, + "step": 284 + }, + { + "epoch": 0.17729393468118196, + "grad_norm": 0.29676172137260437, + "learning_rate": 3.575e-05, + "log_odds_chosen": 0.38468289375305176, + "log_odds_ratio": -0.5942354798316956, + "logits/chosen": -0.6643446087837219, + "logits/rejected": 1.5239275693893433, + "logps/chosen": -1.1168832778930664, + "logps/rejected": -1.3112976551055908, + "loss": 0.5597, + "nll_loss": 0.5003250241279602, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11168833076953888, + "rewards/margins": 0.01944144070148468, + "rewards/rejected": -0.13112977147102356, + "step": 285 + }, + { + "epoch": 0.17791601866251944, + "grad_norm": 0.9721925258636475, + "learning_rate": 3.57e-05, + "log_odds_chosen": 1.3216912746429443, + "log_odds_ratio": -0.36708056926727295, + "logits/chosen": 1.362522006034851, + "logits/rejected": 2.3680434226989746, + "logps/chosen": -0.9718787670135498, + "logps/rejected": -2.045164108276367, + "loss": 0.622, + "nll_loss": 0.5853403806686401, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09718787670135498, + "rewards/margins": 0.10732853412628174, + "rewards/rejected": -0.20451641082763672, + "step": 286 + }, + { + "epoch": 0.17853810264385692, + "grad_norm": 0.2858312427997589, + "learning_rate": 3.565e-05, + "log_odds_chosen": 1.0211000442504883, + "log_odds_ratio": -0.3588743507862091, + "logits/chosen": 1.6440815925598145, + "logits/rejected": 2.406961679458618, + "logps/chosen": -1.0506576299667358, + "logps/rejected": -1.7641756534576416, + "loss": 0.7087, + "nll_loss": 0.6728169918060303, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10506576299667358, + "rewards/margins": 0.07135181128978729, + "rewards/rejected": -0.17641757428646088, + "step": 287 + }, + { + "epoch": 0.1791601866251944, + "grad_norm": 0.27648454904556274, + "learning_rate": 3.56e-05, + "log_odds_chosen": 0.6506868600845337, + "log_odds_ratio": -0.49529018998146057, + "logits/chosen": 1.4622275829315186, + "logits/rejected": 1.405707836151123, + "logps/chosen": -0.974983811378479, + "logps/rejected": -1.3737361431121826, + "loss": 0.774, + "nll_loss": 0.7245060205459595, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09749838709831238, + "rewards/margins": 0.039875224232673645, + "rewards/rejected": -0.13737361133098602, + "step": 288 + }, + { + "epoch": 0.17978227060653187, + "grad_norm": 0.2890698313713074, + "learning_rate": 3.555e-05, + "log_odds_chosen": 0.971682071685791, + "log_odds_ratio": -0.35939812660217285, + "logits/chosen": 1.5366928577423096, + "logits/rejected": 2.6480050086975098, + "logps/chosen": -0.7586332559585571, + "logps/rejected": -1.406290888786316, + "loss": 0.6716, + "nll_loss": 0.6356822848320007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07586333155632019, + "rewards/margins": 0.06476576626300812, + "rewards/rejected": -0.1406290978193283, + "step": 289 + }, + { + "epoch": 0.18040435458786935, + "grad_norm": 0.29232749342918396, + "learning_rate": 3.55e-05, + "log_odds_chosen": 1.324554443359375, + "log_odds_ratio": -0.3050360679626465, + "logits/chosen": 0.5814070701599121, + "logits/rejected": 2.793435573577881, + "logps/chosen": -0.7117588520050049, + "logps/rejected": -1.6056010723114014, + "loss": 0.5403, + "nll_loss": 0.5098059177398682, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07117588818073273, + "rewards/margins": 0.08938423544168472, + "rewards/rejected": -0.16056011617183685, + "step": 290 + }, + { + "epoch": 0.18102643856920683, + "grad_norm": 0.29234594106674194, + "learning_rate": 3.545e-05, + "log_odds_chosen": 1.1288540363311768, + "log_odds_ratio": -0.3035117983818054, + "logits/chosen": 1.6192704439163208, + "logits/rejected": 2.7230443954467773, + "logps/chosen": -0.7904139757156372, + "logps/rejected": -1.5225837230682373, + "loss": 0.6339, + "nll_loss": 0.6035600900650024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07904140651226044, + "rewards/margins": 0.0732169896364212, + "rewards/rejected": -0.15225839614868164, + "step": 291 + }, + { + "epoch": 0.1816485225505443, + "grad_norm": 0.43359532952308655, + "learning_rate": 3.54e-05, + "log_odds_chosen": 0.20111624896526337, + "log_odds_ratio": -0.6230050325393677, + "logits/chosen": 0.7468219995498657, + "logits/rejected": 2.083394765853882, + "logps/chosen": -1.0547893047332764, + "logps/rejected": -1.2056244611740112, + "loss": 0.6486, + "nll_loss": 0.5863381624221802, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1054789274930954, + "rewards/margins": 0.015083517879247665, + "rewards/rejected": -0.12056245654821396, + "step": 292 + }, + { + "epoch": 0.1822706065318818, + "grad_norm": 0.3505414128303528, + "learning_rate": 3.535e-05, + "log_odds_chosen": 0.34149283170700073, + "log_odds_ratio": -0.5823227167129517, + "logits/chosen": 1.2648005485534668, + "logits/rejected": 3.214367389678955, + "logps/chosen": -0.9986240863800049, + "logps/rejected": -1.2265021800994873, + "loss": 0.7081, + "nll_loss": 0.6498473882675171, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09986241161823273, + "rewards/margins": 0.022787809371948242, + "rewards/rejected": -0.12265022099018097, + "step": 293 + }, + { + "epoch": 0.1828926905132193, + "grad_norm": 0.3010155260562897, + "learning_rate": 3.53e-05, + "log_odds_chosen": 0.9094794988632202, + "log_odds_ratio": -0.41278043389320374, + "logits/chosen": 1.4252872467041016, + "logits/rejected": 2.675663471221924, + "logps/chosen": -0.8446587324142456, + "logps/rejected": -1.3711124658584595, + "loss": 0.7532, + "nll_loss": 0.7119126319885254, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0844658762216568, + "rewards/margins": 0.05264536663889885, + "rewards/rejected": -0.13711124658584595, + "step": 294 + }, + { + "epoch": 0.18351477449455678, + "grad_norm": 0.263263463973999, + "learning_rate": 3.525e-05, + "log_odds_chosen": 0.30774155259132385, + "log_odds_ratio": -0.6023176908493042, + "logits/chosen": 1.2870378494262695, + "logits/rejected": 1.972282886505127, + "logps/chosen": -0.9613863825798035, + "logps/rejected": -1.178411602973938, + "loss": 0.7256, + "nll_loss": 0.6653767824172974, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09613863378763199, + "rewards/margins": 0.02170252427458763, + "rewards/rejected": -0.11784116178750992, + "step": 295 + }, + { + "epoch": 0.18413685847589426, + "grad_norm": 0.27597174048423767, + "learning_rate": 3.52e-05, + "log_odds_chosen": 0.4085116684436798, + "log_odds_ratio": -0.6184495091438293, + "logits/chosen": 0.3999248147010803, + "logits/rejected": 2.451507329940796, + "logps/chosen": -0.8906404376029968, + "logps/rejected": -1.0293775796890259, + "loss": 0.6227, + "nll_loss": 0.5608530044555664, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08906404674053192, + "rewards/margins": 0.013873708434402943, + "rewards/rejected": -0.10293775051832199, + "step": 296 + }, + { + "epoch": 0.18475894245723173, + "grad_norm": 0.41982316970825195, + "learning_rate": 3.515e-05, + "log_odds_chosen": -0.07312265038490295, + "log_odds_ratio": -0.9710935354232788, + "logits/chosen": 0.8743797540664673, + "logits/rejected": 1.2161155939102173, + "logps/chosen": -1.2751758098602295, + "logps/rejected": -1.1848974227905273, + "loss": 0.7096, + "nll_loss": 0.6124569773674011, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12751758098602295, + "rewards/margins": -0.009027836844325066, + "rewards/rejected": -0.11848974972963333, + "step": 297 + }, + { + "epoch": 0.1853810264385692, + "grad_norm": 0.2401142567396164, + "learning_rate": 3.51e-05, + "log_odds_chosen": 0.5721354484558105, + "log_odds_ratio": -0.5739449858665466, + "logits/chosen": 1.8936402797698975, + "logits/rejected": 2.0619664192199707, + "logps/chosen": -0.9201027154922485, + "logps/rejected": -1.209338903427124, + "loss": 0.7463, + "nll_loss": 0.6889411807060242, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09201027452945709, + "rewards/margins": 0.02892361581325531, + "rewards/rejected": -0.1209338828921318, + "step": 298 + }, + { + "epoch": 0.1860031104199067, + "grad_norm": 0.33103877305984497, + "learning_rate": 3.505e-05, + "log_odds_chosen": 0.36520734429359436, + "log_odds_ratio": -0.5649385452270508, + "logits/chosen": 1.2041155099868774, + "logits/rejected": 1.5141119956970215, + "logps/chosen": -1.1315367221832275, + "logps/rejected": -1.3210612535476685, + "loss": 0.775, + "nll_loss": 0.7185065150260925, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11315366625785828, + "rewards/margins": 0.018952451646327972, + "rewards/rejected": -0.13210612535476685, + "step": 299 + }, + { + "epoch": 0.18662519440124417, + "grad_norm": 0.38658761978149414, + "learning_rate": 3.5e-05, + "log_odds_chosen": 0.6449403166770935, + "log_odds_ratio": -0.6450217366218567, + "logits/chosen": 0.46619826555252075, + "logits/rejected": 1.577237606048584, + "logps/chosen": -1.1663486957550049, + "logps/rejected": -1.5513371229171753, + "loss": 0.6948, + "nll_loss": 0.6303378343582153, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11663487553596497, + "rewards/margins": 0.038498833775520325, + "rewards/rejected": -0.1551337093114853, + "step": 300 + }, + { + "epoch": 0.18724727838258165, + "grad_norm": 0.32990527153015137, + "learning_rate": 3.495e-05, + "log_odds_chosen": 0.9941543340682983, + "log_odds_ratio": -0.5177374482154846, + "logits/chosen": 0.8194706439971924, + "logits/rejected": 2.0760574340820312, + "logps/chosen": -0.925238311290741, + "logps/rejected": -1.5417176485061646, + "loss": 0.6898, + "nll_loss": 0.6380496621131897, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09252384305000305, + "rewards/margins": 0.061647929251194, + "rewards/rejected": -0.15417176485061646, + "step": 301 + }, + { + "epoch": 0.18786936236391913, + "grad_norm": 0.34704282879829407, + "learning_rate": 3.49e-05, + "log_odds_chosen": 0.5234130620956421, + "log_odds_ratio": -0.4979130029678345, + "logits/chosen": 2.157592296600342, + "logits/rejected": 2.386396646499634, + "logps/chosen": -0.96225506067276, + "logps/rejected": -1.3348091840744019, + "loss": 0.7822, + "nll_loss": 0.732367217540741, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09622550010681152, + "rewards/margins": 0.0372554175555706, + "rewards/rejected": -0.13348092138767242, + "step": 302 + }, + { + "epoch": 0.1884914463452566, + "grad_norm": 0.26417276263237, + "learning_rate": 3.485e-05, + "log_odds_chosen": 0.839657187461853, + "log_odds_ratio": -0.41882774233818054, + "logits/chosen": 0.9267981052398682, + "logits/rejected": 0.5470160841941833, + "logps/chosen": -0.984728217124939, + "logps/rejected": -1.620476484298706, + "loss": 0.5851, + "nll_loss": 0.5432447791099548, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09847281873226166, + "rewards/margins": 0.06357483565807343, + "rewards/rejected": -0.16204765439033508, + "step": 303 + }, + { + "epoch": 0.1891135303265941, + "grad_norm": 0.3210340440273285, + "learning_rate": 3.48e-05, + "log_odds_chosen": 0.43130385875701904, + "log_odds_ratio": -0.547843873500824, + "logits/chosen": 1.4130476713180542, + "logits/rejected": 3.369215965270996, + "logps/chosen": -0.9559162855148315, + "logps/rejected": -1.1902281045913696, + "loss": 0.702, + "nll_loss": 0.6471742391586304, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09559163451194763, + "rewards/margins": 0.023431185632944107, + "rewards/rejected": -0.11902281641960144, + "step": 304 + }, + { + "epoch": 0.18973561430793157, + "grad_norm": 0.3766573965549469, + "learning_rate": 3.475e-05, + "log_odds_chosen": 2.982311725616455, + "log_odds_ratio": -0.23432503640651703, + "logits/chosen": 2.01702618598938, + "logits/rejected": 3.3692893981933594, + "logps/chosen": -0.6424603462219238, + "logps/rejected": -2.8637423515319824, + "loss": 0.7736, + "nll_loss": 0.7501252889633179, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0642460361123085, + "rewards/margins": 0.22212818264961243, + "rewards/rejected": -0.2863742411136627, + "step": 305 + }, + { + "epoch": 0.19035769828926905, + "grad_norm": 0.3161027431488037, + "learning_rate": 3.4699999999999996e-05, + "log_odds_chosen": 0.4053652286529541, + "log_odds_ratio": -0.5691996812820435, + "logits/chosen": 1.4379727840423584, + "logits/rejected": 1.3441359996795654, + "logps/chosen": -0.9353913068771362, + "logps/rejected": -1.1935211420059204, + "loss": 0.7362, + "nll_loss": 0.6792629361152649, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09353913366794586, + "rewards/margins": 0.025812990963459015, + "rewards/rejected": -0.11935213208198547, + "step": 306 + }, + { + "epoch": 0.19097978227060652, + "grad_norm": 0.27438247203826904, + "learning_rate": 3.465e-05, + "log_odds_chosen": 0.4557202160358429, + "log_odds_ratio": -0.5387417674064636, + "logits/chosen": 1.537845492362976, + "logits/rejected": 2.4677371978759766, + "logps/chosen": -0.969843864440918, + "logps/rejected": -1.2558746337890625, + "loss": 0.7606, + "nll_loss": 0.7067520022392273, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0969843864440918, + "rewards/margins": 0.02860308811068535, + "rewards/rejected": -0.12558747828006744, + "step": 307 + }, + { + "epoch": 0.191601866251944, + "grad_norm": 0.2716653645038605, + "learning_rate": 3.46e-05, + "log_odds_chosen": 1.008226752281189, + "log_odds_ratio": -0.4756288528442383, + "logits/chosen": 0.08089017868041992, + "logits/rejected": 1.4091609716415405, + "logps/chosen": -0.9258524179458618, + "logps/rejected": -1.6496083736419678, + "loss": 0.4917, + "nll_loss": 0.4441262483596802, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0925852507352829, + "rewards/margins": 0.07237560302019119, + "rewards/rejected": -0.1649608314037323, + "step": 308 + }, + { + "epoch": 0.19222395023328148, + "grad_norm": 0.28455743193626404, + "learning_rate": 3.455e-05, + "log_odds_chosen": 1.0614575147628784, + "log_odds_ratio": -0.35518938302993774, + "logits/chosen": 0.6718518733978271, + "logits/rejected": 1.3685194253921509, + "logps/chosen": -0.678547739982605, + "logps/rejected": -1.356400489807129, + "loss": 0.5612, + "nll_loss": 0.5256614685058594, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.06785477697849274, + "rewards/margins": 0.06778527796268463, + "rewards/rejected": -0.13564005494117737, + "step": 309 + }, + { + "epoch": 0.19284603421461896, + "grad_norm": 0.31106212735176086, + "learning_rate": 3.45e-05, + "log_odds_chosen": 0.7074335813522339, + "log_odds_ratio": -0.5154873132705688, + "logits/chosen": 1.195151448249817, + "logits/rejected": 2.1725990772247314, + "logps/chosen": -0.8819376230239868, + "logps/rejected": -1.3819676637649536, + "loss": 0.6628, + "nll_loss": 0.6112890243530273, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08819375932216644, + "rewards/margins": 0.05000301077961922, + "rewards/rejected": -0.13819676637649536, + "step": 310 + }, + { + "epoch": 0.19346811819595647, + "grad_norm": 0.2936580181121826, + "learning_rate": 3.445e-05, + "log_odds_chosen": 0.511105477809906, + "log_odds_ratio": -0.5984890460968018, + "logits/chosen": 0.3100643455982208, + "logits/rejected": 1.2994273900985718, + "logps/chosen": -1.3991050720214844, + "logps/rejected": -1.8176642656326294, + "loss": 0.5565, + "nll_loss": 0.4966951906681061, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1399105191230774, + "rewards/margins": 0.04185590147972107, + "rewards/rejected": -0.18176642060279846, + "step": 311 + }, + { + "epoch": 0.19409020217729395, + "grad_norm": 0.4400920271873474, + "learning_rate": 3.4399999999999996e-05, + "log_odds_chosen": 0.6868494749069214, + "log_odds_ratio": -0.49262088537216187, + "logits/chosen": 1.3997236490249634, + "logits/rejected": 2.0570526123046875, + "logps/chosen": -1.0203096866607666, + "logps/rejected": -1.5456228256225586, + "loss": 0.6983, + "nll_loss": 0.6490747928619385, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10203097760677338, + "rewards/margins": 0.052531301975250244, + "rewards/rejected": -0.15456226468086243, + "step": 312 + }, + { + "epoch": 0.19471228615863143, + "grad_norm": 0.4693183898925781, + "learning_rate": 3.435e-05, + "log_odds_chosen": 0.6722878813743591, + "log_odds_ratio": -0.571422278881073, + "logits/chosen": 1.9739662408828735, + "logits/rejected": 3.177738904953003, + "logps/chosen": -1.0663738250732422, + "logps/rejected": -1.4175429344177246, + "loss": 0.7562, + "nll_loss": 0.6990476250648499, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1066373735666275, + "rewards/margins": 0.03511692211031914, + "rewards/rejected": -0.14175429940223694, + "step": 313 + }, + { + "epoch": 0.1953343701399689, + "grad_norm": 0.27695855498313904, + "learning_rate": 3.430000000000001e-05, + "log_odds_chosen": 0.9974111318588257, + "log_odds_ratio": -0.47927969694137573, + "logits/chosen": 1.736506462097168, + "logits/rejected": 1.5564743280410767, + "logps/chosen": -1.0914074182510376, + "logps/rejected": -1.9759117364883423, + "loss": 0.6947, + "nll_loss": 0.6467252373695374, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10914073884487152, + "rewards/margins": 0.08845044672489166, + "rewards/rejected": -0.19759118556976318, + "step": 314 + }, + { + "epoch": 0.19595645412130638, + "grad_norm": 4.111656665802002, + "learning_rate": 3.4250000000000006e-05, + "log_odds_chosen": 1.0268677473068237, + "log_odds_ratio": -0.38255006074905396, + "logits/chosen": 0.9214022159576416, + "logits/rejected": 1.5696971416473389, + "logps/chosen": -0.7452452778816223, + "logps/rejected": -1.4652537107467651, + "loss": 0.6268, + "nll_loss": 0.5885062217712402, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07452452182769775, + "rewards/margins": 0.07200084626674652, + "rewards/rejected": -0.14652536809444427, + "step": 315 + }, + { + "epoch": 0.19657853810264386, + "grad_norm": 0.34326064586639404, + "learning_rate": 3.4200000000000005e-05, + "log_odds_chosen": 0.33381396532058716, + "log_odds_ratio": -0.5759456157684326, + "logits/chosen": 1.1284606456756592, + "logits/rejected": 1.8769559860229492, + "logps/chosen": -0.8769740462303162, + "logps/rejected": -1.100290298461914, + "loss": 0.7314, + "nll_loss": 0.6737844944000244, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08769740909337997, + "rewards/margins": 0.022331636399030685, + "rewards/rejected": -0.11002904176712036, + "step": 316 + }, + { + "epoch": 0.19720062208398134, + "grad_norm": 0.3009902238845825, + "learning_rate": 3.415e-05, + "log_odds_chosen": 0.6529784202575684, + "log_odds_ratio": -0.4737992286682129, + "logits/chosen": 1.0143990516662598, + "logits/rejected": 1.9090029001235962, + "logps/chosen": -1.226149320602417, + "logps/rejected": -1.7656174898147583, + "loss": 0.6483, + "nll_loss": 0.6009653210639954, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12261494994163513, + "rewards/margins": 0.05394680052995682, + "rewards/rejected": -0.17656174302101135, + "step": 317 + }, + { + "epoch": 0.19782270606531882, + "grad_norm": 0.29174575209617615, + "learning_rate": 3.41e-05, + "log_odds_chosen": 1.0072104930877686, + "log_odds_ratio": -0.4887734353542328, + "logits/chosen": 1.7796566486358643, + "logits/rejected": 2.4973080158233643, + "logps/chosen": -1.2199254035949707, + "logps/rejected": -2.0509541034698486, + "loss": 0.7258, + "nll_loss": 0.676878035068512, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12199252843856812, + "rewards/margins": 0.08310288190841675, + "rewards/rejected": -0.20509541034698486, + "step": 318 + }, + { + "epoch": 0.1984447900466563, + "grad_norm": 0.4274386465549469, + "learning_rate": 3.405e-05, + "log_odds_chosen": 0.33251264691352844, + "log_odds_ratio": -0.6084076166152954, + "logits/chosen": 1.3834333419799805, + "logits/rejected": 1.8153676986694336, + "logps/chosen": -1.1630151271820068, + "logps/rejected": -1.408416748046875, + "loss": 0.719, + "nll_loss": 0.658169686794281, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1163015216588974, + "rewards/margins": 0.024540148675441742, + "rewards/rejected": -0.14084166288375854, + "step": 319 + }, + { + "epoch": 0.19906687402799378, + "grad_norm": 0.33442965149879456, + "learning_rate": 3.4000000000000007e-05, + "log_odds_chosen": 0.6064150333404541, + "log_odds_ratio": -0.5035816431045532, + "logits/chosen": 0.972122848033905, + "logits/rejected": 2.684668779373169, + "logps/chosen": -1.0591764450073242, + "logps/rejected": -1.486153483390808, + "loss": 0.615, + "nll_loss": 0.5646054744720459, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10591764003038406, + "rewards/margins": 0.04269770532846451, + "rewards/rejected": -0.14861536026000977, + "step": 320 + }, + { + "epoch": 0.19968895800933126, + "grad_norm": 0.270923376083374, + "learning_rate": 3.3950000000000005e-05, + "log_odds_chosen": 1.028401494026184, + "log_odds_ratio": -0.3738253712654114, + "logits/chosen": 1.1716986894607544, + "logits/rejected": 2.318563222885132, + "logps/chosen": -0.6336233615875244, + "logps/rejected": -1.1869080066680908, + "loss": 0.5955, + "nll_loss": 0.5581661462783813, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.06336233764886856, + "rewards/margins": 0.05532847344875336, + "rewards/rejected": -0.11869080364704132, + "step": 321 + }, + { + "epoch": 0.20031104199066874, + "grad_norm": 0.7050295472145081, + "learning_rate": 3.3900000000000004e-05, + "log_odds_chosen": 1.2485467195510864, + "log_odds_ratio": -0.29749155044555664, + "logits/chosen": 0.9530289173126221, + "logits/rejected": 2.255811929702759, + "logps/chosen": -0.6973420977592468, + "logps/rejected": -1.3816578388214111, + "loss": 0.577, + "nll_loss": 0.5472236275672913, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06973421573638916, + "rewards/margins": 0.06843157112598419, + "rewards/rejected": -0.13816578686237335, + "step": 322 + }, + { + "epoch": 0.20093312597200622, + "grad_norm": 0.2560308873653412, + "learning_rate": 3.385e-05, + "log_odds_chosen": 0.8242784738540649, + "log_odds_ratio": -0.4802219867706299, + "logits/chosen": 0.9842218160629272, + "logits/rejected": 1.5221753120422363, + "logps/chosen": -1.1165326833724976, + "logps/rejected": -1.7994098663330078, + "loss": 0.6561, + "nll_loss": 0.6081154942512512, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11165326833724976, + "rewards/margins": 0.06828770041465759, + "rewards/rejected": -0.17994098365306854, + "step": 323 + }, + { + "epoch": 0.2015552099533437, + "grad_norm": 0.2929477393627167, + "learning_rate": 3.38e-05, + "log_odds_chosen": 1.0364418029785156, + "log_odds_ratio": -0.4716857671737671, + "logits/chosen": 1.3951971530914307, + "logits/rejected": 2.5507805347442627, + "logps/chosen": -0.8110775947570801, + "logps/rejected": -1.4586906433105469, + "loss": 0.8055, + "nll_loss": 0.7583492994308472, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08110776543617249, + "rewards/margins": 0.06476131081581116, + "rewards/rejected": -0.14586907625198364, + "step": 324 + }, + { + "epoch": 0.20217729393468117, + "grad_norm": 0.25941890478134155, + "learning_rate": 3.375000000000001e-05, + "log_odds_chosen": 1.196507453918457, + "log_odds_ratio": -0.31142622232437134, + "logits/chosen": 1.1948047876358032, + "logits/rejected": 1.6353682279586792, + "logps/chosen": -0.9516217708587646, + "logps/rejected": -1.8310502767562866, + "loss": 0.6742, + "nll_loss": 0.6430565118789673, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09516217559576035, + "rewards/margins": 0.08794285356998444, + "rewards/rejected": -0.18310505151748657, + "step": 325 + }, + { + "epoch": 0.20279937791601865, + "grad_norm": 0.42425087094306946, + "learning_rate": 3.3700000000000006e-05, + "log_odds_chosen": 0.2692258954048157, + "log_odds_ratio": -0.6452361345291138, + "logits/chosen": 2.0312280654907227, + "logits/rejected": 2.811347484588623, + "logps/chosen": -1.1653238534927368, + "logps/rejected": -1.2902796268463135, + "loss": 0.8677, + "nll_loss": 0.8031848669052124, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11653238534927368, + "rewards/margins": 0.01249559223651886, + "rewards/rejected": -0.12902797758579254, + "step": 326 + }, + { + "epoch": 0.20342146189735613, + "grad_norm": 0.3335670232772827, + "learning_rate": 3.3650000000000005e-05, + "log_odds_chosen": 0.3461630642414093, + "log_odds_ratio": -0.6145853996276855, + "logits/chosen": 1.8230409622192383, + "logits/rejected": 2.055349826812744, + "logps/chosen": -1.0126659870147705, + "logps/rejected": -1.273895025253296, + "loss": 0.8458, + "nll_loss": 0.7843534350395203, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.10126660019159317, + "rewards/margins": 0.02612290345132351, + "rewards/rejected": -0.12738950550556183, + "step": 327 + }, + { + "epoch": 0.2040435458786936, + "grad_norm": 0.32027310132980347, + "learning_rate": 3.3600000000000004e-05, + "log_odds_chosen": 1.1716110706329346, + "log_odds_ratio": -0.3758586049079895, + "logits/chosen": 1.397815465927124, + "logits/rejected": 2.642426013946533, + "logps/chosen": -0.8429104685783386, + "logps/rejected": -1.7424283027648926, + "loss": 0.5571, + "nll_loss": 0.519532322883606, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08429105579853058, + "rewards/margins": 0.0899517834186554, + "rewards/rejected": -0.17424283921718597, + "step": 328 + }, + { + "epoch": 0.20466562986003112, + "grad_norm": 0.25524646043777466, + "learning_rate": 3.355e-05, + "log_odds_chosen": 0.36870211362838745, + "log_odds_ratio": -0.5927896499633789, + "logits/chosen": 0.5124801397323608, + "logits/rejected": 1.5821843147277832, + "logps/chosen": -0.9563712477684021, + "logps/rejected": -1.2237095832824707, + "loss": 0.5892, + "nll_loss": 0.5298933386802673, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09563712775707245, + "rewards/margins": 0.026733826845884323, + "rewards/rejected": -0.12237095832824707, + "step": 329 + }, + { + "epoch": 0.2052877138413686, + "grad_norm": 0.3280141055583954, + "learning_rate": 3.35e-05, + "log_odds_chosen": 0.3331131637096405, + "log_odds_ratio": -0.6164728403091431, + "logits/chosen": 1.4830467700958252, + "logits/rejected": 2.0828585624694824, + "logps/chosen": -0.9265480637550354, + "logps/rejected": -1.1410126686096191, + "loss": 0.7481, + "nll_loss": 0.6864792704582214, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.09265480935573578, + "rewards/margins": 0.021446455270051956, + "rewards/rejected": -0.11410126835107803, + "step": 330 + }, + { + "epoch": 0.20590979782270608, + "grad_norm": 0.3560031056404114, + "learning_rate": 3.345000000000001e-05, + "log_odds_chosen": 0.13016769289970398, + "log_odds_ratio": -0.6902910470962524, + "logits/chosen": 1.8078997135162354, + "logits/rejected": 1.5569121837615967, + "logps/chosen": -1.1119384765625, + "logps/rejected": -1.1824815273284912, + "loss": 0.8469, + "nll_loss": 0.777821958065033, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11119385808706284, + "rewards/margins": 0.007054307498037815, + "rewards/rejected": -0.11824816465377808, + "step": 331 + }, + { + "epoch": 0.20653188180404355, + "grad_norm": 0.28344908356666565, + "learning_rate": 3.3400000000000005e-05, + "log_odds_chosen": 0.8456354141235352, + "log_odds_ratio": -0.4065042734146118, + "logits/chosen": 1.0167481899261475, + "logits/rejected": 2.0346903800964355, + "logps/chosen": -0.7277791500091553, + "logps/rejected": -1.2344926595687866, + "loss": 0.5522, + "nll_loss": 0.5115180015563965, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07277791202068329, + "rewards/margins": 0.05067135766148567, + "rewards/rejected": -0.12344926595687866, + "step": 332 + }, + { + "epoch": 0.20715396578538103, + "grad_norm": 0.5055153965950012, + "learning_rate": 3.3350000000000004e-05, + "log_odds_chosen": 0.24805226922035217, + "log_odds_ratio": -0.5933313369750977, + "logits/chosen": 2.0232362747192383, + "logits/rejected": 2.7429163455963135, + "logps/chosen": -0.9573587775230408, + "logps/rejected": -1.0880261659622192, + "loss": 0.7547, + "nll_loss": 0.6953402161598206, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09573587775230408, + "rewards/margins": 0.013066737912595272, + "rewards/rejected": -0.10880261659622192, + "step": 333 + }, + { + "epoch": 0.2077760497667185, + "grad_norm": 0.3780578076839447, + "learning_rate": 3.33e-05, + "log_odds_chosen": 0.21228660643100739, + "log_odds_ratio": -0.6047529578208923, + "logits/chosen": 2.0565428733825684, + "logits/rejected": 3.0869245529174805, + "logps/chosen": -0.9381221532821655, + "logps/rejected": -1.077812671661377, + "loss": 0.7898, + "nll_loss": 0.7292795181274414, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09381221234798431, + "rewards/margins": 0.013969066552817822, + "rewards/rejected": -0.10778127610683441, + "step": 334 + }, + { + "epoch": 0.208398133748056, + "grad_norm": 0.26417893171310425, + "learning_rate": 3.325e-05, + "log_odds_chosen": 1.0664198398590088, + "log_odds_ratio": -0.38333842158317566, + "logits/chosen": 0.9955233335494995, + "logits/rejected": 1.3207647800445557, + "logps/chosen": -0.8093827366828918, + "logps/rejected": -1.371497392654419, + "loss": 0.6961, + "nll_loss": 0.6577703952789307, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08093827962875366, + "rewards/margins": 0.05621146783232689, + "rewards/rejected": -0.13714975118637085, + "step": 335 + }, + { + "epoch": 0.20902021772939347, + "grad_norm": 0.36560311913490295, + "learning_rate": 3.32e-05, + "log_odds_chosen": 0.31488385796546936, + "log_odds_ratio": -0.590886652469635, + "logits/chosen": 1.9713830947875977, + "logits/rejected": 3.206526041030884, + "logps/chosen": -0.9142054915428162, + "logps/rejected": -1.1143088340759277, + "loss": 0.845, + "nll_loss": 0.7859491109848022, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09142055362462997, + "rewards/margins": 0.020010333508253098, + "rewards/rejected": -0.11143089830875397, + "step": 336 + }, + { + "epoch": 0.20964230171073095, + "grad_norm": 0.2682207226753235, + "learning_rate": 3.3150000000000006e-05, + "log_odds_chosen": 0.976233959197998, + "log_odds_ratio": -0.40072351694107056, + "logits/chosen": 1.840961217880249, + "logits/rejected": 2.919851064682007, + "logps/chosen": -0.6725811958312988, + "logps/rejected": -1.3152039051055908, + "loss": 0.6882, + "nll_loss": 0.6481640338897705, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06725811958312988, + "rewards/margins": 0.0642622709274292, + "rewards/rejected": -0.13152039051055908, + "step": 337 + }, + { + "epoch": 0.21026438569206843, + "grad_norm": 0.4169570505619049, + "learning_rate": 3.3100000000000005e-05, + "log_odds_chosen": 1.2906016111373901, + "log_odds_ratio": -0.3724203109741211, + "logits/chosen": 1.817138433456421, + "logits/rejected": 2.0239479541778564, + "logps/chosen": -0.9505648612976074, + "logps/rejected": -1.9772679805755615, + "loss": 0.5799, + "nll_loss": 0.5426954030990601, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09505648910999298, + "rewards/margins": 0.10267031937837601, + "rewards/rejected": -0.19772681593894958, + "step": 338 + }, + { + "epoch": 0.2108864696734059, + "grad_norm": 0.3151361644268036, + "learning_rate": 3.3050000000000004e-05, + "log_odds_chosen": 1.0252983570098877, + "log_odds_ratio": -0.4122687578201294, + "logits/chosen": 2.1130666732788086, + "logits/rejected": 2.364145278930664, + "logps/chosen": -1.0088870525360107, + "logps/rejected": -1.782043695449829, + "loss": 0.8336, + "nll_loss": 0.7923810482025146, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1008887067437172, + "rewards/margins": 0.07731565833091736, + "rewards/rejected": -0.17820435762405396, + "step": 339 + }, + { + "epoch": 0.2115085536547434, + "grad_norm": 0.33638083934783936, + "learning_rate": 3.3e-05, + "log_odds_chosen": 0.8013219833374023, + "log_odds_ratio": -0.43864017724990845, + "logits/chosen": 1.7561466693878174, + "logits/rejected": 2.6629652976989746, + "logps/chosen": -0.8455312252044678, + "logps/rejected": -1.3596203327178955, + "loss": 0.6516, + "nll_loss": 0.6077067852020264, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08455312252044678, + "rewards/margins": 0.05140891298651695, + "rewards/rejected": -0.13596203923225403, + "step": 340 + }, + { + "epoch": 0.21213063763608087, + "grad_norm": 0.27090558409690857, + "learning_rate": 3.295e-05, + "log_odds_chosen": 0.4188057482242584, + "log_odds_ratio": -0.5633822083473206, + "logits/chosen": 1.1251304149627686, + "logits/rejected": 2.424240827560425, + "logps/chosen": -1.0555152893066406, + "logps/rejected": -1.3421663045883179, + "loss": 0.6382, + "nll_loss": 0.5818169713020325, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10555154085159302, + "rewards/margins": 0.028665099292993546, + "rewards/rejected": -0.13421663641929626, + "step": 341 + }, + { + "epoch": 0.21275272161741834, + "grad_norm": 0.24415026605129242, + "learning_rate": 3.29e-05, + "log_odds_chosen": 1.4160504341125488, + "log_odds_ratio": -0.36501258611679077, + "logits/chosen": 0.9225729703903198, + "logits/rejected": 2.2199554443359375, + "logps/chosen": -0.6697980165481567, + "logps/rejected": -1.5039746761322021, + "loss": 0.6941, + "nll_loss": 0.6576475501060486, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0669798031449318, + "rewards/margins": 0.08341766893863678, + "rewards/rejected": -0.15039746463298798, + "step": 342 + }, + { + "epoch": 0.21337480559875582, + "grad_norm": 0.34027838706970215, + "learning_rate": 3.2850000000000006e-05, + "log_odds_chosen": 0.43100878596305847, + "log_odds_ratio": -0.5822449922561646, + "logits/chosen": 1.3398222923278809, + "logits/rejected": 2.844086170196533, + "logps/chosen": -0.9215174913406372, + "logps/rejected": -1.1033726930618286, + "loss": 0.7517, + "nll_loss": 0.693493127822876, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09215174615383148, + "rewards/margins": 0.01818552240729332, + "rewards/rejected": -0.1103372722864151, + "step": 343 + }, + { + "epoch": 0.2139968895800933, + "grad_norm": 0.3031538128852844, + "learning_rate": 3.2800000000000004e-05, + "log_odds_chosen": 1.1786978244781494, + "log_odds_ratio": -0.31301864981651306, + "logits/chosen": 1.4277385473251343, + "logits/rejected": 2.615865468978882, + "logps/chosen": -0.7201449871063232, + "logps/rejected": -1.503613829612732, + "loss": 0.5573, + "nll_loss": 0.5259483456611633, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07201449573040009, + "rewards/margins": 0.07834688574075699, + "rewards/rejected": -0.15036138892173767, + "step": 344 + }, + { + "epoch": 0.21461897356143078, + "grad_norm": 0.2811865210533142, + "learning_rate": 3.275e-05, + "log_odds_chosen": 0.3644857704639435, + "log_odds_ratio": -0.5817010998725891, + "logits/chosen": 1.2433083057403564, + "logits/rejected": 2.170772075653076, + "logps/chosen": -0.9040166735649109, + "logps/rejected": -1.057985544204712, + "loss": 0.7894, + "nll_loss": 0.7312281727790833, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.09040166437625885, + "rewards/margins": 0.015396892093122005, + "rewards/rejected": -0.10579856485128403, + "step": 345 + }, + { + "epoch": 0.21524105754276826, + "grad_norm": 0.3066243529319763, + "learning_rate": 3.27e-05, + "log_odds_chosen": 0.550826907157898, + "log_odds_ratio": -0.5378211736679077, + "logits/chosen": 0.5228650569915771, + "logits/rejected": 1.609564185142517, + "logps/chosen": -1.0457979440689087, + "logps/rejected": -1.4700343608856201, + "loss": 0.5974, + "nll_loss": 0.5436267852783203, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10457979887723923, + "rewards/margins": 0.04242362827062607, + "rewards/rejected": -0.1470034271478653, + "step": 346 + }, + { + "epoch": 0.21586314152410577, + "grad_norm": 0.3229999542236328, + "learning_rate": 3.265e-05, + "log_odds_chosen": 0.49356645345687866, + "log_odds_ratio": -0.4990430474281311, + "logits/chosen": 1.1496738195419312, + "logits/rejected": 1.674940586090088, + "logps/chosen": -1.011631965637207, + "logps/rejected": -1.363969326019287, + "loss": 0.6646, + "nll_loss": 0.6147063970565796, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10116319358348846, + "rewards/margins": 0.035233743488788605, + "rewards/rejected": -0.13639694452285767, + "step": 347 + }, + { + "epoch": 0.21648522550544325, + "grad_norm": 0.31860995292663574, + "learning_rate": 3.26e-05, + "log_odds_chosen": 2.2881035804748535, + "log_odds_ratio": -0.2554280757904053, + "logits/chosen": 0.6009454727172852, + "logits/rejected": 0.508087158203125, + "logps/chosen": -1.1216373443603516, + "logps/rejected": -3.143105983734131, + "loss": 0.5162, + "nll_loss": 0.490666002035141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1121637299656868, + "rewards/margins": 0.20214685797691345, + "rewards/rejected": -0.31431058049201965, + "step": 348 + }, + { + "epoch": 0.21710730948678073, + "grad_norm": 0.643325686454773, + "learning_rate": 3.2550000000000005e-05, + "log_odds_chosen": 0.8634718656539917, + "log_odds_ratio": -0.47730451822280884, + "logits/chosen": 1.144991397857666, + "logits/rejected": 2.048892021179199, + "logps/chosen": -0.8258051872253418, + "logps/rejected": -1.4491708278656006, + "loss": 0.6755, + "nll_loss": 0.6277719140052795, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08258052170276642, + "rewards/margins": 0.06233656033873558, + "rewards/rejected": -0.1449170708656311, + "step": 349 + }, + { + "epoch": 0.2177293934681182, + "grad_norm": 0.29045066237449646, + "learning_rate": 3.2500000000000004e-05, + "log_odds_chosen": 1.0438034534454346, + "log_odds_ratio": -0.38323289155960083, + "logits/chosen": 0.4679931700229645, + "logits/rejected": 2.0712461471557617, + "logps/chosen": -1.0007789134979248, + "logps/rejected": -1.753403902053833, + "loss": 0.6007, + "nll_loss": 0.5623734593391418, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10007788985967636, + "rewards/margins": 0.07526249438524246, + "rewards/rejected": -0.17534038424491882, + "step": 350 + }, + { + "epoch": 0.21835147744945568, + "grad_norm": 2.7242562770843506, + "learning_rate": 3.245e-05, + "log_odds_chosen": 0.9390478730201721, + "log_odds_ratio": -0.44669294357299805, + "logits/chosen": 0.5468963384628296, + "logits/rejected": 1.3738582134246826, + "logps/chosen": -0.8493483066558838, + "logps/rejected": -1.5678167343139648, + "loss": 0.5128, + "nll_loss": 0.4681122601032257, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08493483811616898, + "rewards/margins": 0.07184682786464691, + "rewards/rejected": -0.1567816585302353, + "step": 351 + }, + { + "epoch": 0.21897356143079316, + "grad_norm": 0.3089011013507843, + "learning_rate": 3.24e-05, + "log_odds_chosen": 1.6514302492141724, + "log_odds_ratio": -0.3678509593009949, + "logits/chosen": 0.9059101343154907, + "logits/rejected": 1.880787968635559, + "logps/chosen": -0.7281700372695923, + "logps/rejected": -1.8344961404800415, + "loss": 0.5784, + "nll_loss": 0.541600227355957, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07281699776649475, + "rewards/margins": 0.11063261330127716, + "rewards/rejected": -0.1834496110677719, + "step": 352 + }, + { + "epoch": 0.21959564541213064, + "grad_norm": 0.41005030274391174, + "learning_rate": 3.235e-05, + "log_odds_chosen": 0.8523861169815063, + "log_odds_ratio": -0.4098551273345947, + "logits/chosen": 1.472335934638977, + "logits/rejected": 2.3259546756744385, + "logps/chosen": -1.0261791944503784, + "logps/rejected": -1.6941561698913574, + "loss": 0.6855, + "nll_loss": 0.6445062756538391, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10261793434619904, + "rewards/margins": 0.06679768860340118, + "rewards/rejected": -0.16941560804843903, + "step": 353 + }, + { + "epoch": 0.22021772939346812, + "grad_norm": 0.39794912934303284, + "learning_rate": 3.2300000000000006e-05, + "log_odds_chosen": 0.9575679302215576, + "log_odds_ratio": -0.4112628400325775, + "logits/chosen": 1.2577632665634155, + "logits/rejected": 2.603459358215332, + "logps/chosen": -1.028702735900879, + "logps/rejected": -1.6863713264465332, + "loss": 0.6172, + "nll_loss": 0.5761150121688843, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10287026315927505, + "rewards/margins": 0.06576685607433319, + "rewards/rejected": -0.16863712668418884, + "step": 354 + }, + { + "epoch": 0.2208398133748056, + "grad_norm": 0.6424588561058044, + "learning_rate": 3.2250000000000005e-05, + "log_odds_chosen": 2.402007818222046, + "log_odds_ratio": -0.25134098529815674, + "logits/chosen": 1.1088523864746094, + "logits/rejected": 1.1565300226211548, + "logps/chosen": -0.7431226968765259, + "logps/rejected": -2.6517767906188965, + "loss": 0.71, + "nll_loss": 0.6848562955856323, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07431226223707199, + "rewards/margins": 0.1908653974533081, + "rewards/rejected": -0.2651776671409607, + "step": 355 + }, + { + "epoch": 0.22146189735614308, + "grad_norm": 0.4050353169441223, + "learning_rate": 3.2200000000000003e-05, + "log_odds_chosen": 0.809384822845459, + "log_odds_ratio": -0.4564736783504486, + "logits/chosen": 2.147434711456299, + "logits/rejected": 3.05849027633667, + "logps/chosen": -1.0253255367279053, + "logps/rejected": -1.654374361038208, + "loss": 0.8009, + "nll_loss": 0.7552969455718994, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10253255814313889, + "rewards/margins": 0.06290488690137863, + "rewards/rejected": -0.1654374599456787, + "step": 356 + }, + { + "epoch": 0.22208398133748056, + "grad_norm": 0.6486241817474365, + "learning_rate": 3.215e-05, + "log_odds_chosen": 0.5740684866905212, + "log_odds_ratio": -0.5048813819885254, + "logits/chosen": 2.6620006561279297, + "logits/rejected": 3.147725820541382, + "logps/chosen": -0.9634166955947876, + "logps/rejected": -1.3289614915847778, + "loss": 0.8307, + "nll_loss": 0.780228316783905, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09634166210889816, + "rewards/margins": 0.0365544855594635, + "rewards/rejected": -0.13289615511894226, + "step": 357 + }, + { + "epoch": 0.22270606531881804, + "grad_norm": 0.3724525570869446, + "learning_rate": 3.21e-05, + "log_odds_chosen": 0.028345106169581413, + "log_odds_ratio": -0.724675714969635, + "logits/chosen": 2.111036777496338, + "logits/rejected": 3.2740390300750732, + "logps/chosen": -1.2557599544525146, + "logps/rejected": -1.243468999862671, + "loss": 0.7887, + "nll_loss": 0.7161970734596252, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12557600438594818, + "rewards/margins": -0.0012290971353650093, + "rewards/rejected": -0.12434690445661545, + "step": 358 + }, + { + "epoch": 0.22332814930015552, + "grad_norm": 0.26534923911094666, + "learning_rate": 3.205e-05, + "log_odds_chosen": 0.871075451374054, + "log_odds_ratio": -0.4620305001735687, + "logits/chosen": 0.9481121301651001, + "logits/rejected": 1.3824822902679443, + "logps/chosen": -0.7495682835578918, + "logps/rejected": -1.0883417129516602, + "loss": 0.7144, + "nll_loss": 0.6682002544403076, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07495683431625366, + "rewards/margins": 0.03387734666466713, + "rewards/rejected": -0.1088341772556305, + "step": 359 + }, + { + "epoch": 0.223950233281493, + "grad_norm": 0.31574657559394836, + "learning_rate": 3.2000000000000005e-05, + "log_odds_chosen": 0.45125481486320496, + "log_odds_ratio": -0.6207079291343689, + "logits/chosen": 0.23232275247573853, + "logits/rejected": 1.9814536571502686, + "logps/chosen": -1.2163420915603638, + "logps/rejected": -1.5950084924697876, + "loss": 0.6097, + "nll_loss": 0.5476274490356445, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12163421511650085, + "rewards/margins": 0.03786662966012955, + "rewards/rejected": -0.159500852227211, + "step": 360 + }, + { + "epoch": 0.22457231726283047, + "grad_norm": 0.35571300983428955, + "learning_rate": 3.1950000000000004e-05, + "log_odds_chosen": 0.4454658329486847, + "log_odds_ratio": -0.5699670910835266, + "logits/chosen": 1.5059998035430908, + "logits/rejected": 1.0860791206359863, + "logps/chosen": -1.0126605033874512, + "logps/rejected": -1.4028286933898926, + "loss": 0.6565, + "nll_loss": 0.5994773507118225, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.101266048848629, + "rewards/margins": 0.03901681676506996, + "rewards/rejected": -0.14028286933898926, + "step": 361 + }, + { + "epoch": 0.22519440124416795, + "grad_norm": 0.28542983531951904, + "learning_rate": 3.19e-05, + "log_odds_chosen": 1.22482430934906, + "log_odds_ratio": -0.3668474853038788, + "logits/chosen": 1.2312403917312622, + "logits/rejected": 2.7701077461242676, + "logps/chosen": -0.6266268491744995, + "logps/rejected": -1.2044650316238403, + "loss": 0.6899, + "nll_loss": 0.6532502770423889, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06266268342733383, + "rewards/margins": 0.0577838197350502, + "rewards/rejected": -0.12044650316238403, + "step": 362 + }, + { + "epoch": 0.22581648522550543, + "grad_norm": 0.31290653347969055, + "learning_rate": 3.185e-05, + "log_odds_chosen": 0.8134715557098389, + "log_odds_ratio": -0.3888576328754425, + "logits/chosen": 1.2525287866592407, + "logits/rejected": 2.074699640274048, + "logps/chosen": -0.958530068397522, + "logps/rejected": -1.5322929620742798, + "loss": 0.589, + "nll_loss": 0.5501154661178589, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09585300832986832, + "rewards/margins": 0.05737629532814026, + "rewards/rejected": -0.15322929620742798, + "step": 363 + }, + { + "epoch": 0.2264385692068429, + "grad_norm": 0.31028440594673157, + "learning_rate": 3.18e-05, + "log_odds_chosen": 0.7677963972091675, + "log_odds_ratio": -0.42222875356674194, + "logits/chosen": 1.078382134437561, + "logits/rejected": 2.6262946128845215, + "logps/chosen": -0.8856097459793091, + "logps/rejected": -1.4135570526123047, + "loss": 0.6642, + "nll_loss": 0.6219831705093384, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08856097608804703, + "rewards/margins": 0.05279473215341568, + "rewards/rejected": -0.1413557082414627, + "step": 364 + }, + { + "epoch": 0.22706065318818042, + "grad_norm": 0.2672368288040161, + "learning_rate": 3.175e-05, + "log_odds_chosen": 1.25239098072052, + "log_odds_ratio": -0.34830427169799805, + "logits/chosen": 1.7153308391571045, + "logits/rejected": 2.940908908843994, + "logps/chosen": -0.7854593992233276, + "logps/rejected": -1.5576868057250977, + "loss": 0.7977, + "nll_loss": 0.7629010081291199, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.078545942902565, + "rewards/margins": 0.07722274959087372, + "rewards/rejected": -0.15576869249343872, + "step": 365 + }, + { + "epoch": 0.2276827371695179, + "grad_norm": 0.28569233417510986, + "learning_rate": 3.1700000000000005e-05, + "log_odds_chosen": 0.839358925819397, + "log_odds_ratio": -0.43827319145202637, + "logits/chosen": 0.81386399269104, + "logits/rejected": 3.110175371170044, + "logps/chosen": -0.7708005905151367, + "logps/rejected": -1.379541039466858, + "loss": 0.5576, + "nll_loss": 0.5137704610824585, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07708005607128143, + "rewards/margins": 0.06087404862046242, + "rewards/rejected": -0.13795410096645355, + "step": 366 + }, + { + "epoch": 0.22830482115085537, + "grad_norm": 1.7108992338180542, + "learning_rate": 3.1650000000000004e-05, + "log_odds_chosen": 1.8122532367706299, + "log_odds_ratio": -0.20352888107299805, + "logits/chosen": 1.2920408248901367, + "logits/rejected": 2.4468111991882324, + "logps/chosen": -0.7317562103271484, + "logps/rejected": -1.9532015323638916, + "loss": 0.7003, + "nll_loss": 0.6799642443656921, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07317563146352768, + "rewards/margins": 0.12214451283216476, + "rewards/rejected": -0.19532015919685364, + "step": 367 + }, + { + "epoch": 0.22892690513219285, + "grad_norm": 0.5031489133834839, + "learning_rate": 3.16e-05, + "log_odds_chosen": -0.09820356965065002, + "log_odds_ratio": -0.8625038266181946, + "logits/chosen": 0.7859886884689331, + "logits/rejected": 2.1682181358337402, + "logps/chosen": -1.468904733657837, + "logps/rejected": -1.2969123125076294, + "loss": 0.6516, + "nll_loss": 0.5653401613235474, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.14689047634601593, + "rewards/margins": -0.01719924435019493, + "rewards/rejected": -0.1296912282705307, + "step": 368 + }, + { + "epoch": 0.22954898911353033, + "grad_norm": 0.2742994427680969, + "learning_rate": 3.155e-05, + "log_odds_chosen": 1.5738685131072998, + "log_odds_ratio": -0.3663029968738556, + "logits/chosen": 0.23371529579162598, + "logits/rejected": 2.0417320728302, + "logps/chosen": -0.8390317559242249, + "logps/rejected": -2.128387689590454, + "loss": 0.5617, + "nll_loss": 0.525061309337616, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08390317857265472, + "rewards/margins": 0.12893559038639069, + "rewards/rejected": -0.2128387689590454, + "step": 369 + }, + { + "epoch": 0.2301710730948678, + "grad_norm": 0.2917932868003845, + "learning_rate": 3.15e-05, + "log_odds_chosen": 0.8865772485733032, + "log_odds_ratio": -0.36870071291923523, + "logits/chosen": 0.578902542591095, + "logits/rejected": 1.5433131456375122, + "logps/chosen": -1.0158836841583252, + "logps/rejected": -1.5882716178894043, + "loss": 0.6669, + "nll_loss": 0.6300525665283203, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10158836096525192, + "rewards/margins": 0.057238806039094925, + "rewards/rejected": -0.15882715582847595, + "step": 370 + }, + { + "epoch": 0.2307931570762053, + "grad_norm": 0.27657777070999146, + "learning_rate": 3.145e-05, + "log_odds_chosen": 0.8730921745300293, + "log_odds_ratio": -0.41202032566070557, + "logits/chosen": 0.9755443334579468, + "logits/rejected": 1.815916895866394, + "logps/chosen": -0.9039212465286255, + "logps/rejected": -1.4282338619232178, + "loss": 0.6118, + "nll_loss": 0.5705785751342773, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09039213508367538, + "rewards/margins": 0.05243125185370445, + "rewards/rejected": -0.14282338321208954, + "step": 371 + }, + { + "epoch": 0.23141524105754277, + "grad_norm": 0.3784266412258148, + "learning_rate": 3.1400000000000004e-05, + "log_odds_chosen": 1.1115049123764038, + "log_odds_ratio": -0.4001220166683197, + "logits/chosen": 1.2740614414215088, + "logits/rejected": 3.4113335609436035, + "logps/chosen": -0.7833281755447388, + "logps/rejected": -1.5305798053741455, + "loss": 0.6974, + "nll_loss": 0.6573410034179688, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07833281904459, + "rewards/margins": 0.07472515851259232, + "rewards/rejected": -0.1530579775571823, + "step": 372 + }, + { + "epoch": 0.23203732503888025, + "grad_norm": 0.49009525775909424, + "learning_rate": 3.135e-05, + "log_odds_chosen": 0.5858776569366455, + "log_odds_ratio": -0.5308746099472046, + "logits/chosen": 1.560257911682129, + "logits/rejected": 2.5096850395202637, + "logps/chosen": -0.7814056873321533, + "logps/rejected": -1.2176673412322998, + "loss": 0.7221, + "nll_loss": 0.6690379977226257, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.07814057171344757, + "rewards/margins": 0.043626170605421066, + "rewards/rejected": -0.12176674604415894, + "step": 373 + }, + { + "epoch": 0.23265940902021773, + "grad_norm": 0.27680882811546326, + "learning_rate": 3.13e-05, + "log_odds_chosen": 0.5893234014511108, + "log_odds_ratio": -0.5311247706413269, + "logits/chosen": 1.2210685014724731, + "logits/rejected": 1.663694143295288, + "logps/chosen": -0.836790919303894, + "logps/rejected": -1.2266275882720947, + "loss": 0.647, + "nll_loss": 0.5938542485237122, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08367909491062164, + "rewards/margins": 0.03898368030786514, + "rewards/rejected": -0.12266277521848679, + "step": 374 + }, + { + "epoch": 0.2332814930015552, + "grad_norm": 0.3387400209903717, + "learning_rate": 3.125e-05, + "log_odds_chosen": 0.5931373834609985, + "log_odds_ratio": -0.5173163414001465, + "logits/chosen": 0.8472631573677063, + "logits/rejected": 2.168799877166748, + "logps/chosen": -1.1901843547821045, + "logps/rejected": -1.6465400457382202, + "loss": 0.6611, + "nll_loss": 0.6094157695770264, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11901844292879105, + "rewards/margins": 0.045635566115379333, + "rewards/rejected": -0.16465400159358978, + "step": 375 + }, + { + "epoch": 0.23390357698289269, + "grad_norm": 0.3112637996673584, + "learning_rate": 3.12e-05, + "log_odds_chosen": 0.626380443572998, + "log_odds_ratio": -0.4817531108856201, + "logits/chosen": 1.1131579875946045, + "logits/rejected": 2.7692978382110596, + "logps/chosen": -1.171850323677063, + "logps/rejected": -1.6517088413238525, + "loss": 0.6714, + "nll_loss": 0.623209536075592, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11718503385782242, + "rewards/margins": 0.047985851764678955, + "rewards/rejected": -0.16517089307308197, + "step": 376 + }, + { + "epoch": 0.23452566096423016, + "grad_norm": 0.2978714108467102, + "learning_rate": 3.115e-05, + "log_odds_chosen": 0.37127476930618286, + "log_odds_ratio": -0.5513190031051636, + "logits/chosen": 1.0451431274414062, + "logits/rejected": 2.6317138671875, + "logps/chosen": -1.1320627927780151, + "logps/rejected": -1.4079375267028809, + "loss": 0.7035, + "nll_loss": 0.6483767032623291, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11320628225803375, + "rewards/margins": 0.02758748084306717, + "rewards/rejected": -0.14079375565052032, + "step": 377 + }, + { + "epoch": 0.23514774494556764, + "grad_norm": 0.3199593424797058, + "learning_rate": 3.1100000000000004e-05, + "log_odds_chosen": 1.089561939239502, + "log_odds_ratio": -0.34473738074302673, + "logits/chosen": 1.0959789752960205, + "logits/rejected": 1.9354054927825928, + "logps/chosen": -0.8988323211669922, + "logps/rejected": -1.6733230352401733, + "loss": 0.5175, + "nll_loss": 0.4830198884010315, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0898832380771637, + "rewards/margins": 0.07744906842708588, + "rewards/rejected": -0.16733232140541077, + "step": 378 + }, + { + "epoch": 0.23576982892690512, + "grad_norm": 0.5537139177322388, + "learning_rate": 3.105e-05, + "log_odds_chosen": 0.8812462091445923, + "log_odds_ratio": -0.6632511615753174, + "logits/chosen": 2.529099464416504, + "logits/rejected": 2.7828726768493652, + "logps/chosen": -1.2979620695114136, + "logps/rejected": -1.926892876625061, + "loss": 0.7607, + "nll_loss": 0.6943774819374084, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12979620695114136, + "rewards/margins": 0.0628930851817131, + "rewards/rejected": -0.19268929958343506, + "step": 379 + }, + { + "epoch": 0.2363919129082426, + "grad_norm": 0.3323799669742584, + "learning_rate": 3.1e-05, + "log_odds_chosen": 0.7835572957992554, + "log_odds_ratio": -0.4892599582672119, + "logits/chosen": 0.6586530208587646, + "logits/rejected": 1.666867971420288, + "logps/chosen": -1.1222596168518066, + "logps/rejected": -1.716899037361145, + "loss": 0.5659, + "nll_loss": 0.5169761776924133, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1122259572148323, + "rewards/margins": 0.059463948011398315, + "rewards/rejected": -0.17168991267681122, + "step": 380 + }, + { + "epoch": 0.23701399688958008, + "grad_norm": 0.29903772473335266, + "learning_rate": 3.095e-05, + "log_odds_chosen": 0.5510812401771545, + "log_odds_ratio": -0.4852423071861267, + "logits/chosen": 1.5083937644958496, + "logits/rejected": 2.799187183380127, + "logps/chosen": -0.7832547426223755, + "logps/rejected": -1.1330596208572388, + "loss": 0.6428, + "nll_loss": 0.5942538976669312, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07832548022270203, + "rewards/margins": 0.03498048335313797, + "rewards/rejected": -0.1133059710264206, + "step": 381 + }, + { + "epoch": 0.2376360808709176, + "grad_norm": 0.38349026441574097, + "learning_rate": 3.09e-05, + "log_odds_chosen": 0.34245169162750244, + "log_odds_ratio": -0.6288639307022095, + "logits/chosen": 1.488581895828247, + "logits/rejected": 1.2697405815124512, + "logps/chosen": -1.0095301866531372, + "logps/rejected": -1.2948238849639893, + "loss": 0.7459, + "nll_loss": 0.683020830154419, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10095302015542984, + "rewards/margins": 0.028529373928904533, + "rewards/rejected": -0.12948238849639893, + "step": 382 + }, + { + "epoch": 0.23825816485225507, + "grad_norm": 0.3196040987968445, + "learning_rate": 3.0850000000000004e-05, + "log_odds_chosen": 0.7200406789779663, + "log_odds_ratio": -0.48095566034317017, + "logits/chosen": 2.4836840629577637, + "logits/rejected": 1.727085828781128, + "logps/chosen": -0.9393627047538757, + "logps/rejected": -1.4800735712051392, + "loss": 0.8215, + "nll_loss": 0.7733873724937439, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09393627196550369, + "rewards/margins": 0.0540710911154747, + "rewards/rejected": -0.1480073630809784, + "step": 383 + }, + { + "epoch": 0.23888024883359255, + "grad_norm": 0.3995237946510315, + "learning_rate": 3.08e-05, + "log_odds_chosen": 1.3195679187774658, + "log_odds_ratio": -0.3445848822593689, + "logits/chosen": 1.1876567602157593, + "logits/rejected": 2.206796169281006, + "logps/chosen": -0.69122314453125, + "logps/rejected": -1.3169430494308472, + "loss": 0.5767, + "nll_loss": 0.5422327518463135, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.069122314453125, + "rewards/margins": 0.06257198750972748, + "rewards/rejected": -0.13169430196285248, + "step": 384 + }, + { + "epoch": 0.23950233281493002, + "grad_norm": 0.31453338265419006, + "learning_rate": 3.075e-05, + "log_odds_chosen": 0.7995316982269287, + "log_odds_ratio": -0.4378565847873688, + "logits/chosen": 3.7806599140167236, + "logits/rejected": 3.5833585262298584, + "logps/chosen": -0.8676797151565552, + "logps/rejected": -1.3756786584854126, + "loss": 1.0576, + "nll_loss": 1.0137659311294556, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08676797151565552, + "rewards/margins": 0.0507998913526535, + "rewards/rejected": -0.13756787776947021, + "step": 385 + }, + { + "epoch": 0.2401244167962675, + "grad_norm": 0.2684924900531769, + "learning_rate": 3.07e-05, + "log_odds_chosen": 1.4387656450271606, + "log_odds_ratio": -0.25688570737838745, + "logits/chosen": 1.3283145427703857, + "logits/rejected": 2.276088237762451, + "logps/chosen": -0.7973716259002686, + "logps/rejected": -1.7727547883987427, + "loss": 0.6458, + "nll_loss": 0.6201030015945435, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07973716408014297, + "rewards/margins": 0.09753831475973129, + "rewards/rejected": -0.17727547883987427, + "step": 386 + }, + { + "epoch": 0.24074650077760498, + "grad_norm": 0.3620300889015198, + "learning_rate": 3.065e-05, + "log_odds_chosen": 0.5435608625411987, + "log_odds_ratio": -0.5255359411239624, + "logits/chosen": 2.283266544342041, + "logits/rejected": 2.0596556663513184, + "logps/chosen": -1.0229352712631226, + "logps/rejected": -1.451112151145935, + "loss": 0.7554, + "nll_loss": 0.7028760313987732, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10229352116584778, + "rewards/margins": 0.04281768575310707, + "rewards/rejected": -0.14511121809482574, + "step": 387 + }, + { + "epoch": 0.24136858475894246, + "grad_norm": 0.34285151958465576, + "learning_rate": 3.06e-05, + "log_odds_chosen": 1.1580283641815186, + "log_odds_ratio": -0.33749106526374817, + "logits/chosen": 2.170480966567993, + "logits/rejected": 3.085576295852661, + "logps/chosen": -0.8325943350791931, + "logps/rejected": -1.6810599565505981, + "loss": 0.6661, + "nll_loss": 0.6323240995407104, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08325943350791931, + "rewards/margins": 0.08484657108783722, + "rewards/rejected": -0.16810600459575653, + "step": 388 + }, + { + "epoch": 0.24199066874027994, + "grad_norm": 0.31308212876319885, + "learning_rate": 3.0550000000000004e-05, + "log_odds_chosen": 1.1371029615402222, + "log_odds_ratio": -0.41829434037208557, + "logits/chosen": 0.6740908622741699, + "logits/rejected": 1.5658845901489258, + "logps/chosen": -0.9937759041786194, + "logps/rejected": -1.626152753829956, + "loss": 0.6029, + "nll_loss": 0.5611146688461304, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0993775948882103, + "rewards/margins": 0.06323768198490143, + "rewards/rejected": -0.16261526942253113, + "step": 389 + }, + { + "epoch": 0.24261275272161742, + "grad_norm": 0.29140704870224, + "learning_rate": 3.05e-05, + "log_odds_chosen": 0.5190244913101196, + "log_odds_ratio": -0.5214281678199768, + "logits/chosen": 2.445509910583496, + "logits/rejected": 2.593230724334717, + "logps/chosen": -1.0484591722488403, + "logps/rejected": -1.3859442472457886, + "loss": 0.8534, + "nll_loss": 0.801255464553833, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10484592616558075, + "rewards/margins": 0.03374850004911423, + "rewards/rejected": -0.13859443366527557, + "step": 390 + }, + { + "epoch": 0.2432348367029549, + "grad_norm": 0.3124774694442749, + "learning_rate": 3.045e-05, + "log_odds_chosen": 0.5714182257652283, + "log_odds_ratio": -0.5035386085510254, + "logits/chosen": 2.587074041366577, + "logits/rejected": 3.1037638187408447, + "logps/chosen": -1.0052751302719116, + "logps/rejected": -1.411800503730774, + "loss": 0.9459, + "nll_loss": 0.8955031037330627, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10052751004695892, + "rewards/margins": 0.04065253585577011, + "rewards/rejected": -0.14118005335330963, + "step": 391 + }, + { + "epoch": 0.24385692068429238, + "grad_norm": 0.2874357998371124, + "learning_rate": 3.04e-05, + "log_odds_chosen": 0.4438624083995819, + "log_odds_ratio": -0.5330299139022827, + "logits/chosen": 0.8436980247497559, + "logits/rejected": 3.554211139678955, + "logps/chosen": -1.0223864316940308, + "logps/rejected": -1.3054369688034058, + "loss": 0.5154, + "nll_loss": 0.4620867967605591, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10223864763975143, + "rewards/margins": 0.0283050537109375, + "rewards/rejected": -0.13054370880126953, + "step": 392 + }, + { + "epoch": 0.24447900466562986, + "grad_norm": 0.34963977336883545, + "learning_rate": 3.035e-05, + "log_odds_chosen": 0.7523168921470642, + "log_odds_ratio": -0.4647229313850403, + "logits/chosen": 1.5345008373260498, + "logits/rejected": 1.5623347759246826, + "logps/chosen": -1.007373332977295, + "logps/rejected": -1.5803990364074707, + "loss": 0.6779, + "nll_loss": 0.6314302086830139, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10073733329772949, + "rewards/margins": 0.057302575558423996, + "rewards/rejected": -0.15803992748260498, + "step": 393 + }, + { + "epoch": 0.24510108864696734, + "grad_norm": 0.4444126784801483, + "learning_rate": 3.03e-05, + "log_odds_chosen": 1.1722054481506348, + "log_odds_ratio": -0.3408425450325012, + "logits/chosen": 2.263749361038208, + "logits/rejected": 2.531400203704834, + "logps/chosen": -0.7516819834709167, + "logps/rejected": -1.4847530126571655, + "loss": 0.6886, + "nll_loss": 0.654498815536499, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0751681998372078, + "rewards/margins": 0.073307104408741, + "rewards/rejected": -0.1484753042459488, + "step": 394 + }, + { + "epoch": 0.24572317262830481, + "grad_norm": 0.2417965829372406, + "learning_rate": 3.025e-05, + "log_odds_chosen": 1.0758328437805176, + "log_odds_ratio": -0.3839593231678009, + "logits/chosen": 0.5677065253257751, + "logits/rejected": 1.7079464197158813, + "logps/chosen": -0.7603127956390381, + "logps/rejected": -1.385190725326538, + "loss": 0.5789, + "nll_loss": 0.5405372977256775, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07603128254413605, + "rewards/margins": 0.06248778849840164, + "rewards/rejected": -0.1385190784931183, + "step": 395 + }, + { + "epoch": 0.2463452566096423, + "grad_norm": 0.3106265962123871, + "learning_rate": 3.02e-05, + "log_odds_chosen": 0.9861092567443848, + "log_odds_ratio": -0.4771953225135803, + "logits/chosen": 0.5812429785728455, + "logits/rejected": 2.854665756225586, + "logps/chosen": -0.8859405517578125, + "logps/rejected": -1.554978370666504, + "loss": 0.5522, + "nll_loss": 0.5044968724250793, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08859404921531677, + "rewards/margins": 0.06690378487110138, + "rewards/rejected": -0.15549783408641815, + "step": 396 + }, + { + "epoch": 0.24696734059097977, + "grad_norm": 0.25298336148262024, + "learning_rate": 3.015e-05, + "log_odds_chosen": 0.6958006024360657, + "log_odds_ratio": -0.4609626531600952, + "logits/chosen": 0.29168179631233215, + "logits/rejected": 1.9156887531280518, + "logps/chosen": -0.8693374395370483, + "logps/rejected": -1.3414729833602905, + "loss": 0.5615, + "nll_loss": 0.5154497027397156, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08693374693393707, + "rewards/margins": 0.04721354693174362, + "rewards/rejected": -0.1341472864151001, + "step": 397 + }, + { + "epoch": 0.24758942457231725, + "grad_norm": 0.39282822608947754, + "learning_rate": 3.01e-05, + "log_odds_chosen": 0.480314701795578, + "log_odds_ratio": -0.5577341914176941, + "logits/chosen": 1.667938470840454, + "logits/rejected": 2.5177690982818604, + "logps/chosen": -1.1416622400283813, + "logps/rejected": -1.504913330078125, + "loss": 0.6952, + "nll_loss": 0.6393857002258301, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11416622251272202, + "rewards/margins": 0.036325111985206604, + "rewards/rejected": -0.15049132704734802, + "step": 398 + }, + { + "epoch": 0.24821150855365473, + "grad_norm": 0.6533320546150208, + "learning_rate": 3.0050000000000002e-05, + "log_odds_chosen": 1.0804705619812012, + "log_odds_ratio": -0.4304221570491791, + "logits/chosen": 1.3469984531402588, + "logits/rejected": 3.310586452484131, + "logps/chosen": -1.1268970966339111, + "logps/rejected": -1.9855341911315918, + "loss": 0.7102, + "nll_loss": 0.6671367287635803, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11268971115350723, + "rewards/margins": 0.08586370944976807, + "rewards/rejected": -0.1985534131526947, + "step": 399 + }, + { + "epoch": 0.24883359253499224, + "grad_norm": 0.3909144699573517, + "learning_rate": 3e-05, + "log_odds_chosen": 0.8222372531890869, + "log_odds_ratio": -0.555374264717102, + "logits/chosen": -0.3867577612400055, + "logits/rejected": 2.40067720413208, + "logps/chosen": -1.1070265769958496, + "logps/rejected": -1.7690173387527466, + "loss": 0.5574, + "nll_loss": 0.5018182992935181, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11070266366004944, + "rewards/margins": 0.06619907915592194, + "rewards/rejected": -0.17690175771713257, + "step": 400 + }, + { + "epoch": 0.24945567651632972, + "grad_norm": 0.4009895920753479, + "learning_rate": 2.995e-05, + "log_odds_chosen": 0.35318857431411743, + "log_odds_ratio": -0.9164329767227173, + "logits/chosen": 1.3482327461242676, + "logits/rejected": 1.7844626903533936, + "logps/chosen": -1.4168295860290527, + "logps/rejected": -1.6120425462722778, + "loss": 0.6499, + "nll_loss": 0.5582701563835144, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1416829526424408, + "rewards/margins": 0.019521303474903107, + "rewards/rejected": -0.1612042635679245, + "step": 401 + }, + { + "epoch": 0.25007776049766717, + "grad_norm": 0.33025988936424255, + "learning_rate": 2.9900000000000002e-05, + "log_odds_chosen": 1.3418104648590088, + "log_odds_ratio": -0.3277786374092102, + "logits/chosen": 1.4879672527313232, + "logits/rejected": 2.3433167934417725, + "logps/chosen": -0.9078612327575684, + "logps/rejected": -1.8650798797607422, + "loss": 0.6201, + "nll_loss": 0.5872801542282104, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09078612923622131, + "rewards/margins": 0.09572187811136246, + "rewards/rejected": -0.18650799989700317, + "step": 402 + }, + { + "epoch": 0.2506998444790047, + "grad_norm": 0.3194940686225891, + "learning_rate": 2.985e-05, + "log_odds_chosen": 0.8244792819023132, + "log_odds_ratio": -0.42599987983703613, + "logits/chosen": 0.572659969329834, + "logits/rejected": 1.3702340126037598, + "logps/chosen": -0.9174282550811768, + "logps/rejected": -1.5245065689086914, + "loss": 0.5852, + "nll_loss": 0.5425539612770081, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09174282848834991, + "rewards/margins": 0.06070783734321594, + "rewards/rejected": -0.15245066583156586, + "step": 403 + }, + { + "epoch": 0.2513219284603421, + "grad_norm": 0.406558096408844, + "learning_rate": 2.98e-05, + "log_odds_chosen": 1.7406198978424072, + "log_odds_ratio": -0.6884851455688477, + "logits/chosen": 1.4606916904449463, + "logits/rejected": 1.6664860248565674, + "logps/chosen": -1.0016249418258667, + "logps/rejected": -2.2720675468444824, + "loss": 0.7226, + "nll_loss": 0.6537201404571533, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10016249865293503, + "rewards/margins": 0.12704426050186157, + "rewards/rejected": -0.2272067666053772, + "step": 404 + }, + { + "epoch": 0.25194401244167963, + "grad_norm": 0.30637553334236145, + "learning_rate": 2.975e-05, + "log_odds_chosen": 0.4996417164802551, + "log_odds_ratio": -0.5414650440216064, + "logits/chosen": 1.5827730894088745, + "logits/rejected": 2.8780758380889893, + "logps/chosen": -0.9463892579078674, + "logps/rejected": -1.2346899509429932, + "loss": 0.7172, + "nll_loss": 0.6631010174751282, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09463892877101898, + "rewards/margins": 0.02883007377386093, + "rewards/rejected": -0.12346899509429932, + "step": 405 + }, + { + "epoch": 0.2525660964230171, + "grad_norm": 0.3793873190879822, + "learning_rate": 2.97e-05, + "log_odds_chosen": 0.015795554965734482, + "log_odds_ratio": -0.7355694770812988, + "logits/chosen": 0.8564848899841309, + "logits/rejected": 3.023225784301758, + "logps/chosen": -1.1393344402313232, + "logps/rejected": -1.1146671772003174, + "loss": 0.574, + "nll_loss": 0.5004271268844604, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.11393344402313232, + "rewards/margins": -0.0024667298421263695, + "rewards/rejected": -0.11146670579910278, + "step": 406 + }, + { + "epoch": 0.2531881804043546, + "grad_norm": 0.31722235679626465, + "learning_rate": 2.965e-05, + "log_odds_chosen": 0.505951464176178, + "log_odds_ratio": -0.5452799797058105, + "logits/chosen": 2.190391778945923, + "logits/rejected": 2.1522607803344727, + "logps/chosen": -0.8337920904159546, + "logps/rejected": -1.0933805704116821, + "loss": 0.7712, + "nll_loss": 0.716640055179596, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.08337920904159546, + "rewards/margins": 0.02595885656774044, + "rewards/rejected": -0.10933806002140045, + "step": 407 + }, + { + "epoch": 0.25381026438569204, + "grad_norm": 0.6297598481178284, + "learning_rate": 2.96e-05, + "log_odds_chosen": 0.40282291173934937, + "log_odds_ratio": -0.8236533403396606, + "logits/chosen": 2.029449701309204, + "logits/rejected": 3.087233781814575, + "logps/chosen": -1.7967513799667358, + "logps/rejected": -2.068850040435791, + "loss": 0.846, + "nll_loss": 0.7636764645576477, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1796751469373703, + "rewards/margins": 0.027209851890802383, + "rewards/rejected": -0.20688499510288239, + "step": 408 + }, + { + "epoch": 0.25443234836702955, + "grad_norm": 1.622485876083374, + "learning_rate": 2.955e-05, + "log_odds_chosen": 1.3102138042449951, + "log_odds_ratio": -0.38241660594940186, + "logits/chosen": 0.22542864084243774, + "logits/rejected": 1.7303202152252197, + "logps/chosen": -0.8405523300170898, + "logps/rejected": -1.6514477729797363, + "loss": 0.5552, + "nll_loss": 0.517005980014801, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08405523747205734, + "rewards/margins": 0.08108954131603241, + "rewards/rejected": -0.16514477133750916, + "step": 409 + }, + { + "epoch": 0.25505443234836706, + "grad_norm": 0.34497782588005066, + "learning_rate": 2.95e-05, + "log_odds_chosen": 1.3162719011306763, + "log_odds_ratio": -0.27363449335098267, + "logits/chosen": 0.6687554121017456, + "logits/rejected": 1.8202764987945557, + "logps/chosen": -0.6283218860626221, + "logps/rejected": -1.4075915813446045, + "loss": 0.6224, + "nll_loss": 0.5950397253036499, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06283218413591385, + "rewards/margins": 0.07792697846889496, + "rewards/rejected": -0.1407591551542282, + "step": 410 + }, + { + "epoch": 0.2556765163297045, + "grad_norm": 0.666723906993866, + "learning_rate": 2.945e-05, + "log_odds_chosen": 0.7361310124397278, + "log_odds_ratio": -0.4306948482990265, + "logits/chosen": 1.0494723320007324, + "logits/rejected": 1.2754552364349365, + "logps/chosen": -1.0213119983673096, + "logps/rejected": -1.5761754512786865, + "loss": 0.559, + "nll_loss": 0.5158807039260864, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.102131187915802, + "rewards/margins": 0.05548635125160217, + "rewards/rejected": -0.15761755406856537, + "step": 411 + }, + { + "epoch": 0.256298600311042, + "grad_norm": 0.4294110834598541, + "learning_rate": 2.94e-05, + "log_odds_chosen": 0.5257182121276855, + "log_odds_ratio": -0.5099442005157471, + "logits/chosen": 3.1594438552856445, + "logits/rejected": 1.7579195499420166, + "logps/chosen": -0.9839767217636108, + "logps/rejected": -1.3301349878311157, + "loss": 1.0331, + "nll_loss": 0.9820601344108582, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09839767217636108, + "rewards/margins": 0.03461581841111183, + "rewards/rejected": -0.13301348686218262, + "step": 412 + }, + { + "epoch": 0.25692068429237946, + "grad_norm": 0.3066917955875397, + "learning_rate": 2.935e-05, + "log_odds_chosen": 0.9262756109237671, + "log_odds_ratio": -0.36216309666633606, + "logits/chosen": 0.2067386507987976, + "logits/rejected": 0.22625547647476196, + "logps/chosen": -1.0756921768188477, + "logps/rejected": -1.7890634536743164, + "loss": 0.529, + "nll_loss": 0.4928092360496521, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10756923258304596, + "rewards/margins": 0.07133711129426956, + "rewards/rejected": -0.17890633642673492, + "step": 413 + }, + { + "epoch": 0.25754276827371697, + "grad_norm": 0.32451072335243225, + "learning_rate": 2.93e-05, + "log_odds_chosen": 1.0704395771026611, + "log_odds_ratio": -0.35353147983551025, + "logits/chosen": 1.414031982421875, + "logits/rejected": 1.678096890449524, + "logps/chosen": -0.946209192276001, + "logps/rejected": -1.6878459453582764, + "loss": 0.695, + "nll_loss": 0.6596387624740601, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09462092816829681, + "rewards/margins": 0.07416368275880814, + "rewards/rejected": -0.16878461837768555, + "step": 414 + }, + { + "epoch": 0.2581648522550544, + "grad_norm": 0.30788925290107727, + "learning_rate": 2.925e-05, + "log_odds_chosen": 0.5195921659469604, + "log_odds_ratio": -0.5618450045585632, + "logits/chosen": 0.8189148306846619, + "logits/rejected": 1.116391897201538, + "logps/chosen": -0.992274284362793, + "logps/rejected": -1.413496494293213, + "loss": 0.6824, + "nll_loss": 0.6262446641921997, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0992274284362793, + "rewards/margins": 0.04212221875786781, + "rewards/rejected": -0.1413496434688568, + "step": 415 + }, + { + "epoch": 0.25878693623639193, + "grad_norm": 0.34574657678604126, + "learning_rate": 2.9199999999999998e-05, + "log_odds_chosen": 0.9739059209823608, + "log_odds_ratio": -0.34965360164642334, + "logits/chosen": 0.7820599675178528, + "logits/rejected": 1.1054322719573975, + "logps/chosen": -1.1314696073532104, + "logps/rejected": -1.8759431838989258, + "loss": 0.7164, + "nll_loss": 0.6814556121826172, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11314696073532104, + "rewards/margins": 0.07444734871387482, + "rewards/rejected": -0.18759430944919586, + "step": 416 + }, + { + "epoch": 0.2594090202177294, + "grad_norm": 0.5183652639389038, + "learning_rate": 2.915e-05, + "log_odds_chosen": 0.8738367557525635, + "log_odds_ratio": -0.47149088978767395, + "logits/chosen": -0.39875268936157227, + "logits/rejected": 1.5387773513793945, + "logps/chosen": -1.315935730934143, + "logps/rejected": -1.958770513534546, + "loss": 0.562, + "nll_loss": 0.5148109197616577, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13159358501434326, + "rewards/margins": 0.06428346782922745, + "rewards/rejected": -0.1958770453929901, + "step": 417 + }, + { + "epoch": 0.2600311041990669, + "grad_norm": 0.35823681950569153, + "learning_rate": 2.91e-05, + "log_odds_chosen": 0.8519835472106934, + "log_odds_ratio": -0.39780083298683167, + "logits/chosen": 2.6089086532592773, + "logits/rejected": 2.7018566131591797, + "logps/chosen": -0.9135912656784058, + "logps/rejected": -1.5112643241882324, + "loss": 0.8546, + "nll_loss": 0.8147870302200317, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09135912358760834, + "rewards/margins": 0.05976732447743416, + "rewards/rejected": -0.1511264443397522, + "step": 418 + }, + { + "epoch": 0.26065318818040434, + "grad_norm": 0.48351654410362244, + "learning_rate": 2.9049999999999998e-05, + "log_odds_chosen": 1.4411815404891968, + "log_odds_ratio": -0.2508581578731537, + "logits/chosen": 1.8507972955703735, + "logits/rejected": 2.76145339012146, + "logps/chosen": -1.100071668624878, + "logps/rejected": -2.2368063926696777, + "loss": 0.5349, + "nll_loss": 0.5098253488540649, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11000718176364899, + "rewards/margins": 0.11367346346378326, + "rewards/rejected": -0.22368063032627106, + "step": 419 + }, + { + "epoch": 0.26127527216174184, + "grad_norm": 0.33516889810562134, + "learning_rate": 2.9e-05, + "log_odds_chosen": 1.056069016456604, + "log_odds_ratio": -0.4038728177547455, + "logits/chosen": 1.260915756225586, + "logits/rejected": 2.18727970123291, + "logps/chosen": -0.9167536497116089, + "logps/rejected": -1.6653428077697754, + "loss": 0.5633, + "nll_loss": 0.5229440927505493, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09167537093162537, + "rewards/margins": 0.07485891878604889, + "rewards/rejected": -0.16653428971767426, + "step": 420 + }, + { + "epoch": 0.2618973561430793, + "grad_norm": 0.3093640208244324, + "learning_rate": 2.895e-05, + "log_odds_chosen": 0.41482430696487427, + "log_odds_ratio": -0.5347127914428711, + "logits/chosen": 2.2311692237854004, + "logits/rejected": 2.891629219055176, + "logps/chosen": -1.0031148195266724, + "logps/rejected": -1.3123852014541626, + "loss": 0.8781, + "nll_loss": 0.8245925307273865, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10031148046255112, + "rewards/margins": 0.030927037820219994, + "rewards/rejected": -0.13123852014541626, + "step": 421 + }, + { + "epoch": 0.2625194401244168, + "grad_norm": 0.3477174639701843, + "learning_rate": 2.8899999999999998e-05, + "log_odds_chosen": 1.0467660427093506, + "log_odds_ratio": -0.34793394804000854, + "logits/chosen": 2.9287118911743164, + "logits/rejected": 3.672017812728882, + "logps/chosen": -0.987645149230957, + "logps/rejected": -1.7407939434051514, + "loss": 0.8457, + "nll_loss": 0.8109293580055237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09876450896263123, + "rewards/margins": 0.07531488686800003, + "rewards/rejected": -0.17407938838005066, + "step": 422 + }, + { + "epoch": 0.26314152410575425, + "grad_norm": 0.30915936827659607, + "learning_rate": 2.885e-05, + "log_odds_chosen": 0.5334398746490479, + "log_odds_ratio": -0.6047460436820984, + "logits/chosen": 1.0569583177566528, + "logits/rejected": 2.308332681655884, + "logps/chosen": -1.227040410041809, + "logps/rejected": -1.577738642692566, + "loss": 0.5431, + "nll_loss": 0.48261669278144836, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12270405143499374, + "rewards/margins": 0.03506981581449509, + "rewards/rejected": -0.15777386724948883, + "step": 423 + }, + { + "epoch": 0.26376360808709176, + "grad_norm": 0.2928369641304016, + "learning_rate": 2.88e-05, + "log_odds_chosen": 1.6705491542816162, + "log_odds_ratio": -0.23528079688549042, + "logits/chosen": 1.2303978204727173, + "logits/rejected": 2.6390182971954346, + "logps/chosen": -0.7747675180435181, + "logps/rejected": -2.0271377563476562, + "loss": 0.6079, + "nll_loss": 0.5843520760536194, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07747675478458405, + "rewards/margins": 0.12523701786994934, + "rewards/rejected": -0.20271378755569458, + "step": 424 + }, + { + "epoch": 0.2643856920684292, + "grad_norm": 0.31654441356658936, + "learning_rate": 2.8749999999999997e-05, + "log_odds_chosen": 1.7006933689117432, + "log_odds_ratio": -0.2859562039375305, + "logits/chosen": 1.474023461341858, + "logits/rejected": 2.0397026538848877, + "logps/chosen": -1.0018465518951416, + "logps/rejected": -2.3935539722442627, + "loss": 0.7213, + "nll_loss": 0.6926729083061218, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10018465667963028, + "rewards/margins": 0.13917075097560883, + "rewards/rejected": -0.2393554002046585, + "step": 425 + }, + { + "epoch": 0.2650077760497667, + "grad_norm": 1.583665370941162, + "learning_rate": 2.87e-05, + "log_odds_chosen": 0.9322236776351929, + "log_odds_ratio": -0.6919344663619995, + "logits/chosen": 2.5925381183624268, + "logits/rejected": 3.1540775299072266, + "logps/chosen": -1.319345474243164, + "logps/rejected": -1.910394310951233, + "loss": 0.8361, + "nll_loss": 0.7669383883476257, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13193455338478088, + "rewards/margins": 0.059104882180690765, + "rewards/rejected": -0.19103942811489105, + "step": 426 + }, + { + "epoch": 0.2656298600311042, + "grad_norm": 0.9140015244483948, + "learning_rate": 2.865e-05, + "log_odds_chosen": 0.03581523895263672, + "log_odds_ratio": -0.8274832963943481, + "logits/chosen": 1.621453046798706, + "logits/rejected": 2.172755241394043, + "logps/chosen": -1.689666748046875, + "logps/rejected": -1.6490963697433472, + "loss": 0.7067, + "nll_loss": 0.6239080429077148, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.16896668076515198, + "rewards/margins": -0.004057048819959164, + "rewards/rejected": -0.16490963101387024, + "step": 427 + }, + { + "epoch": 0.2662519440124417, + "grad_norm": 0.32180896401405334, + "learning_rate": 2.86e-05, + "log_odds_chosen": 0.9865052103996277, + "log_odds_ratio": -0.41136422753334045, + "logits/chosen": 2.8309919834136963, + "logits/rejected": 3.1370339393615723, + "logps/chosen": -0.9552088379859924, + "logps/rejected": -1.727388858795166, + "loss": 0.8813, + "nll_loss": 0.84019935131073, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09552087634801865, + "rewards/margins": 0.07721800357103348, + "rewards/rejected": -0.17273887991905212, + "step": 428 + }, + { + "epoch": 0.2668740279937792, + "grad_norm": 0.4916917085647583, + "learning_rate": 2.855e-05, + "log_odds_chosen": 1.0642729997634888, + "log_odds_ratio": -0.4081045091152191, + "logits/chosen": 1.5744514465332031, + "logits/rejected": 2.353875160217285, + "logps/chosen": -1.2089930772781372, + "logps/rejected": -2.0792760848999023, + "loss": 0.7548, + "nll_loss": 0.7139753103256226, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12089931219816208, + "rewards/margins": 0.08702830970287323, + "rewards/rejected": -0.2079276144504547, + "step": 429 + }, + { + "epoch": 0.26749611197511663, + "grad_norm": 0.34557002782821655, + "learning_rate": 2.8499999999999998e-05, + "log_odds_chosen": 1.0231351852416992, + "log_odds_ratio": -0.4096643626689911, + "logits/chosen": 1.2953555583953857, + "logits/rejected": 1.8278127908706665, + "logps/chosen": -0.9621484279632568, + "logps/rejected": -1.7493982315063477, + "loss": 0.6828, + "nll_loss": 0.6418822407722473, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09621484577655792, + "rewards/margins": 0.07872497290372849, + "rewards/rejected": -0.174939826130867, + "step": 430 + }, + { + "epoch": 0.26811819595645414, + "grad_norm": 0.26518648862838745, + "learning_rate": 2.845e-05, + "log_odds_chosen": 0.6238207221031189, + "log_odds_ratio": -0.5502378940582275, + "logits/chosen": 0.34324905276298523, + "logits/rejected": 1.6478018760681152, + "logps/chosen": -0.7102484703063965, + "logps/rejected": -1.1375188827514648, + "loss": 0.5392, + "nll_loss": 0.484172523021698, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07102484256029129, + "rewards/margins": 0.04272705316543579, + "rewards/rejected": -0.11375189572572708, + "step": 431 + }, + { + "epoch": 0.2687402799377916, + "grad_norm": 0.5316938757896423, + "learning_rate": 2.84e-05, + "log_odds_chosen": 0.8510596752166748, + "log_odds_ratio": -0.43761295080184937, + "logits/chosen": 0.8524940013885498, + "logits/rejected": 2.9505248069763184, + "logps/chosen": -1.0910060405731201, + "logps/rejected": -1.6920232772827148, + "loss": 0.635, + "nll_loss": 0.5912693738937378, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10910061001777649, + "rewards/margins": 0.06010172516107559, + "rewards/rejected": -0.16920232772827148, + "step": 432 + }, + { + "epoch": 0.2693623639191291, + "grad_norm": 0.43368545174598694, + "learning_rate": 2.8349999999999998e-05, + "log_odds_chosen": 0.9525305032730103, + "log_odds_ratio": -0.48337578773498535, + "logits/chosen": 1.1380829811096191, + "logits/rejected": 2.6026346683502197, + "logps/chosen": -0.9431745409965515, + "logps/rejected": -1.4990016222000122, + "loss": 0.6376, + "nll_loss": 0.5892944931983948, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09431745111942291, + "rewards/margins": 0.05558270961046219, + "rewards/rejected": -0.1499001681804657, + "step": 433 + }, + { + "epoch": 0.26998444790046655, + "grad_norm": 0.42278578877449036, + "learning_rate": 2.83e-05, + "log_odds_chosen": 1.4672513008117676, + "log_odds_ratio": -0.39815062284469604, + "logits/chosen": 1.3916385173797607, + "logits/rejected": 1.4025530815124512, + "logps/chosen": -1.2792010307312012, + "logps/rejected": -2.5439209938049316, + "loss": 0.6265, + "nll_loss": 0.5867272019386292, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12792009115219116, + "rewards/margins": 0.12647199630737305, + "rewards/rejected": -0.2543920874595642, + "step": 434 + }, + { + "epoch": 0.27060653188180406, + "grad_norm": 0.3120807707309723, + "learning_rate": 2.825e-05, + "log_odds_chosen": 3.2113146781921387, + "log_odds_ratio": -0.19626906514167786, + "logits/chosen": 1.3311731815338135, + "logits/rejected": 2.3478856086730957, + "logps/chosen": -1.103269338607788, + "logps/rejected": -3.982461929321289, + "loss": 0.6981, + "nll_loss": 0.6784613132476807, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11032693833112717, + "rewards/margins": 0.2879192531108856, + "rewards/rejected": -0.3982461988925934, + "step": 435 + }, + { + "epoch": 0.2712286158631415, + "grad_norm": 0.38851398229599, + "learning_rate": 2.8199999999999998e-05, + "log_odds_chosen": 0.159065380692482, + "log_odds_ratio": -0.6263766288757324, + "logits/chosen": 2.574674606323242, + "logits/rejected": 3.693694591522217, + "logps/chosen": -1.2162128686904907, + "logps/rejected": -1.3283966779708862, + "loss": 0.8492, + "nll_loss": 0.7865930199623108, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12162129580974579, + "rewards/margins": 0.011218377389013767, + "rewards/rejected": -0.13283966481685638, + "step": 436 + }, + { + "epoch": 0.271850699844479, + "grad_norm": 0.2566005289554596, + "learning_rate": 2.815e-05, + "log_odds_chosen": 1.7677913904190063, + "log_odds_ratio": -0.20369936525821686, + "logits/chosen": 1.7614707946777344, + "logits/rejected": 1.692598819732666, + "logps/chosen": -0.8401159048080444, + "logps/rejected": -2.08687162399292, + "loss": 0.6384, + "nll_loss": 0.6180017590522766, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08401159197092056, + "rewards/margins": 0.12467555701732635, + "rewards/rejected": -0.20868715643882751, + "step": 437 + }, + { + "epoch": 0.27247278382581647, + "grad_norm": 0.3621348738670349, + "learning_rate": 2.8100000000000005e-05, + "log_odds_chosen": 1.5565158128738403, + "log_odds_ratio": -0.3029978573322296, + "logits/chosen": 1.124603271484375, + "logits/rejected": 1.5136412382125854, + "logps/chosen": -0.9071807861328125, + "logps/rejected": -1.9489789009094238, + "loss": 0.7149, + "nll_loss": 0.684585452079773, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09071807563304901, + "rewards/margins": 0.10417980700731277, + "rewards/rejected": -0.19489789009094238, + "step": 438 + }, + { + "epoch": 0.273094867807154, + "grad_norm": 0.26659613847732544, + "learning_rate": 2.8050000000000004e-05, + "log_odds_chosen": 1.9500977993011475, + "log_odds_ratio": -0.4435105323791504, + "logits/chosen": 0.07519307732582092, + "logits/rejected": 1.447799563407898, + "logps/chosen": -0.751785159111023, + "logps/rejected": -2.4592161178588867, + "loss": 0.5007, + "nll_loss": 0.45637768507003784, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.07517851889133453, + "rewards/margins": 0.17074309289455414, + "rewards/rejected": -0.24592161178588867, + "step": 439 + }, + { + "epoch": 0.2737169517884914, + "grad_norm": 0.39172080159187317, + "learning_rate": 2.8000000000000003e-05, + "log_odds_chosen": 1.3814113140106201, + "log_odds_ratio": -0.27988070249557495, + "logits/chosen": 1.5671734809875488, + "logits/rejected": 2.607694625854492, + "logps/chosen": -1.35719633102417, + "logps/rejected": -2.5071897506713867, + "loss": 0.7241, + "nll_loss": 0.6961413621902466, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1357196420431137, + "rewards/margins": 0.11499933898448944, + "rewards/rejected": -0.25071898102760315, + "step": 440 + }, + { + "epoch": 0.27433903576982893, + "grad_norm": 0.333368718624115, + "learning_rate": 2.7950000000000005e-05, + "log_odds_chosen": 2.5234973430633545, + "log_odds_ratio": -0.2566179037094116, + "logits/chosen": 0.35800909996032715, + "logits/rejected": 0.616550087928772, + "logps/chosen": -0.9834619760513306, + "logps/rejected": -3.113126516342163, + "loss": 0.5216, + "nll_loss": 0.49595892429351807, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09834619611501694, + "rewards/margins": 0.2129664272069931, + "rewards/rejected": -0.31131264567375183, + "step": 441 + }, + { + "epoch": 0.2749611197511664, + "grad_norm": 0.4109332263469696, + "learning_rate": 2.7900000000000004e-05, + "log_odds_chosen": 1.012038230895996, + "log_odds_ratio": -0.5099627375602722, + "logits/chosen": 1.473235845565796, + "logits/rejected": 3.1542019844055176, + "logps/chosen": -1.083565592765808, + "logps/rejected": -1.784134864807129, + "loss": 0.6504, + "nll_loss": 0.5994206070899963, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10835656523704529, + "rewards/margins": 0.07005693018436432, + "rewards/rejected": -0.1784134954214096, + "step": 442 + }, + { + "epoch": 0.2755832037325039, + "grad_norm": 0.2587113678455353, + "learning_rate": 2.7850000000000003e-05, + "log_odds_chosen": 1.4472953081130981, + "log_odds_ratio": -0.35538187623023987, + "logits/chosen": 1.4479413032531738, + "logits/rejected": 1.8385202884674072, + "logps/chosen": -0.7116351127624512, + "logps/rejected": -1.5945403575897217, + "loss": 0.7645, + "nll_loss": 0.7289303541183472, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07116350531578064, + "rewards/margins": 0.08829053491353989, + "rewards/rejected": -0.15945404767990112, + "step": 443 + }, + { + "epoch": 0.27620528771384134, + "grad_norm": 0.34995758533477783, + "learning_rate": 2.7800000000000005e-05, + "log_odds_chosen": 1.9064420461654663, + "log_odds_ratio": -0.24451762437820435, + "logits/chosen": 2.46639347076416, + "logits/rejected": 2.436615467071533, + "logps/chosen": -0.9964478015899658, + "logps/rejected": -2.5733470916748047, + "loss": 0.8252, + "nll_loss": 0.8007709980010986, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09964478015899658, + "rewards/margins": 0.15768994390964508, + "rewards/rejected": -0.25733470916748047, + "step": 444 + }, + { + "epoch": 0.27682737169517885, + "grad_norm": 0.35382080078125, + "learning_rate": 2.7750000000000004e-05, + "log_odds_chosen": 1.689176082611084, + "log_odds_ratio": -0.306673526763916, + "logits/chosen": 2.0533225536346436, + "logits/rejected": 2.4105100631713867, + "logps/chosen": -0.6672767996788025, + "logps/rejected": -1.78840172290802, + "loss": 0.6612, + "nll_loss": 0.6305685043334961, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06672768294811249, + "rewards/margins": 0.11211249232292175, + "rewards/rejected": -0.17884017527103424, + "step": 445 + }, + { + "epoch": 0.27744945567651635, + "grad_norm": 13.6814603805542, + "learning_rate": 2.7700000000000002e-05, + "log_odds_chosen": 1.1355578899383545, + "log_odds_ratio": -0.4099384546279907, + "logits/chosen": 0.387063205242157, + "logits/rejected": 1.498714566230774, + "logps/chosen": -1.0867677927017212, + "logps/rejected": -1.9361159801483154, + "loss": 0.6092, + "nll_loss": 0.5681792497634888, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10867678374052048, + "rewards/margins": 0.08493481576442719, + "rewards/rejected": -0.19361159205436707, + "step": 446 + }, + { + "epoch": 0.2780715396578538, + "grad_norm": 10.337038040161133, + "learning_rate": 2.7650000000000005e-05, + "log_odds_chosen": 0.6619716286659241, + "log_odds_ratio": -0.4907890260219574, + "logits/chosen": 1.320190668106079, + "logits/rejected": 1.9713841676712036, + "logps/chosen": -0.9940346479415894, + "logps/rejected": -1.3662760257720947, + "loss": 0.6622, + "nll_loss": 0.6130806803703308, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09940346330404282, + "rewards/margins": 0.037224140018224716, + "rewards/rejected": -0.13662761449813843, + "step": 447 + }, + { + "epoch": 0.2786936236391913, + "grad_norm": 0.44021517038345337, + "learning_rate": 2.7600000000000003e-05, + "log_odds_chosen": 1.695500135421753, + "log_odds_ratio": -0.2624688148498535, + "logits/chosen": 1.6216803789138794, + "logits/rejected": 1.5513224601745605, + "logps/chosen": -0.8733179569244385, + "logps/rejected": -2.162715435028076, + "loss": 0.5369, + "nll_loss": 0.5106138586997986, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08733180165290833, + "rewards/margins": 0.12893976271152496, + "rewards/rejected": -0.2162715643644333, + "step": 448 + }, + { + "epoch": 0.27931570762052876, + "grad_norm": 0.32137152552604675, + "learning_rate": 2.7550000000000002e-05, + "log_odds_chosen": 0.7980210185050964, + "log_odds_ratio": -0.4316975474357605, + "logits/chosen": 0.15938672423362732, + "logits/rejected": 1.6466835737228394, + "logps/chosen": -0.824579656124115, + "logps/rejected": -1.3830028772354126, + "loss": 0.5517, + "nll_loss": 0.5084915161132812, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08245796710252762, + "rewards/margins": 0.055842325091362, + "rewards/rejected": -0.13830029964447021, + "step": 449 + }, + { + "epoch": 0.27993779160186627, + "grad_norm": 0.2964528501033783, + "learning_rate": 2.7500000000000004e-05, + "log_odds_chosen": 1.321192741394043, + "log_odds_ratio": -0.3193834722042084, + "logits/chosen": 0.9817930459976196, + "logits/rejected": 1.3354594707489014, + "logps/chosen": -0.9670354127883911, + "logps/rejected": -1.8899989128112793, + "loss": 0.4508, + "nll_loss": 0.4188663959503174, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09670353680849075, + "rewards/margins": 0.09229634702205658, + "rewards/rejected": -0.18899987637996674, + "step": 450 + }, + { + "epoch": 0.2805598755832037, + "grad_norm": 0.29384738206863403, + "learning_rate": 2.7450000000000003e-05, + "log_odds_chosen": 1.4085745811462402, + "log_odds_ratio": -0.2883527874946594, + "logits/chosen": 0.6253018379211426, + "logits/rejected": 1.5829182863235474, + "logps/chosen": -1.206660270690918, + "logps/rejected": -2.262795925140381, + "loss": 0.5474, + "nll_loss": 0.5185524225234985, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1206660345196724, + "rewards/margins": 0.10561355948448181, + "rewards/rejected": -0.2262795865535736, + "step": 451 + }, + { + "epoch": 0.28118195956454123, + "grad_norm": 0.30835622549057007, + "learning_rate": 2.7400000000000002e-05, + "log_odds_chosen": 0.7946129441261292, + "log_odds_ratio": -0.40670210123062134, + "logits/chosen": 0.3787962794303894, + "logits/rejected": 1.9494355916976929, + "logps/chosen": -0.9770557880401611, + "logps/rejected": -1.5579826831817627, + "loss": 0.531, + "nll_loss": 0.4903002083301544, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09770557284355164, + "rewards/margins": 0.05809270963072777, + "rewards/rejected": -0.1557982712984085, + "step": 452 + }, + { + "epoch": 0.2818040435458787, + "grad_norm": 0.400429368019104, + "learning_rate": 2.7350000000000004e-05, + "log_odds_chosen": 2.4994308948516846, + "log_odds_ratio": -0.2668745219707489, + "logits/chosen": 1.045554280281067, + "logits/rejected": 2.1419525146484375, + "logps/chosen": -1.122701644897461, + "logps/rejected": -3.259110689163208, + "loss": 0.7045, + "nll_loss": 0.6777728796005249, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11227016150951385, + "rewards/margins": 0.21364091336727142, + "rewards/rejected": -0.32591110467910767, + "step": 453 + }, + { + "epoch": 0.2824261275272162, + "grad_norm": 0.40336883068084717, + "learning_rate": 2.7300000000000003e-05, + "log_odds_chosen": 1.403609275817871, + "log_odds_ratio": -0.2870773673057556, + "logits/chosen": 0.7189739942550659, + "logits/rejected": 2.4073359966278076, + "logps/chosen": -0.8193572759628296, + "logps/rejected": -1.7679626941680908, + "loss": 0.5697, + "nll_loss": 0.5410056114196777, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08193572610616684, + "rewards/margins": 0.09486055374145508, + "rewards/rejected": -0.17679627239704132, + "step": 454 + }, + { + "epoch": 0.28304821150855364, + "grad_norm": 0.278027206659317, + "learning_rate": 2.725e-05, + "log_odds_chosen": 2.407792568206787, + "log_odds_ratio": -0.10633019357919693, + "logits/chosen": 0.24619188904762268, + "logits/rejected": 1.2273669242858887, + "logps/chosen": -0.9152811765670776, + "logps/rejected": -2.8626348972320557, + "loss": 0.4933, + "nll_loss": 0.4826865792274475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09152812510728836, + "rewards/margins": 0.19473537802696228, + "rewards/rejected": -0.28626352548599243, + "step": 455 + }, + { + "epoch": 0.28367029548989114, + "grad_norm": 0.3393852710723877, + "learning_rate": 2.7200000000000004e-05, + "log_odds_chosen": 0.6036549806594849, + "log_odds_ratio": -0.5104788541793823, + "logits/chosen": 1.5058778524398804, + "logits/rejected": 2.87321400642395, + "logps/chosen": -1.0817363262176514, + "logps/rejected": -1.4985532760620117, + "loss": 0.7031, + "nll_loss": 0.6520944237709045, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10817363858222961, + "rewards/margins": 0.04168170318007469, + "rewards/rejected": -0.1498553454875946, + "step": 456 + }, + { + "epoch": 0.2842923794712286, + "grad_norm": 0.3408838212490082, + "learning_rate": 2.7150000000000003e-05, + "log_odds_chosen": 0.8742717504501343, + "log_odds_ratio": -0.43518275022506714, + "logits/chosen": 2.0727810859680176, + "logits/rejected": 2.7927446365356445, + "logps/chosen": -1.0099351406097412, + "logps/rejected": -1.6576815843582153, + "loss": 0.8319, + "nll_loss": 0.7883896827697754, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10099352151155472, + "rewards/margins": 0.06477463990449905, + "rewards/rejected": -0.16576816141605377, + "step": 457 + }, + { + "epoch": 0.2849144634525661, + "grad_norm": 0.47845590114593506, + "learning_rate": 2.7100000000000005e-05, + "log_odds_chosen": 0.9391593933105469, + "log_odds_ratio": -0.6174224019050598, + "logits/chosen": 2.785203456878662, + "logits/rejected": 2.1296370029449463, + "logps/chosen": -0.9823821187019348, + "logps/rejected": -1.7206125259399414, + "loss": 0.9131, + "nll_loss": 0.8513160347938538, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09823821485042572, + "rewards/margins": 0.07382305711507797, + "rewards/rejected": -0.1720612645149231, + "step": 458 + }, + { + "epoch": 0.28553654743390355, + "grad_norm": 1.0342097282409668, + "learning_rate": 2.7050000000000004e-05, + "log_odds_chosen": 2.006476879119873, + "log_odds_ratio": -0.3376705050468445, + "logits/chosen": 0.40670859813690186, + "logits/rejected": 2.496821880340576, + "logps/chosen": -0.828924834728241, + "logps/rejected": -2.3141822814941406, + "loss": 0.565, + "nll_loss": 0.5311979651451111, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08289249241352081, + "rewards/margins": 0.14852574467658997, + "rewards/rejected": -0.23141823709011078, + "step": 459 + }, + { + "epoch": 0.28615863141524106, + "grad_norm": 0.29952242970466614, + "learning_rate": 2.7000000000000002e-05, + "log_odds_chosen": 2.3515138626098633, + "log_odds_ratio": -0.279621422290802, + "logits/chosen": 0.9336141347885132, + "logits/rejected": 1.1252137422561646, + "logps/chosen": -0.8351043462753296, + "logps/rejected": -2.546229600906372, + "loss": 0.6218, + "nll_loss": 0.5937999486923218, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08351044356822968, + "rewards/margins": 0.17111250758171082, + "rewards/rejected": -0.2546229362487793, + "step": 460 + }, + { + "epoch": 0.2867807153965785, + "grad_norm": 0.3982970416545868, + "learning_rate": 2.6950000000000005e-05, + "log_odds_chosen": 1.8943520784378052, + "log_odds_ratio": -0.33027487993240356, + "logits/chosen": 1.9241790771484375, + "logits/rejected": 2.623364210128784, + "logps/chosen": -1.0681153535842896, + "logps/rejected": -2.736832857131958, + "loss": 0.715, + "nll_loss": 0.6819977760314941, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1068115383386612, + "rewards/margins": 0.16687177121639252, + "rewards/rejected": -0.2736833095550537, + "step": 461 + }, + { + "epoch": 0.287402799377916, + "grad_norm": 0.5346242785453796, + "learning_rate": 2.6900000000000003e-05, + "log_odds_chosen": 0.7686117887496948, + "log_odds_ratio": -0.7439150810241699, + "logits/chosen": 1.9250644445419312, + "logits/rejected": 2.6550610065460205, + "logps/chosen": -1.2357155084609985, + "logps/rejected": -1.7963919639587402, + "loss": 0.7496, + "nll_loss": 0.6752496957778931, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12357156723737717, + "rewards/margins": 0.056067660450935364, + "rewards/rejected": -0.17963922023773193, + "step": 462 + }, + { + "epoch": 0.2880248833592535, + "grad_norm": 0.39354756474494934, + "learning_rate": 2.6850000000000002e-05, + "log_odds_chosen": 1.5090014934539795, + "log_odds_ratio": -0.3865567445755005, + "logits/chosen": 1.0894285440444946, + "logits/rejected": 1.4281337261199951, + "logps/chosen": -1.1414062976837158, + "logps/rejected": -2.367482900619507, + "loss": 0.6988, + "nll_loss": 0.6601517200469971, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11414062976837158, + "rewards/margins": 0.12260768562555313, + "rewards/rejected": -0.23674830794334412, + "step": 463 + }, + { + "epoch": 0.288646967340591, + "grad_norm": 0.35714831948280334, + "learning_rate": 2.6800000000000004e-05, + "log_odds_chosen": 1.6752851009368896, + "log_odds_ratio": -0.21526263654232025, + "logits/chosen": 0.7329784631729126, + "logits/rejected": 1.1709595918655396, + "logps/chosen": -0.9770262241363525, + "logps/rejected": -2.2166688442230225, + "loss": 0.6376, + "nll_loss": 0.6160634160041809, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09770262241363525, + "rewards/margins": 0.12396427989006042, + "rewards/rejected": -0.22166690230369568, + "step": 464 + }, + { + "epoch": 0.2892690513219285, + "grad_norm": 0.3404373526573181, + "learning_rate": 2.6750000000000003e-05, + "log_odds_chosen": 1.9938693046569824, + "log_odds_ratio": -0.2569563686847687, + "logits/chosen": 2.7068655490875244, + "logits/rejected": 2.8174753189086914, + "logps/chosen": -0.9502182006835938, + "logps/rejected": -2.4726524353027344, + "loss": 0.9042, + "nll_loss": 0.8784732818603516, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0950218141078949, + "rewards/margins": 0.15224343538284302, + "rewards/rejected": -0.24726524949073792, + "step": 465 + }, + { + "epoch": 0.28989113530326593, + "grad_norm": 0.5440021753311157, + "learning_rate": 2.6700000000000002e-05, + "log_odds_chosen": 1.1913514137268066, + "log_odds_ratio": -0.6887476444244385, + "logits/chosen": 0.07142001390457153, + "logits/rejected": 1.684450387954712, + "logps/chosen": -1.438823938369751, + "logps/rejected": -2.2212705612182617, + "loss": 0.5957, + "nll_loss": 0.5267887115478516, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1438823938369751, + "rewards/margins": 0.07824468612670898, + "rewards/rejected": -0.22212707996368408, + "step": 466 + }, + { + "epoch": 0.29051321928460344, + "grad_norm": 0.2981872260570526, + "learning_rate": 2.6650000000000004e-05, + "log_odds_chosen": 0.7413637638092041, + "log_odds_ratio": -0.46821674704551697, + "logits/chosen": 0.9663263559341431, + "logits/rejected": 2.2991671562194824, + "logps/chosen": -0.8860517740249634, + "logps/rejected": -1.3749059438705444, + "loss": 0.6357, + "nll_loss": 0.5888561606407166, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08860517293214798, + "rewards/margins": 0.048885416239500046, + "rewards/rejected": -0.13749060034751892, + "step": 467 + }, + { + "epoch": 0.2911353032659409, + "grad_norm": 0.33800575137138367, + "learning_rate": 2.6600000000000003e-05, + "log_odds_chosen": 1.826517105102539, + "log_odds_ratio": -0.31884247064590454, + "logits/chosen": 0.21384793519973755, + "logits/rejected": 1.3374475240707397, + "logps/chosen": -0.8113455772399902, + "logps/rejected": -2.257542610168457, + "loss": 0.5751, + "nll_loss": 0.5432652831077576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08113455772399902, + "rewards/margins": 0.14461968839168549, + "rewards/rejected": -0.2257542759180069, + "step": 468 + }, + { + "epoch": 0.2917573872472784, + "grad_norm": 0.9032492637634277, + "learning_rate": 2.655e-05, + "log_odds_chosen": 1.8913346529006958, + "log_odds_ratio": -0.431749552488327, + "logits/chosen": -0.06455998122692108, + "logits/rejected": -0.07008242607116699, + "logps/chosen": -0.9198107719421387, + "logps/rejected": -2.359321355819702, + "loss": 0.6571, + "nll_loss": 0.613900899887085, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09198108315467834, + "rewards/margins": 0.14395105838775635, + "rewards/rejected": -0.2359321266412735, + "step": 469 + }, + { + "epoch": 0.29237947122861585, + "grad_norm": 0.27374881505966187, + "learning_rate": 2.6500000000000004e-05, + "log_odds_chosen": 1.2873222827911377, + "log_odds_ratio": -0.34891578555107117, + "logits/chosen": -1.634531021118164, + "logits/rejected": 1.292966604232788, + "logps/chosen": -1.1532635688781738, + "logps/rejected": -2.1369102001190186, + "loss": 0.5003, + "nll_loss": 0.4654574394226074, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11532635986804962, + "rewards/margins": 0.09836466610431671, + "rewards/rejected": -0.21369104087352753, + "step": 470 + }, + { + "epoch": 0.29300155520995336, + "grad_norm": 0.5123220086097717, + "learning_rate": 2.6450000000000003e-05, + "log_odds_chosen": 1.2327231168746948, + "log_odds_ratio": -0.3070361614227295, + "logits/chosen": 1.1967523097991943, + "logits/rejected": 2.225430488586426, + "logps/chosen": -0.8559145927429199, + "logps/rejected": -1.7130717039108276, + "loss": 0.6756, + "nll_loss": 0.6448498368263245, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08559145778417587, + "rewards/margins": 0.08571571856737137, + "rewards/rejected": -0.17130717635154724, + "step": 471 + }, + { + "epoch": 0.2936236391912908, + "grad_norm": 0.3058389127254486, + "learning_rate": 2.64e-05, + "log_odds_chosen": 2.075676918029785, + "log_odds_ratio": -0.3366090655326843, + "logits/chosen": 1.349937915802002, + "logits/rejected": 2.2221884727478027, + "logps/chosen": -0.7084921598434448, + "logps/rejected": -2.309854507446289, + "loss": 0.6101, + "nll_loss": 0.5764274597167969, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0708492174744606, + "rewards/margins": 0.16013625264167786, + "rewards/rejected": -0.23098547756671906, + "step": 472 + }, + { + "epoch": 0.2942457231726283, + "grad_norm": 0.40857183933258057, + "learning_rate": 2.6350000000000004e-05, + "log_odds_chosen": 1.1024802923202515, + "log_odds_ratio": -0.33238816261291504, + "logits/chosen": -0.2755090296268463, + "logits/rejected": 0.9011416435241699, + "logps/chosen": -1.0526714324951172, + "logps/rejected": -1.9224944114685059, + "loss": 0.5391, + "nll_loss": 0.5058131814002991, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10526715219020844, + "rewards/margins": 0.08698229491710663, + "rewards/rejected": -0.19224943220615387, + "step": 473 + }, + { + "epoch": 0.29486780715396577, + "grad_norm": 0.31799551844596863, + "learning_rate": 2.6300000000000002e-05, + "log_odds_chosen": 1.5882513523101807, + "log_odds_ratio": -0.3513425290584564, + "logits/chosen": 0.6262343525886536, + "logits/rejected": 0.8877112865447998, + "logps/chosen": -0.9927375912666321, + "logps/rejected": -2.278759241104126, + "loss": 0.6255, + "nll_loss": 0.5903184413909912, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09927376359701157, + "rewards/margins": 0.12860216200351715, + "rewards/rejected": -0.22787591814994812, + "step": 474 + }, + { + "epoch": 0.2954898911353033, + "grad_norm": 0.30045124888420105, + "learning_rate": 2.625e-05, + "log_odds_chosen": 3.8556902408599854, + "log_odds_ratio": -0.16180792450904846, + "logits/chosen": 1.1640573740005493, + "logits/rejected": 1.8254338502883911, + "logps/chosen": -0.5255829095840454, + "logps/rejected": -3.281663179397583, + "loss": 0.646, + "nll_loss": 0.6297725439071655, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0525582879781723, + "rewards/margins": 0.27560803294181824, + "rewards/rejected": -0.32816633582115173, + "step": 475 + }, + { + "epoch": 0.2961119751166407, + "grad_norm": 0.42903876304626465, + "learning_rate": 2.6200000000000003e-05, + "log_odds_chosen": 1.0457801818847656, + "log_odds_ratio": -0.3495453894138336, + "logits/chosen": 1.600659728050232, + "logits/rejected": 2.682332992553711, + "logps/chosen": -0.9263206124305725, + "logps/rejected": -1.708702564239502, + "loss": 0.786, + "nll_loss": 0.7510562539100647, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09263206273317337, + "rewards/margins": 0.07823819667100906, + "rewards/rejected": -0.17087027430534363, + "step": 476 + }, + { + "epoch": 0.29673405909797823, + "grad_norm": 0.5805628895759583, + "learning_rate": 2.6150000000000002e-05, + "log_odds_chosen": 1.5545982122421265, + "log_odds_ratio": -0.3727916479110718, + "logits/chosen": 0.31747573614120483, + "logits/rejected": 1.172074556350708, + "logps/chosen": -1.09333074092865, + "logps/rejected": -2.3875832557678223, + "loss": 0.6095, + "nll_loss": 0.572248101234436, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10933306813240051, + "rewards/margins": 0.12942524254322052, + "rewards/rejected": -0.23875831067562103, + "step": 477 + }, + { + "epoch": 0.2973561430793157, + "grad_norm": 0.31039735674858093, + "learning_rate": 2.61e-05, + "log_odds_chosen": 1.6635842323303223, + "log_odds_ratio": -0.3079639971256256, + "logits/chosen": 2.197296142578125, + "logits/rejected": 2.432086706161499, + "logps/chosen": -0.9411294460296631, + "logps/rejected": -2.21671199798584, + "loss": 0.8491, + "nll_loss": 0.8183450102806091, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09411294758319855, + "rewards/margins": 0.12755824625492096, + "rewards/rejected": -0.2216711938381195, + "step": 478 + }, + { + "epoch": 0.2979782270606532, + "grad_norm": 0.3499162793159485, + "learning_rate": 2.6050000000000003e-05, + "log_odds_chosen": 2.1621012687683105, + "log_odds_ratio": -0.23205527663230896, + "logits/chosen": 2.0782124996185303, + "logits/rejected": 3.038877248764038, + "logps/chosen": -0.9419960379600525, + "logps/rejected": -2.5322046279907227, + "loss": 0.7844, + "nll_loss": 0.7611782550811768, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09419961273670197, + "rewards/margins": 0.15902084112167358, + "rewards/rejected": -0.25322046875953674, + "step": 479 + }, + { + "epoch": 0.2986003110419907, + "grad_norm": 0.32958048582077026, + "learning_rate": 2.6000000000000002e-05, + "log_odds_chosen": 1.6519575119018555, + "log_odds_ratio": -0.24578575789928436, + "logits/chosen": 1.0557212829589844, + "logits/rejected": 2.017346143722534, + "logps/chosen": -0.8723708987236023, + "logps/rejected": -2.108044385910034, + "loss": 0.5416, + "nll_loss": 0.5170350074768066, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08723708987236023, + "rewards/margins": 0.1235673651099205, + "rewards/rejected": -0.21080444753170013, + "step": 480 + }, + { + "epoch": 0.29922239502332815, + "grad_norm": 0.296722412109375, + "learning_rate": 2.595e-05, + "log_odds_chosen": 1.3938841819763184, + "log_odds_ratio": -0.23713521659374237, + "logits/chosen": 0.20026570558547974, + "logits/rejected": 2.2107229232788086, + "logps/chosen": -0.8148910999298096, + "logps/rejected": -1.8060866594314575, + "loss": 0.5741, + "nll_loss": 0.5503672361373901, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08148910850286484, + "rewards/margins": 0.09911955893039703, + "rewards/rejected": -0.18060868978500366, + "step": 481 + }, + { + "epoch": 0.29984447900466565, + "grad_norm": 0.3062569200992584, + "learning_rate": 2.5900000000000003e-05, + "log_odds_chosen": 2.3930747509002686, + "log_odds_ratio": -0.28188061714172363, + "logits/chosen": 1.990435242652893, + "logits/rejected": 3.036588191986084, + "logps/chosen": -0.8244961500167847, + "logps/rejected": -2.826347827911377, + "loss": 0.7671, + "nll_loss": 0.738939642906189, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08244961500167847, + "rewards/margins": 0.2001851499080658, + "rewards/rejected": -0.28263476490974426, + "step": 482 + }, + { + "epoch": 0.3004665629860031, + "grad_norm": 0.38664117455482483, + "learning_rate": 2.585e-05, + "log_odds_chosen": 1.1401886940002441, + "log_odds_ratio": -0.37039393186569214, + "logits/chosen": 1.838365912437439, + "logits/rejected": 2.5238819122314453, + "logps/chosen": -1.0199286937713623, + "logps/rejected": -1.7830042839050293, + "loss": 0.8311, + "nll_loss": 0.7940711975097656, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10199287533760071, + "rewards/margins": 0.07630756497383118, + "rewards/rejected": -0.17830044031143188, + "step": 483 + }, + { + "epoch": 0.3010886469673406, + "grad_norm": 0.5141417980194092, + "learning_rate": 2.58e-05, + "log_odds_chosen": 0.9313275814056396, + "log_odds_ratio": -0.5068573951721191, + "logits/chosen": -0.31626325845718384, + "logits/rejected": 2.140399217605591, + "logps/chosen": -1.3676080703735352, + "logps/rejected": -2.118687152862549, + "loss": 0.5733, + "nll_loss": 0.5226230621337891, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.13676080107688904, + "rewards/margins": 0.07510790228843689, + "rewards/rejected": -0.21186870336532593, + "step": 484 + }, + { + "epoch": 0.30171073094867806, + "grad_norm": 0.3580343425273895, + "learning_rate": 2.5750000000000002e-05, + "log_odds_chosen": 1.8977057933807373, + "log_odds_ratio": -0.39567553997039795, + "logits/chosen": 1.289947748184204, + "logits/rejected": 1.3862617015838623, + "logps/chosen": -0.8238151669502258, + "logps/rejected": -2.428439140319824, + "loss": 0.6119, + "nll_loss": 0.572327733039856, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08238151669502258, + "rewards/margins": 0.1604623943567276, + "rewards/rejected": -0.24284391105175018, + "step": 485 + }, + { + "epoch": 0.30233281493001557, + "grad_norm": 0.4157785177230835, + "learning_rate": 2.57e-05, + "log_odds_chosen": 1.6333980560302734, + "log_odds_ratio": -0.2904350161552429, + "logits/chosen": 1.140645146369934, + "logits/rejected": 1.8697725534439087, + "logps/chosen": -0.847370982170105, + "logps/rejected": -1.8554041385650635, + "loss": 0.6347, + "nll_loss": 0.6056182384490967, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08473709970712662, + "rewards/margins": 0.10080333054065704, + "rewards/rejected": -0.18554043769836426, + "step": 486 + }, + { + "epoch": 0.302954898911353, + "grad_norm": 0.4197591543197632, + "learning_rate": 2.5650000000000003e-05, + "log_odds_chosen": 1.9002084732055664, + "log_odds_ratio": -0.3634560704231262, + "logits/chosen": 1.3721474409103394, + "logits/rejected": 2.3936052322387695, + "logps/chosen": -0.7132107615470886, + "logps/rejected": -2.0016989707946777, + "loss": 0.622, + "nll_loss": 0.5856096744537354, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07132107764482498, + "rewards/margins": 0.1288488358259201, + "rewards/rejected": -0.2001699060201645, + "step": 487 + }, + { + "epoch": 0.30357698289269053, + "grad_norm": 0.3078671395778656, + "learning_rate": 2.5600000000000002e-05, + "log_odds_chosen": 1.5222787857055664, + "log_odds_ratio": -0.25322094559669495, + "logits/chosen": 0.909111738204956, + "logits/rejected": 1.0842841863632202, + "logps/chosen": -0.8780727982521057, + "logps/rejected": -1.9370026588439941, + "loss": 0.6661, + "nll_loss": 0.6408250331878662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08780728280544281, + "rewards/margins": 0.10589298605918884, + "rewards/rejected": -0.19370028376579285, + "step": 488 + }, + { + "epoch": 0.304199066874028, + "grad_norm": 0.3910052478313446, + "learning_rate": 2.555e-05, + "log_odds_chosen": 2.0691475868225098, + "log_odds_ratio": -0.26565665006637573, + "logits/chosen": -0.1676226258277893, + "logits/rejected": 0.8520660400390625, + "logps/chosen": -1.0638614892959595, + "logps/rejected": -2.8081817626953125, + "loss": 0.4561, + "nll_loss": 0.42956626415252686, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10638613998889923, + "rewards/margins": 0.17443202435970306, + "rewards/rejected": -0.2808181643486023, + "step": 489 + }, + { + "epoch": 0.3048211508553655, + "grad_norm": 0.4584553837776184, + "learning_rate": 2.5500000000000003e-05, + "log_odds_chosen": 2.8596675395965576, + "log_odds_ratio": -0.1520201563835144, + "logits/chosen": 1.6730536222457886, + "logits/rejected": 1.3357551097869873, + "logps/chosen": -1.114020586013794, + "logps/rejected": -3.625382423400879, + "loss": 0.673, + "nll_loss": 0.6578033566474915, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11140205711126328, + "rewards/margins": 0.2511361837387085, + "rewards/rejected": -0.36253824830055237, + "step": 490 + }, + { + "epoch": 0.30544323483670294, + "grad_norm": 1.8427069187164307, + "learning_rate": 2.5450000000000002e-05, + "log_odds_chosen": 1.788784146308899, + "log_odds_ratio": -0.3101259469985962, + "logits/chosen": -1.2556865215301514, + "logits/rejected": 1.946107268333435, + "logps/chosen": -1.0118483304977417, + "logps/rejected": -2.407310962677002, + "loss": 0.4638, + "nll_loss": 0.4328010380268097, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10118483006954193, + "rewards/margins": 0.13954627513885498, + "rewards/rejected": -0.2407311201095581, + "step": 491 + }, + { + "epoch": 0.30606531881804044, + "grad_norm": 0.2789076268672943, + "learning_rate": 2.54e-05, + "log_odds_chosen": 1.594778060913086, + "log_odds_ratio": -0.2929396629333496, + "logits/chosen": -1.0101150274276733, + "logits/rejected": 1.347983956336975, + "logps/chosen": -0.8147050142288208, + "logps/rejected": -1.9583085775375366, + "loss": 0.4648, + "nll_loss": 0.4355263411998749, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08147049695253372, + "rewards/margins": 0.11436036229133606, + "rewards/rejected": -0.19583086669445038, + "step": 492 + }, + { + "epoch": 0.3066874027993779, + "grad_norm": 0.33555591106414795, + "learning_rate": 2.5350000000000003e-05, + "log_odds_chosen": 3.407597541809082, + "log_odds_ratio": -0.0761854499578476, + "logits/chosen": 2.7496001720428467, + "logits/rejected": 1.1246817111968994, + "logps/chosen": -0.6887914538383484, + "logps/rejected": -3.3821420669555664, + "loss": 0.6723, + "nll_loss": 0.6646910309791565, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0688791424036026, + "rewards/margins": 0.2693350613117218, + "rewards/rejected": -0.3382142186164856, + "step": 493 + }, + { + "epoch": 0.3073094867807154, + "grad_norm": 0.45261499285697937, + "learning_rate": 2.5300000000000002e-05, + "log_odds_chosen": 2.10783052444458, + "log_odds_ratio": -0.2936263680458069, + "logits/chosen": 0.6424102783203125, + "logits/rejected": 1.272334098815918, + "logps/chosen": -1.630174994468689, + "logps/rejected": -3.450590133666992, + "loss": 0.5338, + "nll_loss": 0.5044101476669312, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16301749646663666, + "rewards/margins": 0.18204151093959808, + "rewards/rejected": -0.34505900740623474, + "step": 494 + }, + { + "epoch": 0.30793157076205285, + "grad_norm": 0.2947097420692444, + "learning_rate": 2.525e-05, + "log_odds_chosen": 1.4103057384490967, + "log_odds_ratio": -0.363348126411438, + "logits/chosen": 1.169882893562317, + "logits/rejected": 3.214463233947754, + "logps/chosen": -0.8717095851898193, + "logps/rejected": -1.8850769996643066, + "loss": 0.7606, + "nll_loss": 0.7242623567581177, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08717096596956253, + "rewards/margins": 0.10133674740791321, + "rewards/rejected": -0.18850769102573395, + "step": 495 + }, + { + "epoch": 0.30855365474339036, + "grad_norm": 0.4085759222507477, + "learning_rate": 2.5200000000000003e-05, + "log_odds_chosen": 1.9591169357299805, + "log_odds_ratio": -0.2569133937358856, + "logits/chosen": 0.9140021800994873, + "logits/rejected": 1.3358986377716064, + "logps/chosen": -0.8664543032646179, + "logps/rejected": -2.4069149494171143, + "loss": 0.6013, + "nll_loss": 0.5756421685218811, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08664542436599731, + "rewards/margins": 0.15404607355594635, + "rewards/rejected": -0.24069149792194366, + "step": 496 + }, + { + "epoch": 0.3091757387247278, + "grad_norm": 0.9854008555412292, + "learning_rate": 2.515e-05, + "log_odds_chosen": 2.951676368713379, + "log_odds_ratio": -0.07785683870315552, + "logits/chosen": -0.4764617085456848, + "logits/rejected": 0.5835741758346558, + "logps/chosen": -0.888968825340271, + "logps/rejected": -3.347611427307129, + "loss": 0.445, + "nll_loss": 0.43723028898239136, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08889688551425934, + "rewards/margins": 0.24586427211761475, + "rewards/rejected": -0.3347611427307129, + "step": 497 + }, + { + "epoch": 0.3097978227060653, + "grad_norm": 0.4051247239112854, + "learning_rate": 2.51e-05, + "log_odds_chosen": 1.4772777557373047, + "log_odds_ratio": -0.3094131350517273, + "logits/chosen": 0.31006258726119995, + "logits/rejected": 0.590751588344574, + "logps/chosen": -0.988946795463562, + "logps/rejected": -2.1498780250549316, + "loss": 0.5912, + "nll_loss": 0.5602656602859497, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09889468550682068, + "rewards/margins": 0.11609311401844025, + "rewards/rejected": -0.21498778462409973, + "step": 498 + }, + { + "epoch": 0.3104199066874028, + "grad_norm": 0.3615843951702118, + "learning_rate": 2.5050000000000002e-05, + "log_odds_chosen": 1.8403782844543457, + "log_odds_ratio": -0.2444225549697876, + "logits/chosen": 2.0023744106292725, + "logits/rejected": 1.907917857170105, + "logps/chosen": -0.7947826385498047, + "logps/rejected": -2.229252576828003, + "loss": 0.7428, + "nll_loss": 0.7183516025543213, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07947827130556107, + "rewards/margins": 0.14344698190689087, + "rewards/rejected": -0.22292526066303253, + "step": 499 + }, + { + "epoch": 0.3110419906687403, + "grad_norm": 0.4477740526199341, + "learning_rate": 2.5e-05, + "log_odds_chosen": 1.363769769668579, + "log_odds_ratio": -0.4156811535358429, + "logits/chosen": 1.2972413301467896, + "logits/rejected": 1.1339985132217407, + "logps/chosen": -1.0941433906555176, + "logps/rejected": -2.226752281188965, + "loss": 0.7844, + "nll_loss": 0.7427895665168762, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10941435396671295, + "rewards/margins": 0.11326086521148682, + "rewards/rejected": -0.22267521917819977, + "step": 500 + }, + { + "epoch": 0.3116640746500778, + "grad_norm": 1.0008854866027832, + "learning_rate": 2.495e-05, + "log_odds_chosen": 1.8221601247787476, + "log_odds_ratio": -0.35362672805786133, + "logits/chosen": 2.6204891204833984, + "logits/rejected": 3.1090645790100098, + "logps/chosen": -0.8226886987686157, + "logps/rejected": -2.1769776344299316, + "loss": 0.9722, + "nll_loss": 0.9368324279785156, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0822688639163971, + "rewards/margins": 0.13542889058589935, + "rewards/rejected": -0.21769778430461884, + "step": 501 + }, + { + "epoch": 0.31228615863141523, + "grad_norm": 1.1925100088119507, + "learning_rate": 2.4900000000000002e-05, + "log_odds_chosen": 2.48640775680542, + "log_odds_ratio": -0.30636078119277954, + "logits/chosen": 1.1644136905670166, + "logits/rejected": 1.6417752504348755, + "logps/chosen": -1.196340799331665, + "logps/rejected": -3.419325828552246, + "loss": 0.7578, + "nll_loss": 0.7271352410316467, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11963406950235367, + "rewards/margins": 0.2222985029220581, + "rewards/rejected": -0.34193259477615356, + "step": 502 + }, + { + "epoch": 0.31290824261275274, + "grad_norm": 0.7043509483337402, + "learning_rate": 2.485e-05, + "log_odds_chosen": 1.8432042598724365, + "log_odds_ratio": -0.4298593997955322, + "logits/chosen": 1.2005046606063843, + "logits/rejected": 2.524587869644165, + "logps/chosen": -1.1420859098434448, + "logps/rejected": -2.6464970111846924, + "loss": 0.7629, + "nll_loss": 0.7198686599731445, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11420859396457672, + "rewards/margins": 0.15044112503528595, + "rewards/rejected": -0.2646496891975403, + "step": 503 + }, + { + "epoch": 0.3135303265940902, + "grad_norm": 0.3517081141471863, + "learning_rate": 2.48e-05, + "log_odds_chosen": 1.7295416593551636, + "log_odds_ratio": -0.34764620661735535, + "logits/chosen": 2.7036778926849365, + "logits/rejected": 2.9315719604492188, + "logps/chosen": -0.7309411764144897, + "logps/rejected": -1.7857908010482788, + "loss": 0.9757, + "nll_loss": 0.940963625907898, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07309411466121674, + "rewards/margins": 0.1054849624633789, + "rewards/rejected": -0.17857909202575684, + "step": 504 + }, + { + "epoch": 0.3141524105754277, + "grad_norm": 0.5126684904098511, + "learning_rate": 2.4750000000000002e-05, + "log_odds_chosen": 1.807173728942871, + "log_odds_ratio": -0.328085720539093, + "logits/chosen": -0.5260485410690308, + "logits/rejected": 1.1646230220794678, + "logps/chosen": -0.7566425204277039, + "logps/rejected": -2.0484488010406494, + "loss": 0.5638, + "nll_loss": 0.5310238003730774, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07566425204277039, + "rewards/margins": 0.12918062508106232, + "rewards/rejected": -0.2048448771238327, + "step": 505 + }, + { + "epoch": 0.31477449455676515, + "grad_norm": 0.6230505108833313, + "learning_rate": 2.47e-05, + "log_odds_chosen": 1.132399320602417, + "log_odds_ratio": -0.4643266499042511, + "logits/chosen": -0.7182011008262634, + "logits/rejected": 1.7023909091949463, + "logps/chosen": -1.1046946048736572, + "logps/rejected": -2.0132546424865723, + "loss": 0.57, + "nll_loss": 0.5235822200775146, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11046946048736572, + "rewards/margins": 0.09085602313280106, + "rewards/rejected": -0.20132547616958618, + "step": 506 + }, + { + "epoch": 0.31539657853810266, + "grad_norm": 0.4481191039085388, + "learning_rate": 2.465e-05, + "log_odds_chosen": 1.0254859924316406, + "log_odds_ratio": -0.45982372760772705, + "logits/chosen": -0.05534517765045166, + "logits/rejected": 0.7204030156135559, + "logps/chosen": -1.1251353025436401, + "logps/rejected": -1.9631757736206055, + "loss": 0.5937, + "nll_loss": 0.5477457642555237, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11251352727413177, + "rewards/margins": 0.08380405604839325, + "rewards/rejected": -0.19631758332252502, + "step": 507 + }, + { + "epoch": 0.3160186625194401, + "grad_norm": 0.43889644742012024, + "learning_rate": 2.46e-05, + "log_odds_chosen": 1.6597342491149902, + "log_odds_ratio": -0.36858415603637695, + "logits/chosen": 0.8218276500701904, + "logits/rejected": 2.8646650314331055, + "logps/chosen": -0.9696469902992249, + "logps/rejected": -2.277371883392334, + "loss": 0.6888, + "nll_loss": 0.6519296765327454, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09696470201015472, + "rewards/margins": 0.13077248632907867, + "rewards/rejected": -0.2277372032403946, + "step": 508 + }, + { + "epoch": 0.3166407465007776, + "grad_norm": 0.2650894224643707, + "learning_rate": 2.455e-05, + "log_odds_chosen": 1.8257701396942139, + "log_odds_ratio": -0.2810014486312866, + "logits/chosen": -0.23144686222076416, + "logits/rejected": 1.1131678819656372, + "logps/chosen": -1.10640287399292, + "logps/rejected": -2.5477852821350098, + "loss": 0.4731, + "nll_loss": 0.4450156092643738, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11064029484987259, + "rewards/margins": 0.14413823187351227, + "rewards/rejected": -0.25477853417396545, + "step": 509 + }, + { + "epoch": 0.31726283048211507, + "grad_norm": 0.3267921209335327, + "learning_rate": 2.45e-05, + "log_odds_chosen": 3.1637048721313477, + "log_odds_ratio": -0.12191061675548553, + "logits/chosen": 0.48765379190444946, + "logits/rejected": 1.0577715635299683, + "logps/chosen": -0.9315196871757507, + "logps/rejected": -3.6205620765686035, + "loss": 0.6443, + "nll_loss": 0.6321412920951843, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09315197169780731, + "rewards/margins": 0.2689042389392853, + "rewards/rejected": -0.3620561957359314, + "step": 510 + }, + { + "epoch": 0.3178849144634526, + "grad_norm": 0.3435508608818054, + "learning_rate": 2.445e-05, + "log_odds_chosen": 1.2938909530639648, + "log_odds_ratio": -0.27308449149131775, + "logits/chosen": -0.5473048686981201, + "logits/rejected": 1.6775987148284912, + "logps/chosen": -0.9595645666122437, + "logps/rejected": -1.921897292137146, + "loss": 0.4999, + "nll_loss": 0.47254717350006104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0959564596414566, + "rewards/margins": 0.09623328596353531, + "rewards/rejected": -0.19218973815441132, + "step": 511 + }, + { + "epoch": 0.31850699844479, + "grad_norm": 0.3145331144332886, + "learning_rate": 2.44e-05, + "log_odds_chosen": 1.6362143754959106, + "log_odds_ratio": -0.24002549052238464, + "logits/chosen": 0.3165720999240875, + "logits/rejected": 0.3282266855239868, + "logps/chosen": -0.7263202667236328, + "logps/rejected": -1.7319252490997314, + "loss": 0.5356, + "nll_loss": 0.5115996599197388, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07263202965259552, + "rewards/margins": 0.1005605086684227, + "rewards/rejected": -0.17319253087043762, + "step": 512 + }, + { + "epoch": 0.31912908242612753, + "grad_norm": 0.6429533958435059, + "learning_rate": 2.435e-05, + "log_odds_chosen": 2.0500099658966064, + "log_odds_ratio": -0.2836396098136902, + "logits/chosen": 0.73582923412323, + "logits/rejected": 0.8022631406784058, + "logps/chosen": -1.438249111175537, + "logps/rejected": -2.903639793395996, + "loss": 0.6947, + "nll_loss": 0.6662985682487488, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14382490515708923, + "rewards/margins": 0.14653907716274261, + "rewards/rejected": -0.29036396741867065, + "step": 513 + }, + { + "epoch": 0.319751166407465, + "grad_norm": 0.27727633714675903, + "learning_rate": 2.43e-05, + "log_odds_chosen": 1.874891996383667, + "log_odds_ratio": -0.25810471177101135, + "logits/chosen": -0.31271833181381226, + "logits/rejected": 0.6087108850479126, + "logps/chosen": -0.8699772357940674, + "logps/rejected": -2.310192108154297, + "loss": 0.5346, + "nll_loss": 0.5087747573852539, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08699773252010345, + "rewards/margins": 0.14402146637439728, + "rewards/rejected": -0.23101919889450073, + "step": 514 + }, + { + "epoch": 0.3203732503888025, + "grad_norm": 0.2998380661010742, + "learning_rate": 2.425e-05, + "log_odds_chosen": 1.8412083387374878, + "log_odds_ratio": -0.2547343969345093, + "logits/chosen": -0.4206013083457947, + "logits/rejected": 0.5524145364761353, + "logps/chosen": -0.7460927963256836, + "logps/rejected": -2.051219940185547, + "loss": 0.4801, + "nll_loss": 0.45466262102127075, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07460928708314896, + "rewards/margins": 0.13051271438598633, + "rewards/rejected": -0.2051219940185547, + "step": 515 + }, + { + "epoch": 0.32099533437014, + "grad_norm": 1.0635550022125244, + "learning_rate": 2.4200000000000002e-05, + "log_odds_chosen": 3.999274492263794, + "log_odds_ratio": -0.14171874523162842, + "logits/chosen": 0.9166915416717529, + "logits/rejected": -0.4675154387950897, + "logps/chosen": -0.9731952548027039, + "logps/rejected": -4.343837738037109, + "loss": 0.6554, + "nll_loss": 0.641204833984375, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09731952100992203, + "rewards/margins": 0.337064266204834, + "rewards/rejected": -0.4343838095664978, + "step": 516 + }, + { + "epoch": 0.32161741835147745, + "grad_norm": 0.31030628085136414, + "learning_rate": 2.415e-05, + "log_odds_chosen": 1.8920516967773438, + "log_odds_ratio": -0.2721685767173767, + "logits/chosen": -0.13164083659648895, + "logits/rejected": 1.2151129245758057, + "logps/chosen": -0.9168779850006104, + "logps/rejected": -2.433690071105957, + "loss": 0.5855, + "nll_loss": 0.5582566261291504, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09168778359889984, + "rewards/margins": 0.15168119966983795, + "rewards/rejected": -0.2433689832687378, + "step": 517 + }, + { + "epoch": 0.32223950233281495, + "grad_norm": 0.288943886756897, + "learning_rate": 2.41e-05, + "log_odds_chosen": 1.9927161931991577, + "log_odds_ratio": -0.2383418083190918, + "logits/chosen": 0.47148334980010986, + "logits/rejected": 1.6919628381729126, + "logps/chosen": -0.9311184883117676, + "logps/rejected": -2.584421157836914, + "loss": 0.6358, + "nll_loss": 0.6119847297668457, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09311184287071228, + "rewards/margins": 0.16533026099205017, + "rewards/rejected": -0.25844210386276245, + "step": 518 + }, + { + "epoch": 0.3228615863141524, + "grad_norm": 0.32276710867881775, + "learning_rate": 2.4050000000000002e-05, + "log_odds_chosen": 2.0655078887939453, + "log_odds_ratio": -0.2558887302875519, + "logits/chosen": 1.606288194656372, + "logits/rejected": 2.212973117828369, + "logps/chosen": -0.7719038128852844, + "logps/rejected": -2.3773133754730225, + "loss": 0.7828, + "nll_loss": 0.7572447657585144, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07719038426876068, + "rewards/margins": 0.16054093837738037, + "rewards/rejected": -0.23773130774497986, + "step": 519 + }, + { + "epoch": 0.3234836702954899, + "grad_norm": 0.38670703768730164, + "learning_rate": 2.4e-05, + "log_odds_chosen": 1.765141487121582, + "log_odds_ratio": -0.28975823521614075, + "logits/chosen": 2.2421889305114746, + "logits/rejected": 2.0839686393737793, + "logps/chosen": -0.9007455110549927, + "logps/rejected": -2.3477883338928223, + "loss": 0.7432, + "nll_loss": 0.7142671346664429, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0900745540857315, + "rewards/margins": 0.14470428228378296, + "rewards/rejected": -0.23477882146835327, + "step": 520 + }, + { + "epoch": 0.32410575427682736, + "grad_norm": 0.582175076007843, + "learning_rate": 2.395e-05, + "log_odds_chosen": 2.358574151992798, + "log_odds_ratio": -0.3873441517353058, + "logits/chosen": -0.06897962093353271, + "logits/rejected": 1.1061187982559204, + "logps/chosen": -1.0776824951171875, + "logps/rejected": -3.145253896713257, + "loss": 0.6335, + "nll_loss": 0.5947161316871643, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10776825249195099, + "rewards/margins": 0.20675715804100037, + "rewards/rejected": -0.3145253658294678, + "step": 521 + }, + { + "epoch": 0.32472783825816487, + "grad_norm": 0.31357330083847046, + "learning_rate": 2.39e-05, + "log_odds_chosen": 2.555722713470459, + "log_odds_ratio": -0.2816966474056244, + "logits/chosen": -0.15091806650161743, + "logits/rejected": 0.5959459543228149, + "logps/chosen": -0.9637516140937805, + "logps/rejected": -3.1803767681121826, + "loss": 0.6144, + "nll_loss": 0.5862266421318054, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09637516736984253, + "rewards/margins": 0.2216625064611435, + "rewards/rejected": -0.3180376887321472, + "step": 522 + }, + { + "epoch": 0.3253499222395023, + "grad_norm": 0.7683733701705933, + "learning_rate": 2.385e-05, + "log_odds_chosen": 3.3063204288482666, + "log_odds_ratio": -0.09356483817100525, + "logits/chosen": 2.2439417839050293, + "logits/rejected": 1.6922907829284668, + "logps/chosen": -0.9698910117149353, + "logps/rejected": -3.716911792755127, + "loss": 0.8029, + "nll_loss": 0.7935055494308472, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09698909521102905, + "rewards/margins": 0.2747020721435547, + "rewards/rejected": -0.37169113755226135, + "step": 523 + }, + { + "epoch": 0.3259720062208398, + "grad_norm": 0.39197778701782227, + "learning_rate": 2.38e-05, + "log_odds_chosen": 1.962248682975769, + "log_odds_ratio": -0.18143852055072784, + "logits/chosen": -0.4806569516658783, + "logits/rejected": -0.31491222977638245, + "logps/chosen": -1.0480461120605469, + "logps/rejected": -2.6499183177948, + "loss": 0.5054, + "nll_loss": 0.4872513711452484, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10480460524559021, + "rewards/margins": 0.160187229514122, + "rewards/rejected": -0.2649918496608734, + "step": 524 + }, + { + "epoch": 0.3265940902021773, + "grad_norm": 0.37640804052352905, + "learning_rate": 2.375e-05, + "log_odds_chosen": 2.661229133605957, + "log_odds_ratio": -0.1665583997964859, + "logits/chosen": 1.625669240951538, + "logits/rejected": 1.2462046146392822, + "logps/chosen": -0.9341843128204346, + "logps/rejected": -3.1749377250671387, + "loss": 0.6937, + "nll_loss": 0.6770105361938477, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0934184342622757, + "rewards/margins": 0.2240753471851349, + "rewards/rejected": -0.3174937963485718, + "step": 525 + }, + { + "epoch": 0.3272161741835148, + "grad_norm": 0.34807097911834717, + "learning_rate": 2.37e-05, + "log_odds_chosen": 0.8722881078720093, + "log_odds_ratio": -0.4786973297595978, + "logits/chosen": 1.2106951475143433, + "logits/rejected": 1.6873623132705688, + "logps/chosen": -1.1643617153167725, + "logps/rejected": -1.9418911933898926, + "loss": 0.8321, + "nll_loss": 0.7842261791229248, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11643616855144501, + "rewards/margins": 0.07775293290615082, + "rewards/rejected": -0.19418910145759583, + "step": 526 + }, + { + "epoch": 0.32783825816485224, + "grad_norm": 0.3314477801322937, + "learning_rate": 2.365e-05, + "log_odds_chosen": 1.965777039527893, + "log_odds_ratio": -0.28736618161201477, + "logits/chosen": 1.045924186706543, + "logits/rejected": 1.1745761632919312, + "logps/chosen": -0.6382919549942017, + "logps/rejected": -2.006195068359375, + "loss": 0.5883, + "nll_loss": 0.5595134496688843, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0638291984796524, + "rewards/margins": 0.13679032027721405, + "rewards/rejected": -0.20061951875686646, + "step": 527 + }, + { + "epoch": 0.32846034214618974, + "grad_norm": 0.3520694673061371, + "learning_rate": 2.36e-05, + "log_odds_chosen": 2.4457571506500244, + "log_odds_ratio": -0.20794501900672913, + "logits/chosen": 1.5410584211349487, + "logits/rejected": 0.5601137280464172, + "logps/chosen": -0.8247537016868591, + "logps/rejected": -2.7022957801818848, + "loss": 0.6541, + "nll_loss": 0.6333186626434326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08247537165880203, + "rewards/margins": 0.18775421380996704, + "rewards/rejected": -0.2702295780181885, + "step": 528 + }, + { + "epoch": 0.3290824261275272, + "grad_norm": 0.320633202791214, + "learning_rate": 2.355e-05, + "log_odds_chosen": 1.7150052785873413, + "log_odds_ratio": -0.28227710723876953, + "logits/chosen": 0.22371336817741394, + "logits/rejected": 1.3191059827804565, + "logps/chosen": -1.1468290090560913, + "logps/rejected": -2.596248149871826, + "loss": 0.5774, + "nll_loss": 0.5491545796394348, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11468289792537689, + "rewards/margins": 0.14494192600250244, + "rewards/rejected": -0.2596248388290405, + "step": 529 + }, + { + "epoch": 0.3297045101088647, + "grad_norm": 0.5384267568588257, + "learning_rate": 2.35e-05, + "log_odds_chosen": 1.6268489360809326, + "log_odds_ratio": -0.248253732919693, + "logits/chosen": 2.6993472576141357, + "logits/rejected": 2.4896597862243652, + "logps/chosen": -0.9050356149673462, + "logps/rejected": -2.147216558456421, + "loss": 0.8078, + "nll_loss": 0.7829287648200989, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09050355851650238, + "rewards/margins": 0.12421809881925583, + "rewards/rejected": -0.2147216498851776, + "step": 530 + }, + { + "epoch": 0.33032659409020215, + "grad_norm": 1.307396650314331, + "learning_rate": 2.345e-05, + "log_odds_chosen": 2.3934273719787598, + "log_odds_ratio": -0.22615107893943787, + "logits/chosen": 2.7248733043670654, + "logits/rejected": 2.540579319000244, + "logps/chosen": -0.8816531300544739, + "logps/rejected": -2.799293041229248, + "loss": 0.8883, + "nll_loss": 0.8656710386276245, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08816531300544739, + "rewards/margins": 0.1917639970779419, + "rewards/rejected": -0.27992933988571167, + "step": 531 + }, + { + "epoch": 0.33094867807153966, + "grad_norm": 0.36137479543685913, + "learning_rate": 2.3400000000000003e-05, + "log_odds_chosen": 0.7194154262542725, + "log_odds_ratio": -0.523330569267273, + "logits/chosen": 0.8707399368286133, + "logits/rejected": 1.3606274127960205, + "logps/chosen": -1.3928475379943848, + "logps/rejected": -1.9430372714996338, + "loss": 0.7521, + "nll_loss": 0.6998083591461182, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13928475975990295, + "rewards/margins": 0.05501896142959595, + "rewards/rejected": -0.1943037360906601, + "step": 532 + }, + { + "epoch": 0.33157076205287717, + "grad_norm": 0.4101194143295288, + "learning_rate": 2.3350000000000002e-05, + "log_odds_chosen": 2.2328317165374756, + "log_odds_ratio": -0.23609133064746857, + "logits/chosen": 2.1293582916259766, + "logits/rejected": 0.01358860731124878, + "logps/chosen": -1.1123179197311401, + "logps/rejected": -2.9739584922790527, + "loss": 0.716, + "nll_loss": 0.6923679113388062, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11123178899288177, + "rewards/margins": 0.18616405129432678, + "rewards/rejected": -0.29739582538604736, + "step": 533 + }, + { + "epoch": 0.3321928460342146, + "grad_norm": 0.3048454523086548, + "learning_rate": 2.3300000000000004e-05, + "log_odds_chosen": 4.277897357940674, + "log_odds_ratio": -0.17860309779644012, + "logits/chosen": 1.040080189704895, + "logits/rejected": 1.0091297626495361, + "logps/chosen": -1.0271538496017456, + "logps/rejected": -4.838980674743652, + "loss": 0.6094, + "nll_loss": 0.5915553569793701, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1027153879404068, + "rewards/margins": 0.3811826705932617, + "rewards/rejected": -0.4838980436325073, + "step": 534 + }, + { + "epoch": 0.3328149300155521, + "grad_norm": 0.3863018751144409, + "learning_rate": 2.3250000000000003e-05, + "log_odds_chosen": 0.512001633644104, + "log_odds_ratio": -0.5913516283035278, + "logits/chosen": 2.4631636142730713, + "logits/rejected": 2.423994302749634, + "logps/chosen": -1.0720179080963135, + "logps/rejected": -1.4990208148956299, + "loss": 0.8055, + "nll_loss": 0.7463449835777283, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10720179229974747, + "rewards/margins": 0.04270029440522194, + "rewards/rejected": -0.1499020755290985, + "step": 535 + }, + { + "epoch": 0.3334370139968896, + "grad_norm": 0.3722380995750427, + "learning_rate": 2.32e-05, + "log_odds_chosen": 1.6009268760681152, + "log_odds_ratio": -0.4362635016441345, + "logits/chosen": 0.6439146995544434, + "logits/rejected": 0.752334713935852, + "logps/chosen": -0.9249628782272339, + "logps/rejected": -2.262263774871826, + "loss": 0.544, + "nll_loss": 0.5003713965415955, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09249629080295563, + "rewards/margins": 0.13373006880283356, + "rewards/rejected": -0.22622635960578918, + "step": 536 + }, + { + "epoch": 0.3340590979782271, + "grad_norm": 0.3637986481189728, + "learning_rate": 2.3150000000000004e-05, + "log_odds_chosen": 3.318232536315918, + "log_odds_ratio": -0.2623625695705414, + "logits/chosen": 1.6884994506835938, + "logits/rejected": 1.948002576828003, + "logps/chosen": -0.8119041323661804, + "logps/rejected": -3.6660208702087402, + "loss": 0.7499, + "nll_loss": 0.7236235737800598, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08119041472673416, + "rewards/margins": 0.28541168570518494, + "rewards/rejected": -0.3666021227836609, + "step": 537 + }, + { + "epoch": 0.33468118195956453, + "grad_norm": 0.3645683825016022, + "learning_rate": 2.3100000000000002e-05, + "log_odds_chosen": 2.602912664413452, + "log_odds_ratio": -0.19379067420959473, + "logits/chosen": 0.5529426336288452, + "logits/rejected": 0.2645620107650757, + "logps/chosen": -0.8854231834411621, + "logps/rejected": -3.0381038188934326, + "loss": 0.6065, + "nll_loss": 0.5871433019638062, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08854231983423233, + "rewards/margins": 0.215268075466156, + "rewards/rejected": -0.3038104176521301, + "step": 538 + }, + { + "epoch": 0.33530326594090204, + "grad_norm": 1.0917738676071167, + "learning_rate": 2.305e-05, + "log_odds_chosen": 2.948880672454834, + "log_odds_ratio": -0.2544633746147156, + "logits/chosen": -0.09750711917877197, + "logits/rejected": -1.0221948623657227, + "logps/chosen": -0.605278491973877, + "logps/rejected": -2.81388783454895, + "loss": 0.5105, + "nll_loss": 0.48507392406463623, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06052785366773605, + "rewards/margins": 0.22086089849472046, + "rewards/rejected": -0.2813887894153595, + "step": 539 + }, + { + "epoch": 0.3359253499222395, + "grad_norm": 0.30375197529792786, + "learning_rate": 2.3000000000000003e-05, + "log_odds_chosen": 2.7974700927734375, + "log_odds_ratio": -0.29055845737457275, + "logits/chosen": 0.4769752621650696, + "logits/rejected": 0.44122734665870667, + "logps/chosen": -0.7745877504348755, + "logps/rejected": -3.094503164291382, + "loss": 0.571, + "nll_loss": 0.5419222712516785, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07745878398418427, + "rewards/margins": 0.23199154436588287, + "rewards/rejected": -0.30945032835006714, + "step": 540 + }, + { + "epoch": 0.336547433903577, + "grad_norm": 0.6567568182945251, + "learning_rate": 2.2950000000000002e-05, + "log_odds_chosen": 2.1677944660186768, + "log_odds_ratio": -0.57481849193573, + "logits/chosen": -0.9570534229278564, + "logits/rejected": -1.0735530853271484, + "logps/chosen": -1.4620994329452515, + "logps/rejected": -3.3756513595581055, + "loss": 0.4604, + "nll_loss": 0.40293216705322266, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1462099552154541, + "rewards/margins": 0.19135519862174988, + "rewards/rejected": -0.337565153837204, + "step": 541 + }, + { + "epoch": 0.33716951788491445, + "grad_norm": 0.41450825333595276, + "learning_rate": 2.29e-05, + "log_odds_chosen": 1.4745512008666992, + "log_odds_ratio": -0.3406660556793213, + "logits/chosen": 0.29881247878074646, + "logits/rejected": 0.5884536504745483, + "logps/chosen": -0.9175814390182495, + "logps/rejected": -2.129868507385254, + "loss": 0.4939, + "nll_loss": 0.45984673500061035, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09175814688205719, + "rewards/margins": 0.12122871726751328, + "rewards/rejected": -0.21298687160015106, + "step": 542 + }, + { + "epoch": 0.33779160186625196, + "grad_norm": 0.33071574568748474, + "learning_rate": 2.2850000000000003e-05, + "log_odds_chosen": 1.4744378328323364, + "log_odds_ratio": -0.3100065290927887, + "logits/chosen": 1.2643396854400635, + "logits/rejected": 1.169672966003418, + "logps/chosen": -0.7786130309104919, + "logps/rejected": -1.8644421100616455, + "loss": 0.6003, + "nll_loss": 0.5693123936653137, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07786130905151367, + "rewards/margins": 0.10858289897441864, + "rewards/rejected": -0.1864442080259323, + "step": 543 + }, + { + "epoch": 0.3384136858475894, + "grad_norm": 0.33353278040885925, + "learning_rate": 2.2800000000000002e-05, + "log_odds_chosen": 1.95163893699646, + "log_odds_ratio": -0.32277804613113403, + "logits/chosen": 0.27112460136413574, + "logits/rejected": 1.4154012203216553, + "logps/chosen": -0.8759363889694214, + "logps/rejected": -2.446195602416992, + "loss": 0.6165, + "nll_loss": 0.5842545032501221, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08759364485740662, + "rewards/margins": 0.15702593326568604, + "rewards/rejected": -0.24461957812309265, + "step": 544 + }, + { + "epoch": 0.3390357698289269, + "grad_norm": 0.4259192645549774, + "learning_rate": 2.275e-05, + "log_odds_chosen": 2.227778911590576, + "log_odds_ratio": -0.392104834318161, + "logits/chosen": 0.6782884001731873, + "logits/rejected": 1.1886241436004639, + "logps/chosen": -0.9938414096832275, + "logps/rejected": -3.0460071563720703, + "loss": 0.6122, + "nll_loss": 0.5729869604110718, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09938414394855499, + "rewards/margins": 0.20521658658981323, + "rewards/rejected": -0.3046007454395294, + "step": 545 + }, + { + "epoch": 0.33965785381026437, + "grad_norm": 0.3001631796360016, + "learning_rate": 2.2700000000000003e-05, + "log_odds_chosen": 2.93178391456604, + "log_odds_ratio": -0.1864704191684723, + "logits/chosen": 1.5299491882324219, + "logits/rejected": 2.5147063732147217, + "logps/chosen": -0.8954837322235107, + "logps/rejected": -3.2739996910095215, + "loss": 0.7499, + "nll_loss": 0.7312994599342346, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08954837173223495, + "rewards/margins": 0.2378515899181366, + "rewards/rejected": -0.32739996910095215, + "step": 546 + }, + { + "epoch": 0.34027993779160187, + "grad_norm": 0.3409689664840698, + "learning_rate": 2.265e-05, + "log_odds_chosen": 2.394829273223877, + "log_odds_ratio": -0.12511810660362244, + "logits/chosen": 0.43690744042396545, + "logits/rejected": 0.4180241823196411, + "logps/chosen": -0.892842173576355, + "logps/rejected": -2.846055746078491, + "loss": 0.549, + "nll_loss": 0.5364462733268738, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08928421884775162, + "rewards/margins": 0.19532136619091034, + "rewards/rejected": -0.28460556268692017, + "step": 547 + }, + { + "epoch": 0.3409020217729393, + "grad_norm": 0.4478369355201721, + "learning_rate": 2.26e-05, + "log_odds_chosen": 2.7050700187683105, + "log_odds_ratio": -0.22413116693496704, + "logits/chosen": 0.4567830562591553, + "logits/rejected": 0.9032511115074158, + "logps/chosen": -1.3370281457901, + "logps/rejected": -3.741605758666992, + "loss": 0.561, + "nll_loss": 0.5385462045669556, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1337028294801712, + "rewards/margins": 0.24045775830745697, + "rewards/rejected": -0.3741605877876282, + "step": 548 + }, + { + "epoch": 0.34152410575427683, + "grad_norm": 0.35546326637268066, + "learning_rate": 2.2550000000000003e-05, + "log_odds_chosen": 4.098875045776367, + "log_odds_ratio": -0.07504149526357651, + "logits/chosen": 1.9520654678344727, + "logits/rejected": 0.00759616494178772, + "logps/chosen": -0.7652401924133301, + "logps/rejected": -4.112580299377441, + "loss": 0.5871, + "nll_loss": 0.5796326398849487, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07652401924133301, + "rewards/margins": 0.33473408222198486, + "rewards/rejected": -0.4112580716609955, + "step": 549 + }, + { + "epoch": 0.3421461897356143, + "grad_norm": 0.3201517164707184, + "learning_rate": 2.25e-05, + "log_odds_chosen": 4.117212772369385, + "log_odds_ratio": -0.029010329395532608, + "logits/chosen": 1.0014082193374634, + "logits/rejected": 0.7812284231185913, + "logps/chosen": -0.8319846987724304, + "logps/rejected": -4.328241348266602, + "loss": 0.5622, + "nll_loss": 0.5592617988586426, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08319847285747528, + "rewards/margins": 0.34962570667266846, + "rewards/rejected": -0.43282419443130493, + "step": 550 + }, + { + "epoch": 0.3427682737169518, + "grad_norm": 0.352874755859375, + "learning_rate": 2.245e-05, + "log_odds_chosen": 3.4196906089782715, + "log_odds_ratio": -0.07896264642477036, + "logits/chosen": 0.3399137854576111, + "logits/rejected": 0.0779067724943161, + "logps/chosen": -0.8405764698982239, + "logps/rejected": -3.7050743103027344, + "loss": 0.5314, + "nll_loss": 0.5235511064529419, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08405765146017075, + "rewards/margins": 0.2864498198032379, + "rewards/rejected": -0.37050747871398926, + "step": 551 + }, + { + "epoch": 0.3433903576982893, + "grad_norm": 0.3518848121166229, + "learning_rate": 2.2400000000000002e-05, + "log_odds_chosen": 2.2333786487579346, + "log_odds_ratio": -0.20728513598442078, + "logits/chosen": 1.2030657529830933, + "logits/rejected": 1.240146517753601, + "logps/chosen": -1.1116034984588623, + "logps/rejected": -2.8557775020599365, + "loss": 0.7377, + "nll_loss": 0.7169494032859802, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11116035282611847, + "rewards/margins": 0.1744173914194107, + "rewards/rejected": -0.2855777442455292, + "step": 552 + }, + { + "epoch": 0.34401244167962675, + "grad_norm": 0.3764328956604004, + "learning_rate": 2.235e-05, + "log_odds_chosen": 2.8303418159484863, + "log_odds_ratio": -0.2849666178226471, + "logits/chosen": 1.2564030885696411, + "logits/rejected": 0.2557767927646637, + "logps/chosen": -0.8771718740463257, + "logps/rejected": -3.4054300785064697, + "loss": 0.6693, + "nll_loss": 0.6408088207244873, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08771719038486481, + "rewards/margins": 0.2528257966041565, + "rewards/rejected": -0.3405430018901825, + "step": 553 + }, + { + "epoch": 0.34463452566096425, + "grad_norm": 0.32724928855895996, + "learning_rate": 2.23e-05, + "log_odds_chosen": 2.3714241981506348, + "log_odds_ratio": -0.36799246072769165, + "logits/chosen": 1.9873566627502441, + "logits/rejected": 0.9282538890838623, + "logps/chosen": -0.8043345212936401, + "logps/rejected": -2.7831761837005615, + "loss": 0.7771, + "nll_loss": 0.7403146624565125, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0804334506392479, + "rewards/margins": 0.19788417220115662, + "rewards/rejected": -0.2783176302909851, + "step": 554 + }, + { + "epoch": 0.3452566096423017, + "grad_norm": 0.3000686466693878, + "learning_rate": 2.2250000000000002e-05, + "log_odds_chosen": 3.208042860031128, + "log_odds_ratio": -0.16007201373577118, + "logits/chosen": 1.556321382522583, + "logits/rejected": 0.668292760848999, + "logps/chosen": -0.8184808492660522, + "logps/rejected": -3.525345802307129, + "loss": 0.5825, + "nll_loss": 0.5665206909179688, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08184809237718582, + "rewards/margins": 0.2706865072250366, + "rewards/rejected": -0.35253459215164185, + "step": 555 + }, + { + "epoch": 0.3458786936236392, + "grad_norm": 0.3622443377971649, + "learning_rate": 2.22e-05, + "log_odds_chosen": 5.820090293884277, + "log_odds_ratio": -0.07336248457431793, + "logits/chosen": 2.9645447731018066, + "logits/rejected": 2.1063284873962402, + "logps/chosen": -0.7918724417686462, + "logps/rejected": -5.822584629058838, + "loss": 0.7855, + "nll_loss": 0.7781620621681213, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07918724417686462, + "rewards/margins": 0.5030711889266968, + "rewards/rejected": -0.5822584629058838, + "step": 556 + }, + { + "epoch": 0.34650077760497666, + "grad_norm": 0.3985669016838074, + "learning_rate": 2.215e-05, + "log_odds_chosen": 3.1644110679626465, + "log_odds_ratio": -0.12268514186143875, + "logits/chosen": 0.2535923719406128, + "logits/rejected": -1.1449623107910156, + "logps/chosen": -1.1264760494232178, + "logps/rejected": -3.9241061210632324, + "loss": 0.5895, + "nll_loss": 0.5772807598114014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11264760792255402, + "rewards/margins": 0.27976301312446594, + "rewards/rejected": -0.39241063594818115, + "step": 557 + }, + { + "epoch": 0.34712286158631417, + "grad_norm": 0.5123311877250671, + "learning_rate": 2.2100000000000002e-05, + "log_odds_chosen": 2.835562229156494, + "log_odds_ratio": -0.34649112820625305, + "logits/chosen": 0.7598134875297546, + "logits/rejected": 0.006757020950317383, + "logps/chosen": -1.0932013988494873, + "logps/rejected": -3.476534128189087, + "loss": 0.5802, + "nll_loss": 0.5455945134162903, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10932014137506485, + "rewards/margins": 0.23833326995372772, + "rewards/rejected": -0.3476533889770508, + "step": 558 + }, + { + "epoch": 0.3477449455676516, + "grad_norm": 0.3804253041744232, + "learning_rate": 2.205e-05, + "log_odds_chosen": 5.65109920501709, + "log_odds_ratio": -0.11326686292886734, + "logits/chosen": 2.597390651702881, + "logits/rejected": 1.8537123203277588, + "logps/chosen": -0.8809130787849426, + "logps/rejected": -5.866379261016846, + "loss": 0.9367, + "nll_loss": 0.9254015684127808, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08809130638837814, + "rewards/margins": 0.49854663014411926, + "rewards/rejected": -0.5866379737854004, + "step": 559 + }, + { + "epoch": 0.3483670295489891, + "grad_norm": 0.2974945604801178, + "learning_rate": 2.2000000000000003e-05, + "log_odds_chosen": 4.304051399230957, + "log_odds_ratio": -0.08408652245998383, + "logits/chosen": -0.3859021067619324, + "logits/rejected": 0.04299017786979675, + "logps/chosen": -1.005317211151123, + "logps/rejected": -4.898586273193359, + "loss": 0.5036, + "nll_loss": 0.4951820969581604, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10053171962499619, + "rewards/margins": 0.38932690024375916, + "rewards/rejected": -0.48985862731933594, + "step": 560 + }, + { + "epoch": 0.3489891135303266, + "grad_norm": 0.32148414850234985, + "learning_rate": 2.195e-05, + "log_odds_chosen": 4.430983543395996, + "log_odds_ratio": -0.026275552809238434, + "logits/chosen": 2.0504298210144043, + "logits/rejected": 1.0362154245376587, + "logps/chosen": -0.7540563344955444, + "logps/rejected": -4.490864276885986, + "loss": 0.6834, + "nll_loss": 0.6807276606559753, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07540564239025116, + "rewards/margins": 0.37368080019950867, + "rewards/rejected": -0.44908642768859863, + "step": 561 + }, + { + "epoch": 0.3496111975116641, + "grad_norm": 0.4179425835609436, + "learning_rate": 2.19e-05, + "log_odds_chosen": 4.381219387054443, + "log_odds_ratio": -0.14103896915912628, + "logits/chosen": 1.1080811023712158, + "logits/rejected": -0.20480573177337646, + "logps/chosen": -0.9135346412658691, + "logps/rejected": -4.840203285217285, + "loss": 0.5167, + "nll_loss": 0.5026371479034424, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09135347604751587, + "rewards/margins": 0.39266693592071533, + "rewards/rejected": -0.4840203821659088, + "step": 562 + }, + { + "epoch": 0.35023328149300154, + "grad_norm": 0.3320809602737427, + "learning_rate": 2.1850000000000003e-05, + "log_odds_chosen": 4.221715450286865, + "log_odds_ratio": -0.04479138180613518, + "logits/chosen": 1.4076143503189087, + "logits/rejected": 0.8501245379447937, + "logps/chosen": -0.8335117101669312, + "logps/rejected": -4.350778102874756, + "loss": 0.6814, + "nll_loss": 0.6769353151321411, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08335117250680923, + "rewards/margins": 0.3517266511917114, + "rewards/rejected": -0.43507784605026245, + "step": 563 + }, + { + "epoch": 0.35085536547433904, + "grad_norm": 2.018888473510742, + "learning_rate": 2.18e-05, + "log_odds_chosen": 2.925218105316162, + "log_odds_ratio": -0.1921694278717041, + "logits/chosen": 1.601369023323059, + "logits/rejected": 0.20986300706863403, + "logps/chosen": -0.9109419584274292, + "logps/rejected": -3.379201889038086, + "loss": 0.7414, + "nll_loss": 0.7222239375114441, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09109419584274292, + "rewards/margins": 0.24682600796222687, + "rewards/rejected": -0.3379201889038086, + "step": 564 + }, + { + "epoch": 0.3514774494556765, + "grad_norm": 0.30817800760269165, + "learning_rate": 2.175e-05, + "log_odds_chosen": 5.583270072937012, + "log_odds_ratio": -0.07630279660224915, + "logits/chosen": 0.5737905502319336, + "logits/rejected": 0.908176839351654, + "logps/chosen": -0.9042786955833435, + "logps/rejected": -5.677913188934326, + "loss": 0.6135, + "nll_loss": 0.6059155464172363, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09042787551879883, + "rewards/margins": 0.4773634374141693, + "rewards/rejected": -0.5677912831306458, + "step": 565 + }, + { + "epoch": 0.352099533437014, + "grad_norm": 0.2887404263019562, + "learning_rate": 2.1700000000000002e-05, + "log_odds_chosen": 4.915496349334717, + "log_odds_ratio": -0.03920300304889679, + "logits/chosen": -0.6548490524291992, + "logits/rejected": -0.16563282907009125, + "logps/chosen": -0.9077558517456055, + "logps/rejected": -5.154465675354004, + "loss": 0.4734, + "nll_loss": 0.46949946880340576, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09077560156583786, + "rewards/margins": 0.4246709644794464, + "rewards/rejected": -0.5154465436935425, + "step": 566 + }, + { + "epoch": 0.35272161741835145, + "grad_norm": 0.3343895673751831, + "learning_rate": 2.165e-05, + "log_odds_chosen": 5.319578170776367, + "log_odds_ratio": -0.11831340938806534, + "logits/chosen": 2.253048896789551, + "logits/rejected": 1.2730216979980469, + "logps/chosen": -0.8506143093109131, + "logps/rejected": -5.664251327514648, + "loss": 0.6425, + "nll_loss": 0.6306980848312378, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08506143093109131, + "rewards/margins": 0.4813637435436249, + "rewards/rejected": -0.5664252042770386, + "step": 567 + }, + { + "epoch": 0.35334370139968896, + "grad_norm": 0.26959332823753357, + "learning_rate": 2.16e-05, + "log_odds_chosen": 5.405831813812256, + "log_odds_ratio": -0.015014220029115677, + "logits/chosen": 2.161424160003662, + "logits/rejected": 1.572087287902832, + "logps/chosen": -0.7183213829994202, + "logps/rejected": -5.414179801940918, + "loss": 0.7563, + "nll_loss": 0.7547677755355835, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07183213531970978, + "rewards/margins": 0.4695858657360077, + "rewards/rejected": -0.5414179563522339, + "step": 568 + }, + { + "epoch": 0.35396578538102647, + "grad_norm": 6.484348297119141, + "learning_rate": 2.1550000000000002e-05, + "log_odds_chosen": 3.8986024856567383, + "log_odds_ratio": -0.22238805890083313, + "logits/chosen": 0.6533817052841187, + "logits/rejected": 0.09037871658802032, + "logps/chosen": -1.0746116638183594, + "logps/rejected": -4.593562602996826, + "loss": 0.6437, + "nll_loss": 0.6214991807937622, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10746116936206818, + "rewards/margins": 0.3518950939178467, + "rewards/rejected": -0.45935627818107605, + "step": 569 + }, + { + "epoch": 0.3545878693623639, + "grad_norm": 0.3435267210006714, + "learning_rate": 2.15e-05, + "log_odds_chosen": 4.144981861114502, + "log_odds_ratio": -0.13062036037445068, + "logits/chosen": 1.8960034847259521, + "logits/rejected": 0.15083402395248413, + "logps/chosen": -0.9747380018234253, + "logps/rejected": -4.638625621795654, + "loss": 0.7867, + "nll_loss": 0.7736024856567383, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09747380763292313, + "rewards/margins": 0.366388738155365, + "rewards/rejected": -0.4638625383377075, + "step": 570 + }, + { + "epoch": 0.3552099533437014, + "grad_norm": 0.5003693103790283, + "learning_rate": 2.145e-05, + "log_odds_chosen": 2.2045464515686035, + "log_odds_ratio": -0.32526710629463196, + "logits/chosen": 1.7149858474731445, + "logits/rejected": 1.3935751914978027, + "logps/chosen": -0.7401760816574097, + "logps/rejected": -2.5005102157592773, + "loss": 0.7001, + "nll_loss": 0.6675523519515991, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07401760667562485, + "rewards/margins": 0.17603342235088348, + "rewards/rejected": -0.25005102157592773, + "step": 571 + }, + { + "epoch": 0.3558320373250389, + "grad_norm": 0.3263773024082184, + "learning_rate": 2.1400000000000002e-05, + "log_odds_chosen": 3.654919147491455, + "log_odds_ratio": -0.08848594129085541, + "logits/chosen": -0.6415503621101379, + "logits/rejected": 0.4079166054725647, + "logps/chosen": -1.0272693634033203, + "logps/rejected": -4.235315799713135, + "loss": 0.4354, + "nll_loss": 0.42651909589767456, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10272695124149323, + "rewards/margins": 0.3208046555519104, + "rewards/rejected": -0.42353159189224243, + "step": 572 + }, + { + "epoch": 0.3564541213063764, + "grad_norm": 0.40876343846321106, + "learning_rate": 2.135e-05, + "log_odds_chosen": 2.0924434661865234, + "log_odds_ratio": -0.37442755699157715, + "logits/chosen": 1.6909630298614502, + "logits/rejected": 2.1783745288848877, + "logps/chosen": -0.7929383516311646, + "logps/rejected": -2.4616189002990723, + "loss": 0.7618, + "nll_loss": 0.7243326306343079, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07929383963346481, + "rewards/margins": 0.16686806082725525, + "rewards/rejected": -0.24616187810897827, + "step": 573 + }, + { + "epoch": 0.35707620528771383, + "grad_norm": 0.49694570899009705, + "learning_rate": 2.13e-05, + "log_odds_chosen": 1.9976987838745117, + "log_odds_ratio": -0.37749671936035156, + "logits/chosen": 0.9177133440971375, + "logits/rejected": -0.533532440662384, + "logps/chosen": -0.8589506149291992, + "logps/rejected": -2.627117156982422, + "loss": 0.5703, + "nll_loss": 0.5325274467468262, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08589506149291992, + "rewards/margins": 0.1768166571855545, + "rewards/rejected": -0.26271170377731323, + "step": 574 + }, + { + "epoch": 0.35769828926905134, + "grad_norm": 0.43489086627960205, + "learning_rate": 2.125e-05, + "log_odds_chosen": 3.7538657188415527, + "log_odds_ratio": -0.17192378640174866, + "logits/chosen": -0.6949158310890198, + "logits/rejected": -0.29193541407585144, + "logps/chosen": -0.8917832374572754, + "logps/rejected": -4.247375965118408, + "loss": 0.4755, + "nll_loss": 0.45832446217536926, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08917832374572754, + "rewards/margins": 0.33555927872657776, + "rewards/rejected": -0.4247376024723053, + "step": 575 + }, + { + "epoch": 0.3583203732503888, + "grad_norm": 0.3112671971321106, + "learning_rate": 2.12e-05, + "log_odds_chosen": 5.385041236877441, + "log_odds_ratio": -0.10352785885334015, + "logits/chosen": 0.3990916609764099, + "logits/rejected": 0.9243254661560059, + "logps/chosen": -1.1848556995391846, + "logps/rejected": -5.890063285827637, + "loss": 0.537, + "nll_loss": 0.5266907215118408, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11848556995391846, + "rewards/margins": 0.4705207943916321, + "rewards/rejected": -0.5890063047409058, + "step": 576 + }, + { + "epoch": 0.3589424572317263, + "grad_norm": 0.7652101516723633, + "learning_rate": 2.115e-05, + "log_odds_chosen": 3.0768189430236816, + "log_odds_ratio": -0.24112540483474731, + "logits/chosen": 1.8470680713653564, + "logits/rejected": 2.1239311695098877, + "logps/chosen": -0.721358060836792, + "logps/rejected": -3.037296772003174, + "loss": 0.7862, + "nll_loss": 0.76209557056427, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0721358060836792, + "rewards/margins": 0.23159386217594147, + "rewards/rejected": -0.3037296533584595, + "step": 577 + }, + { + "epoch": 0.35956454121306375, + "grad_norm": 0.39108505845069885, + "learning_rate": 2.11e-05, + "log_odds_chosen": 2.50784969329834, + "log_odds_ratio": -0.16467276215553284, + "logits/chosen": 0.8514099717140198, + "logits/rejected": 0.698555052280426, + "logps/chosen": -1.0424742698669434, + "logps/rejected": -3.1801681518554688, + "loss": 0.6768, + "nll_loss": 0.6602964401245117, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10424742847681046, + "rewards/margins": 0.21376937627792358, + "rewards/rejected": -0.31801682710647583, + "step": 578 + }, + { + "epoch": 0.36018662519440126, + "grad_norm": 0.3793575167655945, + "learning_rate": 2.105e-05, + "log_odds_chosen": 4.474118232727051, + "log_odds_ratio": -0.22063733637332916, + "logits/chosen": 1.5521801710128784, + "logits/rejected": 1.0251915454864502, + "logps/chosen": -0.9196445941925049, + "logps/rejected": -5.048022747039795, + "loss": 0.6981, + "nll_loss": 0.6760702133178711, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09196445345878601, + "rewards/margins": 0.41283780336380005, + "rewards/rejected": -0.5048022866249084, + "step": 579 + }, + { + "epoch": 0.3608087091757387, + "grad_norm": 0.3384208679199219, + "learning_rate": 2.1e-05, + "log_odds_chosen": 5.3531174659729, + "log_odds_ratio": -0.14996495842933655, + "logits/chosen": 1.6041481494903564, + "logits/rejected": 1.5773200988769531, + "logps/chosen": -1.218875765800476, + "logps/rejected": -6.06456995010376, + "loss": 0.8003, + "nll_loss": 0.7853128910064697, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12188757956027985, + "rewards/margins": 0.4845694303512573, + "rewards/rejected": -0.606456995010376, + "step": 580 + }, + { + "epoch": 0.3614307931570762, + "grad_norm": 0.4473244845867157, + "learning_rate": 2.095e-05, + "log_odds_chosen": 5.5421247482299805, + "log_odds_ratio": -0.04228505492210388, + "logits/chosen": 0.6503898501396179, + "logits/rejected": -0.6624352931976318, + "logps/chosen": -1.1132351160049438, + "logps/rejected": -6.240670204162598, + "loss": 0.6585, + "nll_loss": 0.6542383432388306, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1113235130906105, + "rewards/margins": 0.5127434730529785, + "rewards/rejected": -0.6240670084953308, + "step": 581 + }, + { + "epoch": 0.36205287713841366, + "grad_norm": 0.32590481638908386, + "learning_rate": 2.09e-05, + "log_odds_chosen": 3.7834081649780273, + "log_odds_ratio": -0.29060444235801697, + "logits/chosen": -0.28049537539482117, + "logits/rejected": -0.4547344446182251, + "logps/chosen": -1.0908867120742798, + "logps/rejected": -4.53211784362793, + "loss": 0.5202, + "nll_loss": 0.49115025997161865, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10908866673707962, + "rewards/margins": 0.34412315487861633, + "rewards/rejected": -0.45321184396743774, + "step": 582 + }, + { + "epoch": 0.36267496111975117, + "grad_norm": 0.6480214595794678, + "learning_rate": 2.085e-05, + "log_odds_chosen": 3.60688853263855, + "log_odds_ratio": -0.2324177324771881, + "logits/chosen": 0.307085782289505, + "logits/rejected": -0.46215736865997314, + "logps/chosen": -0.9965261816978455, + "logps/rejected": -4.287740707397461, + "loss": 0.6023, + "nll_loss": 0.5790976285934448, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09965262562036514, + "rewards/margins": 0.32912150025367737, + "rewards/rejected": -0.4287740886211395, + "step": 583 + }, + { + "epoch": 0.3632970451010886, + "grad_norm": 0.5480687022209167, + "learning_rate": 2.08e-05, + "log_odds_chosen": 4.9113874435424805, + "log_odds_ratio": -0.12982912361621857, + "logits/chosen": 1.9692302942276, + "logits/rejected": 0.6301515102386475, + "logps/chosen": -0.8638044595718384, + "logps/rejected": -5.2282514572143555, + "loss": 0.6303, + "nll_loss": 0.617352306842804, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08638045191764832, + "rewards/margins": 0.4364446699619293, + "rewards/rejected": -0.5228251218795776, + "step": 584 + }, + { + "epoch": 0.36391912908242613, + "grad_norm": 0.34628981351852417, + "learning_rate": 2.075e-05, + "log_odds_chosen": 3.9489634037017822, + "log_odds_ratio": -0.22906804084777832, + "logits/chosen": 0.8565244674682617, + "logits/rejected": 1.0159627199172974, + "logps/chosen": -0.9767001271247864, + "logps/rejected": -4.592409133911133, + "loss": 0.6459, + "nll_loss": 0.6229780316352844, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09767001122236252, + "rewards/margins": 0.3615708351135254, + "rewards/rejected": -0.4592408537864685, + "step": 585 + }, + { + "epoch": 0.3645412130637636, + "grad_norm": 0.2683575749397278, + "learning_rate": 2.07e-05, + "log_odds_chosen": 4.981381893157959, + "log_odds_ratio": -0.20831695199012756, + "logits/chosen": 0.9119428396224976, + "logits/rejected": 0.23448503017425537, + "logps/chosen": -1.1264652013778687, + "logps/rejected": -5.7671332359313965, + "loss": 0.6212, + "nll_loss": 0.6003206968307495, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11264652013778687, + "rewards/margins": 0.4640668034553528, + "rewards/rejected": -0.5767133831977844, + "step": 586 + }, + { + "epoch": 0.3651632970451011, + "grad_norm": 0.33639833331108093, + "learning_rate": 2.065e-05, + "log_odds_chosen": 4.010759353637695, + "log_odds_ratio": -0.2678699791431427, + "logits/chosen": 0.12189310789108276, + "logits/rejected": 0.07927525043487549, + "logps/chosen": -0.9886451363563538, + "logps/rejected": -4.671942710876465, + "loss": 0.4694, + "nll_loss": 0.44262024760246277, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09886451065540314, + "rewards/margins": 0.3683297634124756, + "rewards/rejected": -0.4671942889690399, + "step": 587 + }, + { + "epoch": 0.3657853810264386, + "grad_norm": 0.43022793531417847, + "learning_rate": 2.06e-05, + "log_odds_chosen": 2.4500086307525635, + "log_odds_ratio": -0.44840744137763977, + "logits/chosen": 2.032104730606079, + "logits/rejected": 2.6225948333740234, + "logps/chosen": -1.0749928951263428, + "logps/rejected": -3.159858226776123, + "loss": 0.7611, + "nll_loss": 0.7162356376647949, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10749930143356323, + "rewards/margins": 0.20848651230335236, + "rewards/rejected": -0.3159858286380768, + "step": 588 + }, + { + "epoch": 0.36640746500777605, + "grad_norm": 0.31727105379104614, + "learning_rate": 2.055e-05, + "log_odds_chosen": 4.297815322875977, + "log_odds_ratio": -0.17429843544960022, + "logits/chosen": 0.28740793466567993, + "logits/rejected": 0.8092455863952637, + "logps/chosen": -0.7892931699752808, + "logps/rejected": -4.623042106628418, + "loss": 0.5373, + "nll_loss": 0.5198439359664917, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07892931997776031, + "rewards/margins": 0.3833748996257782, + "rewards/rejected": -0.4623042047023773, + "step": 589 + }, + { + "epoch": 0.36702954898911355, + "grad_norm": 0.46551135182380676, + "learning_rate": 2.05e-05, + "log_odds_chosen": 3.207967519760132, + "log_odds_ratio": -0.2668222188949585, + "logits/chosen": 1.4202075004577637, + "logits/rejected": 1.1370333433151245, + "logps/chosen": -1.0534496307373047, + "logps/rejected": -3.9848947525024414, + "loss": 0.6709, + "nll_loss": 0.644168496131897, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10534496605396271, + "rewards/margins": 0.29314449429512024, + "rewards/rejected": -0.39848947525024414, + "step": 590 + }, + { + "epoch": 0.367651632970451, + "grad_norm": 2.5857887268066406, + "learning_rate": 2.045e-05, + "log_odds_chosen": 4.6939496994018555, + "log_odds_ratio": -0.1030438169836998, + "logits/chosen": 0.7330646514892578, + "logits/rejected": 0.881181001663208, + "logps/chosen": -1.2379714250564575, + "logps/rejected": -5.404970169067383, + "loss": 0.6535, + "nll_loss": 0.6432254314422607, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12379714846611023, + "rewards/margins": 0.4166998565196991, + "rewards/rejected": -0.5404970049858093, + "step": 591 + }, + { + "epoch": 0.3682737169517885, + "grad_norm": 0.3276950716972351, + "learning_rate": 2.04e-05, + "log_odds_chosen": 2.8923096656799316, + "log_odds_ratio": -0.409442663192749, + "logits/chosen": 0.031567931175231934, + "logits/rejected": 0.7758247256278992, + "logps/chosen": -0.711024284362793, + "logps/rejected": -3.3380963802337646, + "loss": 0.5007, + "nll_loss": 0.45975714921951294, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07110242545604706, + "rewards/margins": 0.2627072334289551, + "rewards/rejected": -0.33380964398384094, + "step": 592 + }, + { + "epoch": 0.36889580093312596, + "grad_norm": 0.29507437348365784, + "learning_rate": 2.035e-05, + "log_odds_chosen": 4.467270851135254, + "log_odds_ratio": -0.2962123155593872, + "logits/chosen": 0.7136313915252686, + "logits/rejected": 0.936424195766449, + "logps/chosen": -0.9805070161819458, + "logps/rejected": -5.16839075088501, + "loss": 0.6137, + "nll_loss": 0.5840867757797241, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09805070608854294, + "rewards/margins": 0.4187883734703064, + "rewards/rejected": -0.5168390870094299, + "step": 593 + }, + { + "epoch": 0.36951788491446347, + "grad_norm": 0.3454137146472931, + "learning_rate": 2.0300000000000002e-05, + "log_odds_chosen": 5.492619037628174, + "log_odds_ratio": -0.05524108186364174, + "logits/chosen": 1.6836175918579102, + "logits/rejected": 0.8593066930770874, + "logps/chosen": -0.8198999166488647, + "logps/rejected": -5.759896755218506, + "loss": 0.6107, + "nll_loss": 0.6052013039588928, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08198998868465424, + "rewards/margins": 0.4939996600151062, + "rewards/rejected": -0.5759896636009216, + "step": 594 + }, + { + "epoch": 0.3701399688958009, + "grad_norm": 1.686590313911438, + "learning_rate": 2.025e-05, + "log_odds_chosen": 4.503698825836182, + "log_odds_ratio": -0.20807135105133057, + "logits/chosen": 2.4879651069641113, + "logits/rejected": 1.0714972019195557, + "logps/chosen": -1.0486226081848145, + "logps/rejected": -5.218942642211914, + "loss": 0.8232, + "nll_loss": 0.8023769855499268, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1048622578382492, + "rewards/margins": 0.41703200340270996, + "rewards/rejected": -0.5218942761421204, + "step": 595 + }, + { + "epoch": 0.3707620528771384, + "grad_norm": 2.878927230834961, + "learning_rate": 2.0200000000000003e-05, + "log_odds_chosen": 2.9781720638275146, + "log_odds_ratio": -0.39062219858169556, + "logits/chosen": 0.059173583984375, + "logits/rejected": 0.10687759518623352, + "logps/chosen": -1.6145503520965576, + "logps/rejected": -4.240401268005371, + "loss": 0.8397, + "nll_loss": 0.8006084561347961, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.16145503520965576, + "rewards/margins": 0.2625851035118103, + "rewards/rejected": -0.4240401089191437, + "step": 596 + }, + { + "epoch": 0.3713841368584759, + "grad_norm": 3.408799409866333, + "learning_rate": 2.0150000000000002e-05, + "log_odds_chosen": 4.163212299346924, + "log_odds_ratio": -0.12928536534309387, + "logits/chosen": 1.2136975526809692, + "logits/rejected": 0.2280959188938141, + "logps/chosen": -1.0870234966278076, + "logps/rejected": -4.872440814971924, + "loss": 0.7765, + "nll_loss": 0.7635830640792847, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10870234668254852, + "rewards/margins": 0.3785417675971985, + "rewards/rejected": -0.4872441291809082, + "step": 597 + }, + { + "epoch": 0.3720062208398134, + "grad_norm": 0.31707343459129333, + "learning_rate": 2.01e-05, + "log_odds_chosen": 6.093807220458984, + "log_odds_ratio": -0.01401099655777216, + "logits/chosen": 0.5640559792518616, + "logits/rejected": 0.06941845268011093, + "logps/chosen": -0.77422034740448, + "logps/rejected": -6.081796646118164, + "loss": 0.5637, + "nll_loss": 0.5623044371604919, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07742203772068024, + "rewards/margins": 0.5307576060295105, + "rewards/rejected": -0.6081796288490295, + "step": 598 + }, + { + "epoch": 0.37262830482115084, + "grad_norm": 0.2775684595108032, + "learning_rate": 2.0050000000000003e-05, + "log_odds_chosen": 2.811703681945801, + "log_odds_ratio": -0.17024891078472137, + "logits/chosen": -0.14262226223945618, + "logits/rejected": -0.5278723239898682, + "logps/chosen": -0.5868371725082397, + "logps/rejected": -2.578371524810791, + "loss": 0.523, + "nll_loss": 0.5059659481048584, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.05868372321128845, + "rewards/margins": 0.19915342330932617, + "rewards/rejected": -0.2578371465206146, + "step": 599 + }, + { + "epoch": 0.37325038880248834, + "grad_norm": 0.3168306052684784, + "learning_rate": 2e-05, + "log_odds_chosen": 4.713489532470703, + "log_odds_ratio": -0.11067051440477371, + "logits/chosen": 1.329630970954895, + "logits/rejected": 0.1642017960548401, + "logps/chosen": -0.7641226053237915, + "logps/rejected": -4.863486289978027, + "loss": 0.611, + "nll_loss": 0.5999280214309692, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07641226053237915, + "rewards/margins": 0.4099363684654236, + "rewards/rejected": -0.48634862899780273, + "step": 600 + }, + { + "epoch": 0.3738724727838258, + "grad_norm": 0.286088764667511, + "learning_rate": 1.995e-05, + "log_odds_chosen": 1.7783844470977783, + "log_odds_ratio": -0.32731106877326965, + "logits/chosen": 0.7408541440963745, + "logits/rejected": -0.08958357572555542, + "logps/chosen": -0.768362820148468, + "logps/rejected": -2.1349611282348633, + "loss": 0.6597, + "nll_loss": 0.6270171403884888, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07683628797531128, + "rewards/margins": 0.13665983080863953, + "rewards/rejected": -0.2134961038827896, + "step": 601 + }, + { + "epoch": 0.3744945567651633, + "grad_norm": 0.3127974569797516, + "learning_rate": 1.9900000000000003e-05, + "log_odds_chosen": 2.3000106811523438, + "log_odds_ratio": -0.3905388116836548, + "logits/chosen": -0.8903562426567078, + "logits/rejected": -0.00967445969581604, + "logps/chosen": -0.9396561980247498, + "logps/rejected": -3.0212459564208984, + "loss": 0.5167, + "nll_loss": 0.47764530777931213, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09396561980247498, + "rewards/margins": 0.2081589549779892, + "rewards/rejected": -0.30212458968162537, + "step": 602 + }, + { + "epoch": 0.37511664074650075, + "grad_norm": 0.415966272354126, + "learning_rate": 1.985e-05, + "log_odds_chosen": 1.083411693572998, + "log_odds_ratio": -0.5429335832595825, + "logits/chosen": -0.10423839092254639, + "logits/rejected": 2.1669833660125732, + "logps/chosen": -1.113211750984192, + "logps/rejected": -1.9851309061050415, + "loss": 0.6267, + "nll_loss": 0.5724228024482727, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11132118105888367, + "rewards/margins": 0.08719191700220108, + "rewards/rejected": -0.19851309061050415, + "step": 603 + }, + { + "epoch": 0.37573872472783826, + "grad_norm": 3.7788166999816895, + "learning_rate": 1.9800000000000004e-05, + "log_odds_chosen": 2.944392204284668, + "log_odds_ratio": -0.21253594756126404, + "logits/chosen": 1.583858847618103, + "logits/rejected": 0.9380096793174744, + "logps/chosen": -0.8593783974647522, + "logps/rejected": -3.203774929046631, + "loss": 0.8276, + "nll_loss": 0.8063594102859497, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08593783527612686, + "rewards/margins": 0.2344396710395813, + "rewards/rejected": -0.32037752866744995, + "step": 604 + }, + { + "epoch": 0.37636080870917576, + "grad_norm": 0.37738439440727234, + "learning_rate": 1.9750000000000002e-05, + "log_odds_chosen": 1.2513070106506348, + "log_odds_ratio": -0.47075262665748596, + "logits/chosen": 1.0833340883255005, + "logits/rejected": 0.25691819190979004, + "logps/chosen": -0.9458091855049133, + "logps/rejected": -2.022855043411255, + "loss": 0.721, + "nll_loss": 0.6739503145217896, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09458091855049133, + "rewards/margins": 0.10770457983016968, + "rewards/rejected": -0.2022855132818222, + "step": 605 + }, + { + "epoch": 0.3769828926905132, + "grad_norm": 0.44008681178092957, + "learning_rate": 1.97e-05, + "log_odds_chosen": 1.6499419212341309, + "log_odds_ratio": -0.28058454394340515, + "logits/chosen": 1.6621906757354736, + "logits/rejected": 0.8158775568008423, + "logps/chosen": -1.0555553436279297, + "logps/rejected": -2.435013771057129, + "loss": 0.8268, + "nll_loss": 0.798723578453064, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10555553436279297, + "rewards/margins": 0.13794584572315216, + "rewards/rejected": -0.24350138008594513, + "step": 606 + }, + { + "epoch": 0.3776049766718507, + "grad_norm": 0.43245071172714233, + "learning_rate": 1.9650000000000003e-05, + "log_odds_chosen": 0.48218634724617004, + "log_odds_ratio": -0.541495680809021, + "logits/chosen": -0.4415658116340637, + "logits/rejected": 0.35667887330055237, + "logps/chosen": -1.1343573331832886, + "logps/rejected": -1.5530166625976562, + "loss": 0.5693, + "nll_loss": 0.5151444673538208, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11343573778867722, + "rewards/margins": 0.041865941137075424, + "rewards/rejected": -0.15530166029930115, + "step": 607 + }, + { + "epoch": 0.3782270606531882, + "grad_norm": 0.49462637305259705, + "learning_rate": 1.9600000000000002e-05, + "log_odds_chosen": 1.4209492206573486, + "log_odds_ratio": -0.3335047662258148, + "logits/chosen": 0.7791264057159424, + "logits/rejected": 0.8404102325439453, + "logps/chosen": -0.7444546222686768, + "logps/rejected": -1.7486032247543335, + "loss": 0.7176, + "nll_loss": 0.6841996908187866, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0744454637169838, + "rewards/margins": 0.10041484981775284, + "rewards/rejected": -0.17486032843589783, + "step": 608 + }, + { + "epoch": 0.3788491446345257, + "grad_norm": 0.5927086472511292, + "learning_rate": 1.955e-05, + "log_odds_chosen": 1.383941411972046, + "log_odds_ratio": -0.28821372985839844, + "logits/chosen": -0.8597515821456909, + "logits/rejected": -0.6633905172348022, + "logps/chosen": -0.9070121049880981, + "logps/rejected": -1.9484810829162598, + "loss": 0.5367, + "nll_loss": 0.5078994035720825, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09070120751857758, + "rewards/margins": 0.10414689779281616, + "rewards/rejected": -0.19484810531139374, + "step": 609 + }, + { + "epoch": 0.37947122861586313, + "grad_norm": 0.32356026768684387, + "learning_rate": 1.9500000000000003e-05, + "log_odds_chosen": 3.314235210418701, + "log_odds_ratio": -0.2443312555551529, + "logits/chosen": 1.1803479194641113, + "logits/rejected": 1.4565016031265259, + "logps/chosen": -0.8451287746429443, + "logps/rejected": -3.459113597869873, + "loss": 0.7266, + "nll_loss": 0.7021805644035339, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0845128744840622, + "rewards/margins": 0.2613984942436218, + "rewards/rejected": -0.3459113836288452, + "step": 610 + }, + { + "epoch": 0.38009331259720064, + "grad_norm": 0.4172796905040741, + "learning_rate": 1.9450000000000002e-05, + "log_odds_chosen": 1.747850775718689, + "log_odds_ratio": -0.3201906681060791, + "logits/chosen": 1.09573233127594, + "logits/rejected": 1.502558946609497, + "logps/chosen": -0.7899297475814819, + "logps/rejected": -2.1978812217712402, + "loss": 0.752, + "nll_loss": 0.720018744468689, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07899297773838043, + "rewards/margins": 0.14079514145851135, + "rewards/rejected": -0.21978813409805298, + "step": 611 + }, + { + "epoch": 0.3807153965785381, + "grad_norm": 0.30057263374328613, + "learning_rate": 1.94e-05, + "log_odds_chosen": 2.8357088565826416, + "log_odds_ratio": -0.31256797909736633, + "logits/chosen": -1.0876035690307617, + "logits/rejected": -0.628466010093689, + "logps/chosen": -0.811927080154419, + "logps/rejected": -3.3203463554382324, + "loss": 0.4024, + "nll_loss": 0.3711714446544647, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08119270205497742, + "rewards/margins": 0.2508419156074524, + "rewards/rejected": -0.3320346474647522, + "step": 612 + }, + { + "epoch": 0.3813374805598756, + "grad_norm": 0.5218482613563538, + "learning_rate": 1.9350000000000003e-05, + "log_odds_chosen": 3.0079641342163086, + "log_odds_ratio": -0.14967142045497894, + "logits/chosen": 1.8589670658111572, + "logits/rejected": 1.5846030712127686, + "logps/chosen": -1.0442512035369873, + "logps/rejected": -3.6195616722106934, + "loss": 0.7302, + "nll_loss": 0.7151949405670166, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10442513227462769, + "rewards/margins": 0.2575310468673706, + "rewards/rejected": -0.3619561791419983, + "step": 613 + }, + { + "epoch": 0.38195956454121305, + "grad_norm": 0.4662970006465912, + "learning_rate": 1.93e-05, + "log_odds_chosen": 3.828094005584717, + "log_odds_ratio": -0.0793749988079071, + "logits/chosen": 1.2202463150024414, + "logits/rejected": 0.9613503217697144, + "logps/chosen": -0.9680242538452148, + "logps/rejected": -4.134416580200195, + "loss": 0.6713, + "nll_loss": 0.6633151769638062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09680242836475372, + "rewards/margins": 0.3166392147541046, + "rewards/rejected": -0.41344162821769714, + "step": 614 + }, + { + "epoch": 0.38258164852255055, + "grad_norm": 0.32331582903862, + "learning_rate": 1.925e-05, + "log_odds_chosen": 2.5637335777282715, + "log_odds_ratio": -0.2143726497888565, + "logits/chosen": 0.5455524921417236, + "logits/rejected": 0.08359070122241974, + "logps/chosen": -0.7310885190963745, + "logps/rejected": -2.6874022483825684, + "loss": 0.5022, + "nll_loss": 0.48075175285339355, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07310885190963745, + "rewards/margins": 0.19563135504722595, + "rewards/rejected": -0.2687402069568634, + "step": 615 + }, + { + "epoch": 0.383203732503888, + "grad_norm": 0.8497756719589233, + "learning_rate": 1.9200000000000003e-05, + "log_odds_chosen": 3.15248966217041, + "log_odds_ratio": -0.3250788748264313, + "logits/chosen": 2.0130248069763184, + "logits/rejected": 1.5514307022094727, + "logps/chosen": -1.110895037651062, + "logps/rejected": -3.9910812377929688, + "loss": 0.859, + "nll_loss": 0.8265219926834106, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11108951270580292, + "rewards/margins": 0.2880186140537262, + "rewards/rejected": -0.3991081118583679, + "step": 616 + }, + { + "epoch": 0.3838258164852255, + "grad_norm": 0.29061028361320496, + "learning_rate": 1.915e-05, + "log_odds_chosen": 3.13297963142395, + "log_odds_ratio": -0.1648668348789215, + "logits/chosen": 0.3452632427215576, + "logits/rejected": 0.05708187818527222, + "logps/chosen": -0.825201153755188, + "logps/rejected": -3.448404312133789, + "loss": 0.6881, + "nll_loss": 0.6715787649154663, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08252011239528656, + "rewards/margins": 0.262320339679718, + "rewards/rejected": -0.34484046697616577, + "step": 617 + }, + { + "epoch": 0.38444790046656296, + "grad_norm": 1.4165366888046265, + "learning_rate": 1.91e-05, + "log_odds_chosen": 2.9382688999176025, + "log_odds_ratio": -0.19285082817077637, + "logits/chosen": 1.947314739227295, + "logits/rejected": 1.6154723167419434, + "logps/chosen": -0.8680057525634766, + "logps/rejected": -3.3975019454956055, + "loss": 0.7199, + "nll_loss": 0.700596034526825, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08680057525634766, + "rewards/margins": 0.252949595451355, + "rewards/rejected": -0.339750200510025, + "step": 618 + }, + { + "epoch": 0.38506998444790047, + "grad_norm": 0.49961647391319275, + "learning_rate": 1.9050000000000002e-05, + "log_odds_chosen": 3.0872249603271484, + "log_odds_ratio": -0.21634311974048615, + "logits/chosen": 2.1546196937561035, + "logits/rejected": 1.6376492977142334, + "logps/chosen": -0.9351372122764587, + "logps/rejected": -3.629427433013916, + "loss": 0.7868, + "nll_loss": 0.7651870250701904, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09351371973752975, + "rewards/margins": 0.2694290280342102, + "rewards/rejected": -0.36294275522232056, + "step": 619 + }, + { + "epoch": 0.3856920684292379, + "grad_norm": 0.6055630445480347, + "learning_rate": 1.9e-05, + "log_odds_chosen": 4.04840087890625, + "log_odds_ratio": -0.11545915901660919, + "logits/chosen": 0.9643297791481018, + "logits/rejected": 0.8798283338546753, + "logps/chosen": -1.150406002998352, + "logps/rejected": -4.413238048553467, + "loss": 0.7236, + "nll_loss": 0.7120473384857178, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1150406002998352, + "rewards/margins": 0.32628321647644043, + "rewards/rejected": -0.44132378697395325, + "step": 620 + }, + { + "epoch": 0.38631415241057543, + "grad_norm": 0.40856286883354187, + "learning_rate": 1.895e-05, + "log_odds_chosen": 3.0728964805603027, + "log_odds_ratio": -0.2111356258392334, + "logits/chosen": 0.6732938885688782, + "logits/rejected": 2.3867170810699463, + "logps/chosen": -0.8632257580757141, + "logps/rejected": -3.499908208847046, + "loss": 0.5808, + "nll_loss": 0.5596716403961182, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08632257580757141, + "rewards/margins": 0.2636682391166687, + "rewards/rejected": -0.3499908149242401, + "step": 621 + }, + { + "epoch": 0.38693623639191294, + "grad_norm": 0.3176197111606598, + "learning_rate": 1.8900000000000002e-05, + "log_odds_chosen": 3.05029296875, + "log_odds_ratio": -0.26527780294418335, + "logits/chosen": 0.6581851243972778, + "logits/rejected": 0.3060731887817383, + "logps/chosen": -0.9692713618278503, + "logps/rejected": -3.717808246612549, + "loss": 0.5477, + "nll_loss": 0.5212218165397644, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09692715108394623, + "rewards/margins": 0.2748537063598633, + "rewards/rejected": -0.3717808723449707, + "step": 622 + }, + { + "epoch": 0.3875583203732504, + "grad_norm": 0.3400088846683502, + "learning_rate": 1.885e-05, + "log_odds_chosen": 4.018574237823486, + "log_odds_ratio": -0.2551661729812622, + "logits/chosen": 1.1399219036102295, + "logits/rejected": -0.18498098850250244, + "logps/chosen": -0.8985238075256348, + "logps/rejected": -4.519594192504883, + "loss": 0.6992, + "nll_loss": 0.673705518245697, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08985237777233124, + "rewards/margins": 0.3621070384979248, + "rewards/rejected": -0.45195937156677246, + "step": 623 + }, + { + "epoch": 0.3881804043545879, + "grad_norm": 0.43447425961494446, + "learning_rate": 1.88e-05, + "log_odds_chosen": 3.226219654083252, + "log_odds_ratio": -0.14265930652618408, + "logits/chosen": 0.49467071890830994, + "logits/rejected": 0.27663129568099976, + "logps/chosen": -0.838038444519043, + "logps/rejected": -3.3186306953430176, + "loss": 0.5229, + "nll_loss": 0.5086257457733154, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08380384743213654, + "rewards/margins": 0.2480592131614685, + "rewards/rejected": -0.33186307549476624, + "step": 624 + }, + { + "epoch": 0.38880248833592534, + "grad_norm": 0.3414386212825775, + "learning_rate": 1.8750000000000002e-05, + "log_odds_chosen": 3.874777317047119, + "log_odds_ratio": -0.19898560643196106, + "logits/chosen": 1.617907166481018, + "logits/rejected": 0.7998380064964294, + "logps/chosen": -1.1137770414352417, + "logps/rejected": -4.6822614669799805, + "loss": 0.6368, + "nll_loss": 0.6169208288192749, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11137770861387253, + "rewards/margins": 0.35684841871261597, + "rewards/rejected": -0.4682261347770691, + "step": 625 + }, + { + "epoch": 0.38942457231726285, + "grad_norm": 0.28912225365638733, + "learning_rate": 1.87e-05, + "log_odds_chosen": 4.579721927642822, + "log_odds_ratio": -0.09703951328992844, + "logits/chosen": 0.810788094997406, + "logits/rejected": 0.904729962348938, + "logps/chosen": -0.7196419835090637, + "logps/rejected": -4.698285102844238, + "loss": 0.5709, + "nll_loss": 0.5611502528190613, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07196419686079025, + "rewards/margins": 0.39786434173583984, + "rewards/rejected": -0.4698285460472107, + "step": 626 + }, + { + "epoch": 0.3900466562986003, + "grad_norm": 0.3980149030685425, + "learning_rate": 1.865e-05, + "log_odds_chosen": 3.308889865875244, + "log_odds_ratio": -0.1399206817150116, + "logits/chosen": 2.5614981651306152, + "logits/rejected": 0.708278238773346, + "logps/chosen": -0.9311361312866211, + "logps/rejected": -3.834683895111084, + "loss": 0.7021, + "nll_loss": 0.6881444454193115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09311362355947495, + "rewards/margins": 0.29035475850105286, + "rewards/rejected": -0.3834683895111084, + "step": 627 + }, + { + "epoch": 0.3906687402799378, + "grad_norm": 3.1673495769500732, + "learning_rate": 1.86e-05, + "log_odds_chosen": 3.9142515659332275, + "log_odds_ratio": -0.12769760191440582, + "logits/chosen": 0.7221090197563171, + "logits/rejected": 0.20109251141548157, + "logps/chosen": -1.9082034826278687, + "logps/rejected": -5.504971504211426, + "loss": 0.8577, + "nll_loss": 0.8449530601501465, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1908203661441803, + "rewards/margins": 0.3596767783164978, + "rewards/rejected": -0.5504971146583557, + "step": 628 + }, + { + "epoch": 0.39129082426127526, + "grad_norm": 0.38298946619033813, + "learning_rate": 1.855e-05, + "log_odds_chosen": 4.3971781730651855, + "log_odds_ratio": -0.14027798175811768, + "logits/chosen": 0.8268332481384277, + "logits/rejected": 2.3540313243865967, + "logps/chosen": -1.0750876665115356, + "logps/rejected": -4.887978553771973, + "loss": 0.6025, + "nll_loss": 0.5884767174720764, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10750877112150192, + "rewards/margins": 0.3812890648841858, + "rewards/rejected": -0.4887978136539459, + "step": 629 + }, + { + "epoch": 0.39191290824261277, + "grad_norm": 2.4173014163970947, + "learning_rate": 1.85e-05, + "log_odds_chosen": 3.843627452850342, + "log_odds_ratio": -0.27270084619522095, + "logits/chosen": 2.5830585956573486, + "logits/rejected": 2.2897543907165527, + "logps/chosen": -1.134068250656128, + "logps/rejected": -4.5426716804504395, + "loss": 0.817, + "nll_loss": 0.7896883487701416, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11340682208538055, + "rewards/margins": 0.3408603370189667, + "rewards/rejected": -0.45426714420318604, + "step": 630 + }, + { + "epoch": 0.3925349922239502, + "grad_norm": 0.37030836939811707, + "learning_rate": 1.845e-05, + "log_odds_chosen": 4.004006385803223, + "log_odds_ratio": -0.220392107963562, + "logits/chosen": 1.8551138639450073, + "logits/rejected": 1.160433053970337, + "logps/chosen": -0.9171284437179565, + "logps/rejected": -4.551581382751465, + "loss": 0.63, + "nll_loss": 0.6079891324043274, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0917128473520279, + "rewards/margins": 0.3634452819824219, + "rewards/rejected": -0.45515817403793335, + "step": 631 + }, + { + "epoch": 0.3931570762052877, + "grad_norm": 0.36467447876930237, + "learning_rate": 1.84e-05, + "log_odds_chosen": 2.4729325771331787, + "log_odds_ratio": -0.19554854929447174, + "logits/chosen": 1.852522373199463, + "logits/rejected": 1.3634965419769287, + "logps/chosen": -1.3775891065597534, + "logps/rejected": -3.4965128898620605, + "loss": 0.692, + "nll_loss": 0.6724200248718262, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13775891065597534, + "rewards/margins": 0.21189242601394653, + "rewards/rejected": -0.3496513068675995, + "step": 632 + }, + { + "epoch": 0.3937791601866252, + "grad_norm": 0.535660982131958, + "learning_rate": 1.8350000000000002e-05, + "log_odds_chosen": 1.2777886390686035, + "log_odds_ratio": -0.5359351634979248, + "logits/chosen": 1.1757664680480957, + "logits/rejected": 0.9977664947509766, + "logps/chosen": -1.3026446104049683, + "logps/rejected": -2.4279894828796387, + "loss": 0.6938, + "nll_loss": 0.6402309536933899, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.13026446104049683, + "rewards/margins": 0.11253447830677032, + "rewards/rejected": -0.24279895424842834, + "step": 633 + }, + { + "epoch": 0.3944012441679627, + "grad_norm": 0.40152236819267273, + "learning_rate": 1.83e-05, + "log_odds_chosen": 3.575596809387207, + "log_odds_ratio": -0.08371242880821228, + "logits/chosen": 2.7394649982452393, + "logits/rejected": 1.1751067638397217, + "logps/chosen": -0.9250873327255249, + "logps/rejected": -3.953653335571289, + "loss": 0.7159, + "nll_loss": 0.7075378894805908, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0925087258219719, + "rewards/margins": 0.3028566241264343, + "rewards/rejected": -0.3953653573989868, + "step": 634 + }, + { + "epoch": 0.39502332814930013, + "grad_norm": 0.3517719805240631, + "learning_rate": 1.825e-05, + "log_odds_chosen": 3.799075126647949, + "log_odds_ratio": -0.07084185630083084, + "logits/chosen": 0.9560929536819458, + "logits/rejected": 0.8968772888183594, + "logps/chosen": -0.9981387257575989, + "logps/rejected": -4.241707801818848, + "loss": 0.6964, + "nll_loss": 0.6893182396888733, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09981386363506317, + "rewards/margins": 0.32435691356658936, + "rewards/rejected": -0.4241707921028137, + "step": 635 + }, + { + "epoch": 0.39564541213063764, + "grad_norm": 0.31663310527801514, + "learning_rate": 1.8200000000000002e-05, + "log_odds_chosen": 4.332825183868408, + "log_odds_ratio": -0.07596045732498169, + "logits/chosen": 1.717186689376831, + "logits/rejected": 0.9638761878013611, + "logps/chosen": -0.711776614189148, + "logps/rejected": -4.22796106338501, + "loss": 0.5847, + "nll_loss": 0.5770620107650757, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0711776614189148, + "rewards/margins": 0.3516184091567993, + "rewards/rejected": -0.4227961301803589, + "step": 636 + }, + { + "epoch": 0.3962674961119751, + "grad_norm": 0.361131489276886, + "learning_rate": 1.815e-05, + "log_odds_chosen": 2.669607162475586, + "log_odds_ratio": -0.34226924180984497, + "logits/chosen": 2.9982523918151855, + "logits/rejected": 2.0027904510498047, + "logps/chosen": -0.8131262063980103, + "logps/rejected": -3.100098133087158, + "loss": 0.7247, + "nll_loss": 0.6904410719871521, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0813126266002655, + "rewards/margins": 0.22869719564914703, + "rewards/rejected": -0.31000980734825134, + "step": 637 + }, + { + "epoch": 0.3968895800933126, + "grad_norm": 0.3656984269618988, + "learning_rate": 1.81e-05, + "log_odds_chosen": 4.020977020263672, + "log_odds_ratio": -0.2539737820625305, + "logits/chosen": 2.4173789024353027, + "logits/rejected": 1.9622955322265625, + "logps/chosen": -0.8449468612670898, + "logps/rejected": -4.402913570404053, + "loss": 0.7507, + "nll_loss": 0.7252607345581055, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0844946876168251, + "rewards/margins": 0.3557966947555542, + "rewards/rejected": -0.4402914047241211, + "step": 638 + }, + { + "epoch": 0.39751166407465005, + "grad_norm": 0.9636780619621277, + "learning_rate": 1.805e-05, + "log_odds_chosen": 2.350464344024658, + "log_odds_ratio": -0.4629105031490326, + "logits/chosen": 1.6177942752838135, + "logits/rejected": 0.7681326270103455, + "logps/chosen": -1.3030866384506226, + "logps/rejected": -3.3601512908935547, + "loss": 0.7014, + "nll_loss": 0.655118465423584, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13030865788459778, + "rewards/margins": 0.20570647716522217, + "rewards/rejected": -0.33601510524749756, + "step": 639 + }, + { + "epoch": 0.39813374805598756, + "grad_norm": 0.34603214263916016, + "learning_rate": 1.8e-05, + "log_odds_chosen": 2.6793177127838135, + "log_odds_ratio": -0.2908114194869995, + "logits/chosen": 1.2206950187683105, + "logits/rejected": 0.9481405019760132, + "logps/chosen": -0.8355297446250916, + "logps/rejected": -2.9877710342407227, + "loss": 0.5837, + "nll_loss": 0.5545707941055298, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08355297893285751, + "rewards/margins": 0.21522416174411774, + "rewards/rejected": -0.29877713322639465, + "step": 640 + }, + { + "epoch": 0.39875583203732506, + "grad_norm": 0.40048831701278687, + "learning_rate": 1.795e-05, + "log_odds_chosen": 4.30087423324585, + "log_odds_ratio": -0.019221052527427673, + "logits/chosen": 3.0978634357452393, + "logits/rejected": 1.65860915184021, + "logps/chosen": -0.8779377341270447, + "logps/rejected": -4.604584693908691, + "loss": 0.7469, + "nll_loss": 0.7449491620063782, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08779378235340118, + "rewards/margins": 0.3726646900177002, + "rewards/rejected": -0.4604584574699402, + "step": 641 + }, + { + "epoch": 0.3993779160186625, + "grad_norm": 0.29520246386528015, + "learning_rate": 1.79e-05, + "log_odds_chosen": 3.081165075302124, + "log_odds_ratio": -0.14847292006015778, + "logits/chosen": 0.902208685874939, + "logits/rejected": 0.8600935339927673, + "logps/chosen": -1.0332772731781006, + "logps/rejected": -3.71449875831604, + "loss": 0.5638, + "nll_loss": 0.5489675998687744, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10332773625850677, + "rewards/margins": 0.26812219619750977, + "rewards/rejected": -0.37144988775253296, + "step": 642 + }, + { + "epoch": 0.4, + "grad_norm": 0.3169514834880829, + "learning_rate": 1.785e-05, + "log_odds_chosen": 4.029628753662109, + "log_odds_ratio": -0.07181131094694138, + "logits/chosen": 0.8769886493682861, + "logits/rejected": 0.4741711914539337, + "logps/chosen": -0.7680267095565796, + "logps/rejected": -4.1113505363464355, + "loss": 0.5882, + "nll_loss": 0.5809819102287292, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07680267095565796, + "rewards/margins": 0.3343323767185211, + "rewards/rejected": -0.4111350476741791, + "step": 643 + }, + { + "epoch": 0.4006220839813375, + "grad_norm": 0.37789186835289, + "learning_rate": 1.78e-05, + "log_odds_chosen": 5.709261417388916, + "log_odds_ratio": -0.12950600683689117, + "logits/chosen": 1.899143934249878, + "logits/rejected": 1.7838191986083984, + "logps/chosen": -0.7111093997955322, + "logps/rejected": -5.467113494873047, + "loss": 0.7045, + "nll_loss": 0.691598117351532, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07111093401908875, + "rewards/margins": 0.47560036182403564, + "rewards/rejected": -0.5467113256454468, + "step": 644 + }, + { + "epoch": 0.401244167962675, + "grad_norm": 0.311746209859848, + "learning_rate": 1.775e-05, + "log_odds_chosen": 3.939924716949463, + "log_odds_ratio": -0.24164189398288727, + "logits/chosen": 2.170393228530884, + "logits/rejected": 1.6897330284118652, + "logps/chosen": -0.8287167549133301, + "logps/rejected": -4.303630352020264, + "loss": 0.6971, + "nll_loss": 0.6729692220687866, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08287167549133301, + "rewards/margins": 0.3474913537502289, + "rewards/rejected": -0.4303630292415619, + "step": 645 + }, + { + "epoch": 0.40186625194401243, + "grad_norm": 0.30233389139175415, + "learning_rate": 1.77e-05, + "log_odds_chosen": 3.523589849472046, + "log_odds_ratio": -0.2539083957672119, + "logits/chosen": 0.255723237991333, + "logits/rejected": 0.8434269428253174, + "logps/chosen": -0.8574182391166687, + "logps/rejected": -3.7108993530273438, + "loss": 0.4845, + "nll_loss": 0.4590851068496704, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08574181795120239, + "rewards/margins": 0.285348117351532, + "rewards/rejected": -0.3710899353027344, + "step": 646 + }, + { + "epoch": 0.40248833592534994, + "grad_norm": 0.3650731146335602, + "learning_rate": 1.765e-05, + "log_odds_chosen": 1.8685635328292847, + "log_odds_ratio": -0.4129212498664856, + "logits/chosen": 1.030846118927002, + "logits/rejected": 2.034632444381714, + "logps/chosen": -0.8975147008895874, + "logps/rejected": -2.4678120613098145, + "loss": 0.6229, + "nll_loss": 0.5816302299499512, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0897514745593071, + "rewards/margins": 0.15702971816062927, + "rewards/rejected": -0.24678120017051697, + "step": 647 + }, + { + "epoch": 0.4031104199066874, + "grad_norm": 0.3364572823047638, + "learning_rate": 1.76e-05, + "log_odds_chosen": 3.6588826179504395, + "log_odds_ratio": -0.10638586431741714, + "logits/chosen": 1.2081890106201172, + "logits/rejected": 1.002839207649231, + "logps/chosen": -0.8798947930335999, + "logps/rejected": -4.0094380378723145, + "loss": 0.5447, + "nll_loss": 0.5340352058410645, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08798947930335999, + "rewards/margins": 0.312954306602478, + "rewards/rejected": -0.4009438157081604, + "step": 648 + }, + { + "epoch": 0.4037325038880249, + "grad_norm": 0.4246959984302521, + "learning_rate": 1.755e-05, + "log_odds_chosen": 5.4836812019348145, + "log_odds_ratio": -0.11485705524682999, + "logits/chosen": 1.9523580074310303, + "logits/rejected": 2.006519079208374, + "logps/chosen": -0.7004702091217041, + "logps/rejected": -5.304394245147705, + "loss": 0.7531, + "nll_loss": 0.7416150569915771, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07004702091217041, + "rewards/margins": 0.46039241552352905, + "rewards/rejected": -0.5304394364356995, + "step": 649 + }, + { + "epoch": 0.40435458786936235, + "grad_norm": 0.32653963565826416, + "learning_rate": 1.75e-05, + "log_odds_chosen": 4.108030319213867, + "log_odds_ratio": -0.10118812322616577, + "logits/chosen": 2.7480831146240234, + "logits/rejected": 1.3352696895599365, + "logps/chosen": -0.8290809392929077, + "logps/rejected": -4.4214935302734375, + "loss": 0.8378, + "nll_loss": 0.8277164697647095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08290809392929077, + "rewards/margins": 0.3592412769794464, + "rewards/rejected": -0.4421493411064148, + "step": 650 + }, + { + "epoch": 0.40497667185069985, + "grad_norm": 0.4039763808250427, + "learning_rate": 1.745e-05, + "log_odds_chosen": 3.81673264503479, + "log_odds_ratio": -0.19408871233463287, + "logits/chosen": 1.0739060640335083, + "logits/rejected": 0.9814785718917847, + "logps/chosen": -0.716781497001648, + "logps/rejected": -3.921405553817749, + "loss": 0.5342, + "nll_loss": 0.5147431492805481, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07167814671993256, + "rewards/margins": 0.3204624354839325, + "rewards/rejected": -0.39214056730270386, + "step": 651 + }, + { + "epoch": 0.4055987558320373, + "grad_norm": 0.4498097598552704, + "learning_rate": 1.74e-05, + "log_odds_chosen": 4.934605598449707, + "log_odds_ratio": -0.15549834072589874, + "logits/chosen": 3.4097490310668945, + "logits/rejected": 3.1473355293273926, + "logps/chosen": -0.8442947268486023, + "logps/rejected": -5.246354103088379, + "loss": 0.8392, + "nll_loss": 0.8236282467842102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08442947268486023, + "rewards/margins": 0.4402059018611908, + "rewards/rejected": -0.524635374546051, + "step": 652 + }, + { + "epoch": 0.4062208398133748, + "grad_norm": 0.3200288414955139, + "learning_rate": 1.7349999999999998e-05, + "log_odds_chosen": 2.3036115169525146, + "log_odds_ratio": -0.26165878772735596, + "logits/chosen": 0.7089138627052307, + "logits/rejected": 0.37762632966041565, + "logps/chosen": -0.7141548991203308, + "logps/rejected": -2.460519313812256, + "loss": 0.5317, + "nll_loss": 0.5055035352706909, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0714154914021492, + "rewards/margins": 0.17463642358779907, + "rewards/rejected": -0.24605193734169006, + "step": 653 + }, + { + "epoch": 0.40684292379471226, + "grad_norm": 0.36487001180648804, + "learning_rate": 1.73e-05, + "log_odds_chosen": 4.135533332824707, + "log_odds_ratio": -0.21677449345588684, + "logits/chosen": 0.40090829133987427, + "logits/rejected": 0.37310242652893066, + "logps/chosen": -1.2473537921905518, + "logps/rejected": -5.138498306274414, + "loss": 0.5796, + "nll_loss": 0.557910680770874, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12473537027835846, + "rewards/margins": 0.38911446928977966, + "rewards/rejected": -0.5138498544692993, + "step": 654 + }, + { + "epoch": 0.40746500777604977, + "grad_norm": 0.3971942961215973, + "learning_rate": 1.725e-05, + "log_odds_chosen": 5.033882141113281, + "log_odds_ratio": -0.09416782855987549, + "logits/chosen": 3.2653441429138184, + "logits/rejected": 1.552402377128601, + "logps/chosen": -0.847460925579071, + "logps/rejected": -5.348739147186279, + "loss": 0.8744, + "nll_loss": 0.8649776577949524, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0847460925579071, + "rewards/margins": 0.45012784004211426, + "rewards/rejected": -0.5348739624023438, + "step": 655 + }, + { + "epoch": 0.4080870917573872, + "grad_norm": 0.388738214969635, + "learning_rate": 1.7199999999999998e-05, + "log_odds_chosen": 5.930737495422363, + "log_odds_ratio": -0.033261630684137344, + "logits/chosen": 2.618645191192627, + "logits/rejected": 1.6277352571487427, + "logps/chosen": -1.0782268047332764, + "logps/rejected": -6.42779541015625, + "loss": 0.6678, + "nll_loss": 0.6645084023475647, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10782268643379211, + "rewards/margins": 0.5349568128585815, + "rewards/rejected": -0.6427795886993408, + "step": 656 + }, + { + "epoch": 0.40870917573872473, + "grad_norm": 0.6455793976783752, + "learning_rate": 1.7150000000000004e-05, + "log_odds_chosen": 4.305086612701416, + "log_odds_ratio": -0.03871765732765198, + "logits/chosen": 2.1431026458740234, + "logits/rejected": -0.24009643495082855, + "logps/chosen": -0.8816466927528381, + "logps/rejected": -4.646897792816162, + "loss": 0.6774, + "nll_loss": 0.6735305786132812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08816467225551605, + "rewards/margins": 0.3765251040458679, + "rewards/rejected": -0.46468979120254517, + "step": 657 + }, + { + "epoch": 0.40933125972006223, + "grad_norm": 0.36170580983161926, + "learning_rate": 1.7100000000000002e-05, + "log_odds_chosen": 3.966200590133667, + "log_odds_ratio": -0.18316572904586792, + "logits/chosen": 2.2034897804260254, + "logits/rejected": 2.678478240966797, + "logps/chosen": -0.7432326674461365, + "logps/rejected": -4.13386869430542, + "loss": 0.6123, + "nll_loss": 0.5939409136772156, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07432326674461365, + "rewards/margins": 0.3390636146068573, + "rewards/rejected": -0.41338688135147095, + "step": 658 + }, + { + "epoch": 0.4099533437013997, + "grad_norm": 0.3560597896575928, + "learning_rate": 1.705e-05, + "log_odds_chosen": 3.1310007572174072, + "log_odds_ratio": -0.12409868091344833, + "logits/chosen": 1.4124412536621094, + "logits/rejected": 0.49765628576278687, + "logps/chosen": -0.9497761726379395, + "logps/rejected": -3.5961623191833496, + "loss": 0.6037, + "nll_loss": 0.5912914872169495, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09497761726379395, + "rewards/margins": 0.26463860273361206, + "rewards/rejected": -0.359616219997406, + "step": 659 + }, + { + "epoch": 0.4105754276827372, + "grad_norm": 2.7320940494537354, + "learning_rate": 1.7000000000000003e-05, + "log_odds_chosen": 4.352439880371094, + "log_odds_ratio": -0.029573818668723106, + "logits/chosen": 0.5563382506370544, + "logits/rejected": -0.09724438190460205, + "logps/chosen": -0.9204356670379639, + "logps/rejected": -4.707180976867676, + "loss": 0.5443, + "nll_loss": 0.5413377285003662, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09204356372356415, + "rewards/margins": 0.37867453694343567, + "rewards/rejected": -0.4707180857658386, + "step": 660 + }, + { + "epoch": 0.41119751166407464, + "grad_norm": 0.5051707029342651, + "learning_rate": 1.6950000000000002e-05, + "log_odds_chosen": 3.2970833778381348, + "log_odds_ratio": -0.17525769770145416, + "logits/chosen": 1.211416244506836, + "logits/rejected": 1.6842365264892578, + "logps/chosen": -1.3437680006027222, + "logps/rejected": -4.288342475891113, + "loss": 0.7476, + "nll_loss": 0.730032205581665, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.13437680900096893, + "rewards/margins": 0.29445746541023254, + "rewards/rejected": -0.4288342595100403, + "step": 661 + }, + { + "epoch": 0.41181959564541215, + "grad_norm": 0.3857666552066803, + "learning_rate": 1.69e-05, + "log_odds_chosen": 3.65859055519104, + "log_odds_ratio": -0.2163608968257904, + "logits/chosen": 2.043635606765747, + "logits/rejected": 1.3761276006698608, + "logps/chosen": -0.7962802648544312, + "logps/rejected": -3.883711338043213, + "loss": 0.6613, + "nll_loss": 0.6396494507789612, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07962802797555923, + "rewards/margins": 0.3087431490421295, + "rewards/rejected": -0.38837113976478577, + "step": 662 + }, + { + "epoch": 0.4124416796267496, + "grad_norm": 3.1776084899902344, + "learning_rate": 1.6850000000000003e-05, + "log_odds_chosen": 2.944477081298828, + "log_odds_ratio": -0.2576942443847656, + "logits/chosen": 2.157097339630127, + "logits/rejected": 1.435225009918213, + "logps/chosen": -0.9808058738708496, + "logps/rejected": -3.4601707458496094, + "loss": 0.9127, + "nll_loss": 0.8869736194610596, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0980805903673172, + "rewards/margins": 0.24793650209903717, + "rewards/rejected": -0.34601709246635437, + "step": 663 + }, + { + "epoch": 0.4130637636080871, + "grad_norm": 0.39393720030784607, + "learning_rate": 1.6800000000000002e-05, + "log_odds_chosen": 3.360602378845215, + "log_odds_ratio": -0.3380497097969055, + "logits/chosen": 1.5823501348495483, + "logits/rejected": 1.2890987396240234, + "logps/chosen": -0.8942395448684692, + "logps/rejected": -3.9563956260681152, + "loss": 0.6122, + "nll_loss": 0.5784330368041992, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08942395448684692, + "rewards/margins": 0.3062155842781067, + "rewards/rejected": -0.395639568567276, + "step": 664 + }, + { + "epoch": 0.41368584758942456, + "grad_norm": 0.32575756311416626, + "learning_rate": 1.675e-05, + "log_odds_chosen": 6.160964012145996, + "log_odds_ratio": -0.1419294774532318, + "logits/chosen": 0.717900276184082, + "logits/rejected": 0.69581139087677, + "logps/chosen": -0.8181707262992859, + "logps/rejected": -6.384195327758789, + "loss": 0.5373, + "nll_loss": 0.5230689644813538, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08181708306074142, + "rewards/margins": 0.5566024780273438, + "rewards/rejected": -0.6384195685386658, + "step": 665 + }, + { + "epoch": 0.41430793157076207, + "grad_norm": 0.3093670606613159, + "learning_rate": 1.6700000000000003e-05, + "log_odds_chosen": 2.2433371543884277, + "log_odds_ratio": -0.32789260149002075, + "logits/chosen": 0.5118715763092041, + "logits/rejected": 0.6324558258056641, + "logps/chosen": -0.9613909721374512, + "logps/rejected": -2.8688957691192627, + "loss": 0.6066, + "nll_loss": 0.5737725496292114, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0961390882730484, + "rewards/margins": 0.19075046479701996, + "rewards/rejected": -0.28688958287239075, + "step": 666 + }, + { + "epoch": 0.4149300155520995, + "grad_norm": 0.8644431233406067, + "learning_rate": 1.665e-05, + "log_odds_chosen": 4.1721930503845215, + "log_odds_ratio": -0.17341499030590057, + "logits/chosen": 2.2626147270202637, + "logits/rejected": 1.2522985935211182, + "logps/chosen": -0.8507257103919983, + "logps/rejected": -4.383547782897949, + "loss": 0.7139, + "nll_loss": 0.696591854095459, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08507256954908371, + "rewards/margins": 0.3532821834087372, + "rewards/rejected": -0.4383547604084015, + "step": 667 + }, + { + "epoch": 0.415552099533437, + "grad_norm": 1.0994956493377686, + "learning_rate": 1.66e-05, + "log_odds_chosen": 1.5850971937179565, + "log_odds_ratio": -0.5201008915901184, + "logits/chosen": 0.19767147302627563, + "logits/rejected": -0.6920922994613647, + "logps/chosen": -1.2969383001327515, + "logps/rejected": -2.6513142585754395, + "loss": 0.5958, + "nll_loss": 0.5437737703323364, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12969382107257843, + "rewards/margins": 0.13543760776519775, + "rewards/rejected": -0.265131413936615, + "step": 668 + }, + { + "epoch": 0.4161741835147745, + "grad_norm": 0.3375812768936157, + "learning_rate": 1.6550000000000002e-05, + "log_odds_chosen": 3.9452292919158936, + "log_odds_ratio": -0.091424360871315, + "logits/chosen": 1.0888257026672363, + "logits/rejected": 0.020370274782180786, + "logps/chosen": -0.9099546074867249, + "logps/rejected": -4.351152420043945, + "loss": 0.537, + "nll_loss": 0.5278565287590027, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09099546074867249, + "rewards/margins": 0.34411975741386414, + "rewards/rejected": -0.43511518836021423, + "step": 669 + }, + { + "epoch": 0.416796267496112, + "grad_norm": 0.4032786190509796, + "learning_rate": 1.65e-05, + "log_odds_chosen": 3.414780616760254, + "log_odds_ratio": -0.18640896677970886, + "logits/chosen": 0.40261098742485046, + "logits/rejected": 0.1558481752872467, + "logps/chosen": -0.7581571936607361, + "logps/rejected": -3.518937110900879, + "loss": 0.5746, + "nll_loss": 0.5559825301170349, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07581572234630585, + "rewards/margins": 0.2760779857635498, + "rewards/rejected": -0.35189372301101685, + "step": 670 + }, + { + "epoch": 0.41741835147744943, + "grad_norm": 0.36787623167037964, + "learning_rate": 1.645e-05, + "log_odds_chosen": 4.076958656311035, + "log_odds_ratio": -0.1996491253376007, + "logits/chosen": 1.331395149230957, + "logits/rejected": 0.43952685594558716, + "logps/chosen": -0.7202173471450806, + "logps/rejected": -4.184619903564453, + "loss": 0.5901, + "nll_loss": 0.570127010345459, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0720217376947403, + "rewards/margins": 0.34644028544425964, + "rewards/rejected": -0.41846203804016113, + "step": 671 + }, + { + "epoch": 0.41804043545878694, + "grad_norm": 0.41560977697372437, + "learning_rate": 1.6400000000000002e-05, + "log_odds_chosen": 4.469395637512207, + "log_odds_ratio": -0.1743958741426468, + "logits/chosen": 1.108622431755066, + "logits/rejected": 1.4150950908660889, + "logps/chosen": -0.9114997982978821, + "logps/rejected": -4.8220343589782715, + "loss": 0.7003, + "nll_loss": 0.6828351020812988, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09114998579025269, + "rewards/margins": 0.3910534381866455, + "rewards/rejected": -0.4822034537792206, + "step": 672 + }, + { + "epoch": 0.4186625194401244, + "grad_norm": 0.2804428040981293, + "learning_rate": 1.635e-05, + "log_odds_chosen": 4.052908420562744, + "log_odds_ratio": -0.12896524369716644, + "logits/chosen": -1.030667781829834, + "logits/rejected": 0.36360377073287964, + "logps/chosen": -0.73125821352005, + "logps/rejected": -4.111758232116699, + "loss": 0.4006, + "nll_loss": 0.38765519857406616, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07312582433223724, + "rewards/margins": 0.3380500078201294, + "rewards/rejected": -0.41117581725120544, + "step": 673 + }, + { + "epoch": 0.4192846034214619, + "grad_norm": 0.37161964178085327, + "learning_rate": 1.63e-05, + "log_odds_chosen": 1.824882984161377, + "log_odds_ratio": -0.35899919271469116, + "logits/chosen": 0.13106179237365723, + "logits/rejected": 0.015201985836029053, + "logps/chosen": -0.9987883567810059, + "logps/rejected": -2.579470157623291, + "loss": 0.6135, + "nll_loss": 0.5776486396789551, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09987884014844894, + "rewards/margins": 0.1580681949853897, + "rewards/rejected": -0.25794702768325806, + "step": 674 + }, + { + "epoch": 0.4199066874027994, + "grad_norm": 0.3221202790737152, + "learning_rate": 1.6250000000000002e-05, + "log_odds_chosen": 4.007673263549805, + "log_odds_ratio": -0.1417747437953949, + "logits/chosen": 1.9651204347610474, + "logits/rejected": 1.1200003623962402, + "logps/chosen": -0.862080454826355, + "logps/rejected": -4.311037063598633, + "loss": 0.6915, + "nll_loss": 0.6773675680160522, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0862080454826355, + "rewards/margins": 0.3448956310749054, + "rewards/rejected": -0.4311037063598633, + "step": 675 + }, + { + "epoch": 0.42052877138413686, + "grad_norm": 0.4121679961681366, + "learning_rate": 1.62e-05, + "log_odds_chosen": 1.8610512018203735, + "log_odds_ratio": -0.46205413341522217, + "logits/chosen": 0.4040051996707916, + "logits/rejected": 1.0429785251617432, + "logps/chosen": -0.926953911781311, + "logps/rejected": -2.516986131668091, + "loss": 0.5862, + "nll_loss": 0.5399788618087769, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09269539266824722, + "rewards/margins": 0.15900322794914246, + "rewards/rejected": -0.2516986131668091, + "step": 676 + }, + { + "epoch": 0.42115085536547436, + "grad_norm": 0.4730220139026642, + "learning_rate": 1.6150000000000003e-05, + "log_odds_chosen": 1.888508915901184, + "log_odds_ratio": -0.6478555798530579, + "logits/chosen": 1.903955101966858, + "logits/rejected": 1.5147833824157715, + "logps/chosen": -1.0022650957107544, + "logps/rejected": -2.6973609924316406, + "loss": 0.7048, + "nll_loss": 0.6400637626647949, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.1002265140414238, + "rewards/margins": 0.16950958967208862, + "rewards/rejected": -0.26973608136177063, + "step": 677 + }, + { + "epoch": 0.4217729393468118, + "grad_norm": 0.36020079255104065, + "learning_rate": 1.6100000000000002e-05, + "log_odds_chosen": 2.8405559062957764, + "log_odds_ratio": -0.3089936077594757, + "logits/chosen": 1.1266642808914185, + "logits/rejected": 1.9728171825408936, + "logps/chosen": -0.886470377445221, + "logps/rejected": -3.355649948120117, + "loss": 0.7891, + "nll_loss": 0.7581912279129028, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0886470377445221, + "rewards/margins": 0.2469179928302765, + "rewards/rejected": -0.3355650305747986, + "step": 678 + }, + { + "epoch": 0.4223950233281493, + "grad_norm": 0.30074650049209595, + "learning_rate": 1.605e-05, + "log_odds_chosen": 3.77069354057312, + "log_odds_ratio": -0.19860349595546722, + "logits/chosen": -0.12679839134216309, + "logits/rejected": 1.3421680927276611, + "logps/chosen": -0.6535059213638306, + "logps/rejected": -3.7568020820617676, + "loss": 0.614, + "nll_loss": 0.5941286087036133, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.06535059213638306, + "rewards/margins": 0.3103296458721161, + "rewards/rejected": -0.37568023800849915, + "step": 679 + }, + { + "epoch": 0.4230171073094868, + "grad_norm": 0.3977271020412445, + "learning_rate": 1.6000000000000003e-05, + "log_odds_chosen": 2.453881025314331, + "log_odds_ratio": -0.35164380073547363, + "logits/chosen": -0.057597219944000244, + "logits/rejected": 2.3313894271850586, + "logps/chosen": -1.0195229053497314, + "logps/rejected": -3.0189602375030518, + "loss": 0.6306, + "nll_loss": 0.5954169034957886, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10195229947566986, + "rewards/margins": 0.19994372129440308, + "rewards/rejected": -0.30189603567123413, + "step": 680 + }, + { + "epoch": 0.4236391912908243, + "grad_norm": 0.3623410165309906, + "learning_rate": 1.595e-05, + "log_odds_chosen": 3.358813762664795, + "log_odds_ratio": -0.17362789809703827, + "logits/chosen": 1.127630352973938, + "logits/rejected": 0.4902253746986389, + "logps/chosen": -0.9184183478355408, + "logps/rejected": -3.821197509765625, + "loss": 0.6398, + "nll_loss": 0.6224524974822998, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09184183925390244, + "rewards/margins": 0.290277898311615, + "rewards/rejected": -0.38211971521377563, + "step": 681 + }, + { + "epoch": 0.42426127527216173, + "grad_norm": 0.41106173396110535, + "learning_rate": 1.59e-05, + "log_odds_chosen": 3.8026862144470215, + "log_odds_ratio": -0.0973445326089859, + "logits/chosen": 2.4839863777160645, + "logits/rejected": 1.665204405784607, + "logps/chosen": -0.588760256767273, + "logps/rejected": -3.5553464889526367, + "loss": 0.7788, + "nll_loss": 0.7690457105636597, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.058876026421785355, + "rewards/margins": 0.29665863513946533, + "rewards/rejected": -0.3555346429347992, + "step": 682 + }, + { + "epoch": 0.42488335925349924, + "grad_norm": 0.829839289188385, + "learning_rate": 1.5850000000000002e-05, + "log_odds_chosen": 1.962416172027588, + "log_odds_ratio": -0.36821892857551575, + "logits/chosen": 0.22051328420639038, + "logits/rejected": -0.3354063630104065, + "logps/chosen": -0.9992512464523315, + "logps/rejected": -2.68438720703125, + "loss": 0.5613, + "nll_loss": 0.5244842171669006, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09992511570453644, + "rewards/margins": 0.16851362586021423, + "rewards/rejected": -0.26843875646591187, + "step": 683 + }, + { + "epoch": 0.4255054432348367, + "grad_norm": 0.4520435333251953, + "learning_rate": 1.58e-05, + "log_odds_chosen": 3.9583230018615723, + "log_odds_ratio": -0.18740130960941315, + "logits/chosen": -0.35055220127105713, + "logits/rejected": -0.14263691008090973, + "logps/chosen": -1.0594209432601929, + "logps/rejected": -4.505016803741455, + "loss": 0.4304, + "nll_loss": 0.41164255142211914, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10594210028648376, + "rewards/margins": 0.34455960988998413, + "rewards/rejected": -0.4505016803741455, + "step": 684 + }, + { + "epoch": 0.4261275272161742, + "grad_norm": 0.3600594699382782, + "learning_rate": 1.575e-05, + "log_odds_chosen": 3.882615089416504, + "log_odds_ratio": -0.11261001229286194, + "logits/chosen": 1.8874908685684204, + "logits/rejected": 0.43688538670539856, + "logps/chosen": -0.9325780868530273, + "logps/rejected": -4.299278259277344, + "loss": 0.5315, + "nll_loss": 0.5202258825302124, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09325780719518661, + "rewards/margins": 0.33667001128196716, + "rewards/rejected": -0.4299277663230896, + "step": 685 + }, + { + "epoch": 0.42674961119751165, + "grad_norm": 0.5007074475288391, + "learning_rate": 1.5700000000000002e-05, + "log_odds_chosen": 1.91114342212677, + "log_odds_ratio": -0.26344698667526245, + "logits/chosen": 1.0757803916931152, + "logits/rejected": 1.6833606958389282, + "logps/chosen": -0.8399538993835449, + "logps/rejected": -2.355059862136841, + "loss": 0.5938, + "nll_loss": 0.5674468874931335, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08399539440870285, + "rewards/margins": 0.15151061117649078, + "rewards/rejected": -0.23550599813461304, + "step": 686 + }, + { + "epoch": 0.42737169517884915, + "grad_norm": 0.3760165870189667, + "learning_rate": 1.565e-05, + "log_odds_chosen": 2.5150809288024902, + "log_odds_ratio": -0.399432897567749, + "logits/chosen": 0.41969525814056396, + "logits/rejected": 0.30745965242385864, + "logps/chosen": -0.9256432056427002, + "logps/rejected": -3.213193416595459, + "loss": 0.5689, + "nll_loss": 0.528993546962738, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09256432205438614, + "rewards/margins": 0.22875504195690155, + "rewards/rejected": -0.3213193714618683, + "step": 687 + }, + { + "epoch": 0.4279937791601866, + "grad_norm": 0.39373117685317993, + "learning_rate": 1.56e-05, + "log_odds_chosen": 3.7912368774414062, + "log_odds_ratio": -0.1475895345211029, + "logits/chosen": -0.4038998782634735, + "logits/rejected": 0.29189857840538025, + "logps/chosen": -0.9779609441757202, + "logps/rejected": -4.346105575561523, + "loss": 0.4229, + "nll_loss": 0.4081049859523773, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09779609739780426, + "rewards/margins": 0.3368144929409027, + "rewards/rejected": -0.4346105754375458, + "step": 688 + }, + { + "epoch": 0.4286158631415241, + "grad_norm": 0.5293461084365845, + "learning_rate": 1.5550000000000002e-05, + "log_odds_chosen": 3.5006730556488037, + "log_odds_ratio": -0.12377304583787918, + "logits/chosen": 0.7839882969856262, + "logits/rejected": 1.3053977489471436, + "logps/chosen": -0.5710941553115845, + "logps/rejected": -3.120854377746582, + "loss": 0.5796, + "nll_loss": 0.5672070980072021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.057109422981739044, + "rewards/margins": 0.2549760341644287, + "rewards/rejected": -0.31208541989326477, + "step": 689 + }, + { + "epoch": 0.42923794712286156, + "grad_norm": 0.44740065932273865, + "learning_rate": 1.55e-05, + "log_odds_chosen": 3.0434255599975586, + "log_odds_ratio": -0.24693435430526733, + "logits/chosen": 0.18788853287696838, + "logits/rejected": 0.6065223217010498, + "logps/chosen": -0.9421164393424988, + "logps/rejected": -3.573080062866211, + "loss": 0.5578, + "nll_loss": 0.5330719351768494, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.094211645424366, + "rewards/margins": 0.26309633255004883, + "rewards/rejected": -0.35730797052383423, + "step": 690 + }, + { + "epoch": 0.42986003110419907, + "grad_norm": 0.35186102986335754, + "learning_rate": 1.545e-05, + "log_odds_chosen": 3.3561038970947266, + "log_odds_ratio": -0.2130623757839203, + "logits/chosen": 0.4123086929321289, + "logits/rejected": 1.1552950143814087, + "logps/chosen": -0.699867844581604, + "logps/rejected": -3.4619529247283936, + "loss": 0.5932, + "nll_loss": 0.5718498229980469, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06998678296804428, + "rewards/margins": 0.2762084901332855, + "rewards/rejected": -0.3461953103542328, + "step": 691 + }, + { + "epoch": 0.4304821150855365, + "grad_norm": 0.48770788311958313, + "learning_rate": 1.54e-05, + "log_odds_chosen": 3.8186118602752686, + "log_odds_ratio": -0.34374508261680603, + "logits/chosen": 0.5811923742294312, + "logits/rejected": 1.4528242349624634, + "logps/chosen": -0.7848690748214722, + "logps/rejected": -4.019944190979004, + "loss": 0.6222, + "nll_loss": 0.5878376364707947, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07848691940307617, + "rewards/margins": 0.32350754737854004, + "rewards/rejected": -0.4019944667816162, + "step": 692 + }, + { + "epoch": 0.431104199066874, + "grad_norm": 2.120375871658325, + "learning_rate": 1.535e-05, + "log_odds_chosen": 1.4963163137435913, + "log_odds_ratio": -0.4591095447540283, + "logits/chosen": 1.3761413097381592, + "logits/rejected": 2.085134506225586, + "logps/chosen": -1.22217857837677, + "logps/rejected": -2.508049249649048, + "loss": 0.8543, + "nll_loss": 0.8084296584129333, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.12221785634756088, + "rewards/margins": 0.12858706712722778, + "rewards/rejected": -0.25080496072769165, + "step": 693 + }, + { + "epoch": 0.43172628304821153, + "grad_norm": 0.38951340317726135, + "learning_rate": 1.53e-05, + "log_odds_chosen": 5.393576622009277, + "log_odds_ratio": -0.18696874380111694, + "logits/chosen": 1.783607006072998, + "logits/rejected": 0.4088016748428345, + "logps/chosen": -0.8493199348449707, + "logps/rejected": -5.712547302246094, + "loss": 0.6466, + "nll_loss": 0.627892017364502, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08493199944496155, + "rewards/margins": 0.4863227605819702, + "rewards/rejected": -0.5712547898292542, + "step": 694 + }, + { + "epoch": 0.432348367029549, + "grad_norm": 0.5150628089904785, + "learning_rate": 1.525e-05, + "log_odds_chosen": 5.354584693908691, + "log_odds_ratio": -0.12360195815563202, + "logits/chosen": 1.867887020111084, + "logits/rejected": 1.4280177354812622, + "logps/chosen": -0.9081195592880249, + "logps/rejected": -5.845778465270996, + "loss": 0.7201, + "nll_loss": 0.7077158093452454, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09081195294857025, + "rewards/margins": 0.4937658905982971, + "rewards/rejected": -0.5845778584480286, + "step": 695 + }, + { + "epoch": 0.4329704510108865, + "grad_norm": 0.2805895507335663, + "learning_rate": 1.52e-05, + "log_odds_chosen": 5.2841339111328125, + "log_odds_ratio": -0.008528401143848896, + "logits/chosen": -0.00789671391248703, + "logits/rejected": 0.17581342160701752, + "logps/chosen": -0.8426008820533752, + "logps/rejected": -5.343410491943359, + "loss": 0.4871, + "nll_loss": 0.4862731397151947, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08426009118556976, + "rewards/margins": 0.4500809907913208, + "rewards/rejected": -0.5343410968780518, + "step": 696 + }, + { + "epoch": 0.43359253499222394, + "grad_norm": 0.9375795722007751, + "learning_rate": 1.515e-05, + "log_odds_chosen": 2.055760383605957, + "log_odds_ratio": -0.3684552013874054, + "logits/chosen": 2.628263235092163, + "logits/rejected": 2.48541259765625, + "logps/chosen": -0.9256740808486938, + "logps/rejected": -2.7253613471984863, + "loss": 0.7838, + "nll_loss": 0.746984601020813, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09256740659475327, + "rewards/margins": 0.1799687147140503, + "rewards/rejected": -0.27253612875938416, + "step": 697 + }, + { + "epoch": 0.43421461897356145, + "grad_norm": 0.43897396326065063, + "learning_rate": 1.51e-05, + "log_odds_chosen": 4.525207996368408, + "log_odds_ratio": -0.1548263132572174, + "logits/chosen": 0.5639280080795288, + "logits/rejected": 0.028501048684120178, + "logps/chosen": -0.9530531167984009, + "logps/rejected": -5.0771989822387695, + "loss": 0.5532, + "nll_loss": 0.5376973152160645, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09530530869960785, + "rewards/margins": 0.4124146103858948, + "rewards/rejected": -0.5077199339866638, + "step": 698 + }, + { + "epoch": 0.4348367029548989, + "grad_norm": 0.32845211029052734, + "learning_rate": 1.505e-05, + "log_odds_chosen": 3.9304654598236084, + "log_odds_ratio": -0.1376427263021469, + "logits/chosen": 1.2447631359100342, + "logits/rejected": 0.8316696286201477, + "logps/chosen": -0.6440844535827637, + "logps/rejected": -3.911695957183838, + "loss": 0.6137, + "nll_loss": 0.5998940467834473, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06440845131874084, + "rewards/margins": 0.3267611563205719, + "rewards/rejected": -0.39116960763931274, + "step": 699 + }, + { + "epoch": 0.4354587869362364, + "grad_norm": 0.32732221484184265, + "learning_rate": 1.5e-05, + "log_odds_chosen": 4.258359909057617, + "log_odds_ratio": -0.09592651575803757, + "logits/chosen": 0.23326629400253296, + "logits/rejected": 1.4764102697372437, + "logps/chosen": -0.8468846678733826, + "logps/rejected": -4.427027225494385, + "loss": 0.5165, + "nll_loss": 0.5069240927696228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0846884697675705, + "rewards/margins": 0.358014315366745, + "rewards/rejected": -0.4427027702331543, + "step": 700 + }, + { + "epoch": 0.43608087091757386, + "grad_norm": 1.247667670249939, + "learning_rate": 1.4950000000000001e-05, + "log_odds_chosen": 4.05362606048584, + "log_odds_ratio": -0.2478107213973999, + "logits/chosen": 0.7657288312911987, + "logits/rejected": 0.623124897480011, + "logps/chosen": -1.4577549695968628, + "logps/rejected": -5.168236255645752, + "loss": 0.5482, + "nll_loss": 0.5233848690986633, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14577549695968628, + "rewards/margins": 0.37104812264442444, + "rewards/rejected": -0.5168236494064331, + "step": 701 + }, + { + "epoch": 0.43670295489891137, + "grad_norm": 0.40692177414894104, + "learning_rate": 1.49e-05, + "log_odds_chosen": 3.904751777648926, + "log_odds_ratio": -0.15392708778381348, + "logits/chosen": 1.5741872787475586, + "logits/rejected": 1.567471981048584, + "logps/chosen": -0.8535467982292175, + "logps/rejected": -4.262635707855225, + "loss": 0.6844, + "nll_loss": 0.6690041422843933, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08535467833280563, + "rewards/margins": 0.340908944606781, + "rewards/rejected": -0.42626357078552246, + "step": 702 + }, + { + "epoch": 0.4373250388802488, + "grad_norm": 0.39099812507629395, + "learning_rate": 1.485e-05, + "log_odds_chosen": 4.589461803436279, + "log_odds_ratio": -0.050541818141937256, + "logits/chosen": 1.9745683670043945, + "logits/rejected": 2.1288652420043945, + "logps/chosen": -1.3265926837921143, + "logps/rejected": -5.420770645141602, + "loss": 0.7475, + "nll_loss": 0.7424065470695496, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13265925645828247, + "rewards/margins": 0.4094178378582001, + "rewards/rejected": -0.5420770645141602, + "step": 703 + }, + { + "epoch": 0.4379471228615863, + "grad_norm": 0.3653831481933594, + "learning_rate": 1.48e-05, + "log_odds_chosen": 2.963881015777588, + "log_odds_ratio": -0.27740052342414856, + "logits/chosen": 0.6383364796638489, + "logits/rejected": 0.5264080762863159, + "logps/chosen": -0.9153085350990295, + "logps/rejected": -3.5376999378204346, + "loss": 0.6302, + "nll_loss": 0.602420449256897, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09153085201978683, + "rewards/margins": 0.26223915815353394, + "rewards/rejected": -0.35377001762390137, + "step": 704 + }, + { + "epoch": 0.4385692068429238, + "grad_norm": 4.076732635498047, + "learning_rate": 1.475e-05, + "log_odds_chosen": 5.495579242706299, + "log_odds_ratio": -0.13295693695545197, + "logits/chosen": 1.3471734523773193, + "logits/rejected": 1.2759082317352295, + "logps/chosen": -0.9006038308143616, + "logps/rejected": -5.952462196350098, + "loss": 0.6168, + "nll_loss": 0.6035174131393433, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09006038308143616, + "rewards/margins": 0.5051857829093933, + "rewards/rejected": -0.5952461957931519, + "step": 705 + }, + { + "epoch": 0.4391912908242613, + "grad_norm": 0.42146316170692444, + "learning_rate": 1.47e-05, + "log_odds_chosen": 3.6146352291107178, + "log_odds_ratio": -0.1981172412633896, + "logits/chosen": 1.6295064687728882, + "logits/rejected": 1.07228684425354, + "logps/chosen": -0.8335095643997192, + "logps/rejected": -3.8007986545562744, + "loss": 0.6058, + "nll_loss": 0.5859972238540649, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08335095643997192, + "rewards/margins": 0.2967289090156555, + "rewards/rejected": -0.38007986545562744, + "step": 706 + }, + { + "epoch": 0.43981337480559873, + "grad_norm": 0.3019298017024994, + "learning_rate": 1.465e-05, + "log_odds_chosen": 5.203786849975586, + "log_odds_ratio": -0.020779546350240707, + "logits/chosen": 1.577458381652832, + "logits/rejected": 0.5964698791503906, + "logps/chosen": -0.8217610716819763, + "logps/rejected": -5.420132637023926, + "loss": 0.5656, + "nll_loss": 0.5635250210762024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0821761041879654, + "rewards/margins": 0.4598371088504791, + "rewards/rejected": -0.5420132279396057, + "step": 707 + }, + { + "epoch": 0.44043545878693624, + "grad_norm": 1.8788961172103882, + "learning_rate": 1.4599999999999999e-05, + "log_odds_chosen": 4.655858993530273, + "log_odds_ratio": -0.05985920503735542, + "logits/chosen": 1.0745199918746948, + "logits/rejected": 1.0020854473114014, + "logps/chosen": -1.2417633533477783, + "logps/rejected": -5.395903587341309, + "loss": 0.588, + "nll_loss": 0.5820605158805847, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12417633831501007, + "rewards/margins": 0.41541406512260437, + "rewards/rejected": -0.5395903587341309, + "step": 708 + }, + { + "epoch": 0.4410575427682737, + "grad_norm": 0.3622523546218872, + "learning_rate": 1.455e-05, + "log_odds_chosen": 5.548418045043945, + "log_odds_ratio": -0.08194790780544281, + "logits/chosen": 0.051224738359451294, + "logits/rejected": -0.29287880659103394, + "logps/chosen": -0.8937790989875793, + "logps/rejected": -5.9226579666137695, + "loss": 0.3898, + "nll_loss": 0.38160476088523865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08937790989875793, + "rewards/margins": 0.5028879046440125, + "rewards/rejected": -0.5922658443450928, + "step": 709 + }, + { + "epoch": 0.4416796267496112, + "grad_norm": 0.34681272506713867, + "learning_rate": 1.45e-05, + "log_odds_chosen": 5.7191925048828125, + "log_odds_ratio": -0.07530266791582108, + "logits/chosen": 1.2113844156265259, + "logits/rejected": -0.6352530717849731, + "logps/chosen": -0.8490526676177979, + "logps/rejected": -5.936434268951416, + "loss": 0.5826, + "nll_loss": 0.5750584006309509, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08490526676177979, + "rewards/margins": 0.5087381601333618, + "rewards/rejected": -0.5936433672904968, + "step": 710 + }, + { + "epoch": 0.4423017107309487, + "grad_norm": 0.3714011311531067, + "learning_rate": 1.4449999999999999e-05, + "log_odds_chosen": 5.149613380432129, + "log_odds_ratio": -0.008057689294219017, + "logits/chosen": 1.679274559020996, + "logits/rejected": -0.04641413688659668, + "logps/chosen": -0.9280933141708374, + "logps/rejected": -5.554920196533203, + "loss": 0.6515, + "nll_loss": 0.6507111191749573, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09280932694673538, + "rewards/margins": 0.46268269419670105, + "rewards/rejected": -0.5554920434951782, + "step": 711 + }, + { + "epoch": 0.44292379471228616, + "grad_norm": 0.30056387186050415, + "learning_rate": 1.44e-05, + "log_odds_chosen": 4.624875545501709, + "log_odds_ratio": -0.06813670694828033, + "logits/chosen": 2.250699281692505, + "logits/rejected": 2.0250535011291504, + "logps/chosen": -0.6873379945755005, + "logps/rejected": -4.537752628326416, + "loss": 0.7692, + "nll_loss": 0.7623446583747864, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06873380392789841, + "rewards/margins": 0.3850414752960205, + "rewards/rejected": -0.4537752866744995, + "step": 712 + }, + { + "epoch": 0.44354587869362366, + "grad_norm": 0.33198800683021545, + "learning_rate": 1.435e-05, + "log_odds_chosen": 5.732954025268555, + "log_odds_ratio": -0.033084020018577576, + "logits/chosen": -0.4894905686378479, + "logits/rejected": -0.5070227980613708, + "logps/chosen": -1.1875081062316895, + "logps/rejected": -6.428521156311035, + "loss": 0.425, + "nll_loss": 0.42172831296920776, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11875081807374954, + "rewards/margins": 0.5241013765335083, + "rewards/rejected": -0.6428521871566772, + "step": 713 + }, + { + "epoch": 0.4441679626749611, + "grad_norm": 0.34545430541038513, + "learning_rate": 1.43e-05, + "log_odds_chosen": 4.477220058441162, + "log_odds_ratio": -0.05571537837386131, + "logits/chosen": 0.9135289788246155, + "logits/rejected": 0.4658157229423523, + "logps/chosen": -0.9963458776473999, + "logps/rejected": -4.778862953186035, + "loss": 0.5084, + "nll_loss": 0.5028534531593323, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09963459521532059, + "rewards/margins": 0.3782517611980438, + "rewards/rejected": -0.4778863489627838, + "step": 714 + }, + { + "epoch": 0.4447900466562986, + "grad_norm": 0.31467583775520325, + "learning_rate": 1.4249999999999999e-05, + "log_odds_chosen": 3.738637924194336, + "log_odds_ratio": -0.11523061245679855, + "logits/chosen": 0.8655556440353394, + "logits/rejected": 0.12430325150489807, + "logps/chosen": -0.8071171641349792, + "logps/rejected": -4.038856506347656, + "loss": 0.5292, + "nll_loss": 0.517656147480011, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0807117149233818, + "rewards/margins": 0.3231739401817322, + "rewards/rejected": -0.4038856327533722, + "step": 715 + }, + { + "epoch": 0.4454121306376361, + "grad_norm": 3.6782705783843994, + "learning_rate": 1.42e-05, + "log_odds_chosen": 4.408926010131836, + "log_odds_ratio": -0.053505804389715195, + "logits/chosen": 1.5419108867645264, + "logits/rejected": 0.1588946431875229, + "logps/chosen": -0.9761942625045776, + "logps/rejected": -4.855075359344482, + "loss": 0.5731, + "nll_loss": 0.5677310228347778, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09761942923069, + "rewards/margins": 0.3878880739212036, + "rewards/rejected": -0.4855075478553772, + "step": 716 + }, + { + "epoch": 0.4460342146189736, + "grad_norm": 0.3332521319389343, + "learning_rate": 1.415e-05, + "log_odds_chosen": 5.683034896850586, + "log_odds_ratio": -0.03364837169647217, + "logits/chosen": 0.9579274654388428, + "logits/rejected": 0.7839161157608032, + "logps/chosen": -1.0205984115600586, + "logps/rejected": -5.810733795166016, + "loss": 0.6103, + "nll_loss": 0.6069284081459045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10205984115600586, + "rewards/margins": 0.4790135324001312, + "rewards/rejected": -0.5810733437538147, + "step": 717 + }, + { + "epoch": 0.44665629860031103, + "grad_norm": 0.45680540800094604, + "learning_rate": 1.4099999999999999e-05, + "log_odds_chosen": 2.5157294273376465, + "log_odds_ratio": -0.25136426091194153, + "logits/chosen": 1.3738585710525513, + "logits/rejected": 1.645888328552246, + "logps/chosen": -0.8006247282028198, + "logps/rejected": -2.843050479888916, + "loss": 0.6798, + "nll_loss": 0.6546663045883179, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08006247133016586, + "rewards/margins": 0.20424258708953857, + "rewards/rejected": -0.28430506587028503, + "step": 718 + }, + { + "epoch": 0.44727838258164854, + "grad_norm": 0.6165868043899536, + "learning_rate": 1.4050000000000003e-05, + "log_odds_chosen": 5.895066261291504, + "log_odds_ratio": -0.00467541953548789, + "logits/chosen": 1.104529619216919, + "logits/rejected": 0.7458992004394531, + "logps/chosen": -0.8854390382766724, + "logps/rejected": -6.143112659454346, + "loss": 0.6325, + "nll_loss": 0.6319921612739563, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08854390680789948, + "rewards/margins": 0.5257673263549805, + "rewards/rejected": -0.6143112778663635, + "step": 719 + }, + { + "epoch": 0.447900466562986, + "grad_norm": 0.5500882267951965, + "learning_rate": 1.4000000000000001e-05, + "log_odds_chosen": 3.6151845455169678, + "log_odds_ratio": -0.15118198096752167, + "logits/chosen": 2.462423086166382, + "logits/rejected": 1.7982007265090942, + "logps/chosen": -1.5363001823425293, + "logps/rejected": -4.657369613647461, + "loss": 0.8473, + "nll_loss": 0.8321783542633057, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.15363001823425293, + "rewards/margins": 0.3121069371700287, + "rewards/rejected": -0.4657369554042816, + "step": 720 + }, + { + "epoch": 0.4485225505443235, + "grad_norm": 0.7661917805671692, + "learning_rate": 1.3950000000000002e-05, + "log_odds_chosen": 2.443437099456787, + "log_odds_ratio": -0.3714645504951477, + "logits/chosen": 1.7393274307250977, + "logits/rejected": 0.9712520837783813, + "logps/chosen": -1.2769322395324707, + "logps/rejected": -3.5214457511901855, + "loss": 0.851, + "nll_loss": 0.8138446807861328, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12769320607185364, + "rewards/margins": 0.22445137798786163, + "rewards/rejected": -0.35214459896087646, + "step": 721 + }, + { + "epoch": 0.44914463452566095, + "grad_norm": 0.3548874258995056, + "learning_rate": 1.3900000000000002e-05, + "log_odds_chosen": 3.8431100845336914, + "log_odds_ratio": -0.2611660361289978, + "logits/chosen": 1.9599686861038208, + "logits/rejected": 1.1322951316833496, + "logps/chosen": -0.877863347530365, + "logps/rejected": -4.278283596038818, + "loss": 0.7638, + "nll_loss": 0.7376934289932251, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08778633922338486, + "rewards/margins": 0.3400420546531677, + "rewards/rejected": -0.4278283715248108, + "step": 722 + }, + { + "epoch": 0.44976671850699845, + "grad_norm": 0.42921215295791626, + "learning_rate": 1.3850000000000001e-05, + "log_odds_chosen": 3.950148820877075, + "log_odds_ratio": -0.16853103041648865, + "logits/chosen": 2.9101808071136475, + "logits/rejected": 2.3471872806549072, + "logps/chosen": -0.8549723625183105, + "logps/rejected": -4.339173316955566, + "loss": 0.7838, + "nll_loss": 0.7669782638549805, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08549723774194717, + "rewards/margins": 0.3484201431274414, + "rewards/rejected": -0.433917373418808, + "step": 723 + }, + { + "epoch": 0.4503888024883359, + "grad_norm": 0.3380347192287445, + "learning_rate": 1.3800000000000002e-05, + "log_odds_chosen": 4.601840972900391, + "log_odds_ratio": -0.11074228584766388, + "logits/chosen": 2.156388282775879, + "logits/rejected": 1.037245273590088, + "logps/chosen": -0.9112977981567383, + "logps/rejected": -5.017061233520508, + "loss": 0.7269, + "nll_loss": 0.7158026695251465, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09112977981567383, + "rewards/margins": 0.41057640314102173, + "rewards/rejected": -0.5017061829566956, + "step": 724 + }, + { + "epoch": 0.4510108864696734, + "grad_norm": 0.38814446330070496, + "learning_rate": 1.3750000000000002e-05, + "log_odds_chosen": 3.3909547328948975, + "log_odds_ratio": -0.21502827107906342, + "logits/chosen": 1.6492749452590942, + "logits/rejected": 0.4639023542404175, + "logps/chosen": -1.0277436971664429, + "logps/rejected": -4.112637519836426, + "loss": 0.6731, + "nll_loss": 0.6515559554100037, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10277437418699265, + "rewards/margins": 0.3084893822669983, + "rewards/rejected": -0.41126376390457153, + "step": 725 + }, + { + "epoch": 0.45163297045101086, + "grad_norm": 2.843536138534546, + "learning_rate": 1.3700000000000001e-05, + "log_odds_chosen": 4.456535816192627, + "log_odds_ratio": -0.08549236506223679, + "logits/chosen": 0.24373212456703186, + "logits/rejected": 0.3230777084827423, + "logps/chosen": -0.9399199485778809, + "logps/rejected": -4.7474799156188965, + "loss": 0.5621, + "nll_loss": 0.5535575747489929, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09399199485778809, + "rewards/margins": 0.3807560205459595, + "rewards/rejected": -0.47474801540374756, + "step": 726 + }, + { + "epoch": 0.45225505443234837, + "grad_norm": 0.967526912689209, + "learning_rate": 1.3650000000000001e-05, + "log_odds_chosen": 2.7426035404205322, + "log_odds_ratio": -0.29068922996520996, + "logits/chosen": 2.4513349533081055, + "logits/rejected": 1.7992994785308838, + "logps/chosen": -0.7617477774620056, + "logps/rejected": -3.0559778213500977, + "loss": 0.7583, + "nll_loss": 0.7292664051055908, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0761747807264328, + "rewards/margins": 0.22942303121089935, + "rewards/rejected": -0.30559781193733215, + "step": 727 + }, + { + "epoch": 0.4528771384136858, + "grad_norm": 2.641636610031128, + "learning_rate": 1.3600000000000002e-05, + "log_odds_chosen": 3.6268856525421143, + "log_odds_ratio": -0.170202374458313, + "logits/chosen": 0.4291355013847351, + "logits/rejected": -0.2707882225513458, + "logps/chosen": -1.1543688774108887, + "logps/rejected": -4.4368486404418945, + "loss": 0.6416, + "nll_loss": 0.6245691776275635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11543689668178558, + "rewards/margins": 0.32824793457984924, + "rewards/rejected": -0.443684846162796, + "step": 728 + }, + { + "epoch": 0.4534992223950233, + "grad_norm": 0.3928133547306061, + "learning_rate": 1.3550000000000002e-05, + "log_odds_chosen": 3.8950159549713135, + "log_odds_ratio": -0.15199552476406097, + "logits/chosen": 1.5332711935043335, + "logits/rejected": 0.1663268804550171, + "logps/chosen": -0.902508020401001, + "logps/rejected": -4.069859981536865, + "loss": 0.6854, + "nll_loss": 0.6701560616493225, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09025079011917114, + "rewards/margins": 0.3167352080345154, + "rewards/rejected": -0.4069859981536865, + "step": 729 + }, + { + "epoch": 0.45412130637636083, + "grad_norm": 0.30603450536727905, + "learning_rate": 1.3500000000000001e-05, + "log_odds_chosen": 4.912703514099121, + "log_odds_ratio": -0.0980292409658432, + "logits/chosen": 0.3904115855693817, + "logits/rejected": -0.14418435096740723, + "logps/chosen": -0.9671989679336548, + "logps/rejected": -5.454221725463867, + "loss": 0.4348, + "nll_loss": 0.4250153601169586, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0967198982834816, + "rewards/margins": 0.44870230555534363, + "rewards/rejected": -0.5454221963882446, + "step": 730 + }, + { + "epoch": 0.4547433903576983, + "grad_norm": 2.858635902404785, + "learning_rate": 1.3450000000000002e-05, + "log_odds_chosen": 3.114245891571045, + "log_odds_ratio": -0.20222879946231842, + "logits/chosen": -0.15233629941940308, + "logits/rejected": 0.6302044987678528, + "logps/chosen": -1.0312150716781616, + "logps/rejected": -3.7412331104278564, + "loss": 0.474, + "nll_loss": 0.4537477493286133, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10312151163816452, + "rewards/margins": 0.27100181579589844, + "rewards/rejected": -0.37412333488464355, + "step": 731 + }, + { + "epoch": 0.4553654743390358, + "grad_norm": 0.31833547353744507, + "learning_rate": 1.3400000000000002e-05, + "log_odds_chosen": 4.866644859313965, + "log_odds_ratio": -0.10650589317083359, + "logits/chosen": -0.30437642335891724, + "logits/rejected": 0.5934653878211975, + "logps/chosen": -0.6947398781776428, + "logps/rejected": -4.713142395019531, + "loss": 0.4473, + "nll_loss": 0.436599999666214, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.069473996758461, + "rewards/margins": 0.4018402099609375, + "rewards/rejected": -0.4713142216205597, + "step": 732 + }, + { + "epoch": 0.45598755832037324, + "grad_norm": 0.37434032559394836, + "learning_rate": 1.3350000000000001e-05, + "log_odds_chosen": 4.45645809173584, + "log_odds_ratio": -0.12092647701501846, + "logits/chosen": 0.8990957736968994, + "logits/rejected": 0.6855148077011108, + "logps/chosen": -0.7242429256439209, + "logps/rejected": -4.579753398895264, + "loss": 0.5855, + "nll_loss": 0.5733587741851807, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07242429256439209, + "rewards/margins": 0.3855510354042053, + "rewards/rejected": -0.4579753577709198, + "step": 733 + }, + { + "epoch": 0.45660964230171075, + "grad_norm": 0.5148495435714722, + "learning_rate": 1.3300000000000001e-05, + "log_odds_chosen": 3.205353021621704, + "log_odds_ratio": -0.24751462042331696, + "logits/chosen": 1.291861653327942, + "logits/rejected": 2.1259381771087646, + "logps/chosen": -1.0250825881958008, + "logps/rejected": -3.7852745056152344, + "loss": 0.6724, + "nll_loss": 0.6476327776908875, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10250826179981232, + "rewards/margins": 0.27601921558380127, + "rewards/rejected": -0.3785274624824524, + "step": 734 + }, + { + "epoch": 0.4572317262830482, + "grad_norm": 0.4234316051006317, + "learning_rate": 1.3250000000000002e-05, + "log_odds_chosen": 4.532406806945801, + "log_odds_ratio": -0.10275155305862427, + "logits/chosen": 1.3938021659851074, + "logits/rejected": 1.6723461151123047, + "logps/chosen": -0.6356872916221619, + "logps/rejected": -4.270766735076904, + "loss": 0.7704, + "nll_loss": 0.760124146938324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06356872618198395, + "rewards/margins": 0.3635079562664032, + "rewards/rejected": -0.42707663774490356, + "step": 735 + }, + { + "epoch": 0.4578538102643857, + "grad_norm": 0.5846631526947021, + "learning_rate": 1.32e-05, + "log_odds_chosen": 3.4026882648468018, + "log_odds_ratio": -0.30543139576911926, + "logits/chosen": 0.7343127131462097, + "logits/rejected": 0.6888481378555298, + "logps/chosen": -1.0733798742294312, + "logps/rejected": -4.219671249389648, + "loss": 0.5944, + "nll_loss": 0.5638964176177979, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10733799636363983, + "rewards/margins": 0.31462913751602173, + "rewards/rejected": -0.42196714878082275, + "step": 736 + }, + { + "epoch": 0.45847589424572316, + "grad_norm": 0.4446645975112915, + "learning_rate": 1.3150000000000001e-05, + "log_odds_chosen": 4.259887218475342, + "log_odds_ratio": -0.12168325483798981, + "logits/chosen": 2.147245407104492, + "logits/rejected": 1.1088201999664307, + "logps/chosen": -0.8293672204017639, + "logps/rejected": -4.540325164794922, + "loss": 0.6804, + "nll_loss": 0.6682307720184326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08293672651052475, + "rewards/margins": 0.37109577655792236, + "rewards/rejected": -0.4540324807167053, + "step": 737 + }, + { + "epoch": 0.45909797822706067, + "grad_norm": 0.5487768650054932, + "learning_rate": 1.3100000000000002e-05, + "log_odds_chosen": 4.622917175292969, + "log_odds_ratio": -0.19834764301776886, + "logits/chosen": -0.7730895280838013, + "logits/rejected": 0.3794826865196228, + "logps/chosen": -1.0225191116333008, + "logps/rejected": -5.2524261474609375, + "loss": 0.3588, + "nll_loss": 0.33894115686416626, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10225191712379456, + "rewards/margins": 0.42299070954322815, + "rewards/rejected": -0.5252426266670227, + "step": 738 + }, + { + "epoch": 0.4597200622083981, + "grad_norm": 0.3344896733760834, + "learning_rate": 1.305e-05, + "log_odds_chosen": 2.9035658836364746, + "log_odds_ratio": -0.22372430562973022, + "logits/chosen": 0.3367067575454712, + "logits/rejected": 0.7152668237686157, + "logps/chosen": -0.7381393313407898, + "logps/rejected": -3.179778575897217, + "loss": 0.6071, + "nll_loss": 0.5847198963165283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07381393760442734, + "rewards/margins": 0.24416393041610718, + "rewards/rejected": -0.3179778456687927, + "step": 739 + }, + { + "epoch": 0.4603421461897356, + "grad_norm": 0.5565928220748901, + "learning_rate": 1.3000000000000001e-05, + "log_odds_chosen": 2.2139623165130615, + "log_odds_ratio": -0.3558502793312073, + "logits/chosen": 1.9886784553527832, + "logits/rejected": 1.4688613414764404, + "logps/chosen": -0.8802056312561035, + "logps/rejected": -2.744699001312256, + "loss": 0.758, + "nll_loss": 0.722421407699585, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08802057057619095, + "rewards/margins": 0.18644936382770538, + "rewards/rejected": -0.27446994185447693, + "step": 740 + }, + { + "epoch": 0.4609642301710731, + "grad_norm": 0.3876553475856781, + "learning_rate": 1.2950000000000001e-05, + "log_odds_chosen": 4.107831001281738, + "log_odds_ratio": -0.07306887209415436, + "logits/chosen": -0.357515811920166, + "logits/rejected": 0.5429799556732178, + "logps/chosen": -0.9162774682044983, + "logps/rejected": -4.474459171295166, + "loss": 0.4091, + "nll_loss": 0.40178635716438293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09162774682044983, + "rewards/margins": 0.3558181822299957, + "rewards/rejected": -0.44744592905044556, + "step": 741 + }, + { + "epoch": 0.4615863141524106, + "grad_norm": 0.3235586881637573, + "learning_rate": 1.29e-05, + "log_odds_chosen": 2.25211238861084, + "log_odds_ratio": -0.3039625883102417, + "logits/chosen": 0.7069388628005981, + "logits/rejected": 2.00510835647583, + "logps/chosen": -0.753617525100708, + "logps/rejected": -2.387873649597168, + "loss": 0.685, + "nll_loss": 0.6546491384506226, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07536175847053528, + "rewards/margins": 0.16342565417289734, + "rewards/rejected": -0.23878741264343262, + "step": 742 + }, + { + "epoch": 0.46220839813374803, + "grad_norm": 0.367152601480484, + "learning_rate": 1.285e-05, + "log_odds_chosen": 4.8678741455078125, + "log_odds_ratio": -0.13750433921813965, + "logits/chosen": 1.9285204410552979, + "logits/rejected": 2.173274278640747, + "logps/chosen": -0.8739117383956909, + "logps/rejected": -5.209456443786621, + "loss": 0.6599, + "nll_loss": 0.6461677551269531, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08739117532968521, + "rewards/margins": 0.433554470539093, + "rewards/rejected": -0.52094566822052, + "step": 743 + }, + { + "epoch": 0.46283048211508554, + "grad_norm": 0.6167296767234802, + "learning_rate": 1.2800000000000001e-05, + "log_odds_chosen": 4.1003737449646, + "log_odds_ratio": -0.2411804348230362, + "logits/chosen": 0.8901242613792419, + "logits/rejected": 0.6706501841545105, + "logps/chosen": -0.9060766696929932, + "logps/rejected": -4.523157119750977, + "loss": 0.6385, + "nll_loss": 0.6144038438796997, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0906076729297638, + "rewards/margins": 0.36170801520347595, + "rewards/rejected": -0.45231568813323975, + "step": 744 + }, + { + "epoch": 0.463452566096423, + "grad_norm": 0.4313476085662842, + "learning_rate": 1.2750000000000002e-05, + "log_odds_chosen": 5.386116981506348, + "log_odds_ratio": -0.24166685342788696, + "logits/chosen": 1.0413144826889038, + "logits/rejected": 0.93155837059021, + "logps/chosen": -0.8907713890075684, + "logps/rejected": -5.863996982574463, + "loss": 0.6041, + "nll_loss": 0.5798915028572083, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08907714486122131, + "rewards/margins": 0.49732255935668945, + "rewards/rejected": -0.5863996744155884, + "step": 745 + }, + { + "epoch": 0.4640746500777605, + "grad_norm": 0.2939267158508301, + "learning_rate": 1.27e-05, + "log_odds_chosen": 3.964977264404297, + "log_odds_ratio": -0.10894638299942017, + "logits/chosen": -0.354805588722229, + "logits/rejected": 0.003582596778869629, + "logps/chosen": -1.0284769535064697, + "logps/rejected": -4.510259628295898, + "loss": 0.4564, + "nll_loss": 0.44550204277038574, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10284771025180817, + "rewards/margins": 0.34817826747894287, + "rewards/rejected": -0.45102596282958984, + "step": 746 + }, + { + "epoch": 0.464696734059098, + "grad_norm": 0.34394949674606323, + "learning_rate": 1.2650000000000001e-05, + "log_odds_chosen": 4.815141677856445, + "log_odds_ratio": -0.11680752038955688, + "logits/chosen": 1.5240862369537354, + "logits/rejected": 1.7166731357574463, + "logps/chosen": -0.6423882246017456, + "logps/rejected": -4.7596049308776855, + "loss": 0.6477, + "nll_loss": 0.6360157132148743, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06423883140087128, + "rewards/margins": 0.41172167658805847, + "rewards/rejected": -0.47596052289009094, + "step": 747 + }, + { + "epoch": 0.46531881804043546, + "grad_norm": 0.3430861830711365, + "learning_rate": 1.2600000000000001e-05, + "log_odds_chosen": 3.8095812797546387, + "log_odds_ratio": -0.20983168482780457, + "logits/chosen": -0.6760037541389465, + "logits/rejected": -0.1070479154586792, + "logps/chosen": -0.6517633199691772, + "logps/rejected": -3.8257155418395996, + "loss": 0.5232, + "nll_loss": 0.5022388696670532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0651763305068016, + "rewards/margins": 0.3173952102661133, + "rewards/rejected": -0.38257157802581787, + "step": 748 + }, + { + "epoch": 0.46594090202177296, + "grad_norm": 0.3617217242717743, + "learning_rate": 1.255e-05, + "log_odds_chosen": 3.6365814208984375, + "log_odds_ratio": -0.1220984011888504, + "logits/chosen": 2.1960654258728027, + "logits/rejected": 1.0074217319488525, + "logps/chosen": -0.78511643409729, + "logps/rejected": -3.9075863361358643, + "loss": 0.6951, + "nll_loss": 0.6828843355178833, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07851164042949677, + "rewards/margins": 0.31224697828292847, + "rewards/rejected": -0.3907586336135864, + "step": 749 + }, + { + "epoch": 0.4665629860031104, + "grad_norm": 0.5665165781974792, + "learning_rate": 1.25e-05, + "log_odds_chosen": 2.4751880168914795, + "log_odds_ratio": -0.3891245722770691, + "logits/chosen": 1.7859808206558228, + "logits/rejected": 0.44065773487091064, + "logps/chosen": -0.7846461534500122, + "logps/rejected": -2.9159486293792725, + "loss": 0.6791, + "nll_loss": 0.640195369720459, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07846461236476898, + "rewards/margins": 0.21313023567199707, + "rewards/rejected": -0.29159486293792725, + "step": 750 + }, + { + "epoch": 0.4671850699844479, + "grad_norm": 1.5871644020080566, + "learning_rate": 1.2450000000000001e-05, + "log_odds_chosen": 4.6841936111450195, + "log_odds_ratio": -0.0442461296916008, + "logits/chosen": 2.642913579940796, + "logits/rejected": 1.204238772392273, + "logps/chosen": -1.1520380973815918, + "logps/rejected": -5.388706207275391, + "loss": 0.802, + "nll_loss": 0.7975590825080872, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11520381271839142, + "rewards/margins": 0.4236668646335602, + "rewards/rejected": -0.5388706922531128, + "step": 751 + }, + { + "epoch": 0.46780715396578537, + "grad_norm": 0.423178493976593, + "learning_rate": 1.24e-05, + "log_odds_chosen": 3.278820514678955, + "log_odds_ratio": -0.3369329571723938, + "logits/chosen": 2.5465831756591797, + "logits/rejected": 2.608609676361084, + "logps/chosen": -0.9291982650756836, + "logps/rejected": -3.932260751724243, + "loss": 0.8509, + "nll_loss": 0.817159116268158, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09291982650756836, + "rewards/margins": 0.3003062605857849, + "rewards/rejected": -0.39322608709335327, + "step": 752 + }, + { + "epoch": 0.4684292379471229, + "grad_norm": 0.2830430269241333, + "learning_rate": 1.235e-05, + "log_odds_chosen": 3.9936811923980713, + "log_odds_ratio": -0.26642856001853943, + "logits/chosen": 0.5490342378616333, + "logits/rejected": 1.9696109294891357, + "logps/chosen": -1.0759899616241455, + "logps/rejected": -4.762292861938477, + "loss": 0.6052, + "nll_loss": 0.5785794258117676, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10759899765253067, + "rewards/margins": 0.3686302602291107, + "rewards/rejected": -0.4762292504310608, + "step": 753 + }, + { + "epoch": 0.46905132192846033, + "grad_norm": 0.37960949540138245, + "learning_rate": 1.23e-05, + "log_odds_chosen": 4.016284942626953, + "log_odds_ratio": -0.3642813265323639, + "logits/chosen": 0.7298337817192078, + "logits/rejected": 1.9862333536148071, + "logps/chosen": -0.7522934079170227, + "logps/rejected": -4.1859636306762695, + "loss": 0.5958, + "nll_loss": 0.559417724609375, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07522933930158615, + "rewards/margins": 0.3433670401573181, + "rewards/rejected": -0.41859638690948486, + "step": 754 + }, + { + "epoch": 0.46967340590979784, + "grad_norm": 0.3662182688713074, + "learning_rate": 1.225e-05, + "log_odds_chosen": 6.478982448577881, + "log_odds_ratio": -0.08034564554691315, + "logits/chosen": 1.0255630016326904, + "logits/rejected": 0.4613604247570038, + "logps/chosen": -0.8740299344062805, + "logps/rejected": -6.873804092407227, + "loss": 0.6471, + "nll_loss": 0.6390318870544434, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08740299940109253, + "rewards/margins": 0.5999773740768433, + "rewards/rejected": -0.6873804330825806, + "step": 755 + }, + { + "epoch": 0.4702954898911353, + "grad_norm": 1.2985732555389404, + "learning_rate": 1.22e-05, + "log_odds_chosen": 3.5273189544677734, + "log_odds_ratio": -0.33595922589302063, + "logits/chosen": -0.20280686020851135, + "logits/rejected": 1.2919225692749023, + "logps/chosen": -1.2622438669204712, + "logps/rejected": -4.5340962409973145, + "loss": 0.5605, + "nll_loss": 0.5268831849098206, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.12622438371181488, + "rewards/margins": 0.3271852433681488, + "rewards/rejected": -0.4534096419811249, + "step": 756 + }, + { + "epoch": 0.4709175738724728, + "grad_norm": 2.0065884590148926, + "learning_rate": 1.215e-05, + "log_odds_chosen": 3.059215784072876, + "log_odds_ratio": -0.27708762884140015, + "logits/chosen": 0.45762962102890015, + "logits/rejected": 0.717374324798584, + "logps/chosen": -1.0753413438796997, + "logps/rejected": -3.8107690811157227, + "loss": 0.6642, + "nll_loss": 0.6364901065826416, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10753414034843445, + "rewards/margins": 0.27354276180267334, + "rewards/rejected": -0.3810769021511078, + "step": 757 + }, + { + "epoch": 0.47153965785381025, + "grad_norm": 5.8776164054870605, + "learning_rate": 1.2100000000000001e-05, + "log_odds_chosen": 4.531339168548584, + "log_odds_ratio": -0.12009022384881973, + "logits/chosen": 0.6293901801109314, + "logits/rejected": 0.4070852994918823, + "logps/chosen": -1.4273791313171387, + "logps/rejected": -5.634232521057129, + "loss": 0.901, + "nll_loss": 0.888999879360199, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.14273792505264282, + "rewards/margins": 0.4206853210926056, + "rewards/rejected": -0.563423216342926, + "step": 758 + }, + { + "epoch": 0.47216174183514775, + "grad_norm": 0.2695918083190918, + "learning_rate": 1.205e-05, + "log_odds_chosen": 2.9982948303222656, + "log_odds_ratio": -0.26615574955940247, + "logits/chosen": -0.7575415372848511, + "logits/rejected": 0.376910924911499, + "logps/chosen": -1.1440861225128174, + "logps/rejected": -3.7636454105377197, + "loss": 0.47, + "nll_loss": 0.4433676302433014, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11440862715244293, + "rewards/margins": 0.2619559168815613, + "rewards/rejected": -0.3763645589351654, + "step": 759 + }, + { + "epoch": 0.4727838258164852, + "grad_norm": 0.376305490732193, + "learning_rate": 1.2e-05, + "log_odds_chosen": 3.4311892986297607, + "log_odds_ratio": -0.199777290225029, + "logits/chosen": 1.3470544815063477, + "logits/rejected": 0.804937481880188, + "logps/chosen": -0.9575113654136658, + "logps/rejected": -4.017138481140137, + "loss": 0.6356, + "nll_loss": 0.6155799627304077, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09575113654136658, + "rewards/margins": 0.3059626817703247, + "rewards/rejected": -0.40171384811401367, + "step": 760 + }, + { + "epoch": 0.4734059097978227, + "grad_norm": 0.3834342956542969, + "learning_rate": 1.195e-05, + "log_odds_chosen": 1.3307521343231201, + "log_odds_ratio": -0.4671354591846466, + "logits/chosen": 1.2574310302734375, + "logits/rejected": 1.2822082042694092, + "logps/chosen": -0.9174765944480896, + "logps/rejected": -2.0750315189361572, + "loss": 0.685, + "nll_loss": 0.638303279876709, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09174765646457672, + "rewards/margins": 0.11575548350811005, + "rewards/rejected": -0.20750313997268677, + "step": 761 + }, + { + "epoch": 0.47402799377916016, + "grad_norm": 0.6633325219154358, + "learning_rate": 1.19e-05, + "log_odds_chosen": 2.5100934505462646, + "log_odds_ratio": -0.3733833432197571, + "logits/chosen": 3.2173590660095215, + "logits/rejected": 3.156155824661255, + "logps/chosen": -0.7270236611366272, + "logps/rejected": -2.6550979614257812, + "loss": 0.8232, + "nll_loss": 0.7858138084411621, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07270237058401108, + "rewards/margins": 0.19280743598937988, + "rewards/rejected": -0.26550978422164917, + "step": 762 + }, + { + "epoch": 0.47465007776049767, + "grad_norm": 0.6370331048965454, + "learning_rate": 1.185e-05, + "log_odds_chosen": 4.554332256317139, + "log_odds_ratio": -0.28026673197746277, + "logits/chosen": 1.6826764345169067, + "logits/rejected": 0.6190115213394165, + "logps/chosen": -1.153404712677002, + "logps/rejected": -5.1978960037231445, + "loss": 0.6612, + "nll_loss": 0.6331894397735596, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11534047871828079, + "rewards/margins": 0.40444907546043396, + "rewards/rejected": -0.5197895765304565, + "step": 763 + }, + { + "epoch": 0.4752721617418352, + "grad_norm": 0.5234032869338989, + "learning_rate": 1.18e-05, + "log_odds_chosen": 2.557978391647339, + "log_odds_ratio": -0.5858641266822815, + "logits/chosen": 0.9482438564300537, + "logits/rejected": 0.596887469291687, + "logps/chosen": -1.0016982555389404, + "logps/rejected": -3.3711018562316895, + "loss": 0.632, + "nll_loss": 0.5734348893165588, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.1001698300242424, + "rewards/margins": 0.2369403839111328, + "rewards/rejected": -0.3371102213859558, + "step": 764 + }, + { + "epoch": 0.4758942457231726, + "grad_norm": 0.39715540409088135, + "learning_rate": 1.175e-05, + "log_odds_chosen": 3.6863224506378174, + "log_odds_ratio": -0.15334059298038483, + "logits/chosen": 2.0271711349487305, + "logits/rejected": 0.7028290033340454, + "logps/chosen": -0.8889638781547546, + "logps/rejected": -4.144840240478516, + "loss": 0.7578, + "nll_loss": 0.7424666881561279, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08889639377593994, + "rewards/margins": 0.325587660074234, + "rewards/rejected": -0.41448402404785156, + "step": 765 + }, + { + "epoch": 0.47651632970451013, + "grad_norm": 0.3871302604675293, + "learning_rate": 1.1700000000000001e-05, + "log_odds_chosen": 2.7272534370422363, + "log_odds_ratio": -0.3169942796230316, + "logits/chosen": 1.3704383373260498, + "logits/rejected": 0.95868980884552, + "logps/chosen": -1.1230028867721558, + "logps/rejected": -3.561866283416748, + "loss": 0.694, + "nll_loss": 0.6622994542121887, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.11230029910802841, + "rewards/margins": 0.24388636648654938, + "rewards/rejected": -0.3561866581439972, + "step": 766 + }, + { + "epoch": 0.4771384136858476, + "grad_norm": 0.41518551111221313, + "learning_rate": 1.1650000000000002e-05, + "log_odds_chosen": 2.6603739261627197, + "log_odds_ratio": -0.24516430497169495, + "logits/chosen": 2.105510950088501, + "logits/rejected": 0.4677017629146576, + "logps/chosen": -0.8573104739189148, + "logps/rejected": -3.140277624130249, + "loss": 0.6672, + "nll_loss": 0.6426551342010498, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08573104441165924, + "rewards/margins": 0.22829669713974, + "rewards/rejected": -0.3140277564525604, + "step": 767 + }, + { + "epoch": 0.4777604976671851, + "grad_norm": 0.4271754026412964, + "learning_rate": 1.16e-05, + "log_odds_chosen": 3.0503485202789307, + "log_odds_ratio": -0.2287687361240387, + "logits/chosen": 2.8986668586730957, + "logits/rejected": 1.9720364809036255, + "logps/chosen": -0.7971369624137878, + "logps/rejected": -3.1767632961273193, + "loss": 0.8597, + "nll_loss": 0.8368232250213623, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07971370220184326, + "rewards/margins": 0.23796263337135315, + "rewards/rejected": -0.317676305770874, + "step": 768 + }, + { + "epoch": 0.47838258164852254, + "grad_norm": 0.5015910863876343, + "learning_rate": 1.1550000000000001e-05, + "log_odds_chosen": 4.93192720413208, + "log_odds_ratio": -0.18531693518161774, + "logits/chosen": 2.360891580581665, + "logits/rejected": 0.9528889656066895, + "logps/chosen": -0.8603143692016602, + "logps/rejected": -5.222134590148926, + "loss": 0.7677, + "nll_loss": 0.7491949200630188, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08603143692016602, + "rewards/margins": 0.43618205189704895, + "rewards/rejected": -0.5222134590148926, + "step": 769 + }, + { + "epoch": 0.47900466562986005, + "grad_norm": 1.472103238105774, + "learning_rate": 1.1500000000000002e-05, + "log_odds_chosen": 2.148057699203491, + "log_odds_ratio": -0.39299821853637695, + "logits/chosen": 0.8005610108375549, + "logits/rejected": -0.056345269083976746, + "logps/chosen": -1.6142408847808838, + "logps/rejected": -3.466493844985962, + "loss": 0.8477, + "nll_loss": 0.8084092736244202, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.16142407059669495, + "rewards/margins": 0.18522529304027557, + "rewards/rejected": -0.3466493785381317, + "step": 770 + }, + { + "epoch": 0.4796267496111975, + "grad_norm": 0.3125849664211273, + "learning_rate": 1.145e-05, + "log_odds_chosen": 4.567728519439697, + "log_odds_ratio": -0.20986464619636536, + "logits/chosen": 0.3897290825843811, + "logits/rejected": 1.1200284957885742, + "logps/chosen": -0.8692652583122253, + "logps/rejected": -4.947718620300293, + "loss": 0.5962, + "nll_loss": 0.5752478837966919, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08692653477191925, + "rewards/margins": 0.40784531831741333, + "rewards/rejected": -0.4947718381881714, + "step": 771 + }, + { + "epoch": 0.480248833592535, + "grad_norm": 1.0467990636825562, + "learning_rate": 1.1400000000000001e-05, + "log_odds_chosen": 3.529940128326416, + "log_odds_ratio": -0.1735749989748001, + "logits/chosen": 2.3256747722625732, + "logits/rejected": 1.5398943424224854, + "logps/chosen": -1.0427285432815552, + "logps/rejected": -4.057622909545898, + "loss": 0.7398, + "nll_loss": 0.7224363684654236, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10427285730838776, + "rewards/margins": 0.3014894723892212, + "rewards/rejected": -0.40576231479644775, + "step": 772 + }, + { + "epoch": 0.48087091757387246, + "grad_norm": 0.3493899703025818, + "learning_rate": 1.1350000000000001e-05, + "log_odds_chosen": 5.22404146194458, + "log_odds_ratio": -0.024094879627227783, + "logits/chosen": 1.574556589126587, + "logits/rejected": -0.5061302185058594, + "logps/chosen": -0.9601320028305054, + "logps/rejected": -5.636563301086426, + "loss": 0.4969, + "nll_loss": 0.4944761395454407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09601320326328278, + "rewards/margins": 0.4676430821418762, + "rewards/rejected": -0.5636562705039978, + "step": 773 + }, + { + "epoch": 0.48149300155520997, + "grad_norm": 0.33069247007369995, + "learning_rate": 1.13e-05, + "log_odds_chosen": 5.7939653396606445, + "log_odds_ratio": -0.047045718878507614, + "logits/chosen": 2.868722438812256, + "logits/rejected": 1.0096173286437988, + "logps/chosen": -0.8514919877052307, + "logps/rejected": -6.00200080871582, + "loss": 0.8638, + "nll_loss": 0.8591352701187134, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08514919877052307, + "rewards/margins": 0.5150509476661682, + "rewards/rejected": -0.6002000570297241, + "step": 774 + }, + { + "epoch": 0.4821150855365474, + "grad_norm": 0.3597210645675659, + "learning_rate": 1.125e-05, + "log_odds_chosen": 2.536614179611206, + "log_odds_ratio": -0.3833247125148773, + "logits/chosen": 1.416181206703186, + "logits/rejected": 2.180039405822754, + "logps/chosen": -0.8901622891426086, + "logps/rejected": -3.139539957046509, + "loss": 0.6833, + "nll_loss": 0.6449413299560547, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08901622891426086, + "rewards/margins": 0.2249377965927124, + "rewards/rejected": -0.31395402550697327, + "step": 775 + }, + { + "epoch": 0.4827371695178849, + "grad_norm": 0.2865667939186096, + "learning_rate": 1.1200000000000001e-05, + "log_odds_chosen": 5.424200057983398, + "log_odds_ratio": -0.09211785346269608, + "logits/chosen": 1.124145269393921, + "logits/rejected": -0.09277313947677612, + "logps/chosen": -0.867756187915802, + "logps/rejected": -5.792477607727051, + "loss": 0.5922, + "nll_loss": 0.583034098148346, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08677561581134796, + "rewards/margins": 0.4924721419811249, + "rewards/rejected": -0.579247772693634, + "step": 776 + }, + { + "epoch": 0.4833592534992224, + "grad_norm": 0.27115461230278015, + "learning_rate": 1.115e-05, + "log_odds_chosen": 3.815460443496704, + "log_odds_ratio": -0.20083631575107574, + "logits/chosen": 0.29774150252342224, + "logits/rejected": 0.6260338425636292, + "logps/chosen": -0.8240159749984741, + "logps/rejected": -4.16973876953125, + "loss": 0.5038, + "nll_loss": 0.48371002078056335, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0824015885591507, + "rewards/margins": 0.3345722556114197, + "rewards/rejected": -0.41697388887405396, + "step": 777 + }, + { + "epoch": 0.4839813374805599, + "grad_norm": 0.38135629892349243, + "learning_rate": 1.11e-05, + "log_odds_chosen": 4.489835262298584, + "log_odds_ratio": -0.26796337962150574, + "logits/chosen": 1.5280147790908813, + "logits/rejected": 1.375030755996704, + "logps/chosen": -0.9886677265167236, + "logps/rejected": -5.18032693862915, + "loss": 0.6699, + "nll_loss": 0.6430768966674805, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0988667756319046, + "rewards/margins": 0.4191659092903137, + "rewards/rejected": -0.5180326700210571, + "step": 778 + }, + { + "epoch": 0.48460342146189733, + "grad_norm": 0.33285027742385864, + "learning_rate": 1.1050000000000001e-05, + "log_odds_chosen": 6.08347749710083, + "log_odds_ratio": -0.008807210251688957, + "logits/chosen": 0.8070425987243652, + "logits/rejected": 0.062301695346832275, + "logps/chosen": -0.9627847075462341, + "logps/rejected": -6.553963661193848, + "loss": 0.5153, + "nll_loss": 0.5144031643867493, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09627847373485565, + "rewards/margins": 0.5591179132461548, + "rewards/rejected": -0.6553963422775269, + "step": 779 + }, + { + "epoch": 0.48522550544323484, + "grad_norm": 0.28858453035354614, + "learning_rate": 1.1000000000000001e-05, + "log_odds_chosen": 4.537181377410889, + "log_odds_ratio": -0.16321076452732086, + "logits/chosen": 0.9505159258842468, + "logits/rejected": 1.2651251554489136, + "logps/chosen": -0.49642449617385864, + "logps/rejected": -4.0449538230896, + "loss": 0.5712, + "nll_loss": 0.5549124479293823, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.049642451107501984, + "rewards/margins": 0.35485297441482544, + "rewards/rejected": -0.4044954180717468, + "step": 780 + }, + { + "epoch": 0.4858475894245723, + "grad_norm": 0.31122761964797974, + "learning_rate": 1.095e-05, + "log_odds_chosen": 3.597323179244995, + "log_odds_ratio": -0.22623345255851746, + "logits/chosen": -0.918379545211792, + "logits/rejected": 0.03294803947210312, + "logps/chosen": -0.9022977352142334, + "logps/rejected": -4.053953647613525, + "loss": 0.4282, + "nll_loss": 0.40558505058288574, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09022977203130722, + "rewards/margins": 0.31516557931900024, + "rewards/rejected": -0.40539535880088806, + "step": 781 + }, + { + "epoch": 0.4864696734059098, + "grad_norm": 0.3407188057899475, + "learning_rate": 1.09e-05, + "log_odds_chosen": 4.901708602905273, + "log_odds_ratio": -0.08885428309440613, + "logits/chosen": 1.8469200134277344, + "logits/rejected": 0.9414504766464233, + "logps/chosen": -0.7245330810546875, + "logps/rejected": -4.952267169952393, + "loss": 0.6573, + "nll_loss": 0.6484310626983643, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07245330512523651, + "rewards/margins": 0.42277342081069946, + "rewards/rejected": -0.49522674083709717, + "step": 782 + }, + { + "epoch": 0.4870917573872473, + "grad_norm": 0.41091054677963257, + "learning_rate": 1.0850000000000001e-05, + "log_odds_chosen": 4.862772464752197, + "log_odds_ratio": -0.10557924956083298, + "logits/chosen": 0.577217161655426, + "logits/rejected": 0.0676531195640564, + "logps/chosen": -0.8654823303222656, + "logps/rejected": -5.2216901779174805, + "loss": 0.5218, + "nll_loss": 0.5112905502319336, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08654823154211044, + "rewards/margins": 0.43562084436416626, + "rewards/rejected": -0.5221690535545349, + "step": 783 + }, + { + "epoch": 0.48771384136858476, + "grad_norm": 0.43482547998428345, + "learning_rate": 1.08e-05, + "log_odds_chosen": 2.8547940254211426, + "log_odds_ratio": -0.5087026357650757, + "logits/chosen": 1.314387559890747, + "logits/rejected": 0.8139910697937012, + "logps/chosen": -1.0577054023742676, + "logps/rejected": -3.741490364074707, + "loss": 0.6691, + "nll_loss": 0.618266224861145, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.105770543217659, + "rewards/margins": 0.26837849617004395, + "rewards/rejected": -0.37414902448654175, + "step": 784 + }, + { + "epoch": 0.48833592534992226, + "grad_norm": 0.3116896152496338, + "learning_rate": 1.075e-05, + "log_odds_chosen": 4.188148498535156, + "log_odds_ratio": -0.24271881580352783, + "logits/chosen": 0.7602165937423706, + "logits/rejected": 1.3262357711791992, + "logps/chosen": -0.8295053243637085, + "logps/rejected": -4.384270191192627, + "loss": 0.6448, + "nll_loss": 0.6205779910087585, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08295053988695145, + "rewards/margins": 0.3554764986038208, + "rewards/rejected": -0.43842706084251404, + "step": 785 + }, + { + "epoch": 0.4889580093312597, + "grad_norm": 0.6962890028953552, + "learning_rate": 1.0700000000000001e-05, + "log_odds_chosen": 3.2535345554351807, + "log_odds_ratio": -0.12973089516162872, + "logits/chosen": 0.8944046497344971, + "logits/rejected": -0.8863468170166016, + "logps/chosen": -1.5091285705566406, + "logps/rejected": -4.428877353668213, + "loss": 0.5866, + "nll_loss": 0.5735969543457031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1509128361940384, + "rewards/margins": 0.29197490215301514, + "rewards/rejected": -0.4428877830505371, + "step": 786 + }, + { + "epoch": 0.4895800933125972, + "grad_norm": 0.41683006286621094, + "learning_rate": 1.065e-05, + "log_odds_chosen": 3.8502464294433594, + "log_odds_ratio": -0.18103164434432983, + "logits/chosen": 1.4110684394836426, + "logits/rejected": 0.5764380097389221, + "logps/chosen": -0.7307590246200562, + "logps/rejected": -4.002379417419434, + "loss": 0.4647, + "nll_loss": 0.446548193693161, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07307590544223785, + "rewards/margins": 0.3271620273590088, + "rewards/rejected": -0.40023791790008545, + "step": 787 + }, + { + "epoch": 0.49020217729393467, + "grad_norm": 0.3538471460342407, + "learning_rate": 1.06e-05, + "log_odds_chosen": 4.150043964385986, + "log_odds_ratio": -0.12563171982765198, + "logits/chosen": 1.96152925491333, + "logits/rejected": 0.12847623229026794, + "logps/chosen": -0.7252289056777954, + "logps/rejected": -4.287262439727783, + "loss": 0.621, + "nll_loss": 0.6084175109863281, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07252289354801178, + "rewards/margins": 0.3562033474445343, + "rewards/rejected": -0.4287262260913849, + "step": 788 + }, + { + "epoch": 0.4908242612752722, + "grad_norm": 0.3300181031227112, + "learning_rate": 1.055e-05, + "log_odds_chosen": 3.877500057220459, + "log_odds_ratio": -0.13669435679912567, + "logits/chosen": 2.18703031539917, + "logits/rejected": 2.082003116607666, + "logps/chosen": -0.8657932877540588, + "logps/rejected": -4.306628704071045, + "loss": 0.7181, + "nll_loss": 0.7044727206230164, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0865793377161026, + "rewards/margins": 0.3440835475921631, + "rewards/rejected": -0.4306628704071045, + "step": 789 + }, + { + "epoch": 0.49144634525660963, + "grad_norm": 0.43428757786750793, + "learning_rate": 1.05e-05, + "log_odds_chosen": 2.6995761394500732, + "log_odds_ratio": -0.38942408561706543, + "logits/chosen": 2.258654832839966, + "logits/rejected": 2.685004234313965, + "logps/chosen": -0.883932888507843, + "logps/rejected": -3.241953134536743, + "loss": 0.7016, + "nll_loss": 0.6626549363136292, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08839329332113266, + "rewards/margins": 0.23580200970172882, + "rewards/rejected": -0.3241952955722809, + "step": 790 + }, + { + "epoch": 0.49206842923794714, + "grad_norm": 0.31180664896965027, + "learning_rate": 1.045e-05, + "log_odds_chosen": 5.281048774719238, + "log_odds_ratio": -0.00905335508286953, + "logits/chosen": 1.120410680770874, + "logits/rejected": 0.44220006465911865, + "logps/chosen": -0.8353912830352783, + "logps/rejected": -5.5110182762146, + "loss": 0.4997, + "nll_loss": 0.4988272786140442, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08353912830352783, + "rewards/margins": 0.467562735080719, + "rewards/rejected": -0.551101803779602, + "step": 791 + }, + { + "epoch": 0.4926905132192846, + "grad_norm": 0.4472506642341614, + "learning_rate": 1.04e-05, + "log_odds_chosen": 3.6429500579833984, + "log_odds_ratio": -0.186533123254776, + "logits/chosen": 1.0584020614624023, + "logits/rejected": 1.7685458660125732, + "logps/chosen": -0.9058350920677185, + "logps/rejected": -4.094951629638672, + "loss": 0.6775, + "nll_loss": 0.658807635307312, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09058351069688797, + "rewards/margins": 0.3189116418361664, + "rewards/rejected": -0.40949517488479614, + "step": 792 + }, + { + "epoch": 0.4933125972006221, + "grad_norm": 0.5813391208648682, + "learning_rate": 1.035e-05, + "log_odds_chosen": 4.276451110839844, + "log_odds_ratio": -0.13493305444717407, + "logits/chosen": 0.2429172396659851, + "logits/rejected": -0.4983159899711609, + "logps/chosen": -1.4180018901824951, + "logps/rejected": -5.342185020446777, + "loss": 0.5255, + "nll_loss": 0.5120512843132019, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.141800194978714, + "rewards/margins": 0.3924183249473572, + "rewards/rejected": -0.5342184901237488, + "step": 793 + }, + { + "epoch": 0.49393468118195955, + "grad_norm": 0.368551105260849, + "learning_rate": 1.03e-05, + "log_odds_chosen": 3.7925801277160645, + "log_odds_ratio": -0.21110907196998596, + "logits/chosen": 2.3769233226776123, + "logits/rejected": 0.7364627122879028, + "logps/chosen": -0.8370175361633301, + "logps/rejected": -4.201289176940918, + "loss": 0.656, + "nll_loss": 0.634913444519043, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08370175957679749, + "rewards/margins": 0.33642715215682983, + "rewards/rejected": -0.42012888193130493, + "step": 794 + }, + { + "epoch": 0.49455676516329705, + "grad_norm": 0.34030696749687195, + "learning_rate": 1.025e-05, + "log_odds_chosen": 3.466846227645874, + "log_odds_ratio": -0.19665193557739258, + "logits/chosen": 1.3573992252349854, + "logits/rejected": 0.5698047280311584, + "logps/chosen": -0.7727445363998413, + "logps/rejected": -3.6376049518585205, + "loss": 0.558, + "nll_loss": 0.5383586287498474, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07727445662021637, + "rewards/margins": 0.28648605942726135, + "rewards/rejected": -0.36376050114631653, + "step": 795 + }, + { + "epoch": 0.4951788491446345, + "grad_norm": 2.465184211730957, + "learning_rate": 1.02e-05, + "log_odds_chosen": 3.1407623291015625, + "log_odds_ratio": -0.2292679399251938, + "logits/chosen": 0.6717162132263184, + "logits/rejected": 0.7924606800079346, + "logps/chosen": -1.6437124013900757, + "logps/rejected": -4.459639549255371, + "loss": 0.7352, + "nll_loss": 0.7122251987457275, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16437125205993652, + "rewards/margins": 0.2815927267074585, + "rewards/rejected": -0.44596394896507263, + "step": 796 + }, + { + "epoch": 0.495800933125972, + "grad_norm": 0.9437676668167114, + "learning_rate": 1.0150000000000001e-05, + "log_odds_chosen": 4.749540328979492, + "log_odds_ratio": -0.1930580735206604, + "logits/chosen": 1.5148296356201172, + "logits/rejected": 0.5548698902130127, + "logps/chosen": -0.7817761898040771, + "logps/rejected": -5.032675266265869, + "loss": 0.6243, + "nll_loss": 0.6050349473953247, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07817761600017548, + "rewards/margins": 0.425089955329895, + "rewards/rejected": -0.5032675266265869, + "step": 797 + }, + { + "epoch": 0.49642301710730946, + "grad_norm": 0.7716901898384094, + "learning_rate": 1.0100000000000002e-05, + "log_odds_chosen": 4.870780944824219, + "log_odds_ratio": -0.11819732189178467, + "logits/chosen": 0.3446436822414398, + "logits/rejected": -0.5686097145080566, + "logps/chosen": -0.8253393173217773, + "logps/rejected": -5.124600410461426, + "loss": 0.4937, + "nll_loss": 0.48190826177597046, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08253393322229385, + "rewards/margins": 0.4299260973930359, + "rewards/rejected": -0.5124600529670715, + "step": 798 + }, + { + "epoch": 0.49704510108864697, + "grad_norm": 0.3625529706478119, + "learning_rate": 1.005e-05, + "log_odds_chosen": 5.230063438415527, + "log_odds_ratio": -0.08902789652347565, + "logits/chosen": 1.6205828189849854, + "logits/rejected": 1.2859604358673096, + "logps/chosen": -1.2735844850540161, + "logps/rejected": -6.1855292320251465, + "loss": 0.6788, + "nll_loss": 0.6698963642120361, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12735846638679504, + "rewards/margins": 0.491194486618042, + "rewards/rejected": -0.6185529828071594, + "step": 799 + }, + { + "epoch": 0.4976671850699845, + "grad_norm": 0.42168471217155457, + "learning_rate": 1e-05, + "log_odds_chosen": 2.8343665599823, + "log_odds_ratio": -0.29494884610176086, + "logits/chosen": 2.636620283126831, + "logits/rejected": 1.8547230958938599, + "logps/chosen": -0.8090919256210327, + "logps/rejected": -3.2238035202026367, + "loss": 0.7996, + "nll_loss": 0.77005934715271, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08090918511152267, + "rewards/margins": 0.24147115647792816, + "rewards/rejected": -0.32238033413887024, + "step": 800 + }, + { + "epoch": 0.4982892690513219, + "grad_norm": 0.37538668513298035, + "learning_rate": 9.950000000000001e-06, + "log_odds_chosen": 4.194415092468262, + "log_odds_ratio": -0.22797462344169617, + "logits/chosen": 2.708955764770508, + "logits/rejected": 0.8517798781394958, + "logps/chosen": -0.9027718305587769, + "logps/rejected": -4.7235212326049805, + "loss": 0.7717, + "nll_loss": 0.7489374876022339, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09027719497680664, + "rewards/margins": 0.38207489252090454, + "rewards/rejected": -0.4723520874977112, + "step": 801 + }, + { + "epoch": 0.49891135303265943, + "grad_norm": 0.3566712737083435, + "learning_rate": 9.900000000000002e-06, + "log_odds_chosen": 3.805783271789551, + "log_odds_ratio": -0.34034591913223267, + "logits/chosen": 1.3816561698913574, + "logits/rejected": 0.4715338349342346, + "logps/chosen": -0.8862382173538208, + "logps/rejected": -4.359492301940918, + "loss": 0.6453, + "nll_loss": 0.6112942695617676, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08862382173538208, + "rewards/margins": 0.3473253846168518, + "rewards/rejected": -0.4359492361545563, + "step": 802 + }, + { + "epoch": 0.4995334370139969, + "grad_norm": 0.3383307158946991, + "learning_rate": 9.85e-06, + "log_odds_chosen": 3.2576406002044678, + "log_odds_ratio": -0.11606215685606003, + "logits/chosen": 0.8672167062759399, + "logits/rejected": 0.9878823757171631, + "logps/chosen": -0.6832941770553589, + "logps/rejected": -3.2288990020751953, + "loss": 0.6006, + "nll_loss": 0.5890172719955444, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06832942366600037, + "rewards/margins": 0.2545604705810547, + "rewards/rejected": -0.32288989424705505, + "step": 803 + }, + { + "epoch": 0.5001555209953343, + "grad_norm": 1.342377781867981, + "learning_rate": 9.800000000000001e-06, + "log_odds_chosen": 3.6162753105163574, + "log_odds_ratio": -0.22811348736286163, + "logits/chosen": 0.25409284234046936, + "logits/rejected": 0.6416158676147461, + "logps/chosen": -0.8462536334991455, + "logps/rejected": -4.046585559844971, + "loss": 0.5652, + "nll_loss": 0.5423653721809387, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08462535589933395, + "rewards/margins": 0.3200331926345825, + "rewards/rejected": -0.40465855598449707, + "step": 804 + }, + { + "epoch": 0.5007776049766719, + "grad_norm": 0.38830915093421936, + "learning_rate": 9.750000000000002e-06, + "log_odds_chosen": 3.037473201751709, + "log_odds_ratio": -0.4166441559791565, + "logits/chosen": 2.476818561553955, + "logits/rejected": 1.7838196754455566, + "logps/chosen": -1.0396037101745605, + "logps/rejected": -3.9347009658813477, + "loss": 0.7205, + "nll_loss": 0.6788171529769897, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.10396037995815277, + "rewards/margins": 0.28950971364974976, + "rewards/rejected": -0.3934701085090637, + "step": 805 + }, + { + "epoch": 0.5013996889580093, + "grad_norm": 0.37463346123695374, + "learning_rate": 9.7e-06, + "log_odds_chosen": 4.786480903625488, + "log_odds_ratio": -0.08557663857936859, + "logits/chosen": 1.0099029541015625, + "logits/rejected": -0.1891649067401886, + "logps/chosen": -0.9944522976875305, + "logps/rejected": -5.300948143005371, + "loss": 0.5613, + "nll_loss": 0.5527094602584839, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09944523125886917, + "rewards/margins": 0.4306495785713196, + "rewards/rejected": -0.5300948023796082, + "step": 806 + }, + { + "epoch": 0.5020217729393468, + "grad_norm": 0.3566811680793762, + "learning_rate": 9.65e-06, + "log_odds_chosen": 3.241489887237549, + "log_odds_ratio": -0.24942581355571747, + "logits/chosen": 3.2233145236968994, + "logits/rejected": 2.1009066104888916, + "logps/chosen": -0.7947208881378174, + "logps/rejected": -3.3181252479553223, + "loss": 0.7944, + "nll_loss": 0.7694109082221985, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07947209477424622, + "rewards/margins": 0.2523404359817505, + "rewards/rejected": -0.3318125009536743, + "step": 807 + }, + { + "epoch": 0.5026438569206843, + "grad_norm": 1.0895134210586548, + "learning_rate": 9.600000000000001e-06, + "log_odds_chosen": 3.148625373840332, + "log_odds_ratio": -0.2908478081226349, + "logits/chosen": 0.9981393814086914, + "logits/rejected": 0.09825985133647919, + "logps/chosen": -1.0793797969818115, + "logps/rejected": -3.959749698638916, + "loss": 0.7648, + "nll_loss": 0.7357277870178223, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10793796926736832, + "rewards/margins": 0.2880370020866394, + "rewards/rejected": -0.3959749639034271, + "step": 808 + }, + { + "epoch": 0.5032659409020218, + "grad_norm": 0.4039660692214966, + "learning_rate": 9.55e-06, + "log_odds_chosen": 4.045183181762695, + "log_odds_ratio": -0.23220917582511902, + "logits/chosen": 1.330578088760376, + "logits/rejected": 1.3192451000213623, + "logps/chosen": -1.286431908607483, + "logps/rejected": -5.05302095413208, + "loss": 0.6188, + "nll_loss": 0.5955660343170166, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.128643199801445, + "rewards/margins": 0.3766588866710663, + "rewards/rejected": -0.5053020715713501, + "step": 809 + }, + { + "epoch": 0.5038880248833593, + "grad_norm": 0.5143351554870605, + "learning_rate": 9.5e-06, + "log_odds_chosen": 4.110378265380859, + "log_odds_ratio": -0.240739107131958, + "logits/chosen": 2.9270503520965576, + "logits/rejected": 2.2957873344421387, + "logps/chosen": -1.0331082344055176, + "logps/rejected": -4.818493366241455, + "loss": 0.8335, + "nll_loss": 0.8093845248222351, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10331083834171295, + "rewards/margins": 0.37853848934173584, + "rewards/rejected": -0.48184934258461, + "step": 810 + }, + { + "epoch": 0.5045101088646967, + "grad_norm": 0.35940125584602356, + "learning_rate": 9.450000000000001e-06, + "log_odds_chosen": 2.423135757446289, + "log_odds_ratio": -0.2942277789115906, + "logits/chosen": 0.5553222894668579, + "logits/rejected": 1.4473676681518555, + "logps/chosen": -0.7806976437568665, + "logps/rejected": -2.7081189155578613, + "loss": 0.5079, + "nll_loss": 0.47850099205970764, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.078069768846035, + "rewards/margins": 0.19274210929870605, + "rewards/rejected": -0.27081188559532166, + "step": 811 + }, + { + "epoch": 0.5051321928460342, + "grad_norm": 0.31486862897872925, + "learning_rate": 9.4e-06, + "log_odds_chosen": 4.782650470733643, + "log_odds_ratio": -0.06745091825723648, + "logits/chosen": 0.6869497299194336, + "logits/rejected": 0.17597493529319763, + "logps/chosen": -0.9358524680137634, + "logps/rejected": -5.170880317687988, + "loss": 0.4733, + "nll_loss": 0.4665657877922058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09358525276184082, + "rewards/margins": 0.42350274324417114, + "rewards/rejected": -0.517087996006012, + "step": 812 + }, + { + "epoch": 0.5057542768273717, + "grad_norm": 0.2950216829776764, + "learning_rate": 9.35e-06, + "log_odds_chosen": 4.459681510925293, + "log_odds_ratio": -0.0953890010714531, + "logits/chosen": 0.6839005947113037, + "logits/rejected": 0.27213621139526367, + "logps/chosen": -0.9052809476852417, + "logps/rejected": -4.865182399749756, + "loss": 0.4605, + "nll_loss": 0.45096686482429504, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09052809327840805, + "rewards/margins": 0.39599016308784485, + "rewards/rejected": -0.4865182638168335, + "step": 813 + }, + { + "epoch": 0.5063763608087092, + "grad_norm": 0.3427290618419647, + "learning_rate": 9.3e-06, + "log_odds_chosen": 6.598719120025635, + "log_odds_ratio": -0.007421756163239479, + "logits/chosen": 1.0570212602615356, + "logits/rejected": 0.9498794674873352, + "logps/chosen": -0.9093166589736938, + "logps/rejected": -6.913853645324707, + "loss": 0.5848, + "nll_loss": 0.5840796828269958, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09093166887760162, + "rewards/margins": 0.6004536747932434, + "rewards/rejected": -0.6913853883743286, + "step": 814 + }, + { + "epoch": 0.5069984447900466, + "grad_norm": 0.2884136736392975, + "learning_rate": 9.25e-06, + "log_odds_chosen": 3.891439437866211, + "log_odds_ratio": -0.12934939563274384, + "logits/chosen": 0.9800029993057251, + "logits/rejected": 1.1361968517303467, + "logps/chosen": -1.0110560655593872, + "logps/rejected": -4.505780220031738, + "loss": 0.5232, + "nll_loss": 0.5102529525756836, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10110560804605484, + "rewards/margins": 0.3494724631309509, + "rewards/rejected": -0.4505780339241028, + "step": 815 + }, + { + "epoch": 0.5076205287713841, + "grad_norm": 0.35629749298095703, + "learning_rate": 9.2e-06, + "log_odds_chosen": 4.490930080413818, + "log_odds_ratio": -0.10382703691720963, + "logits/chosen": 1.9403718709945679, + "logits/rejected": 0.49081113934516907, + "logps/chosen": -0.7934112548828125, + "logps/rejected": -4.741158485412598, + "loss": 0.6434, + "nll_loss": 0.6329823136329651, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07934112846851349, + "rewards/margins": 0.39477473497390747, + "rewards/rejected": -0.47411584854125977, + "step": 816 + }, + { + "epoch": 0.5082426127527216, + "grad_norm": 0.4089803695678711, + "learning_rate": 9.15e-06, + "log_odds_chosen": 3.807797908782959, + "log_odds_ratio": -0.22873249650001526, + "logits/chosen": -0.5712364315986633, + "logits/rejected": 0.023705005645751953, + "logps/chosen": -1.6236436367034912, + "logps/rejected": -5.201904296875, + "loss": 0.4672, + "nll_loss": 0.44429489970207214, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.16236436367034912, + "rewards/margins": 0.3578260838985443, + "rewards/rejected": -0.5201904773712158, + "step": 817 + }, + { + "epoch": 0.5088646967340591, + "grad_norm": 0.4340582489967346, + "learning_rate": 9.100000000000001e-06, + "log_odds_chosen": 4.368274211883545, + "log_odds_ratio": -0.2061048150062561, + "logits/chosen": 0.8786136507987976, + "logits/rejected": 0.8603896498680115, + "logps/chosen": -0.9838038086891174, + "logps/rejected": -5.024988651275635, + "loss": 0.5664, + "nll_loss": 0.5458295941352844, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09838037937879562, + "rewards/margins": 0.40411844849586487, + "rewards/rejected": -0.5024988651275635, + "step": 818 + }, + { + "epoch": 0.5094867807153965, + "grad_norm": 0.3766569197177887, + "learning_rate": 9.05e-06, + "log_odds_chosen": 5.19111442565918, + "log_odds_ratio": -0.0431944876909256, + "logits/chosen": 1.3828849792480469, + "logits/rejected": -0.19894227385520935, + "logps/chosen": -0.7179228067398071, + "logps/rejected": -5.100705623626709, + "loss": 0.5891, + "nll_loss": 0.5847594738006592, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07179228961467743, + "rewards/margins": 0.43827831745147705, + "rewards/rejected": -0.5100706219673157, + "step": 819 + }, + { + "epoch": 0.5101088646967341, + "grad_norm": 0.3517646789550781, + "learning_rate": 9e-06, + "log_odds_chosen": 3.4543983936309814, + "log_odds_ratio": -0.10853774845600128, + "logits/chosen": 1.693161964416504, + "logits/rejected": 0.43125981092453003, + "logps/chosen": -1.0501185655593872, + "logps/rejected": -4.055387496948242, + "loss": 0.636, + "nll_loss": 0.6251487731933594, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10501186549663544, + "rewards/margins": 0.300526887178421, + "rewards/rejected": -0.40553876757621765, + "step": 820 + }, + { + "epoch": 0.5107309486780716, + "grad_norm": 1.9642311334609985, + "learning_rate": 8.95e-06, + "log_odds_chosen": 4.056253433227539, + "log_odds_ratio": -0.17738457024097443, + "logits/chosen": 1.7935194969177246, + "logits/rejected": 0.7376617193222046, + "logps/chosen": -0.7888966798782349, + "logps/rejected": -4.304933547973633, + "loss": 0.6337, + "nll_loss": 0.6160002946853638, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07888966053724289, + "rewards/margins": 0.3516036868095398, + "rewards/rejected": -0.4304933547973633, + "step": 821 + }, + { + "epoch": 0.511353032659409, + "grad_norm": 0.2685078978538513, + "learning_rate": 8.9e-06, + "log_odds_chosen": 5.639597415924072, + "log_odds_ratio": -0.029838988557457924, + "logits/chosen": 0.07695133984088898, + "logits/rejected": 0.47179538011550903, + "logps/chosen": -0.8393157720565796, + "logps/rejected": -5.901039123535156, + "loss": 0.4426, + "nll_loss": 0.43960070610046387, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0839315727353096, + "rewards/margins": 0.5061724185943604, + "rewards/rejected": -0.5901039838790894, + "step": 822 + }, + { + "epoch": 0.5119751166407465, + "grad_norm": 0.351177841424942, + "learning_rate": 8.85e-06, + "log_odds_chosen": 5.065117359161377, + "log_odds_ratio": -0.08009850978851318, + "logits/chosen": 0.05210921913385391, + "logits/rejected": -0.3567178249359131, + "logps/chosen": -0.9725984930992126, + "logps/rejected": -5.50432825088501, + "loss": 0.4287, + "nll_loss": 0.4206945300102234, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09725984930992126, + "rewards/margins": 0.4531729817390442, + "rewards/rejected": -0.5504328012466431, + "step": 823 + }, + { + "epoch": 0.512597200622084, + "grad_norm": 0.589407205581665, + "learning_rate": 8.8e-06, + "log_odds_chosen": 3.0141940116882324, + "log_odds_ratio": -0.38279733061790466, + "logits/chosen": 2.124873638153076, + "logits/rejected": 1.0183517932891846, + "logps/chosen": -0.9143626093864441, + "logps/rejected": -3.5896453857421875, + "loss": 0.7332, + "nll_loss": 0.6948947906494141, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09143626689910889, + "rewards/margins": 0.2675282955169678, + "rewards/rejected": -0.35896456241607666, + "step": 824 + }, + { + "epoch": 0.5132192846034215, + "grad_norm": 0.32223331928253174, + "learning_rate": 8.75e-06, + "log_odds_chosen": 3.163451910018921, + "log_odds_ratio": -0.27636605501174927, + "logits/chosen": 1.7252213954925537, + "logits/rejected": 0.4546215534210205, + "logps/chosen": -0.8569576740264893, + "logps/rejected": -3.6060609817504883, + "loss": 0.7564, + "nll_loss": 0.7287983298301697, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0856957733631134, + "rewards/margins": 0.2749103307723999, + "rewards/rejected": -0.3606061041355133, + "step": 825 + }, + { + "epoch": 0.5138413685847589, + "grad_norm": 0.35858839750289917, + "learning_rate": 8.7e-06, + "log_odds_chosen": 2.8523874282836914, + "log_odds_ratio": -0.14599651098251343, + "logits/chosen": 1.4515256881713867, + "logits/rejected": 1.5431532859802246, + "logps/chosen": -0.8925541639328003, + "logps/rejected": -3.3079447746276855, + "loss": 0.6665, + "nll_loss": 0.6518935561180115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0892554223537445, + "rewards/margins": 0.24153903126716614, + "rewards/rejected": -0.33079448342323303, + "step": 826 + }, + { + "epoch": 0.5144634525660964, + "grad_norm": 0.5025385618209839, + "learning_rate": 8.65e-06, + "log_odds_chosen": 2.716071367263794, + "log_odds_ratio": -0.4175480008125305, + "logits/chosen": 1.7826498746871948, + "logits/rejected": 2.1285691261291504, + "logps/chosen": -1.079801321029663, + "logps/rejected": -3.5945372581481934, + "loss": 0.6716, + "nll_loss": 0.6298132538795471, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10798013210296631, + "rewards/margins": 0.251473605632782, + "rewards/rejected": -0.3594537377357483, + "step": 827 + }, + { + "epoch": 0.5150855365474339, + "grad_norm": 0.3128713071346283, + "learning_rate": 8.599999999999999e-06, + "log_odds_chosen": 4.946896553039551, + "log_odds_ratio": -0.09469583630561829, + "logits/chosen": 1.2161049842834473, + "logits/rejected": 1.1760735511779785, + "logps/chosen": -0.8445440530776978, + "logps/rejected": -5.290299892425537, + "loss": 0.5957, + "nll_loss": 0.5861875414848328, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08445440977811813, + "rewards/margins": 0.44457557797431946, + "rewards/rejected": -0.5290299654006958, + "step": 828 + }, + { + "epoch": 0.5157076205287714, + "grad_norm": 0.7158114314079285, + "learning_rate": 8.550000000000001e-06, + "log_odds_chosen": 4.49724817276001, + "log_odds_ratio": -0.17848162353038788, + "logits/chosen": 1.4604125022888184, + "logits/rejected": 0.44166508316993713, + "logps/chosen": -0.7384613156318665, + "logps/rejected": -4.72969913482666, + "loss": 0.5113, + "nll_loss": 0.49343129992485046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07384613156318665, + "rewards/margins": 0.3991238474845886, + "rewards/rejected": -0.47296997904777527, + "step": 829 + }, + { + "epoch": 0.5163297045101088, + "grad_norm": 0.3281008303165436, + "learning_rate": 8.500000000000002e-06, + "log_odds_chosen": 3.149600028991699, + "log_odds_ratio": -0.3649888038635254, + "logits/chosen": 1.408589243888855, + "logits/rejected": 1.8521182537078857, + "logps/chosen": -0.9668858051300049, + "logps/rejected": -3.8920159339904785, + "loss": 0.6461, + "nll_loss": 0.609637975692749, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09668857604265213, + "rewards/margins": 0.29251301288604736, + "rewards/rejected": -0.3892015814781189, + "step": 830 + }, + { + "epoch": 0.5169517884914463, + "grad_norm": 0.32905203104019165, + "learning_rate": 8.45e-06, + "log_odds_chosen": 4.028629302978516, + "log_odds_ratio": -0.09469190984964371, + "logits/chosen": -0.4233950972557068, + "logits/rejected": 0.36360660195350647, + "logps/chosen": -0.9033867120742798, + "logps/rejected": -4.371126174926758, + "loss": 0.4923, + "nll_loss": 0.4828674793243408, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09033866971731186, + "rewards/margins": 0.3467739224433899, + "rewards/rejected": -0.43711256980895996, + "step": 831 + }, + { + "epoch": 0.5175738724727839, + "grad_norm": 0.430584579706192, + "learning_rate": 8.400000000000001e-06, + "log_odds_chosen": 4.424380302429199, + "log_odds_ratio": -0.1801256239414215, + "logits/chosen": 2.5836946964263916, + "logits/rejected": 2.416637659072876, + "logps/chosen": -0.998272716999054, + "logps/rejected": -4.759140968322754, + "loss": 0.7411, + "nll_loss": 0.7231161594390869, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09982727468013763, + "rewards/margins": 0.3760868310928345, + "rewards/rejected": -0.4759141206741333, + "step": 832 + }, + { + "epoch": 0.5181959564541213, + "grad_norm": 0.3688509464263916, + "learning_rate": 8.350000000000001e-06, + "log_odds_chosen": 5.4708356857299805, + "log_odds_ratio": -0.04741556942462921, + "logits/chosen": 1.5119433403015137, + "logits/rejected": -0.3896360397338867, + "logps/chosen": -0.7715045213699341, + "logps/rejected": -5.580893039703369, + "loss": 0.5971, + "nll_loss": 0.5923291444778442, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07715044915676117, + "rewards/margins": 0.4809388816356659, + "rewards/rejected": -0.5580893158912659, + "step": 833 + }, + { + "epoch": 0.5188180404354588, + "grad_norm": 0.370304137468338, + "learning_rate": 8.3e-06, + "log_odds_chosen": 2.8266732692718506, + "log_odds_ratio": -0.25435543060302734, + "logits/chosen": 0.7392969131469727, + "logits/rejected": 1.3636996746063232, + "logps/chosen": -0.9792646765708923, + "logps/rejected": -3.4305460453033447, + "loss": 0.5873, + "nll_loss": 0.5618682503700256, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09792646765708923, + "rewards/margins": 0.24512813985347748, + "rewards/rejected": -0.3430545926094055, + "step": 834 + }, + { + "epoch": 0.5194401244167963, + "grad_norm": 1.1292908191680908, + "learning_rate": 8.25e-06, + "log_odds_chosen": 3.191591739654541, + "log_odds_ratio": -0.16945341229438782, + "logits/chosen": 2.268345832824707, + "logits/rejected": 1.9634860754013062, + "logps/chosen": -0.9450038075447083, + "logps/rejected": -3.7623562812805176, + "loss": 0.6896, + "nll_loss": 0.6726588010787964, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09450038522481918, + "rewards/margins": 0.28173527121543884, + "rewards/rejected": -0.3762356638908386, + "step": 835 + }, + { + "epoch": 0.5200622083981338, + "grad_norm": 0.5898036956787109, + "learning_rate": 8.200000000000001e-06, + "log_odds_chosen": 5.049922943115234, + "log_odds_ratio": -0.03130911663174629, + "logits/chosen": 1.6196269989013672, + "logits/rejected": 0.1569942831993103, + "logps/chosen": -0.920583963394165, + "logps/rejected": -5.398545742034912, + "loss": 0.6857, + "nll_loss": 0.6825913190841675, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09205839782953262, + "rewards/margins": 0.44779619574546814, + "rewards/rejected": -0.5398545861244202, + "step": 836 + }, + { + "epoch": 0.5206842923794712, + "grad_norm": 0.3221326172351837, + "learning_rate": 8.15e-06, + "log_odds_chosen": 5.945927619934082, + "log_odds_ratio": -0.021535350009799004, + "logits/chosen": 1.1049885749816895, + "logits/rejected": 0.1460282951593399, + "logps/chosen": -0.8474592566490173, + "logps/rejected": -6.200389862060547, + "loss": 0.5018, + "nll_loss": 0.49965280294418335, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08474592864513397, + "rewards/margins": 0.5352929830551147, + "rewards/rejected": -0.6200389266014099, + "step": 837 + }, + { + "epoch": 0.5213063763608087, + "grad_norm": 0.33995768427848816, + "learning_rate": 8.1e-06, + "log_odds_chosen": 3.2288389205932617, + "log_odds_ratio": -0.27857640385627747, + "logits/chosen": 1.4248796701431274, + "logits/rejected": 1.4942407608032227, + "logps/chosen": -0.7745346426963806, + "logps/rejected": -3.5616447925567627, + "loss": 0.6797, + "nll_loss": 0.6518310308456421, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07745346426963806, + "rewards/margins": 0.2787110209465027, + "rewards/rejected": -0.35616451501846313, + "step": 838 + }, + { + "epoch": 0.5219284603421462, + "grad_norm": 0.31470462679862976, + "learning_rate": 8.050000000000001e-06, + "log_odds_chosen": 7.015840530395508, + "log_odds_ratio": -0.0050363121554255486, + "logits/chosen": 1.4353834390640259, + "logits/rejected": 1.1269909143447876, + "logps/chosen": -0.8338403701782227, + "logps/rejected": -7.1698222160339355, + "loss": 0.5726, + "nll_loss": 0.5720517635345459, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08338404446840286, + "rewards/margins": 0.6335982084274292, + "rewards/rejected": -0.7169821858406067, + "step": 839 + }, + { + "epoch": 0.5225505443234837, + "grad_norm": 0.3146345317363739, + "learning_rate": 8.000000000000001e-06, + "log_odds_chosen": 3.7398879528045654, + "log_odds_ratio": -0.19314292073249817, + "logits/chosen": 1.4435521364212036, + "logits/rejected": 2.414797782897949, + "logps/chosen": -0.7224211096763611, + "logps/rejected": -3.8994083404541016, + "loss": 0.6447, + "nll_loss": 0.6253674030303955, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07224211096763611, + "rewards/margins": 0.31769871711730957, + "rewards/rejected": -0.38994085788726807, + "step": 840 + }, + { + "epoch": 0.5231726283048211, + "grad_norm": 0.3796578347682953, + "learning_rate": 7.95e-06, + "log_odds_chosen": 4.678387641906738, + "log_odds_ratio": -0.32569360733032227, + "logits/chosen": 2.938084602355957, + "logits/rejected": 2.356147050857544, + "logps/chosen": -0.7135329842567444, + "logps/rejected": -4.978067398071289, + "loss": 0.8559, + "nll_loss": 0.8233269453048706, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.07135330140590668, + "rewards/margins": 0.42645344138145447, + "rewards/rejected": -0.49780675768852234, + "step": 841 + }, + { + "epoch": 0.5237947122861586, + "grad_norm": 0.3580376207828522, + "learning_rate": 7.9e-06, + "log_odds_chosen": 3.7918758392333984, + "log_odds_ratio": -0.33086949586868286, + "logits/chosen": 0.5326130986213684, + "logits/rejected": 0.12543895840644836, + "logps/chosen": -0.8650779724121094, + "logps/rejected": -4.286233901977539, + "loss": 0.5654, + "nll_loss": 0.5323303937911987, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08650778979063034, + "rewards/margins": 0.342115581035614, + "rewards/rejected": -0.42862337827682495, + "step": 842 + }, + { + "epoch": 0.5244167962674962, + "grad_norm": 0.3808853030204773, + "learning_rate": 7.850000000000001e-06, + "log_odds_chosen": 4.189432144165039, + "log_odds_ratio": -0.27564239501953125, + "logits/chosen": 2.1349778175354004, + "logits/rejected": 1.1208148002624512, + "logps/chosen": -0.9227831959724426, + "logps/rejected": -4.730731010437012, + "loss": 0.7277, + "nll_loss": 0.700088381767273, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09227833151817322, + "rewards/margins": 0.38079482316970825, + "rewards/rejected": -0.47307315468788147, + "step": 843 + }, + { + "epoch": 0.5250388802488336, + "grad_norm": 0.9150951504707336, + "learning_rate": 7.8e-06, + "log_odds_chosen": 3.935750722885132, + "log_odds_ratio": -0.21879051625728607, + "logits/chosen": 0.3282691538333893, + "logits/rejected": 1.1714022159576416, + "logps/chosen": -1.1815775632858276, + "logps/rejected": -4.750007629394531, + "loss": 0.6211, + "nll_loss": 0.5991711616516113, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1181577518582344, + "rewards/margins": 0.3568430542945862, + "rewards/rejected": -0.47500079870224, + "step": 844 + }, + { + "epoch": 0.5256609642301711, + "grad_norm": 0.43844273686408997, + "learning_rate": 7.75e-06, + "log_odds_chosen": 2.4059865474700928, + "log_odds_ratio": -0.394694983959198, + "logits/chosen": 1.3405787944793701, + "logits/rejected": 1.228584885597229, + "logps/chosen": -0.713738739490509, + "logps/rejected": -2.4642791748046875, + "loss": 0.6272, + "nll_loss": 0.5876884460449219, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.07137387245893478, + "rewards/margins": 0.17505404353141785, + "rewards/rejected": -0.24642793834209442, + "step": 845 + }, + { + "epoch": 0.5262830482115085, + "grad_norm": 0.29125887155532837, + "learning_rate": 7.7e-06, + "log_odds_chosen": 8.201855659484863, + "log_odds_ratio": -0.0014205931220203638, + "logits/chosen": 0.5951703190803528, + "logits/rejected": 0.41203880310058594, + "logps/chosen": -0.6095064878463745, + "logps/rejected": -7.787240982055664, + "loss": 0.5115, + "nll_loss": 0.5113813877105713, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06095064431428909, + "rewards/margins": 0.7177734375, + "rewards/rejected": -0.7787241339683533, + "step": 846 + }, + { + "epoch": 0.5269051321928461, + "grad_norm": 1.37794029712677, + "learning_rate": 7.65e-06, + "log_odds_chosen": 5.424922943115234, + "log_odds_ratio": -0.04665739834308624, + "logits/chosen": 0.7469074726104736, + "logits/rejected": 0.22907613217830658, + "logps/chosen": -1.2817590236663818, + "logps/rejected": -6.238846778869629, + "loss": 0.5371, + "nll_loss": 0.532480776309967, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12817591428756714, + "rewards/margins": 0.49570873379707336, + "rewards/rejected": -0.6238846182823181, + "step": 847 + }, + { + "epoch": 0.5275272161741835, + "grad_norm": 0.27533194422721863, + "learning_rate": 7.6e-06, + "log_odds_chosen": 6.979497909545898, + "log_odds_ratio": -0.00903667975217104, + "logits/chosen": 0.06894741952419281, + "logits/rejected": 0.27600759267807007, + "logps/chosen": -0.9300459623336792, + "logps/rejected": -7.287872314453125, + "loss": 0.5618, + "nll_loss": 0.5608495473861694, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09300459921360016, + "rewards/margins": 0.6357825994491577, + "rewards/rejected": -0.7287871837615967, + "step": 848 + }, + { + "epoch": 0.528149300155521, + "grad_norm": 0.2954027056694031, + "learning_rate": 7.55e-06, + "log_odds_chosen": 4.8234710693359375, + "log_odds_ratio": -0.12771743535995483, + "logits/chosen": 1.4827038049697876, + "logits/rejected": 1.096189260482788, + "logps/chosen": -0.776839554309845, + "logps/rejected": -5.056708812713623, + "loss": 0.5763, + "nll_loss": 0.5634976029396057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0776839554309845, + "rewards/margins": 0.42798691987991333, + "rewards/rejected": -0.5056709051132202, + "step": 849 + }, + { + "epoch": 0.5287713841368584, + "grad_norm": 0.4383789300918579, + "learning_rate": 7.5e-06, + "log_odds_chosen": 4.074307918548584, + "log_odds_ratio": -0.1638539880514145, + "logits/chosen": 0.1360701024532318, + "logits/rejected": 0.7712302207946777, + "logps/chosen": -0.8641620874404907, + "logps/rejected": -4.405178546905518, + "loss": 0.4002, + "nll_loss": 0.3837825059890747, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08641621470451355, + "rewards/margins": 0.35410165786743164, + "rewards/rejected": -0.4405178725719452, + "step": 850 + }, + { + "epoch": 0.529393468118196, + "grad_norm": 0.3684031367301941, + "learning_rate": 7.45e-06, + "log_odds_chosen": 4.50360107421875, + "log_odds_ratio": -0.21045728027820587, + "logits/chosen": 2.8498353958129883, + "logits/rejected": 1.271877646446228, + "logps/chosen": -0.9192016124725342, + "logps/rejected": -4.949028491973877, + "loss": 0.7212, + "nll_loss": 0.7001411318778992, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0919201597571373, + "rewards/margins": 0.4029826819896698, + "rewards/rejected": -0.4949028491973877, + "step": 851 + }, + { + "epoch": 0.5300155520995334, + "grad_norm": 0.41067174077033997, + "learning_rate": 7.4e-06, + "log_odds_chosen": 3.9802048206329346, + "log_odds_ratio": -0.298592209815979, + "logits/chosen": 2.4309494495391846, + "logits/rejected": 2.2804596424102783, + "logps/chosen": -0.7731521129608154, + "logps/rejected": -4.271833896636963, + "loss": 0.7431, + "nll_loss": 0.7132678627967834, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07731521129608154, + "rewards/margins": 0.34986817836761475, + "rewards/rejected": -0.42718344926834106, + "step": 852 + }, + { + "epoch": 0.5306376360808709, + "grad_norm": 0.34988710284233093, + "learning_rate": 7.35e-06, + "log_odds_chosen": 3.954108715057373, + "log_odds_ratio": -0.2007138431072235, + "logits/chosen": 1.4486238956451416, + "logits/rejected": 0.7966309785842896, + "logps/chosen": -0.7078330516815186, + "logps/rejected": -4.10945463180542, + "loss": 0.5629, + "nll_loss": 0.5428740978240967, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07078330963850021, + "rewards/margins": 0.34016215801239014, + "rewards/rejected": -0.41094547510147095, + "step": 853 + }, + { + "epoch": 0.5312597200622085, + "grad_norm": 0.531642496585846, + "learning_rate": 7.2999999999999996e-06, + "log_odds_chosen": 5.208446025848389, + "log_odds_ratio": -0.1282520890235901, + "logits/chosen": 0.8819208145141602, + "logits/rejected": 0.8269643783569336, + "logps/chosen": -0.8965824842453003, + "logps/rejected": -5.501850605010986, + "loss": 0.5189, + "nll_loss": 0.5060710906982422, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08965825289487839, + "rewards/margins": 0.46052682399749756, + "rewards/rejected": -0.5501850843429565, + "step": 854 + }, + { + "epoch": 0.5318818040435459, + "grad_norm": 0.353059321641922, + "learning_rate": 7.25e-06, + "log_odds_chosen": 4.275794982910156, + "log_odds_ratio": -0.1521713137626648, + "logits/chosen": 2.3613967895507812, + "logits/rejected": 0.609958827495575, + "logps/chosen": -0.8156629204750061, + "logps/rejected": -4.58781623840332, + "loss": 0.593, + "nll_loss": 0.577816367149353, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08156629651784897, + "rewards/margins": 0.37721535563468933, + "rewards/rejected": -0.4587816596031189, + "step": 855 + }, + { + "epoch": 0.5325038880248834, + "grad_norm": 0.3639867305755615, + "learning_rate": 7.2e-06, + "log_odds_chosen": 4.824565410614014, + "log_odds_ratio": -0.2290450930595398, + "logits/chosen": 1.6250410079956055, + "logits/rejected": 1.7094388008117676, + "logps/chosen": -0.8250951766967773, + "logps/rejected": -5.078400135040283, + "loss": 0.6442, + "nll_loss": 0.6213299036026001, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08250951766967773, + "rewards/margins": 0.4253305196762085, + "rewards/rejected": -0.5078400373458862, + "step": 856 + }, + { + "epoch": 0.5331259720062208, + "grad_norm": 0.37084388732910156, + "learning_rate": 7.15e-06, + "log_odds_chosen": 4.704831123352051, + "log_odds_ratio": -0.12719248235225677, + "logits/chosen": 0.6303936243057251, + "logits/rejected": 0.46867427229881287, + "logps/chosen": -0.9530895948410034, + "logps/rejected": -5.224120140075684, + "loss": 0.5273, + "nll_loss": 0.5145441293716431, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09530895948410034, + "rewards/margins": 0.42710307240486145, + "rewards/rejected": -0.5224120020866394, + "step": 857 + }, + { + "epoch": 0.5337480559875584, + "grad_norm": 1.0703786611557007, + "learning_rate": 7.1e-06, + "log_odds_chosen": 4.3336992263793945, + "log_odds_ratio": -0.3372271656990051, + "logits/chosen": 0.3954066038131714, + "logits/rejected": 0.7838265895843506, + "logps/chosen": -0.9540671110153198, + "logps/rejected": -4.953222274780273, + "loss": 0.626, + "nll_loss": 0.5922662615776062, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09540671110153198, + "rewards/margins": 0.39991557598114014, + "rewards/rejected": -0.4953222870826721, + "step": 858 + }, + { + "epoch": 0.5343701399688958, + "grad_norm": 1.5287100076675415, + "learning_rate": 7.049999999999999e-06, + "log_odds_chosen": 5.106422424316406, + "log_odds_ratio": -0.02494587004184723, + "logits/chosen": 1.100020408630371, + "logits/rejected": 0.40951472520828247, + "logps/chosen": -1.1385383605957031, + "logps/rejected": -5.838616371154785, + "loss": 0.5596, + "nll_loss": 0.5571079850196838, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11385384202003479, + "rewards/margins": 0.47000783681869507, + "rewards/rejected": -0.5838617086410522, + "step": 859 + }, + { + "epoch": 0.5349922239502333, + "grad_norm": 0.37003448605537415, + "learning_rate": 7.000000000000001e-06, + "log_odds_chosen": 4.985795974731445, + "log_odds_ratio": -0.15885290503501892, + "logits/chosen": 1.3111166954040527, + "logits/rejected": 1.0276559591293335, + "logps/chosen": -0.8623284697532654, + "logps/rejected": -5.377542495727539, + "loss": 0.6302, + "nll_loss": 0.6143087148666382, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08623284846544266, + "rewards/margins": 0.4515213668346405, + "rewards/rejected": -0.537754237651825, + "step": 860 + }, + { + "epoch": 0.5356143079315707, + "grad_norm": 0.8909189105033875, + "learning_rate": 6.950000000000001e-06, + "log_odds_chosen": 2.3723697662353516, + "log_odds_ratio": -0.35334068536758423, + "logits/chosen": 1.6953346729278564, + "logits/rejected": -0.16396528482437134, + "logps/chosen": -0.9004030227661133, + "logps/rejected": -2.9755284786224365, + "loss": 0.6941, + "nll_loss": 0.6587247848510742, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09004030376672745, + "rewards/margins": 0.20751255750656128, + "rewards/rejected": -0.29755285382270813, + "step": 861 + }, + { + "epoch": 0.5362363919129083, + "grad_norm": 0.2798997461795807, + "learning_rate": 6.900000000000001e-06, + "log_odds_chosen": 4.707751274108887, + "log_odds_ratio": -0.10399264097213745, + "logits/chosen": 0.29543545842170715, + "logits/rejected": -1.7914931774139404, + "logps/chosen": -0.909212589263916, + "logps/rejected": -5.118707180023193, + "loss": 0.5727, + "nll_loss": 0.5623191595077515, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09092126786708832, + "rewards/margins": 0.4209495186805725, + "rewards/rejected": -0.5118707418441772, + "step": 862 + }, + { + "epoch": 0.5368584758942457, + "grad_norm": 0.2905239462852478, + "learning_rate": 6.8500000000000005e-06, + "log_odds_chosen": 4.615797519683838, + "log_odds_ratio": -0.142097607254982, + "logits/chosen": -0.19048839807510376, + "logits/rejected": 0.7622358798980713, + "logps/chosen": -0.7061002254486084, + "logps/rejected": -4.774419784545898, + "loss": 0.422, + "nll_loss": 0.40780800580978394, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07061002403497696, + "rewards/margins": 0.40683192014694214, + "rewards/rejected": -0.4774419665336609, + "step": 863 + }, + { + "epoch": 0.5374805598755832, + "grad_norm": 0.39981669187545776, + "learning_rate": 6.800000000000001e-06, + "log_odds_chosen": 3.8930208683013916, + "log_odds_ratio": -0.18875019252300262, + "logits/chosen": 1.3811320066452026, + "logits/rejected": 0.9418545365333557, + "logps/chosen": -1.095811128616333, + "logps/rejected": -4.510628700256348, + "loss": 0.6449, + "nll_loss": 0.6260712742805481, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1095811203122139, + "rewards/margins": 0.3414817154407501, + "rewards/rejected": -0.4510628581047058, + "step": 864 + }, + { + "epoch": 0.5381026438569206, + "grad_norm": 0.41308942437171936, + "learning_rate": 6.750000000000001e-06, + "log_odds_chosen": 3.0796897411346436, + "log_odds_ratio": -0.2442033886909485, + "logits/chosen": 0.9326578974723816, + "logits/rejected": 0.1488349288702011, + "logps/chosen": -1.112588882446289, + "logps/rejected": -3.866307497024536, + "loss": 0.5677, + "nll_loss": 0.543321967124939, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11125887930393219, + "rewards/margins": 0.27537187933921814, + "rewards/rejected": -0.38663074374198914, + "step": 865 + }, + { + "epoch": 0.5387247278382582, + "grad_norm": 0.34175509214401245, + "learning_rate": 6.700000000000001e-06, + "log_odds_chosen": 5.5318708419799805, + "log_odds_ratio": -0.061005041003227234, + "logits/chosen": 1.7314218282699585, + "logits/rejected": 1.6747318506240845, + "logps/chosen": -0.7393934726715088, + "logps/rejected": -5.642154693603516, + "loss": 0.6447, + "nll_loss": 0.6386001706123352, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07393934577703476, + "rewards/margins": 0.4902760982513428, + "rewards/rejected": -0.5642154216766357, + "step": 866 + }, + { + "epoch": 0.5393468118195957, + "grad_norm": 1.9305363893508911, + "learning_rate": 6.650000000000001e-06, + "log_odds_chosen": 5.084562301635742, + "log_odds_ratio": -0.02670995518565178, + "logits/chosen": 0.5640244483947754, + "logits/rejected": -0.02540937066078186, + "logps/chosen": -1.191550374031067, + "logps/rejected": -5.851881504058838, + "loss": 0.5294, + "nll_loss": 0.5266839265823364, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11915504187345505, + "rewards/margins": 0.46603310108184814, + "rewards/rejected": -0.5851881504058838, + "step": 867 + }, + { + "epoch": 0.5399688958009331, + "grad_norm": 0.6636577248573303, + "learning_rate": 6.6e-06, + "log_odds_chosen": 4.547132968902588, + "log_odds_ratio": -0.21664825081825256, + "logits/chosen": 1.5044053792953491, + "logits/rejected": 0.3689953684806824, + "logps/chosen": -0.8013274669647217, + "logps/rejected": -4.848517417907715, + "loss": 0.6273, + "nll_loss": 0.6056653261184692, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08013273775577545, + "rewards/margins": 0.4047189950942993, + "rewards/rejected": -0.48485174775123596, + "step": 868 + }, + { + "epoch": 0.5405909797822706, + "grad_norm": 0.3548082709312439, + "learning_rate": 6.550000000000001e-06, + "log_odds_chosen": 3.8713083267211914, + "log_odds_ratio": -0.2565319836139679, + "logits/chosen": 1.0006804466247559, + "logits/rejected": -0.03945589065551758, + "logps/chosen": -0.8375179171562195, + "logps/rejected": -4.2133684158325195, + "loss": 0.6634, + "nll_loss": 0.6377798318862915, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08375179767608643, + "rewards/margins": 0.33758509159088135, + "rewards/rejected": -0.4213368892669678, + "step": 869 + }, + { + "epoch": 0.5412130637636081, + "grad_norm": 0.3238200843334198, + "learning_rate": 6.5000000000000004e-06, + "log_odds_chosen": 5.353576183319092, + "log_odds_ratio": -0.07162611186504364, + "logits/chosen": 2.206803321838379, + "logits/rejected": 1.6463418006896973, + "logps/chosen": -0.7490416765213013, + "logps/rejected": -5.4248809814453125, + "loss": 0.6596, + "nll_loss": 0.6524254083633423, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0749041736125946, + "rewards/margins": 0.46758395433425903, + "rewards/rejected": -0.5424880981445312, + "step": 870 + }, + { + "epoch": 0.5418351477449456, + "grad_norm": 1.8776357173919678, + "learning_rate": 6.45e-06, + "log_odds_chosen": 4.40454626083374, + "log_odds_ratio": -0.17675238847732544, + "logits/chosen": 2.513120412826538, + "logits/rejected": 1.0964940786361694, + "logps/chosen": -1.0388731956481934, + "logps/rejected": -5.051460266113281, + "loss": 0.7226, + "nll_loss": 0.7049591541290283, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10388731956481934, + "rewards/margins": 0.4012587368488312, + "rewards/rejected": -0.5051460266113281, + "step": 871 + }, + { + "epoch": 0.542457231726283, + "grad_norm": 0.406896710395813, + "learning_rate": 6.4000000000000006e-06, + "log_odds_chosen": 6.131252288818359, + "log_odds_ratio": -0.08929823338985443, + "logits/chosen": 2.746368885040283, + "logits/rejected": 1.540923833847046, + "logps/chosen": -0.8624634742736816, + "logps/rejected": -6.512821674346924, + "loss": 0.6874, + "nll_loss": 0.6784294843673706, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08624634891748428, + "rewards/margins": 0.5650358200073242, + "rewards/rejected": -0.6512821316719055, + "step": 872 + }, + { + "epoch": 0.5430793157076206, + "grad_norm": 0.4132915437221527, + "learning_rate": 6.35e-06, + "log_odds_chosen": 5.865504741668701, + "log_odds_ratio": -0.09850253164768219, + "logits/chosen": 0.35298776626586914, + "logits/rejected": 1.0819687843322754, + "logps/chosen": -0.8687587976455688, + "logps/rejected": -6.098135948181152, + "loss": 0.4361, + "nll_loss": 0.4262300133705139, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08687587827444077, + "rewards/margins": 0.5229377150535583, + "rewards/rejected": -0.6098135709762573, + "step": 873 + }, + { + "epoch": 0.543701399688958, + "grad_norm": 0.40822353959083557, + "learning_rate": 6.300000000000001e-06, + "log_odds_chosen": 3.693990707397461, + "log_odds_ratio": -0.26884984970092773, + "logits/chosen": 1.2561935186386108, + "logits/rejected": 0.1087363213300705, + "logps/chosen": -0.9086207151412964, + "logps/rejected": -4.258871078491211, + "loss": 0.5151, + "nll_loss": 0.48824068903923035, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09086208045482635, + "rewards/margins": 0.33502498269081116, + "rewards/rejected": -0.4258870780467987, + "step": 874 + }, + { + "epoch": 0.5443234836702955, + "grad_norm": 0.3257782757282257, + "learning_rate": 6.25e-06, + "log_odds_chosen": 4.554495334625244, + "log_odds_ratio": -0.14924365282058716, + "logits/chosen": 1.54774808883667, + "logits/rejected": 1.4602476358413696, + "logps/chosen": -0.7244719862937927, + "logps/rejected": -4.493233680725098, + "loss": 0.6859, + "nll_loss": 0.6709988117218018, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07244720309972763, + "rewards/margins": 0.37687617540359497, + "rewards/rejected": -0.4493233561515808, + "step": 875 + }, + { + "epoch": 0.5449455676516329, + "grad_norm": 0.3226686120033264, + "learning_rate": 6.2e-06, + "log_odds_chosen": 3.7493808269500732, + "log_odds_ratio": -0.15756797790527344, + "logits/chosen": 1.012019395828247, + "logits/rejected": 0.5823137760162354, + "logps/chosen": -0.9175806045532227, + "logps/rejected": -4.2481865882873535, + "loss": 0.6132, + "nll_loss": 0.5974055528640747, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09175807237625122, + "rewards/margins": 0.333060622215271, + "rewards/rejected": -0.4248186945915222, + "step": 876 + }, + { + "epoch": 0.5455676516329705, + "grad_norm": 0.9020261764526367, + "learning_rate": 6.15e-06, + "log_odds_chosen": 4.887069225311279, + "log_odds_ratio": -0.03150523826479912, + "logits/chosen": 1.5572091341018677, + "logits/rejected": 0.7369846105575562, + "logps/chosen": -1.265540599822998, + "logps/rejected": -5.7456254959106445, + "loss": 0.7783, + "nll_loss": 0.7751142978668213, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12655405700206757, + "rewards/margins": 0.4480084776878357, + "rewards/rejected": -0.5745625495910645, + "step": 877 + }, + { + "epoch": 0.546189735614308, + "grad_norm": 0.33154386281967163, + "learning_rate": 6.1e-06, + "log_odds_chosen": 6.636990547180176, + "log_odds_ratio": -0.007074399385601282, + "logits/chosen": 0.43664056062698364, + "logits/rejected": -0.06868959963321686, + "logps/chosen": -1.0449284315109253, + "logps/rejected": -7.1396708488464355, + "loss": 0.4785, + "nll_loss": 0.4777759909629822, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10449284315109253, + "rewards/margins": 0.6094743013381958, + "rewards/rejected": -0.7139671444892883, + "step": 878 + }, + { + "epoch": 0.5468118195956454, + "grad_norm": 0.3189108073711395, + "learning_rate": 6.0500000000000005e-06, + "log_odds_chosen": 4.096932411193848, + "log_odds_ratio": -0.1806538999080658, + "logits/chosen": 1.3440558910369873, + "logits/rejected": 0.4599352180957794, + "logps/chosen": -0.9252468347549438, + "logps/rejected": -4.582737445831299, + "loss": 0.6193, + "nll_loss": 0.6012023687362671, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09252467751502991, + "rewards/margins": 0.3657490611076355, + "rewards/rejected": -0.4582737684249878, + "step": 879 + }, + { + "epoch": 0.5474339035769828, + "grad_norm": 0.3894885778427124, + "learning_rate": 6e-06, + "log_odds_chosen": 5.063629150390625, + "log_odds_ratio": -0.17309612035751343, + "logits/chosen": 1.391903042793274, + "logits/rejected": 0.8823519349098206, + "logps/chosen": -0.9142184257507324, + "logps/rejected": -5.5893988609313965, + "loss": 0.5591, + "nll_loss": 0.5418129563331604, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09142184257507324, + "rewards/margins": 0.46751803159713745, + "rewards/rejected": -0.5589399337768555, + "step": 880 + }, + { + "epoch": 0.5480559875583204, + "grad_norm": 0.31810715794563293, + "learning_rate": 5.95e-06, + "log_odds_chosen": 6.099935531616211, + "log_odds_ratio": -0.04295491427183151, + "logits/chosen": 0.7478195428848267, + "logits/rejected": 0.5277884006500244, + "logps/chosen": -0.9480465650558472, + "logps/rejected": -6.505160331726074, + "loss": 0.5633, + "nll_loss": 0.559039831161499, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09480465203523636, + "rewards/margins": 0.5557113885879517, + "rewards/rejected": -0.6505160331726074, + "step": 881 + }, + { + "epoch": 0.5486780715396579, + "grad_norm": 0.537359893321991, + "learning_rate": 5.9e-06, + "log_odds_chosen": 2.3679771423339844, + "log_odds_ratio": -0.3640172481536865, + "logits/chosen": 1.483041763305664, + "logits/rejected": 1.1189666986465454, + "logps/chosen": -0.8394365906715393, + "logps/rejected": -2.901829957962036, + "loss": 0.6892, + "nll_loss": 0.652812123298645, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08394366502761841, + "rewards/margins": 0.20623932778835297, + "rewards/rejected": -0.2901829779148102, + "step": 882 + }, + { + "epoch": 0.5493001555209953, + "grad_norm": 0.3169862627983093, + "learning_rate": 5.850000000000001e-06, + "log_odds_chosen": 3.7267801761627197, + "log_odds_ratio": -0.34081006050109863, + "logits/chosen": 1.571112871170044, + "logits/rejected": 0.19689306616783142, + "logps/chosen": -0.8036859035491943, + "logps/rejected": -4.05713415145874, + "loss": 0.6, + "nll_loss": 0.5659441351890564, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.08036859333515167, + "rewards/margins": 0.32534483075141907, + "rewards/rejected": -0.40571340918540955, + "step": 883 + }, + { + "epoch": 0.5499222395023328, + "grad_norm": 0.4113629460334778, + "learning_rate": 5.8e-06, + "log_odds_chosen": 4.897263526916504, + "log_odds_ratio": -0.14118805527687073, + "logits/chosen": 2.258298635482788, + "logits/rejected": 0.5916818380355835, + "logps/chosen": -0.8462052345275879, + "logps/rejected": -5.26324462890625, + "loss": 0.6423, + "nll_loss": 0.6282092332839966, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08462052792310715, + "rewards/margins": 0.4417039155960083, + "rewards/rejected": -0.526324450969696, + "step": 884 + }, + { + "epoch": 0.5505443234836703, + "grad_norm": 0.42419642210006714, + "learning_rate": 5.750000000000001e-06, + "log_odds_chosen": 4.345916748046875, + "log_odds_ratio": -0.2700275182723999, + "logits/chosen": 2.6262309551239014, + "logits/rejected": 1.3854570388793945, + "logps/chosen": -1.1909689903259277, + "logps/rejected": -5.231269836425781, + "loss": 0.8041, + "nll_loss": 0.7771395444869995, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11909689754247665, + "rewards/margins": 0.4040301442146301, + "rewards/rejected": -0.5231269598007202, + "step": 885 + }, + { + "epoch": 0.5511664074650078, + "grad_norm": 0.358368843793869, + "learning_rate": 5.7000000000000005e-06, + "log_odds_chosen": 6.011335372924805, + "log_odds_ratio": -0.003882630495354533, + "logits/chosen": 0.656829297542572, + "logits/rejected": 0.7573573589324951, + "logps/chosen": -0.9341671466827393, + "logps/rejected": -6.402599334716797, + "loss": 0.5654, + "nll_loss": 0.5649951696395874, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0934167206287384, + "rewards/margins": 0.5468432307243347, + "rewards/rejected": -0.6402599811553955, + "step": 886 + }, + { + "epoch": 0.5517884914463452, + "grad_norm": 0.3700637221336365, + "learning_rate": 5.65e-06, + "log_odds_chosen": 4.019516468048096, + "log_odds_ratio": -0.26321983337402344, + "logits/chosen": 1.6410436630249023, + "logits/rejected": 1.4497946500778198, + "logps/chosen": -0.910016655921936, + "logps/rejected": -4.60683012008667, + "loss": 0.6428, + "nll_loss": 0.616430938243866, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09100165963172913, + "rewards/margins": 0.36968132853507996, + "rewards/rejected": -0.4606829881668091, + "step": 887 + }, + { + "epoch": 0.5524105754276827, + "grad_norm": 4.00311803817749, + "learning_rate": 5.600000000000001e-06, + "log_odds_chosen": 4.81396484375, + "log_odds_ratio": -0.017167825251817703, + "logits/chosen": 0.5925553441047668, + "logits/rejected": -0.003340400755405426, + "logps/chosen": -0.8698358535766602, + "logps/rejected": -5.038300514221191, + "loss": 0.582, + "nll_loss": 0.5802885890007019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0869835913181305, + "rewards/margins": 0.41684651374816895, + "rewards/rejected": -0.503830075263977, + "step": 888 + }, + { + "epoch": 0.5530326594090202, + "grad_norm": 0.9824345111846924, + "learning_rate": 5.55e-06, + "log_odds_chosen": 2.4528815746307373, + "log_odds_ratio": -0.7164135575294495, + "logits/chosen": 1.6816127300262451, + "logits/rejected": 1.278057336807251, + "logps/chosen": -1.7241687774658203, + "logps/rejected": -4.091108322143555, + "loss": 0.7123, + "nll_loss": 0.6406722068786621, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.17241688072681427, + "rewards/margins": 0.23669394850730896, + "rewards/rejected": -0.40911081433296204, + "step": 889 + }, + { + "epoch": 0.5536547433903577, + "grad_norm": 0.401115357875824, + "learning_rate": 5.500000000000001e-06, + "log_odds_chosen": 4.721898555755615, + "log_odds_ratio": -0.049223363399505615, + "logits/chosen": 1.1281979084014893, + "logits/rejected": 0.8041211366653442, + "logps/chosen": -1.3164769411087036, + "logps/rejected": -5.681437015533447, + "loss": 0.6608, + "nll_loss": 0.6558688282966614, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13164770603179932, + "rewards/margins": 0.4364960491657257, + "rewards/rejected": -0.5681437849998474, + "step": 890 + }, + { + "epoch": 0.5542768273716951, + "grad_norm": 0.41568583250045776, + "learning_rate": 5.45e-06, + "log_odds_chosen": 3.2344136238098145, + "log_odds_ratio": -0.23986506462097168, + "logits/chosen": 1.8116191625595093, + "logits/rejected": 1.3638396263122559, + "logps/chosen": -0.9154233932495117, + "logps/rejected": -3.7720108032226562, + "loss": 0.7533, + "nll_loss": 0.7292731404304504, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0915423333644867, + "rewards/margins": 0.2856587767601013, + "rewards/rejected": -0.377201110124588, + "step": 891 + }, + { + "epoch": 0.5548989113530327, + "grad_norm": 0.41260719299316406, + "learning_rate": 5.4e-06, + "log_odds_chosen": 4.115436553955078, + "log_odds_ratio": -0.22443503141403198, + "logits/chosen": 1.470842719078064, + "logits/rejected": 0.9105685353279114, + "logps/chosen": -0.9408708810806274, + "logps/rejected": -4.714263916015625, + "loss": 0.6892, + "nll_loss": 0.666793704032898, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09408708661794662, + "rewards/margins": 0.37733930349349976, + "rewards/rejected": -0.4714263677597046, + "step": 892 + }, + { + "epoch": 0.5555209953343702, + "grad_norm": 0.38643932342529297, + "learning_rate": 5.3500000000000004e-06, + "log_odds_chosen": 5.207808494567871, + "log_odds_ratio": -0.17040032148361206, + "logits/chosen": 1.891057014465332, + "logits/rejected": -0.6716984510421753, + "logps/chosen": -0.9740372896194458, + "logps/rejected": -5.78117561340332, + "loss": 0.632, + "nll_loss": 0.6149402856826782, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09740372747182846, + "rewards/margins": 0.4807139039039612, + "rewards/rejected": -0.5781176090240479, + "step": 893 + }, + { + "epoch": 0.5561430793157076, + "grad_norm": 0.6569875478744507, + "learning_rate": 5.3e-06, + "log_odds_chosen": 2.787360191345215, + "log_odds_ratio": -0.31620126962661743, + "logits/chosen": 2.0118942260742188, + "logits/rejected": 1.3410391807556152, + "logps/chosen": -0.7760646939277649, + "logps/rejected": -3.01680850982666, + "loss": 0.7015, + "nll_loss": 0.6699241399765015, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07760646939277649, + "rewards/margins": 0.2240743786096573, + "rewards/rejected": -0.3016808331012726, + "step": 894 + }, + { + "epoch": 0.5567651632970451, + "grad_norm": 1.307922601699829, + "learning_rate": 5.25e-06, + "log_odds_chosen": 3.8499703407287598, + "log_odds_ratio": -0.13547715544700623, + "logits/chosen": 1.6815910339355469, + "logits/rejected": 0.4014458656311035, + "logps/chosen": -0.8500081300735474, + "logps/rejected": -4.165833473205566, + "loss": 0.5871, + "nll_loss": 0.5735604763031006, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08500081300735474, + "rewards/margins": 0.33158254623413086, + "rewards/rejected": -0.4165833592414856, + "step": 895 + }, + { + "epoch": 0.5573872472783826, + "grad_norm": 0.37747126817703247, + "learning_rate": 5.2e-06, + "log_odds_chosen": 5.886293888092041, + "log_odds_ratio": -0.07489710301160812, + "logits/chosen": 0.7270099520683289, + "logits/rejected": -0.3343905508518219, + "logps/chosen": -0.8939152956008911, + "logps/rejected": -6.278520584106445, + "loss": 0.4426, + "nll_loss": 0.4351494312286377, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08939152956008911, + "rewards/margins": 0.5384604930877686, + "rewards/rejected": -0.6278520822525024, + "step": 896 + }, + { + "epoch": 0.5580093312597201, + "grad_norm": 0.3080809712409973, + "learning_rate": 5.15e-06, + "log_odds_chosen": 4.336005210876465, + "log_odds_ratio": -0.20657199621200562, + "logits/chosen": 1.8069411516189575, + "logits/rejected": 0.9967038631439209, + "logps/chosen": -0.8536757230758667, + "logps/rejected": -4.759378433227539, + "loss": 0.6298, + "nll_loss": 0.6091340780258179, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08536757528781891, + "rewards/margins": 0.3905702531337738, + "rewards/rejected": -0.4759378433227539, + "step": 897 + }, + { + "epoch": 0.5586314152410575, + "grad_norm": 0.4497907757759094, + "learning_rate": 5.1e-06, + "log_odds_chosen": 4.7341156005859375, + "log_odds_ratio": -0.12223678082227707, + "logits/chosen": 2.783708095550537, + "logits/rejected": 1.9557205438613892, + "logps/chosen": -0.8115973472595215, + "logps/rejected": -4.9661407470703125, + "loss": 0.7959, + "nll_loss": 0.7836655378341675, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08115973323583603, + "rewards/margins": 0.4154543876647949, + "rewards/rejected": -0.49661409854888916, + "step": 898 + }, + { + "epoch": 0.559253499222395, + "grad_norm": 0.3507234752178192, + "learning_rate": 5.050000000000001e-06, + "log_odds_chosen": 6.665820598602295, + "log_odds_ratio": -0.01345449686050415, + "logits/chosen": 1.5257761478424072, + "logits/rejected": 0.8127526044845581, + "logps/chosen": -1.231274962425232, + "logps/rejected": -7.183798313140869, + "loss": 0.6412, + "nll_loss": 0.6399024128913879, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12312749028205872, + "rewards/margins": 0.5952523946762085, + "rewards/rejected": -0.7183798551559448, + "step": 899 + }, + { + "epoch": 0.5598755832037325, + "grad_norm": 0.3265915811061859, + "learning_rate": 5e-06, + "log_odds_chosen": 5.529303550720215, + "log_odds_ratio": -0.0950424000620842, + "logits/chosen": 0.9178490042686462, + "logits/rejected": 1.3479315042495728, + "logps/chosen": -0.8234384059906006, + "logps/rejected": -5.609393119812012, + "loss": 0.6265, + "nll_loss": 0.6170258522033691, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08234383910894394, + "rewards/margins": 0.47859543561935425, + "rewards/rejected": -0.5609393119812012, + "step": 900 + }, + { + "epoch": 0.56049766718507, + "grad_norm": 0.4529457688331604, + "learning_rate": 4.950000000000001e-06, + "log_odds_chosen": 6.182548522949219, + "log_odds_ratio": -0.10109666734933853, + "logits/chosen": 1.3867193460464478, + "logits/rejected": 0.8778035640716553, + "logps/chosen": -0.8060503602027893, + "logps/rejected": -6.155576705932617, + "loss": 0.6428, + "nll_loss": 0.6326842308044434, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08060503751039505, + "rewards/margins": 0.5349526405334473, + "rewards/rejected": -0.6155576705932617, + "step": 901 + }, + { + "epoch": 0.5611197511664074, + "grad_norm": 0.35935789346694946, + "learning_rate": 4.9000000000000005e-06, + "log_odds_chosen": 4.441336631774902, + "log_odds_ratio": -0.16026294231414795, + "logits/chosen": 1.9076406955718994, + "logits/rejected": 1.1897597312927246, + "logps/chosen": -0.8750441074371338, + "logps/rejected": -4.786342620849609, + "loss": 0.7553, + "nll_loss": 0.73931884765625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08750440925359726, + "rewards/margins": 0.39112988114356995, + "rewards/rejected": -0.4786342680454254, + "step": 902 + }, + { + "epoch": 0.5617418351477449, + "grad_norm": 0.29695722460746765, + "learning_rate": 4.85e-06, + "log_odds_chosen": 4.3331217765808105, + "log_odds_ratio": -0.23966997861862183, + "logits/chosen": -0.5572576522827148, + "logits/rejected": 1.0803070068359375, + "logps/chosen": -0.7401511669158936, + "logps/rejected": -4.379242420196533, + "loss": 0.4785, + "nll_loss": 0.45448851585388184, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07401511818170547, + "rewards/margins": 0.36390912532806396, + "rewards/rejected": -0.43792423605918884, + "step": 903 + }, + { + "epoch": 0.5623639191290825, + "grad_norm": 0.42112380266189575, + "learning_rate": 4.800000000000001e-06, + "log_odds_chosen": 5.218524932861328, + "log_odds_ratio": -0.054378729313611984, + "logits/chosen": 0.5482559204101562, + "logits/rejected": 0.2777423858642578, + "logps/chosen": -0.7422811985015869, + "logps/rejected": -5.270057201385498, + "loss": 0.5098, + "nll_loss": 0.5044077038764954, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07422812283039093, + "rewards/margins": 0.452777624130249, + "rewards/rejected": -0.5270057320594788, + "step": 904 + }, + { + "epoch": 0.5629860031104199, + "grad_norm": 3.2522900104522705, + "learning_rate": 4.75e-06, + "log_odds_chosen": 3.5051608085632324, + "log_odds_ratio": -0.281528502702713, + "logits/chosen": 1.1515018939971924, + "logits/rejected": 0.7810160517692566, + "logps/chosen": -0.8040460348129272, + "logps/rejected": -3.8523013591766357, + "loss": 0.6415, + "nll_loss": 0.6133573651313782, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0804046094417572, + "rewards/margins": 0.3048255145549774, + "rewards/rejected": -0.3852301239967346, + "step": 905 + }, + { + "epoch": 0.5636080870917574, + "grad_norm": 0.4139149785041809, + "learning_rate": 4.7e-06, + "log_odds_chosen": 4.3448076248168945, + "log_odds_ratio": -0.19571033120155334, + "logits/chosen": 0.6563786268234253, + "logits/rejected": -0.061872243881225586, + "logps/chosen": -0.9398016333580017, + "logps/rejected": -4.782492637634277, + "loss": 0.626, + "nll_loss": 0.6064011454582214, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09398016333580017, + "rewards/margins": 0.384269118309021, + "rewards/rejected": -0.47824928164482117, + "step": 906 + }, + { + "epoch": 0.5642301710730949, + "grad_norm": 0.3649228811264038, + "learning_rate": 4.65e-06, + "log_odds_chosen": 5.071795463562012, + "log_odds_ratio": -0.13153302669525146, + "logits/chosen": 1.0045881271362305, + "logits/rejected": 0.6343420743942261, + "logps/chosen": -0.9673879146575928, + "logps/rejected": -5.603385925292969, + "loss": 0.6296, + "nll_loss": 0.6164366602897644, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0967387929558754, + "rewards/margins": 0.4635998606681824, + "rewards/rejected": -0.5603386163711548, + "step": 907 + }, + { + "epoch": 0.5648522550544324, + "grad_norm": 0.334963858127594, + "learning_rate": 4.6e-06, + "log_odds_chosen": 4.234353065490723, + "log_odds_ratio": -0.12835365533828735, + "logits/chosen": 0.9635106325149536, + "logits/rejected": -0.14736008644104004, + "logps/chosen": -0.8886890411376953, + "logps/rejected": -4.6457037925720215, + "loss": 0.5793, + "nll_loss": 0.5664956569671631, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08886890113353729, + "rewards/margins": 0.3757014870643616, + "rewards/rejected": -0.46457037329673767, + "step": 908 + }, + { + "epoch": 0.5654743390357698, + "grad_norm": 5.453255653381348, + "learning_rate": 4.5500000000000005e-06, + "log_odds_chosen": 6.330126762390137, + "log_odds_ratio": -0.08912979811429977, + "logits/chosen": 1.8540780544281006, + "logits/rejected": 1.8941954374313354, + "logps/chosen": -1.4149324893951416, + "logps/rejected": -7.361577033996582, + "loss": 0.7808, + "nll_loss": 0.7718974351882935, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14149326086044312, + "rewards/margins": 0.5946645140647888, + "rewards/rejected": -0.7361577153205872, + "step": 909 + }, + { + "epoch": 0.5660964230171073, + "grad_norm": 0.3110488951206207, + "learning_rate": 4.5e-06, + "log_odds_chosen": 3.5994150638580322, + "log_odds_ratio": -0.239741250872612, + "logits/chosen": 1.7619941234588623, + "logits/rejected": 1.8184112310409546, + "logps/chosen": -0.855003833770752, + "logps/rejected": -4.064135551452637, + "loss": 0.6879, + "nll_loss": 0.6639161705970764, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08550038188695908, + "rewards/margins": 0.3209131360054016, + "rewards/rejected": -0.40641355514526367, + "step": 910 + }, + { + "epoch": 0.5667185069984448, + "grad_norm": 0.3939039707183838, + "learning_rate": 4.45e-06, + "log_odds_chosen": 3.505305290222168, + "log_odds_ratio": -0.22142846882343292, + "logits/chosen": 2.593437671661377, + "logits/rejected": 2.5147769451141357, + "logps/chosen": -0.9565043449401855, + "logps/rejected": -4.017549514770508, + "loss": 0.7414, + "nll_loss": 0.7192279100418091, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09565043449401855, + "rewards/margins": 0.30610448122024536, + "rewards/rejected": -0.4017549157142639, + "step": 911 + }, + { + "epoch": 0.5673405909797823, + "grad_norm": 0.3363257646560669, + "learning_rate": 4.4e-06, + "log_odds_chosen": 5.338538646697998, + "log_odds_ratio": -0.13085848093032837, + "logits/chosen": 1.9446805715560913, + "logits/rejected": 1.3729445934295654, + "logps/chosen": -0.9459335803985596, + "logps/rejected": -5.780755043029785, + "loss": 0.7294, + "nll_loss": 0.7163442969322205, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0945933610200882, + "rewards/margins": 0.48348212242126465, + "rewards/rejected": -0.5780754685401917, + "step": 912 + }, + { + "epoch": 0.5679626749611197, + "grad_norm": 0.45077571272850037, + "learning_rate": 4.35e-06, + "log_odds_chosen": 5.213942527770996, + "log_odds_ratio": -0.09788724780082703, + "logits/chosen": 2.052887201309204, + "logits/rejected": 1.9616841077804565, + "logps/chosen": -0.6288750767707825, + "logps/rejected": -5.032301902770996, + "loss": 0.6667, + "nll_loss": 0.6569293737411499, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06288750469684601, + "rewards/margins": 0.4403426945209503, + "rewards/rejected": -0.5032302141189575, + "step": 913 + }, + { + "epoch": 0.5685847589424572, + "grad_norm": 0.4367021322250366, + "learning_rate": 4.2999999999999995e-06, + "log_odds_chosen": 3.5740416049957275, + "log_odds_ratio": -0.27110692858695984, + "logits/chosen": 1.6953269243240356, + "logits/rejected": 1.7087769508361816, + "logps/chosen": -0.8259687423706055, + "logps/rejected": -3.9957141876220703, + "loss": 0.7026, + "nll_loss": 0.6754826307296753, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08259688317775726, + "rewards/margins": 0.31697455048561096, + "rewards/rejected": -0.39957141876220703, + "step": 914 + }, + { + "epoch": 0.5692068429237948, + "grad_norm": 0.3404788672924042, + "learning_rate": 4.250000000000001e-06, + "log_odds_chosen": 5.116745948791504, + "log_odds_ratio": -0.16189473867416382, + "logits/chosen": 0.7443652749061584, + "logits/rejected": 0.514636754989624, + "logps/chosen": -0.7846352458000183, + "logps/rejected": -5.240664482116699, + "loss": 0.5443, + "nll_loss": 0.528134822845459, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07846352458000183, + "rewards/margins": 0.4456029236316681, + "rewards/rejected": -0.5240664482116699, + "step": 915 + }, + { + "epoch": 0.5698289269051322, + "grad_norm": 0.27389395236968994, + "learning_rate": 4.2000000000000004e-06, + "log_odds_chosen": 6.271382808685303, + "log_odds_ratio": -0.09461876004934311, + "logits/chosen": 0.35306596755981445, + "logits/rejected": 0.12613160908222198, + "logps/chosen": -0.6129301190376282, + "logps/rejected": -5.839328765869141, + "loss": 0.4305, + "nll_loss": 0.42103177309036255, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.061293017119169235, + "rewards/margins": 0.5226399302482605, + "rewards/rejected": -0.5839329361915588, + "step": 916 + }, + { + "epoch": 0.5704510108864697, + "grad_norm": 0.43340426683425903, + "learning_rate": 4.15e-06, + "log_odds_chosen": 5.669953346252441, + "log_odds_ratio": -0.1194998174905777, + "logits/chosen": 1.3494383096694946, + "logits/rejected": 1.1074585914611816, + "logps/chosen": -0.9059891700744629, + "logps/rejected": -6.040713787078857, + "loss": 0.5614, + "nll_loss": 0.5494275093078613, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.090598925948143, + "rewards/margins": 0.5134724378585815, + "rewards/rejected": -0.6040713787078857, + "step": 917 + }, + { + "epoch": 0.5710730948678071, + "grad_norm": 0.28480619192123413, + "learning_rate": 4.1000000000000006e-06, + "log_odds_chosen": 4.810488700866699, + "log_odds_ratio": -0.24605686962604523, + "logits/chosen": 0.9984217286109924, + "logits/rejected": 0.8857402205467224, + "logps/chosen": -0.8175363540649414, + "logps/rejected": -5.161768436431885, + "loss": 0.5676, + "nll_loss": 0.5429906845092773, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08175363391637802, + "rewards/margins": 0.43442320823669434, + "rewards/rejected": -0.5161768198013306, + "step": 918 + }, + { + "epoch": 0.5716951788491447, + "grad_norm": 0.3191167414188385, + "learning_rate": 4.05e-06, + "log_odds_chosen": 5.344412803649902, + "log_odds_ratio": -0.019217826426029205, + "logits/chosen": 1.5908796787261963, + "logits/rejected": 0.3335094451904297, + "logps/chosen": -0.9169878363609314, + "logps/rejected": -5.7179670333862305, + "loss": 0.6339, + "nll_loss": 0.6319629549980164, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0916987806558609, + "rewards/margins": 0.4800979793071747, + "rewards/rejected": -0.5717967748641968, + "step": 919 + }, + { + "epoch": 0.5723172628304821, + "grad_norm": 0.44434309005737305, + "learning_rate": 4.000000000000001e-06, + "log_odds_chosen": 3.852116107940674, + "log_odds_ratio": -0.28635814785957336, + "logits/chosen": 0.21245375275611877, + "logits/rejected": 1.163073182106018, + "logps/chosen": -0.973721981048584, + "logps/rejected": -4.449609756469727, + "loss": 0.5692, + "nll_loss": 0.5406039953231812, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09737220406532288, + "rewards/margins": 0.34758880734443665, + "rewards/rejected": -0.4449610114097595, + "step": 920 + }, + { + "epoch": 0.5729393468118196, + "grad_norm": 0.34492459893226624, + "learning_rate": 3.95e-06, + "log_odds_chosen": 6.800656795501709, + "log_odds_ratio": -0.003856105264276266, + "logits/chosen": 0.9772761464118958, + "logits/rejected": -0.33364957571029663, + "logps/chosen": -1.0136868953704834, + "logps/rejected": -7.332953929901123, + "loss": 0.5389, + "nll_loss": 0.5384761691093445, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10136869549751282, + "rewards/margins": 0.6319266557693481, + "rewards/rejected": -0.7332954406738281, + "step": 921 + }, + { + "epoch": 0.573561430793157, + "grad_norm": 1.2370455265045166, + "learning_rate": 3.9e-06, + "log_odds_chosen": 4.8015666007995605, + "log_odds_ratio": -0.21101605892181396, + "logits/chosen": 0.5794129371643066, + "logits/rejected": 1.2505478858947754, + "logps/chosen": -0.8675190806388855, + "logps/rejected": -5.256567478179932, + "loss": 0.5551, + "nll_loss": 0.5340374708175659, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08675190806388855, + "rewards/margins": 0.43890485167503357, + "rewards/rejected": -0.5256567597389221, + "step": 922 + }, + { + "epoch": 0.5741835147744946, + "grad_norm": 0.4660135507583618, + "learning_rate": 3.85e-06, + "log_odds_chosen": 3.216860055923462, + "log_odds_ratio": -0.39120519161224365, + "logits/chosen": 1.5594065189361572, + "logits/rejected": 1.4013652801513672, + "logps/chosen": -0.7943279147148132, + "logps/rejected": -3.688270092010498, + "loss": 0.6174, + "nll_loss": 0.5782856345176697, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.07943278551101685, + "rewards/margins": 0.28939422965049744, + "rewards/rejected": -0.3688269853591919, + "step": 923 + }, + { + "epoch": 0.574805598755832, + "grad_norm": 0.6995812654495239, + "learning_rate": 3.8e-06, + "log_odds_chosen": 3.838277816772461, + "log_odds_ratio": -0.18953169882297516, + "logits/chosen": 0.29223647713661194, + "logits/rejected": -0.4534839391708374, + "logps/chosen": -1.0429164171218872, + "logps/rejected": -4.500563621520996, + "loss": 0.512, + "nll_loss": 0.4930779039859772, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1042916476726532, + "rewards/margins": 0.34576472640037537, + "rewards/rejected": -0.45005637407302856, + "step": 924 + }, + { + "epoch": 0.5754276827371695, + "grad_norm": 0.5403011441230774, + "learning_rate": 3.75e-06, + "log_odds_chosen": 6.133852958679199, + "log_odds_ratio": -0.15583519637584686, + "logits/chosen": 2.0422708988189697, + "logits/rejected": 1.8932280540466309, + "logps/chosen": -1.2914113998413086, + "logps/rejected": -7.070437431335449, + "loss": 0.6696, + "nll_loss": 0.654043972492218, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.12914115190505981, + "rewards/margins": 0.5779025554656982, + "rewards/rejected": -0.7070437073707581, + "step": 925 + }, + { + "epoch": 0.576049766718507, + "grad_norm": 0.3176095187664032, + "learning_rate": 3.7e-06, + "log_odds_chosen": 6.375709056854248, + "log_odds_ratio": -0.09541305154561996, + "logits/chosen": 1.2084145545959473, + "logits/rejected": 0.8542147874832153, + "logps/chosen": -0.7740020751953125, + "logps/rejected": -6.466667175292969, + "loss": 0.5507, + "nll_loss": 0.5411289930343628, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07740020006895065, + "rewards/margins": 0.5692665576934814, + "rewards/rejected": -0.6466667652130127, + "step": 926 + }, + { + "epoch": 0.5766718506998445, + "grad_norm": 0.2633552551269531, + "learning_rate": 3.6499999999999998e-06, + "log_odds_chosen": 6.904919624328613, + "log_odds_ratio": -0.012354625388979912, + "logits/chosen": 0.22586172819137573, + "logits/rejected": -1.0335397720336914, + "logps/chosen": -0.9746154546737671, + "logps/rejected": -7.385928153991699, + "loss": 0.4561, + "nll_loss": 0.45485904812812805, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09746154397726059, + "rewards/margins": 0.6411312818527222, + "rewards/rejected": -0.7385928630828857, + "step": 927 + }, + { + "epoch": 0.577293934681182, + "grad_norm": 0.3632732629776001, + "learning_rate": 3.6e-06, + "log_odds_chosen": 7.5079522132873535, + "log_odds_ratio": -0.090024434030056, + "logits/chosen": 1.7778668403625488, + "logits/rejected": 1.4662402868270874, + "logps/chosen": -0.7078216671943665, + "logps/rejected": -7.575026035308838, + "loss": 0.6268, + "nll_loss": 0.6177496910095215, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07078216224908829, + "rewards/margins": 0.6867204904556274, + "rewards/rejected": -0.7575026154518127, + "step": 928 + }, + { + "epoch": 0.5779160186625194, + "grad_norm": 0.29226717352867126, + "learning_rate": 3.55e-06, + "log_odds_chosen": 7.713617324829102, + "log_odds_ratio": -0.0007500970386900008, + "logits/chosen": -0.28721946477890015, + "logits/rejected": -1.3594586849212646, + "logps/chosen": -1.1380939483642578, + "logps/rejected": -8.426959991455078, + "loss": 0.445, + "nll_loss": 0.4448773264884949, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11380939930677414, + "rewards/margins": 0.728886604309082, + "rewards/rejected": -0.8426960706710815, + "step": 929 + }, + { + "epoch": 0.578538102643857, + "grad_norm": 2.12334942817688, + "learning_rate": 3.5000000000000004e-06, + "log_odds_chosen": 4.998147010803223, + "log_odds_ratio": -0.11884446442127228, + "logits/chosen": 3.089864492416382, + "logits/rejected": 2.141456365585327, + "logps/chosen": -1.126284122467041, + "logps/rejected": -5.71080207824707, + "loss": 0.9242, + "nll_loss": 0.912270188331604, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11262841522693634, + "rewards/margins": 0.4584518074989319, + "rewards/rejected": -0.571080207824707, + "step": 930 + }, + { + "epoch": 0.5791601866251944, + "grad_norm": 0.42409899830818176, + "learning_rate": 3.4500000000000004e-06, + "log_odds_chosen": 3.8276333808898926, + "log_odds_ratio": -0.16574029624462128, + "logits/chosen": 0.26170673966407776, + "logits/rejected": 0.38364577293395996, + "logps/chosen": -1.0315451622009277, + "logps/rejected": -4.483916759490967, + "loss": 0.5318, + "nll_loss": 0.5152575969696045, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10315451771020889, + "rewards/margins": 0.3452371656894684, + "rewards/rejected": -0.4483916759490967, + "step": 931 + }, + { + "epoch": 0.5797822706065319, + "grad_norm": 0.4062352776527405, + "learning_rate": 3.4000000000000005e-06, + "log_odds_chosen": 3.2291901111602783, + "log_odds_ratio": -0.24742823839187622, + "logits/chosen": 0.7725391387939453, + "logits/rejected": 0.30735743045806885, + "logps/chosen": -0.930127739906311, + "logps/rejected": -3.832090377807617, + "loss": 0.5666, + "nll_loss": 0.541907012462616, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09301277250051498, + "rewards/margins": 0.2901962697505951, + "rewards/rejected": -0.3832090198993683, + "step": 932 + }, + { + "epoch": 0.5804043545878693, + "grad_norm": 0.3979949355125427, + "learning_rate": 3.3500000000000005e-06, + "log_odds_chosen": 4.8410234451293945, + "log_odds_ratio": -0.1899186670780182, + "logits/chosen": 2.149868965148926, + "logits/rejected": 1.030944585800171, + "logps/chosen": -0.7170071601867676, + "logps/rejected": -4.952804088592529, + "loss": 0.5799, + "nll_loss": 0.5609343647956848, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07170072197914124, + "rewards/margins": 0.4235796630382538, + "rewards/rejected": -0.495280385017395, + "step": 933 + }, + { + "epoch": 0.5810264385692069, + "grad_norm": 0.3322451114654541, + "learning_rate": 3.3e-06, + "log_odds_chosen": 3.1167378425598145, + "log_odds_ratio": -0.3341616094112396, + "logits/chosen": 0.11869756132364273, + "logits/rejected": 0.63154137134552, + "logps/chosen": -0.9321353435516357, + "logps/rejected": -3.7424397468566895, + "loss": 0.5244, + "nll_loss": 0.4910022020339966, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0932135358452797, + "rewards/margins": 0.28103047609329224, + "rewards/rejected": -0.37424400448799133, + "step": 934 + }, + { + "epoch": 0.5816485225505443, + "grad_norm": 0.5389782786369324, + "learning_rate": 3.2500000000000002e-06, + "log_odds_chosen": 3.847651243209839, + "log_odds_ratio": -0.259707510471344, + "logits/chosen": 1.731050968170166, + "logits/rejected": 1.2012372016906738, + "logps/chosen": -0.8918728828430176, + "logps/rejected": -4.2864990234375, + "loss": 0.754, + "nll_loss": 0.7280244827270508, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08918728679418564, + "rewards/margins": 0.3394625782966614, + "rewards/rejected": -0.42864990234375, + "step": 935 + }, + { + "epoch": 0.5822706065318818, + "grad_norm": 0.31316882371902466, + "learning_rate": 3.2000000000000003e-06, + "log_odds_chosen": 6.7213544845581055, + "log_odds_ratio": -0.0976264625787735, + "logits/chosen": 1.4556970596313477, + "logits/rejected": 0.45087191462516785, + "logps/chosen": -0.7859498262405396, + "logps/rejected": -6.838555335998535, + "loss": 0.5728, + "nll_loss": 0.5629971623420715, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07859498262405396, + "rewards/margins": 0.6052606105804443, + "rewards/rejected": -0.6838555335998535, + "step": 936 + }, + { + "epoch": 0.5828926905132192, + "grad_norm": 0.29591137170791626, + "learning_rate": 3.1500000000000003e-06, + "log_odds_chosen": 3.940699577331543, + "log_odds_ratio": -0.2765880823135376, + "logits/chosen": 2.854828119277954, + "logits/rejected": 1.7593179941177368, + "logps/chosen": -0.7826642990112305, + "logps/rejected": -4.19375467300415, + "loss": 0.6859, + "nll_loss": 0.6582664251327515, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0782664343714714, + "rewards/margins": 0.341109037399292, + "rewards/rejected": -0.4193755090236664, + "step": 937 + }, + { + "epoch": 0.5835147744945568, + "grad_norm": 0.33533087372779846, + "learning_rate": 3.1e-06, + "log_odds_chosen": 5.005894660949707, + "log_odds_ratio": -0.02236769162118435, + "logits/chosen": 1.9137576818466187, + "logits/rejected": 2.3711225986480713, + "logps/chosen": -1.10660982131958, + "logps/rejected": -5.497869491577148, + "loss": 0.7255, + "nll_loss": 0.7232633829116821, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11066099256277084, + "rewards/margins": 0.4391259551048279, + "rewards/rejected": -0.5497869253158569, + "step": 938 + }, + { + "epoch": 0.5841368584758942, + "grad_norm": 0.36376431584358215, + "learning_rate": 3.05e-06, + "log_odds_chosen": 4.021162986755371, + "log_odds_ratio": -0.3778960406780243, + "logits/chosen": 1.345890760421753, + "logits/rejected": 0.1843852400779724, + "logps/chosen": -0.7622407674789429, + "logps/rejected": -4.160394191741943, + "loss": 0.6573, + "nll_loss": 0.6194642186164856, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07622407376766205, + "rewards/margins": 0.3398153483867645, + "rewards/rejected": -0.4160394072532654, + "step": 939 + }, + { + "epoch": 0.5847589424572317, + "grad_norm": 0.3828088939189911, + "learning_rate": 3e-06, + "log_odds_chosen": 5.327631950378418, + "log_odds_ratio": -0.0874582976102829, + "logits/chosen": 1.6233155727386475, + "logits/rejected": 1.4965178966522217, + "logps/chosen": -0.850585401058197, + "logps/rejected": -5.66487979888916, + "loss": 0.6942, + "nll_loss": 0.6854044795036316, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0850585475564003, + "rewards/margins": 0.48142948746681213, + "rewards/rejected": -0.5664880275726318, + "step": 940 + }, + { + "epoch": 0.5853810264385692, + "grad_norm": 0.5152103900909424, + "learning_rate": 2.95e-06, + "log_odds_chosen": 2.946143388748169, + "log_odds_ratio": -0.3106565773487091, + "logits/chosen": 1.6423062086105347, + "logits/rejected": -0.19852420687675476, + "logps/chosen": -0.9775319695472717, + "logps/rejected": -3.6458029747009277, + "loss": 0.6133, + "nll_loss": 0.5822546482086182, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09775320440530777, + "rewards/margins": 0.2668271064758301, + "rewards/rejected": -0.36458033323287964, + "step": 941 + }, + { + "epoch": 0.5860031104199067, + "grad_norm": 0.38813576102256775, + "learning_rate": 2.9e-06, + "log_odds_chosen": 6.363837242126465, + "log_odds_ratio": -0.13211406767368317, + "logits/chosen": 0.037667542695999146, + "logits/rejected": 0.7557448744773865, + "logps/chosen": -0.7755674123764038, + "logps/rejected": -6.5638628005981445, + "loss": 0.4901, + "nll_loss": 0.4769030809402466, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07755675166845322, + "rewards/margins": 0.5788295269012451, + "rewards/rejected": -0.6563862562179565, + "step": 942 + }, + { + "epoch": 0.5866251944012442, + "grad_norm": 0.5654268264770508, + "learning_rate": 2.8500000000000002e-06, + "log_odds_chosen": 4.809700012207031, + "log_odds_ratio": -0.0758974701166153, + "logits/chosen": 1.1682859659194946, + "logits/rejected": 0.6298458576202393, + "logps/chosen": -0.9121124744415283, + "logps/rejected": -5.2214860916137695, + "loss": 0.5784, + "nll_loss": 0.5708357095718384, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09121125191450119, + "rewards/margins": 0.4309373199939728, + "rewards/rejected": -0.522148609161377, + "step": 943 + }, + { + "epoch": 0.5872472783825816, + "grad_norm": 0.38585397601127625, + "learning_rate": 2.8000000000000003e-06, + "log_odds_chosen": 5.488147735595703, + "log_odds_ratio": -0.2799147367477417, + "logits/chosen": 2.2300140857696533, + "logits/rejected": 1.4071381092071533, + "logps/chosen": -0.9285607933998108, + "logps/rejected": -6.072427749633789, + "loss": 0.7492, + "nll_loss": 0.7212356925010681, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09285607933998108, + "rewards/margins": 0.5143867135047913, + "rewards/rejected": -0.60724276304245, + "step": 944 + }, + { + "epoch": 0.5878693623639192, + "grad_norm": 0.3419051170349121, + "learning_rate": 2.7500000000000004e-06, + "log_odds_chosen": 6.435153484344482, + "log_odds_ratio": -0.0760195329785347, + "logits/chosen": 0.642075777053833, + "logits/rejected": -0.014084309339523315, + "logps/chosen": -1.5039077997207642, + "logps/rejected": -7.517624855041504, + "loss": 0.3377, + "nll_loss": 0.3301193416118622, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15039077401161194, + "rewards/margins": 0.601371705532074, + "rewards/rejected": -0.7517625093460083, + "step": 945 + }, + { + "epoch": 0.5884914463452566, + "grad_norm": 0.9480850696563721, + "learning_rate": 2.7e-06, + "log_odds_chosen": 6.029423713684082, + "log_odds_ratio": -0.18862660229206085, + "logits/chosen": 1.9027588367462158, + "logits/rejected": 1.7689592838287354, + "logps/chosen": -0.8314551711082458, + "logps/rejected": -6.228260517120361, + "loss": 0.555, + "nll_loss": 0.5361568927764893, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08314551413059235, + "rewards/margins": 0.539680540561676, + "rewards/rejected": -0.622826099395752, + "step": 946 + }, + { + "epoch": 0.5891135303265941, + "grad_norm": 0.38399749994277954, + "learning_rate": 2.65e-06, + "log_odds_chosen": 5.510779857635498, + "log_odds_ratio": -0.11834244430065155, + "logits/chosen": 0.9599204063415527, + "logits/rejected": 0.9103103280067444, + "logps/chosen": -0.8163239359855652, + "logps/rejected": -5.78401517868042, + "loss": 0.4977, + "nll_loss": 0.48582959175109863, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08163239061832428, + "rewards/margins": 0.49676913022994995, + "rewards/rejected": -0.578401505947113, + "step": 947 + }, + { + "epoch": 0.5897356143079315, + "grad_norm": 0.25352492928504944, + "learning_rate": 2.6e-06, + "log_odds_chosen": 6.789501190185547, + "log_odds_ratio": -0.05249481275677681, + "logits/chosen": 0.444355309009552, + "logits/rejected": 1.3890902996063232, + "logps/chosen": -0.7576724886894226, + "logps/rejected": -6.7853617668151855, + "loss": 0.4709, + "nll_loss": 0.465690553188324, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07576724886894226, + "rewards/margins": 0.6027689576148987, + "rewards/rejected": -0.6785361766815186, + "step": 948 + }, + { + "epoch": 0.5903576982892691, + "grad_norm": 0.6953301429748535, + "learning_rate": 2.55e-06, + "log_odds_chosen": 4.289721488952637, + "log_odds_ratio": -0.22087326645851135, + "logits/chosen": 2.951247215270996, + "logits/rejected": 1.5472073554992676, + "logps/chosen": -0.7607182264328003, + "logps/rejected": -4.448215007781982, + "loss": 0.7362, + "nll_loss": 0.7141574621200562, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07607182115316391, + "rewards/margins": 0.3687496781349182, + "rewards/rejected": -0.4448215365409851, + "step": 949 + }, + { + "epoch": 0.5909797822706065, + "grad_norm": 0.3470051884651184, + "learning_rate": 2.5e-06, + "log_odds_chosen": 7.393134117126465, + "log_odds_ratio": -0.011001640930771828, + "logits/chosen": 2.181663990020752, + "logits/rejected": 1.1673696041107178, + "logps/chosen": -0.728095531463623, + "logps/rejected": -7.325507164001465, + "loss": 0.674, + "nll_loss": 0.672852635383606, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07280955463647842, + "rewards/margins": 0.6597411632537842, + "rewards/rejected": -0.7325507402420044, + "step": 950 + }, + { + "epoch": 0.591601866251944, + "grad_norm": 0.6204577088356018, + "learning_rate": 2.4500000000000003e-06, + "log_odds_chosen": 3.74992299079895, + "log_odds_ratio": -0.24129045009613037, + "logits/chosen": 1.1197046041488647, + "logits/rejected": 0.7634965181350708, + "logps/chosen": -1.1183922290802002, + "logps/rejected": -4.537961959838867, + "loss": 0.556, + "nll_loss": 0.5318636894226074, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11183921992778778, + "rewards/margins": 0.3419570028781891, + "rewards/rejected": -0.45379623770713806, + "step": 951 + }, + { + "epoch": 0.5922239502332814, + "grad_norm": 0.7573308944702148, + "learning_rate": 2.4000000000000003e-06, + "log_odds_chosen": 3.447063684463501, + "log_odds_ratio": -0.21511992812156677, + "logits/chosen": 1.8121840953826904, + "logits/rejected": 0.5099284648895264, + "logps/chosen": -0.9625261425971985, + "logps/rejected": -4.040431022644043, + "loss": 0.6615, + "nll_loss": 0.6400207877159119, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09625261276960373, + "rewards/margins": 0.30779051780700684, + "rewards/rejected": -0.40404313802719116, + "step": 952 + }, + { + "epoch": 0.592846034214619, + "grad_norm": 0.3285903036594391, + "learning_rate": 2.35e-06, + "log_odds_chosen": 6.717590808868408, + "log_odds_ratio": -0.002201066818088293, + "logits/chosen": 1.8004734516143799, + "logits/rejected": 0.05097063630819321, + "logps/chosen": -0.9343357086181641, + "logps/rejected": -7.141386032104492, + "loss": 0.5761, + "nll_loss": 0.5759262442588806, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09343355894088745, + "rewards/margins": 0.6207050085067749, + "rewards/rejected": -0.7141385674476624, + "step": 953 + }, + { + "epoch": 0.5934681181959565, + "grad_norm": 0.35679325461387634, + "learning_rate": 2.3e-06, + "log_odds_chosen": 5.247450828552246, + "log_odds_ratio": -0.25419700145721436, + "logits/chosen": 2.1967992782592773, + "logits/rejected": 1.1378300189971924, + "logps/chosen": -0.8751773834228516, + "logps/rejected": -5.667536735534668, + "loss": 0.7373, + "nll_loss": 0.7118582725524902, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08751773089170456, + "rewards/margins": 0.4792359173297882, + "rewards/rejected": -0.566753625869751, + "step": 954 + }, + { + "epoch": 0.5940902021772939, + "grad_norm": 0.24915580451488495, + "learning_rate": 2.25e-06, + "log_odds_chosen": 4.880882263183594, + "log_odds_ratio": -0.20119339227676392, + "logits/chosen": 0.1770033836364746, + "logits/rejected": 0.6878756284713745, + "logps/chosen": -0.9567283391952515, + "logps/rejected": -5.4595465660095215, + "loss": 0.4933, + "nll_loss": 0.4731942415237427, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09567283093929291, + "rewards/margins": 0.4502818286418915, + "rewards/rejected": -0.5459545850753784, + "step": 955 + }, + { + "epoch": 0.5947122861586314, + "grad_norm": 1.7730331420898438, + "learning_rate": 2.2e-06, + "log_odds_chosen": 5.833465099334717, + "log_odds_ratio": -0.09857569634914398, + "logits/chosen": 2.1895477771759033, + "logits/rejected": 2.4121108055114746, + "logps/chosen": -1.1158063411712646, + "logps/rejected": -6.559569358825684, + "loss": 0.7713, + "nll_loss": 0.7614297866821289, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11158062517642975, + "rewards/margins": 0.5443763732910156, + "rewards/rejected": -0.6559569835662842, + "step": 956 + }, + { + "epoch": 0.5953343701399689, + "grad_norm": 0.44484496116638184, + "learning_rate": 2.1499999999999997e-06, + "log_odds_chosen": 8.722646713256836, + "log_odds_ratio": -0.003650566330179572, + "logits/chosen": 2.142489433288574, + "logits/rejected": 1.7523647546768188, + "logps/chosen": -0.7479883432388306, + "logps/rejected": -8.673405647277832, + "loss": 0.7162, + "nll_loss": 0.715814471244812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0747988373041153, + "rewards/margins": 0.7925417423248291, + "rewards/rejected": -0.8673405647277832, + "step": 957 + }, + { + "epoch": 0.5959564541213064, + "grad_norm": 0.38487762212753296, + "learning_rate": 2.1000000000000002e-06, + "log_odds_chosen": 4.871744155883789, + "log_odds_ratio": -0.34087318181991577, + "logits/chosen": 2.5661439895629883, + "logits/rejected": 0.9334918260574341, + "logps/chosen": -0.6973855495452881, + "logps/rejected": -5.2056474685668945, + "loss": 0.5733, + "nll_loss": 0.5392202138900757, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06973855197429657, + "rewards/margins": 0.4508262276649475, + "rewards/rejected": -0.5205647349357605, + "step": 958 + }, + { + "epoch": 0.5965785381026438, + "grad_norm": 0.3092884123325348, + "learning_rate": 2.0500000000000003e-06, + "log_odds_chosen": 7.264720916748047, + "log_odds_ratio": -0.0031101834028959274, + "logits/chosen": 1.385936975479126, + "logits/rejected": 0.4052692949771881, + "logps/chosen": -0.8156126737594604, + "logps/rejected": -7.355830669403076, + "loss": 0.5032, + "nll_loss": 0.5028782486915588, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08156126737594604, + "rewards/margins": 0.6540217995643616, + "rewards/rejected": -0.7355830669403076, + "step": 959 + }, + { + "epoch": 0.5972006220839814, + "grad_norm": 0.5304663777351379, + "learning_rate": 2.0000000000000003e-06, + "log_odds_chosen": 5.928452491760254, + "log_odds_ratio": -0.09689196944236755, + "logits/chosen": 1.602445125579834, + "logits/rejected": 0.8351498246192932, + "logps/chosen": -1.78778076171875, + "logps/rejected": -7.4334940910339355, + "loss": 0.7022, + "nll_loss": 0.6925529837608337, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.17877808213233948, + "rewards/margins": 0.5645713806152344, + "rewards/rejected": -0.7433494329452515, + "step": 960 + }, + { + "epoch": 0.5978227060653188, + "grad_norm": 0.4608170986175537, + "learning_rate": 1.95e-06, + "log_odds_chosen": 4.951751232147217, + "log_odds_ratio": -0.16974444687366486, + "logits/chosen": 0.48448848724365234, + "logits/rejected": 0.6948592662811279, + "logps/chosen": -0.8525481820106506, + "logps/rejected": -5.083009719848633, + "loss": 0.4455, + "nll_loss": 0.428507924079895, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08525481820106506, + "rewards/margins": 0.42304617166519165, + "rewards/rejected": -0.5083009600639343, + "step": 961 + }, + { + "epoch": 0.5984447900466563, + "grad_norm": 0.33842310309410095, + "learning_rate": 1.9e-06, + "log_odds_chosen": 6.945926666259766, + "log_odds_ratio": -0.14915025234222412, + "logits/chosen": 1.039058804512024, + "logits/rejected": 1.184691309928894, + "logps/chosen": -0.9467121958732605, + "logps/rejected": -7.190241813659668, + "loss": 0.5227, + "nll_loss": 0.5077849626541138, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09467122703790665, + "rewards/margins": 0.6243529915809631, + "rewards/rejected": -0.7190241813659668, + "step": 962 + }, + { + "epoch": 0.5990668740279937, + "grad_norm": 0.7515162229537964, + "learning_rate": 1.85e-06, + "log_odds_chosen": 4.541479110717773, + "log_odds_ratio": -0.22119711339473724, + "logits/chosen": 0.734094500541687, + "logits/rejected": 0.07864078134298325, + "logps/chosen": -0.8313609957695007, + "logps/rejected": -4.912501811981201, + "loss": 0.551, + "nll_loss": 0.5288982391357422, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08313610404729843, + "rewards/margins": 0.40811410546302795, + "rewards/rejected": -0.4912501573562622, + "step": 963 + }, + { + "epoch": 0.5996889580093313, + "grad_norm": 0.3785715401172638, + "learning_rate": 1.8e-06, + "log_odds_chosen": 6.697405815124512, + "log_odds_ratio": -0.020956946536898613, + "logits/chosen": 1.6168622970581055, + "logits/rejected": 1.023451566696167, + "logps/chosen": -0.6088523864746094, + "logps/rejected": -6.411888599395752, + "loss": 0.567, + "nll_loss": 0.5649264454841614, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.060885243117809296, + "rewards/margins": 0.5803036689758301, + "rewards/rejected": -0.6411888599395752, + "step": 964 + }, + { + "epoch": 0.6003110419906688, + "grad_norm": 0.32801535725593567, + "learning_rate": 1.7500000000000002e-06, + "log_odds_chosen": 7.661484241485596, + "log_odds_ratio": -0.010182654485106468, + "logits/chosen": 2.53751277923584, + "logits/rejected": 1.5586211681365967, + "logps/chosen": -0.7776228189468384, + "logps/rejected": -7.800995826721191, + "loss": 0.6569, + "nll_loss": 0.6558878421783447, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07776228338479996, + "rewards/margins": 0.7023372650146484, + "rewards/rejected": -0.7800995707511902, + "step": 965 + }, + { + "epoch": 0.6009331259720062, + "grad_norm": 4.966485500335693, + "learning_rate": 1.7000000000000002e-06, + "log_odds_chosen": 4.641650676727295, + "log_odds_ratio": -0.21231749653816223, + "logits/chosen": 1.5193026065826416, + "logits/rejected": 0.49554136395454407, + "logps/chosen": -0.9282476305961609, + "logps/rejected": -5.024073123931885, + "loss": 0.68, + "nll_loss": 0.6587827205657959, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09282475709915161, + "rewards/margins": 0.40958255529403687, + "rewards/rejected": -0.5024073123931885, + "step": 966 + }, + { + "epoch": 0.6015552099533437, + "grad_norm": 0.49166029691696167, + "learning_rate": 1.65e-06, + "log_odds_chosen": 4.773428916931152, + "log_odds_ratio": -0.17224228382110596, + "logits/chosen": 1.6243770122528076, + "logits/rejected": 1.5114920139312744, + "logps/chosen": -0.9838494062423706, + "logps/rejected": -5.343306541442871, + "loss": 0.5384, + "nll_loss": 0.5212177634239197, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09838493913412094, + "rewards/margins": 0.43594568967819214, + "rewards/rejected": -0.5343306064605713, + "step": 967 + }, + { + "epoch": 0.6021772939346812, + "grad_norm": 0.3038322329521179, + "learning_rate": 1.6000000000000001e-06, + "log_odds_chosen": 6.636908054351807, + "log_odds_ratio": -0.11172385513782501, + "logits/chosen": -0.003827810287475586, + "logits/rejected": 0.5097255706787109, + "logps/chosen": -0.8371415138244629, + "logps/rejected": -6.972446918487549, + "loss": 0.4479, + "nll_loss": 0.4367726743221283, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08371415734291077, + "rewards/margins": 0.6135305166244507, + "rewards/rejected": -0.6972447037696838, + "step": 968 + }, + { + "epoch": 0.6027993779160187, + "grad_norm": 0.42850571870803833, + "learning_rate": 1.55e-06, + "log_odds_chosen": 4.930295944213867, + "log_odds_ratio": -0.054657693952322006, + "logits/chosen": 1.7624989748001099, + "logits/rejected": 1.1231470108032227, + "logps/chosen": -0.9003041982650757, + "logps/rejected": -5.306840419769287, + "loss": 0.6412, + "nll_loss": 0.6357303857803345, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09003043174743652, + "rewards/margins": 0.44065362215042114, + "rewards/rejected": -0.5306840538978577, + "step": 969 + }, + { + "epoch": 0.6034214618973561, + "grad_norm": 0.33446815609931946, + "learning_rate": 1.5e-06, + "log_odds_chosen": 5.287704944610596, + "log_odds_ratio": -0.20487284660339355, + "logits/chosen": 1.4688787460327148, + "logits/rejected": 1.6766948699951172, + "logps/chosen": -0.9011386036872864, + "logps/rejected": -5.782619476318359, + "loss": 0.6234, + "nll_loss": 0.6029089689254761, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09011386334896088, + "rewards/margins": 0.4881480932235718, + "rewards/rejected": -0.5782619118690491, + "step": 970 + }, + { + "epoch": 0.6040435458786936, + "grad_norm": 5.544436454772949, + "learning_rate": 1.45e-06, + "log_odds_chosen": 6.159671783447266, + "log_odds_ratio": -0.40772995352745056, + "logits/chosen": 1.4968130588531494, + "logits/rejected": 2.0050668716430664, + "logps/chosen": -0.9551042914390564, + "logps/rejected": -6.332335472106934, + "loss": 0.7724, + "nll_loss": 0.7315971255302429, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.09551043063402176, + "rewards/margins": 0.5377230644226074, + "rewards/rejected": -0.6332335472106934, + "step": 971 + }, + { + "epoch": 0.6046656298600311, + "grad_norm": 0.6907927393913269, + "learning_rate": 1.4000000000000001e-06, + "log_odds_chosen": 4.74246883392334, + "log_odds_ratio": -0.3178080916404724, + "logits/chosen": 0.3425654172897339, + "logits/rejected": 1.6831358671188354, + "logps/chosen": -1.2201842069625854, + "logps/rejected": -5.71635627746582, + "loss": 0.6642, + "nll_loss": 0.6323726177215576, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.12201841175556183, + "rewards/margins": 0.4496172368526459, + "rewards/rejected": -0.5716356039047241, + "step": 972 + }, + { + "epoch": 0.6052877138413686, + "grad_norm": 0.3379632830619812, + "learning_rate": 1.35e-06, + "log_odds_chosen": 7.221384525299072, + "log_odds_ratio": -0.11525887250900269, + "logits/chosen": 0.7215989828109741, + "logits/rejected": 0.8484539985656738, + "logps/chosen": -0.808670163154602, + "logps/rejected": -7.426860809326172, + "loss": 0.5458, + "nll_loss": 0.5342952609062195, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08086702227592468, + "rewards/margins": 0.6618191003799438, + "rewards/rejected": -0.7426860332489014, + "step": 973 + }, + { + "epoch": 0.605909797822706, + "grad_norm": 0.3536883294582367, + "learning_rate": 1.3e-06, + "log_odds_chosen": 5.5262041091918945, + "log_odds_ratio": -0.06627706438302994, + "logits/chosen": -0.3007934093475342, + "logits/rejected": -0.25916212797164917, + "logps/chosen": -0.7046493291854858, + "logps/rejected": -5.4488701820373535, + "loss": 0.3994, + "nll_loss": 0.39276355504989624, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07046493142843246, + "rewards/margins": 0.4744220972061157, + "rewards/rejected": -0.5448870062828064, + "step": 974 + }, + { + "epoch": 0.6065318818040435, + "grad_norm": 1.1498117446899414, + "learning_rate": 1.25e-06, + "log_odds_chosen": 4.309863090515137, + "log_odds_ratio": -0.1867905706167221, + "logits/chosen": -1.1799075603485107, + "logits/rejected": -1.1014156341552734, + "logps/chosen": -1.2807620763778687, + "logps/rejected": -5.209968566894531, + "loss": 0.5001, + "nll_loss": 0.4814422130584717, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1280762106180191, + "rewards/margins": 0.3929206430912018, + "rewards/rejected": -0.5209968090057373, + "step": 975 + }, + { + "epoch": 0.6071539657853811, + "grad_norm": 0.7061277627944946, + "learning_rate": 1.2000000000000002e-06, + "log_odds_chosen": 6.085260391235352, + "log_odds_ratio": -0.16170816123485565, + "logits/chosen": 1.1271741390228271, + "logits/rejected": 1.259488821029663, + "logps/chosen": -0.8908754587173462, + "logps/rejected": -6.308341026306152, + "loss": 0.697, + "nll_loss": 0.6808428764343262, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.08908754587173462, + "rewards/margins": 0.5417464971542358, + "rewards/rejected": -0.6308341026306152, + "step": 976 + }, + { + "epoch": 0.6077760497667185, + "grad_norm": 0.4019508957862854, + "learning_rate": 1.15e-06, + "log_odds_chosen": 5.367095470428467, + "log_odds_ratio": -0.06431747227907181, + "logits/chosen": 2.3293800354003906, + "logits/rejected": 1.6834604740142822, + "logps/chosen": -0.7575269937515259, + "logps/rejected": -5.442158222198486, + "loss": 0.6577, + "nll_loss": 0.6512343883514404, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07575269788503647, + "rewards/margins": 0.46846315264701843, + "rewards/rejected": -0.5442157983779907, + "step": 977 + }, + { + "epoch": 0.608398133748056, + "grad_norm": 4.2344136238098145, + "learning_rate": 1.1e-06, + "log_odds_chosen": 5.4656572341918945, + "log_odds_ratio": -0.1600266396999359, + "logits/chosen": 1.5850176811218262, + "logits/rejected": 1.394923448562622, + "logps/chosen": -0.9550702571868896, + "logps/rejected": -5.794343948364258, + "loss": 0.7159, + "nll_loss": 0.6998506188392639, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.09550703316926956, + "rewards/margins": 0.4839273989200592, + "rewards/rejected": -0.5794344544410706, + "step": 978 + }, + { + "epoch": 0.6090202177293935, + "grad_norm": 0.49649715423583984, + "learning_rate": 1.0500000000000001e-06, + "log_odds_chosen": 6.332185745239258, + "log_odds_ratio": -0.06125331670045853, + "logits/chosen": 1.4913352727890015, + "logits/rejected": 1.0419901609420776, + "logps/chosen": -1.0030255317687988, + "logps/rejected": -6.77717399597168, + "loss": 0.5802, + "nll_loss": 0.5740865468978882, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1003025621175766, + "rewards/margins": 0.577414870262146, + "rewards/rejected": -0.6777174472808838, + "step": 979 + }, + { + "epoch": 0.609642301710731, + "grad_norm": 0.5118820667266846, + "learning_rate": 1.0000000000000002e-06, + "log_odds_chosen": 5.745312213897705, + "log_odds_ratio": -0.14600278437137604, + "logits/chosen": -0.29222479462623596, + "logits/rejected": 0.4791012704372406, + "logps/chosen": -1.1164146661758423, + "logps/rejected": -6.352675914764404, + "loss": 0.5329, + "nll_loss": 0.5183060765266418, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.11164146661758423, + "rewards/margins": 0.5236261487007141, + "rewards/rejected": -0.6352676153182983, + "step": 980 + }, + { + "epoch": 0.6102643856920684, + "grad_norm": 0.36166447401046753, + "learning_rate": 9.5e-07, + "log_odds_chosen": 4.485109329223633, + "log_odds_ratio": -0.2791633903980255, + "logits/chosen": 1.925029993057251, + "logits/rejected": 1.1006145477294922, + "logps/chosen": -1.005967617034912, + "logps/rejected": -5.209171295166016, + "loss": 0.6741, + "nll_loss": 0.6461948156356812, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.10059677064418793, + "rewards/margins": 0.42032039165496826, + "rewards/rejected": -0.5209171772003174, + "step": 981 + }, + { + "epoch": 0.6108864696734059, + "grad_norm": 0.3162643313407898, + "learning_rate": 9e-07, + "log_odds_chosen": 6.980396747589111, + "log_odds_ratio": -0.0031216649804264307, + "logits/chosen": 0.3812088370323181, + "logits/rejected": 0.4569092392921448, + "logps/chosen": -1.1486375331878662, + "logps/rejected": -7.6883745193481445, + "loss": 0.5023, + "nll_loss": 0.5020057559013367, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11486375331878662, + "rewards/margins": 0.6539736986160278, + "rewards/rejected": -0.7688374519348145, + "step": 982 + }, + { + "epoch": 0.6115085536547434, + "grad_norm": 0.5131247043609619, + "learning_rate": 8.500000000000001e-07, + "log_odds_chosen": 6.256967544555664, + "log_odds_ratio": -0.09810786694288254, + "logits/chosen": 1.9399669170379639, + "logits/rejected": 1.8002293109893799, + "logps/chosen": -0.7033792734146118, + "logps/rejected": -6.037574768066406, + "loss": 0.7078, + "nll_loss": 0.6979826092720032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0703379288315773, + "rewards/margins": 0.5334195494651794, + "rewards/rejected": -0.6037575006484985, + "step": 983 + }, + { + "epoch": 0.6121306376360809, + "grad_norm": 0.3207487165927887, + "learning_rate": 8.000000000000001e-07, + "log_odds_chosen": 6.562335968017578, + "log_odds_ratio": -0.012317328713834286, + "logits/chosen": 2.2676196098327637, + "logits/rejected": 0.9253313541412354, + "logps/chosen": -1.3998887538909912, + "logps/rejected": -7.568476676940918, + "loss": 0.6606, + "nll_loss": 0.6593425869941711, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13998886942863464, + "rewards/margins": 0.6168588399887085, + "rewards/rejected": -0.756847620010376, + "step": 984 + }, + { + "epoch": 0.6127527216174183, + "grad_norm": 1.0857791900634766, + "learning_rate": 7.5e-07, + "log_odds_chosen": 6.333230018615723, + "log_odds_ratio": -0.10016593337059021, + "logits/chosen": -0.028537094593048096, + "logits/rejected": -0.48187828063964844, + "logps/chosen": -1.0475420951843262, + "logps/rejected": -6.9887003898620605, + "loss": 0.4935, + "nll_loss": 0.4835060238838196, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.10475420206785202, + "rewards/margins": 0.5941158533096313, + "rewards/rejected": -0.698870062828064, + "step": 985 + }, + { + "epoch": 0.6133748055987558, + "grad_norm": 0.2819894254207611, + "learning_rate": 7.000000000000001e-07, + "log_odds_chosen": 8.16779899597168, + "log_odds_ratio": -0.0010539994109421968, + "logits/chosen": 0.38561496138572693, + "logits/rejected": 0.6741596460342407, + "logps/chosen": -0.7265850305557251, + "logps/rejected": -8.145223617553711, + "loss": 0.5776, + "nll_loss": 0.5774969458580017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07265850901603699, + "rewards/margins": 0.7418637871742249, + "rewards/rejected": -0.8145222663879395, + "step": 986 + }, + { + "epoch": 0.6139968895800934, + "grad_norm": 0.3129705488681793, + "learning_rate": 6.5e-07, + "log_odds_chosen": 6.294776439666748, + "log_odds_ratio": -0.01629718951880932, + "logits/chosen": 0.7048096656799316, + "logits/rejected": 0.23704242706298828, + "logps/chosen": -0.9289849400520325, + "logps/rejected": -6.6981096267700195, + "loss": 0.5134, + "nll_loss": 0.5117835402488708, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09289849549531937, + "rewards/margins": 0.5769124627113342, + "rewards/rejected": -0.669810950756073, + "step": 987 + }, + { + "epoch": 0.6146189735614308, + "grad_norm": 0.3183548152446747, + "learning_rate": 6.000000000000001e-07, + "log_odds_chosen": 6.734692573547363, + "log_odds_ratio": -0.03981401398777962, + "logits/chosen": 0.0914728045463562, + "logits/rejected": -0.8617876172065735, + "logps/chosen": -1.0856958627700806, + "logps/rejected": -7.369524955749512, + "loss": 0.4957, + "nll_loss": 0.49168896675109863, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10856959223747253, + "rewards/margins": 0.6283829212188721, + "rewards/rejected": -0.736952543258667, + "step": 988 + }, + { + "epoch": 0.6152410575427683, + "grad_norm": 0.6235529184341431, + "learning_rate": 5.5e-07, + "log_odds_chosen": 6.597626686096191, + "log_odds_ratio": -0.0906713604927063, + "logits/chosen": 3.484645128250122, + "logits/rejected": 3.2487385272979736, + "logps/chosen": -0.6023905873298645, + "logps/rejected": -6.2580156326293945, + "loss": 0.875, + "nll_loss": 0.8659148812294006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06023906171321869, + "rewards/margins": 0.5655625462532043, + "rewards/rejected": -0.6258015632629395, + "step": 989 + }, + { + "epoch": 0.6158631415241057, + "grad_norm": 0.8194119334220886, + "learning_rate": 5.000000000000001e-07, + "log_odds_chosen": 4.316708087921143, + "log_odds_ratio": -0.18914556503295898, + "logits/chosen": 3.4032726287841797, + "logits/rejected": 2.219083309173584, + "logps/chosen": -0.8312450647354126, + "logps/rejected": -4.661098957061768, + "loss": 0.8722, + "nll_loss": 0.8533015251159668, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08312450349330902, + "rewards/margins": 0.3829854130744934, + "rewards/rejected": -0.4661099314689636, + "step": 990 + }, + { + "epoch": 0.6164852255054433, + "grad_norm": 1.9890536069869995, + "learning_rate": 4.5e-07, + "log_odds_chosen": 4.387207984924316, + "log_odds_ratio": -0.2528379559516907, + "logits/chosen": 2.1739017963409424, + "logits/rejected": 2.1724283695220947, + "logps/chosen": -1.1620302200317383, + "logps/rejected": -5.209095478057861, + "loss": 0.8117, + "nll_loss": 0.7864143252372742, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.11620301753282547, + "rewards/margins": 0.40470650792121887, + "rewards/rejected": -0.5209095478057861, + "step": 991 + }, + { + "epoch": 0.6171073094867807, + "grad_norm": 0.3827112317085266, + "learning_rate": 4.0000000000000003e-07, + "log_odds_chosen": 3.7871646881103516, + "log_odds_ratio": -0.2918343245983124, + "logits/chosen": 0.2715454697608948, + "logits/rejected": 1.047527551651001, + "logps/chosen": -0.6410025358200073, + "logps/rejected": -3.8774826526641846, + "loss": 0.5016, + "nll_loss": 0.47238293290138245, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.06410025805234909, + "rewards/margins": 0.32364803552627563, + "rewards/rejected": -0.38774827122688293, + "step": 992 + }, + { + "epoch": 0.6177293934681182, + "grad_norm": 0.40105417370796204, + "learning_rate": 3.5000000000000004e-07, + "log_odds_chosen": 3.676414966583252, + "log_odds_ratio": -0.37624049186706543, + "logits/chosen": 2.6079487800598145, + "logits/rejected": 1.3168003559112549, + "logps/chosen": -0.7987608313560486, + "logps/rejected": -4.06448221206665, + "loss": 0.784, + "nll_loss": 0.7463983297348022, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.07987608015537262, + "rewards/margins": 0.32657214999198914, + "rewards/rejected": -0.40644824504852295, + "step": 993 + }, + { + "epoch": 0.6183514774494556, + "grad_norm": 0.5913990139961243, + "learning_rate": 3.0000000000000004e-07, + "log_odds_chosen": 5.702366828918457, + "log_odds_ratio": -0.13208632171154022, + "logits/chosen": 0.6971375346183777, + "logits/rejected": 0.4381665587425232, + "logps/chosen": -0.904596209526062, + "logps/rejected": -6.130751609802246, + "loss": 0.4285, + "nll_loss": 0.415324866771698, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09045961499214172, + "rewards/margins": 0.5226155519485474, + "rewards/rejected": -0.6130751371383667, + "step": 994 + }, + { + "epoch": 0.6189735614307932, + "grad_norm": 0.3188931941986084, + "learning_rate": 2.5000000000000004e-07, + "log_odds_chosen": 5.129542350769043, + "log_odds_ratio": -0.12642808258533478, + "logits/chosen": 0.5445621013641357, + "logits/rejected": 0.43241095542907715, + "logps/chosen": -1.0532197952270508, + "logps/rejected": -5.681353569030762, + "loss": 0.5615, + "nll_loss": 0.5489034652709961, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1053219810128212, + "rewards/margins": 0.4628134071826935, + "rewards/rejected": -0.5681353807449341, + "step": 995 + }, + { + "epoch": 0.6195956454121306, + "grad_norm": 0.467891126871109, + "learning_rate": 2.0000000000000002e-07, + "log_odds_chosen": 3.8318843841552734, + "log_odds_ratio": -0.21675176918506622, + "logits/chosen": 1.3111586570739746, + "logits/rejected": 1.6427356004714966, + "logps/chosen": -1.457216739654541, + "logps/rejected": -5.035771369934082, + "loss": 0.5957, + "nll_loss": 0.5739951133728027, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.1457216888666153, + "rewards/margins": 0.3578554689884186, + "rewards/rejected": -0.5035771727561951, + "step": 996 + }, + { + "epoch": 0.6202177293934681, + "grad_norm": 0.4008631706237793, + "learning_rate": 1.5000000000000002e-07, + "log_odds_chosen": 3.40852952003479, + "log_odds_ratio": -0.2698494493961334, + "logits/chosen": 1.4783838987350464, + "logits/rejected": 1.5272125005722046, + "logps/chosen": -1.0040234327316284, + "logps/rejected": -4.11934232711792, + "loss": 0.6774, + "nll_loss": 0.6503931283950806, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.1004023477435112, + "rewards/margins": 0.3115319013595581, + "rewards/rejected": -0.4119342267513275, + "step": 997 + }, + { + "epoch": 0.6208398133748056, + "grad_norm": 0.535465657711029, + "learning_rate": 1.0000000000000001e-07, + "log_odds_chosen": 6.407077789306641, + "log_odds_ratio": -0.03647714480757713, + "logits/chosen": 0.8850218653678894, + "logits/rejected": 1.0664775371551514, + "logps/chosen": -0.763054370880127, + "logps/rejected": -6.34374475479126, + "loss": 0.5953, + "nll_loss": 0.5916392803192139, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07630544155836105, + "rewards/margins": 0.5580690503120422, + "rewards/rejected": -0.6343744993209839, + "step": 998 + }, + { + "epoch": 0.6214618973561431, + "grad_norm": 0.29914966225624084, + "learning_rate": 5.0000000000000004e-08, + "log_odds_chosen": 6.531560897827148, + "log_odds_ratio": -0.11589479446411133, + "logits/chosen": 1.1404070854187012, + "logits/rejected": 0.8704703450202942, + "logps/chosen": -0.751317024230957, + "logps/rejected": -6.46155309677124, + "loss": 0.5341, + "nll_loss": 0.5225302577018738, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.07513170689344406, + "rewards/margins": 0.5710236430168152, + "rewards/rejected": -0.6461552977561951, + "step": 999 + }, + { + "epoch": 0.6220839813374806, + "grad_norm": 0.3536701202392578, + "learning_rate": 0.0, + "log_odds_chosen": 6.4564714431762695, + "log_odds_ratio": -0.04604131728410721, + "logits/chosen": 2.8046364784240723, + "logits/rejected": 0.9901584386825562, + "logps/chosen": -1.0534753799438477, + "logps/rejected": -7.014216899871826, + "loss": 0.7159, + "nll_loss": 0.7113260626792908, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10534752905368805, + "rewards/margins": 0.596074104309082, + "rewards/rejected": -0.7014216184616089, + "step": 1000 + } + ], + "logging_steps": 1, + "max_steps": 1000, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}