{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9998584271253628, "eval_steps": 500, "global_step": 14126, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00014157287463721952, "grad_norm": 63.84707491036199, "learning_rate": 1.179245283018868e-08, "loss": 1.8583, "step": 1 }, { "epoch": 0.00028314574927443904, "grad_norm": 55.97125814452673, "learning_rate": 2.358490566037736e-08, "loss": 1.6209, "step": 2 }, { "epoch": 0.00042471862391165854, "grad_norm": 55.47292457882607, "learning_rate": 3.537735849056604e-08, "loss": 1.8056, "step": 3 }, { "epoch": 0.0005662914985488781, "grad_norm": 106.85951093901282, "learning_rate": 4.716981132075472e-08, "loss": 1.7069, "step": 4 }, { "epoch": 0.0007078643731860976, "grad_norm": 39.30327824322117, "learning_rate": 5.89622641509434e-08, "loss": 1.4781, "step": 5 }, { "epoch": 0.0008494372478233171, "grad_norm": 57.27069810407522, "learning_rate": 7.075471698113208e-08, "loss": 1.4853, "step": 6 }, { "epoch": 0.0009910101224605366, "grad_norm": 60.03881800942412, "learning_rate": 8.254716981132076e-08, "loss": 1.6872, "step": 7 }, { "epoch": 0.0011325829970977562, "grad_norm": 54.691218623571096, "learning_rate": 9.433962264150944e-08, "loss": 1.6029, "step": 8 }, { "epoch": 0.0012741558717349756, "grad_norm": 39.057755961924514, "learning_rate": 1.0613207547169811e-07, "loss": 1.722, "step": 9 }, { "epoch": 0.0014157287463721952, "grad_norm": 52.518007514387385, "learning_rate": 1.179245283018868e-07, "loss": 1.5892, "step": 10 }, { "epoch": 0.0015573016210094145, "grad_norm": 40.99188300939895, "learning_rate": 1.297169811320755e-07, "loss": 1.7743, "step": 11 }, { "epoch": 0.0016988744956466342, "grad_norm": 38.71449986762066, "learning_rate": 1.4150943396226417e-07, "loss": 1.6641, "step": 12 }, { "epoch": 0.0018404473702838535, "grad_norm": 28.593979464584738, "learning_rate": 1.5330188679245283e-07, "loss": 1.7126, "step": 13 }, { "epoch": 0.001982020244921073, "grad_norm": 25.95256998202442, "learning_rate": 1.6509433962264153e-07, "loss": 1.687, "step": 14 }, { "epoch": 0.0021235931195582925, "grad_norm": 22.958692525925773, "learning_rate": 1.768867924528302e-07, "loss": 1.4344, "step": 15 }, { "epoch": 0.0022651659941955123, "grad_norm": 35.51739849289535, "learning_rate": 1.886792452830189e-07, "loss": 1.4988, "step": 16 }, { "epoch": 0.0024067388688327317, "grad_norm": 31.415543945110795, "learning_rate": 2.0047169811320755e-07, "loss": 1.664, "step": 17 }, { "epoch": 0.002548311743469951, "grad_norm": 38.73468226733714, "learning_rate": 2.1226415094339622e-07, "loss": 1.6399, "step": 18 }, { "epoch": 0.0026898846181071705, "grad_norm": 39.95800560481044, "learning_rate": 2.2405660377358492e-07, "loss": 1.562, "step": 19 }, { "epoch": 0.0028314574927443903, "grad_norm": 38.70349717221139, "learning_rate": 2.358490566037736e-07, "loss": 1.6775, "step": 20 }, { "epoch": 0.0029730303673816097, "grad_norm": 37.92284068609741, "learning_rate": 2.476415094339623e-07, "loss": 1.5582, "step": 21 }, { "epoch": 0.003114603242018829, "grad_norm": 29.92503654968528, "learning_rate": 2.59433962264151e-07, "loss": 1.7453, "step": 22 }, { "epoch": 0.0032561761166560485, "grad_norm": 17.986794608085052, "learning_rate": 2.7122641509433966e-07, "loss": 1.483, "step": 23 }, { "epoch": 0.0033977489912932683, "grad_norm": 19.196691641288172, "learning_rate": 2.8301886792452833e-07, "loss": 1.7226, "step": 24 }, { "epoch": 0.0035393218659304877, "grad_norm": 19.784474090535298, "learning_rate": 2.94811320754717e-07, "loss": 1.6687, "step": 25 }, { "epoch": 0.003680894740567707, "grad_norm": 19.379568041385834, "learning_rate": 3.0660377358490567e-07, "loss": 1.6947, "step": 26 }, { "epoch": 0.003822467615204927, "grad_norm": 20.93732144863233, "learning_rate": 3.183962264150944e-07, "loss": 1.5904, "step": 27 }, { "epoch": 0.003964040489842146, "grad_norm": 20.618692809294735, "learning_rate": 3.3018867924528305e-07, "loss": 1.7195, "step": 28 }, { "epoch": 0.004105613364479366, "grad_norm": 21.44965376141011, "learning_rate": 3.419811320754717e-07, "loss": 1.5418, "step": 29 }, { "epoch": 0.004247186239116585, "grad_norm": 24.713102329691683, "learning_rate": 3.537735849056604e-07, "loss": 1.5442, "step": 30 }, { "epoch": 0.004388759113753805, "grad_norm": 22.915087984212906, "learning_rate": 3.6556603773584905e-07, "loss": 1.5081, "step": 31 }, { "epoch": 0.004530331988391025, "grad_norm": 16.63497923592595, "learning_rate": 3.773584905660378e-07, "loss": 1.6978, "step": 32 }, { "epoch": 0.004671904863028244, "grad_norm": 17.82796580505173, "learning_rate": 3.8915094339622644e-07, "loss": 1.5757, "step": 33 }, { "epoch": 0.0048134777376654635, "grad_norm": 14.200117105350365, "learning_rate": 4.009433962264151e-07, "loss": 1.4431, "step": 34 }, { "epoch": 0.004955050612302682, "grad_norm": 19.485917704063397, "learning_rate": 4.127358490566038e-07, "loss": 1.7673, "step": 35 }, { "epoch": 0.005096623486939902, "grad_norm": 16.828062434598177, "learning_rate": 4.2452830188679244e-07, "loss": 1.647, "step": 36 }, { "epoch": 0.005238196361577122, "grad_norm": 18.20557923369578, "learning_rate": 4.3632075471698116e-07, "loss": 1.6429, "step": 37 }, { "epoch": 0.005379769236214341, "grad_norm": 17.973172010231668, "learning_rate": 4.4811320754716983e-07, "loss": 1.6643, "step": 38 }, { "epoch": 0.005521342110851561, "grad_norm": 21.95264401163474, "learning_rate": 4.599056603773585e-07, "loss": 1.7284, "step": 39 }, { "epoch": 0.005662914985488781, "grad_norm": 20.081733017261296, "learning_rate": 4.716981132075472e-07, "loss": 1.5981, "step": 40 }, { "epoch": 0.005804487860126, "grad_norm": 15.272347507461708, "learning_rate": 4.834905660377359e-07, "loss": 1.3697, "step": 41 }, { "epoch": 0.005946060734763219, "grad_norm": 16.281213449155228, "learning_rate": 4.952830188679246e-07, "loss": 1.5138, "step": 42 }, { "epoch": 0.006087633609400439, "grad_norm": 18.346455727037323, "learning_rate": 5.070754716981133e-07, "loss": 1.6156, "step": 43 }, { "epoch": 0.006229206484037658, "grad_norm": 17.027650673277623, "learning_rate": 5.18867924528302e-07, "loss": 1.6837, "step": 44 }, { "epoch": 0.006370779358674878, "grad_norm": 17.554328510737477, "learning_rate": 5.306603773584906e-07, "loss": 1.6768, "step": 45 }, { "epoch": 0.006512352233312097, "grad_norm": 14.118845991574137, "learning_rate": 5.424528301886793e-07, "loss": 1.5004, "step": 46 }, { "epoch": 0.006653925107949317, "grad_norm": 14.766144790784827, "learning_rate": 5.542452830188679e-07, "loss": 1.5461, "step": 47 }, { "epoch": 0.006795497982586537, "grad_norm": 21.987237869964265, "learning_rate": 5.660377358490567e-07, "loss": 1.4939, "step": 48 }, { "epoch": 0.0069370708572237556, "grad_norm": 19.499311386078485, "learning_rate": 5.778301886792454e-07, "loss": 1.7227, "step": 49 }, { "epoch": 0.007078643731860975, "grad_norm": 15.671376000748497, "learning_rate": 5.89622641509434e-07, "loss": 1.6233, "step": 50 }, { "epoch": 0.007220216606498195, "grad_norm": 17.439377769079268, "learning_rate": 6.014150943396227e-07, "loss": 1.7287, "step": 51 }, { "epoch": 0.007361789481135414, "grad_norm": 14.003872472065261, "learning_rate": 6.132075471698113e-07, "loss": 1.6392, "step": 52 }, { "epoch": 0.007503362355772634, "grad_norm": 13.756472087420967, "learning_rate": 6.25e-07, "loss": 1.4719, "step": 53 }, { "epoch": 0.007644935230409854, "grad_norm": 13.88924412251876, "learning_rate": 6.367924528301888e-07, "loss": 1.4071, "step": 54 }, { "epoch": 0.007786508105047073, "grad_norm": 18.097433239101893, "learning_rate": 6.485849056603774e-07, "loss": 1.5474, "step": 55 }, { "epoch": 0.007928080979684293, "grad_norm": 19.15859169516871, "learning_rate": 6.603773584905661e-07, "loss": 1.694, "step": 56 }, { "epoch": 0.008069653854321512, "grad_norm": 19.846306583587932, "learning_rate": 6.721698113207547e-07, "loss": 1.681, "step": 57 }, { "epoch": 0.008211226728958732, "grad_norm": 15.972953825333935, "learning_rate": 6.839622641509434e-07, "loss": 1.4197, "step": 58 }, { "epoch": 0.00835279960359595, "grad_norm": 15.230586968956427, "learning_rate": 6.957547169811322e-07, "loss": 1.6777, "step": 59 }, { "epoch": 0.00849437247823317, "grad_norm": 14.869030179257814, "learning_rate": 7.075471698113208e-07, "loss": 1.5724, "step": 60 }, { "epoch": 0.00863594535287039, "grad_norm": 18.825678166994823, "learning_rate": 7.193396226415095e-07, "loss": 1.7128, "step": 61 }, { "epoch": 0.00877751822750761, "grad_norm": 21.544871501986485, "learning_rate": 7.311320754716981e-07, "loss": 1.5879, "step": 62 }, { "epoch": 0.00891909110214483, "grad_norm": 14.199419165538846, "learning_rate": 7.429245283018868e-07, "loss": 1.4421, "step": 63 }, { "epoch": 0.00906066397678205, "grad_norm": 16.192808952358394, "learning_rate": 7.547169811320755e-07, "loss": 1.726, "step": 64 }, { "epoch": 0.009202236851419267, "grad_norm": 15.347477291206577, "learning_rate": 7.665094339622642e-07, "loss": 1.5107, "step": 65 }, { "epoch": 0.009343809726056487, "grad_norm": 16.19939348003788, "learning_rate": 7.783018867924529e-07, "loss": 1.5814, "step": 66 }, { "epoch": 0.009485382600693707, "grad_norm": 13.51898158735264, "learning_rate": 7.900943396226415e-07, "loss": 1.5254, "step": 67 }, { "epoch": 0.009626955475330927, "grad_norm": 17.11927688640152, "learning_rate": 8.018867924528302e-07, "loss": 1.7402, "step": 68 }, { "epoch": 0.009768528349968147, "grad_norm": 16.34138684666224, "learning_rate": 8.136792452830189e-07, "loss": 1.6328, "step": 69 }, { "epoch": 0.009910101224605365, "grad_norm": 15.33422058413311, "learning_rate": 8.254716981132076e-07, "loss": 1.5055, "step": 70 }, { "epoch": 0.010051674099242585, "grad_norm": 16.604385789345123, "learning_rate": 8.372641509433963e-07, "loss": 1.8437, "step": 71 }, { "epoch": 0.010193246973879804, "grad_norm": 22.59078770129195, "learning_rate": 8.490566037735849e-07, "loss": 1.7667, "step": 72 }, { "epoch": 0.010334819848517024, "grad_norm": 15.591242831389156, "learning_rate": 8.608490566037736e-07, "loss": 1.6976, "step": 73 }, { "epoch": 0.010476392723154244, "grad_norm": 16.853932885559306, "learning_rate": 8.726415094339623e-07, "loss": 1.8321, "step": 74 }, { "epoch": 0.010617965597791464, "grad_norm": 17.897127127665716, "learning_rate": 8.844339622641509e-07, "loss": 1.6987, "step": 75 }, { "epoch": 0.010759538472428682, "grad_norm": 19.453441889989225, "learning_rate": 8.962264150943397e-07, "loss": 1.5366, "step": 76 }, { "epoch": 0.010901111347065902, "grad_norm": 15.923868730208563, "learning_rate": 9.080188679245283e-07, "loss": 1.6872, "step": 77 }, { "epoch": 0.011042684221703122, "grad_norm": 14.695457867436243, "learning_rate": 9.19811320754717e-07, "loss": 1.6081, "step": 78 }, { "epoch": 0.011184257096340341, "grad_norm": 13.220793581875874, "learning_rate": 9.316037735849057e-07, "loss": 1.5088, "step": 79 }, { "epoch": 0.011325829970977561, "grad_norm": 16.509900446221373, "learning_rate": 9.433962264150944e-07, "loss": 1.6059, "step": 80 }, { "epoch": 0.01146740284561478, "grad_norm": 19.10985010013447, "learning_rate": 9.551886792452833e-07, "loss": 1.5191, "step": 81 }, { "epoch": 0.011608975720252, "grad_norm": 13.444186086422903, "learning_rate": 9.669811320754719e-07, "loss": 1.5635, "step": 82 }, { "epoch": 0.011750548594889219, "grad_norm": 15.684371575512676, "learning_rate": 9.787735849056605e-07, "loss": 1.657, "step": 83 }, { "epoch": 0.011892121469526439, "grad_norm": 17.437719637673652, "learning_rate": 9.90566037735849e-07, "loss": 1.7334, "step": 84 }, { "epoch": 0.012033694344163659, "grad_norm": 16.434440066899146, "learning_rate": 1.002358490566038e-06, "loss": 1.4781, "step": 85 }, { "epoch": 0.012175267218800878, "grad_norm": 14.027730453629607, "learning_rate": 1.0141509433962265e-06, "loss": 1.5501, "step": 86 }, { "epoch": 0.012316840093438097, "grad_norm": 12.857080526049462, "learning_rate": 1.0259433962264152e-06, "loss": 1.4878, "step": 87 }, { "epoch": 0.012458412968075316, "grad_norm": 14.444642253075566, "learning_rate": 1.037735849056604e-06, "loss": 1.7252, "step": 88 }, { "epoch": 0.012599985842712536, "grad_norm": 19.848812869147945, "learning_rate": 1.0495283018867926e-06, "loss": 1.5859, "step": 89 }, { "epoch": 0.012741558717349756, "grad_norm": 14.409536785275609, "learning_rate": 1.0613207547169812e-06, "loss": 1.3952, "step": 90 }, { "epoch": 0.012883131591986976, "grad_norm": 13.819869203072765, "learning_rate": 1.07311320754717e-06, "loss": 1.5638, "step": 91 }, { "epoch": 0.013024704466624194, "grad_norm": 12.706403808863712, "learning_rate": 1.0849056603773587e-06, "loss": 1.6233, "step": 92 }, { "epoch": 0.013166277341261414, "grad_norm": 14.19695004227773, "learning_rate": 1.0966981132075473e-06, "loss": 1.5352, "step": 93 }, { "epoch": 0.013307850215898634, "grad_norm": 13.606861605060066, "learning_rate": 1.1084905660377359e-06, "loss": 1.4425, "step": 94 }, { "epoch": 0.013449423090535853, "grad_norm": 15.755666530871718, "learning_rate": 1.1202830188679247e-06, "loss": 1.4741, "step": 95 }, { "epoch": 0.013590995965173073, "grad_norm": 14.877809403486939, "learning_rate": 1.1320754716981133e-06, "loss": 1.5541, "step": 96 }, { "epoch": 0.013732568839810293, "grad_norm": 13.255857504466126, "learning_rate": 1.143867924528302e-06, "loss": 1.5647, "step": 97 }, { "epoch": 0.013874141714447511, "grad_norm": 13.855458838451487, "learning_rate": 1.1556603773584908e-06, "loss": 1.5705, "step": 98 }, { "epoch": 0.014015714589084731, "grad_norm": 13.55374692792118, "learning_rate": 1.1674528301886794e-06, "loss": 1.4713, "step": 99 }, { "epoch": 0.01415728746372195, "grad_norm": 14.95035118277431, "learning_rate": 1.179245283018868e-06, "loss": 1.6928, "step": 100 }, { "epoch": 0.01429886033835917, "grad_norm": 14.35835058233753, "learning_rate": 1.1910377358490568e-06, "loss": 1.4509, "step": 101 }, { "epoch": 0.01444043321299639, "grad_norm": 14.356722946912853, "learning_rate": 1.2028301886792454e-06, "loss": 1.6253, "step": 102 }, { "epoch": 0.01458200608763361, "grad_norm": 13.253531507191182, "learning_rate": 1.214622641509434e-06, "loss": 1.4329, "step": 103 }, { "epoch": 0.014723578962270828, "grad_norm": 12.068835872461198, "learning_rate": 1.2264150943396227e-06, "loss": 1.5758, "step": 104 }, { "epoch": 0.014865151836908048, "grad_norm": 16.067234870286583, "learning_rate": 1.2382075471698115e-06, "loss": 1.6764, "step": 105 }, { "epoch": 0.015006724711545268, "grad_norm": 13.277565550515734, "learning_rate": 1.25e-06, "loss": 1.5468, "step": 106 }, { "epoch": 0.015148297586182488, "grad_norm": 13.694581590592911, "learning_rate": 1.261792452830189e-06, "loss": 1.5405, "step": 107 }, { "epoch": 0.015289870460819708, "grad_norm": 13.575376524366472, "learning_rate": 1.2735849056603775e-06, "loss": 1.4807, "step": 108 }, { "epoch": 0.015431443335456926, "grad_norm": 13.251138890101696, "learning_rate": 1.2853773584905664e-06, "loss": 1.7341, "step": 109 }, { "epoch": 0.015573016210094145, "grad_norm": 12.48415499649706, "learning_rate": 1.2971698113207548e-06, "loss": 1.6548, "step": 110 }, { "epoch": 0.015714589084731365, "grad_norm": 14.528873503585993, "learning_rate": 1.3089622641509436e-06, "loss": 1.5195, "step": 111 }, { "epoch": 0.015856161959368585, "grad_norm": 10.910482465249267, "learning_rate": 1.3207547169811322e-06, "loss": 1.4978, "step": 112 }, { "epoch": 0.015997734834005805, "grad_norm": 13.338750183946267, "learning_rate": 1.332547169811321e-06, "loss": 1.5451, "step": 113 }, { "epoch": 0.016139307708643025, "grad_norm": 14.897790711740459, "learning_rate": 1.3443396226415094e-06, "loss": 1.6466, "step": 114 }, { "epoch": 0.016280880583280245, "grad_norm": 15.685948511737843, "learning_rate": 1.3561320754716983e-06, "loss": 1.4926, "step": 115 }, { "epoch": 0.016422453457917464, "grad_norm": 13.013156982019078, "learning_rate": 1.3679245283018869e-06, "loss": 1.5655, "step": 116 }, { "epoch": 0.016564026332554684, "grad_norm": 15.222617658566685, "learning_rate": 1.3797169811320757e-06, "loss": 1.6512, "step": 117 }, { "epoch": 0.0167055992071919, "grad_norm": 14.619333065909409, "learning_rate": 1.3915094339622643e-06, "loss": 1.4915, "step": 118 }, { "epoch": 0.01684717208182912, "grad_norm": 13.37392642266976, "learning_rate": 1.4033018867924531e-06, "loss": 1.6999, "step": 119 }, { "epoch": 0.01698874495646634, "grad_norm": 16.901172376067493, "learning_rate": 1.4150943396226415e-06, "loss": 1.4944, "step": 120 }, { "epoch": 0.01713031783110356, "grad_norm": 13.734780506314639, "learning_rate": 1.4268867924528304e-06, "loss": 1.5935, "step": 121 }, { "epoch": 0.01727189070574078, "grad_norm": 17.245475728592375, "learning_rate": 1.438679245283019e-06, "loss": 1.7136, "step": 122 }, { "epoch": 0.017413463580378, "grad_norm": 16.03460765276547, "learning_rate": 1.4504716981132078e-06, "loss": 1.6388, "step": 123 }, { "epoch": 0.01755503645501522, "grad_norm": 14.105209665129605, "learning_rate": 1.4622641509433962e-06, "loss": 1.5156, "step": 124 }, { "epoch": 0.01769660932965244, "grad_norm": 14.131155107694996, "learning_rate": 1.474056603773585e-06, "loss": 1.4179, "step": 125 }, { "epoch": 0.01783818220428966, "grad_norm": 13.715693174528676, "learning_rate": 1.4858490566037737e-06, "loss": 1.5075, "step": 126 }, { "epoch": 0.01797975507892688, "grad_norm": 13.156183890496015, "learning_rate": 1.4976415094339625e-06, "loss": 1.5233, "step": 127 }, { "epoch": 0.0181213279535641, "grad_norm": 14.943304617964309, "learning_rate": 1.509433962264151e-06, "loss": 1.576, "step": 128 }, { "epoch": 0.018262900828201315, "grad_norm": 15.375239176556835, "learning_rate": 1.52122641509434e-06, "loss": 1.5976, "step": 129 }, { "epoch": 0.018404473702838535, "grad_norm": 16.322948865568645, "learning_rate": 1.5330188679245283e-06, "loss": 1.5658, "step": 130 }, { "epoch": 0.018546046577475755, "grad_norm": 14.668642315593237, "learning_rate": 1.5448113207547172e-06, "loss": 1.6371, "step": 131 }, { "epoch": 0.018687619452112975, "grad_norm": 12.54557487621071, "learning_rate": 1.5566037735849058e-06, "loss": 1.6308, "step": 132 }, { "epoch": 0.018829192326750194, "grad_norm": 13.730156468710524, "learning_rate": 1.5683962264150946e-06, "loss": 1.5501, "step": 133 }, { "epoch": 0.018970765201387414, "grad_norm": 12.947855320378775, "learning_rate": 1.580188679245283e-06, "loss": 1.6422, "step": 134 }, { "epoch": 0.019112338076024634, "grad_norm": 15.812208090036545, "learning_rate": 1.5919811320754718e-06, "loss": 1.4228, "step": 135 }, { "epoch": 0.019253910950661854, "grad_norm": 12.463533844914142, "learning_rate": 1.6037735849056604e-06, "loss": 1.3262, "step": 136 }, { "epoch": 0.019395483825299074, "grad_norm": 18.82834624481006, "learning_rate": 1.6155660377358493e-06, "loss": 1.8582, "step": 137 }, { "epoch": 0.019537056699936294, "grad_norm": 14.707654265522446, "learning_rate": 1.6273584905660379e-06, "loss": 1.4652, "step": 138 }, { "epoch": 0.019678629574573513, "grad_norm": 13.564922063753375, "learning_rate": 1.6391509433962267e-06, "loss": 1.6464, "step": 139 }, { "epoch": 0.01982020244921073, "grad_norm": 13.096845383862064, "learning_rate": 1.650943396226415e-06, "loss": 1.4895, "step": 140 }, { "epoch": 0.01996177532384795, "grad_norm": 14.912794807634462, "learning_rate": 1.662735849056604e-06, "loss": 1.577, "step": 141 }, { "epoch": 0.02010334819848517, "grad_norm": 16.625749843580834, "learning_rate": 1.6745283018867925e-06, "loss": 1.5344, "step": 142 }, { "epoch": 0.02024492107312239, "grad_norm": 13.118412362779342, "learning_rate": 1.6863207547169814e-06, "loss": 1.5609, "step": 143 }, { "epoch": 0.02038649394775961, "grad_norm": 12.789803401339752, "learning_rate": 1.6981132075471698e-06, "loss": 1.4033, "step": 144 }, { "epoch": 0.02052806682239683, "grad_norm": 13.561377131657066, "learning_rate": 1.7099056603773586e-06, "loss": 1.4907, "step": 145 }, { "epoch": 0.02066963969703405, "grad_norm": 16.932227183179787, "learning_rate": 1.7216981132075472e-06, "loss": 1.7734, "step": 146 }, { "epoch": 0.02081121257167127, "grad_norm": 15.48548652670461, "learning_rate": 1.733490566037736e-06, "loss": 1.6198, "step": 147 }, { "epoch": 0.020952785446308488, "grad_norm": 15.300986683264753, "learning_rate": 1.7452830188679247e-06, "loss": 1.5848, "step": 148 }, { "epoch": 0.021094358320945708, "grad_norm": 16.437703294548463, "learning_rate": 1.7570754716981135e-06, "loss": 1.5339, "step": 149 }, { "epoch": 0.021235931195582928, "grad_norm": 13.028215160135677, "learning_rate": 1.7688679245283019e-06, "loss": 1.4365, "step": 150 }, { "epoch": 0.021377504070220144, "grad_norm": 15.91159045908045, "learning_rate": 1.7806603773584907e-06, "loss": 1.6195, "step": 151 }, { "epoch": 0.021519076944857364, "grad_norm": 16.954751326082516, "learning_rate": 1.7924528301886793e-06, "loss": 1.551, "step": 152 }, { "epoch": 0.021660649819494584, "grad_norm": 15.046821760517183, "learning_rate": 1.8042452830188682e-06, "loss": 1.825, "step": 153 }, { "epoch": 0.021802222694131804, "grad_norm": 15.091352378961156, "learning_rate": 1.8160377358490566e-06, "loss": 1.6292, "step": 154 }, { "epoch": 0.021943795568769024, "grad_norm": 14.432539182550636, "learning_rate": 1.8278301886792454e-06, "loss": 1.5909, "step": 155 }, { "epoch": 0.022085368443406243, "grad_norm": 15.350198730772796, "learning_rate": 1.839622641509434e-06, "loss": 1.3723, "step": 156 }, { "epoch": 0.022226941318043463, "grad_norm": 12.139384438307225, "learning_rate": 1.8514150943396228e-06, "loss": 1.5192, "step": 157 }, { "epoch": 0.022368514192680683, "grad_norm": 14.336121103218012, "learning_rate": 1.8632075471698114e-06, "loss": 1.5375, "step": 158 }, { "epoch": 0.022510087067317903, "grad_norm": 12.27092155247805, "learning_rate": 1.8750000000000003e-06, "loss": 1.4674, "step": 159 }, { "epoch": 0.022651659941955123, "grad_norm": 14.587461108995125, "learning_rate": 1.8867924528301889e-06, "loss": 1.6306, "step": 160 }, { "epoch": 0.022793232816592342, "grad_norm": 18.411363393349042, "learning_rate": 1.8985849056603775e-06, "loss": 1.5669, "step": 161 }, { "epoch": 0.02293480569122956, "grad_norm": 13.522875584079742, "learning_rate": 1.9103773584905665e-06, "loss": 1.5095, "step": 162 }, { "epoch": 0.02307637856586678, "grad_norm": 12.772640695713509, "learning_rate": 1.9221698113207547e-06, "loss": 1.5482, "step": 163 }, { "epoch": 0.023217951440504, "grad_norm": 13.092669094145359, "learning_rate": 1.9339622641509438e-06, "loss": 1.6376, "step": 164 }, { "epoch": 0.023359524315141218, "grad_norm": 14.703520393527892, "learning_rate": 1.9457547169811324e-06, "loss": 1.4219, "step": 165 }, { "epoch": 0.023501097189778438, "grad_norm": 14.982875967771507, "learning_rate": 1.957547169811321e-06, "loss": 1.6373, "step": 166 }, { "epoch": 0.023642670064415658, "grad_norm": 13.031265807656778, "learning_rate": 1.9693396226415096e-06, "loss": 1.5, "step": 167 }, { "epoch": 0.023784242939052878, "grad_norm": 13.171651419736513, "learning_rate": 1.981132075471698e-06, "loss": 1.5495, "step": 168 }, { "epoch": 0.023925815813690098, "grad_norm": 12.024859427980658, "learning_rate": 1.992924528301887e-06, "loss": 1.5937, "step": 169 }, { "epoch": 0.024067388688327317, "grad_norm": 11.309674494989553, "learning_rate": 2.004716981132076e-06, "loss": 1.5181, "step": 170 }, { "epoch": 0.024208961562964537, "grad_norm": 12.096265585430954, "learning_rate": 2.0165094339622645e-06, "loss": 1.5963, "step": 171 }, { "epoch": 0.024350534437601757, "grad_norm": 19.9050879026962, "learning_rate": 2.028301886792453e-06, "loss": 1.6418, "step": 172 }, { "epoch": 0.024492107312238973, "grad_norm": 11.237402071754202, "learning_rate": 2.0400943396226417e-06, "loss": 1.3917, "step": 173 }, { "epoch": 0.024633680186876193, "grad_norm": 24.721165979040737, "learning_rate": 2.0518867924528303e-06, "loss": 1.6966, "step": 174 }, { "epoch": 0.024775253061513413, "grad_norm": 13.442270115367128, "learning_rate": 2.063679245283019e-06, "loss": 1.3873, "step": 175 }, { "epoch": 0.024916825936150633, "grad_norm": 18.067810333952256, "learning_rate": 2.075471698113208e-06, "loss": 1.6409, "step": 176 }, { "epoch": 0.025058398810787853, "grad_norm": 12.152404647669659, "learning_rate": 2.087264150943396e-06, "loss": 1.5051, "step": 177 }, { "epoch": 0.025199971685425072, "grad_norm": 11.072792566895318, "learning_rate": 2.099056603773585e-06, "loss": 1.5607, "step": 178 }, { "epoch": 0.025341544560062292, "grad_norm": 17.038763394325674, "learning_rate": 2.110849056603774e-06, "loss": 1.6831, "step": 179 }, { "epoch": 0.025483117434699512, "grad_norm": 12.727139791809998, "learning_rate": 2.1226415094339624e-06, "loss": 1.4876, "step": 180 }, { "epoch": 0.025624690309336732, "grad_norm": 15.984658515630796, "learning_rate": 2.134433962264151e-06, "loss": 1.4248, "step": 181 }, { "epoch": 0.02576626318397395, "grad_norm": 13.670608200239414, "learning_rate": 2.14622641509434e-06, "loss": 1.5833, "step": 182 }, { "epoch": 0.02590783605861117, "grad_norm": 11.84797500498503, "learning_rate": 2.1580188679245283e-06, "loss": 1.5871, "step": 183 }, { "epoch": 0.026049408933248388, "grad_norm": 15.782458087267877, "learning_rate": 2.1698113207547173e-06, "loss": 1.6402, "step": 184 }, { "epoch": 0.026190981807885608, "grad_norm": 15.444899048430552, "learning_rate": 2.181603773584906e-06, "loss": 1.4687, "step": 185 }, { "epoch": 0.026332554682522828, "grad_norm": 12.700725408547353, "learning_rate": 2.1933962264150945e-06, "loss": 1.7111, "step": 186 }, { "epoch": 0.026474127557160047, "grad_norm": 12.791023826392621, "learning_rate": 2.205188679245283e-06, "loss": 1.5864, "step": 187 }, { "epoch": 0.026615700431797267, "grad_norm": 19.092763023028272, "learning_rate": 2.2169811320754718e-06, "loss": 1.5508, "step": 188 }, { "epoch": 0.026757273306434487, "grad_norm": 14.931716118391561, "learning_rate": 2.2287735849056604e-06, "loss": 1.6208, "step": 189 }, { "epoch": 0.026898846181071707, "grad_norm": 12.647548368609947, "learning_rate": 2.2405660377358494e-06, "loss": 1.523, "step": 190 }, { "epoch": 0.027040419055708927, "grad_norm": 16.25359128735333, "learning_rate": 2.252358490566038e-06, "loss": 1.6727, "step": 191 }, { "epoch": 0.027181991930346146, "grad_norm": 17.20422126956601, "learning_rate": 2.2641509433962266e-06, "loss": 1.626, "step": 192 }, { "epoch": 0.027323564804983366, "grad_norm": 21.05076711529958, "learning_rate": 2.2759433962264153e-06, "loss": 1.7074, "step": 193 }, { "epoch": 0.027465137679620586, "grad_norm": 16.915585237460196, "learning_rate": 2.287735849056604e-06, "loss": 1.5558, "step": 194 }, { "epoch": 0.027606710554257806, "grad_norm": 21.809565917902717, "learning_rate": 2.2995283018867925e-06, "loss": 1.7469, "step": 195 }, { "epoch": 0.027748283428895022, "grad_norm": 14.04482124429427, "learning_rate": 2.3113207547169815e-06, "loss": 1.4616, "step": 196 }, { "epoch": 0.027889856303532242, "grad_norm": 17.697562977753815, "learning_rate": 2.3231132075471697e-06, "loss": 1.4925, "step": 197 }, { "epoch": 0.028031429178169462, "grad_norm": 19.46590631820318, "learning_rate": 2.3349056603773588e-06, "loss": 1.5952, "step": 198 }, { "epoch": 0.02817300205280668, "grad_norm": 10.736677919476863, "learning_rate": 2.3466981132075474e-06, "loss": 1.5475, "step": 199 }, { "epoch": 0.0283145749274439, "grad_norm": 16.821841909683226, "learning_rate": 2.358490566037736e-06, "loss": 1.5889, "step": 200 }, { "epoch": 0.02845614780208112, "grad_norm": 14.392989338210592, "learning_rate": 2.3702830188679246e-06, "loss": 1.6246, "step": 201 }, { "epoch": 0.02859772067671834, "grad_norm": 14.68586936177538, "learning_rate": 2.3820754716981136e-06, "loss": 1.5908, "step": 202 }, { "epoch": 0.02873929355135556, "grad_norm": 11.30225393046411, "learning_rate": 2.393867924528302e-06, "loss": 1.6981, "step": 203 }, { "epoch": 0.02888086642599278, "grad_norm": 15.139140431280994, "learning_rate": 2.405660377358491e-06, "loss": 1.4584, "step": 204 }, { "epoch": 0.02902243930063, "grad_norm": 18.136757030031752, "learning_rate": 2.4174528301886795e-06, "loss": 1.7026, "step": 205 }, { "epoch": 0.02916401217526722, "grad_norm": 16.90510732910601, "learning_rate": 2.429245283018868e-06, "loss": 1.7415, "step": 206 }, { "epoch": 0.029305585049904437, "grad_norm": 15.73847903356735, "learning_rate": 2.4410377358490567e-06, "loss": 1.5474, "step": 207 }, { "epoch": 0.029447157924541657, "grad_norm": 13.883660440304627, "learning_rate": 2.4528301886792453e-06, "loss": 1.5644, "step": 208 }, { "epoch": 0.029588730799178876, "grad_norm": 15.753096987489972, "learning_rate": 2.464622641509434e-06, "loss": 1.4789, "step": 209 }, { "epoch": 0.029730303673816096, "grad_norm": 11.703151728315607, "learning_rate": 2.476415094339623e-06, "loss": 1.5977, "step": 210 }, { "epoch": 0.029871876548453316, "grad_norm": 20.35579220681886, "learning_rate": 2.4882075471698116e-06, "loss": 1.53, "step": 211 }, { "epoch": 0.030013449423090536, "grad_norm": 13.078082413871822, "learning_rate": 2.5e-06, "loss": 1.4975, "step": 212 }, { "epoch": 0.030155022297727756, "grad_norm": 25.05771536116359, "learning_rate": 2.511792452830189e-06, "loss": 1.5969, "step": 213 }, { "epoch": 0.030296595172364976, "grad_norm": 15.77066089665197, "learning_rate": 2.523584905660378e-06, "loss": 1.5186, "step": 214 }, { "epoch": 0.030438168047002195, "grad_norm": 12.83569677478928, "learning_rate": 2.535377358490566e-06, "loss": 1.6209, "step": 215 }, { "epoch": 0.030579740921639415, "grad_norm": 20.116665648571242, "learning_rate": 2.547169811320755e-06, "loss": 1.7061, "step": 216 }, { "epoch": 0.030721313796276635, "grad_norm": 14.944487529505027, "learning_rate": 2.5589622641509437e-06, "loss": 1.6152, "step": 217 }, { "epoch": 0.03086288667091385, "grad_norm": 11.449867107836203, "learning_rate": 2.5707547169811327e-06, "loss": 1.6268, "step": 218 }, { "epoch": 0.03100445954555107, "grad_norm": 15.189741809049485, "learning_rate": 2.582547169811321e-06, "loss": 1.6159, "step": 219 }, { "epoch": 0.03114603242018829, "grad_norm": 13.791001103102749, "learning_rate": 2.5943396226415095e-06, "loss": 1.547, "step": 220 }, { "epoch": 0.031287605294825514, "grad_norm": 17.008551746681174, "learning_rate": 2.6061320754716986e-06, "loss": 1.7294, "step": 221 }, { "epoch": 0.03142917816946273, "grad_norm": 16.712832992078088, "learning_rate": 2.617924528301887e-06, "loss": 1.4712, "step": 222 }, { "epoch": 0.03157075104409995, "grad_norm": 20.859240402187467, "learning_rate": 2.6297169811320754e-06, "loss": 1.5791, "step": 223 }, { "epoch": 0.03171232391873717, "grad_norm": 13.946644609870269, "learning_rate": 2.6415094339622644e-06, "loss": 1.5184, "step": 224 }, { "epoch": 0.03185389679337439, "grad_norm": 16.748550580071193, "learning_rate": 2.653301886792453e-06, "loss": 1.4621, "step": 225 }, { "epoch": 0.03199546966801161, "grad_norm": 18.75302384471135, "learning_rate": 2.665094339622642e-06, "loss": 1.5516, "step": 226 }, { "epoch": 0.032137042542648826, "grad_norm": 13.769395500217366, "learning_rate": 2.6768867924528303e-06, "loss": 1.578, "step": 227 }, { "epoch": 0.03227861541728605, "grad_norm": 15.258649530612034, "learning_rate": 2.688679245283019e-06, "loss": 1.474, "step": 228 }, { "epoch": 0.032420188291923266, "grad_norm": 17.45684463356843, "learning_rate": 2.700471698113208e-06, "loss": 1.5765, "step": 229 }, { "epoch": 0.03256176116656049, "grad_norm": 14.429139197588968, "learning_rate": 2.7122641509433965e-06, "loss": 1.6272, "step": 230 }, { "epoch": 0.032703334041197706, "grad_norm": 11.525824699767284, "learning_rate": 2.724056603773585e-06, "loss": 1.496, "step": 231 }, { "epoch": 0.03284490691583493, "grad_norm": 13.750744053909694, "learning_rate": 2.7358490566037738e-06, "loss": 1.571, "step": 232 }, { "epoch": 0.032986479790472145, "grad_norm": 15.490688849758717, "learning_rate": 2.7476415094339624e-06, "loss": 1.4308, "step": 233 }, { "epoch": 0.03312805266510937, "grad_norm": 13.793800642217017, "learning_rate": 2.7594339622641514e-06, "loss": 1.4714, "step": 234 }, { "epoch": 0.033269625539746585, "grad_norm": 10.697327139167207, "learning_rate": 2.7712264150943396e-06, "loss": 1.3354, "step": 235 }, { "epoch": 0.0334111984143838, "grad_norm": 12.554985265992823, "learning_rate": 2.7830188679245286e-06, "loss": 1.5042, "step": 236 }, { "epoch": 0.033552771289021024, "grad_norm": 11.094891900407966, "learning_rate": 2.7948113207547173e-06, "loss": 1.439, "step": 237 }, { "epoch": 0.03369434416365824, "grad_norm": 9.912370108947599, "learning_rate": 2.8066037735849063e-06, "loss": 1.499, "step": 238 }, { "epoch": 0.033835917038295464, "grad_norm": 11.257202216608063, "learning_rate": 2.8183962264150945e-06, "loss": 1.5, "step": 239 }, { "epoch": 0.03397748991293268, "grad_norm": 12.816795559463603, "learning_rate": 2.830188679245283e-06, "loss": 1.8102, "step": 240 }, { "epoch": 0.034119062787569904, "grad_norm": 14.413740151375256, "learning_rate": 2.841981132075472e-06, "loss": 1.4316, "step": 241 }, { "epoch": 0.03426063566220712, "grad_norm": 13.237587927219195, "learning_rate": 2.8537735849056608e-06, "loss": 1.6965, "step": 242 }, { "epoch": 0.03440220853684434, "grad_norm": 10.692188684490473, "learning_rate": 2.865566037735849e-06, "loss": 1.6786, "step": 243 }, { "epoch": 0.03454378141148156, "grad_norm": 12.375605404619984, "learning_rate": 2.877358490566038e-06, "loss": 1.627, "step": 244 }, { "epoch": 0.03468535428611878, "grad_norm": 11.62038732096559, "learning_rate": 2.8891509433962266e-06, "loss": 1.4265, "step": 245 }, { "epoch": 0.034826927160756, "grad_norm": 12.351094902399785, "learning_rate": 2.9009433962264156e-06, "loss": 1.5533, "step": 246 }, { "epoch": 0.034968500035393216, "grad_norm": 11.119967576123726, "learning_rate": 2.912735849056604e-06, "loss": 1.4086, "step": 247 }, { "epoch": 0.03511007291003044, "grad_norm": 12.860572805506845, "learning_rate": 2.9245283018867924e-06, "loss": 1.417, "step": 248 }, { "epoch": 0.035251645784667655, "grad_norm": 13.736791196383779, "learning_rate": 2.9363207547169815e-06, "loss": 1.5673, "step": 249 }, { "epoch": 0.03539321865930488, "grad_norm": 12.089991416707239, "learning_rate": 2.94811320754717e-06, "loss": 1.6234, "step": 250 }, { "epoch": 0.035534791533942095, "grad_norm": 12.865588850295339, "learning_rate": 2.9599056603773587e-06, "loss": 1.4899, "step": 251 }, { "epoch": 0.03567636440857932, "grad_norm": 11.37346328583593, "learning_rate": 2.9716981132075473e-06, "loss": 1.3134, "step": 252 }, { "epoch": 0.035817937283216535, "grad_norm": 12.952226154353376, "learning_rate": 2.983490566037736e-06, "loss": 1.7041, "step": 253 }, { "epoch": 0.03595951015785376, "grad_norm": 16.47740892503468, "learning_rate": 2.995283018867925e-06, "loss": 1.7105, "step": 254 }, { "epoch": 0.036101083032490974, "grad_norm": 12.259772314848023, "learning_rate": 3.007075471698113e-06, "loss": 1.5359, "step": 255 }, { "epoch": 0.0362426559071282, "grad_norm": 13.15361696113112, "learning_rate": 3.018867924528302e-06, "loss": 1.4668, "step": 256 }, { "epoch": 0.036384228781765414, "grad_norm": 11.85160884961273, "learning_rate": 3.030660377358491e-06, "loss": 1.4925, "step": 257 }, { "epoch": 0.03652580165640263, "grad_norm": 14.00304434237399, "learning_rate": 3.04245283018868e-06, "loss": 1.5922, "step": 258 }, { "epoch": 0.036667374531039854, "grad_norm": 15.004935406320534, "learning_rate": 3.054245283018868e-06, "loss": 1.6927, "step": 259 }, { "epoch": 0.03680894740567707, "grad_norm": 11.863595083517646, "learning_rate": 3.0660377358490567e-06, "loss": 1.4076, "step": 260 }, { "epoch": 0.03695052028031429, "grad_norm": 11.706887876582863, "learning_rate": 3.0778301886792457e-06, "loss": 1.428, "step": 261 }, { "epoch": 0.03709209315495151, "grad_norm": 12.609342931921738, "learning_rate": 3.0896226415094343e-06, "loss": 1.3543, "step": 262 }, { "epoch": 0.03723366602958873, "grad_norm": 15.37357379532275, "learning_rate": 3.1014150943396225e-06, "loss": 1.5881, "step": 263 }, { "epoch": 0.03737523890422595, "grad_norm": 10.541909638470802, "learning_rate": 3.1132075471698115e-06, "loss": 1.4197, "step": 264 }, { "epoch": 0.03751681177886317, "grad_norm": 12.994250860281323, "learning_rate": 3.125e-06, "loss": 1.4946, "step": 265 }, { "epoch": 0.03765838465350039, "grad_norm": 13.65478731261437, "learning_rate": 3.136792452830189e-06, "loss": 1.5497, "step": 266 }, { "epoch": 0.03779995752813761, "grad_norm": 11.254530608982128, "learning_rate": 3.148584905660378e-06, "loss": 1.5299, "step": 267 }, { "epoch": 0.03794153040277483, "grad_norm": 11.656297126245217, "learning_rate": 3.160377358490566e-06, "loss": 1.5153, "step": 268 }, { "epoch": 0.038083103277412045, "grad_norm": 13.755509815814488, "learning_rate": 3.172169811320755e-06, "loss": 1.427, "step": 269 }, { "epoch": 0.03822467615204927, "grad_norm": 13.587216124600417, "learning_rate": 3.1839622641509436e-06, "loss": 1.5298, "step": 270 }, { "epoch": 0.038366249026686484, "grad_norm": 12.112294884865117, "learning_rate": 3.1957547169811327e-06, "loss": 1.5709, "step": 271 }, { "epoch": 0.03850782190132371, "grad_norm": 13.637326325622755, "learning_rate": 3.207547169811321e-06, "loss": 1.5438, "step": 272 }, { "epoch": 0.038649394775960924, "grad_norm": 17.207797851112897, "learning_rate": 3.2193396226415095e-06, "loss": 1.5918, "step": 273 }, { "epoch": 0.03879096765059815, "grad_norm": 14.940634681875654, "learning_rate": 3.2311320754716985e-06, "loss": 1.5988, "step": 274 }, { "epoch": 0.038932540525235364, "grad_norm": 16.91909026024671, "learning_rate": 3.242924528301887e-06, "loss": 1.6323, "step": 275 }, { "epoch": 0.03907411339987259, "grad_norm": 12.352112846491524, "learning_rate": 3.2547169811320758e-06, "loss": 1.579, "step": 276 }, { "epoch": 0.0392156862745098, "grad_norm": 14.36729636055142, "learning_rate": 3.2665094339622644e-06, "loss": 1.6939, "step": 277 }, { "epoch": 0.03935725914914703, "grad_norm": 14.536952846001634, "learning_rate": 3.2783018867924534e-06, "loss": 1.3646, "step": 278 }, { "epoch": 0.03949883202378424, "grad_norm": 13.0292785763344, "learning_rate": 3.290094339622642e-06, "loss": 1.6463, "step": 279 }, { "epoch": 0.03964040489842146, "grad_norm": 13.19163817071076, "learning_rate": 3.30188679245283e-06, "loss": 1.5869, "step": 280 }, { "epoch": 0.03978197777305868, "grad_norm": 15.541825008182306, "learning_rate": 3.3136792452830192e-06, "loss": 1.3878, "step": 281 }, { "epoch": 0.0399235506476959, "grad_norm": 14.602579260011632, "learning_rate": 3.325471698113208e-06, "loss": 1.6486, "step": 282 }, { "epoch": 0.04006512352233312, "grad_norm": 12.854613524613935, "learning_rate": 3.337264150943397e-06, "loss": 1.4761, "step": 283 }, { "epoch": 0.04020669639697034, "grad_norm": 14.591155187043375, "learning_rate": 3.349056603773585e-06, "loss": 1.4768, "step": 284 }, { "epoch": 0.04034826927160756, "grad_norm": 14.00770574764119, "learning_rate": 3.3608490566037737e-06, "loss": 1.4841, "step": 285 }, { "epoch": 0.04048984214624478, "grad_norm": 12.684547118273601, "learning_rate": 3.3726415094339627e-06, "loss": 1.6088, "step": 286 }, { "epoch": 0.040631415020882, "grad_norm": 14.077606360972457, "learning_rate": 3.3844339622641514e-06, "loss": 1.4525, "step": 287 }, { "epoch": 0.04077298789551922, "grad_norm": 12.37692030062122, "learning_rate": 3.3962264150943395e-06, "loss": 1.461, "step": 288 }, { "epoch": 0.04091456077015644, "grad_norm": 12.70991347864987, "learning_rate": 3.4080188679245286e-06, "loss": 1.6119, "step": 289 }, { "epoch": 0.04105613364479366, "grad_norm": 14.625094421603585, "learning_rate": 3.419811320754717e-06, "loss": 1.5845, "step": 290 }, { "epoch": 0.041197706519430874, "grad_norm": 12.650931439391318, "learning_rate": 3.4316037735849062e-06, "loss": 1.511, "step": 291 }, { "epoch": 0.0413392793940681, "grad_norm": 17.681685391698824, "learning_rate": 3.4433962264150944e-06, "loss": 1.6627, "step": 292 }, { "epoch": 0.041480852268705314, "grad_norm": 13.852901972742254, "learning_rate": 3.455188679245283e-06, "loss": 1.4634, "step": 293 }, { "epoch": 0.04162242514334254, "grad_norm": 11.235050356199906, "learning_rate": 3.466981132075472e-06, "loss": 1.5729, "step": 294 }, { "epoch": 0.04176399801797975, "grad_norm": 15.061645483573551, "learning_rate": 3.4787735849056607e-06, "loss": 1.44, "step": 295 }, { "epoch": 0.041905570892616976, "grad_norm": 13.213340861000459, "learning_rate": 3.4905660377358493e-06, "loss": 1.5443, "step": 296 }, { "epoch": 0.04204714376725419, "grad_norm": 12.829084873878257, "learning_rate": 3.502358490566038e-06, "loss": 1.51, "step": 297 }, { "epoch": 0.042188716641891416, "grad_norm": 11.839341764968793, "learning_rate": 3.514150943396227e-06, "loss": 1.5636, "step": 298 }, { "epoch": 0.04233028951652863, "grad_norm": 12.463975188126554, "learning_rate": 3.5259433962264156e-06, "loss": 1.5464, "step": 299 }, { "epoch": 0.042471862391165856, "grad_norm": 12.505469994572625, "learning_rate": 3.5377358490566038e-06, "loss": 1.6278, "step": 300 }, { "epoch": 0.04261343526580307, "grad_norm": 11.95865948771701, "learning_rate": 3.549528301886793e-06, "loss": 1.4534, "step": 301 }, { "epoch": 0.04275500814044029, "grad_norm": 13.1569510882452, "learning_rate": 3.5613207547169814e-06, "loss": 1.4099, "step": 302 }, { "epoch": 0.04289658101507751, "grad_norm": 12.407375947607674, "learning_rate": 3.5731132075471705e-06, "loss": 1.5631, "step": 303 }, { "epoch": 0.04303815388971473, "grad_norm": 11.495179866198267, "learning_rate": 3.5849056603773586e-06, "loss": 1.4924, "step": 304 }, { "epoch": 0.04317972676435195, "grad_norm": 10.190325198988146, "learning_rate": 3.5966981132075473e-06, "loss": 1.3476, "step": 305 }, { "epoch": 0.04332129963898917, "grad_norm": 11.96765291051355, "learning_rate": 3.6084905660377363e-06, "loss": 1.5363, "step": 306 }, { "epoch": 0.04346287251362639, "grad_norm": 15.508449097450175, "learning_rate": 3.620283018867925e-06, "loss": 1.3793, "step": 307 }, { "epoch": 0.04360444538826361, "grad_norm": 12.019986356959162, "learning_rate": 3.632075471698113e-06, "loss": 1.4787, "step": 308 }, { "epoch": 0.04374601826290083, "grad_norm": 11.371062886423248, "learning_rate": 3.643867924528302e-06, "loss": 1.675, "step": 309 }, { "epoch": 0.04388759113753805, "grad_norm": 18.77545310132771, "learning_rate": 3.6556603773584908e-06, "loss": 1.6515, "step": 310 }, { "epoch": 0.04402916401217527, "grad_norm": 10.37820311588431, "learning_rate": 3.66745283018868e-06, "loss": 1.4924, "step": 311 }, { "epoch": 0.04417073688681249, "grad_norm": 13.989321859855567, "learning_rate": 3.679245283018868e-06, "loss": 1.6333, "step": 312 }, { "epoch": 0.0443123097614497, "grad_norm": 18.14246111374286, "learning_rate": 3.6910377358490566e-06, "loss": 1.4851, "step": 313 }, { "epoch": 0.044453882636086926, "grad_norm": 12.43205655848772, "learning_rate": 3.7028301886792456e-06, "loss": 1.554, "step": 314 }, { "epoch": 0.04459545551072414, "grad_norm": 14.370121136223124, "learning_rate": 3.7146226415094343e-06, "loss": 1.6656, "step": 315 }, { "epoch": 0.044737028385361366, "grad_norm": 13.226365224828282, "learning_rate": 3.726415094339623e-06, "loss": 1.458, "step": 316 }, { "epoch": 0.04487860125999858, "grad_norm": 12.40553231649751, "learning_rate": 3.7382075471698115e-06, "loss": 1.5017, "step": 317 }, { "epoch": 0.045020174134635806, "grad_norm": 10.67209375919637, "learning_rate": 3.7500000000000005e-06, "loss": 1.4327, "step": 318 }, { "epoch": 0.04516174700927302, "grad_norm": 10.722152481335444, "learning_rate": 3.761792452830189e-06, "loss": 1.4864, "step": 319 }, { "epoch": 0.045303319883910245, "grad_norm": 10.303894521469555, "learning_rate": 3.7735849056603777e-06, "loss": 1.4328, "step": 320 }, { "epoch": 0.04544489275854746, "grad_norm": 12.898338128024236, "learning_rate": 3.7853773584905664e-06, "loss": 1.6387, "step": 321 }, { "epoch": 0.045586465633184685, "grad_norm": 12.044024772261295, "learning_rate": 3.797169811320755e-06, "loss": 1.3398, "step": 322 }, { "epoch": 0.0457280385078219, "grad_norm": 13.523557101579728, "learning_rate": 3.808962264150944e-06, "loss": 1.5648, "step": 323 }, { "epoch": 0.04586961138245912, "grad_norm": 15.072970095181454, "learning_rate": 3.820754716981133e-06, "loss": 1.5107, "step": 324 }, { "epoch": 0.04601118425709634, "grad_norm": 12.612250509526008, "learning_rate": 3.832547169811321e-06, "loss": 1.4006, "step": 325 }, { "epoch": 0.04615275713173356, "grad_norm": 11.13828470225099, "learning_rate": 3.8443396226415094e-06, "loss": 1.3893, "step": 326 }, { "epoch": 0.04629433000637078, "grad_norm": 13.189801779089771, "learning_rate": 3.856132075471699e-06, "loss": 1.3141, "step": 327 }, { "epoch": 0.046435902881008, "grad_norm": 18.802420212817683, "learning_rate": 3.8679245283018875e-06, "loss": 1.7083, "step": 328 }, { "epoch": 0.04657747575564522, "grad_norm": 13.115984626949476, "learning_rate": 3.879716981132075e-06, "loss": 1.422, "step": 329 }, { "epoch": 0.046719048630282436, "grad_norm": 14.232245377019035, "learning_rate": 3.891509433962265e-06, "loss": 1.6212, "step": 330 }, { "epoch": 0.04686062150491966, "grad_norm": 12.930792650088017, "learning_rate": 3.903301886792453e-06, "loss": 1.5937, "step": 331 }, { "epoch": 0.047002194379556876, "grad_norm": 11.525853825052804, "learning_rate": 3.915094339622642e-06, "loss": 1.4803, "step": 332 }, { "epoch": 0.0471437672541941, "grad_norm": 15.47951951101817, "learning_rate": 3.926886792452831e-06, "loss": 1.6232, "step": 333 }, { "epoch": 0.047285340128831316, "grad_norm": 9.06063634191386, "learning_rate": 3.938679245283019e-06, "loss": 1.3336, "step": 334 }, { "epoch": 0.04742691300346853, "grad_norm": 18.19059637919712, "learning_rate": 3.950471698113208e-06, "loss": 1.5712, "step": 335 }, { "epoch": 0.047568485878105755, "grad_norm": 15.011710618329829, "learning_rate": 3.962264150943396e-06, "loss": 1.6409, "step": 336 }, { "epoch": 0.04771005875274297, "grad_norm": 13.490860282325459, "learning_rate": 3.974056603773585e-06, "loss": 1.4341, "step": 337 }, { "epoch": 0.047851631627380195, "grad_norm": 18.730439829878613, "learning_rate": 3.985849056603774e-06, "loss": 1.4, "step": 338 }, { "epoch": 0.04799320450201741, "grad_norm": 13.530274530009429, "learning_rate": 3.997641509433962e-06, "loss": 1.5709, "step": 339 }, { "epoch": 0.048134777376654635, "grad_norm": 14.513655973489136, "learning_rate": 4.009433962264152e-06, "loss": 1.5607, "step": 340 }, { "epoch": 0.04827635025129185, "grad_norm": 18.01383948762555, "learning_rate": 4.0212264150943395e-06, "loss": 1.6144, "step": 341 }, { "epoch": 0.048417923125929074, "grad_norm": 11.15823030393373, "learning_rate": 4.033018867924529e-06, "loss": 1.477, "step": 342 }, { "epoch": 0.04855949600056629, "grad_norm": 13.235963692277236, "learning_rate": 4.0448113207547176e-06, "loss": 1.4943, "step": 343 }, { "epoch": 0.048701068875203514, "grad_norm": 16.775677940438893, "learning_rate": 4.056603773584906e-06, "loss": 1.583, "step": 344 }, { "epoch": 0.04884264174984073, "grad_norm": 13.915051090477029, "learning_rate": 4.068396226415095e-06, "loss": 1.4982, "step": 345 }, { "epoch": 0.04898421462447795, "grad_norm": 11.261778366560899, "learning_rate": 4.080188679245283e-06, "loss": 1.5752, "step": 346 }, { "epoch": 0.04912578749911517, "grad_norm": 11.391966522296192, "learning_rate": 4.091981132075472e-06, "loss": 1.4978, "step": 347 }, { "epoch": 0.049267360373752386, "grad_norm": 13.380136804722362, "learning_rate": 4.103773584905661e-06, "loss": 1.5026, "step": 348 }, { "epoch": 0.04940893324838961, "grad_norm": 16.3502494769647, "learning_rate": 4.115566037735849e-06, "loss": 1.4355, "step": 349 }, { "epoch": 0.049550506123026826, "grad_norm": 11.965934721115605, "learning_rate": 4.127358490566038e-06, "loss": 1.4808, "step": 350 }, { "epoch": 0.04969207899766405, "grad_norm": 10.20245985401334, "learning_rate": 4.1391509433962265e-06, "loss": 1.3701, "step": 351 }, { "epoch": 0.049833651872301266, "grad_norm": 15.36983446636422, "learning_rate": 4.150943396226416e-06, "loss": 1.7683, "step": 352 }, { "epoch": 0.04997522474693849, "grad_norm": 14.325277563471774, "learning_rate": 4.162735849056604e-06, "loss": 1.5218, "step": 353 }, { "epoch": 0.050116797621575705, "grad_norm": 11.758763068925779, "learning_rate": 4.174528301886792e-06, "loss": 1.5006, "step": 354 }, { "epoch": 0.05025837049621293, "grad_norm": 10.218577905541508, "learning_rate": 4.186320754716982e-06, "loss": 1.3826, "step": 355 }, { "epoch": 0.050399943370850145, "grad_norm": 12.837478628762943, "learning_rate": 4.19811320754717e-06, "loss": 1.6801, "step": 356 }, { "epoch": 0.05054151624548736, "grad_norm": 12.981278657399459, "learning_rate": 4.209905660377359e-06, "loss": 1.5594, "step": 357 }, { "epoch": 0.050683089120124585, "grad_norm": 12.257283439427072, "learning_rate": 4.221698113207548e-06, "loss": 1.5344, "step": 358 }, { "epoch": 0.0508246619947618, "grad_norm": 10.046586051808182, "learning_rate": 4.233490566037736e-06, "loss": 1.4747, "step": 359 }, { "epoch": 0.050966234869399024, "grad_norm": 12.50178881243569, "learning_rate": 4.245283018867925e-06, "loss": 1.6338, "step": 360 }, { "epoch": 0.05110780774403624, "grad_norm": 14.844289155504434, "learning_rate": 4.2570754716981135e-06, "loss": 1.474, "step": 361 }, { "epoch": 0.051249380618673464, "grad_norm": 11.063001373481269, "learning_rate": 4.268867924528302e-06, "loss": 1.5917, "step": 362 }, { "epoch": 0.05139095349331068, "grad_norm": 11.631744069260355, "learning_rate": 4.280660377358491e-06, "loss": 1.4944, "step": 363 }, { "epoch": 0.0515325263679479, "grad_norm": 11.792182039426654, "learning_rate": 4.29245283018868e-06, "loss": 1.483, "step": 364 }, { "epoch": 0.05167409924258512, "grad_norm": 10.048679883806955, "learning_rate": 4.304245283018868e-06, "loss": 1.3792, "step": 365 }, { "epoch": 0.05181567211722234, "grad_norm": 16.281829179357622, "learning_rate": 4.3160377358490565e-06, "loss": 1.6723, "step": 366 }, { "epoch": 0.05195724499185956, "grad_norm": 12.927769046684508, "learning_rate": 4.327830188679246e-06, "loss": 1.6836, "step": 367 }, { "epoch": 0.052098817866496776, "grad_norm": 17.67335575554652, "learning_rate": 4.339622641509435e-06, "loss": 1.4297, "step": 368 }, { "epoch": 0.052240390741134, "grad_norm": 13.404726453164931, "learning_rate": 4.351415094339622e-06, "loss": 1.4923, "step": 369 }, { "epoch": 0.052381963615771215, "grad_norm": 10.458356474791275, "learning_rate": 4.363207547169812e-06, "loss": 1.5893, "step": 370 }, { "epoch": 0.05252353649040844, "grad_norm": 12.813243118693643, "learning_rate": 4.3750000000000005e-06, "loss": 1.4019, "step": 371 }, { "epoch": 0.052665109365045655, "grad_norm": 14.204267499943844, "learning_rate": 4.386792452830189e-06, "loss": 1.4609, "step": 372 }, { "epoch": 0.05280668223968288, "grad_norm": 11.568275452643846, "learning_rate": 4.398584905660378e-06, "loss": 1.5952, "step": 373 }, { "epoch": 0.052948255114320095, "grad_norm": 10.301470782998356, "learning_rate": 4.410377358490566e-06, "loss": 1.5027, "step": 374 }, { "epoch": 0.05308982798895732, "grad_norm": 9.724272210902896, "learning_rate": 4.422169811320755e-06, "loss": 1.2069, "step": 375 }, { "epoch": 0.053231400863594534, "grad_norm": 10.561957892777674, "learning_rate": 4.4339622641509435e-06, "loss": 1.3286, "step": 376 }, { "epoch": 0.05337297373823176, "grad_norm": 11.343865680367463, "learning_rate": 4.445754716981133e-06, "loss": 1.4346, "step": 377 }, { "epoch": 0.053514546612868974, "grad_norm": 12.051240241461535, "learning_rate": 4.457547169811321e-06, "loss": 1.4667, "step": 378 }, { "epoch": 0.0536561194875062, "grad_norm": 9.953691068209203, "learning_rate": 4.469339622641509e-06, "loss": 1.558, "step": 379 }, { "epoch": 0.053797692362143414, "grad_norm": 11.5291451701543, "learning_rate": 4.481132075471699e-06, "loss": 1.3692, "step": 380 }, { "epoch": 0.05393926523678063, "grad_norm": 10.910131425074693, "learning_rate": 4.4929245283018875e-06, "loss": 1.3542, "step": 381 }, { "epoch": 0.05408083811141785, "grad_norm": 12.734940554926041, "learning_rate": 4.504716981132076e-06, "loss": 1.5591, "step": 382 }, { "epoch": 0.05422241098605507, "grad_norm": 10.05784319105054, "learning_rate": 4.516509433962265e-06, "loss": 1.4132, "step": 383 }, { "epoch": 0.05436398386069229, "grad_norm": 15.70995605074091, "learning_rate": 4.528301886792453e-06, "loss": 1.5423, "step": 384 }, { "epoch": 0.05450555673532951, "grad_norm": 12.757097849128012, "learning_rate": 4.540094339622642e-06, "loss": 1.7114, "step": 385 }, { "epoch": 0.05464712960996673, "grad_norm": 15.60328080328531, "learning_rate": 4.5518867924528305e-06, "loss": 1.6, "step": 386 }, { "epoch": 0.05478870248460395, "grad_norm": 12.499285257886061, "learning_rate": 4.563679245283019e-06, "loss": 1.6051, "step": 387 }, { "epoch": 0.05493027535924117, "grad_norm": 11.840270323889293, "learning_rate": 4.575471698113208e-06, "loss": 1.4581, "step": 388 }, { "epoch": 0.05507184823387839, "grad_norm": 13.803985042434586, "learning_rate": 4.587264150943397e-06, "loss": 1.5296, "step": 389 }, { "epoch": 0.05521342110851561, "grad_norm": 11.070902074433647, "learning_rate": 4.599056603773585e-06, "loss": 1.3439, "step": 390 }, { "epoch": 0.05535499398315283, "grad_norm": 9.05115214227112, "learning_rate": 4.610849056603774e-06, "loss": 1.3973, "step": 391 }, { "epoch": 0.055496566857790045, "grad_norm": 11.169374583604366, "learning_rate": 4.622641509433963e-06, "loss": 1.5362, "step": 392 }, { "epoch": 0.05563813973242727, "grad_norm": 13.861730880658898, "learning_rate": 4.634433962264152e-06, "loss": 1.4226, "step": 393 }, { "epoch": 0.055779712607064484, "grad_norm": 11.698157036548869, "learning_rate": 4.6462264150943394e-06, "loss": 1.3848, "step": 394 }, { "epoch": 0.05592128548170171, "grad_norm": 12.369582714792198, "learning_rate": 4.658018867924529e-06, "loss": 1.6363, "step": 395 }, { "epoch": 0.056062858356338924, "grad_norm": 12.205755558342139, "learning_rate": 4.6698113207547175e-06, "loss": 1.3974, "step": 396 }, { "epoch": 0.05620443123097615, "grad_norm": 11.182746772524984, "learning_rate": 4.681603773584906e-06, "loss": 1.449, "step": 397 }, { "epoch": 0.05634600410561336, "grad_norm": 12.037298094263358, "learning_rate": 4.693396226415095e-06, "loss": 1.6078, "step": 398 }, { "epoch": 0.05648757698025059, "grad_norm": 13.599835316278018, "learning_rate": 4.705188679245283e-06, "loss": 1.4239, "step": 399 }, { "epoch": 0.0566291498548878, "grad_norm": 10.903349307577322, "learning_rate": 4.716981132075472e-06, "loss": 1.3857, "step": 400 }, { "epoch": 0.056770722729525026, "grad_norm": 11.681769776758465, "learning_rate": 4.728773584905661e-06, "loss": 1.6518, "step": 401 }, { "epoch": 0.05691229560416224, "grad_norm": 14.112844993084424, "learning_rate": 4.740566037735849e-06, "loss": 1.4499, "step": 402 }, { "epoch": 0.05705386847879946, "grad_norm": 10.543848307026332, "learning_rate": 4.752358490566038e-06, "loss": 1.5575, "step": 403 }, { "epoch": 0.05719544135343668, "grad_norm": 11.887331693242022, "learning_rate": 4.764150943396227e-06, "loss": 1.4608, "step": 404 }, { "epoch": 0.0573370142280739, "grad_norm": 15.629967472093822, "learning_rate": 4.775943396226416e-06, "loss": 1.6656, "step": 405 }, { "epoch": 0.05747858710271112, "grad_norm": 13.111115004831658, "learning_rate": 4.787735849056604e-06, "loss": 1.5731, "step": 406 }, { "epoch": 0.05762015997734834, "grad_norm": 16.26221558588247, "learning_rate": 4.799528301886793e-06, "loss": 1.5037, "step": 407 }, { "epoch": 0.05776173285198556, "grad_norm": 10.063603879668307, "learning_rate": 4.811320754716982e-06, "loss": 1.331, "step": 408 }, { "epoch": 0.05790330572662278, "grad_norm": 12.313716925109118, "learning_rate": 4.82311320754717e-06, "loss": 1.4243, "step": 409 }, { "epoch": 0.05804487860126, "grad_norm": 14.280535189646583, "learning_rate": 4.834905660377359e-06, "loss": 1.5115, "step": 410 }, { "epoch": 0.05818645147589722, "grad_norm": 13.245199233472292, "learning_rate": 4.8466981132075476e-06, "loss": 1.5789, "step": 411 }, { "epoch": 0.05832802435053444, "grad_norm": 11.715478058852781, "learning_rate": 4.858490566037736e-06, "loss": 1.6146, "step": 412 }, { "epoch": 0.05846959722517166, "grad_norm": 13.590734107798122, "learning_rate": 4.870283018867925e-06, "loss": 1.4784, "step": 413 }, { "epoch": 0.058611170099808874, "grad_norm": 11.480304603921802, "learning_rate": 4.882075471698113e-06, "loss": 1.5629, "step": 414 }, { "epoch": 0.0587527429744461, "grad_norm": 13.021725620468496, "learning_rate": 4.893867924528302e-06, "loss": 1.5649, "step": 415 }, { "epoch": 0.05889431584908331, "grad_norm": 15.366355885130233, "learning_rate": 4.905660377358491e-06, "loss": 1.4631, "step": 416 }, { "epoch": 0.05903588872372054, "grad_norm": 9.805928939708133, "learning_rate": 4.91745283018868e-06, "loss": 1.4948, "step": 417 }, { "epoch": 0.05917746159835775, "grad_norm": 10.019636329858344, "learning_rate": 4.929245283018868e-06, "loss": 1.2915, "step": 418 }, { "epoch": 0.059319034472994976, "grad_norm": 15.484086758637831, "learning_rate": 4.9410377358490565e-06, "loss": 1.6348, "step": 419 }, { "epoch": 0.05946060734763219, "grad_norm": 10.959424524165442, "learning_rate": 4.952830188679246e-06, "loss": 1.5184, "step": 420 }, { "epoch": 0.059602180222269416, "grad_norm": 13.137251304065545, "learning_rate": 4.9646226415094346e-06, "loss": 1.5784, "step": 421 }, { "epoch": 0.05974375309690663, "grad_norm": 14.124406920278435, "learning_rate": 4.976415094339623e-06, "loss": 1.5202, "step": 422 }, { "epoch": 0.059885325971543855, "grad_norm": 11.937542181290004, "learning_rate": 4.988207547169812e-06, "loss": 1.5244, "step": 423 }, { "epoch": 0.06002689884618107, "grad_norm": 9.742690256173312, "learning_rate": 5e-06, "loss": 1.2642, "step": 424 }, { "epoch": 0.06016847172081829, "grad_norm": 9.822308928431587, "learning_rate": 4.999999934288433e-06, "loss": 1.4309, "step": 425 }, { "epoch": 0.06031004459545551, "grad_norm": 11.332820955549721, "learning_rate": 4.999999737153732e-06, "loss": 1.4631, "step": 426 }, { "epoch": 0.06045161747009273, "grad_norm": 11.556825967385235, "learning_rate": 4.999999408595909e-06, "loss": 1.4601, "step": 427 }, { "epoch": 0.06059319034472995, "grad_norm": 14.063105998227519, "learning_rate": 4.999998948614983e-06, "loss": 1.4723, "step": 428 }, { "epoch": 0.06073476321936717, "grad_norm": 12.153318703859375, "learning_rate": 4.999998357210974e-06, "loss": 1.8034, "step": 429 }, { "epoch": 0.06087633609400439, "grad_norm": 12.941784135555968, "learning_rate": 4.999997634383916e-06, "loss": 1.3495, "step": 430 }, { "epoch": 0.06101790896864161, "grad_norm": 13.802442937041016, "learning_rate": 4.9999967801338475e-06, "loss": 1.4215, "step": 431 }, { "epoch": 0.06115948184327883, "grad_norm": 10.468728865417098, "learning_rate": 4.9999957944608115e-06, "loss": 1.4304, "step": 432 }, { "epoch": 0.06130105471791605, "grad_norm": 13.090205693854543, "learning_rate": 4.999994677364861e-06, "loss": 1.5751, "step": 433 }, { "epoch": 0.06144262759255327, "grad_norm": 10.844246361005618, "learning_rate": 4.999993428846054e-06, "loss": 1.438, "step": 434 }, { "epoch": 0.061584200467190486, "grad_norm": 10.791189018686111, "learning_rate": 4.999992048904457e-06, "loss": 1.3169, "step": 435 }, { "epoch": 0.0617257733418277, "grad_norm": 10.443122491527468, "learning_rate": 4.999990537540142e-06, "loss": 1.4441, "step": 436 }, { "epoch": 0.061867346216464926, "grad_norm": 10.822367669219775, "learning_rate": 4.999988894753189e-06, "loss": 1.4573, "step": 437 }, { "epoch": 0.06200891909110214, "grad_norm": 12.175743027089506, "learning_rate": 4.999987120543682e-06, "loss": 1.5634, "step": 438 }, { "epoch": 0.062150491965739366, "grad_norm": 10.986843217091664, "learning_rate": 4.999985214911718e-06, "loss": 1.6051, "step": 439 }, { "epoch": 0.06229206484037658, "grad_norm": 13.378633094839254, "learning_rate": 4.9999831778573945e-06, "loss": 1.3025, "step": 440 }, { "epoch": 0.062433637715013805, "grad_norm": 11.404716827609914, "learning_rate": 4.99998100938082e-06, "loss": 1.5891, "step": 441 }, { "epoch": 0.06257521058965103, "grad_norm": 10.475412918728841, "learning_rate": 4.999978709482108e-06, "loss": 1.6087, "step": 442 }, { "epoch": 0.06271678346428824, "grad_norm": 10.405053055576193, "learning_rate": 4.999976278161378e-06, "loss": 1.601, "step": 443 }, { "epoch": 0.06285835633892546, "grad_norm": 10.537284774663883, "learning_rate": 4.9999737154187596e-06, "loss": 1.5019, "step": 444 }, { "epoch": 0.06299992921356268, "grad_norm": 11.446071060613304, "learning_rate": 4.999971021254387e-06, "loss": 1.4298, "step": 445 }, { "epoch": 0.0631415020881999, "grad_norm": 12.062722633955818, "learning_rate": 4.9999681956684025e-06, "loss": 1.53, "step": 446 }, { "epoch": 0.06328307496283712, "grad_norm": 11.428702217443588, "learning_rate": 4.999965238660954e-06, "loss": 1.5598, "step": 447 }, { "epoch": 0.06342464783747434, "grad_norm": 11.700069094722862, "learning_rate": 4.999962150232197e-06, "loss": 1.6052, "step": 448 }, { "epoch": 0.06356622071211156, "grad_norm": 11.728143370705704, "learning_rate": 4.999958930382293e-06, "loss": 1.5422, "step": 449 }, { "epoch": 0.06370779358674877, "grad_norm": 9.764397188439913, "learning_rate": 4.999955579111413e-06, "loss": 1.5556, "step": 450 }, { "epoch": 0.063849366461386, "grad_norm": 12.1334758903876, "learning_rate": 4.999952096419731e-06, "loss": 1.4828, "step": 451 }, { "epoch": 0.06399093933602322, "grad_norm": 12.090117626138596, "learning_rate": 4.999948482307433e-06, "loss": 1.5335, "step": 452 }, { "epoch": 0.06413251221066044, "grad_norm": 11.912947565659325, "learning_rate": 4.999944736774706e-06, "loss": 1.5197, "step": 453 }, { "epoch": 0.06427408508529765, "grad_norm": 11.46458754505041, "learning_rate": 4.999940859821749e-06, "loss": 1.229, "step": 454 }, { "epoch": 0.06441565795993488, "grad_norm": 12.99614423645138, "learning_rate": 4.999936851448764e-06, "loss": 1.4235, "step": 455 }, { "epoch": 0.0645572308345721, "grad_norm": 13.802513689733237, "learning_rate": 4.9999327116559634e-06, "loss": 1.5288, "step": 456 }, { "epoch": 0.06469880370920932, "grad_norm": 11.233554942796694, "learning_rate": 4.999928440443565e-06, "loss": 1.4566, "step": 457 }, { "epoch": 0.06484037658384653, "grad_norm": 10.988725259314979, "learning_rate": 4.999924037811792e-06, "loss": 1.5973, "step": 458 }, { "epoch": 0.06498194945848375, "grad_norm": 12.718898762950648, "learning_rate": 4.9999195037608765e-06, "loss": 1.2781, "step": 459 }, { "epoch": 0.06512352233312098, "grad_norm": 12.0159862845961, "learning_rate": 4.999914838291056e-06, "loss": 1.5239, "step": 460 }, { "epoch": 0.0652650952077582, "grad_norm": 13.805173285701349, "learning_rate": 4.999910041402577e-06, "loss": 1.5116, "step": 461 }, { "epoch": 0.06540666808239541, "grad_norm": 16.44639248235289, "learning_rate": 4.999905113095691e-06, "loss": 1.6311, "step": 462 }, { "epoch": 0.06554824095703263, "grad_norm": 13.603456596470998, "learning_rate": 4.999900053370657e-06, "loss": 1.5138, "step": 463 }, { "epoch": 0.06568981383166986, "grad_norm": 16.035405087137455, "learning_rate": 4.999894862227741e-06, "loss": 1.563, "step": 464 }, { "epoch": 0.06583138670630707, "grad_norm": 11.927684323468345, "learning_rate": 4.999889539667217e-06, "loss": 1.5195, "step": 465 }, { "epoch": 0.06597295958094429, "grad_norm": 15.219609731715929, "learning_rate": 4.999884085689363e-06, "loss": 1.6859, "step": 466 }, { "epoch": 0.0661145324555815, "grad_norm": 13.508590790548228, "learning_rate": 4.9998785002944665e-06, "loss": 1.6788, "step": 467 }, { "epoch": 0.06625610533021874, "grad_norm": 14.311418384178145, "learning_rate": 4.999872783482822e-06, "loss": 1.8242, "step": 468 }, { "epoch": 0.06639767820485595, "grad_norm": 14.750245690320032, "learning_rate": 4.999866935254729e-06, "loss": 1.5594, "step": 469 }, { "epoch": 0.06653925107949317, "grad_norm": 11.51696786077198, "learning_rate": 4.999860955610495e-06, "loss": 1.6254, "step": 470 }, { "epoch": 0.06668082395413039, "grad_norm": 13.51714225587735, "learning_rate": 4.9998548445504345e-06, "loss": 1.6437, "step": 471 }, { "epoch": 0.0668223968287676, "grad_norm": 12.538607371023605, "learning_rate": 4.999848602074869e-06, "loss": 1.608, "step": 472 }, { "epoch": 0.06696396970340483, "grad_norm": 11.632870870614132, "learning_rate": 4.999842228184127e-06, "loss": 1.5766, "step": 473 }, { "epoch": 0.06710554257804205, "grad_norm": 14.677573627903866, "learning_rate": 4.999835722878542e-06, "loss": 1.4659, "step": 474 }, { "epoch": 0.06724711545267927, "grad_norm": 12.649120291175391, "learning_rate": 4.999829086158458e-06, "loss": 1.7262, "step": 475 }, { "epoch": 0.06738868832731648, "grad_norm": 10.81804818929813, "learning_rate": 4.999822318024222e-06, "loss": 1.5644, "step": 476 }, { "epoch": 0.06753026120195371, "grad_norm": 15.7947715317208, "learning_rate": 4.999815418476191e-06, "loss": 1.488, "step": 477 }, { "epoch": 0.06767183407659093, "grad_norm": 14.375597070650503, "learning_rate": 4.9998083875147275e-06, "loss": 1.7659, "step": 478 }, { "epoch": 0.06781340695122814, "grad_norm": 10.219952713769262, "learning_rate": 4.9998012251402005e-06, "loss": 1.4859, "step": 479 }, { "epoch": 0.06795497982586536, "grad_norm": 13.255501234894185, "learning_rate": 4.9997939313529875e-06, "loss": 1.4737, "step": 480 }, { "epoch": 0.06809655270050258, "grad_norm": 16.134297374159573, "learning_rate": 4.999786506153471e-06, "loss": 1.5892, "step": 481 }, { "epoch": 0.06823812557513981, "grad_norm": 12.068032374646158, "learning_rate": 4.999778949542042e-06, "loss": 1.305, "step": 482 }, { "epoch": 0.06837969844977702, "grad_norm": 11.761297111339843, "learning_rate": 4.999771261519099e-06, "loss": 1.569, "step": 483 }, { "epoch": 0.06852127132441424, "grad_norm": 18.301492031954748, "learning_rate": 4.999763442085043e-06, "loss": 1.4771, "step": 484 }, { "epoch": 0.06866284419905146, "grad_norm": 13.788722220482681, "learning_rate": 4.999755491240287e-06, "loss": 1.4208, "step": 485 }, { "epoch": 0.06880441707368869, "grad_norm": 8.969656762197795, "learning_rate": 4.999747408985249e-06, "loss": 1.5328, "step": 486 }, { "epoch": 0.0689459899483259, "grad_norm": 9.461358436651519, "learning_rate": 4.9997391953203535e-06, "loss": 1.4762, "step": 487 }, { "epoch": 0.06908756282296312, "grad_norm": 11.80433849169713, "learning_rate": 4.999730850246032e-06, "loss": 1.5599, "step": 488 }, { "epoch": 0.06922913569760034, "grad_norm": 16.4909384149201, "learning_rate": 4.999722373762725e-06, "loss": 1.5207, "step": 489 }, { "epoch": 0.06937070857223757, "grad_norm": 12.207963714385697, "learning_rate": 4.999713765870875e-06, "loss": 1.4793, "step": 490 }, { "epoch": 0.06951228144687478, "grad_norm": 11.416758590291046, "learning_rate": 4.999705026570937e-06, "loss": 1.4224, "step": 491 }, { "epoch": 0.069653854321512, "grad_norm": 13.547776065765369, "learning_rate": 4.999696155863369e-06, "loss": 1.5234, "step": 492 }, { "epoch": 0.06979542719614922, "grad_norm": 15.924343048869737, "learning_rate": 4.999687153748638e-06, "loss": 1.4671, "step": 493 }, { "epoch": 0.06993700007078643, "grad_norm": 13.914658513870084, "learning_rate": 4.9996780202272175e-06, "loss": 1.3998, "step": 494 }, { "epoch": 0.07007857294542366, "grad_norm": 12.99757538706288, "learning_rate": 4.999668755299588e-06, "loss": 1.4688, "step": 495 }, { "epoch": 0.07022014582006088, "grad_norm": 13.80811360993397, "learning_rate": 4.999659358966235e-06, "loss": 1.3305, "step": 496 }, { "epoch": 0.0703617186946981, "grad_norm": 14.82587842735054, "learning_rate": 4.999649831227654e-06, "loss": 1.5667, "step": 497 }, { "epoch": 0.07050329156933531, "grad_norm": 9.403285779304255, "learning_rate": 4.999640172084345e-06, "loss": 1.4057, "step": 498 }, { "epoch": 0.07064486444397254, "grad_norm": 11.47047681567363, "learning_rate": 4.999630381536815e-06, "loss": 1.2765, "step": 499 }, { "epoch": 0.07078643731860976, "grad_norm": 13.185388766688336, "learning_rate": 4.99962045958558e-06, "loss": 1.4575, "step": 500 }, { "epoch": 0.07092801019324697, "grad_norm": 12.577961834151319, "learning_rate": 4.999610406231162e-06, "loss": 1.5344, "step": 501 }, { "epoch": 0.07106958306788419, "grad_norm": 11.805460452027061, "learning_rate": 4.999600221474089e-06, "loss": 1.5744, "step": 502 }, { "epoch": 0.0712111559425214, "grad_norm": 12.9381964307195, "learning_rate": 4.999589905314895e-06, "loss": 1.414, "step": 503 }, { "epoch": 0.07135272881715864, "grad_norm": 17.075614165267783, "learning_rate": 4.9995794577541235e-06, "loss": 1.68, "step": 504 }, { "epoch": 0.07149430169179585, "grad_norm": 10.936315156247892, "learning_rate": 4.999568878792324e-06, "loss": 1.4558, "step": 505 }, { "epoch": 0.07163587456643307, "grad_norm": 12.795593719242952, "learning_rate": 4.999558168430053e-06, "loss": 1.421, "step": 506 }, { "epoch": 0.07177744744107029, "grad_norm": 15.565551948797163, "learning_rate": 4.999547326667872e-06, "loss": 1.554, "step": 507 }, { "epoch": 0.07191902031570752, "grad_norm": 11.833314994117245, "learning_rate": 4.999536353506352e-06, "loss": 1.4762, "step": 508 }, { "epoch": 0.07206059319034473, "grad_norm": 9.09547098660986, "learning_rate": 4.99952524894607e-06, "loss": 1.4605, "step": 509 }, { "epoch": 0.07220216606498195, "grad_norm": 13.719351718367221, "learning_rate": 4.999514012987609e-06, "loss": 1.619, "step": 510 }, { "epoch": 0.07234373893961916, "grad_norm": 17.19131349739237, "learning_rate": 4.99950264563156e-06, "loss": 1.4046, "step": 511 }, { "epoch": 0.0724853118142564, "grad_norm": 13.688050141448368, "learning_rate": 4.99949114687852e-06, "loss": 1.5382, "step": 512 }, { "epoch": 0.07262688468889361, "grad_norm": 18.777691953163217, "learning_rate": 4.9994795167290954e-06, "loss": 1.5931, "step": 513 }, { "epoch": 0.07276845756353083, "grad_norm": 13.890788674865284, "learning_rate": 4.999467755183895e-06, "loss": 1.4583, "step": 514 }, { "epoch": 0.07291003043816804, "grad_norm": 11.512549808821065, "learning_rate": 4.999455862243539e-06, "loss": 1.3902, "step": 515 }, { "epoch": 0.07305160331280526, "grad_norm": 14.734938350842361, "learning_rate": 4.999443837908653e-06, "loss": 1.6652, "step": 516 }, { "epoch": 0.07319317618744249, "grad_norm": 11.054571899240978, "learning_rate": 4.999431682179867e-06, "loss": 1.3328, "step": 517 }, { "epoch": 0.07333474906207971, "grad_norm": 12.59049517881073, "learning_rate": 4.999419395057821e-06, "loss": 1.6232, "step": 518 }, { "epoch": 0.07347632193671692, "grad_norm": 14.59989195483218, "learning_rate": 4.999406976543162e-06, "loss": 1.5964, "step": 519 }, { "epoch": 0.07361789481135414, "grad_norm": 9.666164999035255, "learning_rate": 4.999394426636541e-06, "loss": 1.3423, "step": 520 }, { "epoch": 0.07375946768599137, "grad_norm": 11.517200377740053, "learning_rate": 4.9993817453386185e-06, "loss": 1.4648, "step": 521 }, { "epoch": 0.07390104056062859, "grad_norm": 11.925381406046832, "learning_rate": 4.999368932650062e-06, "loss": 1.2968, "step": 522 }, { "epoch": 0.0740426134352658, "grad_norm": 11.650467079851818, "learning_rate": 4.999355988571544e-06, "loss": 1.5174, "step": 523 }, { "epoch": 0.07418418630990302, "grad_norm": 11.306361097609225, "learning_rate": 4.999342913103745e-06, "loss": 1.6329, "step": 524 }, { "epoch": 0.07432575918454024, "grad_norm": 9.930882583841326, "learning_rate": 4.999329706247353e-06, "loss": 1.5081, "step": 525 }, { "epoch": 0.07446733205917747, "grad_norm": 9.176820114003824, "learning_rate": 4.999316368003062e-06, "loss": 1.6258, "step": 526 }, { "epoch": 0.07460890493381468, "grad_norm": 10.538498554453883, "learning_rate": 4.999302898371572e-06, "loss": 1.5378, "step": 527 }, { "epoch": 0.0747504778084519, "grad_norm": 11.622740166985487, "learning_rate": 4.999289297353593e-06, "loss": 1.288, "step": 528 }, { "epoch": 0.07489205068308911, "grad_norm": 12.277252400829425, "learning_rate": 4.9992755649498395e-06, "loss": 1.4761, "step": 529 }, { "epoch": 0.07503362355772634, "grad_norm": 10.835349927179722, "learning_rate": 4.999261701161033e-06, "loss": 1.5363, "step": 530 }, { "epoch": 0.07517519643236356, "grad_norm": 13.249466219532735, "learning_rate": 4.999247705987902e-06, "loss": 1.5437, "step": 531 }, { "epoch": 0.07531676930700078, "grad_norm": 12.464650333425736, "learning_rate": 4.999233579431183e-06, "loss": 1.4988, "step": 532 }, { "epoch": 0.075458342181638, "grad_norm": 14.822912491855673, "learning_rate": 4.999219321491618e-06, "loss": 1.5167, "step": 533 }, { "epoch": 0.07559991505627522, "grad_norm": 11.172517942054395, "learning_rate": 4.999204932169958e-06, "loss": 1.3949, "step": 534 }, { "epoch": 0.07574148793091244, "grad_norm": 11.874834240208234, "learning_rate": 4.999190411466956e-06, "loss": 1.4766, "step": 535 }, { "epoch": 0.07588306080554966, "grad_norm": 14.305089106357231, "learning_rate": 4.999175759383379e-06, "loss": 1.582, "step": 536 }, { "epoch": 0.07602463368018687, "grad_norm": 10.74180254992077, "learning_rate": 4.9991609759199954e-06, "loss": 1.4935, "step": 537 }, { "epoch": 0.07616620655482409, "grad_norm": 10.922164192791504, "learning_rate": 4.9991460610775825e-06, "loss": 1.4974, "step": 538 }, { "epoch": 0.07630777942946132, "grad_norm": 16.01472653729851, "learning_rate": 4.999131014856925e-06, "loss": 1.6579, "step": 539 }, { "epoch": 0.07644935230409854, "grad_norm": 13.823007858807301, "learning_rate": 4.999115837258813e-06, "loss": 1.4678, "step": 540 }, { "epoch": 0.07659092517873575, "grad_norm": 14.743865725720848, "learning_rate": 4.999100528284045e-06, "loss": 1.3637, "step": 541 }, { "epoch": 0.07673249805337297, "grad_norm": 20.605775693803896, "learning_rate": 4.999085087933426e-06, "loss": 1.5534, "step": 542 }, { "epoch": 0.0768740709280102, "grad_norm": 12.025224876283824, "learning_rate": 4.999069516207767e-06, "loss": 1.4447, "step": 543 }, { "epoch": 0.07701564380264742, "grad_norm": 15.570002819298677, "learning_rate": 4.9990538131078885e-06, "loss": 1.5326, "step": 544 }, { "epoch": 0.07715721667728463, "grad_norm": 14.216791911018847, "learning_rate": 4.9990379786346126e-06, "loss": 1.547, "step": 545 }, { "epoch": 0.07729878955192185, "grad_norm": 12.584309790579436, "learning_rate": 4.999022012788774e-06, "loss": 1.3976, "step": 546 }, { "epoch": 0.07744036242655906, "grad_norm": 11.70358458814709, "learning_rate": 4.999005915571211e-06, "loss": 1.3942, "step": 547 }, { "epoch": 0.0775819353011963, "grad_norm": 13.50626538729023, "learning_rate": 4.998989686982771e-06, "loss": 1.3389, "step": 548 }, { "epoch": 0.07772350817583351, "grad_norm": 12.007381711937532, "learning_rate": 4.998973327024306e-06, "loss": 1.6479, "step": 549 }, { "epoch": 0.07786508105047073, "grad_norm": 10.783130072712611, "learning_rate": 4.998956835696676e-06, "loss": 1.6616, "step": 550 }, { "epoch": 0.07800665392510794, "grad_norm": 14.521164279634496, "learning_rate": 4.99894021300075e-06, "loss": 1.5159, "step": 551 }, { "epoch": 0.07814822679974517, "grad_norm": 10.74762681892005, "learning_rate": 4.998923458937399e-06, "loss": 1.498, "step": 552 }, { "epoch": 0.07828979967438239, "grad_norm": 10.794268987798898, "learning_rate": 4.998906573507506e-06, "loss": 1.6019, "step": 553 }, { "epoch": 0.0784313725490196, "grad_norm": 10.770347677727079, "learning_rate": 4.998889556711958e-06, "loss": 1.3818, "step": 554 }, { "epoch": 0.07857294542365682, "grad_norm": 13.408105681774469, "learning_rate": 4.998872408551648e-06, "loss": 1.5437, "step": 555 }, { "epoch": 0.07871451829829405, "grad_norm": 12.028758873593839, "learning_rate": 4.998855129027479e-06, "loss": 1.4907, "step": 556 }, { "epoch": 0.07885609117293127, "grad_norm": 10.56384666029411, "learning_rate": 4.998837718140359e-06, "loss": 1.2869, "step": 557 }, { "epoch": 0.07899766404756849, "grad_norm": 12.28478983952787, "learning_rate": 4.998820175891204e-06, "loss": 1.3529, "step": 558 }, { "epoch": 0.0791392369222057, "grad_norm": 11.792219564642705, "learning_rate": 4.998802502280936e-06, "loss": 1.4622, "step": 559 }, { "epoch": 0.07928080979684292, "grad_norm": 13.329092368542602, "learning_rate": 4.998784697310483e-06, "loss": 1.3649, "step": 560 }, { "epoch": 0.07942238267148015, "grad_norm": 11.057869324559023, "learning_rate": 4.998766760980781e-06, "loss": 1.2895, "step": 561 }, { "epoch": 0.07956395554611737, "grad_norm": 17.875739235697534, "learning_rate": 4.998748693292774e-06, "loss": 1.4857, "step": 562 }, { "epoch": 0.07970552842075458, "grad_norm": 11.603381328386702, "learning_rate": 4.9987304942474115e-06, "loss": 1.4248, "step": 563 }, { "epoch": 0.0798471012953918, "grad_norm": 11.887607026044563, "learning_rate": 4.99871216384565e-06, "loss": 1.5283, "step": 564 }, { "epoch": 0.07998867417002903, "grad_norm": 15.473891388311657, "learning_rate": 4.998693702088453e-06, "loss": 1.3257, "step": 565 }, { "epoch": 0.08013024704466624, "grad_norm": 12.20093347699105, "learning_rate": 4.998675108976792e-06, "loss": 1.4658, "step": 566 }, { "epoch": 0.08027181991930346, "grad_norm": 9.263750320864883, "learning_rate": 4.998656384511643e-06, "loss": 1.3546, "step": 567 }, { "epoch": 0.08041339279394068, "grad_norm": 11.217979468446721, "learning_rate": 4.998637528693991e-06, "loss": 1.3715, "step": 568 }, { "epoch": 0.0805549656685779, "grad_norm": 10.859395374306427, "learning_rate": 4.998618541524827e-06, "loss": 1.5041, "step": 569 }, { "epoch": 0.08069653854321512, "grad_norm": 11.334176050905937, "learning_rate": 4.998599423005149e-06, "loss": 1.5975, "step": 570 }, { "epoch": 0.08083811141785234, "grad_norm": 12.853243621048783, "learning_rate": 4.998580173135963e-06, "loss": 1.6349, "step": 571 }, { "epoch": 0.08097968429248956, "grad_norm": 12.323518576584444, "learning_rate": 4.99856079191828e-06, "loss": 1.6108, "step": 572 }, { "epoch": 0.08112125716712677, "grad_norm": 12.192347418413284, "learning_rate": 4.998541279353119e-06, "loss": 1.3957, "step": 573 }, { "epoch": 0.081262830041764, "grad_norm": 13.405047453896273, "learning_rate": 4.998521635441506e-06, "loss": 1.6809, "step": 574 }, { "epoch": 0.08140440291640122, "grad_norm": 15.857695031095117, "learning_rate": 4.998501860184474e-06, "loss": 1.5762, "step": 575 }, { "epoch": 0.08154597579103844, "grad_norm": 9.232440674893594, "learning_rate": 4.998481953583062e-06, "loss": 1.3994, "step": 576 }, { "epoch": 0.08168754866567565, "grad_norm": 12.481748760651428, "learning_rate": 4.998461915638316e-06, "loss": 1.5891, "step": 577 }, { "epoch": 0.08182912154031288, "grad_norm": 12.296861350453034, "learning_rate": 4.9984417463512916e-06, "loss": 1.5558, "step": 578 }, { "epoch": 0.0819706944149501, "grad_norm": 15.326900514929013, "learning_rate": 4.998421445723046e-06, "loss": 1.5534, "step": 579 }, { "epoch": 0.08211226728958732, "grad_norm": 10.31201799595626, "learning_rate": 4.9984010137546475e-06, "loss": 1.4455, "step": 580 }, { "epoch": 0.08225384016422453, "grad_norm": 12.5087042448355, "learning_rate": 4.998380450447172e-06, "loss": 1.3342, "step": 581 }, { "epoch": 0.08239541303886175, "grad_norm": 13.580405514579809, "learning_rate": 4.998359755801699e-06, "loss": 1.4596, "step": 582 }, { "epoch": 0.08253698591349898, "grad_norm": 13.054087288902624, "learning_rate": 4.9983389298193165e-06, "loss": 1.3979, "step": 583 }, { "epoch": 0.0826785587881362, "grad_norm": 16.097481363554984, "learning_rate": 4.998317972501119e-06, "loss": 1.5228, "step": 584 }, { "epoch": 0.08282013166277341, "grad_norm": 10.742935698375636, "learning_rate": 4.9982968838482085e-06, "loss": 1.4778, "step": 585 }, { "epoch": 0.08296170453741063, "grad_norm": 9.903097234646703, "learning_rate": 4.998275663861692e-06, "loss": 1.4816, "step": 586 }, { "epoch": 0.08310327741204786, "grad_norm": 11.122389240600851, "learning_rate": 4.998254312542689e-06, "loss": 1.4134, "step": 587 }, { "epoch": 0.08324485028668507, "grad_norm": 14.183161269570082, "learning_rate": 4.998232829892319e-06, "loss": 1.3665, "step": 588 }, { "epoch": 0.08338642316132229, "grad_norm": 11.155595861753048, "learning_rate": 4.998211215911711e-06, "loss": 1.6073, "step": 589 }, { "epoch": 0.0835279960359595, "grad_norm": 11.930498396638235, "learning_rate": 4.998189470602003e-06, "loss": 1.5122, "step": 590 }, { "epoch": 0.08366956891059672, "grad_norm": 11.281233430887244, "learning_rate": 4.998167593964337e-06, "loss": 1.6088, "step": 591 }, { "epoch": 0.08381114178523395, "grad_norm": 10.698549503872774, "learning_rate": 4.998145585999864e-06, "loss": 1.718, "step": 592 }, { "epoch": 0.08395271465987117, "grad_norm": 12.130629029219023, "learning_rate": 4.998123446709739e-06, "loss": 1.4406, "step": 593 }, { "epoch": 0.08409428753450839, "grad_norm": 12.019215140464599, "learning_rate": 4.998101176095128e-06, "loss": 1.5776, "step": 594 }, { "epoch": 0.0842358604091456, "grad_norm": 13.126647845775103, "learning_rate": 4.9980787741572e-06, "loss": 1.5982, "step": 595 }, { "epoch": 0.08437743328378283, "grad_norm": 10.516419424625877, "learning_rate": 4.998056240897134e-06, "loss": 1.4333, "step": 596 }, { "epoch": 0.08451900615842005, "grad_norm": 10.987187987184102, "learning_rate": 4.9980335763161145e-06, "loss": 1.3892, "step": 597 }, { "epoch": 0.08466057903305726, "grad_norm": 11.449621895623208, "learning_rate": 4.998010780415332e-06, "loss": 1.5521, "step": 598 }, { "epoch": 0.08480215190769448, "grad_norm": 10.550068960823195, "learning_rate": 4.997987853195985e-06, "loss": 1.3055, "step": 599 }, { "epoch": 0.08494372478233171, "grad_norm": 12.453996769007968, "learning_rate": 4.99796479465928e-06, "loss": 1.5673, "step": 600 }, { "epoch": 0.08508529765696893, "grad_norm": 12.33823860840792, "learning_rate": 4.997941604806428e-06, "loss": 1.5271, "step": 601 }, { "epoch": 0.08522687053160614, "grad_norm": 9.999167979913496, "learning_rate": 4.997918283638647e-06, "loss": 1.5314, "step": 602 }, { "epoch": 0.08536844340624336, "grad_norm": 12.236385954821447, "learning_rate": 4.9978948311571666e-06, "loss": 1.3992, "step": 603 }, { "epoch": 0.08551001628088058, "grad_norm": 13.226262548282254, "learning_rate": 4.997871247363217e-06, "loss": 1.5569, "step": 604 }, { "epoch": 0.08565158915551781, "grad_norm": 11.555384740626607, "learning_rate": 4.997847532258037e-06, "loss": 1.3373, "step": 605 }, { "epoch": 0.08579316203015502, "grad_norm": 11.366308895383472, "learning_rate": 4.997823685842875e-06, "loss": 1.4023, "step": 606 }, { "epoch": 0.08593473490479224, "grad_norm": 13.47912566204801, "learning_rate": 4.997799708118985e-06, "loss": 1.4767, "step": 607 }, { "epoch": 0.08607630777942946, "grad_norm": 13.01151645838697, "learning_rate": 4.997775599087627e-06, "loss": 1.4361, "step": 608 }, { "epoch": 0.08621788065406669, "grad_norm": 20.13331498572585, "learning_rate": 4.997751358750068e-06, "loss": 1.4365, "step": 609 }, { "epoch": 0.0863594535287039, "grad_norm": 13.686224054933337, "learning_rate": 4.997726987107582e-06, "loss": 1.5838, "step": 610 }, { "epoch": 0.08650102640334112, "grad_norm": 27.791031478951307, "learning_rate": 4.997702484161451e-06, "loss": 1.6395, "step": 611 }, { "epoch": 0.08664259927797834, "grad_norm": 13.08566405964313, "learning_rate": 4.997677849912963e-06, "loss": 1.6016, "step": 612 }, { "epoch": 0.08678417215261557, "grad_norm": 12.454622836717409, "learning_rate": 4.997653084363412e-06, "loss": 1.4053, "step": 613 }, { "epoch": 0.08692574502725278, "grad_norm": 33.64316859287707, "learning_rate": 4.997628187514101e-06, "loss": 1.4172, "step": 614 }, { "epoch": 0.08706731790189, "grad_norm": 11.693265257941347, "learning_rate": 4.997603159366339e-06, "loss": 1.3308, "step": 615 }, { "epoch": 0.08720889077652721, "grad_norm": 63.69815600254846, "learning_rate": 4.99757799992144e-06, "loss": 1.6563, "step": 616 }, { "epoch": 0.08735046365116443, "grad_norm": 19.55322981982053, "learning_rate": 4.997552709180729e-06, "loss": 1.5322, "step": 617 }, { "epoch": 0.08749203652580166, "grad_norm": 13.867243067670357, "learning_rate": 4.997527287145534e-06, "loss": 1.6664, "step": 618 }, { "epoch": 0.08763360940043888, "grad_norm": 11.806626893343958, "learning_rate": 4.997501733817191e-06, "loss": 1.4898, "step": 619 }, { "epoch": 0.0877751822750761, "grad_norm": 12.095407140785596, "learning_rate": 4.997476049197046e-06, "loss": 1.4713, "step": 620 }, { "epoch": 0.08791675514971331, "grad_norm": 12.976527811701077, "learning_rate": 4.9974502332864464e-06, "loss": 1.6953, "step": 621 }, { "epoch": 0.08805832802435054, "grad_norm": 15.734348816078015, "learning_rate": 4.99742428608675e-06, "loss": 1.4128, "step": 622 }, { "epoch": 0.08819990089898776, "grad_norm": 12.002069930523964, "learning_rate": 4.9973982075993204e-06, "loss": 1.5239, "step": 623 }, { "epoch": 0.08834147377362497, "grad_norm": 9.998125663576547, "learning_rate": 4.99737199782553e-06, "loss": 1.4491, "step": 624 }, { "epoch": 0.08848304664826219, "grad_norm": 18.504706531538385, "learning_rate": 4.997345656766755e-06, "loss": 1.5198, "step": 625 }, { "epoch": 0.0886246195228994, "grad_norm": 15.063154910717005, "learning_rate": 4.997319184424382e-06, "loss": 1.6458, "step": 626 }, { "epoch": 0.08876619239753664, "grad_norm": 33.441875078844824, "learning_rate": 4.997292580799801e-06, "loss": 1.4896, "step": 627 }, { "epoch": 0.08890776527217385, "grad_norm": 99.67360796620136, "learning_rate": 4.997265845894411e-06, "loss": 1.4678, "step": 628 }, { "epoch": 0.08904933814681107, "grad_norm": 75.94944607681481, "learning_rate": 4.997238979709617e-06, "loss": 1.7554, "step": 629 }, { "epoch": 0.08919091102144829, "grad_norm": 29.49600140902312, "learning_rate": 4.997211982246833e-06, "loss": 1.4393, "step": 630 }, { "epoch": 0.08933248389608552, "grad_norm": 60.09388609903985, "learning_rate": 4.997184853507476e-06, "loss": 1.5791, "step": 631 }, { "epoch": 0.08947405677072273, "grad_norm": 261.71309135673744, "learning_rate": 4.997157593492974e-06, "loss": 2.1542, "step": 632 }, { "epoch": 0.08961562964535995, "grad_norm": 833.5889274696492, "learning_rate": 4.997130202204759e-06, "loss": 8.2476, "step": 633 }, { "epoch": 0.08975720251999716, "grad_norm": 956.0032034465156, "learning_rate": 4.997102679644271e-06, "loss": 14.22, "step": 634 }, { "epoch": 0.0898987753946344, "grad_norm": 430.23429300208045, "learning_rate": 4.997075025812957e-06, "loss": 8.0691, "step": 635 }, { "epoch": 0.09004034826927161, "grad_norm": 362.30667258553217, "learning_rate": 4.997047240712272e-06, "loss": 5.1684, "step": 636 }, { "epoch": 0.09018192114390883, "grad_norm": 349.0120543226265, "learning_rate": 4.997019324343674e-06, "loss": 5.9287, "step": 637 }, { "epoch": 0.09032349401854604, "grad_norm": 200.0047362720443, "learning_rate": 4.996991276708633e-06, "loss": 4.6246, "step": 638 }, { "epoch": 0.09046506689318326, "grad_norm": 156.94261491067363, "learning_rate": 4.996963097808622e-06, "loss": 3.2692, "step": 639 }, { "epoch": 0.09060663976782049, "grad_norm": 112.93656612784895, "learning_rate": 4.996934787645123e-06, "loss": 3.019, "step": 640 }, { "epoch": 0.0907482126424577, "grad_norm": 154.87078157087927, "learning_rate": 4.996906346219623e-06, "loss": 2.68, "step": 641 }, { "epoch": 0.09088978551709492, "grad_norm": 83.58988016743564, "learning_rate": 4.996877773533619e-06, "loss": 2.8031, "step": 642 }, { "epoch": 0.09103135839173214, "grad_norm": 40.53634557406476, "learning_rate": 4.996849069588612e-06, "loss": 2.2218, "step": 643 }, { "epoch": 0.09117293126636937, "grad_norm": 89.59449663972113, "learning_rate": 4.996820234386112e-06, "loss": 2.2926, "step": 644 }, { "epoch": 0.09131450414100659, "grad_norm": 39.404171361711505, "learning_rate": 4.996791267927632e-06, "loss": 2.4727, "step": 645 }, { "epoch": 0.0914560770156438, "grad_norm": 32.55745559835815, "learning_rate": 4.996762170214698e-06, "loss": 2.1037, "step": 646 }, { "epoch": 0.09159764989028102, "grad_norm": 35.91285670017337, "learning_rate": 4.996732941248839e-06, "loss": 2.2015, "step": 647 }, { "epoch": 0.09173922276491824, "grad_norm": 27.37430101312235, "learning_rate": 4.99670358103159e-06, "loss": 2.0208, "step": 648 }, { "epoch": 0.09188079563955547, "grad_norm": 19.10302828604056, "learning_rate": 4.996674089564495e-06, "loss": 1.9139, "step": 649 }, { "epoch": 0.09202236851419268, "grad_norm": 24.40575483652626, "learning_rate": 4.9966444668491055e-06, "loss": 1.915, "step": 650 }, { "epoch": 0.0921639413888299, "grad_norm": 23.90962368964574, "learning_rate": 4.996614712886978e-06, "loss": 1.9129, "step": 651 }, { "epoch": 0.09230551426346711, "grad_norm": 14.977682168305135, "learning_rate": 4.996584827679676e-06, "loss": 1.8413, "step": 652 }, { "epoch": 0.09244708713810434, "grad_norm": 17.727523111918195, "learning_rate": 4.996554811228772e-06, "loss": 1.8157, "step": 653 }, { "epoch": 0.09258866001274156, "grad_norm": 17.307209184212624, "learning_rate": 4.996524663535842e-06, "loss": 1.8923, "step": 654 }, { "epoch": 0.09273023288737878, "grad_norm": 16.586874016777166, "learning_rate": 4.996494384602473e-06, "loss": 1.7986, "step": 655 }, { "epoch": 0.092871805762016, "grad_norm": 14.67964904033014, "learning_rate": 4.996463974430255e-06, "loss": 1.8594, "step": 656 }, { "epoch": 0.09301337863665322, "grad_norm": 17.107621556839256, "learning_rate": 4.996433433020788e-06, "loss": 1.8018, "step": 657 }, { "epoch": 0.09315495151129044, "grad_norm": 18.781724463640845, "learning_rate": 4.996402760375676e-06, "loss": 1.7647, "step": 658 }, { "epoch": 0.09329652438592766, "grad_norm": 15.103173514851989, "learning_rate": 4.996371956496532e-06, "loss": 1.7632, "step": 659 }, { "epoch": 0.09343809726056487, "grad_norm": 22.185477323076338, "learning_rate": 4.996341021384976e-06, "loss": 1.884, "step": 660 }, { "epoch": 0.09357967013520209, "grad_norm": 13.069686154112201, "learning_rate": 4.996309955042634e-06, "loss": 1.6154, "step": 661 }, { "epoch": 0.09372124300983932, "grad_norm": 14.43765377606823, "learning_rate": 4.996278757471139e-06, "loss": 1.807, "step": 662 }, { "epoch": 0.09386281588447654, "grad_norm": 15.026096277341548, "learning_rate": 4.996247428672132e-06, "loss": 1.7516, "step": 663 }, { "epoch": 0.09400438875911375, "grad_norm": 12.58441952360512, "learning_rate": 4.996215968647258e-06, "loss": 1.6755, "step": 664 }, { "epoch": 0.09414596163375097, "grad_norm": 14.5020887251657, "learning_rate": 4.996184377398171e-06, "loss": 1.7809, "step": 665 }, { "epoch": 0.0942875345083882, "grad_norm": 15.816945532915353, "learning_rate": 4.996152654926534e-06, "loss": 1.8357, "step": 666 }, { "epoch": 0.09442910738302542, "grad_norm": 12.124910649235858, "learning_rate": 4.996120801234012e-06, "loss": 1.8485, "step": 667 }, { "epoch": 0.09457068025766263, "grad_norm": 12.146126186881817, "learning_rate": 4.996088816322281e-06, "loss": 1.7427, "step": 668 }, { "epoch": 0.09471225313229985, "grad_norm": 12.383358377163507, "learning_rate": 4.996056700193023e-06, "loss": 1.6613, "step": 669 }, { "epoch": 0.09485382600693706, "grad_norm": 12.55856944982097, "learning_rate": 4.996024452847924e-06, "loss": 1.7892, "step": 670 }, { "epoch": 0.0949953988815743, "grad_norm": 9.979963065100076, "learning_rate": 4.9959920742886815e-06, "loss": 1.8824, "step": 671 }, { "epoch": 0.09513697175621151, "grad_norm": 15.623803665138553, "learning_rate": 4.995959564516997e-06, "loss": 1.7777, "step": 672 }, { "epoch": 0.09527854463084873, "grad_norm": 11.113736740995106, "learning_rate": 4.995926923534578e-06, "loss": 1.6039, "step": 673 }, { "epoch": 0.09542011750548594, "grad_norm": 12.044700020506296, "learning_rate": 4.995894151343143e-06, "loss": 1.6588, "step": 674 }, { "epoch": 0.09556169038012317, "grad_norm": 9.863062342867801, "learning_rate": 4.9958612479444125e-06, "loss": 1.5908, "step": 675 }, { "epoch": 0.09570326325476039, "grad_norm": 11.539173577664423, "learning_rate": 4.995828213340118e-06, "loss": 1.7817, "step": 676 }, { "epoch": 0.0958448361293976, "grad_norm": 10.94668420205549, "learning_rate": 4.995795047531994e-06, "loss": 1.6401, "step": 677 }, { "epoch": 0.09598640900403482, "grad_norm": 11.794534251423743, "learning_rate": 4.995761750521787e-06, "loss": 1.7014, "step": 678 }, { "epoch": 0.09612798187867205, "grad_norm": 11.350778803384346, "learning_rate": 4.995728322311244e-06, "loss": 1.4844, "step": 679 }, { "epoch": 0.09626955475330927, "grad_norm": 10.883551732004706, "learning_rate": 4.995694762902125e-06, "loss": 1.6731, "step": 680 }, { "epoch": 0.09641112762794649, "grad_norm": 11.79099863988548, "learning_rate": 4.9956610722961936e-06, "loss": 1.6883, "step": 681 }, { "epoch": 0.0965527005025837, "grad_norm": 13.557387834408889, "learning_rate": 4.99562725049522e-06, "loss": 1.6599, "step": 682 }, { "epoch": 0.09669427337722092, "grad_norm": 9.681447753462669, "learning_rate": 4.9955932975009825e-06, "loss": 1.7209, "step": 683 }, { "epoch": 0.09683584625185815, "grad_norm": 10.832149577878763, "learning_rate": 4.995559213315267e-06, "loss": 1.6219, "step": 684 }, { "epoch": 0.09697741912649536, "grad_norm": 11.1240398549858, "learning_rate": 4.9955249979398625e-06, "loss": 1.5699, "step": 685 }, { "epoch": 0.09711899200113258, "grad_norm": 12.402177041202409, "learning_rate": 4.995490651376571e-06, "loss": 1.6338, "step": 686 }, { "epoch": 0.0972605648757698, "grad_norm": 8.826652687653288, "learning_rate": 4.9954561736271966e-06, "loss": 1.4361, "step": 687 }, { "epoch": 0.09740213775040703, "grad_norm": 12.081370718284296, "learning_rate": 4.995421564693551e-06, "loss": 1.694, "step": 688 }, { "epoch": 0.09754371062504424, "grad_norm": 11.568901970275812, "learning_rate": 4.995386824577455e-06, "loss": 1.6181, "step": 689 }, { "epoch": 0.09768528349968146, "grad_norm": 10.520255759723716, "learning_rate": 4.995351953280735e-06, "loss": 1.6927, "step": 690 }, { "epoch": 0.09782685637431868, "grad_norm": 11.6620333235776, "learning_rate": 4.995316950805223e-06, "loss": 1.5393, "step": 691 }, { "epoch": 0.0979684292489559, "grad_norm": 10.624676867788585, "learning_rate": 4.995281817152759e-06, "loss": 1.5751, "step": 692 }, { "epoch": 0.09811000212359312, "grad_norm": 15.811520029693984, "learning_rate": 4.995246552325191e-06, "loss": 1.7659, "step": 693 }, { "epoch": 0.09825157499823034, "grad_norm": 9.260416300501493, "learning_rate": 4.9952111563243715e-06, "loss": 1.5984, "step": 694 }, { "epoch": 0.09839314787286756, "grad_norm": 10.574289818402663, "learning_rate": 4.995175629152162e-06, "loss": 1.4672, "step": 695 }, { "epoch": 0.09853472074750477, "grad_norm": 12.619810945331093, "learning_rate": 4.995139970810431e-06, "loss": 1.5841, "step": 696 }, { "epoch": 0.098676293622142, "grad_norm": 10.688343037154207, "learning_rate": 4.995104181301052e-06, "loss": 1.6306, "step": 697 }, { "epoch": 0.09881786649677922, "grad_norm": 10.64254246042611, "learning_rate": 4.995068260625906e-06, "loss": 1.6155, "step": 698 }, { "epoch": 0.09895943937141644, "grad_norm": 11.752506719188478, "learning_rate": 4.995032208786883e-06, "loss": 1.5808, "step": 699 }, { "epoch": 0.09910101224605365, "grad_norm": 9.882519140902916, "learning_rate": 4.994996025785876e-06, "loss": 1.5292, "step": 700 }, { "epoch": 0.09924258512069088, "grad_norm": 11.49757293299805, "learning_rate": 4.99495971162479e-06, "loss": 1.767, "step": 701 }, { "epoch": 0.0993841579953281, "grad_norm": 9.63913115515447, "learning_rate": 4.9949232663055304e-06, "loss": 1.4986, "step": 702 }, { "epoch": 0.09952573086996531, "grad_norm": 11.89502320924333, "learning_rate": 4.994886689830015e-06, "loss": 1.8406, "step": 703 }, { "epoch": 0.09966730374460253, "grad_norm": 11.211246456979733, "learning_rate": 4.994849982200168e-06, "loss": 1.6265, "step": 704 }, { "epoch": 0.09980887661923975, "grad_norm": 11.459386344843717, "learning_rate": 4.994813143417917e-06, "loss": 1.6349, "step": 705 }, { "epoch": 0.09995044949387698, "grad_norm": 13.459429213053982, "learning_rate": 4.994776173485199e-06, "loss": 1.5345, "step": 706 }, { "epoch": 0.1000920223685142, "grad_norm": 10.669543077147102, "learning_rate": 4.994739072403958e-06, "loss": 1.5845, "step": 707 }, { "epoch": 0.10023359524315141, "grad_norm": 10.475920541596698, "learning_rate": 4.994701840176144e-06, "loss": 1.6864, "step": 708 }, { "epoch": 0.10037516811778863, "grad_norm": 13.86117497295447, "learning_rate": 4.994664476803714e-06, "loss": 1.5763, "step": 709 }, { "epoch": 0.10051674099242586, "grad_norm": 9.628325049016143, "learning_rate": 4.9946269822886335e-06, "loss": 1.6373, "step": 710 }, { "epoch": 0.10065831386706307, "grad_norm": 9.945693375095866, "learning_rate": 4.994589356632872e-06, "loss": 1.6476, "step": 711 }, { "epoch": 0.10079988674170029, "grad_norm": 9.312378748161972, "learning_rate": 4.994551599838408e-06, "loss": 1.5423, "step": 712 }, { "epoch": 0.1009414596163375, "grad_norm": 11.47901860099513, "learning_rate": 4.994513711907227e-06, "loss": 1.5144, "step": 713 }, { "epoch": 0.10108303249097472, "grad_norm": 8.8315855012461, "learning_rate": 4.994475692841319e-06, "loss": 1.4727, "step": 714 }, { "epoch": 0.10122460536561195, "grad_norm": 10.96861019208443, "learning_rate": 4.9944375426426846e-06, "loss": 1.6411, "step": 715 }, { "epoch": 0.10136617824024917, "grad_norm": 10.498060501444645, "learning_rate": 4.994399261313329e-06, "loss": 1.3594, "step": 716 }, { "epoch": 0.10150775111488639, "grad_norm": 13.073329980808614, "learning_rate": 4.994360848855264e-06, "loss": 1.41, "step": 717 }, { "epoch": 0.1016493239895236, "grad_norm": 10.602114095934954, "learning_rate": 4.994322305270508e-06, "loss": 1.5643, "step": 718 }, { "epoch": 0.10179089686416083, "grad_norm": 11.847373549009317, "learning_rate": 4.994283630561089e-06, "loss": 1.7558, "step": 719 }, { "epoch": 0.10193246973879805, "grad_norm": 11.042725556660663, "learning_rate": 4.994244824729039e-06, "loss": 1.5328, "step": 720 }, { "epoch": 0.10207404261343526, "grad_norm": 12.460832189993873, "learning_rate": 4.994205887776399e-06, "loss": 1.666, "step": 721 }, { "epoch": 0.10221561548807248, "grad_norm": 7.086175714797533, "learning_rate": 4.9941668197052155e-06, "loss": 1.4206, "step": 722 }, { "epoch": 0.10235718836270971, "grad_norm": 9.902336815825974, "learning_rate": 4.9941276205175405e-06, "loss": 1.5832, "step": 723 }, { "epoch": 0.10249876123734693, "grad_norm": 15.084604845494844, "learning_rate": 4.994088290215438e-06, "loss": 1.6585, "step": 724 }, { "epoch": 0.10264033411198414, "grad_norm": 9.829517550887385, "learning_rate": 4.994048828800972e-06, "loss": 1.445, "step": 725 }, { "epoch": 0.10278190698662136, "grad_norm": 12.583692600266737, "learning_rate": 4.994009236276219e-06, "loss": 1.6067, "step": 726 }, { "epoch": 0.10292347986125858, "grad_norm": 9.39235291793068, "learning_rate": 4.993969512643261e-06, "loss": 1.5841, "step": 727 }, { "epoch": 0.1030650527358958, "grad_norm": 10.776265902404061, "learning_rate": 4.993929657904185e-06, "loss": 1.6634, "step": 728 }, { "epoch": 0.10320662561053302, "grad_norm": 9.663516572721834, "learning_rate": 4.993889672061087e-06, "loss": 1.4487, "step": 729 }, { "epoch": 0.10334819848517024, "grad_norm": 12.028268578441255, "learning_rate": 4.993849555116067e-06, "loss": 1.4712, "step": 730 }, { "epoch": 0.10348977135980746, "grad_norm": 9.400223635996023, "learning_rate": 4.993809307071236e-06, "loss": 1.3275, "step": 731 }, { "epoch": 0.10363134423444469, "grad_norm": 10.443834404230483, "learning_rate": 4.99376892792871e-06, "loss": 1.3998, "step": 732 }, { "epoch": 0.1037729171090819, "grad_norm": 11.198554177286507, "learning_rate": 4.99372841769061e-06, "loss": 1.4169, "step": 733 }, { "epoch": 0.10391448998371912, "grad_norm": 9.572683857994956, "learning_rate": 4.9936877763590664e-06, "loss": 1.5673, "step": 734 }, { "epoch": 0.10405606285835634, "grad_norm": 11.28651132235244, "learning_rate": 4.9936470039362165e-06, "loss": 1.5267, "step": 735 }, { "epoch": 0.10419763573299355, "grad_norm": 12.49624333198219, "learning_rate": 4.993606100424202e-06, "loss": 1.6075, "step": 736 }, { "epoch": 0.10433920860763078, "grad_norm": 12.185571136426, "learning_rate": 4.993565065825175e-06, "loss": 1.4884, "step": 737 }, { "epoch": 0.104480781482268, "grad_norm": 12.593044819742017, "learning_rate": 4.9935239001412915e-06, "loss": 1.3621, "step": 738 }, { "epoch": 0.10462235435690521, "grad_norm": 10.88173633520613, "learning_rate": 4.993482603374715e-06, "loss": 1.437, "step": 739 }, { "epoch": 0.10476392723154243, "grad_norm": 12.702063109060768, "learning_rate": 4.993441175527619e-06, "loss": 1.5878, "step": 740 }, { "epoch": 0.10490550010617966, "grad_norm": 9.250486618851296, "learning_rate": 4.993399616602178e-06, "loss": 1.5772, "step": 741 }, { "epoch": 0.10504707298081688, "grad_norm": 13.010314957297764, "learning_rate": 4.99335792660058e-06, "loss": 1.625, "step": 742 }, { "epoch": 0.1051886458554541, "grad_norm": 11.67811052271982, "learning_rate": 4.993316105525013e-06, "loss": 1.4829, "step": 743 }, { "epoch": 0.10533021873009131, "grad_norm": 10.065743438961869, "learning_rate": 4.993274153377678e-06, "loss": 1.589, "step": 744 }, { "epoch": 0.10547179160472854, "grad_norm": 10.570939704461074, "learning_rate": 4.993232070160781e-06, "loss": 1.6034, "step": 745 }, { "epoch": 0.10561336447936576, "grad_norm": 10.904233729722684, "learning_rate": 4.993189855876531e-06, "loss": 1.4638, "step": 746 }, { "epoch": 0.10575493735400297, "grad_norm": 12.525173017026365, "learning_rate": 4.993147510527151e-06, "loss": 1.57, "step": 747 }, { "epoch": 0.10589651022864019, "grad_norm": 12.329211671847425, "learning_rate": 4.993105034114864e-06, "loss": 1.5369, "step": 748 }, { "epoch": 0.1060380831032774, "grad_norm": 11.219209709418317, "learning_rate": 4.993062426641906e-06, "loss": 1.4416, "step": 749 }, { "epoch": 0.10617965597791464, "grad_norm": 10.033176700556501, "learning_rate": 4.993019688110514e-06, "loss": 1.4368, "step": 750 }, { "epoch": 0.10632122885255185, "grad_norm": 10.200623093067227, "learning_rate": 4.992976818522936e-06, "loss": 1.3937, "step": 751 }, { "epoch": 0.10646280172718907, "grad_norm": 12.069340955034255, "learning_rate": 4.992933817881426e-06, "loss": 1.477, "step": 752 }, { "epoch": 0.10660437460182628, "grad_norm": 10.264897383127257, "learning_rate": 4.992890686188243e-06, "loss": 1.4088, "step": 753 }, { "epoch": 0.10674594747646352, "grad_norm": 10.24734872163302, "learning_rate": 4.992847423445657e-06, "loss": 1.3864, "step": 754 }, { "epoch": 0.10688752035110073, "grad_norm": 11.786857424036752, "learning_rate": 4.992804029655939e-06, "loss": 1.3912, "step": 755 }, { "epoch": 0.10702909322573795, "grad_norm": 12.549588528761051, "learning_rate": 4.992760504821373e-06, "loss": 1.484, "step": 756 }, { "epoch": 0.10717066610037516, "grad_norm": 10.94714557946, "learning_rate": 4.992716848944245e-06, "loss": 1.4681, "step": 757 }, { "epoch": 0.1073122389750124, "grad_norm": 8.658335961153906, "learning_rate": 4.992673062026851e-06, "loss": 1.4373, "step": 758 }, { "epoch": 0.10745381184964961, "grad_norm": 12.69355276265401, "learning_rate": 4.992629144071494e-06, "loss": 1.4432, "step": 759 }, { "epoch": 0.10759538472428683, "grad_norm": 11.701831537333161, "learning_rate": 4.99258509508048e-06, "loss": 1.4592, "step": 760 }, { "epoch": 0.10773695759892404, "grad_norm": 13.35638501353876, "learning_rate": 4.9925409150561264e-06, "loss": 1.3696, "step": 761 }, { "epoch": 0.10787853047356126, "grad_norm": 9.58063099672512, "learning_rate": 4.992496604000756e-06, "loss": 1.6092, "step": 762 }, { "epoch": 0.10802010334819849, "grad_norm": 16.54592515274248, "learning_rate": 4.992452161916698e-06, "loss": 1.5258, "step": 763 }, { "epoch": 0.1081616762228357, "grad_norm": 12.742116847213687, "learning_rate": 4.992407588806287e-06, "loss": 1.5443, "step": 764 }, { "epoch": 0.10830324909747292, "grad_norm": 12.395040910967996, "learning_rate": 4.9923628846718685e-06, "loss": 1.4597, "step": 765 }, { "epoch": 0.10844482197211014, "grad_norm": 10.327990924928727, "learning_rate": 4.992318049515791e-06, "loss": 1.433, "step": 766 }, { "epoch": 0.10858639484674737, "grad_norm": 15.89998072976918, "learning_rate": 4.992273083340412e-06, "loss": 1.5208, "step": 767 }, { "epoch": 0.10872796772138459, "grad_norm": 14.913173898848786, "learning_rate": 4.992227986148096e-06, "loss": 1.3765, "step": 768 }, { "epoch": 0.1088695405960218, "grad_norm": 8.909830437790372, "learning_rate": 4.992182757941212e-06, "loss": 1.3759, "step": 769 }, { "epoch": 0.10901111347065902, "grad_norm": 10.51755980877003, "learning_rate": 4.992137398722139e-06, "loss": 1.3914, "step": 770 }, { "epoch": 0.10915268634529623, "grad_norm": 17.414420875910842, "learning_rate": 4.992091908493262e-06, "loss": 1.5107, "step": 771 }, { "epoch": 0.10929425921993347, "grad_norm": 10.713914853898272, "learning_rate": 4.992046287256971e-06, "loss": 1.419, "step": 772 }, { "epoch": 0.10943583209457068, "grad_norm": 11.411827751156705, "learning_rate": 4.992000535015664e-06, "loss": 1.4031, "step": 773 }, { "epoch": 0.1095774049692079, "grad_norm": 10.160995736175956, "learning_rate": 4.991954651771748e-06, "loss": 1.4584, "step": 774 }, { "epoch": 0.10971897784384511, "grad_norm": 15.79323782775351, "learning_rate": 4.991908637527634e-06, "loss": 1.5055, "step": 775 }, { "epoch": 0.10986055071848234, "grad_norm": 10.146282293163274, "learning_rate": 4.991862492285741e-06, "loss": 1.5186, "step": 776 }, { "epoch": 0.11000212359311956, "grad_norm": 9.54971795589322, "learning_rate": 4.991816216048494e-06, "loss": 1.3946, "step": 777 }, { "epoch": 0.11014369646775678, "grad_norm": 12.08818773997633, "learning_rate": 4.991769808818328e-06, "loss": 1.4581, "step": 778 }, { "epoch": 0.110285269342394, "grad_norm": 9.196663574940889, "learning_rate": 4.991723270597679e-06, "loss": 1.2776, "step": 779 }, { "epoch": 0.11042684221703122, "grad_norm": 10.64143912944135, "learning_rate": 4.9916766013889975e-06, "loss": 1.4212, "step": 780 }, { "epoch": 0.11056841509166844, "grad_norm": 9.471418250287282, "learning_rate": 4.991629801194734e-06, "loss": 1.486, "step": 781 }, { "epoch": 0.11070998796630566, "grad_norm": 10.307093827384318, "learning_rate": 4.9915828700173495e-06, "loss": 1.527, "step": 782 }, { "epoch": 0.11085156084094287, "grad_norm": 19.750176151310967, "learning_rate": 4.991535807859312e-06, "loss": 1.346, "step": 783 }, { "epoch": 0.11099313371558009, "grad_norm": 11.494394014040061, "learning_rate": 4.991488614723094e-06, "loss": 1.5973, "step": 784 }, { "epoch": 0.11113470659021732, "grad_norm": 15.014741393259003, "learning_rate": 4.991441290611177e-06, "loss": 1.597, "step": 785 }, { "epoch": 0.11127627946485454, "grad_norm": 10.178794464497663, "learning_rate": 4.991393835526051e-06, "loss": 1.5616, "step": 786 }, { "epoch": 0.11141785233949175, "grad_norm": 11.773146536509142, "learning_rate": 4.991346249470207e-06, "loss": 1.4924, "step": 787 }, { "epoch": 0.11155942521412897, "grad_norm": 10.935837275959694, "learning_rate": 4.991298532446149e-06, "loss": 1.3476, "step": 788 }, { "epoch": 0.1117009980887662, "grad_norm": 9.815156807795844, "learning_rate": 4.991250684456385e-06, "loss": 1.2554, "step": 789 }, { "epoch": 0.11184257096340341, "grad_norm": 11.011685839898497, "learning_rate": 4.9912027055034295e-06, "loss": 1.5356, "step": 790 }, { "epoch": 0.11198414383804063, "grad_norm": 9.50859203293161, "learning_rate": 4.9911545955898055e-06, "loss": 1.4638, "step": 791 }, { "epoch": 0.11212571671267785, "grad_norm": 12.33184936118635, "learning_rate": 4.991106354718042e-06, "loss": 1.4693, "step": 792 }, { "epoch": 0.11226728958731506, "grad_norm": 10.995423838940871, "learning_rate": 4.991057982890674e-06, "loss": 1.4925, "step": 793 }, { "epoch": 0.1124088624619523, "grad_norm": 9.053989386779618, "learning_rate": 4.991009480110246e-06, "loss": 1.5498, "step": 794 }, { "epoch": 0.11255043533658951, "grad_norm": 10.427955512206157, "learning_rate": 4.990960846379307e-06, "loss": 1.482, "step": 795 }, { "epoch": 0.11269200821122673, "grad_norm": 9.968764158615533, "learning_rate": 4.990912081700413e-06, "loss": 1.4278, "step": 796 }, { "epoch": 0.11283358108586394, "grad_norm": 10.74128689406558, "learning_rate": 4.990863186076129e-06, "loss": 1.3529, "step": 797 }, { "epoch": 0.11297515396050117, "grad_norm": 10.610967367728726, "learning_rate": 4.990814159509025e-06, "loss": 1.6191, "step": 798 }, { "epoch": 0.11311672683513839, "grad_norm": 9.35216905862347, "learning_rate": 4.990765002001677e-06, "loss": 1.4112, "step": 799 }, { "epoch": 0.1132582997097756, "grad_norm": 9.862259383946967, "learning_rate": 4.99071571355667e-06, "loss": 1.6273, "step": 800 }, { "epoch": 0.11339987258441282, "grad_norm": 12.344843948756122, "learning_rate": 4.990666294176596e-06, "loss": 1.5961, "step": 801 }, { "epoch": 0.11354144545905005, "grad_norm": 10.674613456070595, "learning_rate": 4.990616743864051e-06, "loss": 1.4296, "step": 802 }, { "epoch": 0.11368301833368727, "grad_norm": 11.333649649600204, "learning_rate": 4.99056706262164e-06, "loss": 1.4046, "step": 803 }, { "epoch": 0.11382459120832449, "grad_norm": 15.383584703986472, "learning_rate": 4.990517250451978e-06, "loss": 1.4302, "step": 804 }, { "epoch": 0.1139661640829617, "grad_norm": 13.52525792466491, "learning_rate": 4.99046730735768e-06, "loss": 1.4589, "step": 805 }, { "epoch": 0.11410773695759892, "grad_norm": 10.307005372134572, "learning_rate": 4.990417233341373e-06, "loss": 1.4986, "step": 806 }, { "epoch": 0.11424930983223615, "grad_norm": 11.592885275619347, "learning_rate": 4.990367028405688e-06, "loss": 1.3795, "step": 807 }, { "epoch": 0.11439088270687336, "grad_norm": 15.118048465469903, "learning_rate": 4.990316692553265e-06, "loss": 1.5222, "step": 808 }, { "epoch": 0.11453245558151058, "grad_norm": 11.349931866277464, "learning_rate": 4.990266225786751e-06, "loss": 1.3799, "step": 809 }, { "epoch": 0.1146740284561478, "grad_norm": 10.834248274289514, "learning_rate": 4.9902156281087985e-06, "loss": 1.5128, "step": 810 }, { "epoch": 0.11481560133078503, "grad_norm": 13.086762450676243, "learning_rate": 4.990164899522068e-06, "loss": 1.3853, "step": 811 }, { "epoch": 0.11495717420542224, "grad_norm": 10.476260278312358, "learning_rate": 4.990114040029224e-06, "loss": 1.4376, "step": 812 }, { "epoch": 0.11509874708005946, "grad_norm": 12.393281014591762, "learning_rate": 4.990063049632943e-06, "loss": 1.5966, "step": 813 }, { "epoch": 0.11524031995469668, "grad_norm": 10.488510748769379, "learning_rate": 4.9900119283359025e-06, "loss": 1.5068, "step": 814 }, { "epoch": 0.11538189282933389, "grad_norm": 10.36782000178222, "learning_rate": 4.989960676140793e-06, "loss": 1.4215, "step": 815 }, { "epoch": 0.11552346570397112, "grad_norm": 10.0770037833189, "learning_rate": 4.989909293050307e-06, "loss": 1.4633, "step": 816 }, { "epoch": 0.11566503857860834, "grad_norm": 10.392385233304351, "learning_rate": 4.989857779067146e-06, "loss": 1.353, "step": 817 }, { "epoch": 0.11580661145324556, "grad_norm": 10.705423951892149, "learning_rate": 4.989806134194018e-06, "loss": 1.4568, "step": 818 }, { "epoch": 0.11594818432788277, "grad_norm": 12.003048827374808, "learning_rate": 4.9897543584336376e-06, "loss": 1.4292, "step": 819 }, { "epoch": 0.11608975720252, "grad_norm": 12.192993803836309, "learning_rate": 4.989702451788727e-06, "loss": 1.5632, "step": 820 }, { "epoch": 0.11623133007715722, "grad_norm": 9.816705859310579, "learning_rate": 4.989650414262015e-06, "loss": 1.3982, "step": 821 }, { "epoch": 0.11637290295179444, "grad_norm": 10.165301330273191, "learning_rate": 4.989598245856238e-06, "loss": 1.348, "step": 822 }, { "epoch": 0.11651447582643165, "grad_norm": 11.256418389348127, "learning_rate": 4.989545946574136e-06, "loss": 1.4242, "step": 823 }, { "epoch": 0.11665604870106888, "grad_norm": 12.114404827365192, "learning_rate": 4.989493516418461e-06, "loss": 1.5775, "step": 824 }, { "epoch": 0.1167976215757061, "grad_norm": 10.59761017544352, "learning_rate": 4.9894409553919675e-06, "loss": 1.6157, "step": 825 }, { "epoch": 0.11693919445034331, "grad_norm": 11.471377370625415, "learning_rate": 4.98938826349742e-06, "loss": 1.4617, "step": 826 }, { "epoch": 0.11708076732498053, "grad_norm": 11.59241340109617, "learning_rate": 4.989335440737587e-06, "loss": 1.4185, "step": 827 }, { "epoch": 0.11722234019961775, "grad_norm": 9.193007706188174, "learning_rate": 4.989282487115246e-06, "loss": 1.5507, "step": 828 }, { "epoch": 0.11736391307425498, "grad_norm": 13.295391712766436, "learning_rate": 4.98922940263318e-06, "loss": 1.6229, "step": 829 }, { "epoch": 0.1175054859488922, "grad_norm": 10.240517300413984, "learning_rate": 4.989176187294182e-06, "loss": 1.4442, "step": 830 }, { "epoch": 0.11764705882352941, "grad_norm": 9.963497296699972, "learning_rate": 4.989122841101047e-06, "loss": 1.5861, "step": 831 }, { "epoch": 0.11778863169816663, "grad_norm": 10.78252334815084, "learning_rate": 4.98906936405658e-06, "loss": 1.5788, "step": 832 }, { "epoch": 0.11793020457280386, "grad_norm": 9.906121382495096, "learning_rate": 4.989015756163593e-06, "loss": 1.54, "step": 833 }, { "epoch": 0.11807177744744107, "grad_norm": 13.692338480210042, "learning_rate": 4.988962017424903e-06, "loss": 1.4181, "step": 834 }, { "epoch": 0.11821335032207829, "grad_norm": 11.349746338282603, "learning_rate": 4.988908147843336e-06, "loss": 1.5935, "step": 835 }, { "epoch": 0.1183549231967155, "grad_norm": 10.335050214655617, "learning_rate": 4.988854147421724e-06, "loss": 1.5457, "step": 836 }, { "epoch": 0.11849649607135272, "grad_norm": 12.173157375884323, "learning_rate": 4.988800016162904e-06, "loss": 1.461, "step": 837 }, { "epoch": 0.11863806894598995, "grad_norm": 10.167308057554223, "learning_rate": 4.9887457540697235e-06, "loss": 1.5739, "step": 838 }, { "epoch": 0.11877964182062717, "grad_norm": 9.823647551113389, "learning_rate": 4.988691361145035e-06, "loss": 1.384, "step": 839 }, { "epoch": 0.11892121469526439, "grad_norm": 9.498573196076897, "learning_rate": 4.988636837391696e-06, "loss": 1.4092, "step": 840 }, { "epoch": 0.1190627875699016, "grad_norm": 11.079607994774504, "learning_rate": 4.988582182812575e-06, "loss": 1.5839, "step": 841 }, { "epoch": 0.11920436044453883, "grad_norm": 11.704907188650488, "learning_rate": 4.988527397410544e-06, "loss": 1.4571, "step": 842 }, { "epoch": 0.11934593331917605, "grad_norm": 10.202082956096094, "learning_rate": 4.988472481188484e-06, "loss": 1.5641, "step": 843 }, { "epoch": 0.11948750619381326, "grad_norm": 10.106707213562862, "learning_rate": 4.988417434149279e-06, "loss": 1.4081, "step": 844 }, { "epoch": 0.11962907906845048, "grad_norm": 8.744827921740907, "learning_rate": 4.988362256295827e-06, "loss": 1.5319, "step": 845 }, { "epoch": 0.11977065194308771, "grad_norm": 10.992488030051264, "learning_rate": 4.988306947631025e-06, "loss": 1.4399, "step": 846 }, { "epoch": 0.11991222481772493, "grad_norm": 11.09574661555242, "learning_rate": 4.988251508157784e-06, "loss": 1.4727, "step": 847 }, { "epoch": 0.12005379769236214, "grad_norm": 13.265182146244614, "learning_rate": 4.988195937879015e-06, "loss": 1.4219, "step": 848 }, { "epoch": 0.12019537056699936, "grad_norm": 13.773681223027987, "learning_rate": 4.988140236797642e-06, "loss": 1.5204, "step": 849 }, { "epoch": 0.12033694344163658, "grad_norm": 9.276748747367115, "learning_rate": 4.988084404916591e-06, "loss": 1.4569, "step": 850 }, { "epoch": 0.1204785163162738, "grad_norm": 12.315193307701454, "learning_rate": 4.988028442238798e-06, "loss": 1.4235, "step": 851 }, { "epoch": 0.12062008919091102, "grad_norm": 10.236480356564556, "learning_rate": 4.987972348767206e-06, "loss": 1.3441, "step": 852 }, { "epoch": 0.12076166206554824, "grad_norm": 12.074289519769955, "learning_rate": 4.987916124504761e-06, "loss": 1.4432, "step": 853 }, { "epoch": 0.12090323494018546, "grad_norm": 10.731491478950769, "learning_rate": 4.9878597694544215e-06, "loss": 1.5405, "step": 854 }, { "epoch": 0.12104480781482269, "grad_norm": 10.38792735481934, "learning_rate": 4.987803283619149e-06, "loss": 1.4894, "step": 855 }, { "epoch": 0.1211863806894599, "grad_norm": 13.297343764688083, "learning_rate": 4.987746667001913e-06, "loss": 1.4352, "step": 856 }, { "epoch": 0.12132795356409712, "grad_norm": 11.564159377338768, "learning_rate": 4.98768991960569e-06, "loss": 1.4035, "step": 857 }, { "epoch": 0.12146952643873433, "grad_norm": 10.788319268616478, "learning_rate": 4.987633041433462e-06, "loss": 1.5716, "step": 858 }, { "epoch": 0.12161109931337155, "grad_norm": 10.078419099996575, "learning_rate": 4.98757603248822e-06, "loss": 1.5625, "step": 859 }, { "epoch": 0.12175267218800878, "grad_norm": 11.779628328376388, "learning_rate": 4.987518892772961e-06, "loss": 1.4673, "step": 860 }, { "epoch": 0.121894245062646, "grad_norm": 10.348308377077378, "learning_rate": 4.987461622290688e-06, "loss": 1.3049, "step": 861 }, { "epoch": 0.12203581793728321, "grad_norm": 10.941873079451087, "learning_rate": 4.987404221044413e-06, "loss": 1.283, "step": 862 }, { "epoch": 0.12217739081192043, "grad_norm": 10.38829457242791, "learning_rate": 4.9873466890371525e-06, "loss": 1.5517, "step": 863 }, { "epoch": 0.12231896368655766, "grad_norm": 10.487017643853756, "learning_rate": 4.987289026271931e-06, "loss": 1.3882, "step": 864 }, { "epoch": 0.12246053656119488, "grad_norm": 10.47360617894341, "learning_rate": 4.98723123275178e-06, "loss": 1.3254, "step": 865 }, { "epoch": 0.1226021094358321, "grad_norm": 9.719686873929946, "learning_rate": 4.987173308479738e-06, "loss": 1.442, "step": 866 }, { "epoch": 0.12274368231046931, "grad_norm": 10.340243664347854, "learning_rate": 4.98711525345885e-06, "loss": 1.4562, "step": 867 }, { "epoch": 0.12288525518510654, "grad_norm": 12.508585308535032, "learning_rate": 4.987057067692167e-06, "loss": 1.4443, "step": 868 }, { "epoch": 0.12302682805974376, "grad_norm": 10.737156492340773, "learning_rate": 4.986998751182748e-06, "loss": 1.4647, "step": 869 }, { "epoch": 0.12316840093438097, "grad_norm": 8.198778145170808, "learning_rate": 4.98694030393366e-06, "loss": 1.4141, "step": 870 }, { "epoch": 0.12330997380901819, "grad_norm": 10.764822388781512, "learning_rate": 4.986881725947974e-06, "loss": 1.5512, "step": 871 }, { "epoch": 0.1234515466836554, "grad_norm": 10.528233670641663, "learning_rate": 4.98682301722877e-06, "loss": 1.4611, "step": 872 }, { "epoch": 0.12359311955829264, "grad_norm": 12.60769101305485, "learning_rate": 4.986764177779134e-06, "loss": 1.5082, "step": 873 }, { "epoch": 0.12373469243292985, "grad_norm": 12.16832801937511, "learning_rate": 4.986705207602161e-06, "loss": 1.5998, "step": 874 }, { "epoch": 0.12387626530756707, "grad_norm": 9.704389652514578, "learning_rate": 4.986646106700948e-06, "loss": 1.4805, "step": 875 }, { "epoch": 0.12401783818220428, "grad_norm": 9.208338764816224, "learning_rate": 4.986586875078603e-06, "loss": 1.5827, "step": 876 }, { "epoch": 0.12415941105684151, "grad_norm": 12.778534915624864, "learning_rate": 4.98652751273824e-06, "loss": 1.4114, "step": 877 }, { "epoch": 0.12430098393147873, "grad_norm": 12.000310893799858, "learning_rate": 4.986468019682981e-06, "loss": 1.2916, "step": 878 }, { "epoch": 0.12444255680611595, "grad_norm": 11.080675958609522, "learning_rate": 4.98640839591595e-06, "loss": 1.4159, "step": 879 }, { "epoch": 0.12458412968075316, "grad_norm": 7.961734570852571, "learning_rate": 4.986348641440286e-06, "loss": 1.3765, "step": 880 }, { "epoch": 0.12472570255539038, "grad_norm": 10.40012855450332, "learning_rate": 4.986288756259126e-06, "loss": 1.4546, "step": 881 }, { "epoch": 0.12486727543002761, "grad_norm": 11.743833181625646, "learning_rate": 4.986228740375621e-06, "loss": 1.4184, "step": 882 }, { "epoch": 0.12500884830466483, "grad_norm": 10.347958541262122, "learning_rate": 4.986168593792924e-06, "loss": 1.4743, "step": 883 }, { "epoch": 0.12515042117930206, "grad_norm": 10.223492469738929, "learning_rate": 4.986108316514199e-06, "loss": 1.4581, "step": 884 }, { "epoch": 0.12529199405393926, "grad_norm": 10.089092112925025, "learning_rate": 4.986047908542613e-06, "loss": 1.5233, "step": 885 }, { "epoch": 0.1254335669285765, "grad_norm": 14.159141666238437, "learning_rate": 4.9859873698813425e-06, "loss": 1.428, "step": 886 }, { "epoch": 0.1255751398032137, "grad_norm": 12.766450927611377, "learning_rate": 4.985926700533569e-06, "loss": 1.4086, "step": 887 }, { "epoch": 0.12571671267785092, "grad_norm": 10.652181167396979, "learning_rate": 4.985865900502482e-06, "loss": 1.3771, "step": 888 }, { "epoch": 0.12585828555248815, "grad_norm": 11.66516786666278, "learning_rate": 4.985804969791278e-06, "loss": 1.401, "step": 889 }, { "epoch": 0.12599985842712536, "grad_norm": 8.81737194257069, "learning_rate": 4.9857439084031614e-06, "loss": 1.3604, "step": 890 }, { "epoch": 0.12614143130176259, "grad_norm": 8.947010875207106, "learning_rate": 4.985682716341341e-06, "loss": 1.3473, "step": 891 }, { "epoch": 0.1262830041763998, "grad_norm": 11.084417495667587, "learning_rate": 4.985621393609032e-06, "loss": 1.5848, "step": 892 }, { "epoch": 0.12642457705103702, "grad_norm": 10.582898909496079, "learning_rate": 4.985559940209462e-06, "loss": 1.3582, "step": 893 }, { "epoch": 0.12656614992567425, "grad_norm": 10.705546886117466, "learning_rate": 4.985498356145858e-06, "loss": 1.3085, "step": 894 }, { "epoch": 0.12670772280031145, "grad_norm": 9.78502915960381, "learning_rate": 4.985436641421458e-06, "loss": 1.3261, "step": 895 }, { "epoch": 0.12684929567494868, "grad_norm": 11.02536588654222, "learning_rate": 4.985374796039508e-06, "loss": 1.4308, "step": 896 }, { "epoch": 0.1269908685495859, "grad_norm": 10.518302950592801, "learning_rate": 4.985312820003258e-06, "loss": 1.4952, "step": 897 }, { "epoch": 0.1271324414242231, "grad_norm": 9.649568190326738, "learning_rate": 4.985250713315966e-06, "loss": 1.3738, "step": 898 }, { "epoch": 0.12727401429886034, "grad_norm": 11.193004085324974, "learning_rate": 4.985188475980898e-06, "loss": 1.4633, "step": 899 }, { "epoch": 0.12741558717349755, "grad_norm": 10.687334382854196, "learning_rate": 4.985126108001323e-06, "loss": 1.433, "step": 900 }, { "epoch": 0.12755716004813478, "grad_norm": 9.90608171867615, "learning_rate": 4.985063609380522e-06, "loss": 1.4008, "step": 901 }, { "epoch": 0.127698732922772, "grad_norm": 9.797498375925842, "learning_rate": 4.985000980121782e-06, "loss": 1.3425, "step": 902 }, { "epoch": 0.1278403057974092, "grad_norm": 9.58152842456169, "learning_rate": 4.984938220228391e-06, "loss": 1.4432, "step": 903 }, { "epoch": 0.12798187867204644, "grad_norm": 10.445653232218362, "learning_rate": 4.9848753297036515e-06, "loss": 1.2645, "step": 904 }, { "epoch": 0.12812345154668364, "grad_norm": 10.15854804229604, "learning_rate": 4.984812308550869e-06, "loss": 1.3024, "step": 905 }, { "epoch": 0.12826502442132087, "grad_norm": 7.922158621802766, "learning_rate": 4.984749156773355e-06, "loss": 1.3801, "step": 906 }, { "epoch": 0.1284065972959581, "grad_norm": 10.465745181523857, "learning_rate": 4.984685874374432e-06, "loss": 1.3546, "step": 907 }, { "epoch": 0.1285481701705953, "grad_norm": 11.597037206659994, "learning_rate": 4.984622461357425e-06, "loss": 1.5502, "step": 908 }, { "epoch": 0.12868974304523254, "grad_norm": 11.275372863152908, "learning_rate": 4.984558917725667e-06, "loss": 1.3862, "step": 909 }, { "epoch": 0.12883131591986977, "grad_norm": 10.701044434528159, "learning_rate": 4.9844952434825e-06, "loss": 1.3987, "step": 910 }, { "epoch": 0.12897288879450697, "grad_norm": 9.506166966142747, "learning_rate": 4.98443143863127e-06, "loss": 1.4511, "step": 911 }, { "epoch": 0.1291144616691442, "grad_norm": 10.842656940699149, "learning_rate": 4.984367503175332e-06, "loss": 1.5191, "step": 912 }, { "epoch": 0.1292560345437814, "grad_norm": 10.155933739066274, "learning_rate": 4.984303437118047e-06, "loss": 1.4432, "step": 913 }, { "epoch": 0.12939760741841863, "grad_norm": 11.086667661164087, "learning_rate": 4.984239240462783e-06, "loss": 1.5723, "step": 914 }, { "epoch": 0.12953918029305586, "grad_norm": 11.679889342946558, "learning_rate": 4.984174913212913e-06, "loss": 1.4797, "step": 915 }, { "epoch": 0.12968075316769306, "grad_norm": 10.454857196607941, "learning_rate": 4.984110455371822e-06, "loss": 1.5402, "step": 916 }, { "epoch": 0.1298223260423303, "grad_norm": 10.358202837743237, "learning_rate": 4.984045866942895e-06, "loss": 1.5083, "step": 917 }, { "epoch": 0.1299638989169675, "grad_norm": 7.661321711840226, "learning_rate": 4.98398114792953e-06, "loss": 1.2321, "step": 918 }, { "epoch": 0.13010547179160473, "grad_norm": 10.27901835701197, "learning_rate": 4.983916298335127e-06, "loss": 1.397, "step": 919 }, { "epoch": 0.13024704466624196, "grad_norm": 11.040304385425774, "learning_rate": 4.9838513181630975e-06, "loss": 1.3515, "step": 920 }, { "epoch": 0.13038861754087916, "grad_norm": 12.075057850574735, "learning_rate": 4.983786207416856e-06, "loss": 1.4311, "step": 921 }, { "epoch": 0.1305301904155164, "grad_norm": 9.13195159744221, "learning_rate": 4.983720966099826e-06, "loss": 1.3987, "step": 922 }, { "epoch": 0.13067176329015362, "grad_norm": 10.793177986118781, "learning_rate": 4.983655594215436e-06, "loss": 1.4039, "step": 923 }, { "epoch": 0.13081333616479082, "grad_norm": 10.003489457712563, "learning_rate": 4.983590091767123e-06, "loss": 1.3258, "step": 924 }, { "epoch": 0.13095490903942805, "grad_norm": 9.98012360276489, "learning_rate": 4.983524458758331e-06, "loss": 1.51, "step": 925 }, { "epoch": 0.13109648191406525, "grad_norm": 12.440679555896454, "learning_rate": 4.98345869519251e-06, "loss": 1.3865, "step": 926 }, { "epoch": 0.13123805478870249, "grad_norm": 11.088330817677909, "learning_rate": 4.9833928010731185e-06, "loss": 1.4459, "step": 927 }, { "epoch": 0.13137962766333972, "grad_norm": 10.823825439443981, "learning_rate": 4.983326776403618e-06, "loss": 1.4539, "step": 928 }, { "epoch": 0.13152120053797692, "grad_norm": 7.95486233364943, "learning_rate": 4.983260621187479e-06, "loss": 1.2448, "step": 929 }, { "epoch": 0.13166277341261415, "grad_norm": 10.145504757510489, "learning_rate": 4.983194335428183e-06, "loss": 1.436, "step": 930 }, { "epoch": 0.13180434628725135, "grad_norm": 10.442519756843666, "learning_rate": 4.9831279191292114e-06, "loss": 1.4316, "step": 931 }, { "epoch": 0.13194591916188858, "grad_norm": 11.453988286018543, "learning_rate": 4.983061372294057e-06, "loss": 1.4451, "step": 932 }, { "epoch": 0.1320874920365258, "grad_norm": 11.32529598295614, "learning_rate": 4.982994694926217e-06, "loss": 1.6512, "step": 933 }, { "epoch": 0.132229064911163, "grad_norm": 9.290568288250851, "learning_rate": 4.9829278870291975e-06, "loss": 1.3172, "step": 934 }, { "epoch": 0.13237063778580024, "grad_norm": 9.28137494654954, "learning_rate": 4.982860948606511e-06, "loss": 1.4008, "step": 935 }, { "epoch": 0.13251221066043747, "grad_norm": 9.267330533771199, "learning_rate": 4.9827938796616745e-06, "loss": 1.5217, "step": 936 }, { "epoch": 0.13265378353507468, "grad_norm": 8.726702819624991, "learning_rate": 4.982726680198217e-06, "loss": 1.4964, "step": 937 }, { "epoch": 0.1327953564097119, "grad_norm": 8.565016328046726, "learning_rate": 4.982659350219668e-06, "loss": 1.4946, "step": 938 }, { "epoch": 0.1329369292843491, "grad_norm": 10.88863911850454, "learning_rate": 4.982591889729567e-06, "loss": 1.299, "step": 939 }, { "epoch": 0.13307850215898634, "grad_norm": 10.512614710632137, "learning_rate": 4.982524298731463e-06, "loss": 1.3458, "step": 940 }, { "epoch": 0.13322007503362357, "grad_norm": 10.163914625893948, "learning_rate": 4.982456577228907e-06, "loss": 1.5148, "step": 941 }, { "epoch": 0.13336164790826077, "grad_norm": 9.119460004915313, "learning_rate": 4.98238872522546e-06, "loss": 1.41, "step": 942 }, { "epoch": 0.133503220782898, "grad_norm": 9.26413286362214, "learning_rate": 4.982320742724688e-06, "loss": 1.5651, "step": 943 }, { "epoch": 0.1336447936575352, "grad_norm": 8.904985947166805, "learning_rate": 4.982252629730167e-06, "loss": 1.4956, "step": 944 }, { "epoch": 0.13378636653217243, "grad_norm": 11.05793762955555, "learning_rate": 4.982184386245475e-06, "loss": 1.4573, "step": 945 }, { "epoch": 0.13392793940680967, "grad_norm": 10.553403650637136, "learning_rate": 4.9821160122742e-06, "loss": 1.4644, "step": 946 }, { "epoch": 0.13406951228144687, "grad_norm": 10.332657412500502, "learning_rate": 4.982047507819938e-06, "loss": 1.4091, "step": 947 }, { "epoch": 0.1342110851560841, "grad_norm": 10.716554644895497, "learning_rate": 4.981978872886288e-06, "loss": 1.4655, "step": 948 }, { "epoch": 0.1343526580307213, "grad_norm": 10.035199585104953, "learning_rate": 4.981910107476861e-06, "loss": 1.312, "step": 949 }, { "epoch": 0.13449423090535853, "grad_norm": 10.590095542354252, "learning_rate": 4.9818412115952685e-06, "loss": 1.3752, "step": 950 }, { "epoch": 0.13463580377999576, "grad_norm": 8.911330958841548, "learning_rate": 4.981772185245135e-06, "loss": 1.2763, "step": 951 }, { "epoch": 0.13477737665463296, "grad_norm": 10.757805762638595, "learning_rate": 4.981703028430088e-06, "loss": 1.5149, "step": 952 }, { "epoch": 0.1349189495292702, "grad_norm": 11.379414466851905, "learning_rate": 4.981633741153764e-06, "loss": 1.4202, "step": 953 }, { "epoch": 0.13506052240390742, "grad_norm": 9.974184665485742, "learning_rate": 4.981564323419804e-06, "loss": 1.4334, "step": 954 }, { "epoch": 0.13520209527854463, "grad_norm": 9.387766372822831, "learning_rate": 4.981494775231857e-06, "loss": 1.3727, "step": 955 }, { "epoch": 0.13534366815318186, "grad_norm": 10.292683745461241, "learning_rate": 4.981425096593582e-06, "loss": 1.4829, "step": 956 }, { "epoch": 0.13548524102781906, "grad_norm": 9.037239174356914, "learning_rate": 4.981355287508638e-06, "loss": 1.3898, "step": 957 }, { "epoch": 0.1356268139024563, "grad_norm": 9.444467561980234, "learning_rate": 4.981285347980698e-06, "loss": 1.3918, "step": 958 }, { "epoch": 0.13576838677709352, "grad_norm": 10.264587442810049, "learning_rate": 4.981215278013436e-06, "loss": 1.508, "step": 959 }, { "epoch": 0.13590995965173072, "grad_norm": 8.388120479913654, "learning_rate": 4.981145077610538e-06, "loss": 1.2418, "step": 960 }, { "epoch": 0.13605153252636795, "grad_norm": 10.001835463877837, "learning_rate": 4.981074746775693e-06, "loss": 1.5085, "step": 961 }, { "epoch": 0.13619310540100515, "grad_norm": 10.002614251790238, "learning_rate": 4.9810042855125985e-06, "loss": 1.3224, "step": 962 }, { "epoch": 0.13633467827564238, "grad_norm": 11.522435735058568, "learning_rate": 4.980933693824959e-06, "loss": 1.5053, "step": 963 }, { "epoch": 0.13647625115027961, "grad_norm": 9.132206827382193, "learning_rate": 4.9808629717164845e-06, "loss": 1.4331, "step": 964 }, { "epoch": 0.13661782402491682, "grad_norm": 11.769479391273071, "learning_rate": 4.980792119190894e-06, "loss": 1.3642, "step": 965 }, { "epoch": 0.13675939689955405, "grad_norm": 9.163667454054414, "learning_rate": 4.98072113625191e-06, "loss": 1.3382, "step": 966 }, { "epoch": 0.13690096977419128, "grad_norm": 12.495151647629797, "learning_rate": 4.980650022903267e-06, "loss": 1.4572, "step": 967 }, { "epoch": 0.13704254264882848, "grad_norm": 10.404018403385697, "learning_rate": 4.980578779148702e-06, "loss": 1.4466, "step": 968 }, { "epoch": 0.1371841155234657, "grad_norm": 8.252379565534559, "learning_rate": 4.98050740499196e-06, "loss": 1.4243, "step": 969 }, { "epoch": 0.1373256883981029, "grad_norm": 12.612479771880459, "learning_rate": 4.980435900436793e-06, "loss": 1.2932, "step": 970 }, { "epoch": 0.13746726127274014, "grad_norm": 9.709028183679578, "learning_rate": 4.98036426548696e-06, "loss": 1.4334, "step": 971 }, { "epoch": 0.13760883414737737, "grad_norm": 11.45066734268241, "learning_rate": 4.980292500146227e-06, "loss": 1.6652, "step": 972 }, { "epoch": 0.13775040702201458, "grad_norm": 11.82048418786417, "learning_rate": 4.980220604418367e-06, "loss": 1.7111, "step": 973 }, { "epoch": 0.1378919798966518, "grad_norm": 8.866555938411734, "learning_rate": 4.980148578307159e-06, "loss": 1.4161, "step": 974 }, { "epoch": 0.138033552771289, "grad_norm": 8.834389629426116, "learning_rate": 4.98007642181639e-06, "loss": 1.3748, "step": 975 }, { "epoch": 0.13817512564592624, "grad_norm": 11.778432655921506, "learning_rate": 4.980004134949853e-06, "loss": 1.5432, "step": 976 }, { "epoch": 0.13831669852056347, "grad_norm": 10.606405705043555, "learning_rate": 4.979931717711347e-06, "loss": 1.3311, "step": 977 }, { "epoch": 0.13845827139520067, "grad_norm": 8.512196485847344, "learning_rate": 4.979859170104679e-06, "loss": 1.5009, "step": 978 }, { "epoch": 0.1385998442698379, "grad_norm": 8.835874467889678, "learning_rate": 4.979786492133665e-06, "loss": 1.3571, "step": 979 }, { "epoch": 0.13874141714447513, "grad_norm": 9.35339674014811, "learning_rate": 4.979713683802123e-06, "loss": 1.4426, "step": 980 }, { "epoch": 0.13888299001911233, "grad_norm": 11.408329353489727, "learning_rate": 4.979640745113883e-06, "loss": 1.4879, "step": 981 }, { "epoch": 0.13902456289374956, "grad_norm": 12.359477470735134, "learning_rate": 4.979567676072776e-06, "loss": 1.4438, "step": 982 }, { "epoch": 0.13916613576838677, "grad_norm": 10.322607647183696, "learning_rate": 4.979494476682647e-06, "loss": 1.4606, "step": 983 }, { "epoch": 0.139307708643024, "grad_norm": 10.086477204648922, "learning_rate": 4.979421146947341e-06, "loss": 1.4126, "step": 984 }, { "epoch": 0.13944928151766123, "grad_norm": 10.64736953272068, "learning_rate": 4.979347686870714e-06, "loss": 1.5668, "step": 985 }, { "epoch": 0.13959085439229843, "grad_norm": 10.613767543262023, "learning_rate": 4.979274096456629e-06, "loss": 1.2888, "step": 986 }, { "epoch": 0.13973242726693566, "grad_norm": 10.09852284119904, "learning_rate": 4.979200375708951e-06, "loss": 1.6008, "step": 987 }, { "epoch": 0.13987400014157286, "grad_norm": 9.035037609332392, "learning_rate": 4.97912652463156e-06, "loss": 1.6409, "step": 988 }, { "epoch": 0.1400155730162101, "grad_norm": 12.794224055778182, "learning_rate": 4.979052543228335e-06, "loss": 1.385, "step": 989 }, { "epoch": 0.14015714589084732, "grad_norm": 11.261915212472951, "learning_rate": 4.978978431503167e-06, "loss": 1.3993, "step": 990 }, { "epoch": 0.14029871876548453, "grad_norm": 11.296968536200769, "learning_rate": 4.978904189459951e-06, "loss": 1.494, "step": 991 }, { "epoch": 0.14044029164012176, "grad_norm": 9.663204713219619, "learning_rate": 4.97882981710259e-06, "loss": 1.41, "step": 992 }, { "epoch": 0.14058186451475896, "grad_norm": 11.121170928011969, "learning_rate": 4.978755314434994e-06, "loss": 1.5727, "step": 993 }, { "epoch": 0.1407234373893962, "grad_norm": 13.115443647111098, "learning_rate": 4.978680681461079e-06, "loss": 1.2923, "step": 994 }, { "epoch": 0.14086501026403342, "grad_norm": 10.048980160110816, "learning_rate": 4.978605918184769e-06, "loss": 1.4329, "step": 995 }, { "epoch": 0.14100658313867062, "grad_norm": 8.097353806761502, "learning_rate": 4.978531024609994e-06, "loss": 1.5344, "step": 996 }, { "epoch": 0.14114815601330785, "grad_norm": 9.60738514712689, "learning_rate": 4.978456000740691e-06, "loss": 1.4355, "step": 997 }, { "epoch": 0.14128972888794508, "grad_norm": 11.914958342344496, "learning_rate": 4.9783808465808035e-06, "loss": 1.4211, "step": 998 }, { "epoch": 0.14143130176258228, "grad_norm": 13.603430236855129, "learning_rate": 4.978305562134284e-06, "loss": 1.5307, "step": 999 }, { "epoch": 0.14157287463721951, "grad_norm": 8.529147269769355, "learning_rate": 4.978230147405089e-06, "loss": 1.3799, "step": 1000 }, { "epoch": 0.14171444751185672, "grad_norm": 9.365108306959094, "learning_rate": 4.978154602397182e-06, "loss": 1.5159, "step": 1001 }, { "epoch": 0.14185602038649395, "grad_norm": 11.15910409701634, "learning_rate": 4.978078927114536e-06, "loss": 1.4008, "step": 1002 }, { "epoch": 0.14199759326113118, "grad_norm": 10.990043642549228, "learning_rate": 4.978003121561128e-06, "loss": 1.3022, "step": 1003 }, { "epoch": 0.14213916613576838, "grad_norm": 9.246816963820704, "learning_rate": 4.977927185740944e-06, "loss": 1.4544, "step": 1004 }, { "epoch": 0.1422807390104056, "grad_norm": 10.453005301071016, "learning_rate": 4.977851119657976e-06, "loss": 1.3737, "step": 1005 }, { "epoch": 0.1424223118850428, "grad_norm": 10.253497387681884, "learning_rate": 4.977774923316221e-06, "loss": 1.4559, "step": 1006 }, { "epoch": 0.14256388475968004, "grad_norm": 10.453303451041243, "learning_rate": 4.977698596719686e-06, "loss": 1.3894, "step": 1007 }, { "epoch": 0.14270545763431727, "grad_norm": 9.37325952268419, "learning_rate": 4.977622139872384e-06, "loss": 1.47, "step": 1008 }, { "epoch": 0.14284703050895448, "grad_norm": 9.131975825795323, "learning_rate": 4.977545552778333e-06, "loss": 1.4402, "step": 1009 }, { "epoch": 0.1429886033835917, "grad_norm": 9.718049738638728, "learning_rate": 4.97746883544156e-06, "loss": 1.4262, "step": 1010 }, { "epoch": 0.14313017625822894, "grad_norm": 10.116358707607029, "learning_rate": 4.977391987866097e-06, "loss": 1.445, "step": 1011 }, { "epoch": 0.14327174913286614, "grad_norm": 8.425311291657307, "learning_rate": 4.9773150100559844e-06, "loss": 1.4091, "step": 1012 }, { "epoch": 0.14341332200750337, "grad_norm": 10.365558786672377, "learning_rate": 4.9772379020152695e-06, "loss": 1.4616, "step": 1013 }, { "epoch": 0.14355489488214057, "grad_norm": 15.885079135331438, "learning_rate": 4.977160663748005e-06, "loss": 1.3573, "step": 1014 }, { "epoch": 0.1436964677567778, "grad_norm": 10.502108044410171, "learning_rate": 4.977083295258251e-06, "loss": 1.3841, "step": 1015 }, { "epoch": 0.14383804063141503, "grad_norm": 13.155202067268448, "learning_rate": 4.977005796550076e-06, "loss": 1.5351, "step": 1016 }, { "epoch": 0.14397961350605223, "grad_norm": 11.606896527369852, "learning_rate": 4.976928167627553e-06, "loss": 1.5193, "step": 1017 }, { "epoch": 0.14412118638068946, "grad_norm": 13.255399935173825, "learning_rate": 4.976850408494762e-06, "loss": 1.4378, "step": 1018 }, { "epoch": 0.14426275925532667, "grad_norm": 9.288782832601399, "learning_rate": 4.976772519155793e-06, "loss": 1.4433, "step": 1019 }, { "epoch": 0.1444043321299639, "grad_norm": 10.427544694437525, "learning_rate": 4.976694499614739e-06, "loss": 1.4251, "step": 1020 }, { "epoch": 0.14454590500460113, "grad_norm": 12.341231695250656, "learning_rate": 4.976616349875702e-06, "loss": 1.2695, "step": 1021 }, { "epoch": 0.14468747787923833, "grad_norm": 11.298174644834262, "learning_rate": 4.9765380699427905e-06, "loss": 1.3877, "step": 1022 }, { "epoch": 0.14482905075387556, "grad_norm": 10.643813407561243, "learning_rate": 4.9764596598201185e-06, "loss": 1.4918, "step": 1023 }, { "epoch": 0.1449706236285128, "grad_norm": 9.563820342619284, "learning_rate": 4.97638111951181e-06, "loss": 1.4276, "step": 1024 }, { "epoch": 0.14511219650315, "grad_norm": 13.82821603518763, "learning_rate": 4.976302449021991e-06, "loss": 1.6607, "step": 1025 }, { "epoch": 0.14525376937778722, "grad_norm": 12.852151392282781, "learning_rate": 4.9762236483547985e-06, "loss": 1.4211, "step": 1026 }, { "epoch": 0.14539534225242443, "grad_norm": 10.573630342998147, "learning_rate": 4.976144717514376e-06, "loss": 1.6094, "step": 1027 }, { "epoch": 0.14553691512706166, "grad_norm": 10.723908809469082, "learning_rate": 4.976065656504873e-06, "loss": 1.4079, "step": 1028 }, { "epoch": 0.14567848800169889, "grad_norm": 12.01102259161513, "learning_rate": 4.975986465330443e-06, "loss": 1.3311, "step": 1029 }, { "epoch": 0.1458200608763361, "grad_norm": 11.423504714444578, "learning_rate": 4.975907143995251e-06, "loss": 1.4104, "step": 1030 }, { "epoch": 0.14596163375097332, "grad_norm": 11.202373484752886, "learning_rate": 4.975827692503467e-06, "loss": 1.6661, "step": 1031 }, { "epoch": 0.14610320662561052, "grad_norm": 9.624426490244147, "learning_rate": 4.975748110859267e-06, "loss": 1.3012, "step": 1032 }, { "epoch": 0.14624477950024775, "grad_norm": 10.496106470382133, "learning_rate": 4.975668399066835e-06, "loss": 1.2818, "step": 1033 }, { "epoch": 0.14638635237488498, "grad_norm": 9.560498794886763, "learning_rate": 4.975588557130361e-06, "loss": 1.3187, "step": 1034 }, { "epoch": 0.14652792524952218, "grad_norm": 11.358597887577485, "learning_rate": 4.9755085850540426e-06, "loss": 1.3526, "step": 1035 }, { "epoch": 0.14666949812415941, "grad_norm": 13.353662315679955, "learning_rate": 4.975428482842083e-06, "loss": 1.5669, "step": 1036 }, { "epoch": 0.14681107099879662, "grad_norm": 9.508693933182485, "learning_rate": 4.975348250498695e-06, "loss": 1.4339, "step": 1037 }, { "epoch": 0.14695264387343385, "grad_norm": 10.67028743167267, "learning_rate": 4.975267888028094e-06, "loss": 1.5514, "step": 1038 }, { "epoch": 0.14709421674807108, "grad_norm": 12.432852485365197, "learning_rate": 4.975187395434506e-06, "loss": 1.5356, "step": 1039 }, { "epoch": 0.14723578962270828, "grad_norm": 11.009804257793228, "learning_rate": 4.975106772722164e-06, "loss": 1.3748, "step": 1040 }, { "epoch": 0.1473773624973455, "grad_norm": 13.361634608794887, "learning_rate": 4.975026019895302e-06, "loss": 1.3196, "step": 1041 }, { "epoch": 0.14751893537198274, "grad_norm": 10.463307528853358, "learning_rate": 4.9749451369581694e-06, "loss": 1.3407, "step": 1042 }, { "epoch": 0.14766050824661994, "grad_norm": 10.88527746278865, "learning_rate": 4.974864123915015e-06, "loss": 1.3184, "step": 1043 }, { "epoch": 0.14780208112125717, "grad_norm": 10.910183872033597, "learning_rate": 4.9747829807701e-06, "loss": 1.4277, "step": 1044 }, { "epoch": 0.14794365399589438, "grad_norm": 12.437827561248763, "learning_rate": 4.974701707527688e-06, "loss": 1.3132, "step": 1045 }, { "epoch": 0.1480852268705316, "grad_norm": 11.367001582457263, "learning_rate": 4.9746203041920534e-06, "loss": 1.4632, "step": 1046 }, { "epoch": 0.14822679974516884, "grad_norm": 11.41785448860892, "learning_rate": 4.974538770767474e-06, "loss": 1.5009, "step": 1047 }, { "epoch": 0.14836837261980604, "grad_norm": 13.5083699593978, "learning_rate": 4.9744571072582365e-06, "loss": 1.3323, "step": 1048 }, { "epoch": 0.14850994549444327, "grad_norm": 11.626322353176043, "learning_rate": 4.974375313668633e-06, "loss": 1.397, "step": 1049 }, { "epoch": 0.14865151836908047, "grad_norm": 11.361327285596067, "learning_rate": 4.974293390002966e-06, "loss": 1.5719, "step": 1050 }, { "epoch": 0.1487930912437177, "grad_norm": 9.16248762642518, "learning_rate": 4.97421133626554e-06, "loss": 1.4475, "step": 1051 }, { "epoch": 0.14893466411835493, "grad_norm": 10.151214196252738, "learning_rate": 4.9741291524606684e-06, "loss": 1.4556, "step": 1052 }, { "epoch": 0.14907623699299213, "grad_norm": 10.747275428599734, "learning_rate": 4.974046838592672e-06, "loss": 1.1976, "step": 1053 }, { "epoch": 0.14921780986762936, "grad_norm": 12.961171190064734, "learning_rate": 4.973964394665878e-06, "loss": 1.3879, "step": 1054 }, { "epoch": 0.1493593827422666, "grad_norm": 10.603018607975477, "learning_rate": 4.973881820684621e-06, "loss": 1.4065, "step": 1055 }, { "epoch": 0.1495009556169038, "grad_norm": 9.31069785480428, "learning_rate": 4.973799116653241e-06, "loss": 1.4934, "step": 1056 }, { "epoch": 0.14964252849154103, "grad_norm": 16.04268959383983, "learning_rate": 4.973716282576086e-06, "loss": 1.5151, "step": 1057 }, { "epoch": 0.14978410136617823, "grad_norm": 11.236340516625793, "learning_rate": 4.9736333184575105e-06, "loss": 1.4289, "step": 1058 }, { "epoch": 0.14992567424081546, "grad_norm": 9.126239013163671, "learning_rate": 4.973550224301875e-06, "loss": 1.5669, "step": 1059 }, { "epoch": 0.1500672471154527, "grad_norm": 9.198298537440863, "learning_rate": 4.9734670001135495e-06, "loss": 1.6833, "step": 1060 }, { "epoch": 0.1502088199900899, "grad_norm": 9.131406860257547, "learning_rate": 4.973383645896908e-06, "loss": 1.3644, "step": 1061 }, { "epoch": 0.15035039286472712, "grad_norm": 9.334936367608764, "learning_rate": 4.973300161656332e-06, "loss": 1.3722, "step": 1062 }, { "epoch": 0.15049196573936433, "grad_norm": 14.091431285321775, "learning_rate": 4.973216547396212e-06, "loss": 1.5158, "step": 1063 }, { "epoch": 0.15063353861400156, "grad_norm": 11.170176813131523, "learning_rate": 4.9731328031209414e-06, "loss": 1.4306, "step": 1064 }, { "epoch": 0.15077511148863879, "grad_norm": 8.582498849345885, "learning_rate": 4.973048928834923e-06, "loss": 1.5167, "step": 1065 }, { "epoch": 0.150916684363276, "grad_norm": 9.433267010387112, "learning_rate": 4.972964924542567e-06, "loss": 1.4454, "step": 1066 }, { "epoch": 0.15105825723791322, "grad_norm": 14.339828320783036, "learning_rate": 4.9728807902482885e-06, "loss": 1.2547, "step": 1067 }, { "epoch": 0.15119983011255045, "grad_norm": 14.778738699689868, "learning_rate": 4.97279652595651e-06, "loss": 1.5662, "step": 1068 }, { "epoch": 0.15134140298718765, "grad_norm": 9.461519306919731, "learning_rate": 4.972712131671663e-06, "loss": 1.4678, "step": 1069 }, { "epoch": 0.15148297586182488, "grad_norm": 10.811195956406142, "learning_rate": 4.972627607398183e-06, "loss": 1.5634, "step": 1070 }, { "epoch": 0.15162454873646208, "grad_norm": 11.419955111814554, "learning_rate": 4.972542953140513e-06, "loss": 1.4625, "step": 1071 }, { "epoch": 0.1517661216110993, "grad_norm": 14.291934768045316, "learning_rate": 4.972458168903104e-06, "loss": 1.4495, "step": 1072 }, { "epoch": 0.15190769448573654, "grad_norm": 11.688620773676435, "learning_rate": 4.972373254690411e-06, "loss": 1.3111, "step": 1073 }, { "epoch": 0.15204926736037375, "grad_norm": 10.369326959323612, "learning_rate": 4.972288210506902e-06, "loss": 1.2632, "step": 1074 }, { "epoch": 0.15219084023501098, "grad_norm": 8.705770696521368, "learning_rate": 4.972203036357043e-06, "loss": 1.3816, "step": 1075 }, { "epoch": 0.15233241310964818, "grad_norm": 8.262297279537284, "learning_rate": 4.972117732245314e-06, "loss": 1.4605, "step": 1076 }, { "epoch": 0.1524739859842854, "grad_norm": 14.19886035912907, "learning_rate": 4.972032298176201e-06, "loss": 1.4351, "step": 1077 }, { "epoch": 0.15261555885892264, "grad_norm": 11.331854854711217, "learning_rate": 4.9719467341541914e-06, "loss": 1.4399, "step": 1078 }, { "epoch": 0.15275713173355984, "grad_norm": 11.01909886769136, "learning_rate": 4.971861040183785e-06, "loss": 1.4564, "step": 1079 }, { "epoch": 0.15289870460819707, "grad_norm": 9.907998878303376, "learning_rate": 4.971775216269488e-06, "loss": 1.3605, "step": 1080 }, { "epoch": 0.1530402774828343, "grad_norm": 11.40083882776011, "learning_rate": 4.971689262415811e-06, "loss": 1.4275, "step": 1081 }, { "epoch": 0.1531818503574715, "grad_norm": 10.459875917592225, "learning_rate": 4.971603178627271e-06, "loss": 1.3963, "step": 1082 }, { "epoch": 0.15332342323210874, "grad_norm": 11.487214652119425, "learning_rate": 4.971516964908396e-06, "loss": 1.3589, "step": 1083 }, { "epoch": 0.15346499610674594, "grad_norm": 11.151934304546012, "learning_rate": 4.9714306212637165e-06, "loss": 1.4228, "step": 1084 }, { "epoch": 0.15360656898138317, "grad_norm": 8.532627539454115, "learning_rate": 4.971344147697772e-06, "loss": 1.3938, "step": 1085 }, { "epoch": 0.1537481418560204, "grad_norm": 9.998560229605014, "learning_rate": 4.9712575442151086e-06, "loss": 1.4513, "step": 1086 }, { "epoch": 0.1538897147306576, "grad_norm": 12.77024699737728, "learning_rate": 4.971170810820279e-06, "loss": 1.3814, "step": 1087 }, { "epoch": 0.15403128760529483, "grad_norm": 10.3076896776969, "learning_rate": 4.971083947517842e-06, "loss": 1.342, "step": 1088 }, { "epoch": 0.15417286047993203, "grad_norm": 12.411860276070437, "learning_rate": 4.970996954312365e-06, "loss": 1.4412, "step": 1089 }, { "epoch": 0.15431443335456926, "grad_norm": 10.3758588688178, "learning_rate": 4.97090983120842e-06, "loss": 1.3477, "step": 1090 }, { "epoch": 0.1544560062292065, "grad_norm": 10.890165781768179, "learning_rate": 4.970822578210587e-06, "loss": 1.5636, "step": 1091 }, { "epoch": 0.1545975791038437, "grad_norm": 9.416283208640872, "learning_rate": 4.970735195323454e-06, "loss": 1.2581, "step": 1092 }, { "epoch": 0.15473915197848093, "grad_norm": 8.270322795135762, "learning_rate": 4.970647682551614e-06, "loss": 1.3414, "step": 1093 }, { "epoch": 0.15488072485311813, "grad_norm": 8.862284402955435, "learning_rate": 4.970560039899668e-06, "loss": 1.4531, "step": 1094 }, { "epoch": 0.15502229772775536, "grad_norm": 11.31599094206851, "learning_rate": 4.970472267372223e-06, "loss": 1.425, "step": 1095 }, { "epoch": 0.1551638706023926, "grad_norm": 10.413127654027031, "learning_rate": 4.9703843649738926e-06, "loss": 1.3869, "step": 1096 }, { "epoch": 0.1553054434770298, "grad_norm": 8.314629074145461, "learning_rate": 4.970296332709298e-06, "loss": 1.1668, "step": 1097 }, { "epoch": 0.15544701635166702, "grad_norm": 9.403806248559635, "learning_rate": 4.970208170583066e-06, "loss": 1.3294, "step": 1098 }, { "epoch": 0.15558858922630425, "grad_norm": 9.383870315319077, "learning_rate": 4.9701198785998335e-06, "loss": 1.373, "step": 1099 }, { "epoch": 0.15573016210094145, "grad_norm": 8.001678290755633, "learning_rate": 4.970031456764242e-06, "loss": 1.3347, "step": 1100 }, { "epoch": 0.15587173497557869, "grad_norm": 10.719288448233202, "learning_rate": 4.969942905080936e-06, "loss": 1.4413, "step": 1101 }, { "epoch": 0.1560133078502159, "grad_norm": 7.976514676031953, "learning_rate": 4.969854223554575e-06, "loss": 1.5117, "step": 1102 }, { "epoch": 0.15615488072485312, "grad_norm": 11.376611616387244, "learning_rate": 4.969765412189819e-06, "loss": 1.5925, "step": 1103 }, { "epoch": 0.15629645359949035, "grad_norm": 9.816093418587734, "learning_rate": 4.969676470991336e-06, "loss": 1.4351, "step": 1104 }, { "epoch": 0.15643802647412755, "grad_norm": 9.03913635381994, "learning_rate": 4.969587399963802e-06, "loss": 1.3264, "step": 1105 }, { "epoch": 0.15657959934876478, "grad_norm": 9.631109503254123, "learning_rate": 4.969498199111901e-06, "loss": 1.2027, "step": 1106 }, { "epoch": 0.15672117222340198, "grad_norm": 10.377132702132887, "learning_rate": 4.9694088684403205e-06, "loss": 1.5278, "step": 1107 }, { "epoch": 0.1568627450980392, "grad_norm": 10.224965482993962, "learning_rate": 4.969319407953756e-06, "loss": 1.426, "step": 1108 }, { "epoch": 0.15700431797267644, "grad_norm": 8.847604511438128, "learning_rate": 4.969229817656913e-06, "loss": 1.364, "step": 1109 }, { "epoch": 0.15714589084731365, "grad_norm": 10.746890305810197, "learning_rate": 4.969140097554499e-06, "loss": 1.4391, "step": 1110 }, { "epoch": 0.15728746372195088, "grad_norm": 10.371233152900208, "learning_rate": 4.969050247651231e-06, "loss": 1.2657, "step": 1111 }, { "epoch": 0.1574290365965881, "grad_norm": 7.79930844297882, "learning_rate": 4.968960267951833e-06, "loss": 1.3751, "step": 1112 }, { "epoch": 0.1575706094712253, "grad_norm": 9.171062686957574, "learning_rate": 4.9688701584610345e-06, "loss": 1.3753, "step": 1113 }, { "epoch": 0.15771218234586254, "grad_norm": 9.098981578137046, "learning_rate": 4.968779919183573e-06, "loss": 1.5349, "step": 1114 }, { "epoch": 0.15785375522049974, "grad_norm": 10.660480811635745, "learning_rate": 4.96868955012419e-06, "loss": 1.4799, "step": 1115 }, { "epoch": 0.15799532809513697, "grad_norm": 9.575802793787465, "learning_rate": 4.96859905128764e-06, "loss": 1.3096, "step": 1116 }, { "epoch": 0.1581369009697742, "grad_norm": 8.891622101929187, "learning_rate": 4.968508422678679e-06, "loss": 1.5135, "step": 1117 }, { "epoch": 0.1582784738444114, "grad_norm": 9.200077089733066, "learning_rate": 4.968417664302069e-06, "loss": 1.287, "step": 1118 }, { "epoch": 0.15842004671904863, "grad_norm": 10.188585094867273, "learning_rate": 4.968326776162584e-06, "loss": 1.4761, "step": 1119 }, { "epoch": 0.15856161959368584, "grad_norm": 9.168421028335906, "learning_rate": 4.968235758265001e-06, "loss": 1.3717, "step": 1120 }, { "epoch": 0.15870319246832307, "grad_norm": 9.34101751172199, "learning_rate": 4.968144610614104e-06, "loss": 1.3951, "step": 1121 }, { "epoch": 0.1588447653429603, "grad_norm": 11.010672160600489, "learning_rate": 4.9680533332146855e-06, "loss": 1.4431, "step": 1122 }, { "epoch": 0.1589863382175975, "grad_norm": 11.218364400798432, "learning_rate": 4.967961926071543e-06, "loss": 1.3748, "step": 1123 }, { "epoch": 0.15912791109223473, "grad_norm": 11.261779382749543, "learning_rate": 4.967870389189483e-06, "loss": 1.2774, "step": 1124 }, { "epoch": 0.15926948396687196, "grad_norm": 11.603817909976467, "learning_rate": 4.967778722573317e-06, "loss": 1.4539, "step": 1125 }, { "epoch": 0.15941105684150916, "grad_norm": 10.122911402660465, "learning_rate": 4.967686926227862e-06, "loss": 1.6403, "step": 1126 }, { "epoch": 0.1595526297161464, "grad_norm": 10.39772614783685, "learning_rate": 4.967595000157946e-06, "loss": 1.3066, "step": 1127 }, { "epoch": 0.1596942025907836, "grad_norm": 10.90454368031861, "learning_rate": 4.967502944368402e-06, "loss": 1.3928, "step": 1128 }, { "epoch": 0.15983577546542083, "grad_norm": 11.933325871584245, "learning_rate": 4.967410758864066e-06, "loss": 1.438, "step": 1129 }, { "epoch": 0.15997734834005806, "grad_norm": 9.831154360975924, "learning_rate": 4.967318443649788e-06, "loss": 1.4727, "step": 1130 }, { "epoch": 0.16011892121469526, "grad_norm": 11.812427661815542, "learning_rate": 4.967225998730419e-06, "loss": 1.3898, "step": 1131 }, { "epoch": 0.1602604940893325, "grad_norm": 11.030757903883215, "learning_rate": 4.967133424110817e-06, "loss": 1.5569, "step": 1132 }, { "epoch": 0.1604020669639697, "grad_norm": 9.17609263054572, "learning_rate": 4.967040719795853e-06, "loss": 1.5918, "step": 1133 }, { "epoch": 0.16054363983860692, "grad_norm": 8.318264217508672, "learning_rate": 4.966947885790396e-06, "loss": 1.2553, "step": 1134 }, { "epoch": 0.16068521271324415, "grad_norm": 10.842414180036881, "learning_rate": 4.966854922099329e-06, "loss": 1.4688, "step": 1135 }, { "epoch": 0.16082678558788135, "grad_norm": 12.299192721539187, "learning_rate": 4.966761828727537e-06, "loss": 1.4029, "step": 1136 }, { "epoch": 0.16096835846251858, "grad_norm": 11.009137606545018, "learning_rate": 4.9666686056799165e-06, "loss": 1.3821, "step": 1137 }, { "epoch": 0.1611099313371558, "grad_norm": 11.163513725359461, "learning_rate": 4.966575252961365e-06, "loss": 1.5941, "step": 1138 }, { "epoch": 0.16125150421179302, "grad_norm": 9.323887768397675, "learning_rate": 4.966481770576793e-06, "loss": 1.339, "step": 1139 }, { "epoch": 0.16139307708643025, "grad_norm": 8.477286115427065, "learning_rate": 4.9663881585311126e-06, "loss": 1.3777, "step": 1140 }, { "epoch": 0.16153464996106745, "grad_norm": 9.243336932565304, "learning_rate": 4.9662944168292455e-06, "loss": 1.3516, "step": 1141 }, { "epoch": 0.16167622283570468, "grad_norm": 12.333965663336954, "learning_rate": 4.966200545476121e-06, "loss": 1.4765, "step": 1142 }, { "epoch": 0.1618177957103419, "grad_norm": 9.071883611776828, "learning_rate": 4.966106544476672e-06, "loss": 1.3394, "step": 1143 }, { "epoch": 0.1619593685849791, "grad_norm": 11.024835466813666, "learning_rate": 4.9660124138358415e-06, "loss": 1.3858, "step": 1144 }, { "epoch": 0.16210094145961634, "grad_norm": 8.484420410933952, "learning_rate": 4.965918153558576e-06, "loss": 1.3445, "step": 1145 }, { "epoch": 0.16224251433425355, "grad_norm": 8.714771525221783, "learning_rate": 4.965823763649832e-06, "loss": 1.4312, "step": 1146 }, { "epoch": 0.16238408720889078, "grad_norm": 9.34404843469373, "learning_rate": 4.965729244114572e-06, "loss": 1.2111, "step": 1147 }, { "epoch": 0.162525660083528, "grad_norm": 9.389282739969744, "learning_rate": 4.965634594957763e-06, "loss": 1.4717, "step": 1148 }, { "epoch": 0.1626672329581652, "grad_norm": 10.538472854034158, "learning_rate": 4.9655398161843836e-06, "loss": 1.3414, "step": 1149 }, { "epoch": 0.16280880583280244, "grad_norm": 9.512602628337234, "learning_rate": 4.965444907799413e-06, "loss": 1.5278, "step": 1150 }, { "epoch": 0.16295037870743964, "grad_norm": 8.153282356425157, "learning_rate": 4.9653498698078425e-06, "loss": 1.3913, "step": 1151 }, { "epoch": 0.16309195158207687, "grad_norm": 8.679582459563552, "learning_rate": 4.965254702214668e-06, "loss": 1.3723, "step": 1152 }, { "epoch": 0.1632335244567141, "grad_norm": 13.238001735209368, "learning_rate": 4.96515940502489e-06, "loss": 1.3045, "step": 1153 }, { "epoch": 0.1633750973313513, "grad_norm": 12.673422233300508, "learning_rate": 4.9650639782435225e-06, "loss": 1.5039, "step": 1154 }, { "epoch": 0.16351667020598853, "grad_norm": 8.495696380793566, "learning_rate": 4.964968421875579e-06, "loss": 1.2364, "step": 1155 }, { "epoch": 0.16365824308062576, "grad_norm": 12.792491045470287, "learning_rate": 4.964872735926083e-06, "loss": 1.284, "step": 1156 }, { "epoch": 0.16379981595526297, "grad_norm": 11.505837327310823, "learning_rate": 4.964776920400066e-06, "loss": 1.3871, "step": 1157 }, { "epoch": 0.1639413888299002, "grad_norm": 8.938741957797301, "learning_rate": 4.964680975302563e-06, "loss": 1.3476, "step": 1158 }, { "epoch": 0.1640829617045374, "grad_norm": 8.73522617512371, "learning_rate": 4.96458490063862e-06, "loss": 1.403, "step": 1159 }, { "epoch": 0.16422453457917463, "grad_norm": 9.957762591447775, "learning_rate": 4.964488696413285e-06, "loss": 1.4244, "step": 1160 }, { "epoch": 0.16436610745381186, "grad_norm": 11.11307907054668, "learning_rate": 4.964392362631618e-06, "loss": 1.5481, "step": 1161 }, { "epoch": 0.16450768032844906, "grad_norm": 11.926048018429686, "learning_rate": 4.964295899298682e-06, "loss": 1.423, "step": 1162 }, { "epoch": 0.1646492532030863, "grad_norm": 11.528167726184716, "learning_rate": 4.964199306419548e-06, "loss": 1.3061, "step": 1163 }, { "epoch": 0.1647908260777235, "grad_norm": 8.566770504966597, "learning_rate": 4.964102583999293e-06, "loss": 1.3728, "step": 1164 }, { "epoch": 0.16493239895236073, "grad_norm": 8.776371479723135, "learning_rate": 4.964005732043003e-06, "loss": 1.3834, "step": 1165 }, { "epoch": 0.16507397182699796, "grad_norm": 11.007583085127115, "learning_rate": 4.9639087505557694e-06, "loss": 1.4465, "step": 1166 }, { "epoch": 0.16521554470163516, "grad_norm": 10.09922996078321, "learning_rate": 4.96381163954269e-06, "loss": 1.6076, "step": 1167 }, { "epoch": 0.1653571175762724, "grad_norm": 10.567291798035304, "learning_rate": 4.963714399008869e-06, "loss": 1.4837, "step": 1168 }, { "epoch": 0.16549869045090962, "grad_norm": 11.65468032535977, "learning_rate": 4.9636170289594195e-06, "loss": 1.5359, "step": 1169 }, { "epoch": 0.16564026332554682, "grad_norm": 9.775782607962624, "learning_rate": 4.96351952939946e-06, "loss": 1.452, "step": 1170 }, { "epoch": 0.16578183620018405, "grad_norm": 13.114437830660444, "learning_rate": 4.9634219003341156e-06, "loss": 1.528, "step": 1171 }, { "epoch": 0.16592340907482125, "grad_norm": 11.979407125077575, "learning_rate": 4.963324141768519e-06, "loss": 1.5519, "step": 1172 }, { "epoch": 0.16606498194945848, "grad_norm": 10.885796109876429, "learning_rate": 4.963226253707808e-06, "loss": 1.5312, "step": 1173 }, { "epoch": 0.16620655482409571, "grad_norm": 10.604622721779831, "learning_rate": 4.96312823615713e-06, "loss": 1.396, "step": 1174 }, { "epoch": 0.16634812769873292, "grad_norm": 11.798653373819695, "learning_rate": 4.963030089121636e-06, "loss": 1.2915, "step": 1175 }, { "epoch": 0.16648970057337015, "grad_norm": 11.491131887701354, "learning_rate": 4.9629318126064884e-06, "loss": 1.4561, "step": 1176 }, { "epoch": 0.16663127344800735, "grad_norm": 11.972271991182767, "learning_rate": 4.962833406616851e-06, "loss": 1.5285, "step": 1177 }, { "epoch": 0.16677284632264458, "grad_norm": 9.390168393121174, "learning_rate": 4.9627348711578996e-06, "loss": 1.3934, "step": 1178 }, { "epoch": 0.1669144191972818, "grad_norm": 10.882036760646354, "learning_rate": 4.96263620623481e-06, "loss": 1.2298, "step": 1179 }, { "epoch": 0.167055992071919, "grad_norm": 9.64544567100882, "learning_rate": 4.962537411852772e-06, "loss": 1.3061, "step": 1180 }, { "epoch": 0.16719756494655624, "grad_norm": 9.806827533508105, "learning_rate": 4.962438488016979e-06, "loss": 1.3396, "step": 1181 }, { "epoch": 0.16733913782119345, "grad_norm": 9.71201299479839, "learning_rate": 4.9623394347326306e-06, "loss": 1.4096, "step": 1182 }, { "epoch": 0.16748071069583068, "grad_norm": 13.5619841446755, "learning_rate": 4.9622402520049336e-06, "loss": 1.4384, "step": 1183 }, { "epoch": 0.1676222835704679, "grad_norm": 10.734509685176699, "learning_rate": 4.962140939839103e-06, "loss": 1.5649, "step": 1184 }, { "epoch": 0.1677638564451051, "grad_norm": 11.315798789572646, "learning_rate": 4.962041498240359e-06, "loss": 1.4355, "step": 1185 }, { "epoch": 0.16790542931974234, "grad_norm": 11.55416916412609, "learning_rate": 4.961941927213928e-06, "loss": 1.3427, "step": 1186 }, { "epoch": 0.16804700219437957, "grad_norm": 9.02151418558346, "learning_rate": 4.961842226765047e-06, "loss": 1.1961, "step": 1187 }, { "epoch": 0.16818857506901677, "grad_norm": 12.82667077913701, "learning_rate": 4.9617423968989556e-06, "loss": 1.4451, "step": 1188 }, { "epoch": 0.168330147943654, "grad_norm": 12.902665423253215, "learning_rate": 4.961642437620901e-06, "loss": 1.3572, "step": 1189 }, { "epoch": 0.1684717208182912, "grad_norm": 10.824847805724099, "learning_rate": 4.96154234893614e-06, "loss": 1.2855, "step": 1190 }, { "epoch": 0.16861329369292843, "grad_norm": 10.783067456072368, "learning_rate": 4.961442130849933e-06, "loss": 1.5119, "step": 1191 }, { "epoch": 0.16875486656756566, "grad_norm": 13.67412593077536, "learning_rate": 4.961341783367548e-06, "loss": 1.4632, "step": 1192 }, { "epoch": 0.16889643944220287, "grad_norm": 10.060369230437537, "learning_rate": 4.96124130649426e-06, "loss": 1.5454, "step": 1193 }, { "epoch": 0.1690380123168401, "grad_norm": 12.049706825077218, "learning_rate": 4.961140700235353e-06, "loss": 1.2218, "step": 1194 }, { "epoch": 0.1691795851914773, "grad_norm": 11.13142672296197, "learning_rate": 4.961039964596114e-06, "loss": 1.4, "step": 1195 }, { "epoch": 0.16932115806611453, "grad_norm": 11.373651780935813, "learning_rate": 4.9609390995818395e-06, "loss": 1.4734, "step": 1196 }, { "epoch": 0.16946273094075176, "grad_norm": 10.533761016597309, "learning_rate": 4.960838105197831e-06, "loss": 1.3499, "step": 1197 }, { "epoch": 0.16960430381538896, "grad_norm": 13.118413525937502, "learning_rate": 4.960736981449399e-06, "loss": 1.4746, "step": 1198 }, { "epoch": 0.1697458766900262, "grad_norm": 12.616305237149032, "learning_rate": 4.960635728341858e-06, "loss": 1.4723, "step": 1199 }, { "epoch": 0.16988744956466342, "grad_norm": 8.847532508102674, "learning_rate": 4.960534345880531e-06, "loss": 1.3297, "step": 1200 }, { "epoch": 0.17002902243930063, "grad_norm": 14.640137752864607, "learning_rate": 4.960432834070749e-06, "loss": 1.4149, "step": 1201 }, { "epoch": 0.17017059531393786, "grad_norm": 12.616302213523927, "learning_rate": 4.960331192917847e-06, "loss": 1.3855, "step": 1202 }, { "epoch": 0.17031216818857506, "grad_norm": 14.617087285867052, "learning_rate": 4.960229422427169e-06, "loss": 1.3622, "step": 1203 }, { "epoch": 0.1704537410632123, "grad_norm": 8.348884060236406, "learning_rate": 4.960127522604065e-06, "loss": 1.4954, "step": 1204 }, { "epoch": 0.17059531393784952, "grad_norm": 9.819201082567886, "learning_rate": 4.96002549345389e-06, "loss": 1.4255, "step": 1205 }, { "epoch": 0.17073688681248672, "grad_norm": 9.15721901202432, "learning_rate": 4.95992333498201e-06, "loss": 1.3908, "step": 1206 }, { "epoch": 0.17087845968712395, "grad_norm": 10.813551305917178, "learning_rate": 4.9598210471937945e-06, "loss": 1.326, "step": 1207 }, { "epoch": 0.17102003256176115, "grad_norm": 11.933726088668827, "learning_rate": 4.959718630094621e-06, "loss": 1.4383, "step": 1208 }, { "epoch": 0.17116160543639838, "grad_norm": 9.099036918232361, "learning_rate": 4.9596160836898735e-06, "loss": 1.3666, "step": 1209 }, { "epoch": 0.17130317831103561, "grad_norm": 10.218174349468343, "learning_rate": 4.959513407984941e-06, "loss": 1.572, "step": 1210 }, { "epoch": 0.17144475118567282, "grad_norm": 12.859644946095846, "learning_rate": 4.9594106029852234e-06, "loss": 1.396, "step": 1211 }, { "epoch": 0.17158632406031005, "grad_norm": 9.24482829914984, "learning_rate": 4.959307668696124e-06, "loss": 1.2843, "step": 1212 }, { "epoch": 0.17172789693494728, "grad_norm": 10.265967980262618, "learning_rate": 4.959204605123055e-06, "loss": 1.4165, "step": 1213 }, { "epoch": 0.17186946980958448, "grad_norm": 8.351821481455735, "learning_rate": 4.959101412271433e-06, "loss": 1.2746, "step": 1214 }, { "epoch": 0.1720110426842217, "grad_norm": 11.04192684994654, "learning_rate": 4.958998090146683e-06, "loss": 1.5011, "step": 1215 }, { "epoch": 0.1721526155588589, "grad_norm": 10.159462382592904, "learning_rate": 4.9588946387542366e-06, "loss": 1.3807, "step": 1216 }, { "epoch": 0.17229418843349614, "grad_norm": 11.15726959954774, "learning_rate": 4.958791058099533e-06, "loss": 1.5969, "step": 1217 }, { "epoch": 0.17243576130813337, "grad_norm": 11.748912355121231, "learning_rate": 4.9586873481880175e-06, "loss": 1.4639, "step": 1218 }, { "epoch": 0.17257733418277058, "grad_norm": 11.24281725998454, "learning_rate": 4.95858350902514e-06, "loss": 1.432, "step": 1219 }, { "epoch": 0.1727189070574078, "grad_norm": 11.426358158512341, "learning_rate": 4.958479540616362e-06, "loss": 1.4727, "step": 1220 }, { "epoch": 0.172860479932045, "grad_norm": 10.459059691313787, "learning_rate": 4.958375442967147e-06, "loss": 1.2013, "step": 1221 }, { "epoch": 0.17300205280668224, "grad_norm": 10.903855200227994, "learning_rate": 4.958271216082968e-06, "loss": 1.5968, "step": 1222 }, { "epoch": 0.17314362568131947, "grad_norm": 8.562422035029401, "learning_rate": 4.958166859969304e-06, "loss": 1.2934, "step": 1223 }, { "epoch": 0.17328519855595667, "grad_norm": 9.916248943175406, "learning_rate": 4.958062374631641e-06, "loss": 1.3958, "step": 1224 }, { "epoch": 0.1734267714305939, "grad_norm": 10.51843568782201, "learning_rate": 4.957957760075472e-06, "loss": 1.4059, "step": 1225 }, { "epoch": 0.17356834430523113, "grad_norm": 8.415946501587218, "learning_rate": 4.957853016306297e-06, "loss": 1.1663, "step": 1226 }, { "epoch": 0.17370991717986833, "grad_norm": 11.55198067200119, "learning_rate": 4.95774814332962e-06, "loss": 1.5978, "step": 1227 }, { "epoch": 0.17385149005450556, "grad_norm": 9.472669942000413, "learning_rate": 4.957643141150958e-06, "loss": 1.3526, "step": 1228 }, { "epoch": 0.17399306292914277, "grad_norm": 11.487637716802354, "learning_rate": 4.957538009775826e-06, "loss": 1.4369, "step": 1229 }, { "epoch": 0.17413463580378, "grad_norm": 10.38057145502646, "learning_rate": 4.957432749209755e-06, "loss": 1.2956, "step": 1230 }, { "epoch": 0.17427620867841723, "grad_norm": 8.574535960106083, "learning_rate": 4.957327359458276e-06, "loss": 1.4177, "step": 1231 }, { "epoch": 0.17441778155305443, "grad_norm": 10.08844327049869, "learning_rate": 4.95722184052693e-06, "loss": 1.4099, "step": 1232 }, { "epoch": 0.17455935442769166, "grad_norm": 9.842078851202182, "learning_rate": 4.957116192421264e-06, "loss": 1.4446, "step": 1233 }, { "epoch": 0.17470092730232886, "grad_norm": 11.444186564711783, "learning_rate": 4.957010415146833e-06, "loss": 1.5742, "step": 1234 }, { "epoch": 0.1748425001769661, "grad_norm": 9.278005321806237, "learning_rate": 4.956904508709195e-06, "loss": 1.4318, "step": 1235 }, { "epoch": 0.17498407305160332, "grad_norm": 9.250776103684913, "learning_rate": 4.956798473113919e-06, "loss": 1.3097, "step": 1236 }, { "epoch": 0.17512564592624053, "grad_norm": 9.33031178565744, "learning_rate": 4.95669230836658e-06, "loss": 1.5937, "step": 1237 }, { "epoch": 0.17526721880087776, "grad_norm": 12.027497893011923, "learning_rate": 4.9565860144727575e-06, "loss": 1.4439, "step": 1238 }, { "epoch": 0.17540879167551496, "grad_norm": 9.08825853374944, "learning_rate": 4.956479591438039e-06, "loss": 1.3547, "step": 1239 }, { "epoch": 0.1755503645501522, "grad_norm": 8.161937148592736, "learning_rate": 4.956373039268022e-06, "loss": 1.2735, "step": 1240 }, { "epoch": 0.17569193742478942, "grad_norm": 10.205633769263143, "learning_rate": 4.9562663579683045e-06, "loss": 1.4412, "step": 1241 }, { "epoch": 0.17583351029942662, "grad_norm": 9.403487398638212, "learning_rate": 4.9561595475444965e-06, "loss": 1.3559, "step": 1242 }, { "epoch": 0.17597508317406385, "grad_norm": 9.108079664415538, "learning_rate": 4.956052608002212e-06, "loss": 1.4586, "step": 1243 }, { "epoch": 0.17611665604870108, "grad_norm": 9.24952077912984, "learning_rate": 4.955945539347075e-06, "loss": 1.3813, "step": 1244 }, { "epoch": 0.17625822892333828, "grad_norm": 10.600258057530036, "learning_rate": 4.95583834158471e-06, "loss": 1.4605, "step": 1245 }, { "epoch": 0.17639980179797551, "grad_norm": 10.200256224761281, "learning_rate": 4.955731014720756e-06, "loss": 1.4307, "step": 1246 }, { "epoch": 0.17654137467261272, "grad_norm": 8.814155585425187, "learning_rate": 4.955623558760852e-06, "loss": 1.3241, "step": 1247 }, { "epoch": 0.17668294754724995, "grad_norm": 11.308718223084396, "learning_rate": 4.955515973710651e-06, "loss": 1.4646, "step": 1248 }, { "epoch": 0.17682452042188718, "grad_norm": 9.743309268441, "learning_rate": 4.955408259575804e-06, "loss": 1.4439, "step": 1249 }, { "epoch": 0.17696609329652438, "grad_norm": 9.243629530095223, "learning_rate": 4.955300416361977e-06, "loss": 1.4048, "step": 1250 }, { "epoch": 0.1771076661711616, "grad_norm": 9.133956485104214, "learning_rate": 4.955192444074837e-06, "loss": 1.3724, "step": 1251 }, { "epoch": 0.1772492390457988, "grad_norm": 10.55718210192026, "learning_rate": 4.9550843427200605e-06, "loss": 1.543, "step": 1252 }, { "epoch": 0.17739081192043604, "grad_norm": 9.931020867769547, "learning_rate": 4.9549761123033316e-06, "loss": 1.4349, "step": 1253 }, { "epoch": 0.17753238479507327, "grad_norm": 10.301284887668823, "learning_rate": 4.9548677528303385e-06, "loss": 1.2981, "step": 1254 }, { "epoch": 0.17767395766971047, "grad_norm": 12.779747461919179, "learning_rate": 4.954759264306778e-06, "loss": 1.4602, "step": 1255 }, { "epoch": 0.1778155305443477, "grad_norm": 10.095981032461012, "learning_rate": 4.954650646738354e-06, "loss": 1.4877, "step": 1256 }, { "epoch": 0.17795710341898494, "grad_norm": 9.880629468008708, "learning_rate": 4.954541900130775e-06, "loss": 1.4322, "step": 1257 }, { "epoch": 0.17809867629362214, "grad_norm": 9.479925126693503, "learning_rate": 4.9544330244897586e-06, "loss": 1.4071, "step": 1258 }, { "epoch": 0.17824024916825937, "grad_norm": 13.439660754690644, "learning_rate": 4.954324019821028e-06, "loss": 1.5452, "step": 1259 }, { "epoch": 0.17838182204289657, "grad_norm": 11.647775304995545, "learning_rate": 4.954214886130315e-06, "loss": 1.2609, "step": 1260 }, { "epoch": 0.1785233949175338, "grad_norm": 11.642780447464066, "learning_rate": 4.954105623423354e-06, "loss": 1.4583, "step": 1261 }, { "epoch": 0.17866496779217103, "grad_norm": 8.863395303502623, "learning_rate": 4.953996231705891e-06, "loss": 1.5065, "step": 1262 }, { "epoch": 0.17880654066680823, "grad_norm": 10.095532144820766, "learning_rate": 4.953886710983676e-06, "loss": 1.4411, "step": 1263 }, { "epoch": 0.17894811354144546, "grad_norm": 11.069173922570961, "learning_rate": 4.9537770612624655e-06, "loss": 1.3173, "step": 1264 }, { "epoch": 0.17908968641608267, "grad_norm": 14.270529431450896, "learning_rate": 4.9536672825480255e-06, "loss": 1.6317, "step": 1265 }, { "epoch": 0.1792312592907199, "grad_norm": 11.0541626283303, "learning_rate": 4.953557374846125e-06, "loss": 1.4635, "step": 1266 }, { "epoch": 0.17937283216535713, "grad_norm": 9.765344917858565, "learning_rate": 4.953447338162543e-06, "loss": 1.4278, "step": 1267 }, { "epoch": 0.17951440503999433, "grad_norm": 11.277619768444119, "learning_rate": 4.953337172503064e-06, "loss": 1.521, "step": 1268 }, { "epoch": 0.17965597791463156, "grad_norm": 11.9747054221091, "learning_rate": 4.953226877873479e-06, "loss": 1.4342, "step": 1269 }, { "epoch": 0.1797975507892688, "grad_norm": 12.10329421417826, "learning_rate": 4.953116454279587e-06, "loss": 1.4381, "step": 1270 }, { "epoch": 0.179939123663906, "grad_norm": 9.971966932943507, "learning_rate": 4.953005901727191e-06, "loss": 1.3245, "step": 1271 }, { "epoch": 0.18008069653854322, "grad_norm": 9.615502365565073, "learning_rate": 4.952895220222104e-06, "loss": 1.3928, "step": 1272 }, { "epoch": 0.18022226941318042, "grad_norm": 17.01965273152077, "learning_rate": 4.952784409770145e-06, "loss": 1.6055, "step": 1273 }, { "epoch": 0.18036384228781766, "grad_norm": 13.720290390811819, "learning_rate": 4.952673470377137e-06, "loss": 1.4027, "step": 1274 }, { "epoch": 0.18050541516245489, "grad_norm": 9.988762836087634, "learning_rate": 4.952562402048915e-06, "loss": 1.4202, "step": 1275 }, { "epoch": 0.1806469880370921, "grad_norm": 8.607270082122529, "learning_rate": 4.952451204791315e-06, "loss": 1.3766, "step": 1276 }, { "epoch": 0.18078856091172932, "grad_norm": 10.341802592361498, "learning_rate": 4.952339878610185e-06, "loss": 1.4664, "step": 1277 }, { "epoch": 0.18093013378636652, "grad_norm": 9.014496043374562, "learning_rate": 4.952228423511375e-06, "loss": 1.3828, "step": 1278 }, { "epoch": 0.18107170666100375, "grad_norm": 12.217489894662155, "learning_rate": 4.952116839500747e-06, "loss": 1.4517, "step": 1279 }, { "epoch": 0.18121327953564098, "grad_norm": 8.323724267487021, "learning_rate": 4.9520051265841626e-06, "loss": 1.3604, "step": 1280 }, { "epoch": 0.18135485241027818, "grad_norm": 9.62268514168148, "learning_rate": 4.951893284767498e-06, "loss": 1.2875, "step": 1281 }, { "epoch": 0.1814964252849154, "grad_norm": 9.685343840821625, "learning_rate": 4.951781314056633e-06, "loss": 1.3752, "step": 1282 }, { "epoch": 0.18163799815955262, "grad_norm": 10.753015139705075, "learning_rate": 4.951669214457451e-06, "loss": 1.4195, "step": 1283 }, { "epoch": 0.18177957103418985, "grad_norm": 10.590308426498227, "learning_rate": 4.951556985975847e-06, "loss": 1.4208, "step": 1284 }, { "epoch": 0.18192114390882708, "grad_norm": 9.819562375015314, "learning_rate": 4.95144462861772e-06, "loss": 1.277, "step": 1285 }, { "epoch": 0.18206271678346428, "grad_norm": 7.9818799325146506, "learning_rate": 4.951332142388976e-06, "loss": 1.3249, "step": 1286 }, { "epoch": 0.1822042896581015, "grad_norm": 10.943998487996671, "learning_rate": 4.95121952729553e-06, "loss": 1.4502, "step": 1287 }, { "epoch": 0.18234586253273874, "grad_norm": 10.979825679875306, "learning_rate": 4.951106783343301e-06, "loss": 1.4487, "step": 1288 }, { "epoch": 0.18248743540737594, "grad_norm": 9.770841575461832, "learning_rate": 4.950993910538216e-06, "loss": 1.3723, "step": 1289 }, { "epoch": 0.18262900828201317, "grad_norm": 10.980378771087224, "learning_rate": 4.950880908886208e-06, "loss": 1.4405, "step": 1290 }, { "epoch": 0.18277058115665037, "grad_norm": 11.481429655261184, "learning_rate": 4.95076777839322e-06, "loss": 1.4285, "step": 1291 }, { "epoch": 0.1829121540312876, "grad_norm": 8.182483330093966, "learning_rate": 4.950654519065196e-06, "loss": 1.3006, "step": 1292 }, { "epoch": 0.18305372690592484, "grad_norm": 9.096813826255365, "learning_rate": 4.950541130908091e-06, "loss": 1.3831, "step": 1293 }, { "epoch": 0.18319529978056204, "grad_norm": 9.853855335137629, "learning_rate": 4.9504276139278655e-06, "loss": 1.2732, "step": 1294 }, { "epoch": 0.18333687265519927, "grad_norm": 11.495887352674895, "learning_rate": 4.950313968130488e-06, "loss": 1.3993, "step": 1295 }, { "epoch": 0.18347844552983647, "grad_norm": 10.910807272243023, "learning_rate": 4.950200193521932e-06, "loss": 1.5819, "step": 1296 }, { "epoch": 0.1836200184044737, "grad_norm": 11.001623207318126, "learning_rate": 4.950086290108179e-06, "loss": 1.2754, "step": 1297 }, { "epoch": 0.18376159127911093, "grad_norm": 9.095434498193997, "learning_rate": 4.949972257895217e-06, "loss": 1.3008, "step": 1298 }, { "epoch": 0.18390316415374813, "grad_norm": 10.568705344260884, "learning_rate": 4.94985809688904e-06, "loss": 1.5203, "step": 1299 }, { "epoch": 0.18404473702838536, "grad_norm": 9.619732889444375, "learning_rate": 4.949743807095649e-06, "loss": 1.3809, "step": 1300 }, { "epoch": 0.1841863099030226, "grad_norm": 9.65499682149336, "learning_rate": 4.9496293885210535e-06, "loss": 1.4684, "step": 1301 }, { "epoch": 0.1843278827776598, "grad_norm": 9.362241073715378, "learning_rate": 4.949514841171266e-06, "loss": 1.3595, "step": 1302 }, { "epoch": 0.18446945565229703, "grad_norm": 8.556109863203659, "learning_rate": 4.949400165052312e-06, "loss": 1.4326, "step": 1303 }, { "epoch": 0.18461102852693423, "grad_norm": 10.676470860227965, "learning_rate": 4.949285360170216e-06, "loss": 1.3866, "step": 1304 }, { "epoch": 0.18475260140157146, "grad_norm": 9.947864429809405, "learning_rate": 4.949170426531016e-06, "loss": 1.3765, "step": 1305 }, { "epoch": 0.1848941742762087, "grad_norm": 8.53969237298366, "learning_rate": 4.9490553641407515e-06, "loss": 1.3969, "step": 1306 }, { "epoch": 0.1850357471508459, "grad_norm": 10.972005420801684, "learning_rate": 4.948940173005474e-06, "loss": 1.5874, "step": 1307 }, { "epoch": 0.18517732002548312, "grad_norm": 8.153466228276447, "learning_rate": 4.948824853131237e-06, "loss": 1.2992, "step": 1308 }, { "epoch": 0.18531889290012032, "grad_norm": 8.134814628828991, "learning_rate": 4.948709404524103e-06, "loss": 1.3673, "step": 1309 }, { "epoch": 0.18546046577475755, "grad_norm": 11.671786756067023, "learning_rate": 4.948593827190142e-06, "loss": 1.5002, "step": 1310 }, { "epoch": 0.18560203864939478, "grad_norm": 10.968404998135554, "learning_rate": 4.9484781211354286e-06, "loss": 1.4449, "step": 1311 }, { "epoch": 0.185743611524032, "grad_norm": 9.692236745199738, "learning_rate": 4.948362286366047e-06, "loss": 1.3675, "step": 1312 }, { "epoch": 0.18588518439866922, "grad_norm": 8.732902170656994, "learning_rate": 4.948246322888085e-06, "loss": 1.3472, "step": 1313 }, { "epoch": 0.18602675727330645, "grad_norm": 9.47669533733155, "learning_rate": 4.948130230707639e-06, "loss": 1.4672, "step": 1314 }, { "epoch": 0.18616833014794365, "grad_norm": 10.157481896442764, "learning_rate": 4.9480140098308125e-06, "loss": 1.2868, "step": 1315 }, { "epoch": 0.18630990302258088, "grad_norm": 9.390438135100014, "learning_rate": 4.947897660263715e-06, "loss": 1.3004, "step": 1316 }, { "epoch": 0.18645147589721808, "grad_norm": 11.66483692171289, "learning_rate": 4.947781182012462e-06, "loss": 1.4928, "step": 1317 }, { "epoch": 0.1865930487718553, "grad_norm": 9.531605798302733, "learning_rate": 4.947664575083179e-06, "loss": 1.3808, "step": 1318 }, { "epoch": 0.18673462164649254, "grad_norm": 9.881572226398355, "learning_rate": 4.947547839481993e-06, "loss": 1.3443, "step": 1319 }, { "epoch": 0.18687619452112975, "grad_norm": 12.592229330104862, "learning_rate": 4.947430975215043e-06, "loss": 1.4111, "step": 1320 }, { "epoch": 0.18701776739576698, "grad_norm": 7.8929867675237295, "learning_rate": 4.94731398228847e-06, "loss": 1.394, "step": 1321 }, { "epoch": 0.18715934027040418, "grad_norm": 8.915758270861408, "learning_rate": 4.947196860708426e-06, "loss": 1.3197, "step": 1322 }, { "epoch": 0.1873009131450414, "grad_norm": 10.281649225246678, "learning_rate": 4.947079610481069e-06, "loss": 1.4726, "step": 1323 }, { "epoch": 0.18744248601967864, "grad_norm": 10.346913387005015, "learning_rate": 4.946962231612561e-06, "loss": 1.4057, "step": 1324 }, { "epoch": 0.18758405889431584, "grad_norm": 10.85186446382872, "learning_rate": 4.946844724109073e-06, "loss": 1.4484, "step": 1325 }, { "epoch": 0.18772563176895307, "grad_norm": 10.397265338822459, "learning_rate": 4.946727087976782e-06, "loss": 1.3758, "step": 1326 }, { "epoch": 0.18786720464359027, "grad_norm": 10.717269750764162, "learning_rate": 4.946609323221873e-06, "loss": 1.5345, "step": 1327 }, { "epoch": 0.1880087775182275, "grad_norm": 8.243639256772507, "learning_rate": 4.946491429850535e-06, "loss": 1.5037, "step": 1328 }, { "epoch": 0.18815035039286473, "grad_norm": 8.874825059482907, "learning_rate": 4.946373407868967e-06, "loss": 1.3401, "step": 1329 }, { "epoch": 0.18829192326750194, "grad_norm": 12.616091162701146, "learning_rate": 4.946255257283374e-06, "loss": 1.3707, "step": 1330 }, { "epoch": 0.18843349614213917, "grad_norm": 9.352716844742478, "learning_rate": 4.946136978099966e-06, "loss": 1.2701, "step": 1331 }, { "epoch": 0.1885750690167764, "grad_norm": 10.501154154969147, "learning_rate": 4.94601857032496e-06, "loss": 1.3217, "step": 1332 }, { "epoch": 0.1887166418914136, "grad_norm": 8.499716810109074, "learning_rate": 4.9459000339645824e-06, "loss": 1.3314, "step": 1333 }, { "epoch": 0.18885821476605083, "grad_norm": 14.166111355537906, "learning_rate": 4.9457813690250635e-06, "loss": 1.4995, "step": 1334 }, { "epoch": 0.18899978764068803, "grad_norm": 9.226727877434055, "learning_rate": 4.9456625755126415e-06, "loss": 1.2562, "step": 1335 }, { "epoch": 0.18914136051532526, "grad_norm": 10.948314965598017, "learning_rate": 4.945543653433562e-06, "loss": 1.4601, "step": 1336 }, { "epoch": 0.1892829333899625, "grad_norm": 9.773726717468312, "learning_rate": 4.945424602794076e-06, "loss": 1.3103, "step": 1337 }, { "epoch": 0.1894245062645997, "grad_norm": 12.455311964883823, "learning_rate": 4.945305423600441e-06, "loss": 1.3241, "step": 1338 }, { "epoch": 0.18956607913923693, "grad_norm": 11.529004878618576, "learning_rate": 4.945186115858925e-06, "loss": 1.3585, "step": 1339 }, { "epoch": 0.18970765201387413, "grad_norm": 11.345787685011533, "learning_rate": 4.945066679575796e-06, "loss": 1.4424, "step": 1340 }, { "epoch": 0.18984922488851136, "grad_norm": 10.194054864362695, "learning_rate": 4.944947114757336e-06, "loss": 1.2237, "step": 1341 }, { "epoch": 0.1899907977631486, "grad_norm": 10.348267090431245, "learning_rate": 4.944827421409829e-06, "loss": 1.4031, "step": 1342 }, { "epoch": 0.1901323706377858, "grad_norm": 9.089459327926136, "learning_rate": 4.944707599539567e-06, "loss": 1.4427, "step": 1343 }, { "epoch": 0.19027394351242302, "grad_norm": 9.225534612779986, "learning_rate": 4.94458764915285e-06, "loss": 1.3805, "step": 1344 }, { "epoch": 0.19041551638706025, "grad_norm": 10.78111271425273, "learning_rate": 4.944467570255983e-06, "loss": 1.3714, "step": 1345 }, { "epoch": 0.19055708926169745, "grad_norm": 11.521000801694974, "learning_rate": 4.944347362855278e-06, "loss": 1.516, "step": 1346 }, { "epoch": 0.19069866213633468, "grad_norm": 9.098280993738083, "learning_rate": 4.9442270269570545e-06, "loss": 1.4196, "step": 1347 }, { "epoch": 0.1908402350109719, "grad_norm": 8.869595453612424, "learning_rate": 4.94410656256764e-06, "loss": 1.3407, "step": 1348 }, { "epoch": 0.19098180788560912, "grad_norm": 10.113305767761899, "learning_rate": 4.943985969693365e-06, "loss": 1.4174, "step": 1349 }, { "epoch": 0.19112338076024635, "grad_norm": 11.61574719446589, "learning_rate": 4.94386524834057e-06, "loss": 1.5299, "step": 1350 }, { "epoch": 0.19126495363488355, "grad_norm": 11.476767915324704, "learning_rate": 4.943744398515601e-06, "loss": 1.3625, "step": 1351 }, { "epoch": 0.19140652650952078, "grad_norm": 8.57043757386515, "learning_rate": 4.943623420224811e-06, "loss": 1.4615, "step": 1352 }, { "epoch": 0.19154809938415798, "grad_norm": 10.85613394473715, "learning_rate": 4.94350231347456e-06, "loss": 1.5389, "step": 1353 }, { "epoch": 0.1916896722587952, "grad_norm": 9.767020603402306, "learning_rate": 4.943381078271214e-06, "loss": 1.4154, "step": 1354 }, { "epoch": 0.19183124513343244, "grad_norm": 11.56796943590867, "learning_rate": 4.943259714621148e-06, "loss": 1.3023, "step": 1355 }, { "epoch": 0.19197281800806965, "grad_norm": 10.458139810380096, "learning_rate": 4.943138222530739e-06, "loss": 1.4922, "step": 1356 }, { "epoch": 0.19211439088270688, "grad_norm": 10.467323684318963, "learning_rate": 4.943016602006376e-06, "loss": 1.3893, "step": 1357 }, { "epoch": 0.1922559637573441, "grad_norm": 10.520513568714776, "learning_rate": 4.942894853054452e-06, "loss": 1.3297, "step": 1358 }, { "epoch": 0.1923975366319813, "grad_norm": 11.262506611575473, "learning_rate": 4.942772975681366e-06, "loss": 1.5517, "step": 1359 }, { "epoch": 0.19253910950661854, "grad_norm": 10.089802918035847, "learning_rate": 4.942650969893527e-06, "loss": 1.3762, "step": 1360 }, { "epoch": 0.19268068238125574, "grad_norm": 12.919759335003654, "learning_rate": 4.942528835697348e-06, "loss": 1.3492, "step": 1361 }, { "epoch": 0.19282225525589297, "grad_norm": 10.337501136567539, "learning_rate": 4.942406573099249e-06, "loss": 1.3081, "step": 1362 }, { "epoch": 0.1929638281305302, "grad_norm": 9.915372577078744, "learning_rate": 4.942284182105658e-06, "loss": 1.3189, "step": 1363 }, { "epoch": 0.1931054010051674, "grad_norm": 9.440456710371095, "learning_rate": 4.942161662723007e-06, "loss": 1.404, "step": 1364 }, { "epoch": 0.19324697387980463, "grad_norm": 9.267884980760257, "learning_rate": 4.94203901495774e-06, "loss": 1.2984, "step": 1365 }, { "epoch": 0.19338854675444184, "grad_norm": 8.356946018308943, "learning_rate": 4.9419162388163025e-06, "loss": 1.3929, "step": 1366 }, { "epoch": 0.19353011962907907, "grad_norm": 8.44305101687967, "learning_rate": 4.941793334305149e-06, "loss": 1.2583, "step": 1367 }, { "epoch": 0.1936716925037163, "grad_norm": 11.71486198523089, "learning_rate": 4.94167030143074e-06, "loss": 1.468, "step": 1368 }, { "epoch": 0.1938132653783535, "grad_norm": 10.06407200635613, "learning_rate": 4.941547140199545e-06, "loss": 1.4349, "step": 1369 }, { "epoch": 0.19395483825299073, "grad_norm": 9.51950760730035, "learning_rate": 4.9414238506180365e-06, "loss": 1.3515, "step": 1370 }, { "epoch": 0.19409641112762796, "grad_norm": 7.932155942768058, "learning_rate": 4.941300432692697e-06, "loss": 1.5073, "step": 1371 }, { "epoch": 0.19423798400226516, "grad_norm": 9.294788697104853, "learning_rate": 4.941176886430014e-06, "loss": 1.3969, "step": 1372 }, { "epoch": 0.1943795568769024, "grad_norm": 9.259619170364177, "learning_rate": 4.941053211836482e-06, "loss": 1.4073, "step": 1373 }, { "epoch": 0.1945211297515396, "grad_norm": 9.642528080245363, "learning_rate": 4.940929408918603e-06, "loss": 1.3788, "step": 1374 }, { "epoch": 0.19466270262617683, "grad_norm": 10.991841845578161, "learning_rate": 4.940805477682885e-06, "loss": 1.4656, "step": 1375 }, { "epoch": 0.19480427550081406, "grad_norm": 10.30256090641879, "learning_rate": 4.940681418135843e-06, "loss": 1.3944, "step": 1376 }, { "epoch": 0.19494584837545126, "grad_norm": 9.159377465561763, "learning_rate": 4.940557230283999e-06, "loss": 1.5978, "step": 1377 }, { "epoch": 0.1950874212500885, "grad_norm": 10.441637875049674, "learning_rate": 4.94043291413388e-06, "loss": 1.5533, "step": 1378 }, { "epoch": 0.1952289941247257, "grad_norm": 10.462395678220009, "learning_rate": 4.9403084696920234e-06, "loss": 1.4965, "step": 1379 }, { "epoch": 0.19537056699936292, "grad_norm": 9.704357026000535, "learning_rate": 4.940183896964969e-06, "loss": 1.0718, "step": 1380 }, { "epoch": 0.19551213987400015, "grad_norm": 9.32829798245702, "learning_rate": 4.940059195959268e-06, "loss": 1.3682, "step": 1381 }, { "epoch": 0.19565371274863735, "grad_norm": 9.641984098765143, "learning_rate": 4.939934366681474e-06, "loss": 1.312, "step": 1382 }, { "epoch": 0.19579528562327458, "grad_norm": 8.531528119035794, "learning_rate": 4.93980940913815e-06, "loss": 1.2792, "step": 1383 }, { "epoch": 0.1959368584979118, "grad_norm": 9.18557407667937, "learning_rate": 4.939684323335864e-06, "loss": 1.3971, "step": 1384 }, { "epoch": 0.19607843137254902, "grad_norm": 14.209544259802602, "learning_rate": 4.939559109281192e-06, "loss": 1.3889, "step": 1385 }, { "epoch": 0.19622000424718625, "grad_norm": 9.241449889337112, "learning_rate": 4.939433766980717e-06, "loss": 1.3047, "step": 1386 }, { "epoch": 0.19636157712182345, "grad_norm": 8.97744977664523, "learning_rate": 4.939308296441028e-06, "loss": 1.3446, "step": 1387 }, { "epoch": 0.19650314999646068, "grad_norm": 8.184111614730293, "learning_rate": 4.939182697668721e-06, "loss": 1.2236, "step": 1388 }, { "epoch": 0.1966447228710979, "grad_norm": 9.471581768957849, "learning_rate": 4.939056970670397e-06, "loss": 1.6158, "step": 1389 }, { "epoch": 0.1967862957457351, "grad_norm": 10.4675492694097, "learning_rate": 4.938931115452668e-06, "loss": 1.4516, "step": 1390 }, { "epoch": 0.19692786862037234, "grad_norm": 10.746604205660734, "learning_rate": 4.938805132022148e-06, "loss": 1.4387, "step": 1391 }, { "epoch": 0.19706944149500955, "grad_norm": 9.334096968155919, "learning_rate": 4.9386790203854605e-06, "loss": 1.2403, "step": 1392 }, { "epoch": 0.19721101436964678, "grad_norm": 10.209141498157155, "learning_rate": 4.938552780549236e-06, "loss": 1.3922, "step": 1393 }, { "epoch": 0.197352587244284, "grad_norm": 8.658130648030172, "learning_rate": 4.93842641252011e-06, "loss": 1.3079, "step": 1394 }, { "epoch": 0.1974941601189212, "grad_norm": 10.782423226472128, "learning_rate": 4.938299916304725e-06, "loss": 1.2893, "step": 1395 }, { "epoch": 0.19763573299355844, "grad_norm": 8.928600228399423, "learning_rate": 4.938173291909732e-06, "loss": 1.3279, "step": 1396 }, { "epoch": 0.19777730586819564, "grad_norm": 10.50895200315378, "learning_rate": 4.9380465393417875e-06, "loss": 1.393, "step": 1397 }, { "epoch": 0.19791887874283287, "grad_norm": 9.963414597020888, "learning_rate": 4.937919658607554e-06, "loss": 1.3332, "step": 1398 }, { "epoch": 0.1980604516174701, "grad_norm": 9.79007836845289, "learning_rate": 4.937792649713701e-06, "loss": 1.3706, "step": 1399 }, { "epoch": 0.1982020244921073, "grad_norm": 9.076923967184847, "learning_rate": 4.937665512666907e-06, "loss": 1.4893, "step": 1400 }, { "epoch": 0.19834359736674453, "grad_norm": 10.10727260608377, "learning_rate": 4.937538247473854e-06, "loss": 1.4697, "step": 1401 }, { "epoch": 0.19848517024138176, "grad_norm": 9.718934084997962, "learning_rate": 4.9374108541412336e-06, "loss": 1.3553, "step": 1402 }, { "epoch": 0.19862674311601897, "grad_norm": 9.911916369734234, "learning_rate": 4.937283332675741e-06, "loss": 1.4497, "step": 1403 }, { "epoch": 0.1987683159906562, "grad_norm": 11.253819135271124, "learning_rate": 4.937155683084082e-06, "loss": 1.5985, "step": 1404 }, { "epoch": 0.1989098888652934, "grad_norm": 10.103666082088216, "learning_rate": 4.937027905372965e-06, "loss": 1.3939, "step": 1405 }, { "epoch": 0.19905146173993063, "grad_norm": 10.927866502496574, "learning_rate": 4.936899999549108e-06, "loss": 1.5311, "step": 1406 }, { "epoch": 0.19919303461456786, "grad_norm": 10.733651793437026, "learning_rate": 4.936771965619236e-06, "loss": 1.3847, "step": 1407 }, { "epoch": 0.19933460748920506, "grad_norm": 9.43246750199912, "learning_rate": 4.936643803590079e-06, "loss": 1.4025, "step": 1408 }, { "epoch": 0.1994761803638423, "grad_norm": 9.15864401496592, "learning_rate": 4.936515513468373e-06, "loss": 1.3563, "step": 1409 }, { "epoch": 0.1996177532384795, "grad_norm": 8.389102279048686, "learning_rate": 4.9363870952608634e-06, "loss": 1.258, "step": 1410 }, { "epoch": 0.19975932611311673, "grad_norm": 8.7190982653338, "learning_rate": 4.936258548974301e-06, "loss": 1.3672, "step": 1411 }, { "epoch": 0.19990089898775396, "grad_norm": 11.76299460138393, "learning_rate": 4.936129874615443e-06, "loss": 1.4857, "step": 1412 }, { "epoch": 0.20004247186239116, "grad_norm": 8.861578450357246, "learning_rate": 4.9360010721910545e-06, "loss": 1.3943, "step": 1413 }, { "epoch": 0.2001840447370284, "grad_norm": 9.649234927193751, "learning_rate": 4.935872141707906e-06, "loss": 1.3514, "step": 1414 }, { "epoch": 0.20032561761166562, "grad_norm": 10.492978609415738, "learning_rate": 4.935743083172775e-06, "loss": 1.3357, "step": 1415 }, { "epoch": 0.20046719048630282, "grad_norm": 8.690188204677437, "learning_rate": 4.935613896592446e-06, "loss": 1.3472, "step": 1416 }, { "epoch": 0.20060876336094005, "grad_norm": 10.656528144853313, "learning_rate": 4.93548458197371e-06, "loss": 1.3655, "step": 1417 }, { "epoch": 0.20075033623557725, "grad_norm": 10.809358526318215, "learning_rate": 4.935355139323367e-06, "loss": 1.4321, "step": 1418 }, { "epoch": 0.20089190911021448, "grad_norm": 8.865083554084443, "learning_rate": 4.93522556864822e-06, "loss": 1.2932, "step": 1419 }, { "epoch": 0.20103348198485171, "grad_norm": 10.815000741000068, "learning_rate": 4.935095869955079e-06, "loss": 1.5396, "step": 1420 }, { "epoch": 0.20117505485948892, "grad_norm": 9.005548674089031, "learning_rate": 4.934966043250765e-06, "loss": 1.3457, "step": 1421 }, { "epoch": 0.20131662773412615, "grad_norm": 9.060395516021266, "learning_rate": 4.934836088542102e-06, "loss": 1.2628, "step": 1422 }, { "epoch": 0.20145820060876335, "grad_norm": 8.312645473497977, "learning_rate": 4.934706005835921e-06, "loss": 1.4745, "step": 1423 }, { "epoch": 0.20159977348340058, "grad_norm": 10.644330918413031, "learning_rate": 4.9345757951390605e-06, "loss": 1.3411, "step": 1424 }, { "epoch": 0.2017413463580378, "grad_norm": 11.138239151631005, "learning_rate": 4.934445456458366e-06, "loss": 1.2896, "step": 1425 }, { "epoch": 0.201882919232675, "grad_norm": 10.525070777347013, "learning_rate": 4.934314989800689e-06, "loss": 1.4747, "step": 1426 }, { "epoch": 0.20202449210731224, "grad_norm": 8.294145785376697, "learning_rate": 4.934184395172888e-06, "loss": 1.3776, "step": 1427 }, { "epoch": 0.20216606498194944, "grad_norm": 8.686104861419429, "learning_rate": 4.934053672581828e-06, "loss": 1.3788, "step": 1428 }, { "epoch": 0.20230763785658668, "grad_norm": 11.833327888881524, "learning_rate": 4.933922822034381e-06, "loss": 1.4218, "step": 1429 }, { "epoch": 0.2024492107312239, "grad_norm": 10.166543011356309, "learning_rate": 4.933791843537427e-06, "loss": 1.3424, "step": 1430 }, { "epoch": 0.2025907836058611, "grad_norm": 7.839016523372746, "learning_rate": 4.933660737097851e-06, "loss": 1.2105, "step": 1431 }, { "epoch": 0.20273235648049834, "grad_norm": 9.891923912536239, "learning_rate": 4.933529502722544e-06, "loss": 1.4459, "step": 1432 }, { "epoch": 0.20287392935513557, "grad_norm": 8.607093245584341, "learning_rate": 4.933398140418405e-06, "loss": 1.3991, "step": 1433 }, { "epoch": 0.20301550222977277, "grad_norm": 9.503408523238928, "learning_rate": 4.933266650192341e-06, "loss": 1.4349, "step": 1434 }, { "epoch": 0.20315707510441, "grad_norm": 9.388264541884109, "learning_rate": 4.933135032051263e-06, "loss": 1.4349, "step": 1435 }, { "epoch": 0.2032986479790472, "grad_norm": 9.108658044255888, "learning_rate": 4.933003286002091e-06, "loss": 1.2488, "step": 1436 }, { "epoch": 0.20344022085368443, "grad_norm": 10.038995622345332, "learning_rate": 4.932871412051749e-06, "loss": 1.3661, "step": 1437 }, { "epoch": 0.20358179372832166, "grad_norm": 9.773724765962118, "learning_rate": 4.932739410207172e-06, "loss": 1.4237, "step": 1438 }, { "epoch": 0.20372336660295887, "grad_norm": 9.72634875204394, "learning_rate": 4.932607280475299e-06, "loss": 1.3969, "step": 1439 }, { "epoch": 0.2038649394775961, "grad_norm": 8.614924641266866, "learning_rate": 4.932475022863074e-06, "loss": 1.4623, "step": 1440 }, { "epoch": 0.2040065123522333, "grad_norm": 9.338927999073288, "learning_rate": 4.932342637377451e-06, "loss": 1.3346, "step": 1441 }, { "epoch": 0.20414808522687053, "grad_norm": 9.639524919508643, "learning_rate": 4.93221012402539e-06, "loss": 1.2798, "step": 1442 }, { "epoch": 0.20428965810150776, "grad_norm": 11.458817091469005, "learning_rate": 4.9320774828138555e-06, "loss": 1.3737, "step": 1443 }, { "epoch": 0.20443123097614496, "grad_norm": 9.541156560819365, "learning_rate": 4.931944713749821e-06, "loss": 1.214, "step": 1444 }, { "epoch": 0.2045728038507822, "grad_norm": 13.484618855522426, "learning_rate": 4.9318118168402665e-06, "loss": 1.3556, "step": 1445 }, { "epoch": 0.20471437672541942, "grad_norm": 9.414953115288782, "learning_rate": 4.931678792092177e-06, "loss": 1.3065, "step": 1446 }, { "epoch": 0.20485594960005662, "grad_norm": 8.550091516551518, "learning_rate": 4.9315456395125475e-06, "loss": 1.3077, "step": 1447 }, { "epoch": 0.20499752247469386, "grad_norm": 8.23908598339627, "learning_rate": 4.931412359108377e-06, "loss": 1.2783, "step": 1448 }, { "epoch": 0.20513909534933106, "grad_norm": 9.594267293795038, "learning_rate": 4.931278950886671e-06, "loss": 1.405, "step": 1449 }, { "epoch": 0.2052806682239683, "grad_norm": 9.335629407834787, "learning_rate": 4.931145414854444e-06, "loss": 1.3645, "step": 1450 }, { "epoch": 0.20542224109860552, "grad_norm": 10.11307416683987, "learning_rate": 4.931011751018715e-06, "loss": 1.4034, "step": 1451 }, { "epoch": 0.20556381397324272, "grad_norm": 10.46357986547024, "learning_rate": 4.930877959386511e-06, "loss": 1.4326, "step": 1452 }, { "epoch": 0.20570538684787995, "grad_norm": 9.78150427734581, "learning_rate": 4.930744039964866e-06, "loss": 1.2919, "step": 1453 }, { "epoch": 0.20584695972251715, "grad_norm": 8.010249724382671, "learning_rate": 4.930609992760818e-06, "loss": 1.3415, "step": 1454 }, { "epoch": 0.20598853259715438, "grad_norm": 9.774920575924133, "learning_rate": 4.930475817781415e-06, "loss": 1.3131, "step": 1455 }, { "epoch": 0.2061301054717916, "grad_norm": 9.266830803311725, "learning_rate": 4.930341515033712e-06, "loss": 1.3776, "step": 1456 }, { "epoch": 0.20627167834642882, "grad_norm": 9.520081427089686, "learning_rate": 4.930207084524766e-06, "loss": 1.3398, "step": 1457 }, { "epoch": 0.20641325122106605, "grad_norm": 9.454012742406992, "learning_rate": 4.930072526261647e-06, "loss": 1.4492, "step": 1458 }, { "epoch": 0.20655482409570328, "grad_norm": 10.682764621620075, "learning_rate": 4.9299378402514265e-06, "loss": 1.4325, "step": 1459 }, { "epoch": 0.20669639697034048, "grad_norm": 9.799595991876599, "learning_rate": 4.9298030265011856e-06, "loss": 1.5617, "step": 1460 }, { "epoch": 0.2068379698449777, "grad_norm": 9.761590572887972, "learning_rate": 4.929668085018011e-06, "loss": 1.3869, "step": 1461 }, { "epoch": 0.2069795427196149, "grad_norm": 7.827007526121643, "learning_rate": 4.929533015808997e-06, "loss": 1.282, "step": 1462 }, { "epoch": 0.20712111559425214, "grad_norm": 10.306805882404044, "learning_rate": 4.929397818881244e-06, "loss": 1.2548, "step": 1463 }, { "epoch": 0.20726268846888937, "grad_norm": 9.453074557587525, "learning_rate": 4.929262494241859e-06, "loss": 1.3542, "step": 1464 }, { "epoch": 0.20740426134352657, "grad_norm": 10.337846898228419, "learning_rate": 4.929127041897957e-06, "loss": 1.462, "step": 1465 }, { "epoch": 0.2075458342181638, "grad_norm": 8.907188821132765, "learning_rate": 4.928991461856656e-06, "loss": 1.2085, "step": 1466 }, { "epoch": 0.207687407092801, "grad_norm": 10.152215995733318, "learning_rate": 4.928855754125086e-06, "loss": 1.3402, "step": 1467 }, { "epoch": 0.20782897996743824, "grad_norm": 7.677415140987904, "learning_rate": 4.92871991871038e-06, "loss": 1.25, "step": 1468 }, { "epoch": 0.20797055284207547, "grad_norm": 11.047024176856974, "learning_rate": 4.928583955619678e-06, "loss": 1.4294, "step": 1469 }, { "epoch": 0.20811212571671267, "grad_norm": 9.63685333813596, "learning_rate": 4.928447864860129e-06, "loss": 1.4023, "step": 1470 }, { "epoch": 0.2082536985913499, "grad_norm": 9.649631837136853, "learning_rate": 4.928311646438887e-06, "loss": 1.4555, "step": 1471 }, { "epoch": 0.2083952714659871, "grad_norm": 10.339229091335824, "learning_rate": 4.9281753003631114e-06, "loss": 1.4349, "step": 1472 }, { "epoch": 0.20853684434062433, "grad_norm": 10.985794605101114, "learning_rate": 4.928038826639971e-06, "loss": 1.3927, "step": 1473 }, { "epoch": 0.20867841721526156, "grad_norm": 8.813004283139145, "learning_rate": 4.92790222527664e-06, "loss": 1.2054, "step": 1474 }, { "epoch": 0.20881999008989877, "grad_norm": 11.576492668123501, "learning_rate": 4.927765496280299e-06, "loss": 1.3388, "step": 1475 }, { "epoch": 0.208961562964536, "grad_norm": 9.990122685409386, "learning_rate": 4.927628639658137e-06, "loss": 1.4458, "step": 1476 }, { "epoch": 0.20910313583917323, "grad_norm": 10.015028151702547, "learning_rate": 4.927491655417347e-06, "loss": 1.4468, "step": 1477 }, { "epoch": 0.20924470871381043, "grad_norm": 10.625184539987485, "learning_rate": 4.927354543565131e-06, "loss": 1.5867, "step": 1478 }, { "epoch": 0.20938628158844766, "grad_norm": 10.03079821558568, "learning_rate": 4.927217304108696e-06, "loss": 1.4135, "step": 1479 }, { "epoch": 0.20952785446308486, "grad_norm": 12.49812791156921, "learning_rate": 4.927079937055257e-06, "loss": 1.4131, "step": 1480 }, { "epoch": 0.2096694273377221, "grad_norm": 11.975690378186123, "learning_rate": 4.926942442412036e-06, "loss": 1.4465, "step": 1481 }, { "epoch": 0.20981100021235932, "grad_norm": 10.92930810691147, "learning_rate": 4.92680482018626e-06, "loss": 1.336, "step": 1482 }, { "epoch": 0.20995257308699652, "grad_norm": 9.474153393286983, "learning_rate": 4.9266670703851645e-06, "loss": 1.3764, "step": 1483 }, { "epoch": 0.21009414596163375, "grad_norm": 11.55842741304782, "learning_rate": 4.92652919301599e-06, "loss": 1.3923, "step": 1484 }, { "epoch": 0.21023571883627096, "grad_norm": 11.010146576531975, "learning_rate": 4.9263911880859855e-06, "loss": 1.4297, "step": 1485 }, { "epoch": 0.2103772917109082, "grad_norm": 8.994491162753759, "learning_rate": 4.926253055602405e-06, "loss": 1.463, "step": 1486 }, { "epoch": 0.21051886458554542, "grad_norm": 11.51499028313542, "learning_rate": 4.926114795572511e-06, "loss": 1.4958, "step": 1487 }, { "epoch": 0.21066043746018262, "grad_norm": 10.013220630863167, "learning_rate": 4.925976408003571e-06, "loss": 1.4517, "step": 1488 }, { "epoch": 0.21080201033481985, "grad_norm": 8.711222581517825, "learning_rate": 4.92583789290286e-06, "loss": 1.3842, "step": 1489 }, { "epoch": 0.21094358320945708, "grad_norm": 9.893552325859085, "learning_rate": 4.9256992502776605e-06, "loss": 1.4708, "step": 1490 }, { "epoch": 0.21108515608409428, "grad_norm": 9.82769920021081, "learning_rate": 4.925560480135258e-06, "loss": 1.531, "step": 1491 }, { "epoch": 0.2112267289587315, "grad_norm": 10.151662500589689, "learning_rate": 4.925421582482952e-06, "loss": 1.4268, "step": 1492 }, { "epoch": 0.21136830183336872, "grad_norm": 10.37730804876039, "learning_rate": 4.925282557328041e-06, "loss": 1.4085, "step": 1493 }, { "epoch": 0.21150987470800595, "grad_norm": 9.263557602275899, "learning_rate": 4.925143404677835e-06, "loss": 1.4033, "step": 1494 }, { "epoch": 0.21165144758264318, "grad_norm": 11.239835767530355, "learning_rate": 4.925004124539648e-06, "loss": 1.5579, "step": 1495 }, { "epoch": 0.21179302045728038, "grad_norm": 8.403023820397742, "learning_rate": 4.924864716920801e-06, "loss": 1.363, "step": 1496 }, { "epoch": 0.2119345933319176, "grad_norm": 9.122488433922555, "learning_rate": 4.9247251818286255e-06, "loss": 1.4555, "step": 1497 }, { "epoch": 0.2120761662065548, "grad_norm": 10.882268121143184, "learning_rate": 4.924585519270454e-06, "loss": 1.4714, "step": 1498 }, { "epoch": 0.21221773908119204, "grad_norm": 9.522579864864083, "learning_rate": 4.9244457292536305e-06, "loss": 1.4043, "step": 1499 }, { "epoch": 0.21235931195582927, "grad_norm": 7.675962644962224, "learning_rate": 4.924305811785502e-06, "loss": 1.3249, "step": 1500 }, { "epoch": 0.21250088483046647, "grad_norm": 11.385660060131903, "learning_rate": 4.9241657668734256e-06, "loss": 1.3023, "step": 1501 }, { "epoch": 0.2126424577051037, "grad_norm": 9.791932923504836, "learning_rate": 4.9240255945247616e-06, "loss": 1.4569, "step": 1502 }, { "epoch": 0.21278403057974093, "grad_norm": 11.497188722063415, "learning_rate": 4.9238852947468796e-06, "loss": 1.3869, "step": 1503 }, { "epoch": 0.21292560345437814, "grad_norm": 11.432940465407265, "learning_rate": 4.9237448675471555e-06, "loss": 1.4429, "step": 1504 }, { "epoch": 0.21306717632901537, "grad_norm": 8.152751771653257, "learning_rate": 4.9236043129329705e-06, "loss": 1.4326, "step": 1505 }, { "epoch": 0.21320874920365257, "grad_norm": 9.175219990018462, "learning_rate": 4.923463630911714e-06, "loss": 1.2867, "step": 1506 }, { "epoch": 0.2133503220782898, "grad_norm": 10.35566398698452, "learning_rate": 4.9233228214907815e-06, "loss": 1.4544, "step": 1507 }, { "epoch": 0.21349189495292703, "grad_norm": 8.200842316327368, "learning_rate": 4.923181884677574e-06, "loss": 1.2899, "step": 1508 }, { "epoch": 0.21363346782756423, "grad_norm": 10.63916364093135, "learning_rate": 4.923040820479504e-06, "loss": 1.5962, "step": 1509 }, { "epoch": 0.21377504070220146, "grad_norm": 9.402805039707358, "learning_rate": 4.922899628903983e-06, "loss": 1.5193, "step": 1510 }, { "epoch": 0.21391661357683867, "grad_norm": 10.463490545674233, "learning_rate": 4.9227583099584355e-06, "loss": 1.485, "step": 1511 }, { "epoch": 0.2140581864514759, "grad_norm": 10.342380951695795, "learning_rate": 4.92261686365029e-06, "loss": 1.2978, "step": 1512 }, { "epoch": 0.21419975932611313, "grad_norm": 7.959717182521778, "learning_rate": 4.9224752899869835e-06, "loss": 1.2951, "step": 1513 }, { "epoch": 0.21434133220075033, "grad_norm": 9.874261756052995, "learning_rate": 4.922333588975956e-06, "loss": 1.4394, "step": 1514 }, { "epoch": 0.21448290507538756, "grad_norm": 9.687301141482168, "learning_rate": 4.922191760624659e-06, "loss": 1.2053, "step": 1515 }, { "epoch": 0.2146244779500248, "grad_norm": 9.969404719266992, "learning_rate": 4.922049804940546e-06, "loss": 1.365, "step": 1516 }, { "epoch": 0.214766050824662, "grad_norm": 11.503286140969267, "learning_rate": 4.9219077219310804e-06, "loss": 1.5029, "step": 1517 }, { "epoch": 0.21490762369929922, "grad_norm": 8.344084200791741, "learning_rate": 4.921765511603733e-06, "loss": 1.2568, "step": 1518 }, { "epoch": 0.21504919657393642, "grad_norm": 10.369480365325689, "learning_rate": 4.921623173965978e-06, "loss": 1.258, "step": 1519 }, { "epoch": 0.21519076944857365, "grad_norm": 10.343594368400561, "learning_rate": 4.921480709025298e-06, "loss": 1.3097, "step": 1520 }, { "epoch": 0.21533234232321088, "grad_norm": 11.490199017506793, "learning_rate": 4.921338116789183e-06, "loss": 1.4997, "step": 1521 }, { "epoch": 0.2154739151978481, "grad_norm": 11.184934524392238, "learning_rate": 4.921195397265129e-06, "loss": 1.5516, "step": 1522 }, { "epoch": 0.21561548807248532, "grad_norm": 8.81242641120649, "learning_rate": 4.921052550460638e-06, "loss": 1.4745, "step": 1523 }, { "epoch": 0.21575706094712252, "grad_norm": 12.776630788730003, "learning_rate": 4.920909576383219e-06, "loss": 1.3625, "step": 1524 }, { "epoch": 0.21589863382175975, "grad_norm": 9.412588626196095, "learning_rate": 4.920766475040389e-06, "loss": 1.2281, "step": 1525 }, { "epoch": 0.21604020669639698, "grad_norm": 11.305747024035359, "learning_rate": 4.920623246439671e-06, "loss": 1.704, "step": 1526 }, { "epoch": 0.21618177957103418, "grad_norm": 8.806599257983905, "learning_rate": 4.920479890588593e-06, "loss": 1.504, "step": 1527 }, { "epoch": 0.2163233524456714, "grad_norm": 12.800416486640007, "learning_rate": 4.920336407494692e-06, "loss": 1.4451, "step": 1528 }, { "epoch": 0.21646492532030862, "grad_norm": 16.260756468495995, "learning_rate": 4.920192797165511e-06, "loss": 1.4871, "step": 1529 }, { "epoch": 0.21660649819494585, "grad_norm": 9.968345131619689, "learning_rate": 4.9200490596086e-06, "loss": 1.3504, "step": 1530 }, { "epoch": 0.21674807106958308, "grad_norm": 8.707142092862213, "learning_rate": 4.919905194831514e-06, "loss": 1.2746, "step": 1531 }, { "epoch": 0.21688964394422028, "grad_norm": 10.074383086823635, "learning_rate": 4.919761202841815e-06, "loss": 1.1736, "step": 1532 }, { "epoch": 0.2170312168188575, "grad_norm": 10.549729069851303, "learning_rate": 4.919617083647074e-06, "loss": 1.5386, "step": 1533 }, { "epoch": 0.21717278969349474, "grad_norm": 9.286707856396614, "learning_rate": 4.9194728372548685e-06, "loss": 1.3205, "step": 1534 }, { "epoch": 0.21731436256813194, "grad_norm": 10.474389586951514, "learning_rate": 4.919328463672779e-06, "loss": 1.2737, "step": 1535 }, { "epoch": 0.21745593544276917, "grad_norm": 7.7907568524492135, "learning_rate": 4.919183962908397e-06, "loss": 1.2586, "step": 1536 }, { "epoch": 0.21759750831740637, "grad_norm": 9.552640731561565, "learning_rate": 4.919039334969317e-06, "loss": 1.4414, "step": 1537 }, { "epoch": 0.2177390811920436, "grad_norm": 8.932601760212025, "learning_rate": 4.918894579863143e-06, "loss": 1.3331, "step": 1538 }, { "epoch": 0.21788065406668083, "grad_norm": 10.6544860990873, "learning_rate": 4.9187496975974845e-06, "loss": 1.3279, "step": 1539 }, { "epoch": 0.21802222694131804, "grad_norm": 10.126579255695134, "learning_rate": 4.918604688179959e-06, "loss": 1.3122, "step": 1540 }, { "epoch": 0.21816379981595527, "grad_norm": 9.577808766955373, "learning_rate": 4.918459551618187e-06, "loss": 1.3191, "step": 1541 }, { "epoch": 0.21830537269059247, "grad_norm": 8.286966445341198, "learning_rate": 4.9183142879198e-06, "loss": 1.3315, "step": 1542 }, { "epoch": 0.2184469455652297, "grad_norm": 10.14895771857571, "learning_rate": 4.918168897092435e-06, "loss": 1.2022, "step": 1543 }, { "epoch": 0.21858851843986693, "grad_norm": 11.990143860883718, "learning_rate": 4.9180233791437326e-06, "loss": 1.3755, "step": 1544 }, { "epoch": 0.21873009131450413, "grad_norm": 8.827266361036774, "learning_rate": 4.917877734081345e-06, "loss": 1.1649, "step": 1545 }, { "epoch": 0.21887166418914136, "grad_norm": 8.717116117347544, "learning_rate": 4.917731961912927e-06, "loss": 1.3766, "step": 1546 }, { "epoch": 0.2190132370637786, "grad_norm": 9.903261329528647, "learning_rate": 4.917586062646144e-06, "loss": 1.4392, "step": 1547 }, { "epoch": 0.2191548099384158, "grad_norm": 9.025065380081525, "learning_rate": 4.917440036288663e-06, "loss": 1.2832, "step": 1548 }, { "epoch": 0.21929638281305303, "grad_norm": 10.680669375241546, "learning_rate": 4.917293882848162e-06, "loss": 1.4145, "step": 1549 }, { "epoch": 0.21943795568769023, "grad_norm": 10.0970132471772, "learning_rate": 4.9171476023323245e-06, "loss": 1.3864, "step": 1550 }, { "epoch": 0.21957952856232746, "grad_norm": 9.670280263442997, "learning_rate": 4.917001194748839e-06, "loss": 1.3396, "step": 1551 }, { "epoch": 0.2197211014369647, "grad_norm": 9.124816056579236, "learning_rate": 4.916854660105404e-06, "loss": 1.14, "step": 1552 }, { "epoch": 0.2198626743116019, "grad_norm": 8.583098868220068, "learning_rate": 4.916707998409721e-06, "loss": 1.2965, "step": 1553 }, { "epoch": 0.22000424718623912, "grad_norm": 8.499189899212224, "learning_rate": 4.916561209669501e-06, "loss": 1.3778, "step": 1554 }, { "epoch": 0.22014582006087632, "grad_norm": 8.095214239988254, "learning_rate": 4.9164142938924595e-06, "loss": 1.2705, "step": 1555 }, { "epoch": 0.22028739293551355, "grad_norm": 10.851570585287664, "learning_rate": 4.916267251086321e-06, "loss": 1.2945, "step": 1556 }, { "epoch": 0.22042896581015078, "grad_norm": 9.497433767547173, "learning_rate": 4.916120081258814e-06, "loss": 1.3682, "step": 1557 }, { "epoch": 0.220570538684788, "grad_norm": 10.39243331891458, "learning_rate": 4.915972784417676e-06, "loss": 1.4524, "step": 1558 }, { "epoch": 0.22071211155942522, "grad_norm": 9.775144969128615, "learning_rate": 4.91582536057065e-06, "loss": 1.2462, "step": 1559 }, { "epoch": 0.22085368443406245, "grad_norm": 9.66073514111617, "learning_rate": 4.915677809725487e-06, "loss": 1.4234, "step": 1560 }, { "epoch": 0.22099525730869965, "grad_norm": 8.891409733738664, "learning_rate": 4.915530131889942e-06, "loss": 1.2246, "step": 1561 }, { "epoch": 0.22113683018333688, "grad_norm": 13.191685595391373, "learning_rate": 4.915382327071778e-06, "loss": 1.5836, "step": 1562 }, { "epoch": 0.22127840305797408, "grad_norm": 9.54100303063396, "learning_rate": 4.915234395278768e-06, "loss": 1.4657, "step": 1563 }, { "epoch": 0.2214199759326113, "grad_norm": 8.921518402188381, "learning_rate": 4.915086336518686e-06, "loss": 1.3817, "step": 1564 }, { "epoch": 0.22156154880724854, "grad_norm": 9.487431292982443, "learning_rate": 4.914938150799315e-06, "loss": 1.3946, "step": 1565 }, { "epoch": 0.22170312168188575, "grad_norm": 9.859678333289454, "learning_rate": 4.914789838128447e-06, "loss": 1.4318, "step": 1566 }, { "epoch": 0.22184469455652298, "grad_norm": 10.066287257979292, "learning_rate": 4.914641398513879e-06, "loss": 1.3646, "step": 1567 }, { "epoch": 0.22198626743116018, "grad_norm": 8.845870232261811, "learning_rate": 4.914492831963411e-06, "loss": 1.2332, "step": 1568 }, { "epoch": 0.2221278403057974, "grad_norm": 9.381856623484143, "learning_rate": 4.914344138484856e-06, "loss": 1.2141, "step": 1569 }, { "epoch": 0.22226941318043464, "grad_norm": 10.420039574370403, "learning_rate": 4.91419531808603e-06, "loss": 1.3892, "step": 1570 }, { "epoch": 0.22241098605507184, "grad_norm": 9.315888268488251, "learning_rate": 4.914046370774757e-06, "loss": 1.4113, "step": 1571 }, { "epoch": 0.22255255892970907, "grad_norm": 11.676591484516749, "learning_rate": 4.913897296558865e-06, "loss": 1.463, "step": 1572 }, { "epoch": 0.22269413180434627, "grad_norm": 11.136296398405735, "learning_rate": 4.913748095446192e-06, "loss": 1.5605, "step": 1573 }, { "epoch": 0.2228357046789835, "grad_norm": 10.908263799350056, "learning_rate": 4.9135987674445815e-06, "loss": 1.4836, "step": 1574 }, { "epoch": 0.22297727755362073, "grad_norm": 9.158242070787844, "learning_rate": 4.913449312561884e-06, "loss": 1.502, "step": 1575 }, { "epoch": 0.22311885042825794, "grad_norm": 9.901404895705024, "learning_rate": 4.913299730805956e-06, "loss": 1.3387, "step": 1576 }, { "epoch": 0.22326042330289517, "grad_norm": 8.824922071766634, "learning_rate": 4.913150022184659e-06, "loss": 1.232, "step": 1577 }, { "epoch": 0.2234019961775324, "grad_norm": 10.485567255351706, "learning_rate": 4.913000186705866e-06, "loss": 1.4787, "step": 1578 }, { "epoch": 0.2235435690521696, "grad_norm": 9.56291287596723, "learning_rate": 4.912850224377452e-06, "loss": 1.2489, "step": 1579 }, { "epoch": 0.22368514192680683, "grad_norm": 10.293026565302522, "learning_rate": 4.912700135207301e-06, "loss": 1.3544, "step": 1580 }, { "epoch": 0.22382671480144403, "grad_norm": 9.67762499149839, "learning_rate": 4.9125499192033035e-06, "loss": 1.4636, "step": 1581 }, { "epoch": 0.22396828767608126, "grad_norm": 9.821962107248758, "learning_rate": 4.912399576373354e-06, "loss": 1.3858, "step": 1582 }, { "epoch": 0.2241098605507185, "grad_norm": 8.301830815998416, "learning_rate": 4.9122491067253586e-06, "loss": 1.4359, "step": 1583 }, { "epoch": 0.2242514334253557, "grad_norm": 9.757385195682991, "learning_rate": 4.912098510267226e-06, "loss": 1.3356, "step": 1584 }, { "epoch": 0.22439300629999293, "grad_norm": 8.647216490139689, "learning_rate": 4.911947787006873e-06, "loss": 1.2541, "step": 1585 }, { "epoch": 0.22453457917463013, "grad_norm": 9.041208235081065, "learning_rate": 4.911796936952224e-06, "loss": 1.3042, "step": 1586 }, { "epoch": 0.22467615204926736, "grad_norm": 9.185618512837276, "learning_rate": 4.911645960111208e-06, "loss": 1.4306, "step": 1587 }, { "epoch": 0.2248177249239046, "grad_norm": 10.021144637402955, "learning_rate": 4.911494856491762e-06, "loss": 1.5394, "step": 1588 }, { "epoch": 0.2249592977985418, "grad_norm": 9.980277640048945, "learning_rate": 4.91134362610183e-06, "loss": 1.4319, "step": 1589 }, { "epoch": 0.22510087067317902, "grad_norm": 9.069226524405297, "learning_rate": 4.9111922689493605e-06, "loss": 1.3635, "step": 1590 }, { "epoch": 0.22524244354781625, "grad_norm": 7.0769618680777535, "learning_rate": 4.911040785042313e-06, "loss": 1.3485, "step": 1591 }, { "epoch": 0.22538401642245345, "grad_norm": 10.317627158207513, "learning_rate": 4.910889174388647e-06, "loss": 1.5086, "step": 1592 }, { "epoch": 0.22552558929709068, "grad_norm": 11.564739938228927, "learning_rate": 4.910737436996335e-06, "loss": 1.3707, "step": 1593 }, { "epoch": 0.2256671621717279, "grad_norm": 8.329061659481301, "learning_rate": 4.910585572873355e-06, "loss": 1.2721, "step": 1594 }, { "epoch": 0.22580873504636512, "grad_norm": 10.209611919025589, "learning_rate": 4.910433582027688e-06, "loss": 1.3935, "step": 1595 }, { "epoch": 0.22595030792100235, "grad_norm": 11.074631068713101, "learning_rate": 4.910281464467325e-06, "loss": 1.309, "step": 1596 }, { "epoch": 0.22609188079563955, "grad_norm": 8.066686681717352, "learning_rate": 4.910129220200263e-06, "loss": 1.2175, "step": 1597 }, { "epoch": 0.22623345367027678, "grad_norm": 9.460936693589973, "learning_rate": 4.909976849234504e-06, "loss": 1.4403, "step": 1598 }, { "epoch": 0.22637502654491398, "grad_norm": 7.817846315213723, "learning_rate": 4.90982435157806e-06, "loss": 1.2833, "step": 1599 }, { "epoch": 0.2265165994195512, "grad_norm": 13.046914179394765, "learning_rate": 4.909671727238946e-06, "loss": 1.381, "step": 1600 }, { "epoch": 0.22665817229418844, "grad_norm": 11.524448446121172, "learning_rate": 4.909518976225186e-06, "loss": 1.3667, "step": 1601 }, { "epoch": 0.22679974516882564, "grad_norm": 8.563768724435814, "learning_rate": 4.90936609854481e-06, "loss": 1.2994, "step": 1602 }, { "epoch": 0.22694131804346288, "grad_norm": 11.121294754725476, "learning_rate": 4.909213094205855e-06, "loss": 1.5774, "step": 1603 }, { "epoch": 0.2270828909181001, "grad_norm": 9.873572137671983, "learning_rate": 4.909059963216363e-06, "loss": 1.4187, "step": 1604 }, { "epoch": 0.2272244637927373, "grad_norm": 9.402712945857036, "learning_rate": 4.908906705584387e-06, "loss": 1.5649, "step": 1605 }, { "epoch": 0.22736603666737454, "grad_norm": 10.690728664704022, "learning_rate": 4.90875332131798e-06, "loss": 1.4951, "step": 1606 }, { "epoch": 0.22750760954201174, "grad_norm": 9.96713157598651, "learning_rate": 4.908599810425208e-06, "loss": 1.4153, "step": 1607 }, { "epoch": 0.22764918241664897, "grad_norm": 9.447231101019717, "learning_rate": 4.90844617291414e-06, "loss": 1.4038, "step": 1608 }, { "epoch": 0.2277907552912862, "grad_norm": 7.696831722656702, "learning_rate": 4.908292408792852e-06, "loss": 1.1037, "step": 1609 }, { "epoch": 0.2279323281659234, "grad_norm": 12.810625339226467, "learning_rate": 4.908138518069428e-06, "loss": 1.4156, "step": 1610 }, { "epoch": 0.22807390104056063, "grad_norm": 11.009752978361107, "learning_rate": 4.907984500751956e-06, "loss": 1.3505, "step": 1611 }, { "epoch": 0.22821547391519784, "grad_norm": 8.448708556313873, "learning_rate": 4.907830356848537e-06, "loss": 1.3187, "step": 1612 }, { "epoch": 0.22835704678983507, "grad_norm": 8.473776814009934, "learning_rate": 4.907676086367269e-06, "loss": 1.3345, "step": 1613 }, { "epoch": 0.2284986196644723, "grad_norm": 9.500966876167459, "learning_rate": 4.907521689316265e-06, "loss": 1.3949, "step": 1614 }, { "epoch": 0.2286401925391095, "grad_norm": 9.174339782077602, "learning_rate": 4.907367165703643e-06, "loss": 1.4587, "step": 1615 }, { "epoch": 0.22878176541374673, "grad_norm": 13.026373886311438, "learning_rate": 4.907212515537522e-06, "loss": 1.4158, "step": 1616 }, { "epoch": 0.22892333828838393, "grad_norm": 10.207110854669489, "learning_rate": 4.907057738826034e-06, "loss": 1.361, "step": 1617 }, { "epoch": 0.22906491116302116, "grad_norm": 8.539587397379364, "learning_rate": 4.906902835577316e-06, "loss": 1.391, "step": 1618 }, { "epoch": 0.2292064840376584, "grad_norm": 9.698584730586582, "learning_rate": 4.906747805799511e-06, "loss": 1.3132, "step": 1619 }, { "epoch": 0.2293480569122956, "grad_norm": 11.331294007171163, "learning_rate": 4.906592649500767e-06, "loss": 1.5416, "step": 1620 }, { "epoch": 0.22948962978693282, "grad_norm": 10.57046262827608, "learning_rate": 4.906437366689244e-06, "loss": 1.6555, "step": 1621 }, { "epoch": 0.22963120266157006, "grad_norm": 10.152667261702979, "learning_rate": 4.9062819573731015e-06, "loss": 1.2847, "step": 1622 }, { "epoch": 0.22977277553620726, "grad_norm": 8.514039954262222, "learning_rate": 4.906126421560511e-06, "loss": 1.3593, "step": 1623 }, { "epoch": 0.2299143484108445, "grad_norm": 10.430494687280499, "learning_rate": 4.905970759259648e-06, "loss": 1.3334, "step": 1624 }, { "epoch": 0.2300559212854817, "grad_norm": 8.974351575799485, "learning_rate": 4.905814970478697e-06, "loss": 1.2964, "step": 1625 }, { "epoch": 0.23019749416011892, "grad_norm": 10.66846864102488, "learning_rate": 4.905659055225847e-06, "loss": 1.4034, "step": 1626 }, { "epoch": 0.23033906703475615, "grad_norm": 12.024542824030904, "learning_rate": 4.905503013509293e-06, "loss": 1.2904, "step": 1627 }, { "epoch": 0.23048063990939335, "grad_norm": 8.517342770409815, "learning_rate": 4.90534684533724e-06, "loss": 1.3125, "step": 1628 }, { "epoch": 0.23062221278403058, "grad_norm": 10.118128952938918, "learning_rate": 4.905190550717897e-06, "loss": 1.3416, "step": 1629 }, { "epoch": 0.23076378565866779, "grad_norm": 10.58166715066676, "learning_rate": 4.90503412965948e-06, "loss": 1.4196, "step": 1630 }, { "epoch": 0.23090535853330502, "grad_norm": 10.459125341895492, "learning_rate": 4.904877582170212e-06, "loss": 1.2175, "step": 1631 }, { "epoch": 0.23104693140794225, "grad_norm": 8.637466046367328, "learning_rate": 4.904720908258323e-06, "loss": 1.4072, "step": 1632 }, { "epoch": 0.23118850428257945, "grad_norm": 8.981553565133337, "learning_rate": 4.904564107932048e-06, "loss": 1.3897, "step": 1633 }, { "epoch": 0.23133007715721668, "grad_norm": 8.679080093789553, "learning_rate": 4.904407181199631e-06, "loss": 1.2936, "step": 1634 }, { "epoch": 0.2314716500318539, "grad_norm": 9.810835581743877, "learning_rate": 4.904250128069322e-06, "loss": 1.2341, "step": 1635 }, { "epoch": 0.2316132229064911, "grad_norm": 12.685215335592517, "learning_rate": 4.904092948549376e-06, "loss": 1.2228, "step": 1636 }, { "epoch": 0.23175479578112834, "grad_norm": 8.812257587534678, "learning_rate": 4.9039356426480565e-06, "loss": 1.3251, "step": 1637 }, { "epoch": 0.23189636865576554, "grad_norm": 10.319637903174545, "learning_rate": 4.903778210373632e-06, "loss": 1.3607, "step": 1638 }, { "epoch": 0.23203794153040277, "grad_norm": 8.831457214732149, "learning_rate": 4.90362065173438e-06, "loss": 1.5066, "step": 1639 }, { "epoch": 0.23217951440504, "grad_norm": 8.415631926229512, "learning_rate": 4.9034629667385825e-06, "loss": 1.3136, "step": 1640 }, { "epoch": 0.2323210872796772, "grad_norm": 10.356234205253495, "learning_rate": 4.903305155394529e-06, "loss": 1.2954, "step": 1641 }, { "epoch": 0.23246266015431444, "grad_norm": 10.94311030776935, "learning_rate": 4.903147217710515e-06, "loss": 1.4084, "step": 1642 }, { "epoch": 0.23260423302895164, "grad_norm": 9.512761428990155, "learning_rate": 4.902989153694843e-06, "loss": 1.3351, "step": 1643 }, { "epoch": 0.23274580590358887, "grad_norm": 9.122249240915252, "learning_rate": 4.902830963355825e-06, "loss": 1.5306, "step": 1644 }, { "epoch": 0.2328873787782261, "grad_norm": 9.457005837558793, "learning_rate": 4.902672646701774e-06, "loss": 1.1273, "step": 1645 }, { "epoch": 0.2330289516528633, "grad_norm": 11.188009751283927, "learning_rate": 4.902514203741013e-06, "loss": 1.2471, "step": 1646 }, { "epoch": 0.23317052452750053, "grad_norm": 11.38010166330633, "learning_rate": 4.902355634481872e-06, "loss": 1.3883, "step": 1647 }, { "epoch": 0.23331209740213776, "grad_norm": 9.37294655245592, "learning_rate": 4.9021969389326866e-06, "loss": 1.4865, "step": 1648 }, { "epoch": 0.23345367027677497, "grad_norm": 8.538683660397764, "learning_rate": 4.902038117101798e-06, "loss": 1.2729, "step": 1649 }, { "epoch": 0.2335952431514122, "grad_norm": 10.898910019998487, "learning_rate": 4.901879168997559e-06, "loss": 1.1859, "step": 1650 }, { "epoch": 0.2337368160260494, "grad_norm": 9.525670353886804, "learning_rate": 4.901720094628322e-06, "loss": 1.2875, "step": 1651 }, { "epoch": 0.23387838890068663, "grad_norm": 8.954051663840815, "learning_rate": 4.901560894002449e-06, "loss": 1.4121, "step": 1652 }, { "epoch": 0.23401996177532386, "grad_norm": 8.598487685547711, "learning_rate": 4.9014015671283124e-06, "loss": 1.3599, "step": 1653 }, { "epoch": 0.23416153464996106, "grad_norm": 8.721104020390575, "learning_rate": 4.901242114014285e-06, "loss": 1.3055, "step": 1654 }, { "epoch": 0.2343031075245983, "grad_norm": 7.788706443247301, "learning_rate": 4.901082534668751e-06, "loss": 1.2268, "step": 1655 }, { "epoch": 0.2344446803992355, "grad_norm": 9.324060011216865, "learning_rate": 4.900922829100097e-06, "loss": 1.3695, "step": 1656 }, { "epoch": 0.23458625327387272, "grad_norm": 9.354517419663356, "learning_rate": 4.900762997316722e-06, "loss": 1.362, "step": 1657 }, { "epoch": 0.23472782614850995, "grad_norm": 10.500341863970617, "learning_rate": 4.900603039327024e-06, "loss": 1.3581, "step": 1658 }, { "epoch": 0.23486939902314716, "grad_norm": 15.164161111863805, "learning_rate": 4.9004429551394155e-06, "loss": 1.6463, "step": 1659 }, { "epoch": 0.2350109718977844, "grad_norm": 9.727202931350604, "learning_rate": 4.900282744762311e-06, "loss": 1.4743, "step": 1660 }, { "epoch": 0.23515254477242162, "grad_norm": 12.043032101276772, "learning_rate": 4.900122408204132e-06, "loss": 1.3099, "step": 1661 }, { "epoch": 0.23529411764705882, "grad_norm": 10.007947815122481, "learning_rate": 4.899961945473307e-06, "loss": 1.4496, "step": 1662 }, { "epoch": 0.23543569052169605, "grad_norm": 10.400705871102572, "learning_rate": 4.899801356578273e-06, "loss": 1.2004, "step": 1663 }, { "epoch": 0.23557726339633325, "grad_norm": 8.743759545736166, "learning_rate": 4.89964064152747e-06, "loss": 1.3951, "step": 1664 }, { "epoch": 0.23571883627097048, "grad_norm": 9.95839504930427, "learning_rate": 4.899479800329348e-06, "loss": 1.3875, "step": 1665 }, { "epoch": 0.2358604091456077, "grad_norm": 8.988035090066873, "learning_rate": 4.899318832992363e-06, "loss": 1.2153, "step": 1666 }, { "epoch": 0.23600198202024492, "grad_norm": 9.662321188235719, "learning_rate": 4.8991577395249755e-06, "loss": 1.2486, "step": 1667 }, { "epoch": 0.23614355489488215, "grad_norm": 9.509140034811779, "learning_rate": 4.898996519935654e-06, "loss": 1.3043, "step": 1668 }, { "epoch": 0.23628512776951935, "grad_norm": 10.722584741906369, "learning_rate": 4.898835174232875e-06, "loss": 1.397, "step": 1669 }, { "epoch": 0.23642670064415658, "grad_norm": 10.266395667656825, "learning_rate": 4.898673702425118e-06, "loss": 1.3905, "step": 1670 }, { "epoch": 0.2365682735187938, "grad_norm": 11.577767844103333, "learning_rate": 4.898512104520875e-06, "loss": 1.3519, "step": 1671 }, { "epoch": 0.236709846393431, "grad_norm": 8.060713695799183, "learning_rate": 4.898350380528638e-06, "loss": 1.2698, "step": 1672 }, { "epoch": 0.23685141926806824, "grad_norm": 9.369060618599924, "learning_rate": 4.8981885304569095e-06, "loss": 1.3674, "step": 1673 }, { "epoch": 0.23699299214270544, "grad_norm": 9.038452233342502, "learning_rate": 4.898026554314199e-06, "loss": 1.3332, "step": 1674 }, { "epoch": 0.23713456501734267, "grad_norm": 9.613269742348862, "learning_rate": 4.89786445210902e-06, "loss": 1.3539, "step": 1675 }, { "epoch": 0.2372761378919799, "grad_norm": 9.799111726947858, "learning_rate": 4.897702223849895e-06, "loss": 1.312, "step": 1676 }, { "epoch": 0.2374177107666171, "grad_norm": 8.748959288695652, "learning_rate": 4.897539869545351e-06, "loss": 1.4057, "step": 1677 }, { "epoch": 0.23755928364125434, "grad_norm": 7.760409593685605, "learning_rate": 4.897377389203925e-06, "loss": 1.2402, "step": 1678 }, { "epoch": 0.23770085651589157, "grad_norm": 11.64249801416198, "learning_rate": 4.897214782834156e-06, "loss": 1.4157, "step": 1679 }, { "epoch": 0.23784242939052877, "grad_norm": 8.229238105817943, "learning_rate": 4.897052050444595e-06, "loss": 1.3665, "step": 1680 }, { "epoch": 0.237984002265166, "grad_norm": 7.593852226919293, "learning_rate": 4.8968891920437936e-06, "loss": 1.1967, "step": 1681 }, { "epoch": 0.2381255751398032, "grad_norm": 9.979226469897354, "learning_rate": 4.896726207640315e-06, "loss": 1.3473, "step": 1682 }, { "epoch": 0.23826714801444043, "grad_norm": 8.58880939301547, "learning_rate": 4.896563097242727e-06, "loss": 1.5724, "step": 1683 }, { "epoch": 0.23840872088907766, "grad_norm": 9.94291222937597, "learning_rate": 4.896399860859603e-06, "loss": 1.4598, "step": 1684 }, { "epoch": 0.23855029376371487, "grad_norm": 9.952034164370069, "learning_rate": 4.896236498499526e-06, "loss": 1.5053, "step": 1685 }, { "epoch": 0.2386918666383521, "grad_norm": 7.619764187585733, "learning_rate": 4.896073010171083e-06, "loss": 1.2752, "step": 1686 }, { "epoch": 0.2388334395129893, "grad_norm": 9.338836092260399, "learning_rate": 4.895909395882868e-06, "loss": 1.3313, "step": 1687 }, { "epoch": 0.23897501238762653, "grad_norm": 9.362751601623902, "learning_rate": 4.895745655643482e-06, "loss": 1.4038, "step": 1688 }, { "epoch": 0.23911658526226376, "grad_norm": 9.66912911281533, "learning_rate": 4.895581789461534e-06, "loss": 1.2267, "step": 1689 }, { "epoch": 0.23925815813690096, "grad_norm": 9.689796618520989, "learning_rate": 4.895417797345638e-06, "loss": 1.143, "step": 1690 }, { "epoch": 0.2393997310115382, "grad_norm": 9.572147464392623, "learning_rate": 4.895253679304414e-06, "loss": 1.4366, "step": 1691 }, { "epoch": 0.23954130388617542, "grad_norm": 8.358240011295786, "learning_rate": 4.8950894353464905e-06, "loss": 1.378, "step": 1692 }, { "epoch": 0.23968287676081262, "grad_norm": 9.216533019308079, "learning_rate": 4.8949250654805e-06, "loss": 1.4015, "step": 1693 }, { "epoch": 0.23982444963544985, "grad_norm": 8.690030614449993, "learning_rate": 4.894760569715086e-06, "loss": 1.3284, "step": 1694 }, { "epoch": 0.23996602251008706, "grad_norm": 11.055928665416811, "learning_rate": 4.894595948058893e-06, "loss": 1.4149, "step": 1695 }, { "epoch": 0.2401075953847243, "grad_norm": 8.332000117147649, "learning_rate": 4.894431200520578e-06, "loss": 1.4507, "step": 1696 }, { "epoch": 0.24024916825936152, "grad_norm": 9.263371880354907, "learning_rate": 4.894266327108799e-06, "loss": 1.3002, "step": 1697 }, { "epoch": 0.24039074113399872, "grad_norm": 9.191360596731075, "learning_rate": 4.894101327832225e-06, "loss": 1.2742, "step": 1698 }, { "epoch": 0.24053231400863595, "grad_norm": 10.207296970206572, "learning_rate": 4.8939362026995295e-06, "loss": 1.217, "step": 1699 }, { "epoch": 0.24067388688327315, "grad_norm": 10.276559797637246, "learning_rate": 4.893770951719392e-06, "loss": 1.5503, "step": 1700 }, { "epoch": 0.24081545975791038, "grad_norm": 9.112668418636103, "learning_rate": 4.893605574900501e-06, "loss": 1.4779, "step": 1701 }, { "epoch": 0.2409570326325476, "grad_norm": 11.374350728379511, "learning_rate": 4.893440072251549e-06, "loss": 1.5177, "step": 1702 }, { "epoch": 0.24109860550718482, "grad_norm": 11.652303155309777, "learning_rate": 4.893274443781239e-06, "loss": 1.5781, "step": 1703 }, { "epoch": 0.24124017838182205, "grad_norm": 12.607579062004232, "learning_rate": 4.893108689498274e-06, "loss": 1.3777, "step": 1704 }, { "epoch": 0.24138175125645928, "grad_norm": 8.336872366597296, "learning_rate": 4.89294280941137e-06, "loss": 1.314, "step": 1705 }, { "epoch": 0.24152332413109648, "grad_norm": 10.876179598269266, "learning_rate": 4.892776803529246e-06, "loss": 1.3776, "step": 1706 }, { "epoch": 0.2416648970057337, "grad_norm": 8.940338324304475, "learning_rate": 4.892610671860631e-06, "loss": 1.2331, "step": 1707 }, { "epoch": 0.2418064698803709, "grad_norm": 9.929130820720436, "learning_rate": 4.892444414414257e-06, "loss": 1.2559, "step": 1708 }, { "epoch": 0.24194804275500814, "grad_norm": 9.012840859240992, "learning_rate": 4.892278031198864e-06, "loss": 1.1816, "step": 1709 }, { "epoch": 0.24208961562964537, "grad_norm": 8.873244420883173, "learning_rate": 4.892111522223198e-06, "loss": 1.1529, "step": 1710 }, { "epoch": 0.24223118850428257, "grad_norm": 8.357351810511986, "learning_rate": 4.891944887496013e-06, "loss": 1.346, "step": 1711 }, { "epoch": 0.2423727613789198, "grad_norm": 13.78304301597704, "learning_rate": 4.8917781270260686e-06, "loss": 1.4652, "step": 1712 }, { "epoch": 0.242514334253557, "grad_norm": 9.061841638262784, "learning_rate": 4.891611240822132e-06, "loss": 1.356, "step": 1713 }, { "epoch": 0.24265590712819424, "grad_norm": 10.13390345057164, "learning_rate": 4.891444228892975e-06, "loss": 1.4301, "step": 1714 }, { "epoch": 0.24279748000283147, "grad_norm": 9.907586907027992, "learning_rate": 4.891277091247379e-06, "loss": 1.4988, "step": 1715 }, { "epoch": 0.24293905287746867, "grad_norm": 10.640612810886925, "learning_rate": 4.891109827894129e-06, "loss": 1.4077, "step": 1716 }, { "epoch": 0.2430806257521059, "grad_norm": 9.641339193688495, "learning_rate": 4.890942438842018e-06, "loss": 1.4508, "step": 1717 }, { "epoch": 0.2432221986267431, "grad_norm": 8.104592855535921, "learning_rate": 4.890774924099845e-06, "loss": 1.3778, "step": 1718 }, { "epoch": 0.24336377150138033, "grad_norm": 8.739912921812172, "learning_rate": 4.890607283676418e-06, "loss": 1.2415, "step": 1719 }, { "epoch": 0.24350534437601756, "grad_norm": 7.720582319839733, "learning_rate": 4.890439517580548e-06, "loss": 1.4206, "step": 1720 }, { "epoch": 0.24364691725065477, "grad_norm": 9.93679519169237, "learning_rate": 4.890271625821056e-06, "loss": 1.3853, "step": 1721 }, { "epoch": 0.243788490125292, "grad_norm": 10.219665672970185, "learning_rate": 4.890103608406765e-06, "loss": 1.4307, "step": 1722 }, { "epoch": 0.24393006299992923, "grad_norm": 9.05278725881263, "learning_rate": 4.889935465346511e-06, "loss": 1.3939, "step": 1723 }, { "epoch": 0.24407163587456643, "grad_norm": 8.912648897144413, "learning_rate": 4.8897671966491315e-06, "loss": 1.3925, "step": 1724 }, { "epoch": 0.24421320874920366, "grad_norm": 8.3541113137356, "learning_rate": 4.889598802323471e-06, "loss": 1.3552, "step": 1725 }, { "epoch": 0.24435478162384086, "grad_norm": 10.266420191328713, "learning_rate": 4.8894302823783845e-06, "loss": 1.3436, "step": 1726 }, { "epoch": 0.2444963544984781, "grad_norm": 9.228732429662308, "learning_rate": 4.88926163682273e-06, "loss": 1.3366, "step": 1727 }, { "epoch": 0.24463792737311532, "grad_norm": 9.289283846053936, "learning_rate": 4.889092865665372e-06, "loss": 1.3931, "step": 1728 }, { "epoch": 0.24477950024775252, "grad_norm": 12.577450486572687, "learning_rate": 4.888923968915183e-06, "loss": 1.4741, "step": 1729 }, { "epoch": 0.24492107312238975, "grad_norm": 9.58980158468327, "learning_rate": 4.888754946581044e-06, "loss": 1.1805, "step": 1730 }, { "epoch": 0.24506264599702696, "grad_norm": 11.5471828971974, "learning_rate": 4.8885857986718365e-06, "loss": 1.575, "step": 1731 }, { "epoch": 0.2452042188716642, "grad_norm": 9.41806758036075, "learning_rate": 4.888416525196455e-06, "loss": 1.587, "step": 1732 }, { "epoch": 0.24534579174630142, "grad_norm": 7.028531918209731, "learning_rate": 4.8882471261637985e-06, "loss": 1.2723, "step": 1733 }, { "epoch": 0.24548736462093862, "grad_norm": 8.448151823095642, "learning_rate": 4.888077601582772e-06, "loss": 1.274, "step": 1734 }, { "epoch": 0.24562893749557585, "grad_norm": 9.243440106235264, "learning_rate": 4.887907951462284e-06, "loss": 1.3902, "step": 1735 }, { "epoch": 0.24577051037021308, "grad_norm": 9.17627970322911, "learning_rate": 4.8877381758112576e-06, "loss": 1.5082, "step": 1736 }, { "epoch": 0.24591208324485028, "grad_norm": 9.767402182041149, "learning_rate": 4.887568274638616e-06, "loss": 1.3993, "step": 1737 }, { "epoch": 0.2460536561194875, "grad_norm": 9.856089581119617, "learning_rate": 4.887398247953289e-06, "loss": 1.397, "step": 1738 }, { "epoch": 0.24619522899412472, "grad_norm": 8.905495702021897, "learning_rate": 4.887228095764216e-06, "loss": 1.5131, "step": 1739 }, { "epoch": 0.24633680186876195, "grad_norm": 11.292939361882377, "learning_rate": 4.887057818080343e-06, "loss": 1.4903, "step": 1740 }, { "epoch": 0.24647837474339918, "grad_norm": 10.18083525223277, "learning_rate": 4.886887414910621e-06, "loss": 1.4748, "step": 1741 }, { "epoch": 0.24661994761803638, "grad_norm": 8.959165549249951, "learning_rate": 4.8867168862640056e-06, "loss": 1.4526, "step": 1742 }, { "epoch": 0.2467615204926736, "grad_norm": 8.657862764359596, "learning_rate": 4.886546232149464e-06, "loss": 1.3539, "step": 1743 }, { "epoch": 0.2469030933673108, "grad_norm": 10.992457144547211, "learning_rate": 4.886375452575967e-06, "loss": 1.255, "step": 1744 }, { "epoch": 0.24704466624194804, "grad_norm": 11.584327344073476, "learning_rate": 4.886204547552491e-06, "loss": 1.3928, "step": 1745 }, { "epoch": 0.24718623911658527, "grad_norm": 9.703310560826328, "learning_rate": 4.886033517088021e-06, "loss": 1.4206, "step": 1746 }, { "epoch": 0.24732781199122247, "grad_norm": 8.688226683158607, "learning_rate": 4.885862361191549e-06, "loss": 1.2731, "step": 1747 }, { "epoch": 0.2474693848658597, "grad_norm": 9.303503004344948, "learning_rate": 4.885691079872071e-06, "loss": 1.3939, "step": 1748 }, { "epoch": 0.24761095774049693, "grad_norm": 8.541955065316758, "learning_rate": 4.885519673138592e-06, "loss": 1.4727, "step": 1749 }, { "epoch": 0.24775253061513414, "grad_norm": 9.170345146314782, "learning_rate": 4.8853481410001225e-06, "loss": 1.3429, "step": 1750 }, { "epoch": 0.24789410348977137, "grad_norm": 12.619419186340785, "learning_rate": 4.88517648346568e-06, "loss": 1.4545, "step": 1751 }, { "epoch": 0.24803567636440857, "grad_norm": 11.632230089517176, "learning_rate": 4.885004700544288e-06, "loss": 1.438, "step": 1752 }, { "epoch": 0.2481772492390458, "grad_norm": 10.723204818617951, "learning_rate": 4.884832792244977e-06, "loss": 1.3659, "step": 1753 }, { "epoch": 0.24831882211368303, "grad_norm": 10.115121802870892, "learning_rate": 4.884660758576785e-06, "loss": 1.4124, "step": 1754 }, { "epoch": 0.24846039498832023, "grad_norm": 11.597157597060354, "learning_rate": 4.884488599548755e-06, "loss": 1.3241, "step": 1755 }, { "epoch": 0.24860196786295746, "grad_norm": 11.011307539878857, "learning_rate": 4.884316315169936e-06, "loss": 1.4026, "step": 1756 }, { "epoch": 0.24874354073759466, "grad_norm": 10.238371789350719, "learning_rate": 4.8841439054493864e-06, "loss": 1.243, "step": 1757 }, { "epoch": 0.2488851136122319, "grad_norm": 8.087079101175645, "learning_rate": 4.88397137039617e-06, "loss": 1.2365, "step": 1758 }, { "epoch": 0.24902668648686913, "grad_norm": 12.277673098509636, "learning_rate": 4.883798710019356e-06, "loss": 1.349, "step": 1759 }, { "epoch": 0.24916825936150633, "grad_norm": 12.560781788397655, "learning_rate": 4.883625924328022e-06, "loss": 1.4135, "step": 1760 }, { "epoch": 0.24930983223614356, "grad_norm": 9.514720554307546, "learning_rate": 4.88345301333125e-06, "loss": 1.2723, "step": 1761 }, { "epoch": 0.24945140511078076, "grad_norm": 10.5907993764921, "learning_rate": 4.88327997703813e-06, "loss": 1.4432, "step": 1762 }, { "epoch": 0.249592977985418, "grad_norm": 8.667673003629726, "learning_rate": 4.883106815457758e-06, "loss": 1.4231, "step": 1763 }, { "epoch": 0.24973455086005522, "grad_norm": 10.686868213919347, "learning_rate": 4.882933528599239e-06, "loss": 1.4557, "step": 1764 }, { "epoch": 0.24987612373469242, "grad_norm": 11.97963958249025, "learning_rate": 4.882760116471681e-06, "loss": 1.4463, "step": 1765 }, { "epoch": 0.25001769660932965, "grad_norm": 7.557266568318147, "learning_rate": 4.8825865790841995e-06, "loss": 1.258, "step": 1766 }, { "epoch": 0.2501592694839669, "grad_norm": 10.213426048847342, "learning_rate": 4.882412916445919e-06, "loss": 1.2871, "step": 1767 }, { "epoch": 0.2503008423586041, "grad_norm": 11.878247268798159, "learning_rate": 4.882239128565968e-06, "loss": 1.2109, "step": 1768 }, { "epoch": 0.2504424152332413, "grad_norm": 8.776285852191403, "learning_rate": 4.882065215453481e-06, "loss": 1.4418, "step": 1769 }, { "epoch": 0.2505839881078785, "grad_norm": 9.613834791433263, "learning_rate": 4.881891177117602e-06, "loss": 1.4288, "step": 1770 }, { "epoch": 0.25072556098251575, "grad_norm": 10.7902818967182, "learning_rate": 4.881717013567481e-06, "loss": 1.3688, "step": 1771 }, { "epoch": 0.250867133857153, "grad_norm": 10.669246321912187, "learning_rate": 4.881542724812272e-06, "loss": 1.4007, "step": 1772 }, { "epoch": 0.2510087067317902, "grad_norm": 10.113154133881503, "learning_rate": 4.881368310861137e-06, "loss": 1.4825, "step": 1773 }, { "epoch": 0.2511502796064274, "grad_norm": 10.15737824262107, "learning_rate": 4.881193771723246e-06, "loss": 1.3691, "step": 1774 }, { "epoch": 0.2512918524810646, "grad_norm": 8.958153821935685, "learning_rate": 4.881019107407774e-06, "loss": 1.3584, "step": 1775 }, { "epoch": 0.25143342535570185, "grad_norm": 7.9781402910491686, "learning_rate": 4.8808443179239025e-06, "loss": 1.4088, "step": 1776 }, { "epoch": 0.2515749982303391, "grad_norm": 8.74323381668603, "learning_rate": 4.880669403280821e-06, "loss": 1.4369, "step": 1777 }, { "epoch": 0.2517165711049763, "grad_norm": 10.360190242950038, "learning_rate": 4.880494363487723e-06, "loss": 1.2489, "step": 1778 }, { "epoch": 0.2518581439796135, "grad_norm": 10.13336401315696, "learning_rate": 4.880319198553813e-06, "loss": 1.4695, "step": 1779 }, { "epoch": 0.2519997168542507, "grad_norm": 8.947678965089477, "learning_rate": 4.880143908488296e-06, "loss": 1.2819, "step": 1780 }, { "epoch": 0.25214128972888794, "grad_norm": 9.015849674274804, "learning_rate": 4.87996849330039e-06, "loss": 1.3514, "step": 1781 }, { "epoch": 0.25228286260352517, "grad_norm": 10.088924612727988, "learning_rate": 4.8797929529993135e-06, "loss": 1.5156, "step": 1782 }, { "epoch": 0.2524244354781624, "grad_norm": 8.865327103846937, "learning_rate": 4.8796172875942965e-06, "loss": 1.2843, "step": 1783 }, { "epoch": 0.2525660083527996, "grad_norm": 8.957168810953055, "learning_rate": 4.879441497094572e-06, "loss": 1.3637, "step": 1784 }, { "epoch": 0.2527075812274368, "grad_norm": 13.102375403373905, "learning_rate": 4.879265581509384e-06, "loss": 1.4375, "step": 1785 }, { "epoch": 0.25284915410207404, "grad_norm": 11.229706835567569, "learning_rate": 4.8790895408479776e-06, "loss": 1.3752, "step": 1786 }, { "epoch": 0.25299072697671127, "grad_norm": 10.632626298539535, "learning_rate": 4.878913375119608e-06, "loss": 1.4521, "step": 1787 }, { "epoch": 0.2531322998513485, "grad_norm": 8.629137014512771, "learning_rate": 4.878737084333536e-06, "loss": 1.3273, "step": 1788 }, { "epoch": 0.2532738727259857, "grad_norm": 9.45304630971897, "learning_rate": 4.878560668499029e-06, "loss": 1.3267, "step": 1789 }, { "epoch": 0.2534154456006229, "grad_norm": 10.458959755748335, "learning_rate": 4.8783841276253605e-06, "loss": 1.358, "step": 1790 }, { "epoch": 0.25355701847526013, "grad_norm": 11.156984791212528, "learning_rate": 4.8782074617218135e-06, "loss": 1.282, "step": 1791 }, { "epoch": 0.25369859134989736, "grad_norm": 9.316918880026389, "learning_rate": 4.878030670797672e-06, "loss": 1.3982, "step": 1792 }, { "epoch": 0.2538401642245346, "grad_norm": 8.758544673481051, "learning_rate": 4.877853754862232e-06, "loss": 1.3952, "step": 1793 }, { "epoch": 0.2539817370991718, "grad_norm": 10.387056629987645, "learning_rate": 4.8776767139247936e-06, "loss": 1.5105, "step": 1794 }, { "epoch": 0.254123309973809, "grad_norm": 9.84767910486737, "learning_rate": 4.877499547994662e-06, "loss": 1.5199, "step": 1795 }, { "epoch": 0.2542648828484462, "grad_norm": 10.686633337027995, "learning_rate": 4.877322257081153e-06, "loss": 1.3842, "step": 1796 }, { "epoch": 0.25440645572308346, "grad_norm": 9.648616996053033, "learning_rate": 4.877144841193585e-06, "loss": 1.3408, "step": 1797 }, { "epoch": 0.2545480285977207, "grad_norm": 9.353597803525506, "learning_rate": 4.876967300341285e-06, "loss": 1.5396, "step": 1798 }, { "epoch": 0.2546896014723579, "grad_norm": 10.98662239189239, "learning_rate": 4.876789634533587e-06, "loss": 1.4615, "step": 1799 }, { "epoch": 0.2548311743469951, "grad_norm": 8.387047599951227, "learning_rate": 4.876611843779829e-06, "loss": 1.5166, "step": 1800 }, { "epoch": 0.2549727472216323, "grad_norm": 7.872352518376193, "learning_rate": 4.876433928089359e-06, "loss": 1.3683, "step": 1801 }, { "epoch": 0.25511432009626955, "grad_norm": 9.831721631745918, "learning_rate": 4.87625588747153e-06, "loss": 1.3574, "step": 1802 }, { "epoch": 0.2552558929709068, "grad_norm": 8.218928694142232, "learning_rate": 4.8760777219357e-06, "loss": 1.275, "step": 1803 }, { "epoch": 0.255397465845544, "grad_norm": 11.27077312535665, "learning_rate": 4.875899431491236e-06, "loss": 1.5835, "step": 1804 }, { "epoch": 0.2555390387201812, "grad_norm": 9.42315756195873, "learning_rate": 4.875721016147511e-06, "loss": 1.4386, "step": 1805 }, { "epoch": 0.2556806115948184, "grad_norm": 8.615944794374384, "learning_rate": 4.875542475913902e-06, "loss": 1.3392, "step": 1806 }, { "epoch": 0.25582218446945565, "grad_norm": 9.641346711235418, "learning_rate": 4.875363810799798e-06, "loss": 1.3076, "step": 1807 }, { "epoch": 0.2559637573440929, "grad_norm": 10.790150028914537, "learning_rate": 4.87518502081459e-06, "loss": 1.4063, "step": 1808 }, { "epoch": 0.2561053302187301, "grad_norm": 8.801133637668812, "learning_rate": 4.875006105967675e-06, "loss": 1.353, "step": 1809 }, { "epoch": 0.2562469030933673, "grad_norm": 9.257405796050744, "learning_rate": 4.87482706626846e-06, "loss": 1.4443, "step": 1810 }, { "epoch": 0.2563884759680045, "grad_norm": 10.689816229501226, "learning_rate": 4.874647901726358e-06, "loss": 1.3274, "step": 1811 }, { "epoch": 0.25653004884264174, "grad_norm": 10.14103540374742, "learning_rate": 4.874468612350786e-06, "loss": 1.3938, "step": 1812 }, { "epoch": 0.256671621717279, "grad_norm": 10.808067162814007, "learning_rate": 4.874289198151168e-06, "loss": 1.3144, "step": 1813 }, { "epoch": 0.2568131945919162, "grad_norm": 10.335521179257242, "learning_rate": 4.87410965913694e-06, "loss": 1.3895, "step": 1814 }, { "epoch": 0.25695476746655344, "grad_norm": 12.318419297113254, "learning_rate": 4.873929995317535e-06, "loss": 1.3761, "step": 1815 }, { "epoch": 0.2570963403411906, "grad_norm": 8.865177790563525, "learning_rate": 4.873750206702401e-06, "loss": 1.357, "step": 1816 }, { "epoch": 0.25723791321582784, "grad_norm": 9.750063773704692, "learning_rate": 4.873570293300989e-06, "loss": 1.5309, "step": 1817 }, { "epoch": 0.25737948609046507, "grad_norm": 10.433660292047263, "learning_rate": 4.873390255122756e-06, "loss": 1.2496, "step": 1818 }, { "epoch": 0.2575210589651023, "grad_norm": 10.087053563710661, "learning_rate": 4.873210092177167e-06, "loss": 1.3889, "step": 1819 }, { "epoch": 0.25766263183973953, "grad_norm": 8.385001061291021, "learning_rate": 4.873029804473694e-06, "loss": 1.3556, "step": 1820 }, { "epoch": 0.2578042047143767, "grad_norm": 8.861289596221617, "learning_rate": 4.8728493920218126e-06, "loss": 1.3844, "step": 1821 }, { "epoch": 0.25794577758901394, "grad_norm": 10.213150029873978, "learning_rate": 4.872668854831008e-06, "loss": 1.2854, "step": 1822 }, { "epoch": 0.25808735046365117, "grad_norm": 10.120587929998342, "learning_rate": 4.87248819291077e-06, "loss": 1.3369, "step": 1823 }, { "epoch": 0.2582289233382884, "grad_norm": 9.8152458090956, "learning_rate": 4.872307406270598e-06, "loss": 1.2661, "step": 1824 }, { "epoch": 0.2583704962129256, "grad_norm": 10.365514992609471, "learning_rate": 4.872126494919994e-06, "loss": 1.4486, "step": 1825 }, { "epoch": 0.2585120690875628, "grad_norm": 9.776313291558687, "learning_rate": 4.871945458868469e-06, "loss": 1.3378, "step": 1826 }, { "epoch": 0.25865364196220003, "grad_norm": 8.63784143681163, "learning_rate": 4.87176429812554e-06, "loss": 1.4257, "step": 1827 }, { "epoch": 0.25879521483683726, "grad_norm": 10.775967130162009, "learning_rate": 4.87158301270073e-06, "loss": 1.354, "step": 1828 }, { "epoch": 0.2589367877114745, "grad_norm": 12.254277319453413, "learning_rate": 4.87140160260357e-06, "loss": 1.4764, "step": 1829 }, { "epoch": 0.2590783605861117, "grad_norm": 8.861479009832344, "learning_rate": 4.871220067843595e-06, "loss": 1.3488, "step": 1830 }, { "epoch": 0.2592199334607489, "grad_norm": 11.697663972230805, "learning_rate": 4.8710384084303495e-06, "loss": 1.4801, "step": 1831 }, { "epoch": 0.2593615063353861, "grad_norm": 11.13085474836498, "learning_rate": 4.870856624373383e-06, "loss": 1.3689, "step": 1832 }, { "epoch": 0.25950307921002336, "grad_norm": 10.306524591622281, "learning_rate": 4.870674715682252e-06, "loss": 1.5555, "step": 1833 }, { "epoch": 0.2596446520846606, "grad_norm": 11.985992521424032, "learning_rate": 4.870492682366518e-06, "loss": 1.3943, "step": 1834 }, { "epoch": 0.2597862249592978, "grad_norm": 9.614215704659568, "learning_rate": 4.8703105244357504e-06, "loss": 1.2294, "step": 1835 }, { "epoch": 0.259927797833935, "grad_norm": 9.682165633775497, "learning_rate": 4.870128241899527e-06, "loss": 1.4515, "step": 1836 }, { "epoch": 0.2600693707085722, "grad_norm": 12.579544271635251, "learning_rate": 4.86994583476743e-06, "loss": 1.6148, "step": 1837 }, { "epoch": 0.26021094358320945, "grad_norm": 9.445803595587103, "learning_rate": 4.8697633030490465e-06, "loss": 1.2611, "step": 1838 }, { "epoch": 0.2603525164578467, "grad_norm": 12.742091699517244, "learning_rate": 4.869580646753973e-06, "loss": 1.4125, "step": 1839 }, { "epoch": 0.2604940893324839, "grad_norm": 15.499562903363195, "learning_rate": 4.869397865891812e-06, "loss": 1.6298, "step": 1840 }, { "epoch": 0.2606356622071211, "grad_norm": 9.573827877960644, "learning_rate": 4.869214960472172e-06, "loss": 1.3679, "step": 1841 }, { "epoch": 0.2607772350817583, "grad_norm": 9.681040720835574, "learning_rate": 4.869031930504668e-06, "loss": 1.4703, "step": 1842 }, { "epoch": 0.26091880795639555, "grad_norm": 10.250644198387416, "learning_rate": 4.8688487759989215e-06, "loss": 1.3452, "step": 1843 }, { "epoch": 0.2610603808310328, "grad_norm": 25.619043467381715, "learning_rate": 4.868665496964562e-06, "loss": 1.4307, "step": 1844 }, { "epoch": 0.26120195370567, "grad_norm": 11.75217628615813, "learning_rate": 4.868482093411223e-06, "loss": 1.3149, "step": 1845 }, { "epoch": 0.26134352658030724, "grad_norm": 9.785206929758978, "learning_rate": 4.868298565348546e-06, "loss": 1.3673, "step": 1846 }, { "epoch": 0.2614850994549444, "grad_norm": 9.272386833646094, "learning_rate": 4.8681149127861795e-06, "loss": 1.4277, "step": 1847 }, { "epoch": 0.26162667232958164, "grad_norm": 8.482483261276265, "learning_rate": 4.8679311357337774e-06, "loss": 1.2528, "step": 1848 }, { "epoch": 0.2617682452042189, "grad_norm": 11.661307784428129, "learning_rate": 4.867747234201003e-06, "loss": 1.4143, "step": 1849 }, { "epoch": 0.2619098180788561, "grad_norm": 11.414583856162913, "learning_rate": 4.86756320819752e-06, "loss": 1.4213, "step": 1850 }, { "epoch": 0.26205139095349334, "grad_norm": 9.379412617135493, "learning_rate": 4.867379057733005e-06, "loss": 1.3199, "step": 1851 }, { "epoch": 0.2621929638281305, "grad_norm": 10.210630404955493, "learning_rate": 4.867194782817138e-06, "loss": 1.3957, "step": 1852 }, { "epoch": 0.26233453670276774, "grad_norm": 11.11131902712099, "learning_rate": 4.867010383459606e-06, "loss": 1.3366, "step": 1853 }, { "epoch": 0.26247610957740497, "grad_norm": 9.702525834459776, "learning_rate": 4.8668258596701035e-06, "loss": 1.3718, "step": 1854 }, { "epoch": 0.2626176824520422, "grad_norm": 10.184146258865917, "learning_rate": 4.86664121145833e-06, "loss": 1.3626, "step": 1855 }, { "epoch": 0.26275925532667943, "grad_norm": 9.450047923778945, "learning_rate": 4.866456438833993e-06, "loss": 1.2997, "step": 1856 }, { "epoch": 0.2629008282013166, "grad_norm": 9.335849649822025, "learning_rate": 4.866271541806806e-06, "loss": 1.4811, "step": 1857 }, { "epoch": 0.26304240107595384, "grad_norm": 13.515806763589923, "learning_rate": 4.8660865203864885e-06, "loss": 1.4664, "step": 1858 }, { "epoch": 0.26318397395059107, "grad_norm": 9.684551750212245, "learning_rate": 4.865901374582766e-06, "loss": 1.2608, "step": 1859 }, { "epoch": 0.2633255468252283, "grad_norm": 9.947015393291347, "learning_rate": 4.865716104405373e-06, "loss": 1.3728, "step": 1860 }, { "epoch": 0.2634671196998655, "grad_norm": 10.768844056784179, "learning_rate": 4.865530709864048e-06, "loss": 1.3748, "step": 1861 }, { "epoch": 0.2636086925745027, "grad_norm": 9.376886610300959, "learning_rate": 4.865345190968537e-06, "loss": 1.2756, "step": 1862 }, { "epoch": 0.26375026544913993, "grad_norm": 10.0538569702392, "learning_rate": 4.865159547728593e-06, "loss": 1.2088, "step": 1863 }, { "epoch": 0.26389183832377716, "grad_norm": 10.352365092706851, "learning_rate": 4.8649737801539755e-06, "loss": 1.2887, "step": 1864 }, { "epoch": 0.2640334111984144, "grad_norm": 10.536725802506536, "learning_rate": 4.86478788825445e-06, "loss": 1.392, "step": 1865 }, { "epoch": 0.2641749840730516, "grad_norm": 11.943269144436346, "learning_rate": 4.864601872039788e-06, "loss": 1.505, "step": 1866 }, { "epoch": 0.2643165569476888, "grad_norm": 9.6190317649297, "learning_rate": 4.864415731519769e-06, "loss": 1.5084, "step": 1867 }, { "epoch": 0.264458129822326, "grad_norm": 11.585192048675218, "learning_rate": 4.864229466704178e-06, "loss": 1.433, "step": 1868 }, { "epoch": 0.26459970269696326, "grad_norm": 10.509837673686787, "learning_rate": 4.864043077602807e-06, "loss": 1.4894, "step": 1869 }, { "epoch": 0.2647412755716005, "grad_norm": 11.645941791245486, "learning_rate": 4.863856564225453e-06, "loss": 1.357, "step": 1870 }, { "epoch": 0.2648828484462377, "grad_norm": 11.743442085131425, "learning_rate": 4.863669926581924e-06, "loss": 1.4374, "step": 1871 }, { "epoch": 0.26502442132087495, "grad_norm": 10.688018964693525, "learning_rate": 4.863483164682027e-06, "loss": 1.3877, "step": 1872 }, { "epoch": 0.2651659941955121, "grad_norm": 11.02342884866333, "learning_rate": 4.863296278535584e-06, "loss": 1.3793, "step": 1873 }, { "epoch": 0.26530756707014935, "grad_norm": 12.766841760402462, "learning_rate": 4.863109268152417e-06, "loss": 1.1848, "step": 1874 }, { "epoch": 0.2654491399447866, "grad_norm": 9.137272355235451, "learning_rate": 4.862922133542358e-06, "loss": 1.5362, "step": 1875 }, { "epoch": 0.2655907128194238, "grad_norm": 9.547542667298572, "learning_rate": 4.862734874715245e-06, "loss": 1.42, "step": 1876 }, { "epoch": 0.26573228569406104, "grad_norm": 10.379498345338224, "learning_rate": 4.8625474916809205e-06, "loss": 1.4147, "step": 1877 }, { "epoch": 0.2658738585686982, "grad_norm": 12.462324815014327, "learning_rate": 4.862359984449236e-06, "loss": 1.462, "step": 1878 }, { "epoch": 0.26601543144333545, "grad_norm": 9.485630210004869, "learning_rate": 4.862172353030049e-06, "loss": 1.3242, "step": 1879 }, { "epoch": 0.2661570043179727, "grad_norm": 12.425897294923388, "learning_rate": 4.861984597433223e-06, "loss": 1.4224, "step": 1880 }, { "epoch": 0.2662985771926099, "grad_norm": 10.464057075461488, "learning_rate": 4.861796717668626e-06, "loss": 1.4043, "step": 1881 }, { "epoch": 0.26644015006724714, "grad_norm": 11.016549232614944, "learning_rate": 4.8616087137461385e-06, "loss": 1.5395, "step": 1882 }, { "epoch": 0.2665817229418843, "grad_norm": 9.690211156664347, "learning_rate": 4.861420585675641e-06, "loss": 1.3816, "step": 1883 }, { "epoch": 0.26672329581652154, "grad_norm": 14.342447325616572, "learning_rate": 4.861232333467024e-06, "loss": 1.3882, "step": 1884 }, { "epoch": 0.2668648686911588, "grad_norm": 9.293748653696523, "learning_rate": 4.8610439571301845e-06, "loss": 1.3119, "step": 1885 }, { "epoch": 0.267006441565796, "grad_norm": 11.992063440929966, "learning_rate": 4.860855456675024e-06, "loss": 1.4479, "step": 1886 }, { "epoch": 0.26714801444043323, "grad_norm": 9.826906938380253, "learning_rate": 4.860666832111453e-06, "loss": 1.4371, "step": 1887 }, { "epoch": 0.2672895873150704, "grad_norm": 9.154920878628351, "learning_rate": 4.860478083449387e-06, "loss": 1.4633, "step": 1888 }, { "epoch": 0.26743116018970764, "grad_norm": 9.752857669783292, "learning_rate": 4.8602892106987474e-06, "loss": 1.3054, "step": 1889 }, { "epoch": 0.26757273306434487, "grad_norm": 8.475932436261068, "learning_rate": 4.860100213869464e-06, "loss": 1.2999, "step": 1890 }, { "epoch": 0.2677143059389821, "grad_norm": 8.97104522365065, "learning_rate": 4.859911092971473e-06, "loss": 1.2309, "step": 1891 }, { "epoch": 0.26785587881361933, "grad_norm": 9.451696366736734, "learning_rate": 4.8597218480147145e-06, "loss": 1.2767, "step": 1892 }, { "epoch": 0.2679974516882565, "grad_norm": 8.014641714482527, "learning_rate": 4.859532479009138e-06, "loss": 1.1932, "step": 1893 }, { "epoch": 0.26813902456289374, "grad_norm": 11.629725654495386, "learning_rate": 4.859342985964699e-06, "loss": 1.3231, "step": 1894 }, { "epoch": 0.26828059743753097, "grad_norm": 9.769939281388176, "learning_rate": 4.8591533688913584e-06, "loss": 1.23, "step": 1895 }, { "epoch": 0.2684221703121682, "grad_norm": 10.904951220459699, "learning_rate": 4.858963627799084e-06, "loss": 1.592, "step": 1896 }, { "epoch": 0.2685637431868054, "grad_norm": 9.943557524481657, "learning_rate": 4.85877376269785e-06, "loss": 1.3084, "step": 1897 }, { "epoch": 0.2687053160614426, "grad_norm": 10.102923402624453, "learning_rate": 4.858583773597639e-06, "loss": 1.3706, "step": 1898 }, { "epoch": 0.26884688893607983, "grad_norm": 10.146896985414664, "learning_rate": 4.858393660508437e-06, "loss": 1.2742, "step": 1899 }, { "epoch": 0.26898846181071706, "grad_norm": 9.431736683153934, "learning_rate": 4.85820342344024e-06, "loss": 1.2557, "step": 1900 }, { "epoch": 0.2691300346853543, "grad_norm": 10.70029009844677, "learning_rate": 4.8580130624030454e-06, "loss": 1.3216, "step": 1901 }, { "epoch": 0.2692716075599915, "grad_norm": 10.80380340291094, "learning_rate": 4.857822577406864e-06, "loss": 1.5813, "step": 1902 }, { "epoch": 0.26941318043462875, "grad_norm": 9.866532570554881, "learning_rate": 4.8576319684617064e-06, "loss": 1.4123, "step": 1903 }, { "epoch": 0.2695547533092659, "grad_norm": 10.394361698870842, "learning_rate": 4.857441235577596e-06, "loss": 1.1787, "step": 1904 }, { "epoch": 0.26969632618390316, "grad_norm": 11.198761966944613, "learning_rate": 4.857250378764556e-06, "loss": 1.3047, "step": 1905 }, { "epoch": 0.2698378990585404, "grad_norm": 10.737684070221778, "learning_rate": 4.857059398032622e-06, "loss": 1.3463, "step": 1906 }, { "epoch": 0.2699794719331776, "grad_norm": 10.136406019381134, "learning_rate": 4.8568682933918325e-06, "loss": 1.4447, "step": 1907 }, { "epoch": 0.27012104480781485, "grad_norm": 9.349867027275542, "learning_rate": 4.856677064852234e-06, "loss": 1.199, "step": 1908 }, { "epoch": 0.270262617682452, "grad_norm": 11.251256914919654, "learning_rate": 4.85648571242388e-06, "loss": 1.2627, "step": 1909 }, { "epoch": 0.27040419055708925, "grad_norm": 10.620895131349018, "learning_rate": 4.856294236116829e-06, "loss": 1.5058, "step": 1910 }, { "epoch": 0.2705457634317265, "grad_norm": 7.975654752155834, "learning_rate": 4.856102635941147e-06, "loss": 1.2701, "step": 1911 }, { "epoch": 0.2706873363063637, "grad_norm": 10.944255028993451, "learning_rate": 4.855910911906906e-06, "loss": 1.437, "step": 1912 }, { "epoch": 0.27082890918100094, "grad_norm": 10.130582047787438, "learning_rate": 4.855719064024185e-06, "loss": 1.3837, "step": 1913 }, { "epoch": 0.2709704820556381, "grad_norm": 9.609745769015806, "learning_rate": 4.855527092303069e-06, "loss": 1.3622, "step": 1914 }, { "epoch": 0.27111205493027535, "grad_norm": 9.2090608013133, "learning_rate": 4.855334996753651e-06, "loss": 1.2581, "step": 1915 }, { "epoch": 0.2712536278049126, "grad_norm": 9.10017047764315, "learning_rate": 4.8551427773860284e-06, "loss": 1.3496, "step": 1916 }, { "epoch": 0.2713952006795498, "grad_norm": 7.9957819786099025, "learning_rate": 4.854950434210305e-06, "loss": 1.2388, "step": 1917 }, { "epoch": 0.27153677355418704, "grad_norm": 8.005434574528154, "learning_rate": 4.854757967236594e-06, "loss": 1.2465, "step": 1918 }, { "epoch": 0.2716783464288242, "grad_norm": 9.568635057074166, "learning_rate": 4.8545653764750125e-06, "loss": 1.3637, "step": 1919 }, { "epoch": 0.27181991930346144, "grad_norm": 10.909140829492271, "learning_rate": 4.8543726619356846e-06, "loss": 1.4389, "step": 1920 }, { "epoch": 0.2719614921780987, "grad_norm": 10.665906004804295, "learning_rate": 4.854179823628741e-06, "loss": 1.3744, "step": 1921 }, { "epoch": 0.2721030650527359, "grad_norm": 8.95579906960276, "learning_rate": 4.85398686156432e-06, "loss": 1.2805, "step": 1922 }, { "epoch": 0.27224463792737313, "grad_norm": 10.50865361700942, "learning_rate": 4.853793775752564e-06, "loss": 1.2663, "step": 1923 }, { "epoch": 0.2723862108020103, "grad_norm": 10.097436379376049, "learning_rate": 4.853600566203625e-06, "loss": 1.3178, "step": 1924 }, { "epoch": 0.27252778367664754, "grad_norm": 8.67743962088031, "learning_rate": 4.8534072329276594e-06, "loss": 1.2158, "step": 1925 }, { "epoch": 0.27266935655128477, "grad_norm": 8.556544996683733, "learning_rate": 4.85321377593483e-06, "loss": 1.3911, "step": 1926 }, { "epoch": 0.272810929425922, "grad_norm": 9.698226798097874, "learning_rate": 4.853020195235307e-06, "loss": 1.4272, "step": 1927 }, { "epoch": 0.27295250230055923, "grad_norm": 10.302246915304751, "learning_rate": 4.852826490839266e-06, "loss": 1.2483, "step": 1928 }, { "epoch": 0.2730940751751964, "grad_norm": 11.675696302802391, "learning_rate": 4.852632662756892e-06, "loss": 1.3846, "step": 1929 }, { "epoch": 0.27323564804983363, "grad_norm": 7.2144516867790545, "learning_rate": 4.852438710998373e-06, "loss": 1.246, "step": 1930 }, { "epoch": 0.27337722092447087, "grad_norm": 9.600005753833319, "learning_rate": 4.852244635573905e-06, "loss": 1.5645, "step": 1931 }, { "epoch": 0.2735187937991081, "grad_norm": 8.484666345135867, "learning_rate": 4.85205043649369e-06, "loss": 1.219, "step": 1932 }, { "epoch": 0.2736603666737453, "grad_norm": 9.689184817578784, "learning_rate": 4.851856113767937e-06, "loss": 1.3263, "step": 1933 }, { "epoch": 0.27380193954838256, "grad_norm": 9.307810991795245, "learning_rate": 4.851661667406862e-06, "loss": 1.4133, "step": 1934 }, { "epoch": 0.27394351242301973, "grad_norm": 9.754623490599695, "learning_rate": 4.851467097420687e-06, "loss": 1.4888, "step": 1935 }, { "epoch": 0.27408508529765696, "grad_norm": 8.180806692773718, "learning_rate": 4.8512724038196395e-06, "loss": 1.3118, "step": 1936 }, { "epoch": 0.2742266581722942, "grad_norm": 12.632332362744878, "learning_rate": 4.8510775866139556e-06, "loss": 1.4125, "step": 1937 }, { "epoch": 0.2743682310469314, "grad_norm": 9.28282938148082, "learning_rate": 4.850882645813875e-06, "loss": 1.3902, "step": 1938 }, { "epoch": 0.27450980392156865, "grad_norm": 10.114202322627666, "learning_rate": 4.850687581429647e-06, "loss": 1.4037, "step": 1939 }, { "epoch": 0.2746513767962058, "grad_norm": 9.808811150393407, "learning_rate": 4.8504923934715265e-06, "loss": 1.427, "step": 1940 }, { "epoch": 0.27479294967084306, "grad_norm": 9.491298505181108, "learning_rate": 4.850297081949773e-06, "loss": 1.5069, "step": 1941 }, { "epoch": 0.2749345225454803, "grad_norm": 9.977631920095577, "learning_rate": 4.850101646874654e-06, "loss": 1.3893, "step": 1942 }, { "epoch": 0.2750760954201175, "grad_norm": 10.46994549202416, "learning_rate": 4.8499060882564435e-06, "loss": 1.3556, "step": 1943 }, { "epoch": 0.27521766829475475, "grad_norm": 9.725718854602446, "learning_rate": 4.849710406105422e-06, "loss": 1.4852, "step": 1944 }, { "epoch": 0.2753592411693919, "grad_norm": 10.098251614460064, "learning_rate": 4.849514600431877e-06, "loss": 1.3532, "step": 1945 }, { "epoch": 0.27550081404402915, "grad_norm": 8.879811017199177, "learning_rate": 4.849318671246101e-06, "loss": 1.2974, "step": 1946 }, { "epoch": 0.2756423869186664, "grad_norm": 9.693837314116562, "learning_rate": 4.849122618558395e-06, "loss": 1.4952, "step": 1947 }, { "epoch": 0.2757839597933036, "grad_norm": 11.265295523853162, "learning_rate": 4.848926442379064e-06, "loss": 1.4421, "step": 1948 }, { "epoch": 0.27592553266794084, "grad_norm": 10.757786968923952, "learning_rate": 4.8487301427184204e-06, "loss": 1.2949, "step": 1949 }, { "epoch": 0.276067105542578, "grad_norm": 8.529530110805215, "learning_rate": 4.848533719586787e-06, "loss": 1.264, "step": 1950 }, { "epoch": 0.27620867841721525, "grad_norm": 10.537805707123933, "learning_rate": 4.848337172994485e-06, "loss": 1.4141, "step": 1951 }, { "epoch": 0.2763502512918525, "grad_norm": 9.552210638819629, "learning_rate": 4.848140502951849e-06, "loss": 1.2039, "step": 1952 }, { "epoch": 0.2764918241664897, "grad_norm": 10.133326368162205, "learning_rate": 4.847943709469218e-06, "loss": 1.4797, "step": 1953 }, { "epoch": 0.27663339704112694, "grad_norm": 11.02267027306589, "learning_rate": 4.8477467925569365e-06, "loss": 1.0908, "step": 1954 }, { "epoch": 0.2767749699157641, "grad_norm": 9.920407356389113, "learning_rate": 4.847549752225356e-06, "loss": 1.2937, "step": 1955 }, { "epoch": 0.27691654279040134, "grad_norm": 11.381119978621324, "learning_rate": 4.847352588484837e-06, "loss": 1.316, "step": 1956 }, { "epoch": 0.2770581156650386, "grad_norm": 10.807182283269242, "learning_rate": 4.847155301345743e-06, "loss": 1.4705, "step": 1957 }, { "epoch": 0.2771996885396758, "grad_norm": 10.051293619130263, "learning_rate": 4.846957890818444e-06, "loss": 1.4419, "step": 1958 }, { "epoch": 0.27734126141431303, "grad_norm": 10.352681985456337, "learning_rate": 4.846760356913318e-06, "loss": 1.3314, "step": 1959 }, { "epoch": 0.27748283428895026, "grad_norm": 9.487264026627356, "learning_rate": 4.846562699640751e-06, "loss": 1.124, "step": 1960 }, { "epoch": 0.27762440716358744, "grad_norm": 10.627881904655172, "learning_rate": 4.846364919011132e-06, "loss": 1.5308, "step": 1961 }, { "epoch": 0.27776598003822467, "grad_norm": 8.04871461450838, "learning_rate": 4.8461670150348585e-06, "loss": 1.3599, "step": 1962 }, { "epoch": 0.2779075529128619, "grad_norm": 10.816390026847712, "learning_rate": 4.8459689877223346e-06, "loss": 1.3575, "step": 1963 }, { "epoch": 0.27804912578749913, "grad_norm": 11.074341034928903, "learning_rate": 4.845770837083971e-06, "loss": 1.2996, "step": 1964 }, { "epoch": 0.27819069866213636, "grad_norm": 10.519679201985928, "learning_rate": 4.845572563130182e-06, "loss": 1.3343, "step": 1965 }, { "epoch": 0.27833227153677353, "grad_norm": 8.789659701932791, "learning_rate": 4.845374165871394e-06, "loss": 1.2693, "step": 1966 }, { "epoch": 0.27847384441141076, "grad_norm": 8.067910914446038, "learning_rate": 4.845175645318034e-06, "loss": 1.3378, "step": 1967 }, { "epoch": 0.278615417286048, "grad_norm": 7.679658689862033, "learning_rate": 4.844977001480539e-06, "loss": 1.3687, "step": 1968 }, { "epoch": 0.2787569901606852, "grad_norm": 8.344708676930072, "learning_rate": 4.8447782343693515e-06, "loss": 1.188, "step": 1969 }, { "epoch": 0.27889856303532246, "grad_norm": 12.059576915702399, "learning_rate": 4.844579343994921e-06, "loss": 1.3455, "step": 1970 }, { "epoch": 0.27904013590995963, "grad_norm": 11.259797301830984, "learning_rate": 4.844380330367701e-06, "loss": 1.4203, "step": 1971 }, { "epoch": 0.27918170878459686, "grad_norm": 8.331168185931979, "learning_rate": 4.844181193498157e-06, "loss": 1.3567, "step": 1972 }, { "epoch": 0.2793232816592341, "grad_norm": 8.81365569632706, "learning_rate": 4.843981933396755e-06, "loss": 1.5111, "step": 1973 }, { "epoch": 0.2794648545338713, "grad_norm": 10.072144619146606, "learning_rate": 4.84378255007397e-06, "loss": 1.2982, "step": 1974 }, { "epoch": 0.27960642740850855, "grad_norm": 11.847540336970066, "learning_rate": 4.843583043540284e-06, "loss": 1.3606, "step": 1975 }, { "epoch": 0.2797480002831457, "grad_norm": 10.01517327252559, "learning_rate": 4.8433834138061856e-06, "loss": 1.3367, "step": 1976 }, { "epoch": 0.27988957315778296, "grad_norm": 13.726369075899587, "learning_rate": 4.843183660882168e-06, "loss": 1.4496, "step": 1977 }, { "epoch": 0.2800311460324202, "grad_norm": 9.386937386914827, "learning_rate": 4.842983784778732e-06, "loss": 1.3173, "step": 1978 }, { "epoch": 0.2801727189070574, "grad_norm": 7.588107803359174, "learning_rate": 4.842783785506386e-06, "loss": 1.3139, "step": 1979 }, { "epoch": 0.28031429178169465, "grad_norm": 9.688932054561542, "learning_rate": 4.842583663075643e-06, "loss": 1.4811, "step": 1980 }, { "epoch": 0.2804558646563318, "grad_norm": 8.887345015707597, "learning_rate": 4.842383417497024e-06, "loss": 1.3017, "step": 1981 }, { "epoch": 0.28059743753096905, "grad_norm": 8.33532380809767, "learning_rate": 4.842183048781055e-06, "loss": 1.2095, "step": 1982 }, { "epoch": 0.2807390104056063, "grad_norm": 10.630173085796129, "learning_rate": 4.84198255693827e-06, "loss": 1.4592, "step": 1983 }, { "epoch": 0.2808805832802435, "grad_norm": 9.672621591868083, "learning_rate": 4.841781941979207e-06, "loss": 1.2442, "step": 1984 }, { "epoch": 0.28102215615488074, "grad_norm": 8.915264506628198, "learning_rate": 4.8415812039144145e-06, "loss": 1.2733, "step": 1985 }, { "epoch": 0.2811637290295179, "grad_norm": 8.765121907911315, "learning_rate": 4.841380342754444e-06, "loss": 1.3803, "step": 1986 }, { "epoch": 0.28130530190415515, "grad_norm": 9.515717203730215, "learning_rate": 4.841179358509854e-06, "loss": 1.513, "step": 1987 }, { "epoch": 0.2814468747787924, "grad_norm": 9.364828873846323, "learning_rate": 4.840978251191212e-06, "loss": 1.3883, "step": 1988 }, { "epoch": 0.2815884476534296, "grad_norm": 14.787267641513688, "learning_rate": 4.840777020809087e-06, "loss": 1.3672, "step": 1989 }, { "epoch": 0.28173002052806684, "grad_norm": 9.983094612370994, "learning_rate": 4.8405756673740606e-06, "loss": 1.2824, "step": 1990 }, { "epoch": 0.28187159340270407, "grad_norm": 10.514028714279199, "learning_rate": 4.840374190896716e-06, "loss": 1.3531, "step": 1991 }, { "epoch": 0.28201316627734124, "grad_norm": 9.283529188006591, "learning_rate": 4.840172591387646e-06, "loss": 1.3788, "step": 1992 }, { "epoch": 0.2821547391519785, "grad_norm": 11.377814646833341, "learning_rate": 4.839970868857447e-06, "loss": 1.4467, "step": 1993 }, { "epoch": 0.2822963120266157, "grad_norm": 9.835263423834206, "learning_rate": 4.839769023316725e-06, "loss": 1.4185, "step": 1994 }, { "epoch": 0.28243788490125293, "grad_norm": 11.399009099358839, "learning_rate": 4.83956705477609e-06, "loss": 1.3016, "step": 1995 }, { "epoch": 0.28257945777589016, "grad_norm": 10.303404710124003, "learning_rate": 4.839364963246159e-06, "loss": 1.4154, "step": 1996 }, { "epoch": 0.28272103065052734, "grad_norm": 10.894929360891176, "learning_rate": 4.839162748737556e-06, "loss": 1.5901, "step": 1997 }, { "epoch": 0.28286260352516457, "grad_norm": 14.749064270338845, "learning_rate": 4.838960411260911e-06, "loss": 1.5944, "step": 1998 }, { "epoch": 0.2830041763998018, "grad_norm": 8.72961448910649, "learning_rate": 4.838757950826862e-06, "loss": 1.3303, "step": 1999 }, { "epoch": 0.28314574927443903, "grad_norm": 9.252895417565165, "learning_rate": 4.838555367446052e-06, "loss": 1.4545, "step": 2000 }, { "epoch": 0.28328732214907626, "grad_norm": 12.666460604413691, "learning_rate": 4.838352661129129e-06, "loss": 1.5531, "step": 2001 }, { "epoch": 0.28342889502371343, "grad_norm": 11.027641592351262, "learning_rate": 4.838149831886751e-06, "loss": 1.2482, "step": 2002 }, { "epoch": 0.28357046789835066, "grad_norm": 8.643086363940329, "learning_rate": 4.8379468797295785e-06, "loss": 1.3245, "step": 2003 }, { "epoch": 0.2837120407729879, "grad_norm": 8.615468826511181, "learning_rate": 4.837743804668282e-06, "loss": 1.1585, "step": 2004 }, { "epoch": 0.2838536136476251, "grad_norm": 9.691767976799346, "learning_rate": 4.837540606713538e-06, "loss": 1.4313, "step": 2005 }, { "epoch": 0.28399518652226236, "grad_norm": 11.322768129967159, "learning_rate": 4.837337285876026e-06, "loss": 1.5276, "step": 2006 }, { "epoch": 0.28413675939689953, "grad_norm": 12.132190577870652, "learning_rate": 4.837133842166436e-06, "loss": 1.3579, "step": 2007 }, { "epoch": 0.28427833227153676, "grad_norm": 9.503406516223414, "learning_rate": 4.8369302755954625e-06, "loss": 1.3617, "step": 2008 }, { "epoch": 0.284419905146174, "grad_norm": 7.5562390348671995, "learning_rate": 4.836726586173807e-06, "loss": 1.2545, "step": 2009 }, { "epoch": 0.2845614780208112, "grad_norm": 11.79843868982579, "learning_rate": 4.836522773912178e-06, "loss": 1.5949, "step": 2010 }, { "epoch": 0.28470305089544845, "grad_norm": 14.08126135638135, "learning_rate": 4.836318838821288e-06, "loss": 1.4672, "step": 2011 }, { "epoch": 0.2848446237700856, "grad_norm": 11.859600160494319, "learning_rate": 4.836114780911859e-06, "loss": 1.5006, "step": 2012 }, { "epoch": 0.28498619664472286, "grad_norm": 7.772269463124775, "learning_rate": 4.835910600194618e-06, "loss": 1.2247, "step": 2013 }, { "epoch": 0.2851277695193601, "grad_norm": 7.605368685541653, "learning_rate": 4.835706296680298e-06, "loss": 1.2956, "step": 2014 }, { "epoch": 0.2852693423939973, "grad_norm": 9.696617118610071, "learning_rate": 4.83550187037964e-06, "loss": 1.3699, "step": 2015 }, { "epoch": 0.28541091526863455, "grad_norm": 9.873680316289425, "learning_rate": 4.8352973213033894e-06, "loss": 1.5184, "step": 2016 }, { "epoch": 0.2855524881432718, "grad_norm": 8.855312808783564, "learning_rate": 4.835092649462301e-06, "loss": 1.3545, "step": 2017 }, { "epoch": 0.28569406101790895, "grad_norm": 9.14654922869651, "learning_rate": 4.834887854867132e-06, "loss": 1.3961, "step": 2018 }, { "epoch": 0.2858356338925462, "grad_norm": 10.564496997101084, "learning_rate": 4.83468293752865e-06, "loss": 1.4352, "step": 2019 }, { "epoch": 0.2859772067671834, "grad_norm": 9.868649915751798, "learning_rate": 4.834477897457627e-06, "loss": 1.3999, "step": 2020 }, { "epoch": 0.28611877964182064, "grad_norm": 11.329772913543366, "learning_rate": 4.834272734664841e-06, "loss": 1.4467, "step": 2021 }, { "epoch": 0.28626035251645787, "grad_norm": 12.157721658388613, "learning_rate": 4.8340674491610786e-06, "loss": 1.3021, "step": 2022 }, { "epoch": 0.28640192539109505, "grad_norm": 11.861203179658489, "learning_rate": 4.83386204095713e-06, "loss": 1.4495, "step": 2023 }, { "epoch": 0.2865434982657323, "grad_norm": 8.633196046343983, "learning_rate": 4.833656510063794e-06, "loss": 1.2742, "step": 2024 }, { "epoch": 0.2866850711403695, "grad_norm": 10.730482259046829, "learning_rate": 4.833450856491875e-06, "loss": 1.3002, "step": 2025 }, { "epoch": 0.28682664401500674, "grad_norm": 10.899440968028008, "learning_rate": 4.833245080252186e-06, "loss": 1.51, "step": 2026 }, { "epoch": 0.28696821688964397, "grad_norm": 13.488952072082164, "learning_rate": 4.833039181355542e-06, "loss": 1.5339, "step": 2027 }, { "epoch": 0.28710978976428114, "grad_norm": 11.608685602638158, "learning_rate": 4.832833159812768e-06, "loss": 1.3757, "step": 2028 }, { "epoch": 0.2872513626389184, "grad_norm": 13.041299800567474, "learning_rate": 4.832627015634694e-06, "loss": 1.3655, "step": 2029 }, { "epoch": 0.2873929355135556, "grad_norm": 9.930386666474933, "learning_rate": 4.832420748832157e-06, "loss": 1.4826, "step": 2030 }, { "epoch": 0.28753450838819283, "grad_norm": 8.906193461991597, "learning_rate": 4.832214359416001e-06, "loss": 1.3304, "step": 2031 }, { "epoch": 0.28767608126283006, "grad_norm": 10.91497401347542, "learning_rate": 4.8320078473970745e-06, "loss": 1.3706, "step": 2032 }, { "epoch": 0.28781765413746724, "grad_norm": 10.19635486170525, "learning_rate": 4.831801212786234e-06, "loss": 1.2404, "step": 2033 }, { "epoch": 0.28795922701210447, "grad_norm": 11.555077061975966, "learning_rate": 4.831594455594343e-06, "loss": 1.3211, "step": 2034 }, { "epoch": 0.2881007998867417, "grad_norm": 8.767365580306892, "learning_rate": 4.8313875758322695e-06, "loss": 1.3108, "step": 2035 }, { "epoch": 0.28824237276137893, "grad_norm": 10.575707118424162, "learning_rate": 4.83118057351089e-06, "loss": 1.2787, "step": 2036 }, { "epoch": 0.28838394563601616, "grad_norm": 7.918289848031053, "learning_rate": 4.830973448641086e-06, "loss": 1.2351, "step": 2037 }, { "epoch": 0.28852551851065333, "grad_norm": 9.8764477404937, "learning_rate": 4.830766201233746e-06, "loss": 1.3567, "step": 2038 }, { "epoch": 0.28866709138529056, "grad_norm": 8.545454224488715, "learning_rate": 4.8305588312997635e-06, "loss": 1.4107, "step": 2039 }, { "epoch": 0.2888086642599278, "grad_norm": 9.403464275513539, "learning_rate": 4.8303513388500414e-06, "loss": 1.2542, "step": 2040 }, { "epoch": 0.288950237134565, "grad_norm": 9.766972172703436, "learning_rate": 4.8301437238954875e-06, "loss": 1.4099, "step": 2041 }, { "epoch": 0.28909181000920225, "grad_norm": 9.382850310212119, "learning_rate": 4.829935986447015e-06, "loss": 1.3965, "step": 2042 }, { "epoch": 0.28923338288383943, "grad_norm": 10.309873304208356, "learning_rate": 4.829728126515545e-06, "loss": 1.3775, "step": 2043 }, { "epoch": 0.28937495575847666, "grad_norm": 10.739647071765171, "learning_rate": 4.829520144112005e-06, "loss": 1.4254, "step": 2044 }, { "epoch": 0.2895165286331139, "grad_norm": 11.775895822064212, "learning_rate": 4.829312039247328e-06, "loss": 1.4168, "step": 2045 }, { "epoch": 0.2896581015077511, "grad_norm": 10.802443574044261, "learning_rate": 4.829103811932453e-06, "loss": 1.538, "step": 2046 }, { "epoch": 0.28979967438238835, "grad_norm": 11.099190938520973, "learning_rate": 4.828895462178329e-06, "loss": 1.4369, "step": 2047 }, { "epoch": 0.2899412472570256, "grad_norm": 11.018880766094274, "learning_rate": 4.828686989995905e-06, "loss": 1.4746, "step": 2048 }, { "epoch": 0.29008282013166276, "grad_norm": 11.373787951608913, "learning_rate": 4.828478395396143e-06, "loss": 1.4016, "step": 2049 }, { "epoch": 0.2902243930063, "grad_norm": 9.844088303338495, "learning_rate": 4.828269678390008e-06, "loss": 1.3384, "step": 2050 }, { "epoch": 0.2903659658809372, "grad_norm": 11.415093158036015, "learning_rate": 4.828060838988473e-06, "loss": 1.4478, "step": 2051 }, { "epoch": 0.29050753875557445, "grad_norm": 10.191616233104105, "learning_rate": 4.827851877202515e-06, "loss": 1.3685, "step": 2052 }, { "epoch": 0.2906491116302117, "grad_norm": 9.62846411256708, "learning_rate": 4.827642793043119e-06, "loss": 1.3796, "step": 2053 }, { "epoch": 0.29079068450484885, "grad_norm": 9.293404272712676, "learning_rate": 4.827433586521277e-06, "loss": 1.272, "step": 2054 }, { "epoch": 0.2909322573794861, "grad_norm": 10.509401018403713, "learning_rate": 4.827224257647987e-06, "loss": 1.3307, "step": 2055 }, { "epoch": 0.2910738302541233, "grad_norm": 8.479932095957537, "learning_rate": 4.827014806434254e-06, "loss": 1.1794, "step": 2056 }, { "epoch": 0.29121540312876054, "grad_norm": 9.930651338081262, "learning_rate": 4.826805232891087e-06, "loss": 1.3967, "step": 2057 }, { "epoch": 0.29135697600339777, "grad_norm": 11.034848239617377, "learning_rate": 4.826595537029503e-06, "loss": 1.2711, "step": 2058 }, { "epoch": 0.29149854887803495, "grad_norm": 10.979466781037157, "learning_rate": 4.826385718860527e-06, "loss": 1.4204, "step": 2059 }, { "epoch": 0.2916401217526722, "grad_norm": 8.986111415081487, "learning_rate": 4.826175778395188e-06, "loss": 1.348, "step": 2060 }, { "epoch": 0.2917816946273094, "grad_norm": 9.564909986599863, "learning_rate": 4.825965715644523e-06, "loss": 1.3141, "step": 2061 }, { "epoch": 0.29192326750194664, "grad_norm": 10.359869161243589, "learning_rate": 4.825755530619576e-06, "loss": 1.4402, "step": 2062 }, { "epoch": 0.29206484037658387, "grad_norm": 9.44027123594978, "learning_rate": 4.825545223331392e-06, "loss": 1.2864, "step": 2063 }, { "epoch": 0.29220641325122104, "grad_norm": 10.67094593835234, "learning_rate": 4.825334793791032e-06, "loss": 1.3271, "step": 2064 }, { "epoch": 0.2923479861258583, "grad_norm": 11.241704298163324, "learning_rate": 4.825124242009556e-06, "loss": 1.5296, "step": 2065 }, { "epoch": 0.2924895590004955, "grad_norm": 10.268329513772821, "learning_rate": 4.824913567998031e-06, "loss": 1.4393, "step": 2066 }, { "epoch": 0.29263113187513273, "grad_norm": 9.270317240433643, "learning_rate": 4.8247027717675335e-06, "loss": 1.4089, "step": 2067 }, { "epoch": 0.29277270474976996, "grad_norm": 11.519778947828641, "learning_rate": 4.8244918533291444e-06, "loss": 1.419, "step": 2068 }, { "epoch": 0.29291427762440714, "grad_norm": 9.027187397228857, "learning_rate": 4.824280812693952e-06, "loss": 1.3113, "step": 2069 }, { "epoch": 0.29305585049904437, "grad_norm": 8.875857029757482, "learning_rate": 4.824069649873051e-06, "loss": 1.3806, "step": 2070 }, { "epoch": 0.2931974233736816, "grad_norm": 9.06220029138033, "learning_rate": 4.82385836487754e-06, "loss": 1.4462, "step": 2071 }, { "epoch": 0.29333899624831883, "grad_norm": 9.218802552154237, "learning_rate": 4.823646957718529e-06, "loss": 1.2526, "step": 2072 }, { "epoch": 0.29348056912295606, "grad_norm": 10.128778576735844, "learning_rate": 4.823435428407129e-06, "loss": 1.4443, "step": 2073 }, { "epoch": 0.29362214199759323, "grad_norm": 11.148948695138746, "learning_rate": 4.823223776954462e-06, "loss": 1.4588, "step": 2074 }, { "epoch": 0.29376371487223046, "grad_norm": 13.59133924834034, "learning_rate": 4.8230120033716525e-06, "loss": 1.5776, "step": 2075 }, { "epoch": 0.2939052877468677, "grad_norm": 9.776923542083875, "learning_rate": 4.822800107669835e-06, "loss": 1.3683, "step": 2076 }, { "epoch": 0.2940468606215049, "grad_norm": 9.139804483739056, "learning_rate": 4.822588089860146e-06, "loss": 1.4074, "step": 2077 }, { "epoch": 0.29418843349614215, "grad_norm": 8.997344154986054, "learning_rate": 4.822375949953735e-06, "loss": 1.3273, "step": 2078 }, { "epoch": 0.2943300063707794, "grad_norm": 10.825133247524493, "learning_rate": 4.82216368796175e-06, "loss": 1.3079, "step": 2079 }, { "epoch": 0.29447157924541656, "grad_norm": 8.97234546221119, "learning_rate": 4.8219513038953534e-06, "loss": 1.4166, "step": 2080 }, { "epoch": 0.2946131521200538, "grad_norm": 8.189103995882034, "learning_rate": 4.821738797765707e-06, "loss": 1.2642, "step": 2081 }, { "epoch": 0.294754724994691, "grad_norm": 8.90490712532391, "learning_rate": 4.8215261695839825e-06, "loss": 1.3606, "step": 2082 }, { "epoch": 0.29489629786932825, "grad_norm": 10.550594323941135, "learning_rate": 4.821313419361359e-06, "loss": 1.2495, "step": 2083 }, { "epoch": 0.2950378707439655, "grad_norm": 8.704619958985637, "learning_rate": 4.82110054710902e-06, "loss": 1.2937, "step": 2084 }, { "epoch": 0.29517944361860265, "grad_norm": 10.505472482825432, "learning_rate": 4.820887552838156e-06, "loss": 1.3797, "step": 2085 }, { "epoch": 0.2953210164932399, "grad_norm": 8.57094096658788, "learning_rate": 4.820674436559964e-06, "loss": 1.5596, "step": 2086 }, { "epoch": 0.2954625893678771, "grad_norm": 12.362399381068032, "learning_rate": 4.8204611982856465e-06, "loss": 1.4066, "step": 2087 }, { "epoch": 0.29560416224251435, "grad_norm": 10.221129160872696, "learning_rate": 4.820247838026414e-06, "loss": 1.3604, "step": 2088 }, { "epoch": 0.2957457351171516, "grad_norm": 9.675624730642598, "learning_rate": 4.820034355793483e-06, "loss": 1.3795, "step": 2089 }, { "epoch": 0.29588730799178875, "grad_norm": 8.660483526602356, "learning_rate": 4.819820751598076e-06, "loss": 1.4066, "step": 2090 }, { "epoch": 0.296028880866426, "grad_norm": 8.859606650686182, "learning_rate": 4.819607025451422e-06, "loss": 1.3032, "step": 2091 }, { "epoch": 0.2961704537410632, "grad_norm": 9.576872751541536, "learning_rate": 4.819393177364756e-06, "loss": 1.3406, "step": 2092 }, { "epoch": 0.29631202661570044, "grad_norm": 10.99337777293214, "learning_rate": 4.81917920734932e-06, "loss": 1.4148, "step": 2093 }, { "epoch": 0.29645359949033767, "grad_norm": 11.93353109639854, "learning_rate": 4.818965115416362e-06, "loss": 1.3994, "step": 2094 }, { "epoch": 0.29659517236497485, "grad_norm": 9.7823189343279, "learning_rate": 4.818750901577137e-06, "loss": 1.3582, "step": 2095 }, { "epoch": 0.2967367452396121, "grad_norm": 9.221642819425533, "learning_rate": 4.818536565842907e-06, "loss": 1.3051, "step": 2096 }, { "epoch": 0.2968783181142493, "grad_norm": 8.290119531595701, "learning_rate": 4.8183221082249375e-06, "loss": 1.1853, "step": 2097 }, { "epoch": 0.29701989098888654, "grad_norm": 8.182777498821219, "learning_rate": 4.8181075287345045e-06, "loss": 1.2591, "step": 2098 }, { "epoch": 0.29716146386352377, "grad_norm": 10.577796468887414, "learning_rate": 4.817892827382886e-06, "loss": 1.5066, "step": 2099 }, { "epoch": 0.29730303673816094, "grad_norm": 11.463507428508805, "learning_rate": 4.81767800418137e-06, "loss": 1.3751, "step": 2100 }, { "epoch": 0.29744460961279817, "grad_norm": 9.603549587150138, "learning_rate": 4.8174630591412495e-06, "loss": 1.383, "step": 2101 }, { "epoch": 0.2975861824874354, "grad_norm": 10.575090297015864, "learning_rate": 4.817247992273824e-06, "loss": 1.3946, "step": 2102 }, { "epoch": 0.29772775536207263, "grad_norm": 8.71051883803338, "learning_rate": 4.8170328035904e-06, "loss": 1.3906, "step": 2103 }, { "epoch": 0.29786932823670986, "grad_norm": 10.461757226097943, "learning_rate": 4.816817493102289e-06, "loss": 1.326, "step": 2104 }, { "epoch": 0.2980109011113471, "grad_norm": 10.508427099242667, "learning_rate": 4.81660206082081e-06, "loss": 1.3709, "step": 2105 }, { "epoch": 0.29815247398598427, "grad_norm": 9.618032336216583, "learning_rate": 4.816386506757287e-06, "loss": 1.161, "step": 2106 }, { "epoch": 0.2982940468606215, "grad_norm": 9.327085810912275, "learning_rate": 4.816170830923053e-06, "loss": 1.21, "step": 2107 }, { "epoch": 0.29843561973525873, "grad_norm": 9.141068615704167, "learning_rate": 4.815955033329446e-06, "loss": 1.2455, "step": 2108 }, { "epoch": 0.29857719260989596, "grad_norm": 13.491622020576093, "learning_rate": 4.815739113987809e-06, "loss": 1.6216, "step": 2109 }, { "epoch": 0.2987187654845332, "grad_norm": 10.044947129258064, "learning_rate": 4.815523072909494e-06, "loss": 1.3361, "step": 2110 }, { "epoch": 0.29886033835917036, "grad_norm": 9.918100290827184, "learning_rate": 4.815306910105857e-06, "loss": 1.245, "step": 2111 }, { "epoch": 0.2990019112338076, "grad_norm": 9.750497267342277, "learning_rate": 4.815090625588263e-06, "loss": 1.317, "step": 2112 }, { "epoch": 0.2991434841084448, "grad_norm": 10.802003208453035, "learning_rate": 4.81487421936808e-06, "loss": 1.3706, "step": 2113 }, { "epoch": 0.29928505698308205, "grad_norm": 9.461971261131815, "learning_rate": 4.814657691456685e-06, "loss": 1.3726, "step": 2114 }, { "epoch": 0.2994266298577193, "grad_norm": 10.012700979665933, "learning_rate": 4.814441041865463e-06, "loss": 1.462, "step": 2115 }, { "epoch": 0.29956820273235646, "grad_norm": 8.594717186340898, "learning_rate": 4.814224270605799e-06, "loss": 1.3599, "step": 2116 }, { "epoch": 0.2997097756069937, "grad_norm": 9.51419372317741, "learning_rate": 4.814007377689093e-06, "loss": 1.4325, "step": 2117 }, { "epoch": 0.2998513484816309, "grad_norm": 9.524917449993948, "learning_rate": 4.813790363126743e-06, "loss": 1.2791, "step": 2118 }, { "epoch": 0.29999292135626815, "grad_norm": 8.204016413619698, "learning_rate": 4.813573226930158e-06, "loss": 1.34, "step": 2119 }, { "epoch": 0.3001344942309054, "grad_norm": 9.31606925818716, "learning_rate": 4.813355969110755e-06, "loss": 1.2854, "step": 2120 }, { "epoch": 0.30027606710554255, "grad_norm": 10.39322871806616, "learning_rate": 4.813138589679953e-06, "loss": 1.3706, "step": 2121 }, { "epoch": 0.3004176399801798, "grad_norm": 9.830934350438758, "learning_rate": 4.812921088649181e-06, "loss": 1.3557, "step": 2122 }, { "epoch": 0.300559212854817, "grad_norm": 10.891898870194083, "learning_rate": 4.812703466029871e-06, "loss": 1.3287, "step": 2123 }, { "epoch": 0.30070078572945425, "grad_norm": 9.249139281771752, "learning_rate": 4.812485721833465e-06, "loss": 1.3189, "step": 2124 }, { "epoch": 0.3008423586040915, "grad_norm": 9.101850434689048, "learning_rate": 4.812267856071407e-06, "loss": 1.3882, "step": 2125 }, { "epoch": 0.30098393147872865, "grad_norm": 10.922647211502055, "learning_rate": 4.812049868755154e-06, "loss": 1.3182, "step": 2126 }, { "epoch": 0.3011255043533659, "grad_norm": 11.62284978854288, "learning_rate": 4.8118317598961625e-06, "loss": 1.3851, "step": 2127 }, { "epoch": 0.3012670772280031, "grad_norm": 9.72555333306426, "learning_rate": 4.811613529505899e-06, "loss": 1.3358, "step": 2128 }, { "epoch": 0.30140865010264034, "grad_norm": 8.110378313565906, "learning_rate": 4.811395177595836e-06, "loss": 1.2919, "step": 2129 }, { "epoch": 0.30155022297727757, "grad_norm": 10.578732629450947, "learning_rate": 4.811176704177452e-06, "loss": 1.4054, "step": 2130 }, { "epoch": 0.30169179585191475, "grad_norm": 8.637127297692386, "learning_rate": 4.810958109262232e-06, "loss": 1.2347, "step": 2131 }, { "epoch": 0.301833368726552, "grad_norm": 10.674299330575776, "learning_rate": 4.810739392861667e-06, "loss": 1.4166, "step": 2132 }, { "epoch": 0.3019749416011892, "grad_norm": 10.467692854156777, "learning_rate": 4.810520554987256e-06, "loss": 1.4355, "step": 2133 }, { "epoch": 0.30211651447582644, "grad_norm": 10.889367445552464, "learning_rate": 4.810301595650501e-06, "loss": 1.3214, "step": 2134 }, { "epoch": 0.30225808735046367, "grad_norm": 10.895388728468278, "learning_rate": 4.810082514862915e-06, "loss": 1.5507, "step": 2135 }, { "epoch": 0.3023996602251009, "grad_norm": 8.831910744561583, "learning_rate": 4.809863312636013e-06, "loss": 1.3049, "step": 2136 }, { "epoch": 0.30254123309973807, "grad_norm": 8.917596596209899, "learning_rate": 4.8096439889813186e-06, "loss": 1.5085, "step": 2137 }, { "epoch": 0.3026828059743753, "grad_norm": 8.665414524101644, "learning_rate": 4.809424543910363e-06, "loss": 1.3044, "step": 2138 }, { "epoch": 0.30282437884901253, "grad_norm": 10.472495613160971, "learning_rate": 4.80920497743468e-06, "loss": 1.2828, "step": 2139 }, { "epoch": 0.30296595172364976, "grad_norm": 10.248632200205922, "learning_rate": 4.808985289565813e-06, "loss": 1.4064, "step": 2140 }, { "epoch": 0.303107524598287, "grad_norm": 11.866525939221257, "learning_rate": 4.808765480315312e-06, "loss": 1.4072, "step": 2141 }, { "epoch": 0.30324909747292417, "grad_norm": 9.57578685903809, "learning_rate": 4.80854554969473e-06, "loss": 1.2522, "step": 2142 }, { "epoch": 0.3033906703475614, "grad_norm": 10.062908661878268, "learning_rate": 4.80832549771563e-06, "loss": 1.2444, "step": 2143 }, { "epoch": 0.3035322432221986, "grad_norm": 8.320168536019775, "learning_rate": 4.808105324389581e-06, "loss": 1.4341, "step": 2144 }, { "epoch": 0.30367381609683586, "grad_norm": 9.577208931009238, "learning_rate": 4.807885029728155e-06, "loss": 1.2097, "step": 2145 }, { "epoch": 0.3038153889714731, "grad_norm": 11.866626879450358, "learning_rate": 4.807664613742934e-06, "loss": 1.387, "step": 2146 }, { "epoch": 0.30395696184611026, "grad_norm": 11.990442284451305, "learning_rate": 4.807444076445506e-06, "loss": 1.3301, "step": 2147 }, { "epoch": 0.3040985347207475, "grad_norm": 8.813060120441902, "learning_rate": 4.807223417847462e-06, "loss": 1.2563, "step": 2148 }, { "epoch": 0.3042401075953847, "grad_norm": 8.843693062427137, "learning_rate": 4.807002637960403e-06, "loss": 1.4235, "step": 2149 }, { "epoch": 0.30438168047002195, "grad_norm": 9.913397199283489, "learning_rate": 4.806781736795937e-06, "loss": 1.3566, "step": 2150 }, { "epoch": 0.3045232533446592, "grad_norm": 12.563138271837364, "learning_rate": 4.806560714365674e-06, "loss": 1.2917, "step": 2151 }, { "epoch": 0.30466482621929636, "grad_norm": 11.165381020699757, "learning_rate": 4.806339570681234e-06, "loss": 1.3521, "step": 2152 }, { "epoch": 0.3048063990939336, "grad_norm": 9.014799876940645, "learning_rate": 4.8061183057542424e-06, "loss": 1.3374, "step": 2153 }, { "epoch": 0.3049479719685708, "grad_norm": 8.309626261802624, "learning_rate": 4.805896919596332e-06, "loss": 1.2374, "step": 2154 }, { "epoch": 0.30508954484320805, "grad_norm": 8.973004863710912, "learning_rate": 4.805675412219139e-06, "loss": 1.4541, "step": 2155 }, { "epoch": 0.3052311177178453, "grad_norm": 9.853985021958795, "learning_rate": 4.805453783634309e-06, "loss": 1.2393, "step": 2156 }, { "epoch": 0.30537269059248245, "grad_norm": 12.049780904590458, "learning_rate": 4.805232033853493e-06, "loss": 1.512, "step": 2157 }, { "epoch": 0.3055142634671197, "grad_norm": 10.628002864379267, "learning_rate": 4.805010162888347e-06, "loss": 1.3717, "step": 2158 }, { "epoch": 0.3056558363417569, "grad_norm": 8.528608758666236, "learning_rate": 4.804788170750536e-06, "loss": 1.4219, "step": 2159 }, { "epoch": 0.30579740921639414, "grad_norm": 6.78648322201603, "learning_rate": 4.804566057451729e-06, "loss": 1.1867, "step": 2160 }, { "epoch": 0.3059389820910314, "grad_norm": 12.751289452068354, "learning_rate": 4.8043438230036034e-06, "loss": 1.3117, "step": 2161 }, { "epoch": 0.3060805549656686, "grad_norm": 7.218541526261204, "learning_rate": 4.804121467417841e-06, "loss": 1.2878, "step": 2162 }, { "epoch": 0.3062221278403058, "grad_norm": 10.59821200855693, "learning_rate": 4.8038989907061305e-06, "loss": 1.4835, "step": 2163 }, { "epoch": 0.306363700714943, "grad_norm": 10.455945191897145, "learning_rate": 4.803676392880168e-06, "loss": 1.3167, "step": 2164 }, { "epoch": 0.30650527358958024, "grad_norm": 8.600561487365184, "learning_rate": 4.803453673951656e-06, "loss": 1.3833, "step": 2165 }, { "epoch": 0.30664684646421747, "grad_norm": 10.799638982319617, "learning_rate": 4.803230833932302e-06, "loss": 1.2631, "step": 2166 }, { "epoch": 0.3067884193388547, "grad_norm": 11.055646767125527, "learning_rate": 4.803007872833819e-06, "loss": 1.4719, "step": 2167 }, { "epoch": 0.3069299922134919, "grad_norm": 8.734533973843961, "learning_rate": 4.8027847906679305e-06, "loss": 1.3097, "step": 2168 }, { "epoch": 0.3070715650881291, "grad_norm": 9.331467941610928, "learning_rate": 4.802561587446362e-06, "loss": 1.3733, "step": 2169 }, { "epoch": 0.30721313796276634, "grad_norm": 13.078388097525329, "learning_rate": 4.802338263180848e-06, "loss": 1.5777, "step": 2170 }, { "epoch": 0.30735471083740357, "grad_norm": 10.253762647803976, "learning_rate": 4.802114817883128e-06, "loss": 1.2179, "step": 2171 }, { "epoch": 0.3074962837120408, "grad_norm": 9.510203052362655, "learning_rate": 4.801891251564949e-06, "loss": 1.288, "step": 2172 }, { "epoch": 0.30763785658667797, "grad_norm": 10.616907246577124, "learning_rate": 4.801667564238063e-06, "loss": 1.4672, "step": 2173 }, { "epoch": 0.3077794294613152, "grad_norm": 12.412792104797134, "learning_rate": 4.801443755914229e-06, "loss": 1.3913, "step": 2174 }, { "epoch": 0.30792100233595243, "grad_norm": 13.572873696849353, "learning_rate": 4.801219826605213e-06, "loss": 1.4041, "step": 2175 }, { "epoch": 0.30806257521058966, "grad_norm": 9.510719676098345, "learning_rate": 4.8009957763227875e-06, "loss": 1.4456, "step": 2176 }, { "epoch": 0.3082041480852269, "grad_norm": 10.465766686624884, "learning_rate": 4.800771605078728e-06, "loss": 1.4303, "step": 2177 }, { "epoch": 0.30834572095986407, "grad_norm": 9.451444113859127, "learning_rate": 4.800547312884822e-06, "loss": 1.3074, "step": 2178 }, { "epoch": 0.3084872938345013, "grad_norm": 10.8182594086711, "learning_rate": 4.800322899752859e-06, "loss": 1.3681, "step": 2179 }, { "epoch": 0.3086288667091385, "grad_norm": 8.224565544364923, "learning_rate": 4.800098365694636e-06, "loss": 1.3655, "step": 2180 }, { "epoch": 0.30877043958377576, "grad_norm": 11.465042720803257, "learning_rate": 4.799873710721958e-06, "loss": 1.4696, "step": 2181 }, { "epoch": 0.308912012458413, "grad_norm": 9.94256424370667, "learning_rate": 4.799648934846633e-06, "loss": 1.3052, "step": 2182 }, { "epoch": 0.30905358533305016, "grad_norm": 7.477948131135514, "learning_rate": 4.799424038080478e-06, "loss": 1.2722, "step": 2183 }, { "epoch": 0.3091951582076874, "grad_norm": 9.86239437535031, "learning_rate": 4.799199020435316e-06, "loss": 1.3953, "step": 2184 }, { "epoch": 0.3093367310823246, "grad_norm": 10.05652663832517, "learning_rate": 4.798973881922975e-06, "loss": 1.3786, "step": 2185 }, { "epoch": 0.30947830395696185, "grad_norm": 9.342370381138767, "learning_rate": 4.798748622555293e-06, "loss": 1.3066, "step": 2186 }, { "epoch": 0.3096198768315991, "grad_norm": 10.84374788927393, "learning_rate": 4.798523242344109e-06, "loss": 1.5314, "step": 2187 }, { "epoch": 0.30976144970623626, "grad_norm": 8.644672018595156, "learning_rate": 4.798297741301271e-06, "loss": 1.3766, "step": 2188 }, { "epoch": 0.3099030225808735, "grad_norm": 9.999206892987539, "learning_rate": 4.798072119438636e-06, "loss": 1.3785, "step": 2189 }, { "epoch": 0.3100445954555107, "grad_norm": 7.997373149661199, "learning_rate": 4.797846376768062e-06, "loss": 1.2384, "step": 2190 }, { "epoch": 0.31018616833014795, "grad_norm": 8.239192009726715, "learning_rate": 4.797620513301418e-06, "loss": 1.3864, "step": 2191 }, { "epoch": 0.3103277412047852, "grad_norm": 7.8792294240705205, "learning_rate": 4.797394529050577e-06, "loss": 1.3436, "step": 2192 }, { "epoch": 0.3104693140794224, "grad_norm": 11.454182116758368, "learning_rate": 4.797168424027419e-06, "loss": 1.4547, "step": 2193 }, { "epoch": 0.3106108869540596, "grad_norm": 8.108193724927705, "learning_rate": 4.796942198243828e-06, "loss": 1.2429, "step": 2194 }, { "epoch": 0.3107524598286968, "grad_norm": 9.064592212020067, "learning_rate": 4.796715851711699e-06, "loss": 1.3266, "step": 2195 }, { "epoch": 0.31089403270333404, "grad_norm": 9.215443991419207, "learning_rate": 4.7964893844429315e-06, "loss": 1.3649, "step": 2196 }, { "epoch": 0.3110356055779713, "grad_norm": 8.390685921275578, "learning_rate": 4.796262796449428e-06, "loss": 1.1835, "step": 2197 }, { "epoch": 0.3111771784526085, "grad_norm": 10.643321318850102, "learning_rate": 4.7960360877431025e-06, "loss": 1.222, "step": 2198 }, { "epoch": 0.3113187513272457, "grad_norm": 9.401739616938295, "learning_rate": 4.795809258335872e-06, "loss": 1.4686, "step": 2199 }, { "epoch": 0.3114603242018829, "grad_norm": 11.001899208493398, "learning_rate": 4.795582308239659e-06, "loss": 1.2981, "step": 2200 }, { "epoch": 0.31160189707652014, "grad_norm": 11.63112290449771, "learning_rate": 4.795355237466397e-06, "loss": 1.5022, "step": 2201 }, { "epoch": 0.31174346995115737, "grad_norm": 12.078431987932431, "learning_rate": 4.795128046028021e-06, "loss": 1.4802, "step": 2202 }, { "epoch": 0.3118850428257946, "grad_norm": 9.821779953256707, "learning_rate": 4.794900733936476e-06, "loss": 1.3743, "step": 2203 }, { "epoch": 0.3120266157004318, "grad_norm": 10.702598569968993, "learning_rate": 4.794673301203709e-06, "loss": 1.4356, "step": 2204 }, { "epoch": 0.312168188575069, "grad_norm": 11.096660393434009, "learning_rate": 4.794445747841679e-06, "loss": 1.4567, "step": 2205 }, { "epoch": 0.31230976144970624, "grad_norm": 12.722072158069109, "learning_rate": 4.794218073862346e-06, "loss": 1.2958, "step": 2206 }, { "epoch": 0.31245133432434347, "grad_norm": 8.722778702118541, "learning_rate": 4.79399027927768e-06, "loss": 1.2662, "step": 2207 }, { "epoch": 0.3125929071989807, "grad_norm": 13.433128604007218, "learning_rate": 4.793762364099655e-06, "loss": 1.5376, "step": 2208 }, { "epoch": 0.31273448007361787, "grad_norm": 9.228789058449195, "learning_rate": 4.793534328340253e-06, "loss": 1.3998, "step": 2209 }, { "epoch": 0.3128760529482551, "grad_norm": 11.223913508043225, "learning_rate": 4.7933061720114615e-06, "loss": 1.3947, "step": 2210 }, { "epoch": 0.31301762582289233, "grad_norm": 16.991958511388095, "learning_rate": 4.793077895125274e-06, "loss": 1.4702, "step": 2211 }, { "epoch": 0.31315919869752956, "grad_norm": 11.11055580870813, "learning_rate": 4.792849497693692e-06, "loss": 1.3359, "step": 2212 }, { "epoch": 0.3133007715721668, "grad_norm": 9.2300348038601, "learning_rate": 4.7926209797287216e-06, "loss": 1.3559, "step": 2213 }, { "epoch": 0.31344234444680397, "grad_norm": 7.532482505395667, "learning_rate": 4.792392341242375e-06, "loss": 1.3341, "step": 2214 }, { "epoch": 0.3135839173214412, "grad_norm": 12.45021212600098, "learning_rate": 4.792163582246674e-06, "loss": 1.4448, "step": 2215 }, { "epoch": 0.3137254901960784, "grad_norm": 15.270172784873221, "learning_rate": 4.791934702753641e-06, "loss": 1.4395, "step": 2216 }, { "epoch": 0.31386706307071566, "grad_norm": 8.514443187775203, "learning_rate": 4.79170570277531e-06, "loss": 1.3032, "step": 2217 }, { "epoch": 0.3140086359453529, "grad_norm": 8.84213101830233, "learning_rate": 4.791476582323719e-06, "loss": 1.4588, "step": 2218 }, { "epoch": 0.31415020881999006, "grad_norm": 10.468526262767735, "learning_rate": 4.791247341410913e-06, "loss": 1.4874, "step": 2219 }, { "epoch": 0.3142917816946273, "grad_norm": 8.343746342461746, "learning_rate": 4.791017980048942e-06, "loss": 1.3926, "step": 2220 }, { "epoch": 0.3144333545692645, "grad_norm": 9.74692506474598, "learning_rate": 4.790788498249864e-06, "loss": 1.4704, "step": 2221 }, { "epoch": 0.31457492744390175, "grad_norm": 11.245977403119074, "learning_rate": 4.790558896025743e-06, "loss": 1.546, "step": 2222 }, { "epoch": 0.314716500318539, "grad_norm": 9.823785402967365, "learning_rate": 4.79032917338865e-06, "loss": 1.4752, "step": 2223 }, { "epoch": 0.3148580731931762, "grad_norm": 9.742700827870408, "learning_rate": 4.790099330350658e-06, "loss": 1.3732, "step": 2224 }, { "epoch": 0.3149996460678134, "grad_norm": 9.50971768927896, "learning_rate": 4.789869366923853e-06, "loss": 1.3883, "step": 2225 }, { "epoch": 0.3151412189424506, "grad_norm": 12.088388442296017, "learning_rate": 4.789639283120323e-06, "loss": 1.3612, "step": 2226 }, { "epoch": 0.31528279181708785, "grad_norm": 9.25986223023589, "learning_rate": 4.789409078952162e-06, "loss": 1.2207, "step": 2227 }, { "epoch": 0.3154243646917251, "grad_norm": 10.62953789238573, "learning_rate": 4.789178754431474e-06, "loss": 1.24, "step": 2228 }, { "epoch": 0.3155659375663623, "grad_norm": 9.507486304875876, "learning_rate": 4.788948309570365e-06, "loss": 1.3571, "step": 2229 }, { "epoch": 0.3157075104409995, "grad_norm": 10.187523215799851, "learning_rate": 4.78871774438095e-06, "loss": 1.3901, "step": 2230 }, { "epoch": 0.3158490833156367, "grad_norm": 9.175083619446099, "learning_rate": 4.78848705887535e-06, "loss": 1.2577, "step": 2231 }, { "epoch": 0.31599065619027394, "grad_norm": 10.80137811980996, "learning_rate": 4.788256253065692e-06, "loss": 1.3968, "step": 2232 }, { "epoch": 0.3161322290649112, "grad_norm": 10.744839871412914, "learning_rate": 4.7880253269641085e-06, "loss": 1.3011, "step": 2233 }, { "epoch": 0.3162738019395484, "grad_norm": 10.274067233551241, "learning_rate": 4.787794280582739e-06, "loss": 1.4676, "step": 2234 }, { "epoch": 0.3164153748141856, "grad_norm": 9.015605959549868, "learning_rate": 4.787563113933731e-06, "loss": 1.2941, "step": 2235 }, { "epoch": 0.3165569476888228, "grad_norm": 8.817414773259303, "learning_rate": 4.787331827029236e-06, "loss": 1.2263, "step": 2236 }, { "epoch": 0.31669852056346004, "grad_norm": 8.401299021957715, "learning_rate": 4.787100419881412e-06, "loss": 1.2617, "step": 2237 }, { "epoch": 0.31684009343809727, "grad_norm": 10.51100744295333, "learning_rate": 4.7868688925024245e-06, "loss": 1.2427, "step": 2238 }, { "epoch": 0.3169816663127345, "grad_norm": 12.607021712520405, "learning_rate": 4.786637244904444e-06, "loss": 1.2824, "step": 2239 }, { "epoch": 0.3171232391873717, "grad_norm": 8.439413574239076, "learning_rate": 4.786405477099648e-06, "loss": 1.2391, "step": 2240 }, { "epoch": 0.3172648120620089, "grad_norm": 10.710677151399155, "learning_rate": 4.786173589100222e-06, "loss": 1.4805, "step": 2241 }, { "epoch": 0.31740638493664614, "grad_norm": 10.869154839107956, "learning_rate": 4.785941580918354e-06, "loss": 1.1829, "step": 2242 }, { "epoch": 0.31754795781128337, "grad_norm": 8.519773931951628, "learning_rate": 4.785709452566243e-06, "loss": 1.4121, "step": 2243 }, { "epoch": 0.3176895306859206, "grad_norm": 8.634256012656206, "learning_rate": 4.785477204056089e-06, "loss": 1.3843, "step": 2244 }, { "epoch": 0.31783110356055777, "grad_norm": 9.937636656391353, "learning_rate": 4.785244835400103e-06, "loss": 1.3349, "step": 2245 }, { "epoch": 0.317972676435195, "grad_norm": 11.30760263899498, "learning_rate": 4.7850123466105e-06, "loss": 1.3647, "step": 2246 }, { "epoch": 0.31811424930983223, "grad_norm": 8.575640097880678, "learning_rate": 4.784779737699502e-06, "loss": 1.2162, "step": 2247 }, { "epoch": 0.31825582218446946, "grad_norm": 10.839489014465883, "learning_rate": 4.7845470086793365e-06, "loss": 1.433, "step": 2248 }, { "epoch": 0.3183973950591067, "grad_norm": 7.58261082599368, "learning_rate": 4.784314159562238e-06, "loss": 1.2934, "step": 2249 }, { "epoch": 0.3185389679337439, "grad_norm": 9.281120119566825, "learning_rate": 4.7840811903604475e-06, "loss": 1.5035, "step": 2250 }, { "epoch": 0.3186805408083811, "grad_norm": 8.427742296128955, "learning_rate": 4.783848101086212e-06, "loss": 1.3066, "step": 2251 }, { "epoch": 0.3188221136830183, "grad_norm": 9.039179821786153, "learning_rate": 4.783614891751785e-06, "loss": 1.2166, "step": 2252 }, { "epoch": 0.31896368655765556, "grad_norm": 8.460719731875514, "learning_rate": 4.783381562369425e-06, "loss": 1.4452, "step": 2253 }, { "epoch": 0.3191052594322928, "grad_norm": 8.689664941436671, "learning_rate": 4.7831481129514e-06, "loss": 1.4565, "step": 2254 }, { "epoch": 0.31924683230693, "grad_norm": 10.443022038162649, "learning_rate": 4.78291454350998e-06, "loss": 1.4063, "step": 2255 }, { "epoch": 0.3193884051815672, "grad_norm": 11.192383451555802, "learning_rate": 4.782680854057445e-06, "loss": 1.4301, "step": 2256 }, { "epoch": 0.3195299780562044, "grad_norm": 9.652357977528972, "learning_rate": 4.78244704460608e-06, "loss": 1.485, "step": 2257 }, { "epoch": 0.31967155093084165, "grad_norm": 10.762238389223887, "learning_rate": 4.782213115168176e-06, "loss": 1.4877, "step": 2258 }, { "epoch": 0.3198131238054789, "grad_norm": 10.95125425636706, "learning_rate": 4.781979065756029e-06, "loss": 1.3698, "step": 2259 }, { "epoch": 0.3199546966801161, "grad_norm": 12.830967540577124, "learning_rate": 4.781744896381945e-06, "loss": 1.4194, "step": 2260 }, { "epoch": 0.3200962695547533, "grad_norm": 8.694504363558034, "learning_rate": 4.781510607058233e-06, "loss": 1.3703, "step": 2261 }, { "epoch": 0.3202378424293905, "grad_norm": 9.848073827173959, "learning_rate": 4.781276197797209e-06, "loss": 1.447, "step": 2262 }, { "epoch": 0.32037941530402775, "grad_norm": 10.886910066268372, "learning_rate": 4.781041668611197e-06, "loss": 1.3141, "step": 2263 }, { "epoch": 0.320520988178665, "grad_norm": 9.080760999878482, "learning_rate": 4.780807019512525e-06, "loss": 1.4078, "step": 2264 }, { "epoch": 0.3206625610533022, "grad_norm": 10.550238540628365, "learning_rate": 4.7805722505135285e-06, "loss": 1.4502, "step": 2265 }, { "epoch": 0.3208041339279394, "grad_norm": 10.188039192019483, "learning_rate": 4.7803373616265495e-06, "loss": 1.3492, "step": 2266 }, { "epoch": 0.3209457068025766, "grad_norm": 8.190436615536745, "learning_rate": 4.780102352863935e-06, "loss": 1.3142, "step": 2267 }, { "epoch": 0.32108727967721384, "grad_norm": 9.016211849265735, "learning_rate": 4.77986722423804e-06, "loss": 1.256, "step": 2268 }, { "epoch": 0.3212288525518511, "grad_norm": 9.012697375883526, "learning_rate": 4.779631975761226e-06, "loss": 1.3936, "step": 2269 }, { "epoch": 0.3213704254264883, "grad_norm": 11.531250661627691, "learning_rate": 4.779396607445858e-06, "loss": 1.4671, "step": 2270 }, { "epoch": 0.3215119983011255, "grad_norm": 8.395392798389949, "learning_rate": 4.779161119304311e-06, "loss": 1.4031, "step": 2271 }, { "epoch": 0.3216535711757627, "grad_norm": 10.577285440279857, "learning_rate": 4.7789255113489615e-06, "loss": 1.2265, "step": 2272 }, { "epoch": 0.32179514405039994, "grad_norm": 8.285497878682744, "learning_rate": 4.778689783592198e-06, "loss": 1.3038, "step": 2273 }, { "epoch": 0.32193671692503717, "grad_norm": 7.762175317629787, "learning_rate": 4.778453936046412e-06, "loss": 1.2744, "step": 2274 }, { "epoch": 0.3220782897996744, "grad_norm": 9.52923782871387, "learning_rate": 4.778217968724002e-06, "loss": 1.3339, "step": 2275 }, { "epoch": 0.3222198626743116, "grad_norm": 9.323837035939329, "learning_rate": 4.777981881637372e-06, "loss": 1.4908, "step": 2276 }, { "epoch": 0.3223614355489488, "grad_norm": 11.126942325725407, "learning_rate": 4.777745674798931e-06, "loss": 1.2494, "step": 2277 }, { "epoch": 0.32250300842358604, "grad_norm": 11.58282301256605, "learning_rate": 4.7775093482211e-06, "loss": 1.4775, "step": 2278 }, { "epoch": 0.32264458129822327, "grad_norm": 8.630166538085906, "learning_rate": 4.7772729019163e-06, "loss": 1.2937, "step": 2279 }, { "epoch": 0.3227861541728605, "grad_norm": 9.8891087434977, "learning_rate": 4.777036335896962e-06, "loss": 1.5527, "step": 2280 }, { "epoch": 0.3229277270474977, "grad_norm": 9.82861753845805, "learning_rate": 4.776799650175521e-06, "loss": 1.4105, "step": 2281 }, { "epoch": 0.3230692999221349, "grad_norm": 9.576287994209807, "learning_rate": 4.7765628447644214e-06, "loss": 1.3974, "step": 2282 }, { "epoch": 0.32321087279677213, "grad_norm": 8.882462493791532, "learning_rate": 4.776325919676109e-06, "loss": 1.3288, "step": 2283 }, { "epoch": 0.32335244567140936, "grad_norm": 11.379957523099067, "learning_rate": 4.7760888749230414e-06, "loss": 1.3705, "step": 2284 }, { "epoch": 0.3234940185460466, "grad_norm": 10.140843934221756, "learning_rate": 4.775851710517678e-06, "loss": 1.3088, "step": 2285 }, { "epoch": 0.3236355914206838, "grad_norm": 7.171915424041849, "learning_rate": 4.775614426472488e-06, "loss": 1.3219, "step": 2286 }, { "epoch": 0.323777164295321, "grad_norm": 9.703786240804492, "learning_rate": 4.775377022799944e-06, "loss": 1.371, "step": 2287 }, { "epoch": 0.3239187371699582, "grad_norm": 9.111841617028283, "learning_rate": 4.7751394995125266e-06, "loss": 1.1909, "step": 2288 }, { "epoch": 0.32406031004459546, "grad_norm": 9.973538674605301, "learning_rate": 4.7749018566227214e-06, "loss": 1.4851, "step": 2289 }, { "epoch": 0.3242018829192327, "grad_norm": 8.377841752019132, "learning_rate": 4.774664094143022e-06, "loss": 1.1724, "step": 2290 }, { "epoch": 0.3243434557938699, "grad_norm": 10.320210851245248, "learning_rate": 4.774426212085928e-06, "loss": 1.3609, "step": 2291 }, { "epoch": 0.3244850286685071, "grad_norm": 11.170033037620051, "learning_rate": 4.774188210463944e-06, "loss": 1.4117, "step": 2292 }, { "epoch": 0.3246266015431443, "grad_norm": 8.646501360597163, "learning_rate": 4.77395008928958e-06, "loss": 1.2985, "step": 2293 }, { "epoch": 0.32476817441778155, "grad_norm": 10.119684401706246, "learning_rate": 4.773711848575357e-06, "loss": 1.3323, "step": 2294 }, { "epoch": 0.3249097472924188, "grad_norm": 10.590571734130396, "learning_rate": 4.773473488333797e-06, "loss": 1.4364, "step": 2295 }, { "epoch": 0.325051320167056, "grad_norm": 9.923796027735438, "learning_rate": 4.77323500857743e-06, "loss": 1.3196, "step": 2296 }, { "epoch": 0.3251928930416932, "grad_norm": 8.945486488482613, "learning_rate": 4.772996409318794e-06, "loss": 1.3218, "step": 2297 }, { "epoch": 0.3253344659163304, "grad_norm": 8.596988026338062, "learning_rate": 4.772757690570432e-06, "loss": 1.4391, "step": 2298 }, { "epoch": 0.32547603879096765, "grad_norm": 8.844135181679235, "learning_rate": 4.772518852344893e-06, "loss": 1.3219, "step": 2299 }, { "epoch": 0.3256176116656049, "grad_norm": 11.80762651616795, "learning_rate": 4.772279894654732e-06, "loss": 1.4989, "step": 2300 }, { "epoch": 0.3257591845402421, "grad_norm": 8.014139554580577, "learning_rate": 4.772040817512511e-06, "loss": 1.2117, "step": 2301 }, { "epoch": 0.3259007574148793, "grad_norm": 8.646793419412093, "learning_rate": 4.7718016209307996e-06, "loss": 1.4292, "step": 2302 }, { "epoch": 0.3260423302895165, "grad_norm": 10.227015334464264, "learning_rate": 4.77156230492217e-06, "loss": 1.4192, "step": 2303 }, { "epoch": 0.32618390316415374, "grad_norm": 9.686631292955465, "learning_rate": 4.771322869499203e-06, "loss": 1.2747, "step": 2304 }, { "epoch": 0.326325476038791, "grad_norm": 9.322450378236802, "learning_rate": 4.7710833146744874e-06, "loss": 1.3684, "step": 2305 }, { "epoch": 0.3264670489134282, "grad_norm": 10.317098066423863, "learning_rate": 4.770843640460615e-06, "loss": 1.4558, "step": 2306 }, { "epoch": 0.32660862178806543, "grad_norm": 10.763358399366023, "learning_rate": 4.770603846870185e-06, "loss": 1.4163, "step": 2307 }, { "epoch": 0.3267501946627026, "grad_norm": 10.909116701615794, "learning_rate": 4.770363933915805e-06, "loss": 1.3577, "step": 2308 }, { "epoch": 0.32689176753733984, "grad_norm": 8.153054031408262, "learning_rate": 4.770123901610085e-06, "loss": 1.257, "step": 2309 }, { "epoch": 0.32703334041197707, "grad_norm": 10.641424073451596, "learning_rate": 4.769883749965645e-06, "loss": 1.315, "step": 2310 }, { "epoch": 0.3271749132866143, "grad_norm": 9.483400800718607, "learning_rate": 4.7696434789951074e-06, "loss": 1.2914, "step": 2311 }, { "epoch": 0.32731648616125153, "grad_norm": 10.545548238308163, "learning_rate": 4.769403088711105e-06, "loss": 1.3721, "step": 2312 }, { "epoch": 0.3274580590358887, "grad_norm": 10.520092948822052, "learning_rate": 4.7691625791262756e-06, "loss": 1.4145, "step": 2313 }, { "epoch": 0.32759963191052593, "grad_norm": 13.6578024673556, "learning_rate": 4.76892195025326e-06, "loss": 1.453, "step": 2314 }, { "epoch": 0.32774120478516316, "grad_norm": 9.464754275010426, "learning_rate": 4.768681202104709e-06, "loss": 1.3407, "step": 2315 }, { "epoch": 0.3278827776598004, "grad_norm": 9.949224117833259, "learning_rate": 4.7684403346932795e-06, "loss": 1.3194, "step": 2316 }, { "epoch": 0.3280243505344376, "grad_norm": 8.540068487745906, "learning_rate": 4.768199348031633e-06, "loss": 1.3057, "step": 2317 }, { "epoch": 0.3281659234090748, "grad_norm": 11.972481009117194, "learning_rate": 4.7679582421324385e-06, "loss": 1.5252, "step": 2318 }, { "epoch": 0.32830749628371203, "grad_norm": 9.23813879052422, "learning_rate": 4.76771701700837e-06, "loss": 1.215, "step": 2319 }, { "epoch": 0.32844906915834926, "grad_norm": 10.913021575942759, "learning_rate": 4.767475672672108e-06, "loss": 1.3623, "step": 2320 }, { "epoch": 0.3285906420329865, "grad_norm": 9.77781192696513, "learning_rate": 4.767234209136341e-06, "loss": 1.4048, "step": 2321 }, { "epoch": 0.3287322149076237, "grad_norm": 10.640822533385833, "learning_rate": 4.7669926264137625e-06, "loss": 1.3395, "step": 2322 }, { "epoch": 0.3288737877822609, "grad_norm": 8.838765279578391, "learning_rate": 4.766750924517071e-06, "loss": 1.3281, "step": 2323 }, { "epoch": 0.3290153606568981, "grad_norm": 10.264661769878245, "learning_rate": 4.766509103458975e-06, "loss": 1.3471, "step": 2324 }, { "epoch": 0.32915693353153536, "grad_norm": 10.652897729796523, "learning_rate": 4.766267163252185e-06, "loss": 1.5196, "step": 2325 }, { "epoch": 0.3292985064061726, "grad_norm": 9.872582634671536, "learning_rate": 4.766025103909419e-06, "loss": 1.2749, "step": 2326 }, { "epoch": 0.3294400792808098, "grad_norm": 9.250830123356033, "learning_rate": 4.765782925443404e-06, "loss": 1.279, "step": 2327 }, { "epoch": 0.329581652155447, "grad_norm": 8.12116984542271, "learning_rate": 4.76554062786687e-06, "loss": 1.3079, "step": 2328 }, { "epoch": 0.3297232250300842, "grad_norm": 9.87970438094102, "learning_rate": 4.765298211192554e-06, "loss": 1.3782, "step": 2329 }, { "epoch": 0.32986479790472145, "grad_norm": 9.47212627383424, "learning_rate": 4.7650556754332e-06, "loss": 1.2616, "step": 2330 }, { "epoch": 0.3300063707793587, "grad_norm": 8.945253650188514, "learning_rate": 4.7648130206015585e-06, "loss": 1.2611, "step": 2331 }, { "epoch": 0.3301479436539959, "grad_norm": 11.157412513836093, "learning_rate": 4.764570246710385e-06, "loss": 1.3572, "step": 2332 }, { "epoch": 0.3302895165286331, "grad_norm": 10.66238432594512, "learning_rate": 4.764327353772442e-06, "loss": 1.603, "step": 2333 }, { "epoch": 0.3304310894032703, "grad_norm": 8.616191401800505, "learning_rate": 4.764084341800499e-06, "loss": 1.419, "step": 2334 }, { "epoch": 0.33057266227790755, "grad_norm": 6.801129572678017, "learning_rate": 4.763841210807329e-06, "loss": 1.1657, "step": 2335 }, { "epoch": 0.3307142351525448, "grad_norm": 10.550619633272545, "learning_rate": 4.763597960805716e-06, "loss": 1.3385, "step": 2336 }, { "epoch": 0.330855808027182, "grad_norm": 9.40182725718388, "learning_rate": 4.763354591808446e-06, "loss": 1.4718, "step": 2337 }, { "epoch": 0.33099738090181924, "grad_norm": 10.659467373651529, "learning_rate": 4.763111103828312e-06, "loss": 1.3803, "step": 2338 }, { "epoch": 0.3311389537764564, "grad_norm": 9.795682259555777, "learning_rate": 4.762867496878114e-06, "loss": 1.2628, "step": 2339 }, { "epoch": 0.33128052665109364, "grad_norm": 8.87586089780851, "learning_rate": 4.76262377097066e-06, "loss": 1.6375, "step": 2340 }, { "epoch": 0.3314220995257309, "grad_norm": 8.792083682133196, "learning_rate": 4.762379926118761e-06, "loss": 1.4179, "step": 2341 }, { "epoch": 0.3315636724003681, "grad_norm": 9.767461546057447, "learning_rate": 4.762135962335237e-06, "loss": 1.2659, "step": 2342 }, { "epoch": 0.33170524527500533, "grad_norm": 8.979724934534595, "learning_rate": 4.7618918796329115e-06, "loss": 1.3012, "step": 2343 }, { "epoch": 0.3318468181496425, "grad_norm": 8.851819350311374, "learning_rate": 4.761647678024617e-06, "loss": 1.3692, "step": 2344 }, { "epoch": 0.33198839102427974, "grad_norm": 10.004613956322306, "learning_rate": 4.76140335752319e-06, "loss": 1.3045, "step": 2345 }, { "epoch": 0.33212996389891697, "grad_norm": 9.74926989829519, "learning_rate": 4.7611589181414745e-06, "loss": 1.3116, "step": 2346 }, { "epoch": 0.3322715367735542, "grad_norm": 8.33745666218122, "learning_rate": 4.76091435989232e-06, "loss": 1.2299, "step": 2347 }, { "epoch": 0.33241310964819143, "grad_norm": 8.930186874296824, "learning_rate": 4.760669682788584e-06, "loss": 1.446, "step": 2348 }, { "epoch": 0.3325546825228286, "grad_norm": 8.476120110143675, "learning_rate": 4.760424886843129e-06, "loss": 1.3183, "step": 2349 }, { "epoch": 0.33269625539746583, "grad_norm": 11.249620219284072, "learning_rate": 4.7601799720688235e-06, "loss": 1.4387, "step": 2350 }, { "epoch": 0.33283782827210306, "grad_norm": 9.76422021146034, "learning_rate": 4.759934938478541e-06, "loss": 1.3479, "step": 2351 }, { "epoch": 0.3329794011467403, "grad_norm": 10.096779383469613, "learning_rate": 4.7596897860851644e-06, "loss": 1.3358, "step": 2352 }, { "epoch": 0.3331209740213775, "grad_norm": 8.040990718703837, "learning_rate": 4.75944451490158e-06, "loss": 1.311, "step": 2353 }, { "epoch": 0.3332625468960147, "grad_norm": 9.257857825574698, "learning_rate": 4.759199124940683e-06, "loss": 1.3211, "step": 2354 }, { "epoch": 0.33340411977065193, "grad_norm": 8.645797643626135, "learning_rate": 4.7589536162153725e-06, "loss": 1.2343, "step": 2355 }, { "epoch": 0.33354569264528916, "grad_norm": 10.122427908013456, "learning_rate": 4.758707988738555e-06, "loss": 1.3853, "step": 2356 }, { "epoch": 0.3336872655199264, "grad_norm": 8.854674799680984, "learning_rate": 4.758462242523141e-06, "loss": 1.3234, "step": 2357 }, { "epoch": 0.3338288383945636, "grad_norm": 8.896651385987262, "learning_rate": 4.758216377582052e-06, "loss": 1.238, "step": 2358 }, { "epoch": 0.3339704112692008, "grad_norm": 8.67421711403997, "learning_rate": 4.757970393928212e-06, "loss": 1.1752, "step": 2359 }, { "epoch": 0.334111984143838, "grad_norm": 9.84112145584989, "learning_rate": 4.757724291574552e-06, "loss": 1.3281, "step": 2360 }, { "epoch": 0.33425355701847526, "grad_norm": 6.92637278036478, "learning_rate": 4.7574780705340094e-06, "loss": 1.2771, "step": 2361 }, { "epoch": 0.3343951298931125, "grad_norm": 8.924763173612963, "learning_rate": 4.757231730819528e-06, "loss": 1.3431, "step": 2362 }, { "epoch": 0.3345367027677497, "grad_norm": 9.724217682975759, "learning_rate": 4.7569852724440565e-06, "loss": 1.3765, "step": 2363 }, { "epoch": 0.3346782756423869, "grad_norm": 12.263390520990209, "learning_rate": 4.7567386954205535e-06, "loss": 1.526, "step": 2364 }, { "epoch": 0.3348198485170241, "grad_norm": 11.672985509210665, "learning_rate": 4.756491999761979e-06, "loss": 1.2542, "step": 2365 }, { "epoch": 0.33496142139166135, "grad_norm": 10.361603694203067, "learning_rate": 4.756245185481304e-06, "loss": 1.3895, "step": 2366 }, { "epoch": 0.3351029942662986, "grad_norm": 10.492933892964775, "learning_rate": 4.755998252591501e-06, "loss": 1.3694, "step": 2367 }, { "epoch": 0.3352445671409358, "grad_norm": 9.905220242958933, "learning_rate": 4.755751201105552e-06, "loss": 1.4377, "step": 2368 }, { "epoch": 0.33538614001557304, "grad_norm": 11.117811645229855, "learning_rate": 4.755504031036444e-06, "loss": 1.3639, "step": 2369 }, { "epoch": 0.3355277128902102, "grad_norm": 9.824765066196598, "learning_rate": 4.75525674239717e-06, "loss": 1.3643, "step": 2370 }, { "epoch": 0.33566928576484745, "grad_norm": 10.139854554707627, "learning_rate": 4.755009335200732e-06, "loss": 1.3937, "step": 2371 }, { "epoch": 0.3358108586394847, "grad_norm": 11.270852662996557, "learning_rate": 4.754761809460135e-06, "loss": 1.5049, "step": 2372 }, { "epoch": 0.3359524315141219, "grad_norm": 10.269748550501484, "learning_rate": 4.75451416518839e-06, "loss": 1.3755, "step": 2373 }, { "epoch": 0.33609400438875914, "grad_norm": 10.037125147450634, "learning_rate": 4.754266402398517e-06, "loss": 1.3799, "step": 2374 }, { "epoch": 0.3362355772633963, "grad_norm": 9.123615316600787, "learning_rate": 4.754018521103539e-06, "loss": 1.2609, "step": 2375 }, { "epoch": 0.33637715013803354, "grad_norm": 9.59543617167594, "learning_rate": 4.75377052131649e-06, "loss": 1.3512, "step": 2376 }, { "epoch": 0.3365187230126708, "grad_norm": 9.356217757725632, "learning_rate": 4.753522403050403e-06, "loss": 1.2956, "step": 2377 }, { "epoch": 0.336660295887308, "grad_norm": 11.232466194650437, "learning_rate": 4.7532741663183255e-06, "loss": 1.328, "step": 2378 }, { "epoch": 0.33680186876194523, "grad_norm": 10.207759628264077, "learning_rate": 4.753025811133304e-06, "loss": 1.1741, "step": 2379 }, { "epoch": 0.3369434416365824, "grad_norm": 12.413959865253826, "learning_rate": 4.752777337508395e-06, "loss": 1.2947, "step": 2380 }, { "epoch": 0.33708501451121964, "grad_norm": 9.485770561195777, "learning_rate": 4.752528745456663e-06, "loss": 1.3986, "step": 2381 }, { "epoch": 0.33722658738585687, "grad_norm": 9.665196887999182, "learning_rate": 4.752280034991172e-06, "loss": 1.2756, "step": 2382 }, { "epoch": 0.3373681602604941, "grad_norm": 12.751096192153932, "learning_rate": 4.752031206125e-06, "loss": 1.3544, "step": 2383 }, { "epoch": 0.33750973313513133, "grad_norm": 11.549174780491962, "learning_rate": 4.751782258871227e-06, "loss": 1.3731, "step": 2384 }, { "epoch": 0.3376513060097685, "grad_norm": 12.603433752688131, "learning_rate": 4.751533193242941e-06, "loss": 1.5942, "step": 2385 }, { "epoch": 0.33779287888440573, "grad_norm": 9.164659823750066, "learning_rate": 4.751284009253232e-06, "loss": 1.3042, "step": 2386 }, { "epoch": 0.33793445175904296, "grad_norm": 10.708119985233825, "learning_rate": 4.7510347069152015e-06, "loss": 1.4192, "step": 2387 }, { "epoch": 0.3380760246336802, "grad_norm": 9.668203516720293, "learning_rate": 4.750785286241955e-06, "loss": 1.4275, "step": 2388 }, { "epoch": 0.3382175975083174, "grad_norm": 8.267988072682467, "learning_rate": 4.750535747246604e-06, "loss": 1.3317, "step": 2389 }, { "epoch": 0.3383591703829546, "grad_norm": 8.24473264909652, "learning_rate": 4.750286089942267e-06, "loss": 1.2047, "step": 2390 }, { "epoch": 0.33850074325759183, "grad_norm": 8.861423047141479, "learning_rate": 4.750036314342069e-06, "loss": 1.3129, "step": 2391 }, { "epoch": 0.33864231613222906, "grad_norm": 8.67741764030618, "learning_rate": 4.7497864204591386e-06, "loss": 1.3807, "step": 2392 }, { "epoch": 0.3387838890068663, "grad_norm": 9.661263852419838, "learning_rate": 4.749536408306614e-06, "loss": 1.3761, "step": 2393 }, { "epoch": 0.3389254618815035, "grad_norm": 9.263844207955861, "learning_rate": 4.749286277897637e-06, "loss": 1.385, "step": 2394 }, { "epoch": 0.33906703475614075, "grad_norm": 10.32197939675294, "learning_rate": 4.749036029245358e-06, "loss": 1.2286, "step": 2395 }, { "epoch": 0.3392086076307779, "grad_norm": 8.958861106288976, "learning_rate": 4.7487856623629325e-06, "loss": 1.3419, "step": 2396 }, { "epoch": 0.33935018050541516, "grad_norm": 9.740873320042171, "learning_rate": 4.748535177263522e-06, "loss": 1.3405, "step": 2397 }, { "epoch": 0.3394917533800524, "grad_norm": 8.535847637186006, "learning_rate": 4.748284573960292e-06, "loss": 1.4036, "step": 2398 }, { "epoch": 0.3396333262546896, "grad_norm": 8.808180595909478, "learning_rate": 4.748033852466419e-06, "loss": 1.3464, "step": 2399 }, { "epoch": 0.33977489912932685, "grad_norm": 10.380372276612926, "learning_rate": 4.747783012795083e-06, "loss": 1.4047, "step": 2400 }, { "epoch": 0.339916472003964, "grad_norm": 10.109497635582475, "learning_rate": 4.747532054959469e-06, "loss": 1.328, "step": 2401 }, { "epoch": 0.34005804487860125, "grad_norm": 8.817781637705364, "learning_rate": 4.747280978972772e-06, "loss": 1.4293, "step": 2402 }, { "epoch": 0.3401996177532385, "grad_norm": 10.975016920211367, "learning_rate": 4.747029784848189e-06, "loss": 1.3992, "step": 2403 }, { "epoch": 0.3403411906278757, "grad_norm": 10.78901264640455, "learning_rate": 4.746778472598927e-06, "loss": 1.2605, "step": 2404 }, { "epoch": 0.34048276350251294, "grad_norm": 8.43916677330391, "learning_rate": 4.746527042238194e-06, "loss": 1.2425, "step": 2405 }, { "epoch": 0.3406243363771501, "grad_norm": 10.790810765872196, "learning_rate": 4.74627549377921e-06, "loss": 1.4551, "step": 2406 }, { "epoch": 0.34076590925178735, "grad_norm": 9.619568716679655, "learning_rate": 4.746023827235198e-06, "loss": 1.2343, "step": 2407 }, { "epoch": 0.3409074821264246, "grad_norm": 9.11386976373442, "learning_rate": 4.745772042619389e-06, "loss": 1.2838, "step": 2408 }, { "epoch": 0.3410490550010618, "grad_norm": 10.973562896416166, "learning_rate": 4.745520139945018e-06, "loss": 1.3601, "step": 2409 }, { "epoch": 0.34119062787569904, "grad_norm": 7.775809105480474, "learning_rate": 4.745268119225327e-06, "loss": 1.2794, "step": 2410 }, { "epoch": 0.3413322007503362, "grad_norm": 8.633637015206949, "learning_rate": 4.745015980473565e-06, "loss": 1.4651, "step": 2411 }, { "epoch": 0.34147377362497344, "grad_norm": 9.306035812915697, "learning_rate": 4.744763723702988e-06, "loss": 1.4775, "step": 2412 }, { "epoch": 0.3416153464996107, "grad_norm": 10.251613955084439, "learning_rate": 4.744511348926855e-06, "loss": 1.3409, "step": 2413 }, { "epoch": 0.3417569193742479, "grad_norm": 10.238900478460199, "learning_rate": 4.7442588561584336e-06, "loss": 1.182, "step": 2414 }, { "epoch": 0.34189849224888513, "grad_norm": 11.61571566731328, "learning_rate": 4.744006245410998e-06, "loss": 1.3698, "step": 2415 }, { "epoch": 0.3420400651235223, "grad_norm": 8.391679239374142, "learning_rate": 4.743753516697827e-06, "loss": 1.2354, "step": 2416 }, { "epoch": 0.34218163799815954, "grad_norm": 11.567051337127461, "learning_rate": 4.743500670032207e-06, "loss": 1.3622, "step": 2417 }, { "epoch": 0.34232321087279677, "grad_norm": 9.015487484734386, "learning_rate": 4.743247705427429e-06, "loss": 1.3385, "step": 2418 }, { "epoch": 0.342464783747434, "grad_norm": 11.7226816321368, "learning_rate": 4.742994622896793e-06, "loss": 1.4042, "step": 2419 }, { "epoch": 0.34260635662207123, "grad_norm": 9.418489217913308, "learning_rate": 4.7427414224536014e-06, "loss": 1.2313, "step": 2420 }, { "epoch": 0.3427479294967084, "grad_norm": 12.917606114504816, "learning_rate": 4.742488104111165e-06, "loss": 1.5169, "step": 2421 }, { "epoch": 0.34288950237134563, "grad_norm": 9.235900856117038, "learning_rate": 4.742234667882802e-06, "loss": 1.2721, "step": 2422 }, { "epoch": 0.34303107524598286, "grad_norm": 9.727391170146843, "learning_rate": 4.7419811137818335e-06, "loss": 1.4337, "step": 2423 }, { "epoch": 0.3431726481206201, "grad_norm": 9.8200840889308, "learning_rate": 4.7417274418215895e-06, "loss": 1.3806, "step": 2424 }, { "epoch": 0.3433142209952573, "grad_norm": 8.212820720942101, "learning_rate": 4.741473652015407e-06, "loss": 1.3629, "step": 2425 }, { "epoch": 0.34345579386989455, "grad_norm": 11.93597050799697, "learning_rate": 4.741219744376624e-06, "loss": 1.3855, "step": 2426 }, { "epoch": 0.34359736674453173, "grad_norm": 11.034623189558712, "learning_rate": 4.740965718918591e-06, "loss": 1.4437, "step": 2427 }, { "epoch": 0.34373893961916896, "grad_norm": 12.193340447536567, "learning_rate": 4.74071157565466e-06, "loss": 1.4606, "step": 2428 }, { "epoch": 0.3438805124938062, "grad_norm": 9.871309387484393, "learning_rate": 4.740457314598194e-06, "loss": 1.362, "step": 2429 }, { "epoch": 0.3440220853684434, "grad_norm": 10.137094691366865, "learning_rate": 4.740202935762557e-06, "loss": 1.3682, "step": 2430 }, { "epoch": 0.34416365824308065, "grad_norm": 9.869608505817517, "learning_rate": 4.739948439161122e-06, "loss": 1.3692, "step": 2431 }, { "epoch": 0.3443052311177178, "grad_norm": 8.566807463957137, "learning_rate": 4.7396938248072675e-06, "loss": 1.2732, "step": 2432 }, { "epoch": 0.34444680399235506, "grad_norm": 9.217956301902774, "learning_rate": 4.739439092714379e-06, "loss": 1.1819, "step": 2433 }, { "epoch": 0.3445883768669923, "grad_norm": 10.107586615505276, "learning_rate": 4.7391842428958454e-06, "loss": 1.2226, "step": 2434 }, { "epoch": 0.3447299497416295, "grad_norm": 8.767509162752287, "learning_rate": 4.738929275365068e-06, "loss": 1.4763, "step": 2435 }, { "epoch": 0.34487152261626675, "grad_norm": 8.334738905306446, "learning_rate": 4.738674190135447e-06, "loss": 1.2977, "step": 2436 }, { "epoch": 0.3450130954909039, "grad_norm": 8.396271523020278, "learning_rate": 4.7384189872203935e-06, "loss": 1.2404, "step": 2437 }, { "epoch": 0.34515466836554115, "grad_norm": 8.997820272339313, "learning_rate": 4.738163666633322e-06, "loss": 1.3469, "step": 2438 }, { "epoch": 0.3452962412401784, "grad_norm": 9.050831827113756, "learning_rate": 4.737908228387656e-06, "loss": 1.3806, "step": 2439 }, { "epoch": 0.3454378141148156, "grad_norm": 9.733095430924351, "learning_rate": 4.737652672496823e-06, "loss": 1.4262, "step": 2440 }, { "epoch": 0.34557938698945284, "grad_norm": 8.937461586182746, "learning_rate": 4.737396998974257e-06, "loss": 1.2047, "step": 2441 }, { "epoch": 0.34572095986409, "grad_norm": 9.778144709127428, "learning_rate": 4.7371412078334e-06, "loss": 1.4933, "step": 2442 }, { "epoch": 0.34586253273872725, "grad_norm": 12.563072684979815, "learning_rate": 4.736885299087698e-06, "loss": 1.4023, "step": 2443 }, { "epoch": 0.3460041056133645, "grad_norm": 11.858895715570377, "learning_rate": 4.7366292727506025e-06, "loss": 1.5855, "step": 2444 }, { "epoch": 0.3461456784880017, "grad_norm": 8.26963504082835, "learning_rate": 4.736373128835574e-06, "loss": 1.3152, "step": 2445 }, { "epoch": 0.34628725136263894, "grad_norm": 12.37015384612325, "learning_rate": 4.736116867356079e-06, "loss": 1.4039, "step": 2446 }, { "epoch": 0.3464288242372761, "grad_norm": 12.95708895449918, "learning_rate": 4.735860488325586e-06, "loss": 1.4388, "step": 2447 }, { "epoch": 0.34657039711191334, "grad_norm": 10.891559139344553, "learning_rate": 4.735603991757576e-06, "loss": 1.4925, "step": 2448 }, { "epoch": 0.34671196998655057, "grad_norm": 11.73288795608477, "learning_rate": 4.735347377665529e-06, "loss": 1.3055, "step": 2449 }, { "epoch": 0.3468535428611878, "grad_norm": 9.458170705957222, "learning_rate": 4.735090646062939e-06, "loss": 1.4278, "step": 2450 }, { "epoch": 0.34699511573582503, "grad_norm": 11.603566746068587, "learning_rate": 4.7348337969632985e-06, "loss": 1.368, "step": 2451 }, { "epoch": 0.34713668861046226, "grad_norm": 14.982937072560656, "learning_rate": 4.734576830380113e-06, "loss": 1.3213, "step": 2452 }, { "epoch": 0.34727826148509944, "grad_norm": 8.606679283189969, "learning_rate": 4.7343197463268895e-06, "loss": 1.2988, "step": 2453 }, { "epoch": 0.34741983435973667, "grad_norm": 7.014946919659801, "learning_rate": 4.734062544817143e-06, "loss": 1.2639, "step": 2454 }, { "epoch": 0.3475614072343739, "grad_norm": 9.345754351022096, "learning_rate": 4.733805225864393e-06, "loss": 1.4731, "step": 2455 }, { "epoch": 0.34770298010901113, "grad_norm": 14.54838539419839, "learning_rate": 4.733547789482169e-06, "loss": 1.3937, "step": 2456 }, { "epoch": 0.34784455298364836, "grad_norm": 11.69058399004602, "learning_rate": 4.733290235684002e-06, "loss": 1.2455, "step": 2457 }, { "epoch": 0.34798612585828553, "grad_norm": 9.08465393918911, "learning_rate": 4.733032564483434e-06, "loss": 1.4812, "step": 2458 }, { "epoch": 0.34812769873292276, "grad_norm": 8.110299294778146, "learning_rate": 4.732774775894009e-06, "loss": 1.2938, "step": 2459 }, { "epoch": 0.34826927160756, "grad_norm": 10.381884760917202, "learning_rate": 4.732516869929278e-06, "loss": 1.5566, "step": 2460 }, { "epoch": 0.3484108444821972, "grad_norm": 8.859596316955647, "learning_rate": 4.732258846602801e-06, "loss": 1.432, "step": 2461 }, { "epoch": 0.34855241735683445, "grad_norm": 9.5869617614461, "learning_rate": 4.73200070592814e-06, "loss": 1.233, "step": 2462 }, { "epoch": 0.34869399023147163, "grad_norm": 10.436229148749794, "learning_rate": 4.731742447918866e-06, "loss": 1.3385, "step": 2463 }, { "epoch": 0.34883556310610886, "grad_norm": 9.503508472075383, "learning_rate": 4.731484072588556e-06, "loss": 1.3426, "step": 2464 }, { "epoch": 0.3489771359807461, "grad_norm": 8.858808334500859, "learning_rate": 4.731225579950791e-06, "loss": 1.3543, "step": 2465 }, { "epoch": 0.3491187088553833, "grad_norm": 12.200260622336298, "learning_rate": 4.730966970019163e-06, "loss": 1.3303, "step": 2466 }, { "epoch": 0.34926028173002055, "grad_norm": 9.928332436675584, "learning_rate": 4.730708242807263e-06, "loss": 1.4258, "step": 2467 }, { "epoch": 0.3494018546046577, "grad_norm": 10.947767920289662, "learning_rate": 4.730449398328695e-06, "loss": 1.4295, "step": 2468 }, { "epoch": 0.34954342747929495, "grad_norm": 9.226009704412736, "learning_rate": 4.7301904365970656e-06, "loss": 1.4534, "step": 2469 }, { "epoch": 0.3496850003539322, "grad_norm": 9.048945109391118, "learning_rate": 4.7299313576259865e-06, "loss": 1.1861, "step": 2470 }, { "epoch": 0.3498265732285694, "grad_norm": 9.72319846444092, "learning_rate": 4.72967216142908e-06, "loss": 1.344, "step": 2471 }, { "epoch": 0.34996814610320665, "grad_norm": 10.12867048631322, "learning_rate": 4.729412848019969e-06, "loss": 1.4683, "step": 2472 }, { "epoch": 0.3501097189778438, "grad_norm": 9.249141343961933, "learning_rate": 4.729153417412288e-06, "loss": 1.2761, "step": 2473 }, { "epoch": 0.35025129185248105, "grad_norm": 10.481486393009149, "learning_rate": 4.7288938696196735e-06, "loss": 1.2911, "step": 2474 }, { "epoch": 0.3503928647271183, "grad_norm": 10.44193561894608, "learning_rate": 4.728634204655771e-06, "loss": 1.5429, "step": 2475 }, { "epoch": 0.3505344376017555, "grad_norm": 10.907640953228649, "learning_rate": 4.728374422534229e-06, "loss": 1.4166, "step": 2476 }, { "epoch": 0.35067601047639274, "grad_norm": 14.27181407964746, "learning_rate": 4.728114523268705e-06, "loss": 1.2956, "step": 2477 }, { "epoch": 0.3508175833510299, "grad_norm": 10.388570711518629, "learning_rate": 4.727854506872863e-06, "loss": 1.3249, "step": 2478 }, { "epoch": 0.35095915622566715, "grad_norm": 8.7308860184121, "learning_rate": 4.72759437336037e-06, "loss": 1.4873, "step": 2479 }, { "epoch": 0.3511007291003044, "grad_norm": 8.70348221817957, "learning_rate": 4.727334122744902e-06, "loss": 1.4942, "step": 2480 }, { "epoch": 0.3512423019749416, "grad_norm": 8.534638231562738, "learning_rate": 4.72707375504014e-06, "loss": 1.3337, "step": 2481 }, { "epoch": 0.35138387484957884, "grad_norm": 10.996827448324478, "learning_rate": 4.726813270259772e-06, "loss": 1.2276, "step": 2482 }, { "epoch": 0.35152544772421607, "grad_norm": 11.042791882648, "learning_rate": 4.7265526684174894e-06, "loss": 1.2565, "step": 2483 }, { "epoch": 0.35166702059885324, "grad_norm": 9.603142430655552, "learning_rate": 4.7262919495269946e-06, "loss": 1.229, "step": 2484 }, { "epoch": 0.35180859347349047, "grad_norm": 8.540522304884776, "learning_rate": 4.726031113601991e-06, "loss": 1.419, "step": 2485 }, { "epoch": 0.3519501663481277, "grad_norm": 9.607416514724568, "learning_rate": 4.725770160656191e-06, "loss": 1.2241, "step": 2486 }, { "epoch": 0.35209173922276493, "grad_norm": 10.378372930995697, "learning_rate": 4.725509090703314e-06, "loss": 1.4612, "step": 2487 }, { "epoch": 0.35223331209740216, "grad_norm": 9.008105548680692, "learning_rate": 4.725247903757084e-06, "loss": 1.1582, "step": 2488 }, { "epoch": 0.35237488497203934, "grad_norm": 8.306999509132439, "learning_rate": 4.7249865998312306e-06, "loss": 1.3716, "step": 2489 }, { "epoch": 0.35251645784667657, "grad_norm": 8.346260978825198, "learning_rate": 4.72472517893949e-06, "loss": 1.4011, "step": 2490 }, { "epoch": 0.3526580307213138, "grad_norm": 9.520598316143964, "learning_rate": 4.724463641095606e-06, "loss": 1.1861, "step": 2491 }, { "epoch": 0.35279960359595103, "grad_norm": 13.112219409831521, "learning_rate": 4.7242019863133275e-06, "loss": 1.3587, "step": 2492 }, { "epoch": 0.35294117647058826, "grad_norm": 7.442681310954779, "learning_rate": 4.723940214606408e-06, "loss": 1.3025, "step": 2493 }, { "epoch": 0.35308274934522543, "grad_norm": 10.590268083300435, "learning_rate": 4.723678325988611e-06, "loss": 1.4549, "step": 2494 }, { "epoch": 0.35322432221986266, "grad_norm": 9.431411092607188, "learning_rate": 4.723416320473702e-06, "loss": 1.2775, "step": 2495 }, { "epoch": 0.3533658950944999, "grad_norm": 10.66411687257201, "learning_rate": 4.723154198075454e-06, "loss": 1.4799, "step": 2496 }, { "epoch": 0.3535074679691371, "grad_norm": 11.284105610490844, "learning_rate": 4.7228919588076484e-06, "loss": 1.2211, "step": 2497 }, { "epoch": 0.35364904084377435, "grad_norm": 8.226551371075093, "learning_rate": 4.722629602684069e-06, "loss": 1.3003, "step": 2498 }, { "epoch": 0.35379061371841153, "grad_norm": 9.354155701175598, "learning_rate": 4.72236712971851e-06, "loss": 1.2904, "step": 2499 }, { "epoch": 0.35393218659304876, "grad_norm": 11.82630396421898, "learning_rate": 4.7221045399247666e-06, "loss": 1.5433, "step": 2500 }, { "epoch": 0.354073759467686, "grad_norm": 9.106249212449562, "learning_rate": 4.721841833316645e-06, "loss": 1.4807, "step": 2501 }, { "epoch": 0.3542153323423232, "grad_norm": 9.820050681424316, "learning_rate": 4.721579009907955e-06, "loss": 1.3702, "step": 2502 }, { "epoch": 0.35435690521696045, "grad_norm": 9.252923864196717, "learning_rate": 4.721316069712514e-06, "loss": 1.2999, "step": 2503 }, { "epoch": 0.3544984780915976, "grad_norm": 11.23537970070629, "learning_rate": 4.721053012744142e-06, "loss": 1.3154, "step": 2504 }, { "epoch": 0.35464005096623485, "grad_norm": 9.254745091122723, "learning_rate": 4.7207898390166695e-06, "loss": 1.2765, "step": 2505 }, { "epoch": 0.3547816238408721, "grad_norm": 9.426008288940956, "learning_rate": 4.720526548543931e-06, "loss": 1.3789, "step": 2506 }, { "epoch": 0.3549231967155093, "grad_norm": 7.894592375521029, "learning_rate": 4.720263141339768e-06, "loss": 1.2328, "step": 2507 }, { "epoch": 0.35506476959014655, "grad_norm": 10.412398987425513, "learning_rate": 4.719999617418027e-06, "loss": 1.3842, "step": 2508 }, { "epoch": 0.3552063424647837, "grad_norm": 8.363266654004077, "learning_rate": 4.719735976792562e-06, "loss": 1.3067, "step": 2509 }, { "epoch": 0.35534791533942095, "grad_norm": 10.013587494463577, "learning_rate": 4.71947221947723e-06, "loss": 1.3353, "step": 2510 }, { "epoch": 0.3554894882140582, "grad_norm": 8.351958048689559, "learning_rate": 4.7192083454859e-06, "loss": 1.2974, "step": 2511 }, { "epoch": 0.3556310610886954, "grad_norm": 10.256202867807607, "learning_rate": 4.7189443548324415e-06, "loss": 1.4175, "step": 2512 }, { "epoch": 0.35577263396333264, "grad_norm": 10.632951341741382, "learning_rate": 4.7186802475307325e-06, "loss": 1.461, "step": 2513 }, { "epoch": 0.35591420683796987, "grad_norm": 9.058180628112021, "learning_rate": 4.7184160235946576e-06, "loss": 1.3578, "step": 2514 }, { "epoch": 0.35605577971260705, "grad_norm": 10.103644938943871, "learning_rate": 4.7181516830381065e-06, "loss": 1.2364, "step": 2515 }, { "epoch": 0.3561973525872443, "grad_norm": 10.070669707953847, "learning_rate": 4.717887225874976e-06, "loss": 1.2538, "step": 2516 }, { "epoch": 0.3563389254618815, "grad_norm": 10.586652420508395, "learning_rate": 4.717622652119166e-06, "loss": 1.2633, "step": 2517 }, { "epoch": 0.35648049833651874, "grad_norm": 10.587766865110952, "learning_rate": 4.717357961784587e-06, "loss": 1.3731, "step": 2518 }, { "epoch": 0.35662207121115597, "grad_norm": 10.341077384379568, "learning_rate": 4.717093154885154e-06, "loss": 1.5045, "step": 2519 }, { "epoch": 0.35676364408579314, "grad_norm": 8.080699635799112, "learning_rate": 4.716828231434787e-06, "loss": 1.3251, "step": 2520 }, { "epoch": 0.35690521696043037, "grad_norm": 8.70455027882777, "learning_rate": 4.716563191447413e-06, "loss": 1.3304, "step": 2521 }, { "epoch": 0.3570467898350676, "grad_norm": 8.930146720338772, "learning_rate": 4.7162980349369645e-06, "loss": 1.286, "step": 2522 }, { "epoch": 0.35718836270970483, "grad_norm": 9.43124323718901, "learning_rate": 4.716032761917381e-06, "loss": 1.2823, "step": 2523 }, { "epoch": 0.35732993558434206, "grad_norm": 10.635766173375877, "learning_rate": 4.715767372402608e-06, "loss": 1.3656, "step": 2524 }, { "epoch": 0.35747150845897924, "grad_norm": 8.165111886803722, "learning_rate": 4.715501866406595e-06, "loss": 1.2738, "step": 2525 }, { "epoch": 0.35761308133361647, "grad_norm": 10.392780556487878, "learning_rate": 4.715236243943302e-06, "loss": 1.3055, "step": 2526 }, { "epoch": 0.3577546542082537, "grad_norm": 9.055298778843307, "learning_rate": 4.714970505026691e-06, "loss": 1.2822, "step": 2527 }, { "epoch": 0.3578962270828909, "grad_norm": 9.908153651869883, "learning_rate": 4.714704649670732e-06, "loss": 1.473, "step": 2528 }, { "epoch": 0.35803779995752816, "grad_norm": 8.787560201468528, "learning_rate": 4.7144386778894e-06, "loss": 1.2644, "step": 2529 }, { "epoch": 0.35817937283216533, "grad_norm": 9.619176514844213, "learning_rate": 4.71417258969668e-06, "loss": 1.2005, "step": 2530 }, { "epoch": 0.35832094570680256, "grad_norm": 11.070465151448406, "learning_rate": 4.713906385106556e-06, "loss": 1.586, "step": 2531 }, { "epoch": 0.3584625185814398, "grad_norm": 8.011054031420331, "learning_rate": 4.7136400641330245e-06, "loss": 1.2618, "step": 2532 }, { "epoch": 0.358604091456077, "grad_norm": 11.086488394191859, "learning_rate": 4.713373626790086e-06, "loss": 1.2252, "step": 2533 }, { "epoch": 0.35874566433071425, "grad_norm": 7.931008876204439, "learning_rate": 4.713107073091746e-06, "loss": 1.4278, "step": 2534 }, { "epoch": 0.35888723720535143, "grad_norm": 10.805763571686525, "learning_rate": 4.712840403052018e-06, "loss": 1.5341, "step": 2535 }, { "epoch": 0.35902881007998866, "grad_norm": 7.047689657213527, "learning_rate": 4.712573616684919e-06, "loss": 1.2776, "step": 2536 }, { "epoch": 0.3591703829546259, "grad_norm": 11.720719886517344, "learning_rate": 4.712306714004475e-06, "loss": 1.4437, "step": 2537 }, { "epoch": 0.3593119558292631, "grad_norm": 8.554445808515027, "learning_rate": 4.712039695024717e-06, "loss": 1.4308, "step": 2538 }, { "epoch": 0.35945352870390035, "grad_norm": 8.896887211310553, "learning_rate": 4.7117725597596814e-06, "loss": 1.2809, "step": 2539 }, { "epoch": 0.3595951015785376, "grad_norm": 9.862351828115997, "learning_rate": 4.711505308223412e-06, "loss": 1.3493, "step": 2540 }, { "epoch": 0.35973667445317475, "grad_norm": 11.091800830086019, "learning_rate": 4.711237940429956e-06, "loss": 1.4282, "step": 2541 }, { "epoch": 0.359878247327812, "grad_norm": 9.58673415724864, "learning_rate": 4.710970456393371e-06, "loss": 1.4434, "step": 2542 }, { "epoch": 0.3600198202024492, "grad_norm": 9.502048472350499, "learning_rate": 4.710702856127718e-06, "loss": 1.4473, "step": 2543 }, { "epoch": 0.36016139307708644, "grad_norm": 9.141518886580661, "learning_rate": 4.710435139647064e-06, "loss": 1.1372, "step": 2544 }, { "epoch": 0.3603029659517237, "grad_norm": 10.122395121492021, "learning_rate": 4.710167306965483e-06, "loss": 1.3913, "step": 2545 }, { "epoch": 0.36044453882636085, "grad_norm": 9.19423091623836, "learning_rate": 4.709899358097055e-06, "loss": 1.2436, "step": 2546 }, { "epoch": 0.3605861117009981, "grad_norm": 9.615650342350623, "learning_rate": 4.709631293055865e-06, "loss": 1.2671, "step": 2547 }, { "epoch": 0.3607276845756353, "grad_norm": 9.863310256764278, "learning_rate": 4.7093631118560054e-06, "loss": 1.4019, "step": 2548 }, { "epoch": 0.36086925745027254, "grad_norm": 9.976731121935407, "learning_rate": 4.709094814511574e-06, "loss": 1.5184, "step": 2549 }, { "epoch": 0.36101083032490977, "grad_norm": 9.239052557653622, "learning_rate": 4.708826401036677e-06, "loss": 1.2005, "step": 2550 }, { "epoch": 0.36115240319954695, "grad_norm": 9.617007417333058, "learning_rate": 4.708557871445422e-06, "loss": 1.3951, "step": 2551 }, { "epoch": 0.3612939760741842, "grad_norm": 7.736659720156256, "learning_rate": 4.708289225751926e-06, "loss": 1.3479, "step": 2552 }, { "epoch": 0.3614355489488214, "grad_norm": 11.298348189623333, "learning_rate": 4.7080204639703125e-06, "loss": 1.5634, "step": 2553 }, { "epoch": 0.36157712182345864, "grad_norm": 10.599437166062389, "learning_rate": 4.707751586114709e-06, "loss": 1.5393, "step": 2554 }, { "epoch": 0.36171869469809587, "grad_norm": 10.190222627942495, "learning_rate": 4.7074825921992516e-06, "loss": 1.3217, "step": 2555 }, { "epoch": 0.36186026757273304, "grad_norm": 8.11949041336671, "learning_rate": 4.70721348223808e-06, "loss": 1.3592, "step": 2556 }, { "epoch": 0.36200184044737027, "grad_norm": 9.79908953738656, "learning_rate": 4.706944256245342e-06, "loss": 1.3233, "step": 2557 }, { "epoch": 0.3621434133220075, "grad_norm": 8.344955985820024, "learning_rate": 4.706674914235189e-06, "loss": 1.2246, "step": 2558 }, { "epoch": 0.36228498619664473, "grad_norm": 9.858591861726184, "learning_rate": 4.706405456221782e-06, "loss": 1.4378, "step": 2559 }, { "epoch": 0.36242655907128196, "grad_norm": 13.634505849346333, "learning_rate": 4.706135882219285e-06, "loss": 1.3204, "step": 2560 }, { "epoch": 0.36256813194591914, "grad_norm": 11.384796283894643, "learning_rate": 4.705866192241869e-06, "loss": 1.2606, "step": 2561 }, { "epoch": 0.36270970482055637, "grad_norm": 8.436329852308592, "learning_rate": 4.705596386303713e-06, "loss": 1.2888, "step": 2562 }, { "epoch": 0.3628512776951936, "grad_norm": 11.52850590337958, "learning_rate": 4.705326464418999e-06, "loss": 1.3721, "step": 2563 }, { "epoch": 0.3629928505698308, "grad_norm": 9.831755775590613, "learning_rate": 4.705056426601917e-06, "loss": 1.5144, "step": 2564 }, { "epoch": 0.36313442344446806, "grad_norm": 10.01247505251554, "learning_rate": 4.704786272866663e-06, "loss": 1.338, "step": 2565 }, { "epoch": 0.36327599631910523, "grad_norm": 10.284787946192402, "learning_rate": 4.704516003227439e-06, "loss": 1.3269, "step": 2566 }, { "epoch": 0.36341756919374246, "grad_norm": 8.643244368741025, "learning_rate": 4.704245617698452e-06, "loss": 1.4173, "step": 2567 }, { "epoch": 0.3635591420683797, "grad_norm": 10.542767574034684, "learning_rate": 4.703975116293916e-06, "loss": 1.5485, "step": 2568 }, { "epoch": 0.3637007149430169, "grad_norm": 9.856377534398018, "learning_rate": 4.703704499028052e-06, "loss": 1.3953, "step": 2569 }, { "epoch": 0.36384228781765415, "grad_norm": 10.847304231662275, "learning_rate": 4.703433765915086e-06, "loss": 1.4219, "step": 2570 }, { "epoch": 0.3639838606922914, "grad_norm": 9.12910264684973, "learning_rate": 4.7031629169692495e-06, "loss": 1.3368, "step": 2571 }, { "epoch": 0.36412543356692856, "grad_norm": 8.077602701623388, "learning_rate": 4.702891952204781e-06, "loss": 1.3273, "step": 2572 }, { "epoch": 0.3642670064415658, "grad_norm": 9.766174984512872, "learning_rate": 4.702620871635926e-06, "loss": 1.3585, "step": 2573 }, { "epoch": 0.364408579316203, "grad_norm": 11.398080737862665, "learning_rate": 4.702349675276933e-06, "loss": 1.3, "step": 2574 }, { "epoch": 0.36455015219084025, "grad_norm": 8.936586133211529, "learning_rate": 4.702078363142061e-06, "loss": 1.3563, "step": 2575 }, { "epoch": 0.3646917250654775, "grad_norm": 11.762919364369385, "learning_rate": 4.70180693524557e-06, "loss": 1.427, "step": 2576 }, { "epoch": 0.36483329794011465, "grad_norm": 9.284959867243806, "learning_rate": 4.7015353916017305e-06, "loss": 1.3218, "step": 2577 }, { "epoch": 0.3649748708147519, "grad_norm": 7.756534897630775, "learning_rate": 4.701263732224817e-06, "loss": 1.343, "step": 2578 }, { "epoch": 0.3651164436893891, "grad_norm": 10.33163803304204, "learning_rate": 4.700991957129111e-06, "loss": 1.3813, "step": 2579 }, { "epoch": 0.36525801656402634, "grad_norm": 9.570955016697164, "learning_rate": 4.700720066328899e-06, "loss": 1.3358, "step": 2580 }, { "epoch": 0.3653995894386636, "grad_norm": 10.654731709446887, "learning_rate": 4.7004480598384736e-06, "loss": 1.3442, "step": 2581 }, { "epoch": 0.36554116231330075, "grad_norm": 11.515159235195753, "learning_rate": 4.700175937672134e-06, "loss": 1.3133, "step": 2582 }, { "epoch": 0.365682735187938, "grad_norm": 9.084550641818753, "learning_rate": 4.699903699844186e-06, "loss": 1.2631, "step": 2583 }, { "epoch": 0.3658243080625752, "grad_norm": 9.161513342302756, "learning_rate": 4.699631346368941e-06, "loss": 1.3486, "step": 2584 }, { "epoch": 0.36596588093721244, "grad_norm": 9.07806953242521, "learning_rate": 4.699358877260717e-06, "loss": 1.222, "step": 2585 }, { "epoch": 0.36610745381184967, "grad_norm": 8.25347549751533, "learning_rate": 4.699086292533836e-06, "loss": 1.3341, "step": 2586 }, { "epoch": 0.36624902668648684, "grad_norm": 9.102691973238905, "learning_rate": 4.698813592202628e-06, "loss": 1.31, "step": 2587 }, { "epoch": 0.3663905995611241, "grad_norm": 9.77130381760277, "learning_rate": 4.69854077628143e-06, "loss": 1.5145, "step": 2588 }, { "epoch": 0.3665321724357613, "grad_norm": 8.266146493768744, "learning_rate": 4.698267844784582e-06, "loss": 1.388, "step": 2589 }, { "epoch": 0.36667374531039854, "grad_norm": 10.632951341741382, "learning_rate": 4.697994797726433e-06, "loss": 1.1958, "step": 2590 }, { "epoch": 0.36681531818503577, "grad_norm": 8.01537847604577, "learning_rate": 4.6977216351213355e-06, "loss": 1.2619, "step": 2591 }, { "epoch": 0.36695689105967294, "grad_norm": 8.58004366848021, "learning_rate": 4.697448356983651e-06, "loss": 1.3429, "step": 2592 }, { "epoch": 0.36709846393431017, "grad_norm": 11.742743666136708, "learning_rate": 4.697174963327744e-06, "loss": 1.4673, "step": 2593 }, { "epoch": 0.3672400368089474, "grad_norm": 8.234105838668965, "learning_rate": 4.696901454167989e-06, "loss": 1.3037, "step": 2594 }, { "epoch": 0.36738160968358463, "grad_norm": 9.222872571981243, "learning_rate": 4.696627829518761e-06, "loss": 1.327, "step": 2595 }, { "epoch": 0.36752318255822186, "grad_norm": 9.925308522538637, "learning_rate": 4.696354089394447e-06, "loss": 1.4799, "step": 2596 }, { "epoch": 0.3676647554328591, "grad_norm": 8.405925985565462, "learning_rate": 4.696080233809436e-06, "loss": 1.3716, "step": 2597 }, { "epoch": 0.36780632830749627, "grad_norm": 9.89121662799051, "learning_rate": 4.695806262778124e-06, "loss": 1.3769, "step": 2598 }, { "epoch": 0.3679479011821335, "grad_norm": 9.528220173531517, "learning_rate": 4.695532176314914e-06, "loss": 1.3324, "step": 2599 }, { "epoch": 0.3680894740567707, "grad_norm": 9.626974324446632, "learning_rate": 4.695257974434215e-06, "loss": 1.2736, "step": 2600 }, { "epoch": 0.36823104693140796, "grad_norm": 10.581946895427905, "learning_rate": 4.694983657150442e-06, "loss": 1.1887, "step": 2601 }, { "epoch": 0.3683726198060452, "grad_norm": 9.93566378537222, "learning_rate": 4.6947092244780134e-06, "loss": 1.4897, "step": 2602 }, { "epoch": 0.36851419268068236, "grad_norm": 9.94136787811848, "learning_rate": 4.694434676431358e-06, "loss": 1.3721, "step": 2603 }, { "epoch": 0.3686557655553196, "grad_norm": 8.673867046678605, "learning_rate": 4.694160013024907e-06, "loss": 1.198, "step": 2604 }, { "epoch": 0.3687973384299568, "grad_norm": 9.563005022662937, "learning_rate": 4.693885234273101e-06, "loss": 1.2854, "step": 2605 }, { "epoch": 0.36893891130459405, "grad_norm": 12.187924421696435, "learning_rate": 4.693610340190384e-06, "loss": 1.5656, "step": 2606 }, { "epoch": 0.3690804841792313, "grad_norm": 10.934364436150762, "learning_rate": 4.693335330791207e-06, "loss": 1.3658, "step": 2607 }, { "epoch": 0.36922205705386846, "grad_norm": 8.821289873329006, "learning_rate": 4.693060206090028e-06, "loss": 1.5378, "step": 2608 }, { "epoch": 0.3693636299285057, "grad_norm": 7.801966287289253, "learning_rate": 4.692784966101308e-06, "loss": 1.2421, "step": 2609 }, { "epoch": 0.3695052028031429, "grad_norm": 11.45595774675814, "learning_rate": 4.6925096108395175e-06, "loss": 1.2819, "step": 2610 }, { "epoch": 0.36964677567778015, "grad_norm": 11.173782889973548, "learning_rate": 4.692234140319131e-06, "loss": 1.5621, "step": 2611 }, { "epoch": 0.3697883485524174, "grad_norm": 8.675178846994669, "learning_rate": 4.691958554554631e-06, "loss": 1.3765, "step": 2612 }, { "epoch": 0.36992992142705455, "grad_norm": 9.859656280035791, "learning_rate": 4.6916828535605044e-06, "loss": 1.2981, "step": 2613 }, { "epoch": 0.3700714943016918, "grad_norm": 11.949955538742799, "learning_rate": 4.691407037351244e-06, "loss": 1.3316, "step": 2614 }, { "epoch": 0.370213067176329, "grad_norm": 8.437451171733715, "learning_rate": 4.69113110594135e-06, "loss": 1.4813, "step": 2615 }, { "epoch": 0.37035464005096624, "grad_norm": 11.145629614753819, "learning_rate": 4.690855059345327e-06, "loss": 1.4193, "step": 2616 }, { "epoch": 0.3704962129256035, "grad_norm": 10.327466796585226, "learning_rate": 4.690578897577687e-06, "loss": 1.233, "step": 2617 }, { "epoch": 0.37063778580024065, "grad_norm": 12.800371188503737, "learning_rate": 4.690302620652949e-06, "loss": 1.4209, "step": 2618 }, { "epoch": 0.3707793586748779, "grad_norm": 8.79636935852523, "learning_rate": 4.690026228585634e-06, "loss": 1.2573, "step": 2619 }, { "epoch": 0.3709209315495151, "grad_norm": 13.274270914462567, "learning_rate": 4.689749721390273e-06, "loss": 1.3474, "step": 2620 }, { "epoch": 0.37106250442415234, "grad_norm": 12.466305906682395, "learning_rate": 4.689473099081403e-06, "loss": 1.5922, "step": 2621 }, { "epoch": 0.37120407729878957, "grad_norm": 10.053928302121475, "learning_rate": 4.689196361673565e-06, "loss": 1.3351, "step": 2622 }, { "epoch": 0.37134565017342674, "grad_norm": 11.146311374763563, "learning_rate": 4.688919509181305e-06, "loss": 1.4052, "step": 2623 }, { "epoch": 0.371487223048064, "grad_norm": 10.038712147339838, "learning_rate": 4.68864254161918e-06, "loss": 1.2739, "step": 2624 }, { "epoch": 0.3716287959227012, "grad_norm": 11.223543041125476, "learning_rate": 4.6883654590017475e-06, "loss": 1.4639, "step": 2625 }, { "epoch": 0.37177036879733844, "grad_norm": 11.361158732029713, "learning_rate": 4.688088261343575e-06, "loss": 1.2961, "step": 2626 }, { "epoch": 0.37191194167197567, "grad_norm": 7.985938350264703, "learning_rate": 4.687810948659234e-06, "loss": 1.3817, "step": 2627 }, { "epoch": 0.3720535145466129, "grad_norm": 9.298589570755938, "learning_rate": 4.687533520963302e-06, "loss": 1.3278, "step": 2628 }, { "epoch": 0.37219508742125007, "grad_norm": 12.996455369555637, "learning_rate": 4.6872559782703655e-06, "loss": 1.4197, "step": 2629 }, { "epoch": 0.3723366602958873, "grad_norm": 9.333826824039127, "learning_rate": 4.686978320595012e-06, "loss": 1.1977, "step": 2630 }, { "epoch": 0.37247823317052453, "grad_norm": 9.01050420876609, "learning_rate": 4.686700547951839e-06, "loss": 1.3206, "step": 2631 }, { "epoch": 0.37261980604516176, "grad_norm": 8.41033632020997, "learning_rate": 4.686422660355448e-06, "loss": 1.4791, "step": 2632 }, { "epoch": 0.372761378919799, "grad_norm": 8.891115413072436, "learning_rate": 4.686144657820449e-06, "loss": 1.3807, "step": 2633 }, { "epoch": 0.37290295179443617, "grad_norm": 9.967835387363213, "learning_rate": 4.685866540361456e-06, "loss": 1.4185, "step": 2634 }, { "epoch": 0.3730445246690734, "grad_norm": 8.444053083129162, "learning_rate": 4.685588307993087e-06, "loss": 1.1858, "step": 2635 }, { "epoch": 0.3731860975437106, "grad_norm": 9.64394189293854, "learning_rate": 4.6853099607299725e-06, "loss": 1.2906, "step": 2636 }, { "epoch": 0.37332767041834786, "grad_norm": 9.894015389483874, "learning_rate": 4.685031498586741e-06, "loss": 1.1827, "step": 2637 }, { "epoch": 0.3734692432929851, "grad_norm": 7.9781833238632665, "learning_rate": 4.684752921578033e-06, "loss": 1.4153, "step": 2638 }, { "epoch": 0.37361081616762226, "grad_norm": 9.808859374514649, "learning_rate": 4.684474229718494e-06, "loss": 1.3973, "step": 2639 }, { "epoch": 0.3737523890422595, "grad_norm": 15.439946648698253, "learning_rate": 4.6841954230227725e-06, "loss": 1.5312, "step": 2640 }, { "epoch": 0.3738939619168967, "grad_norm": 10.989721524543365, "learning_rate": 4.683916501505527e-06, "loss": 1.3442, "step": 2641 }, { "epoch": 0.37403553479153395, "grad_norm": 9.180143771905295, "learning_rate": 4.6836374651814186e-06, "loss": 1.3311, "step": 2642 }, { "epoch": 0.3741771076661712, "grad_norm": 8.477762191889235, "learning_rate": 4.6833583140651175e-06, "loss": 1.1947, "step": 2643 }, { "epoch": 0.37431868054080836, "grad_norm": 11.395662760925411, "learning_rate": 4.6830790481712975e-06, "loss": 1.4976, "step": 2644 }, { "epoch": 0.3744602534154456, "grad_norm": 8.805416209030543, "learning_rate": 4.68279966751464e-06, "loss": 1.2381, "step": 2645 }, { "epoch": 0.3746018262900828, "grad_norm": 8.292539106569494, "learning_rate": 4.682520172109831e-06, "loss": 1.1332, "step": 2646 }, { "epoch": 0.37474339916472005, "grad_norm": 8.345654445285714, "learning_rate": 4.682240561971565e-06, "loss": 1.3987, "step": 2647 }, { "epoch": 0.3748849720393573, "grad_norm": 7.703319121539835, "learning_rate": 4.681960837114539e-06, "loss": 1.2385, "step": 2648 }, { "epoch": 0.37502654491399445, "grad_norm": 14.325787768581163, "learning_rate": 4.681680997553459e-06, "loss": 1.4275, "step": 2649 }, { "epoch": 0.3751681177886317, "grad_norm": 9.022912001067278, "learning_rate": 4.681401043303036e-06, "loss": 1.2391, "step": 2650 }, { "epoch": 0.3753096906632689, "grad_norm": 9.914498442075129, "learning_rate": 4.681120974377985e-06, "loss": 1.3294, "step": 2651 }, { "epoch": 0.37545126353790614, "grad_norm": 9.79979335022077, "learning_rate": 4.680840790793032e-06, "loss": 1.2942, "step": 2652 }, { "epoch": 0.3755928364125434, "grad_norm": 11.574444184439741, "learning_rate": 4.680560492562904e-06, "loss": 1.3636, "step": 2653 }, { "epoch": 0.37573440928718055, "grad_norm": 10.376484224713236, "learning_rate": 4.680280079702339e-06, "loss": 1.419, "step": 2654 }, { "epoch": 0.3758759821618178, "grad_norm": 12.461187301571378, "learning_rate": 4.679999552226073e-06, "loss": 1.3748, "step": 2655 }, { "epoch": 0.376017555036455, "grad_norm": 10.263335322443575, "learning_rate": 4.679718910148858e-06, "loss": 1.441, "step": 2656 }, { "epoch": 0.37615912791109224, "grad_norm": 10.1665512662081, "learning_rate": 4.679438153485444e-06, "loss": 1.2852, "step": 2657 }, { "epoch": 0.37630070078572947, "grad_norm": 12.658632708774833, "learning_rate": 4.679157282250592e-06, "loss": 1.2924, "step": 2658 }, { "epoch": 0.3764422736603667, "grad_norm": 15.837187662635163, "learning_rate": 4.678876296459066e-06, "loss": 1.5171, "step": 2659 }, { "epoch": 0.3765838465350039, "grad_norm": 11.478213695886547, "learning_rate": 4.678595196125638e-06, "loss": 1.5487, "step": 2660 }, { "epoch": 0.3767254194096411, "grad_norm": 10.227289114274276, "learning_rate": 4.678313981265086e-06, "loss": 1.3614, "step": 2661 }, { "epoch": 0.37686699228427833, "grad_norm": 10.87434964044007, "learning_rate": 4.678032651892191e-06, "loss": 1.4133, "step": 2662 }, { "epoch": 0.37700856515891557, "grad_norm": 11.892068582295328, "learning_rate": 4.677751208021744e-06, "loss": 1.2706, "step": 2663 }, { "epoch": 0.3771501380335528, "grad_norm": 9.977311909010739, "learning_rate": 4.677469649668539e-06, "loss": 1.4324, "step": 2664 }, { "epoch": 0.37729171090818997, "grad_norm": 11.637420254115533, "learning_rate": 4.677187976847379e-06, "loss": 1.2458, "step": 2665 }, { "epoch": 0.3774332837828272, "grad_norm": 9.215099167851632, "learning_rate": 4.67690618957307e-06, "loss": 1.3439, "step": 2666 }, { "epoch": 0.37757485665746443, "grad_norm": 10.28201652846374, "learning_rate": 4.676624287860425e-06, "loss": 1.5099, "step": 2667 }, { "epoch": 0.37771642953210166, "grad_norm": 10.335607175962174, "learning_rate": 4.676342271724266e-06, "loss": 1.4562, "step": 2668 }, { "epoch": 0.3778580024067389, "grad_norm": 8.475984643321867, "learning_rate": 4.676060141179415e-06, "loss": 1.3604, "step": 2669 }, { "epoch": 0.37799957528137607, "grad_norm": 12.355138387734803, "learning_rate": 4.675777896240706e-06, "loss": 1.4093, "step": 2670 }, { "epoch": 0.3781411481560133, "grad_norm": 10.95136572276936, "learning_rate": 4.675495536922975e-06, "loss": 1.4342, "step": 2671 }, { "epoch": 0.3782827210306505, "grad_norm": 10.148173245813789, "learning_rate": 4.675213063241065e-06, "loss": 1.1531, "step": 2672 }, { "epoch": 0.37842429390528776, "grad_norm": 10.22154715515793, "learning_rate": 4.674930475209827e-06, "loss": 1.243, "step": 2673 }, { "epoch": 0.378565866779925, "grad_norm": 8.33009346606149, "learning_rate": 4.674647772844115e-06, "loss": 1.236, "step": 2674 }, { "epoch": 0.37870743965456216, "grad_norm": 9.257224483567423, "learning_rate": 4.674364956158791e-06, "loss": 1.4091, "step": 2675 }, { "epoch": 0.3788490125291994, "grad_norm": 10.818658210633869, "learning_rate": 4.674082025168723e-06, "loss": 1.3547, "step": 2676 }, { "epoch": 0.3789905854038366, "grad_norm": 11.154588078682158, "learning_rate": 4.673798979888784e-06, "loss": 1.3831, "step": 2677 }, { "epoch": 0.37913215827847385, "grad_norm": 10.073039157021416, "learning_rate": 4.673515820333853e-06, "loss": 1.5053, "step": 2678 }, { "epoch": 0.3792737311531111, "grad_norm": 11.065952602773786, "learning_rate": 4.673232546518817e-06, "loss": 1.418, "step": 2679 }, { "epoch": 0.37941530402774826, "grad_norm": 9.691340122204865, "learning_rate": 4.672949158458565e-06, "loss": 1.3683, "step": 2680 }, { "epoch": 0.3795568769023855, "grad_norm": 11.200709552087083, "learning_rate": 4.672665656167997e-06, "loss": 1.3431, "step": 2681 }, { "epoch": 0.3796984497770227, "grad_norm": 10.345372559673116, "learning_rate": 4.672382039662016e-06, "loss": 1.3055, "step": 2682 }, { "epoch": 0.37984002265165995, "grad_norm": 11.661562284364363, "learning_rate": 4.672098308955529e-06, "loss": 1.3483, "step": 2683 }, { "epoch": 0.3799815955262972, "grad_norm": 12.983219173322889, "learning_rate": 4.671814464063455e-06, "loss": 1.5273, "step": 2684 }, { "epoch": 0.3801231684009344, "grad_norm": 10.982784671615228, "learning_rate": 4.671530505000714e-06, "loss": 1.2853, "step": 2685 }, { "epoch": 0.3802647412755716, "grad_norm": 7.978578019569369, "learning_rate": 4.671246431782234e-06, "loss": 1.387, "step": 2686 }, { "epoch": 0.3804063141502088, "grad_norm": 8.038322214457287, "learning_rate": 4.670962244422946e-06, "loss": 1.4877, "step": 2687 }, { "epoch": 0.38054788702484604, "grad_norm": 10.98707028743225, "learning_rate": 4.670677942937793e-06, "loss": 1.5384, "step": 2688 }, { "epoch": 0.3806894598994833, "grad_norm": 11.313081673689375, "learning_rate": 4.6703935273417195e-06, "loss": 1.3609, "step": 2689 }, { "epoch": 0.3808310327741205, "grad_norm": 10.412177703009249, "learning_rate": 4.670108997649676e-06, "loss": 1.3185, "step": 2690 }, { "epoch": 0.3809726056487577, "grad_norm": 10.054040610867874, "learning_rate": 4.66982435387662e-06, "loss": 1.4195, "step": 2691 }, { "epoch": 0.3811141785233949, "grad_norm": 9.306332587701245, "learning_rate": 4.669539596037517e-06, "loss": 1.4194, "step": 2692 }, { "epoch": 0.38125575139803214, "grad_norm": 10.6979344165708, "learning_rate": 4.669254724147334e-06, "loss": 1.4598, "step": 2693 }, { "epoch": 0.38139732427266937, "grad_norm": 10.702752901834854, "learning_rate": 4.6689697382210475e-06, "loss": 1.3117, "step": 2694 }, { "epoch": 0.3815388971473066, "grad_norm": 9.911225907364658, "learning_rate": 4.668684638273639e-06, "loss": 1.2393, "step": 2695 }, { "epoch": 0.3816804700219438, "grad_norm": 9.745084648410577, "learning_rate": 4.668399424320097e-06, "loss": 1.3565, "step": 2696 }, { "epoch": 0.381822042896581, "grad_norm": 9.837591079232126, "learning_rate": 4.668114096375413e-06, "loss": 1.5382, "step": 2697 }, { "epoch": 0.38196361577121823, "grad_norm": 8.864272820801578, "learning_rate": 4.6678286544545894e-06, "loss": 1.5159, "step": 2698 }, { "epoch": 0.38210518864585546, "grad_norm": 9.222782403970946, "learning_rate": 4.667543098572627e-06, "loss": 1.4108, "step": 2699 }, { "epoch": 0.3822467615204927, "grad_norm": 8.373216581482302, "learning_rate": 4.667257428744542e-06, "loss": 1.368, "step": 2700 }, { "epoch": 0.38238833439512987, "grad_norm": 11.697078268296252, "learning_rate": 4.6669716449853505e-06, "loss": 1.406, "step": 2701 }, { "epoch": 0.3825299072697671, "grad_norm": 9.665612086507297, "learning_rate": 4.666685747310075e-06, "loss": 1.3514, "step": 2702 }, { "epoch": 0.38267148014440433, "grad_norm": 8.82546022420051, "learning_rate": 4.666399735733745e-06, "loss": 1.2088, "step": 2703 }, { "epoch": 0.38281305301904156, "grad_norm": 8.3727855886308, "learning_rate": 4.666113610271395e-06, "loss": 1.2886, "step": 2704 }, { "epoch": 0.3829546258936788, "grad_norm": 9.699658842084707, "learning_rate": 4.66582737093807e-06, "loss": 1.3807, "step": 2705 }, { "epoch": 0.38309619876831597, "grad_norm": 9.08050222343912, "learning_rate": 4.665541017748813e-06, "loss": 1.283, "step": 2706 }, { "epoch": 0.3832377716429532, "grad_norm": 10.709286975081415, "learning_rate": 4.665254550718681e-06, "loss": 1.3762, "step": 2707 }, { "epoch": 0.3833793445175904, "grad_norm": 8.933920830534039, "learning_rate": 4.6649679698627306e-06, "loss": 1.271, "step": 2708 }, { "epoch": 0.38352091739222766, "grad_norm": 8.920955256472721, "learning_rate": 4.664681275196028e-06, "loss": 1.4052, "step": 2709 }, { "epoch": 0.3836624902668649, "grad_norm": 11.627357161896787, "learning_rate": 4.664394466733646e-06, "loss": 1.4375, "step": 2710 }, { "epoch": 0.38380406314150206, "grad_norm": 8.045155402579875, "learning_rate": 4.66410754449066e-06, "loss": 1.3756, "step": 2711 }, { "epoch": 0.3839456360161393, "grad_norm": 10.57837382577547, "learning_rate": 4.6638205084821544e-06, "loss": 1.3694, "step": 2712 }, { "epoch": 0.3840872088907765, "grad_norm": 7.904616598040143, "learning_rate": 4.6635333587232175e-06, "loss": 1.3144, "step": 2713 }, { "epoch": 0.38422878176541375, "grad_norm": 13.495878932805574, "learning_rate": 4.663246095228946e-06, "loss": 1.3702, "step": 2714 }, { "epoch": 0.384370354640051, "grad_norm": 12.100674167484106, "learning_rate": 4.66295871801444e-06, "loss": 1.4133, "step": 2715 }, { "epoch": 0.3845119275146882, "grad_norm": 8.980030794154597, "learning_rate": 4.662671227094806e-06, "loss": 1.3135, "step": 2716 }, { "epoch": 0.3846535003893254, "grad_norm": 8.29613105039171, "learning_rate": 4.662383622485159e-06, "loss": 1.3489, "step": 2717 }, { "epoch": 0.3847950732639626, "grad_norm": 11.41272558049922, "learning_rate": 4.662095904200617e-06, "loss": 1.2931, "step": 2718 }, { "epoch": 0.38493664613859985, "grad_norm": 11.209020959874518, "learning_rate": 4.661808072256306e-06, "loss": 1.3658, "step": 2719 }, { "epoch": 0.3850782190132371, "grad_norm": 8.623691625456964, "learning_rate": 4.661520126667356e-06, "loss": 1.3799, "step": 2720 }, { "epoch": 0.3852197918878743, "grad_norm": 10.883893465249034, "learning_rate": 4.6612320674489045e-06, "loss": 1.3583, "step": 2721 }, { "epoch": 0.3853613647625115, "grad_norm": 10.05328705585598, "learning_rate": 4.660943894616095e-06, "loss": 1.4679, "step": 2722 }, { "epoch": 0.3855029376371487, "grad_norm": 11.036612873996656, "learning_rate": 4.660655608184076e-06, "loss": 1.4387, "step": 2723 }, { "epoch": 0.38564451051178594, "grad_norm": 8.735430112925894, "learning_rate": 4.660367208168004e-06, "loss": 1.2773, "step": 2724 }, { "epoch": 0.3857860833864232, "grad_norm": 8.406797710287742, "learning_rate": 4.660078694583037e-06, "loss": 1.3671, "step": 2725 }, { "epoch": 0.3859276562610604, "grad_norm": 9.412165913725703, "learning_rate": 4.6597900674443445e-06, "loss": 1.4734, "step": 2726 }, { "epoch": 0.3860692291356976, "grad_norm": 10.023768977737015, "learning_rate": 4.659501326767098e-06, "loss": 1.5199, "step": 2727 }, { "epoch": 0.3862108020103348, "grad_norm": 10.510573376841101, "learning_rate": 4.6592124725664776e-06, "loss": 1.3955, "step": 2728 }, { "epoch": 0.38635237488497204, "grad_norm": 9.624021009718746, "learning_rate": 4.6589235048576676e-06, "loss": 1.2931, "step": 2729 }, { "epoch": 0.38649394775960927, "grad_norm": 9.93348276764094, "learning_rate": 4.658634423655858e-06, "loss": 1.2739, "step": 2730 }, { "epoch": 0.3866355206342465, "grad_norm": 10.070906450676684, "learning_rate": 4.658345228976246e-06, "loss": 1.3344, "step": 2731 }, { "epoch": 0.3867770935088837, "grad_norm": 10.792909724888364, "learning_rate": 4.658055920834036e-06, "loss": 1.3413, "step": 2732 }, { "epoch": 0.3869186663835209, "grad_norm": 8.08021055117189, "learning_rate": 4.6577664992444345e-06, "loss": 1.2123, "step": 2733 }, { "epoch": 0.38706023925815813, "grad_norm": 8.778612711453633, "learning_rate": 4.657476964222657e-06, "loss": 1.4105, "step": 2734 }, { "epoch": 0.38720181213279536, "grad_norm": 8.768424119284179, "learning_rate": 4.657187315783925e-06, "loss": 1.3075, "step": 2735 }, { "epoch": 0.3873433850074326, "grad_norm": 7.45720926974518, "learning_rate": 4.656897553943463e-06, "loss": 1.2876, "step": 2736 }, { "epoch": 0.38748495788206977, "grad_norm": 10.199335069113612, "learning_rate": 4.656607678716506e-06, "loss": 1.3631, "step": 2737 }, { "epoch": 0.387626530756707, "grad_norm": 8.402863568445126, "learning_rate": 4.656317690118291e-06, "loss": 1.3287, "step": 2738 }, { "epoch": 0.38776810363134423, "grad_norm": 8.454443076102102, "learning_rate": 4.6560275881640615e-06, "loss": 1.3103, "step": 2739 }, { "epoch": 0.38790967650598146, "grad_norm": 9.652602608705179, "learning_rate": 4.655737372869071e-06, "loss": 1.3687, "step": 2740 }, { "epoch": 0.3880512493806187, "grad_norm": 8.847429029229119, "learning_rate": 4.655447044248573e-06, "loss": 1.2548, "step": 2741 }, { "epoch": 0.3881928222552559, "grad_norm": 7.5432575156727815, "learning_rate": 4.655156602317832e-06, "loss": 1.2472, "step": 2742 }, { "epoch": 0.3883343951298931, "grad_norm": 8.129569309364532, "learning_rate": 4.654866047092115e-06, "loss": 1.2768, "step": 2743 }, { "epoch": 0.3884759680045303, "grad_norm": 9.115501999834368, "learning_rate": 4.654575378586696e-06, "loss": 1.3474, "step": 2744 }, { "epoch": 0.38861754087916756, "grad_norm": 7.998165158618632, "learning_rate": 4.6542845968168575e-06, "loss": 1.1801, "step": 2745 }, { "epoch": 0.3887591137538048, "grad_norm": 9.96245431159634, "learning_rate": 4.653993701797883e-06, "loss": 1.3783, "step": 2746 }, { "epoch": 0.388900686628442, "grad_norm": 8.852742398467992, "learning_rate": 4.653702693545066e-06, "loss": 1.2266, "step": 2747 }, { "epoch": 0.3890422595030792, "grad_norm": 10.187163364956724, "learning_rate": 4.653411572073704e-06, "loss": 1.3879, "step": 2748 }, { "epoch": 0.3891838323777164, "grad_norm": 10.219071409437914, "learning_rate": 4.6531203373991015e-06, "loss": 1.4126, "step": 2749 }, { "epoch": 0.38932540525235365, "grad_norm": 9.434443300902773, "learning_rate": 4.652828989536567e-06, "loss": 1.2364, "step": 2750 }, { "epoch": 0.3894669781269909, "grad_norm": 8.683922339711959, "learning_rate": 4.6525375285014195e-06, "loss": 1.4456, "step": 2751 }, { "epoch": 0.3896085510016281, "grad_norm": 9.651316150783934, "learning_rate": 4.652245954308979e-06, "loss": 1.4638, "step": 2752 }, { "epoch": 0.3897501238762653, "grad_norm": 10.917035822171608, "learning_rate": 4.651954266974573e-06, "loss": 1.3792, "step": 2753 }, { "epoch": 0.3898916967509025, "grad_norm": 9.697098240939585, "learning_rate": 4.651662466513536e-06, "loss": 1.2616, "step": 2754 }, { "epoch": 0.39003326962553975, "grad_norm": 8.890800487750711, "learning_rate": 4.651370552941207e-06, "loss": 1.3711, "step": 2755 }, { "epoch": 0.390174842500177, "grad_norm": 10.705604611266635, "learning_rate": 4.651078526272932e-06, "loss": 1.2014, "step": 2756 }, { "epoch": 0.3903164153748142, "grad_norm": 11.739655831538196, "learning_rate": 4.6507863865240635e-06, "loss": 1.3189, "step": 2757 }, { "epoch": 0.3904579882494514, "grad_norm": 10.28018730260118, "learning_rate": 4.650494133709958e-06, "loss": 1.4561, "step": 2758 }, { "epoch": 0.3905995611240886, "grad_norm": 8.92066319348082, "learning_rate": 4.650201767845979e-06, "loss": 1.2872, "step": 2759 }, { "epoch": 0.39074113399872584, "grad_norm": 8.437134685378606, "learning_rate": 4.649909288947497e-06, "loss": 1.3115, "step": 2760 }, { "epoch": 0.3908827068733631, "grad_norm": 11.024685989717575, "learning_rate": 4.649616697029886e-06, "loss": 1.1658, "step": 2761 }, { "epoch": 0.3910242797480003, "grad_norm": 9.286247783049062, "learning_rate": 4.649323992108529e-06, "loss": 1.3053, "step": 2762 }, { "epoch": 0.3911658526226375, "grad_norm": 9.096330309397095, "learning_rate": 4.649031174198812e-06, "loss": 1.3373, "step": 2763 }, { "epoch": 0.3913074254972747, "grad_norm": 9.109302134410548, "learning_rate": 4.648738243316128e-06, "loss": 1.5556, "step": 2764 }, { "epoch": 0.39144899837191194, "grad_norm": 8.309587699862083, "learning_rate": 4.648445199475877e-06, "loss": 1.2552, "step": 2765 }, { "epoch": 0.39159057124654917, "grad_norm": 9.707568835717588, "learning_rate": 4.648152042693464e-06, "loss": 1.2814, "step": 2766 }, { "epoch": 0.3917321441211864, "grad_norm": 10.405905411430599, "learning_rate": 4.6478587729843e-06, "loss": 1.3209, "step": 2767 }, { "epoch": 0.3918737169958236, "grad_norm": 13.19552931937763, "learning_rate": 4.647565390363802e-06, "loss": 1.3155, "step": 2768 }, { "epoch": 0.3920152898704608, "grad_norm": 10.406235336889068, "learning_rate": 4.6472718948473915e-06, "loss": 1.233, "step": 2769 }, { "epoch": 0.39215686274509803, "grad_norm": 11.194516501708828, "learning_rate": 4.6469782864504995e-06, "loss": 1.2991, "step": 2770 }, { "epoch": 0.39229843561973526, "grad_norm": 8.913558798624878, "learning_rate": 4.64668456518856e-06, "loss": 1.2707, "step": 2771 }, { "epoch": 0.3924400084943725, "grad_norm": 11.988411075787747, "learning_rate": 4.646390731077013e-06, "loss": 1.3388, "step": 2772 }, { "epoch": 0.3925815813690097, "grad_norm": 11.490905482749241, "learning_rate": 4.646096784131306e-06, "loss": 1.3681, "step": 2773 }, { "epoch": 0.3927231542436469, "grad_norm": 10.511080390431855, "learning_rate": 4.645802724366891e-06, "loss": 1.3291, "step": 2774 }, { "epoch": 0.39286472711828413, "grad_norm": 11.634510374959449, "learning_rate": 4.645508551799227e-06, "loss": 1.4062, "step": 2775 }, { "epoch": 0.39300629999292136, "grad_norm": 10.07523691357906, "learning_rate": 4.645214266443778e-06, "loss": 1.3857, "step": 2776 }, { "epoch": 0.3931478728675586, "grad_norm": 8.936610891226685, "learning_rate": 4.644919868316014e-06, "loss": 1.3691, "step": 2777 }, { "epoch": 0.3932894457421958, "grad_norm": 9.13612628009058, "learning_rate": 4.644625357431414e-06, "loss": 1.2615, "step": 2778 }, { "epoch": 0.393431018616833, "grad_norm": 11.423029182450906, "learning_rate": 4.6443307338054565e-06, "loss": 1.3913, "step": 2779 }, { "epoch": 0.3935725914914702, "grad_norm": 7.98584472506825, "learning_rate": 4.644035997453631e-06, "loss": 1.2388, "step": 2780 }, { "epoch": 0.39371416436610746, "grad_norm": 9.030338439344428, "learning_rate": 4.643741148391432e-06, "loss": 1.4152, "step": 2781 }, { "epoch": 0.3938557372407447, "grad_norm": 12.553341388048157, "learning_rate": 4.64344618663436e-06, "loss": 1.445, "step": 2782 }, { "epoch": 0.3939973101153819, "grad_norm": 8.373602451608157, "learning_rate": 4.643151112197919e-06, "loss": 1.2911, "step": 2783 }, { "epoch": 0.3941388829900191, "grad_norm": 8.977310401828671, "learning_rate": 4.642855925097622e-06, "loss": 1.3993, "step": 2784 }, { "epoch": 0.3942804558646563, "grad_norm": 8.865866245864009, "learning_rate": 4.642560625348988e-06, "loss": 1.2814, "step": 2785 }, { "epoch": 0.39442202873929355, "grad_norm": 9.939664784848443, "learning_rate": 4.642265212967539e-06, "loss": 1.2551, "step": 2786 }, { "epoch": 0.3945636016139308, "grad_norm": 10.9379509533152, "learning_rate": 4.6419696879688046e-06, "loss": 1.2219, "step": 2787 }, { "epoch": 0.394705174488568, "grad_norm": 7.322482346252645, "learning_rate": 4.641674050368321e-06, "loss": 1.1737, "step": 2788 }, { "epoch": 0.3948467473632052, "grad_norm": 10.018388244989497, "learning_rate": 4.641378300181629e-06, "loss": 1.412, "step": 2789 }, { "epoch": 0.3949883202378424, "grad_norm": 11.767286206182083, "learning_rate": 4.641082437424277e-06, "loss": 1.4521, "step": 2790 }, { "epoch": 0.39512989311247965, "grad_norm": 8.076085672529674, "learning_rate": 4.6407864621118184e-06, "loss": 1.4024, "step": 2791 }, { "epoch": 0.3952714659871169, "grad_norm": 8.468239631617429, "learning_rate": 4.640490374259811e-06, "loss": 1.383, "step": 2792 }, { "epoch": 0.3954130388617541, "grad_norm": 7.924437099165028, "learning_rate": 4.6401941738838204e-06, "loss": 1.319, "step": 2793 }, { "epoch": 0.3955546117363913, "grad_norm": 9.86252743097124, "learning_rate": 4.639897860999418e-06, "loss": 1.3689, "step": 2794 }, { "epoch": 0.3956961846110285, "grad_norm": 8.282860250286808, "learning_rate": 4.639601435622182e-06, "loss": 1.3725, "step": 2795 }, { "epoch": 0.39583775748566574, "grad_norm": 10.388787358060425, "learning_rate": 4.639304897767692e-06, "loss": 1.4397, "step": 2796 }, { "epoch": 0.395979330360303, "grad_norm": 10.028938383624613, "learning_rate": 4.63900824745154e-06, "loss": 1.3749, "step": 2797 }, { "epoch": 0.3961209032349402, "grad_norm": 7.044476762865275, "learning_rate": 4.638711484689319e-06, "loss": 1.1255, "step": 2798 }, { "epoch": 0.3962624761095774, "grad_norm": 8.991914720151247, "learning_rate": 4.638414609496628e-06, "loss": 1.2067, "step": 2799 }, { "epoch": 0.3964040489842146, "grad_norm": 10.556095506566336, "learning_rate": 4.638117621889078e-06, "loss": 1.3289, "step": 2800 }, { "epoch": 0.39654562185885184, "grad_norm": 8.471367575752785, "learning_rate": 4.637820521882278e-06, "loss": 1.246, "step": 2801 }, { "epoch": 0.39668719473348907, "grad_norm": 11.04779592773327, "learning_rate": 4.637523309491847e-06, "loss": 1.4815, "step": 2802 }, { "epoch": 0.3968287676081263, "grad_norm": 9.4329758467408, "learning_rate": 4.63722598473341e-06, "loss": 1.3778, "step": 2803 }, { "epoch": 0.39697034048276353, "grad_norm": 10.393342131814256, "learning_rate": 4.636928547622596e-06, "loss": 1.2944, "step": 2804 }, { "epoch": 0.3971119133574007, "grad_norm": 11.15508223547347, "learning_rate": 4.636630998175042e-06, "loss": 1.3217, "step": 2805 }, { "epoch": 0.39725348623203793, "grad_norm": 9.297082209481816, "learning_rate": 4.636333336406389e-06, "loss": 1.3225, "step": 2806 }, { "epoch": 0.39739505910667516, "grad_norm": 9.970988342211008, "learning_rate": 4.636035562332286e-06, "loss": 1.5343, "step": 2807 }, { "epoch": 0.3975366319813124, "grad_norm": 9.364636605987817, "learning_rate": 4.6357376759683856e-06, "loss": 1.3674, "step": 2808 }, { "epoch": 0.3976782048559496, "grad_norm": 9.198120207098746, "learning_rate": 4.635439677330349e-06, "loss": 1.3061, "step": 2809 }, { "epoch": 0.3978197777305868, "grad_norm": 10.595354796981825, "learning_rate": 4.635141566433839e-06, "loss": 1.2876, "step": 2810 }, { "epoch": 0.39796135060522403, "grad_norm": 8.575999512544305, "learning_rate": 4.6348433432945314e-06, "loss": 1.185, "step": 2811 }, { "epoch": 0.39810292347986126, "grad_norm": 9.850691609379794, "learning_rate": 4.6345450079281e-06, "loss": 1.4283, "step": 2812 }, { "epoch": 0.3982444963544985, "grad_norm": 9.972257278177693, "learning_rate": 4.634246560350229e-06, "loss": 1.3128, "step": 2813 }, { "epoch": 0.3983860692291357, "grad_norm": 11.871154162399751, "learning_rate": 4.633948000576607e-06, "loss": 1.3242, "step": 2814 }, { "epoch": 0.3985276421037729, "grad_norm": 11.338866809019997, "learning_rate": 4.63364932862293e-06, "loss": 1.4601, "step": 2815 }, { "epoch": 0.3986692149784101, "grad_norm": 9.59754217791897, "learning_rate": 4.633350544504899e-06, "loss": 1.1109, "step": 2816 }, { "epoch": 0.39881078785304735, "grad_norm": 7.835878594463289, "learning_rate": 4.63305164823822e-06, "loss": 1.1786, "step": 2817 }, { "epoch": 0.3989523607276846, "grad_norm": 8.248203428762295, "learning_rate": 4.632752639838607e-06, "loss": 1.1452, "step": 2818 }, { "epoch": 0.3990939336023218, "grad_norm": 8.088448338278244, "learning_rate": 4.632453519321778e-06, "loss": 1.2562, "step": 2819 }, { "epoch": 0.399235506476959, "grad_norm": 11.195044674701155, "learning_rate": 4.632154286703457e-06, "loss": 1.297, "step": 2820 }, { "epoch": 0.3993770793515962, "grad_norm": 9.451737533767353, "learning_rate": 4.6318549419993765e-06, "loss": 1.2949, "step": 2821 }, { "epoch": 0.39951865222623345, "grad_norm": 10.780707569574624, "learning_rate": 4.63155548522527e-06, "loss": 1.4362, "step": 2822 }, { "epoch": 0.3996602251008707, "grad_norm": 9.296989478680677, "learning_rate": 4.6312559163968805e-06, "loss": 1.3343, "step": 2823 }, { "epoch": 0.3998017979755079, "grad_norm": 10.3425318066885, "learning_rate": 4.630956235529957e-06, "loss": 1.5448, "step": 2824 }, { "epoch": 0.3999433708501451, "grad_norm": 10.618772948331774, "learning_rate": 4.630656442640254e-06, "loss": 1.3558, "step": 2825 }, { "epoch": 0.4000849437247823, "grad_norm": 10.152813796712843, "learning_rate": 4.63035653774353e-06, "loss": 1.3271, "step": 2826 }, { "epoch": 0.40022651659941955, "grad_norm": 8.183332241495943, "learning_rate": 4.6300565208555505e-06, "loss": 1.2925, "step": 2827 }, { "epoch": 0.4003680894740568, "grad_norm": 10.801148911576888, "learning_rate": 4.629756391992088e-06, "loss": 1.3348, "step": 2828 }, { "epoch": 0.400509662348694, "grad_norm": 9.96218397531914, "learning_rate": 4.629456151168921e-06, "loss": 1.4639, "step": 2829 }, { "epoch": 0.40065123522333124, "grad_norm": 9.211466352634778, "learning_rate": 4.629155798401832e-06, "loss": 1.5077, "step": 2830 }, { "epoch": 0.4007928080979684, "grad_norm": 9.341980833634594, "learning_rate": 4.628855333706609e-06, "loss": 1.2824, "step": 2831 }, { "epoch": 0.40093438097260564, "grad_norm": 7.8075101252235815, "learning_rate": 4.62855475709905e-06, "loss": 1.3058, "step": 2832 }, { "epoch": 0.40107595384724287, "grad_norm": 8.69829475966837, "learning_rate": 4.628254068594953e-06, "loss": 1.3313, "step": 2833 }, { "epoch": 0.4012175267218801, "grad_norm": 8.456397475530592, "learning_rate": 4.627953268210127e-06, "loss": 1.4015, "step": 2834 }, { "epoch": 0.40135909959651733, "grad_norm": 8.863178385393617, "learning_rate": 4.627652355960384e-06, "loss": 1.3053, "step": 2835 }, { "epoch": 0.4015006724711545, "grad_norm": 10.561840871977786, "learning_rate": 4.627351331861544e-06, "loss": 1.2979, "step": 2836 }, { "epoch": 0.40164224534579174, "grad_norm": 9.036847448994934, "learning_rate": 4.6270501959294315e-06, "loss": 1.4141, "step": 2837 }, { "epoch": 0.40178381822042897, "grad_norm": 9.120070706812838, "learning_rate": 4.6267489481798744e-06, "loss": 1.3535, "step": 2838 }, { "epoch": 0.4019253910950662, "grad_norm": 8.640507122980987, "learning_rate": 4.626447588628712e-06, "loss": 1.2851, "step": 2839 }, { "epoch": 0.40206696396970343, "grad_norm": 9.134253839295447, "learning_rate": 4.626146117291784e-06, "loss": 1.1421, "step": 2840 }, { "epoch": 0.4022085368443406, "grad_norm": 10.464587850314656, "learning_rate": 4.625844534184941e-06, "loss": 1.1815, "step": 2841 }, { "epoch": 0.40235010971897783, "grad_norm": 9.60055370323459, "learning_rate": 4.625542839324036e-06, "loss": 1.3255, "step": 2842 }, { "epoch": 0.40249168259361506, "grad_norm": 10.245405702689148, "learning_rate": 4.625241032724929e-06, "loss": 1.3262, "step": 2843 }, { "epoch": 0.4026332554682523, "grad_norm": 7.434780617448631, "learning_rate": 4.624939114403485e-06, "loss": 1.32, "step": 2844 }, { "epoch": 0.4027748283428895, "grad_norm": 9.711470941452674, "learning_rate": 4.624637084375576e-06, "loss": 1.4756, "step": 2845 }, { "epoch": 0.4029164012175267, "grad_norm": 7.57320233155615, "learning_rate": 4.62433494265708e-06, "loss": 1.216, "step": 2846 }, { "epoch": 0.40305797409216393, "grad_norm": 8.387464215675575, "learning_rate": 4.62403268926388e-06, "loss": 1.2944, "step": 2847 }, { "epoch": 0.40319954696680116, "grad_norm": 7.673864662120323, "learning_rate": 4.623730324211865e-06, "loss": 1.1791, "step": 2848 }, { "epoch": 0.4033411198414384, "grad_norm": 10.604403649900334, "learning_rate": 4.623427847516931e-06, "loss": 1.3722, "step": 2849 }, { "epoch": 0.4034826927160756, "grad_norm": 12.270283624655633, "learning_rate": 4.623125259194978e-06, "loss": 1.3518, "step": 2850 }, { "epoch": 0.4036242655907128, "grad_norm": 9.992890310139918, "learning_rate": 4.622822559261913e-06, "loss": 1.3393, "step": 2851 }, { "epoch": 0.40376583846535, "grad_norm": 11.817354042567347, "learning_rate": 4.622519747733649e-06, "loss": 1.4199, "step": 2852 }, { "epoch": 0.40390741133998725, "grad_norm": 7.2337277083698615, "learning_rate": 4.622216824626104e-06, "loss": 1.2217, "step": 2853 }, { "epoch": 0.4040489842146245, "grad_norm": 9.182868880234652, "learning_rate": 4.621913789955204e-06, "loss": 1.3101, "step": 2854 }, { "epoch": 0.4041905570892617, "grad_norm": 10.463219300477402, "learning_rate": 4.621610643736878e-06, "loss": 1.3445, "step": 2855 }, { "epoch": 0.4043321299638989, "grad_norm": 9.077553079418923, "learning_rate": 4.621307385987062e-06, "loss": 1.2472, "step": 2856 }, { "epoch": 0.4044737028385361, "grad_norm": 9.411493911774432, "learning_rate": 4.621004016721699e-06, "loss": 1.2347, "step": 2857 }, { "epoch": 0.40461527571317335, "grad_norm": 7.8653359834043926, "learning_rate": 4.620700535956735e-06, "loss": 1.3161, "step": 2858 }, { "epoch": 0.4047568485878106, "grad_norm": 11.654349082598415, "learning_rate": 4.620396943708127e-06, "loss": 1.5757, "step": 2859 }, { "epoch": 0.4048984214624478, "grad_norm": 7.7945744057797235, "learning_rate": 4.6200932399918304e-06, "loss": 1.2532, "step": 2860 }, { "epoch": 0.40503999433708504, "grad_norm": 9.359071755312797, "learning_rate": 4.619789424823815e-06, "loss": 1.3075, "step": 2861 }, { "epoch": 0.4051815672117222, "grad_norm": 8.619348858449364, "learning_rate": 4.619485498220049e-06, "loss": 1.4258, "step": 2862 }, { "epoch": 0.40532314008635945, "grad_norm": 10.107078609811163, "learning_rate": 4.6191814601965115e-06, "loss": 1.3049, "step": 2863 }, { "epoch": 0.4054647129609967, "grad_norm": 11.694191715251074, "learning_rate": 4.618877310769184e-06, "loss": 1.3057, "step": 2864 }, { "epoch": 0.4056062858356339, "grad_norm": 8.537532295847361, "learning_rate": 4.6185730499540565e-06, "loss": 1.395, "step": 2865 }, { "epoch": 0.40574785871027114, "grad_norm": 10.253081067987258, "learning_rate": 4.618268677767124e-06, "loss": 1.3485, "step": 2866 }, { "epoch": 0.4058894315849083, "grad_norm": 8.835785531407565, "learning_rate": 4.617964194224386e-06, "loss": 1.3219, "step": 2867 }, { "epoch": 0.40603100445954554, "grad_norm": 10.33394026255705, "learning_rate": 4.617659599341849e-06, "loss": 1.3645, "step": 2868 }, { "epoch": 0.40617257733418277, "grad_norm": 8.01096927107509, "learning_rate": 4.617354893135527e-06, "loss": 1.3142, "step": 2869 }, { "epoch": 0.40631415020882, "grad_norm": 11.839712939519876, "learning_rate": 4.617050075621436e-06, "loss": 1.4168, "step": 2870 }, { "epoch": 0.40645572308345723, "grad_norm": 9.343058378719682, "learning_rate": 4.6167451468156015e-06, "loss": 1.4057, "step": 2871 }, { "epoch": 0.4065972959580944, "grad_norm": 9.023607868386256, "learning_rate": 4.616440106734053e-06, "loss": 1.3064, "step": 2872 }, { "epoch": 0.40673886883273164, "grad_norm": 11.444121231732979, "learning_rate": 4.6161349553928255e-06, "loss": 1.465, "step": 2873 }, { "epoch": 0.40688044170736887, "grad_norm": 8.79233662973868, "learning_rate": 4.615829692807962e-06, "loss": 1.358, "step": 2874 }, { "epoch": 0.4070220145820061, "grad_norm": 9.538569807096017, "learning_rate": 4.61552431899551e-06, "loss": 1.2733, "step": 2875 }, { "epoch": 0.4071635874566433, "grad_norm": 10.202008547024192, "learning_rate": 4.615218833971521e-06, "loss": 1.3722, "step": 2876 }, { "epoch": 0.4073051603312805, "grad_norm": 9.294024477652883, "learning_rate": 4.614913237752054e-06, "loss": 1.4096, "step": 2877 }, { "epoch": 0.40744673320591773, "grad_norm": 7.522779330959049, "learning_rate": 4.614607530353177e-06, "loss": 1.2713, "step": 2878 }, { "epoch": 0.40758830608055496, "grad_norm": 11.881676654088205, "learning_rate": 4.614301711790958e-06, "loss": 1.3986, "step": 2879 }, { "epoch": 0.4077298789551922, "grad_norm": 10.853539700744847, "learning_rate": 4.613995782081474e-06, "loss": 1.2894, "step": 2880 }, { "epoch": 0.4078714518298294, "grad_norm": 8.327340781557309, "learning_rate": 4.6136897412408084e-06, "loss": 1.3703, "step": 2881 }, { "epoch": 0.4080130247044666, "grad_norm": 8.498762601654276, "learning_rate": 4.61338358928505e-06, "loss": 1.3157, "step": 2882 }, { "epoch": 0.40815459757910383, "grad_norm": 9.725242286154831, "learning_rate": 4.6130773262302905e-06, "loss": 1.519, "step": 2883 }, { "epoch": 0.40829617045374106, "grad_norm": 9.786682375376444, "learning_rate": 4.612770952092632e-06, "loss": 1.314, "step": 2884 }, { "epoch": 0.4084377433283783, "grad_norm": 10.386248637819218, "learning_rate": 4.612464466888181e-06, "loss": 1.462, "step": 2885 }, { "epoch": 0.4085793162030155, "grad_norm": 13.302955608799403, "learning_rate": 4.612157870633047e-06, "loss": 1.3059, "step": 2886 }, { "epoch": 0.40872088907765275, "grad_norm": 8.745500115935739, "learning_rate": 4.61185116334335e-06, "loss": 1.172, "step": 2887 }, { "epoch": 0.4088624619522899, "grad_norm": 8.994894486932834, "learning_rate": 4.61154434503521e-06, "loss": 1.3584, "step": 2888 }, { "epoch": 0.40900403482692715, "grad_norm": 10.070377654463393, "learning_rate": 4.611237415724759e-06, "loss": 1.3452, "step": 2889 }, { "epoch": 0.4091456077015644, "grad_norm": 12.937222242829083, "learning_rate": 4.610930375428132e-06, "loss": 1.3379, "step": 2890 }, { "epoch": 0.4092871805762016, "grad_norm": 12.90620448217609, "learning_rate": 4.610623224161468e-06, "loss": 1.4758, "step": 2891 }, { "epoch": 0.40942875345083884, "grad_norm": 8.02744306888072, "learning_rate": 4.610315961940916e-06, "loss": 1.3856, "step": 2892 }, { "epoch": 0.409570326325476, "grad_norm": 8.773157483844008, "learning_rate": 4.610008588782626e-06, "loss": 1.3475, "step": 2893 }, { "epoch": 0.40971189920011325, "grad_norm": 11.1904950954351, "learning_rate": 4.609701104702759e-06, "loss": 1.3716, "step": 2894 }, { "epoch": 0.4098534720747505, "grad_norm": 12.397059651897111, "learning_rate": 4.609393509717478e-06, "loss": 1.2934, "step": 2895 }, { "epoch": 0.4099950449493877, "grad_norm": 9.448914351494302, "learning_rate": 4.6090858038429535e-06, "loss": 1.4021, "step": 2896 }, { "epoch": 0.41013661782402494, "grad_norm": 8.241076180262304, "learning_rate": 4.6087779870953595e-06, "loss": 1.4614, "step": 2897 }, { "epoch": 0.4102781906986621, "grad_norm": 9.309347758488592, "learning_rate": 4.608470059490879e-06, "loss": 1.2883, "step": 2898 }, { "epoch": 0.41041976357329935, "grad_norm": 7.377042584511843, "learning_rate": 4.6081620210457e-06, "loss": 1.1782, "step": 2899 }, { "epoch": 0.4105613364479366, "grad_norm": 9.38246732230916, "learning_rate": 4.6078538717760165e-06, "loss": 1.2599, "step": 2900 }, { "epoch": 0.4107029093225738, "grad_norm": 9.005225889601766, "learning_rate": 4.607545611698025e-06, "loss": 1.3542, "step": 2901 }, { "epoch": 0.41084448219721104, "grad_norm": 8.880245753831225, "learning_rate": 4.607237240827933e-06, "loss": 1.2004, "step": 2902 }, { "epoch": 0.4109860550718482, "grad_norm": 7.717043297998502, "learning_rate": 4.606928759181951e-06, "loss": 1.3793, "step": 2903 }, { "epoch": 0.41112762794648544, "grad_norm": 8.960946865459361, "learning_rate": 4.6066201667762944e-06, "loss": 1.3845, "step": 2904 }, { "epoch": 0.41126920082112267, "grad_norm": 9.703055806919153, "learning_rate": 4.606311463627186e-06, "loss": 1.2743, "step": 2905 }, { "epoch": 0.4114107736957599, "grad_norm": 9.742251716408244, "learning_rate": 4.606002649750856e-06, "loss": 1.4747, "step": 2906 }, { "epoch": 0.41155234657039713, "grad_norm": 9.531209576630468, "learning_rate": 4.605693725163536e-06, "loss": 1.236, "step": 2907 }, { "epoch": 0.4116939194450343, "grad_norm": 8.90294706435257, "learning_rate": 4.605384689881467e-06, "loss": 1.3823, "step": 2908 }, { "epoch": 0.41183549231967154, "grad_norm": 9.436535514406144, "learning_rate": 4.605075543920895e-06, "loss": 1.2442, "step": 2909 }, { "epoch": 0.41197706519430877, "grad_norm": 9.742146385613925, "learning_rate": 4.604766287298071e-06, "loss": 1.3926, "step": 2910 }, { "epoch": 0.412118638068946, "grad_norm": 9.452176234025794, "learning_rate": 4.604456920029252e-06, "loss": 1.3479, "step": 2911 }, { "epoch": 0.4122602109435832, "grad_norm": 10.318224618476947, "learning_rate": 4.604147442130703e-06, "loss": 1.3946, "step": 2912 }, { "epoch": 0.4124017838182204, "grad_norm": 8.778446279358885, "learning_rate": 4.603837853618691e-06, "loss": 1.4644, "step": 2913 }, { "epoch": 0.41254335669285763, "grad_norm": 8.673991067133677, "learning_rate": 4.603528154509492e-06, "loss": 1.3483, "step": 2914 }, { "epoch": 0.41268492956749486, "grad_norm": 11.179017671342658, "learning_rate": 4.6032183448193865e-06, "loss": 1.4868, "step": 2915 }, { "epoch": 0.4128265024421321, "grad_norm": 10.5387982673974, "learning_rate": 4.602908424564661e-06, "loss": 1.3698, "step": 2916 }, { "epoch": 0.4129680753167693, "grad_norm": 8.414427002898615, "learning_rate": 4.602598393761607e-06, "loss": 1.2531, "step": 2917 }, { "epoch": 0.41310964819140655, "grad_norm": 8.128276927075175, "learning_rate": 4.602288252426524e-06, "loss": 1.261, "step": 2918 }, { "epoch": 0.41325122106604373, "grad_norm": 11.478268199913288, "learning_rate": 4.601978000575715e-06, "loss": 1.4729, "step": 2919 }, { "epoch": 0.41339279394068096, "grad_norm": 10.49689292349709, "learning_rate": 4.6016676382254895e-06, "loss": 1.4525, "step": 2920 }, { "epoch": 0.4135343668153182, "grad_norm": 8.703406392724292, "learning_rate": 4.601357165392163e-06, "loss": 1.4825, "step": 2921 }, { "epoch": 0.4136759396899554, "grad_norm": 7.874606894717849, "learning_rate": 4.601046582092058e-06, "loss": 1.1975, "step": 2922 }, { "epoch": 0.41381751256459265, "grad_norm": 9.090142547194754, "learning_rate": 4.6007358883414996e-06, "loss": 1.3577, "step": 2923 }, { "epoch": 0.4139590854392298, "grad_norm": 9.477396124067015, "learning_rate": 4.600425084156823e-06, "loss": 1.3033, "step": 2924 }, { "epoch": 0.41410065831386705, "grad_norm": 9.51908242940734, "learning_rate": 4.6001141695543655e-06, "loss": 1.3843, "step": 2925 }, { "epoch": 0.4142422311885043, "grad_norm": 10.102764061127266, "learning_rate": 4.599803144550472e-06, "loss": 1.4004, "step": 2926 }, { "epoch": 0.4143838040631415, "grad_norm": 10.238350179208066, "learning_rate": 4.5994920091614935e-06, "loss": 1.3924, "step": 2927 }, { "epoch": 0.41452537693777874, "grad_norm": 8.889701594181536, "learning_rate": 4.5991807634037846e-06, "loss": 1.2656, "step": 2928 }, { "epoch": 0.4146669498124159, "grad_norm": 10.40860169560794, "learning_rate": 4.598869407293708e-06, "loss": 1.447, "step": 2929 }, { "epoch": 0.41480852268705315, "grad_norm": 9.649687577166116, "learning_rate": 4.5985579408476324e-06, "loss": 1.333, "step": 2930 }, { "epoch": 0.4149500955616904, "grad_norm": 9.680687656410393, "learning_rate": 4.5982463640819304e-06, "loss": 1.2791, "step": 2931 }, { "epoch": 0.4150916684363276, "grad_norm": 9.052368815097559, "learning_rate": 4.597934677012982e-06, "loss": 1.3863, "step": 2932 }, { "epoch": 0.41523324131096484, "grad_norm": 9.591604583123926, "learning_rate": 4.597622879657171e-06, "loss": 1.2454, "step": 2933 }, { "epoch": 0.415374814185602, "grad_norm": 9.10072965945438, "learning_rate": 4.597310972030889e-06, "loss": 1.4127, "step": 2934 }, { "epoch": 0.41551638706023925, "grad_norm": 13.008030098592887, "learning_rate": 4.596998954150534e-06, "loss": 1.421, "step": 2935 }, { "epoch": 0.4156579599348765, "grad_norm": 9.193379914532505, "learning_rate": 4.596686826032507e-06, "loss": 1.2345, "step": 2936 }, { "epoch": 0.4157995328095137, "grad_norm": 10.191871875143248, "learning_rate": 4.596374587693218e-06, "loss": 1.3568, "step": 2937 }, { "epoch": 0.41594110568415094, "grad_norm": 8.622760869053648, "learning_rate": 4.596062239149079e-06, "loss": 1.3892, "step": 2938 }, { "epoch": 0.4160826785587881, "grad_norm": 9.51838230717833, "learning_rate": 4.595749780416511e-06, "loss": 1.1925, "step": 2939 }, { "epoch": 0.41622425143342534, "grad_norm": 11.959013561790695, "learning_rate": 4.59543721151194e-06, "loss": 1.3084, "step": 2940 }, { "epoch": 0.41636582430806257, "grad_norm": 10.736073544200616, "learning_rate": 4.595124532451797e-06, "loss": 1.4099, "step": 2941 }, { "epoch": 0.4165073971826998, "grad_norm": 9.052495235177435, "learning_rate": 4.5948117432525195e-06, "loss": 1.3936, "step": 2942 }, { "epoch": 0.41664897005733703, "grad_norm": 11.190913015418015, "learning_rate": 4.594498843930551e-06, "loss": 1.3138, "step": 2943 }, { "epoch": 0.4167905429319742, "grad_norm": 10.946567112248879, "learning_rate": 4.59418583450234e-06, "loss": 1.4157, "step": 2944 }, { "epoch": 0.41693211580661144, "grad_norm": 10.756453595182405, "learning_rate": 4.593872714984341e-06, "loss": 1.3117, "step": 2945 }, { "epoch": 0.41707368868124867, "grad_norm": 8.822330267709267, "learning_rate": 4.593559485393015e-06, "loss": 1.2645, "step": 2946 }, { "epoch": 0.4172152615558859, "grad_norm": 10.83254540341245, "learning_rate": 4.593246145744827e-06, "loss": 1.191, "step": 2947 }, { "epoch": 0.4173568344305231, "grad_norm": 9.487513316752018, "learning_rate": 4.59293269605625e-06, "loss": 1.398, "step": 2948 }, { "epoch": 0.41749840730516036, "grad_norm": 9.329909468658277, "learning_rate": 4.592619136343762e-06, "loss": 1.2172, "step": 2949 }, { "epoch": 0.41763998017979753, "grad_norm": 11.37047243664999, "learning_rate": 4.592305466623847e-06, "loss": 1.4658, "step": 2950 }, { "epoch": 0.41778155305443476, "grad_norm": 9.002633874984127, "learning_rate": 4.591991686912993e-06, "loss": 1.2678, "step": 2951 }, { "epoch": 0.417923125929072, "grad_norm": 8.490146984899338, "learning_rate": 4.591677797227696e-06, "loss": 1.2745, "step": 2952 }, { "epoch": 0.4180646988037092, "grad_norm": 8.768196150458209, "learning_rate": 4.591363797584457e-06, "loss": 1.2899, "step": 2953 }, { "epoch": 0.41820627167834645, "grad_norm": 10.501775682258156, "learning_rate": 4.591049687999782e-06, "loss": 1.3561, "step": 2954 }, { "epoch": 0.4183478445529836, "grad_norm": 7.304186407027378, "learning_rate": 4.590735468490184e-06, "loss": 1.386, "step": 2955 }, { "epoch": 0.41848941742762086, "grad_norm": 8.552900070115108, "learning_rate": 4.590421139072182e-06, "loss": 1.2299, "step": 2956 }, { "epoch": 0.4186309903022581, "grad_norm": 9.898295677752749, "learning_rate": 4.590106699762299e-06, "loss": 1.3088, "step": 2957 }, { "epoch": 0.4187725631768953, "grad_norm": 9.343023673823636, "learning_rate": 4.589792150577065e-06, "loss": 1.3906, "step": 2958 }, { "epoch": 0.41891413605153255, "grad_norm": 10.519419196743536, "learning_rate": 4.589477491533016e-06, "loss": 1.4471, "step": 2959 }, { "epoch": 0.4190557089261697, "grad_norm": 9.798166484974889, "learning_rate": 4.589162722646694e-06, "loss": 1.3329, "step": 2960 }, { "epoch": 0.41919728180080695, "grad_norm": 11.082837736670301, "learning_rate": 4.588847843934645e-06, "loss": 1.3647, "step": 2961 }, { "epoch": 0.4193388546754442, "grad_norm": 8.226680280199128, "learning_rate": 4.588532855413422e-06, "loss": 1.3566, "step": 2962 }, { "epoch": 0.4194804275500814, "grad_norm": 9.032017869875911, "learning_rate": 4.588217757099584e-06, "loss": 1.186, "step": 2963 }, { "epoch": 0.41962200042471864, "grad_norm": 7.809783707479816, "learning_rate": 4.587902549009696e-06, "loss": 1.4253, "step": 2964 }, { "epoch": 0.4197635732993558, "grad_norm": 8.521746026165802, "learning_rate": 4.587587231160329e-06, "loss": 1.2788, "step": 2965 }, { "epoch": 0.41990514617399305, "grad_norm": 10.238144880577883, "learning_rate": 4.5872718035680554e-06, "loss": 1.4273, "step": 2966 }, { "epoch": 0.4200467190486303, "grad_norm": 8.24442634729054, "learning_rate": 4.586956266249461e-06, "loss": 1.236, "step": 2967 }, { "epoch": 0.4201882919232675, "grad_norm": 8.178961341782408, "learning_rate": 4.586640619221131e-06, "loss": 1.229, "step": 2968 }, { "epoch": 0.42032986479790474, "grad_norm": 9.866781170498507, "learning_rate": 4.586324862499661e-06, "loss": 1.3249, "step": 2969 }, { "epoch": 0.4204714376725419, "grad_norm": 8.712818602944314, "learning_rate": 4.586008996101646e-06, "loss": 1.269, "step": 2970 }, { "epoch": 0.42061301054717914, "grad_norm": 8.419087077687205, "learning_rate": 4.5856930200436955e-06, "loss": 1.2025, "step": 2971 }, { "epoch": 0.4207545834218164, "grad_norm": 8.613630679561709, "learning_rate": 4.585376934342418e-06, "loss": 1.2191, "step": 2972 }, { "epoch": 0.4208961562964536, "grad_norm": 9.209983252096947, "learning_rate": 4.585060739014429e-06, "loss": 1.3578, "step": 2973 }, { "epoch": 0.42103772917109084, "grad_norm": 9.854652011094336, "learning_rate": 4.584744434076352e-06, "loss": 1.2992, "step": 2974 }, { "epoch": 0.42117930204572807, "grad_norm": 8.504343605973187, "learning_rate": 4.584428019544815e-06, "loss": 1.2136, "step": 2975 }, { "epoch": 0.42132087492036524, "grad_norm": 8.476324881462116, "learning_rate": 4.58411149543645e-06, "loss": 1.3656, "step": 2976 }, { "epoch": 0.42146244779500247, "grad_norm": 8.168322752072866, "learning_rate": 4.583794861767899e-06, "loss": 1.3296, "step": 2977 }, { "epoch": 0.4216040206696397, "grad_norm": 9.02228626652345, "learning_rate": 4.583478118555806e-06, "loss": 1.2181, "step": 2978 }, { "epoch": 0.42174559354427693, "grad_norm": 9.331980153036126, "learning_rate": 4.583161265816821e-06, "loss": 1.0937, "step": 2979 }, { "epoch": 0.42188716641891416, "grad_norm": 11.656178655738431, "learning_rate": 4.582844303567602e-06, "loss": 1.3695, "step": 2980 }, { "epoch": 0.42202873929355134, "grad_norm": 9.286296666924486, "learning_rate": 4.58252723182481e-06, "loss": 1.2848, "step": 2981 }, { "epoch": 0.42217031216818857, "grad_norm": 9.805842187982886, "learning_rate": 4.582210050605115e-06, "loss": 1.1828, "step": 2982 }, { "epoch": 0.4223118850428258, "grad_norm": 7.966396088840117, "learning_rate": 4.58189275992519e-06, "loss": 1.265, "step": 2983 }, { "epoch": 0.422453457917463, "grad_norm": 10.98751990063702, "learning_rate": 4.581575359801715e-06, "loss": 1.3565, "step": 2984 }, { "epoch": 0.42259503079210026, "grad_norm": 12.293565782292568, "learning_rate": 4.581257850251376e-06, "loss": 1.4169, "step": 2985 }, { "epoch": 0.42273660366673743, "grad_norm": 8.987911583146824, "learning_rate": 4.580940231290864e-06, "loss": 1.4284, "step": 2986 }, { "epoch": 0.42287817654137466, "grad_norm": 8.220514028399519, "learning_rate": 4.580622502936875e-06, "loss": 1.2508, "step": 2987 }, { "epoch": 0.4230197494160119, "grad_norm": 11.443337873603712, "learning_rate": 4.580304665206111e-06, "loss": 1.2951, "step": 2988 }, { "epoch": 0.4231613222906491, "grad_norm": 9.432100684122794, "learning_rate": 4.579986718115283e-06, "loss": 1.3227, "step": 2989 }, { "epoch": 0.42330289516528635, "grad_norm": 9.840079068122092, "learning_rate": 4.579668661681105e-06, "loss": 1.349, "step": 2990 }, { "epoch": 0.4234444680399235, "grad_norm": 9.27899162996418, "learning_rate": 4.579350495920295e-06, "loss": 1.2828, "step": 2991 }, { "epoch": 0.42358604091456076, "grad_norm": 6.697593131391463, "learning_rate": 4.579032220849581e-06, "loss": 1.1961, "step": 2992 }, { "epoch": 0.423727613789198, "grad_norm": 7.976592389834424, "learning_rate": 4.578713836485692e-06, "loss": 1.3202, "step": 2993 }, { "epoch": 0.4238691866638352, "grad_norm": 7.958975987342206, "learning_rate": 4.578395342845367e-06, "loss": 1.2972, "step": 2994 }, { "epoch": 0.42401075953847245, "grad_norm": 10.002984555231855, "learning_rate": 4.578076739945349e-06, "loss": 1.3501, "step": 2995 }, { "epoch": 0.4241523324131096, "grad_norm": 9.943777728566578, "learning_rate": 4.577758027802386e-06, "loss": 1.3184, "step": 2996 }, { "epoch": 0.42429390528774685, "grad_norm": 9.976442436386, "learning_rate": 4.5774392064332325e-06, "loss": 1.2964, "step": 2997 }, { "epoch": 0.4244354781623841, "grad_norm": 8.876351696829097, "learning_rate": 4.577120275854649e-06, "loss": 1.2717, "step": 2998 }, { "epoch": 0.4245770510370213, "grad_norm": 9.088474277075257, "learning_rate": 4.576801236083402e-06, "loss": 1.3159, "step": 2999 }, { "epoch": 0.42471862391165854, "grad_norm": 9.734267624558276, "learning_rate": 4.576482087136262e-06, "loss": 1.3154, "step": 3000 }, { "epoch": 0.4248601967862957, "grad_norm": 7.1059749982862686, "learning_rate": 4.576162829030007e-06, "loss": 1.2792, "step": 3001 }, { "epoch": 0.42500176966093295, "grad_norm": 9.015256454366025, "learning_rate": 4.57584346178142e-06, "loss": 1.3329, "step": 3002 }, { "epoch": 0.4251433425355702, "grad_norm": 10.009769245426174, "learning_rate": 4.5755239854072904e-06, "loss": 1.4544, "step": 3003 }, { "epoch": 0.4252849154102074, "grad_norm": 8.499095644244717, "learning_rate": 4.575204399924412e-06, "loss": 1.4657, "step": 3004 }, { "epoch": 0.42542648828484464, "grad_norm": 9.011095201525563, "learning_rate": 4.574884705349586e-06, "loss": 1.2891, "step": 3005 }, { "epoch": 0.42556806115948187, "grad_norm": 10.60061684019275, "learning_rate": 4.574564901699618e-06, "loss": 1.3723, "step": 3006 }, { "epoch": 0.42570963403411904, "grad_norm": 9.796833725740408, "learning_rate": 4.57424498899132e-06, "loss": 1.2056, "step": 3007 }, { "epoch": 0.4258512069087563, "grad_norm": 9.61568247641865, "learning_rate": 4.573924967241509e-06, "loss": 1.2731, "step": 3008 }, { "epoch": 0.4259927797833935, "grad_norm": 9.058445516790618, "learning_rate": 4.57360483646701e-06, "loss": 1.3853, "step": 3009 }, { "epoch": 0.42613435265803074, "grad_norm": 8.174806029092142, "learning_rate": 4.57328459668465e-06, "loss": 1.2625, "step": 3010 }, { "epoch": 0.42627592553266797, "grad_norm": 11.209794489314948, "learning_rate": 4.572964247911265e-06, "loss": 1.4644, "step": 3011 }, { "epoch": 0.42641749840730514, "grad_norm": 10.459453588623003, "learning_rate": 4.572643790163696e-06, "loss": 1.4552, "step": 3012 }, { "epoch": 0.42655907128194237, "grad_norm": 9.489729700449727, "learning_rate": 4.572323223458786e-06, "loss": 1.4446, "step": 3013 }, { "epoch": 0.4267006441565796, "grad_norm": 9.840557052847748, "learning_rate": 4.572002547813391e-06, "loss": 1.2101, "step": 3014 }, { "epoch": 0.42684221703121683, "grad_norm": 10.00866781327487, "learning_rate": 4.571681763244367e-06, "loss": 1.2079, "step": 3015 }, { "epoch": 0.42698378990585406, "grad_norm": 10.449534895918365, "learning_rate": 4.571360869768578e-06, "loss": 1.2348, "step": 3016 }, { "epoch": 0.42712536278049124, "grad_norm": 10.278261254655153, "learning_rate": 4.571039867402891e-06, "loss": 1.4295, "step": 3017 }, { "epoch": 0.42726693565512847, "grad_norm": 10.991873079858179, "learning_rate": 4.570718756164183e-06, "loss": 1.2489, "step": 3018 }, { "epoch": 0.4274085085297657, "grad_norm": 8.425358831987745, "learning_rate": 4.570397536069335e-06, "loss": 1.2335, "step": 3019 }, { "epoch": 0.4275500814044029, "grad_norm": 9.089839973006306, "learning_rate": 4.570076207135231e-06, "loss": 1.3523, "step": 3020 }, { "epoch": 0.42769165427904016, "grad_norm": 8.701501773698364, "learning_rate": 4.569754769378765e-06, "loss": 1.2193, "step": 3021 }, { "epoch": 0.42783322715367733, "grad_norm": 9.068797481946252, "learning_rate": 4.569433222816834e-06, "loss": 1.4387, "step": 3022 }, { "epoch": 0.42797480002831456, "grad_norm": 8.790540674308929, "learning_rate": 4.569111567466341e-06, "loss": 1.2454, "step": 3023 }, { "epoch": 0.4281163729029518, "grad_norm": 9.05835034305364, "learning_rate": 4.568789803344196e-06, "loss": 1.4937, "step": 3024 }, { "epoch": 0.428257945777589, "grad_norm": 9.197134351168167, "learning_rate": 4.568467930467314e-06, "loss": 1.4753, "step": 3025 }, { "epoch": 0.42839951865222625, "grad_norm": 9.287261146564804, "learning_rate": 4.568145948852614e-06, "loss": 1.1303, "step": 3026 }, { "epoch": 0.4285410915268634, "grad_norm": 9.020376662279382, "learning_rate": 4.567823858517024e-06, "loss": 1.4017, "step": 3027 }, { "epoch": 0.42868266440150066, "grad_norm": 8.378096733913527, "learning_rate": 4.567501659477477e-06, "loss": 1.2225, "step": 3028 }, { "epoch": 0.4288242372761379, "grad_norm": 10.151587346172649, "learning_rate": 4.567179351750908e-06, "loss": 1.4223, "step": 3029 }, { "epoch": 0.4289658101507751, "grad_norm": 10.24201058497568, "learning_rate": 4.566856935354262e-06, "loss": 1.3197, "step": 3030 }, { "epoch": 0.42910738302541235, "grad_norm": 8.686756129629982, "learning_rate": 4.566534410304488e-06, "loss": 1.3219, "step": 3031 }, { "epoch": 0.4292489559000496, "grad_norm": 11.93073756225058, "learning_rate": 4.566211776618541e-06, "loss": 1.3179, "step": 3032 }, { "epoch": 0.42939052877468675, "grad_norm": 8.736220927340872, "learning_rate": 4.565889034313382e-06, "loss": 1.3479, "step": 3033 }, { "epoch": 0.429532101649324, "grad_norm": 9.728271530327135, "learning_rate": 4.565566183405976e-06, "loss": 1.4753, "step": 3034 }, { "epoch": 0.4296736745239612, "grad_norm": 10.451881232058778, "learning_rate": 4.565243223913297e-06, "loss": 1.295, "step": 3035 }, { "epoch": 0.42981524739859844, "grad_norm": 9.851195798215826, "learning_rate": 4.564920155852321e-06, "loss": 1.4287, "step": 3036 }, { "epoch": 0.4299568202732357, "grad_norm": 9.16591012318092, "learning_rate": 4.564596979240031e-06, "loss": 1.2543, "step": 3037 }, { "epoch": 0.43009839314787285, "grad_norm": 10.521734358978012, "learning_rate": 4.564273694093419e-06, "loss": 1.3226, "step": 3038 }, { "epoch": 0.4302399660225101, "grad_norm": 9.824007126821114, "learning_rate": 4.5639503004294774e-06, "loss": 1.3749, "step": 3039 }, { "epoch": 0.4303815388971473, "grad_norm": 10.908357520449416, "learning_rate": 4.5636267982652075e-06, "loss": 1.2588, "step": 3040 }, { "epoch": 0.43052311177178454, "grad_norm": 10.841541252222067, "learning_rate": 4.5633031876176156e-06, "loss": 1.3138, "step": 3041 }, { "epoch": 0.43066468464642177, "grad_norm": 8.834673318053065, "learning_rate": 4.562979468503713e-06, "loss": 1.2716, "step": 3042 }, { "epoch": 0.43080625752105894, "grad_norm": 10.236994983108065, "learning_rate": 4.562655640940519e-06, "loss": 1.2831, "step": 3043 }, { "epoch": 0.4309478303956962, "grad_norm": 10.790343764435288, "learning_rate": 4.562331704945055e-06, "loss": 1.423, "step": 3044 }, { "epoch": 0.4310894032703334, "grad_norm": 11.044993195779897, "learning_rate": 4.562007660534351e-06, "loss": 1.3862, "step": 3045 }, { "epoch": 0.43123097614497063, "grad_norm": 12.332888689703527, "learning_rate": 4.5616835077254425e-06, "loss": 1.4661, "step": 3046 }, { "epoch": 0.43137254901960786, "grad_norm": 10.849197118911611, "learning_rate": 4.561359246535369e-06, "loss": 1.4065, "step": 3047 }, { "epoch": 0.43151412189424504, "grad_norm": 8.376650974892996, "learning_rate": 4.561034876981177e-06, "loss": 1.3778, "step": 3048 }, { "epoch": 0.43165569476888227, "grad_norm": 8.628744003822115, "learning_rate": 4.560710399079918e-06, "loss": 1.083, "step": 3049 }, { "epoch": 0.4317972676435195, "grad_norm": 11.673885473602233, "learning_rate": 4.56038581284865e-06, "loss": 1.3416, "step": 3050 }, { "epoch": 0.43193884051815673, "grad_norm": 9.612728073529647, "learning_rate": 4.560061118304436e-06, "loss": 1.2255, "step": 3051 }, { "epoch": 0.43208041339279396, "grad_norm": 8.484020696446608, "learning_rate": 4.559736315464345e-06, "loss": 1.3194, "step": 3052 }, { "epoch": 0.43222198626743114, "grad_norm": 8.821593442471594, "learning_rate": 4.559411404345452e-06, "loss": 1.2654, "step": 3053 }, { "epoch": 0.43236355914206837, "grad_norm": 9.367404556623004, "learning_rate": 4.5590863849648364e-06, "loss": 1.4809, "step": 3054 }, { "epoch": 0.4325051320167056, "grad_norm": 10.422537123480637, "learning_rate": 4.5587612573395855e-06, "loss": 1.1988, "step": 3055 }, { "epoch": 0.4326467048913428, "grad_norm": 10.311931062667696, "learning_rate": 4.55843602148679e-06, "loss": 1.3014, "step": 3056 }, { "epoch": 0.43278827776598006, "grad_norm": 9.492143293364139, "learning_rate": 4.558110677423548e-06, "loss": 1.2735, "step": 3057 }, { "epoch": 0.43292985064061723, "grad_norm": 7.195667701262488, "learning_rate": 4.557785225166962e-06, "loss": 1.0809, "step": 3058 }, { "epoch": 0.43307142351525446, "grad_norm": 9.930121603660076, "learning_rate": 4.5574596647341414e-06, "loss": 1.3027, "step": 3059 }, { "epoch": 0.4332129963898917, "grad_norm": 11.033317393803296, "learning_rate": 4.5571339961422e-06, "loss": 1.4836, "step": 3060 }, { "epoch": 0.4333545692645289, "grad_norm": 11.598193034479081, "learning_rate": 4.5568082194082584e-06, "loss": 1.3783, "step": 3061 }, { "epoch": 0.43349614213916615, "grad_norm": 8.999477371300124, "learning_rate": 4.556482334549442e-06, "loss": 1.2521, "step": 3062 }, { "epoch": 0.4336377150138034, "grad_norm": 10.854868177318998, "learning_rate": 4.556156341582884e-06, "loss": 1.3511, "step": 3063 }, { "epoch": 0.43377928788844056, "grad_norm": 9.737455465388773, "learning_rate": 4.555830240525719e-06, "loss": 1.242, "step": 3064 }, { "epoch": 0.4339208607630778, "grad_norm": 9.264873609262324, "learning_rate": 4.5555040313950915e-06, "loss": 1.3034, "step": 3065 }, { "epoch": 0.434062433637715, "grad_norm": 11.170402888696575, "learning_rate": 4.555177714208149e-06, "loss": 1.2996, "step": 3066 }, { "epoch": 0.43420400651235225, "grad_norm": 12.213147842575614, "learning_rate": 4.554851288982047e-06, "loss": 1.1687, "step": 3067 }, { "epoch": 0.4343455793869895, "grad_norm": 10.859375, "learning_rate": 4.554524755733946e-06, "loss": 1.4498, "step": 3068 }, { "epoch": 0.43448715226162665, "grad_norm": 7.512556881300493, "learning_rate": 4.554198114481009e-06, "loss": 1.1112, "step": 3069 }, { "epoch": 0.4346287251362639, "grad_norm": 9.410281511853098, "learning_rate": 4.553871365240409e-06, "loss": 1.2818, "step": 3070 }, { "epoch": 0.4347702980109011, "grad_norm": 10.569840809066328, "learning_rate": 4.553544508029323e-06, "loss": 1.3229, "step": 3071 }, { "epoch": 0.43491187088553834, "grad_norm": 10.448983642214847, "learning_rate": 4.5532175428649335e-06, "loss": 1.3744, "step": 3072 }, { "epoch": 0.4350534437601756, "grad_norm": 11.05334058987602, "learning_rate": 4.5528904697644296e-06, "loss": 1.3426, "step": 3073 }, { "epoch": 0.43519501663481275, "grad_norm": 11.329418703067429, "learning_rate": 4.552563288745004e-06, "loss": 1.2047, "step": 3074 }, { "epoch": 0.43533658950945, "grad_norm": 9.204631999387082, "learning_rate": 4.552235999823856e-06, "loss": 1.2107, "step": 3075 }, { "epoch": 0.4354781623840872, "grad_norm": 11.009199630375718, "learning_rate": 4.551908603018191e-06, "loss": 1.3022, "step": 3076 }, { "epoch": 0.43561973525872444, "grad_norm": 9.02502649361481, "learning_rate": 4.551581098345222e-06, "loss": 1.2204, "step": 3077 }, { "epoch": 0.43576130813336167, "grad_norm": 10.874110042944858, "learning_rate": 4.551253485822164e-06, "loss": 1.3745, "step": 3078 }, { "epoch": 0.43590288100799884, "grad_norm": 9.553690124507765, "learning_rate": 4.55092576546624e-06, "loss": 1.2848, "step": 3079 }, { "epoch": 0.4360444538826361, "grad_norm": 9.0072026571534, "learning_rate": 4.550597937294677e-06, "loss": 1.3833, "step": 3080 }, { "epoch": 0.4361860267572733, "grad_norm": 8.863687962379633, "learning_rate": 4.55027000132471e-06, "loss": 1.2718, "step": 3081 }, { "epoch": 0.43632759963191053, "grad_norm": 12.993981949138272, "learning_rate": 4.549941957573578e-06, "loss": 1.2825, "step": 3082 }, { "epoch": 0.43646917250654776, "grad_norm": 10.601714344666076, "learning_rate": 4.549613806058526e-06, "loss": 1.2658, "step": 3083 }, { "epoch": 0.43661074538118494, "grad_norm": 8.193230174413383, "learning_rate": 4.5492855467968036e-06, "loss": 1.3301, "step": 3084 }, { "epoch": 0.43675231825582217, "grad_norm": 9.33320722040623, "learning_rate": 4.548957179805668e-06, "loss": 1.261, "step": 3085 }, { "epoch": 0.4368938911304594, "grad_norm": 10.67340014438182, "learning_rate": 4.548628705102382e-06, "loss": 1.4827, "step": 3086 }, { "epoch": 0.43703546400509663, "grad_norm": 8.798425134708724, "learning_rate": 4.5483001227042126e-06, "loss": 1.2903, "step": 3087 }, { "epoch": 0.43717703687973386, "grad_norm": 9.737127952666615, "learning_rate": 4.5479714326284316e-06, "loss": 1.2286, "step": 3088 }, { "epoch": 0.43731860975437103, "grad_norm": 7.151878654321032, "learning_rate": 4.547642634892321e-06, "loss": 1.1878, "step": 3089 }, { "epoch": 0.43746018262900827, "grad_norm": 6.8817668557844245, "learning_rate": 4.547313729513163e-06, "loss": 1.2536, "step": 3090 }, { "epoch": 0.4376017555036455, "grad_norm": 10.294464431178763, "learning_rate": 4.546984716508249e-06, "loss": 1.513, "step": 3091 }, { "epoch": 0.4377433283782827, "grad_norm": 9.439208425534611, "learning_rate": 4.546655595894875e-06, "loss": 1.3117, "step": 3092 }, { "epoch": 0.43788490125291996, "grad_norm": 9.89486049220282, "learning_rate": 4.546326367690342e-06, "loss": 1.4076, "step": 3093 }, { "epoch": 0.4380264741275572, "grad_norm": 7.474559360134471, "learning_rate": 4.545997031911958e-06, "loss": 1.2868, "step": 3094 }, { "epoch": 0.43816804700219436, "grad_norm": 7.963975381028554, "learning_rate": 4.545667588577035e-06, "loss": 1.2087, "step": 3095 }, { "epoch": 0.4383096198768316, "grad_norm": 11.794429459884004, "learning_rate": 4.545338037702893e-06, "loss": 1.4677, "step": 3096 }, { "epoch": 0.4384511927514688, "grad_norm": 10.917537237112153, "learning_rate": 4.545008379306854e-06, "loss": 1.3663, "step": 3097 }, { "epoch": 0.43859276562610605, "grad_norm": 9.276785345036858, "learning_rate": 4.5446786134062515e-06, "loss": 1.3235, "step": 3098 }, { "epoch": 0.4387343385007433, "grad_norm": 8.705132682617345, "learning_rate": 4.544348740018417e-06, "loss": 1.3545, "step": 3099 }, { "epoch": 0.43887591137538046, "grad_norm": 11.00221923636318, "learning_rate": 4.544018759160694e-06, "loss": 1.4109, "step": 3100 }, { "epoch": 0.4390174842500177, "grad_norm": 10.718087668016032, "learning_rate": 4.5436886708504295e-06, "loss": 1.332, "step": 3101 }, { "epoch": 0.4391590571246549, "grad_norm": 12.594016552700234, "learning_rate": 4.543358475104975e-06, "loss": 1.3964, "step": 3102 }, { "epoch": 0.43930062999929215, "grad_norm": 8.729894591019557, "learning_rate": 4.543028171941689e-06, "loss": 1.265, "step": 3103 }, { "epoch": 0.4394422028739294, "grad_norm": 9.425929372282077, "learning_rate": 4.5426977613779355e-06, "loss": 1.3441, "step": 3104 }, { "epoch": 0.43958377574856655, "grad_norm": 11.25893200106864, "learning_rate": 4.542367243431084e-06, "loss": 1.4898, "step": 3105 }, { "epoch": 0.4397253486232038, "grad_norm": 9.60517168613436, "learning_rate": 4.54203661811851e-06, "loss": 1.4478, "step": 3106 }, { "epoch": 0.439866921497841, "grad_norm": 9.132640409520643, "learning_rate": 4.541705885457593e-06, "loss": 1.3283, "step": 3107 }, { "epoch": 0.44000849437247824, "grad_norm": 9.664628526522065, "learning_rate": 4.541375045465719e-06, "loss": 1.2846, "step": 3108 }, { "epoch": 0.4401500672471155, "grad_norm": 10.291805703978165, "learning_rate": 4.541044098160281e-06, "loss": 1.3529, "step": 3109 }, { "epoch": 0.44029164012175265, "grad_norm": 9.426709606427169, "learning_rate": 4.540713043558678e-06, "loss": 1.3674, "step": 3110 }, { "epoch": 0.4404332129963899, "grad_norm": 8.417076428199078, "learning_rate": 4.54038188167831e-06, "loss": 1.237, "step": 3111 }, { "epoch": 0.4405747858710271, "grad_norm": 11.901394392110635, "learning_rate": 4.54005061253659e-06, "loss": 1.3909, "step": 3112 }, { "epoch": 0.44071635874566434, "grad_norm": 9.76257804028607, "learning_rate": 4.539719236150929e-06, "loss": 1.2295, "step": 3113 }, { "epoch": 0.44085793162030157, "grad_norm": 11.185736661073284, "learning_rate": 4.53938775253875e-06, "loss": 1.3937, "step": 3114 }, { "epoch": 0.44099950449493874, "grad_norm": 10.165759520369177, "learning_rate": 4.539056161717477e-06, "loss": 1.2309, "step": 3115 }, { "epoch": 0.441141077369576, "grad_norm": 9.44335715324431, "learning_rate": 4.5387244637045414e-06, "loss": 1.3241, "step": 3116 }, { "epoch": 0.4412826502442132, "grad_norm": 8.8982047166929, "learning_rate": 4.53839265851738e-06, "loss": 1.2732, "step": 3117 }, { "epoch": 0.44142422311885043, "grad_norm": 11.560728195116742, "learning_rate": 4.538060746173438e-06, "loss": 1.2203, "step": 3118 }, { "epoch": 0.44156579599348766, "grad_norm": 10.134885883187554, "learning_rate": 4.537728726690162e-06, "loss": 1.4116, "step": 3119 }, { "epoch": 0.4417073688681249, "grad_norm": 10.522975670777418, "learning_rate": 4.537396600085006e-06, "loss": 1.3016, "step": 3120 }, { "epoch": 0.44184894174276207, "grad_norm": 9.166501500346758, "learning_rate": 4.537064366375429e-06, "loss": 1.3841, "step": 3121 }, { "epoch": 0.4419905146173993, "grad_norm": 8.760118138461644, "learning_rate": 4.5367320255788985e-06, "loss": 1.2636, "step": 3122 }, { "epoch": 0.44213208749203653, "grad_norm": 10.317603495702443, "learning_rate": 4.536399577712883e-06, "loss": 1.4575, "step": 3123 }, { "epoch": 0.44227366036667376, "grad_norm": 11.414296444813326, "learning_rate": 4.536067022794861e-06, "loss": 1.24, "step": 3124 }, { "epoch": 0.442415233241311, "grad_norm": 11.971259349585534, "learning_rate": 4.535734360842313e-06, "loss": 1.4596, "step": 3125 }, { "epoch": 0.44255680611594816, "grad_norm": 10.195941446716104, "learning_rate": 4.535401591872729e-06, "loss": 1.3361, "step": 3126 }, { "epoch": 0.4426983789905854, "grad_norm": 10.43556899065362, "learning_rate": 4.5350687159036e-06, "loss": 1.2408, "step": 3127 }, { "epoch": 0.4428399518652226, "grad_norm": 11.75071746583646, "learning_rate": 4.5347357329524254e-06, "loss": 1.4091, "step": 3128 }, { "epoch": 0.44298152473985986, "grad_norm": 12.364239570875988, "learning_rate": 4.534402643036711e-06, "loss": 1.2198, "step": 3129 }, { "epoch": 0.4431230976144971, "grad_norm": 12.726608660329116, "learning_rate": 4.534069446173967e-06, "loss": 1.4787, "step": 3130 }, { "epoch": 0.44326467048913426, "grad_norm": 7.861125773793248, "learning_rate": 4.533736142381708e-06, "loss": 1.1898, "step": 3131 }, { "epoch": 0.4434062433637715, "grad_norm": 8.447383263270469, "learning_rate": 4.533402731677457e-06, "loss": 1.2915, "step": 3132 }, { "epoch": 0.4435478162384087, "grad_norm": 10.905808229434765, "learning_rate": 4.53306921407874e-06, "loss": 1.3213, "step": 3133 }, { "epoch": 0.44368938911304595, "grad_norm": 11.396845705765067, "learning_rate": 4.532735589603091e-06, "loss": 1.1893, "step": 3134 }, { "epoch": 0.4438309619876832, "grad_norm": 8.867132863956126, "learning_rate": 4.5324018582680476e-06, "loss": 1.2807, "step": 3135 }, { "epoch": 0.44397253486232036, "grad_norm": 11.019615024273902, "learning_rate": 4.532068020091154e-06, "loss": 1.3727, "step": 3136 }, { "epoch": 0.4441141077369576, "grad_norm": 7.731520000606309, "learning_rate": 4.531734075089959e-06, "loss": 1.1939, "step": 3137 }, { "epoch": 0.4442556806115948, "grad_norm": 9.294963941581106, "learning_rate": 4.53140002328202e-06, "loss": 1.3626, "step": 3138 }, { "epoch": 0.44439725348623205, "grad_norm": 8.911547989044056, "learning_rate": 4.531065864684896e-06, "loss": 1.2683, "step": 3139 }, { "epoch": 0.4445388263608693, "grad_norm": 11.140847908775385, "learning_rate": 4.530731599316153e-06, "loss": 1.2741, "step": 3140 }, { "epoch": 0.44468039923550645, "grad_norm": 8.95785658403159, "learning_rate": 4.530397227193365e-06, "loss": 1.2367, "step": 3141 }, { "epoch": 0.4448219721101437, "grad_norm": 8.660210870373534, "learning_rate": 4.530062748334109e-06, "loss": 1.4926, "step": 3142 }, { "epoch": 0.4449635449847809, "grad_norm": 9.44908108557739, "learning_rate": 4.529728162755966e-06, "loss": 1.2297, "step": 3143 }, { "epoch": 0.44510511785941814, "grad_norm": 9.266787991541394, "learning_rate": 4.5293934704765285e-06, "loss": 1.1719, "step": 3144 }, { "epoch": 0.4452466907340554, "grad_norm": 8.746465786666304, "learning_rate": 4.529058671513389e-06, "loss": 1.2027, "step": 3145 }, { "epoch": 0.44538826360869255, "grad_norm": 8.659352762117331, "learning_rate": 4.528723765884149e-06, "loss": 1.3485, "step": 3146 }, { "epoch": 0.4455298364833298, "grad_norm": 9.683806472048373, "learning_rate": 4.528388753606412e-06, "loss": 1.4244, "step": 3147 }, { "epoch": 0.445671409357967, "grad_norm": 9.861943752136924, "learning_rate": 4.528053634697791e-06, "loss": 1.3722, "step": 3148 }, { "epoch": 0.44581298223260424, "grad_norm": 8.834934977112434, "learning_rate": 4.527718409175903e-06, "loss": 1.2167, "step": 3149 }, { "epoch": 0.44595455510724147, "grad_norm": 9.459291474018004, "learning_rate": 4.52738307705837e-06, "loss": 1.348, "step": 3150 }, { "epoch": 0.4460961279818787, "grad_norm": 8.471428816980751, "learning_rate": 4.52704763836282e-06, "loss": 1.3216, "step": 3151 }, { "epoch": 0.4462377008565159, "grad_norm": 7.678654241273744, "learning_rate": 4.526712093106888e-06, "loss": 1.3076, "step": 3152 }, { "epoch": 0.4463792737311531, "grad_norm": 10.061071448479344, "learning_rate": 4.5263764413082115e-06, "loss": 1.3076, "step": 3153 }, { "epoch": 0.44652084660579033, "grad_norm": 9.874505525594687, "learning_rate": 4.5260406829844364e-06, "loss": 1.3502, "step": 3154 }, { "epoch": 0.44666241948042756, "grad_norm": 10.6748528856776, "learning_rate": 4.525704818153214e-06, "loss": 1.4079, "step": 3155 }, { "epoch": 0.4468039923550648, "grad_norm": 9.156270414466363, "learning_rate": 4.525368846832199e-06, "loss": 1.4252, "step": 3156 }, { "epoch": 0.44694556522970197, "grad_norm": 9.830513328894392, "learning_rate": 4.525032769039054e-06, "loss": 1.4442, "step": 3157 }, { "epoch": 0.4470871381043392, "grad_norm": 9.060527297416336, "learning_rate": 4.524696584791447e-06, "loss": 1.264, "step": 3158 }, { "epoch": 0.44722871097897643, "grad_norm": 10.566238012754942, "learning_rate": 4.524360294107049e-06, "loss": 1.397, "step": 3159 }, { "epoch": 0.44737028385361366, "grad_norm": 8.874897701049942, "learning_rate": 4.5240238970035414e-06, "loss": 1.4483, "step": 3160 }, { "epoch": 0.4475118567282509, "grad_norm": 8.433540828197781, "learning_rate": 4.523687393498605e-06, "loss": 1.2369, "step": 3161 }, { "epoch": 0.44765342960288806, "grad_norm": 8.078385192104404, "learning_rate": 4.523350783609932e-06, "loss": 1.3928, "step": 3162 }, { "epoch": 0.4477950024775253, "grad_norm": 11.374170294044735, "learning_rate": 4.523014067355217e-06, "loss": 1.4235, "step": 3163 }, { "epoch": 0.4479365753521625, "grad_norm": 11.331362010572622, "learning_rate": 4.52267724475216e-06, "loss": 1.4934, "step": 3164 }, { "epoch": 0.44807814822679976, "grad_norm": 10.30197438695764, "learning_rate": 4.52234031581847e-06, "loss": 1.3151, "step": 3165 }, { "epoch": 0.448219721101437, "grad_norm": 9.272745571016422, "learning_rate": 4.5220032805718575e-06, "loss": 1.3527, "step": 3166 }, { "epoch": 0.44836129397607416, "grad_norm": 9.110467076476512, "learning_rate": 4.521666139030039e-06, "loss": 1.291, "step": 3167 }, { "epoch": 0.4485028668507114, "grad_norm": 11.446045065079726, "learning_rate": 4.52132889121074e-06, "loss": 1.3233, "step": 3168 }, { "epoch": 0.4486444397253486, "grad_norm": 9.804212435577556, "learning_rate": 4.520991537131687e-06, "loss": 1.4171, "step": 3169 }, { "epoch": 0.44878601259998585, "grad_norm": 9.107125529638738, "learning_rate": 4.520654076810617e-06, "loss": 1.1498, "step": 3170 }, { "epoch": 0.4489275854746231, "grad_norm": 10.668184728681512, "learning_rate": 4.520316510265268e-06, "loss": 1.3108, "step": 3171 }, { "epoch": 0.44906915834926026, "grad_norm": 12.098556154978008, "learning_rate": 4.519978837513388e-06, "loss": 1.3501, "step": 3172 }, { "epoch": 0.4492107312238975, "grad_norm": 8.42656400296088, "learning_rate": 4.519641058572725e-06, "loss": 1.3183, "step": 3173 }, { "epoch": 0.4493523040985347, "grad_norm": 9.976170567257345, "learning_rate": 4.519303173461038e-06, "loss": 1.3579, "step": 3174 }, { "epoch": 0.44949387697317195, "grad_norm": 10.836201175707757, "learning_rate": 4.5189651821960885e-06, "loss": 1.2673, "step": 3175 }, { "epoch": 0.4496354498478092, "grad_norm": 10.987552883120745, "learning_rate": 4.518627084795646e-06, "loss": 1.3422, "step": 3176 }, { "epoch": 0.4497770227224464, "grad_norm": 9.642205651290443, "learning_rate": 4.5182888812774814e-06, "loss": 1.2973, "step": 3177 }, { "epoch": 0.4499185955970836, "grad_norm": 9.726344045614074, "learning_rate": 4.517950571659376e-06, "loss": 1.3535, "step": 3178 }, { "epoch": 0.4500601684717208, "grad_norm": 12.45520721985983, "learning_rate": 4.517612155959114e-06, "loss": 1.2379, "step": 3179 }, { "epoch": 0.45020174134635804, "grad_norm": 9.546269224336575, "learning_rate": 4.5172736341944845e-06, "loss": 1.3366, "step": 3180 }, { "epoch": 0.45034331422099527, "grad_norm": 8.812116898670821, "learning_rate": 4.516935006383285e-06, "loss": 1.3875, "step": 3181 }, { "epoch": 0.4504848870956325, "grad_norm": 11.13176632968247, "learning_rate": 4.516596272543316e-06, "loss": 1.2997, "step": 3182 }, { "epoch": 0.4506264599702697, "grad_norm": 10.404922538150025, "learning_rate": 4.516257432692383e-06, "loss": 1.1973, "step": 3183 }, { "epoch": 0.4507680328449069, "grad_norm": 9.60335653753985, "learning_rate": 4.515918486848302e-06, "loss": 1.2088, "step": 3184 }, { "epoch": 0.45090960571954414, "grad_norm": 9.756709602277994, "learning_rate": 4.5155794350288885e-06, "loss": 1.4947, "step": 3185 }, { "epoch": 0.45105117859418137, "grad_norm": 10.096271967493822, "learning_rate": 4.515240277251968e-06, "loss": 1.2796, "step": 3186 }, { "epoch": 0.4511927514688186, "grad_norm": 8.147424826964356, "learning_rate": 4.514901013535368e-06, "loss": 1.196, "step": 3187 }, { "epoch": 0.4513343243434558, "grad_norm": 7.610143477782887, "learning_rate": 4.514561643896924e-06, "loss": 1.2552, "step": 3188 }, { "epoch": 0.451475897218093, "grad_norm": 10.23272663848731, "learning_rate": 4.514222168354476e-06, "loss": 1.3236, "step": 3189 }, { "epoch": 0.45161747009273023, "grad_norm": 9.633191474829236, "learning_rate": 4.513882586925872e-06, "loss": 1.4203, "step": 3190 }, { "epoch": 0.45175904296736746, "grad_norm": 9.282904994358034, "learning_rate": 4.51354289962896e-06, "loss": 1.273, "step": 3191 }, { "epoch": 0.4519006158420047, "grad_norm": 9.641142152736746, "learning_rate": 4.5132031064816e-06, "loss": 1.3095, "step": 3192 }, { "epoch": 0.45204218871664187, "grad_norm": 9.781113498698094, "learning_rate": 4.512863207501654e-06, "loss": 1.2868, "step": 3193 }, { "epoch": 0.4521837615912791, "grad_norm": 8.88302421485274, "learning_rate": 4.51252320270699e-06, "loss": 1.2813, "step": 3194 }, { "epoch": 0.45232533446591633, "grad_norm": 8.370907822830228, "learning_rate": 4.512183092115482e-06, "loss": 1.373, "step": 3195 }, { "epoch": 0.45246690734055356, "grad_norm": 7.942942277975461, "learning_rate": 4.511842875745009e-06, "loss": 1.2062, "step": 3196 }, { "epoch": 0.4526084802151908, "grad_norm": 12.005361630777399, "learning_rate": 4.511502553613456e-06, "loss": 1.3965, "step": 3197 }, { "epoch": 0.45275005308982796, "grad_norm": 8.87677671888801, "learning_rate": 4.511162125738714e-06, "loss": 1.3295, "step": 3198 }, { "epoch": 0.4528916259644652, "grad_norm": 8.942758573740159, "learning_rate": 4.510821592138678e-06, "loss": 1.4553, "step": 3199 }, { "epoch": 0.4530331988391024, "grad_norm": 8.010966413969177, "learning_rate": 4.510480952831251e-06, "loss": 1.3207, "step": 3200 }, { "epoch": 0.45317477171373965, "grad_norm": 8.886272310214732, "learning_rate": 4.510140207834339e-06, "loss": 1.2813, "step": 3201 }, { "epoch": 0.4533163445883769, "grad_norm": 11.775548552028134, "learning_rate": 4.509799357165855e-06, "loss": 1.4606, "step": 3202 }, { "epoch": 0.45345791746301406, "grad_norm": 14.433285052517551, "learning_rate": 4.509458400843717e-06, "loss": 1.4704, "step": 3203 }, { "epoch": 0.4535994903376513, "grad_norm": 8.541884951399734, "learning_rate": 4.50911733888585e-06, "loss": 1.2468, "step": 3204 }, { "epoch": 0.4537410632122885, "grad_norm": 8.993390093210254, "learning_rate": 4.508776171310183e-06, "loss": 1.163, "step": 3205 }, { "epoch": 0.45388263608692575, "grad_norm": 10.70473620685259, "learning_rate": 4.5084348981346495e-06, "loss": 1.6362, "step": 3206 }, { "epoch": 0.454024208961563, "grad_norm": 11.7699046260888, "learning_rate": 4.5080935193771905e-06, "loss": 1.2889, "step": 3207 }, { "epoch": 0.4541657818362002, "grad_norm": 9.366538743563066, "learning_rate": 4.5077520350557534e-06, "loss": 1.3717, "step": 3208 }, { "epoch": 0.4543073547108374, "grad_norm": 9.072879293359689, "learning_rate": 4.5074104451882886e-06, "loss": 1.3097, "step": 3209 }, { "epoch": 0.4544489275854746, "grad_norm": 12.280965476586497, "learning_rate": 4.507068749792754e-06, "loss": 1.4465, "step": 3210 }, { "epoch": 0.45459050046011185, "grad_norm": 8.913846386923435, "learning_rate": 4.50672694888711e-06, "loss": 1.3334, "step": 3211 }, { "epoch": 0.4547320733347491, "grad_norm": 7.284352963421194, "learning_rate": 4.506385042489329e-06, "loss": 1.3025, "step": 3212 }, { "epoch": 0.4548736462093863, "grad_norm": 9.897400377913518, "learning_rate": 4.5060430306173805e-06, "loss": 1.278, "step": 3213 }, { "epoch": 0.4550152190840235, "grad_norm": 10.443245225963498, "learning_rate": 4.505700913289246e-06, "loss": 1.4003, "step": 3214 }, { "epoch": 0.4551567919586607, "grad_norm": 7.951699119708999, "learning_rate": 4.505358690522911e-06, "loss": 1.4502, "step": 3215 }, { "epoch": 0.45529836483329794, "grad_norm": 8.612044624946524, "learning_rate": 4.505016362336364e-06, "loss": 1.2054, "step": 3216 }, { "epoch": 0.45543993770793517, "grad_norm": 8.499526515563648, "learning_rate": 4.504673928747601e-06, "loss": 1.2961, "step": 3217 }, { "epoch": 0.4555815105825724, "grad_norm": 9.39633639751752, "learning_rate": 4.504331389774626e-06, "loss": 1.4038, "step": 3218 }, { "epoch": 0.4557230834572096, "grad_norm": 10.632916541785246, "learning_rate": 4.503988745435443e-06, "loss": 1.3619, "step": 3219 }, { "epoch": 0.4558646563318468, "grad_norm": 10.004456480743851, "learning_rate": 4.503645995748067e-06, "loss": 1.335, "step": 3220 }, { "epoch": 0.45600622920648404, "grad_norm": 10.117666720672581, "learning_rate": 4.503303140730515e-06, "loss": 1.3547, "step": 3221 }, { "epoch": 0.45614780208112127, "grad_norm": 9.632834676576257, "learning_rate": 4.502960180400809e-06, "loss": 1.4667, "step": 3222 }, { "epoch": 0.4562893749557585, "grad_norm": 8.971782530517686, "learning_rate": 4.5026171147769816e-06, "loss": 1.3089, "step": 3223 }, { "epoch": 0.4564309478303957, "grad_norm": 9.50421771735819, "learning_rate": 4.5022739438770655e-06, "loss": 1.2125, "step": 3224 }, { "epoch": 0.4565725207050329, "grad_norm": 7.235031197038416, "learning_rate": 4.5019306677191e-06, "loss": 1.2378, "step": 3225 }, { "epoch": 0.45671409357967013, "grad_norm": 8.898889330879568, "learning_rate": 4.501587286321133e-06, "loss": 1.1922, "step": 3226 }, { "epoch": 0.45685566645430736, "grad_norm": 9.749798112392988, "learning_rate": 4.501243799701215e-06, "loss": 1.3738, "step": 3227 }, { "epoch": 0.4569972393289446, "grad_norm": 11.875630894013984, "learning_rate": 4.500900207877402e-06, "loss": 1.5478, "step": 3228 }, { "epoch": 0.45713881220358177, "grad_norm": 9.774898721701993, "learning_rate": 4.500556510867756e-06, "loss": 1.3492, "step": 3229 }, { "epoch": 0.457280385078219, "grad_norm": 8.716197525538256, "learning_rate": 4.500212708690348e-06, "loss": 1.4596, "step": 3230 }, { "epoch": 0.45742195795285623, "grad_norm": 8.814170733137102, "learning_rate": 4.499868801363248e-06, "loss": 1.3531, "step": 3231 }, { "epoch": 0.45756353082749346, "grad_norm": 9.629444631880752, "learning_rate": 4.499524788904537e-06, "loss": 1.3064, "step": 3232 }, { "epoch": 0.4577051037021307, "grad_norm": 9.418471396919452, "learning_rate": 4.4991806713322986e-06, "loss": 1.3221, "step": 3233 }, { "epoch": 0.45784667657676786, "grad_norm": 7.7351576900374885, "learning_rate": 4.498836448664622e-06, "loss": 1.2575, "step": 3234 }, { "epoch": 0.4579882494514051, "grad_norm": 8.952964794596326, "learning_rate": 4.498492120919604e-06, "loss": 1.2034, "step": 3235 }, { "epoch": 0.4581298223260423, "grad_norm": 6.566863434969175, "learning_rate": 4.498147688115346e-06, "loss": 1.1197, "step": 3236 }, { "epoch": 0.45827139520067955, "grad_norm": 10.026478616019684, "learning_rate": 4.497803150269954e-06, "loss": 1.2201, "step": 3237 }, { "epoch": 0.4584129680753168, "grad_norm": 11.348740670000433, "learning_rate": 4.4974585074015394e-06, "loss": 1.2637, "step": 3238 }, { "epoch": 0.458554540949954, "grad_norm": 12.90940452614785, "learning_rate": 4.497113759528221e-06, "loss": 1.2358, "step": 3239 }, { "epoch": 0.4586961138245912, "grad_norm": 13.178044549715898, "learning_rate": 4.4967689066681205e-06, "loss": 1.4231, "step": 3240 }, { "epoch": 0.4588376866992284, "grad_norm": 10.142630591827034, "learning_rate": 4.496423948839369e-06, "loss": 1.3004, "step": 3241 }, { "epoch": 0.45897925957386565, "grad_norm": 10.967747308270118, "learning_rate": 4.496078886060098e-06, "loss": 1.3302, "step": 3242 }, { "epoch": 0.4591208324485029, "grad_norm": 10.541721469500478, "learning_rate": 4.495733718348449e-06, "loss": 1.3297, "step": 3243 }, { "epoch": 0.4592624053231401, "grad_norm": 9.200843307986775, "learning_rate": 4.4953884457225645e-06, "loss": 1.274, "step": 3244 }, { "epoch": 0.4594039781977773, "grad_norm": 8.245540453962352, "learning_rate": 4.4950430682005995e-06, "loss": 1.2966, "step": 3245 }, { "epoch": 0.4595455510724145, "grad_norm": 10.052984251982366, "learning_rate": 4.4946975858007066e-06, "loss": 1.494, "step": 3246 }, { "epoch": 0.45968712394705175, "grad_norm": 8.351571178520983, "learning_rate": 4.494351998541049e-06, "loss": 1.1227, "step": 3247 }, { "epoch": 0.459828696821689, "grad_norm": 10.239792369883302, "learning_rate": 4.494006306439795e-06, "loss": 1.3481, "step": 3248 }, { "epoch": 0.4599702696963262, "grad_norm": 9.406344492491533, "learning_rate": 4.493660509515115e-06, "loss": 1.266, "step": 3249 }, { "epoch": 0.4601118425709634, "grad_norm": 7.671390432682611, "learning_rate": 4.493314607785189e-06, "loss": 1.2963, "step": 3250 }, { "epoch": 0.4602534154456006, "grad_norm": 9.019766400139526, "learning_rate": 4.492968601268202e-06, "loss": 1.3098, "step": 3251 }, { "epoch": 0.46039498832023784, "grad_norm": 9.496522367306566, "learning_rate": 4.492622489982339e-06, "loss": 1.4174, "step": 3252 }, { "epoch": 0.46053656119487507, "grad_norm": 9.37830182741074, "learning_rate": 4.4922762739457995e-06, "loss": 1.2159, "step": 3253 }, { "epoch": 0.4606781340695123, "grad_norm": 8.859737112997529, "learning_rate": 4.49192995317678e-06, "loss": 1.3583, "step": 3254 }, { "epoch": 0.4608197069441495, "grad_norm": 8.56592441217854, "learning_rate": 4.491583527693489e-06, "loss": 1.341, "step": 3255 }, { "epoch": 0.4609612798187867, "grad_norm": 9.862574232085578, "learning_rate": 4.491236997514138e-06, "loss": 1.3556, "step": 3256 }, { "epoch": 0.46110285269342394, "grad_norm": 7.131503968058599, "learning_rate": 4.490890362656941e-06, "loss": 1.1969, "step": 3257 }, { "epoch": 0.46124442556806117, "grad_norm": 9.748361303096184, "learning_rate": 4.490543623140123e-06, "loss": 1.4553, "step": 3258 }, { "epoch": 0.4613859984426984, "grad_norm": 8.828153951141688, "learning_rate": 4.490196778981911e-06, "loss": 1.4396, "step": 3259 }, { "epoch": 0.46152757131733557, "grad_norm": 8.37164011501522, "learning_rate": 4.489849830200538e-06, "loss": 1.1906, "step": 3260 }, { "epoch": 0.4616691441919728, "grad_norm": 9.207988703796193, "learning_rate": 4.489502776814243e-06, "loss": 1.3078, "step": 3261 }, { "epoch": 0.46181071706661003, "grad_norm": 8.774819622739894, "learning_rate": 4.4891556188412705e-06, "loss": 1.2048, "step": 3262 }, { "epoch": 0.46195228994124726, "grad_norm": 10.246473124290933, "learning_rate": 4.48880835629987e-06, "loss": 1.3487, "step": 3263 }, { "epoch": 0.4620938628158845, "grad_norm": 8.833248737817822, "learning_rate": 4.488460989208298e-06, "loss": 1.1817, "step": 3264 }, { "epoch": 0.4622354356905217, "grad_norm": 8.943249540368903, "learning_rate": 4.4881135175848145e-06, "loss": 1.2893, "step": 3265 }, { "epoch": 0.4623770085651589, "grad_norm": 8.038399567995242, "learning_rate": 4.4877659414476845e-06, "loss": 1.2918, "step": 3266 }, { "epoch": 0.46251858143979613, "grad_norm": 9.364879791579932, "learning_rate": 4.487418260815182e-06, "loss": 1.335, "step": 3267 }, { "epoch": 0.46266015431443336, "grad_norm": 9.23252909585772, "learning_rate": 4.487070475705584e-06, "loss": 1.4068, "step": 3268 }, { "epoch": 0.4628017271890706, "grad_norm": 9.358687386703906, "learning_rate": 4.486722586137171e-06, "loss": 1.3595, "step": 3269 }, { "epoch": 0.4629433000637078, "grad_norm": 11.284439608173185, "learning_rate": 4.486374592128235e-06, "loss": 1.2428, "step": 3270 }, { "epoch": 0.463084872938345, "grad_norm": 12.492526452366869, "learning_rate": 4.486026493697067e-06, "loss": 1.4356, "step": 3271 }, { "epoch": 0.4632264458129822, "grad_norm": 9.686459983866293, "learning_rate": 4.485678290861967e-06, "loss": 1.2912, "step": 3272 }, { "epoch": 0.46336801868761945, "grad_norm": 9.577028494918476, "learning_rate": 4.485329983641239e-06, "loss": 1.2799, "step": 3273 }, { "epoch": 0.4635095915622567, "grad_norm": 9.931194488197397, "learning_rate": 4.484981572053195e-06, "loss": 1.3687, "step": 3274 }, { "epoch": 0.4636511644368939, "grad_norm": 9.679305222223388, "learning_rate": 4.48463305611615e-06, "loss": 1.5219, "step": 3275 }, { "epoch": 0.4637927373115311, "grad_norm": 11.033254814060635, "learning_rate": 4.484284435848424e-06, "loss": 1.2891, "step": 3276 }, { "epoch": 0.4639343101861683, "grad_norm": 9.583450007073964, "learning_rate": 4.483935711268346e-06, "loss": 1.3474, "step": 3277 }, { "epoch": 0.46407588306080555, "grad_norm": 8.243442588089335, "learning_rate": 4.483586882394247e-06, "loss": 1.2774, "step": 3278 }, { "epoch": 0.4642174559354428, "grad_norm": 10.087503584963931, "learning_rate": 4.483237949244463e-06, "loss": 1.3466, "step": 3279 }, { "epoch": 0.46435902881008, "grad_norm": 8.750990893251434, "learning_rate": 4.4828889118373395e-06, "loss": 1.3056, "step": 3280 }, { "epoch": 0.4645006016847172, "grad_norm": 8.361928731567641, "learning_rate": 4.482539770191225e-06, "loss": 1.2845, "step": 3281 }, { "epoch": 0.4646421745593544, "grad_norm": 10.802102795506414, "learning_rate": 4.482190524324473e-06, "loss": 1.2964, "step": 3282 }, { "epoch": 0.46478374743399165, "grad_norm": 9.15804546576013, "learning_rate": 4.481841174255443e-06, "loss": 1.3128, "step": 3283 }, { "epoch": 0.4649253203086289, "grad_norm": 7.88685974327599, "learning_rate": 4.481491720002499e-06, "loss": 1.1702, "step": 3284 }, { "epoch": 0.4650668931832661, "grad_norm": 7.434902987947156, "learning_rate": 4.481142161584014e-06, "loss": 1.2375, "step": 3285 }, { "epoch": 0.4652084660579033, "grad_norm": 9.736891321787704, "learning_rate": 4.480792499018362e-06, "loss": 1.5175, "step": 3286 }, { "epoch": 0.4653500389325405, "grad_norm": 10.790820310732434, "learning_rate": 4.4804427323239265e-06, "loss": 1.4002, "step": 3287 }, { "epoch": 0.46549161180717774, "grad_norm": 10.921232671227274, "learning_rate": 4.480092861519092e-06, "loss": 1.2724, "step": 3288 }, { "epoch": 0.46563318468181497, "grad_norm": 8.312275130593289, "learning_rate": 4.479742886622254e-06, "loss": 1.2592, "step": 3289 }, { "epoch": 0.4657747575564522, "grad_norm": 9.479073615543173, "learning_rate": 4.479392807651807e-06, "loss": 1.3858, "step": 3290 }, { "epoch": 0.4659163304310894, "grad_norm": 9.41287636783323, "learning_rate": 4.479042624626156e-06, "loss": 1.4659, "step": 3291 }, { "epoch": 0.4660579033057266, "grad_norm": 8.56819575970695, "learning_rate": 4.47869233756371e-06, "loss": 1.3994, "step": 3292 }, { "epoch": 0.46619947618036384, "grad_norm": 9.527841027151528, "learning_rate": 4.478341946482884e-06, "loss": 1.3773, "step": 3293 }, { "epoch": 0.46634104905500107, "grad_norm": 8.57228231265464, "learning_rate": 4.4779914514020964e-06, "loss": 1.2585, "step": 3294 }, { "epoch": 0.4664826219296383, "grad_norm": 8.727010116093956, "learning_rate": 4.4776408523397725e-06, "loss": 1.2767, "step": 3295 }, { "epoch": 0.4666241948042755, "grad_norm": 9.725105390982797, "learning_rate": 4.477290149314344e-06, "loss": 1.473, "step": 3296 }, { "epoch": 0.4667657676789127, "grad_norm": 9.541599545527314, "learning_rate": 4.476939342344246e-06, "loss": 1.3345, "step": 3297 }, { "epoch": 0.46690734055354993, "grad_norm": 9.200681611394767, "learning_rate": 4.4765884314479226e-06, "loss": 1.2867, "step": 3298 }, { "epoch": 0.46704891342818716, "grad_norm": 10.27103220119708, "learning_rate": 4.4762374166438185e-06, "loss": 1.2709, "step": 3299 }, { "epoch": 0.4671904863028244, "grad_norm": 11.200905722371957, "learning_rate": 4.475886297950386e-06, "loss": 1.2594, "step": 3300 }, { "epoch": 0.4673320591774616, "grad_norm": 7.771025899865278, "learning_rate": 4.475535075386085e-06, "loss": 1.1906, "step": 3301 }, { "epoch": 0.4674736320520988, "grad_norm": 11.82315148387866, "learning_rate": 4.475183748969377e-06, "loss": 1.3449, "step": 3302 }, { "epoch": 0.46761520492673603, "grad_norm": 11.009657002025362, "learning_rate": 4.474832318718733e-06, "loss": 1.4395, "step": 3303 }, { "epoch": 0.46775677780137326, "grad_norm": 10.85257944130266, "learning_rate": 4.474480784652627e-06, "loss": 1.3836, "step": 3304 }, { "epoch": 0.4678983506760105, "grad_norm": 9.984152252625822, "learning_rate": 4.474129146789538e-06, "loss": 1.2862, "step": 3305 }, { "epoch": 0.4680399235506477, "grad_norm": 10.859866782929188, "learning_rate": 4.473777405147952e-06, "loss": 1.3728, "step": 3306 }, { "epoch": 0.4681814964252849, "grad_norm": 9.750273089374616, "learning_rate": 4.473425559746358e-06, "loss": 1.5771, "step": 3307 }, { "epoch": 0.4683230692999221, "grad_norm": 9.665149525819315, "learning_rate": 4.473073610603255e-06, "loss": 1.3418, "step": 3308 }, { "epoch": 0.46846464217455935, "grad_norm": 9.522228937234468, "learning_rate": 4.4727215577371445e-06, "loss": 1.3143, "step": 3309 }, { "epoch": 0.4686062150491966, "grad_norm": 10.783238324351624, "learning_rate": 4.472369401166531e-06, "loss": 1.4904, "step": 3310 }, { "epoch": 0.4687477879238338, "grad_norm": 8.783965535970037, "learning_rate": 4.472017140909929e-06, "loss": 1.2859, "step": 3311 }, { "epoch": 0.468889360798471, "grad_norm": 8.246028984823829, "learning_rate": 4.471664776985857e-06, "loss": 1.228, "step": 3312 }, { "epoch": 0.4690309336731082, "grad_norm": 9.71298900733152, "learning_rate": 4.471312309412837e-06, "loss": 1.3732, "step": 3313 }, { "epoch": 0.46917250654774545, "grad_norm": 10.049951156573435, "learning_rate": 4.470959738209399e-06, "loss": 1.4026, "step": 3314 }, { "epoch": 0.4693140794223827, "grad_norm": 9.235875248267622, "learning_rate": 4.470607063394077e-06, "loss": 1.2798, "step": 3315 }, { "epoch": 0.4694556522970199, "grad_norm": 8.449984434215045, "learning_rate": 4.470254284985411e-06, "loss": 1.2991, "step": 3316 }, { "epoch": 0.4695972251716571, "grad_norm": 9.86079718100529, "learning_rate": 4.469901403001947e-06, "loss": 1.3747, "step": 3317 }, { "epoch": 0.4697387980462943, "grad_norm": 7.97149228052482, "learning_rate": 4.469548417462234e-06, "loss": 1.3058, "step": 3318 }, { "epoch": 0.46988037092093154, "grad_norm": 9.18021191986082, "learning_rate": 4.46919532838483e-06, "loss": 1.3873, "step": 3319 }, { "epoch": 0.4700219437955688, "grad_norm": 8.841378153397082, "learning_rate": 4.468842135788296e-06, "loss": 1.355, "step": 3320 }, { "epoch": 0.470163516670206, "grad_norm": 9.986842081985067, "learning_rate": 4.468488839691199e-06, "loss": 1.2938, "step": 3321 }, { "epoch": 0.47030508954484324, "grad_norm": 8.56606112862133, "learning_rate": 4.468135440112111e-06, "loss": 1.2546, "step": 3322 }, { "epoch": 0.4704466624194804, "grad_norm": 11.083652080646713, "learning_rate": 4.467781937069611e-06, "loss": 1.2982, "step": 3323 }, { "epoch": 0.47058823529411764, "grad_norm": 8.18163945937268, "learning_rate": 4.467428330582281e-06, "loss": 1.2117, "step": 3324 }, { "epoch": 0.47072980816875487, "grad_norm": 7.566663116364619, "learning_rate": 4.467074620668711e-06, "loss": 1.238, "step": 3325 }, { "epoch": 0.4708713810433921, "grad_norm": 8.230004325114674, "learning_rate": 4.466720807347495e-06, "loss": 1.2425, "step": 3326 }, { "epoch": 0.47101295391802933, "grad_norm": 9.049692509100922, "learning_rate": 4.466366890637232e-06, "loss": 1.1213, "step": 3327 }, { "epoch": 0.4711545267926665, "grad_norm": 9.390071706609776, "learning_rate": 4.466012870556529e-06, "loss": 1.2452, "step": 3328 }, { "epoch": 0.47129609966730374, "grad_norm": 8.88200982727318, "learning_rate": 4.4656587471239944e-06, "loss": 1.4023, "step": 3329 }, { "epoch": 0.47143767254194097, "grad_norm": 8.632074964959676, "learning_rate": 4.4653045203582455e-06, "loss": 1.3724, "step": 3330 }, { "epoch": 0.4715792454165782, "grad_norm": 10.313202435238768, "learning_rate": 4.464950190277903e-06, "loss": 1.29, "step": 3331 }, { "epoch": 0.4717208182912154, "grad_norm": 8.109906541165891, "learning_rate": 4.464595756901594e-06, "loss": 1.3016, "step": 3332 }, { "epoch": 0.4718623911658526, "grad_norm": 10.934266053709356, "learning_rate": 4.4642412202479515e-06, "loss": 1.3023, "step": 3333 }, { "epoch": 0.47200396404048983, "grad_norm": 11.156242477457198, "learning_rate": 4.463886580335612e-06, "loss": 1.4055, "step": 3334 }, { "epoch": 0.47214553691512706, "grad_norm": 9.938144878839939, "learning_rate": 4.463531837183221e-06, "loss": 1.2743, "step": 3335 }, { "epoch": 0.4722871097897643, "grad_norm": 8.150472765357119, "learning_rate": 4.463176990809423e-06, "loss": 1.3304, "step": 3336 }, { "epoch": 0.4724286826644015, "grad_norm": 9.369379418665783, "learning_rate": 4.462822041232876e-06, "loss": 1.2673, "step": 3337 }, { "epoch": 0.4725702555390387, "grad_norm": 10.743470448956318, "learning_rate": 4.462466988472237e-06, "loss": 1.2548, "step": 3338 }, { "epoch": 0.4727118284136759, "grad_norm": 9.183108986700587, "learning_rate": 4.462111832546172e-06, "loss": 1.428, "step": 3339 }, { "epoch": 0.47285340128831316, "grad_norm": 10.822847035061335, "learning_rate": 4.461756573473352e-06, "loss": 1.3728, "step": 3340 }, { "epoch": 0.4729949741629504, "grad_norm": 11.407641182284355, "learning_rate": 4.4614012112724494e-06, "loss": 1.2326, "step": 3341 }, { "epoch": 0.4731365470375876, "grad_norm": 8.606294555105483, "learning_rate": 4.461045745962149e-06, "loss": 1.3514, "step": 3342 }, { "epoch": 0.4732781199122248, "grad_norm": 9.657803604919257, "learning_rate": 4.460690177561136e-06, "loss": 1.3295, "step": 3343 }, { "epoch": 0.473419692786862, "grad_norm": 9.597390742155765, "learning_rate": 4.460334506088102e-06, "loss": 1.2861, "step": 3344 }, { "epoch": 0.47356126566149925, "grad_norm": 12.029684227456043, "learning_rate": 4.459978731561745e-06, "loss": 1.4862, "step": 3345 }, { "epoch": 0.4737028385361365, "grad_norm": 10.5469733791708, "learning_rate": 4.459622854000767e-06, "loss": 1.1787, "step": 3346 }, { "epoch": 0.4738444114107737, "grad_norm": 12.360378682776698, "learning_rate": 4.4592668734238775e-06, "loss": 1.392, "step": 3347 }, { "epoch": 0.4739859842854109, "grad_norm": 9.819201471061527, "learning_rate": 4.458910789849789e-06, "loss": 1.2712, "step": 3348 }, { "epoch": 0.4741275571600481, "grad_norm": 8.532456756312325, "learning_rate": 4.45855460329722e-06, "loss": 1.3634, "step": 3349 }, { "epoch": 0.47426913003468535, "grad_norm": 10.730650409706396, "learning_rate": 4.458198313784897e-06, "loss": 1.4829, "step": 3350 }, { "epoch": 0.4744107029093226, "grad_norm": 8.714331597736713, "learning_rate": 4.457841921331549e-06, "loss": 1.2372, "step": 3351 }, { "epoch": 0.4745522757839598, "grad_norm": 11.919138263045435, "learning_rate": 4.457485425955911e-06, "loss": 1.1936, "step": 3352 }, { "epoch": 0.47469384865859704, "grad_norm": 11.785309676454068, "learning_rate": 4.457128827676722e-06, "loss": 1.4456, "step": 3353 }, { "epoch": 0.4748354215332342, "grad_norm": 13.32696807240603, "learning_rate": 4.456772126512732e-06, "loss": 1.3831, "step": 3354 }, { "epoch": 0.47497699440787144, "grad_norm": 10.549274538959532, "learning_rate": 4.456415322482689e-06, "loss": 1.2348, "step": 3355 }, { "epoch": 0.4751185672825087, "grad_norm": 7.4648473271511575, "learning_rate": 4.456058415605352e-06, "loss": 1.3127, "step": 3356 }, { "epoch": 0.4752601401571459, "grad_norm": 8.485942660621474, "learning_rate": 4.4557014058994815e-06, "loss": 1.1686, "step": 3357 }, { "epoch": 0.47540171303178314, "grad_norm": 8.97252870501425, "learning_rate": 4.455344293383847e-06, "loss": 1.2194, "step": 3358 }, { "epoch": 0.4755432859064203, "grad_norm": 10.287742165071036, "learning_rate": 4.454987078077221e-06, "loss": 1.3788, "step": 3359 }, { "epoch": 0.47568485878105754, "grad_norm": 8.953767923169142, "learning_rate": 4.454629759998382e-06, "loss": 1.3881, "step": 3360 }, { "epoch": 0.47582643165569477, "grad_norm": 8.667323552204207, "learning_rate": 4.454272339166114e-06, "loss": 1.2893, "step": 3361 }, { "epoch": 0.475968004530332, "grad_norm": 7.590423232027882, "learning_rate": 4.453914815599206e-06, "loss": 1.2031, "step": 3362 }, { "epoch": 0.47610957740496923, "grad_norm": 8.132311069886736, "learning_rate": 4.453557189316454e-06, "loss": 1.4738, "step": 3363 }, { "epoch": 0.4762511502796064, "grad_norm": 8.708753317115999, "learning_rate": 4.453199460336656e-06, "loss": 1.2831, "step": 3364 }, { "epoch": 0.47639272315424364, "grad_norm": 11.766892647306772, "learning_rate": 4.452841628678619e-06, "loss": 1.4864, "step": 3365 }, { "epoch": 0.47653429602888087, "grad_norm": 9.33298691606233, "learning_rate": 4.452483694361154e-06, "loss": 1.3037, "step": 3366 }, { "epoch": 0.4766758689035181, "grad_norm": 8.070994552343896, "learning_rate": 4.452125657403077e-06, "loss": 1.3575, "step": 3367 }, { "epoch": 0.4768174417781553, "grad_norm": 7.9543317018003235, "learning_rate": 4.45176751782321e-06, "loss": 1.3963, "step": 3368 }, { "epoch": 0.4769590146527925, "grad_norm": 10.643802655683942, "learning_rate": 4.451409275640379e-06, "loss": 1.2926, "step": 3369 }, { "epoch": 0.47710058752742973, "grad_norm": 8.969720555336176, "learning_rate": 4.451050930873418e-06, "loss": 1.0985, "step": 3370 }, { "epoch": 0.47724216040206696, "grad_norm": 10.472906488874377, "learning_rate": 4.450692483541165e-06, "loss": 1.2868, "step": 3371 }, { "epoch": 0.4773837332767042, "grad_norm": 9.501467541228129, "learning_rate": 4.450333933662462e-06, "loss": 1.3563, "step": 3372 }, { "epoch": 0.4775253061513414, "grad_norm": 9.55311872278069, "learning_rate": 4.449975281256158e-06, "loss": 1.3114, "step": 3373 }, { "epoch": 0.4776668790259786, "grad_norm": 9.753607840474299, "learning_rate": 4.4496165263411075e-06, "loss": 1.3747, "step": 3374 }, { "epoch": 0.4778084519006158, "grad_norm": 10.520492175522701, "learning_rate": 4.4492576689361705e-06, "loss": 1.3805, "step": 3375 }, { "epoch": 0.47795002477525306, "grad_norm": 9.4708016064559, "learning_rate": 4.448898709060211e-06, "loss": 1.3336, "step": 3376 }, { "epoch": 0.4780915976498903, "grad_norm": 9.41028880860868, "learning_rate": 4.448539646732099e-06, "loss": 1.367, "step": 3377 }, { "epoch": 0.4782331705245275, "grad_norm": 9.48183632171207, "learning_rate": 4.448180481970711e-06, "loss": 1.4832, "step": 3378 }, { "epoch": 0.4783747433991647, "grad_norm": 8.148024580561211, "learning_rate": 4.447821214794928e-06, "loss": 1.3189, "step": 3379 }, { "epoch": 0.4785163162738019, "grad_norm": 9.112478365565025, "learning_rate": 4.447461845223636e-06, "loss": 1.4257, "step": 3380 }, { "epoch": 0.47865788914843915, "grad_norm": 9.032214261753031, "learning_rate": 4.447102373275727e-06, "loss": 1.1645, "step": 3381 }, { "epoch": 0.4787994620230764, "grad_norm": 9.151593896005348, "learning_rate": 4.446742798970097e-06, "loss": 1.2593, "step": 3382 }, { "epoch": 0.4789410348977136, "grad_norm": 7.459905391226147, "learning_rate": 4.44638312232565e-06, "loss": 1.1881, "step": 3383 }, { "epoch": 0.47908260777235084, "grad_norm": 8.48430845680627, "learning_rate": 4.446023343361294e-06, "loss": 1.4111, "step": 3384 }, { "epoch": 0.479224180646988, "grad_norm": 8.674326617135314, "learning_rate": 4.445663462095943e-06, "loss": 1.316, "step": 3385 }, { "epoch": 0.47936575352162525, "grad_norm": 10.636365488877686, "learning_rate": 4.445303478548513e-06, "loss": 1.3581, "step": 3386 }, { "epoch": 0.4795073263962625, "grad_norm": 9.766792508335524, "learning_rate": 4.4449433927379295e-06, "loss": 1.24, "step": 3387 }, { "epoch": 0.4796488992708997, "grad_norm": 8.294771721695485, "learning_rate": 4.444583204683123e-06, "loss": 1.2707, "step": 3388 }, { "epoch": 0.47979047214553694, "grad_norm": 8.190590311651555, "learning_rate": 4.444222914403027e-06, "loss": 1.4181, "step": 3389 }, { "epoch": 0.4799320450201741, "grad_norm": 9.852753121822847, "learning_rate": 4.443862521916582e-06, "loss": 1.3177, "step": 3390 }, { "epoch": 0.48007361789481134, "grad_norm": 9.840110856959189, "learning_rate": 4.443502027242733e-06, "loss": 1.2816, "step": 3391 }, { "epoch": 0.4802151907694486, "grad_norm": 7.748540679373898, "learning_rate": 4.443141430400432e-06, "loss": 1.1012, "step": 3392 }, { "epoch": 0.4803567636440858, "grad_norm": 8.496620403935568, "learning_rate": 4.4427807314086355e-06, "loss": 1.3381, "step": 3393 }, { "epoch": 0.48049833651872303, "grad_norm": 9.451851751158548, "learning_rate": 4.442419930286304e-06, "loss": 1.4037, "step": 3394 }, { "epoch": 0.4806399093933602, "grad_norm": 12.605350726709645, "learning_rate": 4.442059027052406e-06, "loss": 1.2878, "step": 3395 }, { "epoch": 0.48078148226799744, "grad_norm": 9.130671580622293, "learning_rate": 4.441698021725911e-06, "loss": 1.3892, "step": 3396 }, { "epoch": 0.48092305514263467, "grad_norm": 8.490407579970672, "learning_rate": 4.4413369143258e-06, "loss": 1.2068, "step": 3397 }, { "epoch": 0.4810646280172719, "grad_norm": 9.469724095161197, "learning_rate": 4.440975704871055e-06, "loss": 1.3062, "step": 3398 }, { "epoch": 0.48120620089190913, "grad_norm": 8.864194067280023, "learning_rate": 4.4406143933806646e-06, "loss": 1.308, "step": 3399 }, { "epoch": 0.4813477737665463, "grad_norm": 9.4638872911025, "learning_rate": 4.4402529798736224e-06, "loss": 1.2528, "step": 3400 }, { "epoch": 0.48148934664118354, "grad_norm": 9.904593633088695, "learning_rate": 4.439891464368927e-06, "loss": 1.4716, "step": 3401 }, { "epoch": 0.48163091951582077, "grad_norm": 9.161474618613534, "learning_rate": 4.439529846885585e-06, "loss": 1.2288, "step": 3402 }, { "epoch": 0.481772492390458, "grad_norm": 9.821604010265974, "learning_rate": 4.439168127442604e-06, "loss": 1.3187, "step": 3403 }, { "epoch": 0.4819140652650952, "grad_norm": 10.094562887522011, "learning_rate": 4.438806306059001e-06, "loss": 1.4525, "step": 3404 }, { "epoch": 0.4820556381397324, "grad_norm": 8.621567637490994, "learning_rate": 4.438444382753796e-06, "loss": 1.37, "step": 3405 }, { "epoch": 0.48219721101436963, "grad_norm": 9.63505959888786, "learning_rate": 4.438082357546015e-06, "loss": 1.3754, "step": 3406 }, { "epoch": 0.48233878388900686, "grad_norm": 8.11166790425772, "learning_rate": 4.4377202304546905e-06, "loss": 1.3439, "step": 3407 }, { "epoch": 0.4824803567636441, "grad_norm": 11.120298270746657, "learning_rate": 4.437358001498857e-06, "loss": 1.3521, "step": 3408 }, { "epoch": 0.4826219296382813, "grad_norm": 11.543246348184848, "learning_rate": 4.436995670697559e-06, "loss": 1.459, "step": 3409 }, { "epoch": 0.48276350251291855, "grad_norm": 8.839181751338929, "learning_rate": 4.436633238069843e-06, "loss": 1.31, "step": 3410 }, { "epoch": 0.4829050753875557, "grad_norm": 10.705106454361804, "learning_rate": 4.436270703634761e-06, "loss": 1.2944, "step": 3411 }, { "epoch": 0.48304664826219296, "grad_norm": 8.098644058328288, "learning_rate": 4.435908067411372e-06, "loss": 1.2795, "step": 3412 }, { "epoch": 0.4831882211368302, "grad_norm": 9.73433737954207, "learning_rate": 4.435545329418739e-06, "loss": 1.3463, "step": 3413 }, { "epoch": 0.4833297940114674, "grad_norm": 10.624746880601696, "learning_rate": 4.435182489675931e-06, "loss": 1.521, "step": 3414 }, { "epoch": 0.48347136688610465, "grad_norm": 10.558780895724006, "learning_rate": 4.434819548202024e-06, "loss": 1.2736, "step": 3415 }, { "epoch": 0.4836129397607418, "grad_norm": 7.880278014492468, "learning_rate": 4.434456505016094e-06, "loss": 1.2651, "step": 3416 }, { "epoch": 0.48375451263537905, "grad_norm": 8.683512039932705, "learning_rate": 4.43409336013723e-06, "loss": 1.4235, "step": 3417 }, { "epoch": 0.4838960855100163, "grad_norm": 8.59988816654326, "learning_rate": 4.433730113584519e-06, "loss": 1.2992, "step": 3418 }, { "epoch": 0.4840376583846535, "grad_norm": 11.107621925126141, "learning_rate": 4.433366765377057e-06, "loss": 1.4835, "step": 3419 }, { "epoch": 0.48417923125929074, "grad_norm": 12.15130467572374, "learning_rate": 4.433003315533947e-06, "loss": 1.2637, "step": 3420 }, { "epoch": 0.4843208041339279, "grad_norm": 11.528056542474294, "learning_rate": 4.432639764074294e-06, "loss": 1.4657, "step": 3421 }, { "epoch": 0.48446237700856515, "grad_norm": 10.737668793920294, "learning_rate": 4.4322761110172085e-06, "loss": 1.3733, "step": 3422 }, { "epoch": 0.4846039498832024, "grad_norm": 9.893377273681345, "learning_rate": 4.43191235638181e-06, "loss": 1.2788, "step": 3423 }, { "epoch": 0.4847455227578396, "grad_norm": 8.617032558953825, "learning_rate": 4.431548500187218e-06, "loss": 1.3411, "step": 3424 }, { "epoch": 0.48488709563247684, "grad_norm": 9.163736973384975, "learning_rate": 4.431184542452563e-06, "loss": 1.4696, "step": 3425 }, { "epoch": 0.485028668507114, "grad_norm": 9.653885734025703, "learning_rate": 4.430820483196976e-06, "loss": 1.268, "step": 3426 }, { "epoch": 0.48517024138175124, "grad_norm": 7.877979986300471, "learning_rate": 4.430456322439596e-06, "loss": 1.2177, "step": 3427 }, { "epoch": 0.4853118142563885, "grad_norm": 11.880387218599703, "learning_rate": 4.430092060199566e-06, "loss": 1.2671, "step": 3428 }, { "epoch": 0.4854533871310257, "grad_norm": 10.003591655415608, "learning_rate": 4.429727696496036e-06, "loss": 1.3087, "step": 3429 }, { "epoch": 0.48559496000566293, "grad_norm": 9.194522587142236, "learning_rate": 4.42936323134816e-06, "loss": 1.3968, "step": 3430 }, { "epoch": 0.4857365328803001, "grad_norm": 9.455427712177533, "learning_rate": 4.4289986647750975e-06, "loss": 1.2233, "step": 3431 }, { "epoch": 0.48587810575493734, "grad_norm": 8.772100385781199, "learning_rate": 4.428633996796012e-06, "loss": 1.3964, "step": 3432 }, { "epoch": 0.48601967862957457, "grad_norm": 8.549537815741443, "learning_rate": 4.4282692274300775e-06, "loss": 1.3488, "step": 3433 }, { "epoch": 0.4861612515042118, "grad_norm": 8.196448641009534, "learning_rate": 4.427904356696467e-06, "loss": 1.276, "step": 3434 }, { "epoch": 0.48630282437884903, "grad_norm": 11.087022401657862, "learning_rate": 4.427539384614361e-06, "loss": 1.4526, "step": 3435 }, { "epoch": 0.4864443972534862, "grad_norm": 8.414942448329127, "learning_rate": 4.427174311202948e-06, "loss": 1.2913, "step": 3436 }, { "epoch": 0.48658597012812344, "grad_norm": 8.885956354659529, "learning_rate": 4.426809136481417e-06, "loss": 1.4002, "step": 3437 }, { "epoch": 0.48672754300276067, "grad_norm": 7.577955358120551, "learning_rate": 4.426443860468967e-06, "loss": 1.2457, "step": 3438 }, { "epoch": 0.4868691158773979, "grad_norm": 10.244151982536602, "learning_rate": 4.4260784831848e-06, "loss": 1.2117, "step": 3439 }, { "epoch": 0.4870106887520351, "grad_norm": 11.175580883930003, "learning_rate": 4.425713004648123e-06, "loss": 1.3842, "step": 3440 }, { "epoch": 0.48715226162667236, "grad_norm": 9.023359290110195, "learning_rate": 4.4253474248781494e-06, "loss": 1.3275, "step": 3441 }, { "epoch": 0.48729383450130953, "grad_norm": 10.093258683379188, "learning_rate": 4.424981743894097e-06, "loss": 1.3398, "step": 3442 }, { "epoch": 0.48743540737594676, "grad_norm": 8.537192709713947, "learning_rate": 4.42461596171519e-06, "loss": 1.429, "step": 3443 }, { "epoch": 0.487576980250584, "grad_norm": 9.05612273933079, "learning_rate": 4.424250078360657e-06, "loss": 1.158, "step": 3444 }, { "epoch": 0.4877185531252212, "grad_norm": 9.549594922487223, "learning_rate": 4.4238840938497315e-06, "loss": 1.2439, "step": 3445 }, { "epoch": 0.48786012599985845, "grad_norm": 8.946737144332852, "learning_rate": 4.423518008201655e-06, "loss": 1.2446, "step": 3446 }, { "epoch": 0.4880016988744956, "grad_norm": 8.746167896905442, "learning_rate": 4.42315182143567e-06, "loss": 1.4049, "step": 3447 }, { "epoch": 0.48814327174913286, "grad_norm": 10.593720472626623, "learning_rate": 4.422785533571028e-06, "loss": 1.3256, "step": 3448 }, { "epoch": 0.4882848446237701, "grad_norm": 9.233199249652802, "learning_rate": 4.422419144626984e-06, "loss": 1.3141, "step": 3449 }, { "epoch": 0.4884264174984073, "grad_norm": 8.103139254862365, "learning_rate": 4.4220526546228e-06, "loss": 1.2957, "step": 3450 }, { "epoch": 0.48856799037304455, "grad_norm": 9.156296661570522, "learning_rate": 4.4216860635777395e-06, "loss": 1.2217, "step": 3451 }, { "epoch": 0.4887095632476817, "grad_norm": 7.781067627757413, "learning_rate": 4.4213193715110755e-06, "loss": 1.186, "step": 3452 }, { "epoch": 0.48885113612231895, "grad_norm": 7.799748137882678, "learning_rate": 4.420952578442086e-06, "loss": 1.3251, "step": 3453 }, { "epoch": 0.4889927089969562, "grad_norm": 9.797772866800601, "learning_rate": 4.420585684390051e-06, "loss": 1.3668, "step": 3454 }, { "epoch": 0.4891342818715934, "grad_norm": 8.752253650901952, "learning_rate": 4.420218689374259e-06, "loss": 1.1432, "step": 3455 }, { "epoch": 0.48927585474623064, "grad_norm": 11.068069693855694, "learning_rate": 4.419851593414002e-06, "loss": 1.4114, "step": 3456 }, { "epoch": 0.4894174276208678, "grad_norm": 9.288293702416162, "learning_rate": 4.4194843965285786e-06, "loss": 1.3921, "step": 3457 }, { "epoch": 0.48955900049550505, "grad_norm": 7.513001172300603, "learning_rate": 4.419117098737291e-06, "loss": 1.3331, "step": 3458 }, { "epoch": 0.4897005733701423, "grad_norm": 7.596131504698291, "learning_rate": 4.418749700059449e-06, "loss": 1.2346, "step": 3459 }, { "epoch": 0.4898421462447795, "grad_norm": 10.067346768005244, "learning_rate": 4.418382200514366e-06, "loss": 1.215, "step": 3460 }, { "epoch": 0.48998371911941674, "grad_norm": 8.49556751243838, "learning_rate": 4.418014600121361e-06, "loss": 1.0883, "step": 3461 }, { "epoch": 0.4901252919940539, "grad_norm": 8.735744962581585, "learning_rate": 4.4176468988997586e-06, "loss": 1.4575, "step": 3462 }, { "epoch": 0.49026686486869114, "grad_norm": 10.256025822599518, "learning_rate": 4.4172790968688885e-06, "loss": 1.3552, "step": 3463 }, { "epoch": 0.4904084377433284, "grad_norm": 10.129171383437003, "learning_rate": 4.416911194048086e-06, "loss": 1.368, "step": 3464 }, { "epoch": 0.4905500106179656, "grad_norm": 9.082178258218812, "learning_rate": 4.4165431904566915e-06, "loss": 1.3062, "step": 3465 }, { "epoch": 0.49069158349260283, "grad_norm": 7.39781100914661, "learning_rate": 4.416175086114049e-06, "loss": 1.1872, "step": 3466 }, { "epoch": 0.49083315636724006, "grad_norm": 8.249298065841819, "learning_rate": 4.415806881039513e-06, "loss": 1.2094, "step": 3467 }, { "epoch": 0.49097472924187724, "grad_norm": 9.215722986879458, "learning_rate": 4.415438575252438e-06, "loss": 1.3325, "step": 3468 }, { "epoch": 0.49111630211651447, "grad_norm": 9.235067325464357, "learning_rate": 4.415070168772184e-06, "loss": 1.2323, "step": 3469 }, { "epoch": 0.4912578749911517, "grad_norm": 11.979672062481901, "learning_rate": 4.414701661618119e-06, "loss": 1.3187, "step": 3470 }, { "epoch": 0.49139944786578893, "grad_norm": 9.125377490452749, "learning_rate": 4.414333053809616e-06, "loss": 1.2218, "step": 3471 }, { "epoch": 0.49154102074042616, "grad_norm": 10.200375897830003, "learning_rate": 4.413964345366051e-06, "loss": 1.3388, "step": 3472 }, { "epoch": 0.49168259361506333, "grad_norm": 10.976519753586695, "learning_rate": 4.413595536306808e-06, "loss": 1.4129, "step": 3473 }, { "epoch": 0.49182416648970056, "grad_norm": 9.693768054741206, "learning_rate": 4.4132266266512745e-06, "loss": 1.4295, "step": 3474 }, { "epoch": 0.4919657393643378, "grad_norm": 11.58491413846918, "learning_rate": 4.412857616418844e-06, "loss": 1.3145, "step": 3475 }, { "epoch": 0.492107312238975, "grad_norm": 12.517276269228674, "learning_rate": 4.412488505628915e-06, "loss": 1.4455, "step": 3476 }, { "epoch": 0.49224888511361226, "grad_norm": 7.812865714096454, "learning_rate": 4.41211929430089e-06, "loss": 1.221, "step": 3477 }, { "epoch": 0.49239045798824943, "grad_norm": 9.548812744477795, "learning_rate": 4.411749982454181e-06, "loss": 1.3289, "step": 3478 }, { "epoch": 0.49253203086288666, "grad_norm": 10.069094564271321, "learning_rate": 4.4113805701082e-06, "loss": 1.3622, "step": 3479 }, { "epoch": 0.4926736037375239, "grad_norm": 9.197308553002918, "learning_rate": 4.411011057282368e-06, "loss": 1.2031, "step": 3480 }, { "epoch": 0.4928151766121611, "grad_norm": 10.996033039907896, "learning_rate": 4.41064144399611e-06, "loss": 1.4659, "step": 3481 }, { "epoch": 0.49295674948679835, "grad_norm": 9.667449481004462, "learning_rate": 4.4102717302688556e-06, "loss": 1.4367, "step": 3482 }, { "epoch": 0.4930983223614355, "grad_norm": 8.600762408860733, "learning_rate": 4.40990191612004e-06, "loss": 1.2087, "step": 3483 }, { "epoch": 0.49323989523607276, "grad_norm": 9.517159875313913, "learning_rate": 4.409532001569106e-06, "loss": 1.2846, "step": 3484 }, { "epoch": 0.49338146811071, "grad_norm": 8.894193285190958, "learning_rate": 4.4091619866354975e-06, "loss": 1.2055, "step": 3485 }, { "epoch": 0.4935230409853472, "grad_norm": 8.597307502297594, "learning_rate": 4.408791871338667e-06, "loss": 1.273, "step": 3486 }, { "epoch": 0.49366461385998445, "grad_norm": 8.340951971889258, "learning_rate": 4.4084216556980715e-06, "loss": 1.152, "step": 3487 }, { "epoch": 0.4938061867346216, "grad_norm": 9.637160112891344, "learning_rate": 4.408051339733172e-06, "loss": 1.2504, "step": 3488 }, { "epoch": 0.49394775960925885, "grad_norm": 8.322996926109685, "learning_rate": 4.407680923463437e-06, "loss": 1.3272, "step": 3489 }, { "epoch": 0.4940893324838961, "grad_norm": 8.646537096419909, "learning_rate": 4.407310406908338e-06, "loss": 1.2358, "step": 3490 }, { "epoch": 0.4942309053585333, "grad_norm": 8.791834632269415, "learning_rate": 4.406939790087353e-06, "loss": 1.2557, "step": 3491 }, { "epoch": 0.49437247823317054, "grad_norm": 12.798504885138806, "learning_rate": 4.406569073019965e-06, "loss": 1.491, "step": 3492 }, { "epoch": 0.4945140511078077, "grad_norm": 10.362869340782074, "learning_rate": 4.406198255725662e-06, "loss": 1.3549, "step": 3493 }, { "epoch": 0.49465562398244495, "grad_norm": 9.330569766393012, "learning_rate": 4.4058273382239395e-06, "loss": 1.1749, "step": 3494 }, { "epoch": 0.4947971968570822, "grad_norm": 11.090738064943888, "learning_rate": 4.4054563205342935e-06, "loss": 1.3639, "step": 3495 }, { "epoch": 0.4949387697317194, "grad_norm": 9.794254128846651, "learning_rate": 4.4050852026762295e-06, "loss": 1.2714, "step": 3496 }, { "epoch": 0.49508034260635664, "grad_norm": 10.010516549577023, "learning_rate": 4.404713984669257e-06, "loss": 1.2737, "step": 3497 }, { "epoch": 0.49522191548099387, "grad_norm": 10.07306225792103, "learning_rate": 4.404342666532891e-06, "loss": 1.3022, "step": 3498 }, { "epoch": 0.49536348835563104, "grad_norm": 9.733838502847256, "learning_rate": 4.403971248286651e-06, "loss": 1.4025, "step": 3499 }, { "epoch": 0.4955050612302683, "grad_norm": 7.948625592120372, "learning_rate": 4.403599729950062e-06, "loss": 1.2245, "step": 3500 }, { "epoch": 0.4956466341049055, "grad_norm": 9.864400849203642, "learning_rate": 4.403228111542654e-06, "loss": 1.364, "step": 3501 }, { "epoch": 0.49578820697954273, "grad_norm": 9.020945864732456, "learning_rate": 4.402856393083964e-06, "loss": 1.317, "step": 3502 }, { "epoch": 0.49592977985417996, "grad_norm": 8.336857266795088, "learning_rate": 4.402484574593532e-06, "loss": 1.182, "step": 3503 }, { "epoch": 0.49607135272881714, "grad_norm": 9.669971383495257, "learning_rate": 4.402112656090904e-06, "loss": 1.322, "step": 3504 }, { "epoch": 0.49621292560345437, "grad_norm": 10.250370670222278, "learning_rate": 4.401740637595633e-06, "loss": 1.3543, "step": 3505 }, { "epoch": 0.4963544984780916, "grad_norm": 8.281678847239164, "learning_rate": 4.401368519127274e-06, "loss": 1.2596, "step": 3506 }, { "epoch": 0.49649607135272883, "grad_norm": 8.381710723115901, "learning_rate": 4.400996300705389e-06, "loss": 1.3443, "step": 3507 }, { "epoch": 0.49663764422736606, "grad_norm": 12.359167586755186, "learning_rate": 4.400623982349547e-06, "loss": 1.3197, "step": 3508 }, { "epoch": 0.49677921710200323, "grad_norm": 10.596853516179195, "learning_rate": 4.400251564079319e-06, "loss": 1.2304, "step": 3509 }, { "epoch": 0.49692078997664046, "grad_norm": 8.16007724575805, "learning_rate": 4.399879045914283e-06, "loss": 1.2855, "step": 3510 }, { "epoch": 0.4970623628512777, "grad_norm": 7.999841688497068, "learning_rate": 4.399506427874023e-06, "loss": 1.2656, "step": 3511 }, { "epoch": 0.4972039357259149, "grad_norm": 9.19851335870389, "learning_rate": 4.399133709978126e-06, "loss": 1.2572, "step": 3512 }, { "epoch": 0.49734550860055216, "grad_norm": 10.543692011015143, "learning_rate": 4.398760892246185e-06, "loss": 1.1363, "step": 3513 }, { "epoch": 0.49748708147518933, "grad_norm": 9.467562660833908, "learning_rate": 4.398387974697801e-06, "loss": 1.2989, "step": 3514 }, { "epoch": 0.49762865434982656, "grad_norm": 7.648051453804365, "learning_rate": 4.398014957352576e-06, "loss": 1.2101, "step": 3515 }, { "epoch": 0.4977702272244638, "grad_norm": 9.913867416446465, "learning_rate": 4.3976418402301196e-06, "loss": 1.2813, "step": 3516 }, { "epoch": 0.497911800099101, "grad_norm": 9.01964375065179, "learning_rate": 4.397268623350047e-06, "loss": 1.3348, "step": 3517 }, { "epoch": 0.49805337297373825, "grad_norm": 7.68456702378329, "learning_rate": 4.396895306731978e-06, "loss": 1.361, "step": 3518 }, { "epoch": 0.4981949458483754, "grad_norm": 9.472732763866244, "learning_rate": 4.396521890395536e-06, "loss": 1.3689, "step": 3519 }, { "epoch": 0.49833651872301266, "grad_norm": 9.573949404616071, "learning_rate": 4.396148374360354e-06, "loss": 1.3755, "step": 3520 }, { "epoch": 0.4984780915976499, "grad_norm": 10.137299778627336, "learning_rate": 4.395774758646064e-06, "loss": 1.3647, "step": 3521 }, { "epoch": 0.4986196644722871, "grad_norm": 9.6458344318782, "learning_rate": 4.395401043272309e-06, "loss": 1.3787, "step": 3522 }, { "epoch": 0.49876123734692435, "grad_norm": 8.89610896401944, "learning_rate": 4.395027228258735e-06, "loss": 1.2711, "step": 3523 }, { "epoch": 0.4989028102215615, "grad_norm": 7.0881088614273775, "learning_rate": 4.3946533136249926e-06, "loss": 1.0665, "step": 3524 }, { "epoch": 0.49904438309619875, "grad_norm": 7.967308721912542, "learning_rate": 4.394279299390737e-06, "loss": 1.2285, "step": 3525 }, { "epoch": 0.499185955970836, "grad_norm": 8.603327848050146, "learning_rate": 4.393905185575632e-06, "loss": 1.1893, "step": 3526 }, { "epoch": 0.4993275288454732, "grad_norm": 7.9065891841460605, "learning_rate": 4.393530972199344e-06, "loss": 1.2525, "step": 3527 }, { "epoch": 0.49946910172011044, "grad_norm": 9.666588443132678, "learning_rate": 4.393156659281545e-06, "loss": 1.3401, "step": 3528 }, { "epoch": 0.4996106745947477, "grad_norm": 9.827809824536981, "learning_rate": 4.39278224684191e-06, "loss": 1.1489, "step": 3529 }, { "epoch": 0.49975224746938485, "grad_norm": 9.545156271101385, "learning_rate": 4.392407734900125e-06, "loss": 1.4004, "step": 3530 }, { "epoch": 0.4998938203440221, "grad_norm": 11.119915089454581, "learning_rate": 4.392033123475876e-06, "loss": 1.3414, "step": 3531 }, { "epoch": 0.5000353932186593, "grad_norm": 9.601895828606867, "learning_rate": 4.3916584125888575e-06, "loss": 1.2758, "step": 3532 }, { "epoch": 0.5001769660932965, "grad_norm": 9.405676535934713, "learning_rate": 4.391283602258765e-06, "loss": 1.2702, "step": 3533 }, { "epoch": 0.5003185389679338, "grad_norm": 8.478410566830677, "learning_rate": 4.390908692505305e-06, "loss": 1.3231, "step": 3534 }, { "epoch": 0.500460111842571, "grad_norm": 11.824971714752111, "learning_rate": 4.390533683348184e-06, "loss": 1.2985, "step": 3535 }, { "epoch": 0.5006016847172082, "grad_norm": 11.866457145069932, "learning_rate": 4.390158574807118e-06, "loss": 1.2969, "step": 3536 }, { "epoch": 0.5007432575918453, "grad_norm": 9.758740151186469, "learning_rate": 4.389783366901824e-06, "loss": 1.3713, "step": 3537 }, { "epoch": 0.5008848304664826, "grad_norm": 8.803027100538342, "learning_rate": 4.3894080596520286e-06, "loss": 1.3853, "step": 3538 }, { "epoch": 0.5010264033411198, "grad_norm": 7.430376247567812, "learning_rate": 4.38903265307746e-06, "loss": 1.1696, "step": 3539 }, { "epoch": 0.501167976215757, "grad_norm": 11.430323616312567, "learning_rate": 4.388657147197852e-06, "loss": 1.2981, "step": 3540 }, { "epoch": 0.5013095490903943, "grad_norm": 11.699443080069626, "learning_rate": 4.388281542032948e-06, "loss": 1.4888, "step": 3541 }, { "epoch": 0.5014511219650315, "grad_norm": 10.964092254707024, "learning_rate": 4.38790583760249e-06, "loss": 1.4003, "step": 3542 }, { "epoch": 0.5015926948396687, "grad_norm": 10.87441699339735, "learning_rate": 4.3875300339262304e-06, "loss": 1.0645, "step": 3543 }, { "epoch": 0.501734267714306, "grad_norm": 8.311762956339816, "learning_rate": 4.387154131023924e-06, "loss": 1.2044, "step": 3544 }, { "epoch": 0.5018758405889432, "grad_norm": 10.6818265077422, "learning_rate": 4.386778128915332e-06, "loss": 1.378, "step": 3545 }, { "epoch": 0.5020174134635804, "grad_norm": 11.54980695819131, "learning_rate": 4.386402027620221e-06, "loss": 1.3755, "step": 3546 }, { "epoch": 0.5021589863382176, "grad_norm": 10.072475251418078, "learning_rate": 4.386025827158362e-06, "loss": 1.316, "step": 3547 }, { "epoch": 0.5023005592128548, "grad_norm": 9.081131507851355, "learning_rate": 4.385649527549531e-06, "loss": 1.1906, "step": 3548 }, { "epoch": 0.502442132087492, "grad_norm": 10.1457130586883, "learning_rate": 4.385273128813511e-06, "loss": 1.276, "step": 3549 }, { "epoch": 0.5025837049621292, "grad_norm": 7.972295234277907, "learning_rate": 4.384896630970088e-06, "loss": 1.4091, "step": 3550 }, { "epoch": 0.5027252778367665, "grad_norm": 7.956317133594052, "learning_rate": 4.384520034039054e-06, "loss": 1.242, "step": 3551 }, { "epoch": 0.5028668507114037, "grad_norm": 8.472580609966649, "learning_rate": 4.384143338040207e-06, "loss": 1.3097, "step": 3552 }, { "epoch": 0.5030084235860409, "grad_norm": 9.706047563965718, "learning_rate": 4.3837665429933505e-06, "loss": 1.3847, "step": 3553 }, { "epoch": 0.5031499964606782, "grad_norm": 9.229073850510874, "learning_rate": 4.383389648918291e-06, "loss": 1.2678, "step": 3554 }, { "epoch": 0.5032915693353154, "grad_norm": 8.90064456620124, "learning_rate": 4.3830126558348425e-06, "loss": 1.2976, "step": 3555 }, { "epoch": 0.5034331422099526, "grad_norm": 11.393540918353834, "learning_rate": 4.382635563762822e-06, "loss": 1.4591, "step": 3556 }, { "epoch": 0.5035747150845898, "grad_norm": 9.831399587377154, "learning_rate": 4.382258372722054e-06, "loss": 1.3694, "step": 3557 }, { "epoch": 0.503716287959227, "grad_norm": 7.35223625573914, "learning_rate": 4.381881082732367e-06, "loss": 1.2936, "step": 3558 }, { "epoch": 0.5038578608338642, "grad_norm": 9.41638894360809, "learning_rate": 4.381503693813594e-06, "loss": 1.3006, "step": 3559 }, { "epoch": 0.5039994337085014, "grad_norm": 11.04441881729042, "learning_rate": 4.381126205985575e-06, "loss": 1.3834, "step": 3560 }, { "epoch": 0.5041410065831387, "grad_norm": 9.708416808810474, "learning_rate": 4.380748619268154e-06, "loss": 1.1973, "step": 3561 }, { "epoch": 0.5042825794577759, "grad_norm": 10.062146293157456, "learning_rate": 4.3803709336811804e-06, "loss": 1.3512, "step": 3562 }, { "epoch": 0.5044241523324131, "grad_norm": 8.74817224213864, "learning_rate": 4.379993149244509e-06, "loss": 1.3635, "step": 3563 }, { "epoch": 0.5045657252070503, "grad_norm": 8.895195991025156, "learning_rate": 4.379615265978e-06, "loss": 1.1614, "step": 3564 }, { "epoch": 0.5047072980816876, "grad_norm": 9.433851332534102, "learning_rate": 4.379237283901518e-06, "loss": 1.0763, "step": 3565 }, { "epoch": 0.5048488709563248, "grad_norm": 10.162210915971169, "learning_rate": 4.378859203034932e-06, "loss": 1.2676, "step": 3566 }, { "epoch": 0.504990443830962, "grad_norm": 7.859618166125194, "learning_rate": 4.378481023398119e-06, "loss": 1.1299, "step": 3567 }, { "epoch": 0.5051320167055992, "grad_norm": 8.570361461516935, "learning_rate": 4.37810274501096e-06, "loss": 1.3747, "step": 3568 }, { "epoch": 0.5052735895802364, "grad_norm": 9.698002198549267, "learning_rate": 4.37772436789334e-06, "loss": 1.344, "step": 3569 }, { "epoch": 0.5054151624548736, "grad_norm": 10.693662060499207, "learning_rate": 4.377345892065149e-06, "loss": 1.4562, "step": 3570 }, { "epoch": 0.5055567353295108, "grad_norm": 8.804870766033025, "learning_rate": 4.376967317546285e-06, "loss": 1.2438, "step": 3571 }, { "epoch": 0.5056983082041481, "grad_norm": 11.900100681936872, "learning_rate": 4.376588644356649e-06, "loss": 1.478, "step": 3572 }, { "epoch": 0.5058398810787853, "grad_norm": 10.120219668088787, "learning_rate": 4.376209872516146e-06, "loss": 1.2629, "step": 3573 }, { "epoch": 0.5059814539534225, "grad_norm": 9.222704643694506, "learning_rate": 4.37583100204469e-06, "loss": 1.3411, "step": 3574 }, { "epoch": 0.5061230268280598, "grad_norm": 14.113778036682808, "learning_rate": 4.375452032962197e-06, "loss": 1.4009, "step": 3575 }, { "epoch": 0.506264599702697, "grad_norm": 9.563826325630156, "learning_rate": 4.375072965288589e-06, "loss": 1.3481, "step": 3576 }, { "epoch": 0.5064061725773342, "grad_norm": 10.58969353625942, "learning_rate": 4.374693799043792e-06, "loss": 1.4274, "step": 3577 }, { "epoch": 0.5065477454519715, "grad_norm": 8.766919589534027, "learning_rate": 4.374314534247741e-06, "loss": 1.1455, "step": 3578 }, { "epoch": 0.5066893183266086, "grad_norm": 11.15061864120296, "learning_rate": 4.3739351709203725e-06, "loss": 1.2594, "step": 3579 }, { "epoch": 0.5068308912012458, "grad_norm": 10.297119878655883, "learning_rate": 4.3735557090816295e-06, "loss": 1.3052, "step": 3580 }, { "epoch": 0.506972464075883, "grad_norm": 9.146231003257101, "learning_rate": 4.37317614875146e-06, "loss": 1.287, "step": 3581 }, { "epoch": 0.5071140369505203, "grad_norm": 9.882043571865589, "learning_rate": 4.372796489949816e-06, "loss": 1.3389, "step": 3582 }, { "epoch": 0.5072556098251575, "grad_norm": 7.811374186460933, "learning_rate": 4.3724167326966575e-06, "loss": 1.1854, "step": 3583 }, { "epoch": 0.5073971826997947, "grad_norm": 10.818054536518021, "learning_rate": 4.372036877011948e-06, "loss": 1.3886, "step": 3584 }, { "epoch": 0.507538755574432, "grad_norm": 9.699870818546213, "learning_rate": 4.371656922915655e-06, "loss": 1.3545, "step": 3585 }, { "epoch": 0.5076803284490692, "grad_norm": 11.007507883138821, "learning_rate": 4.3712768704277535e-06, "loss": 1.393, "step": 3586 }, { "epoch": 0.5078219013237064, "grad_norm": 11.568869655985734, "learning_rate": 4.3708967195682215e-06, "loss": 1.4189, "step": 3587 }, { "epoch": 0.5079634741983436, "grad_norm": 10.3037623524479, "learning_rate": 4.3705164703570444e-06, "loss": 1.2912, "step": 3588 }, { "epoch": 0.5081050470729808, "grad_norm": 11.373174164505347, "learning_rate": 4.3701361228142115e-06, "loss": 1.207, "step": 3589 }, { "epoch": 0.508246619947618, "grad_norm": 11.237121670711891, "learning_rate": 4.369755676959717e-06, "loss": 1.3908, "step": 3590 }, { "epoch": 0.5083881928222552, "grad_norm": 8.524258721752355, "learning_rate": 4.36937513281356e-06, "loss": 1.4201, "step": 3591 }, { "epoch": 0.5085297656968925, "grad_norm": 10.984852157799436, "learning_rate": 4.3689944903957475e-06, "loss": 1.3405, "step": 3592 }, { "epoch": 0.5086713385715297, "grad_norm": 11.462342013036782, "learning_rate": 4.368613749726287e-06, "loss": 1.2763, "step": 3593 }, { "epoch": 0.5088129114461669, "grad_norm": 10.990650712135832, "learning_rate": 4.368232910825196e-06, "loss": 1.2023, "step": 3594 }, { "epoch": 0.5089544843208041, "grad_norm": 10.185433581884306, "learning_rate": 4.367851973712492e-06, "loss": 1.4747, "step": 3595 }, { "epoch": 0.5090960571954414, "grad_norm": 9.753900774678078, "learning_rate": 4.367470938408204e-06, "loss": 1.1592, "step": 3596 }, { "epoch": 0.5092376300700786, "grad_norm": 11.563049633262372, "learning_rate": 4.367089804932362e-06, "loss": 1.4018, "step": 3597 }, { "epoch": 0.5093792029447158, "grad_norm": 9.857701082396586, "learning_rate": 4.366708573304999e-06, "loss": 1.2494, "step": 3598 }, { "epoch": 0.5095207758193531, "grad_norm": 9.952626741354987, "learning_rate": 4.36632724354616e-06, "loss": 1.3116, "step": 3599 }, { "epoch": 0.5096623486939902, "grad_norm": 8.726732107237186, "learning_rate": 4.365945815675888e-06, "loss": 1.2629, "step": 3600 }, { "epoch": 0.5098039215686274, "grad_norm": 9.500316012296437, "learning_rate": 4.365564289714237e-06, "loss": 1.2569, "step": 3601 }, { "epoch": 0.5099454944432646, "grad_norm": 11.518580146816396, "learning_rate": 4.365182665681261e-06, "loss": 1.395, "step": 3602 }, { "epoch": 0.5100870673179019, "grad_norm": 10.404613469289115, "learning_rate": 4.364800943597024e-06, "loss": 1.2535, "step": 3603 }, { "epoch": 0.5102286401925391, "grad_norm": 11.070715660663398, "learning_rate": 4.364419123481592e-06, "loss": 1.24, "step": 3604 }, { "epoch": 0.5103702130671763, "grad_norm": 8.089870627822323, "learning_rate": 4.364037205355036e-06, "loss": 1.1674, "step": 3605 }, { "epoch": 0.5105117859418136, "grad_norm": 11.554065494160891, "learning_rate": 4.3636551892374346e-06, "loss": 1.4152, "step": 3606 }, { "epoch": 0.5106533588164508, "grad_norm": 8.718538233875382, "learning_rate": 4.3632730751488695e-06, "loss": 1.3727, "step": 3607 }, { "epoch": 0.510794931691088, "grad_norm": 8.05165683921299, "learning_rate": 4.362890863109428e-06, "loss": 1.2115, "step": 3608 }, { "epoch": 0.5109365045657253, "grad_norm": 11.408877051924852, "learning_rate": 4.362508553139203e-06, "loss": 1.2151, "step": 3609 }, { "epoch": 0.5110780774403624, "grad_norm": 9.166796550408547, "learning_rate": 4.362126145258292e-06, "loss": 1.2188, "step": 3610 }, { "epoch": 0.5112196503149996, "grad_norm": 10.785255285220781, "learning_rate": 4.361743639486797e-06, "loss": 1.2269, "step": 3611 }, { "epoch": 0.5113612231896368, "grad_norm": 10.09594702652033, "learning_rate": 4.361361035844829e-06, "loss": 1.4284, "step": 3612 }, { "epoch": 0.5115027960642741, "grad_norm": 8.383403604363346, "learning_rate": 4.360978334352498e-06, "loss": 1.2688, "step": 3613 }, { "epoch": 0.5116443689389113, "grad_norm": 11.109635963003862, "learning_rate": 4.360595535029924e-06, "loss": 1.2878, "step": 3614 }, { "epoch": 0.5117859418135485, "grad_norm": 10.5244122048835, "learning_rate": 4.36021263789723e-06, "loss": 1.333, "step": 3615 }, { "epoch": 0.5119275146881858, "grad_norm": 8.728630346849018, "learning_rate": 4.359829642974544e-06, "loss": 1.3274, "step": 3616 }, { "epoch": 0.512069087562823, "grad_norm": 9.188280552514641, "learning_rate": 4.359446550282001e-06, "loss": 1.3884, "step": 3617 }, { "epoch": 0.5122106604374602, "grad_norm": 8.004876081280718, "learning_rate": 4.359063359839739e-06, "loss": 1.2561, "step": 3618 }, { "epoch": 0.5123522333120974, "grad_norm": 8.528752782408311, "learning_rate": 4.358680071667903e-06, "loss": 1.3053, "step": 3619 }, { "epoch": 0.5124938061867346, "grad_norm": 9.882068663336362, "learning_rate": 4.35829668578664e-06, "loss": 1.3428, "step": 3620 }, { "epoch": 0.5126353790613718, "grad_norm": 9.819680083554626, "learning_rate": 4.357913202216108e-06, "loss": 1.2256, "step": 3621 }, { "epoch": 0.512776951936009, "grad_norm": 9.118700335068658, "learning_rate": 4.357529620976463e-06, "loss": 1.3428, "step": 3622 }, { "epoch": 0.5129185248106463, "grad_norm": 8.052748823280151, "learning_rate": 4.3571459420878705e-06, "loss": 1.2902, "step": 3623 }, { "epoch": 0.5130600976852835, "grad_norm": 10.424274402862382, "learning_rate": 4.3567621655705015e-06, "loss": 1.263, "step": 3624 }, { "epoch": 0.5132016705599207, "grad_norm": 9.079106131765146, "learning_rate": 4.356378291444529e-06, "loss": 1.2425, "step": 3625 }, { "epoch": 0.513343243434558, "grad_norm": 9.654248076386382, "learning_rate": 4.355994319730135e-06, "loss": 1.1795, "step": 3626 }, { "epoch": 0.5134848163091952, "grad_norm": 9.326823234779999, "learning_rate": 4.355610250447503e-06, "loss": 1.334, "step": 3627 }, { "epoch": 0.5136263891838324, "grad_norm": 8.823396912122204, "learning_rate": 4.355226083616824e-06, "loss": 1.3492, "step": 3628 }, { "epoch": 0.5137679620584696, "grad_norm": 10.588597667950674, "learning_rate": 4.354841819258293e-06, "loss": 1.4545, "step": 3629 }, { "epoch": 0.5139095349331069, "grad_norm": 9.90376245692629, "learning_rate": 4.35445745739211e-06, "loss": 1.3612, "step": 3630 }, { "epoch": 0.514051107807744, "grad_norm": 7.019821984623312, "learning_rate": 4.354072998038482e-06, "loss": 1.2451, "step": 3631 }, { "epoch": 0.5141926806823812, "grad_norm": 9.001079494586808, "learning_rate": 4.353688441217618e-06, "loss": 1.4295, "step": 3632 }, { "epoch": 0.5143342535570185, "grad_norm": 9.289357353131576, "learning_rate": 4.353303786949735e-06, "loss": 1.3349, "step": 3633 }, { "epoch": 0.5144758264316557, "grad_norm": 10.835440758373545, "learning_rate": 4.352919035255055e-06, "loss": 1.3358, "step": 3634 }, { "epoch": 0.5146173993062929, "grad_norm": 9.205593017837504, "learning_rate": 4.352534186153802e-06, "loss": 1.3733, "step": 3635 }, { "epoch": 0.5147589721809301, "grad_norm": 8.234261036035372, "learning_rate": 4.352149239666208e-06, "loss": 1.2727, "step": 3636 }, { "epoch": 0.5149005450555674, "grad_norm": 8.594434010420503, "learning_rate": 4.35176419581251e-06, "loss": 1.3088, "step": 3637 }, { "epoch": 0.5150421179302046, "grad_norm": 9.286494253748415, "learning_rate": 4.351379054612949e-06, "loss": 1.3359, "step": 3638 }, { "epoch": 0.5151836908048418, "grad_norm": 11.955219638774025, "learning_rate": 4.35099381608777e-06, "loss": 1.3881, "step": 3639 }, { "epoch": 0.5153252636794791, "grad_norm": 9.614671987095223, "learning_rate": 4.3506084802572276e-06, "loss": 1.2515, "step": 3640 }, { "epoch": 0.5154668365541162, "grad_norm": 9.39151093275718, "learning_rate": 4.350223047141577e-06, "loss": 1.5442, "step": 3641 }, { "epoch": 0.5156084094287534, "grad_norm": 10.597233292449017, "learning_rate": 4.349837516761081e-06, "loss": 1.2808, "step": 3642 }, { "epoch": 0.5157499823033906, "grad_norm": 11.097924363181543, "learning_rate": 4.3494518891360054e-06, "loss": 1.404, "step": 3643 }, { "epoch": 0.5158915551780279, "grad_norm": 9.34133931067636, "learning_rate": 4.3490661642866225e-06, "loss": 1.2152, "step": 3644 }, { "epoch": 0.5160331280526651, "grad_norm": 11.91366034157324, "learning_rate": 4.3486803422332115e-06, "loss": 1.3273, "step": 3645 }, { "epoch": 0.5161747009273023, "grad_norm": 8.33580295843862, "learning_rate": 4.348294422996052e-06, "loss": 1.1015, "step": 3646 }, { "epoch": 0.5163162738019396, "grad_norm": 8.955641467763272, "learning_rate": 4.347908406595433e-06, "loss": 1.3313, "step": 3647 }, { "epoch": 0.5164578466765768, "grad_norm": 12.988385807849822, "learning_rate": 4.3475222930516484e-06, "loss": 1.396, "step": 3648 }, { "epoch": 0.516599419551214, "grad_norm": 7.239411349735131, "learning_rate": 4.347136082384993e-06, "loss": 1.2539, "step": 3649 }, { "epoch": 0.5167409924258513, "grad_norm": 8.506620633629561, "learning_rate": 4.3467497746157715e-06, "loss": 1.1788, "step": 3650 }, { "epoch": 0.5168825653004884, "grad_norm": 14.303134302624242, "learning_rate": 4.3463633697642905e-06, "loss": 1.3677, "step": 3651 }, { "epoch": 0.5170241381751256, "grad_norm": 10.011149485132657, "learning_rate": 4.345976867850865e-06, "loss": 1.3191, "step": 3652 }, { "epoch": 0.5171657110497628, "grad_norm": 9.000102572386416, "learning_rate": 4.345590268895812e-06, "loss": 1.195, "step": 3653 }, { "epoch": 0.5173072839244001, "grad_norm": 7.775654078995617, "learning_rate": 4.3452035729194544e-06, "loss": 1.1391, "step": 3654 }, { "epoch": 0.5174488567990373, "grad_norm": 7.1321180158041, "learning_rate": 4.34481677994212e-06, "loss": 1.2042, "step": 3655 }, { "epoch": 0.5175904296736745, "grad_norm": 9.070762918806732, "learning_rate": 4.3444298899841445e-06, "loss": 1.3308, "step": 3656 }, { "epoch": 0.5177320025483118, "grad_norm": 10.013534922934658, "learning_rate": 4.344042903065864e-06, "loss": 1.3021, "step": 3657 }, { "epoch": 0.517873575422949, "grad_norm": 7.335698035478601, "learning_rate": 4.3436558192076225e-06, "loss": 1.2039, "step": 3658 }, { "epoch": 0.5180151482975862, "grad_norm": 10.284138838897249, "learning_rate": 4.3432686384297705e-06, "loss": 1.4557, "step": 3659 }, { "epoch": 0.5181567211722234, "grad_norm": 8.810776562277079, "learning_rate": 4.34288136075266e-06, "loss": 1.2821, "step": 3660 }, { "epoch": 0.5182982940468607, "grad_norm": 10.57991316992278, "learning_rate": 4.34249398619665e-06, "loss": 1.2703, "step": 3661 }, { "epoch": 0.5184398669214978, "grad_norm": 6.48705910891587, "learning_rate": 4.342106514782106e-06, "loss": 1.1235, "step": 3662 }, { "epoch": 0.518581439796135, "grad_norm": 7.753502085263255, "learning_rate": 4.341718946529395e-06, "loss": 1.2455, "step": 3663 }, { "epoch": 0.5187230126707723, "grad_norm": 8.439793705882364, "learning_rate": 4.341331281458893e-06, "loss": 1.1997, "step": 3664 }, { "epoch": 0.5188645855454095, "grad_norm": 10.299057956318027, "learning_rate": 4.3409435195909785e-06, "loss": 1.3715, "step": 3665 }, { "epoch": 0.5190061584200467, "grad_norm": 8.981694572982525, "learning_rate": 4.340555660946035e-06, "loss": 1.355, "step": 3666 }, { "epoch": 0.519147731294684, "grad_norm": 9.441629276947932, "learning_rate": 4.340167705544454e-06, "loss": 1.4047, "step": 3667 }, { "epoch": 0.5192893041693212, "grad_norm": 9.941222831068275, "learning_rate": 4.339779653406628e-06, "loss": 1.3555, "step": 3668 }, { "epoch": 0.5194308770439584, "grad_norm": 10.917321299563834, "learning_rate": 4.3393915045529575e-06, "loss": 1.3726, "step": 3669 }, { "epoch": 0.5195724499185956, "grad_norm": 8.371894830137022, "learning_rate": 4.339003259003848e-06, "loss": 1.1966, "step": 3670 }, { "epoch": 0.5197140227932329, "grad_norm": 8.603376621677885, "learning_rate": 4.338614916779706e-06, "loss": 1.2495, "step": 3671 }, { "epoch": 0.51985559566787, "grad_norm": 9.122696677272973, "learning_rate": 4.3382264779009504e-06, "loss": 1.226, "step": 3672 }, { "epoch": 0.5199971685425072, "grad_norm": 8.785167105512413, "learning_rate": 4.337837942388e-06, "loss": 1.4481, "step": 3673 }, { "epoch": 0.5201387414171444, "grad_norm": 10.615710696822555, "learning_rate": 4.337449310261279e-06, "loss": 1.4717, "step": 3674 }, { "epoch": 0.5202803142917817, "grad_norm": 8.372563705224755, "learning_rate": 4.337060581541217e-06, "loss": 1.3384, "step": 3675 }, { "epoch": 0.5204218871664189, "grad_norm": 10.21246910137232, "learning_rate": 4.336671756248251e-06, "loss": 1.3008, "step": 3676 }, { "epoch": 0.5205634600410561, "grad_norm": 8.116534708923036, "learning_rate": 4.33628283440282e-06, "loss": 1.2539, "step": 3677 }, { "epoch": 0.5207050329156934, "grad_norm": 8.300047302111306, "learning_rate": 4.335893816025369e-06, "loss": 1.3735, "step": 3678 }, { "epoch": 0.5208466057903306, "grad_norm": 11.848857171042514, "learning_rate": 4.33550470113635e-06, "loss": 1.2463, "step": 3679 }, { "epoch": 0.5209881786649678, "grad_norm": 10.828314885487831, "learning_rate": 4.335115489756217e-06, "loss": 1.138, "step": 3680 }, { "epoch": 0.5211297515396051, "grad_norm": 12.146072795376535, "learning_rate": 4.33472618190543e-06, "loss": 1.3875, "step": 3681 }, { "epoch": 0.5212713244142422, "grad_norm": 9.531580184470373, "learning_rate": 4.334336777604458e-06, "loss": 1.4485, "step": 3682 }, { "epoch": 0.5214128972888794, "grad_norm": 8.865400253529078, "learning_rate": 4.333947276873767e-06, "loss": 1.4834, "step": 3683 }, { "epoch": 0.5215544701635166, "grad_norm": 8.923370075722556, "learning_rate": 4.333557679733836e-06, "loss": 1.3215, "step": 3684 }, { "epoch": 0.5216960430381539, "grad_norm": 8.08497833607637, "learning_rate": 4.333167986205145e-06, "loss": 1.258, "step": 3685 }, { "epoch": 0.5218376159127911, "grad_norm": 8.797134747913619, "learning_rate": 4.33277819630818e-06, "loss": 1.3794, "step": 3686 }, { "epoch": 0.5219791887874283, "grad_norm": 8.57299206415875, "learning_rate": 4.332388310063431e-06, "loss": 1.4255, "step": 3687 }, { "epoch": 0.5221207616620656, "grad_norm": 7.5749540321289865, "learning_rate": 4.331998327491396e-06, "loss": 1.2958, "step": 3688 }, { "epoch": 0.5222623345367028, "grad_norm": 9.085293432943997, "learning_rate": 4.331608248612574e-06, "loss": 1.3056, "step": 3689 }, { "epoch": 0.52240390741134, "grad_norm": 9.309875118348167, "learning_rate": 4.331218073447472e-06, "loss": 1.2508, "step": 3690 }, { "epoch": 0.5225454802859772, "grad_norm": 6.781244655905873, "learning_rate": 4.330827802016603e-06, "loss": 1.074, "step": 3691 }, { "epoch": 0.5226870531606145, "grad_norm": 8.588133374085094, "learning_rate": 4.3304374343404794e-06, "loss": 1.1986, "step": 3692 }, { "epoch": 0.5228286260352516, "grad_norm": 8.845392836175764, "learning_rate": 4.330046970439625e-06, "loss": 1.3916, "step": 3693 }, { "epoch": 0.5229701989098888, "grad_norm": 9.157712643815552, "learning_rate": 4.329656410334567e-06, "loss": 1.4306, "step": 3694 }, { "epoch": 0.5231117717845261, "grad_norm": 7.468568424587415, "learning_rate": 4.329265754045835e-06, "loss": 1.3161, "step": 3695 }, { "epoch": 0.5232533446591633, "grad_norm": 9.661809906984868, "learning_rate": 4.328875001593966e-06, "loss": 1.3855, "step": 3696 }, { "epoch": 0.5233949175338005, "grad_norm": 10.046344559165032, "learning_rate": 4.3284841529995025e-06, "loss": 1.2471, "step": 3697 }, { "epoch": 0.5235364904084377, "grad_norm": 9.783668487197572, "learning_rate": 4.32809320828299e-06, "loss": 1.3684, "step": 3698 }, { "epoch": 0.523678063283075, "grad_norm": 8.446637214703637, "learning_rate": 4.327702167464981e-06, "loss": 1.3046, "step": 3699 }, { "epoch": 0.5238196361577122, "grad_norm": 9.662885342868043, "learning_rate": 4.327311030566033e-06, "loss": 1.2554, "step": 3700 }, { "epoch": 0.5239612090323494, "grad_norm": 9.533697095568884, "learning_rate": 4.326919797606705e-06, "loss": 1.299, "step": 3701 }, { "epoch": 0.5241027819069867, "grad_norm": 10.043671804810923, "learning_rate": 4.326528468607566e-06, "loss": 1.461, "step": 3702 }, { "epoch": 0.5242443547816238, "grad_norm": 9.063247123702022, "learning_rate": 4.3261370435891866e-06, "loss": 1.3196, "step": 3703 }, { "epoch": 0.524385927656261, "grad_norm": 8.418371600714407, "learning_rate": 4.325745522572145e-06, "loss": 1.2188, "step": 3704 }, { "epoch": 0.5245275005308982, "grad_norm": 8.679098553938903, "learning_rate": 4.325353905577023e-06, "loss": 1.2621, "step": 3705 }, { "epoch": 0.5246690734055355, "grad_norm": 9.902775970596736, "learning_rate": 4.324962192624407e-06, "loss": 1.3326, "step": 3706 }, { "epoch": 0.5248106462801727, "grad_norm": 8.49646954987845, "learning_rate": 4.324570383734888e-06, "loss": 1.3364, "step": 3707 }, { "epoch": 0.5249522191548099, "grad_norm": 9.435015016512502, "learning_rate": 4.3241784789290665e-06, "loss": 1.3184, "step": 3708 }, { "epoch": 0.5250937920294472, "grad_norm": 7.803848236751282, "learning_rate": 4.323786478227541e-06, "loss": 1.2536, "step": 3709 }, { "epoch": 0.5252353649040844, "grad_norm": 8.806566337497168, "learning_rate": 4.323394381650921e-06, "loss": 1.3474, "step": 3710 }, { "epoch": 0.5253769377787216, "grad_norm": 11.336184498299778, "learning_rate": 4.323002189219818e-06, "loss": 1.2696, "step": 3711 }, { "epoch": 0.5255185106533589, "grad_norm": 8.99746456460422, "learning_rate": 4.322609900954848e-06, "loss": 1.376, "step": 3712 }, { "epoch": 0.525660083527996, "grad_norm": 8.925488062744979, "learning_rate": 4.322217516876635e-06, "loss": 1.3037, "step": 3713 }, { "epoch": 0.5258016564026332, "grad_norm": 8.48769430351045, "learning_rate": 4.321825037005807e-06, "loss": 1.4551, "step": 3714 }, { "epoch": 0.5259432292772704, "grad_norm": 10.143781032738074, "learning_rate": 4.321432461362994e-06, "loss": 1.2989, "step": 3715 }, { "epoch": 0.5260848021519077, "grad_norm": 8.499526964376551, "learning_rate": 4.3210397899688355e-06, "loss": 1.2254, "step": 3716 }, { "epoch": 0.5262263750265449, "grad_norm": 9.220475374821806, "learning_rate": 4.320647022843972e-06, "loss": 1.2873, "step": 3717 }, { "epoch": 0.5263679479011821, "grad_norm": 9.261975346376955, "learning_rate": 4.320254160009053e-06, "loss": 1.2735, "step": 3718 }, { "epoch": 0.5265095207758194, "grad_norm": 8.534135379451435, "learning_rate": 4.31986120148473e-06, "loss": 1.3807, "step": 3719 }, { "epoch": 0.5266510936504566, "grad_norm": 7.39427330506257, "learning_rate": 4.31946814729166e-06, "loss": 1.1963, "step": 3720 }, { "epoch": 0.5267926665250938, "grad_norm": 7.837385414660903, "learning_rate": 4.319074997450506e-06, "loss": 1.2716, "step": 3721 }, { "epoch": 0.526934239399731, "grad_norm": 10.246172678880134, "learning_rate": 4.318681751981937e-06, "loss": 1.2566, "step": 3722 }, { "epoch": 0.5270758122743683, "grad_norm": 9.192920149973514, "learning_rate": 4.318288410906623e-06, "loss": 1.2458, "step": 3723 }, { "epoch": 0.5272173851490054, "grad_norm": 9.242925877230254, "learning_rate": 4.3178949742452435e-06, "loss": 1.4706, "step": 3724 }, { "epoch": 0.5273589580236426, "grad_norm": 9.36737645764377, "learning_rate": 4.317501442018481e-06, "loss": 1.4173, "step": 3725 }, { "epoch": 0.5275005308982799, "grad_norm": 9.726227168541872, "learning_rate": 4.317107814247022e-06, "loss": 1.3803, "step": 3726 }, { "epoch": 0.5276421037729171, "grad_norm": 7.187997817339006, "learning_rate": 4.316714090951562e-06, "loss": 1.2546, "step": 3727 }, { "epoch": 0.5277836766475543, "grad_norm": 8.389041800996536, "learning_rate": 4.316320272152795e-06, "loss": 1.2617, "step": 3728 }, { "epoch": 0.5279252495221916, "grad_norm": 8.012227727158999, "learning_rate": 4.315926357871426e-06, "loss": 1.1987, "step": 3729 }, { "epoch": 0.5280668223968288, "grad_norm": 11.816103431037186, "learning_rate": 4.3155323481281625e-06, "loss": 1.4112, "step": 3730 }, { "epoch": 0.528208395271466, "grad_norm": 9.837851267727011, "learning_rate": 4.3151382429437175e-06, "loss": 1.3665, "step": 3731 }, { "epoch": 0.5283499681461032, "grad_norm": 8.2459026912672, "learning_rate": 4.314744042338808e-06, "loss": 1.3858, "step": 3732 }, { "epoch": 0.5284915410207405, "grad_norm": 8.227777317117797, "learning_rate": 4.314349746334158e-06, "loss": 1.325, "step": 3733 }, { "epoch": 0.5286331138953776, "grad_norm": 8.044267724603506, "learning_rate": 4.313955354950494e-06, "loss": 1.3163, "step": 3734 }, { "epoch": 0.5287746867700148, "grad_norm": 9.149196785447762, "learning_rate": 4.313560868208549e-06, "loss": 1.3381, "step": 3735 }, { "epoch": 0.528916259644652, "grad_norm": 10.910249605199411, "learning_rate": 4.313166286129063e-06, "loss": 1.2926, "step": 3736 }, { "epoch": 0.5290578325192893, "grad_norm": 9.420299088251358, "learning_rate": 4.312771608732776e-06, "loss": 1.3802, "step": 3737 }, { "epoch": 0.5291994053939265, "grad_norm": 7.1822373736466165, "learning_rate": 4.312376836040437e-06, "loss": 1.2094, "step": 3738 }, { "epoch": 0.5293409782685637, "grad_norm": 9.514509264047438, "learning_rate": 4.3119819680728e-06, "loss": 1.3992, "step": 3739 }, { "epoch": 0.529482551143201, "grad_norm": 9.179810088942089, "learning_rate": 4.311587004850622e-06, "loss": 1.2906, "step": 3740 }, { "epoch": 0.5296241240178382, "grad_norm": 8.193832627497429, "learning_rate": 4.311191946394665e-06, "loss": 1.2981, "step": 3741 }, { "epoch": 0.5297656968924754, "grad_norm": 9.607520146186268, "learning_rate": 4.3107967927256985e-06, "loss": 1.2537, "step": 3742 }, { "epoch": 0.5299072697671127, "grad_norm": 9.35474945042749, "learning_rate": 4.310401543864495e-06, "loss": 1.3894, "step": 3743 }, { "epoch": 0.5300488426417499, "grad_norm": 8.19207868603825, "learning_rate": 4.3100061998318325e-06, "loss": 1.3194, "step": 3744 }, { "epoch": 0.530190415516387, "grad_norm": 9.319360855518532, "learning_rate": 4.309610760648493e-06, "loss": 1.2224, "step": 3745 }, { "epoch": 0.5303319883910242, "grad_norm": 8.166832383738633, "learning_rate": 4.309215226335265e-06, "loss": 1.4899, "step": 3746 }, { "epoch": 0.5304735612656615, "grad_norm": 7.6209468387460975, "learning_rate": 4.308819596912942e-06, "loss": 1.2953, "step": 3747 }, { "epoch": 0.5306151341402987, "grad_norm": 8.33983460186852, "learning_rate": 4.308423872402322e-06, "loss": 1.3378, "step": 3748 }, { "epoch": 0.5307567070149359, "grad_norm": 11.117907717249562, "learning_rate": 4.308028052824207e-06, "loss": 1.2586, "step": 3749 }, { "epoch": 0.5308982798895732, "grad_norm": 8.57107537768061, "learning_rate": 4.307632138199405e-06, "loss": 1.3302, "step": 3750 }, { "epoch": 0.5310398527642104, "grad_norm": 8.718196509509175, "learning_rate": 4.30723612854873e-06, "loss": 1.3645, "step": 3751 }, { "epoch": 0.5311814256388476, "grad_norm": 8.200230609163198, "learning_rate": 4.306840023892998e-06, "loss": 1.2245, "step": 3752 }, { "epoch": 0.5313229985134849, "grad_norm": 7.938204756560521, "learning_rate": 4.306443824253035e-06, "loss": 1.3367, "step": 3753 }, { "epoch": 0.5314645713881221, "grad_norm": 8.581486722455251, "learning_rate": 4.306047529649665e-06, "loss": 1.27, "step": 3754 }, { "epoch": 0.5316061442627592, "grad_norm": 9.008671291997818, "learning_rate": 4.305651140103725e-06, "loss": 1.2821, "step": 3755 }, { "epoch": 0.5317477171373964, "grad_norm": 8.407587221562652, "learning_rate": 4.305254655636049e-06, "loss": 1.3098, "step": 3756 }, { "epoch": 0.5318892900120337, "grad_norm": 8.358605122988571, "learning_rate": 4.304858076267483e-06, "loss": 1.3409, "step": 3757 }, { "epoch": 0.5320308628866709, "grad_norm": 10.766707101594408, "learning_rate": 4.304461402018873e-06, "loss": 1.3799, "step": 3758 }, { "epoch": 0.5321724357613081, "grad_norm": 8.36639098464267, "learning_rate": 4.304064632911073e-06, "loss": 1.2808, "step": 3759 }, { "epoch": 0.5323140086359454, "grad_norm": 8.113480603593183, "learning_rate": 4.303667768964941e-06, "loss": 1.1976, "step": 3760 }, { "epoch": 0.5324555815105826, "grad_norm": 8.901602834322565, "learning_rate": 4.303270810201339e-06, "loss": 1.2706, "step": 3761 }, { "epoch": 0.5325971543852198, "grad_norm": 9.071849130980599, "learning_rate": 4.302873756641135e-06, "loss": 1.2448, "step": 3762 }, { "epoch": 0.532738727259857, "grad_norm": 10.29780150718335, "learning_rate": 4.302476608305201e-06, "loss": 1.5945, "step": 3763 }, { "epoch": 0.5328803001344943, "grad_norm": 7.131010498518458, "learning_rate": 4.3020793652144165e-06, "loss": 1.3053, "step": 3764 }, { "epoch": 0.5330218730091314, "grad_norm": 9.07283094141536, "learning_rate": 4.301682027389663e-06, "loss": 1.3515, "step": 3765 }, { "epoch": 0.5331634458837686, "grad_norm": 11.180529932554803, "learning_rate": 4.301284594851829e-06, "loss": 1.3664, "step": 3766 }, { "epoch": 0.5333050187584059, "grad_norm": 11.172287811975364, "learning_rate": 4.300887067621807e-06, "loss": 1.4482, "step": 3767 }, { "epoch": 0.5334465916330431, "grad_norm": 10.850151700835907, "learning_rate": 4.300489445720495e-06, "loss": 1.3595, "step": 3768 }, { "epoch": 0.5335881645076803, "grad_norm": 7.363800338135322, "learning_rate": 4.300091729168795e-06, "loss": 1.1683, "step": 3769 }, { "epoch": 0.5337297373823175, "grad_norm": 10.455684696291378, "learning_rate": 4.299693917987615e-06, "loss": 1.2372, "step": 3770 }, { "epoch": 0.5338713102569548, "grad_norm": 10.216469606508069, "learning_rate": 4.299296012197868e-06, "loss": 1.2713, "step": 3771 }, { "epoch": 0.534012883131592, "grad_norm": 8.05126122492692, "learning_rate": 4.29889801182047e-06, "loss": 1.2125, "step": 3772 }, { "epoch": 0.5341544560062292, "grad_norm": 8.856083441805563, "learning_rate": 4.298499916876347e-06, "loss": 1.2822, "step": 3773 }, { "epoch": 0.5342960288808665, "grad_norm": 9.315437416867665, "learning_rate": 4.298101727386422e-06, "loss": 1.2636, "step": 3774 }, { "epoch": 0.5344376017555037, "grad_norm": 8.90434250063711, "learning_rate": 4.297703443371632e-06, "loss": 1.2247, "step": 3775 }, { "epoch": 0.5345791746301408, "grad_norm": 9.398827548665354, "learning_rate": 4.2973050648529114e-06, "loss": 1.351, "step": 3776 }, { "epoch": 0.534720747504778, "grad_norm": 8.691716111049539, "learning_rate": 4.296906591851203e-06, "loss": 1.2276, "step": 3777 }, { "epoch": 0.5348623203794153, "grad_norm": 7.4955217342981495, "learning_rate": 4.2965080243874555e-06, "loss": 1.2306, "step": 3778 }, { "epoch": 0.5350038932540525, "grad_norm": 9.717986932463495, "learning_rate": 4.296109362482621e-06, "loss": 1.2148, "step": 3779 }, { "epoch": 0.5351454661286897, "grad_norm": 12.919732170990125, "learning_rate": 4.2957106061576565e-06, "loss": 1.449, "step": 3780 }, { "epoch": 0.535287039003327, "grad_norm": 8.310619629161563, "learning_rate": 4.295311755433525e-06, "loss": 1.2079, "step": 3781 }, { "epoch": 0.5354286118779642, "grad_norm": 8.333207803416514, "learning_rate": 4.294912810331191e-06, "loss": 1.2454, "step": 3782 }, { "epoch": 0.5355701847526014, "grad_norm": 8.226709493137134, "learning_rate": 4.2945137708716315e-06, "loss": 1.2467, "step": 3783 }, { "epoch": 0.5357117576272387, "grad_norm": 8.564012825412403, "learning_rate": 4.294114637075819e-06, "loss": 1.25, "step": 3784 }, { "epoch": 0.5358533305018759, "grad_norm": 10.986431075906522, "learning_rate": 4.293715408964738e-06, "loss": 1.2417, "step": 3785 }, { "epoch": 0.535994903376513, "grad_norm": 12.830127332119446, "learning_rate": 4.293316086559377e-06, "loss": 1.343, "step": 3786 }, { "epoch": 0.5361364762511502, "grad_norm": 11.530465282835376, "learning_rate": 4.292916669880726e-06, "loss": 1.2581, "step": 3787 }, { "epoch": 0.5362780491257875, "grad_norm": 7.81021768420434, "learning_rate": 4.292517158949781e-06, "loss": 1.3628, "step": 3788 }, { "epoch": 0.5364196220004247, "grad_norm": 11.09230053526097, "learning_rate": 4.292117553787547e-06, "loss": 1.2315, "step": 3789 }, { "epoch": 0.5365611948750619, "grad_norm": 12.064321429840138, "learning_rate": 4.291717854415029e-06, "loss": 1.4015, "step": 3790 }, { "epoch": 0.5367027677496992, "grad_norm": 11.411218015901953, "learning_rate": 4.29131806085324e-06, "loss": 1.4319, "step": 3791 }, { "epoch": 0.5368443406243364, "grad_norm": 9.096376439607955, "learning_rate": 4.2909181731231955e-06, "loss": 1.399, "step": 3792 }, { "epoch": 0.5369859134989736, "grad_norm": 10.253796502388877, "learning_rate": 4.290518191245918e-06, "loss": 1.5076, "step": 3793 }, { "epoch": 0.5371274863736109, "grad_norm": 8.416439191809895, "learning_rate": 4.290118115242434e-06, "loss": 1.3435, "step": 3794 }, { "epoch": 0.5372690592482481, "grad_norm": 12.65265610142386, "learning_rate": 4.289717945133775e-06, "loss": 1.3518, "step": 3795 }, { "epoch": 0.5374106321228852, "grad_norm": 8.239730454267566, "learning_rate": 4.289317680940979e-06, "loss": 1.2544, "step": 3796 }, { "epoch": 0.5375522049975224, "grad_norm": 10.11796344152822, "learning_rate": 4.288917322685087e-06, "loss": 1.4314, "step": 3797 }, { "epoch": 0.5376937778721597, "grad_norm": 10.265843869551295, "learning_rate": 4.288516870387145e-06, "loss": 1.3034, "step": 3798 }, { "epoch": 0.5378353507467969, "grad_norm": 7.357765102659108, "learning_rate": 4.288116324068205e-06, "loss": 1.3147, "step": 3799 }, { "epoch": 0.5379769236214341, "grad_norm": 10.353789191525902, "learning_rate": 4.287715683749322e-06, "loss": 1.119, "step": 3800 }, { "epoch": 0.5381184964960714, "grad_norm": 11.382719334081665, "learning_rate": 4.287314949451559e-06, "loss": 1.3458, "step": 3801 }, { "epoch": 0.5382600693707086, "grad_norm": 10.026398718615196, "learning_rate": 4.286914121195982e-06, "loss": 1.2459, "step": 3802 }, { "epoch": 0.5384016422453458, "grad_norm": 9.596730517905605, "learning_rate": 4.286513199003661e-06, "loss": 1.3503, "step": 3803 }, { "epoch": 0.538543215119983, "grad_norm": 9.417496051103955, "learning_rate": 4.2861121828956745e-06, "loss": 1.3815, "step": 3804 }, { "epoch": 0.5386847879946203, "grad_norm": 8.762975608554052, "learning_rate": 4.285711072893102e-06, "loss": 1.3892, "step": 3805 }, { "epoch": 0.5388263608692575, "grad_norm": 10.854305176147093, "learning_rate": 4.28530986901703e-06, "loss": 1.3796, "step": 3806 }, { "epoch": 0.5389679337438946, "grad_norm": 9.194964017398329, "learning_rate": 4.2849085712885495e-06, "loss": 1.3069, "step": 3807 }, { "epoch": 0.5391095066185319, "grad_norm": 8.462941785631767, "learning_rate": 4.284507179728756e-06, "loss": 1.189, "step": 3808 }, { "epoch": 0.5392510794931691, "grad_norm": 8.592889694154024, "learning_rate": 4.2841056943587505e-06, "loss": 1.4696, "step": 3809 }, { "epoch": 0.5393926523678063, "grad_norm": 10.675523617340488, "learning_rate": 4.283704115199639e-06, "loss": 1.2997, "step": 3810 }, { "epoch": 0.5395342252424435, "grad_norm": 9.256598516132543, "learning_rate": 4.283302442272532e-06, "loss": 1.3584, "step": 3811 }, { "epoch": 0.5396757981170808, "grad_norm": 9.037229043742473, "learning_rate": 4.282900675598546e-06, "loss": 1.3556, "step": 3812 }, { "epoch": 0.539817370991718, "grad_norm": 9.087580210083816, "learning_rate": 4.2824988151988e-06, "loss": 1.2756, "step": 3813 }, { "epoch": 0.5399589438663552, "grad_norm": 9.099506876233734, "learning_rate": 4.282096861094421e-06, "loss": 1.3425, "step": 3814 }, { "epoch": 0.5401005167409925, "grad_norm": 7.9981274797525606, "learning_rate": 4.281694813306538e-06, "loss": 1.3205, "step": 3815 }, { "epoch": 0.5402420896156297, "grad_norm": 10.945932505525846, "learning_rate": 4.281292671856288e-06, "loss": 1.2949, "step": 3816 }, { "epoch": 0.5403836624902668, "grad_norm": 9.951468001911397, "learning_rate": 4.28089043676481e-06, "loss": 1.366, "step": 3817 }, { "epoch": 0.540525235364904, "grad_norm": 8.392730164654362, "learning_rate": 4.28048810805325e-06, "loss": 1.4103, "step": 3818 }, { "epoch": 0.5406668082395413, "grad_norm": 10.772852183950455, "learning_rate": 4.280085685742758e-06, "loss": 1.51, "step": 3819 }, { "epoch": 0.5408083811141785, "grad_norm": 9.209863964157194, "learning_rate": 4.279683169854488e-06, "loss": 1.3649, "step": 3820 }, { "epoch": 0.5409499539888157, "grad_norm": 9.89426445554246, "learning_rate": 4.279280560409601e-06, "loss": 1.2052, "step": 3821 }, { "epoch": 0.541091526863453, "grad_norm": 8.602507966316967, "learning_rate": 4.278877857429261e-06, "loss": 1.3298, "step": 3822 }, { "epoch": 0.5412330997380902, "grad_norm": 7.288258335446747, "learning_rate": 4.278475060934639e-06, "loss": 1.3723, "step": 3823 }, { "epoch": 0.5413746726127274, "grad_norm": 10.062815785635973, "learning_rate": 4.278072170946909e-06, "loss": 1.3148, "step": 3824 }, { "epoch": 0.5415162454873647, "grad_norm": 9.021728565175907, "learning_rate": 4.277669187487251e-06, "loss": 1.2265, "step": 3825 }, { "epoch": 0.5416578183620019, "grad_norm": 9.875565911024456, "learning_rate": 4.2772661105768495e-06, "loss": 1.3717, "step": 3826 }, { "epoch": 0.541799391236639, "grad_norm": 9.287798385653032, "learning_rate": 4.276862940236894e-06, "loss": 1.1936, "step": 3827 }, { "epoch": 0.5419409641112762, "grad_norm": 7.438987775466173, "learning_rate": 4.276459676488578e-06, "loss": 1.1359, "step": 3828 }, { "epoch": 0.5420825369859135, "grad_norm": 9.610728594959726, "learning_rate": 4.276056319353101e-06, "loss": 1.3033, "step": 3829 }, { "epoch": 0.5422241098605507, "grad_norm": 9.769129058844467, "learning_rate": 4.275652868851669e-06, "loss": 1.1335, "step": 3830 }, { "epoch": 0.5423656827351879, "grad_norm": 9.548318157595707, "learning_rate": 4.275249325005488e-06, "loss": 1.4988, "step": 3831 }, { "epoch": 0.5425072556098252, "grad_norm": 8.376892331543802, "learning_rate": 4.2748456878357746e-06, "loss": 1.2484, "step": 3832 }, { "epoch": 0.5426488284844624, "grad_norm": 8.369703297523408, "learning_rate": 4.274441957363747e-06, "loss": 1.3197, "step": 3833 }, { "epoch": 0.5427904013590996, "grad_norm": 10.780061783480837, "learning_rate": 4.274038133610629e-06, "loss": 1.2089, "step": 3834 }, { "epoch": 0.5429319742337368, "grad_norm": 8.484618237909494, "learning_rate": 4.273634216597648e-06, "loss": 1.265, "step": 3835 }, { "epoch": 0.5430735471083741, "grad_norm": 9.896298388880451, "learning_rate": 4.273230206346039e-06, "loss": 1.2697, "step": 3836 }, { "epoch": 0.5432151199830113, "grad_norm": 9.02052256083359, "learning_rate": 4.27282610287704e-06, "loss": 1.2435, "step": 3837 }, { "epoch": 0.5433566928576484, "grad_norm": 9.389666262414828, "learning_rate": 4.272421906211895e-06, "loss": 1.2807, "step": 3838 }, { "epoch": 0.5434982657322857, "grad_norm": 8.862092854303704, "learning_rate": 4.272017616371853e-06, "loss": 1.133, "step": 3839 }, { "epoch": 0.5436398386069229, "grad_norm": 8.87649781388704, "learning_rate": 4.2716132333781646e-06, "loss": 1.2745, "step": 3840 }, { "epoch": 0.5437814114815601, "grad_norm": 8.060252741407746, "learning_rate": 4.27120875725209e-06, "loss": 1.3365, "step": 3841 }, { "epoch": 0.5439229843561973, "grad_norm": 10.195015785313576, "learning_rate": 4.270804188014892e-06, "loss": 1.2496, "step": 3842 }, { "epoch": 0.5440645572308346, "grad_norm": 9.210757757735228, "learning_rate": 4.270399525687839e-06, "loss": 1.3056, "step": 3843 }, { "epoch": 0.5442061301054718, "grad_norm": 9.63697209081263, "learning_rate": 4.269994770292201e-06, "loss": 1.3461, "step": 3844 }, { "epoch": 0.544347702980109, "grad_norm": 10.935560653958545, "learning_rate": 4.269589921849259e-06, "loss": 1.346, "step": 3845 }, { "epoch": 0.5444892758547463, "grad_norm": 7.843649570992402, "learning_rate": 4.269184980380294e-06, "loss": 1.2839, "step": 3846 }, { "epoch": 0.5446308487293835, "grad_norm": 8.791173357630155, "learning_rate": 4.268779945906594e-06, "loss": 1.4653, "step": 3847 }, { "epoch": 0.5447724216040206, "grad_norm": 8.60438750370791, "learning_rate": 4.26837481844945e-06, "loss": 1.2194, "step": 3848 }, { "epoch": 0.5449139944786578, "grad_norm": 11.286698565752527, "learning_rate": 4.267969598030162e-06, "loss": 1.2291, "step": 3849 }, { "epoch": 0.5450555673532951, "grad_norm": 11.30844937299892, "learning_rate": 4.267564284670029e-06, "loss": 1.3229, "step": 3850 }, { "epoch": 0.5451971402279323, "grad_norm": 7.571405133110846, "learning_rate": 4.267158878390361e-06, "loss": 1.3607, "step": 3851 }, { "epoch": 0.5453387131025695, "grad_norm": 9.64490778679715, "learning_rate": 4.266753379212467e-06, "loss": 1.4209, "step": 3852 }, { "epoch": 0.5454802859772068, "grad_norm": 9.44048418775323, "learning_rate": 4.266347787157666e-06, "loss": 1.2349, "step": 3853 }, { "epoch": 0.545621858851844, "grad_norm": 10.792723811349312, "learning_rate": 4.265942102247278e-06, "loss": 1.2019, "step": 3854 }, { "epoch": 0.5457634317264812, "grad_norm": 8.462127686372307, "learning_rate": 4.265536324502631e-06, "loss": 1.4757, "step": 3855 }, { "epoch": 0.5459050046011185, "grad_norm": 9.020336909800465, "learning_rate": 4.265130453945056e-06, "loss": 1.4742, "step": 3856 }, { "epoch": 0.5460465774757557, "grad_norm": 9.984397923976031, "learning_rate": 4.26472449059589e-06, "loss": 1.1975, "step": 3857 }, { "epoch": 0.5461881503503928, "grad_norm": 10.000811353193699, "learning_rate": 4.264318434476472e-06, "loss": 1.1722, "step": 3858 }, { "epoch": 0.54632972322503, "grad_norm": 9.09923270207997, "learning_rate": 4.26391228560815e-06, "loss": 1.2925, "step": 3859 }, { "epoch": 0.5464712960996673, "grad_norm": 8.567224688730155, "learning_rate": 4.263506044012275e-06, "loss": 1.4225, "step": 3860 }, { "epoch": 0.5466128689743045, "grad_norm": 8.330194670565131, "learning_rate": 4.2630997097102e-06, "loss": 1.1049, "step": 3861 }, { "epoch": 0.5467544418489417, "grad_norm": 9.5470500124609, "learning_rate": 4.26269328272329e-06, "loss": 1.2031, "step": 3862 }, { "epoch": 0.546896014723579, "grad_norm": 9.326081274101805, "learning_rate": 4.262286763072908e-06, "loss": 1.1552, "step": 3863 }, { "epoch": 0.5470375875982162, "grad_norm": 8.004953758015642, "learning_rate": 4.261880150780424e-06, "loss": 1.2716, "step": 3864 }, { "epoch": 0.5471791604728534, "grad_norm": 9.31978368418076, "learning_rate": 4.261473445867215e-06, "loss": 1.2814, "step": 3865 }, { "epoch": 0.5473207333474907, "grad_norm": 10.30458902886044, "learning_rate": 4.26106664835466e-06, "loss": 1.2704, "step": 3866 }, { "epoch": 0.5474623062221279, "grad_norm": 9.483258000919923, "learning_rate": 4.260659758264145e-06, "loss": 1.444, "step": 3867 }, { "epoch": 0.5476038790967651, "grad_norm": 8.50272056573912, "learning_rate": 4.260252775617058e-06, "loss": 1.2459, "step": 3868 }, { "epoch": 0.5477454519714022, "grad_norm": 10.287330568570779, "learning_rate": 4.259845700434797e-06, "loss": 1.3593, "step": 3869 }, { "epoch": 0.5478870248460395, "grad_norm": 10.38580899022063, "learning_rate": 4.259438532738759e-06, "loss": 1.3086, "step": 3870 }, { "epoch": 0.5480285977206767, "grad_norm": 13.206299313310083, "learning_rate": 4.259031272550349e-06, "loss": 1.1755, "step": 3871 }, { "epoch": 0.5481701705953139, "grad_norm": 8.029775996331997, "learning_rate": 4.258623919890976e-06, "loss": 1.2079, "step": 3872 }, { "epoch": 0.5483117434699512, "grad_norm": 9.207539611946636, "learning_rate": 4.258216474782056e-06, "loss": 1.2685, "step": 3873 }, { "epoch": 0.5484533163445884, "grad_norm": 8.362834236068592, "learning_rate": 4.257808937245006e-06, "loss": 1.2919, "step": 3874 }, { "epoch": 0.5485948892192256, "grad_norm": 10.937712051515847, "learning_rate": 4.257401307301251e-06, "loss": 1.4021, "step": 3875 }, { "epoch": 0.5487364620938628, "grad_norm": 8.617910816481006, "learning_rate": 4.25699358497222e-06, "loss": 1.2984, "step": 3876 }, { "epoch": 0.5488780349685001, "grad_norm": 9.977715266578716, "learning_rate": 4.256585770279345e-06, "loss": 1.4345, "step": 3877 }, { "epoch": 0.5490196078431373, "grad_norm": 8.197416168277508, "learning_rate": 4.256177863244067e-06, "loss": 1.2174, "step": 3878 }, { "epoch": 0.5491611807177744, "grad_norm": 9.198009474658006, "learning_rate": 4.255769863887829e-06, "loss": 1.2334, "step": 3879 }, { "epoch": 0.5493027535924117, "grad_norm": 10.268482571147738, "learning_rate": 4.2553617722320775e-06, "loss": 1.4306, "step": 3880 }, { "epoch": 0.5494443264670489, "grad_norm": 8.838350083015575, "learning_rate": 4.254953588298266e-06, "loss": 1.2747, "step": 3881 }, { "epoch": 0.5495858993416861, "grad_norm": 9.648404684281239, "learning_rate": 4.254545312107854e-06, "loss": 1.2828, "step": 3882 }, { "epoch": 0.5497274722163233, "grad_norm": 9.836843047448543, "learning_rate": 4.254136943682302e-06, "loss": 1.3361, "step": 3883 }, { "epoch": 0.5498690450909606, "grad_norm": 10.753850912498377, "learning_rate": 4.253728483043081e-06, "loss": 1.3186, "step": 3884 }, { "epoch": 0.5500106179655978, "grad_norm": 9.270613512726316, "learning_rate": 4.253319930211659e-06, "loss": 1.2812, "step": 3885 }, { "epoch": 0.550152190840235, "grad_norm": 9.870724295618695, "learning_rate": 4.252911285209516e-06, "loss": 1.3674, "step": 3886 }, { "epoch": 0.5502937637148723, "grad_norm": 8.822222169267008, "learning_rate": 4.252502548058134e-06, "loss": 1.265, "step": 3887 }, { "epoch": 0.5504353365895095, "grad_norm": 8.88414712104428, "learning_rate": 4.252093718779e-06, "loss": 1.179, "step": 3888 }, { "epoch": 0.5505769094641467, "grad_norm": 9.032675449114896, "learning_rate": 4.2516847973936045e-06, "loss": 1.1383, "step": 3889 }, { "epoch": 0.5507184823387838, "grad_norm": 8.974505023536421, "learning_rate": 4.251275783923447e-06, "loss": 1.3363, "step": 3890 }, { "epoch": 0.5508600552134211, "grad_norm": 11.909944216503886, "learning_rate": 4.250866678390026e-06, "loss": 1.3671, "step": 3891 }, { "epoch": 0.5510016280880583, "grad_norm": 9.085628068005905, "learning_rate": 4.25045748081485e-06, "loss": 1.2891, "step": 3892 }, { "epoch": 0.5511432009626955, "grad_norm": 10.615959360305052, "learning_rate": 4.250048191219429e-06, "loss": 1.3517, "step": 3893 }, { "epoch": 0.5512847738373328, "grad_norm": 8.26563699934148, "learning_rate": 4.24963880962528e-06, "loss": 1.2922, "step": 3894 }, { "epoch": 0.55142634671197, "grad_norm": 8.213152353513921, "learning_rate": 4.249229336053924e-06, "loss": 1.1266, "step": 3895 }, { "epoch": 0.5515679195866072, "grad_norm": 10.072769137659511, "learning_rate": 4.248819770526884e-06, "loss": 1.2876, "step": 3896 }, { "epoch": 0.5517094924612445, "grad_norm": 10.924830133789195, "learning_rate": 4.248410113065694e-06, "loss": 1.3718, "step": 3897 }, { "epoch": 0.5518510653358817, "grad_norm": 8.877531738224516, "learning_rate": 4.248000363691888e-06, "loss": 1.3678, "step": 3898 }, { "epoch": 0.5519926382105189, "grad_norm": 7.9190900071755745, "learning_rate": 4.247590522427006e-06, "loss": 1.166, "step": 3899 }, { "epoch": 0.552134211085156, "grad_norm": 9.59546837343879, "learning_rate": 4.2471805892925935e-06, "loss": 1.2101, "step": 3900 }, { "epoch": 0.5522757839597933, "grad_norm": 10.600981008371052, "learning_rate": 4.2467705643102005e-06, "loss": 1.2175, "step": 3901 }, { "epoch": 0.5524173568344305, "grad_norm": 9.066829515378432, "learning_rate": 4.246360447501381e-06, "loss": 1.2571, "step": 3902 }, { "epoch": 0.5525589297090677, "grad_norm": 8.946474064868202, "learning_rate": 4.245950238887695e-06, "loss": 1.3015, "step": 3903 }, { "epoch": 0.552700502583705, "grad_norm": 8.60411395656398, "learning_rate": 4.245539938490706e-06, "loss": 1.2021, "step": 3904 }, { "epoch": 0.5528420754583422, "grad_norm": 10.611472819911233, "learning_rate": 4.245129546331985e-06, "loss": 1.3199, "step": 3905 }, { "epoch": 0.5529836483329794, "grad_norm": 9.527245252412126, "learning_rate": 4.244719062433105e-06, "loss": 1.3476, "step": 3906 }, { "epoch": 0.5531252212076166, "grad_norm": 10.188152643372986, "learning_rate": 4.2443084868156434e-06, "loss": 1.2356, "step": 3907 }, { "epoch": 0.5532667940822539, "grad_norm": 7.848986576048916, "learning_rate": 4.243897819501187e-06, "loss": 1.3208, "step": 3908 }, { "epoch": 0.5534083669568911, "grad_norm": 8.180269033023102, "learning_rate": 4.243487060511321e-06, "loss": 1.15, "step": 3909 }, { "epoch": 0.5535499398315282, "grad_norm": 9.1096928379103, "learning_rate": 4.243076209867642e-06, "loss": 1.222, "step": 3910 }, { "epoch": 0.5536915127061655, "grad_norm": 11.483003744665417, "learning_rate": 4.242665267591744e-06, "loss": 1.3441, "step": 3911 }, { "epoch": 0.5538330855808027, "grad_norm": 8.968909072794455, "learning_rate": 4.242254233705234e-06, "loss": 1.1886, "step": 3912 }, { "epoch": 0.5539746584554399, "grad_norm": 8.052105020655896, "learning_rate": 4.241843108229718e-06, "loss": 1.2047, "step": 3913 }, { "epoch": 0.5541162313300771, "grad_norm": 9.475547641840706, "learning_rate": 4.241431891186808e-06, "loss": 1.3334, "step": 3914 }, { "epoch": 0.5542578042047144, "grad_norm": 8.920308257793701, "learning_rate": 4.241020582598122e-06, "loss": 1.4727, "step": 3915 }, { "epoch": 0.5543993770793516, "grad_norm": 12.93356130602243, "learning_rate": 4.240609182485282e-06, "loss": 1.4707, "step": 3916 }, { "epoch": 0.5545409499539888, "grad_norm": 9.251476685471157, "learning_rate": 4.240197690869916e-06, "loss": 1.281, "step": 3917 }, { "epoch": 0.5546825228286261, "grad_norm": 9.269773638450157, "learning_rate": 4.239786107773655e-06, "loss": 1.2668, "step": 3918 }, { "epoch": 0.5548240957032633, "grad_norm": 9.605753096645309, "learning_rate": 4.239374433218134e-06, "loss": 1.2175, "step": 3919 }, { "epoch": 0.5549656685779005, "grad_norm": 9.44602934560068, "learning_rate": 4.238962667224997e-06, "loss": 1.1684, "step": 3920 }, { "epoch": 0.5551072414525376, "grad_norm": 8.4638518072182, "learning_rate": 4.238550809815889e-06, "loss": 1.2356, "step": 3921 }, { "epoch": 0.5552488143271749, "grad_norm": 9.708291464348312, "learning_rate": 4.238138861012461e-06, "loss": 1.3439, "step": 3922 }, { "epoch": 0.5553903872018121, "grad_norm": 8.007427105817477, "learning_rate": 4.23772682083637e-06, "loss": 1.0959, "step": 3923 }, { "epoch": 0.5555319600764493, "grad_norm": 10.06603145826833, "learning_rate": 4.237314689309275e-06, "loss": 1.235, "step": 3924 }, { "epoch": 0.5556735329510866, "grad_norm": 10.216550257788677, "learning_rate": 4.236902466452843e-06, "loss": 1.303, "step": 3925 }, { "epoch": 0.5558151058257238, "grad_norm": 9.261371530422032, "learning_rate": 4.2364901522887415e-06, "loss": 1.3251, "step": 3926 }, { "epoch": 0.555956678700361, "grad_norm": 7.663141628863676, "learning_rate": 4.236077746838649e-06, "loss": 1.2319, "step": 3927 }, { "epoch": 0.5560982515749983, "grad_norm": 9.258850400021618, "learning_rate": 4.2356652501242435e-06, "loss": 1.4111, "step": 3928 }, { "epoch": 0.5562398244496355, "grad_norm": 10.22504569418847, "learning_rate": 4.235252662167211e-06, "loss": 1.3654, "step": 3929 }, { "epoch": 0.5563813973242727, "grad_norm": 10.49624239578871, "learning_rate": 4.234839982989238e-06, "loss": 1.2713, "step": 3930 }, { "epoch": 0.5565229701989098, "grad_norm": 10.477913652035491, "learning_rate": 4.234427212612021e-06, "loss": 1.3615, "step": 3931 }, { "epoch": 0.5566645430735471, "grad_norm": 8.923713775189336, "learning_rate": 4.23401435105726e-06, "loss": 1.4025, "step": 3932 }, { "epoch": 0.5568061159481843, "grad_norm": 7.778498764529385, "learning_rate": 4.2336013983466565e-06, "loss": 1.1741, "step": 3933 }, { "epoch": 0.5569476888228215, "grad_norm": 8.31098086594486, "learning_rate": 4.233188354501921e-06, "loss": 1.444, "step": 3934 }, { "epoch": 0.5570892616974588, "grad_norm": 10.002611963569816, "learning_rate": 4.2327752195447645e-06, "loss": 1.3012, "step": 3935 }, { "epoch": 0.557230834572096, "grad_norm": 9.265312922916868, "learning_rate": 4.232361993496908e-06, "loss": 1.1693, "step": 3936 }, { "epoch": 0.5573724074467332, "grad_norm": 7.822657215541612, "learning_rate": 4.231948676380073e-06, "loss": 1.258, "step": 3937 }, { "epoch": 0.5575139803213705, "grad_norm": 10.559329307527287, "learning_rate": 4.231535268215987e-06, "loss": 1.3802, "step": 3938 }, { "epoch": 0.5576555531960077, "grad_norm": 8.962176209882966, "learning_rate": 4.231121769026383e-06, "loss": 1.2706, "step": 3939 }, { "epoch": 0.5577971260706449, "grad_norm": 8.705316291685286, "learning_rate": 4.230708178832999e-06, "loss": 1.2669, "step": 3940 }, { "epoch": 0.557938698945282, "grad_norm": 7.607993750568635, "learning_rate": 4.230294497657576e-06, "loss": 1.3831, "step": 3941 }, { "epoch": 0.5580802718199193, "grad_norm": 8.035168593512639, "learning_rate": 4.2298807255218615e-06, "loss": 1.1535, "step": 3942 }, { "epoch": 0.5582218446945565, "grad_norm": 10.220256542711056, "learning_rate": 4.229466862447608e-06, "loss": 1.4397, "step": 3943 }, { "epoch": 0.5583634175691937, "grad_norm": 9.789522712770406, "learning_rate": 4.22905290845657e-06, "loss": 1.3219, "step": 3944 }, { "epoch": 0.558504990443831, "grad_norm": 9.173154673426021, "learning_rate": 4.22863886357051e-06, "loss": 1.299, "step": 3945 }, { "epoch": 0.5586465633184682, "grad_norm": 9.434174008422328, "learning_rate": 4.228224727811194e-06, "loss": 1.3329, "step": 3946 }, { "epoch": 0.5587881361931054, "grad_norm": 9.233007959171328, "learning_rate": 4.227810501200393e-06, "loss": 1.3644, "step": 3947 }, { "epoch": 0.5589297090677426, "grad_norm": 11.008831293787644, "learning_rate": 4.227396183759882e-06, "loss": 1.3278, "step": 3948 }, { "epoch": 0.5590712819423799, "grad_norm": 8.857562920941488, "learning_rate": 4.226981775511442e-06, "loss": 1.3322, "step": 3949 }, { "epoch": 0.5592128548170171, "grad_norm": 8.709322737806284, "learning_rate": 4.2265672764768565e-06, "loss": 1.305, "step": 3950 }, { "epoch": 0.5593544276916543, "grad_norm": 11.057660957215434, "learning_rate": 4.226152686677918e-06, "loss": 1.3068, "step": 3951 }, { "epoch": 0.5594960005662915, "grad_norm": 7.660894238736714, "learning_rate": 4.22573800613642e-06, "loss": 1.1924, "step": 3952 }, { "epoch": 0.5596375734409287, "grad_norm": 9.41320340997155, "learning_rate": 4.22532323487416e-06, "loss": 1.4382, "step": 3953 }, { "epoch": 0.5597791463155659, "grad_norm": 10.710179942780526, "learning_rate": 4.224908372912946e-06, "loss": 1.4634, "step": 3954 }, { "epoch": 0.5599207191902031, "grad_norm": 10.883030173814651, "learning_rate": 4.224493420274584e-06, "loss": 1.3562, "step": 3955 }, { "epoch": 0.5600622920648404, "grad_norm": 9.825760937603238, "learning_rate": 4.224078376980888e-06, "loss": 1.1998, "step": 3956 }, { "epoch": 0.5602038649394776, "grad_norm": 8.823075678509786, "learning_rate": 4.223663243053679e-06, "loss": 1.2976, "step": 3957 }, { "epoch": 0.5603454378141148, "grad_norm": 8.931672016741794, "learning_rate": 4.2232480185147775e-06, "loss": 1.1244, "step": 3958 }, { "epoch": 0.5604870106887521, "grad_norm": 8.269618434383352, "learning_rate": 4.222832703386013e-06, "loss": 1.2311, "step": 3959 }, { "epoch": 0.5606285835633893, "grad_norm": 9.878983092697384, "learning_rate": 4.222417297689217e-06, "loss": 1.4189, "step": 3960 }, { "epoch": 0.5607701564380265, "grad_norm": 9.339911141146457, "learning_rate": 4.2220018014462284e-06, "loss": 1.1555, "step": 3961 }, { "epoch": 0.5609117293126636, "grad_norm": 10.378360066311808, "learning_rate": 4.221586214678889e-06, "loss": 1.2975, "step": 3962 }, { "epoch": 0.5610533021873009, "grad_norm": 7.9985571990732, "learning_rate": 4.221170537409046e-06, "loss": 1.2423, "step": 3963 }, { "epoch": 0.5611948750619381, "grad_norm": 7.330204585240074, "learning_rate": 4.220754769658551e-06, "loss": 1.3254, "step": 3964 }, { "epoch": 0.5613364479365753, "grad_norm": 9.173440776831514, "learning_rate": 4.220338911449262e-06, "loss": 1.2561, "step": 3965 }, { "epoch": 0.5614780208112126, "grad_norm": 8.968869942849981, "learning_rate": 4.219922962803038e-06, "loss": 1.2153, "step": 3966 }, { "epoch": 0.5616195936858498, "grad_norm": 11.015273847461547, "learning_rate": 4.2195069237417466e-06, "loss": 1.2902, "step": 3967 }, { "epoch": 0.561761166560487, "grad_norm": 9.710709266098105, "learning_rate": 4.219090794287258e-06, "loss": 1.4394, "step": 3968 }, { "epoch": 0.5619027394351243, "grad_norm": 8.141751436581574, "learning_rate": 4.218674574461449e-06, "loss": 1.2047, "step": 3969 }, { "epoch": 0.5620443123097615, "grad_norm": 10.936495140782327, "learning_rate": 4.218258264286198e-06, "loss": 1.3615, "step": 3970 }, { "epoch": 0.5621858851843987, "grad_norm": 8.813053194904189, "learning_rate": 4.217841863783393e-06, "loss": 1.2381, "step": 3971 }, { "epoch": 0.5623274580590358, "grad_norm": 7.62163907382335, "learning_rate": 4.21742537297492e-06, "loss": 1.1516, "step": 3972 }, { "epoch": 0.5624690309336731, "grad_norm": 8.123646784847365, "learning_rate": 4.217008791882678e-06, "loss": 1.2296, "step": 3973 }, { "epoch": 0.5626106038083103, "grad_norm": 11.302506727500555, "learning_rate": 4.216592120528562e-06, "loss": 1.2831, "step": 3974 }, { "epoch": 0.5627521766829475, "grad_norm": 9.564798714593193, "learning_rate": 4.216175358934479e-06, "loss": 1.2436, "step": 3975 }, { "epoch": 0.5628937495575848, "grad_norm": 8.25523071927827, "learning_rate": 4.215758507122337e-06, "loss": 1.156, "step": 3976 }, { "epoch": 0.563035322432222, "grad_norm": 9.74254499206256, "learning_rate": 4.21534156511405e-06, "loss": 1.3321, "step": 3977 }, { "epoch": 0.5631768953068592, "grad_norm": 7.734892610126242, "learning_rate": 4.214924532931534e-06, "loss": 1.2227, "step": 3978 }, { "epoch": 0.5633184681814964, "grad_norm": 11.81290689403372, "learning_rate": 4.214507410596716e-06, "loss": 1.4239, "step": 3979 }, { "epoch": 0.5634600410561337, "grad_norm": 9.190044855085125, "learning_rate": 4.214090198131522e-06, "loss": 1.3598, "step": 3980 }, { "epoch": 0.5636016139307709, "grad_norm": 8.522732125945588, "learning_rate": 4.2136728955578835e-06, "loss": 1.3838, "step": 3981 }, { "epoch": 0.5637431868054081, "grad_norm": 8.72695591380433, "learning_rate": 4.2132555028977386e-06, "loss": 1.3263, "step": 3982 }, { "epoch": 0.5638847596800453, "grad_norm": 9.908969060738846, "learning_rate": 4.212838020173029e-06, "loss": 1.3238, "step": 3983 }, { "epoch": 0.5640263325546825, "grad_norm": 8.703608008084833, "learning_rate": 4.212420447405703e-06, "loss": 1.4024, "step": 3984 }, { "epoch": 0.5641679054293197, "grad_norm": 8.65483924579801, "learning_rate": 4.21200278461771e-06, "loss": 1.2956, "step": 3985 }, { "epoch": 0.564309478303957, "grad_norm": 9.057567437952997, "learning_rate": 4.211585031831007e-06, "loss": 1.2764, "step": 3986 }, { "epoch": 0.5644510511785942, "grad_norm": 10.333656018869052, "learning_rate": 4.211167189067556e-06, "loss": 1.1176, "step": 3987 }, { "epoch": 0.5645926240532314, "grad_norm": 10.712143711143144, "learning_rate": 4.210749256349322e-06, "loss": 1.3899, "step": 3988 }, { "epoch": 0.5647341969278686, "grad_norm": 8.730181675229876, "learning_rate": 4.210331233698274e-06, "loss": 1.2285, "step": 3989 }, { "epoch": 0.5648757698025059, "grad_norm": 11.994088942294203, "learning_rate": 4.209913121136389e-06, "loss": 1.5159, "step": 3990 }, { "epoch": 0.5650173426771431, "grad_norm": 7.743951467582645, "learning_rate": 4.209494918685646e-06, "loss": 1.2967, "step": 3991 }, { "epoch": 0.5651589155517803, "grad_norm": 6.975223288841671, "learning_rate": 4.20907662636803e-06, "loss": 1.2729, "step": 3992 }, { "epoch": 0.5653004884264174, "grad_norm": 12.26026890918735, "learning_rate": 4.208658244205529e-06, "loss": 1.3463, "step": 3993 }, { "epoch": 0.5654420613010547, "grad_norm": 10.536044062517234, "learning_rate": 4.208239772220139e-06, "loss": 1.2454, "step": 3994 }, { "epoch": 0.5655836341756919, "grad_norm": 8.749478133851532, "learning_rate": 4.207821210433858e-06, "loss": 1.3127, "step": 3995 }, { "epoch": 0.5657252070503291, "grad_norm": 11.028668585179208, "learning_rate": 4.20740255886869e-06, "loss": 1.389, "step": 3996 }, { "epoch": 0.5658667799249664, "grad_norm": 8.687641828737739, "learning_rate": 4.206983817546641e-06, "loss": 1.3978, "step": 3997 }, { "epoch": 0.5660083527996036, "grad_norm": 9.167787749198574, "learning_rate": 4.206564986489726e-06, "loss": 1.2349, "step": 3998 }, { "epoch": 0.5661499256742408, "grad_norm": 9.44003282121028, "learning_rate": 4.206146065719963e-06, "loss": 1.3295, "step": 3999 }, { "epoch": 0.5662914985488781, "grad_norm": 9.429496959161291, "learning_rate": 4.205727055259372e-06, "loss": 1.3879, "step": 4000 }, { "epoch": 0.5664330714235153, "grad_norm": 7.914240602558501, "learning_rate": 4.2053079551299835e-06, "loss": 1.3111, "step": 4001 }, { "epoch": 0.5665746442981525, "grad_norm": 9.314233400965227, "learning_rate": 4.204888765353826e-06, "loss": 1.3822, "step": 4002 }, { "epoch": 0.5667162171727896, "grad_norm": 9.724507972018511, "learning_rate": 4.204469485952938e-06, "loss": 1.324, "step": 4003 }, { "epoch": 0.5668577900474269, "grad_norm": 10.77763328149509, "learning_rate": 4.204050116949359e-06, "loss": 1.4136, "step": 4004 }, { "epoch": 0.5669993629220641, "grad_norm": 7.25211145632437, "learning_rate": 4.203630658365136e-06, "loss": 1.289, "step": 4005 }, { "epoch": 0.5671409357967013, "grad_norm": 8.932308796828204, "learning_rate": 4.203211110222321e-06, "loss": 1.2495, "step": 4006 }, { "epoch": 0.5672825086713386, "grad_norm": 9.763473591142724, "learning_rate": 4.202791472542968e-06, "loss": 1.2553, "step": 4007 }, { "epoch": 0.5674240815459758, "grad_norm": 10.20450263778067, "learning_rate": 4.202371745349135e-06, "loss": 1.5306, "step": 4008 }, { "epoch": 0.567565654420613, "grad_norm": 11.344267862232368, "learning_rate": 4.2019519286628895e-06, "loss": 1.2417, "step": 4009 }, { "epoch": 0.5677072272952502, "grad_norm": 8.322937801049381, "learning_rate": 4.2015320225063e-06, "loss": 1.2648, "step": 4010 }, { "epoch": 0.5678488001698875, "grad_norm": 7.998736520175512, "learning_rate": 4.201112026901442e-06, "loss": 1.1849, "step": 4011 }, { "epoch": 0.5679903730445247, "grad_norm": 11.424821674449397, "learning_rate": 4.200691941870392e-06, "loss": 1.4517, "step": 4012 }, { "epoch": 0.5681319459191619, "grad_norm": 8.34412763219636, "learning_rate": 4.200271767435235e-06, "loss": 1.2483, "step": 4013 }, { "epoch": 0.5682735187937991, "grad_norm": 13.818855584067746, "learning_rate": 4.199851503618059e-06, "loss": 1.5549, "step": 4014 }, { "epoch": 0.5684150916684363, "grad_norm": 8.583192645157553, "learning_rate": 4.1994311504409566e-06, "loss": 1.1992, "step": 4015 }, { "epoch": 0.5685566645430735, "grad_norm": 9.186966445896456, "learning_rate": 4.199010707926026e-06, "loss": 1.3832, "step": 4016 }, { "epoch": 0.5686982374177108, "grad_norm": 9.861566605293731, "learning_rate": 4.19859017609537e-06, "loss": 1.2229, "step": 4017 }, { "epoch": 0.568839810292348, "grad_norm": 9.354184655481612, "learning_rate": 4.198169554971095e-06, "loss": 1.2292, "step": 4018 }, { "epoch": 0.5689813831669852, "grad_norm": 10.081232109455994, "learning_rate": 4.197748844575311e-06, "loss": 1.2947, "step": 4019 }, { "epoch": 0.5691229560416224, "grad_norm": 9.903906511916261, "learning_rate": 4.197328044930137e-06, "loss": 1.2948, "step": 4020 }, { "epoch": 0.5692645289162597, "grad_norm": 10.387209039645866, "learning_rate": 4.196907156057694e-06, "loss": 1.4249, "step": 4021 }, { "epoch": 0.5694061017908969, "grad_norm": 12.67081109224471, "learning_rate": 4.196486177980107e-06, "loss": 1.3234, "step": 4022 }, { "epoch": 0.5695476746655341, "grad_norm": 9.520234893978374, "learning_rate": 4.196065110719505e-06, "loss": 1.2537, "step": 4023 }, { "epoch": 0.5696892475401713, "grad_norm": 8.172388939533159, "learning_rate": 4.195643954298026e-06, "loss": 1.2794, "step": 4024 }, { "epoch": 0.5698308204148085, "grad_norm": 10.854697382296315, "learning_rate": 4.195222708737809e-06, "loss": 1.3495, "step": 4025 }, { "epoch": 0.5699723932894457, "grad_norm": 9.63085085903852, "learning_rate": 4.1948013740609976e-06, "loss": 1.4094, "step": 4026 }, { "epoch": 0.5701139661640829, "grad_norm": 9.714263371033178, "learning_rate": 4.194379950289742e-06, "loss": 1.3291, "step": 4027 }, { "epoch": 0.5702555390387202, "grad_norm": 10.67618716068799, "learning_rate": 4.193958437446195e-06, "loss": 1.289, "step": 4028 }, { "epoch": 0.5703971119133574, "grad_norm": 11.130568578683015, "learning_rate": 4.193536835552517e-06, "loss": 1.4433, "step": 4029 }, { "epoch": 0.5705386847879946, "grad_norm": 7.7055558708036225, "learning_rate": 4.19311514463087e-06, "loss": 1.1338, "step": 4030 }, { "epoch": 0.5706802576626319, "grad_norm": 10.845592858873562, "learning_rate": 4.192693364703422e-06, "loss": 1.1937, "step": 4031 }, { "epoch": 0.5708218305372691, "grad_norm": 11.160384579393353, "learning_rate": 4.192271495792346e-06, "loss": 1.476, "step": 4032 }, { "epoch": 0.5709634034119063, "grad_norm": 11.97586748329903, "learning_rate": 4.191849537919819e-06, "loss": 1.4384, "step": 4033 }, { "epoch": 0.5711049762865436, "grad_norm": 11.02026269561534, "learning_rate": 4.191427491108024e-06, "loss": 1.3535, "step": 4034 }, { "epoch": 0.5712465491611807, "grad_norm": 11.076597380129169, "learning_rate": 4.191005355379147e-06, "loss": 1.4715, "step": 4035 }, { "epoch": 0.5713881220358179, "grad_norm": 13.630783673355127, "learning_rate": 4.190583130755379e-06, "loss": 1.5196, "step": 4036 }, { "epoch": 0.5715296949104551, "grad_norm": 9.454915733022409, "learning_rate": 4.190160817258916e-06, "loss": 1.1094, "step": 4037 }, { "epoch": 0.5716712677850924, "grad_norm": 7.027689528214836, "learning_rate": 4.189738414911959e-06, "loss": 1.2616, "step": 4038 }, { "epoch": 0.5718128406597296, "grad_norm": 9.746620962067965, "learning_rate": 4.189315923736715e-06, "loss": 1.2875, "step": 4039 }, { "epoch": 0.5719544135343668, "grad_norm": 9.14818230589843, "learning_rate": 4.18889334375539e-06, "loss": 1.1365, "step": 4040 }, { "epoch": 0.572095986409004, "grad_norm": 8.50184611299814, "learning_rate": 4.188470674990203e-06, "loss": 1.2362, "step": 4041 }, { "epoch": 0.5722375592836413, "grad_norm": 11.061030230973415, "learning_rate": 4.1880479174633715e-06, "loss": 1.2669, "step": 4042 }, { "epoch": 0.5723791321582785, "grad_norm": 12.490187799219258, "learning_rate": 4.187625071197119e-06, "loss": 1.2018, "step": 4043 }, { "epoch": 0.5725207050329157, "grad_norm": 8.406156518125663, "learning_rate": 4.187202136213675e-06, "loss": 1.1499, "step": 4044 }, { "epoch": 0.5726622779075529, "grad_norm": 11.233616067137405, "learning_rate": 4.186779112535273e-06, "loss": 1.33, "step": 4045 }, { "epoch": 0.5728038507821901, "grad_norm": 8.913889182008104, "learning_rate": 4.186356000184151e-06, "loss": 1.3218, "step": 4046 }, { "epoch": 0.5729454236568273, "grad_norm": 9.400851807654062, "learning_rate": 4.185932799182551e-06, "loss": 1.2802, "step": 4047 }, { "epoch": 0.5730869965314646, "grad_norm": 12.035056089326805, "learning_rate": 4.185509509552721e-06, "loss": 1.3415, "step": 4048 }, { "epoch": 0.5732285694061018, "grad_norm": 9.969855459498937, "learning_rate": 4.185086131316914e-06, "loss": 1.2827, "step": 4049 }, { "epoch": 0.573370142280739, "grad_norm": 11.782683672299973, "learning_rate": 4.184662664497383e-06, "loss": 1.3546, "step": 4050 }, { "epoch": 0.5735117151553762, "grad_norm": 9.273937285789769, "learning_rate": 4.184239109116393e-06, "loss": 1.2622, "step": 4051 }, { "epoch": 0.5736532880300135, "grad_norm": 11.027567910845995, "learning_rate": 4.183815465196209e-06, "loss": 1.2695, "step": 4052 }, { "epoch": 0.5737948609046507, "grad_norm": 12.565017068458923, "learning_rate": 4.183391732759102e-06, "loss": 1.2512, "step": 4053 }, { "epoch": 0.5739364337792879, "grad_norm": 10.499490634734011, "learning_rate": 4.182967911827347e-06, "loss": 1.1947, "step": 4054 }, { "epoch": 0.574078006653925, "grad_norm": 9.128055008852044, "learning_rate": 4.182544002423223e-06, "loss": 1.4035, "step": 4055 }, { "epoch": 0.5742195795285623, "grad_norm": 12.010612881004189, "learning_rate": 4.182120004569015e-06, "loss": 1.3527, "step": 4056 }, { "epoch": 0.5743611524031995, "grad_norm": 12.484041937633258, "learning_rate": 4.181695918287013e-06, "loss": 1.3475, "step": 4057 }, { "epoch": 0.5745027252778367, "grad_norm": 8.742132955454283, "learning_rate": 4.181271743599511e-06, "loss": 1.125, "step": 4058 }, { "epoch": 0.574644298152474, "grad_norm": 9.796007425201829, "learning_rate": 4.180847480528806e-06, "loss": 1.3344, "step": 4059 }, { "epoch": 0.5747858710271112, "grad_norm": 8.785474962260151, "learning_rate": 4.180423129097203e-06, "loss": 1.274, "step": 4060 }, { "epoch": 0.5749274439017484, "grad_norm": 9.487116057778044, "learning_rate": 4.179998689327009e-06, "loss": 1.2426, "step": 4061 }, { "epoch": 0.5750690167763857, "grad_norm": 12.359359567586763, "learning_rate": 4.1795741612405365e-06, "loss": 1.2552, "step": 4062 }, { "epoch": 0.5752105896510229, "grad_norm": 11.006467131899587, "learning_rate": 4.179149544860102e-06, "loss": 1.4515, "step": 4063 }, { "epoch": 0.5753521625256601, "grad_norm": 9.084932752243848, "learning_rate": 4.178724840208029e-06, "loss": 1.3155, "step": 4064 }, { "epoch": 0.5754937354002974, "grad_norm": 7.823565162026991, "learning_rate": 4.178300047306643e-06, "loss": 1.1647, "step": 4065 }, { "epoch": 0.5756353082749345, "grad_norm": 8.734492920436105, "learning_rate": 4.177875166178274e-06, "loss": 1.2078, "step": 4066 }, { "epoch": 0.5757768811495717, "grad_norm": 11.718471350853799, "learning_rate": 4.17745019684526e-06, "loss": 1.3783, "step": 4067 }, { "epoch": 0.5759184540242089, "grad_norm": 8.271578687109903, "learning_rate": 4.177025139329939e-06, "loss": 1.2457, "step": 4068 }, { "epoch": 0.5760600268988462, "grad_norm": 10.270998032018625, "learning_rate": 4.176599993654657e-06, "loss": 1.4844, "step": 4069 }, { "epoch": 0.5762015997734834, "grad_norm": 8.147896299740696, "learning_rate": 4.176174759841762e-06, "loss": 1.1219, "step": 4070 }, { "epoch": 0.5763431726481206, "grad_norm": 11.015605953551797, "learning_rate": 4.175749437913611e-06, "loss": 1.4256, "step": 4071 }, { "epoch": 0.5764847455227579, "grad_norm": 11.520507439987385, "learning_rate": 4.175324027892562e-06, "loss": 1.3405, "step": 4072 }, { "epoch": 0.5766263183973951, "grad_norm": 10.44394872945461, "learning_rate": 4.174898529800977e-06, "loss": 1.3389, "step": 4073 }, { "epoch": 0.5767678912720323, "grad_norm": 10.107789282271604, "learning_rate": 4.1744729436612255e-06, "loss": 1.4504, "step": 4074 }, { "epoch": 0.5769094641466695, "grad_norm": 11.903219322898467, "learning_rate": 4.174047269495681e-06, "loss": 1.338, "step": 4075 }, { "epoch": 0.5770510370213067, "grad_norm": 9.693550435415567, "learning_rate": 4.173621507326719e-06, "loss": 1.1975, "step": 4076 }, { "epoch": 0.5771926098959439, "grad_norm": 9.151979459512775, "learning_rate": 4.1731956571767215e-06, "loss": 1.379, "step": 4077 }, { "epoch": 0.5773341827705811, "grad_norm": 8.620201655814906, "learning_rate": 4.172769719068076e-06, "loss": 1.2086, "step": 4078 }, { "epoch": 0.5774757556452184, "grad_norm": 8.879476788202476, "learning_rate": 4.172343693023174e-06, "loss": 1.2427, "step": 4079 }, { "epoch": 0.5776173285198556, "grad_norm": 9.814871143150103, "learning_rate": 4.171917579064412e-06, "loss": 1.2655, "step": 4080 }, { "epoch": 0.5777589013944928, "grad_norm": 9.95580711936251, "learning_rate": 4.1714913772141885e-06, "loss": 1.3481, "step": 4081 }, { "epoch": 0.57790047426913, "grad_norm": 9.202632146050789, "learning_rate": 4.171065087494909e-06, "loss": 1.2457, "step": 4082 }, { "epoch": 0.5780420471437673, "grad_norm": 9.79335671848165, "learning_rate": 4.170638709928984e-06, "loss": 1.3516, "step": 4083 }, { "epoch": 0.5781836200184045, "grad_norm": 9.862630315773043, "learning_rate": 4.170212244538829e-06, "loss": 1.2601, "step": 4084 }, { "epoch": 0.5783251928930417, "grad_norm": 10.801914214990221, "learning_rate": 4.169785691346861e-06, "loss": 1.4142, "step": 4085 }, { "epoch": 0.5784667657676789, "grad_norm": 8.500223044666653, "learning_rate": 4.169359050375505e-06, "loss": 1.2821, "step": 4086 }, { "epoch": 0.5786083386423161, "grad_norm": 9.396585664141773, "learning_rate": 4.168932321647186e-06, "loss": 1.254, "step": 4087 }, { "epoch": 0.5787499115169533, "grad_norm": 6.856409079028495, "learning_rate": 4.168505505184341e-06, "loss": 1.1527, "step": 4088 }, { "epoch": 0.5788914843915905, "grad_norm": 10.286032263064163, "learning_rate": 4.168078601009407e-06, "loss": 1.2787, "step": 4089 }, { "epoch": 0.5790330572662278, "grad_norm": 9.009980919211248, "learning_rate": 4.167651609144822e-06, "loss": 1.0996, "step": 4090 }, { "epoch": 0.579174630140865, "grad_norm": 7.641831878018797, "learning_rate": 4.167224529613038e-06, "loss": 1.2995, "step": 4091 }, { "epoch": 0.5793162030155022, "grad_norm": 10.481753526124427, "learning_rate": 4.166797362436502e-06, "loss": 1.4115, "step": 4092 }, { "epoch": 0.5794577758901395, "grad_norm": 9.842518756704212, "learning_rate": 4.1663701076376715e-06, "loss": 1.376, "step": 4093 }, { "epoch": 0.5795993487647767, "grad_norm": 9.700514585079084, "learning_rate": 4.1659427652390075e-06, "loss": 1.4156, "step": 4094 }, { "epoch": 0.5797409216394139, "grad_norm": 8.148497423077844, "learning_rate": 4.165515335262974e-06, "loss": 1.2253, "step": 4095 }, { "epoch": 0.5798824945140512, "grad_norm": 7.88431301145494, "learning_rate": 4.165087817732041e-06, "loss": 1.1943, "step": 4096 }, { "epoch": 0.5800240673886883, "grad_norm": 7.080211205645827, "learning_rate": 4.164660212668684e-06, "loss": 1.2877, "step": 4097 }, { "epoch": 0.5801656402633255, "grad_norm": 10.296864626796747, "learning_rate": 4.164232520095379e-06, "loss": 1.2709, "step": 4098 }, { "epoch": 0.5803072131379627, "grad_norm": 8.931127450474609, "learning_rate": 4.163804740034613e-06, "loss": 1.3159, "step": 4099 }, { "epoch": 0.5804487860126, "grad_norm": 8.644934133319559, "learning_rate": 4.163376872508872e-06, "loss": 1.4045, "step": 4100 }, { "epoch": 0.5805903588872372, "grad_norm": 9.140295299876549, "learning_rate": 4.162948917540649e-06, "loss": 1.4402, "step": 4101 }, { "epoch": 0.5807319317618744, "grad_norm": 9.348685399625547, "learning_rate": 4.162520875152441e-06, "loss": 1.1882, "step": 4102 }, { "epoch": 0.5808735046365117, "grad_norm": 10.385102282898783, "learning_rate": 4.1620927453667515e-06, "loss": 1.4739, "step": 4103 }, { "epoch": 0.5810150775111489, "grad_norm": 9.167091174844405, "learning_rate": 4.161664528206084e-06, "loss": 1.3071, "step": 4104 }, { "epoch": 0.5811566503857861, "grad_norm": 8.357660363724474, "learning_rate": 4.1612362236929524e-06, "loss": 1.2992, "step": 4105 }, { "epoch": 0.5812982232604234, "grad_norm": 9.159604444333835, "learning_rate": 4.16080783184987e-06, "loss": 1.458, "step": 4106 }, { "epoch": 0.5814397961350605, "grad_norm": 7.483690712373594, "learning_rate": 4.16037935269936e-06, "loss": 1.2443, "step": 4107 }, { "epoch": 0.5815813690096977, "grad_norm": 9.158667339682387, "learning_rate": 4.159950786263944e-06, "loss": 1.0712, "step": 4108 }, { "epoch": 0.5817229418843349, "grad_norm": 10.829086723389711, "learning_rate": 4.159522132566153e-06, "loss": 1.3074, "step": 4109 }, { "epoch": 0.5818645147589722, "grad_norm": 7.526434027312035, "learning_rate": 4.159093391628521e-06, "loss": 1.1811, "step": 4110 }, { "epoch": 0.5820060876336094, "grad_norm": 11.85923025171052, "learning_rate": 4.158664563473587e-06, "loss": 1.2287, "step": 4111 }, { "epoch": 0.5821476605082466, "grad_norm": 8.351366545730844, "learning_rate": 4.158235648123894e-06, "loss": 1.3547, "step": 4112 }, { "epoch": 0.5822892333828839, "grad_norm": 9.583570217577288, "learning_rate": 4.1578066456019885e-06, "loss": 1.3035, "step": 4113 }, { "epoch": 0.5824308062575211, "grad_norm": 7.866899471825269, "learning_rate": 4.157377555930424e-06, "loss": 1.1571, "step": 4114 }, { "epoch": 0.5825723791321583, "grad_norm": 10.043854872055746, "learning_rate": 4.156948379131757e-06, "loss": 1.3169, "step": 4115 }, { "epoch": 0.5827139520067955, "grad_norm": 8.264059859643414, "learning_rate": 4.15651911522855e-06, "loss": 1.172, "step": 4116 }, { "epoch": 0.5828555248814327, "grad_norm": 8.42883850856781, "learning_rate": 4.1560897642433674e-06, "loss": 1.172, "step": 4117 }, { "epoch": 0.5829970977560699, "grad_norm": 7.2746221804745534, "learning_rate": 4.155660326198781e-06, "loss": 1.1268, "step": 4118 }, { "epoch": 0.5831386706307071, "grad_norm": 10.52055925572508, "learning_rate": 4.155230801117366e-06, "loss": 1.2304, "step": 4119 }, { "epoch": 0.5832802435053444, "grad_norm": 9.94000251017794, "learning_rate": 4.154801189021701e-06, "loss": 1.2415, "step": 4120 }, { "epoch": 0.5834218163799816, "grad_norm": 8.713967382258353, "learning_rate": 4.154371489934372e-06, "loss": 1.4548, "step": 4121 }, { "epoch": 0.5835633892546188, "grad_norm": 10.188475392459557, "learning_rate": 4.153941703877967e-06, "loss": 1.3028, "step": 4122 }, { "epoch": 0.583704962129256, "grad_norm": 9.225317939717156, "learning_rate": 4.153511830875081e-06, "loss": 1.3946, "step": 4123 }, { "epoch": 0.5838465350038933, "grad_norm": 11.208570021691097, "learning_rate": 4.15308187094831e-06, "loss": 1.3035, "step": 4124 }, { "epoch": 0.5839881078785305, "grad_norm": 9.236145366200445, "learning_rate": 4.152651824120258e-06, "loss": 1.1708, "step": 4125 }, { "epoch": 0.5841296807531677, "grad_norm": 7.85116239069157, "learning_rate": 4.152221690413531e-06, "loss": 1.2654, "step": 4126 }, { "epoch": 0.584271253627805, "grad_norm": 10.347390447581798, "learning_rate": 4.151791469850743e-06, "loss": 1.2106, "step": 4127 }, { "epoch": 0.5844128265024421, "grad_norm": 10.209683657237347, "learning_rate": 4.151361162454509e-06, "loss": 1.3812, "step": 4128 }, { "epoch": 0.5845543993770793, "grad_norm": 8.414446497011804, "learning_rate": 4.150930768247449e-06, "loss": 1.3775, "step": 4129 }, { "epoch": 0.5846959722517165, "grad_norm": 10.989011304289425, "learning_rate": 4.15050028725219e-06, "loss": 1.2695, "step": 4130 }, { "epoch": 0.5848375451263538, "grad_norm": 8.541710333983909, "learning_rate": 4.1500697194913615e-06, "loss": 1.3316, "step": 4131 }, { "epoch": 0.584979118000991, "grad_norm": 10.84539940682025, "learning_rate": 4.149639064987598e-06, "loss": 1.3944, "step": 4132 }, { "epoch": 0.5851206908756282, "grad_norm": 10.01601272284824, "learning_rate": 4.149208323763539e-06, "loss": 1.3725, "step": 4133 }, { "epoch": 0.5852622637502655, "grad_norm": 8.069735333759937, "learning_rate": 4.148777495841829e-06, "loss": 1.2595, "step": 4134 }, { "epoch": 0.5854038366249027, "grad_norm": 9.111119831153397, "learning_rate": 4.1483465812451144e-06, "loss": 1.3365, "step": 4135 }, { "epoch": 0.5855454094995399, "grad_norm": 9.181374512536406, "learning_rate": 4.147915579996049e-06, "loss": 1.3463, "step": 4136 }, { "epoch": 0.5856869823741772, "grad_norm": 8.296689708530078, "learning_rate": 4.147484492117291e-06, "loss": 1.2156, "step": 4137 }, { "epoch": 0.5858285552488143, "grad_norm": 7.238002189119403, "learning_rate": 4.147053317631501e-06, "loss": 1.2255, "step": 4138 }, { "epoch": 0.5859701281234515, "grad_norm": 10.512198129281874, "learning_rate": 4.146622056561347e-06, "loss": 1.4638, "step": 4139 }, { "epoch": 0.5861117009980887, "grad_norm": 9.387938662666281, "learning_rate": 4.146190708929498e-06, "loss": 1.4163, "step": 4140 }, { "epoch": 0.586253273872726, "grad_norm": 11.186801313181775, "learning_rate": 4.145759274758632e-06, "loss": 1.3861, "step": 4141 }, { "epoch": 0.5863948467473632, "grad_norm": 8.815845909447436, "learning_rate": 4.145327754071427e-06, "loss": 1.1801, "step": 4142 }, { "epoch": 0.5865364196220004, "grad_norm": 8.116792729813298, "learning_rate": 4.1448961468905706e-06, "loss": 1.2811, "step": 4143 }, { "epoch": 0.5866779924966377, "grad_norm": 7.484573775739817, "learning_rate": 4.1444644532387485e-06, "loss": 1.2223, "step": 4144 }, { "epoch": 0.5868195653712749, "grad_norm": 7.521497054325849, "learning_rate": 4.1440326731386575e-06, "loss": 1.2205, "step": 4145 }, { "epoch": 0.5869611382459121, "grad_norm": 9.35156576336618, "learning_rate": 4.143600806612993e-06, "loss": 1.2512, "step": 4146 }, { "epoch": 0.5871027111205493, "grad_norm": 12.910634919265384, "learning_rate": 4.143168853684461e-06, "loss": 1.428, "step": 4147 }, { "epoch": 0.5872442839951865, "grad_norm": 8.992089079609087, "learning_rate": 4.142736814375768e-06, "loss": 1.3322, "step": 4148 }, { "epoch": 0.5873858568698237, "grad_norm": 7.200080807550281, "learning_rate": 4.142304688709624e-06, "loss": 1.0917, "step": 4149 }, { "epoch": 0.5875274297444609, "grad_norm": 8.400130643282802, "learning_rate": 4.141872476708748e-06, "loss": 1.2513, "step": 4150 }, { "epoch": 0.5876690026190982, "grad_norm": 7.3763599758227585, "learning_rate": 4.1414401783958605e-06, "loss": 1.3233, "step": 4151 }, { "epoch": 0.5878105754937354, "grad_norm": 7.93254475347408, "learning_rate": 4.141007793793686e-06, "loss": 1.3307, "step": 4152 }, { "epoch": 0.5879521483683726, "grad_norm": 8.321346766703016, "learning_rate": 4.140575322924955e-06, "loss": 1.35, "step": 4153 }, { "epoch": 0.5880937212430098, "grad_norm": 9.427070564275422, "learning_rate": 4.140142765812404e-06, "loss": 1.2608, "step": 4154 }, { "epoch": 0.5882352941176471, "grad_norm": 7.743133210009949, "learning_rate": 4.13971012247877e-06, "loss": 1.2402, "step": 4155 }, { "epoch": 0.5883768669922843, "grad_norm": 8.820637726510695, "learning_rate": 4.139277392946797e-06, "loss": 1.4348, "step": 4156 }, { "epoch": 0.5885184398669215, "grad_norm": 6.945427841069822, "learning_rate": 4.138844577239234e-06, "loss": 1.2472, "step": 4157 }, { "epoch": 0.5886600127415588, "grad_norm": 9.800819004635331, "learning_rate": 4.138411675378833e-06, "loss": 1.3971, "step": 4158 }, { "epoch": 0.5888015856161959, "grad_norm": 9.414456763657963, "learning_rate": 4.137978687388352e-06, "loss": 1.1955, "step": 4159 }, { "epoch": 0.5889431584908331, "grad_norm": 7.976632322478442, "learning_rate": 4.137545613290554e-06, "loss": 1.3104, "step": 4160 }, { "epoch": 0.5890847313654703, "grad_norm": 7.448090266269734, "learning_rate": 4.137112453108203e-06, "loss": 1.3195, "step": 4161 }, { "epoch": 0.5892263042401076, "grad_norm": 9.052823076358859, "learning_rate": 4.136679206864072e-06, "loss": 1.4387, "step": 4162 }, { "epoch": 0.5893678771147448, "grad_norm": 7.524941996100551, "learning_rate": 4.136245874580935e-06, "loss": 1.3678, "step": 4163 }, { "epoch": 0.589509449989382, "grad_norm": 7.463197056405142, "learning_rate": 4.135812456281571e-06, "loss": 1.3537, "step": 4164 }, { "epoch": 0.5896510228640193, "grad_norm": 7.563400845428782, "learning_rate": 4.1353789519887685e-06, "loss": 1.397, "step": 4165 }, { "epoch": 0.5897925957386565, "grad_norm": 9.066798801955226, "learning_rate": 4.134945361725312e-06, "loss": 1.3751, "step": 4166 }, { "epoch": 0.5899341686132937, "grad_norm": 6.957897819854299, "learning_rate": 4.134511685513998e-06, "loss": 1.3364, "step": 4167 }, { "epoch": 0.590075741487931, "grad_norm": 9.610120096240221, "learning_rate": 4.134077923377622e-06, "loss": 1.5167, "step": 4168 }, { "epoch": 0.5902173143625681, "grad_norm": 9.717573186639518, "learning_rate": 4.13364407533899e-06, "loss": 1.4057, "step": 4169 }, { "epoch": 0.5903588872372053, "grad_norm": 8.115303239403964, "learning_rate": 4.133210141420905e-06, "loss": 1.3296, "step": 4170 }, { "epoch": 0.5905004601118425, "grad_norm": 10.199956661955076, "learning_rate": 4.132776121646182e-06, "loss": 1.3797, "step": 4171 }, { "epoch": 0.5906420329864798, "grad_norm": 8.412870501540102, "learning_rate": 4.132342016037635e-06, "loss": 1.2732, "step": 4172 }, { "epoch": 0.590783605861117, "grad_norm": 9.307024478901885, "learning_rate": 4.131907824618086e-06, "loss": 1.1531, "step": 4173 }, { "epoch": 0.5909251787357542, "grad_norm": 9.919377530780071, "learning_rate": 4.131473547410359e-06, "loss": 1.3308, "step": 4174 }, { "epoch": 0.5910667516103915, "grad_norm": 9.214601572564778, "learning_rate": 4.131039184437283e-06, "loss": 1.4607, "step": 4175 }, { "epoch": 0.5912083244850287, "grad_norm": 10.293931554827278, "learning_rate": 4.130604735721695e-06, "loss": 1.3565, "step": 4176 }, { "epoch": 0.5913498973596659, "grad_norm": 9.854639624011657, "learning_rate": 4.130170201286432e-06, "loss": 1.3207, "step": 4177 }, { "epoch": 0.5914914702343032, "grad_norm": 8.998376487952997, "learning_rate": 4.129735581154336e-06, "loss": 1.2457, "step": 4178 }, { "epoch": 0.5916330431089404, "grad_norm": 10.279192408444557, "learning_rate": 4.129300875348255e-06, "loss": 1.2339, "step": 4179 }, { "epoch": 0.5917746159835775, "grad_norm": 8.394476718645588, "learning_rate": 4.128866083891043e-06, "loss": 1.4164, "step": 4180 }, { "epoch": 0.5919161888582147, "grad_norm": 10.717050133978974, "learning_rate": 4.128431206805556e-06, "loss": 1.1917, "step": 4181 }, { "epoch": 0.592057761732852, "grad_norm": 8.370711865706038, "learning_rate": 4.127996244114654e-06, "loss": 1.1718, "step": 4182 }, { "epoch": 0.5921993346074892, "grad_norm": 8.82795647654415, "learning_rate": 4.127561195841203e-06, "loss": 1.2888, "step": 4183 }, { "epoch": 0.5923409074821264, "grad_norm": 8.620091022621585, "learning_rate": 4.1271260620080745e-06, "loss": 1.3475, "step": 4184 }, { "epoch": 0.5924824803567637, "grad_norm": 8.80815677616248, "learning_rate": 4.126690842638141e-06, "loss": 1.2851, "step": 4185 }, { "epoch": 0.5926240532314009, "grad_norm": 8.839685375431701, "learning_rate": 4.1262555377542834e-06, "loss": 1.1419, "step": 4186 }, { "epoch": 0.5927656261060381, "grad_norm": 8.113681362756838, "learning_rate": 4.125820147379384e-06, "loss": 1.2025, "step": 4187 }, { "epoch": 0.5929071989806753, "grad_norm": 10.177735686828258, "learning_rate": 4.125384671536333e-06, "loss": 1.459, "step": 4188 }, { "epoch": 0.5930487718553126, "grad_norm": 9.713183019867452, "learning_rate": 4.124949110248021e-06, "loss": 1.3714, "step": 4189 }, { "epoch": 0.5931903447299497, "grad_norm": 7.59974540484784, "learning_rate": 4.124513463537346e-06, "loss": 1.2469, "step": 4190 }, { "epoch": 0.5933319176045869, "grad_norm": 9.409538834690197, "learning_rate": 4.124077731427209e-06, "loss": 1.335, "step": 4191 }, { "epoch": 0.5934734904792242, "grad_norm": 8.758904422200649, "learning_rate": 4.123641913940518e-06, "loss": 1.4199, "step": 4192 }, { "epoch": 0.5936150633538614, "grad_norm": 10.996955797137643, "learning_rate": 4.123206011100182e-06, "loss": 1.409, "step": 4193 }, { "epoch": 0.5937566362284986, "grad_norm": 9.221507549042661, "learning_rate": 4.122770022929114e-06, "loss": 1.3449, "step": 4194 }, { "epoch": 0.5938982091031358, "grad_norm": 10.022329196326133, "learning_rate": 4.1223339494502375e-06, "loss": 1.2695, "step": 4195 }, { "epoch": 0.5940397819777731, "grad_norm": 8.257001737783147, "learning_rate": 4.1218977906864754e-06, "loss": 1.1852, "step": 4196 }, { "epoch": 0.5941813548524103, "grad_norm": 9.57662897421455, "learning_rate": 4.121461546660756e-06, "loss": 1.3706, "step": 4197 }, { "epoch": 0.5943229277270475, "grad_norm": 10.405780037013608, "learning_rate": 4.121025217396011e-06, "loss": 1.3716, "step": 4198 }, { "epoch": 0.5944645006016848, "grad_norm": 11.104956583885436, "learning_rate": 4.12058880291518e-06, "loss": 1.3602, "step": 4199 }, { "epoch": 0.5946060734763219, "grad_norm": 9.207577313303904, "learning_rate": 4.120152303241203e-06, "loss": 1.3722, "step": 4200 }, { "epoch": 0.5947476463509591, "grad_norm": 6.501513378321704, "learning_rate": 4.119715718397028e-06, "loss": 1.2333, "step": 4201 }, { "epoch": 0.5948892192255963, "grad_norm": 9.852741506695175, "learning_rate": 4.119279048405606e-06, "loss": 1.251, "step": 4202 }, { "epoch": 0.5950307921002336, "grad_norm": 8.915282905612663, "learning_rate": 4.1188422932898905e-06, "loss": 1.3836, "step": 4203 }, { "epoch": 0.5951723649748708, "grad_norm": 9.124308520728862, "learning_rate": 4.1184054530728436e-06, "loss": 1.3777, "step": 4204 }, { "epoch": 0.595313937849508, "grad_norm": 10.068472469963751, "learning_rate": 4.117968527777428e-06, "loss": 1.3827, "step": 4205 }, { "epoch": 0.5954555107241453, "grad_norm": 9.045062527585406, "learning_rate": 4.117531517426614e-06, "loss": 1.2726, "step": 4206 }, { "epoch": 0.5955970835987825, "grad_norm": 8.24668262715544, "learning_rate": 4.117094422043374e-06, "loss": 1.3372, "step": 4207 }, { "epoch": 0.5957386564734197, "grad_norm": 8.056614346548052, "learning_rate": 4.116657241650687e-06, "loss": 1.3905, "step": 4208 }, { "epoch": 0.595880229348057, "grad_norm": 8.549830509758978, "learning_rate": 4.116219976271533e-06, "loss": 1.3367, "step": 4209 }, { "epoch": 0.5960218022226942, "grad_norm": 11.800541390106192, "learning_rate": 4.1157826259289e-06, "loss": 1.3447, "step": 4210 }, { "epoch": 0.5961633750973313, "grad_norm": 8.434524576642383, "learning_rate": 4.115345190645779e-06, "loss": 1.3059, "step": 4211 }, { "epoch": 0.5963049479719685, "grad_norm": 8.272085048658898, "learning_rate": 4.114907670445166e-06, "loss": 1.537, "step": 4212 }, { "epoch": 0.5964465208466058, "grad_norm": 9.336312590529836, "learning_rate": 4.114470065350061e-06, "loss": 1.3672, "step": 4213 }, { "epoch": 0.596588093721243, "grad_norm": 9.214118026610077, "learning_rate": 4.114032375383469e-06, "loss": 1.295, "step": 4214 }, { "epoch": 0.5967296665958802, "grad_norm": 9.80538585349092, "learning_rate": 4.113594600568398e-06, "loss": 1.1247, "step": 4215 }, { "epoch": 0.5968712394705175, "grad_norm": 9.829609526817789, "learning_rate": 4.113156740927862e-06, "loss": 1.2777, "step": 4216 }, { "epoch": 0.5970128123451547, "grad_norm": 7.534249495889328, "learning_rate": 4.1127187964848785e-06, "loss": 1.1646, "step": 4217 }, { "epoch": 0.5971543852197919, "grad_norm": 10.20447086261397, "learning_rate": 4.112280767262471e-06, "loss": 1.1244, "step": 4218 }, { "epoch": 0.5972959580944291, "grad_norm": 9.114117547622206, "learning_rate": 4.111842653283665e-06, "loss": 1.3489, "step": 4219 }, { "epoch": 0.5974375309690664, "grad_norm": 10.14670111504554, "learning_rate": 4.1114044545714935e-06, "loss": 1.1647, "step": 4220 }, { "epoch": 0.5975791038437035, "grad_norm": 9.722992096951577, "learning_rate": 4.110966171148991e-06, "loss": 1.2979, "step": 4221 }, { "epoch": 0.5977206767183407, "grad_norm": 10.70686415354381, "learning_rate": 4.110527803039198e-06, "loss": 1.3059, "step": 4222 }, { "epoch": 0.597862249592978, "grad_norm": 10.674833588564962, "learning_rate": 4.11008935026516e-06, "loss": 1.4859, "step": 4223 }, { "epoch": 0.5980038224676152, "grad_norm": 10.76235571220698, "learning_rate": 4.109650812849924e-06, "loss": 1.4563, "step": 4224 }, { "epoch": 0.5981453953422524, "grad_norm": 9.715954148443087, "learning_rate": 4.109212190816546e-06, "loss": 1.3545, "step": 4225 }, { "epoch": 0.5982869682168896, "grad_norm": 10.243787045554392, "learning_rate": 4.108773484188082e-06, "loss": 1.1764, "step": 4226 }, { "epoch": 0.5984285410915269, "grad_norm": 10.56306013619718, "learning_rate": 4.1083346929875966e-06, "loss": 1.3388, "step": 4227 }, { "epoch": 0.5985701139661641, "grad_norm": 8.89106478550345, "learning_rate": 4.107895817238155e-06, "loss": 1.306, "step": 4228 }, { "epoch": 0.5987116868408013, "grad_norm": 8.917144429844951, "learning_rate": 4.107456856962829e-06, "loss": 1.2348, "step": 4229 }, { "epoch": 0.5988532597154386, "grad_norm": 9.68269238695269, "learning_rate": 4.107017812184695e-06, "loss": 1.2823, "step": 4230 }, { "epoch": 0.5989948325900757, "grad_norm": 9.62401268589265, "learning_rate": 4.106578682926833e-06, "loss": 1.2783, "step": 4231 }, { "epoch": 0.5991364054647129, "grad_norm": 9.71476325295862, "learning_rate": 4.106139469212327e-06, "loss": 1.3511, "step": 4232 }, { "epoch": 0.5992779783393501, "grad_norm": 7.214090007409789, "learning_rate": 4.105700171064267e-06, "loss": 1.0994, "step": 4233 }, { "epoch": 0.5994195512139874, "grad_norm": 9.486793172086204, "learning_rate": 4.105260788505746e-06, "loss": 1.2135, "step": 4234 }, { "epoch": 0.5995611240886246, "grad_norm": 9.596202226296024, "learning_rate": 4.104821321559863e-06, "loss": 1.2072, "step": 4235 }, { "epoch": 0.5997026969632618, "grad_norm": 10.421154632150293, "learning_rate": 4.1043817702497195e-06, "loss": 1.4463, "step": 4236 }, { "epoch": 0.5998442698378991, "grad_norm": 9.422343430649175, "learning_rate": 4.103942134598422e-06, "loss": 1.2408, "step": 4237 }, { "epoch": 0.5999858427125363, "grad_norm": 9.46151688783821, "learning_rate": 4.103502414629082e-06, "loss": 1.2977, "step": 4238 }, { "epoch": 0.6001274155871735, "grad_norm": 9.539230857569382, "learning_rate": 4.103062610364817e-06, "loss": 1.4523, "step": 4239 }, { "epoch": 0.6002689884618108, "grad_norm": 10.558063004025533, "learning_rate": 4.102622721828746e-06, "loss": 1.3707, "step": 4240 }, { "epoch": 0.600410561336448, "grad_norm": 9.85381778370956, "learning_rate": 4.102182749043993e-06, "loss": 1.262, "step": 4241 }, { "epoch": 0.6005521342110851, "grad_norm": 10.878000481256418, "learning_rate": 4.101742692033687e-06, "loss": 1.2124, "step": 4242 }, { "epoch": 0.6006937070857223, "grad_norm": 10.844341690674522, "learning_rate": 4.101302550820962e-06, "loss": 1.3568, "step": 4243 }, { "epoch": 0.6008352799603596, "grad_norm": 8.609963841733427, "learning_rate": 4.100862325428957e-06, "loss": 1.2617, "step": 4244 }, { "epoch": 0.6009768528349968, "grad_norm": 8.546043453178394, "learning_rate": 4.1004220158808114e-06, "loss": 1.2455, "step": 4245 }, { "epoch": 0.601118425709634, "grad_norm": 9.939477495855856, "learning_rate": 4.0999816221996755e-06, "loss": 1.2697, "step": 4246 }, { "epoch": 0.6012599985842713, "grad_norm": 10.979689487312152, "learning_rate": 4.099541144408698e-06, "loss": 1.1973, "step": 4247 }, { "epoch": 0.6014015714589085, "grad_norm": 9.62615127613904, "learning_rate": 4.099100582531035e-06, "loss": 1.3901, "step": 4248 }, { "epoch": 0.6015431443335457, "grad_norm": 8.984460342665317, "learning_rate": 4.098659936589847e-06, "loss": 1.4636, "step": 4249 }, { "epoch": 0.601684717208183, "grad_norm": 8.417668298727525, "learning_rate": 4.098219206608298e-06, "loss": 1.2802, "step": 4250 }, { "epoch": 0.6018262900828202, "grad_norm": 9.138177840821893, "learning_rate": 4.0977783926095575e-06, "loss": 1.1306, "step": 4251 }, { "epoch": 0.6019678629574573, "grad_norm": 11.411769587028205, "learning_rate": 4.097337494616798e-06, "loss": 1.2857, "step": 4252 }, { "epoch": 0.6021094358320945, "grad_norm": 7.890900801569177, "learning_rate": 4.096896512653197e-06, "loss": 1.1621, "step": 4253 }, { "epoch": 0.6022510087067318, "grad_norm": 10.981042652202577, "learning_rate": 4.096455446741937e-06, "loss": 1.2874, "step": 4254 }, { "epoch": 0.602392581581369, "grad_norm": 9.548491545574015, "learning_rate": 4.096014296906205e-06, "loss": 1.1753, "step": 4255 }, { "epoch": 0.6025341544560062, "grad_norm": 11.598824513826349, "learning_rate": 4.095573063169191e-06, "loss": 1.1754, "step": 4256 }, { "epoch": 0.6026757273306435, "grad_norm": 9.9142802810497, "learning_rate": 4.095131745554092e-06, "loss": 1.2718, "step": 4257 }, { "epoch": 0.6028173002052807, "grad_norm": 10.000480640304714, "learning_rate": 4.094690344084105e-06, "loss": 1.3403, "step": 4258 }, { "epoch": 0.6029588730799179, "grad_norm": 10.037900817393258, "learning_rate": 4.094248858782436e-06, "loss": 1.158, "step": 4259 }, { "epoch": 0.6031004459545551, "grad_norm": 10.100806447683011, "learning_rate": 4.093807289672294e-06, "loss": 1.31, "step": 4260 }, { "epoch": 0.6032420188291924, "grad_norm": 9.691503079171659, "learning_rate": 4.09336563677689e-06, "loss": 1.3099, "step": 4261 }, { "epoch": 0.6033835917038295, "grad_norm": 9.808028253063437, "learning_rate": 4.092923900119443e-06, "loss": 1.1953, "step": 4262 }, { "epoch": 0.6035251645784667, "grad_norm": 9.744472795438199, "learning_rate": 4.092482079723175e-06, "loss": 1.3315, "step": 4263 }, { "epoch": 0.603666737453104, "grad_norm": 8.99530712271313, "learning_rate": 4.09204017561131e-06, "loss": 1.242, "step": 4264 }, { "epoch": 0.6038083103277412, "grad_norm": 12.424569160467584, "learning_rate": 4.091598187807082e-06, "loss": 1.2044, "step": 4265 }, { "epoch": 0.6039498832023784, "grad_norm": 12.105131564963601, "learning_rate": 4.091156116333723e-06, "loss": 1.2879, "step": 4266 }, { "epoch": 0.6040914560770156, "grad_norm": 7.171434309972857, "learning_rate": 4.090713961214473e-06, "loss": 1.1638, "step": 4267 }, { "epoch": 0.6042330289516529, "grad_norm": 8.370008205104439, "learning_rate": 4.090271722472577e-06, "loss": 1.2424, "step": 4268 }, { "epoch": 0.6043746018262901, "grad_norm": 10.844627322894086, "learning_rate": 4.089829400131282e-06, "loss": 1.1316, "step": 4269 }, { "epoch": 0.6045161747009273, "grad_norm": 9.781013266529165, "learning_rate": 4.0893869942138405e-06, "loss": 1.2994, "step": 4270 }, { "epoch": 0.6046577475755646, "grad_norm": 8.716459239974045, "learning_rate": 4.08894450474351e-06, "loss": 1.2168, "step": 4271 }, { "epoch": 0.6047993204502018, "grad_norm": 9.759488697104283, "learning_rate": 4.088501931743551e-06, "loss": 1.311, "step": 4272 }, { "epoch": 0.6049408933248389, "grad_norm": 9.158968056599848, "learning_rate": 4.0880592752372315e-06, "loss": 1.2884, "step": 4273 }, { "epoch": 0.6050824661994761, "grad_norm": 11.083603207928403, "learning_rate": 4.087616535247819e-06, "loss": 1.4595, "step": 4274 }, { "epoch": 0.6052240390741134, "grad_norm": 8.023514997039317, "learning_rate": 4.087173711798589e-06, "loss": 1.222, "step": 4275 }, { "epoch": 0.6053656119487506, "grad_norm": 9.005092028074007, "learning_rate": 4.086730804912821e-06, "loss": 1.3289, "step": 4276 }, { "epoch": 0.6055071848233878, "grad_norm": 9.634798685137381, "learning_rate": 4.086287814613797e-06, "loss": 1.3519, "step": 4277 }, { "epoch": 0.6056487576980251, "grad_norm": 9.78098050559753, "learning_rate": 4.085844740924805e-06, "loss": 1.3494, "step": 4278 }, { "epoch": 0.6057903305726623, "grad_norm": 9.153631571527534, "learning_rate": 4.085401583869138e-06, "loss": 1.3494, "step": 4279 }, { "epoch": 0.6059319034472995, "grad_norm": 8.53490104381045, "learning_rate": 4.0849583434700915e-06, "loss": 1.2609, "step": 4280 }, { "epoch": 0.6060734763219368, "grad_norm": 12.055369112494091, "learning_rate": 4.0845150197509675e-06, "loss": 1.3751, "step": 4281 }, { "epoch": 0.606215049196574, "grad_norm": 7.8527823795820435, "learning_rate": 4.08407161273507e-06, "loss": 1.2882, "step": 4282 }, { "epoch": 0.6063566220712111, "grad_norm": 9.31534527829262, "learning_rate": 4.083628122445708e-06, "loss": 1.1896, "step": 4283 }, { "epoch": 0.6064981949458483, "grad_norm": 11.455248460210553, "learning_rate": 4.083184548906198e-06, "loss": 1.2071, "step": 4284 }, { "epoch": 0.6066397678204856, "grad_norm": 8.307179060185732, "learning_rate": 4.082740892139856e-06, "loss": 1.2738, "step": 4285 }, { "epoch": 0.6067813406951228, "grad_norm": 9.623605205984113, "learning_rate": 4.082297152170005e-06, "loss": 1.2996, "step": 4286 }, { "epoch": 0.60692291356976, "grad_norm": 10.917704603244719, "learning_rate": 4.081853329019973e-06, "loss": 1.4357, "step": 4287 }, { "epoch": 0.6070644864443973, "grad_norm": 9.55334872506981, "learning_rate": 4.081409422713091e-06, "loss": 1.4415, "step": 4288 }, { "epoch": 0.6072060593190345, "grad_norm": 9.294485807760646, "learning_rate": 4.080965433272695e-06, "loss": 1.1785, "step": 4289 }, { "epoch": 0.6073476321936717, "grad_norm": 8.309806215159057, "learning_rate": 4.080521360722124e-06, "loss": 1.1485, "step": 4290 }, { "epoch": 0.607489205068309, "grad_norm": 9.302460654630957, "learning_rate": 4.080077205084725e-06, "loss": 1.2852, "step": 4291 }, { "epoch": 0.6076307779429462, "grad_norm": 9.344085993514787, "learning_rate": 4.079632966383845e-06, "loss": 1.2185, "step": 4292 }, { "epoch": 0.6077723508175833, "grad_norm": 8.94647491765044, "learning_rate": 4.079188644642838e-06, "loss": 1.2808, "step": 4293 }, { "epoch": 0.6079139236922205, "grad_norm": 10.361379852309616, "learning_rate": 4.07874423988506e-06, "loss": 1.1069, "step": 4294 }, { "epoch": 0.6080554965668578, "grad_norm": 10.021622741017607, "learning_rate": 4.078299752133876e-06, "loss": 1.3216, "step": 4295 }, { "epoch": 0.608197069441495, "grad_norm": 11.272311994448845, "learning_rate": 4.07785518141265e-06, "loss": 1.5121, "step": 4296 }, { "epoch": 0.6083386423161322, "grad_norm": 9.02219155697343, "learning_rate": 4.077410527744754e-06, "loss": 1.3684, "step": 4297 }, { "epoch": 0.6084802151907694, "grad_norm": 10.189397155479309, "learning_rate": 4.076965791153562e-06, "loss": 1.4353, "step": 4298 }, { "epoch": 0.6086217880654067, "grad_norm": 13.017259730961824, "learning_rate": 4.076520971662456e-06, "loss": 1.314, "step": 4299 }, { "epoch": 0.6087633609400439, "grad_norm": 9.891247481152135, "learning_rate": 4.076076069294817e-06, "loss": 1.3424, "step": 4300 }, { "epoch": 0.6089049338146811, "grad_norm": 11.572815288897404, "learning_rate": 4.075631084074033e-06, "loss": 1.2986, "step": 4301 }, { "epoch": 0.6090465066893184, "grad_norm": 8.77712687830095, "learning_rate": 4.075186016023499e-06, "loss": 1.3688, "step": 4302 }, { "epoch": 0.6091880795639556, "grad_norm": 8.600504270451511, "learning_rate": 4.074740865166611e-06, "loss": 1.3807, "step": 4303 }, { "epoch": 0.6093296524385927, "grad_norm": 10.845304086448616, "learning_rate": 4.074295631526769e-06, "loss": 1.3597, "step": 4304 }, { "epoch": 0.60947122531323, "grad_norm": 11.430640660017158, "learning_rate": 4.07385031512738e-06, "loss": 1.2049, "step": 4305 }, { "epoch": 0.6096127981878672, "grad_norm": 8.998097960499113, "learning_rate": 4.0734049159918535e-06, "loss": 1.3524, "step": 4306 }, { "epoch": 0.6097543710625044, "grad_norm": 10.099125331358954, "learning_rate": 4.072959434143603e-06, "loss": 1.3238, "step": 4307 }, { "epoch": 0.6098959439371416, "grad_norm": 8.104859257958067, "learning_rate": 4.0725138696060485e-06, "loss": 1.2972, "step": 4308 }, { "epoch": 0.6100375168117789, "grad_norm": 9.414390311330948, "learning_rate": 4.072068222402612e-06, "loss": 1.4536, "step": 4309 }, { "epoch": 0.6101790896864161, "grad_norm": 9.167110732911059, "learning_rate": 4.0716224925567225e-06, "loss": 1.2934, "step": 4310 }, { "epoch": 0.6103206625610533, "grad_norm": 11.20647198467304, "learning_rate": 4.071176680091809e-06, "loss": 1.4046, "step": 4311 }, { "epoch": 0.6104622354356906, "grad_norm": 10.38236499106139, "learning_rate": 4.07073078503131e-06, "loss": 1.1812, "step": 4312 }, { "epoch": 0.6106038083103278, "grad_norm": 8.389854807819326, "learning_rate": 4.070284807398664e-06, "loss": 1.2544, "step": 4313 }, { "epoch": 0.6107453811849649, "grad_norm": 8.94157476837156, "learning_rate": 4.069838747217317e-06, "loss": 1.3234, "step": 4314 }, { "epoch": 0.6108869540596021, "grad_norm": 9.157473537730715, "learning_rate": 4.069392604510717e-06, "loss": 1.2179, "step": 4315 }, { "epoch": 0.6110285269342394, "grad_norm": 12.845059738762712, "learning_rate": 4.068946379302318e-06, "loss": 1.4357, "step": 4316 }, { "epoch": 0.6111700998088766, "grad_norm": 10.336433519844078, "learning_rate": 4.068500071615578e-06, "loss": 1.3255, "step": 4317 }, { "epoch": 0.6113116726835138, "grad_norm": 8.36549817724073, "learning_rate": 4.068053681473959e-06, "loss": 1.276, "step": 4318 }, { "epoch": 0.6114532455581511, "grad_norm": 8.726178685885811, "learning_rate": 4.067607208900927e-06, "loss": 1.3458, "step": 4319 }, { "epoch": 0.6115948184327883, "grad_norm": 9.763780685218354, "learning_rate": 4.067160653919952e-06, "loss": 1.2625, "step": 4320 }, { "epoch": 0.6117363913074255, "grad_norm": 12.194023694711582, "learning_rate": 4.066714016554511e-06, "loss": 1.3425, "step": 4321 }, { "epoch": 0.6118779641820628, "grad_norm": 13.066840075944624, "learning_rate": 4.066267296828083e-06, "loss": 1.3261, "step": 4322 }, { "epoch": 0.6120195370567, "grad_norm": 8.86730795623029, "learning_rate": 4.06582049476415e-06, "loss": 1.3282, "step": 4323 }, { "epoch": 0.6121611099313372, "grad_norm": 10.5959848392849, "learning_rate": 4.065373610386201e-06, "loss": 1.2778, "step": 4324 }, { "epoch": 0.6123026828059743, "grad_norm": 10.289744291548223, "learning_rate": 4.064926643717729e-06, "loss": 1.404, "step": 4325 }, { "epoch": 0.6124442556806116, "grad_norm": 11.386765651704435, "learning_rate": 4.06447959478223e-06, "loss": 1.3306, "step": 4326 }, { "epoch": 0.6125858285552488, "grad_norm": 9.620277732599186, "learning_rate": 4.0640324636032044e-06, "loss": 1.3497, "step": 4327 }, { "epoch": 0.612727401429886, "grad_norm": 10.244025000836997, "learning_rate": 4.0635852502041595e-06, "loss": 1.3035, "step": 4328 }, { "epoch": 0.6128689743045233, "grad_norm": 10.280581374297597, "learning_rate": 4.0631379546086045e-06, "loss": 1.2415, "step": 4329 }, { "epoch": 0.6130105471791605, "grad_norm": 9.4787387849142, "learning_rate": 4.0626905768400515e-06, "loss": 1.2434, "step": 4330 }, { "epoch": 0.6131521200537977, "grad_norm": 9.311914207928481, "learning_rate": 4.06224311692202e-06, "loss": 1.1213, "step": 4331 }, { "epoch": 0.6132936929284349, "grad_norm": 7.873500620684918, "learning_rate": 4.0617955748780336e-06, "loss": 1.1822, "step": 4332 }, { "epoch": 0.6134352658030722, "grad_norm": 7.421714893421096, "learning_rate": 4.061347950731617e-06, "loss": 1.2243, "step": 4333 }, { "epoch": 0.6135768386777094, "grad_norm": 11.878649341598734, "learning_rate": 4.060900244506304e-06, "loss": 1.2123, "step": 4334 }, { "epoch": 0.6137184115523465, "grad_norm": 8.516117746638393, "learning_rate": 4.060452456225629e-06, "loss": 1.253, "step": 4335 }, { "epoch": 0.6138599844269838, "grad_norm": 9.725951835122416, "learning_rate": 4.060004585913131e-06, "loss": 1.291, "step": 4336 }, { "epoch": 0.614001557301621, "grad_norm": 8.311586258079037, "learning_rate": 4.059556633592356e-06, "loss": 1.1378, "step": 4337 }, { "epoch": 0.6141431301762582, "grad_norm": 7.500441728935063, "learning_rate": 4.0591085992868504e-06, "loss": 1.2248, "step": 4338 }, { "epoch": 0.6142847030508954, "grad_norm": 9.221870333942821, "learning_rate": 4.05866048302017e-06, "loss": 1.2561, "step": 4339 }, { "epoch": 0.6144262759255327, "grad_norm": 9.140482270520266, "learning_rate": 4.058212284815869e-06, "loss": 1.29, "step": 4340 }, { "epoch": 0.6145678488001699, "grad_norm": 9.589586777361518, "learning_rate": 4.057764004697511e-06, "loss": 1.2393, "step": 4341 }, { "epoch": 0.6147094216748071, "grad_norm": 9.207989532358697, "learning_rate": 4.05731564268866e-06, "loss": 1.3892, "step": 4342 }, { "epoch": 0.6148509945494444, "grad_norm": 9.763891252348897, "learning_rate": 4.056867198812886e-06, "loss": 1.4448, "step": 4343 }, { "epoch": 0.6149925674240816, "grad_norm": 11.942147992787588, "learning_rate": 4.056418673093766e-06, "loss": 1.3005, "step": 4344 }, { "epoch": 0.6151341402987187, "grad_norm": 9.049141555452925, "learning_rate": 4.055970065554876e-06, "loss": 1.3959, "step": 4345 }, { "epoch": 0.6152757131733559, "grad_norm": 8.473453130748076, "learning_rate": 4.0555213762198e-06, "loss": 1.3395, "step": 4346 }, { "epoch": 0.6154172860479932, "grad_norm": 9.13131954889881, "learning_rate": 4.055072605112125e-06, "loss": 1.2652, "step": 4347 }, { "epoch": 0.6155588589226304, "grad_norm": 11.622568183776055, "learning_rate": 4.054623752255443e-06, "loss": 1.2696, "step": 4348 }, { "epoch": 0.6157004317972676, "grad_norm": 8.86277810589854, "learning_rate": 4.0541748176733485e-06, "loss": 1.1152, "step": 4349 }, { "epoch": 0.6158420046719049, "grad_norm": 10.210629284153743, "learning_rate": 4.0537258013894434e-06, "loss": 1.2609, "step": 4350 }, { "epoch": 0.6159835775465421, "grad_norm": 10.16110686152681, "learning_rate": 4.053276703427332e-06, "loss": 1.3304, "step": 4351 }, { "epoch": 0.6161251504211793, "grad_norm": 7.342904029980636, "learning_rate": 4.052827523810622e-06, "loss": 1.2114, "step": 4352 }, { "epoch": 0.6162667232958166, "grad_norm": 8.93247022682009, "learning_rate": 4.052378262562926e-06, "loss": 1.1988, "step": 4353 }, { "epoch": 0.6164082961704538, "grad_norm": 7.926688453039072, "learning_rate": 4.051928919707863e-06, "loss": 1.2389, "step": 4354 }, { "epoch": 0.616549869045091, "grad_norm": 9.35602245882298, "learning_rate": 4.051479495269054e-06, "loss": 1.2066, "step": 4355 }, { "epoch": 0.6166914419197281, "grad_norm": 9.588768078674214, "learning_rate": 4.051029989270125e-06, "loss": 1.2625, "step": 4356 }, { "epoch": 0.6168330147943654, "grad_norm": 8.102382695644515, "learning_rate": 4.0505804017347055e-06, "loss": 1.1829, "step": 4357 }, { "epoch": 0.6169745876690026, "grad_norm": 8.763865357166047, "learning_rate": 4.05013073268643e-06, "loss": 1.2749, "step": 4358 }, { "epoch": 0.6171161605436398, "grad_norm": 9.376656754650416, "learning_rate": 4.049680982148938e-06, "loss": 1.2917, "step": 4359 }, { "epoch": 0.617257733418277, "grad_norm": 8.891846906365966, "learning_rate": 4.049231150145873e-06, "loss": 1.2704, "step": 4360 }, { "epoch": 0.6173993062929143, "grad_norm": 9.9846481739576, "learning_rate": 4.048781236700882e-06, "loss": 1.2862, "step": 4361 }, { "epoch": 0.6175408791675515, "grad_norm": 10.69322577684809, "learning_rate": 4.048331241837615e-06, "loss": 1.149, "step": 4362 }, { "epoch": 0.6176824520421887, "grad_norm": 8.190978730796422, "learning_rate": 4.047881165579729e-06, "loss": 1.2451, "step": 4363 }, { "epoch": 0.617824024916826, "grad_norm": 8.909295668929516, "learning_rate": 4.047431007950885e-06, "loss": 1.3481, "step": 4364 }, { "epoch": 0.6179655977914632, "grad_norm": 8.943529775961817, "learning_rate": 4.046980768974746e-06, "loss": 1.3652, "step": 4365 }, { "epoch": 0.6181071706661003, "grad_norm": 9.302819462462542, "learning_rate": 4.046530448674982e-06, "loss": 1.1403, "step": 4366 }, { "epoch": 0.6182487435407376, "grad_norm": 8.630988212222935, "learning_rate": 4.046080047075265e-06, "loss": 1.2074, "step": 4367 }, { "epoch": 0.6183903164153748, "grad_norm": 8.552134679035367, "learning_rate": 4.045629564199274e-06, "loss": 1.1836, "step": 4368 }, { "epoch": 0.618531889290012, "grad_norm": 10.569827455611858, "learning_rate": 4.045179000070688e-06, "loss": 1.3122, "step": 4369 }, { "epoch": 0.6186734621646492, "grad_norm": 11.181030448689926, "learning_rate": 4.044728354713195e-06, "loss": 1.3547, "step": 4370 }, { "epoch": 0.6188150350392865, "grad_norm": 8.524274832123506, "learning_rate": 4.044277628150482e-06, "loss": 1.3675, "step": 4371 }, { "epoch": 0.6189566079139237, "grad_norm": 9.082983821012423, "learning_rate": 4.0438268204062485e-06, "loss": 1.333, "step": 4372 }, { "epoch": 0.6190981807885609, "grad_norm": 8.800754809519757, "learning_rate": 4.043375931504189e-06, "loss": 1.255, "step": 4373 }, { "epoch": 0.6192397536631982, "grad_norm": 8.375915932744451, "learning_rate": 4.042924961468007e-06, "loss": 1.2986, "step": 4374 }, { "epoch": 0.6193813265378354, "grad_norm": 9.08539336289815, "learning_rate": 4.04247391032141e-06, "loss": 1.1271, "step": 4375 }, { "epoch": 0.6195228994124725, "grad_norm": 9.768504653553885, "learning_rate": 4.042022778088111e-06, "loss": 1.2372, "step": 4376 }, { "epoch": 0.6196644722871097, "grad_norm": 7.838719430388462, "learning_rate": 4.0415715647918235e-06, "loss": 1.1449, "step": 4377 }, { "epoch": 0.619806045161747, "grad_norm": 8.621618520220153, "learning_rate": 4.041120270456268e-06, "loss": 1.3527, "step": 4378 }, { "epoch": 0.6199476180363842, "grad_norm": 8.911129334331513, "learning_rate": 4.04066889510517e-06, "loss": 1.2397, "step": 4379 }, { "epoch": 0.6200891909110214, "grad_norm": 10.775846769335178, "learning_rate": 4.040217438762256e-06, "loss": 1.4797, "step": 4380 }, { "epoch": 0.6202307637856587, "grad_norm": 9.671456914933854, "learning_rate": 4.03976590145126e-06, "loss": 1.1854, "step": 4381 }, { "epoch": 0.6203723366602959, "grad_norm": 9.402945004404703, "learning_rate": 4.0393142831959186e-06, "loss": 1.3047, "step": 4382 }, { "epoch": 0.6205139095349331, "grad_norm": 8.974202801250302, "learning_rate": 4.038862584019973e-06, "loss": 1.2111, "step": 4383 }, { "epoch": 0.6206554824095704, "grad_norm": 10.26057855830139, "learning_rate": 4.038410803947169e-06, "loss": 1.2288, "step": 4384 }, { "epoch": 0.6207970552842076, "grad_norm": 11.59381712202605, "learning_rate": 4.037958943001257e-06, "loss": 1.3663, "step": 4385 }, { "epoch": 0.6209386281588448, "grad_norm": 8.342693819212881, "learning_rate": 4.0375070012059884e-06, "loss": 1.2905, "step": 4386 }, { "epoch": 0.6210802010334819, "grad_norm": 10.36827880818875, "learning_rate": 4.037054978585124e-06, "loss": 1.4141, "step": 4387 }, { "epoch": 0.6212217739081192, "grad_norm": 9.43481770980686, "learning_rate": 4.036602875162426e-06, "loss": 1.2055, "step": 4388 }, { "epoch": 0.6213633467827564, "grad_norm": 8.82685840263986, "learning_rate": 4.03615069096166e-06, "loss": 1.266, "step": 4389 }, { "epoch": 0.6215049196573936, "grad_norm": 9.695654408883595, "learning_rate": 4.035698426006597e-06, "loss": 1.1519, "step": 4390 }, { "epoch": 0.6216464925320309, "grad_norm": 9.887336847283025, "learning_rate": 4.035246080321014e-06, "loss": 1.3082, "step": 4391 }, { "epoch": 0.6217880654066681, "grad_norm": 8.890014842034942, "learning_rate": 4.034793653928688e-06, "loss": 1.3871, "step": 4392 }, { "epoch": 0.6219296382813053, "grad_norm": 10.477784770353304, "learning_rate": 4.034341146853406e-06, "loss": 1.2215, "step": 4393 }, { "epoch": 0.6220712111559425, "grad_norm": 8.452794659095188, "learning_rate": 4.0338885591189515e-06, "loss": 1.2396, "step": 4394 }, { "epoch": 0.6222127840305798, "grad_norm": 9.584911537140272, "learning_rate": 4.033435890749121e-06, "loss": 1.1965, "step": 4395 }, { "epoch": 0.622354356905217, "grad_norm": 9.52252938976845, "learning_rate": 4.032983141767708e-06, "loss": 1.2357, "step": 4396 }, { "epoch": 0.6224959297798541, "grad_norm": 8.172842635857384, "learning_rate": 4.032530312198515e-06, "loss": 1.2753, "step": 4397 }, { "epoch": 0.6226375026544914, "grad_norm": 10.321045819896657, "learning_rate": 4.032077402065346e-06, "loss": 1.4806, "step": 4398 }, { "epoch": 0.6227790755291286, "grad_norm": 8.470497091900022, "learning_rate": 4.03162441139201e-06, "loss": 1.3248, "step": 4399 }, { "epoch": 0.6229206484037658, "grad_norm": 10.099197098950839, "learning_rate": 4.031171340202321e-06, "loss": 1.3563, "step": 4400 }, { "epoch": 0.623062221278403, "grad_norm": 8.549661854978398, "learning_rate": 4.030718188520096e-06, "loss": 1.1736, "step": 4401 }, { "epoch": 0.6232037941530403, "grad_norm": 10.075012767395563, "learning_rate": 4.030264956369158e-06, "loss": 1.4943, "step": 4402 }, { "epoch": 0.6233453670276775, "grad_norm": 11.016665232355791, "learning_rate": 4.029811643773332e-06, "loss": 1.3043, "step": 4403 }, { "epoch": 0.6234869399023147, "grad_norm": 11.725973685042309, "learning_rate": 4.029358250756448e-06, "loss": 1.2226, "step": 4404 }, { "epoch": 0.623628512776952, "grad_norm": 11.92602690747642, "learning_rate": 4.028904777342341e-06, "loss": 1.2731, "step": 4405 }, { "epoch": 0.6237700856515892, "grad_norm": 11.967686060536293, "learning_rate": 4.02845122355485e-06, "loss": 1.4376, "step": 4406 }, { "epoch": 0.6239116585262263, "grad_norm": 10.688182429784016, "learning_rate": 4.027997589417818e-06, "loss": 1.4111, "step": 4407 }, { "epoch": 0.6240532314008636, "grad_norm": 11.467711523785965, "learning_rate": 4.027543874955092e-06, "loss": 1.2651, "step": 4408 }, { "epoch": 0.6241948042755008, "grad_norm": 10.433258839626689, "learning_rate": 4.027090080190524e-06, "loss": 1.2079, "step": 4409 }, { "epoch": 0.624336377150138, "grad_norm": 11.481852268626117, "learning_rate": 4.026636205147969e-06, "loss": 1.4114, "step": 4410 }, { "epoch": 0.6244779500247752, "grad_norm": 11.123649600932957, "learning_rate": 4.026182249851287e-06, "loss": 1.28, "step": 4411 }, { "epoch": 0.6246195228994125, "grad_norm": 10.145505133509248, "learning_rate": 4.025728214324341e-06, "loss": 1.2318, "step": 4412 }, { "epoch": 0.6247610957740497, "grad_norm": 9.781870861884947, "learning_rate": 4.025274098591001e-06, "loss": 1.2932, "step": 4413 }, { "epoch": 0.6249026686486869, "grad_norm": 10.08448692427527, "learning_rate": 4.02481990267514e-06, "loss": 1.2873, "step": 4414 }, { "epoch": 0.6250442415233242, "grad_norm": 7.828986997980695, "learning_rate": 4.024365626600632e-06, "loss": 1.2186, "step": 4415 }, { "epoch": 0.6251858143979614, "grad_norm": 9.27869274731743, "learning_rate": 4.023911270391361e-06, "loss": 1.2698, "step": 4416 }, { "epoch": 0.6253273872725986, "grad_norm": 10.657578729476972, "learning_rate": 4.02345683407121e-06, "loss": 1.3076, "step": 4417 }, { "epoch": 0.6254689601472357, "grad_norm": 11.769588294335705, "learning_rate": 4.02300231766407e-06, "loss": 1.2803, "step": 4418 }, { "epoch": 0.625610533021873, "grad_norm": 10.233733877819535, "learning_rate": 4.022547721193833e-06, "loss": 1.1996, "step": 4419 }, { "epoch": 0.6257521058965102, "grad_norm": 10.409854299091814, "learning_rate": 4.022093044684397e-06, "loss": 1.4227, "step": 4420 }, { "epoch": 0.6258936787711474, "grad_norm": 9.251183099091174, "learning_rate": 4.021638288159666e-06, "loss": 1.2448, "step": 4421 }, { "epoch": 0.6260352516457847, "grad_norm": 9.291910849699835, "learning_rate": 4.021183451643544e-06, "loss": 1.3266, "step": 4422 }, { "epoch": 0.6261768245204219, "grad_norm": 9.08552730090275, "learning_rate": 4.020728535159942e-06, "loss": 1.2715, "step": 4423 }, { "epoch": 0.6263183973950591, "grad_norm": 8.493141548869925, "learning_rate": 4.020273538732775e-06, "loss": 1.3049, "step": 4424 }, { "epoch": 0.6264599702696964, "grad_norm": 10.482063231881783, "learning_rate": 4.019818462385962e-06, "loss": 1.3682, "step": 4425 }, { "epoch": 0.6266015431443336, "grad_norm": 9.497973978938342, "learning_rate": 4.019363306143425e-06, "loss": 1.2477, "step": 4426 }, { "epoch": 0.6267431160189708, "grad_norm": 8.90372300019268, "learning_rate": 4.018908070029093e-06, "loss": 1.2161, "step": 4427 }, { "epoch": 0.6268846888936079, "grad_norm": 9.09937062864253, "learning_rate": 4.018452754066895e-06, "loss": 1.2541, "step": 4428 }, { "epoch": 0.6270262617682452, "grad_norm": 8.838147656828678, "learning_rate": 4.017997358280769e-06, "loss": 1.302, "step": 4429 }, { "epoch": 0.6271678346428824, "grad_norm": 12.753683717430734, "learning_rate": 4.017541882694653e-06, "loss": 1.3656, "step": 4430 }, { "epoch": 0.6273094075175196, "grad_norm": 10.017429802165253, "learning_rate": 4.017086327332492e-06, "loss": 1.2565, "step": 4431 }, { "epoch": 0.6274509803921569, "grad_norm": 11.503099106871156, "learning_rate": 4.0166306922182335e-06, "loss": 1.3535, "step": 4432 }, { "epoch": 0.6275925532667941, "grad_norm": 10.404212729326993, "learning_rate": 4.016174977375831e-06, "loss": 1.4475, "step": 4433 }, { "epoch": 0.6277341261414313, "grad_norm": 7.578385244403827, "learning_rate": 4.01571918282924e-06, "loss": 1.3443, "step": 4434 }, { "epoch": 0.6278756990160685, "grad_norm": 8.271647864012978, "learning_rate": 4.015263308602422e-06, "loss": 1.1755, "step": 4435 }, { "epoch": 0.6280172718907058, "grad_norm": 11.524885369845808, "learning_rate": 4.014807354719342e-06, "loss": 1.4746, "step": 4436 }, { "epoch": 0.628158844765343, "grad_norm": 9.406298260232301, "learning_rate": 4.014351321203969e-06, "loss": 1.4196, "step": 4437 }, { "epoch": 0.6283004176399801, "grad_norm": 9.374548735884028, "learning_rate": 4.013895208080275e-06, "loss": 1.2803, "step": 4438 }, { "epoch": 0.6284419905146174, "grad_norm": 10.284823182273238, "learning_rate": 4.013439015372239e-06, "loss": 1.2783, "step": 4439 }, { "epoch": 0.6285835633892546, "grad_norm": 10.43897753096968, "learning_rate": 4.012982743103844e-06, "loss": 1.3695, "step": 4440 }, { "epoch": 0.6287251362638918, "grad_norm": 8.799163900016607, "learning_rate": 4.012526391299073e-06, "loss": 1.1853, "step": 4441 }, { "epoch": 0.628866709138529, "grad_norm": 8.66641376811064, "learning_rate": 4.012069959981917e-06, "loss": 1.3673, "step": 4442 }, { "epoch": 0.6290082820131663, "grad_norm": 9.568712796787368, "learning_rate": 4.0116134491763716e-06, "loss": 1.3232, "step": 4443 }, { "epoch": 0.6291498548878035, "grad_norm": 9.12903077445023, "learning_rate": 4.0111568589064335e-06, "loss": 1.1527, "step": 4444 }, { "epoch": 0.6292914277624407, "grad_norm": 9.48670591466438, "learning_rate": 4.010700189196106e-06, "loss": 1.2234, "step": 4445 }, { "epoch": 0.629433000637078, "grad_norm": 8.116013942324873, "learning_rate": 4.010243440069397e-06, "loss": 1.247, "step": 4446 }, { "epoch": 0.6295745735117152, "grad_norm": 10.827171294693223, "learning_rate": 4.0097866115503156e-06, "loss": 1.27, "step": 4447 }, { "epoch": 0.6297161463863524, "grad_norm": 8.804276328991843, "learning_rate": 4.009329703662878e-06, "loss": 1.2204, "step": 4448 }, { "epoch": 0.6298577192609895, "grad_norm": 9.27141751509756, "learning_rate": 4.008872716431104e-06, "loss": 1.2983, "step": 4449 }, { "epoch": 0.6299992921356268, "grad_norm": 7.840256357478151, "learning_rate": 4.008415649879015e-06, "loss": 1.2159, "step": 4450 }, { "epoch": 0.630140865010264, "grad_norm": 9.115769407575526, "learning_rate": 4.007958504030641e-06, "loss": 1.2202, "step": 4451 }, { "epoch": 0.6302824378849012, "grad_norm": 9.649540122345412, "learning_rate": 4.007501278910013e-06, "loss": 1.1243, "step": 4452 }, { "epoch": 0.6304240107595385, "grad_norm": 11.260927784506528, "learning_rate": 4.007043974541166e-06, "loss": 1.3887, "step": 4453 }, { "epoch": 0.6305655836341757, "grad_norm": 9.371582629103854, "learning_rate": 4.006586590948141e-06, "loss": 1.0903, "step": 4454 }, { "epoch": 0.6307071565088129, "grad_norm": 9.773959334215307, "learning_rate": 4.006129128154983e-06, "loss": 1.2734, "step": 4455 }, { "epoch": 0.6308487293834502, "grad_norm": 9.232998869676168, "learning_rate": 4.00567158618574e-06, "loss": 1.3845, "step": 4456 }, { "epoch": 0.6309903022580874, "grad_norm": 11.92470771863213, "learning_rate": 4.0052139650644625e-06, "loss": 1.4115, "step": 4457 }, { "epoch": 0.6311318751327246, "grad_norm": 10.621958218658449, "learning_rate": 4.004756264815211e-06, "loss": 1.4166, "step": 4458 }, { "epoch": 0.6312734480073617, "grad_norm": 9.669514554451531, "learning_rate": 4.004298485462044e-06, "loss": 1.4698, "step": 4459 }, { "epoch": 0.631415020881999, "grad_norm": 8.072225220304759, "learning_rate": 4.003840627029028e-06, "loss": 1.2047, "step": 4460 }, { "epoch": 0.6315565937566362, "grad_norm": 9.158387022639156, "learning_rate": 4.00338268954023e-06, "loss": 1.3197, "step": 4461 }, { "epoch": 0.6316981666312734, "grad_norm": 12.285420182975656, "learning_rate": 4.002924673019726e-06, "loss": 1.3133, "step": 4462 }, { "epoch": 0.6318397395059107, "grad_norm": 11.332261502314353, "learning_rate": 4.002466577491593e-06, "loss": 1.4357, "step": 4463 }, { "epoch": 0.6319813123805479, "grad_norm": 8.076309560872891, "learning_rate": 4.002008402979911e-06, "loss": 1.3517, "step": 4464 }, { "epoch": 0.6321228852551851, "grad_norm": 8.863774036680981, "learning_rate": 4.001550149508768e-06, "loss": 1.3846, "step": 4465 }, { "epoch": 0.6322644581298223, "grad_norm": 7.941229957616521, "learning_rate": 4.001091817102253e-06, "loss": 1.367, "step": 4466 }, { "epoch": 0.6324060310044596, "grad_norm": 11.668571607291621, "learning_rate": 4.000633405784461e-06, "loss": 1.2163, "step": 4467 }, { "epoch": 0.6325476038790968, "grad_norm": 10.807793623822427, "learning_rate": 4.000174915579489e-06, "loss": 1.1841, "step": 4468 }, { "epoch": 0.632689176753734, "grad_norm": 8.982912160121842, "learning_rate": 3.999716346511442e-06, "loss": 1.2435, "step": 4469 }, { "epoch": 0.6328307496283712, "grad_norm": 10.236803072541548, "learning_rate": 3.999257698604423e-06, "loss": 1.4105, "step": 4470 }, { "epoch": 0.6329723225030084, "grad_norm": 8.06798278328643, "learning_rate": 3.998798971882545e-06, "loss": 1.2638, "step": 4471 }, { "epoch": 0.6331138953776456, "grad_norm": 7.203514767111955, "learning_rate": 3.998340166369923e-06, "loss": 1.1306, "step": 4472 }, { "epoch": 0.6332554682522828, "grad_norm": 10.393339929617337, "learning_rate": 3.997881282090676e-06, "loss": 1.2977, "step": 4473 }, { "epoch": 0.6333970411269201, "grad_norm": 10.344383566085737, "learning_rate": 3.997422319068926e-06, "loss": 1.3082, "step": 4474 }, { "epoch": 0.6335386140015573, "grad_norm": 7.95229828440498, "learning_rate": 3.996963277328802e-06, "loss": 1.1325, "step": 4475 }, { "epoch": 0.6336801868761945, "grad_norm": 10.863500368609975, "learning_rate": 3.996504156894434e-06, "loss": 1.3121, "step": 4476 }, { "epoch": 0.6338217597508318, "grad_norm": 9.411853831935074, "learning_rate": 3.996044957789959e-06, "loss": 1.2073, "step": 4477 }, { "epoch": 0.633963332625469, "grad_norm": 9.085565508560904, "learning_rate": 3.995585680039515e-06, "loss": 1.4125, "step": 4478 }, { "epoch": 0.6341049055001062, "grad_norm": 9.45590295249336, "learning_rate": 3.995126323667248e-06, "loss": 1.3561, "step": 4479 }, { "epoch": 0.6342464783747433, "grad_norm": 10.933386503260895, "learning_rate": 3.994666888697304e-06, "loss": 1.2764, "step": 4480 }, { "epoch": 0.6343880512493806, "grad_norm": 10.147579306597777, "learning_rate": 3.994207375153836e-06, "loss": 1.3019, "step": 4481 }, { "epoch": 0.6345296241240178, "grad_norm": 9.289153667288112, "learning_rate": 3.993747783061001e-06, "loss": 1.4612, "step": 4482 }, { "epoch": 0.634671196998655, "grad_norm": 10.569251436489454, "learning_rate": 3.99328811244296e-06, "loss": 1.3308, "step": 4483 }, { "epoch": 0.6348127698732923, "grad_norm": 12.556287453473571, "learning_rate": 3.9928283633238755e-06, "loss": 1.3408, "step": 4484 }, { "epoch": 0.6349543427479295, "grad_norm": 8.894221163484776, "learning_rate": 3.992368535727917e-06, "loss": 1.1773, "step": 4485 }, { "epoch": 0.6350959156225667, "grad_norm": 10.943097758395686, "learning_rate": 3.991908629679257e-06, "loss": 1.3305, "step": 4486 }, { "epoch": 0.635237488497204, "grad_norm": 9.905810999243544, "learning_rate": 3.991448645202073e-06, "loss": 1.2113, "step": 4487 }, { "epoch": 0.6353790613718412, "grad_norm": 11.318922717052597, "learning_rate": 3.990988582320546e-06, "loss": 1.2476, "step": 4488 }, { "epoch": 0.6355206342464784, "grad_norm": 10.109773088544346, "learning_rate": 3.990528441058861e-06, "loss": 1.1969, "step": 4489 }, { "epoch": 0.6356622071211155, "grad_norm": 7.2795064909784415, "learning_rate": 3.990068221441207e-06, "loss": 1.2358, "step": 4490 }, { "epoch": 0.6358037799957528, "grad_norm": 8.193110982155556, "learning_rate": 3.989607923491777e-06, "loss": 1.1882, "step": 4491 }, { "epoch": 0.63594535287039, "grad_norm": 9.34835650845536, "learning_rate": 3.98914754723477e-06, "loss": 1.3959, "step": 4492 }, { "epoch": 0.6360869257450272, "grad_norm": 9.646380173498251, "learning_rate": 3.988687092694386e-06, "loss": 1.354, "step": 4493 }, { "epoch": 0.6362284986196645, "grad_norm": 10.527208950899494, "learning_rate": 3.988226559894832e-06, "loss": 1.2932, "step": 4494 }, { "epoch": 0.6363700714943017, "grad_norm": 9.236564157284928, "learning_rate": 3.9877659488603186e-06, "loss": 1.2389, "step": 4495 }, { "epoch": 0.6365116443689389, "grad_norm": 8.212324175991961, "learning_rate": 3.9873052596150565e-06, "loss": 1.3639, "step": 4496 }, { "epoch": 0.6366532172435762, "grad_norm": 10.585008103228365, "learning_rate": 3.986844492183267e-06, "loss": 1.2861, "step": 4497 }, { "epoch": 0.6367947901182134, "grad_norm": 9.748597655560516, "learning_rate": 3.986383646589171e-06, "loss": 1.1851, "step": 4498 }, { "epoch": 0.6369363629928506, "grad_norm": 8.370581073165793, "learning_rate": 3.985922722856996e-06, "loss": 1.4422, "step": 4499 }, { "epoch": 0.6370779358674878, "grad_norm": 8.164878286095746, "learning_rate": 3.9854617210109705e-06, "loss": 1.3022, "step": 4500 }, { "epoch": 0.637219508742125, "grad_norm": 7.482927345641961, "learning_rate": 3.985000641075329e-06, "loss": 1.2614, "step": 4501 }, { "epoch": 0.6373610816167622, "grad_norm": 9.260509397530884, "learning_rate": 3.984539483074313e-06, "loss": 1.3309, "step": 4502 }, { "epoch": 0.6375026544913994, "grad_norm": 8.636141008491853, "learning_rate": 3.984078247032162e-06, "loss": 1.242, "step": 4503 }, { "epoch": 0.6376442273660367, "grad_norm": 7.992221149810026, "learning_rate": 3.983616932973124e-06, "loss": 1.3112, "step": 4504 }, { "epoch": 0.6377858002406739, "grad_norm": 7.9025510797536285, "learning_rate": 3.98315554092145e-06, "loss": 1.1481, "step": 4505 }, { "epoch": 0.6379273731153111, "grad_norm": 7.188198685894846, "learning_rate": 3.982694070901396e-06, "loss": 1.3925, "step": 4506 }, { "epoch": 0.6380689459899483, "grad_norm": 8.100737512238469, "learning_rate": 3.98223252293722e-06, "loss": 1.2566, "step": 4507 }, { "epoch": 0.6382105188645856, "grad_norm": 10.982000710235333, "learning_rate": 3.9817708970531855e-06, "loss": 1.2336, "step": 4508 }, { "epoch": 0.6383520917392228, "grad_norm": 10.084725233958215, "learning_rate": 3.9813091932735596e-06, "loss": 1.323, "step": 4509 }, { "epoch": 0.63849366461386, "grad_norm": 8.383923232127488, "learning_rate": 3.9808474116226135e-06, "loss": 1.2865, "step": 4510 }, { "epoch": 0.6386352374884972, "grad_norm": 8.226071885595955, "learning_rate": 3.980385552124624e-06, "loss": 1.1954, "step": 4511 }, { "epoch": 0.6387768103631344, "grad_norm": 8.082549014498912, "learning_rate": 3.979923614803869e-06, "loss": 1.4014, "step": 4512 }, { "epoch": 0.6389183832377716, "grad_norm": 8.504475032655142, "learning_rate": 3.979461599684633e-06, "loss": 1.2166, "step": 4513 }, { "epoch": 0.6390599561124088, "grad_norm": 7.697082501865713, "learning_rate": 3.978999506791205e-06, "loss": 1.2058, "step": 4514 }, { "epoch": 0.6392015289870461, "grad_norm": 8.27877321452279, "learning_rate": 3.978537336147875e-06, "loss": 1.1736, "step": 4515 }, { "epoch": 0.6393431018616833, "grad_norm": 10.339033175042475, "learning_rate": 3.97807508777894e-06, "loss": 1.1608, "step": 4516 }, { "epoch": 0.6394846747363205, "grad_norm": 9.005202591061163, "learning_rate": 3.977612761708699e-06, "loss": 1.2999, "step": 4517 }, { "epoch": 0.6396262476109578, "grad_norm": 8.041114063415552, "learning_rate": 3.977150357961457e-06, "loss": 1.1359, "step": 4518 }, { "epoch": 0.639767820485595, "grad_norm": 8.760230922355202, "learning_rate": 3.976687876561523e-06, "loss": 1.2871, "step": 4519 }, { "epoch": 0.6399093933602322, "grad_norm": 10.793807790631337, "learning_rate": 3.976225317533208e-06, "loss": 1.278, "step": 4520 }, { "epoch": 0.6400509662348693, "grad_norm": 9.946114507013334, "learning_rate": 3.9757626809008274e-06, "loss": 1.3256, "step": 4521 }, { "epoch": 0.6401925391095066, "grad_norm": 8.377708794095906, "learning_rate": 3.975299966688705e-06, "loss": 1.175, "step": 4522 }, { "epoch": 0.6403341119841438, "grad_norm": 8.167464340594504, "learning_rate": 3.974837174921162e-06, "loss": 1.2688, "step": 4523 }, { "epoch": 0.640475684858781, "grad_norm": 9.493890102575717, "learning_rate": 3.974374305622529e-06, "loss": 1.1958, "step": 4524 }, { "epoch": 0.6406172577334183, "grad_norm": 7.390338271145841, "learning_rate": 3.973911358817139e-06, "loss": 1.1987, "step": 4525 }, { "epoch": 0.6407588306080555, "grad_norm": 9.730456204829327, "learning_rate": 3.973448334529326e-06, "loss": 1.1872, "step": 4526 }, { "epoch": 0.6409004034826927, "grad_norm": 10.135041332188823, "learning_rate": 3.972985232783434e-06, "loss": 1.3345, "step": 4527 }, { "epoch": 0.64104197635733, "grad_norm": 10.050203569922104, "learning_rate": 3.972522053603806e-06, "loss": 1.2604, "step": 4528 }, { "epoch": 0.6411835492319672, "grad_norm": 9.80803292029676, "learning_rate": 3.972058797014792e-06, "loss": 1.246, "step": 4529 }, { "epoch": 0.6413251221066044, "grad_norm": 8.94614616407338, "learning_rate": 3.971595463040744e-06, "loss": 1.2121, "step": 4530 }, { "epoch": 0.6414666949812416, "grad_norm": 12.27530159201006, "learning_rate": 3.97113205170602e-06, "loss": 1.1771, "step": 4531 }, { "epoch": 0.6416082678558788, "grad_norm": 11.287395800408442, "learning_rate": 3.970668563034982e-06, "loss": 1.289, "step": 4532 }, { "epoch": 0.641749840730516, "grad_norm": 8.999899545744725, "learning_rate": 3.9702049970519925e-06, "loss": 1.2248, "step": 4533 }, { "epoch": 0.6418914136051532, "grad_norm": 9.297963516239264, "learning_rate": 3.969741353781424e-06, "loss": 1.2605, "step": 4534 }, { "epoch": 0.6420329864797905, "grad_norm": 8.791894509013241, "learning_rate": 3.969277633247648e-06, "loss": 1.2902, "step": 4535 }, { "epoch": 0.6421745593544277, "grad_norm": 9.192746279947057, "learning_rate": 3.968813835475043e-06, "loss": 1.1481, "step": 4536 }, { "epoch": 0.6423161322290649, "grad_norm": 8.710676057595707, "learning_rate": 3.968349960487988e-06, "loss": 1.288, "step": 4537 }, { "epoch": 0.6424577051037021, "grad_norm": 7.43571643119981, "learning_rate": 3.967886008310872e-06, "loss": 1.1648, "step": 4538 }, { "epoch": 0.6425992779783394, "grad_norm": 9.329103146998845, "learning_rate": 3.967421978968083e-06, "loss": 1.2601, "step": 4539 }, { "epoch": 0.6427408508529766, "grad_norm": 10.373412596817337, "learning_rate": 3.966957872484013e-06, "loss": 1.2354, "step": 4540 }, { "epoch": 0.6428824237276138, "grad_norm": 10.495111689961456, "learning_rate": 3.966493688883064e-06, "loss": 1.2922, "step": 4541 }, { "epoch": 0.643023996602251, "grad_norm": 11.185066512570454, "learning_rate": 3.966029428189634e-06, "loss": 1.3681, "step": 4542 }, { "epoch": 0.6431655694768882, "grad_norm": 8.456977572722263, "learning_rate": 3.965565090428129e-06, "loss": 1.2131, "step": 4543 }, { "epoch": 0.6433071423515254, "grad_norm": 8.71356462626483, "learning_rate": 3.965100675622962e-06, "loss": 1.3791, "step": 4544 }, { "epoch": 0.6434487152261626, "grad_norm": 10.646160282457034, "learning_rate": 3.9646361837985435e-06, "loss": 1.2969, "step": 4545 }, { "epoch": 0.6435902881007999, "grad_norm": 11.544338169614319, "learning_rate": 3.964171614979294e-06, "loss": 1.2989, "step": 4546 }, { "epoch": 0.6437318609754371, "grad_norm": 8.382203605274299, "learning_rate": 3.963706969189634e-06, "loss": 1.13, "step": 4547 }, { "epoch": 0.6438734338500743, "grad_norm": 11.14617516299471, "learning_rate": 3.963242246453989e-06, "loss": 1.3873, "step": 4548 }, { "epoch": 0.6440150067247116, "grad_norm": 8.286849061652171, "learning_rate": 3.962777446796791e-06, "loss": 1.3765, "step": 4549 }, { "epoch": 0.6441565795993488, "grad_norm": 9.197138913639249, "learning_rate": 3.962312570242473e-06, "loss": 1.3262, "step": 4550 }, { "epoch": 0.644298152473986, "grad_norm": 9.135437313275439, "learning_rate": 3.961847616815474e-06, "loss": 1.2912, "step": 4551 }, { "epoch": 0.6444397253486231, "grad_norm": 9.019064315003195, "learning_rate": 3.961382586540236e-06, "loss": 1.3678, "step": 4552 }, { "epoch": 0.6445812982232604, "grad_norm": 11.228449886472635, "learning_rate": 3.960917479441204e-06, "loss": 1.3546, "step": 4553 }, { "epoch": 0.6447228710978976, "grad_norm": 8.524037201061368, "learning_rate": 3.96045229554283e-06, "loss": 1.2301, "step": 4554 }, { "epoch": 0.6448644439725348, "grad_norm": 10.986792872114505, "learning_rate": 3.959987034869568e-06, "loss": 1.2826, "step": 4555 }, { "epoch": 0.6450060168471721, "grad_norm": 8.755498411935568, "learning_rate": 3.959521697445876e-06, "loss": 1.1196, "step": 4556 }, { "epoch": 0.6451475897218093, "grad_norm": 8.305228588028992, "learning_rate": 3.9590562832962174e-06, "loss": 1.2003, "step": 4557 }, { "epoch": 0.6452891625964465, "grad_norm": 9.141973725282194, "learning_rate": 3.958590792445057e-06, "loss": 1.0965, "step": 4558 }, { "epoch": 0.6454307354710838, "grad_norm": 7.296154828034984, "learning_rate": 3.958125224916866e-06, "loss": 1.3694, "step": 4559 }, { "epoch": 0.645572308345721, "grad_norm": 8.54069649994083, "learning_rate": 3.95765958073612e-06, "loss": 1.2157, "step": 4560 }, { "epoch": 0.6457138812203582, "grad_norm": 8.535806075091122, "learning_rate": 3.957193859927295e-06, "loss": 1.1252, "step": 4561 }, { "epoch": 0.6458554540949955, "grad_norm": 8.37248898341131, "learning_rate": 3.9567280625148776e-06, "loss": 1.2161, "step": 4562 }, { "epoch": 0.6459970269696326, "grad_norm": 9.247824335693648, "learning_rate": 3.956262188523351e-06, "loss": 1.335, "step": 4563 }, { "epoch": 0.6461385998442698, "grad_norm": 8.42045714381008, "learning_rate": 3.955796237977207e-06, "loss": 1.3997, "step": 4564 }, { "epoch": 0.646280172718907, "grad_norm": 8.951732906744448, "learning_rate": 3.955330210900941e-06, "loss": 1.308, "step": 4565 }, { "epoch": 0.6464217455935443, "grad_norm": 7.34964492874406, "learning_rate": 3.95486410731905e-06, "loss": 1.1371, "step": 4566 }, { "epoch": 0.6465633184681815, "grad_norm": 9.438853589937983, "learning_rate": 3.954397927256037e-06, "loss": 1.2917, "step": 4567 }, { "epoch": 0.6467048913428187, "grad_norm": 9.596369183898245, "learning_rate": 3.953931670736411e-06, "loss": 1.062, "step": 4568 }, { "epoch": 0.646846464217456, "grad_norm": 7.392884116850849, "learning_rate": 3.953465337784681e-06, "loss": 1.2698, "step": 4569 }, { "epoch": 0.6469880370920932, "grad_norm": 8.37752847792459, "learning_rate": 3.952998928425361e-06, "loss": 1.2358, "step": 4570 }, { "epoch": 0.6471296099667304, "grad_norm": 9.829551702388155, "learning_rate": 3.9525324426829716e-06, "loss": 1.2021, "step": 4571 }, { "epoch": 0.6472711828413676, "grad_norm": 8.378887583749604, "learning_rate": 3.952065880582034e-06, "loss": 1.2161, "step": 4572 }, { "epoch": 0.6474127557160048, "grad_norm": 9.996242198609522, "learning_rate": 3.951599242147076e-06, "loss": 1.4276, "step": 4573 }, { "epoch": 0.647554328590642, "grad_norm": 7.370737686816128, "learning_rate": 3.951132527402629e-06, "loss": 1.2587, "step": 4574 }, { "epoch": 0.6476959014652792, "grad_norm": 7.792620948927396, "learning_rate": 3.950665736373226e-06, "loss": 1.1225, "step": 4575 }, { "epoch": 0.6478374743399165, "grad_norm": 10.221100423137583, "learning_rate": 3.950198869083407e-06, "loss": 1.3911, "step": 4576 }, { "epoch": 0.6479790472145537, "grad_norm": 9.008337185906202, "learning_rate": 3.949731925557715e-06, "loss": 1.2453, "step": 4577 }, { "epoch": 0.6481206200891909, "grad_norm": 8.818027791731108, "learning_rate": 3.949264905820697e-06, "loss": 1.3209, "step": 4578 }, { "epoch": 0.6482621929638281, "grad_norm": 8.400859479984259, "learning_rate": 3.948797809896903e-06, "loss": 1.3933, "step": 4579 }, { "epoch": 0.6484037658384654, "grad_norm": 8.540756797401697, "learning_rate": 3.948330637810888e-06, "loss": 1.1702, "step": 4580 }, { "epoch": 0.6485453387131026, "grad_norm": 10.753573156505897, "learning_rate": 3.947863389587212e-06, "loss": 1.2679, "step": 4581 }, { "epoch": 0.6486869115877398, "grad_norm": 9.196627488015393, "learning_rate": 3.947396065250437e-06, "loss": 1.1645, "step": 4582 }, { "epoch": 0.648828484462377, "grad_norm": 8.509767193954985, "learning_rate": 3.9469286648251304e-06, "loss": 1.1602, "step": 4583 }, { "epoch": 0.6489700573370142, "grad_norm": 8.003479677657937, "learning_rate": 3.946461188335863e-06, "loss": 1.2507, "step": 4584 }, { "epoch": 0.6491116302116514, "grad_norm": 7.837138151488563, "learning_rate": 3.945993635807209e-06, "loss": 1.3798, "step": 4585 }, { "epoch": 0.6492532030862886, "grad_norm": 8.407113069948473, "learning_rate": 3.945526007263747e-06, "loss": 1.289, "step": 4586 }, { "epoch": 0.6493947759609259, "grad_norm": 7.552325253174438, "learning_rate": 3.945058302730061e-06, "loss": 1.3831, "step": 4587 }, { "epoch": 0.6495363488355631, "grad_norm": 9.39706996947615, "learning_rate": 3.944590522230738e-06, "loss": 1.311, "step": 4588 }, { "epoch": 0.6496779217102003, "grad_norm": 7.979705340841863, "learning_rate": 3.9441226657903686e-06, "loss": 1.1337, "step": 4589 }, { "epoch": 0.6498194945848376, "grad_norm": 10.095939091784356, "learning_rate": 3.943654733433547e-06, "loss": 1.1595, "step": 4590 }, { "epoch": 0.6499610674594748, "grad_norm": 8.854476193272337, "learning_rate": 3.943186725184872e-06, "loss": 1.3115, "step": 4591 }, { "epoch": 0.650102640334112, "grad_norm": 9.967975072128452, "learning_rate": 3.942718641068947e-06, "loss": 1.2999, "step": 4592 }, { "epoch": 0.6502442132087493, "grad_norm": 8.894950257066286, "learning_rate": 3.94225048111038e-06, "loss": 1.2693, "step": 4593 }, { "epoch": 0.6503857860833864, "grad_norm": 10.949477607424397, "learning_rate": 3.941782245333781e-06, "loss": 1.2845, "step": 4594 }, { "epoch": 0.6505273589580236, "grad_norm": 8.579297595316525, "learning_rate": 3.941313933763763e-06, "loss": 1.3784, "step": 4595 }, { "epoch": 0.6506689318326608, "grad_norm": 9.680855521236069, "learning_rate": 3.9408455464249466e-06, "loss": 1.194, "step": 4596 }, { "epoch": 0.6508105047072981, "grad_norm": 10.294165015872993, "learning_rate": 3.9403770833419535e-06, "loss": 1.2491, "step": 4597 }, { "epoch": 0.6509520775819353, "grad_norm": 10.598087109399412, "learning_rate": 3.939908544539412e-06, "loss": 1.285, "step": 4598 }, { "epoch": 0.6510936504565725, "grad_norm": 7.861733297012155, "learning_rate": 3.9394399300419516e-06, "loss": 1.3311, "step": 4599 }, { "epoch": 0.6512352233312098, "grad_norm": 10.966459986199114, "learning_rate": 3.938971239874208e-06, "loss": 1.2349, "step": 4600 }, { "epoch": 0.651376796205847, "grad_norm": 8.929810957835322, "learning_rate": 3.938502474060818e-06, "loss": 1.1407, "step": 4601 }, { "epoch": 0.6515183690804842, "grad_norm": 11.06092814684765, "learning_rate": 3.938033632626426e-06, "loss": 1.2706, "step": 4602 }, { "epoch": 0.6516599419551214, "grad_norm": 8.566818596151958, "learning_rate": 3.937564715595678e-06, "loss": 1.2413, "step": 4603 }, { "epoch": 0.6518015148297586, "grad_norm": 10.213578061524162, "learning_rate": 3.937095722993225e-06, "loss": 1.3742, "step": 4604 }, { "epoch": 0.6519430877043958, "grad_norm": 9.928866492784712, "learning_rate": 3.936626654843722e-06, "loss": 1.2688, "step": 4605 }, { "epoch": 0.652084660579033, "grad_norm": 7.009017041301434, "learning_rate": 3.936157511171827e-06, "loss": 1.197, "step": 4606 }, { "epoch": 0.6522262334536703, "grad_norm": 8.583935268986389, "learning_rate": 3.935688292002201e-06, "loss": 1.2885, "step": 4607 }, { "epoch": 0.6523678063283075, "grad_norm": 10.19460792883934, "learning_rate": 3.935218997359513e-06, "loss": 1.3376, "step": 4608 }, { "epoch": 0.6525093792029447, "grad_norm": 13.232404433798468, "learning_rate": 3.934749627268433e-06, "loss": 1.4152, "step": 4609 }, { "epoch": 0.652650952077582, "grad_norm": 11.668081215979571, "learning_rate": 3.934280181753634e-06, "loss": 1.3362, "step": 4610 }, { "epoch": 0.6527925249522192, "grad_norm": 11.183078339612146, "learning_rate": 3.9338106608397955e-06, "loss": 1.3332, "step": 4611 }, { "epoch": 0.6529340978268564, "grad_norm": 9.257939411049948, "learning_rate": 3.9333410645516e-06, "loss": 1.421, "step": 4612 }, { "epoch": 0.6530756707014936, "grad_norm": 9.200625224222621, "learning_rate": 3.932871392913733e-06, "loss": 1.2629, "step": 4613 }, { "epoch": 0.6532172435761309, "grad_norm": 8.1308882284649, "learning_rate": 3.932401645950885e-06, "loss": 1.374, "step": 4614 }, { "epoch": 0.653358816450768, "grad_norm": 9.282588977751994, "learning_rate": 3.931931823687751e-06, "loss": 1.4547, "step": 4615 }, { "epoch": 0.6535003893254052, "grad_norm": 11.301839790122738, "learning_rate": 3.931461926149029e-06, "loss": 1.33, "step": 4616 }, { "epoch": 0.6536419622000424, "grad_norm": 11.148793696679174, "learning_rate": 3.930991953359421e-06, "loss": 1.3109, "step": 4617 }, { "epoch": 0.6537835350746797, "grad_norm": 11.73103912928153, "learning_rate": 3.930521905343632e-06, "loss": 1.5208, "step": 4618 }, { "epoch": 0.6539251079493169, "grad_norm": 9.841946294323677, "learning_rate": 3.930051782126374e-06, "loss": 1.183, "step": 4619 }, { "epoch": 0.6540666808239541, "grad_norm": 14.472099559131497, "learning_rate": 3.92958158373236e-06, "loss": 1.3281, "step": 4620 }, { "epoch": 0.6542082536985914, "grad_norm": 8.873107708422875, "learning_rate": 3.929111310186307e-06, "loss": 1.2404, "step": 4621 }, { "epoch": 0.6543498265732286, "grad_norm": 10.24282921032727, "learning_rate": 3.928640961512939e-06, "loss": 1.3684, "step": 4622 }, { "epoch": 0.6544913994478658, "grad_norm": 8.252328717413885, "learning_rate": 3.9281705377369814e-06, "loss": 1.1585, "step": 4623 }, { "epoch": 0.6546329723225031, "grad_norm": 11.980247134810401, "learning_rate": 3.927700038883162e-06, "loss": 1.3393, "step": 4624 }, { "epoch": 0.6547745451971402, "grad_norm": 12.187084953870288, "learning_rate": 3.927229464976218e-06, "loss": 1.3627, "step": 4625 }, { "epoch": 0.6549161180717774, "grad_norm": 10.376033501682006, "learning_rate": 3.9267588160408845e-06, "loss": 1.264, "step": 4626 }, { "epoch": 0.6550576909464146, "grad_norm": 7.409167523900158, "learning_rate": 3.926288092101903e-06, "loss": 1.248, "step": 4627 }, { "epoch": 0.6551992638210519, "grad_norm": 11.873949506372462, "learning_rate": 3.92581729318402e-06, "loss": 1.3143, "step": 4628 }, { "epoch": 0.6553408366956891, "grad_norm": 8.455597633973381, "learning_rate": 3.925346419311986e-06, "loss": 1.2605, "step": 4629 }, { "epoch": 0.6554824095703263, "grad_norm": 8.984553751618533, "learning_rate": 3.924875470510553e-06, "loss": 1.2186, "step": 4630 }, { "epoch": 0.6556239824449636, "grad_norm": 9.111036930971114, "learning_rate": 3.924404446804479e-06, "loss": 1.2979, "step": 4631 }, { "epoch": 0.6557655553196008, "grad_norm": 9.777319560247744, "learning_rate": 3.923933348218525e-06, "loss": 1.2109, "step": 4632 }, { "epoch": 0.655907128194238, "grad_norm": 10.421919289284642, "learning_rate": 3.923462174777458e-06, "loss": 1.2298, "step": 4633 }, { "epoch": 0.6560487010688753, "grad_norm": 7.759112999157275, "learning_rate": 3.922990926506044e-06, "loss": 1.1936, "step": 4634 }, { "epoch": 0.6561902739435124, "grad_norm": 8.203826234908814, "learning_rate": 3.922519603429059e-06, "loss": 1.3227, "step": 4635 }, { "epoch": 0.6563318468181496, "grad_norm": 11.33607614276003, "learning_rate": 3.922048205571279e-06, "loss": 1.3759, "step": 4636 }, { "epoch": 0.6564734196927868, "grad_norm": 8.75694217491944, "learning_rate": 3.921576732957486e-06, "loss": 1.2932, "step": 4637 }, { "epoch": 0.6566149925674241, "grad_norm": 10.196205959414359, "learning_rate": 3.9211051856124625e-06, "loss": 1.3996, "step": 4638 }, { "epoch": 0.6567565654420613, "grad_norm": 9.274089478581619, "learning_rate": 3.920633563560999e-06, "loss": 1.2138, "step": 4639 }, { "epoch": 0.6568981383166985, "grad_norm": 9.49254194940074, "learning_rate": 3.92016186682789e-06, "loss": 1.1334, "step": 4640 }, { "epoch": 0.6570397111913358, "grad_norm": 9.033510763342555, "learning_rate": 3.919690095437929e-06, "loss": 1.2722, "step": 4641 }, { "epoch": 0.657181284065973, "grad_norm": 9.76655190913598, "learning_rate": 3.9192182494159196e-06, "loss": 1.247, "step": 4642 }, { "epoch": 0.6573228569406102, "grad_norm": 9.152801801784737, "learning_rate": 3.918746328786665e-06, "loss": 1.3533, "step": 4643 }, { "epoch": 0.6574644298152474, "grad_norm": 8.640647074169362, "learning_rate": 3.918274333574972e-06, "loss": 1.217, "step": 4644 }, { "epoch": 0.6576060026898847, "grad_norm": 9.642594938396826, "learning_rate": 3.9178022638056565e-06, "loss": 1.3, "step": 4645 }, { "epoch": 0.6577475755645218, "grad_norm": 9.574504821562575, "learning_rate": 3.9173301195035326e-06, "loss": 1.2672, "step": 4646 }, { "epoch": 0.657889148439159, "grad_norm": 9.426161263967236, "learning_rate": 3.916857900693421e-06, "loss": 1.2747, "step": 4647 }, { "epoch": 0.6580307213137963, "grad_norm": 9.189313437102497, "learning_rate": 3.916385607400146e-06, "loss": 1.3363, "step": 4648 }, { "epoch": 0.6581722941884335, "grad_norm": 8.093173091167255, "learning_rate": 3.915913239648535e-06, "loss": 1.1261, "step": 4649 }, { "epoch": 0.6583138670630707, "grad_norm": 7.913039597949131, "learning_rate": 3.915440797463422e-06, "loss": 1.2451, "step": 4650 }, { "epoch": 0.6584554399377079, "grad_norm": 7.413631318138842, "learning_rate": 3.914968280869642e-06, "loss": 1.1505, "step": 4651 }, { "epoch": 0.6585970128123452, "grad_norm": 10.025152617170404, "learning_rate": 3.9144956898920336e-06, "loss": 1.3655, "step": 4652 }, { "epoch": 0.6587385856869824, "grad_norm": 8.98979286371059, "learning_rate": 3.914023024555441e-06, "loss": 1.2845, "step": 4653 }, { "epoch": 0.6588801585616196, "grad_norm": 8.799575309909315, "learning_rate": 3.913550284884714e-06, "loss": 1.3207, "step": 4654 }, { "epoch": 0.6590217314362569, "grad_norm": 9.287413121018616, "learning_rate": 3.913077470904701e-06, "loss": 1.1765, "step": 4655 }, { "epoch": 0.659163304310894, "grad_norm": 7.852379903424508, "learning_rate": 3.912604582640259e-06, "loss": 1.1587, "step": 4656 }, { "epoch": 0.6593048771855312, "grad_norm": 8.19189847486885, "learning_rate": 3.912131620116249e-06, "loss": 1.2102, "step": 4657 }, { "epoch": 0.6594464500601684, "grad_norm": 8.565648300656925, "learning_rate": 3.9116585833575305e-06, "loss": 1.1998, "step": 4658 }, { "epoch": 0.6595880229348057, "grad_norm": 8.754304535506554, "learning_rate": 3.911185472388974e-06, "loss": 1.3548, "step": 4659 }, { "epoch": 0.6597295958094429, "grad_norm": 9.265908659891755, "learning_rate": 3.91071228723545e-06, "loss": 1.2935, "step": 4660 }, { "epoch": 0.6598711686840801, "grad_norm": 11.044067544047369, "learning_rate": 3.9102390279218315e-06, "loss": 1.3586, "step": 4661 }, { "epoch": 0.6600127415587174, "grad_norm": 9.680372803500187, "learning_rate": 3.909765694473e-06, "loss": 1.1364, "step": 4662 }, { "epoch": 0.6601543144333546, "grad_norm": 8.844968462467845, "learning_rate": 3.909292286913836e-06, "loss": 1.2279, "step": 4663 }, { "epoch": 0.6602958873079918, "grad_norm": 11.758176515336904, "learning_rate": 3.908818805269229e-06, "loss": 1.2748, "step": 4664 }, { "epoch": 0.660437460182629, "grad_norm": 8.570045877376154, "learning_rate": 3.908345249564066e-06, "loss": 1.4263, "step": 4665 }, { "epoch": 0.6605790330572662, "grad_norm": 10.146288308680722, "learning_rate": 3.907871619823244e-06, "loss": 1.2092, "step": 4666 }, { "epoch": 0.6607206059319034, "grad_norm": 10.397212872806872, "learning_rate": 3.907397916071661e-06, "loss": 1.5012, "step": 4667 }, { "epoch": 0.6608621788065406, "grad_norm": 11.05734184398697, "learning_rate": 3.90692413833422e-06, "loss": 1.3039, "step": 4668 }, { "epoch": 0.6610037516811779, "grad_norm": 10.163919880340673, "learning_rate": 3.906450286635824e-06, "loss": 1.2885, "step": 4669 }, { "epoch": 0.6611453245558151, "grad_norm": 9.551042461918751, "learning_rate": 3.905976361001385e-06, "loss": 1.3176, "step": 4670 }, { "epoch": 0.6612868974304523, "grad_norm": 8.718251203809992, "learning_rate": 3.905502361455819e-06, "loss": 1.2059, "step": 4671 }, { "epoch": 0.6614284703050896, "grad_norm": 8.670378354802613, "learning_rate": 3.9050282880240405e-06, "loss": 1.1592, "step": 4672 }, { "epoch": 0.6615700431797268, "grad_norm": 8.335801127926214, "learning_rate": 3.904554140730973e-06, "loss": 1.2916, "step": 4673 }, { "epoch": 0.661711616054364, "grad_norm": 8.47951327528545, "learning_rate": 3.904079919601543e-06, "loss": 1.0678, "step": 4674 }, { "epoch": 0.6618531889290012, "grad_norm": 9.280965986947637, "learning_rate": 3.903605624660676e-06, "loss": 1.2001, "step": 4675 }, { "epoch": 0.6619947618036385, "grad_norm": 10.093796106947714, "learning_rate": 3.903131255933309e-06, "loss": 1.4236, "step": 4676 }, { "epoch": 0.6621363346782756, "grad_norm": 9.994122876242301, "learning_rate": 3.902656813444378e-06, "loss": 1.4558, "step": 4677 }, { "epoch": 0.6622779075529128, "grad_norm": 8.271412660182028, "learning_rate": 3.902182297218824e-06, "loss": 1.2751, "step": 4678 }, { "epoch": 0.6624194804275501, "grad_norm": 11.09582225088974, "learning_rate": 3.901707707281592e-06, "loss": 1.2278, "step": 4679 }, { "epoch": 0.6625610533021873, "grad_norm": 11.37197265583069, "learning_rate": 3.901233043657632e-06, "loss": 1.1396, "step": 4680 }, { "epoch": 0.6627026261768245, "grad_norm": 8.634188415521551, "learning_rate": 3.900758306371895e-06, "loss": 1.2587, "step": 4681 }, { "epoch": 0.6628441990514617, "grad_norm": 6.909001833510338, "learning_rate": 3.900283495449339e-06, "loss": 1.1305, "step": 4682 }, { "epoch": 0.662985771926099, "grad_norm": 8.721562413922255, "learning_rate": 3.899808610914923e-06, "loss": 1.4387, "step": 4683 }, { "epoch": 0.6631273448007362, "grad_norm": 8.905918904892822, "learning_rate": 3.899333652793612e-06, "loss": 1.1504, "step": 4684 }, { "epoch": 0.6632689176753734, "grad_norm": 12.640640089028386, "learning_rate": 3.898858621110374e-06, "loss": 1.4105, "step": 4685 }, { "epoch": 0.6634104905500107, "grad_norm": 9.240920688618631, "learning_rate": 3.898383515890182e-06, "loss": 1.2304, "step": 4686 }, { "epoch": 0.6635520634246478, "grad_norm": 7.7611262532569425, "learning_rate": 3.89790833715801e-06, "loss": 1.3515, "step": 4687 }, { "epoch": 0.663693636299285, "grad_norm": 8.562253182745163, "learning_rate": 3.897433084938841e-06, "loss": 1.3841, "step": 4688 }, { "epoch": 0.6638352091739222, "grad_norm": 8.944901822587136, "learning_rate": 3.8969577592576555e-06, "loss": 1.292, "step": 4689 }, { "epoch": 0.6639767820485595, "grad_norm": 9.118050214913257, "learning_rate": 3.896482360139443e-06, "loss": 1.1421, "step": 4690 }, { "epoch": 0.6641183549231967, "grad_norm": 10.32537260329973, "learning_rate": 3.896006887609193e-06, "loss": 1.2994, "step": 4691 }, { "epoch": 0.6642599277978339, "grad_norm": 9.20856287973637, "learning_rate": 3.8955313416919026e-06, "loss": 1.4732, "step": 4692 }, { "epoch": 0.6644015006724712, "grad_norm": 9.649592305006053, "learning_rate": 3.89505572241257e-06, "loss": 1.3284, "step": 4693 }, { "epoch": 0.6645430735471084, "grad_norm": 8.540492825369933, "learning_rate": 3.894580029796198e-06, "loss": 1.2739, "step": 4694 }, { "epoch": 0.6646846464217456, "grad_norm": 8.204920283458588, "learning_rate": 3.894104263867794e-06, "loss": 1.2845, "step": 4695 }, { "epoch": 0.6648262192963829, "grad_norm": 10.383306280583572, "learning_rate": 3.893628424652368e-06, "loss": 1.2526, "step": 4696 }, { "epoch": 0.66496779217102, "grad_norm": 11.082563062781432, "learning_rate": 3.893152512174935e-06, "loss": 1.248, "step": 4697 }, { "epoch": 0.6651093650456572, "grad_norm": 9.469452986099938, "learning_rate": 3.892676526460513e-06, "loss": 1.1932, "step": 4698 }, { "epoch": 0.6652509379202944, "grad_norm": 10.380858088840466, "learning_rate": 3.8922004675341244e-06, "loss": 1.5706, "step": 4699 }, { "epoch": 0.6653925107949317, "grad_norm": 10.109827046259172, "learning_rate": 3.891724335420796e-06, "loss": 1.3353, "step": 4700 }, { "epoch": 0.6655340836695689, "grad_norm": 11.433855320291208, "learning_rate": 3.891248130145556e-06, "loss": 1.2226, "step": 4701 }, { "epoch": 0.6656756565442061, "grad_norm": 8.960476026151422, "learning_rate": 3.8907718517334405e-06, "loss": 1.2614, "step": 4702 }, { "epoch": 0.6658172294188434, "grad_norm": 9.644308564283158, "learning_rate": 3.890295500209485e-06, "loss": 1.1152, "step": 4703 }, { "epoch": 0.6659588022934806, "grad_norm": 10.801067680989524, "learning_rate": 3.8898190755987314e-06, "loss": 1.3476, "step": 4704 }, { "epoch": 0.6661003751681178, "grad_norm": 11.518616576309467, "learning_rate": 3.889342577926225e-06, "loss": 1.3235, "step": 4705 }, { "epoch": 0.666241948042755, "grad_norm": 11.056218147124127, "learning_rate": 3.888866007217017e-06, "loss": 1.387, "step": 4706 }, { "epoch": 0.6663835209173923, "grad_norm": 9.772245412161313, "learning_rate": 3.888389363496157e-06, "loss": 1.3894, "step": 4707 }, { "epoch": 0.6665250937920294, "grad_norm": 10.211513932060612, "learning_rate": 3.887912646788704e-06, "loss": 1.238, "step": 4708 }, { "epoch": 0.6666666666666666, "grad_norm": 9.377092051733293, "learning_rate": 3.8874358571197164e-06, "loss": 1.3729, "step": 4709 }, { "epoch": 0.6668082395413039, "grad_norm": 8.944960248250041, "learning_rate": 3.886958994514263e-06, "loss": 1.1385, "step": 4710 }, { "epoch": 0.6669498124159411, "grad_norm": 10.487787683335444, "learning_rate": 3.8864820589974075e-06, "loss": 1.1926, "step": 4711 }, { "epoch": 0.6670913852905783, "grad_norm": 6.89901008692141, "learning_rate": 3.886005050594225e-06, "loss": 1.1615, "step": 4712 }, { "epoch": 0.6672329581652156, "grad_norm": 9.964138578782789, "learning_rate": 3.88552796932979e-06, "loss": 1.2164, "step": 4713 }, { "epoch": 0.6673745310398528, "grad_norm": 10.48815140457335, "learning_rate": 3.885050815229182e-06, "loss": 1.3762, "step": 4714 }, { "epoch": 0.66751610391449, "grad_norm": 8.75151877165026, "learning_rate": 3.884573588317486e-06, "loss": 1.2617, "step": 4715 }, { "epoch": 0.6676576767891272, "grad_norm": 8.611999001070169, "learning_rate": 3.88409628861979e-06, "loss": 1.1672, "step": 4716 }, { "epoch": 0.6677992496637645, "grad_norm": 11.104187431578458, "learning_rate": 3.883618916161183e-06, "loss": 1.457, "step": 4717 }, { "epoch": 0.6679408225384016, "grad_norm": 8.473591789347655, "learning_rate": 3.883141470966761e-06, "loss": 1.2161, "step": 4718 }, { "epoch": 0.6680823954130388, "grad_norm": 10.49672248186681, "learning_rate": 3.8826639530616235e-06, "loss": 1.3224, "step": 4719 }, { "epoch": 0.668223968287676, "grad_norm": 8.533561421366027, "learning_rate": 3.8821863624708725e-06, "loss": 1.2082, "step": 4720 }, { "epoch": 0.6683655411623133, "grad_norm": 10.534537054245224, "learning_rate": 3.881708699219616e-06, "loss": 1.239, "step": 4721 }, { "epoch": 0.6685071140369505, "grad_norm": 7.541590259930702, "learning_rate": 3.881230963332963e-06, "loss": 1.2193, "step": 4722 }, { "epoch": 0.6686486869115877, "grad_norm": 8.801551890083255, "learning_rate": 3.880753154836028e-06, "loss": 1.2039, "step": 4723 }, { "epoch": 0.668790259786225, "grad_norm": 11.77716430729799, "learning_rate": 3.880275273753929e-06, "loss": 1.4025, "step": 4724 }, { "epoch": 0.6689318326608622, "grad_norm": 8.987702763462723, "learning_rate": 3.879797320111788e-06, "loss": 1.3582, "step": 4725 }, { "epoch": 0.6690734055354994, "grad_norm": 8.809895015441754, "learning_rate": 3.879319293934732e-06, "loss": 1.3058, "step": 4726 }, { "epoch": 0.6692149784101367, "grad_norm": 9.009608755476444, "learning_rate": 3.878841195247888e-06, "loss": 1.3183, "step": 4727 }, { "epoch": 0.6693565512847738, "grad_norm": 12.265779880590618, "learning_rate": 3.87836302407639e-06, "loss": 1.288, "step": 4728 }, { "epoch": 0.669498124159411, "grad_norm": 11.517952216718108, "learning_rate": 3.877884780445377e-06, "loss": 1.3632, "step": 4729 }, { "epoch": 0.6696396970340482, "grad_norm": 7.897654086086359, "learning_rate": 3.877406464379987e-06, "loss": 1.1174, "step": 4730 }, { "epoch": 0.6697812699086855, "grad_norm": 9.41320259947224, "learning_rate": 3.876928075905368e-06, "loss": 1.2543, "step": 4731 }, { "epoch": 0.6699228427833227, "grad_norm": 13.186361399306671, "learning_rate": 3.876449615046665e-06, "loss": 1.3903, "step": 4732 }, { "epoch": 0.6700644156579599, "grad_norm": 9.53177949028747, "learning_rate": 3.875971081829033e-06, "loss": 1.4155, "step": 4733 }, { "epoch": 0.6702059885325972, "grad_norm": 9.03423157152232, "learning_rate": 3.875492476277627e-06, "loss": 1.3188, "step": 4734 }, { "epoch": 0.6703475614072344, "grad_norm": 8.80547252766976, "learning_rate": 3.875013798417606e-06, "loss": 1.2529, "step": 4735 }, { "epoch": 0.6704891342818716, "grad_norm": 10.940343519881257, "learning_rate": 3.874535048274136e-06, "loss": 1.1599, "step": 4736 }, { "epoch": 0.6706307071565089, "grad_norm": 8.179205733633644, "learning_rate": 3.8740562258723845e-06, "loss": 1.1824, "step": 4737 }, { "epoch": 0.6707722800311461, "grad_norm": 10.56226488562752, "learning_rate": 3.87357733123752e-06, "loss": 1.4012, "step": 4738 }, { "epoch": 0.6709138529057832, "grad_norm": 10.997049456136981, "learning_rate": 3.87309836439472e-06, "loss": 1.3415, "step": 4739 }, { "epoch": 0.6710554257804204, "grad_norm": 10.250767376714947, "learning_rate": 3.872619325369162e-06, "loss": 1.256, "step": 4740 }, { "epoch": 0.6711969986550577, "grad_norm": 7.77096871129424, "learning_rate": 3.872140214186031e-06, "loss": 1.0503, "step": 4741 }, { "epoch": 0.6713385715296949, "grad_norm": 10.395123189294367, "learning_rate": 3.871661030870512e-06, "loss": 1.2908, "step": 4742 }, { "epoch": 0.6714801444043321, "grad_norm": 12.245377271704093, "learning_rate": 3.871181775447794e-06, "loss": 1.3588, "step": 4743 }, { "epoch": 0.6716217172789694, "grad_norm": 9.24244546409327, "learning_rate": 3.870702447943073e-06, "loss": 1.3548, "step": 4744 }, { "epoch": 0.6717632901536066, "grad_norm": 8.16697391269404, "learning_rate": 3.870223048381546e-06, "loss": 1.1961, "step": 4745 }, { "epoch": 0.6719048630282438, "grad_norm": 7.159235302458413, "learning_rate": 3.869743576788416e-06, "loss": 1.1588, "step": 4746 }, { "epoch": 0.672046435902881, "grad_norm": 7.961058012075459, "learning_rate": 3.869264033188887e-06, "loss": 1.2391, "step": 4747 }, { "epoch": 0.6721880087775183, "grad_norm": 7.66129258367897, "learning_rate": 3.868784417608169e-06, "loss": 1.0882, "step": 4748 }, { "epoch": 0.6723295816521554, "grad_norm": 7.844432056454443, "learning_rate": 3.868304730071475e-06, "loss": 1.1944, "step": 4749 }, { "epoch": 0.6724711545267926, "grad_norm": 8.717104301869284, "learning_rate": 3.86782497060402e-06, "loss": 1.2575, "step": 4750 }, { "epoch": 0.6726127274014299, "grad_norm": 10.275795089513982, "learning_rate": 3.867345139231028e-06, "loss": 1.3419, "step": 4751 }, { "epoch": 0.6727543002760671, "grad_norm": 9.047092151835782, "learning_rate": 3.86686523597772e-06, "loss": 1.4378, "step": 4752 }, { "epoch": 0.6728958731507043, "grad_norm": 9.094596063117345, "learning_rate": 3.866385260869327e-06, "loss": 1.2947, "step": 4753 }, { "epoch": 0.6730374460253415, "grad_norm": 9.2048591055129, "learning_rate": 3.86590521393108e-06, "loss": 1.149, "step": 4754 }, { "epoch": 0.6731790188999788, "grad_norm": 9.13083994817452, "learning_rate": 3.865425095188214e-06, "loss": 1.1344, "step": 4755 }, { "epoch": 0.673320591774616, "grad_norm": 8.096169364213143, "learning_rate": 3.864944904665967e-06, "loss": 1.2516, "step": 4756 }, { "epoch": 0.6734621646492532, "grad_norm": 8.777600599129128, "learning_rate": 3.864464642389586e-06, "loss": 1.3176, "step": 4757 }, { "epoch": 0.6736037375238905, "grad_norm": 9.952646288895002, "learning_rate": 3.863984308384317e-06, "loss": 1.1161, "step": 4758 }, { "epoch": 0.6737453103985277, "grad_norm": 7.307060631389395, "learning_rate": 3.8635039026754075e-06, "loss": 1.1842, "step": 4759 }, { "epoch": 0.6738868832731648, "grad_norm": 9.024237738460457, "learning_rate": 3.863023425288116e-06, "loss": 1.2689, "step": 4760 }, { "epoch": 0.674028456147802, "grad_norm": 7.500455206726154, "learning_rate": 3.862542876247699e-06, "loss": 1.2734, "step": 4761 }, { "epoch": 0.6741700290224393, "grad_norm": 9.76164958146263, "learning_rate": 3.862062255579419e-06, "loss": 1.3511, "step": 4762 }, { "epoch": 0.6743116018970765, "grad_norm": 7.188269001830398, "learning_rate": 3.861581563308542e-06, "loss": 1.2622, "step": 4763 }, { "epoch": 0.6744531747717137, "grad_norm": 9.486964869830933, "learning_rate": 3.861100799460336e-06, "loss": 1.2705, "step": 4764 }, { "epoch": 0.674594747646351, "grad_norm": 7.801895635067691, "learning_rate": 3.860619964060078e-06, "loss": 1.131, "step": 4765 }, { "epoch": 0.6747363205209882, "grad_norm": 8.33878844684469, "learning_rate": 3.860139057133042e-06, "loss": 1.3314, "step": 4766 }, { "epoch": 0.6748778933956254, "grad_norm": 8.569900321959429, "learning_rate": 3.85965807870451e-06, "loss": 1.3622, "step": 4767 }, { "epoch": 0.6750194662702627, "grad_norm": 12.0173983015658, "learning_rate": 3.859177028799766e-06, "loss": 1.3004, "step": 4768 }, { "epoch": 0.6751610391448999, "grad_norm": 8.833300992318694, "learning_rate": 3.858695907444101e-06, "loss": 1.3106, "step": 4769 }, { "epoch": 0.675302612019537, "grad_norm": 7.292680273904279, "learning_rate": 3.858214714662804e-06, "loss": 1.3107, "step": 4770 }, { "epoch": 0.6754441848941742, "grad_norm": 8.149689707085622, "learning_rate": 3.857733450481172e-06, "loss": 1.3722, "step": 4771 }, { "epoch": 0.6755857577688115, "grad_norm": 10.553113027921103, "learning_rate": 3.857252114924504e-06, "loss": 1.2356, "step": 4772 }, { "epoch": 0.6757273306434487, "grad_norm": 9.691910459601532, "learning_rate": 3.8567707080181054e-06, "loss": 1.3115, "step": 4773 }, { "epoch": 0.6758689035180859, "grad_norm": 8.159470430281411, "learning_rate": 3.856289229787283e-06, "loss": 1.3113, "step": 4774 }, { "epoch": 0.6760104763927232, "grad_norm": 8.482748588789217, "learning_rate": 3.855807680257347e-06, "loss": 1.3203, "step": 4775 }, { "epoch": 0.6761520492673604, "grad_norm": 8.542061351558479, "learning_rate": 3.85532605945361e-06, "loss": 1.2293, "step": 4776 }, { "epoch": 0.6762936221419976, "grad_norm": 6.992166768474853, "learning_rate": 3.854844367401395e-06, "loss": 1.1647, "step": 4777 }, { "epoch": 0.6764351950166348, "grad_norm": 7.472507696674221, "learning_rate": 3.854362604126021e-06, "loss": 1.1595, "step": 4778 }, { "epoch": 0.6765767678912721, "grad_norm": 9.8817162188401, "learning_rate": 3.853880769652815e-06, "loss": 1.2575, "step": 4779 }, { "epoch": 0.6767183407659092, "grad_norm": 9.18851304470186, "learning_rate": 3.853398864007105e-06, "loss": 1.364, "step": 4780 }, { "epoch": 0.6768599136405464, "grad_norm": 8.503753730832546, "learning_rate": 3.852916887214227e-06, "loss": 1.3348, "step": 4781 }, { "epoch": 0.6770014865151837, "grad_norm": 7.019435060380182, "learning_rate": 3.852434839299517e-06, "loss": 1.2191, "step": 4782 }, { "epoch": 0.6771430593898209, "grad_norm": 10.773125547853985, "learning_rate": 3.851952720288316e-06, "loss": 1.2722, "step": 4783 }, { "epoch": 0.6772846322644581, "grad_norm": 9.792468575180543, "learning_rate": 3.851470530205969e-06, "loss": 1.3847, "step": 4784 }, { "epoch": 0.6774262051390953, "grad_norm": 9.84385889386694, "learning_rate": 3.8509882690778234e-06, "loss": 1.2809, "step": 4785 }, { "epoch": 0.6775677780137326, "grad_norm": 9.130853734958151, "learning_rate": 3.850505936929232e-06, "loss": 1.1152, "step": 4786 }, { "epoch": 0.6777093508883698, "grad_norm": 9.01465600159809, "learning_rate": 3.8500235337855495e-06, "loss": 1.4216, "step": 4787 }, { "epoch": 0.677850923763007, "grad_norm": 8.528043374872746, "learning_rate": 3.849541059672137e-06, "loss": 1.2083, "step": 4788 }, { "epoch": 0.6779924966376443, "grad_norm": 8.468393691334017, "learning_rate": 3.8490585146143574e-06, "loss": 1.2187, "step": 4789 }, { "epoch": 0.6781340695122815, "grad_norm": 7.867391634795735, "learning_rate": 3.848575898637579e-06, "loss": 1.2716, "step": 4790 }, { "epoch": 0.6782756423869186, "grad_norm": 8.042000192106672, "learning_rate": 3.84809321176717e-06, "loss": 1.383, "step": 4791 }, { "epoch": 0.6784172152615559, "grad_norm": 9.972081312378254, "learning_rate": 3.8476104540285054e-06, "loss": 1.3433, "step": 4792 }, { "epoch": 0.6785587881361931, "grad_norm": 10.570669051174251, "learning_rate": 3.847127625446964e-06, "loss": 1.2914, "step": 4793 }, { "epoch": 0.6787003610108303, "grad_norm": 9.185658795897464, "learning_rate": 3.846644726047928e-06, "loss": 1.3038, "step": 4794 }, { "epoch": 0.6788419338854675, "grad_norm": 9.106672720434691, "learning_rate": 3.846161755856784e-06, "loss": 1.3459, "step": 4795 }, { "epoch": 0.6789835067601048, "grad_norm": 8.133883271099426, "learning_rate": 3.84567871489892e-06, "loss": 1.2426, "step": 4796 }, { "epoch": 0.679125079634742, "grad_norm": 7.802122991144482, "learning_rate": 3.845195603199728e-06, "loss": 1.3181, "step": 4797 }, { "epoch": 0.6792666525093792, "grad_norm": 8.620689310008075, "learning_rate": 3.844712420784607e-06, "loss": 1.2222, "step": 4798 }, { "epoch": 0.6794082253840165, "grad_norm": 8.608577407473824, "learning_rate": 3.844229167678957e-06, "loss": 1.239, "step": 4799 }, { "epoch": 0.6795497982586537, "grad_norm": 8.809920562489927, "learning_rate": 3.843745843908181e-06, "loss": 1.3082, "step": 4800 }, { "epoch": 0.6796913711332908, "grad_norm": 8.164666170863365, "learning_rate": 3.843262449497689e-06, "loss": 1.2073, "step": 4801 }, { "epoch": 0.679832944007928, "grad_norm": 8.885976531504966, "learning_rate": 3.842778984472891e-06, "loss": 1.276, "step": 4802 }, { "epoch": 0.6799745168825653, "grad_norm": 9.771479885505165, "learning_rate": 3.842295448859203e-06, "loss": 1.3372, "step": 4803 }, { "epoch": 0.6801160897572025, "grad_norm": 8.959611337270239, "learning_rate": 3.841811842682044e-06, "loss": 1.3028, "step": 4804 }, { "epoch": 0.6802576626318397, "grad_norm": 8.911352362449506, "learning_rate": 3.841328165966837e-06, "loss": 1.2702, "step": 4805 }, { "epoch": 0.680399235506477, "grad_norm": 8.827737823368171, "learning_rate": 3.84084441873901e-06, "loss": 1.3341, "step": 4806 }, { "epoch": 0.6805408083811142, "grad_norm": 7.497514948815697, "learning_rate": 3.840360601023989e-06, "loss": 1.3327, "step": 4807 }, { "epoch": 0.6806823812557514, "grad_norm": 9.542872412402595, "learning_rate": 3.839876712847211e-06, "loss": 1.3502, "step": 4808 }, { "epoch": 0.6808239541303887, "grad_norm": 9.708653347869701, "learning_rate": 3.839392754234115e-06, "loss": 1.3405, "step": 4809 }, { "epoch": 0.6809655270050259, "grad_norm": 7.378071549451005, "learning_rate": 3.8389087252101395e-06, "loss": 1.2281, "step": 4810 }, { "epoch": 0.681107099879663, "grad_norm": 9.017813537578562, "learning_rate": 3.838424625800732e-06, "loss": 1.3734, "step": 4811 }, { "epoch": 0.6812486727543002, "grad_norm": 8.087267308373692, "learning_rate": 3.837940456031338e-06, "loss": 1.2613, "step": 4812 }, { "epoch": 0.6813902456289375, "grad_norm": 8.939736253231622, "learning_rate": 3.837456215927413e-06, "loss": 1.3696, "step": 4813 }, { "epoch": 0.6815318185035747, "grad_norm": 9.082651608517153, "learning_rate": 3.8369719055144115e-06, "loss": 1.3684, "step": 4814 }, { "epoch": 0.6816733913782119, "grad_norm": 8.346916826458289, "learning_rate": 3.836487524817794e-06, "loss": 1.357, "step": 4815 }, { "epoch": 0.6818149642528492, "grad_norm": 9.876793420883551, "learning_rate": 3.836003073863024e-06, "loss": 1.4028, "step": 4816 }, { "epoch": 0.6819565371274864, "grad_norm": 8.284023065333212, "learning_rate": 3.8355185526755676e-06, "loss": 1.217, "step": 4817 }, { "epoch": 0.6820981100021236, "grad_norm": 8.66388065845582, "learning_rate": 3.835033961280898e-06, "loss": 1.2823, "step": 4818 }, { "epoch": 0.6822396828767608, "grad_norm": 7.6570730584385585, "learning_rate": 3.834549299704487e-06, "loss": 1.2476, "step": 4819 }, { "epoch": 0.6823812557513981, "grad_norm": 7.969892113313176, "learning_rate": 3.8340645679718155e-06, "loss": 1.2261, "step": 4820 }, { "epoch": 0.6825228286260353, "grad_norm": 7.802978572915281, "learning_rate": 3.833579766108365e-06, "loss": 1.235, "step": 4821 }, { "epoch": 0.6826644015006724, "grad_norm": 8.494741720726738, "learning_rate": 3.83309489413962e-06, "loss": 1.1466, "step": 4822 }, { "epoch": 0.6828059743753097, "grad_norm": 8.76625425966628, "learning_rate": 3.83260995209107e-06, "loss": 1.2334, "step": 4823 }, { "epoch": 0.6829475472499469, "grad_norm": 7.87206274083574, "learning_rate": 3.832124939988208e-06, "loss": 1.2439, "step": 4824 }, { "epoch": 0.6830891201245841, "grad_norm": 10.990223441382067, "learning_rate": 3.831639857856532e-06, "loss": 1.2422, "step": 4825 }, { "epoch": 0.6832306929992213, "grad_norm": 7.787539226990389, "learning_rate": 3.831154705721542e-06, "loss": 1.2779, "step": 4826 }, { "epoch": 0.6833722658738586, "grad_norm": 10.78724039589768, "learning_rate": 3.830669483608741e-06, "loss": 1.4133, "step": 4827 }, { "epoch": 0.6835138387484958, "grad_norm": 10.05320509480547, "learning_rate": 3.830184191543638e-06, "loss": 1.3761, "step": 4828 }, { "epoch": 0.683655411623133, "grad_norm": 10.306256618000264, "learning_rate": 3.829698829551743e-06, "loss": 1.3405, "step": 4829 }, { "epoch": 0.6837969844977703, "grad_norm": 8.523670224824661, "learning_rate": 3.829213397658572e-06, "loss": 1.3476, "step": 4830 }, { "epoch": 0.6839385573724075, "grad_norm": 8.277314715764321, "learning_rate": 3.828727895889644e-06, "loss": 1.2608, "step": 4831 }, { "epoch": 0.6840801302470446, "grad_norm": 8.800583161191694, "learning_rate": 3.828242324270482e-06, "loss": 1.3519, "step": 4832 }, { "epoch": 0.6842217031216818, "grad_norm": 8.351556105294557, "learning_rate": 3.82775668282661e-06, "loss": 1.2853, "step": 4833 }, { "epoch": 0.6843632759963191, "grad_norm": 8.904091878055368, "learning_rate": 3.827270971583561e-06, "loss": 1.2004, "step": 4834 }, { "epoch": 0.6845048488709563, "grad_norm": 9.734357757308164, "learning_rate": 3.826785190566865e-06, "loss": 1.1402, "step": 4835 }, { "epoch": 0.6846464217455935, "grad_norm": 9.730894884160554, "learning_rate": 3.826299339802062e-06, "loss": 1.2034, "step": 4836 }, { "epoch": 0.6847879946202308, "grad_norm": 8.717583472309267, "learning_rate": 3.825813419314691e-06, "loss": 1.2383, "step": 4837 }, { "epoch": 0.684929567494868, "grad_norm": 10.936473514890187, "learning_rate": 3.825327429130297e-06, "loss": 1.3807, "step": 4838 }, { "epoch": 0.6850711403695052, "grad_norm": 9.229014743447191, "learning_rate": 3.824841369274429e-06, "loss": 1.1613, "step": 4839 }, { "epoch": 0.6852127132441425, "grad_norm": 9.637919684745738, "learning_rate": 3.824355239772637e-06, "loss": 1.3447, "step": 4840 }, { "epoch": 0.6853542861187797, "grad_norm": 8.461368511067114, "learning_rate": 3.823869040650478e-06, "loss": 1.3274, "step": 4841 }, { "epoch": 0.6854958589934168, "grad_norm": 8.53732899261385, "learning_rate": 3.823382771933512e-06, "loss": 1.2686, "step": 4842 }, { "epoch": 0.685637431868054, "grad_norm": 9.630034480048037, "learning_rate": 3.822896433647299e-06, "loss": 1.1866, "step": 4843 }, { "epoch": 0.6857790047426913, "grad_norm": 10.554048847836528, "learning_rate": 3.8224100258174066e-06, "loss": 1.1205, "step": 4844 }, { "epoch": 0.6859205776173285, "grad_norm": 12.381863626893974, "learning_rate": 3.821923548469405e-06, "loss": 1.3488, "step": 4845 }, { "epoch": 0.6860621504919657, "grad_norm": 9.246195242630547, "learning_rate": 3.82143700162887e-06, "loss": 1.3653, "step": 4846 }, { "epoch": 0.686203723366603, "grad_norm": 7.1759803341497195, "learning_rate": 3.820950385321375e-06, "loss": 1.2145, "step": 4847 }, { "epoch": 0.6863452962412402, "grad_norm": 7.657642969583354, "learning_rate": 3.820463699572505e-06, "loss": 1.1532, "step": 4848 }, { "epoch": 0.6864868691158774, "grad_norm": 8.965665386503888, "learning_rate": 3.819976944407841e-06, "loss": 1.2173, "step": 4849 }, { "epoch": 0.6866284419905146, "grad_norm": 11.30741607911509, "learning_rate": 3.819490119852975e-06, "loss": 1.2635, "step": 4850 }, { "epoch": 0.6867700148651519, "grad_norm": 10.939592782484274, "learning_rate": 3.819003225933497e-06, "loss": 1.3329, "step": 4851 }, { "epoch": 0.6869115877397891, "grad_norm": 8.384501519032378, "learning_rate": 3.818516262675001e-06, "loss": 1.2366, "step": 4852 }, { "epoch": 0.6870531606144262, "grad_norm": 7.4198756044556236, "learning_rate": 3.81802923010309e-06, "loss": 1.1622, "step": 4853 }, { "epoch": 0.6871947334890635, "grad_norm": 7.859488818214743, "learning_rate": 3.817542128243365e-06, "loss": 1.2607, "step": 4854 }, { "epoch": 0.6873363063637007, "grad_norm": 9.076631459272814, "learning_rate": 3.817054957121432e-06, "loss": 1.3061, "step": 4855 }, { "epoch": 0.6874778792383379, "grad_norm": 8.62395083915556, "learning_rate": 3.8165677167629025e-06, "loss": 1.0993, "step": 4856 }, { "epoch": 0.6876194521129751, "grad_norm": 8.468221162281976, "learning_rate": 3.81608040719339e-06, "loss": 1.259, "step": 4857 }, { "epoch": 0.6877610249876124, "grad_norm": 9.043172918861481, "learning_rate": 3.8155930284385116e-06, "loss": 1.5566, "step": 4858 }, { "epoch": 0.6879025978622496, "grad_norm": 8.081348713023363, "learning_rate": 3.815105580523888e-06, "loss": 1.3152, "step": 4859 }, { "epoch": 0.6880441707368868, "grad_norm": 10.90814035204633, "learning_rate": 3.814618063475145e-06, "loss": 1.2363, "step": 4860 }, { "epoch": 0.6881857436115241, "grad_norm": 8.673167749222445, "learning_rate": 3.814130477317911e-06, "loss": 1.3721, "step": 4861 }, { "epoch": 0.6883273164861613, "grad_norm": 7.016159479049006, "learning_rate": 3.8136428220778177e-06, "loss": 1.183, "step": 4862 }, { "epoch": 0.6884688893607984, "grad_norm": 9.97570443402761, "learning_rate": 3.8131550977805005e-06, "loss": 1.2668, "step": 4863 }, { "epoch": 0.6886104622354356, "grad_norm": 9.893252730268289, "learning_rate": 3.8126673044515993e-06, "loss": 1.398, "step": 4864 }, { "epoch": 0.6887520351100729, "grad_norm": 8.145822458349386, "learning_rate": 3.812179442116756e-06, "loss": 1.2755, "step": 4865 }, { "epoch": 0.6888936079847101, "grad_norm": 9.068607771406871, "learning_rate": 3.811691510801618e-06, "loss": 1.2065, "step": 4866 }, { "epoch": 0.6890351808593473, "grad_norm": 9.543888904932176, "learning_rate": 3.8112035105318353e-06, "loss": 1.4804, "step": 4867 }, { "epoch": 0.6891767537339846, "grad_norm": 9.212144250095648, "learning_rate": 3.8107154413330616e-06, "loss": 1.3663, "step": 4868 }, { "epoch": 0.6893183266086218, "grad_norm": 8.87680766004302, "learning_rate": 3.8102273032309554e-06, "loss": 1.324, "step": 4869 }, { "epoch": 0.689459899483259, "grad_norm": 10.407569961917481, "learning_rate": 3.809739096251176e-06, "loss": 1.2862, "step": 4870 }, { "epoch": 0.6896014723578963, "grad_norm": 8.306561406643771, "learning_rate": 3.809250820419389e-06, "loss": 1.2067, "step": 4871 }, { "epoch": 0.6897430452325335, "grad_norm": 13.55818409961679, "learning_rate": 3.808762475761263e-06, "loss": 1.3582, "step": 4872 }, { "epoch": 0.6898846181071706, "grad_norm": 9.277232318409736, "learning_rate": 3.808274062302469e-06, "loss": 1.1801, "step": 4873 }, { "epoch": 0.6900261909818078, "grad_norm": 8.73827120934269, "learning_rate": 3.807785580068683e-06, "loss": 1.2244, "step": 4874 }, { "epoch": 0.6901677638564451, "grad_norm": 8.894662915554903, "learning_rate": 3.8072970290855843e-06, "loss": 1.233, "step": 4875 }, { "epoch": 0.6903093367310823, "grad_norm": 10.834295455072864, "learning_rate": 3.8068084093788554e-06, "loss": 1.3662, "step": 4876 }, { "epoch": 0.6904509096057195, "grad_norm": 10.548957042757422, "learning_rate": 3.806319720974183e-06, "loss": 1.3206, "step": 4877 }, { "epoch": 0.6905924824803568, "grad_norm": 9.53101586226249, "learning_rate": 3.8058309638972567e-06, "loss": 1.4186, "step": 4878 }, { "epoch": 0.690734055354994, "grad_norm": 7.531737252456372, "learning_rate": 3.805342138173771e-06, "loss": 1.3403, "step": 4879 }, { "epoch": 0.6908756282296312, "grad_norm": 6.571754542256723, "learning_rate": 3.8048532438294215e-06, "loss": 1.3278, "step": 4880 }, { "epoch": 0.6910172011042685, "grad_norm": 9.468575554679171, "learning_rate": 3.8043642808899106e-06, "loss": 1.2402, "step": 4881 }, { "epoch": 0.6911587739789057, "grad_norm": 9.293587342142708, "learning_rate": 3.8038752493809416e-06, "loss": 1.3141, "step": 4882 }, { "epoch": 0.6913003468535429, "grad_norm": 8.455438829580713, "learning_rate": 3.803386149328223e-06, "loss": 1.2819, "step": 4883 }, { "epoch": 0.69144191972818, "grad_norm": 8.30907168715291, "learning_rate": 3.8028969807574665e-06, "loss": 1.297, "step": 4884 }, { "epoch": 0.6915834926028173, "grad_norm": 8.69552614784137, "learning_rate": 3.8024077436943875e-06, "loss": 1.324, "step": 4885 }, { "epoch": 0.6917250654774545, "grad_norm": 9.752911646451848, "learning_rate": 3.8019184381647044e-06, "loss": 1.3015, "step": 4886 }, { "epoch": 0.6918666383520917, "grad_norm": 9.233345090489069, "learning_rate": 3.8014290641941392e-06, "loss": 1.379, "step": 4887 }, { "epoch": 0.692008211226729, "grad_norm": 9.109040399756417, "learning_rate": 3.800939621808419e-06, "loss": 1.2431, "step": 4888 }, { "epoch": 0.6921497841013662, "grad_norm": 11.482537321083617, "learning_rate": 3.8004501110332726e-06, "loss": 1.3521, "step": 4889 }, { "epoch": 0.6922913569760034, "grad_norm": 8.180537634158695, "learning_rate": 3.799960531894434e-06, "loss": 1.3423, "step": 4890 }, { "epoch": 0.6924329298506406, "grad_norm": 8.634630659075832, "learning_rate": 3.7994708844176385e-06, "loss": 1.3508, "step": 4891 }, { "epoch": 0.6925745027252779, "grad_norm": 8.636722725151731, "learning_rate": 3.7989811686286283e-06, "loss": 1.3805, "step": 4892 }, { "epoch": 0.6927160755999151, "grad_norm": 10.306962809714598, "learning_rate": 3.7984913845531466e-06, "loss": 1.5042, "step": 4893 }, { "epoch": 0.6928576484745522, "grad_norm": 6.41779073142109, "learning_rate": 3.798001532216941e-06, "loss": 1.0354, "step": 4894 }, { "epoch": 0.6929992213491895, "grad_norm": 10.878149519237665, "learning_rate": 3.7975116116457626e-06, "loss": 1.2593, "step": 4895 }, { "epoch": 0.6931407942238267, "grad_norm": 11.194988451019368, "learning_rate": 3.7970216228653667e-06, "loss": 1.2403, "step": 4896 }, { "epoch": 0.6932823670984639, "grad_norm": 10.147827036104207, "learning_rate": 3.7965315659015108e-06, "loss": 1.2987, "step": 4897 }, { "epoch": 0.6934239399731011, "grad_norm": 9.562818733837345, "learning_rate": 3.7960414407799565e-06, "loss": 1.2448, "step": 4898 }, { "epoch": 0.6935655128477384, "grad_norm": 7.588697471581375, "learning_rate": 3.795551247526471e-06, "loss": 1.2787, "step": 4899 }, { "epoch": 0.6937070857223756, "grad_norm": 9.40032265292016, "learning_rate": 3.795060986166822e-06, "loss": 1.1789, "step": 4900 }, { "epoch": 0.6938486585970128, "grad_norm": 7.7511015078478325, "learning_rate": 3.794570656726784e-06, "loss": 1.3604, "step": 4901 }, { "epoch": 0.6939902314716501, "grad_norm": 8.439650424027358, "learning_rate": 3.79408025923213e-06, "loss": 1.2041, "step": 4902 }, { "epoch": 0.6941318043462873, "grad_norm": 7.807440500365535, "learning_rate": 3.793589793708642e-06, "loss": 1.2344, "step": 4903 }, { "epoch": 0.6942733772209245, "grad_norm": 10.730722575037614, "learning_rate": 3.7930992601821028e-06, "loss": 1.315, "step": 4904 }, { "epoch": 0.6944149500955616, "grad_norm": 10.432444187419975, "learning_rate": 3.7926086586783008e-06, "loss": 1.4356, "step": 4905 }, { "epoch": 0.6945565229701989, "grad_norm": 8.661101045807259, "learning_rate": 3.7921179892230246e-06, "loss": 1.2717, "step": 4906 }, { "epoch": 0.6946980958448361, "grad_norm": 8.00855798740308, "learning_rate": 3.7916272518420694e-06, "loss": 1.2995, "step": 4907 }, { "epoch": 0.6948396687194733, "grad_norm": 7.93278182968453, "learning_rate": 3.791136446561233e-06, "loss": 1.3208, "step": 4908 }, { "epoch": 0.6949812415941106, "grad_norm": 9.795611773124005, "learning_rate": 3.7906455734063156e-06, "loss": 1.1817, "step": 4909 }, { "epoch": 0.6951228144687478, "grad_norm": 9.790631654836352, "learning_rate": 3.7901546324031236e-06, "loss": 1.3801, "step": 4910 }, { "epoch": 0.695264387343385, "grad_norm": 9.033039483315065, "learning_rate": 3.7896636235774636e-06, "loss": 1.3694, "step": 4911 }, { "epoch": 0.6954059602180223, "grad_norm": 10.933521179198245, "learning_rate": 3.789172546955149e-06, "loss": 1.2528, "step": 4912 }, { "epoch": 0.6955475330926595, "grad_norm": 9.977625802895078, "learning_rate": 3.7886814025619944e-06, "loss": 1.2312, "step": 4913 }, { "epoch": 0.6956891059672967, "grad_norm": 9.23632420140274, "learning_rate": 3.7881901904238203e-06, "loss": 1.2931, "step": 4914 }, { "epoch": 0.6958306788419338, "grad_norm": 9.593054134219733, "learning_rate": 3.7876989105664476e-06, "loss": 1.1769, "step": 4915 }, { "epoch": 0.6959722517165711, "grad_norm": 10.587787401786535, "learning_rate": 3.7872075630157035e-06, "loss": 1.2248, "step": 4916 }, { "epoch": 0.6961138245912083, "grad_norm": 10.627061352198849, "learning_rate": 3.786716147797418e-06, "loss": 1.2254, "step": 4917 }, { "epoch": 0.6962553974658455, "grad_norm": 9.291088502512686, "learning_rate": 3.786224664937424e-06, "loss": 1.3238, "step": 4918 }, { "epoch": 0.6963969703404828, "grad_norm": 9.503380023578906, "learning_rate": 3.7857331144615576e-06, "loss": 1.2969, "step": 4919 }, { "epoch": 0.69653854321512, "grad_norm": 9.650350503040963, "learning_rate": 3.785241496395661e-06, "loss": 1.3473, "step": 4920 }, { "epoch": 0.6966801160897572, "grad_norm": 11.706166420967653, "learning_rate": 3.7847498107655768e-06, "loss": 1.2749, "step": 4921 }, { "epoch": 0.6968216889643944, "grad_norm": 10.62040305829329, "learning_rate": 3.7842580575971533e-06, "loss": 1.2057, "step": 4922 }, { "epoch": 0.6969632618390317, "grad_norm": 9.882318030490707, "learning_rate": 3.783766236916241e-06, "loss": 1.3365, "step": 4923 }, { "epoch": 0.6971048347136689, "grad_norm": 8.968806994320214, "learning_rate": 3.7832743487486945e-06, "loss": 1.2977, "step": 4924 }, { "epoch": 0.697246407588306, "grad_norm": 8.612794948006497, "learning_rate": 3.782782393120373e-06, "loss": 1.2234, "step": 4925 }, { "epoch": 0.6973879804629433, "grad_norm": 7.715316193999354, "learning_rate": 3.7822903700571372e-06, "loss": 1.3272, "step": 4926 }, { "epoch": 0.6975295533375805, "grad_norm": 9.49020000733384, "learning_rate": 3.781798279584853e-06, "loss": 1.3736, "step": 4927 }, { "epoch": 0.6976711262122177, "grad_norm": 8.702009420619065, "learning_rate": 3.7813061217293887e-06, "loss": 1.4032, "step": 4928 }, { "epoch": 0.697812699086855, "grad_norm": 8.669712216222022, "learning_rate": 3.7808138965166167e-06, "loss": 1.3451, "step": 4929 }, { "epoch": 0.6979542719614922, "grad_norm": 8.419875889676677, "learning_rate": 3.780321603972414e-06, "loss": 1.2725, "step": 4930 }, { "epoch": 0.6980958448361294, "grad_norm": 8.896024489005432, "learning_rate": 3.7798292441226584e-06, "loss": 1.2032, "step": 4931 }, { "epoch": 0.6982374177107666, "grad_norm": 9.903564859374029, "learning_rate": 3.7793368169932343e-06, "loss": 1.3223, "step": 4932 }, { "epoch": 0.6983789905854039, "grad_norm": 9.68202300756629, "learning_rate": 3.7788443226100274e-06, "loss": 1.324, "step": 4933 }, { "epoch": 0.6985205634600411, "grad_norm": 8.713065533228137, "learning_rate": 3.7783517609989284e-06, "loss": 1.1389, "step": 4934 }, { "epoch": 0.6986621363346783, "grad_norm": 8.591505388681894, "learning_rate": 3.77785913218583e-06, "loss": 1.3227, "step": 4935 }, { "epoch": 0.6988037092093154, "grad_norm": 8.368398313418838, "learning_rate": 3.77736643619663e-06, "loss": 1.3473, "step": 4936 }, { "epoch": 0.6989452820839527, "grad_norm": 7.110504664284424, "learning_rate": 3.776873673057229e-06, "loss": 1.2929, "step": 4937 }, { "epoch": 0.6990868549585899, "grad_norm": 8.805998872921933, "learning_rate": 3.776380842793531e-06, "loss": 1.2378, "step": 4938 }, { "epoch": 0.6992284278332271, "grad_norm": 7.941074557678906, "learning_rate": 3.775887945431444e-06, "loss": 1.295, "step": 4939 }, { "epoch": 0.6993700007078644, "grad_norm": 9.061558325941162, "learning_rate": 3.775394980996879e-06, "loss": 1.3317, "step": 4940 }, { "epoch": 0.6995115735825016, "grad_norm": 8.211162357173276, "learning_rate": 3.77490194951575e-06, "loss": 1.2875, "step": 4941 }, { "epoch": 0.6996531464571388, "grad_norm": 10.379192929800633, "learning_rate": 3.7744088510139763e-06, "loss": 1.1362, "step": 4942 }, { "epoch": 0.6997947193317761, "grad_norm": 9.463786520735693, "learning_rate": 3.773915685517481e-06, "loss": 1.1189, "step": 4943 }, { "epoch": 0.6999362922064133, "grad_norm": 9.401063217744227, "learning_rate": 3.7734224530521867e-06, "loss": 1.2399, "step": 4944 }, { "epoch": 0.7000778650810505, "grad_norm": 7.873647180041247, "learning_rate": 3.772929153644024e-06, "loss": 1.2202, "step": 4945 }, { "epoch": 0.7002194379556876, "grad_norm": 8.531410523186462, "learning_rate": 3.772435787318925e-06, "loss": 1.269, "step": 4946 }, { "epoch": 0.7003610108303249, "grad_norm": 10.453007125760099, "learning_rate": 3.771942354102825e-06, "loss": 1.3982, "step": 4947 }, { "epoch": 0.7005025837049621, "grad_norm": 8.377815798069728, "learning_rate": 3.7714488540216637e-06, "loss": 1.3188, "step": 4948 }, { "epoch": 0.7006441565795993, "grad_norm": 9.427031312802487, "learning_rate": 3.7709552871013844e-06, "loss": 1.2008, "step": 4949 }, { "epoch": 0.7007857294542366, "grad_norm": 11.28356470258672, "learning_rate": 3.770461653367934e-06, "loss": 1.3115, "step": 4950 }, { "epoch": 0.7009273023288738, "grad_norm": 14.18976914328925, "learning_rate": 3.769967952847261e-06, "loss": 1.3739, "step": 4951 }, { "epoch": 0.701068875203511, "grad_norm": 7.797662179232041, "learning_rate": 3.7694741855653195e-06, "loss": 1.1321, "step": 4952 }, { "epoch": 0.7012104480781483, "grad_norm": 11.12324767295709, "learning_rate": 3.7689803515480674e-06, "loss": 1.4091, "step": 4953 }, { "epoch": 0.7013520209527855, "grad_norm": 9.021052850485358, "learning_rate": 3.7684864508214638e-06, "loss": 1.1714, "step": 4954 }, { "epoch": 0.7014935938274227, "grad_norm": 9.991549927127876, "learning_rate": 3.7679924834114735e-06, "loss": 1.3709, "step": 4955 }, { "epoch": 0.7016351667020598, "grad_norm": 8.742622535115187, "learning_rate": 3.7674984493440632e-06, "loss": 1.122, "step": 4956 }, { "epoch": 0.7017767395766971, "grad_norm": 8.72004673656573, "learning_rate": 3.7670043486452047e-06, "loss": 1.3288, "step": 4957 }, { "epoch": 0.7019183124513343, "grad_norm": 9.946118725913186, "learning_rate": 3.7665101813408726e-06, "loss": 1.2583, "step": 4958 }, { "epoch": 0.7020598853259715, "grad_norm": 9.257154017984972, "learning_rate": 3.766015947457046e-06, "loss": 1.3945, "step": 4959 }, { "epoch": 0.7022014582006088, "grad_norm": 8.272610746433783, "learning_rate": 3.7655216470197033e-06, "loss": 1.1771, "step": 4960 }, { "epoch": 0.702343031075246, "grad_norm": 9.948398587735074, "learning_rate": 3.7650272800548316e-06, "loss": 1.1787, "step": 4961 }, { "epoch": 0.7024846039498832, "grad_norm": 8.159066017404639, "learning_rate": 3.764532846588419e-06, "loss": 1.3023, "step": 4962 }, { "epoch": 0.7026261768245204, "grad_norm": 9.168489263424764, "learning_rate": 3.764038346646457e-06, "loss": 1.3405, "step": 4963 }, { "epoch": 0.7027677496991577, "grad_norm": 8.404070603580204, "learning_rate": 3.7635437802549426e-06, "loss": 1.2132, "step": 4964 }, { "epoch": 0.7029093225737949, "grad_norm": 11.718537758494685, "learning_rate": 3.7630491474398734e-06, "loss": 1.2017, "step": 4965 }, { "epoch": 0.7030508954484321, "grad_norm": 12.474050212779227, "learning_rate": 3.7625544482272523e-06, "loss": 1.2649, "step": 4966 }, { "epoch": 0.7031924683230693, "grad_norm": 6.741970231145808, "learning_rate": 3.762059682643085e-06, "loss": 1.1768, "step": 4967 }, { "epoch": 0.7033340411977065, "grad_norm": 11.35848773077972, "learning_rate": 3.7615648507133816e-06, "loss": 1.3184, "step": 4968 }, { "epoch": 0.7034756140723437, "grad_norm": 8.199391356419246, "learning_rate": 3.7610699524641547e-06, "loss": 1.3233, "step": 4969 }, { "epoch": 0.7036171869469809, "grad_norm": 10.094598031810287, "learning_rate": 3.7605749879214203e-06, "loss": 1.2596, "step": 4970 }, { "epoch": 0.7037587598216182, "grad_norm": 8.296501194351212, "learning_rate": 3.760079957111199e-06, "loss": 1.3658, "step": 4971 }, { "epoch": 0.7039003326962554, "grad_norm": 7.5787169549729025, "learning_rate": 3.7595848600595135e-06, "loss": 1.2563, "step": 4972 }, { "epoch": 0.7040419055708926, "grad_norm": 10.078567471512608, "learning_rate": 3.7590896967923917e-06, "loss": 1.2453, "step": 4973 }, { "epoch": 0.7041834784455299, "grad_norm": 8.939170414540452, "learning_rate": 3.7585944673358632e-06, "loss": 1.231, "step": 4974 }, { "epoch": 0.7043250513201671, "grad_norm": 8.413405992261696, "learning_rate": 3.758099171715962e-06, "loss": 1.2109, "step": 4975 }, { "epoch": 0.7044666241948043, "grad_norm": 8.775853355190959, "learning_rate": 3.7576038099587252e-06, "loss": 1.1238, "step": 4976 }, { "epoch": 0.7046081970694414, "grad_norm": 10.32144572340954, "learning_rate": 3.7571083820901943e-06, "loss": 1.4115, "step": 4977 }, { "epoch": 0.7047497699440787, "grad_norm": 9.182614227589786, "learning_rate": 3.7566128881364116e-06, "loss": 1.3227, "step": 4978 }, { "epoch": 0.7048913428187159, "grad_norm": 9.414778483535498, "learning_rate": 3.7561173281234276e-06, "loss": 1.2265, "step": 4979 }, { "epoch": 0.7050329156933531, "grad_norm": 8.004611593493797, "learning_rate": 3.755621702077293e-06, "loss": 1.3549, "step": 4980 }, { "epoch": 0.7051744885679904, "grad_norm": 9.88227942917567, "learning_rate": 3.7551260100240604e-06, "loss": 1.4022, "step": 4981 }, { "epoch": 0.7053160614426276, "grad_norm": 9.64821964898289, "learning_rate": 3.7546302519897904e-06, "loss": 1.2917, "step": 4982 }, { "epoch": 0.7054576343172648, "grad_norm": 9.18839762967279, "learning_rate": 3.7541344280005427e-06, "loss": 1.1958, "step": 4983 }, { "epoch": 0.7055992071919021, "grad_norm": 9.064649024561103, "learning_rate": 3.7536385380823835e-06, "loss": 1.3827, "step": 4984 }, { "epoch": 0.7057407800665393, "grad_norm": 8.231336811276988, "learning_rate": 3.753142582261381e-06, "loss": 1.4214, "step": 4985 }, { "epoch": 0.7058823529411765, "grad_norm": 10.374437431801649, "learning_rate": 3.7526465605636075e-06, "loss": 1.26, "step": 4986 }, { "epoch": 0.7060239258158136, "grad_norm": 9.600248142850104, "learning_rate": 3.7521504730151382e-06, "loss": 1.1791, "step": 4987 }, { "epoch": 0.7061654986904509, "grad_norm": 6.98349887891427, "learning_rate": 3.751654319642052e-06, "loss": 1.2551, "step": 4988 }, { "epoch": 0.7063070715650881, "grad_norm": 10.001691675149065, "learning_rate": 3.7511581004704317e-06, "loss": 1.2154, "step": 4989 }, { "epoch": 0.7064486444397253, "grad_norm": 9.385124741112929, "learning_rate": 3.750661815526363e-06, "loss": 1.2442, "step": 4990 }, { "epoch": 0.7065902173143626, "grad_norm": 10.464403029983401, "learning_rate": 3.7501654648359353e-06, "loss": 1.3223, "step": 4991 }, { "epoch": 0.7067317901889998, "grad_norm": 8.444390090410522, "learning_rate": 3.7496690484252413e-06, "loss": 1.0963, "step": 4992 }, { "epoch": 0.706873363063637, "grad_norm": 9.482831197718014, "learning_rate": 3.7491725663203765e-06, "loss": 1.2321, "step": 4993 }, { "epoch": 0.7070149359382742, "grad_norm": 8.805193963500367, "learning_rate": 3.748676018547442e-06, "loss": 1.3718, "step": 4994 }, { "epoch": 0.7071565088129115, "grad_norm": 11.822903688846585, "learning_rate": 3.7481794051325404e-06, "loss": 1.3167, "step": 4995 }, { "epoch": 0.7072980816875487, "grad_norm": 9.544257821373009, "learning_rate": 3.7476827261017777e-06, "loss": 1.3914, "step": 4996 }, { "epoch": 0.7074396545621859, "grad_norm": 9.001110432256226, "learning_rate": 3.747185981481265e-06, "loss": 1.2617, "step": 4997 }, { "epoch": 0.7075812274368231, "grad_norm": 8.184641098701322, "learning_rate": 3.7466891712971144e-06, "loss": 1.1824, "step": 4998 }, { "epoch": 0.7077228003114603, "grad_norm": 12.008651158090935, "learning_rate": 3.7461922955754445e-06, "loss": 1.286, "step": 4999 }, { "epoch": 0.7078643731860975, "grad_norm": 9.794927522119508, "learning_rate": 3.745695354342374e-06, "loss": 1.2449, "step": 5000 }, { "epoch": 0.7080059460607347, "grad_norm": 8.958425074854249, "learning_rate": 3.745198347624027e-06, "loss": 1.2338, "step": 5001 }, { "epoch": 0.708147518935372, "grad_norm": 11.579249771403788, "learning_rate": 3.744701275446533e-06, "loss": 1.4563, "step": 5002 }, { "epoch": 0.7082890918100092, "grad_norm": 10.495944738661272, "learning_rate": 3.7442041378360204e-06, "loss": 1.3621, "step": 5003 }, { "epoch": 0.7084306646846464, "grad_norm": 8.77236217182866, "learning_rate": 3.743706934818624e-06, "loss": 1.3206, "step": 5004 }, { "epoch": 0.7085722375592837, "grad_norm": 9.502509337844415, "learning_rate": 3.743209666420481e-06, "loss": 1.1139, "step": 5005 }, { "epoch": 0.7087138104339209, "grad_norm": 10.7020432419806, "learning_rate": 3.7427123326677326e-06, "loss": 1.2896, "step": 5006 }, { "epoch": 0.7088553833085581, "grad_norm": 10.623382086145739, "learning_rate": 3.7422149335865244e-06, "loss": 1.2087, "step": 5007 }, { "epoch": 0.7089969561831952, "grad_norm": 9.880895086353236, "learning_rate": 3.7417174692030027e-06, "loss": 1.2494, "step": 5008 }, { "epoch": 0.7091385290578325, "grad_norm": 9.604660142866114, "learning_rate": 3.74121993954332e-06, "loss": 1.3388, "step": 5009 }, { "epoch": 0.7092801019324697, "grad_norm": 9.17151439679219, "learning_rate": 3.74072234463363e-06, "loss": 1.2659, "step": 5010 }, { "epoch": 0.7094216748071069, "grad_norm": 11.273154611402175, "learning_rate": 3.7402246845000916e-06, "loss": 1.4765, "step": 5011 }, { "epoch": 0.7095632476817442, "grad_norm": 9.971095846514118, "learning_rate": 3.7397269591688666e-06, "loss": 1.1789, "step": 5012 }, { "epoch": 0.7097048205563814, "grad_norm": 9.702325318043528, "learning_rate": 3.73922916866612e-06, "loss": 1.2988, "step": 5013 }, { "epoch": 0.7098463934310186, "grad_norm": 10.191715421479442, "learning_rate": 3.7387313130180192e-06, "loss": 1.405, "step": 5014 }, { "epoch": 0.7099879663056559, "grad_norm": 12.596437683312306, "learning_rate": 3.7382333922507375e-06, "loss": 1.2271, "step": 5015 }, { "epoch": 0.7101295391802931, "grad_norm": 12.016182478533766, "learning_rate": 3.7377354063904484e-06, "loss": 1.2301, "step": 5016 }, { "epoch": 0.7102711120549303, "grad_norm": 9.42498596352258, "learning_rate": 3.7372373554633334e-06, "loss": 1.3243, "step": 5017 }, { "epoch": 0.7104126849295674, "grad_norm": 7.656363848404175, "learning_rate": 3.7367392394955726e-06, "loss": 1.2675, "step": 5018 }, { "epoch": 0.7105542578042047, "grad_norm": 8.849569070960417, "learning_rate": 3.7362410585133523e-06, "loss": 1.2072, "step": 5019 }, { "epoch": 0.7106958306788419, "grad_norm": 7.657419294479364, "learning_rate": 3.7357428125428612e-06, "loss": 1.1971, "step": 5020 }, { "epoch": 0.7108374035534791, "grad_norm": 9.371855348246527, "learning_rate": 3.7352445016102917e-06, "loss": 1.3024, "step": 5021 }, { "epoch": 0.7109789764281164, "grad_norm": 8.012282479557655, "learning_rate": 3.7347461257418403e-06, "loss": 1.2458, "step": 5022 }, { "epoch": 0.7111205493027536, "grad_norm": 9.751261702844575, "learning_rate": 3.7342476849637053e-06, "loss": 1.2864, "step": 5023 }, { "epoch": 0.7112621221773908, "grad_norm": 10.964090863001292, "learning_rate": 3.7337491793020898e-06, "loss": 1.4105, "step": 5024 }, { "epoch": 0.711403695052028, "grad_norm": 9.97001386421487, "learning_rate": 3.7332506087832e-06, "loss": 1.1974, "step": 5025 }, { "epoch": 0.7115452679266653, "grad_norm": 8.936195545102652, "learning_rate": 3.7327519734332453e-06, "loss": 1.3504, "step": 5026 }, { "epoch": 0.7116868408013025, "grad_norm": 9.123390787445707, "learning_rate": 3.732253273278438e-06, "loss": 1.3465, "step": 5027 }, { "epoch": 0.7118284136759397, "grad_norm": 7.1676284085584285, "learning_rate": 3.731754508344996e-06, "loss": 1.1825, "step": 5028 }, { "epoch": 0.7119699865505769, "grad_norm": 8.013093247374155, "learning_rate": 3.731255678659137e-06, "loss": 1.2602, "step": 5029 }, { "epoch": 0.7121115594252141, "grad_norm": 9.722948547358968, "learning_rate": 3.730756784247085e-06, "loss": 1.0952, "step": 5030 }, { "epoch": 0.7122531322998513, "grad_norm": 8.54383542200233, "learning_rate": 3.730257825135067e-06, "loss": 1.3665, "step": 5031 }, { "epoch": 0.7123947051744886, "grad_norm": 10.047621442673556, "learning_rate": 3.7297588013493124e-06, "loss": 1.2516, "step": 5032 }, { "epoch": 0.7125362780491258, "grad_norm": 8.435079948078071, "learning_rate": 3.7292597129160547e-06, "loss": 1.2167, "step": 5033 }, { "epoch": 0.712677850923763, "grad_norm": 9.458597008703082, "learning_rate": 3.72876055986153e-06, "loss": 1.2492, "step": 5034 }, { "epoch": 0.7128194237984002, "grad_norm": 8.587319149239676, "learning_rate": 3.7282613422119794e-06, "loss": 1.2576, "step": 5035 }, { "epoch": 0.7129609966730375, "grad_norm": 10.347435055676614, "learning_rate": 3.7277620599936453e-06, "loss": 1.1688, "step": 5036 }, { "epoch": 0.7131025695476747, "grad_norm": 9.786348324344, "learning_rate": 3.7272627132327753e-06, "loss": 1.3657, "step": 5037 }, { "epoch": 0.7132441424223119, "grad_norm": 7.784879152196689, "learning_rate": 3.7267633019556194e-06, "loss": 1.1277, "step": 5038 }, { "epoch": 0.713385715296949, "grad_norm": 9.534572935097172, "learning_rate": 3.726263826188432e-06, "loss": 1.2699, "step": 5039 }, { "epoch": 0.7135272881715863, "grad_norm": 8.423108748337999, "learning_rate": 3.7257642859574694e-06, "loss": 1.2522, "step": 5040 }, { "epoch": 0.7136688610462235, "grad_norm": 8.928291325126445, "learning_rate": 3.7252646812889926e-06, "loss": 1.3809, "step": 5041 }, { "epoch": 0.7138104339208607, "grad_norm": 7.569674784881908, "learning_rate": 3.724765012209264e-06, "loss": 1.131, "step": 5042 }, { "epoch": 0.713952006795498, "grad_norm": 9.579735874047046, "learning_rate": 3.7242652787445527e-06, "loss": 1.4202, "step": 5043 }, { "epoch": 0.7140935796701352, "grad_norm": 10.404194396846076, "learning_rate": 3.723765480921129e-06, "loss": 1.3617, "step": 5044 }, { "epoch": 0.7142351525447724, "grad_norm": 8.316968132879904, "learning_rate": 3.7232656187652655e-06, "loss": 1.1848, "step": 5045 }, { "epoch": 0.7143767254194097, "grad_norm": 8.295859294437525, "learning_rate": 3.7227656923032406e-06, "loss": 1.1614, "step": 5046 }, { "epoch": 0.7145182982940469, "grad_norm": 10.247191625916914, "learning_rate": 3.7222657015613354e-06, "loss": 1.339, "step": 5047 }, { "epoch": 0.7146598711686841, "grad_norm": 7.654310580253742, "learning_rate": 3.7217656465658335e-06, "loss": 1.1711, "step": 5048 }, { "epoch": 0.7148014440433214, "grad_norm": 10.291922829923928, "learning_rate": 3.721265527343023e-06, "loss": 1.3388, "step": 5049 }, { "epoch": 0.7149430169179585, "grad_norm": 10.685927247445367, "learning_rate": 3.7207653439191944e-06, "loss": 1.2639, "step": 5050 }, { "epoch": 0.7150845897925957, "grad_norm": 8.494415243678121, "learning_rate": 3.720265096320641e-06, "loss": 1.3357, "step": 5051 }, { "epoch": 0.7152261626672329, "grad_norm": 10.687088093990381, "learning_rate": 3.7197647845736616e-06, "loss": 1.3178, "step": 5052 }, { "epoch": 0.7153677355418702, "grad_norm": 11.36310063989331, "learning_rate": 3.719264408704557e-06, "loss": 1.0806, "step": 5053 }, { "epoch": 0.7155093084165074, "grad_norm": 11.5902011920361, "learning_rate": 3.718763968739632e-06, "loss": 1.284, "step": 5054 }, { "epoch": 0.7156508812911446, "grad_norm": 8.875307319585607, "learning_rate": 3.718263464705194e-06, "loss": 1.2202, "step": 5055 }, { "epoch": 0.7157924541657819, "grad_norm": 12.197269221626772, "learning_rate": 3.7177628966275535e-06, "loss": 1.1655, "step": 5056 }, { "epoch": 0.7159340270404191, "grad_norm": 9.117819273044319, "learning_rate": 3.717262264533026e-06, "loss": 1.2692, "step": 5057 }, { "epoch": 0.7160755999150563, "grad_norm": 9.817635716878883, "learning_rate": 3.716761568447928e-06, "loss": 1.2274, "step": 5058 }, { "epoch": 0.7162171727896935, "grad_norm": 8.612251922622267, "learning_rate": 3.7162608083985824e-06, "loss": 1.3801, "step": 5059 }, { "epoch": 0.7163587456643307, "grad_norm": 9.877895268801257, "learning_rate": 3.715759984411313e-06, "loss": 1.3553, "step": 5060 }, { "epoch": 0.7165003185389679, "grad_norm": 10.316661018365927, "learning_rate": 3.715259096512447e-06, "loss": 1.3419, "step": 5061 }, { "epoch": 0.7166418914136051, "grad_norm": 8.241782980298373, "learning_rate": 3.7147581447283172e-06, "loss": 1.118, "step": 5062 }, { "epoch": 0.7167834642882424, "grad_norm": 10.945054241131407, "learning_rate": 3.714257129085257e-06, "loss": 1.3159, "step": 5063 }, { "epoch": 0.7169250371628796, "grad_norm": 8.084213471098414, "learning_rate": 3.7137560496096054e-06, "loss": 1.3527, "step": 5064 }, { "epoch": 0.7170666100375168, "grad_norm": 12.411323029035007, "learning_rate": 3.7132549063277033e-06, "loss": 1.2772, "step": 5065 }, { "epoch": 0.717208182912154, "grad_norm": 12.841622652097639, "learning_rate": 3.712753699265895e-06, "loss": 1.2729, "step": 5066 }, { "epoch": 0.7173497557867913, "grad_norm": 7.832156180814695, "learning_rate": 3.712252428450529e-06, "loss": 1.1791, "step": 5067 }, { "epoch": 0.7174913286614285, "grad_norm": 10.107018975967463, "learning_rate": 3.7117510939079563e-06, "loss": 1.4276, "step": 5068 }, { "epoch": 0.7176329015360657, "grad_norm": 8.051396257137835, "learning_rate": 3.7112496956645326e-06, "loss": 1.252, "step": 5069 }, { "epoch": 0.7177744744107029, "grad_norm": 8.89724179507664, "learning_rate": 3.710748233746616e-06, "loss": 1.1197, "step": 5070 }, { "epoch": 0.7179160472853401, "grad_norm": 11.028007571124057, "learning_rate": 3.7102467081805676e-06, "loss": 1.2125, "step": 5071 }, { "epoch": 0.7180576201599773, "grad_norm": 7.730168965653262, "learning_rate": 3.709745118992751e-06, "loss": 1.2158, "step": 5072 }, { "epoch": 0.7181991930346145, "grad_norm": 9.114115454881091, "learning_rate": 3.709243466209537e-06, "loss": 1.301, "step": 5073 }, { "epoch": 0.7183407659092518, "grad_norm": 8.239047091299833, "learning_rate": 3.7087417498572946e-06, "loss": 1.2783, "step": 5074 }, { "epoch": 0.718482338783889, "grad_norm": 9.781960166037347, "learning_rate": 3.7082399699623996e-06, "loss": 1.2208, "step": 5075 }, { "epoch": 0.7186239116585262, "grad_norm": 9.778672533807311, "learning_rate": 3.707738126551231e-06, "loss": 1.2108, "step": 5076 }, { "epoch": 0.7187654845331635, "grad_norm": 8.112918263425525, "learning_rate": 3.707236219650169e-06, "loss": 1.1804, "step": 5077 }, { "epoch": 0.7189070574078007, "grad_norm": 10.245028150602643, "learning_rate": 3.7067342492855997e-06, "loss": 1.5004, "step": 5078 }, { "epoch": 0.7190486302824379, "grad_norm": 9.740069295961744, "learning_rate": 3.7062322154839098e-06, "loss": 1.3034, "step": 5079 }, { "epoch": 0.7191902031570752, "grad_norm": 9.389542756896832, "learning_rate": 3.7057301182714924e-06, "loss": 1.3087, "step": 5080 }, { "epoch": 0.7193317760317123, "grad_norm": 9.929339819222902, "learning_rate": 3.705227957674742e-06, "loss": 1.197, "step": 5081 }, { "epoch": 0.7194733489063495, "grad_norm": 9.69728391709, "learning_rate": 3.7047257337200554e-06, "loss": 1.2315, "step": 5082 }, { "epoch": 0.7196149217809867, "grad_norm": 10.551050967257806, "learning_rate": 3.704223446433836e-06, "loss": 1.3738, "step": 5083 }, { "epoch": 0.719756494655624, "grad_norm": 8.099172064770919, "learning_rate": 3.703721095842488e-06, "loss": 1.2885, "step": 5084 }, { "epoch": 0.7198980675302612, "grad_norm": 11.808551108093699, "learning_rate": 3.703218681972419e-06, "loss": 1.2763, "step": 5085 }, { "epoch": 0.7200396404048984, "grad_norm": 9.39011273756184, "learning_rate": 3.702716204850042e-06, "loss": 1.2684, "step": 5086 }, { "epoch": 0.7201812132795357, "grad_norm": 8.913345241191541, "learning_rate": 3.7022136645017704e-06, "loss": 1.21, "step": 5087 }, { "epoch": 0.7203227861541729, "grad_norm": 8.793358325165967, "learning_rate": 3.701711060954023e-06, "loss": 1.2362, "step": 5088 }, { "epoch": 0.7204643590288101, "grad_norm": 10.928511233836288, "learning_rate": 3.701208394233221e-06, "loss": 1.3687, "step": 5089 }, { "epoch": 0.7206059319034473, "grad_norm": 12.936455578502573, "learning_rate": 3.7007056643657884e-06, "loss": 1.2005, "step": 5090 }, { "epoch": 0.7207475047780845, "grad_norm": 10.206311789966241, "learning_rate": 3.700202871378156e-06, "loss": 1.1142, "step": 5091 }, { "epoch": 0.7208890776527217, "grad_norm": 8.977644812771183, "learning_rate": 3.6997000152967526e-06, "loss": 1.2408, "step": 5092 }, { "epoch": 0.7210306505273589, "grad_norm": 8.699778709666584, "learning_rate": 3.699197096148014e-06, "loss": 1.2759, "step": 5093 }, { "epoch": 0.7211722234019962, "grad_norm": 9.161047814420874, "learning_rate": 3.698694113958379e-06, "loss": 1.2863, "step": 5094 }, { "epoch": 0.7213137962766334, "grad_norm": 11.545414028219508, "learning_rate": 3.6981910687542873e-06, "loss": 1.3082, "step": 5095 }, { "epoch": 0.7214553691512706, "grad_norm": 9.716589390058633, "learning_rate": 3.697687960562185e-06, "loss": 1.2858, "step": 5096 }, { "epoch": 0.7215969420259079, "grad_norm": 9.355097689666481, "learning_rate": 3.697184789408519e-06, "loss": 1.2806, "step": 5097 }, { "epoch": 0.7217385149005451, "grad_norm": 8.547630151168912, "learning_rate": 3.6966815553197416e-06, "loss": 1.2902, "step": 5098 }, { "epoch": 0.7218800877751823, "grad_norm": 10.05820615151271, "learning_rate": 3.696178258322307e-06, "loss": 1.2081, "step": 5099 }, { "epoch": 0.7220216606498195, "grad_norm": 10.637390093117958, "learning_rate": 3.6956748984426736e-06, "loss": 1.2723, "step": 5100 }, { "epoch": 0.7221632335244567, "grad_norm": 9.689525367915023, "learning_rate": 3.695171475707302e-06, "loss": 1.3748, "step": 5101 }, { "epoch": 0.7223048063990939, "grad_norm": 9.350124468459422, "learning_rate": 3.694667990142658e-06, "loss": 1.2944, "step": 5102 }, { "epoch": 0.7224463792737311, "grad_norm": 8.448328825793327, "learning_rate": 3.6941644417752077e-06, "loss": 1.1687, "step": 5103 }, { "epoch": 0.7225879521483684, "grad_norm": 9.714593225308157, "learning_rate": 3.6936608306314227e-06, "loss": 1.2232, "step": 5104 }, { "epoch": 0.7227295250230056, "grad_norm": 10.601770476266939, "learning_rate": 3.6931571567377785e-06, "loss": 1.2866, "step": 5105 }, { "epoch": 0.7228710978976428, "grad_norm": 9.290412669958478, "learning_rate": 3.692653420120752e-06, "loss": 1.3162, "step": 5106 }, { "epoch": 0.72301267077228, "grad_norm": 8.447918824094849, "learning_rate": 3.6921496208068253e-06, "loss": 1.084, "step": 5107 }, { "epoch": 0.7231542436469173, "grad_norm": 9.640653093936983, "learning_rate": 3.691645758822481e-06, "loss": 1.3319, "step": 5108 }, { "epoch": 0.7232958165215545, "grad_norm": 10.499879382212713, "learning_rate": 3.6911418341942078e-06, "loss": 1.2394, "step": 5109 }, { "epoch": 0.7234373893961917, "grad_norm": 8.463411908059792, "learning_rate": 3.690637846948497e-06, "loss": 1.3194, "step": 5110 }, { "epoch": 0.723578962270829, "grad_norm": 11.193678188892353, "learning_rate": 3.6901337971118415e-06, "loss": 1.3656, "step": 5111 }, { "epoch": 0.7237205351454661, "grad_norm": 11.75890840511494, "learning_rate": 3.6896296847107406e-06, "loss": 1.2511, "step": 5112 }, { "epoch": 0.7238621080201033, "grad_norm": 9.590755430187953, "learning_rate": 3.6891255097716937e-06, "loss": 1.2729, "step": 5113 }, { "epoch": 0.7240036808947405, "grad_norm": 9.97994739342778, "learning_rate": 3.6886212723212057e-06, "loss": 1.3541, "step": 5114 }, { "epoch": 0.7241452537693778, "grad_norm": 8.372523610676062, "learning_rate": 3.6881169723857833e-06, "loss": 1.3336, "step": 5115 }, { "epoch": 0.724286826644015, "grad_norm": 10.088960910975104, "learning_rate": 3.687612609991938e-06, "loss": 1.2664, "step": 5116 }, { "epoch": 0.7244283995186522, "grad_norm": 8.184992981513293, "learning_rate": 3.6871081851661825e-06, "loss": 1.1688, "step": 5117 }, { "epoch": 0.7245699723932895, "grad_norm": 10.104178034720302, "learning_rate": 3.686603697935036e-06, "loss": 1.2276, "step": 5118 }, { "epoch": 0.7247115452679267, "grad_norm": 8.67186356277399, "learning_rate": 3.6860991483250167e-06, "loss": 1.2104, "step": 5119 }, { "epoch": 0.7248531181425639, "grad_norm": 9.984238219182414, "learning_rate": 3.6855945363626504e-06, "loss": 1.2063, "step": 5120 }, { "epoch": 0.7249946910172012, "grad_norm": 11.920107329224615, "learning_rate": 3.685089862074463e-06, "loss": 1.5612, "step": 5121 }, { "epoch": 0.7251362638918383, "grad_norm": 9.639229300940677, "learning_rate": 3.684585125486985e-06, "loss": 1.3208, "step": 5122 }, { "epoch": 0.7252778367664755, "grad_norm": 11.766652744994605, "learning_rate": 3.684080326626751e-06, "loss": 1.153, "step": 5123 }, { "epoch": 0.7254194096411127, "grad_norm": 9.629451762564607, "learning_rate": 3.683575465520297e-06, "loss": 1.2977, "step": 5124 }, { "epoch": 0.72556098251575, "grad_norm": 8.08162626613411, "learning_rate": 3.6830705421941624e-06, "loss": 1.2736, "step": 5125 }, { "epoch": 0.7257025553903872, "grad_norm": 8.238129832801244, "learning_rate": 3.6825655566748927e-06, "loss": 1.2341, "step": 5126 }, { "epoch": 0.7258441282650244, "grad_norm": 9.32968213552852, "learning_rate": 3.6820605089890323e-06, "loss": 1.4364, "step": 5127 }, { "epoch": 0.7259857011396617, "grad_norm": 9.24666019665914, "learning_rate": 3.6815553991631323e-06, "loss": 1.2405, "step": 5128 }, { "epoch": 0.7261272740142989, "grad_norm": 12.25977044791406, "learning_rate": 3.681050227223747e-06, "loss": 1.2526, "step": 5129 }, { "epoch": 0.7262688468889361, "grad_norm": 10.39845474207342, "learning_rate": 3.6805449931974313e-06, "loss": 1.3095, "step": 5130 }, { "epoch": 0.7264104197635733, "grad_norm": 8.981352242861403, "learning_rate": 3.6800396971107456e-06, "loss": 1.2415, "step": 5131 }, { "epoch": 0.7265519926382105, "grad_norm": 9.19225826440444, "learning_rate": 3.6795343389902534e-06, "loss": 1.3882, "step": 5132 }, { "epoch": 0.7266935655128477, "grad_norm": 8.356703629089258, "learning_rate": 3.6790289188625196e-06, "loss": 1.0816, "step": 5133 }, { "epoch": 0.7268351383874849, "grad_norm": 9.05614337950522, "learning_rate": 3.678523436754115e-06, "loss": 1.1913, "step": 5134 }, { "epoch": 0.7269767112621222, "grad_norm": 9.848994138838494, "learning_rate": 3.678017892691612e-06, "loss": 1.2706, "step": 5135 }, { "epoch": 0.7271182841367594, "grad_norm": 11.712371310867532, "learning_rate": 3.677512286701587e-06, "loss": 1.3705, "step": 5136 }, { "epoch": 0.7272598570113966, "grad_norm": 10.811858229802482, "learning_rate": 3.677006618810619e-06, "loss": 1.3665, "step": 5137 }, { "epoch": 0.7274014298860338, "grad_norm": 10.016031004105134, "learning_rate": 3.676500889045291e-06, "loss": 1.2419, "step": 5138 }, { "epoch": 0.7275430027606711, "grad_norm": 8.983784372569948, "learning_rate": 3.6759950974321883e-06, "loss": 1.2226, "step": 5139 }, { "epoch": 0.7276845756353083, "grad_norm": 12.871581577439795, "learning_rate": 3.6754892439979e-06, "loss": 1.4036, "step": 5140 }, { "epoch": 0.7278261485099455, "grad_norm": 10.447809849526141, "learning_rate": 3.6749833287690183e-06, "loss": 1.3349, "step": 5141 }, { "epoch": 0.7279677213845828, "grad_norm": 10.822068054786103, "learning_rate": 3.6744773517721394e-06, "loss": 1.4621, "step": 5142 }, { "epoch": 0.7281092942592199, "grad_norm": 10.550093548256012, "learning_rate": 3.6739713130338617e-06, "loss": 1.1373, "step": 5143 }, { "epoch": 0.7282508671338571, "grad_norm": 10.37083222657552, "learning_rate": 3.673465212580788e-06, "loss": 1.3429, "step": 5144 }, { "epoch": 0.7283924400084943, "grad_norm": 10.45853520131423, "learning_rate": 3.672959050439523e-06, "loss": 1.2876, "step": 5145 }, { "epoch": 0.7285340128831316, "grad_norm": 7.2325695578022104, "learning_rate": 3.672452826636675e-06, "loss": 1.2238, "step": 5146 }, { "epoch": 0.7286755857577688, "grad_norm": 9.470589739151634, "learning_rate": 3.671946541198856e-06, "loss": 1.2213, "step": 5147 }, { "epoch": 0.728817158632406, "grad_norm": 10.43340618689048, "learning_rate": 3.671440194152681e-06, "loss": 1.239, "step": 5148 }, { "epoch": 0.7289587315070433, "grad_norm": 9.251342675989996, "learning_rate": 3.670933785524769e-06, "loss": 1.1187, "step": 5149 }, { "epoch": 0.7291003043816805, "grad_norm": 11.6569385478604, "learning_rate": 3.6704273153417407e-06, "loss": 1.2649, "step": 5150 }, { "epoch": 0.7292418772563177, "grad_norm": 9.851036257381164, "learning_rate": 3.669920783630221e-06, "loss": 1.2911, "step": 5151 }, { "epoch": 0.729383450130955, "grad_norm": 8.520755783014176, "learning_rate": 3.669414190416838e-06, "loss": 1.1824, "step": 5152 }, { "epoch": 0.7295250230055921, "grad_norm": 8.439489511536348, "learning_rate": 3.6689075357282235e-06, "loss": 1.3285, "step": 5153 }, { "epoch": 0.7296665958802293, "grad_norm": 14.098412260658446, "learning_rate": 3.668400819591011e-06, "loss": 1.3154, "step": 5154 }, { "epoch": 0.7298081687548665, "grad_norm": 13.188097031805327, "learning_rate": 3.6678940420318385e-06, "loss": 1.3574, "step": 5155 }, { "epoch": 0.7299497416295038, "grad_norm": 11.443517884046846, "learning_rate": 3.6673872030773473e-06, "loss": 1.3242, "step": 5156 }, { "epoch": 0.730091314504141, "grad_norm": 8.977181648169891, "learning_rate": 3.666880302754181e-06, "loss": 1.1358, "step": 5157 }, { "epoch": 0.7302328873787782, "grad_norm": 10.533142100984456, "learning_rate": 3.6663733410889875e-06, "loss": 1.2687, "step": 5158 }, { "epoch": 0.7303744602534155, "grad_norm": 11.251551881352613, "learning_rate": 3.665866318108417e-06, "loss": 1.2583, "step": 5159 }, { "epoch": 0.7305160331280527, "grad_norm": 13.205848115076765, "learning_rate": 3.665359233839124e-06, "loss": 1.2862, "step": 5160 }, { "epoch": 0.7306576060026899, "grad_norm": 10.22570675969889, "learning_rate": 3.6648520883077644e-06, "loss": 1.2053, "step": 5161 }, { "epoch": 0.7307991788773271, "grad_norm": 8.696329364115625, "learning_rate": 3.6643448815409994e-06, "loss": 1.4039, "step": 5162 }, { "epoch": 0.7309407517519643, "grad_norm": 7.9356937305664434, "learning_rate": 3.663837613565492e-06, "loss": 1.0407, "step": 5163 }, { "epoch": 0.7310823246266015, "grad_norm": 13.382554817279846, "learning_rate": 3.663330284407908e-06, "loss": 1.3257, "step": 5164 }, { "epoch": 0.7312238975012387, "grad_norm": 10.880869016162036, "learning_rate": 3.6628228940949195e-06, "loss": 1.169, "step": 5165 }, { "epoch": 0.731365470375876, "grad_norm": 7.946631278059502, "learning_rate": 3.662315442653199e-06, "loss": 1.2242, "step": 5166 }, { "epoch": 0.7315070432505132, "grad_norm": 11.988402166219794, "learning_rate": 3.661807930109422e-06, "loss": 1.4152, "step": 5167 }, { "epoch": 0.7316486161251504, "grad_norm": 7.152900811611465, "learning_rate": 3.6613003564902678e-06, "loss": 1.2662, "step": 5168 }, { "epoch": 0.7317901889997876, "grad_norm": 9.56794214906076, "learning_rate": 3.66079272182242e-06, "loss": 1.3481, "step": 5169 }, { "epoch": 0.7319317618744249, "grad_norm": 13.643073457587942, "learning_rate": 3.6602850261325645e-06, "loss": 1.3628, "step": 5170 }, { "epoch": 0.7320733347490621, "grad_norm": 11.209311592682038, "learning_rate": 3.6597772694473902e-06, "loss": 1.3356, "step": 5171 }, { "epoch": 0.7322149076236993, "grad_norm": 10.96889710939011, "learning_rate": 3.6592694517935895e-06, "loss": 1.3145, "step": 5172 }, { "epoch": 0.7323564804983366, "grad_norm": 8.02562329495105, "learning_rate": 3.6587615731978583e-06, "loss": 1.173, "step": 5173 }, { "epoch": 0.7324980533729737, "grad_norm": 8.242029212046422, "learning_rate": 3.658253633686895e-06, "loss": 1.2253, "step": 5174 }, { "epoch": 0.7326396262476109, "grad_norm": 9.624548963407243, "learning_rate": 3.6577456332874025e-06, "loss": 1.2072, "step": 5175 }, { "epoch": 0.7327811991222482, "grad_norm": 11.886017196065142, "learning_rate": 3.657237572026085e-06, "loss": 1.2405, "step": 5176 }, { "epoch": 0.7329227719968854, "grad_norm": 11.947720770766738, "learning_rate": 3.656729449929651e-06, "loss": 1.4367, "step": 5177 }, { "epoch": 0.7330643448715226, "grad_norm": 9.6891135563987, "learning_rate": 3.656221267024812e-06, "loss": 1.2576, "step": 5178 }, { "epoch": 0.7332059177461598, "grad_norm": 9.565747875083682, "learning_rate": 3.6557130233382833e-06, "loss": 1.391, "step": 5179 }, { "epoch": 0.7333474906207971, "grad_norm": 8.39992265211552, "learning_rate": 3.6552047188967827e-06, "loss": 1.2423, "step": 5180 }, { "epoch": 0.7334890634954343, "grad_norm": 12.055189378121888, "learning_rate": 3.6546963537270314e-06, "loss": 1.1793, "step": 5181 }, { "epoch": 0.7336306363700715, "grad_norm": 10.032347812177951, "learning_rate": 3.654187927855754e-06, "loss": 1.307, "step": 5182 }, { "epoch": 0.7337722092447088, "grad_norm": 10.115008663790391, "learning_rate": 3.6536794413096775e-06, "loss": 1.1515, "step": 5183 }, { "epoch": 0.7339137821193459, "grad_norm": 7.275401635897708, "learning_rate": 3.6531708941155337e-06, "loss": 1.1348, "step": 5184 }, { "epoch": 0.7340553549939831, "grad_norm": 8.263025808989074, "learning_rate": 3.652662286300055e-06, "loss": 1.1498, "step": 5185 }, { "epoch": 0.7341969278686203, "grad_norm": 9.488657554806307, "learning_rate": 3.6521536178899798e-06, "loss": 1.159, "step": 5186 }, { "epoch": 0.7343385007432576, "grad_norm": 10.357143248008382, "learning_rate": 3.6516448889120475e-06, "loss": 1.2947, "step": 5187 }, { "epoch": 0.7344800736178948, "grad_norm": 7.931221468778715, "learning_rate": 3.651136099393003e-06, "loss": 1.3574, "step": 5188 }, { "epoch": 0.734621646492532, "grad_norm": 9.464668829636222, "learning_rate": 3.650627249359591e-06, "loss": 1.3439, "step": 5189 }, { "epoch": 0.7347632193671693, "grad_norm": 9.078569568772332, "learning_rate": 3.650118338838563e-06, "loss": 1.2525, "step": 5190 }, { "epoch": 0.7349047922418065, "grad_norm": 8.137622594422638, "learning_rate": 3.6496093678566713e-06, "loss": 1.286, "step": 5191 }, { "epoch": 0.7350463651164437, "grad_norm": 10.942569625873524, "learning_rate": 3.649100336440673e-06, "loss": 1.2613, "step": 5192 }, { "epoch": 0.735187937991081, "grad_norm": 7.51561941674039, "learning_rate": 3.648591244617326e-06, "loss": 1.2604, "step": 5193 }, { "epoch": 0.7353295108657182, "grad_norm": 10.150951141741903, "learning_rate": 3.648082092413394e-06, "loss": 1.3052, "step": 5194 }, { "epoch": 0.7354710837403553, "grad_norm": 10.146964279735998, "learning_rate": 3.6475728798556426e-06, "loss": 1.314, "step": 5195 }, { "epoch": 0.7356126566149925, "grad_norm": 9.023857285284148, "learning_rate": 3.6470636069708405e-06, "loss": 1.277, "step": 5196 }, { "epoch": 0.7357542294896298, "grad_norm": 9.044065892542598, "learning_rate": 3.6465542737857603e-06, "loss": 1.1713, "step": 5197 }, { "epoch": 0.735895802364267, "grad_norm": 9.533871949814973, "learning_rate": 3.646044880327176e-06, "loss": 1.2165, "step": 5198 }, { "epoch": 0.7360373752389042, "grad_norm": 10.233152360175358, "learning_rate": 3.6455354266218675e-06, "loss": 1.2245, "step": 5199 }, { "epoch": 0.7361789481135415, "grad_norm": 9.382753548290033, "learning_rate": 3.645025912696615e-06, "loss": 1.3147, "step": 5200 }, { "epoch": 0.7363205209881787, "grad_norm": 7.402912917184953, "learning_rate": 3.644516338578204e-06, "loss": 1.3148, "step": 5201 }, { "epoch": 0.7364620938628159, "grad_norm": 8.865087857141843, "learning_rate": 3.644006704293423e-06, "loss": 1.3511, "step": 5202 }, { "epoch": 0.7366036667374531, "grad_norm": 8.840681318721947, "learning_rate": 3.643497009869063e-06, "loss": 1.2473, "step": 5203 }, { "epoch": 0.7367452396120904, "grad_norm": 9.007718061761024, "learning_rate": 3.642987255331917e-06, "loss": 1.2677, "step": 5204 }, { "epoch": 0.7368868124867275, "grad_norm": 11.260637806365692, "learning_rate": 3.642477440708784e-06, "loss": 1.4095, "step": 5205 }, { "epoch": 0.7370283853613647, "grad_norm": 11.671754726622622, "learning_rate": 3.641967566026463e-06, "loss": 1.2258, "step": 5206 }, { "epoch": 0.737169958236002, "grad_norm": 9.308226558536221, "learning_rate": 3.641457631311759e-06, "loss": 1.2104, "step": 5207 }, { "epoch": 0.7373115311106392, "grad_norm": 9.40385411568065, "learning_rate": 3.6409476365914786e-06, "loss": 1.2737, "step": 5208 }, { "epoch": 0.7374531039852764, "grad_norm": 9.16649192874707, "learning_rate": 3.6404375818924315e-06, "loss": 1.1916, "step": 5209 }, { "epoch": 0.7375946768599136, "grad_norm": 11.417330608877974, "learning_rate": 3.639927467241431e-06, "loss": 1.1516, "step": 5210 }, { "epoch": 0.7377362497345509, "grad_norm": 11.48392125485434, "learning_rate": 3.639417292665293e-06, "loss": 1.2893, "step": 5211 }, { "epoch": 0.7378778226091881, "grad_norm": 9.155499215966035, "learning_rate": 3.638907058190838e-06, "loss": 1.3363, "step": 5212 }, { "epoch": 0.7380193954838253, "grad_norm": 11.557946870627923, "learning_rate": 3.638396763844889e-06, "loss": 1.3463, "step": 5213 }, { "epoch": 0.7381609683584626, "grad_norm": 9.347628499749867, "learning_rate": 3.63788640965427e-06, "loss": 1.1756, "step": 5214 }, { "epoch": 0.7383025412330997, "grad_norm": 10.072452527897951, "learning_rate": 3.637375995645811e-06, "loss": 1.2819, "step": 5215 }, { "epoch": 0.7384441141077369, "grad_norm": 10.296166634397544, "learning_rate": 3.6368655218463435e-06, "loss": 1.3802, "step": 5216 }, { "epoch": 0.7385856869823741, "grad_norm": 8.96127039350056, "learning_rate": 3.636354988282704e-06, "loss": 1.1792, "step": 5217 }, { "epoch": 0.7387272598570114, "grad_norm": 9.653528910318924, "learning_rate": 3.635844394981729e-06, "loss": 1.1588, "step": 5218 }, { "epoch": 0.7388688327316486, "grad_norm": 7.880949164993448, "learning_rate": 3.6353337419702627e-06, "loss": 1.1791, "step": 5219 }, { "epoch": 0.7390104056062858, "grad_norm": 9.767389293753098, "learning_rate": 3.6348230292751476e-06, "loss": 1.1924, "step": 5220 }, { "epoch": 0.7391519784809231, "grad_norm": 10.268147476545984, "learning_rate": 3.6343122569232313e-06, "loss": 1.2072, "step": 5221 }, { "epoch": 0.7392935513555603, "grad_norm": 7.454385774767277, "learning_rate": 3.6338014249413657e-06, "loss": 1.0635, "step": 5222 }, { "epoch": 0.7394351242301975, "grad_norm": 9.895154256442344, "learning_rate": 3.6332905333564046e-06, "loss": 1.2943, "step": 5223 }, { "epoch": 0.7395766971048348, "grad_norm": 9.344166009630145, "learning_rate": 3.632779582195205e-06, "loss": 1.371, "step": 5224 }, { "epoch": 0.739718269979472, "grad_norm": 10.983969712139917, "learning_rate": 3.6322685714846277e-06, "loss": 1.3387, "step": 5225 }, { "epoch": 0.7398598428541091, "grad_norm": 7.9552967875975416, "learning_rate": 3.631757501251536e-06, "loss": 1.2348, "step": 5226 }, { "epoch": 0.7400014157287463, "grad_norm": 10.672934796503263, "learning_rate": 3.631246371522796e-06, "loss": 1.3486, "step": 5227 }, { "epoch": 0.7401429886033836, "grad_norm": 7.750074570820043, "learning_rate": 3.6307351823252778e-06, "loss": 1.3397, "step": 5228 }, { "epoch": 0.7402845614780208, "grad_norm": 8.244614201409192, "learning_rate": 3.6302239336858547e-06, "loss": 1.1519, "step": 5229 }, { "epoch": 0.740426134352658, "grad_norm": 7.881839022909351, "learning_rate": 3.6297126256314013e-06, "loss": 1.2336, "step": 5230 }, { "epoch": 0.7405677072272953, "grad_norm": 7.101234017172676, "learning_rate": 3.629201258188798e-06, "loss": 1.1502, "step": 5231 }, { "epoch": 0.7407092801019325, "grad_norm": 7.568237902250983, "learning_rate": 3.6286898313849267e-06, "loss": 1.3364, "step": 5232 }, { "epoch": 0.7408508529765697, "grad_norm": 7.549710390713181, "learning_rate": 3.6281783452466725e-06, "loss": 1.1825, "step": 5233 }, { "epoch": 0.740992425851207, "grad_norm": 7.560515844921469, "learning_rate": 3.6276667998009242e-06, "loss": 1.234, "step": 5234 }, { "epoch": 0.7411339987258442, "grad_norm": 8.701372446131074, "learning_rate": 3.627155195074572e-06, "loss": 1.2128, "step": 5235 }, { "epoch": 0.7412755716004813, "grad_norm": 7.467335842436728, "learning_rate": 3.6266435310945125e-06, "loss": 1.1976, "step": 5236 }, { "epoch": 0.7414171444751185, "grad_norm": 9.152944339155766, "learning_rate": 3.6261318078876416e-06, "loss": 1.2441, "step": 5237 }, { "epoch": 0.7415587173497558, "grad_norm": 10.944610899957333, "learning_rate": 3.625620025480862e-06, "loss": 1.3752, "step": 5238 }, { "epoch": 0.741700290224393, "grad_norm": 7.505441344802678, "learning_rate": 3.625108183901077e-06, "loss": 1.302, "step": 5239 }, { "epoch": 0.7418418630990302, "grad_norm": 10.305213527253336, "learning_rate": 3.624596283175194e-06, "loss": 1.3533, "step": 5240 }, { "epoch": 0.7419834359736674, "grad_norm": 8.32104420198139, "learning_rate": 3.6240843233301228e-06, "loss": 1.2248, "step": 5241 }, { "epoch": 0.7421250088483047, "grad_norm": 11.633731967104188, "learning_rate": 3.623572304392776e-06, "loss": 1.2812, "step": 5242 }, { "epoch": 0.7422665817229419, "grad_norm": 8.398006893030669, "learning_rate": 3.6230602263900714e-06, "loss": 1.376, "step": 5243 }, { "epoch": 0.7424081545975791, "grad_norm": 8.585537180015796, "learning_rate": 3.6225480893489283e-06, "loss": 1.1023, "step": 5244 }, { "epoch": 0.7425497274722164, "grad_norm": 10.077017032725113, "learning_rate": 3.6220358932962696e-06, "loss": 1.2636, "step": 5245 }, { "epoch": 0.7426913003468535, "grad_norm": 8.6280246904256, "learning_rate": 3.6215236382590197e-06, "loss": 1.4506, "step": 5246 }, { "epoch": 0.7428328732214907, "grad_norm": 9.937228984855937, "learning_rate": 3.621011324264109e-06, "loss": 1.2891, "step": 5247 }, { "epoch": 0.742974446096128, "grad_norm": 9.42783532840345, "learning_rate": 3.620498951338468e-06, "loss": 1.273, "step": 5248 }, { "epoch": 0.7431160189707652, "grad_norm": 8.656916733827684, "learning_rate": 3.6199865195090333e-06, "loss": 1.1711, "step": 5249 }, { "epoch": 0.7432575918454024, "grad_norm": 11.235246266415, "learning_rate": 3.619474028802743e-06, "loss": 1.2557, "step": 5250 }, { "epoch": 0.7433991647200396, "grad_norm": 9.600454367375463, "learning_rate": 3.618961479246537e-06, "loss": 1.2663, "step": 5251 }, { "epoch": 0.7435407375946769, "grad_norm": 9.765776561323893, "learning_rate": 3.6184488708673605e-06, "loss": 1.2687, "step": 5252 }, { "epoch": 0.7436823104693141, "grad_norm": 12.398651640400258, "learning_rate": 3.61793620369216e-06, "loss": 1.3407, "step": 5253 }, { "epoch": 0.7438238833439513, "grad_norm": 13.636893304883964, "learning_rate": 3.617423477747888e-06, "loss": 1.3015, "step": 5254 }, { "epoch": 0.7439654562185886, "grad_norm": 11.255447425168075, "learning_rate": 3.616910693061496e-06, "loss": 1.2419, "step": 5255 }, { "epoch": 0.7441070290932258, "grad_norm": 8.31845361423784, "learning_rate": 3.6163978496599428e-06, "loss": 1.3042, "step": 5256 }, { "epoch": 0.7442486019678629, "grad_norm": 10.546068762587073, "learning_rate": 3.6158849475701863e-06, "loss": 1.1896, "step": 5257 }, { "epoch": 0.7443901748425001, "grad_norm": 10.675714073151251, "learning_rate": 3.6153719868191905e-06, "loss": 1.3167, "step": 5258 }, { "epoch": 0.7445317477171374, "grad_norm": 10.373663391053274, "learning_rate": 3.614858967433921e-06, "loss": 1.2217, "step": 5259 }, { "epoch": 0.7446733205917746, "grad_norm": 8.450029127082058, "learning_rate": 3.6143458894413463e-06, "loss": 1.3335, "step": 5260 }, { "epoch": 0.7448148934664118, "grad_norm": 6.388803833473635, "learning_rate": 3.613832752868439e-06, "loss": 1.1314, "step": 5261 }, { "epoch": 0.7449564663410491, "grad_norm": 8.252981860801983, "learning_rate": 3.613319557742175e-06, "loss": 1.3585, "step": 5262 }, { "epoch": 0.7450980392156863, "grad_norm": 6.856671402255315, "learning_rate": 3.6128063040895318e-06, "loss": 1.2872, "step": 5263 }, { "epoch": 0.7452396120903235, "grad_norm": 8.472556296955371, "learning_rate": 3.612292991937491e-06, "loss": 1.3495, "step": 5264 }, { "epoch": 0.7453811849649608, "grad_norm": 9.880284694013914, "learning_rate": 3.6117796213130367e-06, "loss": 1.2844, "step": 5265 }, { "epoch": 0.745522757839598, "grad_norm": 7.931238302788852, "learning_rate": 3.6112661922431576e-06, "loss": 1.2506, "step": 5266 }, { "epoch": 0.7456643307142351, "grad_norm": 7.967856921845544, "learning_rate": 3.610752704754842e-06, "loss": 1.3228, "step": 5267 }, { "epoch": 0.7458059035888723, "grad_norm": 8.608071784163315, "learning_rate": 3.610239158875085e-06, "loss": 1.191, "step": 5268 }, { "epoch": 0.7459474764635096, "grad_norm": 8.973574095278705, "learning_rate": 3.609725554630884e-06, "loss": 1.3282, "step": 5269 }, { "epoch": 0.7460890493381468, "grad_norm": 9.432502282226027, "learning_rate": 3.609211892049238e-06, "loss": 1.2991, "step": 5270 }, { "epoch": 0.746230622212784, "grad_norm": 8.022942071790416, "learning_rate": 3.60869817115715e-06, "loss": 1.1099, "step": 5271 }, { "epoch": 0.7463721950874213, "grad_norm": 8.421975101590862, "learning_rate": 3.6081843919816263e-06, "loss": 1.2083, "step": 5272 }, { "epoch": 0.7465137679620585, "grad_norm": 10.732731633666823, "learning_rate": 3.6076705545496743e-06, "loss": 1.2363, "step": 5273 }, { "epoch": 0.7466553408366957, "grad_norm": 7.49359238612331, "learning_rate": 3.6071566588883077e-06, "loss": 1.1887, "step": 5274 }, { "epoch": 0.7467969137113329, "grad_norm": 11.894291997799039, "learning_rate": 3.606642705024541e-06, "loss": 1.1765, "step": 5275 }, { "epoch": 0.7469384865859702, "grad_norm": 10.302552760543819, "learning_rate": 3.6061286929853915e-06, "loss": 1.4551, "step": 5276 }, { "epoch": 0.7470800594606073, "grad_norm": 10.404042602663031, "learning_rate": 3.6056146227978827e-06, "loss": 1.4332, "step": 5277 }, { "epoch": 0.7472216323352445, "grad_norm": 10.008923935649095, "learning_rate": 3.6051004944890373e-06, "loss": 1.3871, "step": 5278 }, { "epoch": 0.7473632052098818, "grad_norm": 9.475782344999525, "learning_rate": 3.6045863080858824e-06, "loss": 1.1792, "step": 5279 }, { "epoch": 0.747504778084519, "grad_norm": 11.504719968037847, "learning_rate": 3.604072063615449e-06, "loss": 1.2039, "step": 5280 }, { "epoch": 0.7476463509591562, "grad_norm": 9.388876449105211, "learning_rate": 3.6035577611047713e-06, "loss": 1.1879, "step": 5281 }, { "epoch": 0.7477879238337934, "grad_norm": 8.516453245988806, "learning_rate": 3.603043400580884e-06, "loss": 1.1095, "step": 5282 }, { "epoch": 0.7479294967084307, "grad_norm": 8.259742302003605, "learning_rate": 3.6025289820708277e-06, "loss": 1.1363, "step": 5283 }, { "epoch": 0.7480710695830679, "grad_norm": 10.252895783545359, "learning_rate": 3.6020145056016454e-06, "loss": 1.1861, "step": 5284 }, { "epoch": 0.7482126424577051, "grad_norm": 8.41947401709925, "learning_rate": 3.601499971200382e-06, "loss": 1.2405, "step": 5285 }, { "epoch": 0.7483542153323424, "grad_norm": 8.139128153831477, "learning_rate": 3.600985378894086e-06, "loss": 1.2881, "step": 5286 }, { "epoch": 0.7484957882069796, "grad_norm": 7.33889155279139, "learning_rate": 3.6004707287098104e-06, "loss": 1.1257, "step": 5287 }, { "epoch": 0.7486373610816167, "grad_norm": 7.309652442445243, "learning_rate": 3.5999560206746088e-06, "loss": 1.1952, "step": 5288 }, { "epoch": 0.748778933956254, "grad_norm": 9.943298567445673, "learning_rate": 3.5994412548155387e-06, "loss": 1.2129, "step": 5289 }, { "epoch": 0.7489205068308912, "grad_norm": 8.876568722768898, "learning_rate": 3.5989264311596617e-06, "loss": 1.1777, "step": 5290 }, { "epoch": 0.7490620797055284, "grad_norm": 10.343759219808014, "learning_rate": 3.598411549734042e-06, "loss": 1.3628, "step": 5291 }, { "epoch": 0.7492036525801656, "grad_norm": 7.60362098164228, "learning_rate": 3.5978966105657465e-06, "loss": 1.266, "step": 5292 }, { "epoch": 0.7493452254548029, "grad_norm": 7.800356773628911, "learning_rate": 3.597381613681845e-06, "loss": 1.3083, "step": 5293 }, { "epoch": 0.7494867983294401, "grad_norm": 9.358927873684262, "learning_rate": 3.5968665591094097e-06, "loss": 1.3291, "step": 5294 }, { "epoch": 0.7496283712040773, "grad_norm": 8.452653402828005, "learning_rate": 3.5963514468755172e-06, "loss": 1.2212, "step": 5295 }, { "epoch": 0.7497699440787146, "grad_norm": 8.017151090561205, "learning_rate": 3.5958362770072465e-06, "loss": 1.276, "step": 5296 }, { "epoch": 0.7499115169533518, "grad_norm": 7.80914869526292, "learning_rate": 3.59532104953168e-06, "loss": 1.2712, "step": 5297 }, { "epoch": 0.7500530898279889, "grad_norm": 7.932903009919819, "learning_rate": 3.5948057644759025e-06, "loss": 1.2068, "step": 5298 }, { "epoch": 0.7501946627026261, "grad_norm": 9.928133791210303, "learning_rate": 3.5942904218670025e-06, "loss": 1.2994, "step": 5299 }, { "epoch": 0.7503362355772634, "grad_norm": 8.069404898141885, "learning_rate": 3.5937750217320712e-06, "loss": 1.3877, "step": 5300 }, { "epoch": 0.7504778084519006, "grad_norm": 8.728716441874845, "learning_rate": 3.5932595640982023e-06, "loss": 1.2207, "step": 5301 }, { "epoch": 0.7506193813265378, "grad_norm": 7.979224647198435, "learning_rate": 3.592744048992493e-06, "loss": 1.2443, "step": 5302 }, { "epoch": 0.7507609542011751, "grad_norm": 9.121182413915955, "learning_rate": 3.5922284764420445e-06, "loss": 1.3691, "step": 5303 }, { "epoch": 0.7509025270758123, "grad_norm": 9.537829120201927, "learning_rate": 3.5917128464739586e-06, "loss": 1.2502, "step": 5304 }, { "epoch": 0.7510440999504495, "grad_norm": 8.272773521515093, "learning_rate": 3.5911971591153426e-06, "loss": 1.258, "step": 5305 }, { "epoch": 0.7511856728250867, "grad_norm": 9.101662251331055, "learning_rate": 3.590681414393306e-06, "loss": 1.256, "step": 5306 }, { "epoch": 0.751327245699724, "grad_norm": 7.359632094766185, "learning_rate": 3.5901656123349606e-06, "loss": 1.2355, "step": 5307 }, { "epoch": 0.7514688185743611, "grad_norm": 10.783641251457539, "learning_rate": 3.5896497529674213e-06, "loss": 1.2127, "step": 5308 }, { "epoch": 0.7516103914489983, "grad_norm": 8.58459695446475, "learning_rate": 3.589133836317808e-06, "loss": 1.2717, "step": 5309 }, { "epoch": 0.7517519643236356, "grad_norm": 9.141900702244968, "learning_rate": 3.5886178624132407e-06, "loss": 1.2816, "step": 5310 }, { "epoch": 0.7518935371982728, "grad_norm": 8.704105890359212, "learning_rate": 3.5881018312808435e-06, "loss": 1.3212, "step": 5311 }, { "epoch": 0.75203511007291, "grad_norm": 9.366763145882077, "learning_rate": 3.5875857429477447e-06, "loss": 1.2922, "step": 5312 }, { "epoch": 0.7521766829475472, "grad_norm": 10.042524710015382, "learning_rate": 3.5870695974410734e-06, "loss": 1.3256, "step": 5313 }, { "epoch": 0.7523182558221845, "grad_norm": 11.201364120789213, "learning_rate": 3.586553394787965e-06, "loss": 1.2475, "step": 5314 }, { "epoch": 0.7524598286968217, "grad_norm": 9.741371053273692, "learning_rate": 3.5860371350155547e-06, "loss": 1.4311, "step": 5315 }, { "epoch": 0.7526014015714589, "grad_norm": 8.629194041573577, "learning_rate": 3.5855208181509817e-06, "loss": 1.1594, "step": 5316 }, { "epoch": 0.7527429744460962, "grad_norm": 10.454647392409242, "learning_rate": 3.585004444221389e-06, "loss": 1.3115, "step": 5317 }, { "epoch": 0.7528845473207334, "grad_norm": 10.059580124573532, "learning_rate": 3.584488013253921e-06, "loss": 1.2082, "step": 5318 }, { "epoch": 0.7530261201953705, "grad_norm": 9.382911700662108, "learning_rate": 3.5839715252757273e-06, "loss": 1.2735, "step": 5319 }, { "epoch": 0.7531676930700077, "grad_norm": 8.450132055210041, "learning_rate": 3.583454980313959e-06, "loss": 1.158, "step": 5320 }, { "epoch": 0.753309265944645, "grad_norm": 8.475178998559098, "learning_rate": 3.58293837839577e-06, "loss": 1.1053, "step": 5321 }, { "epoch": 0.7534508388192822, "grad_norm": 8.929461512204934, "learning_rate": 3.5824217195483178e-06, "loss": 1.1672, "step": 5322 }, { "epoch": 0.7535924116939194, "grad_norm": 9.61319672796549, "learning_rate": 3.581905003798763e-06, "loss": 1.1835, "step": 5323 }, { "epoch": 0.7537339845685567, "grad_norm": 7.649149938489514, "learning_rate": 3.581388231174269e-06, "loss": 1.2791, "step": 5324 }, { "epoch": 0.7538755574431939, "grad_norm": 9.523851668854968, "learning_rate": 3.580871401702002e-06, "loss": 1.3424, "step": 5325 }, { "epoch": 0.7540171303178311, "grad_norm": 9.407599976449601, "learning_rate": 3.5803545154091312e-06, "loss": 1.2421, "step": 5326 }, { "epoch": 0.7541587031924684, "grad_norm": 8.847290624343792, "learning_rate": 3.5798375723228283e-06, "loss": 1.3853, "step": 5327 }, { "epoch": 0.7543002760671056, "grad_norm": 9.164333485843152, "learning_rate": 3.57932057247027e-06, "loss": 1.2116, "step": 5328 }, { "epoch": 0.7544418489417427, "grad_norm": 8.626470025880915, "learning_rate": 3.5788035158786346e-06, "loss": 1.2988, "step": 5329 }, { "epoch": 0.7545834218163799, "grad_norm": 9.651666337267233, "learning_rate": 3.5782864025751025e-06, "loss": 1.2906, "step": 5330 }, { "epoch": 0.7547249946910172, "grad_norm": 8.668026841851304, "learning_rate": 3.577769232586858e-06, "loss": 1.2244, "step": 5331 }, { "epoch": 0.7548665675656544, "grad_norm": 7.459332134633462, "learning_rate": 3.5772520059410887e-06, "loss": 1.1949, "step": 5332 }, { "epoch": 0.7550081404402916, "grad_norm": 9.00175077575516, "learning_rate": 3.576734722664984e-06, "loss": 1.2193, "step": 5333 }, { "epoch": 0.7551497133149289, "grad_norm": 12.356309127087288, "learning_rate": 3.576217382785738e-06, "loss": 1.2756, "step": 5334 }, { "epoch": 0.7552912861895661, "grad_norm": 10.230640271567118, "learning_rate": 3.5756999863305475e-06, "loss": 1.3386, "step": 5335 }, { "epoch": 0.7554328590642033, "grad_norm": 8.503283147101781, "learning_rate": 3.57518253332661e-06, "loss": 1.2507, "step": 5336 }, { "epoch": 0.7555744319388406, "grad_norm": 8.088929850901941, "learning_rate": 3.574665023801129e-06, "loss": 1.2718, "step": 5337 }, { "epoch": 0.7557160048134778, "grad_norm": 10.92199933986805, "learning_rate": 3.5741474577813086e-06, "loss": 1.4943, "step": 5338 }, { "epoch": 0.755857577688115, "grad_norm": 8.557361703881325, "learning_rate": 3.573629835294357e-06, "loss": 1.1788, "step": 5339 }, { "epoch": 0.7559991505627521, "grad_norm": 9.624587409353866, "learning_rate": 3.5731121563674863e-06, "loss": 1.4047, "step": 5340 }, { "epoch": 0.7561407234373894, "grad_norm": 9.233238498829625, "learning_rate": 3.572594421027909e-06, "loss": 1.2387, "step": 5341 }, { "epoch": 0.7562822963120266, "grad_norm": 9.328867615959279, "learning_rate": 3.572076629302843e-06, "loss": 1.2711, "step": 5342 }, { "epoch": 0.7564238691866638, "grad_norm": 13.469992171004186, "learning_rate": 3.571558781219508e-06, "loss": 1.1601, "step": 5343 }, { "epoch": 0.756565442061301, "grad_norm": 10.2317122162172, "learning_rate": 3.5710408768051262e-06, "loss": 1.3433, "step": 5344 }, { "epoch": 0.7567070149359383, "grad_norm": 10.51368656968832, "learning_rate": 3.5705229160869247e-06, "loss": 1.2007, "step": 5345 }, { "epoch": 0.7568485878105755, "grad_norm": 10.6477020076838, "learning_rate": 3.570004899092133e-06, "loss": 1.2715, "step": 5346 }, { "epoch": 0.7569901606852127, "grad_norm": 7.604044901948542, "learning_rate": 3.5694868258479798e-06, "loss": 1.2446, "step": 5347 }, { "epoch": 0.75713173355985, "grad_norm": 10.246955605550797, "learning_rate": 3.5689686963817023e-06, "loss": 1.2238, "step": 5348 }, { "epoch": 0.7572733064344872, "grad_norm": 11.310056981112679, "learning_rate": 3.568450510720537e-06, "loss": 1.2834, "step": 5349 }, { "epoch": 0.7574148793091243, "grad_norm": 8.219199280733267, "learning_rate": 3.567932268891725e-06, "loss": 1.1608, "step": 5350 }, { "epoch": 0.7575564521837616, "grad_norm": 7.860543924395659, "learning_rate": 3.5674139709225104e-06, "loss": 1.1325, "step": 5351 }, { "epoch": 0.7576980250583988, "grad_norm": 8.352059901727884, "learning_rate": 3.5668956168401392e-06, "loss": 1.1746, "step": 5352 }, { "epoch": 0.757839597933036, "grad_norm": 10.535924943568263, "learning_rate": 3.5663772066718606e-06, "loss": 1.1046, "step": 5353 }, { "epoch": 0.7579811708076732, "grad_norm": 8.203800195426775, "learning_rate": 3.565858740444927e-06, "loss": 1.4213, "step": 5354 }, { "epoch": 0.7581227436823105, "grad_norm": 10.349988824958112, "learning_rate": 3.5653402181865954e-06, "loss": 1.3312, "step": 5355 }, { "epoch": 0.7582643165569477, "grad_norm": 8.670470747789953, "learning_rate": 3.564821639924122e-06, "loss": 1.1981, "step": 5356 }, { "epoch": 0.7584058894315849, "grad_norm": 7.199453746418972, "learning_rate": 3.5643030056847695e-06, "loss": 1.3764, "step": 5357 }, { "epoch": 0.7585474623062222, "grad_norm": 8.10137368316011, "learning_rate": 3.5637843154958006e-06, "loss": 1.1499, "step": 5358 }, { "epoch": 0.7586890351808594, "grad_norm": 9.54389250223609, "learning_rate": 3.563265569384484e-06, "loss": 1.4019, "step": 5359 }, { "epoch": 0.7588306080554965, "grad_norm": 9.521531850843177, "learning_rate": 3.56274676737809e-06, "loss": 1.3365, "step": 5360 }, { "epoch": 0.7589721809301337, "grad_norm": 9.950297626280458, "learning_rate": 3.5622279095038896e-06, "loss": 1.3648, "step": 5361 }, { "epoch": 0.759113753804771, "grad_norm": 9.067591848105435, "learning_rate": 3.5617089957891614e-06, "loss": 1.2903, "step": 5362 }, { "epoch": 0.7592553266794082, "grad_norm": 9.284668983060193, "learning_rate": 3.561190026261182e-06, "loss": 1.2457, "step": 5363 }, { "epoch": 0.7593968995540454, "grad_norm": 7.642858136002562, "learning_rate": 3.5606710009472335e-06, "loss": 1.3543, "step": 5364 }, { "epoch": 0.7595384724286827, "grad_norm": 9.692873934739175, "learning_rate": 3.560151919874602e-06, "loss": 1.2858, "step": 5365 }, { "epoch": 0.7596800453033199, "grad_norm": 9.272634495561725, "learning_rate": 3.5596327830705746e-06, "loss": 1.3427, "step": 5366 }, { "epoch": 0.7598216181779571, "grad_norm": 8.203252417165192, "learning_rate": 3.559113590562443e-06, "loss": 1.215, "step": 5367 }, { "epoch": 0.7599631910525944, "grad_norm": 9.338349992295925, "learning_rate": 3.558594342377498e-06, "loss": 1.3898, "step": 5368 }, { "epoch": 0.7601047639272316, "grad_norm": 10.978270830140868, "learning_rate": 3.5580750385430385e-06, "loss": 1.3058, "step": 5369 }, { "epoch": 0.7602463368018688, "grad_norm": 9.009701056903365, "learning_rate": 3.557555679086363e-06, "loss": 1.2145, "step": 5370 }, { "epoch": 0.7603879096765059, "grad_norm": 9.228056164157632, "learning_rate": 3.5570362640347743e-06, "loss": 1.2172, "step": 5371 }, { "epoch": 0.7605294825511432, "grad_norm": 9.74740370392471, "learning_rate": 3.556516793415577e-06, "loss": 1.2049, "step": 5372 }, { "epoch": 0.7606710554257804, "grad_norm": 8.704978430478599, "learning_rate": 3.5559972672560795e-06, "loss": 1.2718, "step": 5373 }, { "epoch": 0.7608126283004176, "grad_norm": 15.921987126997234, "learning_rate": 3.5554776855835934e-06, "loss": 1.1393, "step": 5374 }, { "epoch": 0.7609542011750549, "grad_norm": 8.619904270569892, "learning_rate": 3.554958048425432e-06, "loss": 1.2907, "step": 5375 }, { "epoch": 0.7610957740496921, "grad_norm": 8.557927825245395, "learning_rate": 3.5544383558089128e-06, "loss": 1.3369, "step": 5376 }, { "epoch": 0.7612373469243293, "grad_norm": 9.796514817815751, "learning_rate": 3.5539186077613562e-06, "loss": 1.281, "step": 5377 }, { "epoch": 0.7613789197989665, "grad_norm": 10.80376138531395, "learning_rate": 3.553398804310083e-06, "loss": 1.2066, "step": 5378 }, { "epoch": 0.7615204926736038, "grad_norm": 10.597387718907134, "learning_rate": 3.5528789454824205e-06, "loss": 1.1269, "step": 5379 }, { "epoch": 0.761662065548241, "grad_norm": 10.899414368742155, "learning_rate": 3.5523590313056965e-06, "loss": 1.4492, "step": 5380 }, { "epoch": 0.7618036384228781, "grad_norm": 8.384127980281914, "learning_rate": 3.551839061807244e-06, "loss": 1.2166, "step": 5381 }, { "epoch": 0.7619452112975154, "grad_norm": 9.63551608197587, "learning_rate": 3.551319037014396e-06, "loss": 1.0935, "step": 5382 }, { "epoch": 0.7620867841721526, "grad_norm": 8.622255192635718, "learning_rate": 3.5507989569544896e-06, "loss": 1.1544, "step": 5383 }, { "epoch": 0.7622283570467898, "grad_norm": 9.178292364062873, "learning_rate": 3.550278821654866e-06, "loss": 1.3381, "step": 5384 }, { "epoch": 0.762369929921427, "grad_norm": 8.601797989368405, "learning_rate": 3.5497586311428676e-06, "loss": 1.1961, "step": 5385 }, { "epoch": 0.7625115027960643, "grad_norm": 12.600705178524484, "learning_rate": 3.5492383854458405e-06, "loss": 1.2077, "step": 5386 }, { "epoch": 0.7626530756707015, "grad_norm": 10.239226098618627, "learning_rate": 3.548718084591134e-06, "loss": 1.3073, "step": 5387 }, { "epoch": 0.7627946485453387, "grad_norm": 8.755206057979205, "learning_rate": 3.5481977286061e-06, "loss": 1.219, "step": 5388 }, { "epoch": 0.762936221419976, "grad_norm": 11.836689719289856, "learning_rate": 3.547677317518093e-06, "loss": 1.2465, "step": 5389 }, { "epoch": 0.7630777942946132, "grad_norm": 8.15844182639479, "learning_rate": 3.54715685135447e-06, "loss": 1.2314, "step": 5390 }, { "epoch": 0.7632193671692503, "grad_norm": 6.587824034276553, "learning_rate": 3.546636330142593e-06, "loss": 1.1834, "step": 5391 }, { "epoch": 0.7633609400438875, "grad_norm": 9.534615744743064, "learning_rate": 3.5461157539098236e-06, "loss": 1.305, "step": 5392 }, { "epoch": 0.7635025129185248, "grad_norm": 9.207615428804877, "learning_rate": 3.5455951226835296e-06, "loss": 1.1615, "step": 5393 }, { "epoch": 0.763644085793162, "grad_norm": 9.551452238699834, "learning_rate": 3.5450744364910794e-06, "loss": 1.3527, "step": 5394 }, { "epoch": 0.7637856586677992, "grad_norm": 9.11376512324515, "learning_rate": 3.544553695359845e-06, "loss": 1.2108, "step": 5395 }, { "epoch": 0.7639272315424365, "grad_norm": 8.49560523020809, "learning_rate": 3.5440328993172023e-06, "loss": 1.3475, "step": 5396 }, { "epoch": 0.7640688044170737, "grad_norm": 11.042948368945643, "learning_rate": 3.5435120483905285e-06, "loss": 1.3518, "step": 5397 }, { "epoch": 0.7642103772917109, "grad_norm": 8.200392029858047, "learning_rate": 3.542991142607204e-06, "loss": 1.3248, "step": 5398 }, { "epoch": 0.7643519501663482, "grad_norm": 9.924163506375212, "learning_rate": 3.5424701819946137e-06, "loss": 1.2304, "step": 5399 }, { "epoch": 0.7644935230409854, "grad_norm": 9.450466116253374, "learning_rate": 3.5419491665801424e-06, "loss": 1.3625, "step": 5400 }, { "epoch": 0.7646350959156226, "grad_norm": 8.495996766687846, "learning_rate": 3.54142809639118e-06, "loss": 1.3641, "step": 5401 }, { "epoch": 0.7647766687902597, "grad_norm": 8.419900807863344, "learning_rate": 3.54090697145512e-06, "loss": 1.3652, "step": 5402 }, { "epoch": 0.764918241664897, "grad_norm": 8.328496012197839, "learning_rate": 3.5403857917993554e-06, "loss": 1.3622, "step": 5403 }, { "epoch": 0.7650598145395342, "grad_norm": 9.873553206292073, "learning_rate": 3.5398645574512876e-06, "loss": 1.3174, "step": 5404 }, { "epoch": 0.7652013874141714, "grad_norm": 8.460556965703965, "learning_rate": 3.5393432684383137e-06, "loss": 1.3148, "step": 5405 }, { "epoch": 0.7653429602888087, "grad_norm": 9.382275415909536, "learning_rate": 3.5388219247878395e-06, "loss": 1.1494, "step": 5406 }, { "epoch": 0.7654845331634459, "grad_norm": 7.977982980231364, "learning_rate": 3.5383005265272713e-06, "loss": 1.3072, "step": 5407 }, { "epoch": 0.7656261060380831, "grad_norm": 8.750325660094877, "learning_rate": 3.537779073684019e-06, "loss": 1.2862, "step": 5408 }, { "epoch": 0.7657676789127204, "grad_norm": 7.7065483982202645, "learning_rate": 3.5372575662854937e-06, "loss": 1.1662, "step": 5409 }, { "epoch": 0.7659092517873576, "grad_norm": 10.094998970352883, "learning_rate": 3.536736004359112e-06, "loss": 1.4555, "step": 5410 }, { "epoch": 0.7660508246619948, "grad_norm": 9.310068926858252, "learning_rate": 3.536214387932292e-06, "loss": 1.398, "step": 5411 }, { "epoch": 0.7661923975366319, "grad_norm": 7.72973074144275, "learning_rate": 3.535692717032454e-06, "loss": 1.2172, "step": 5412 }, { "epoch": 0.7663339704112692, "grad_norm": 8.507916522550483, "learning_rate": 3.535170991687022e-06, "loss": 1.2608, "step": 5413 }, { "epoch": 0.7664755432859064, "grad_norm": 9.001397766078558, "learning_rate": 3.5346492119234225e-06, "loss": 1.3417, "step": 5414 }, { "epoch": 0.7666171161605436, "grad_norm": 11.749411710256364, "learning_rate": 3.5341273777690867e-06, "loss": 1.2301, "step": 5415 }, { "epoch": 0.7667586890351809, "grad_norm": 8.631702417315703, "learning_rate": 3.5336054892514437e-06, "loss": 1.2455, "step": 5416 }, { "epoch": 0.7669002619098181, "grad_norm": 8.752309875776856, "learning_rate": 3.5330835463979318e-06, "loss": 1.1372, "step": 5417 }, { "epoch": 0.7670418347844553, "grad_norm": 10.427093617640304, "learning_rate": 3.532561549235988e-06, "loss": 1.2277, "step": 5418 }, { "epoch": 0.7671834076590925, "grad_norm": 10.4905654664633, "learning_rate": 3.532039497793054e-06, "loss": 1.1747, "step": 5419 }, { "epoch": 0.7673249805337298, "grad_norm": 8.752088896863066, "learning_rate": 3.5315173920965736e-06, "loss": 1.2775, "step": 5420 }, { "epoch": 0.767466553408367, "grad_norm": 8.510440921320878, "learning_rate": 3.5309952321739922e-06, "loss": 1.1961, "step": 5421 }, { "epoch": 0.7676081262830041, "grad_norm": 10.48789934709729, "learning_rate": 3.53047301805276e-06, "loss": 1.376, "step": 5422 }, { "epoch": 0.7677496991576414, "grad_norm": 8.165620646017544, "learning_rate": 3.5299507497603303e-06, "loss": 1.2991, "step": 5423 }, { "epoch": 0.7678912720322786, "grad_norm": 6.60089447433665, "learning_rate": 3.5294284273241565e-06, "loss": 1.186, "step": 5424 }, { "epoch": 0.7680328449069158, "grad_norm": 8.916626784571172, "learning_rate": 3.5289060507716986e-06, "loss": 1.3774, "step": 5425 }, { "epoch": 0.768174417781553, "grad_norm": 13.241632086434814, "learning_rate": 3.528383620130417e-06, "loss": 1.1918, "step": 5426 }, { "epoch": 0.7683159906561903, "grad_norm": 13.688613637273384, "learning_rate": 3.527861135427775e-06, "loss": 1.3085, "step": 5427 }, { "epoch": 0.7684575635308275, "grad_norm": 8.99781136921836, "learning_rate": 3.5273385966912398e-06, "loss": 1.1974, "step": 5428 }, { "epoch": 0.7685991364054647, "grad_norm": 9.285371937571643, "learning_rate": 3.52681600394828e-06, "loss": 1.1918, "step": 5429 }, { "epoch": 0.768740709280102, "grad_norm": 11.113968083455099, "learning_rate": 3.526293357226369e-06, "loss": 1.2307, "step": 5430 }, { "epoch": 0.7688822821547392, "grad_norm": 9.08327444448251, "learning_rate": 3.5257706565529813e-06, "loss": 1.2019, "step": 5431 }, { "epoch": 0.7690238550293764, "grad_norm": 9.248765192866204, "learning_rate": 3.525247901955595e-06, "loss": 1.3567, "step": 5432 }, { "epoch": 0.7691654279040135, "grad_norm": 9.429215793141697, "learning_rate": 3.5247250934616907e-06, "loss": 1.32, "step": 5433 }, { "epoch": 0.7693070007786508, "grad_norm": 9.146949601463799, "learning_rate": 3.524202231098753e-06, "loss": 1.3089, "step": 5434 }, { "epoch": 0.769448573653288, "grad_norm": 11.329950688011552, "learning_rate": 3.5236793148942673e-06, "loss": 1.3104, "step": 5435 }, { "epoch": 0.7695901465279252, "grad_norm": 10.168649282114918, "learning_rate": 3.5231563448757233e-06, "loss": 1.2718, "step": 5436 }, { "epoch": 0.7697317194025625, "grad_norm": 9.745619743608495, "learning_rate": 3.5226333210706133e-06, "loss": 1.3217, "step": 5437 }, { "epoch": 0.7698732922771997, "grad_norm": 10.09635546807822, "learning_rate": 3.5221102435064314e-06, "loss": 1.1219, "step": 5438 }, { "epoch": 0.7700148651518369, "grad_norm": 8.353417214685116, "learning_rate": 3.5215871122106767e-06, "loss": 1.2251, "step": 5439 }, { "epoch": 0.7701564380264742, "grad_norm": 8.419198539809734, "learning_rate": 3.5210639272108487e-06, "loss": 1.2997, "step": 5440 }, { "epoch": 0.7702980109011114, "grad_norm": 9.619544526735945, "learning_rate": 3.520540688534453e-06, "loss": 1.2629, "step": 5441 }, { "epoch": 0.7704395837757486, "grad_norm": 8.994618396308697, "learning_rate": 3.520017396208993e-06, "loss": 1.211, "step": 5442 }, { "epoch": 0.7705811566503857, "grad_norm": 9.242419874376912, "learning_rate": 3.519494050261979e-06, "loss": 1.2418, "step": 5443 }, { "epoch": 0.770722729525023, "grad_norm": 8.18689801281692, "learning_rate": 3.518970650720923e-06, "loss": 1.221, "step": 5444 }, { "epoch": 0.7708643023996602, "grad_norm": 7.619345147136426, "learning_rate": 3.5184471976133396e-06, "loss": 1.4087, "step": 5445 }, { "epoch": 0.7710058752742974, "grad_norm": 9.541033816832709, "learning_rate": 3.517923690966747e-06, "loss": 1.4306, "step": 5446 }, { "epoch": 0.7711474481489347, "grad_norm": 11.760383725296167, "learning_rate": 3.5174001308086643e-06, "loss": 1.1966, "step": 5447 }, { "epoch": 0.7712890210235719, "grad_norm": 8.014610300662433, "learning_rate": 3.516876517166615e-06, "loss": 1.1912, "step": 5448 }, { "epoch": 0.7714305938982091, "grad_norm": 8.30662891450797, "learning_rate": 3.5163528500681266e-06, "loss": 1.1034, "step": 5449 }, { "epoch": 0.7715721667728463, "grad_norm": 8.91239594001532, "learning_rate": 3.515829129540726e-06, "loss": 1.3547, "step": 5450 }, { "epoch": 0.7717137396474836, "grad_norm": 9.30009299611182, "learning_rate": 3.5153053556119454e-06, "loss": 1.3894, "step": 5451 }, { "epoch": 0.7718553125221208, "grad_norm": 8.937526889573842, "learning_rate": 3.51478152830932e-06, "loss": 1.3018, "step": 5452 }, { "epoch": 0.7719968853967579, "grad_norm": 8.883434746922614, "learning_rate": 3.514257647660385e-06, "loss": 1.3539, "step": 5453 }, { "epoch": 0.7721384582713952, "grad_norm": 8.192155519052347, "learning_rate": 3.5137337136926825e-06, "loss": 1.115, "step": 5454 }, { "epoch": 0.7722800311460324, "grad_norm": 7.968102522332182, "learning_rate": 3.5132097264337546e-06, "loss": 1.2732, "step": 5455 }, { "epoch": 0.7724216040206696, "grad_norm": 7.907011575922194, "learning_rate": 3.512685685911147e-06, "loss": 1.1516, "step": 5456 }, { "epoch": 0.7725631768953068, "grad_norm": 8.322253936494374, "learning_rate": 3.5121615921524084e-06, "loss": 1.2185, "step": 5457 }, { "epoch": 0.7727047497699441, "grad_norm": 7.840670889351063, "learning_rate": 3.5116374451850887e-06, "loss": 1.138, "step": 5458 }, { "epoch": 0.7728463226445813, "grad_norm": 8.161330938815745, "learning_rate": 3.511113245036743e-06, "loss": 1.2449, "step": 5459 }, { "epoch": 0.7729878955192185, "grad_norm": 9.14046223813423, "learning_rate": 3.510588991734928e-06, "loss": 1.2158, "step": 5460 }, { "epoch": 0.7731294683938558, "grad_norm": 7.26171638607748, "learning_rate": 3.510064685307203e-06, "loss": 1.1781, "step": 5461 }, { "epoch": 0.773271041268493, "grad_norm": 8.644723647881532, "learning_rate": 3.5095403257811313e-06, "loss": 1.2568, "step": 5462 }, { "epoch": 0.7734126141431302, "grad_norm": 10.316894704571471, "learning_rate": 3.5090159131842773e-06, "loss": 1.4347, "step": 5463 }, { "epoch": 0.7735541870177673, "grad_norm": 9.650224009073208, "learning_rate": 3.5084914475442085e-06, "loss": 1.3756, "step": 5464 }, { "epoch": 0.7736957598924046, "grad_norm": 7.680512268545334, "learning_rate": 3.5079669288884965e-06, "loss": 1.3281, "step": 5465 }, { "epoch": 0.7738373327670418, "grad_norm": 8.421780785651126, "learning_rate": 3.507442357244715e-06, "loss": 1.1736, "step": 5466 }, { "epoch": 0.773978905641679, "grad_norm": 7.567845244633938, "learning_rate": 3.5069177326404393e-06, "loss": 1.2378, "step": 5467 }, { "epoch": 0.7741204785163163, "grad_norm": 9.771867926399542, "learning_rate": 3.5063930551032494e-06, "loss": 1.1704, "step": 5468 }, { "epoch": 0.7742620513909535, "grad_norm": 9.400107168033129, "learning_rate": 3.5058683246607273e-06, "loss": 1.1966, "step": 5469 }, { "epoch": 0.7744036242655907, "grad_norm": 9.412853267773007, "learning_rate": 3.505343541340457e-06, "loss": 1.3289, "step": 5470 }, { "epoch": 0.774545197140228, "grad_norm": 7.500725774616487, "learning_rate": 3.5048187051700265e-06, "loss": 1.2478, "step": 5471 }, { "epoch": 0.7746867700148652, "grad_norm": 9.864557853647026, "learning_rate": 3.5042938161770257e-06, "loss": 1.2422, "step": 5472 }, { "epoch": 0.7748283428895024, "grad_norm": 10.158962227993797, "learning_rate": 3.5037688743890484e-06, "loss": 1.173, "step": 5473 }, { "epoch": 0.7749699157641395, "grad_norm": 7.924725442937898, "learning_rate": 3.50324387983369e-06, "loss": 1.2199, "step": 5474 }, { "epoch": 0.7751114886387768, "grad_norm": 10.143574572237297, "learning_rate": 3.502718832538548e-06, "loss": 1.2245, "step": 5475 }, { "epoch": 0.775253061513414, "grad_norm": 7.611037932304793, "learning_rate": 3.502193732531225e-06, "loss": 1.2681, "step": 5476 }, { "epoch": 0.7753946343880512, "grad_norm": 9.975076133035943, "learning_rate": 3.5016685798393244e-06, "loss": 1.4298, "step": 5477 }, { "epoch": 0.7755362072626885, "grad_norm": 9.45192439754437, "learning_rate": 3.5011433744904543e-06, "loss": 1.2472, "step": 5478 }, { "epoch": 0.7756777801373257, "grad_norm": 8.689539827688932, "learning_rate": 3.5006181165122233e-06, "loss": 1.263, "step": 5479 }, { "epoch": 0.7758193530119629, "grad_norm": 9.533825935850604, "learning_rate": 3.500092805932244e-06, "loss": 1.2074, "step": 5480 }, { "epoch": 0.7759609258866002, "grad_norm": 9.124429763325452, "learning_rate": 3.499567442778131e-06, "loss": 1.1691, "step": 5481 }, { "epoch": 0.7761024987612374, "grad_norm": 10.084326534905426, "learning_rate": 3.4990420270775026e-06, "loss": 1.303, "step": 5482 }, { "epoch": 0.7762440716358746, "grad_norm": 7.407887466910665, "learning_rate": 3.4985165588579806e-06, "loss": 1.1125, "step": 5483 }, { "epoch": 0.7763856445105118, "grad_norm": 9.494042787438186, "learning_rate": 3.497991038147187e-06, "loss": 1.2572, "step": 5484 }, { "epoch": 0.776527217385149, "grad_norm": 8.71984112656079, "learning_rate": 3.497465464972749e-06, "loss": 1.2741, "step": 5485 }, { "epoch": 0.7766687902597862, "grad_norm": 7.4607521553385405, "learning_rate": 3.496939839362295e-06, "loss": 1.2329, "step": 5486 }, { "epoch": 0.7768103631344234, "grad_norm": 7.2149621843284155, "learning_rate": 3.496414161343457e-06, "loss": 1.2614, "step": 5487 }, { "epoch": 0.7769519360090607, "grad_norm": 8.936484112390936, "learning_rate": 3.49588843094387e-06, "loss": 1.39, "step": 5488 }, { "epoch": 0.7770935088836979, "grad_norm": 8.528876229394358, "learning_rate": 3.4953626481911707e-06, "loss": 1.4697, "step": 5489 }, { "epoch": 0.7772350817583351, "grad_norm": 9.885489771378024, "learning_rate": 3.4948368131129984e-06, "loss": 1.1757, "step": 5490 }, { "epoch": 0.7773766546329723, "grad_norm": 8.873166606816543, "learning_rate": 3.4943109257369973e-06, "loss": 1.3923, "step": 5491 }, { "epoch": 0.7775182275076096, "grad_norm": 9.690695352554203, "learning_rate": 3.493784986090812e-06, "loss": 1.2505, "step": 5492 }, { "epoch": 0.7776598003822468, "grad_norm": 8.375543377457715, "learning_rate": 3.4932589942020912e-06, "loss": 1.151, "step": 5493 }, { "epoch": 0.777801373256884, "grad_norm": 8.710869184149155, "learning_rate": 3.4927329500984857e-06, "loss": 1.4355, "step": 5494 }, { "epoch": 0.7779429461315212, "grad_norm": 8.375614428400024, "learning_rate": 3.4922068538076493e-06, "loss": 1.2463, "step": 5495 }, { "epoch": 0.7780845190061584, "grad_norm": 8.486554900235255, "learning_rate": 3.4916807053572376e-06, "loss": 1.3791, "step": 5496 }, { "epoch": 0.7782260918807956, "grad_norm": 7.281954358786332, "learning_rate": 3.4911545047749113e-06, "loss": 1.248, "step": 5497 }, { "epoch": 0.7783676647554328, "grad_norm": 10.757751154395715, "learning_rate": 3.4906282520883312e-06, "loss": 1.2237, "step": 5498 }, { "epoch": 0.7785092376300701, "grad_norm": 9.567778682432806, "learning_rate": 3.4901019473251635e-06, "loss": 1.3286, "step": 5499 }, { "epoch": 0.7786508105047073, "grad_norm": 7.4761318612917655, "learning_rate": 3.489575590513074e-06, "loss": 1.2674, "step": 5500 }, { "epoch": 0.7787923833793445, "grad_norm": 7.788549469454166, "learning_rate": 3.4890491816797333e-06, "loss": 1.2032, "step": 5501 }, { "epoch": 0.7789339562539818, "grad_norm": 8.113897161564072, "learning_rate": 3.4885227208528148e-06, "loss": 1.2634, "step": 5502 }, { "epoch": 0.779075529128619, "grad_norm": 7.740300908116774, "learning_rate": 3.487996208059994e-06, "loss": 1.253, "step": 5503 }, { "epoch": 0.7792171020032562, "grad_norm": 8.218236174676042, "learning_rate": 3.48746964332895e-06, "loss": 1.1058, "step": 5504 }, { "epoch": 0.7793586748778933, "grad_norm": 8.751082435140301, "learning_rate": 3.486943026687362e-06, "loss": 1.4038, "step": 5505 }, { "epoch": 0.7795002477525306, "grad_norm": 10.953999483078908, "learning_rate": 3.486416358162916e-06, "loss": 1.2419, "step": 5506 }, { "epoch": 0.7796418206271678, "grad_norm": 16.530116484059103, "learning_rate": 3.4858896377832966e-06, "loss": 1.2148, "step": 5507 }, { "epoch": 0.779783393501805, "grad_norm": 10.025584489711887, "learning_rate": 3.4853628655761946e-06, "loss": 1.1716, "step": 5508 }, { "epoch": 0.7799249663764423, "grad_norm": 9.203683729325618, "learning_rate": 3.4848360415693013e-06, "loss": 1.2937, "step": 5509 }, { "epoch": 0.7800665392510795, "grad_norm": 10.032283171214885, "learning_rate": 3.484309165790312e-06, "loss": 1.3109, "step": 5510 }, { "epoch": 0.7802081121257167, "grad_norm": 9.29328646603793, "learning_rate": 3.4837822382669235e-06, "loss": 1.2025, "step": 5511 }, { "epoch": 0.780349685000354, "grad_norm": 14.092542778465873, "learning_rate": 3.4832552590268363e-06, "loss": 1.2795, "step": 5512 }, { "epoch": 0.7804912578749912, "grad_norm": 8.985934817042288, "learning_rate": 3.4827282280977527e-06, "loss": 1.4931, "step": 5513 }, { "epoch": 0.7806328307496284, "grad_norm": 8.988789676243961, "learning_rate": 3.4822011455073788e-06, "loss": 1.3214, "step": 5514 }, { "epoch": 0.7807744036242656, "grad_norm": 12.091474925299256, "learning_rate": 3.4816740112834248e-06, "loss": 1.1619, "step": 5515 }, { "epoch": 0.7809159764989028, "grad_norm": 10.994343083330826, "learning_rate": 3.4811468254535984e-06, "loss": 1.0956, "step": 5516 }, { "epoch": 0.78105754937354, "grad_norm": 22.159586452109515, "learning_rate": 3.4806195880456158e-06, "loss": 1.0633, "step": 5517 }, { "epoch": 0.7811991222481772, "grad_norm": 8.229526431432403, "learning_rate": 3.4800922990871924e-06, "loss": 1.1971, "step": 5518 }, { "epoch": 0.7813406951228145, "grad_norm": 7.868738985546302, "learning_rate": 3.479564958606047e-06, "loss": 1.2206, "step": 5519 }, { "epoch": 0.7814822679974517, "grad_norm": 10.99448603366848, "learning_rate": 3.4790375666299026e-06, "loss": 1.2235, "step": 5520 }, { "epoch": 0.7816238408720889, "grad_norm": 10.759739921291226, "learning_rate": 3.478510123186483e-06, "loss": 1.3007, "step": 5521 }, { "epoch": 0.7817654137467261, "grad_norm": 8.434034299806498, "learning_rate": 3.477982628303516e-06, "loss": 1.2613, "step": 5522 }, { "epoch": 0.7819069866213634, "grad_norm": 9.521582731863782, "learning_rate": 3.4774550820087317e-06, "loss": 1.1967, "step": 5523 }, { "epoch": 0.7820485594960006, "grad_norm": 15.624236797766965, "learning_rate": 3.476927484329863e-06, "loss": 1.3067, "step": 5524 }, { "epoch": 0.7821901323706378, "grad_norm": 8.909475926871563, "learning_rate": 3.4763998352946436e-06, "loss": 1.3097, "step": 5525 }, { "epoch": 0.782331705245275, "grad_norm": 9.015574225376954, "learning_rate": 3.4758721349308146e-06, "loss": 1.1777, "step": 5526 }, { "epoch": 0.7824732781199122, "grad_norm": 9.180314556375718, "learning_rate": 3.4753443832661134e-06, "loss": 1.3338, "step": 5527 }, { "epoch": 0.7826148509945494, "grad_norm": 8.44331080651946, "learning_rate": 3.4748165803282856e-06, "loss": 1.0747, "step": 5528 }, { "epoch": 0.7827564238691866, "grad_norm": 8.588667708829776, "learning_rate": 3.4742887261450776e-06, "loss": 1.1963, "step": 5529 }, { "epoch": 0.7828979967438239, "grad_norm": 8.882259355028989, "learning_rate": 3.4737608207442373e-06, "loss": 1.1885, "step": 5530 }, { "epoch": 0.7830395696184611, "grad_norm": 10.73378542151359, "learning_rate": 3.4732328641535174e-06, "loss": 1.3579, "step": 5531 }, { "epoch": 0.7831811424930983, "grad_norm": 9.662255256188494, "learning_rate": 3.472704856400671e-06, "loss": 1.225, "step": 5532 }, { "epoch": 0.7833227153677356, "grad_norm": 10.6484375, "learning_rate": 3.4721767975134557e-06, "loss": 1.3898, "step": 5533 }, { "epoch": 0.7834642882423728, "grad_norm": 7.664505474265059, "learning_rate": 3.471648687519631e-06, "loss": 1.0599, "step": 5534 }, { "epoch": 0.78360586111701, "grad_norm": 9.354009297044728, "learning_rate": 3.4711205264469583e-06, "loss": 1.2397, "step": 5535 }, { "epoch": 0.7837474339916471, "grad_norm": 7.536578181187557, "learning_rate": 3.470592314323205e-06, "loss": 1.2021, "step": 5536 }, { "epoch": 0.7838890068662844, "grad_norm": 9.640071018321455, "learning_rate": 3.4700640511761373e-06, "loss": 1.3154, "step": 5537 }, { "epoch": 0.7840305797409216, "grad_norm": 8.46283766102455, "learning_rate": 3.4695357370335255e-06, "loss": 1.2629, "step": 5538 }, { "epoch": 0.7841721526155588, "grad_norm": 8.580180159947018, "learning_rate": 3.4690073719231426e-06, "loss": 1.2506, "step": 5539 }, { "epoch": 0.7843137254901961, "grad_norm": 8.282409356493746, "learning_rate": 3.468478955872765e-06, "loss": 1.3113, "step": 5540 }, { "epoch": 0.7844552983648333, "grad_norm": 9.065026924105942, "learning_rate": 3.4679504889101704e-06, "loss": 1.2778, "step": 5541 }, { "epoch": 0.7845968712394705, "grad_norm": 9.151498857315627, "learning_rate": 3.4674219710631406e-06, "loss": 1.1897, "step": 5542 }, { "epoch": 0.7847384441141078, "grad_norm": 9.849976716764399, "learning_rate": 3.466893402359459e-06, "loss": 1.1618, "step": 5543 }, { "epoch": 0.784880016988745, "grad_norm": 8.524146843335014, "learning_rate": 3.4663647828269124e-06, "loss": 1.2727, "step": 5544 }, { "epoch": 0.7850215898633822, "grad_norm": 8.557075508593202, "learning_rate": 3.46583611249329e-06, "loss": 1.2128, "step": 5545 }, { "epoch": 0.7851631627380194, "grad_norm": 9.751943150092535, "learning_rate": 3.465307391386383e-06, "loss": 1.3209, "step": 5546 }, { "epoch": 0.7853047356126566, "grad_norm": 9.700664017749192, "learning_rate": 3.464778619533987e-06, "loss": 1.3515, "step": 5547 }, { "epoch": 0.7854463084872938, "grad_norm": 8.403841829491865, "learning_rate": 3.4642497969638973e-06, "loss": 1.2963, "step": 5548 }, { "epoch": 0.785587881361931, "grad_norm": 9.735347595122198, "learning_rate": 3.463720923703915e-06, "loss": 1.2579, "step": 5549 }, { "epoch": 0.7857294542365683, "grad_norm": 10.904572016220243, "learning_rate": 3.4631919997818415e-06, "loss": 1.4626, "step": 5550 }, { "epoch": 0.7858710271112055, "grad_norm": 9.190132853779492, "learning_rate": 3.4626630252254835e-06, "loss": 1.3259, "step": 5551 }, { "epoch": 0.7860125999858427, "grad_norm": 7.690800400291191, "learning_rate": 3.462134000062649e-06, "loss": 1.2157, "step": 5552 }, { "epoch": 0.78615417286048, "grad_norm": 7.796214960633291, "learning_rate": 3.4616049243211463e-06, "loss": 1.3386, "step": 5553 }, { "epoch": 0.7862957457351172, "grad_norm": 9.094391370880093, "learning_rate": 3.46107579802879e-06, "loss": 1.1755, "step": 5554 }, { "epoch": 0.7864373186097544, "grad_norm": 9.618787866310903, "learning_rate": 3.4605466212133957e-06, "loss": 1.3907, "step": 5555 }, { "epoch": 0.7865788914843916, "grad_norm": 9.569144141054892, "learning_rate": 3.460017393902782e-06, "loss": 1.2509, "step": 5556 }, { "epoch": 0.7867204643590288, "grad_norm": 8.434056914678669, "learning_rate": 3.4594881161247694e-06, "loss": 1.2551, "step": 5557 }, { "epoch": 0.786862037233666, "grad_norm": 10.435352218671603, "learning_rate": 3.458958787907182e-06, "loss": 1.2707, "step": 5558 }, { "epoch": 0.7870036101083032, "grad_norm": 8.046871681583054, "learning_rate": 3.458429409277846e-06, "loss": 1.1766, "step": 5559 }, { "epoch": 0.7871451829829405, "grad_norm": 10.451977950541863, "learning_rate": 3.4578999802645905e-06, "loss": 1.488, "step": 5560 }, { "epoch": 0.7872867558575777, "grad_norm": 10.717291818976726, "learning_rate": 3.457370500895247e-06, "loss": 1.2668, "step": 5561 }, { "epoch": 0.7874283287322149, "grad_norm": 8.462857494381838, "learning_rate": 3.4568409711976515e-06, "loss": 1.269, "step": 5562 }, { "epoch": 0.7875699016068521, "grad_norm": 11.117160049157958, "learning_rate": 3.4563113911996395e-06, "loss": 1.4372, "step": 5563 }, { "epoch": 0.7877114744814894, "grad_norm": 8.639611734920488, "learning_rate": 3.455781760929049e-06, "loss": 1.2509, "step": 5564 }, { "epoch": 0.7878530473561266, "grad_norm": 7.696141046369617, "learning_rate": 3.4552520804137248e-06, "loss": 1.1885, "step": 5565 }, { "epoch": 0.7879946202307638, "grad_norm": 10.143700178944195, "learning_rate": 3.4547223496815115e-06, "loss": 1.2476, "step": 5566 }, { "epoch": 0.788136193105401, "grad_norm": 6.984152975969817, "learning_rate": 3.4541925687602553e-06, "loss": 1.237, "step": 5567 }, { "epoch": 0.7882777659800382, "grad_norm": 7.960806683634468, "learning_rate": 3.453662737677808e-06, "loss": 1.2389, "step": 5568 }, { "epoch": 0.7884193388546754, "grad_norm": 8.492173125562811, "learning_rate": 3.4531328564620215e-06, "loss": 1.3729, "step": 5569 }, { "epoch": 0.7885609117293126, "grad_norm": 8.232115346828913, "learning_rate": 3.452602925140751e-06, "loss": 1.2559, "step": 5570 }, { "epoch": 0.7887024846039499, "grad_norm": 8.852262356489724, "learning_rate": 3.4520729437418553e-06, "loss": 1.2993, "step": 5571 }, { "epoch": 0.7888440574785871, "grad_norm": 11.383204592809435, "learning_rate": 3.4515429122931955e-06, "loss": 1.5127, "step": 5572 }, { "epoch": 0.7889856303532243, "grad_norm": 9.526026761040029, "learning_rate": 3.451012830822633e-06, "loss": 1.2808, "step": 5573 }, { "epoch": 0.7891272032278616, "grad_norm": 8.604970037041321, "learning_rate": 3.4504826993580364e-06, "loss": 1.2112, "step": 5574 }, { "epoch": 0.7892687761024988, "grad_norm": 6.93880608438577, "learning_rate": 3.449952517927272e-06, "loss": 1.2835, "step": 5575 }, { "epoch": 0.789410348977136, "grad_norm": 7.063499717510955, "learning_rate": 3.4494222865582126e-06, "loss": 1.1498, "step": 5576 }, { "epoch": 0.7895519218517733, "grad_norm": 7.834703068660118, "learning_rate": 3.4488920052787313e-06, "loss": 1.321, "step": 5577 }, { "epoch": 0.7896934947264104, "grad_norm": 8.524971129069286, "learning_rate": 3.4483616741167046e-06, "loss": 1.2464, "step": 5578 }, { "epoch": 0.7898350676010476, "grad_norm": 8.158739199753223, "learning_rate": 3.4478312931000123e-06, "loss": 1.3348, "step": 5579 }, { "epoch": 0.7899766404756848, "grad_norm": 7.9625032960893884, "learning_rate": 3.4473008622565353e-06, "loss": 1.274, "step": 5580 }, { "epoch": 0.7901182133503221, "grad_norm": 8.497624682471015, "learning_rate": 3.4467703816141584e-06, "loss": 1.2214, "step": 5581 }, { "epoch": 0.7902597862249593, "grad_norm": 10.317705909591357, "learning_rate": 3.4462398512007684e-06, "loss": 1.4899, "step": 5582 }, { "epoch": 0.7904013590995965, "grad_norm": 8.237014723797609, "learning_rate": 3.445709271044255e-06, "loss": 1.2765, "step": 5583 }, { "epoch": 0.7905429319742338, "grad_norm": 8.349577622381144, "learning_rate": 3.445178641172511e-06, "loss": 1.4235, "step": 5584 }, { "epoch": 0.790684504848871, "grad_norm": 8.211595329262305, "learning_rate": 3.44464796161343e-06, "loss": 1.298, "step": 5585 }, { "epoch": 0.7908260777235082, "grad_norm": 7.690084627675469, "learning_rate": 3.44411723239491e-06, "loss": 1.3429, "step": 5586 }, { "epoch": 0.7909676505981454, "grad_norm": 6.456464665288666, "learning_rate": 3.4435864535448504e-06, "loss": 1.2353, "step": 5587 }, { "epoch": 0.7911092234727826, "grad_norm": 7.188827524737252, "learning_rate": 3.443055625091155e-06, "loss": 1.2068, "step": 5588 }, { "epoch": 0.7912507963474198, "grad_norm": 8.277983399069694, "learning_rate": 3.4425247470617294e-06, "loss": 1.239, "step": 5589 }, { "epoch": 0.791392369222057, "grad_norm": 8.61755403515569, "learning_rate": 3.44199381948448e-06, "loss": 1.1848, "step": 5590 }, { "epoch": 0.7915339420966943, "grad_norm": 8.636622904121047, "learning_rate": 3.441462842387318e-06, "loss": 1.244, "step": 5591 }, { "epoch": 0.7916755149713315, "grad_norm": 10.733984083584279, "learning_rate": 3.4409318157981565e-06, "loss": 1.2015, "step": 5592 }, { "epoch": 0.7918170878459687, "grad_norm": 8.407007346373183, "learning_rate": 3.4404007397449104e-06, "loss": 1.2199, "step": 5593 }, { "epoch": 0.791958660720606, "grad_norm": 8.554856501612614, "learning_rate": 3.439869614255499e-06, "loss": 1.1478, "step": 5594 }, { "epoch": 0.7921002335952432, "grad_norm": 8.861314564611071, "learning_rate": 3.4393384393578427e-06, "loss": 1.2609, "step": 5595 }, { "epoch": 0.7922418064698804, "grad_norm": 8.379438448278982, "learning_rate": 3.438807215079865e-06, "loss": 1.2001, "step": 5596 }, { "epoch": 0.7923833793445176, "grad_norm": 8.867109632780352, "learning_rate": 3.438275941449492e-06, "loss": 1.3096, "step": 5597 }, { "epoch": 0.7925249522191548, "grad_norm": 8.696471048857305, "learning_rate": 3.437744618494653e-06, "loss": 1.2164, "step": 5598 }, { "epoch": 0.792666525093792, "grad_norm": 8.979009097340152, "learning_rate": 3.437213246243277e-06, "loss": 1.1722, "step": 5599 }, { "epoch": 0.7928080979684292, "grad_norm": 9.212779448992647, "learning_rate": 3.4366818247233015e-06, "loss": 1.3019, "step": 5600 }, { "epoch": 0.7929496708430664, "grad_norm": 7.874520907890752, "learning_rate": 3.4361503539626593e-06, "loss": 1.0359, "step": 5601 }, { "epoch": 0.7930912437177037, "grad_norm": 9.574102008034691, "learning_rate": 3.4356188339892915e-06, "loss": 1.1509, "step": 5602 }, { "epoch": 0.7932328165923409, "grad_norm": 8.488210692729073, "learning_rate": 3.4350872648311396e-06, "loss": 1.1364, "step": 5603 }, { "epoch": 0.7933743894669781, "grad_norm": 8.547791259301881, "learning_rate": 3.434555646516147e-06, "loss": 1.3167, "step": 5604 }, { "epoch": 0.7935159623416154, "grad_norm": 9.546013076757285, "learning_rate": 3.434023979072262e-06, "loss": 1.3116, "step": 5605 }, { "epoch": 0.7936575352162526, "grad_norm": 9.470644116193256, "learning_rate": 3.4334922625274312e-06, "loss": 1.216, "step": 5606 }, { "epoch": 0.7937991080908898, "grad_norm": 9.151447169192824, "learning_rate": 3.432960496909609e-06, "loss": 1.4724, "step": 5607 }, { "epoch": 0.7939406809655271, "grad_norm": 9.052620389317445, "learning_rate": 3.4324286822467496e-06, "loss": 1.3992, "step": 5608 }, { "epoch": 0.7940822538401642, "grad_norm": 8.265736223997656, "learning_rate": 3.431896818566809e-06, "loss": 1.1354, "step": 5609 }, { "epoch": 0.7942238267148014, "grad_norm": 7.843102173820278, "learning_rate": 3.4313649058977473e-06, "loss": 1.2133, "step": 5610 }, { "epoch": 0.7943653995894386, "grad_norm": 7.995851633257364, "learning_rate": 3.4308329442675276e-06, "loss": 1.1694, "step": 5611 }, { "epoch": 0.7945069724640759, "grad_norm": 8.525993991750363, "learning_rate": 3.430300933704114e-06, "loss": 1.3214, "step": 5612 }, { "epoch": 0.7946485453387131, "grad_norm": 8.147266570800516, "learning_rate": 3.4297688742354728e-06, "loss": 1.1988, "step": 5613 }, { "epoch": 0.7947901182133503, "grad_norm": 7.242645216286927, "learning_rate": 3.4292367658895764e-06, "loss": 1.2691, "step": 5614 }, { "epoch": 0.7949316910879876, "grad_norm": 8.2230538242661, "learning_rate": 3.4287046086943956e-06, "loss": 1.2563, "step": 5615 }, { "epoch": 0.7950732639626248, "grad_norm": 9.038339544743504, "learning_rate": 3.428172402677906e-06, "loss": 1.1168, "step": 5616 }, { "epoch": 0.795214836837262, "grad_norm": 8.276043561611514, "learning_rate": 3.4276401478680856e-06, "loss": 1.3338, "step": 5617 }, { "epoch": 0.7953564097118992, "grad_norm": 8.339188718421314, "learning_rate": 3.427107844292914e-06, "loss": 1.223, "step": 5618 }, { "epoch": 0.7954979825865364, "grad_norm": 9.479483284046633, "learning_rate": 3.426575491980374e-06, "loss": 1.0724, "step": 5619 }, { "epoch": 0.7956395554611736, "grad_norm": 8.523904733412785, "learning_rate": 3.426043090958452e-06, "loss": 1.2427, "step": 5620 }, { "epoch": 0.7957811283358108, "grad_norm": 7.696081070835789, "learning_rate": 3.4255106412551352e-06, "loss": 1.1902, "step": 5621 }, { "epoch": 0.7959227012104481, "grad_norm": 8.22717642236786, "learning_rate": 3.4249781428984143e-06, "loss": 1.1763, "step": 5622 }, { "epoch": 0.7960642740850853, "grad_norm": 8.097816417856402, "learning_rate": 3.424445595916281e-06, "loss": 1.2244, "step": 5623 }, { "epoch": 0.7962058469597225, "grad_norm": 9.24424440868479, "learning_rate": 3.423913000336732e-06, "loss": 1.4762, "step": 5624 }, { "epoch": 0.7963474198343597, "grad_norm": 9.483687198340979, "learning_rate": 3.423380356187766e-06, "loss": 1.1896, "step": 5625 }, { "epoch": 0.796488992708997, "grad_norm": 8.566310508043404, "learning_rate": 3.422847663497384e-06, "loss": 1.3442, "step": 5626 }, { "epoch": 0.7966305655836342, "grad_norm": 9.111993586697889, "learning_rate": 3.4223149222935875e-06, "loss": 1.2176, "step": 5627 }, { "epoch": 0.7967721384582714, "grad_norm": 7.070711746324367, "learning_rate": 3.421782132604383e-06, "loss": 1.2957, "step": 5628 }, { "epoch": 0.7969137113329087, "grad_norm": 11.02989642346222, "learning_rate": 3.4212492944577796e-06, "loss": 1.2928, "step": 5629 }, { "epoch": 0.7970552842075458, "grad_norm": 8.58350863478324, "learning_rate": 3.420716407881788e-06, "loss": 1.4643, "step": 5630 }, { "epoch": 0.797196857082183, "grad_norm": 9.116241850593676, "learning_rate": 3.4201834729044208e-06, "loss": 1.2764, "step": 5631 }, { "epoch": 0.7973384299568202, "grad_norm": 7.7847855590215955, "learning_rate": 3.4196504895536948e-06, "loss": 1.1452, "step": 5632 }, { "epoch": 0.7974800028314575, "grad_norm": 8.556360870155725, "learning_rate": 3.419117457857628e-06, "loss": 1.1578, "step": 5633 }, { "epoch": 0.7976215757060947, "grad_norm": 8.951461024881942, "learning_rate": 3.4185843778442417e-06, "loss": 1.0394, "step": 5634 }, { "epoch": 0.7977631485807319, "grad_norm": 10.7570965431011, "learning_rate": 3.4180512495415603e-06, "loss": 1.1933, "step": 5635 }, { "epoch": 0.7979047214553692, "grad_norm": 9.825068303227512, "learning_rate": 3.417518072977609e-06, "loss": 1.2837, "step": 5636 }, { "epoch": 0.7980462943300064, "grad_norm": 9.592960685542053, "learning_rate": 3.4169848481804165e-06, "loss": 1.3342, "step": 5637 }, { "epoch": 0.7981878672046436, "grad_norm": 10.020034176583549, "learning_rate": 3.416451575178014e-06, "loss": 1.2307, "step": 5638 }, { "epoch": 0.7983294400792809, "grad_norm": 8.567049697249148, "learning_rate": 3.4159182539984352e-06, "loss": 1.3354, "step": 5639 }, { "epoch": 0.798471012953918, "grad_norm": 9.662281708021961, "learning_rate": 3.4153848846697174e-06, "loss": 1.2123, "step": 5640 }, { "epoch": 0.7986125858285552, "grad_norm": 9.86836077434133, "learning_rate": 3.4148514672198986e-06, "loss": 1.2004, "step": 5641 }, { "epoch": 0.7987541587031924, "grad_norm": 10.748266390805302, "learning_rate": 3.414318001677021e-06, "loss": 1.3468, "step": 5642 }, { "epoch": 0.7988957315778297, "grad_norm": 9.166969248083715, "learning_rate": 3.4137844880691275e-06, "loss": 1.2797, "step": 5643 }, { "epoch": 0.7990373044524669, "grad_norm": 8.436891434939822, "learning_rate": 3.413250926424264e-06, "loss": 1.1905, "step": 5644 }, { "epoch": 0.7991788773271041, "grad_norm": 6.7295366152076115, "learning_rate": 3.4127173167704807e-06, "loss": 1.2038, "step": 5645 }, { "epoch": 0.7993204502017414, "grad_norm": 8.626195852333316, "learning_rate": 3.4121836591358288e-06, "loss": 1.3686, "step": 5646 }, { "epoch": 0.7994620230763786, "grad_norm": 8.057552268497568, "learning_rate": 3.4116499535483623e-06, "loss": 1.2902, "step": 5647 }, { "epoch": 0.7996035959510158, "grad_norm": 11.803151139888648, "learning_rate": 3.4111162000361363e-06, "loss": 1.1868, "step": 5648 }, { "epoch": 0.799745168825653, "grad_norm": 9.451934890865063, "learning_rate": 3.4105823986272125e-06, "loss": 1.2612, "step": 5649 }, { "epoch": 0.7998867417002902, "grad_norm": 10.10039743075872, "learning_rate": 3.41004854934965e-06, "loss": 1.3255, "step": 5650 }, { "epoch": 0.8000283145749274, "grad_norm": 10.933035151749698, "learning_rate": 3.4095146522315144e-06, "loss": 1.3242, "step": 5651 }, { "epoch": 0.8001698874495646, "grad_norm": 7.958998514174557, "learning_rate": 3.408980707300871e-06, "loss": 1.2654, "step": 5652 }, { "epoch": 0.8003114603242019, "grad_norm": 11.123939034951617, "learning_rate": 3.4084467145857903e-06, "loss": 1.1701, "step": 5653 }, { "epoch": 0.8004530331988391, "grad_norm": 10.711848492212592, "learning_rate": 3.4079126741143427e-06, "loss": 1.1418, "step": 5654 }, { "epoch": 0.8005946060734763, "grad_norm": 10.441935984270799, "learning_rate": 3.407378585914603e-06, "loss": 1.2137, "step": 5655 }, { "epoch": 0.8007361789481136, "grad_norm": 8.93912048587752, "learning_rate": 3.4068444500146476e-06, "loss": 1.2602, "step": 5656 }, { "epoch": 0.8008777518227508, "grad_norm": 8.427958200605431, "learning_rate": 3.4063102664425557e-06, "loss": 1.2651, "step": 5657 }, { "epoch": 0.801019324697388, "grad_norm": 10.770274361150376, "learning_rate": 3.405776035226409e-06, "loss": 1.2877, "step": 5658 }, { "epoch": 0.8011608975720252, "grad_norm": 9.005203861892205, "learning_rate": 3.405241756394291e-06, "loss": 1.219, "step": 5659 }, { "epoch": 0.8013024704466625, "grad_norm": 9.356698852454713, "learning_rate": 3.4047074299742894e-06, "loss": 1.1592, "step": 5660 }, { "epoch": 0.8014440433212996, "grad_norm": 11.599161946508382, "learning_rate": 3.4041730559944918e-06, "loss": 1.3808, "step": 5661 }, { "epoch": 0.8015856161959368, "grad_norm": 9.418640289983973, "learning_rate": 3.403638634482992e-06, "loss": 1.2182, "step": 5662 }, { "epoch": 0.801727189070574, "grad_norm": 8.685475065407557, "learning_rate": 3.403104165467883e-06, "loss": 1.062, "step": 5663 }, { "epoch": 0.8018687619452113, "grad_norm": 9.927485956524551, "learning_rate": 3.4025696489772607e-06, "loss": 1.1007, "step": 5664 }, { "epoch": 0.8020103348198485, "grad_norm": 9.949100656935578, "learning_rate": 3.402035085039225e-06, "loss": 1.2968, "step": 5665 }, { "epoch": 0.8021519076944857, "grad_norm": 7.956457852437678, "learning_rate": 3.401500473681878e-06, "loss": 1.1431, "step": 5666 }, { "epoch": 0.802293480569123, "grad_norm": 9.215133940561063, "learning_rate": 3.4009658149333223e-06, "loss": 1.3138, "step": 5667 }, { "epoch": 0.8024350534437602, "grad_norm": 10.059827366707106, "learning_rate": 3.4004311088216667e-06, "loss": 1.2733, "step": 5668 }, { "epoch": 0.8025766263183974, "grad_norm": 9.659862447513568, "learning_rate": 3.3998963553750186e-06, "loss": 1.3302, "step": 5669 }, { "epoch": 0.8027181991930347, "grad_norm": 9.559092194937163, "learning_rate": 3.3993615546214898e-06, "loss": 1.259, "step": 5670 }, { "epoch": 0.8028597720676718, "grad_norm": 9.024914482747105, "learning_rate": 3.3988267065891945e-06, "loss": 1.3378, "step": 5671 }, { "epoch": 0.803001344942309, "grad_norm": 7.609980063694268, "learning_rate": 3.39829181130625e-06, "loss": 1.1763, "step": 5672 }, { "epoch": 0.8031429178169462, "grad_norm": 9.446252263396868, "learning_rate": 3.3977568688007745e-06, "loss": 1.1, "step": 5673 }, { "epoch": 0.8032844906915835, "grad_norm": 7.7497236141020345, "learning_rate": 3.3972218791008902e-06, "loss": 1.2873, "step": 5674 }, { "epoch": 0.8034260635662207, "grad_norm": 10.646799500300988, "learning_rate": 3.3966868422347204e-06, "loss": 1.2446, "step": 5675 }, { "epoch": 0.8035676364408579, "grad_norm": 8.674363117859908, "learning_rate": 3.3961517582303916e-06, "loss": 1.2408, "step": 5676 }, { "epoch": 0.8037092093154952, "grad_norm": 9.125404244443734, "learning_rate": 3.395616627116033e-06, "loss": 1.1184, "step": 5677 }, { "epoch": 0.8038507821901324, "grad_norm": 9.05781044478802, "learning_rate": 3.395081448919777e-06, "loss": 1.4428, "step": 5678 }, { "epoch": 0.8039923550647696, "grad_norm": 7.770786343211715, "learning_rate": 3.394546223669756e-06, "loss": 1.1872, "step": 5679 }, { "epoch": 0.8041339279394069, "grad_norm": 8.151431710333018, "learning_rate": 3.394010951394107e-06, "loss": 1.1701, "step": 5680 }, { "epoch": 0.804275500814044, "grad_norm": 9.815665930972642, "learning_rate": 3.3934756321209693e-06, "loss": 1.0741, "step": 5681 }, { "epoch": 0.8044170736886812, "grad_norm": 7.856385528924963, "learning_rate": 3.3929402658784837e-06, "loss": 1.3475, "step": 5682 }, { "epoch": 0.8045586465633184, "grad_norm": 10.336521723261129, "learning_rate": 3.3924048526947937e-06, "loss": 1.3204, "step": 5683 }, { "epoch": 0.8047002194379557, "grad_norm": 9.272296735726865, "learning_rate": 3.3918693925980455e-06, "loss": 1.2548, "step": 5684 }, { "epoch": 0.8048417923125929, "grad_norm": 9.000115711739879, "learning_rate": 3.3913338856163897e-06, "loss": 1.1703, "step": 5685 }, { "epoch": 0.8049833651872301, "grad_norm": 7.944264090505631, "learning_rate": 3.390798331777976e-06, "loss": 1.3635, "step": 5686 }, { "epoch": 0.8051249380618674, "grad_norm": 8.843932025235844, "learning_rate": 3.390262731110957e-06, "loss": 1.2866, "step": 5687 }, { "epoch": 0.8052665109365046, "grad_norm": 8.707819824653747, "learning_rate": 3.3897270836434914e-06, "loss": 1.3295, "step": 5688 }, { "epoch": 0.8054080838111418, "grad_norm": 9.813498282221088, "learning_rate": 3.3891913894037354e-06, "loss": 1.1751, "step": 5689 }, { "epoch": 0.805549656685779, "grad_norm": 7.50491858370216, "learning_rate": 3.3886556484198517e-06, "loss": 1.0916, "step": 5690 }, { "epoch": 0.8056912295604163, "grad_norm": 9.5539827999274, "learning_rate": 3.388119860720003e-06, "loss": 1.2394, "step": 5691 }, { "epoch": 0.8058328024350534, "grad_norm": 11.081805781214477, "learning_rate": 3.3875840263323552e-06, "loss": 1.2033, "step": 5692 }, { "epoch": 0.8059743753096906, "grad_norm": 7.940553093006278, "learning_rate": 3.3870481452850765e-06, "loss": 1.3941, "step": 5693 }, { "epoch": 0.8061159481843279, "grad_norm": 8.269844924967325, "learning_rate": 3.386512217606339e-06, "loss": 1.2094, "step": 5694 }, { "epoch": 0.8062575210589651, "grad_norm": 10.456945157471525, "learning_rate": 3.385976243324316e-06, "loss": 1.4803, "step": 5695 }, { "epoch": 0.8063990939336023, "grad_norm": 8.420499275246902, "learning_rate": 3.3854402224671813e-06, "loss": 1.164, "step": 5696 }, { "epoch": 0.8065406668082395, "grad_norm": 9.44990381262535, "learning_rate": 3.3849041550631145e-06, "loss": 1.303, "step": 5697 }, { "epoch": 0.8066822396828768, "grad_norm": 8.779848468837159, "learning_rate": 3.384368041140296e-06, "loss": 1.2503, "step": 5698 }, { "epoch": 0.806823812557514, "grad_norm": 8.759397419449323, "learning_rate": 3.383831880726909e-06, "loss": 1.2359, "step": 5699 }, { "epoch": 0.8069653854321512, "grad_norm": 7.709895565052969, "learning_rate": 3.3832956738511395e-06, "loss": 1.294, "step": 5700 }, { "epoch": 0.8071069583067885, "grad_norm": 9.090776619379504, "learning_rate": 3.3827594205411746e-06, "loss": 1.1822, "step": 5701 }, { "epoch": 0.8072485311814256, "grad_norm": 8.03849922483032, "learning_rate": 3.3822231208252053e-06, "loss": 1.0817, "step": 5702 }, { "epoch": 0.8073901040560628, "grad_norm": 7.125607916345821, "learning_rate": 3.3816867747314242e-06, "loss": 1.2914, "step": 5703 }, { "epoch": 0.8075316769307, "grad_norm": 8.453011277539096, "learning_rate": 3.381150382288027e-06, "loss": 1.2479, "step": 5704 }, { "epoch": 0.8076732498053373, "grad_norm": 10.073548879313872, "learning_rate": 3.380613943523211e-06, "loss": 1.309, "step": 5705 }, { "epoch": 0.8078148226799745, "grad_norm": 8.562512919840845, "learning_rate": 3.3800774584651767e-06, "loss": 1.2482, "step": 5706 }, { "epoch": 0.8079563955546117, "grad_norm": 10.208308960762253, "learning_rate": 3.379540927142127e-06, "loss": 1.2201, "step": 5707 }, { "epoch": 0.808097968429249, "grad_norm": 7.7457839819239025, "learning_rate": 3.3790043495822663e-06, "loss": 1.2991, "step": 5708 }, { "epoch": 0.8082395413038862, "grad_norm": 8.12549718656087, "learning_rate": 3.378467725813802e-06, "loss": 1.1942, "step": 5709 }, { "epoch": 0.8083811141785234, "grad_norm": 10.461712376197559, "learning_rate": 3.3779310558649447e-06, "loss": 1.1936, "step": 5710 }, { "epoch": 0.8085226870531607, "grad_norm": 9.241004074830567, "learning_rate": 3.3773943397639068e-06, "loss": 1.2278, "step": 5711 }, { "epoch": 0.8086642599277978, "grad_norm": 9.421854351267797, "learning_rate": 3.3768575775389022e-06, "loss": 1.3372, "step": 5712 }, { "epoch": 0.808805832802435, "grad_norm": 8.77460747082682, "learning_rate": 3.3763207692181483e-06, "loss": 1.1974, "step": 5713 }, { "epoch": 0.8089474056770722, "grad_norm": 9.397134108629418, "learning_rate": 3.375783914829865e-06, "loss": 1.1459, "step": 5714 }, { "epoch": 0.8090889785517095, "grad_norm": 8.770899635722774, "learning_rate": 3.3752470144022745e-06, "loss": 1.222, "step": 5715 }, { "epoch": 0.8092305514263467, "grad_norm": 9.86846862353186, "learning_rate": 3.374710067963602e-06, "loss": 1.2676, "step": 5716 }, { "epoch": 0.8093721243009839, "grad_norm": 7.810323427270744, "learning_rate": 3.374173075542072e-06, "loss": 1.2889, "step": 5717 }, { "epoch": 0.8095136971756212, "grad_norm": 11.692840498394666, "learning_rate": 3.373636037165916e-06, "loss": 1.1544, "step": 5718 }, { "epoch": 0.8096552700502584, "grad_norm": 10.841585938262828, "learning_rate": 3.373098952863365e-06, "loss": 1.2813, "step": 5719 }, { "epoch": 0.8097968429248956, "grad_norm": 8.097200225340721, "learning_rate": 3.372561822662652e-06, "loss": 1.2274, "step": 5720 }, { "epoch": 0.8099384157995329, "grad_norm": 9.111290653392937, "learning_rate": 3.3720246465920154e-06, "loss": 1.2787, "step": 5721 }, { "epoch": 0.8100799886741701, "grad_norm": 7.576439235421684, "learning_rate": 3.3714874246796935e-06, "loss": 1.3201, "step": 5722 }, { "epoch": 0.8102215615488072, "grad_norm": 11.635460525926364, "learning_rate": 3.3709501569539277e-06, "loss": 1.3422, "step": 5723 }, { "epoch": 0.8103631344234444, "grad_norm": 9.257492330302275, "learning_rate": 3.370412843442961e-06, "loss": 1.3257, "step": 5724 }, { "epoch": 0.8105047072980817, "grad_norm": 11.3304826479781, "learning_rate": 3.3698754841750403e-06, "loss": 1.1832, "step": 5725 }, { "epoch": 0.8106462801727189, "grad_norm": 8.619545358940007, "learning_rate": 3.369338079178414e-06, "loss": 1.2059, "step": 5726 }, { "epoch": 0.8107878530473561, "grad_norm": 9.574552632132153, "learning_rate": 3.368800628481333e-06, "loss": 1.1932, "step": 5727 }, { "epoch": 0.8109294259219934, "grad_norm": 11.274154841176012, "learning_rate": 3.3682631321120507e-06, "loss": 1.1458, "step": 5728 }, { "epoch": 0.8110709987966306, "grad_norm": 9.77512584712588, "learning_rate": 3.3677255900988236e-06, "loss": 1.3902, "step": 5729 }, { "epoch": 0.8112125716712678, "grad_norm": 9.738156289937336, "learning_rate": 3.3671880024699085e-06, "loss": 1.4208, "step": 5730 }, { "epoch": 0.811354144545905, "grad_norm": 8.874653124410738, "learning_rate": 3.3666503692535667e-06, "loss": 1.1856, "step": 5731 }, { "epoch": 0.8114957174205423, "grad_norm": 8.190131543492267, "learning_rate": 3.3661126904780624e-06, "loss": 1.3293, "step": 5732 }, { "epoch": 0.8116372902951794, "grad_norm": 10.124061446782248, "learning_rate": 3.3655749661716585e-06, "loss": 1.2568, "step": 5733 }, { "epoch": 0.8117788631698166, "grad_norm": 8.001928573842616, "learning_rate": 3.3650371963626243e-06, "loss": 1.2065, "step": 5734 }, { "epoch": 0.8119204360444539, "grad_norm": 9.421370510177331, "learning_rate": 3.3644993810792297e-06, "loss": 1.1964, "step": 5735 }, { "epoch": 0.8120620089190911, "grad_norm": 10.04224361382039, "learning_rate": 3.3639615203497467e-06, "loss": 1.2417, "step": 5736 }, { "epoch": 0.8122035817937283, "grad_norm": 9.133219740038243, "learning_rate": 3.3634236142024516e-06, "loss": 1.1444, "step": 5737 }, { "epoch": 0.8123451546683655, "grad_norm": 8.613305165783125, "learning_rate": 3.362885662665621e-06, "loss": 1.0802, "step": 5738 }, { "epoch": 0.8124867275430028, "grad_norm": 9.283473714957596, "learning_rate": 3.3623476657675342e-06, "loss": 1.1694, "step": 5739 }, { "epoch": 0.81262830041764, "grad_norm": 10.280085257091324, "learning_rate": 3.3618096235364734e-06, "loss": 1.1441, "step": 5740 }, { "epoch": 0.8127698732922772, "grad_norm": 8.17799163114885, "learning_rate": 3.361271536000723e-06, "loss": 1.1994, "step": 5741 }, { "epoch": 0.8129114461669145, "grad_norm": 8.906841485978498, "learning_rate": 3.3607334031885707e-06, "loss": 1.4136, "step": 5742 }, { "epoch": 0.8130530190415516, "grad_norm": 7.647586825857762, "learning_rate": 3.3601952251283056e-06, "loss": 1.2736, "step": 5743 }, { "epoch": 0.8131945919161888, "grad_norm": 8.735988188180169, "learning_rate": 3.359657001848218e-06, "loss": 1.3136, "step": 5744 }, { "epoch": 0.813336164790826, "grad_norm": 9.902353380789203, "learning_rate": 3.359118733376603e-06, "loss": 1.2284, "step": 5745 }, { "epoch": 0.8134777376654633, "grad_norm": 11.10953192190948, "learning_rate": 3.358580419741757e-06, "loss": 1.295, "step": 5746 }, { "epoch": 0.8136193105401005, "grad_norm": 8.691002887409763, "learning_rate": 3.3580420609719783e-06, "loss": 1.2711, "step": 5747 }, { "epoch": 0.8137608834147377, "grad_norm": 9.449332997836615, "learning_rate": 3.3575036570955687e-06, "loss": 1.4344, "step": 5748 }, { "epoch": 0.813902456289375, "grad_norm": 7.7101357766947505, "learning_rate": 3.356965208140831e-06, "loss": 1.2796, "step": 5749 }, { "epoch": 0.8140440291640122, "grad_norm": 12.521884529218672, "learning_rate": 3.3564267141360706e-06, "loss": 1.2829, "step": 5750 }, { "epoch": 0.8141856020386494, "grad_norm": 9.171050208469559, "learning_rate": 3.3558881751095975e-06, "loss": 1.2371, "step": 5751 }, { "epoch": 0.8143271749132867, "grad_norm": 8.35984318928376, "learning_rate": 3.3553495910897206e-06, "loss": 1.3266, "step": 5752 }, { "epoch": 0.8144687477879239, "grad_norm": 9.186319080673886, "learning_rate": 3.354810962104754e-06, "loss": 1.3528, "step": 5753 }, { "epoch": 0.814610320662561, "grad_norm": 9.92245515343721, "learning_rate": 3.354272288183012e-06, "loss": 1.2931, "step": 5754 }, { "epoch": 0.8147518935371982, "grad_norm": 9.000545485178202, "learning_rate": 3.353733569352813e-06, "loss": 1.1994, "step": 5755 }, { "epoch": 0.8148934664118355, "grad_norm": 9.546811866863674, "learning_rate": 3.3531948056424766e-06, "loss": 1.2789, "step": 5756 }, { "epoch": 0.8150350392864727, "grad_norm": 8.987409898415056, "learning_rate": 3.352655997080325e-06, "loss": 1.2027, "step": 5757 }, { "epoch": 0.8151766121611099, "grad_norm": 9.203767867376, "learning_rate": 3.3521171436946844e-06, "loss": 1.2488, "step": 5758 }, { "epoch": 0.8153181850357472, "grad_norm": 9.96048176040768, "learning_rate": 3.35157824551388e-06, "loss": 1.3027, "step": 5759 }, { "epoch": 0.8154597579103844, "grad_norm": 8.354264738374507, "learning_rate": 3.351039302566243e-06, "loss": 1.1957, "step": 5760 }, { "epoch": 0.8156013307850216, "grad_norm": 7.372815083277029, "learning_rate": 3.350500314880104e-06, "loss": 1.2295, "step": 5761 }, { "epoch": 0.8157429036596588, "grad_norm": 7.665575226203444, "learning_rate": 3.3499612824837978e-06, "loss": 1.1923, "step": 5762 }, { "epoch": 0.8158844765342961, "grad_norm": 9.862287620123983, "learning_rate": 3.3494222054056606e-06, "loss": 1.2083, "step": 5763 }, { "epoch": 0.8160260494089332, "grad_norm": 8.435976244065039, "learning_rate": 3.3488830836740315e-06, "loss": 1.018, "step": 5764 }, { "epoch": 0.8161676222835704, "grad_norm": 8.804736457966149, "learning_rate": 3.3483439173172517e-06, "loss": 1.2604, "step": 5765 }, { "epoch": 0.8163091951582077, "grad_norm": 10.398662378414539, "learning_rate": 3.347804706363664e-06, "loss": 1.2921, "step": 5766 }, { "epoch": 0.8164507680328449, "grad_norm": 8.126078842137382, "learning_rate": 3.3472654508416157e-06, "loss": 1.2832, "step": 5767 }, { "epoch": 0.8165923409074821, "grad_norm": 10.62705596778715, "learning_rate": 3.346726150779455e-06, "loss": 1.3776, "step": 5768 }, { "epoch": 0.8167339137821193, "grad_norm": 10.797273885201337, "learning_rate": 3.3461868062055313e-06, "loss": 1.2638, "step": 5769 }, { "epoch": 0.8168754866567566, "grad_norm": 9.257458540833651, "learning_rate": 3.345647417148198e-06, "loss": 1.3619, "step": 5770 }, { "epoch": 0.8170170595313938, "grad_norm": 8.406975583665952, "learning_rate": 3.3451079836358107e-06, "loss": 1.2552, "step": 5771 }, { "epoch": 0.817158632406031, "grad_norm": 8.469831446892526, "learning_rate": 3.344568505696727e-06, "loss": 1.2308, "step": 5772 }, { "epoch": 0.8173002052806683, "grad_norm": 8.184268226232922, "learning_rate": 3.3440289833593053e-06, "loss": 1.1529, "step": 5773 }, { "epoch": 0.8174417781553055, "grad_norm": 9.639955073841557, "learning_rate": 3.3434894166519104e-06, "loss": 1.2679, "step": 5774 }, { "epoch": 0.8175833510299426, "grad_norm": 9.865645988183447, "learning_rate": 3.3429498056029066e-06, "loss": 1.218, "step": 5775 }, { "epoch": 0.8177249239045798, "grad_norm": 9.256978881904157, "learning_rate": 3.342410150240659e-06, "loss": 1.1522, "step": 5776 }, { "epoch": 0.8178664967792171, "grad_norm": 10.285385459608909, "learning_rate": 3.3418704505935383e-06, "loss": 1.2748, "step": 5777 }, { "epoch": 0.8180080696538543, "grad_norm": 9.182132736160627, "learning_rate": 3.341330706689916e-06, "loss": 1.2354, "step": 5778 }, { "epoch": 0.8181496425284915, "grad_norm": 7.2766399933970805, "learning_rate": 3.3407909185581656e-06, "loss": 1.222, "step": 5779 }, { "epoch": 0.8182912154031288, "grad_norm": 7.855705967095196, "learning_rate": 3.340251086226663e-06, "loss": 1.06, "step": 5780 }, { "epoch": 0.818432788277766, "grad_norm": 9.061272899816085, "learning_rate": 3.339711209723788e-06, "loss": 1.1216, "step": 5781 }, { "epoch": 0.8185743611524032, "grad_norm": 10.818833100579893, "learning_rate": 3.33917128907792e-06, "loss": 1.2011, "step": 5782 }, { "epoch": 0.8187159340270405, "grad_norm": 8.167004740458038, "learning_rate": 3.3386313243174436e-06, "loss": 1.2743, "step": 5783 }, { "epoch": 0.8188575069016777, "grad_norm": 8.46982739341356, "learning_rate": 3.338091315470744e-06, "loss": 1.1859, "step": 5784 }, { "epoch": 0.8189990797763148, "grad_norm": 8.770135436453103, "learning_rate": 3.337551262566209e-06, "loss": 1.2815, "step": 5785 }, { "epoch": 0.819140652650952, "grad_norm": 9.622831422326774, "learning_rate": 3.337011165632228e-06, "loss": 1.4267, "step": 5786 }, { "epoch": 0.8192822255255893, "grad_norm": 9.777491618313533, "learning_rate": 3.3364710246971937e-06, "loss": 1.2398, "step": 5787 }, { "epoch": 0.8194237984002265, "grad_norm": 8.19824631709619, "learning_rate": 3.335930839789502e-06, "loss": 1.1298, "step": 5788 }, { "epoch": 0.8195653712748637, "grad_norm": 10.421669656311938, "learning_rate": 3.335390610937549e-06, "loss": 1.3415, "step": 5789 }, { "epoch": 0.819706944149501, "grad_norm": 7.026395350990291, "learning_rate": 3.3348503381697358e-06, "loss": 1.2138, "step": 5790 }, { "epoch": 0.8198485170241382, "grad_norm": 8.888978078182783, "learning_rate": 3.3343100215144614e-06, "loss": 1.2158, "step": 5791 }, { "epoch": 0.8199900898987754, "grad_norm": 8.125238972597446, "learning_rate": 3.3337696610001314e-06, "loss": 1.2409, "step": 5792 }, { "epoch": 0.8201316627734127, "grad_norm": 9.622902381513626, "learning_rate": 3.333229256655153e-06, "loss": 1.2563, "step": 5793 }, { "epoch": 0.8202732356480499, "grad_norm": 9.127172760103395, "learning_rate": 3.332688808507932e-06, "loss": 1.0686, "step": 5794 }, { "epoch": 0.820414808522687, "grad_norm": 8.432657844520547, "learning_rate": 3.332148316586882e-06, "loss": 1.4241, "step": 5795 }, { "epoch": 0.8205563813973242, "grad_norm": 7.637573131954107, "learning_rate": 3.3316077809204168e-06, "loss": 1.2642, "step": 5796 }, { "epoch": 0.8206979542719615, "grad_norm": 10.29096317332192, "learning_rate": 3.3310672015369495e-06, "loss": 1.3033, "step": 5797 }, { "epoch": 0.8208395271465987, "grad_norm": 10.722875040459101, "learning_rate": 3.330526578464899e-06, "loss": 1.26, "step": 5798 }, { "epoch": 0.8209811000212359, "grad_norm": 8.417976000576925, "learning_rate": 3.329985911732686e-06, "loss": 1.3613, "step": 5799 }, { "epoch": 0.8211226728958732, "grad_norm": 8.071176990393278, "learning_rate": 3.329445201368732e-06, "loss": 1.4301, "step": 5800 }, { "epoch": 0.8212642457705104, "grad_norm": 10.513087517721587, "learning_rate": 3.3289044474014624e-06, "loss": 1.4134, "step": 5801 }, { "epoch": 0.8214058186451476, "grad_norm": 9.155695459023248, "learning_rate": 3.3283636498593043e-06, "loss": 1.1919, "step": 5802 }, { "epoch": 0.8215473915197848, "grad_norm": 7.6811899479256, "learning_rate": 3.3278228087706863e-06, "loss": 1.2305, "step": 5803 }, { "epoch": 0.8216889643944221, "grad_norm": 9.794453542205899, "learning_rate": 3.327281924164041e-06, "loss": 1.1461, "step": 5804 }, { "epoch": 0.8218305372690593, "grad_norm": 9.580737306896062, "learning_rate": 3.3267409960678015e-06, "loss": 1.3496, "step": 5805 }, { "epoch": 0.8219721101436964, "grad_norm": 8.389678390213234, "learning_rate": 3.326200024510405e-06, "loss": 1.1337, "step": 5806 }, { "epoch": 0.8221136830183337, "grad_norm": 8.652449121113456, "learning_rate": 3.3256590095202883e-06, "loss": 1.2108, "step": 5807 }, { "epoch": 0.8222552558929709, "grad_norm": 7.339662623746403, "learning_rate": 3.3251179511258934e-06, "loss": 1.2666, "step": 5808 }, { "epoch": 0.8223968287676081, "grad_norm": 13.083294392586522, "learning_rate": 3.324576849355663e-06, "loss": 1.2809, "step": 5809 }, { "epoch": 0.8225384016422453, "grad_norm": 9.316673214204384, "learning_rate": 3.3240357042380423e-06, "loss": 1.2696, "step": 5810 }, { "epoch": 0.8226799745168826, "grad_norm": 7.149379313811368, "learning_rate": 3.3234945158014792e-06, "loss": 1.3139, "step": 5811 }, { "epoch": 0.8228215473915198, "grad_norm": 8.732767629380161, "learning_rate": 3.322953284074424e-06, "loss": 1.1775, "step": 5812 }, { "epoch": 0.822963120266157, "grad_norm": 11.730762072797903, "learning_rate": 3.3224120090853275e-06, "loss": 1.2239, "step": 5813 }, { "epoch": 0.8231046931407943, "grad_norm": 9.660363564925648, "learning_rate": 3.321870690862645e-06, "loss": 1.1945, "step": 5814 }, { "epoch": 0.8232462660154315, "grad_norm": 7.426209671689228, "learning_rate": 3.3213293294348335e-06, "loss": 1.2712, "step": 5815 }, { "epoch": 0.8233878388900686, "grad_norm": 8.505761830310371, "learning_rate": 3.3207879248303513e-06, "loss": 1.2508, "step": 5816 }, { "epoch": 0.8235294117647058, "grad_norm": 10.314720700763818, "learning_rate": 3.3202464770776597e-06, "loss": 1.046, "step": 5817 }, { "epoch": 0.8236709846393431, "grad_norm": 8.783344928461151, "learning_rate": 3.319704986205223e-06, "loss": 1.2614, "step": 5818 }, { "epoch": 0.8238125575139803, "grad_norm": 10.047122175103915, "learning_rate": 3.3191634522415064e-06, "loss": 1.2368, "step": 5819 }, { "epoch": 0.8239541303886175, "grad_norm": 7.597951723447952, "learning_rate": 3.3186218752149767e-06, "loss": 1.1545, "step": 5820 }, { "epoch": 0.8240957032632548, "grad_norm": 8.045624333937768, "learning_rate": 3.3180802551541063e-06, "loss": 1.2201, "step": 5821 }, { "epoch": 0.824237276137892, "grad_norm": 7.8146779190830316, "learning_rate": 3.3175385920873674e-06, "loss": 1.3016, "step": 5822 }, { "epoch": 0.8243788490125292, "grad_norm": 8.732118919132462, "learning_rate": 3.316996886043234e-06, "loss": 1.2619, "step": 5823 }, { "epoch": 0.8245204218871665, "grad_norm": 8.040869744167964, "learning_rate": 3.3164551370501826e-06, "loss": 1.4168, "step": 5824 }, { "epoch": 0.8246619947618037, "grad_norm": 8.449517627920025, "learning_rate": 3.3159133451366937e-06, "loss": 1.2166, "step": 5825 }, { "epoch": 0.8248035676364408, "grad_norm": 11.931230586392338, "learning_rate": 3.315371510331249e-06, "loss": 1.4154, "step": 5826 }, { "epoch": 0.824945140511078, "grad_norm": 8.812474460429526, "learning_rate": 3.3148296326623327e-06, "loss": 1.1507, "step": 5827 }, { "epoch": 0.8250867133857153, "grad_norm": 8.239547581520755, "learning_rate": 3.3142877121584295e-06, "loss": 1.149, "step": 5828 }, { "epoch": 0.8252282862603525, "grad_norm": 8.408971409803843, "learning_rate": 3.313745748848028e-06, "loss": 1.3267, "step": 5829 }, { "epoch": 0.8253698591349897, "grad_norm": 10.075580695671118, "learning_rate": 3.3132037427596193e-06, "loss": 1.1872, "step": 5830 }, { "epoch": 0.825511432009627, "grad_norm": 10.988502389016347, "learning_rate": 3.3126616939216967e-06, "loss": 1.2302, "step": 5831 }, { "epoch": 0.8256530048842642, "grad_norm": 7.6696261899492, "learning_rate": 3.3121196023627543e-06, "loss": 1.1779, "step": 5832 }, { "epoch": 0.8257945777589014, "grad_norm": 8.894897507006826, "learning_rate": 3.31157746811129e-06, "loss": 1.1744, "step": 5833 }, { "epoch": 0.8259361506335386, "grad_norm": 8.8592458241244, "learning_rate": 3.311035291195803e-06, "loss": 1.2951, "step": 5834 }, { "epoch": 0.8260777235081759, "grad_norm": 9.51600543484687, "learning_rate": 3.3104930716447965e-06, "loss": 1.2819, "step": 5835 }, { "epoch": 0.8262192963828131, "grad_norm": 9.815609578943633, "learning_rate": 3.3099508094867727e-06, "loss": 1.1567, "step": 5836 }, { "epoch": 0.8263608692574502, "grad_norm": 8.073682024371784, "learning_rate": 3.3094085047502395e-06, "loss": 1.2914, "step": 5837 }, { "epoch": 0.8265024421320875, "grad_norm": 9.37007683232907, "learning_rate": 3.308866157463705e-06, "loss": 1.2548, "step": 5838 }, { "epoch": 0.8266440150067247, "grad_norm": 10.269663488117791, "learning_rate": 3.3083237676556777e-06, "loss": 1.327, "step": 5839 }, { "epoch": 0.8267855878813619, "grad_norm": 7.846174009938747, "learning_rate": 3.3077813353546744e-06, "loss": 1.21, "step": 5840 }, { "epoch": 0.8269271607559991, "grad_norm": 10.08779703338724, "learning_rate": 3.307238860589208e-06, "loss": 1.2295, "step": 5841 }, { "epoch": 0.8270687336306364, "grad_norm": 9.073215227145528, "learning_rate": 3.3066963433877967e-06, "loss": 1.2399, "step": 5842 }, { "epoch": 0.8272103065052736, "grad_norm": 7.751481683667731, "learning_rate": 3.306153783778961e-06, "loss": 1.2157, "step": 5843 }, { "epoch": 0.8273518793799108, "grad_norm": 8.698710501682985, "learning_rate": 3.305611181791221e-06, "loss": 1.1845, "step": 5844 }, { "epoch": 0.8274934522545481, "grad_norm": 9.467346288794612, "learning_rate": 3.305068537453102e-06, "loss": 1.2113, "step": 5845 }, { "epoch": 0.8276350251291853, "grad_norm": 9.2788896738296, "learning_rate": 3.3045258507931306e-06, "loss": 1.2204, "step": 5846 }, { "epoch": 0.8277765980038224, "grad_norm": 12.44496000069688, "learning_rate": 3.3039831218398346e-06, "loss": 1.173, "step": 5847 }, { "epoch": 0.8279181708784596, "grad_norm": 8.790503354117442, "learning_rate": 3.303440350621745e-06, "loss": 1.1795, "step": 5848 }, { "epoch": 0.8280597437530969, "grad_norm": 12.828205884336963, "learning_rate": 3.3028975371673966e-06, "loss": 1.491, "step": 5849 }, { "epoch": 0.8282013166277341, "grad_norm": 10.69936564338739, "learning_rate": 3.3023546815053227e-06, "loss": 1.2983, "step": 5850 }, { "epoch": 0.8283428895023713, "grad_norm": 11.888193611262697, "learning_rate": 3.301811783664061e-06, "loss": 1.2951, "step": 5851 }, { "epoch": 0.8284844623770086, "grad_norm": 9.806834146230866, "learning_rate": 3.3012688436721518e-06, "loss": 1.3313, "step": 5852 }, { "epoch": 0.8286260352516458, "grad_norm": 8.858471590567891, "learning_rate": 3.3007258615581372e-06, "loss": 1.2521, "step": 5853 }, { "epoch": 0.828767608126283, "grad_norm": 7.465035636188253, "learning_rate": 3.300182837350561e-06, "loss": 1.2956, "step": 5854 }, { "epoch": 0.8289091810009203, "grad_norm": 7.403056941236864, "learning_rate": 3.29963977107797e-06, "loss": 1.2105, "step": 5855 }, { "epoch": 0.8290507538755575, "grad_norm": 13.747503990426658, "learning_rate": 3.2990966627689126e-06, "loss": 1.2676, "step": 5856 }, { "epoch": 0.8291923267501946, "grad_norm": 11.429846365522728, "learning_rate": 3.2985535124519387e-06, "loss": 1.3988, "step": 5857 }, { "epoch": 0.8293338996248318, "grad_norm": 9.509389553754678, "learning_rate": 3.2980103201556023e-06, "loss": 1.1809, "step": 5858 }, { "epoch": 0.8294754724994691, "grad_norm": 9.228402570405391, "learning_rate": 3.297467085908459e-06, "loss": 1.2574, "step": 5859 }, { "epoch": 0.8296170453741063, "grad_norm": 9.144524158343042, "learning_rate": 3.2969238097390655e-06, "loss": 1.1609, "step": 5860 }, { "epoch": 0.8297586182487435, "grad_norm": 10.280244447644245, "learning_rate": 3.2963804916759805e-06, "loss": 1.1501, "step": 5861 }, { "epoch": 0.8299001911233808, "grad_norm": 12.538659091057458, "learning_rate": 3.295837131747768e-06, "loss": 1.3012, "step": 5862 }, { "epoch": 0.830041763998018, "grad_norm": 11.570277552065722, "learning_rate": 3.2952937299829902e-06, "loss": 1.2281, "step": 5863 }, { "epoch": 0.8301833368726552, "grad_norm": 7.935119512243277, "learning_rate": 3.294750286410214e-06, "loss": 1.2364, "step": 5864 }, { "epoch": 0.8303249097472925, "grad_norm": 9.001117636903704, "learning_rate": 3.2942068010580088e-06, "loss": 1.2286, "step": 5865 }, { "epoch": 0.8304664826219297, "grad_norm": 9.382190439152438, "learning_rate": 3.2936632739549437e-06, "loss": 1.1816, "step": 5866 }, { "epoch": 0.8306080554965669, "grad_norm": 8.158136025505124, "learning_rate": 3.2931197051295915e-06, "loss": 1.2333, "step": 5867 }, { "epoch": 0.830749628371204, "grad_norm": 10.72579004183302, "learning_rate": 3.2925760946105277e-06, "loss": 1.2405, "step": 5868 }, { "epoch": 0.8308912012458413, "grad_norm": 8.455551617098322, "learning_rate": 3.2920324424263305e-06, "loss": 1.145, "step": 5869 }, { "epoch": 0.8310327741204785, "grad_norm": 10.306222565604594, "learning_rate": 3.291488748605578e-06, "loss": 1.3003, "step": 5870 }, { "epoch": 0.8311743469951157, "grad_norm": 9.836105042258534, "learning_rate": 3.290945013176852e-06, "loss": 1.3071, "step": 5871 }, { "epoch": 0.831315919869753, "grad_norm": 8.795622118123552, "learning_rate": 3.2904012361687367e-06, "loss": 1.1783, "step": 5872 }, { "epoch": 0.8314574927443902, "grad_norm": 11.163468961107272, "learning_rate": 3.2898574176098176e-06, "loss": 1.2682, "step": 5873 }, { "epoch": 0.8315990656190274, "grad_norm": 9.070793198260308, "learning_rate": 3.2893135575286828e-06, "loss": 1.2204, "step": 5874 }, { "epoch": 0.8317406384936646, "grad_norm": 8.488735139440864, "learning_rate": 3.288769655953923e-06, "loss": 1.1961, "step": 5875 }, { "epoch": 0.8318822113683019, "grad_norm": 9.540679569631852, "learning_rate": 3.2882257129141305e-06, "loss": 1.2737, "step": 5876 }, { "epoch": 0.8320237842429391, "grad_norm": 7.417867699005636, "learning_rate": 3.287681728437899e-06, "loss": 1.2604, "step": 5877 }, { "epoch": 0.8321653571175762, "grad_norm": 8.749735364999305, "learning_rate": 3.2871377025538274e-06, "loss": 1.3587, "step": 5878 }, { "epoch": 0.8323069299922135, "grad_norm": 9.502875445023617, "learning_rate": 3.2865936352905144e-06, "loss": 1.3759, "step": 5879 }, { "epoch": 0.8324485028668507, "grad_norm": 9.086425348306756, "learning_rate": 3.28604952667656e-06, "loss": 1.2752, "step": 5880 }, { "epoch": 0.8325900757414879, "grad_norm": 7.650496333358282, "learning_rate": 3.2855053767405674e-06, "loss": 1.2235, "step": 5881 }, { "epoch": 0.8327316486161251, "grad_norm": 9.169045942922196, "learning_rate": 3.2849611855111433e-06, "loss": 1.2497, "step": 5882 }, { "epoch": 0.8328732214907624, "grad_norm": 7.007511196229601, "learning_rate": 3.284416953016895e-06, "loss": 1.2661, "step": 5883 }, { "epoch": 0.8330147943653996, "grad_norm": 8.770174148205026, "learning_rate": 3.2838726792864315e-06, "loss": 1.2316, "step": 5884 }, { "epoch": 0.8331563672400368, "grad_norm": 8.785004705646017, "learning_rate": 3.2833283643483672e-06, "loss": 1.2102, "step": 5885 }, { "epoch": 0.8332979401146741, "grad_norm": 8.202097219840237, "learning_rate": 3.2827840082313147e-06, "loss": 1.2252, "step": 5886 }, { "epoch": 0.8334395129893113, "grad_norm": 8.091768353573316, "learning_rate": 3.28223961096389e-06, "loss": 1.2711, "step": 5887 }, { "epoch": 0.8335810858639484, "grad_norm": 9.38766844251655, "learning_rate": 3.281695172574712e-06, "loss": 1.4509, "step": 5888 }, { "epoch": 0.8337226587385856, "grad_norm": 9.851819996263103, "learning_rate": 3.281150693092402e-06, "loss": 1.177, "step": 5889 }, { "epoch": 0.8338642316132229, "grad_norm": 9.992160776205468, "learning_rate": 3.280606172545582e-06, "loss": 1.3416, "step": 5890 }, { "epoch": 0.8340058044878601, "grad_norm": 9.248953270326307, "learning_rate": 3.280061610962878e-06, "loss": 1.3421, "step": 5891 }, { "epoch": 0.8341473773624973, "grad_norm": 7.9433341625030165, "learning_rate": 3.279517008372917e-06, "loss": 1.1594, "step": 5892 }, { "epoch": 0.8342889502371346, "grad_norm": 7.67769412988689, "learning_rate": 3.2789723648043276e-06, "loss": 1.1809, "step": 5893 }, { "epoch": 0.8344305231117718, "grad_norm": 9.193892765557488, "learning_rate": 3.2784276802857418e-06, "loss": 1.3067, "step": 5894 }, { "epoch": 0.834572095986409, "grad_norm": 8.800630408161377, "learning_rate": 3.2778829548457935e-06, "loss": 1.2551, "step": 5895 }, { "epoch": 0.8347136688610463, "grad_norm": 8.271316731993585, "learning_rate": 3.277338188513119e-06, "loss": 1.1988, "step": 5896 }, { "epoch": 0.8348552417356835, "grad_norm": 9.210744504710501, "learning_rate": 3.2767933813163542e-06, "loss": 1.2474, "step": 5897 }, { "epoch": 0.8349968146103207, "grad_norm": 9.696502243681417, "learning_rate": 3.276248533284141e-06, "loss": 1.2096, "step": 5898 }, { "epoch": 0.8351383874849578, "grad_norm": 8.953909794628911, "learning_rate": 3.2757036444451212e-06, "loss": 1.2615, "step": 5899 }, { "epoch": 0.8352799603595951, "grad_norm": 9.833463980458792, "learning_rate": 3.2751587148279395e-06, "loss": 1.3571, "step": 5900 }, { "epoch": 0.8354215332342323, "grad_norm": 9.189432991808445, "learning_rate": 3.274613744461242e-06, "loss": 1.1349, "step": 5901 }, { "epoch": 0.8355631061088695, "grad_norm": 11.322111146145142, "learning_rate": 3.2740687333736776e-06, "loss": 1.2061, "step": 5902 }, { "epoch": 0.8357046789835068, "grad_norm": 7.51288261264391, "learning_rate": 3.2735236815938975e-06, "loss": 1.1914, "step": 5903 }, { "epoch": 0.835846251858144, "grad_norm": 6.728519026668225, "learning_rate": 3.2729785891505533e-06, "loss": 1.0824, "step": 5904 }, { "epoch": 0.8359878247327812, "grad_norm": 6.970345763465099, "learning_rate": 3.2724334560723015e-06, "loss": 1.1031, "step": 5905 }, { "epoch": 0.8361293976074184, "grad_norm": 8.38452517744278, "learning_rate": 3.271888282387799e-06, "loss": 1.2494, "step": 5906 }, { "epoch": 0.8362709704820557, "grad_norm": 10.571697135960369, "learning_rate": 3.2713430681257046e-06, "loss": 1.3539, "step": 5907 }, { "epoch": 0.8364125433566929, "grad_norm": 9.695758277549011, "learning_rate": 3.2707978133146805e-06, "loss": 1.1409, "step": 5908 }, { "epoch": 0.83655411623133, "grad_norm": 9.531607799380488, "learning_rate": 3.27025251798339e-06, "loss": 1.1902, "step": 5909 }, { "epoch": 0.8366956891059673, "grad_norm": 7.746753135544362, "learning_rate": 3.2697071821604986e-06, "loss": 1.1991, "step": 5910 }, { "epoch": 0.8368372619806045, "grad_norm": 9.695204691936869, "learning_rate": 3.2691618058746757e-06, "loss": 1.2063, "step": 5911 }, { "epoch": 0.8369788348552417, "grad_norm": 9.21134377085892, "learning_rate": 3.26861638915459e-06, "loss": 1.2594, "step": 5912 }, { "epoch": 0.837120407729879, "grad_norm": 10.77521626903754, "learning_rate": 3.2680709320289123e-06, "loss": 1.1042, "step": 5913 }, { "epoch": 0.8372619806045162, "grad_norm": 8.913165917470812, "learning_rate": 3.26752543452632e-06, "loss": 1.2468, "step": 5914 }, { "epoch": 0.8374035534791534, "grad_norm": 8.1899270689981, "learning_rate": 3.266979896675487e-06, "loss": 1.2606, "step": 5915 }, { "epoch": 0.8375451263537906, "grad_norm": 9.838575571438293, "learning_rate": 3.266434318505093e-06, "loss": 1.2783, "step": 5916 }, { "epoch": 0.8376866992284279, "grad_norm": 9.342483077387378, "learning_rate": 3.2658887000438183e-06, "loss": 1.2648, "step": 5917 }, { "epoch": 0.8378282721030651, "grad_norm": 12.773456015850494, "learning_rate": 3.265343041320346e-06, "loss": 1.3355, "step": 5918 }, { "epoch": 0.8379698449777023, "grad_norm": 7.877220446275991, "learning_rate": 3.26479734236336e-06, "loss": 1.1559, "step": 5919 }, { "epoch": 0.8381114178523394, "grad_norm": 9.416039730121085, "learning_rate": 3.2642516032015486e-06, "loss": 1.336, "step": 5920 }, { "epoch": 0.8382529907269767, "grad_norm": 7.639406696070628, "learning_rate": 3.2637058238635995e-06, "loss": 1.1896, "step": 5921 }, { "epoch": 0.8383945636016139, "grad_norm": 8.139557459093387, "learning_rate": 3.2631600043782054e-06, "loss": 1.1648, "step": 5922 }, { "epoch": 0.8385361364762511, "grad_norm": 10.672186694752686, "learning_rate": 3.262614144774059e-06, "loss": 1.2457, "step": 5923 }, { "epoch": 0.8386777093508884, "grad_norm": 8.42366260814379, "learning_rate": 3.2620682450798557e-06, "loss": 1.2058, "step": 5924 }, { "epoch": 0.8388192822255256, "grad_norm": 10.84174814352451, "learning_rate": 3.2615223053242924e-06, "loss": 1.3454, "step": 5925 }, { "epoch": 0.8389608551001628, "grad_norm": 8.120361412536688, "learning_rate": 3.2609763255360696e-06, "loss": 1.1696, "step": 5926 }, { "epoch": 0.8391024279748001, "grad_norm": 7.961398694396769, "learning_rate": 3.2604303057438883e-06, "loss": 1.1316, "step": 5927 }, { "epoch": 0.8392440008494373, "grad_norm": 9.398236178226144, "learning_rate": 3.2598842459764535e-06, "loss": 1.2886, "step": 5928 }, { "epoch": 0.8393855737240745, "grad_norm": 10.35071525129442, "learning_rate": 3.2593381462624705e-06, "loss": 1.2996, "step": 5929 }, { "epoch": 0.8395271465987116, "grad_norm": 7.930304923238268, "learning_rate": 3.2587920066306474e-06, "loss": 1.2238, "step": 5930 }, { "epoch": 0.8396687194733489, "grad_norm": 8.11050812847449, "learning_rate": 3.258245827109693e-06, "loss": 1.209, "step": 5931 }, { "epoch": 0.8398102923479861, "grad_norm": 7.8951229154985345, "learning_rate": 3.2576996077283222e-06, "loss": 1.0637, "step": 5932 }, { "epoch": 0.8399518652226233, "grad_norm": 8.496254488422016, "learning_rate": 3.2571533485152485e-06, "loss": 1.194, "step": 5933 }, { "epoch": 0.8400934380972606, "grad_norm": 10.145973617137413, "learning_rate": 3.256607049499187e-06, "loss": 1.2642, "step": 5934 }, { "epoch": 0.8402350109718978, "grad_norm": 10.435746279736733, "learning_rate": 3.256060710708857e-06, "loss": 1.3127, "step": 5935 }, { "epoch": 0.840376583846535, "grad_norm": 10.192721848177571, "learning_rate": 3.255514332172979e-06, "loss": 1.2476, "step": 5936 }, { "epoch": 0.8405181567211722, "grad_norm": 10.540442555237133, "learning_rate": 3.2549679139202756e-06, "loss": 1.1851, "step": 5937 }, { "epoch": 0.8406597295958095, "grad_norm": 7.808616710665342, "learning_rate": 3.254421455979472e-06, "loss": 1.2253, "step": 5938 }, { "epoch": 0.8408013024704467, "grad_norm": 10.094010388112943, "learning_rate": 3.253874958379296e-06, "loss": 1.216, "step": 5939 }, { "epoch": 0.8409428753450838, "grad_norm": 9.84151876684608, "learning_rate": 3.253328421148475e-06, "loss": 1.1844, "step": 5940 }, { "epoch": 0.8410844482197211, "grad_norm": 8.648513366311324, "learning_rate": 3.2527818443157406e-06, "loss": 1.2491, "step": 5941 }, { "epoch": 0.8412260210943583, "grad_norm": 6.803487152847784, "learning_rate": 3.2522352279098256e-06, "loss": 1.1703, "step": 5942 }, { "epoch": 0.8413675939689955, "grad_norm": 7.481080160228838, "learning_rate": 3.251688571959466e-06, "loss": 1.305, "step": 5943 }, { "epoch": 0.8415091668436327, "grad_norm": 8.045954323847813, "learning_rate": 3.2511418764933983e-06, "loss": 1.3141, "step": 5944 }, { "epoch": 0.84165073971827, "grad_norm": 8.906669740657314, "learning_rate": 3.2505951415403625e-06, "loss": 1.2247, "step": 5945 }, { "epoch": 0.8417923125929072, "grad_norm": 7.575555802403678, "learning_rate": 3.2500483671290993e-06, "loss": 1.318, "step": 5946 }, { "epoch": 0.8419338854675444, "grad_norm": 9.327365965768417, "learning_rate": 3.2495015532883533e-06, "loss": 1.4416, "step": 5947 }, { "epoch": 0.8420754583421817, "grad_norm": 8.152872021845422, "learning_rate": 3.248954700046869e-06, "loss": 1.182, "step": 5948 }, { "epoch": 0.8422170312168189, "grad_norm": 9.350193825410996, "learning_rate": 3.248407807433396e-06, "loss": 1.2032, "step": 5949 }, { "epoch": 0.8423586040914561, "grad_norm": 7.698327851487255, "learning_rate": 3.2478608754766804e-06, "loss": 1.2997, "step": 5950 }, { "epoch": 0.8425001769660933, "grad_norm": 8.673698604710193, "learning_rate": 3.2473139042054773e-06, "loss": 1.3685, "step": 5951 }, { "epoch": 0.8426417498407305, "grad_norm": 9.475962696135664, "learning_rate": 3.2467668936485397e-06, "loss": 1.3351, "step": 5952 }, { "epoch": 0.8427833227153677, "grad_norm": 8.400287768566418, "learning_rate": 3.2462198438346227e-06, "loss": 1.125, "step": 5953 }, { "epoch": 0.8429248955900049, "grad_norm": 9.852919603814815, "learning_rate": 3.2456727547924855e-06, "loss": 1.1806, "step": 5954 }, { "epoch": 0.8430664684646422, "grad_norm": 8.749423198762027, "learning_rate": 3.245125626550888e-06, "loss": 1.1974, "step": 5955 }, { "epoch": 0.8432080413392794, "grad_norm": 7.781660321918624, "learning_rate": 3.244578459138591e-06, "loss": 1.2147, "step": 5956 }, { "epoch": 0.8433496142139166, "grad_norm": 8.094941393974079, "learning_rate": 3.2440312525843596e-06, "loss": 1.1997, "step": 5957 }, { "epoch": 0.8434911870885539, "grad_norm": 7.639718031148727, "learning_rate": 3.24348400691696e-06, "loss": 1.0505, "step": 5958 }, { "epoch": 0.8436327599631911, "grad_norm": 9.651538279575405, "learning_rate": 3.2429367221651603e-06, "loss": 1.1792, "step": 5959 }, { "epoch": 0.8437743328378283, "grad_norm": 11.092514098190486, "learning_rate": 3.242389398357732e-06, "loss": 1.353, "step": 5960 }, { "epoch": 0.8439159057124654, "grad_norm": 10.22972222481505, "learning_rate": 3.2418420355234466e-06, "loss": 1.3402, "step": 5961 }, { "epoch": 0.8440574785871027, "grad_norm": 9.32096774794875, "learning_rate": 3.2412946336910778e-06, "loss": 1.3673, "step": 5962 }, { "epoch": 0.8441990514617399, "grad_norm": 6.828757948531263, "learning_rate": 3.240747192889403e-06, "loss": 1.183, "step": 5963 }, { "epoch": 0.8443406243363771, "grad_norm": 8.794795873028674, "learning_rate": 3.240199713147201e-06, "loss": 1.2729, "step": 5964 }, { "epoch": 0.8444821972110144, "grad_norm": 8.25533468990114, "learning_rate": 3.239652194493251e-06, "loss": 1.2694, "step": 5965 }, { "epoch": 0.8446237700856516, "grad_norm": 11.079490588214602, "learning_rate": 3.2391046369563374e-06, "loss": 1.2609, "step": 5966 }, { "epoch": 0.8447653429602888, "grad_norm": 8.438377052217483, "learning_rate": 3.2385570405652444e-06, "loss": 1.1424, "step": 5967 }, { "epoch": 0.844906915834926, "grad_norm": 9.007149293939804, "learning_rate": 3.2380094053487576e-06, "loss": 1.228, "step": 5968 }, { "epoch": 0.8450484887095633, "grad_norm": 8.81846773708338, "learning_rate": 3.237461731335667e-06, "loss": 1.4507, "step": 5969 }, { "epoch": 0.8451900615842005, "grad_norm": 7.5825670574781165, "learning_rate": 3.2369140185547643e-06, "loss": 1.0591, "step": 5970 }, { "epoch": 0.8453316344588376, "grad_norm": 10.779274048999659, "learning_rate": 3.23636626703484e-06, "loss": 1.3676, "step": 5971 }, { "epoch": 0.8454732073334749, "grad_norm": 7.6259790010921, "learning_rate": 3.2358184768046895e-06, "loss": 1.2247, "step": 5972 }, { "epoch": 0.8456147802081121, "grad_norm": 11.201681514942614, "learning_rate": 3.235270647893111e-06, "loss": 1.1276, "step": 5973 }, { "epoch": 0.8457563530827493, "grad_norm": 9.818074774881085, "learning_rate": 3.2347227803289027e-06, "loss": 1.2806, "step": 5974 }, { "epoch": 0.8458979259573866, "grad_norm": 7.222767954365378, "learning_rate": 3.234174874140866e-06, "loss": 1.1001, "step": 5975 }, { "epoch": 0.8460394988320238, "grad_norm": 11.55956532119781, "learning_rate": 3.2336269293578032e-06, "loss": 1.2543, "step": 5976 }, { "epoch": 0.846181071706661, "grad_norm": 8.873289561327308, "learning_rate": 3.23307894600852e-06, "loss": 1.3062, "step": 5977 }, { "epoch": 0.8463226445812982, "grad_norm": 10.334516108921907, "learning_rate": 3.2325309241218227e-06, "loss": 1.2763, "step": 5978 }, { "epoch": 0.8464642174559355, "grad_norm": 9.900144988260234, "learning_rate": 3.2319828637265217e-06, "loss": 1.2396, "step": 5979 }, { "epoch": 0.8466057903305727, "grad_norm": 10.179822404132967, "learning_rate": 3.2314347648514265e-06, "loss": 1.359, "step": 5980 }, { "epoch": 0.8467473632052099, "grad_norm": 7.872244216480219, "learning_rate": 3.2308866275253516e-06, "loss": 1.2057, "step": 5981 }, { "epoch": 0.846888936079847, "grad_norm": 8.396227907007185, "learning_rate": 3.230338451777112e-06, "loss": 1.1566, "step": 5982 }, { "epoch": 0.8470305089544843, "grad_norm": 11.956201733313774, "learning_rate": 3.2297902376355238e-06, "loss": 1.3092, "step": 5983 }, { "epoch": 0.8471720818291215, "grad_norm": 9.012131038737452, "learning_rate": 3.2292419851294072e-06, "loss": 1.151, "step": 5984 }, { "epoch": 0.8473136547037587, "grad_norm": 10.014262137920706, "learning_rate": 3.2286936942875837e-06, "loss": 1.2233, "step": 5985 }, { "epoch": 0.847455227578396, "grad_norm": 9.311658987689967, "learning_rate": 3.2281453651388755e-06, "loss": 1.4721, "step": 5986 }, { "epoch": 0.8475968004530332, "grad_norm": 8.422596520142307, "learning_rate": 3.227596997712108e-06, "loss": 1.2042, "step": 5987 }, { "epoch": 0.8477383733276704, "grad_norm": 10.057164643623237, "learning_rate": 3.2270485920361093e-06, "loss": 1.2514, "step": 5988 }, { "epoch": 0.8478799462023077, "grad_norm": 8.329368970766746, "learning_rate": 3.2265001481397084e-06, "loss": 1.267, "step": 5989 }, { "epoch": 0.8480215190769449, "grad_norm": 8.052719926693927, "learning_rate": 3.225951666051736e-06, "loss": 1.1719, "step": 5990 }, { "epoch": 0.8481630919515821, "grad_norm": 7.057432263840115, "learning_rate": 3.225403145801026e-06, "loss": 1.3219, "step": 5991 }, { "epoch": 0.8483046648262192, "grad_norm": 8.847047440422479, "learning_rate": 3.2248545874164145e-06, "loss": 1.2698, "step": 5992 }, { "epoch": 0.8484462377008565, "grad_norm": 7.280664248501923, "learning_rate": 3.2243059909267367e-06, "loss": 1.297, "step": 5993 }, { "epoch": 0.8485878105754937, "grad_norm": 8.48797879336312, "learning_rate": 3.2237573563608333e-06, "loss": 1.311, "step": 5994 }, { "epoch": 0.8487293834501309, "grad_norm": 8.988031694710315, "learning_rate": 3.2232086837475444e-06, "loss": 1.4122, "step": 5995 }, { "epoch": 0.8488709563247682, "grad_norm": 9.963019849742707, "learning_rate": 3.222659973115715e-06, "loss": 1.2663, "step": 5996 }, { "epoch": 0.8490125291994054, "grad_norm": 8.693258668465782, "learning_rate": 3.2221112244941905e-06, "loss": 1.2414, "step": 5997 }, { "epoch": 0.8491541020740426, "grad_norm": 8.520684151559928, "learning_rate": 3.2215624379118164e-06, "loss": 1.1709, "step": 5998 }, { "epoch": 0.8492956749486799, "grad_norm": 7.9094366956676145, "learning_rate": 3.2210136133974434e-06, "loss": 1.2435, "step": 5999 }, { "epoch": 0.8494372478233171, "grad_norm": 9.976226394715253, "learning_rate": 3.220464750979922e-06, "loss": 1.2943, "step": 6000 }, { "epoch": 0.8495788206979543, "grad_norm": 7.314166800136883, "learning_rate": 3.219915850688106e-06, "loss": 1.2773, "step": 6001 }, { "epoch": 0.8497203935725914, "grad_norm": 7.5908735188256555, "learning_rate": 3.2193669125508504e-06, "loss": 1.2011, "step": 6002 }, { "epoch": 0.8498619664472287, "grad_norm": 9.56006985371499, "learning_rate": 3.218817936597013e-06, "loss": 1.1682, "step": 6003 }, { "epoch": 0.8500035393218659, "grad_norm": 8.236316314536825, "learning_rate": 3.218268922855452e-06, "loss": 1.2421, "step": 6004 }, { "epoch": 0.8501451121965031, "grad_norm": 9.454581660754897, "learning_rate": 3.2177198713550295e-06, "loss": 1.1552, "step": 6005 }, { "epoch": 0.8502866850711404, "grad_norm": 8.564607903746783, "learning_rate": 3.2171707821246083e-06, "loss": 1.1245, "step": 6006 }, { "epoch": 0.8504282579457776, "grad_norm": 9.372428846641172, "learning_rate": 3.216621655193055e-06, "loss": 1.1873, "step": 6007 }, { "epoch": 0.8505698308204148, "grad_norm": 9.311787213178555, "learning_rate": 3.216072490589235e-06, "loss": 1.3156, "step": 6008 }, { "epoch": 0.850711403695052, "grad_norm": 8.376580843543383, "learning_rate": 3.2155232883420172e-06, "loss": 1.3195, "step": 6009 }, { "epoch": 0.8508529765696893, "grad_norm": 9.64917127776558, "learning_rate": 3.2149740484802736e-06, "loss": 1.1685, "step": 6010 }, { "epoch": 0.8509945494443265, "grad_norm": 7.860700916361363, "learning_rate": 3.2144247710328787e-06, "loss": 1.2483, "step": 6011 }, { "epoch": 0.8511361223189637, "grad_norm": 7.960749420849317, "learning_rate": 3.2138754560287057e-06, "loss": 1.0661, "step": 6012 }, { "epoch": 0.8512776951936009, "grad_norm": 10.04510548909813, "learning_rate": 3.2133261034966325e-06, "loss": 1.2465, "step": 6013 }, { "epoch": 0.8514192680682381, "grad_norm": 10.601575813481876, "learning_rate": 3.2127767134655374e-06, "loss": 1.3014, "step": 6014 }, { "epoch": 0.8515608409428753, "grad_norm": 7.71862372785442, "learning_rate": 3.2122272859643022e-06, "loss": 1.2404, "step": 6015 }, { "epoch": 0.8517024138175125, "grad_norm": 9.810915679348446, "learning_rate": 3.2116778210218103e-06, "loss": 1.1378, "step": 6016 }, { "epoch": 0.8518439866921498, "grad_norm": 8.481789772887845, "learning_rate": 3.211128318666945e-06, "loss": 1.395, "step": 6017 }, { "epoch": 0.851985559566787, "grad_norm": 8.790704707690534, "learning_rate": 3.2105787789285947e-06, "loss": 1.1797, "step": 6018 }, { "epoch": 0.8521271324414242, "grad_norm": 9.415893072799756, "learning_rate": 3.2100292018356477e-06, "loss": 1.3236, "step": 6019 }, { "epoch": 0.8522687053160615, "grad_norm": 9.083268564909089, "learning_rate": 3.209479587416995e-06, "loss": 1.1641, "step": 6020 }, { "epoch": 0.8524102781906987, "grad_norm": 8.653164640799414, "learning_rate": 3.208929935701529e-06, "loss": 1.2693, "step": 6021 }, { "epoch": 0.8525518510653359, "grad_norm": 9.603643330005594, "learning_rate": 3.2083802467181452e-06, "loss": 1.3006, "step": 6022 }, { "epoch": 0.852693423939973, "grad_norm": 8.733038893921972, "learning_rate": 3.2078305204957406e-06, "loss": 1.2698, "step": 6023 }, { "epoch": 0.8528349968146103, "grad_norm": 10.092009875643113, "learning_rate": 3.2072807570632125e-06, "loss": 1.3358, "step": 6024 }, { "epoch": 0.8529765696892475, "grad_norm": 10.350414514764198, "learning_rate": 3.2067309564494626e-06, "loss": 1.0834, "step": 6025 }, { "epoch": 0.8531181425638847, "grad_norm": 7.626689035920815, "learning_rate": 3.206181118683393e-06, "loss": 1.1821, "step": 6026 }, { "epoch": 0.853259715438522, "grad_norm": 10.659402241220505, "learning_rate": 3.205631243793909e-06, "loss": 1.2764, "step": 6027 }, { "epoch": 0.8534012883131592, "grad_norm": 9.231815919465598, "learning_rate": 3.2050813318099166e-06, "loss": 1.2872, "step": 6028 }, { "epoch": 0.8535428611877964, "grad_norm": 9.655401396267628, "learning_rate": 3.204531382760325e-06, "loss": 1.2321, "step": 6029 }, { "epoch": 0.8536844340624337, "grad_norm": 8.710313441070832, "learning_rate": 3.203981396674043e-06, "loss": 1.2565, "step": 6030 }, { "epoch": 0.8538260069370709, "grad_norm": 9.490206840680884, "learning_rate": 3.2034313735799837e-06, "loss": 1.3511, "step": 6031 }, { "epoch": 0.8539675798117081, "grad_norm": 8.059488369816233, "learning_rate": 3.2028813135070625e-06, "loss": 1.2268, "step": 6032 }, { "epoch": 0.8541091526863452, "grad_norm": 9.852717502054597, "learning_rate": 3.2023312164841937e-06, "loss": 1.3017, "step": 6033 }, { "epoch": 0.8542507255609825, "grad_norm": 8.497331536989229, "learning_rate": 3.201781082540297e-06, "loss": 1.172, "step": 6034 }, { "epoch": 0.8543922984356197, "grad_norm": 7.972992848342258, "learning_rate": 3.201230911704292e-06, "loss": 1.2276, "step": 6035 }, { "epoch": 0.8545338713102569, "grad_norm": 9.667958491156174, "learning_rate": 3.2006807040051013e-06, "loss": 1.4034, "step": 6036 }, { "epoch": 0.8546754441848942, "grad_norm": 10.002536070629203, "learning_rate": 3.2001304594716476e-06, "loss": 1.3692, "step": 6037 }, { "epoch": 0.8548170170595314, "grad_norm": 10.498332663261158, "learning_rate": 3.1995801781328585e-06, "loss": 1.2534, "step": 6038 }, { "epoch": 0.8549585899341686, "grad_norm": 9.011609960196182, "learning_rate": 3.1990298600176607e-06, "loss": 1.3948, "step": 6039 }, { "epoch": 0.8551001628088059, "grad_norm": 8.275613039479264, "learning_rate": 3.198479505154984e-06, "loss": 1.2394, "step": 6040 }, { "epoch": 0.8552417356834431, "grad_norm": 8.0969265039912, "learning_rate": 3.197929113573761e-06, "loss": 1.1914, "step": 6041 }, { "epoch": 0.8553833085580803, "grad_norm": 8.167269107110942, "learning_rate": 3.197378685302925e-06, "loss": 1.2016, "step": 6042 }, { "epoch": 0.8555248814327175, "grad_norm": 8.928372504061928, "learning_rate": 3.196828220371411e-06, "loss": 1.2362, "step": 6043 }, { "epoch": 0.8556664543073547, "grad_norm": 9.760376714721684, "learning_rate": 3.196277718808157e-06, "loss": 1.1691, "step": 6044 }, { "epoch": 0.8558080271819919, "grad_norm": 9.785750746037538, "learning_rate": 3.195727180642104e-06, "loss": 1.3898, "step": 6045 }, { "epoch": 0.8559496000566291, "grad_norm": 8.369219706817152, "learning_rate": 3.1951766059021905e-06, "loss": 1.2656, "step": 6046 }, { "epoch": 0.8560911729312664, "grad_norm": 8.68670562844701, "learning_rate": 3.1946259946173607e-06, "loss": 1.2107, "step": 6047 }, { "epoch": 0.8562327458059036, "grad_norm": 9.268045504876891, "learning_rate": 3.1940753468165607e-06, "loss": 1.4617, "step": 6048 }, { "epoch": 0.8563743186805408, "grad_norm": 8.548751597141473, "learning_rate": 3.193524662528738e-06, "loss": 1.3136, "step": 6049 }, { "epoch": 0.856515891555178, "grad_norm": 10.551033613003723, "learning_rate": 3.192973941782841e-06, "loss": 1.3367, "step": 6050 }, { "epoch": 0.8566574644298153, "grad_norm": 8.335216258629707, "learning_rate": 3.1924231846078198e-06, "loss": 1.1175, "step": 6051 }, { "epoch": 0.8567990373044525, "grad_norm": 8.372705857077545, "learning_rate": 3.1918723910326283e-06, "loss": 1.3324, "step": 6052 }, { "epoch": 0.8569406101790897, "grad_norm": 8.86869953724522, "learning_rate": 3.1913215610862208e-06, "loss": 1.1618, "step": 6053 }, { "epoch": 0.8570821830537269, "grad_norm": 8.900697282200705, "learning_rate": 3.1907706947975546e-06, "loss": 1.2607, "step": 6054 }, { "epoch": 0.8572237559283641, "grad_norm": 8.509305908729562, "learning_rate": 3.190219792195588e-06, "loss": 1.1501, "step": 6055 }, { "epoch": 0.8573653288030013, "grad_norm": 8.656242067629488, "learning_rate": 3.189668853309282e-06, "loss": 1.166, "step": 6056 }, { "epoch": 0.8575069016776385, "grad_norm": 7.400062004035791, "learning_rate": 3.189117878167598e-06, "loss": 1.2186, "step": 6057 }, { "epoch": 0.8576484745522758, "grad_norm": 8.890661470844572, "learning_rate": 3.1885668667995006e-06, "loss": 1.2709, "step": 6058 }, { "epoch": 0.857790047426913, "grad_norm": 10.507634974685335, "learning_rate": 3.1880158192339574e-06, "loss": 1.238, "step": 6059 }, { "epoch": 0.8579316203015502, "grad_norm": 7.893747931666643, "learning_rate": 3.1874647354999354e-06, "loss": 1.2133, "step": 6060 }, { "epoch": 0.8580731931761875, "grad_norm": 8.877500369889196, "learning_rate": 3.186913615626405e-06, "loss": 1.2945, "step": 6061 }, { "epoch": 0.8582147660508247, "grad_norm": 7.831505203655477, "learning_rate": 3.186362459642337e-06, "loss": 1.1996, "step": 6062 }, { "epoch": 0.8583563389254619, "grad_norm": 8.21376449163375, "learning_rate": 3.1858112675767074e-06, "loss": 1.2313, "step": 6063 }, { "epoch": 0.8584979118000992, "grad_norm": 9.67649442580104, "learning_rate": 3.18526003945849e-06, "loss": 1.3579, "step": 6064 }, { "epoch": 0.8586394846747363, "grad_norm": 9.595758581981514, "learning_rate": 3.184708775316663e-06, "loss": 1.164, "step": 6065 }, { "epoch": 0.8587810575493735, "grad_norm": 8.67610573881022, "learning_rate": 3.184157475180208e-06, "loss": 1.3042, "step": 6066 }, { "epoch": 0.8589226304240107, "grad_norm": 8.2723345286979, "learning_rate": 3.183606139078103e-06, "loss": 1.2736, "step": 6067 }, { "epoch": 0.859064203298648, "grad_norm": 8.983256129379544, "learning_rate": 3.1830547670393337e-06, "loss": 1.2973, "step": 6068 }, { "epoch": 0.8592057761732852, "grad_norm": 8.265092396297572, "learning_rate": 3.1825033590928844e-06, "loss": 1.2415, "step": 6069 }, { "epoch": 0.8593473490479224, "grad_norm": 8.016236042133684, "learning_rate": 3.181951915267742e-06, "loss": 1.2429, "step": 6070 }, { "epoch": 0.8594889219225597, "grad_norm": 8.923258926296079, "learning_rate": 3.181400435592897e-06, "loss": 1.0945, "step": 6071 }, { "epoch": 0.8596304947971969, "grad_norm": 9.033573683277893, "learning_rate": 3.180848920097338e-06, "loss": 1.3172, "step": 6072 }, { "epoch": 0.8597720676718341, "grad_norm": 7.908890234335632, "learning_rate": 3.1802973688100596e-06, "loss": 1.2942, "step": 6073 }, { "epoch": 0.8599136405464713, "grad_norm": 7.9609044367259925, "learning_rate": 3.179745781760055e-06, "loss": 1.1271, "step": 6074 }, { "epoch": 0.8600552134211085, "grad_norm": 9.097553100263244, "learning_rate": 3.1791941589763225e-06, "loss": 1.1573, "step": 6075 }, { "epoch": 0.8601967862957457, "grad_norm": 9.419602153176168, "learning_rate": 3.178642500487859e-06, "loss": 1.2492, "step": 6076 }, { "epoch": 0.8603383591703829, "grad_norm": 8.22695663971543, "learning_rate": 3.1780908063236653e-06, "loss": 1.198, "step": 6077 }, { "epoch": 0.8604799320450202, "grad_norm": 10.67159725514762, "learning_rate": 3.1775390765127433e-06, "loss": 1.2491, "step": 6078 }, { "epoch": 0.8606215049196574, "grad_norm": 9.910763647872164, "learning_rate": 3.1769873110840977e-06, "loss": 1.2277, "step": 6079 }, { "epoch": 0.8607630777942946, "grad_norm": 9.335910532126322, "learning_rate": 3.176435510066734e-06, "loss": 1.1241, "step": 6080 }, { "epoch": 0.8609046506689318, "grad_norm": 10.707512216714663, "learning_rate": 3.175883673489659e-06, "loss": 1.1552, "step": 6081 }, { "epoch": 0.8610462235435691, "grad_norm": 7.7651241762704855, "learning_rate": 3.1753318013818848e-06, "loss": 1.2507, "step": 6082 }, { "epoch": 0.8611877964182063, "grad_norm": 8.810932425822045, "learning_rate": 3.1747798937724207e-06, "loss": 1.2015, "step": 6083 }, { "epoch": 0.8613293692928435, "grad_norm": 10.945875002275043, "learning_rate": 3.1742279506902798e-06, "loss": 1.2305, "step": 6084 }, { "epoch": 0.8614709421674807, "grad_norm": 10.208743547774072, "learning_rate": 3.173675972164479e-06, "loss": 1.1556, "step": 6085 }, { "epoch": 0.8616125150421179, "grad_norm": 8.60718454510283, "learning_rate": 3.1731239582240343e-06, "loss": 1.2858, "step": 6086 }, { "epoch": 0.8617540879167551, "grad_norm": 8.708075219911949, "learning_rate": 3.1725719088979655e-06, "loss": 1.2691, "step": 6087 }, { "epoch": 0.8618956607913923, "grad_norm": 7.826167975573024, "learning_rate": 3.172019824215293e-06, "loss": 1.3113, "step": 6088 }, { "epoch": 0.8620372336660296, "grad_norm": 10.18467326820444, "learning_rate": 3.171467704205039e-06, "loss": 1.3, "step": 6089 }, { "epoch": 0.8621788065406668, "grad_norm": 9.601817960266162, "learning_rate": 3.1709155488962283e-06, "loss": 1.2323, "step": 6090 }, { "epoch": 0.862320379415304, "grad_norm": 6.853295484406172, "learning_rate": 3.1703633583178885e-06, "loss": 1.0844, "step": 6091 }, { "epoch": 0.8624619522899413, "grad_norm": 7.907625945611181, "learning_rate": 3.1698111324990454e-06, "loss": 1.1177, "step": 6092 }, { "epoch": 0.8626035251645785, "grad_norm": 7.203794369657736, "learning_rate": 3.169258871468731e-06, "loss": 1.1412, "step": 6093 }, { "epoch": 0.8627450980392157, "grad_norm": 9.315047562089223, "learning_rate": 3.1687065752559777e-06, "loss": 1.3111, "step": 6094 }, { "epoch": 0.862886670913853, "grad_norm": 10.154362429762909, "learning_rate": 3.168154243889817e-06, "loss": 1.162, "step": 6095 }, { "epoch": 0.8630282437884901, "grad_norm": 9.841352479610192, "learning_rate": 3.1676018773992866e-06, "loss": 1.2833, "step": 6096 }, { "epoch": 0.8631698166631273, "grad_norm": 9.929264134611527, "learning_rate": 3.1670494758134234e-06, "loss": 1.2461, "step": 6097 }, { "epoch": 0.8633113895377645, "grad_norm": 10.576350955028193, "learning_rate": 3.1664970391612666e-06, "loss": 1.2649, "step": 6098 }, { "epoch": 0.8634529624124018, "grad_norm": 9.369578510522432, "learning_rate": 3.1659445674718563e-06, "loss": 1.1844, "step": 6099 }, { "epoch": 0.863594535287039, "grad_norm": 9.874847024626034, "learning_rate": 3.165392060774238e-06, "loss": 1.2554, "step": 6100 }, { "epoch": 0.8637361081616762, "grad_norm": 9.161155662430863, "learning_rate": 3.1648395190974546e-06, "loss": 1.2064, "step": 6101 }, { "epoch": 0.8638776810363135, "grad_norm": 10.498626619166522, "learning_rate": 3.1642869424705537e-06, "loss": 1.3747, "step": 6102 }, { "epoch": 0.8640192539109507, "grad_norm": 8.37863763409754, "learning_rate": 3.1637343309225833e-06, "loss": 1.1301, "step": 6103 }, { "epoch": 0.8641608267855879, "grad_norm": 8.209253198295839, "learning_rate": 3.163181684482594e-06, "loss": 1.2942, "step": 6104 }, { "epoch": 0.8643023996602252, "grad_norm": 8.717902467067004, "learning_rate": 3.162629003179638e-06, "loss": 1.0343, "step": 6105 }, { "epoch": 0.8644439725348623, "grad_norm": 9.063624660817961, "learning_rate": 3.1620762870427703e-06, "loss": 1.2129, "step": 6106 }, { "epoch": 0.8645855454094995, "grad_norm": 8.170367070780177, "learning_rate": 3.1615235361010442e-06, "loss": 1.2354, "step": 6107 }, { "epoch": 0.8647271182841367, "grad_norm": 8.73143346180062, "learning_rate": 3.1609707503835203e-06, "loss": 1.3822, "step": 6108 }, { "epoch": 0.864868691158774, "grad_norm": 8.416385708894223, "learning_rate": 3.1604179299192565e-06, "loss": 1.2537, "step": 6109 }, { "epoch": 0.8650102640334112, "grad_norm": 8.597989898682806, "learning_rate": 3.1598650747373144e-06, "loss": 1.1489, "step": 6110 }, { "epoch": 0.8651518369080484, "grad_norm": 9.483470791947363, "learning_rate": 3.1593121848667575e-06, "loss": 1.2462, "step": 6111 }, { "epoch": 0.8652934097826857, "grad_norm": 9.257287531265487, "learning_rate": 3.158759260336651e-06, "loss": 1.1838, "step": 6112 }, { "epoch": 0.8654349826573229, "grad_norm": 9.273222357577026, "learning_rate": 3.1582063011760604e-06, "loss": 1.3338, "step": 6113 }, { "epoch": 0.8655765555319601, "grad_norm": 9.044224484221745, "learning_rate": 3.1576533074140564e-06, "loss": 1.1965, "step": 6114 }, { "epoch": 0.8657181284065973, "grad_norm": 10.618349035639946, "learning_rate": 3.157100279079708e-06, "loss": 1.2321, "step": 6115 }, { "epoch": 0.8658597012812345, "grad_norm": 8.14886163355665, "learning_rate": 3.1565472162020876e-06, "loss": 1.1802, "step": 6116 }, { "epoch": 0.8660012741558717, "grad_norm": 9.188975105460322, "learning_rate": 3.15599411881027e-06, "loss": 1.1916, "step": 6117 }, { "epoch": 0.8661428470305089, "grad_norm": 9.180630769355785, "learning_rate": 3.15544098693333e-06, "loss": 1.1746, "step": 6118 }, { "epoch": 0.8662844199051462, "grad_norm": 7.898463821744593, "learning_rate": 3.1548878206003477e-06, "loss": 1.2612, "step": 6119 }, { "epoch": 0.8664259927797834, "grad_norm": 7.492097633308143, "learning_rate": 3.1543346198403998e-06, "loss": 1.2228, "step": 6120 }, { "epoch": 0.8665675656544206, "grad_norm": 8.837282221807284, "learning_rate": 3.1537813846825684e-06, "loss": 1.0724, "step": 6121 }, { "epoch": 0.8667091385290578, "grad_norm": 12.743080206121613, "learning_rate": 3.1532281151559372e-06, "loss": 1.3951, "step": 6122 }, { "epoch": 0.8668507114036951, "grad_norm": 8.466677253696501, "learning_rate": 3.152674811289591e-06, "loss": 1.2453, "step": 6123 }, { "epoch": 0.8669922842783323, "grad_norm": 7.811274562094449, "learning_rate": 3.152121473112618e-06, "loss": 1.2697, "step": 6124 }, { "epoch": 0.8671338571529695, "grad_norm": 7.440308881931242, "learning_rate": 3.151568100654104e-06, "loss": 1.2005, "step": 6125 }, { "epoch": 0.8672754300276068, "grad_norm": 12.430934687586097, "learning_rate": 3.1510146939431414e-06, "loss": 1.2692, "step": 6126 }, { "epoch": 0.8674170029022439, "grad_norm": 8.392846976508851, "learning_rate": 3.150461253008822e-06, "loss": 1.2748, "step": 6127 }, { "epoch": 0.8675585757768811, "grad_norm": 9.299057648199158, "learning_rate": 3.149907777880239e-06, "loss": 1.3429, "step": 6128 }, { "epoch": 0.8677001486515183, "grad_norm": 9.282779657459576, "learning_rate": 3.1493542685864886e-06, "loss": 1.3, "step": 6129 }, { "epoch": 0.8678417215261556, "grad_norm": 16.316471244932796, "learning_rate": 3.1488007251566687e-06, "loss": 1.1889, "step": 6130 }, { "epoch": 0.8679832944007928, "grad_norm": 8.259359425974958, "learning_rate": 3.1482471476198784e-06, "loss": 1.2825, "step": 6131 }, { "epoch": 0.86812486727543, "grad_norm": 7.937624232003322, "learning_rate": 3.1476935360052184e-06, "loss": 1.3326, "step": 6132 }, { "epoch": 0.8682664401500673, "grad_norm": 11.294201630859776, "learning_rate": 3.1471398903417926e-06, "loss": 1.1909, "step": 6133 }, { "epoch": 0.8684080130247045, "grad_norm": 6.915198143404774, "learning_rate": 3.146586210658706e-06, "loss": 1.1509, "step": 6134 }, { "epoch": 0.8685495858993417, "grad_norm": 8.833809269973045, "learning_rate": 3.1460324969850643e-06, "loss": 1.2966, "step": 6135 }, { "epoch": 0.868691158773979, "grad_norm": 8.908806256646118, "learning_rate": 3.1454787493499746e-06, "loss": 1.2386, "step": 6136 }, { "epoch": 0.8688327316486161, "grad_norm": 9.625118898920645, "learning_rate": 3.144924967782549e-06, "loss": 1.3764, "step": 6137 }, { "epoch": 0.8689743045232533, "grad_norm": 8.837201069439306, "learning_rate": 3.144371152311899e-06, "loss": 1.0703, "step": 6138 }, { "epoch": 0.8691158773978905, "grad_norm": 8.921064296573237, "learning_rate": 3.143817302967138e-06, "loss": 1.2298, "step": 6139 }, { "epoch": 0.8692574502725278, "grad_norm": 8.292923670685298, "learning_rate": 3.1432634197773814e-06, "loss": 1.2824, "step": 6140 }, { "epoch": 0.869399023147165, "grad_norm": 9.6700617210525, "learning_rate": 3.142709502771747e-06, "loss": 1.397, "step": 6141 }, { "epoch": 0.8695405960218022, "grad_norm": 9.01959976560161, "learning_rate": 3.142155551979352e-06, "loss": 1.4066, "step": 6142 }, { "epoch": 0.8696821688964395, "grad_norm": 9.036659600529381, "learning_rate": 3.1416015674293195e-06, "loss": 1.1132, "step": 6143 }, { "epoch": 0.8698237417710767, "grad_norm": 8.657952206704206, "learning_rate": 3.14104754915077e-06, "loss": 1.2351, "step": 6144 }, { "epoch": 0.8699653146457139, "grad_norm": 10.163269809596528, "learning_rate": 3.1404934971728297e-06, "loss": 1.1825, "step": 6145 }, { "epoch": 0.8701068875203511, "grad_norm": 9.084094607681527, "learning_rate": 3.1399394115246235e-06, "loss": 1.3014, "step": 6146 }, { "epoch": 0.8702484603949883, "grad_norm": 9.720747273294863, "learning_rate": 3.1393852922352795e-06, "loss": 1.2511, "step": 6147 }, { "epoch": 0.8703900332696255, "grad_norm": 7.691501475413566, "learning_rate": 3.138831139333928e-06, "loss": 1.1642, "step": 6148 }, { "epoch": 0.8705316061442627, "grad_norm": 8.610946041152088, "learning_rate": 3.1382769528496993e-06, "loss": 1.2282, "step": 6149 }, { "epoch": 0.8706731790189, "grad_norm": 8.25823240196019, "learning_rate": 3.1377227328117264e-06, "loss": 1.2698, "step": 6150 }, { "epoch": 0.8708147518935372, "grad_norm": 8.726627633066018, "learning_rate": 3.137168479249146e-06, "loss": 1.2825, "step": 6151 }, { "epoch": 0.8709563247681744, "grad_norm": 9.625126032809177, "learning_rate": 3.1366141921910936e-06, "loss": 1.2148, "step": 6152 }, { "epoch": 0.8710978976428116, "grad_norm": 8.815340490662207, "learning_rate": 3.136059871666708e-06, "loss": 1.2335, "step": 6153 }, { "epoch": 0.8712394705174489, "grad_norm": 9.122727620658344, "learning_rate": 3.1355055177051286e-06, "loss": 1.2651, "step": 6154 }, { "epoch": 0.8713810433920861, "grad_norm": 9.919587119339397, "learning_rate": 3.1349511303354983e-06, "loss": 1.2769, "step": 6155 }, { "epoch": 0.8715226162667233, "grad_norm": 6.947436940008911, "learning_rate": 3.134396709586961e-06, "loss": 1.1077, "step": 6156 }, { "epoch": 0.8716641891413606, "grad_norm": 8.691300912283015, "learning_rate": 3.133842255488661e-06, "loss": 1.136, "step": 6157 }, { "epoch": 0.8718057620159977, "grad_norm": 10.504832654270999, "learning_rate": 3.133287768069746e-06, "loss": 1.2395, "step": 6158 }, { "epoch": 0.8719473348906349, "grad_norm": 10.272733459716308, "learning_rate": 3.1327332473593657e-06, "loss": 1.2766, "step": 6159 }, { "epoch": 0.8720889077652721, "grad_norm": 8.582276609487591, "learning_rate": 3.1321786933866705e-06, "loss": 1.3397, "step": 6160 }, { "epoch": 0.8722304806399094, "grad_norm": 11.051068795415043, "learning_rate": 3.131624106180813e-06, "loss": 1.2918, "step": 6161 }, { "epoch": 0.8723720535145466, "grad_norm": 9.264466391779619, "learning_rate": 3.1310694857709467e-06, "loss": 1.216, "step": 6162 }, { "epoch": 0.8725136263891838, "grad_norm": 8.068350155450434, "learning_rate": 3.130514832186228e-06, "loss": 1.1015, "step": 6163 }, { "epoch": 0.8726551992638211, "grad_norm": 9.545344503189517, "learning_rate": 3.129960145455815e-06, "loss": 1.2871, "step": 6164 }, { "epoch": 0.8727967721384583, "grad_norm": 9.749367717879837, "learning_rate": 3.129405425608867e-06, "loss": 1.1633, "step": 6165 }, { "epoch": 0.8729383450130955, "grad_norm": 9.814826057902147, "learning_rate": 3.128850672674545e-06, "loss": 1.3736, "step": 6166 }, { "epoch": 0.8730799178877328, "grad_norm": 11.054649886751381, "learning_rate": 3.1282958866820113e-06, "loss": 1.3458, "step": 6167 }, { "epoch": 0.8732214907623699, "grad_norm": 10.868149978720432, "learning_rate": 3.127741067660432e-06, "loss": 1.3641, "step": 6168 }, { "epoch": 0.8733630636370071, "grad_norm": 10.926928484861666, "learning_rate": 3.127186215638973e-06, "loss": 1.1623, "step": 6169 }, { "epoch": 0.8735046365116443, "grad_norm": 13.343348048139532, "learning_rate": 3.1266313306468018e-06, "loss": 1.2626, "step": 6170 }, { "epoch": 0.8736462093862816, "grad_norm": 8.716519196857178, "learning_rate": 3.1260764127130887e-06, "loss": 1.3274, "step": 6171 }, { "epoch": 0.8737877822609188, "grad_norm": 8.68069476884897, "learning_rate": 3.125521461867006e-06, "loss": 1.2352, "step": 6172 }, { "epoch": 0.873929355135556, "grad_norm": 8.593420181881209, "learning_rate": 3.1249664781377257e-06, "loss": 1.1867, "step": 6173 }, { "epoch": 0.8740709280101933, "grad_norm": 9.156637867078102, "learning_rate": 3.1244114615544242e-06, "loss": 1.2022, "step": 6174 }, { "epoch": 0.8742125008848305, "grad_norm": 8.704658962100948, "learning_rate": 3.1238564121462776e-06, "loss": 1.2517, "step": 6175 }, { "epoch": 0.8743540737594677, "grad_norm": 9.489813312265957, "learning_rate": 3.1233013299424646e-06, "loss": 1.3737, "step": 6176 }, { "epoch": 0.874495646634105, "grad_norm": 7.858112453728899, "learning_rate": 3.122746214972166e-06, "loss": 1.2309, "step": 6177 }, { "epoch": 0.8746372195087421, "grad_norm": 11.779224170984797, "learning_rate": 3.122191067264563e-06, "loss": 1.3173, "step": 6178 }, { "epoch": 0.8747787923833793, "grad_norm": 9.597799335014006, "learning_rate": 3.121635886848839e-06, "loss": 1.2484, "step": 6179 }, { "epoch": 0.8749203652580165, "grad_norm": 8.234741434416701, "learning_rate": 3.12108067375418e-06, "loss": 1.1983, "step": 6180 }, { "epoch": 0.8750619381326538, "grad_norm": 12.614440732360448, "learning_rate": 3.120525428009773e-06, "loss": 1.3316, "step": 6181 }, { "epoch": 0.875203511007291, "grad_norm": 9.748421565692619, "learning_rate": 3.1199701496448074e-06, "loss": 1.2508, "step": 6182 }, { "epoch": 0.8753450838819282, "grad_norm": 8.664674040776731, "learning_rate": 3.119414838688473e-06, "loss": 1.0831, "step": 6183 }, { "epoch": 0.8754866567565655, "grad_norm": 9.923755665758396, "learning_rate": 3.1188594951699623e-06, "loss": 1.3047, "step": 6184 }, { "epoch": 0.8756282296312027, "grad_norm": 10.05510065256936, "learning_rate": 3.1183041191184695e-06, "loss": 1.2947, "step": 6185 }, { "epoch": 0.8757698025058399, "grad_norm": 9.69909210675476, "learning_rate": 3.11774871056319e-06, "loss": 1.0044, "step": 6186 }, { "epoch": 0.8759113753804771, "grad_norm": 11.551607517158326, "learning_rate": 3.1171932695333216e-06, "loss": 1.2423, "step": 6187 }, { "epoch": 0.8760529482551144, "grad_norm": 10.346217664958685, "learning_rate": 3.1166377960580635e-06, "loss": 1.2481, "step": 6188 }, { "epoch": 0.8761945211297515, "grad_norm": 7.19876330669039, "learning_rate": 3.116082290166616e-06, "loss": 1.1278, "step": 6189 }, { "epoch": 0.8763360940043887, "grad_norm": 10.908496002390967, "learning_rate": 3.1155267518881816e-06, "loss": 1.0993, "step": 6190 }, { "epoch": 0.876477666879026, "grad_norm": 10.89707512766112, "learning_rate": 3.114971181251965e-06, "loss": 1.3674, "step": 6191 }, { "epoch": 0.8766192397536632, "grad_norm": 7.913766778239129, "learning_rate": 3.1144155782871723e-06, "loss": 1.3281, "step": 6192 }, { "epoch": 0.8767608126283004, "grad_norm": 9.805160207105834, "learning_rate": 3.113859943023011e-06, "loss": 1.1855, "step": 6193 }, { "epoch": 0.8769023855029376, "grad_norm": 11.930608387836148, "learning_rate": 3.1133042754886896e-06, "loss": 1.3658, "step": 6194 }, { "epoch": 0.8770439583775749, "grad_norm": 10.675895235281537, "learning_rate": 3.1127485757134194e-06, "loss": 1.2449, "step": 6195 }, { "epoch": 0.8771855312522121, "grad_norm": 8.063233807903499, "learning_rate": 3.1121928437264138e-06, "loss": 1.2144, "step": 6196 }, { "epoch": 0.8773271041268493, "grad_norm": 8.35360581440967, "learning_rate": 3.111637079556887e-06, "loss": 1.1791, "step": 6197 }, { "epoch": 0.8774686770014866, "grad_norm": 9.852241656060515, "learning_rate": 3.1110812832340552e-06, "loss": 1.1812, "step": 6198 }, { "epoch": 0.8776102498761237, "grad_norm": 8.886815332579557, "learning_rate": 3.1105254547871354e-06, "loss": 1.2226, "step": 6199 }, { "epoch": 0.8777518227507609, "grad_norm": 14.150947975781737, "learning_rate": 3.1099695942453485e-06, "loss": 1.2749, "step": 6200 }, { "epoch": 0.8778933956253981, "grad_norm": 12.544345168366702, "learning_rate": 3.109413701637914e-06, "loss": 1.166, "step": 6201 }, { "epoch": 0.8780349685000354, "grad_norm": 11.015474705313359, "learning_rate": 3.108857776994056e-06, "loss": 1.1435, "step": 6202 }, { "epoch": 0.8781765413746726, "grad_norm": 8.460712067056804, "learning_rate": 3.108301820342998e-06, "loss": 1.2881, "step": 6203 }, { "epoch": 0.8783181142493098, "grad_norm": 10.58203521536932, "learning_rate": 3.107745831713968e-06, "loss": 1.271, "step": 6204 }, { "epoch": 0.8784596871239471, "grad_norm": 10.399197958317243, "learning_rate": 3.107189811136192e-06, "loss": 1.2306, "step": 6205 }, { "epoch": 0.8786012599985843, "grad_norm": 11.508938176868636, "learning_rate": 3.1066337586389007e-06, "loss": 1.2203, "step": 6206 }, { "epoch": 0.8787428328732215, "grad_norm": 13.19477650720903, "learning_rate": 3.1060776742513247e-06, "loss": 1.289, "step": 6207 }, { "epoch": 0.8788844057478588, "grad_norm": 8.389526067904132, "learning_rate": 3.1055215580026976e-06, "loss": 1.2408, "step": 6208 }, { "epoch": 0.879025978622496, "grad_norm": 8.567153000540376, "learning_rate": 3.1049654099222542e-06, "loss": 1.085, "step": 6209 }, { "epoch": 0.8791675514971331, "grad_norm": 13.222869736090468, "learning_rate": 3.104409230039229e-06, "loss": 1.1543, "step": 6210 }, { "epoch": 0.8793091243717703, "grad_norm": 10.778192501238323, "learning_rate": 3.103853018382862e-06, "loss": 1.4546, "step": 6211 }, { "epoch": 0.8794506972464076, "grad_norm": 10.71419399403868, "learning_rate": 3.1032967749823917e-06, "loss": 1.2073, "step": 6212 }, { "epoch": 0.8795922701210448, "grad_norm": 11.757383583331197, "learning_rate": 3.10274049986706e-06, "loss": 1.4574, "step": 6213 }, { "epoch": 0.879733842995682, "grad_norm": 9.64233462416621, "learning_rate": 3.1021841930661108e-06, "loss": 1.3004, "step": 6214 }, { "epoch": 0.8798754158703193, "grad_norm": 10.823913547889575, "learning_rate": 3.1016278546087864e-06, "loss": 1.1158, "step": 6215 }, { "epoch": 0.8800169887449565, "grad_norm": 9.058262327513097, "learning_rate": 3.101071484524334e-06, "loss": 1.2567, "step": 6216 }, { "epoch": 0.8801585616195937, "grad_norm": 10.913446975641413, "learning_rate": 3.100515082842002e-06, "loss": 1.3773, "step": 6217 }, { "epoch": 0.880300134494231, "grad_norm": 9.231709723366734, "learning_rate": 3.09995864959104e-06, "loss": 1.2831, "step": 6218 }, { "epoch": 0.8804417073688682, "grad_norm": 11.593767109572955, "learning_rate": 3.0994021848006996e-06, "loss": 1.2663, "step": 6219 }, { "epoch": 0.8805832802435053, "grad_norm": 8.250789257790121, "learning_rate": 3.0988456885002327e-06, "loss": 1.2291, "step": 6220 }, { "epoch": 0.8807248531181425, "grad_norm": 9.724780208192689, "learning_rate": 3.0982891607188948e-06, "loss": 1.4442, "step": 6221 }, { "epoch": 0.8808664259927798, "grad_norm": 10.207108238563457, "learning_rate": 3.0977326014859415e-06, "loss": 1.2751, "step": 6222 }, { "epoch": 0.881007998867417, "grad_norm": 10.516041808348872, "learning_rate": 3.0971760108306316e-06, "loss": 1.1888, "step": 6223 }, { "epoch": 0.8811495717420542, "grad_norm": 9.070451707453499, "learning_rate": 3.0966193887822232e-06, "loss": 1.1995, "step": 6224 }, { "epoch": 0.8812911446166914, "grad_norm": 10.467348830555297, "learning_rate": 3.096062735369979e-06, "loss": 1.0655, "step": 6225 }, { "epoch": 0.8814327174913287, "grad_norm": 8.867144049315348, "learning_rate": 3.095506050623161e-06, "loss": 1.3422, "step": 6226 }, { "epoch": 0.8815742903659659, "grad_norm": 7.997808633127509, "learning_rate": 3.0949493345710343e-06, "loss": 1.1864, "step": 6227 }, { "epoch": 0.8817158632406031, "grad_norm": 10.800985743430326, "learning_rate": 3.094392587242864e-06, "loss": 1.1408, "step": 6228 }, { "epoch": 0.8818574361152404, "grad_norm": 12.255609590333076, "learning_rate": 3.093835808667919e-06, "loss": 1.1045, "step": 6229 }, { "epoch": 0.8819990089898775, "grad_norm": 8.201521420643932, "learning_rate": 3.0932789988754695e-06, "loss": 1.3115, "step": 6230 }, { "epoch": 0.8821405818645147, "grad_norm": 12.992990217798392, "learning_rate": 3.0927221578947843e-06, "loss": 1.4626, "step": 6231 }, { "epoch": 0.882282154739152, "grad_norm": 8.699447210880836, "learning_rate": 3.092165285755137e-06, "loss": 1.4467, "step": 6232 }, { "epoch": 0.8824237276137892, "grad_norm": 7.931901774819256, "learning_rate": 3.0916083824858017e-06, "loss": 1.0405, "step": 6233 }, { "epoch": 0.8825653004884264, "grad_norm": 19.112844156942685, "learning_rate": 3.091051448116056e-06, "loss": 1.293, "step": 6234 }, { "epoch": 0.8827068733630636, "grad_norm": 13.97918789099699, "learning_rate": 3.090494482675176e-06, "loss": 1.3832, "step": 6235 }, { "epoch": 0.8828484462377009, "grad_norm": 10.544778075688807, "learning_rate": 3.0899374861924413e-06, "loss": 1.2645, "step": 6236 }, { "epoch": 0.8829900191123381, "grad_norm": 8.357674969511322, "learning_rate": 3.0893804586971327e-06, "loss": 1.2761, "step": 6237 }, { "epoch": 0.8831315919869753, "grad_norm": 9.183390626076694, "learning_rate": 3.088823400218533e-06, "loss": 1.2273, "step": 6238 }, { "epoch": 0.8832731648616126, "grad_norm": 13.27352601684729, "learning_rate": 3.0882663107859256e-06, "loss": 1.2152, "step": 6239 }, { "epoch": 0.8834147377362498, "grad_norm": 11.009608840299743, "learning_rate": 3.0877091904285976e-06, "loss": 1.2795, "step": 6240 }, { "epoch": 0.8835563106108869, "grad_norm": 12.436869389635149, "learning_rate": 3.087152039175835e-06, "loss": 1.1974, "step": 6241 }, { "epoch": 0.8836978834855241, "grad_norm": 8.58545320375501, "learning_rate": 3.0865948570569283e-06, "loss": 1.2387, "step": 6242 }, { "epoch": 0.8838394563601614, "grad_norm": 9.959082624478592, "learning_rate": 3.086037644101167e-06, "loss": 1.3251, "step": 6243 }, { "epoch": 0.8839810292347986, "grad_norm": 8.662489202143117, "learning_rate": 3.0854804003378437e-06, "loss": 1.3044, "step": 6244 }, { "epoch": 0.8841226021094358, "grad_norm": 7.929577575823687, "learning_rate": 3.084923125796252e-06, "loss": 1.2051, "step": 6245 }, { "epoch": 0.8842641749840731, "grad_norm": 11.780150992133375, "learning_rate": 3.0843658205056886e-06, "loss": 1.2823, "step": 6246 }, { "epoch": 0.8844057478587103, "grad_norm": 9.443724341771068, "learning_rate": 3.0838084844954485e-06, "loss": 1.3908, "step": 6247 }, { "epoch": 0.8845473207333475, "grad_norm": 10.81132051065373, "learning_rate": 3.0832511177948326e-06, "loss": 1.2362, "step": 6248 }, { "epoch": 0.8846888936079847, "grad_norm": 8.869660826588069, "learning_rate": 3.0826937204331403e-06, "loss": 1.1382, "step": 6249 }, { "epoch": 0.884830466482622, "grad_norm": 8.489392562002145, "learning_rate": 3.0821362924396732e-06, "loss": 1.3003, "step": 6250 }, { "epoch": 0.8849720393572591, "grad_norm": 9.210850528374385, "learning_rate": 3.081578833843736e-06, "loss": 1.2464, "step": 6251 }, { "epoch": 0.8851136122318963, "grad_norm": 7.777346938174259, "learning_rate": 3.0810213446746323e-06, "loss": 1.252, "step": 6252 }, { "epoch": 0.8852551851065336, "grad_norm": 7.951372174537412, "learning_rate": 3.0804638249616704e-06, "loss": 1.2446, "step": 6253 }, { "epoch": 0.8853967579811708, "grad_norm": 7.513421066615898, "learning_rate": 3.0799062747341574e-06, "loss": 1.1525, "step": 6254 }, { "epoch": 0.885538330855808, "grad_norm": 10.735704009340601, "learning_rate": 3.0793486940214034e-06, "loss": 1.4418, "step": 6255 }, { "epoch": 0.8856799037304453, "grad_norm": 9.873968529249096, "learning_rate": 3.0787910828527217e-06, "loss": 1.0402, "step": 6256 }, { "epoch": 0.8858214766050825, "grad_norm": 8.914748463828905, "learning_rate": 3.0782334412574244e-06, "loss": 1.1807, "step": 6257 }, { "epoch": 0.8859630494797197, "grad_norm": 8.572905295184084, "learning_rate": 3.0776757692648256e-06, "loss": 1.1416, "step": 6258 }, { "epoch": 0.8861046223543569, "grad_norm": 7.582828658641088, "learning_rate": 3.0771180669042422e-06, "loss": 1.1477, "step": 6259 }, { "epoch": 0.8862461952289942, "grad_norm": 9.78028743017284, "learning_rate": 3.076560334204993e-06, "loss": 1.4051, "step": 6260 }, { "epoch": 0.8863877681036313, "grad_norm": 8.25312237463277, "learning_rate": 3.0760025711963964e-06, "loss": 1.1215, "step": 6261 }, { "epoch": 0.8865293409782685, "grad_norm": 8.5216873847724, "learning_rate": 3.0754447779077745e-06, "loss": 1.1845, "step": 6262 }, { "epoch": 0.8866709138529058, "grad_norm": 9.089197861265342, "learning_rate": 3.0748869543684495e-06, "loss": 1.2284, "step": 6263 }, { "epoch": 0.886812486727543, "grad_norm": 8.252472940358402, "learning_rate": 3.0743291006077458e-06, "loss": 1.3061, "step": 6264 }, { "epoch": 0.8869540596021802, "grad_norm": 8.324055606097213, "learning_rate": 3.0737712166549897e-06, "loss": 1.2924, "step": 6265 }, { "epoch": 0.8870956324768174, "grad_norm": 9.882026586833828, "learning_rate": 3.073213302539508e-06, "loss": 1.3726, "step": 6266 }, { "epoch": 0.8872372053514547, "grad_norm": 9.356928790689976, "learning_rate": 3.072655358290632e-06, "loss": 1.2517, "step": 6267 }, { "epoch": 0.8873787782260919, "grad_norm": 8.146684554975801, "learning_rate": 3.07209738393769e-06, "loss": 1.122, "step": 6268 }, { "epoch": 0.8875203511007291, "grad_norm": 8.935958622685519, "learning_rate": 3.0715393795100146e-06, "loss": 1.1829, "step": 6269 }, { "epoch": 0.8876619239753664, "grad_norm": 7.780509699033722, "learning_rate": 3.07098134503694e-06, "loss": 1.1908, "step": 6270 }, { "epoch": 0.8878034968500036, "grad_norm": 6.587059929931397, "learning_rate": 3.0704232805478025e-06, "loss": 1.2292, "step": 6271 }, { "epoch": 0.8879450697246407, "grad_norm": 8.489507145139982, "learning_rate": 3.0698651860719387e-06, "loss": 1.1308, "step": 6272 }, { "epoch": 0.8880866425992779, "grad_norm": 8.469093681765916, "learning_rate": 3.0693070616386862e-06, "loss": 1.3385, "step": 6273 }, { "epoch": 0.8882282154739152, "grad_norm": 7.816546559444701, "learning_rate": 3.0687489072773864e-06, "loss": 1.3707, "step": 6274 }, { "epoch": 0.8883697883485524, "grad_norm": 9.332008767370365, "learning_rate": 3.0681907230173803e-06, "loss": 1.0924, "step": 6275 }, { "epoch": 0.8885113612231896, "grad_norm": 9.345183297651214, "learning_rate": 3.0676325088880122e-06, "loss": 1.2227, "step": 6276 }, { "epoch": 0.8886529340978269, "grad_norm": 7.323114500091154, "learning_rate": 3.067074264918626e-06, "loss": 1.1269, "step": 6277 }, { "epoch": 0.8887945069724641, "grad_norm": 9.873520752413489, "learning_rate": 3.0665159911385677e-06, "loss": 1.1985, "step": 6278 }, { "epoch": 0.8889360798471013, "grad_norm": 8.438264035141222, "learning_rate": 3.0659576875771868e-06, "loss": 1.246, "step": 6279 }, { "epoch": 0.8890776527217386, "grad_norm": 8.227804671621433, "learning_rate": 3.065399354263833e-06, "loss": 1.3775, "step": 6280 }, { "epoch": 0.8892192255963758, "grad_norm": 10.380270848498299, "learning_rate": 3.0648409912278553e-06, "loss": 1.2751, "step": 6281 }, { "epoch": 0.8893607984710129, "grad_norm": 8.464969931569918, "learning_rate": 3.064282598498609e-06, "loss": 1.1406, "step": 6282 }, { "epoch": 0.8895023713456501, "grad_norm": 7.667740649864468, "learning_rate": 3.063724176105447e-06, "loss": 1.1024, "step": 6283 }, { "epoch": 0.8896439442202874, "grad_norm": 8.55685884971953, "learning_rate": 3.0631657240777254e-06, "loss": 1.1826, "step": 6284 }, { "epoch": 0.8897855170949246, "grad_norm": 8.897831307545623, "learning_rate": 3.062607242444801e-06, "loss": 1.2255, "step": 6285 }, { "epoch": 0.8899270899695618, "grad_norm": 7.788706443247301, "learning_rate": 3.0620487312360337e-06, "loss": 1.0592, "step": 6286 }, { "epoch": 0.890068662844199, "grad_norm": 8.420668251902205, "learning_rate": 3.0614901904807836e-06, "loss": 1.2572, "step": 6287 }, { "epoch": 0.8902102357188363, "grad_norm": 8.319471605101247, "learning_rate": 3.060931620208414e-06, "loss": 1.1948, "step": 6288 }, { "epoch": 0.8903518085934735, "grad_norm": 9.061063665809847, "learning_rate": 3.060373020448286e-06, "loss": 1.1543, "step": 6289 }, { "epoch": 0.8904933814681107, "grad_norm": 8.059221887631711, "learning_rate": 3.0598143912297667e-06, "loss": 1.1737, "step": 6290 }, { "epoch": 0.890634954342748, "grad_norm": 8.693668069460275, "learning_rate": 3.0592557325822225e-06, "loss": 1.0537, "step": 6291 }, { "epoch": 0.8907765272173851, "grad_norm": 8.589498682103821, "learning_rate": 3.0586970445350206e-06, "loss": 1.3416, "step": 6292 }, { "epoch": 0.8909181000920223, "grad_norm": 10.75862272559365, "learning_rate": 3.0581383271175324e-06, "loss": 1.2821, "step": 6293 }, { "epoch": 0.8910596729666596, "grad_norm": 11.863845887122709, "learning_rate": 3.0575795803591278e-06, "loss": 1.1512, "step": 6294 }, { "epoch": 0.8912012458412968, "grad_norm": 7.085923233774917, "learning_rate": 3.0570208042891815e-06, "loss": 1.1885, "step": 6295 }, { "epoch": 0.891342818715934, "grad_norm": 8.363418542339936, "learning_rate": 3.0564619989370656e-06, "loss": 1.0867, "step": 6296 }, { "epoch": 0.8914843915905712, "grad_norm": 9.525281494466613, "learning_rate": 3.055903164332158e-06, "loss": 1.4128, "step": 6297 }, { "epoch": 0.8916259644652085, "grad_norm": 9.724744511952688, "learning_rate": 3.055344300503836e-06, "loss": 1.2141, "step": 6298 }, { "epoch": 0.8917675373398457, "grad_norm": 8.45707906308655, "learning_rate": 3.0547854074814777e-06, "loss": 1.161, "step": 6299 }, { "epoch": 0.8919091102144829, "grad_norm": 9.322097236523927, "learning_rate": 3.0542264852944635e-06, "loss": 1.2592, "step": 6300 }, { "epoch": 0.8920506830891202, "grad_norm": 7.94706354191339, "learning_rate": 3.0536675339721774e-06, "loss": 1.2628, "step": 6301 }, { "epoch": 0.8921922559637574, "grad_norm": 8.212132331595773, "learning_rate": 3.053108553544001e-06, "loss": 1.216, "step": 6302 }, { "epoch": 0.8923338288383945, "grad_norm": 8.626764531629439, "learning_rate": 3.052549544039321e-06, "loss": 1.3845, "step": 6303 }, { "epoch": 0.8924754017130317, "grad_norm": 8.610408658598406, "learning_rate": 3.0519905054875237e-06, "loss": 1.2056, "step": 6304 }, { "epoch": 0.892616974587669, "grad_norm": 8.907822640759006, "learning_rate": 3.0514314379179967e-06, "loss": 1.3217, "step": 6305 }, { "epoch": 0.8927585474623062, "grad_norm": 8.712597935918353, "learning_rate": 3.05087234136013e-06, "loss": 1.1248, "step": 6306 }, { "epoch": 0.8929001203369434, "grad_norm": 8.114042434652882, "learning_rate": 3.0503132158433145e-06, "loss": 1.2559, "step": 6307 }, { "epoch": 0.8930416932115807, "grad_norm": 8.643509174779199, "learning_rate": 3.049754061396944e-06, "loss": 1.316, "step": 6308 }, { "epoch": 0.8931832660862179, "grad_norm": 7.972266763820414, "learning_rate": 3.049194878050413e-06, "loss": 1.1483, "step": 6309 }, { "epoch": 0.8933248389608551, "grad_norm": 9.517291745191272, "learning_rate": 3.048635665833116e-06, "loss": 1.1012, "step": 6310 }, { "epoch": 0.8934664118354924, "grad_norm": 9.599382666766015, "learning_rate": 3.048076424774452e-06, "loss": 1.2113, "step": 6311 }, { "epoch": 0.8936079847101296, "grad_norm": 8.847115998268386, "learning_rate": 3.0475171549038187e-06, "loss": 1.2254, "step": 6312 }, { "epoch": 0.8937495575847667, "grad_norm": 7.4237364030936055, "learning_rate": 3.0469578562506165e-06, "loss": 1.2262, "step": 6313 }, { "epoch": 0.8938911304594039, "grad_norm": 8.02243424989855, "learning_rate": 3.046398528844248e-06, "loss": 1.1974, "step": 6314 }, { "epoch": 0.8940327033340412, "grad_norm": 7.168885382892981, "learning_rate": 3.0458391727141156e-06, "loss": 1.2026, "step": 6315 }, { "epoch": 0.8941742762086784, "grad_norm": 6.7336074143831075, "learning_rate": 3.045279787889625e-06, "loss": 1.1784, "step": 6316 }, { "epoch": 0.8943158490833156, "grad_norm": 8.246716857527161, "learning_rate": 3.044720374400183e-06, "loss": 1.3041, "step": 6317 }, { "epoch": 0.8944574219579529, "grad_norm": 10.182478527230957, "learning_rate": 3.044160932275197e-06, "loss": 1.455, "step": 6318 }, { "epoch": 0.8945989948325901, "grad_norm": 9.790415798938945, "learning_rate": 3.043601461544076e-06, "loss": 1.3586, "step": 6319 }, { "epoch": 0.8947405677072273, "grad_norm": 10.072049934366444, "learning_rate": 3.0430419622362327e-06, "loss": 1.1644, "step": 6320 }, { "epoch": 0.8948821405818645, "grad_norm": 10.442161387185852, "learning_rate": 3.0424824343810773e-06, "loss": 1.2317, "step": 6321 }, { "epoch": 0.8950237134565018, "grad_norm": 8.196208021258023, "learning_rate": 3.0419228780080246e-06, "loss": 1.1599, "step": 6322 }, { "epoch": 0.8951652863311389, "grad_norm": 8.816398894408762, "learning_rate": 3.041363293146491e-06, "loss": 1.298, "step": 6323 }, { "epoch": 0.8953068592057761, "grad_norm": 8.682125048000696, "learning_rate": 3.0408036798258924e-06, "loss": 1.1652, "step": 6324 }, { "epoch": 0.8954484320804134, "grad_norm": 7.546881318336512, "learning_rate": 3.040244038075648e-06, "loss": 1.1111, "step": 6325 }, { "epoch": 0.8955900049550506, "grad_norm": 8.191727107598927, "learning_rate": 3.0396843679251777e-06, "loss": 1.3247, "step": 6326 }, { "epoch": 0.8957315778296878, "grad_norm": 7.873745772982424, "learning_rate": 3.0391246694039016e-06, "loss": 1.0939, "step": 6327 }, { "epoch": 0.895873150704325, "grad_norm": 10.660554167637478, "learning_rate": 3.038564942541244e-06, "loss": 1.3446, "step": 6328 }, { "epoch": 0.8960147235789623, "grad_norm": 12.354959926686895, "learning_rate": 3.0380051873666287e-06, "loss": 1.1454, "step": 6329 }, { "epoch": 0.8961562964535995, "grad_norm": 9.744282537739398, "learning_rate": 3.037445403909482e-06, "loss": 1.1725, "step": 6330 }, { "epoch": 0.8962978693282367, "grad_norm": 9.57631428515616, "learning_rate": 3.0368855921992314e-06, "loss": 1.2289, "step": 6331 }, { "epoch": 0.896439442202874, "grad_norm": 8.637791091275792, "learning_rate": 3.036325752265305e-06, "loss": 1.2423, "step": 6332 }, { "epoch": 0.8965810150775112, "grad_norm": 9.678657286026382, "learning_rate": 3.035765884137134e-06, "loss": 1.2476, "step": 6333 }, { "epoch": 0.8967225879521483, "grad_norm": 10.290877174232365, "learning_rate": 3.0352059878441496e-06, "loss": 1.1264, "step": 6334 }, { "epoch": 0.8968641608267856, "grad_norm": 7.6848092681307865, "learning_rate": 3.0346460634157865e-06, "loss": 1.3084, "step": 6335 }, { "epoch": 0.8970057337014228, "grad_norm": 11.74172877305885, "learning_rate": 3.034086110881478e-06, "loss": 1.3236, "step": 6336 }, { "epoch": 0.89714730657606, "grad_norm": 8.817999239946845, "learning_rate": 3.0335261302706605e-06, "loss": 1.3416, "step": 6337 }, { "epoch": 0.8972888794506972, "grad_norm": 8.309859006858492, "learning_rate": 3.032966121612772e-06, "loss": 1.2956, "step": 6338 }, { "epoch": 0.8974304523253345, "grad_norm": 9.168981456370474, "learning_rate": 3.0324060849372526e-06, "loss": 1.1325, "step": 6339 }, { "epoch": 0.8975720251999717, "grad_norm": 8.691068725890075, "learning_rate": 3.0318460202735417e-06, "loss": 1.1302, "step": 6340 }, { "epoch": 0.8977135980746089, "grad_norm": 9.030806903463096, "learning_rate": 3.0312859276510833e-06, "loss": 1.183, "step": 6341 }, { "epoch": 0.8978551709492462, "grad_norm": 10.264567746046767, "learning_rate": 3.0307258070993186e-06, "loss": 1.1714, "step": 6342 }, { "epoch": 0.8979967438238834, "grad_norm": 7.715284797519527, "learning_rate": 3.0301656586476943e-06, "loss": 1.1966, "step": 6343 }, { "epoch": 0.8981383166985205, "grad_norm": 7.756251613177209, "learning_rate": 3.029605482325656e-06, "loss": 1.3079, "step": 6344 }, { "epoch": 0.8982798895731577, "grad_norm": 8.977807552091035, "learning_rate": 3.0290452781626526e-06, "loss": 1.2568, "step": 6345 }, { "epoch": 0.898421462447795, "grad_norm": 9.4983338350518, "learning_rate": 3.028485046188134e-06, "loss": 1.2757, "step": 6346 }, { "epoch": 0.8985630353224322, "grad_norm": 9.000370441866286, "learning_rate": 3.0279247864315508e-06, "loss": 1.2352, "step": 6347 }, { "epoch": 0.8987046081970694, "grad_norm": 10.457652846205875, "learning_rate": 3.0273644989223543e-06, "loss": 1.3436, "step": 6348 }, { "epoch": 0.8988461810717067, "grad_norm": 12.142736302904506, "learning_rate": 3.0268041836900002e-06, "loss": 1.4648, "step": 6349 }, { "epoch": 0.8989877539463439, "grad_norm": 8.781843389370461, "learning_rate": 3.026243840763942e-06, "loss": 1.3238, "step": 6350 }, { "epoch": 0.8991293268209811, "grad_norm": 11.492445745101499, "learning_rate": 3.025683470173638e-06, "loss": 1.298, "step": 6351 }, { "epoch": 0.8992708996956184, "grad_norm": 10.954769081875119, "learning_rate": 3.0251230719485465e-06, "loss": 1.1836, "step": 6352 }, { "epoch": 0.8994124725702556, "grad_norm": 11.941380693871901, "learning_rate": 3.0245626461181256e-06, "loss": 1.1857, "step": 6353 }, { "epoch": 0.8995540454448928, "grad_norm": 8.936855906506064, "learning_rate": 3.024002192711838e-06, "loss": 1.2258, "step": 6354 }, { "epoch": 0.8996956183195299, "grad_norm": 7.986312840074637, "learning_rate": 3.023441711759146e-06, "loss": 1.2752, "step": 6355 }, { "epoch": 0.8998371911941672, "grad_norm": 11.525054838832045, "learning_rate": 3.0228812032895133e-06, "loss": 1.1743, "step": 6356 }, { "epoch": 0.8999787640688044, "grad_norm": 10.301047884774349, "learning_rate": 3.022320667332406e-06, "loss": 1.248, "step": 6357 }, { "epoch": 0.9001203369434416, "grad_norm": 10.042012653220539, "learning_rate": 3.02176010391729e-06, "loss": 1.1239, "step": 6358 }, { "epoch": 0.9002619098180789, "grad_norm": 9.33120998570728, "learning_rate": 3.021199513073635e-06, "loss": 1.1528, "step": 6359 }, { "epoch": 0.9004034826927161, "grad_norm": 8.919390491137746, "learning_rate": 3.0206388948309094e-06, "loss": 1.2946, "step": 6360 }, { "epoch": 0.9005450555673533, "grad_norm": 8.181794719469742, "learning_rate": 3.020078249218586e-06, "loss": 1.2956, "step": 6361 }, { "epoch": 0.9006866284419905, "grad_norm": 9.456690394905484, "learning_rate": 3.019517576266137e-06, "loss": 1.2716, "step": 6362 }, { "epoch": 0.9008282013166278, "grad_norm": 9.038060982626948, "learning_rate": 3.0189568760030363e-06, "loss": 1.0887, "step": 6363 }, { "epoch": 0.900969774191265, "grad_norm": 10.872618666628656, "learning_rate": 3.018396148458759e-06, "loss": 1.2292, "step": 6364 }, { "epoch": 0.9011113470659021, "grad_norm": 11.774350521374643, "learning_rate": 3.0178353936627835e-06, "loss": 1.2357, "step": 6365 }, { "epoch": 0.9012529199405394, "grad_norm": 9.797577803863254, "learning_rate": 3.017274611644587e-06, "loss": 1.2752, "step": 6366 }, { "epoch": 0.9013944928151766, "grad_norm": 7.063485135938562, "learning_rate": 3.016713802433649e-06, "loss": 1.1336, "step": 6367 }, { "epoch": 0.9015360656898138, "grad_norm": 9.75770890094624, "learning_rate": 3.016152966059453e-06, "loss": 1.3332, "step": 6368 }, { "epoch": 0.901677638564451, "grad_norm": 9.642026035883752, "learning_rate": 3.01559210255148e-06, "loss": 1.1531, "step": 6369 }, { "epoch": 0.9018192114390883, "grad_norm": 9.768905698935143, "learning_rate": 3.0150312119392144e-06, "loss": 1.3142, "step": 6370 }, { "epoch": 0.9019607843137255, "grad_norm": 11.648996177288263, "learning_rate": 3.0144702942521424e-06, "loss": 1.2917, "step": 6371 }, { "epoch": 0.9021023571883627, "grad_norm": 8.213550387813088, "learning_rate": 3.0139093495197504e-06, "loss": 1.1842, "step": 6372 }, { "epoch": 0.902243930063, "grad_norm": 9.290728009856117, "learning_rate": 3.0133483777715272e-06, "loss": 1.3223, "step": 6373 }, { "epoch": 0.9023855029376372, "grad_norm": 10.550309770565166, "learning_rate": 3.0127873790369627e-06, "loss": 1.2476, "step": 6374 }, { "epoch": 0.9025270758122743, "grad_norm": 8.935648266159733, "learning_rate": 3.0122263533455474e-06, "loss": 1.1548, "step": 6375 }, { "epoch": 0.9026686486869115, "grad_norm": 9.669337024388536, "learning_rate": 3.0116653007267753e-06, "loss": 1.3593, "step": 6376 }, { "epoch": 0.9028102215615488, "grad_norm": 9.007638868393784, "learning_rate": 3.0111042212101394e-06, "loss": 1.1329, "step": 6377 }, { "epoch": 0.902951794436186, "grad_norm": 10.42905765303239, "learning_rate": 3.0105431148251364e-06, "loss": 1.1712, "step": 6378 }, { "epoch": 0.9030933673108232, "grad_norm": 8.758220190109261, "learning_rate": 3.0099819816012623e-06, "loss": 1.2157, "step": 6379 }, { "epoch": 0.9032349401854605, "grad_norm": 10.446368258981018, "learning_rate": 3.0094208215680156e-06, "loss": 1.1432, "step": 6380 }, { "epoch": 0.9033765130600977, "grad_norm": 9.30316267483239, "learning_rate": 3.008859634754895e-06, "loss": 1.1996, "step": 6381 }, { "epoch": 0.9035180859347349, "grad_norm": 9.381474001635317, "learning_rate": 3.0082984211914033e-06, "loss": 1.1329, "step": 6382 }, { "epoch": 0.9036596588093722, "grad_norm": 9.473282034742006, "learning_rate": 3.007737180907044e-06, "loss": 1.2563, "step": 6383 }, { "epoch": 0.9038012316840094, "grad_norm": 10.849906646748813, "learning_rate": 3.007175913931319e-06, "loss": 1.1662, "step": 6384 }, { "epoch": 0.9039428045586466, "grad_norm": 8.447360232490439, "learning_rate": 3.006614620293734e-06, "loss": 1.1733, "step": 6385 }, { "epoch": 0.9040843774332837, "grad_norm": 7.321208235452361, "learning_rate": 3.0060533000237964e-06, "loss": 1.121, "step": 6386 }, { "epoch": 0.904225950307921, "grad_norm": 8.539812534963763, "learning_rate": 3.005491953151014e-06, "loss": 1.2219, "step": 6387 }, { "epoch": 0.9043675231825582, "grad_norm": 8.221925071410151, "learning_rate": 3.0049305797048965e-06, "loss": 1.2439, "step": 6388 }, { "epoch": 0.9045090960571954, "grad_norm": 8.538068456979847, "learning_rate": 3.0043691797149548e-06, "loss": 1.1283, "step": 6389 }, { "epoch": 0.9046506689318327, "grad_norm": 9.595499779992288, "learning_rate": 3.003807753210702e-06, "loss": 1.0282, "step": 6390 }, { "epoch": 0.9047922418064699, "grad_norm": 9.08147049680157, "learning_rate": 3.0032463002216504e-06, "loss": 1.3119, "step": 6391 }, { "epoch": 0.9049338146811071, "grad_norm": 9.664257100026372, "learning_rate": 3.0026848207773163e-06, "loss": 1.3406, "step": 6392 }, { "epoch": 0.9050753875557443, "grad_norm": 9.729871660348634, "learning_rate": 3.0021233149072164e-06, "loss": 1.3096, "step": 6393 }, { "epoch": 0.9052169604303816, "grad_norm": 8.502453618179688, "learning_rate": 3.0015617826408684e-06, "loss": 1.289, "step": 6394 }, { "epoch": 0.9053585333050188, "grad_norm": 8.43590570151675, "learning_rate": 3.001000224007791e-06, "loss": 1.3218, "step": 6395 }, { "epoch": 0.9055001061796559, "grad_norm": 8.786962422712644, "learning_rate": 3.000438639037505e-06, "loss": 1.2339, "step": 6396 }, { "epoch": 0.9056416790542932, "grad_norm": 9.193820569813285, "learning_rate": 2.9998770277595337e-06, "loss": 1.2073, "step": 6397 }, { "epoch": 0.9057832519289304, "grad_norm": 6.436003575885318, "learning_rate": 2.999315390203399e-06, "loss": 1.2531, "step": 6398 }, { "epoch": 0.9059248248035676, "grad_norm": 8.586831376922637, "learning_rate": 2.9987537263986277e-06, "loss": 1.2088, "step": 6399 }, { "epoch": 0.9060663976782048, "grad_norm": 8.42936212373085, "learning_rate": 2.998192036374744e-06, "loss": 1.3507, "step": 6400 }, { "epoch": 0.9062079705528421, "grad_norm": 8.628376175575918, "learning_rate": 2.9976303201612765e-06, "loss": 1.1113, "step": 6401 }, { "epoch": 0.9063495434274793, "grad_norm": 8.35556645133388, "learning_rate": 2.9970685777877545e-06, "loss": 1.2264, "step": 6402 }, { "epoch": 0.9064911163021165, "grad_norm": 7.044515481156799, "learning_rate": 2.9965068092837074e-06, "loss": 1.1671, "step": 6403 }, { "epoch": 0.9066326891767538, "grad_norm": 9.767202997508102, "learning_rate": 2.9959450146786674e-06, "loss": 1.3445, "step": 6404 }, { "epoch": 0.906774262051391, "grad_norm": 9.968555986674934, "learning_rate": 2.995383194002169e-06, "loss": 1.2756, "step": 6405 }, { "epoch": 0.9069158349260281, "grad_norm": 9.036995192235757, "learning_rate": 2.9948213472837443e-06, "loss": 1.1977, "step": 6406 }, { "epoch": 0.9070574078006653, "grad_norm": 8.40312914013007, "learning_rate": 2.994259474552931e-06, "loss": 1.183, "step": 6407 }, { "epoch": 0.9071989806753026, "grad_norm": 7.6390796182885, "learning_rate": 2.993697575839265e-06, "loss": 1.1469, "step": 6408 }, { "epoch": 0.9073405535499398, "grad_norm": 8.691859626517783, "learning_rate": 2.9931356511722857e-06, "loss": 1.3147, "step": 6409 }, { "epoch": 0.907482126424577, "grad_norm": 7.447806774120077, "learning_rate": 2.9925737005815337e-06, "loss": 1.1726, "step": 6410 }, { "epoch": 0.9076236992992143, "grad_norm": 8.972764661812407, "learning_rate": 2.9920117240965487e-06, "loss": 1.1582, "step": 6411 }, { "epoch": 0.9077652721738515, "grad_norm": 9.121758708362865, "learning_rate": 2.991449721746875e-06, "loss": 1.2689, "step": 6412 }, { "epoch": 0.9079068450484887, "grad_norm": 8.512862403090121, "learning_rate": 2.9908876935620544e-06, "loss": 1.3412, "step": 6413 }, { "epoch": 0.908048417923126, "grad_norm": 10.784472528468195, "learning_rate": 2.990325639571635e-06, "loss": 1.3371, "step": 6414 }, { "epoch": 0.9081899907977632, "grad_norm": 8.208516178579654, "learning_rate": 2.9897635598051626e-06, "loss": 1.1457, "step": 6415 }, { "epoch": 0.9083315636724004, "grad_norm": 6.974895419085345, "learning_rate": 2.9892014542921845e-06, "loss": 1.2132, "step": 6416 }, { "epoch": 0.9084731365470375, "grad_norm": 9.179742353559515, "learning_rate": 2.9886393230622507e-06, "loss": 1.1152, "step": 6417 }, { "epoch": 0.9086147094216748, "grad_norm": 7.738695821965245, "learning_rate": 2.9880771661449115e-06, "loss": 1.2667, "step": 6418 }, { "epoch": 0.908756282296312, "grad_norm": 8.59065551530366, "learning_rate": 2.9875149835697203e-06, "loss": 1.2835, "step": 6419 }, { "epoch": 0.9088978551709492, "grad_norm": 7.358845491251419, "learning_rate": 2.98695277536623e-06, "loss": 1.1297, "step": 6420 }, { "epoch": 0.9090394280455865, "grad_norm": 7.874679074484436, "learning_rate": 2.9863905415639954e-06, "loss": 1.2685, "step": 6421 }, { "epoch": 0.9091810009202237, "grad_norm": 8.333876121645767, "learning_rate": 2.9858282821925723e-06, "loss": 1.2, "step": 6422 }, { "epoch": 0.9093225737948609, "grad_norm": 7.335895379771253, "learning_rate": 2.985265997281519e-06, "loss": 1.2327, "step": 6423 }, { "epoch": 0.9094641466694982, "grad_norm": 8.320982770830675, "learning_rate": 2.984703686860394e-06, "loss": 1.2931, "step": 6424 }, { "epoch": 0.9096057195441354, "grad_norm": 7.602678991266058, "learning_rate": 2.984141350958757e-06, "loss": 1.1223, "step": 6425 }, { "epoch": 0.9097472924187726, "grad_norm": 7.659656249617517, "learning_rate": 2.9835789896061707e-06, "loss": 1.2239, "step": 6426 }, { "epoch": 0.9098888652934097, "grad_norm": 8.158327269300234, "learning_rate": 2.9830166028321975e-06, "loss": 1.1796, "step": 6427 }, { "epoch": 0.910030438168047, "grad_norm": 11.612345709458385, "learning_rate": 2.9824541906664018e-06, "loss": 1.2315, "step": 6428 }, { "epoch": 0.9101720110426842, "grad_norm": 7.8789401415359315, "learning_rate": 2.9818917531383483e-06, "loss": 0.9946, "step": 6429 }, { "epoch": 0.9103135839173214, "grad_norm": 10.622936093201087, "learning_rate": 2.981329290277605e-06, "loss": 1.3208, "step": 6430 }, { "epoch": 0.9104551567919587, "grad_norm": 8.029761744250797, "learning_rate": 2.980766802113741e-06, "loss": 1.2817, "step": 6431 }, { "epoch": 0.9105967296665959, "grad_norm": 8.318997475305581, "learning_rate": 2.9802042886763234e-06, "loss": 1.2706, "step": 6432 }, { "epoch": 0.9107383025412331, "grad_norm": 7.846921970921121, "learning_rate": 2.9796417499949244e-06, "loss": 1.2824, "step": 6433 }, { "epoch": 0.9108798754158703, "grad_norm": 9.585738126364923, "learning_rate": 2.9790791860991165e-06, "loss": 1.1358, "step": 6434 }, { "epoch": 0.9110214482905076, "grad_norm": 7.428398654311898, "learning_rate": 2.9785165970184724e-06, "loss": 1.102, "step": 6435 }, { "epoch": 0.9111630211651448, "grad_norm": 7.55728978779877, "learning_rate": 2.977953982782569e-06, "loss": 1.1591, "step": 6436 }, { "epoch": 0.9113045940397819, "grad_norm": 8.136612326026292, "learning_rate": 2.97739134342098e-06, "loss": 1.182, "step": 6437 }, { "epoch": 0.9114461669144192, "grad_norm": 8.580713656251788, "learning_rate": 2.9768286789632845e-06, "loss": 1.2164, "step": 6438 }, { "epoch": 0.9115877397890564, "grad_norm": 11.073400956356378, "learning_rate": 2.9762659894390603e-06, "loss": 1.0768, "step": 6439 }, { "epoch": 0.9117293126636936, "grad_norm": 10.099464900883595, "learning_rate": 2.9757032748778886e-06, "loss": 1.2209, "step": 6440 }, { "epoch": 0.9118708855383308, "grad_norm": 9.563535148092916, "learning_rate": 2.97514053530935e-06, "loss": 1.3414, "step": 6441 }, { "epoch": 0.9120124584129681, "grad_norm": 7.784923253302788, "learning_rate": 2.9745777707630284e-06, "loss": 1.2911, "step": 6442 }, { "epoch": 0.9121540312876053, "grad_norm": 9.855012390965028, "learning_rate": 2.974014981268507e-06, "loss": 1.2118, "step": 6443 }, { "epoch": 0.9122956041622425, "grad_norm": 9.307070384555304, "learning_rate": 2.973452166855372e-06, "loss": 1.2809, "step": 6444 }, { "epoch": 0.9124371770368798, "grad_norm": 10.073003558810091, "learning_rate": 2.972889327553209e-06, "loss": 1.3, "step": 6445 }, { "epoch": 0.912578749911517, "grad_norm": 10.35201502548613, "learning_rate": 2.972326463391606e-06, "loss": 1.3025, "step": 6446 }, { "epoch": 0.9127203227861542, "grad_norm": 8.559207478137715, "learning_rate": 2.971763574400154e-06, "loss": 1.2166, "step": 6447 }, { "epoch": 0.9128618956607913, "grad_norm": 8.528292524090906, "learning_rate": 2.971200660608442e-06, "loss": 1.2967, "step": 6448 }, { "epoch": 0.9130034685354286, "grad_norm": 10.303260687089928, "learning_rate": 2.970637722046063e-06, "loss": 1.3361, "step": 6449 }, { "epoch": 0.9131450414100658, "grad_norm": 10.061814563223052, "learning_rate": 2.9700747587426097e-06, "loss": 1.1667, "step": 6450 }, { "epoch": 0.913286614284703, "grad_norm": 7.867206168128918, "learning_rate": 2.9695117707276774e-06, "loss": 1.0266, "step": 6451 }, { "epoch": 0.9134281871593403, "grad_norm": 8.960659511680898, "learning_rate": 2.9689487580308613e-06, "loss": 1.202, "step": 6452 }, { "epoch": 0.9135697600339775, "grad_norm": 8.085401080486003, "learning_rate": 2.9683857206817583e-06, "loss": 1.1715, "step": 6453 }, { "epoch": 0.9137113329086147, "grad_norm": 8.000528318126287, "learning_rate": 2.9678226587099674e-06, "loss": 1.1593, "step": 6454 }, { "epoch": 0.913852905783252, "grad_norm": 10.43202038223522, "learning_rate": 2.967259572145088e-06, "loss": 1.2522, "step": 6455 }, { "epoch": 0.9139944786578892, "grad_norm": 9.455900128555237, "learning_rate": 2.966696461016721e-06, "loss": 1.2506, "step": 6456 }, { "epoch": 0.9141360515325264, "grad_norm": 8.153396516535878, "learning_rate": 2.966133325354469e-06, "loss": 1.3149, "step": 6457 }, { "epoch": 0.9142776244071635, "grad_norm": 7.568649440005999, "learning_rate": 2.9655701651879364e-06, "loss": 1.1497, "step": 6458 }, { "epoch": 0.9144191972818008, "grad_norm": 9.723909731748408, "learning_rate": 2.965006980546727e-06, "loss": 1.2849, "step": 6459 }, { "epoch": 0.914560770156438, "grad_norm": 9.52303332753405, "learning_rate": 2.9644437714604475e-06, "loss": 1.2456, "step": 6460 }, { "epoch": 0.9147023430310752, "grad_norm": 10.213129486891646, "learning_rate": 2.963880537958705e-06, "loss": 1.3195, "step": 6461 }, { "epoch": 0.9148439159057125, "grad_norm": 9.98045651907274, "learning_rate": 2.9633172800711085e-06, "loss": 1.2047, "step": 6462 }, { "epoch": 0.9149854887803497, "grad_norm": 8.873159298277992, "learning_rate": 2.962753997827268e-06, "loss": 1.1601, "step": 6463 }, { "epoch": 0.9151270616549869, "grad_norm": 8.262692484727237, "learning_rate": 2.962190691256795e-06, "loss": 1.3333, "step": 6464 }, { "epoch": 0.9152686345296241, "grad_norm": 8.146948176305182, "learning_rate": 2.961627360389302e-06, "loss": 1.2101, "step": 6465 }, { "epoch": 0.9154102074042614, "grad_norm": 8.489085651312072, "learning_rate": 2.9610640052544026e-06, "loss": 1.2931, "step": 6466 }, { "epoch": 0.9155517802788986, "grad_norm": 6.91591330718934, "learning_rate": 2.960500625881712e-06, "loss": 1.1977, "step": 6467 }, { "epoch": 0.9156933531535357, "grad_norm": 9.004312541673825, "learning_rate": 2.9599372223008483e-06, "loss": 1.3075, "step": 6468 }, { "epoch": 0.915834926028173, "grad_norm": 10.500690255864582, "learning_rate": 2.9593737945414264e-06, "loss": 1.2912, "step": 6469 }, { "epoch": 0.9159764989028102, "grad_norm": 8.578844940047063, "learning_rate": 2.9588103426330665e-06, "loss": 1.1719, "step": 6470 }, { "epoch": 0.9161180717774474, "grad_norm": 8.140275416858122, "learning_rate": 2.95824686660539e-06, "loss": 1.116, "step": 6471 }, { "epoch": 0.9162596446520846, "grad_norm": 11.202481090390933, "learning_rate": 2.957683366488017e-06, "loss": 1.3659, "step": 6472 }, { "epoch": 0.9164012175267219, "grad_norm": 7.892674789837395, "learning_rate": 2.9571198423105708e-06, "loss": 1.2416, "step": 6473 }, { "epoch": 0.9165427904013591, "grad_norm": 8.13595265386528, "learning_rate": 2.956556294102675e-06, "loss": 1.2847, "step": 6474 }, { "epoch": 0.9166843632759963, "grad_norm": 8.775409969305965, "learning_rate": 2.9559927218939555e-06, "loss": 1.1961, "step": 6475 }, { "epoch": 0.9168259361506336, "grad_norm": 9.962084416201629, "learning_rate": 2.9554291257140384e-06, "loss": 1.4218, "step": 6476 }, { "epoch": 0.9169675090252708, "grad_norm": 8.54542119076858, "learning_rate": 2.9548655055925516e-06, "loss": 1.2313, "step": 6477 }, { "epoch": 0.917109081899908, "grad_norm": 9.070677967577941, "learning_rate": 2.954301861559124e-06, "loss": 1.1667, "step": 6478 }, { "epoch": 0.9172506547745451, "grad_norm": 11.356856076029167, "learning_rate": 2.9537381936433873e-06, "loss": 1.3187, "step": 6479 }, { "epoch": 0.9173922276491824, "grad_norm": 7.624975735985344, "learning_rate": 2.953174501874971e-06, "loss": 1.2858, "step": 6480 }, { "epoch": 0.9175338005238196, "grad_norm": 9.766443715680877, "learning_rate": 2.9526107862835103e-06, "loss": 1.2515, "step": 6481 }, { "epoch": 0.9176753733984568, "grad_norm": 8.73792589101974, "learning_rate": 2.952047046898637e-06, "loss": 1.1683, "step": 6482 }, { "epoch": 0.9178169462730941, "grad_norm": 10.562436075412148, "learning_rate": 2.9514832837499884e-06, "loss": 1.416, "step": 6483 }, { "epoch": 0.9179585191477313, "grad_norm": 9.160659717640556, "learning_rate": 2.9509194968671995e-06, "loss": 1.2619, "step": 6484 }, { "epoch": 0.9181000920223685, "grad_norm": 8.062768266339994, "learning_rate": 2.9503556862799094e-06, "loss": 1.3337, "step": 6485 }, { "epoch": 0.9182416648970058, "grad_norm": 8.993661980002743, "learning_rate": 2.949791852017756e-06, "loss": 1.2085, "step": 6486 }, { "epoch": 0.918383237771643, "grad_norm": 8.161877324187012, "learning_rate": 2.949227994110381e-06, "loss": 1.2753, "step": 6487 }, { "epoch": 0.9185248106462802, "grad_norm": 9.470916802278627, "learning_rate": 2.948664112587425e-06, "loss": 1.2525, "step": 6488 }, { "epoch": 0.9186663835209173, "grad_norm": 8.161135091184468, "learning_rate": 2.9481002074785315e-06, "loss": 1.2482, "step": 6489 }, { "epoch": 0.9188079563955546, "grad_norm": 8.90278338509666, "learning_rate": 2.9475362788133437e-06, "loss": 1.0873, "step": 6490 }, { "epoch": 0.9189495292701918, "grad_norm": 8.68302835330378, "learning_rate": 2.946972326621507e-06, "loss": 1.2802, "step": 6491 }, { "epoch": 0.919091102144829, "grad_norm": 9.005396178922695, "learning_rate": 2.946408350932669e-06, "loss": 1.2585, "step": 6492 }, { "epoch": 0.9192326750194663, "grad_norm": 8.448294057698961, "learning_rate": 2.9458443517764767e-06, "loss": 1.2888, "step": 6493 }, { "epoch": 0.9193742478941035, "grad_norm": 9.657258113840788, "learning_rate": 2.9452803291825793e-06, "loss": 1.2292, "step": 6494 }, { "epoch": 0.9195158207687407, "grad_norm": 8.737244382074353, "learning_rate": 2.9447162831806275e-06, "loss": 1.2961, "step": 6495 }, { "epoch": 0.919657393643378, "grad_norm": 8.853173293693288, "learning_rate": 2.944152213800272e-06, "loss": 1.1379, "step": 6496 }, { "epoch": 0.9197989665180152, "grad_norm": 8.280969003192558, "learning_rate": 2.9435881210711652e-06, "loss": 1.1281, "step": 6497 }, { "epoch": 0.9199405393926524, "grad_norm": 7.902628314126904, "learning_rate": 2.943024005022962e-06, "loss": 1.2456, "step": 6498 }, { "epoch": 0.9200821122672896, "grad_norm": 12.032726167558103, "learning_rate": 2.9424598656853176e-06, "loss": 1.3131, "step": 6499 }, { "epoch": 0.9202236851419268, "grad_norm": 9.63629557682036, "learning_rate": 2.9418957030878876e-06, "loss": 1.2276, "step": 6500 }, { "epoch": 0.920365258016564, "grad_norm": 8.784113189801802, "learning_rate": 2.9413315172603296e-06, "loss": 1.2601, "step": 6501 }, { "epoch": 0.9205068308912012, "grad_norm": 9.711407307119067, "learning_rate": 2.9407673082323033e-06, "loss": 1.3415, "step": 6502 }, { "epoch": 0.9206484037658385, "grad_norm": 8.853689478562737, "learning_rate": 2.9402030760334684e-06, "loss": 1.1936, "step": 6503 }, { "epoch": 0.9207899766404757, "grad_norm": 7.8336369340649235, "learning_rate": 2.9396388206934858e-06, "loss": 1.2454, "step": 6504 }, { "epoch": 0.9209315495151129, "grad_norm": 7.996675754344251, "learning_rate": 2.9390745422420186e-06, "loss": 1.2818, "step": 6505 }, { "epoch": 0.9210731223897501, "grad_norm": 7.803238406749364, "learning_rate": 2.9385102407087296e-06, "loss": 1.2055, "step": 6506 }, { "epoch": 0.9212146952643874, "grad_norm": 8.03171309386534, "learning_rate": 2.937945916123284e-06, "loss": 1.2708, "step": 6507 }, { "epoch": 0.9213562681390246, "grad_norm": 10.345677867453121, "learning_rate": 2.9373815685153485e-06, "loss": 1.1563, "step": 6508 }, { "epoch": 0.9214978410136618, "grad_norm": 8.579224229433933, "learning_rate": 2.9368171979145898e-06, "loss": 1.2308, "step": 6509 }, { "epoch": 0.921639413888299, "grad_norm": 9.380351255952977, "learning_rate": 2.936252804350677e-06, "loss": 1.14, "step": 6510 }, { "epoch": 0.9217809867629362, "grad_norm": 8.47768164775049, "learning_rate": 2.9356883878532794e-06, "loss": 1.3185, "step": 6511 }, { "epoch": 0.9219225596375734, "grad_norm": 8.530777802866218, "learning_rate": 2.9351239484520684e-06, "loss": 1.0688, "step": 6512 }, { "epoch": 0.9220641325122106, "grad_norm": 8.894424886825613, "learning_rate": 2.9345594861767157e-06, "loss": 1.2556, "step": 6513 }, { "epoch": 0.9222057053868479, "grad_norm": 10.457352631524797, "learning_rate": 2.9339950010568945e-06, "loss": 1.2066, "step": 6514 }, { "epoch": 0.9223472782614851, "grad_norm": 10.22220882009466, "learning_rate": 2.9334304931222795e-06, "loss": 1.2513, "step": 6515 }, { "epoch": 0.9224888511361223, "grad_norm": 8.56878476014102, "learning_rate": 2.932865962402548e-06, "loss": 1.2552, "step": 6516 }, { "epoch": 0.9226304240107596, "grad_norm": 8.176954160670288, "learning_rate": 2.9323014089273743e-06, "loss": 1.2333, "step": 6517 }, { "epoch": 0.9227719968853968, "grad_norm": 8.04221317101134, "learning_rate": 2.9317368327264383e-06, "loss": 1.1657, "step": 6518 }, { "epoch": 0.922913569760034, "grad_norm": 7.82155968841718, "learning_rate": 2.9311722338294193e-06, "loss": 1.2114, "step": 6519 }, { "epoch": 0.9230551426346711, "grad_norm": 9.961220890576586, "learning_rate": 2.930607612265997e-06, "loss": 1.2957, "step": 6520 }, { "epoch": 0.9231967155093084, "grad_norm": 9.857127953749107, "learning_rate": 2.9300429680658538e-06, "loss": 1.1555, "step": 6521 }, { "epoch": 0.9233382883839456, "grad_norm": 10.170434055724266, "learning_rate": 2.9294783012586725e-06, "loss": 1.2641, "step": 6522 }, { "epoch": 0.9234798612585828, "grad_norm": 7.777075202938326, "learning_rate": 2.9289136118741367e-06, "loss": 1.2041, "step": 6523 }, { "epoch": 0.9236214341332201, "grad_norm": 7.917105839912152, "learning_rate": 2.9283488999419324e-06, "loss": 1.1089, "step": 6524 }, { "epoch": 0.9237630070078573, "grad_norm": 9.263016486343394, "learning_rate": 2.927784165491746e-06, "loss": 1.1236, "step": 6525 }, { "epoch": 0.9239045798824945, "grad_norm": 8.203189173856124, "learning_rate": 2.927219408553265e-06, "loss": 1.217, "step": 6526 }, { "epoch": 0.9240461527571318, "grad_norm": 10.661088040991569, "learning_rate": 2.926654629156178e-06, "loss": 1.1884, "step": 6527 }, { "epoch": 0.924187725631769, "grad_norm": 9.314354219109502, "learning_rate": 2.926089827330175e-06, "loss": 1.1752, "step": 6528 }, { "epoch": 0.9243292985064062, "grad_norm": 9.09723274159167, "learning_rate": 2.925525003104948e-06, "loss": 1.3269, "step": 6529 }, { "epoch": 0.9244708713810434, "grad_norm": 8.588628623125846, "learning_rate": 2.924960156510188e-06, "loss": 1.2631, "step": 6530 }, { "epoch": 0.9246124442556806, "grad_norm": 8.772254327360027, "learning_rate": 2.9243952875755905e-06, "loss": 1.3361, "step": 6531 }, { "epoch": 0.9247540171303178, "grad_norm": 8.669865335828824, "learning_rate": 2.923830396330849e-06, "loss": 1.3818, "step": 6532 }, { "epoch": 0.924895590004955, "grad_norm": 9.035643462598728, "learning_rate": 2.9232654828056596e-06, "loss": 1.3856, "step": 6533 }, { "epoch": 0.9250371628795923, "grad_norm": 9.141663686310784, "learning_rate": 2.9227005470297194e-06, "loss": 1.3205, "step": 6534 }, { "epoch": 0.9251787357542295, "grad_norm": 9.26223687790492, "learning_rate": 2.922135589032726e-06, "loss": 1.3437, "step": 6535 }, { "epoch": 0.9253203086288667, "grad_norm": 8.514968258792406, "learning_rate": 2.9215706088443794e-06, "loss": 1.3431, "step": 6536 }, { "epoch": 0.925461881503504, "grad_norm": 7.885474127119602, "learning_rate": 2.921005606494381e-06, "loss": 1.2736, "step": 6537 }, { "epoch": 0.9256034543781412, "grad_norm": 8.976660665667548, "learning_rate": 2.9204405820124315e-06, "loss": 1.3204, "step": 6538 }, { "epoch": 0.9257450272527784, "grad_norm": 10.771088607586536, "learning_rate": 2.9198755354282337e-06, "loss": 1.468, "step": 6539 }, { "epoch": 0.9258866001274156, "grad_norm": 10.145839766879591, "learning_rate": 2.9193104667714926e-06, "loss": 1.242, "step": 6540 }, { "epoch": 0.9260281730020528, "grad_norm": 8.732201921785366, "learning_rate": 2.9187453760719126e-06, "loss": 1.2131, "step": 6541 }, { "epoch": 0.92616974587669, "grad_norm": 8.998410826357194, "learning_rate": 2.918180263359201e-06, "loss": 1.2405, "step": 6542 }, { "epoch": 0.9263113187513272, "grad_norm": 8.836307049080233, "learning_rate": 2.9176151286630642e-06, "loss": 1.2127, "step": 6543 }, { "epoch": 0.9264528916259644, "grad_norm": 9.40945126627075, "learning_rate": 2.917049972013211e-06, "loss": 1.2778, "step": 6544 }, { "epoch": 0.9265944645006017, "grad_norm": 8.517241550260758, "learning_rate": 2.9164847934393523e-06, "loss": 1.3066, "step": 6545 }, { "epoch": 0.9267360373752389, "grad_norm": 8.341810822614773, "learning_rate": 2.9159195929711985e-06, "loss": 1.0654, "step": 6546 }, { "epoch": 0.9268776102498761, "grad_norm": 8.256160863240677, "learning_rate": 2.915354370638462e-06, "loss": 1.2866, "step": 6547 }, { "epoch": 0.9270191831245134, "grad_norm": 9.54032930777451, "learning_rate": 2.914789126470856e-06, "loss": 1.1501, "step": 6548 }, { "epoch": 0.9271607559991506, "grad_norm": 7.692091648295752, "learning_rate": 2.914223860498095e-06, "loss": 1.2652, "step": 6549 }, { "epoch": 0.9273023288737878, "grad_norm": 9.025024802895064, "learning_rate": 2.9136585727498946e-06, "loss": 1.2444, "step": 6550 }, { "epoch": 0.927443901748425, "grad_norm": 9.81864124725936, "learning_rate": 2.9130932632559707e-06, "loss": 1.17, "step": 6551 }, { "epoch": 0.9275854746230622, "grad_norm": 8.530824755455992, "learning_rate": 2.912527932046042e-06, "loss": 1.1944, "step": 6552 }, { "epoch": 0.9277270474976994, "grad_norm": 8.884459276634882, "learning_rate": 2.911962579149828e-06, "loss": 1.148, "step": 6553 }, { "epoch": 0.9278686203723366, "grad_norm": 8.602029481107795, "learning_rate": 2.9113972045970483e-06, "loss": 1.0878, "step": 6554 }, { "epoch": 0.9280101932469739, "grad_norm": 8.691055558233913, "learning_rate": 2.910831808417424e-06, "loss": 1.3351, "step": 6555 }, { "epoch": 0.9281517661216111, "grad_norm": 12.100091577608966, "learning_rate": 2.910266390640678e-06, "loss": 1.3048, "step": 6556 }, { "epoch": 0.9282933389962483, "grad_norm": 10.632452292300533, "learning_rate": 2.909700951296534e-06, "loss": 1.2498, "step": 6557 }, { "epoch": 0.9284349118708856, "grad_norm": 7.623131695098826, "learning_rate": 2.9091354904147175e-06, "loss": 1.1857, "step": 6558 }, { "epoch": 0.9285764847455228, "grad_norm": 8.922412435241423, "learning_rate": 2.908570008024951e-06, "loss": 1.2422, "step": 6559 }, { "epoch": 0.92871805762016, "grad_norm": 8.38762021383673, "learning_rate": 2.9080045041569647e-06, "loss": 1.0616, "step": 6560 }, { "epoch": 0.9288596304947973, "grad_norm": 11.360767221466672, "learning_rate": 2.9074389788404867e-06, "loss": 1.3668, "step": 6561 }, { "epoch": 0.9290012033694344, "grad_norm": 8.864397189692093, "learning_rate": 2.9068734321052445e-06, "loss": 1.2145, "step": 6562 }, { "epoch": 0.9291427762440716, "grad_norm": 10.646781585525403, "learning_rate": 2.9063078639809707e-06, "loss": 1.2051, "step": 6563 }, { "epoch": 0.9292843491187088, "grad_norm": 6.992434364095357, "learning_rate": 2.905742274497394e-06, "loss": 1.0544, "step": 6564 }, { "epoch": 0.9294259219933461, "grad_norm": 8.031606703380897, "learning_rate": 2.9051766636842488e-06, "loss": 1.2495, "step": 6565 }, { "epoch": 0.9295674948679833, "grad_norm": 10.29987056613816, "learning_rate": 2.9046110315712682e-06, "loss": 1.2713, "step": 6566 }, { "epoch": 0.9297090677426205, "grad_norm": 7.349402017985725, "learning_rate": 2.904045378188187e-06, "loss": 1.2658, "step": 6567 }, { "epoch": 0.9298506406172578, "grad_norm": 8.982236073515942, "learning_rate": 2.9034797035647427e-06, "loss": 1.2825, "step": 6568 }, { "epoch": 0.929992213491895, "grad_norm": 8.777587561283344, "learning_rate": 2.9029140077306717e-06, "loss": 1.2847, "step": 6569 }, { "epoch": 0.9301337863665322, "grad_norm": 11.367242536378793, "learning_rate": 2.902348290715711e-06, "loss": 1.2888, "step": 6570 }, { "epoch": 0.9302753592411694, "grad_norm": 9.303505464512968, "learning_rate": 2.9017825525496e-06, "loss": 1.2233, "step": 6571 }, { "epoch": 0.9304169321158066, "grad_norm": 10.395080987670774, "learning_rate": 2.9012167932620806e-06, "loss": 1.1406, "step": 6572 }, { "epoch": 0.9305585049904438, "grad_norm": 9.30059257998474, "learning_rate": 2.900651012882893e-06, "loss": 1.2561, "step": 6573 }, { "epoch": 0.930700077865081, "grad_norm": 8.809778970290406, "learning_rate": 2.9000852114417804e-06, "loss": 1.2869, "step": 6574 }, { "epoch": 0.9308416507397183, "grad_norm": 8.26092915109754, "learning_rate": 2.899519388968487e-06, "loss": 1.2141, "step": 6575 }, { "epoch": 0.9309832236143555, "grad_norm": 9.375918737534555, "learning_rate": 2.898953545492757e-06, "loss": 1.1435, "step": 6576 }, { "epoch": 0.9311247964889927, "grad_norm": 9.410420149241686, "learning_rate": 2.8983876810443364e-06, "loss": 1.2931, "step": 6577 }, { "epoch": 0.9312663693636299, "grad_norm": 7.852693724867427, "learning_rate": 2.8978217956529726e-06, "loss": 1.2174, "step": 6578 }, { "epoch": 0.9314079422382672, "grad_norm": 8.63799865431531, "learning_rate": 2.8972558893484145e-06, "loss": 1.3064, "step": 6579 }, { "epoch": 0.9315495151129044, "grad_norm": 9.545467191745589, "learning_rate": 2.8966899621604094e-06, "loss": 1.2518, "step": 6580 }, { "epoch": 0.9316910879875416, "grad_norm": 7.515757981866646, "learning_rate": 2.8961240141187085e-06, "loss": 1.1501, "step": 6581 }, { "epoch": 0.9318326608621788, "grad_norm": 9.66726165343509, "learning_rate": 2.8955580452530642e-06, "loss": 1.3135, "step": 6582 }, { "epoch": 0.931974233736816, "grad_norm": 9.553780363724155, "learning_rate": 2.8949920555932283e-06, "loss": 1.3534, "step": 6583 }, { "epoch": 0.9321158066114532, "grad_norm": 10.275422737725433, "learning_rate": 2.8944260451689544e-06, "loss": 1.2514, "step": 6584 }, { "epoch": 0.9322573794860904, "grad_norm": 11.076326684302103, "learning_rate": 2.8938600140099975e-06, "loss": 1.2176, "step": 6585 }, { "epoch": 0.9323989523607277, "grad_norm": 10.177300525379346, "learning_rate": 2.893293962146114e-06, "loss": 1.3281, "step": 6586 }, { "epoch": 0.9325405252353649, "grad_norm": 9.81252993439403, "learning_rate": 2.8927278896070593e-06, "loss": 1.3277, "step": 6587 }, { "epoch": 0.9326820981100021, "grad_norm": 9.789854317536792, "learning_rate": 2.8921617964225923e-06, "loss": 1.1849, "step": 6588 }, { "epoch": 0.9328236709846394, "grad_norm": 8.39506654732316, "learning_rate": 2.8915956826224724e-06, "loss": 1.1546, "step": 6589 }, { "epoch": 0.9329652438592766, "grad_norm": 8.86667640322196, "learning_rate": 2.8910295482364594e-06, "loss": 1.3569, "step": 6590 }, { "epoch": 0.9331068167339138, "grad_norm": 10.095854832060486, "learning_rate": 2.8904633932943145e-06, "loss": 1.4124, "step": 6591 }, { "epoch": 0.933248389608551, "grad_norm": 12.49996154779242, "learning_rate": 2.8898972178258e-06, "loss": 1.2336, "step": 6592 }, { "epoch": 0.9333899624831882, "grad_norm": 8.006989286954632, "learning_rate": 2.8893310218606797e-06, "loss": 1.2139, "step": 6593 }, { "epoch": 0.9335315353578254, "grad_norm": 8.39306650567314, "learning_rate": 2.8887648054287176e-06, "loss": 1.154, "step": 6594 }, { "epoch": 0.9336731082324626, "grad_norm": 9.482620403753367, "learning_rate": 2.888198568559681e-06, "loss": 1.3703, "step": 6595 }, { "epoch": 0.9338146811070999, "grad_norm": 8.343588609599017, "learning_rate": 2.887632311283333e-06, "loss": 1.1616, "step": 6596 }, { "epoch": 0.9339562539817371, "grad_norm": 9.855993207780553, "learning_rate": 2.8870660336294444e-06, "loss": 1.3033, "step": 6597 }, { "epoch": 0.9340978268563743, "grad_norm": 10.421036030230361, "learning_rate": 2.886499735627783e-06, "loss": 1.2913, "step": 6598 }, { "epoch": 0.9342393997310116, "grad_norm": 8.3549984892793, "learning_rate": 2.885933417308118e-06, "loss": 1.1509, "step": 6599 }, { "epoch": 0.9343809726056488, "grad_norm": 7.817584527086021, "learning_rate": 2.8853670787002224e-06, "loss": 1.2136, "step": 6600 }, { "epoch": 0.934522545480286, "grad_norm": 9.11718666318548, "learning_rate": 2.8848007198338663e-06, "loss": 1.2433, "step": 6601 }, { "epoch": 0.9346641183549232, "grad_norm": 8.048604183141013, "learning_rate": 2.884234340738823e-06, "loss": 1.2108, "step": 6602 }, { "epoch": 0.9348056912295604, "grad_norm": 9.074352852942575, "learning_rate": 2.883667941444867e-06, "loss": 1.3088, "step": 6603 }, { "epoch": 0.9349472641041976, "grad_norm": 8.91064473192452, "learning_rate": 2.8831015219817725e-06, "loss": 1.226, "step": 6604 }, { "epoch": 0.9350888369788348, "grad_norm": 9.804149403187383, "learning_rate": 2.882535082379318e-06, "loss": 1.1824, "step": 6605 }, { "epoch": 0.9352304098534721, "grad_norm": 8.09444751458716, "learning_rate": 2.8819686226672794e-06, "loss": 1.3012, "step": 6606 }, { "epoch": 0.9353719827281093, "grad_norm": 10.753721081088338, "learning_rate": 2.881402142875435e-06, "loss": 1.322, "step": 6607 }, { "epoch": 0.9355135556027465, "grad_norm": 11.677907342390627, "learning_rate": 2.880835643033564e-06, "loss": 1.3912, "step": 6608 }, { "epoch": 0.9356551284773837, "grad_norm": 10.115198736722796, "learning_rate": 2.8802691231714463e-06, "loss": 1.4155, "step": 6609 }, { "epoch": 0.935796701352021, "grad_norm": 9.13534586468311, "learning_rate": 2.879702583318866e-06, "loss": 1.3405, "step": 6610 }, { "epoch": 0.9359382742266582, "grad_norm": 10.582909362791248, "learning_rate": 2.8791360235056024e-06, "loss": 1.4045, "step": 6611 }, { "epoch": 0.9360798471012954, "grad_norm": 7.476164006980943, "learning_rate": 2.878569443761442e-06, "loss": 1.3615, "step": 6612 }, { "epoch": 0.9362214199759326, "grad_norm": 7.875077262378302, "learning_rate": 2.878002844116168e-06, "loss": 1.1884, "step": 6613 }, { "epoch": 0.9363629928505698, "grad_norm": 9.875924755206867, "learning_rate": 2.877436224599566e-06, "loss": 1.3544, "step": 6614 }, { "epoch": 0.936504565725207, "grad_norm": 8.316409002171927, "learning_rate": 2.876869585241423e-06, "loss": 1.2387, "step": 6615 }, { "epoch": 0.9366461385998442, "grad_norm": 8.63823447520573, "learning_rate": 2.8763029260715282e-06, "loss": 1.2931, "step": 6616 }, { "epoch": 0.9367877114744815, "grad_norm": 11.335977881561432, "learning_rate": 2.8757362471196677e-06, "loss": 1.3856, "step": 6617 }, { "epoch": 0.9369292843491187, "grad_norm": 9.340413087781002, "learning_rate": 2.875169548415633e-06, "loss": 1.3035, "step": 6618 }, { "epoch": 0.9370708572237559, "grad_norm": 8.148765667079441, "learning_rate": 2.874602829989215e-06, "loss": 1.1685, "step": 6619 }, { "epoch": 0.9372124300983932, "grad_norm": 9.296628804513292, "learning_rate": 2.8740360918702053e-06, "loss": 1.1287, "step": 6620 }, { "epoch": 0.9373540029730304, "grad_norm": 9.537239969091024, "learning_rate": 2.873469334088398e-06, "loss": 1.3485, "step": 6621 }, { "epoch": 0.9374955758476676, "grad_norm": 6.564642856061305, "learning_rate": 2.8729025566735856e-06, "loss": 1.1953, "step": 6622 }, { "epoch": 0.9376371487223049, "grad_norm": 7.5244512617712855, "learning_rate": 2.8723357596555644e-06, "loss": 1.3298, "step": 6623 }, { "epoch": 0.937778721596942, "grad_norm": 10.462845597660719, "learning_rate": 2.871768943064129e-06, "loss": 1.1884, "step": 6624 }, { "epoch": 0.9379202944715792, "grad_norm": 12.074268668005839, "learning_rate": 2.8712021069290786e-06, "loss": 1.3375, "step": 6625 }, { "epoch": 0.9380618673462164, "grad_norm": 9.815056534622306, "learning_rate": 2.87063525128021e-06, "loss": 1.245, "step": 6626 }, { "epoch": 0.9382034402208537, "grad_norm": 9.321579162015976, "learning_rate": 2.870068376147322e-06, "loss": 1.3478, "step": 6627 }, { "epoch": 0.9383450130954909, "grad_norm": 11.36984639220873, "learning_rate": 2.8695014815602157e-06, "loss": 1.16, "step": 6628 }, { "epoch": 0.9384865859701281, "grad_norm": 9.01681854439789, "learning_rate": 2.8689345675486917e-06, "loss": 1.3142, "step": 6629 }, { "epoch": 0.9386281588447654, "grad_norm": 8.121628281771189, "learning_rate": 2.868367634142553e-06, "loss": 1.1232, "step": 6630 }, { "epoch": 0.9387697317194026, "grad_norm": 8.358435804424156, "learning_rate": 2.8678006813716024e-06, "loss": 1.2222, "step": 6631 }, { "epoch": 0.9389113045940398, "grad_norm": 7.502891237556854, "learning_rate": 2.8672337092656444e-06, "loss": 1.315, "step": 6632 }, { "epoch": 0.939052877468677, "grad_norm": 7.91995631350404, "learning_rate": 2.8666667178544833e-06, "loss": 1.0387, "step": 6633 }, { "epoch": 0.9391944503433142, "grad_norm": 7.539960327448809, "learning_rate": 2.866099707167927e-06, "loss": 1.0807, "step": 6634 }, { "epoch": 0.9393360232179514, "grad_norm": 7.293464075807713, "learning_rate": 2.8655326772357816e-06, "loss": 1.2311, "step": 6635 }, { "epoch": 0.9394775960925886, "grad_norm": 10.682224689473923, "learning_rate": 2.8649656280878563e-06, "loss": 1.2224, "step": 6636 }, { "epoch": 0.9396191689672259, "grad_norm": 8.749801633493657, "learning_rate": 2.8643985597539597e-06, "loss": 1.1853, "step": 6637 }, { "epoch": 0.9397607418418631, "grad_norm": 8.766213789968038, "learning_rate": 2.863831472263904e-06, "loss": 1.2071, "step": 6638 }, { "epoch": 0.9399023147165003, "grad_norm": 7.2149629774088835, "learning_rate": 2.8632643656474974e-06, "loss": 1.124, "step": 6639 }, { "epoch": 0.9400438875911376, "grad_norm": 7.72372585023067, "learning_rate": 2.8626972399345543e-06, "loss": 1.1952, "step": 6640 }, { "epoch": 0.9401854604657748, "grad_norm": 8.779337067135474, "learning_rate": 2.8621300951548877e-06, "loss": 1.2185, "step": 6641 }, { "epoch": 0.940327033340412, "grad_norm": 11.538343775221612, "learning_rate": 2.861562931338312e-06, "loss": 1.3245, "step": 6642 }, { "epoch": 0.9404686062150492, "grad_norm": 9.511468891097136, "learning_rate": 2.8609957485146433e-06, "loss": 1.1659, "step": 6643 }, { "epoch": 0.9406101790896865, "grad_norm": 9.478499325467647, "learning_rate": 2.8604285467136966e-06, "loss": 1.3014, "step": 6644 }, { "epoch": 0.9407517519643236, "grad_norm": 10.434167752602654, "learning_rate": 2.8598613259652895e-06, "loss": 1.2932, "step": 6645 }, { "epoch": 0.9408933248389608, "grad_norm": 8.258077655097292, "learning_rate": 2.8592940862992417e-06, "loss": 1.357, "step": 6646 }, { "epoch": 0.941034897713598, "grad_norm": 9.970529236057146, "learning_rate": 2.858726827745372e-06, "loss": 1.2924, "step": 6647 }, { "epoch": 0.9411764705882353, "grad_norm": 9.385962828488799, "learning_rate": 2.858159550333499e-06, "loss": 1.1887, "step": 6648 }, { "epoch": 0.9413180434628725, "grad_norm": 9.759135734564026, "learning_rate": 2.8575922540934464e-06, "loss": 1.2149, "step": 6649 }, { "epoch": 0.9414596163375097, "grad_norm": 9.761685533531923, "learning_rate": 2.857024939055036e-06, "loss": 1.1797, "step": 6650 }, { "epoch": 0.941601189212147, "grad_norm": 8.749087476831289, "learning_rate": 2.8564576052480895e-06, "loss": 1.0866, "step": 6651 }, { "epoch": 0.9417427620867842, "grad_norm": 8.643305716195238, "learning_rate": 2.8558902527024337e-06, "loss": 1.1655, "step": 6652 }, { "epoch": 0.9418843349614214, "grad_norm": 9.836617347061923, "learning_rate": 2.8553228814478927e-06, "loss": 1.2315, "step": 6653 }, { "epoch": 0.9420259078360587, "grad_norm": 11.094659472513573, "learning_rate": 2.8547554915142923e-06, "loss": 1.1191, "step": 6654 }, { "epoch": 0.9421674807106958, "grad_norm": 10.738379296569487, "learning_rate": 2.8541880829314604e-06, "loss": 1.2574, "step": 6655 }, { "epoch": 0.942309053585333, "grad_norm": 9.502183763580636, "learning_rate": 2.8536206557292244e-06, "loss": 1.2936, "step": 6656 }, { "epoch": 0.9424506264599702, "grad_norm": 8.120911963603932, "learning_rate": 2.8530532099374146e-06, "loss": 1.3893, "step": 6657 }, { "epoch": 0.9425921993346075, "grad_norm": 8.22825809404439, "learning_rate": 2.8524857455858606e-06, "loss": 1.2178, "step": 6658 }, { "epoch": 0.9427337722092447, "grad_norm": 10.257884273001952, "learning_rate": 2.8519182627043953e-06, "loss": 1.242, "step": 6659 }, { "epoch": 0.9428753450838819, "grad_norm": 8.29622301316567, "learning_rate": 2.8513507613228474e-06, "loss": 1.152, "step": 6660 }, { "epoch": 0.9430169179585192, "grad_norm": 8.595488993086631, "learning_rate": 2.8507832414710528e-06, "loss": 1.2203, "step": 6661 }, { "epoch": 0.9431584908331564, "grad_norm": 8.479722463276685, "learning_rate": 2.850215703178845e-06, "loss": 1.1957, "step": 6662 }, { "epoch": 0.9433000637077936, "grad_norm": 10.341718122634523, "learning_rate": 2.8496481464760585e-06, "loss": 1.2457, "step": 6663 }, { "epoch": 0.9434416365824309, "grad_norm": 9.997703288501908, "learning_rate": 2.8490805713925298e-06, "loss": 1.2865, "step": 6664 }, { "epoch": 0.943583209457068, "grad_norm": 10.00966215647743, "learning_rate": 2.848512977958095e-06, "loss": 1.2826, "step": 6665 }, { "epoch": 0.9437247823317052, "grad_norm": 8.2884025346956, "learning_rate": 2.8479453662025937e-06, "loss": 1.1933, "step": 6666 }, { "epoch": 0.9438663552063424, "grad_norm": 8.962681009547659, "learning_rate": 2.847377736155863e-06, "loss": 1.1813, "step": 6667 }, { "epoch": 0.9440079280809797, "grad_norm": 9.53113433267418, "learning_rate": 2.8468100878477443e-06, "loss": 1.2806, "step": 6668 }, { "epoch": 0.9441495009556169, "grad_norm": 7.465224451470957, "learning_rate": 2.8462424213080786e-06, "loss": 1.172, "step": 6669 }, { "epoch": 0.9442910738302541, "grad_norm": 9.754021622140058, "learning_rate": 2.845674736566706e-06, "loss": 1.1674, "step": 6670 }, { "epoch": 0.9444326467048914, "grad_norm": 10.837934094175306, "learning_rate": 2.8451070336534703e-06, "loss": 1.2877, "step": 6671 }, { "epoch": 0.9445742195795286, "grad_norm": 8.216037549223708, "learning_rate": 2.8445393125982152e-06, "loss": 1.2972, "step": 6672 }, { "epoch": 0.9447157924541658, "grad_norm": 8.300658546887211, "learning_rate": 2.8439715734307856e-06, "loss": 1.2543, "step": 6673 }, { "epoch": 0.944857365328803, "grad_norm": 9.202218443385549, "learning_rate": 2.8434038161810266e-06, "loss": 1.1877, "step": 6674 }, { "epoch": 0.9449989382034403, "grad_norm": 7.0570895645838725, "learning_rate": 2.8428360408787857e-06, "loss": 1.1736, "step": 6675 }, { "epoch": 0.9451405110780774, "grad_norm": 8.71189386566219, "learning_rate": 2.84226824755391e-06, "loss": 1.1358, "step": 6676 }, { "epoch": 0.9452820839527146, "grad_norm": 10.216462512151994, "learning_rate": 2.8417004362362465e-06, "loss": 1.2196, "step": 6677 }, { "epoch": 0.9454236568273519, "grad_norm": 8.727450278672416, "learning_rate": 2.8411326069556456e-06, "loss": 1.1787, "step": 6678 }, { "epoch": 0.9455652297019891, "grad_norm": 7.6877515798201586, "learning_rate": 2.840564759741959e-06, "loss": 1.0634, "step": 6679 }, { "epoch": 0.9457068025766263, "grad_norm": 9.35676122987493, "learning_rate": 2.8399968946250373e-06, "loss": 1.2692, "step": 6680 }, { "epoch": 0.9458483754512635, "grad_norm": 8.242019955346866, "learning_rate": 2.839429011634731e-06, "loss": 1.1728, "step": 6681 }, { "epoch": 0.9459899483259008, "grad_norm": 8.42091876601179, "learning_rate": 2.8388611108008957e-06, "loss": 1.1809, "step": 6682 }, { "epoch": 0.946131521200538, "grad_norm": 9.156832835945472, "learning_rate": 2.838293192153384e-06, "loss": 1.0894, "step": 6683 }, { "epoch": 0.9462730940751752, "grad_norm": 9.752816600378836, "learning_rate": 2.8377252557220513e-06, "loss": 1.1777, "step": 6684 }, { "epoch": 0.9464146669498125, "grad_norm": 7.14149145967106, "learning_rate": 2.8371573015367543e-06, "loss": 1.0272, "step": 6685 }, { "epoch": 0.9465562398244496, "grad_norm": 7.558869554480379, "learning_rate": 2.836589329627349e-06, "loss": 1.0719, "step": 6686 }, { "epoch": 0.9466978126990868, "grad_norm": 9.61855110027257, "learning_rate": 2.8360213400236936e-06, "loss": 1.3298, "step": 6687 }, { "epoch": 0.946839385573724, "grad_norm": 9.068647732936899, "learning_rate": 2.8354533327556465e-06, "loss": 1.3652, "step": 6688 }, { "epoch": 0.9469809584483613, "grad_norm": 9.62198740393909, "learning_rate": 2.834885307853068e-06, "loss": 1.0451, "step": 6689 }, { "epoch": 0.9471225313229985, "grad_norm": 8.424316052484524, "learning_rate": 2.8343172653458194e-06, "loss": 1.3598, "step": 6690 }, { "epoch": 0.9472641041976357, "grad_norm": 9.631444185689432, "learning_rate": 2.833749205263761e-06, "loss": 1.2112, "step": 6691 }, { "epoch": 0.947405677072273, "grad_norm": 8.663914561406047, "learning_rate": 2.8331811276367554e-06, "loss": 1.3214, "step": 6692 }, { "epoch": 0.9475472499469102, "grad_norm": 9.474846718386218, "learning_rate": 2.832613032494666e-06, "loss": 1.2373, "step": 6693 }, { "epoch": 0.9476888228215474, "grad_norm": 8.070017540592383, "learning_rate": 2.8320449198673585e-06, "loss": 1.2941, "step": 6694 }, { "epoch": 0.9478303956961847, "grad_norm": 11.21716195345503, "learning_rate": 2.8314767897846963e-06, "loss": 1.3578, "step": 6695 }, { "epoch": 0.9479719685708218, "grad_norm": 10.92286339300041, "learning_rate": 2.830908642276547e-06, "loss": 1.2414, "step": 6696 }, { "epoch": 0.948113541445459, "grad_norm": 9.3684771415976, "learning_rate": 2.830340477372777e-06, "loss": 1.194, "step": 6697 }, { "epoch": 0.9482551143200962, "grad_norm": 9.048311900138472, "learning_rate": 2.829772295103254e-06, "loss": 1.293, "step": 6698 }, { "epoch": 0.9483966871947335, "grad_norm": 8.21412395057383, "learning_rate": 2.829204095497848e-06, "loss": 1.2903, "step": 6699 }, { "epoch": 0.9485382600693707, "grad_norm": 10.927307262374736, "learning_rate": 2.828635878586428e-06, "loss": 1.3195, "step": 6700 }, { "epoch": 0.9486798329440079, "grad_norm": 11.07254796148452, "learning_rate": 2.828067644398864e-06, "loss": 1.3441, "step": 6701 }, { "epoch": 0.9488214058186452, "grad_norm": 10.045070551410403, "learning_rate": 2.8274993929650297e-06, "loss": 1.2464, "step": 6702 }, { "epoch": 0.9489629786932824, "grad_norm": 10.989669457168947, "learning_rate": 2.826931124314796e-06, "loss": 1.2063, "step": 6703 }, { "epoch": 0.9491045515679196, "grad_norm": 7.9623782545331085, "learning_rate": 2.826362838478037e-06, "loss": 1.1038, "step": 6704 }, { "epoch": 0.9492461244425568, "grad_norm": 9.658052442896864, "learning_rate": 2.825794535484627e-06, "loss": 1.2891, "step": 6705 }, { "epoch": 0.9493876973171941, "grad_norm": 8.674017014451389, "learning_rate": 2.825226215364441e-06, "loss": 1.2031, "step": 6706 }, { "epoch": 0.9495292701918312, "grad_norm": 8.15375582973021, "learning_rate": 2.824657878147355e-06, "loss": 1.1695, "step": 6707 }, { "epoch": 0.9496708430664684, "grad_norm": 9.403665485359785, "learning_rate": 2.8240895238632473e-06, "loss": 1.1995, "step": 6708 }, { "epoch": 0.9498124159411057, "grad_norm": 9.62037329506635, "learning_rate": 2.8235211525419937e-06, "loss": 1.159, "step": 6709 }, { "epoch": 0.9499539888157429, "grad_norm": 11.371698256647262, "learning_rate": 2.8229527642134753e-06, "loss": 1.3686, "step": 6710 }, { "epoch": 0.9500955616903801, "grad_norm": 9.444839957937598, "learning_rate": 2.8223843589075705e-06, "loss": 1.3162, "step": 6711 }, { "epoch": 0.9502371345650173, "grad_norm": 7.781367167008184, "learning_rate": 2.8218159366541615e-06, "loss": 1.0773, "step": 6712 }, { "epoch": 0.9503787074396546, "grad_norm": 10.177178706825584, "learning_rate": 2.8212474974831277e-06, "loss": 1.1938, "step": 6713 }, { "epoch": 0.9505202803142918, "grad_norm": 9.689766698652575, "learning_rate": 2.8206790414243525e-06, "loss": 1.233, "step": 6714 }, { "epoch": 0.950661853188929, "grad_norm": 11.059873797229539, "learning_rate": 2.8201105685077184e-06, "loss": 1.3191, "step": 6715 }, { "epoch": 0.9508034260635663, "grad_norm": 7.8112943405624105, "learning_rate": 2.8195420787631113e-06, "loss": 1.1986, "step": 6716 }, { "epoch": 0.9509449989382034, "grad_norm": 9.008086915811397, "learning_rate": 2.818973572220416e-06, "loss": 1.1906, "step": 6717 }, { "epoch": 0.9510865718128406, "grad_norm": 9.613768526862186, "learning_rate": 2.818405048909517e-06, "loss": 1.2139, "step": 6718 }, { "epoch": 0.9512281446874779, "grad_norm": 9.910347942322238, "learning_rate": 2.817836508860302e-06, "loss": 1.1895, "step": 6719 }, { "epoch": 0.9513697175621151, "grad_norm": 8.13985364777688, "learning_rate": 2.817267952102659e-06, "loss": 1.3553, "step": 6720 }, { "epoch": 0.9515112904367523, "grad_norm": 8.009168616116577, "learning_rate": 2.8166993786664757e-06, "loss": 1.2722, "step": 6721 }, { "epoch": 0.9516528633113895, "grad_norm": 10.192194880731844, "learning_rate": 2.816130788581643e-06, "loss": 1.3005, "step": 6722 }, { "epoch": 0.9517944361860268, "grad_norm": 11.828695830223436, "learning_rate": 2.8155621818780497e-06, "loss": 1.3706, "step": 6723 }, { "epoch": 0.951936009060664, "grad_norm": 9.347325282090017, "learning_rate": 2.8149935585855885e-06, "loss": 1.2674, "step": 6724 }, { "epoch": 0.9520775819353012, "grad_norm": 9.533119293513296, "learning_rate": 2.8144249187341506e-06, "loss": 1.0612, "step": 6725 }, { "epoch": 0.9522191548099385, "grad_norm": 8.130122052129325, "learning_rate": 2.8138562623536293e-06, "loss": 1.2535, "step": 6726 }, { "epoch": 0.9523607276845756, "grad_norm": 9.106597738754484, "learning_rate": 2.8132875894739175e-06, "loss": 1.2063, "step": 6727 }, { "epoch": 0.9525023005592128, "grad_norm": 8.951163564732669, "learning_rate": 2.812718900124912e-06, "loss": 1.1636, "step": 6728 }, { "epoch": 0.95264387343385, "grad_norm": 9.735434975019453, "learning_rate": 2.8121501943365066e-06, "loss": 1.3208, "step": 6729 }, { "epoch": 0.9527854463084873, "grad_norm": 10.947793356760267, "learning_rate": 2.8115814721385975e-06, "loss": 1.3122, "step": 6730 }, { "epoch": 0.9529270191831245, "grad_norm": 10.693640300248262, "learning_rate": 2.8110127335610833e-06, "loss": 1.1756, "step": 6731 }, { "epoch": 0.9530685920577617, "grad_norm": 8.222967073994642, "learning_rate": 2.8104439786338617e-06, "loss": 1.1511, "step": 6732 }, { "epoch": 0.953210164932399, "grad_norm": 12.19957021988089, "learning_rate": 2.809875207386832e-06, "loss": 1.2904, "step": 6733 }, { "epoch": 0.9533517378070362, "grad_norm": 9.71107773744778, "learning_rate": 2.809306419849893e-06, "loss": 1.4051, "step": 6734 }, { "epoch": 0.9534933106816734, "grad_norm": 9.331471620803617, "learning_rate": 2.8087376160529463e-06, "loss": 1.284, "step": 6735 }, { "epoch": 0.9536348835563107, "grad_norm": 11.550949678557279, "learning_rate": 2.808168796025893e-06, "loss": 1.1989, "step": 6736 }, { "epoch": 0.9537764564309479, "grad_norm": 12.279651489282674, "learning_rate": 2.8075999597986364e-06, "loss": 1.2099, "step": 6737 }, { "epoch": 0.953918029305585, "grad_norm": 11.756331665469277, "learning_rate": 2.8070311074010793e-06, "loss": 1.3089, "step": 6738 }, { "epoch": 0.9540596021802222, "grad_norm": 10.087303914146121, "learning_rate": 2.806462238863125e-06, "loss": 1.2616, "step": 6739 }, { "epoch": 0.9542011750548595, "grad_norm": 10.370676633561121, "learning_rate": 2.8058933542146804e-06, "loss": 1.0093, "step": 6740 }, { "epoch": 0.9543427479294967, "grad_norm": 15.277681335761336, "learning_rate": 2.80532445348565e-06, "loss": 1.3581, "step": 6741 }, { "epoch": 0.9544843208041339, "grad_norm": 12.232899466017987, "learning_rate": 2.8047555367059404e-06, "loss": 1.4334, "step": 6742 }, { "epoch": 0.9546258936787712, "grad_norm": 9.48459942974786, "learning_rate": 2.80418660390546e-06, "loss": 1.0948, "step": 6743 }, { "epoch": 0.9547674665534084, "grad_norm": 10.2445236090826, "learning_rate": 2.803617655114116e-06, "loss": 1.248, "step": 6744 }, { "epoch": 0.9549090394280456, "grad_norm": 8.632542946866362, "learning_rate": 2.803048690361818e-06, "loss": 1.1209, "step": 6745 }, { "epoch": 0.9550506123026828, "grad_norm": 8.678577698935745, "learning_rate": 2.8024797096784766e-06, "loss": 1.1799, "step": 6746 }, { "epoch": 0.9551921851773201, "grad_norm": 11.284371322001617, "learning_rate": 2.8019107130940025e-06, "loss": 1.1782, "step": 6747 }, { "epoch": 0.9553337580519572, "grad_norm": 8.964302901032788, "learning_rate": 2.8013417006383078e-06, "loss": 1.2962, "step": 6748 }, { "epoch": 0.9554753309265944, "grad_norm": 8.894267055255048, "learning_rate": 2.8007726723413046e-06, "loss": 1.1119, "step": 6749 }, { "epoch": 0.9556169038012317, "grad_norm": 12.9287114907305, "learning_rate": 2.800203628232906e-06, "loss": 1.3522, "step": 6750 }, { "epoch": 0.9557584766758689, "grad_norm": 7.423010809472061, "learning_rate": 2.7996345683430266e-06, "loss": 1.1029, "step": 6751 }, { "epoch": 0.9559000495505061, "grad_norm": 7.135210718306006, "learning_rate": 2.799065492701581e-06, "loss": 1.1291, "step": 6752 }, { "epoch": 0.9560416224251433, "grad_norm": 11.898341318143755, "learning_rate": 2.7984964013384853e-06, "loss": 1.3048, "step": 6753 }, { "epoch": 0.9561831952997806, "grad_norm": 11.185522149712938, "learning_rate": 2.7979272942836566e-06, "loss": 1.3042, "step": 6754 }, { "epoch": 0.9563247681744178, "grad_norm": 9.982277997703605, "learning_rate": 2.7973581715670124e-06, "loss": 1.272, "step": 6755 }, { "epoch": 0.956466341049055, "grad_norm": 8.709874164359917, "learning_rate": 2.7967890332184705e-06, "loss": 1.2257, "step": 6756 }, { "epoch": 0.9566079139236923, "grad_norm": 7.691879885588205, "learning_rate": 2.7962198792679506e-06, "loss": 1.1192, "step": 6757 }, { "epoch": 0.9567494867983294, "grad_norm": 8.540626822344755, "learning_rate": 2.795650709745373e-06, "loss": 1.1699, "step": 6758 }, { "epoch": 0.9568910596729666, "grad_norm": 10.538374757489738, "learning_rate": 2.7950815246806575e-06, "loss": 1.2333, "step": 6759 }, { "epoch": 0.9570326325476038, "grad_norm": 11.345002244501073, "learning_rate": 2.794512324103726e-06, "loss": 1.286, "step": 6760 }, { "epoch": 0.9571742054222411, "grad_norm": 10.294283967798245, "learning_rate": 2.7939431080445016e-06, "loss": 1.2395, "step": 6761 }, { "epoch": 0.9573157782968783, "grad_norm": 7.0718465078793855, "learning_rate": 2.7933738765329073e-06, "loss": 1.1913, "step": 6762 }, { "epoch": 0.9574573511715155, "grad_norm": 7.779537395462685, "learning_rate": 2.7928046295988666e-06, "loss": 1.1875, "step": 6763 }, { "epoch": 0.9575989240461528, "grad_norm": 8.56115533893088, "learning_rate": 2.792235367272305e-06, "loss": 1.0891, "step": 6764 }, { "epoch": 0.95774049692079, "grad_norm": 8.55687177807783, "learning_rate": 2.7916660895831487e-06, "loss": 1.1343, "step": 6765 }, { "epoch": 0.9578820697954272, "grad_norm": 9.368613140183031, "learning_rate": 2.791096796561323e-06, "loss": 1.3072, "step": 6766 }, { "epoch": 0.9580236426700645, "grad_norm": 9.204747625286357, "learning_rate": 2.790527488236755e-06, "loss": 1.3667, "step": 6767 }, { "epoch": 0.9581652155447017, "grad_norm": 7.11704278290027, "learning_rate": 2.7899581646393746e-06, "loss": 1.1033, "step": 6768 }, { "epoch": 0.9583067884193388, "grad_norm": 9.534550129880236, "learning_rate": 2.789388825799109e-06, "loss": 1.2974, "step": 6769 }, { "epoch": 0.958448361293976, "grad_norm": 8.966253803868739, "learning_rate": 2.78881947174589e-06, "loss": 1.161, "step": 6770 }, { "epoch": 0.9585899341686133, "grad_norm": 8.989070613969483, "learning_rate": 2.788250102509646e-06, "loss": 1.2873, "step": 6771 }, { "epoch": 0.9587315070432505, "grad_norm": 7.3011488232609185, "learning_rate": 2.7876807181203085e-06, "loss": 1.1408, "step": 6772 }, { "epoch": 0.9588730799178877, "grad_norm": 8.961812276803613, "learning_rate": 2.7871113186078102e-06, "loss": 1.339, "step": 6773 }, { "epoch": 0.959014652792525, "grad_norm": 9.435664320514672, "learning_rate": 2.786541904002085e-06, "loss": 1.1699, "step": 6774 }, { "epoch": 0.9591562256671622, "grad_norm": 8.149750557107033, "learning_rate": 2.785972474333064e-06, "loss": 1.2308, "step": 6775 }, { "epoch": 0.9592977985417994, "grad_norm": 9.812349160365313, "learning_rate": 2.7854030296306846e-06, "loss": 1.1855, "step": 6776 }, { "epoch": 0.9594393714164366, "grad_norm": 7.781170090108004, "learning_rate": 2.7848335699248796e-06, "loss": 1.1634, "step": 6777 }, { "epoch": 0.9595809442910739, "grad_norm": 7.651292589477605, "learning_rate": 2.7842640952455867e-06, "loss": 1.2265, "step": 6778 }, { "epoch": 0.959722517165711, "grad_norm": 7.977948314056554, "learning_rate": 2.783694605622743e-06, "loss": 1.2134, "step": 6779 }, { "epoch": 0.9598640900403482, "grad_norm": 8.116024752801456, "learning_rate": 2.7831251010862847e-06, "loss": 1.0956, "step": 6780 }, { "epoch": 0.9600056629149855, "grad_norm": 7.218599127972222, "learning_rate": 2.7825555816661503e-06, "loss": 1.1939, "step": 6781 }, { "epoch": 0.9601472357896227, "grad_norm": 8.665751286606842, "learning_rate": 2.7819860473922805e-06, "loss": 1.2139, "step": 6782 }, { "epoch": 0.9602888086642599, "grad_norm": 9.267588209623069, "learning_rate": 2.781416498294614e-06, "loss": 1.1568, "step": 6783 }, { "epoch": 0.9604303815388971, "grad_norm": 8.906223872631633, "learning_rate": 2.7808469344030923e-06, "loss": 1.2797, "step": 6784 }, { "epoch": 0.9605719544135344, "grad_norm": 7.787460606262596, "learning_rate": 2.780277355747657e-06, "loss": 1.1727, "step": 6785 }, { "epoch": 0.9607135272881716, "grad_norm": 8.694141072541774, "learning_rate": 2.7797077623582503e-06, "loss": 1.2807, "step": 6786 }, { "epoch": 0.9608551001628088, "grad_norm": 13.779870896309054, "learning_rate": 2.779138154264814e-06, "loss": 1.1821, "step": 6787 }, { "epoch": 0.9609966730374461, "grad_norm": 12.026936498464018, "learning_rate": 2.778568531497294e-06, "loss": 1.2051, "step": 6788 }, { "epoch": 0.9611382459120833, "grad_norm": 7.0609272243680765, "learning_rate": 2.777998894085634e-06, "loss": 1.1013, "step": 6789 }, { "epoch": 0.9612798187867204, "grad_norm": 11.759092019249934, "learning_rate": 2.7774292420597784e-06, "loss": 1.1277, "step": 6790 }, { "epoch": 0.9614213916613576, "grad_norm": 12.403394394474555, "learning_rate": 2.776859575449675e-06, "loss": 1.2321, "step": 6791 }, { "epoch": 0.9615629645359949, "grad_norm": 9.635096815144466, "learning_rate": 2.7762898942852705e-06, "loss": 1.1893, "step": 6792 }, { "epoch": 0.9617045374106321, "grad_norm": 9.582157847971276, "learning_rate": 2.775720198596512e-06, "loss": 1.2294, "step": 6793 }, { "epoch": 0.9618461102852693, "grad_norm": 10.949881036538958, "learning_rate": 2.7751504884133484e-06, "loss": 1.2419, "step": 6794 }, { "epoch": 0.9619876831599066, "grad_norm": 9.256407709214153, "learning_rate": 2.7745807637657287e-06, "loss": 1.2259, "step": 6795 }, { "epoch": 0.9621292560345438, "grad_norm": 8.986366966676, "learning_rate": 2.774011024683603e-06, "loss": 1.1775, "step": 6796 }, { "epoch": 0.962270828909181, "grad_norm": 9.635300710169384, "learning_rate": 2.7734412711969215e-06, "loss": 1.1805, "step": 6797 }, { "epoch": 0.9624124017838183, "grad_norm": 12.047289176975536, "learning_rate": 2.7728715033356366e-06, "loss": 1.1853, "step": 6798 }, { "epoch": 0.9625539746584555, "grad_norm": 10.516026210074628, "learning_rate": 2.7723017211297006e-06, "loss": 1.2077, "step": 6799 }, { "epoch": 0.9626955475330926, "grad_norm": 9.285853416322787, "learning_rate": 2.7717319246090657e-06, "loss": 1.3549, "step": 6800 }, { "epoch": 0.9628371204077298, "grad_norm": 9.115124518903578, "learning_rate": 2.7711621138036864e-06, "loss": 1.1669, "step": 6801 }, { "epoch": 0.9629786932823671, "grad_norm": 11.605474009170804, "learning_rate": 2.7705922887435172e-06, "loss": 1.3779, "step": 6802 }, { "epoch": 0.9631202661570043, "grad_norm": 9.729126717092655, "learning_rate": 2.770022449458513e-06, "loss": 1.1608, "step": 6803 }, { "epoch": 0.9632618390316415, "grad_norm": 9.6604034478582, "learning_rate": 2.7694525959786297e-06, "loss": 1.2459, "step": 6804 }, { "epoch": 0.9634034119062788, "grad_norm": 10.115209296225428, "learning_rate": 2.7688827283338236e-06, "loss": 1.1445, "step": 6805 }, { "epoch": 0.963544984780916, "grad_norm": 10.226904552267582, "learning_rate": 2.7683128465540545e-06, "loss": 1.1872, "step": 6806 }, { "epoch": 0.9636865576555532, "grad_norm": 8.215287672266014, "learning_rate": 2.7677429506692788e-06, "loss": 1.2181, "step": 6807 }, { "epoch": 0.9638281305301905, "grad_norm": 7.688158456779034, "learning_rate": 2.7671730407094553e-06, "loss": 1.287, "step": 6808 }, { "epoch": 0.9639697034048277, "grad_norm": 10.801384476825687, "learning_rate": 2.7666031167045444e-06, "loss": 1.3857, "step": 6809 }, { "epoch": 0.9641112762794648, "grad_norm": 8.96447311665763, "learning_rate": 2.766033178684506e-06, "loss": 1.2443, "step": 6810 }, { "epoch": 0.964252849154102, "grad_norm": 8.28223801962788, "learning_rate": 2.7654632266793025e-06, "loss": 1.309, "step": 6811 }, { "epoch": 0.9643944220287393, "grad_norm": 8.100642388384529, "learning_rate": 2.764893260718895e-06, "loss": 1.1993, "step": 6812 }, { "epoch": 0.9645359949033765, "grad_norm": 9.109710844205708, "learning_rate": 2.764323280833246e-06, "loss": 1.1567, "step": 6813 }, { "epoch": 0.9646775677780137, "grad_norm": 8.685445199469529, "learning_rate": 2.7637532870523193e-06, "loss": 1.187, "step": 6814 }, { "epoch": 0.964819140652651, "grad_norm": 10.407845956118349, "learning_rate": 2.7631832794060787e-06, "loss": 1.2914, "step": 6815 }, { "epoch": 0.9649607135272882, "grad_norm": 8.8739644910589, "learning_rate": 2.7626132579244896e-06, "loss": 1.2506, "step": 6816 }, { "epoch": 0.9651022864019254, "grad_norm": 7.525050227102152, "learning_rate": 2.7620432226375175e-06, "loss": 1.1452, "step": 6817 }, { "epoch": 0.9652438592765626, "grad_norm": 10.033387333755243, "learning_rate": 2.761473173575129e-06, "loss": 1.3081, "step": 6818 }, { "epoch": 0.9653854321511999, "grad_norm": 9.549092785949249, "learning_rate": 2.7609031107672896e-06, "loss": 1.2597, "step": 6819 }, { "epoch": 0.9655270050258371, "grad_norm": 7.9333684786918335, "learning_rate": 2.7603330342439686e-06, "loss": 1.2474, "step": 6820 }, { "epoch": 0.9656685779004742, "grad_norm": 8.514025616719064, "learning_rate": 2.7597629440351346e-06, "loss": 1.2384, "step": 6821 }, { "epoch": 0.9658101507751115, "grad_norm": 8.963746698943055, "learning_rate": 2.7591928401707555e-06, "loss": 1.2677, "step": 6822 }, { "epoch": 0.9659517236497487, "grad_norm": 8.317880366359118, "learning_rate": 2.7586227226808038e-06, "loss": 1.0506, "step": 6823 }, { "epoch": 0.9660932965243859, "grad_norm": 7.989184937041572, "learning_rate": 2.7580525915952465e-06, "loss": 1.2297, "step": 6824 }, { "epoch": 0.9662348693990231, "grad_norm": 7.4073534437262545, "learning_rate": 2.757482446944058e-06, "loss": 1.1061, "step": 6825 }, { "epoch": 0.9663764422736604, "grad_norm": 8.961118421914868, "learning_rate": 2.756912288757209e-06, "loss": 1.1123, "step": 6826 }, { "epoch": 0.9665180151482976, "grad_norm": 8.917801068835317, "learning_rate": 2.7563421170646714e-06, "loss": 1.2417, "step": 6827 }, { "epoch": 0.9666595880229348, "grad_norm": 12.467908022950361, "learning_rate": 2.7557719318964216e-06, "loss": 1.2221, "step": 6828 }, { "epoch": 0.9668011608975721, "grad_norm": 9.585531983196285, "learning_rate": 2.755201733282431e-06, "loss": 1.2904, "step": 6829 }, { "epoch": 0.9669427337722093, "grad_norm": 9.878735185969493, "learning_rate": 2.754631521252676e-06, "loss": 1.3014, "step": 6830 }, { "epoch": 0.9670843066468464, "grad_norm": 9.015026687523328, "learning_rate": 2.7540612958371315e-06, "loss": 1.2131, "step": 6831 }, { "epoch": 0.9672258795214836, "grad_norm": 8.835075734390841, "learning_rate": 2.7534910570657743e-06, "loss": 1.1847, "step": 6832 }, { "epoch": 0.9673674523961209, "grad_norm": 9.451198312178875, "learning_rate": 2.752920804968581e-06, "loss": 1.244, "step": 6833 }, { "epoch": 0.9675090252707581, "grad_norm": 8.339795722197774, "learning_rate": 2.7523505395755296e-06, "loss": 1.1699, "step": 6834 }, { "epoch": 0.9676505981453953, "grad_norm": 8.201437698493212, "learning_rate": 2.7517802609165985e-06, "loss": 1.1847, "step": 6835 }, { "epoch": 0.9677921710200326, "grad_norm": 9.093995815245096, "learning_rate": 2.751209969021767e-06, "loss": 1.2135, "step": 6836 }, { "epoch": 0.9679337438946698, "grad_norm": 9.679525920513795, "learning_rate": 2.750639663921014e-06, "loss": 1.2543, "step": 6837 }, { "epoch": 0.968075316769307, "grad_norm": 8.99707704339065, "learning_rate": 2.7500693456443217e-06, "loss": 1.2358, "step": 6838 }, { "epoch": 0.9682168896439443, "grad_norm": 7.232786593305299, "learning_rate": 2.749499014221671e-06, "loss": 1.2684, "step": 6839 }, { "epoch": 0.9683584625185815, "grad_norm": 7.636355589990381, "learning_rate": 2.748928669683042e-06, "loss": 1.0955, "step": 6840 }, { "epoch": 0.9685000353932186, "grad_norm": 7.772285659794125, "learning_rate": 2.748358312058418e-06, "loss": 1.2902, "step": 6841 }, { "epoch": 0.9686416082678558, "grad_norm": 9.126189689943768, "learning_rate": 2.7477879413777834e-06, "loss": 1.133, "step": 6842 }, { "epoch": 0.9687831811424931, "grad_norm": 10.261113910253565, "learning_rate": 2.7472175576711213e-06, "loss": 1.2474, "step": 6843 }, { "epoch": 0.9689247540171303, "grad_norm": 11.603930997709758, "learning_rate": 2.7466471609684175e-06, "loss": 1.3023, "step": 6844 }, { "epoch": 0.9690663268917675, "grad_norm": 9.440700367308189, "learning_rate": 2.7460767512996556e-06, "loss": 1.1517, "step": 6845 }, { "epoch": 0.9692078997664048, "grad_norm": 9.684792023229157, "learning_rate": 2.745506328694822e-06, "loss": 1.0535, "step": 6846 }, { "epoch": 0.969349472641042, "grad_norm": 9.124844902497188, "learning_rate": 2.7449358931839042e-06, "loss": 1.1206, "step": 6847 }, { "epoch": 0.9694910455156792, "grad_norm": 8.102630338859298, "learning_rate": 2.7443654447968894e-06, "loss": 1.2422, "step": 6848 }, { "epoch": 0.9696326183903164, "grad_norm": 10.34293641178191, "learning_rate": 2.7437949835637644e-06, "loss": 1.3034, "step": 6849 }, { "epoch": 0.9697741912649537, "grad_norm": 10.262781501282506, "learning_rate": 2.7432245095145193e-06, "loss": 1.3562, "step": 6850 }, { "epoch": 0.9699157641395909, "grad_norm": 7.535063351957804, "learning_rate": 2.7426540226791437e-06, "loss": 1.1684, "step": 6851 }, { "epoch": 0.970057337014228, "grad_norm": 9.36825155875238, "learning_rate": 2.7420835230876264e-06, "loss": 1.3498, "step": 6852 }, { "epoch": 0.9701989098888653, "grad_norm": 8.334614871348942, "learning_rate": 2.7415130107699588e-06, "loss": 1.373, "step": 6853 }, { "epoch": 0.9703404827635025, "grad_norm": 9.61680908450269, "learning_rate": 2.740942485756133e-06, "loss": 1.2375, "step": 6854 }, { "epoch": 0.9704820556381397, "grad_norm": 7.804914774750157, "learning_rate": 2.7403719480761406e-06, "loss": 1.1462, "step": 6855 }, { "epoch": 0.970623628512777, "grad_norm": 9.38624163243318, "learning_rate": 2.7398013977599722e-06, "loss": 1.2008, "step": 6856 }, { "epoch": 0.9707652013874142, "grad_norm": 9.411750883204464, "learning_rate": 2.7392308348376243e-06, "loss": 1.3367, "step": 6857 }, { "epoch": 0.9709067742620514, "grad_norm": 9.621263051063197, "learning_rate": 2.73866025933909e-06, "loss": 1.2406, "step": 6858 }, { "epoch": 0.9710483471366886, "grad_norm": 8.550025484816734, "learning_rate": 2.738089671294364e-06, "loss": 1.1948, "step": 6859 }, { "epoch": 0.9711899200113259, "grad_norm": 8.318497638041473, "learning_rate": 2.7375190707334416e-06, "loss": 1.3192, "step": 6860 }, { "epoch": 0.9713314928859631, "grad_norm": 9.371181269782015, "learning_rate": 2.736948457686318e-06, "loss": 1.3007, "step": 6861 }, { "epoch": 0.9714730657606002, "grad_norm": 17.021315730841266, "learning_rate": 2.736377832182991e-06, "loss": 1.1558, "step": 6862 }, { "epoch": 0.9716146386352374, "grad_norm": 11.397491687755728, "learning_rate": 2.7358071942534574e-06, "loss": 1.3553, "step": 6863 }, { "epoch": 0.9717562115098747, "grad_norm": 9.128772530578198, "learning_rate": 2.735236543927715e-06, "loss": 1.1386, "step": 6864 }, { "epoch": 0.9718977843845119, "grad_norm": 8.414082448328257, "learning_rate": 2.734665881235764e-06, "loss": 1.1614, "step": 6865 }, { "epoch": 0.9720393572591491, "grad_norm": 8.681180782605177, "learning_rate": 2.7340952062076022e-06, "loss": 1.2788, "step": 6866 }, { "epoch": 0.9721809301337864, "grad_norm": 7.974424727687184, "learning_rate": 2.73352451887323e-06, "loss": 1.1504, "step": 6867 }, { "epoch": 0.9723225030084236, "grad_norm": 8.883126849648896, "learning_rate": 2.7329538192626478e-06, "loss": 1.2526, "step": 6868 }, { "epoch": 0.9724640758830608, "grad_norm": 7.582118289846873, "learning_rate": 2.7323831074058572e-06, "loss": 1.2576, "step": 6869 }, { "epoch": 0.9726056487576981, "grad_norm": 9.532737940978299, "learning_rate": 2.7318123833328598e-06, "loss": 1.197, "step": 6870 }, { "epoch": 0.9727472216323353, "grad_norm": 7.593381268375378, "learning_rate": 2.731241647073658e-06, "loss": 1.183, "step": 6871 }, { "epoch": 0.9728887945069724, "grad_norm": 8.961202708994994, "learning_rate": 2.730670898658255e-06, "loss": 1.2875, "step": 6872 }, { "epoch": 0.9730303673816096, "grad_norm": 9.389703232500674, "learning_rate": 2.7301001381166553e-06, "loss": 1.1677, "step": 6873 }, { "epoch": 0.9731719402562469, "grad_norm": 9.132471657454992, "learning_rate": 2.729529365478863e-06, "loss": 1.2873, "step": 6874 }, { "epoch": 0.9733135131308841, "grad_norm": 8.328304553646033, "learning_rate": 2.7289585807748832e-06, "loss": 1.2309, "step": 6875 }, { "epoch": 0.9734550860055213, "grad_norm": 7.786822303932303, "learning_rate": 2.7283877840347217e-06, "loss": 1.0204, "step": 6876 }, { "epoch": 0.9735966588801586, "grad_norm": 7.540143219171967, "learning_rate": 2.7278169752883845e-06, "loss": 1.2125, "step": 6877 }, { "epoch": 0.9737382317547958, "grad_norm": 8.005334029558336, "learning_rate": 2.727246154565878e-06, "loss": 1.1226, "step": 6878 }, { "epoch": 0.973879804629433, "grad_norm": 7.951958171634691, "learning_rate": 2.726675321897211e-06, "loss": 1.2192, "step": 6879 }, { "epoch": 0.9740213775040703, "grad_norm": 8.015506022204224, "learning_rate": 2.7261044773123913e-06, "loss": 1.3853, "step": 6880 }, { "epoch": 0.9741629503787075, "grad_norm": 8.678138134527106, "learning_rate": 2.725533620841429e-06, "loss": 1.2095, "step": 6881 }, { "epoch": 0.9743045232533447, "grad_norm": 8.403758307241079, "learning_rate": 2.7249627525143313e-06, "loss": 1.2609, "step": 6882 }, { "epoch": 0.9744460961279818, "grad_norm": 9.143916341190327, "learning_rate": 2.7243918723611095e-06, "loss": 1.1683, "step": 6883 }, { "epoch": 0.9745876690026191, "grad_norm": 9.408454712680845, "learning_rate": 2.7238209804117744e-06, "loss": 1.2685, "step": 6884 }, { "epoch": 0.9747292418772563, "grad_norm": 8.221527442560143, "learning_rate": 2.7232500766963373e-06, "loss": 1.182, "step": 6885 }, { "epoch": 0.9748708147518935, "grad_norm": 9.99897112321809, "learning_rate": 2.72267916124481e-06, "loss": 1.2708, "step": 6886 }, { "epoch": 0.9750123876265308, "grad_norm": 7.211240619475449, "learning_rate": 2.722108234087205e-06, "loss": 1.1409, "step": 6887 }, { "epoch": 0.975153960501168, "grad_norm": 11.228463815598424, "learning_rate": 2.7215372952535364e-06, "loss": 1.3685, "step": 6888 }, { "epoch": 0.9752955333758052, "grad_norm": 11.003711767783498, "learning_rate": 2.7209663447738164e-06, "loss": 1.3019, "step": 6889 }, { "epoch": 0.9754371062504424, "grad_norm": 9.040779550749697, "learning_rate": 2.7203953826780615e-06, "loss": 1.178, "step": 6890 }, { "epoch": 0.9755786791250797, "grad_norm": 8.382346958910182, "learning_rate": 2.719824408996285e-06, "loss": 1.2288, "step": 6891 }, { "epoch": 0.9757202519997169, "grad_norm": 7.6662723882159485, "learning_rate": 2.7192534237585037e-06, "loss": 1.1314, "step": 6892 }, { "epoch": 0.975861824874354, "grad_norm": 9.65478465001595, "learning_rate": 2.7186824269947334e-06, "loss": 1.2274, "step": 6893 }, { "epoch": 0.9760033977489913, "grad_norm": 10.431812312819034, "learning_rate": 2.71811141873499e-06, "loss": 1.2369, "step": 6894 }, { "epoch": 0.9761449706236285, "grad_norm": 10.860950031912637, "learning_rate": 2.717540399009293e-06, "loss": 1.3003, "step": 6895 }, { "epoch": 0.9762865434982657, "grad_norm": 10.73794447452984, "learning_rate": 2.716969367847659e-06, "loss": 1.2471, "step": 6896 }, { "epoch": 0.9764281163729029, "grad_norm": 9.05550014216261, "learning_rate": 2.7163983252801076e-06, "loss": 1.2145, "step": 6897 }, { "epoch": 0.9765696892475402, "grad_norm": 7.74147869603237, "learning_rate": 2.7158272713366573e-06, "loss": 1.0228, "step": 6898 }, { "epoch": 0.9767112621221774, "grad_norm": 9.065973287136158, "learning_rate": 2.715256206047328e-06, "loss": 1.2788, "step": 6899 }, { "epoch": 0.9768528349968146, "grad_norm": 9.08185693717128, "learning_rate": 2.7146851294421404e-06, "loss": 1.2308, "step": 6900 }, { "epoch": 0.9769944078714519, "grad_norm": 8.65525046406533, "learning_rate": 2.714114041551115e-06, "loss": 1.1772, "step": 6901 }, { "epoch": 0.9771359807460891, "grad_norm": 11.416864175317855, "learning_rate": 2.7135429424042758e-06, "loss": 1.1866, "step": 6902 }, { "epoch": 0.9772775536207262, "grad_norm": 8.508470241878594, "learning_rate": 2.712971832031642e-06, "loss": 1.1496, "step": 6903 }, { "epoch": 0.9774191264953634, "grad_norm": 11.846634244371769, "learning_rate": 2.712400710463239e-06, "loss": 1.2424, "step": 6904 }, { "epoch": 0.9775606993700007, "grad_norm": 10.40688415977804, "learning_rate": 2.7118295777290875e-06, "loss": 1.377, "step": 6905 }, { "epoch": 0.9777022722446379, "grad_norm": 10.443639354473545, "learning_rate": 2.711258433859214e-06, "loss": 1.2962, "step": 6906 }, { "epoch": 0.9778438451192751, "grad_norm": 9.988084178736353, "learning_rate": 2.710687278883642e-06, "loss": 1.2723, "step": 6907 }, { "epoch": 0.9779854179939124, "grad_norm": 11.151299754111836, "learning_rate": 2.7101161128323967e-06, "loss": 1.1949, "step": 6908 }, { "epoch": 0.9781269908685496, "grad_norm": 9.229667793486664, "learning_rate": 2.7095449357355042e-06, "loss": 1.0558, "step": 6909 }, { "epoch": 0.9782685637431868, "grad_norm": 9.601129433632648, "learning_rate": 2.7089737476229906e-06, "loss": 1.2438, "step": 6910 }, { "epoch": 0.9784101366178241, "grad_norm": 10.102100237040748, "learning_rate": 2.7084025485248827e-06, "loss": 1.1734, "step": 6911 }, { "epoch": 0.9785517094924613, "grad_norm": 9.178696755046097, "learning_rate": 2.7078313384712084e-06, "loss": 1.3564, "step": 6912 }, { "epoch": 0.9786932823670985, "grad_norm": 8.574309954668683, "learning_rate": 2.7072601174919965e-06, "loss": 1.1461, "step": 6913 }, { "epoch": 0.9788348552417356, "grad_norm": 10.274845807108896, "learning_rate": 2.7066888856172737e-06, "loss": 1.4199, "step": 6914 }, { "epoch": 0.9789764281163729, "grad_norm": 7.7895049770101075, "learning_rate": 2.70611764287707e-06, "loss": 1.0802, "step": 6915 }, { "epoch": 0.9791180009910101, "grad_norm": 7.749541730560034, "learning_rate": 2.7055463893014156e-06, "loss": 1.1431, "step": 6916 }, { "epoch": 0.9792595738656473, "grad_norm": 8.968540734165767, "learning_rate": 2.7049751249203414e-06, "loss": 1.0827, "step": 6917 }, { "epoch": 0.9794011467402846, "grad_norm": 8.332591621451444, "learning_rate": 2.7044038497638782e-06, "loss": 1.2306, "step": 6918 }, { "epoch": 0.9795427196149218, "grad_norm": 8.115950019212505, "learning_rate": 2.7038325638620563e-06, "loss": 1.1321, "step": 6919 }, { "epoch": 0.979684292489559, "grad_norm": 8.376326270918236, "learning_rate": 2.7032612672449084e-06, "loss": 1.2603, "step": 6920 }, { "epoch": 0.9798258653641962, "grad_norm": 7.132507384544104, "learning_rate": 2.7026899599424674e-06, "loss": 1.1337, "step": 6921 }, { "epoch": 0.9799674382388335, "grad_norm": 7.559724156428531, "learning_rate": 2.702118641984766e-06, "loss": 1.1001, "step": 6922 }, { "epoch": 0.9801090111134707, "grad_norm": 11.61432510339804, "learning_rate": 2.7015473134018382e-06, "loss": 1.2655, "step": 6923 }, { "epoch": 0.9802505839881078, "grad_norm": 7.88909063245442, "learning_rate": 2.700975974223719e-06, "loss": 1.2066, "step": 6924 }, { "epoch": 0.9803921568627451, "grad_norm": 8.175875499635918, "learning_rate": 2.700404624480443e-06, "loss": 1.1803, "step": 6925 }, { "epoch": 0.9805337297373823, "grad_norm": 8.71493085666351, "learning_rate": 2.699833264202044e-06, "loss": 1.1722, "step": 6926 }, { "epoch": 0.9806753026120195, "grad_norm": 8.9036077495042, "learning_rate": 2.6992618934185604e-06, "loss": 1.1914, "step": 6927 }, { "epoch": 0.9808168754866567, "grad_norm": 10.444006074223529, "learning_rate": 2.698690512160027e-06, "loss": 1.3527, "step": 6928 }, { "epoch": 0.980958448361294, "grad_norm": 7.672874867220718, "learning_rate": 2.6981191204564825e-06, "loss": 1.1391, "step": 6929 }, { "epoch": 0.9811000212359312, "grad_norm": 8.627884534492065, "learning_rate": 2.6975477183379624e-06, "loss": 1.0999, "step": 6930 }, { "epoch": 0.9812415941105684, "grad_norm": 8.552696240138207, "learning_rate": 2.6969763058345067e-06, "loss": 1.1112, "step": 6931 }, { "epoch": 0.9813831669852057, "grad_norm": 10.068284167093017, "learning_rate": 2.696404882976153e-06, "loss": 1.2233, "step": 6932 }, { "epoch": 0.9815247398598429, "grad_norm": 9.21052872621892, "learning_rate": 2.6958334497929416e-06, "loss": 1.1868, "step": 6933 }, { "epoch": 0.9816663127344801, "grad_norm": 7.955335388594219, "learning_rate": 2.695262006314912e-06, "loss": 1.1145, "step": 6934 }, { "epoch": 0.9818078856091172, "grad_norm": 10.054057684722839, "learning_rate": 2.694690552572104e-06, "loss": 1.2774, "step": 6935 }, { "epoch": 0.9819494584837545, "grad_norm": 8.777287685485165, "learning_rate": 2.6941190885945582e-06, "loss": 1.2422, "step": 6936 }, { "epoch": 0.9820910313583917, "grad_norm": 8.478787600380333, "learning_rate": 2.6935476144123173e-06, "loss": 1.2292, "step": 6937 }, { "epoch": 0.9822326042330289, "grad_norm": 7.632055551562677, "learning_rate": 2.692976130055422e-06, "loss": 1.1047, "step": 6938 }, { "epoch": 0.9823741771076662, "grad_norm": 8.200942324945947, "learning_rate": 2.692404635553917e-06, "loss": 1.2703, "step": 6939 }, { "epoch": 0.9825157499823034, "grad_norm": 8.44131315745753, "learning_rate": 2.691833130937842e-06, "loss": 1.0987, "step": 6940 }, { "epoch": 0.9826573228569406, "grad_norm": 8.869232022693017, "learning_rate": 2.6912616162372434e-06, "loss": 1.21, "step": 6941 }, { "epoch": 0.9827988957315779, "grad_norm": 9.767317822028904, "learning_rate": 2.690690091482164e-06, "loss": 1.2053, "step": 6942 }, { "epoch": 0.9829404686062151, "grad_norm": 8.240471626043554, "learning_rate": 2.6901185567026484e-06, "loss": 1.0489, "step": 6943 }, { "epoch": 0.9830820414808523, "grad_norm": 9.865787893115943, "learning_rate": 2.689547011928742e-06, "loss": 1.1645, "step": 6944 }, { "epoch": 0.9832236143554894, "grad_norm": 8.824204484135215, "learning_rate": 2.6889754571904907e-06, "loss": 1.1923, "step": 6945 }, { "epoch": 0.9833651872301267, "grad_norm": 9.617908193574086, "learning_rate": 2.68840389251794e-06, "loss": 1.2233, "step": 6946 }, { "epoch": 0.9835067601047639, "grad_norm": 10.093474865638754, "learning_rate": 2.687832317941138e-06, "loss": 1.3215, "step": 6947 }, { "epoch": 0.9836483329794011, "grad_norm": 10.184490110268277, "learning_rate": 2.687260733490131e-06, "loss": 1.3175, "step": 6948 }, { "epoch": 0.9837899058540384, "grad_norm": 8.262630619742165, "learning_rate": 2.6866891391949664e-06, "loss": 1.275, "step": 6949 }, { "epoch": 0.9839314787286756, "grad_norm": 9.016236388633711, "learning_rate": 2.6861175350856937e-06, "loss": 1.103, "step": 6950 }, { "epoch": 0.9840730516033128, "grad_norm": 8.931087727871095, "learning_rate": 2.6855459211923603e-06, "loss": 1.2476, "step": 6951 }, { "epoch": 0.98421462447795, "grad_norm": 7.636386811440051, "learning_rate": 2.6849742975450165e-06, "loss": 1.1124, "step": 6952 }, { "epoch": 0.9843561973525873, "grad_norm": 8.58606367912288, "learning_rate": 2.684402664173711e-06, "loss": 1.2197, "step": 6953 }, { "epoch": 0.9844977702272245, "grad_norm": 8.185205502082896, "learning_rate": 2.6838310211084954e-06, "loss": 1.4081, "step": 6954 }, { "epoch": 0.9846393431018616, "grad_norm": 9.78376752246259, "learning_rate": 2.6832593683794206e-06, "loss": 1.3271, "step": 6955 }, { "epoch": 0.9847809159764989, "grad_norm": 8.74959192686829, "learning_rate": 2.6826877060165373e-06, "loss": 1.2404, "step": 6956 }, { "epoch": 0.9849224888511361, "grad_norm": 7.483184018872096, "learning_rate": 2.6821160340498975e-06, "loss": 1.1457, "step": 6957 }, { "epoch": 0.9850640617257733, "grad_norm": 9.448405249319649, "learning_rate": 2.681544352509553e-06, "loss": 1.3839, "step": 6958 }, { "epoch": 0.9852056346004106, "grad_norm": 7.673394885883759, "learning_rate": 2.6809726614255575e-06, "loss": 1.2553, "step": 6959 }, { "epoch": 0.9853472074750478, "grad_norm": 8.796769624285606, "learning_rate": 2.680400960827965e-06, "loss": 1.2977, "step": 6960 }, { "epoch": 0.985488780349685, "grad_norm": 7.690039982685573, "learning_rate": 2.679829250746827e-06, "loss": 1.2095, "step": 6961 }, { "epoch": 0.9856303532243222, "grad_norm": 9.624927074601981, "learning_rate": 2.6792575312122005e-06, "loss": 1.2079, "step": 6962 }, { "epoch": 0.9857719260989595, "grad_norm": 9.040372788161216, "learning_rate": 2.6786858022541385e-06, "loss": 1.2774, "step": 6963 }, { "epoch": 0.9859134989735967, "grad_norm": 8.219852272061324, "learning_rate": 2.6781140639026975e-06, "loss": 1.2119, "step": 6964 }, { "epoch": 0.9860550718482339, "grad_norm": 7.350012393863116, "learning_rate": 2.6775423161879333e-06, "loss": 1.1852, "step": 6965 }, { "epoch": 0.986196644722871, "grad_norm": 7.344168922974842, "learning_rate": 2.676970559139902e-06, "loss": 1.1703, "step": 6966 }, { "epoch": 0.9863382175975083, "grad_norm": 9.683837986028497, "learning_rate": 2.676398792788659e-06, "loss": 1.2587, "step": 6967 }, { "epoch": 0.9864797904721455, "grad_norm": 8.230452528015576, "learning_rate": 2.675827017164264e-06, "loss": 1.2781, "step": 6968 }, { "epoch": 0.9866213633467827, "grad_norm": 7.651247718136377, "learning_rate": 2.675255232296774e-06, "loss": 1.1719, "step": 6969 }, { "epoch": 0.98676293622142, "grad_norm": 8.000490650389677, "learning_rate": 2.674683438216247e-06, "loss": 1.1515, "step": 6970 }, { "epoch": 0.9869045090960572, "grad_norm": 7.655943074205081, "learning_rate": 2.674111634952742e-06, "loss": 1.2224, "step": 6971 }, { "epoch": 0.9870460819706944, "grad_norm": 8.606003781455467, "learning_rate": 2.673539822536318e-06, "loss": 1.1354, "step": 6972 }, { "epoch": 0.9871876548453317, "grad_norm": 9.031785572895629, "learning_rate": 2.672968000997035e-06, "loss": 1.1807, "step": 6973 }, { "epoch": 0.9873292277199689, "grad_norm": 6.967298963895185, "learning_rate": 2.6723961703649525e-06, "loss": 1.1244, "step": 6974 }, { "epoch": 0.9874708005946061, "grad_norm": 7.055124127805281, "learning_rate": 2.6718243306701317e-06, "loss": 1.2148, "step": 6975 }, { "epoch": 0.9876123734692432, "grad_norm": 6.892932047179288, "learning_rate": 2.6712524819426355e-06, "loss": 1.1844, "step": 6976 }, { "epoch": 0.9877539463438805, "grad_norm": 9.176941078518144, "learning_rate": 2.6706806242125232e-06, "loss": 1.3817, "step": 6977 }, { "epoch": 0.9878955192185177, "grad_norm": 8.900253258005675, "learning_rate": 2.670108757509858e-06, "loss": 1.262, "step": 6978 }, { "epoch": 0.9880370920931549, "grad_norm": 9.7833678661389, "learning_rate": 2.6695368818647015e-06, "loss": 1.1912, "step": 6979 }, { "epoch": 0.9881786649677922, "grad_norm": 10.023315333798578, "learning_rate": 2.668964997307118e-06, "loss": 1.2809, "step": 6980 }, { "epoch": 0.9883202378424294, "grad_norm": 7.764126116821529, "learning_rate": 2.6683931038671705e-06, "loss": 1.2382, "step": 6981 }, { "epoch": 0.9884618107170666, "grad_norm": 8.48031311032905, "learning_rate": 2.6678212015749234e-06, "loss": 1.2131, "step": 6982 }, { "epoch": 0.9886033835917039, "grad_norm": 8.213150960127766, "learning_rate": 2.6672492904604403e-06, "loss": 1.1728, "step": 6983 }, { "epoch": 0.9887449564663411, "grad_norm": 9.345924558178712, "learning_rate": 2.6666773705537873e-06, "loss": 1.1832, "step": 6984 }, { "epoch": 0.9888865293409783, "grad_norm": 9.08312199431309, "learning_rate": 2.6661054418850286e-06, "loss": 1.2423, "step": 6985 }, { "epoch": 0.9890281022156154, "grad_norm": 9.115927588743926, "learning_rate": 2.665533504484231e-06, "loss": 1.2104, "step": 6986 }, { "epoch": 0.9891696750902527, "grad_norm": 9.995620913128151, "learning_rate": 2.6649615583814613e-06, "loss": 1.2574, "step": 6987 }, { "epoch": 0.9893112479648899, "grad_norm": 9.211373174068735, "learning_rate": 2.6643896036067847e-06, "loss": 1.2619, "step": 6988 }, { "epoch": 0.9894528208395271, "grad_norm": 9.183614103683315, "learning_rate": 2.6638176401902693e-06, "loss": 1.24, "step": 6989 }, { "epoch": 0.9895943937141644, "grad_norm": 8.256173338378948, "learning_rate": 2.6632456681619817e-06, "loss": 1.1479, "step": 6990 }, { "epoch": 0.9897359665888016, "grad_norm": 9.787569484732, "learning_rate": 2.662673687551992e-06, "loss": 1.2323, "step": 6991 }, { "epoch": 0.9898775394634388, "grad_norm": 9.85565763542737, "learning_rate": 2.6621016983903686e-06, "loss": 1.0817, "step": 6992 }, { "epoch": 0.990019112338076, "grad_norm": 8.624915523391781, "learning_rate": 2.661529700707179e-06, "loss": 1.2362, "step": 6993 }, { "epoch": 0.9901606852127133, "grad_norm": 9.389969737823764, "learning_rate": 2.6609576945324933e-06, "loss": 1.1308, "step": 6994 }, { "epoch": 0.9903022580873505, "grad_norm": 9.168258343849736, "learning_rate": 2.6603856798963817e-06, "loss": 1.156, "step": 6995 }, { "epoch": 0.9904438309619877, "grad_norm": 10.76236705454238, "learning_rate": 2.6598136568289144e-06, "loss": 1.3487, "step": 6996 }, { "epoch": 0.9905854038366249, "grad_norm": 8.462586133960885, "learning_rate": 2.6592416253601626e-06, "loss": 1.269, "step": 6997 }, { "epoch": 0.9907269767112621, "grad_norm": 10.796861220708465, "learning_rate": 2.658669585520197e-06, "loss": 1.2753, "step": 6998 }, { "epoch": 0.9908685495858993, "grad_norm": 9.814840827230322, "learning_rate": 2.65809753733909e-06, "loss": 1.1819, "step": 6999 }, { "epoch": 0.9910101224605365, "grad_norm": 10.902407780961198, "learning_rate": 2.657525480846913e-06, "loss": 1.3346, "step": 7000 }, { "epoch": 0.9911516953351738, "grad_norm": 7.986173363779295, "learning_rate": 2.6569534160737386e-06, "loss": 1.0989, "step": 7001 }, { "epoch": 0.991293268209811, "grad_norm": 9.373850841028128, "learning_rate": 2.656381343049641e-06, "loss": 1.2983, "step": 7002 }, { "epoch": 0.9914348410844482, "grad_norm": 9.527294100936645, "learning_rate": 2.655809261804693e-06, "loss": 1.1775, "step": 7003 }, { "epoch": 0.9915764139590855, "grad_norm": 10.926670489711066, "learning_rate": 2.655237172368967e-06, "loss": 1.1785, "step": 7004 }, { "epoch": 0.9917179868337227, "grad_norm": 9.850782225865405, "learning_rate": 2.654665074772539e-06, "loss": 1.2633, "step": 7005 }, { "epoch": 0.9918595597083599, "grad_norm": 9.485398967840075, "learning_rate": 2.6540929690454835e-06, "loss": 1.2236, "step": 7006 }, { "epoch": 0.992001132582997, "grad_norm": 10.871357625705107, "learning_rate": 2.653520855217876e-06, "loss": 1.2835, "step": 7007 }, { "epoch": 0.9921427054576343, "grad_norm": 9.437269599577899, "learning_rate": 2.652948733319792e-06, "loss": 1.2935, "step": 7008 }, { "epoch": 0.9922842783322715, "grad_norm": 8.530975002006704, "learning_rate": 2.652376603381306e-06, "loss": 1.2001, "step": 7009 }, { "epoch": 0.9924258512069087, "grad_norm": 9.670609645620525, "learning_rate": 2.651804465432496e-06, "loss": 1.2522, "step": 7010 }, { "epoch": 0.992567424081546, "grad_norm": 9.51511625987643, "learning_rate": 2.6512323195034384e-06, "loss": 1.2304, "step": 7011 }, { "epoch": 0.9927089969561832, "grad_norm": 8.215554199842325, "learning_rate": 2.6506601656242105e-06, "loss": 1.166, "step": 7012 }, { "epoch": 0.9928505698308204, "grad_norm": 9.827261348554488, "learning_rate": 2.65008800382489e-06, "loss": 1.2859, "step": 7013 }, { "epoch": 0.9929921427054577, "grad_norm": 9.039601830009836, "learning_rate": 2.6495158341355548e-06, "loss": 1.0756, "step": 7014 }, { "epoch": 0.9931337155800949, "grad_norm": 8.518054860468851, "learning_rate": 2.648943656586284e-06, "loss": 1.0618, "step": 7015 }, { "epoch": 0.9932752884547321, "grad_norm": 10.047614608760224, "learning_rate": 2.648371471207156e-06, "loss": 1.3582, "step": 7016 }, { "epoch": 0.9934168613293692, "grad_norm": 9.621515609407403, "learning_rate": 2.6477992780282507e-06, "loss": 1.0955, "step": 7017 }, { "epoch": 0.9935584342040065, "grad_norm": 10.624664660379066, "learning_rate": 2.6472270770796475e-06, "loss": 1.3452, "step": 7018 }, { "epoch": 0.9937000070786437, "grad_norm": 9.488084648925502, "learning_rate": 2.646654868391427e-06, "loss": 1.1763, "step": 7019 }, { "epoch": 0.9938415799532809, "grad_norm": 10.226087636391934, "learning_rate": 2.646082651993668e-06, "loss": 1.2588, "step": 7020 }, { "epoch": 0.9939831528279182, "grad_norm": 9.08797478570261, "learning_rate": 2.6455104279164546e-06, "loss": 1.2209, "step": 7021 }, { "epoch": 0.9941247257025554, "grad_norm": 8.597349654489049, "learning_rate": 2.6449381961898658e-06, "loss": 1.0251, "step": 7022 }, { "epoch": 0.9942662985771926, "grad_norm": 10.984157598088641, "learning_rate": 2.644365956843984e-06, "loss": 1.2307, "step": 7023 }, { "epoch": 0.9944078714518299, "grad_norm": 10.148459677492127, "learning_rate": 2.643793709908892e-06, "loss": 1.1305, "step": 7024 }, { "epoch": 0.9945494443264671, "grad_norm": 9.56977517624255, "learning_rate": 2.6432214554146717e-06, "loss": 1.1742, "step": 7025 }, { "epoch": 0.9946910172011043, "grad_norm": 8.588187119981288, "learning_rate": 2.6426491933914062e-06, "loss": 1.0909, "step": 7026 }, { "epoch": 0.9948325900757415, "grad_norm": 10.265247819959136, "learning_rate": 2.642076923869178e-06, "loss": 1.1916, "step": 7027 }, { "epoch": 0.9949741629503787, "grad_norm": 10.311317329792717, "learning_rate": 2.6415046468780726e-06, "loss": 1.2954, "step": 7028 }, { "epoch": 0.9951157358250159, "grad_norm": 8.922084077700164, "learning_rate": 2.6409323624481743e-06, "loss": 1.3435, "step": 7029 }, { "epoch": 0.9952573086996531, "grad_norm": 7.947887682247349, "learning_rate": 2.6403600706095655e-06, "loss": 1.151, "step": 7030 }, { "epoch": 0.9953988815742904, "grad_norm": 9.666406912985915, "learning_rate": 2.6397877713923333e-06, "loss": 1.4629, "step": 7031 }, { "epoch": 0.9955404544489276, "grad_norm": 10.263840425887668, "learning_rate": 2.6392154648265617e-06, "loss": 1.1688, "step": 7032 }, { "epoch": 0.9956820273235648, "grad_norm": 7.892056597777875, "learning_rate": 2.6386431509423373e-06, "loss": 1.2165, "step": 7033 }, { "epoch": 0.995823600198202, "grad_norm": 8.444099162666513, "learning_rate": 2.6380708297697456e-06, "loss": 1.3189, "step": 7034 }, { "epoch": 0.9959651730728393, "grad_norm": 7.916307648666143, "learning_rate": 2.637498501338873e-06, "loss": 1.0153, "step": 7035 }, { "epoch": 0.9961067459474765, "grad_norm": 7.6700879907845225, "learning_rate": 2.6369261656798067e-06, "loss": 1.3518, "step": 7036 }, { "epoch": 0.9962483188221137, "grad_norm": 9.91047073123906, "learning_rate": 2.636353822822635e-06, "loss": 1.2152, "step": 7037 }, { "epoch": 0.9963898916967509, "grad_norm": 8.761205854638034, "learning_rate": 2.6357814727974434e-06, "loss": 1.218, "step": 7038 }, { "epoch": 0.9965314645713881, "grad_norm": 9.690952005930583, "learning_rate": 2.6352091156343213e-06, "loss": 1.3257, "step": 7039 }, { "epoch": 0.9966730374460253, "grad_norm": 7.988620771360553, "learning_rate": 2.6346367513633574e-06, "loss": 1.193, "step": 7040 }, { "epoch": 0.9968146103206625, "grad_norm": 7.801890745616922, "learning_rate": 2.6340643800146387e-06, "loss": 1.2243, "step": 7041 }, { "epoch": 0.9969561831952998, "grad_norm": 8.198431041693901, "learning_rate": 2.6334920016182565e-06, "loss": 1.2215, "step": 7042 }, { "epoch": 0.997097756069937, "grad_norm": 8.54841424168034, "learning_rate": 2.6329196162042987e-06, "loss": 1.1432, "step": 7043 }, { "epoch": 0.9972393289445742, "grad_norm": 8.337351885485123, "learning_rate": 2.6323472238028564e-06, "loss": 1.0609, "step": 7044 }, { "epoch": 0.9973809018192115, "grad_norm": 10.117814139618655, "learning_rate": 2.6317748244440194e-06, "loss": 1.2827, "step": 7045 }, { "epoch": 0.9975224746938487, "grad_norm": 9.519660282004061, "learning_rate": 2.6312024181578776e-06, "loss": 1.2342, "step": 7046 }, { "epoch": 0.9976640475684859, "grad_norm": 9.047589683963098, "learning_rate": 2.6306300049745227e-06, "loss": 1.2003, "step": 7047 }, { "epoch": 0.997805620443123, "grad_norm": 7.590477760366289, "learning_rate": 2.6300575849240455e-06, "loss": 1.0887, "step": 7048 }, { "epoch": 0.9979471933177603, "grad_norm": 8.613690466469551, "learning_rate": 2.629485158036538e-06, "loss": 1.1703, "step": 7049 }, { "epoch": 0.9980887661923975, "grad_norm": 9.023699603877315, "learning_rate": 2.6289127243420924e-06, "loss": 1.1895, "step": 7050 }, { "epoch": 0.9982303390670347, "grad_norm": 8.671928226891831, "learning_rate": 2.628340283870801e-06, "loss": 1.1918, "step": 7051 }, { "epoch": 0.998371911941672, "grad_norm": 9.863318378642989, "learning_rate": 2.627767836652757e-06, "loss": 1.2431, "step": 7052 }, { "epoch": 0.9985134848163092, "grad_norm": 7.879488437913523, "learning_rate": 2.627195382718053e-06, "loss": 1.1791, "step": 7053 }, { "epoch": 0.9986550576909464, "grad_norm": 9.281600996887859, "learning_rate": 2.626622922096782e-06, "loss": 1.2246, "step": 7054 }, { "epoch": 0.9987966305655837, "grad_norm": 6.961691801600995, "learning_rate": 2.626050454819039e-06, "loss": 1.1959, "step": 7055 }, { "epoch": 0.9989382034402209, "grad_norm": 8.449928003489743, "learning_rate": 2.6254779809149174e-06, "loss": 1.2544, "step": 7056 }, { "epoch": 0.9990797763148581, "grad_norm": 10.108870857792736, "learning_rate": 2.6249055004145118e-06, "loss": 1.2686, "step": 7057 }, { "epoch": 0.9992213491894953, "grad_norm": 9.230316251252962, "learning_rate": 2.6243330133479173e-06, "loss": 1.387, "step": 7058 }, { "epoch": 0.9993629220641325, "grad_norm": 9.95298395700694, "learning_rate": 2.6237605197452287e-06, "loss": 1.3227, "step": 7059 }, { "epoch": 0.9995044949387697, "grad_norm": 9.313629286734718, "learning_rate": 2.6231880196365423e-06, "loss": 1.0544, "step": 7060 }, { "epoch": 0.9996460678134069, "grad_norm": 8.087784737670175, "learning_rate": 2.6226155130519536e-06, "loss": 1.2075, "step": 7061 }, { "epoch": 0.9997876406880442, "grad_norm": 7.428065880141785, "learning_rate": 2.6220430000215584e-06, "loss": 1.2952, "step": 7062 }, { "epoch": 0.9999292135626814, "grad_norm": 6.510009101826155, "learning_rate": 2.6214704805754537e-06, "loss": 1.1718, "step": 7063 }, { "epoch": 1.0000707864373186, "grad_norm": 10.349935750715813, "learning_rate": 2.620897954743736e-06, "loss": 1.1248, "step": 7064 }, { "epoch": 1.0002123593119558, "grad_norm": 7.592974839069222, "learning_rate": 2.6203254225565034e-06, "loss": 1.0205, "step": 7065 }, { "epoch": 1.000353932186593, "grad_norm": 9.214016594658414, "learning_rate": 2.619752884043854e-06, "loss": 1.0973, "step": 7066 }, { "epoch": 1.0004955050612303, "grad_norm": 8.688788229296113, "learning_rate": 2.619180339235884e-06, "loss": 1.0005, "step": 7067 }, { "epoch": 1.0006370779358675, "grad_norm": 8.655717192960484, "learning_rate": 2.618607788162692e-06, "loss": 1.0573, "step": 7068 }, { "epoch": 1.0007786508105048, "grad_norm": 7.8980663299132265, "learning_rate": 2.618035230854378e-06, "loss": 0.9802, "step": 7069 }, { "epoch": 1.000920223685142, "grad_norm": 8.96437098767057, "learning_rate": 2.6174626673410385e-06, "loss": 1.0547, "step": 7070 }, { "epoch": 1.0010617965597792, "grad_norm": 9.114165680535223, "learning_rate": 2.616890097652775e-06, "loss": 1.1219, "step": 7071 }, { "epoch": 1.0012033694344165, "grad_norm": 9.533132098370423, "learning_rate": 2.6163175218196862e-06, "loss": 1.0271, "step": 7072 }, { "epoch": 1.0013449423090537, "grad_norm": 10.47848522683149, "learning_rate": 2.615744939871872e-06, "loss": 1.1114, "step": 7073 }, { "epoch": 1.0014865151836907, "grad_norm": 8.846134578722125, "learning_rate": 2.6151723518394327e-06, "loss": 1.0637, "step": 7074 }, { "epoch": 1.001628088058328, "grad_norm": 8.399767336575088, "learning_rate": 2.6145997577524683e-06, "loss": 1.0347, "step": 7075 }, { "epoch": 1.0017696609329652, "grad_norm": 9.040024662490557, "learning_rate": 2.6140271576410807e-06, "loss": 0.9576, "step": 7076 }, { "epoch": 1.0019112338076024, "grad_norm": 7.336748917202514, "learning_rate": 2.613454551535371e-06, "loss": 0.9849, "step": 7077 }, { "epoch": 1.0020528066822396, "grad_norm": 8.731090057590174, "learning_rate": 2.6128819394654385e-06, "loss": 1.0454, "step": 7078 }, { "epoch": 1.0021943795568768, "grad_norm": 8.909624069623348, "learning_rate": 2.6123093214613875e-06, "loss": 1.0204, "step": 7079 }, { "epoch": 1.002335952431514, "grad_norm": 12.345101661557093, "learning_rate": 2.6117366975533187e-06, "loss": 1.0752, "step": 7080 }, { "epoch": 1.0024775253061513, "grad_norm": 7.750708270779547, "learning_rate": 2.6111640677713356e-06, "loss": 1.0356, "step": 7081 }, { "epoch": 1.0026190981807885, "grad_norm": 9.784476336158802, "learning_rate": 2.6105914321455405e-06, "loss": 1.083, "step": 7082 }, { "epoch": 1.0027606710554258, "grad_norm": 7.996059162342757, "learning_rate": 2.6100187907060365e-06, "loss": 1.0148, "step": 7083 }, { "epoch": 1.002902243930063, "grad_norm": 10.897380731135184, "learning_rate": 2.609446143482926e-06, "loss": 1.0549, "step": 7084 }, { "epoch": 1.0030438168047002, "grad_norm": 9.67243307595802, "learning_rate": 2.6088734905063134e-06, "loss": 1.1552, "step": 7085 }, { "epoch": 1.0031853896793375, "grad_norm": 8.55095702052097, "learning_rate": 2.6083008318063023e-06, "loss": 1.0147, "step": 7086 }, { "epoch": 1.0033269625539747, "grad_norm": 8.224528902702374, "learning_rate": 2.6077281674129974e-06, "loss": 1.1196, "step": 7087 }, { "epoch": 1.003468535428612, "grad_norm": 9.537612742386349, "learning_rate": 2.607155497356504e-06, "loss": 1.1787, "step": 7088 }, { "epoch": 1.0036101083032491, "grad_norm": 9.026766499014714, "learning_rate": 2.6065828216669254e-06, "loss": 0.9629, "step": 7089 }, { "epoch": 1.0037516811778864, "grad_norm": 9.90086049665476, "learning_rate": 2.606010140374367e-06, "loss": 1.1771, "step": 7090 }, { "epoch": 1.0038932540525236, "grad_norm": 6.835348188884107, "learning_rate": 2.6054374535089345e-06, "loss": 1.0329, "step": 7091 }, { "epoch": 1.0040348269271608, "grad_norm": 7.016166547166488, "learning_rate": 2.604864761100734e-06, "loss": 0.9642, "step": 7092 }, { "epoch": 1.004176399801798, "grad_norm": 8.884205516871816, "learning_rate": 2.604292063179871e-06, "loss": 0.992, "step": 7093 }, { "epoch": 1.0043179726764353, "grad_norm": 9.072399126103322, "learning_rate": 2.6037193597764524e-06, "loss": 1.1243, "step": 7094 }, { "epoch": 1.0044595455510723, "grad_norm": 7.6535293416212005, "learning_rate": 2.6031466509205843e-06, "loss": 0.9634, "step": 7095 }, { "epoch": 1.0046011184257095, "grad_norm": 8.157128297717874, "learning_rate": 2.6025739366423735e-06, "loss": 1.0443, "step": 7096 }, { "epoch": 1.0047426913003468, "grad_norm": 10.492365286677835, "learning_rate": 2.602001216971927e-06, "loss": 1.0894, "step": 7097 }, { "epoch": 1.004884264174984, "grad_norm": 10.362846885921584, "learning_rate": 2.601428491939354e-06, "loss": 1.0285, "step": 7098 }, { "epoch": 1.0050258370496212, "grad_norm": 9.666360740643901, "learning_rate": 2.600855761574759e-06, "loss": 1.0366, "step": 7099 }, { "epoch": 1.0051674099242585, "grad_norm": 9.042829541315111, "learning_rate": 2.6002830259082527e-06, "loss": 1.0044, "step": 7100 }, { "epoch": 1.0053089827988957, "grad_norm": 10.153598286642223, "learning_rate": 2.5997102849699424e-06, "loss": 1.0295, "step": 7101 }, { "epoch": 1.005450555673533, "grad_norm": 10.381207918360863, "learning_rate": 2.5991375387899364e-06, "loss": 1.1255, "step": 7102 }, { "epoch": 1.0055921285481701, "grad_norm": 7.951932266821886, "learning_rate": 2.598564787398345e-06, "loss": 1.1094, "step": 7103 }, { "epoch": 1.0057337014228074, "grad_norm": 9.06960465382368, "learning_rate": 2.5979920308252753e-06, "loss": 1.0108, "step": 7104 }, { "epoch": 1.0058752742974446, "grad_norm": 9.612095889562106, "learning_rate": 2.597419269100838e-06, "loss": 1.0878, "step": 7105 }, { "epoch": 1.0060168471720818, "grad_norm": 9.29212720154473, "learning_rate": 2.596846502255142e-06, "loss": 0.9923, "step": 7106 }, { "epoch": 1.006158420046719, "grad_norm": 7.4034699320597035, "learning_rate": 2.596273730318298e-06, "loss": 0.8166, "step": 7107 }, { "epoch": 1.0062999929213563, "grad_norm": 9.774496361131327, "learning_rate": 2.595700953320415e-06, "loss": 1.0381, "step": 7108 }, { "epoch": 1.0064415657959935, "grad_norm": 9.572975554231508, "learning_rate": 2.595128171291605e-06, "loss": 1.0603, "step": 7109 }, { "epoch": 1.0065831386706308, "grad_norm": 11.085369372444706, "learning_rate": 2.5945553842619776e-06, "loss": 0.9761, "step": 7110 }, { "epoch": 1.006724711545268, "grad_norm": 10.341637709764326, "learning_rate": 2.5939825922616443e-06, "loss": 1.101, "step": 7111 }, { "epoch": 1.0068662844199052, "grad_norm": 9.832170924216474, "learning_rate": 2.593409795320716e-06, "loss": 0.9372, "step": 7112 }, { "epoch": 1.0070078572945425, "grad_norm": 8.177896006508137, "learning_rate": 2.5928369934693043e-06, "loss": 0.982, "step": 7113 }, { "epoch": 1.0071494301691797, "grad_norm": 9.35012895627774, "learning_rate": 2.592264186737522e-06, "loss": 1.0891, "step": 7114 }, { "epoch": 1.007291003043817, "grad_norm": 9.434187756272687, "learning_rate": 2.5916913751554795e-06, "loss": 0.9491, "step": 7115 }, { "epoch": 1.007432575918454, "grad_norm": 10.203950109607332, "learning_rate": 2.5911185587532895e-06, "loss": 1.1604, "step": 7116 }, { "epoch": 1.0075741487930912, "grad_norm": 9.87648945452343, "learning_rate": 2.5905457375610647e-06, "loss": 1.0827, "step": 7117 }, { "epoch": 1.0077157216677284, "grad_norm": 10.02684879901851, "learning_rate": 2.5899729116089183e-06, "loss": 0.9636, "step": 7118 }, { "epoch": 1.0078572945423656, "grad_norm": 8.437332264681112, "learning_rate": 2.589400080926964e-06, "loss": 1.0512, "step": 7119 }, { "epoch": 1.0079988674170028, "grad_norm": 10.372435103346973, "learning_rate": 2.5888272455453136e-06, "loss": 1.0843, "step": 7120 }, { "epoch": 1.00814044029164, "grad_norm": 9.08145201445881, "learning_rate": 2.5882544054940806e-06, "loss": 0.9635, "step": 7121 }, { "epoch": 1.0082820131662773, "grad_norm": 12.139173266243901, "learning_rate": 2.5876815608033797e-06, "loss": 1.1186, "step": 7122 }, { "epoch": 1.0084235860409145, "grad_norm": 9.865721387516158, "learning_rate": 2.587108711503324e-06, "loss": 1.0222, "step": 7123 }, { "epoch": 1.0085651589155518, "grad_norm": 11.387404835677073, "learning_rate": 2.586535857624028e-06, "loss": 1.0469, "step": 7124 }, { "epoch": 1.008706731790189, "grad_norm": 11.370568386824518, "learning_rate": 2.5859629991956075e-06, "loss": 0.9239, "step": 7125 }, { "epoch": 1.0088483046648262, "grad_norm": 8.027429763073247, "learning_rate": 2.585390136248176e-06, "loss": 0.9598, "step": 7126 }, { "epoch": 1.0089898775394635, "grad_norm": 7.954605534314255, "learning_rate": 2.5848172688118482e-06, "loss": 1.0084, "step": 7127 }, { "epoch": 1.0091314504141007, "grad_norm": 9.208363206449588, "learning_rate": 2.5842443969167402e-06, "loss": 1.0427, "step": 7128 }, { "epoch": 1.009273023288738, "grad_norm": 9.660323286945463, "learning_rate": 2.583671520592967e-06, "loss": 0.9827, "step": 7129 }, { "epoch": 1.0094145961633751, "grad_norm": 8.825098001514098, "learning_rate": 2.583098639870644e-06, "loss": 0.9594, "step": 7130 }, { "epoch": 1.0095561690380124, "grad_norm": 9.901100529083205, "learning_rate": 2.582525754779888e-06, "loss": 0.958, "step": 7131 }, { "epoch": 1.0096977419126496, "grad_norm": 9.223055386984658, "learning_rate": 2.581952865350815e-06, "loss": 1.0264, "step": 7132 }, { "epoch": 1.0098393147872868, "grad_norm": 9.986792043485446, "learning_rate": 2.58137997161354e-06, "loss": 1.1449, "step": 7133 }, { "epoch": 1.009980887661924, "grad_norm": 8.776861322690156, "learning_rate": 2.580807073598181e-06, "loss": 1.0334, "step": 7134 }, { "epoch": 1.0101224605365613, "grad_norm": 10.076033122288022, "learning_rate": 2.580234171334855e-06, "loss": 1.0493, "step": 7135 }, { "epoch": 1.0102640334111985, "grad_norm": 8.826514821355117, "learning_rate": 2.5796612648536776e-06, "loss": 1.1065, "step": 7136 }, { "epoch": 1.0104056062858355, "grad_norm": 9.758353151930553, "learning_rate": 2.579088354184767e-06, "loss": 1.0187, "step": 7137 }, { "epoch": 1.0105471791604728, "grad_norm": 7.717624103424435, "learning_rate": 2.5785154393582405e-06, "loss": 1.0211, "step": 7138 }, { "epoch": 1.01068875203511, "grad_norm": 9.51919463653939, "learning_rate": 2.577942520404216e-06, "loss": 1.068, "step": 7139 }, { "epoch": 1.0108303249097472, "grad_norm": 7.488422868053142, "learning_rate": 2.577369597352812e-06, "loss": 1.0417, "step": 7140 }, { "epoch": 1.0109718977843845, "grad_norm": 10.566323214784644, "learning_rate": 2.5767966702341454e-06, "loss": 1.2169, "step": 7141 }, { "epoch": 1.0111134706590217, "grad_norm": 11.25021124747411, "learning_rate": 2.576223739078335e-06, "loss": 1.0513, "step": 7142 }, { "epoch": 1.011255043533659, "grad_norm": 7.124617984636226, "learning_rate": 2.5756508039155e-06, "loss": 0.9381, "step": 7143 }, { "epoch": 1.0113966164082961, "grad_norm": 8.886081707936135, "learning_rate": 2.575077864775758e-06, "loss": 1.0706, "step": 7144 }, { "epoch": 1.0115381892829334, "grad_norm": 8.963680309854073, "learning_rate": 2.5745049216892286e-06, "loss": 1.0373, "step": 7145 }, { "epoch": 1.0116797621575706, "grad_norm": 9.214227737464034, "learning_rate": 2.5739319746860312e-06, "loss": 0.9432, "step": 7146 }, { "epoch": 1.0118213350322078, "grad_norm": 9.390360138215714, "learning_rate": 2.5733590237962854e-06, "loss": 1.047, "step": 7147 }, { "epoch": 1.011962907906845, "grad_norm": 7.3188696612862625, "learning_rate": 2.57278606905011e-06, "loss": 1.0271, "step": 7148 }, { "epoch": 1.0121044807814823, "grad_norm": 8.995122223511917, "learning_rate": 2.572213110477625e-06, "loss": 0.9565, "step": 7149 }, { "epoch": 1.0122460536561195, "grad_norm": 8.770384232597928, "learning_rate": 2.571640148108951e-06, "loss": 1.0733, "step": 7150 }, { "epoch": 1.0123876265307568, "grad_norm": 9.157454792216251, "learning_rate": 2.5710671819742083e-06, "loss": 1.0573, "step": 7151 }, { "epoch": 1.012529199405394, "grad_norm": 10.980713669560421, "learning_rate": 2.5704942121035163e-06, "loss": 1.1159, "step": 7152 }, { "epoch": 1.0126707722800312, "grad_norm": 9.387897215870133, "learning_rate": 2.5699212385269954e-06, "loss": 1.0929, "step": 7153 }, { "epoch": 1.0128123451546684, "grad_norm": 10.731119297944588, "learning_rate": 2.569348261274768e-06, "loss": 0.9617, "step": 7154 }, { "epoch": 1.0129539180293057, "grad_norm": 8.948075446665543, "learning_rate": 2.5687752803769538e-06, "loss": 1.0633, "step": 7155 }, { "epoch": 1.013095490903943, "grad_norm": 10.395149978062186, "learning_rate": 2.5682022958636752e-06, "loss": 1.0466, "step": 7156 }, { "epoch": 1.01323706377858, "grad_norm": 8.77749108062265, "learning_rate": 2.5676293077650528e-06, "loss": 0.9077, "step": 7157 }, { "epoch": 1.0133786366532171, "grad_norm": 9.498784840817716, "learning_rate": 2.5670563161112073e-06, "loss": 1.1627, "step": 7158 }, { "epoch": 1.0135202095278544, "grad_norm": 8.94861429238663, "learning_rate": 2.5664833209322614e-06, "loss": 0.9476, "step": 7159 }, { "epoch": 1.0136617824024916, "grad_norm": 9.011132454784791, "learning_rate": 2.565910322258337e-06, "loss": 1.0639, "step": 7160 }, { "epoch": 1.0138033552771288, "grad_norm": 9.421186279632058, "learning_rate": 2.5653373201195554e-06, "loss": 1.0923, "step": 7161 }, { "epoch": 1.013944928151766, "grad_norm": 8.303178422244164, "learning_rate": 2.564764314546041e-06, "loss": 0.9811, "step": 7162 }, { "epoch": 1.0140865010264033, "grad_norm": 10.655473876611788, "learning_rate": 2.564191305567914e-06, "loss": 1.0617, "step": 7163 }, { "epoch": 1.0142280739010405, "grad_norm": 10.46165294066748, "learning_rate": 2.563618293215298e-06, "loss": 1.0123, "step": 7164 }, { "epoch": 1.0143696467756778, "grad_norm": 8.461734582722238, "learning_rate": 2.563045277518316e-06, "loss": 1.0883, "step": 7165 }, { "epoch": 1.014511219650315, "grad_norm": 8.17755641338187, "learning_rate": 2.5624722585070907e-06, "loss": 0.9451, "step": 7166 }, { "epoch": 1.0146527925249522, "grad_norm": 9.049204366705958, "learning_rate": 2.5618992362117453e-06, "loss": 1.0367, "step": 7167 }, { "epoch": 1.0147943653995894, "grad_norm": 8.578077416904685, "learning_rate": 2.561326210662403e-06, "loss": 1.0955, "step": 7168 }, { "epoch": 1.0149359382742267, "grad_norm": 10.11759960852271, "learning_rate": 2.5607531818891877e-06, "loss": 1.1771, "step": 7169 }, { "epoch": 1.015077511148864, "grad_norm": 6.825976839130764, "learning_rate": 2.5601801499222227e-06, "loss": 0.9695, "step": 7170 }, { "epoch": 1.0152190840235011, "grad_norm": 10.015344291478725, "learning_rate": 2.5596071147916325e-06, "loss": 1.0821, "step": 7171 }, { "epoch": 1.0153606568981384, "grad_norm": 11.435980357123547, "learning_rate": 2.5590340765275414e-06, "loss": 0.9436, "step": 7172 }, { "epoch": 1.0155022297727756, "grad_norm": 7.254143155881706, "learning_rate": 2.558461035160072e-06, "loss": 1.0127, "step": 7173 }, { "epoch": 1.0156438026474128, "grad_norm": 7.773501540643867, "learning_rate": 2.5578879907193495e-06, "loss": 1.0092, "step": 7174 }, { "epoch": 1.01578537552205, "grad_norm": 8.633247302398733, "learning_rate": 2.557314943235498e-06, "loss": 1.0217, "step": 7175 }, { "epoch": 1.0159269483966873, "grad_norm": 8.480161516342818, "learning_rate": 2.556741892738643e-06, "loss": 1.0268, "step": 7176 }, { "epoch": 1.0160685212713245, "grad_norm": 11.015896494881272, "learning_rate": 2.5561688392589095e-06, "loss": 1.0972, "step": 7177 }, { "epoch": 1.0162100941459615, "grad_norm": 7.824115144954966, "learning_rate": 2.555595782826423e-06, "loss": 1.0008, "step": 7178 }, { "epoch": 1.0163516670205988, "grad_norm": 8.665244156907994, "learning_rate": 2.555022723471306e-06, "loss": 1.0914, "step": 7179 }, { "epoch": 1.016493239895236, "grad_norm": 8.544636825602673, "learning_rate": 2.554449661223686e-06, "loss": 0.9705, "step": 7180 }, { "epoch": 1.0166348127698732, "grad_norm": 10.63805350489679, "learning_rate": 2.553876596113688e-06, "loss": 1.0275, "step": 7181 }, { "epoch": 1.0167763856445104, "grad_norm": 9.337738859295357, "learning_rate": 2.5533035281714368e-06, "loss": 1.0911, "step": 7182 }, { "epoch": 1.0169179585191477, "grad_norm": 9.965160716244668, "learning_rate": 2.5527304574270596e-06, "loss": 1.0496, "step": 7183 }, { "epoch": 1.017059531393785, "grad_norm": 8.557922030492009, "learning_rate": 2.5521573839106815e-06, "loss": 1.0635, "step": 7184 }, { "epoch": 1.0172011042684221, "grad_norm": 10.246496951059244, "learning_rate": 2.551584307652428e-06, "loss": 1.0226, "step": 7185 }, { "epoch": 1.0173426771430594, "grad_norm": 8.45142125924803, "learning_rate": 2.551011228682427e-06, "loss": 0.9665, "step": 7186 }, { "epoch": 1.0174842500176966, "grad_norm": 9.230590251263392, "learning_rate": 2.5504381470308034e-06, "loss": 1.0244, "step": 7187 }, { "epoch": 1.0176258228923338, "grad_norm": 9.548135177927497, "learning_rate": 2.549865062727684e-06, "loss": 1.026, "step": 7188 }, { "epoch": 1.017767395766971, "grad_norm": 9.620206754052777, "learning_rate": 2.5492919758031953e-06, "loss": 1.0964, "step": 7189 }, { "epoch": 1.0179089686416083, "grad_norm": 8.205351260405653, "learning_rate": 2.5487188862874635e-06, "loss": 1.0282, "step": 7190 }, { "epoch": 1.0180505415162455, "grad_norm": 7.7150266987801555, "learning_rate": 2.5481457942106165e-06, "loss": 1.0056, "step": 7191 }, { "epoch": 1.0181921143908828, "grad_norm": 11.69638099452735, "learning_rate": 2.547572699602781e-06, "loss": 1.0727, "step": 7192 }, { "epoch": 1.01833368726552, "grad_norm": 11.486109430040582, "learning_rate": 2.5469996024940853e-06, "loss": 1.0631, "step": 7193 }, { "epoch": 1.0184752601401572, "grad_norm": 9.376638854150878, "learning_rate": 2.5464265029146546e-06, "loss": 1.0251, "step": 7194 }, { "epoch": 1.0186168330147944, "grad_norm": 11.199590362142052, "learning_rate": 2.545853400894617e-06, "loss": 1.0833, "step": 7195 }, { "epoch": 1.0187584058894317, "grad_norm": 9.791521133531727, "learning_rate": 2.545280296464101e-06, "loss": 1.0162, "step": 7196 }, { "epoch": 1.018899978764069, "grad_norm": 8.926042375146455, "learning_rate": 2.544707189653233e-06, "loss": 1.0494, "step": 7197 }, { "epoch": 1.019041551638706, "grad_norm": 9.823594351429893, "learning_rate": 2.5441340804921413e-06, "loss": 0.9484, "step": 7198 }, { "epoch": 1.0191831245133431, "grad_norm": 9.860123256818266, "learning_rate": 2.5435609690109545e-06, "loss": 1.0485, "step": 7199 }, { "epoch": 1.0193246973879804, "grad_norm": 8.378878478246516, "learning_rate": 2.5429878552398e-06, "loss": 1.1666, "step": 7200 }, { "epoch": 1.0194662702626176, "grad_norm": 8.746856996900014, "learning_rate": 2.5424147392088057e-06, "loss": 1.0509, "step": 7201 }, { "epoch": 1.0196078431372548, "grad_norm": 9.432546768375849, "learning_rate": 2.5418416209481002e-06, "loss": 1.1939, "step": 7202 }, { "epoch": 1.019749416011892, "grad_norm": 9.917693739417784, "learning_rate": 2.541268500487812e-06, "loss": 1.0888, "step": 7203 }, { "epoch": 1.0198909888865293, "grad_norm": 9.547358074546763, "learning_rate": 2.540695377858069e-06, "loss": 0.9846, "step": 7204 }, { "epoch": 1.0200325617611665, "grad_norm": 12.113097966002933, "learning_rate": 2.540122253089001e-06, "loss": 1.1663, "step": 7205 }, { "epoch": 1.0201741346358038, "grad_norm": 8.950601431815123, "learning_rate": 2.539549126210735e-06, "loss": 1.0652, "step": 7206 }, { "epoch": 1.020315707510441, "grad_norm": 9.949265526735347, "learning_rate": 2.5389759972534024e-06, "loss": 1.0736, "step": 7207 }, { "epoch": 1.0204572803850782, "grad_norm": 9.47549450069572, "learning_rate": 2.53840286624713e-06, "loss": 0.9575, "step": 7208 }, { "epoch": 1.0205988532597154, "grad_norm": 9.963165345224834, "learning_rate": 2.5378297332220474e-06, "loss": 1.1346, "step": 7209 }, { "epoch": 1.0207404261343527, "grad_norm": 8.401053825759051, "learning_rate": 2.5372565982082843e-06, "loss": 0.9566, "step": 7210 }, { "epoch": 1.02088199900899, "grad_norm": 7.934368086365268, "learning_rate": 2.5366834612359697e-06, "loss": 0.9967, "step": 7211 }, { "epoch": 1.0210235718836271, "grad_norm": 8.753131850926675, "learning_rate": 2.5361103223352325e-06, "loss": 0.9269, "step": 7212 }, { "epoch": 1.0211651447582644, "grad_norm": 9.222535057838837, "learning_rate": 2.5355371815362017e-06, "loss": 1.0411, "step": 7213 }, { "epoch": 1.0213067176329016, "grad_norm": 8.19546191549328, "learning_rate": 2.534964038869009e-06, "loss": 1.1464, "step": 7214 }, { "epoch": 1.0214482905075388, "grad_norm": 8.459426984567369, "learning_rate": 2.534390894363783e-06, "loss": 1.04, "step": 7215 }, { "epoch": 1.021589863382176, "grad_norm": 10.381869695955015, "learning_rate": 2.533817748050653e-06, "loss": 1.087, "step": 7216 }, { "epoch": 1.0217314362568133, "grad_norm": 7.918574079984671, "learning_rate": 2.533244599959749e-06, "loss": 1.023, "step": 7217 }, { "epoch": 1.0218730091314505, "grad_norm": 11.403206197042374, "learning_rate": 2.5326714501212014e-06, "loss": 1.0915, "step": 7218 }, { "epoch": 1.0220145820060877, "grad_norm": 9.441068467271643, "learning_rate": 2.53209829856514e-06, "loss": 1.0268, "step": 7219 }, { "epoch": 1.0221561548807248, "grad_norm": 8.884919546575595, "learning_rate": 2.531525145321695e-06, "loss": 1.09, "step": 7220 }, { "epoch": 1.022297727755362, "grad_norm": 8.785609564782895, "learning_rate": 2.5309519904209962e-06, "loss": 1.0902, "step": 7221 }, { "epoch": 1.0224393006299992, "grad_norm": 12.433507230495394, "learning_rate": 2.5303788338931744e-06, "loss": 1.147, "step": 7222 }, { "epoch": 1.0225808735046364, "grad_norm": 10.29183053775523, "learning_rate": 2.5298056757683604e-06, "loss": 1.0077, "step": 7223 }, { "epoch": 1.0227224463792737, "grad_norm": 8.193947153752951, "learning_rate": 2.529232516076684e-06, "loss": 0.9938, "step": 7224 }, { "epoch": 1.022864019253911, "grad_norm": 8.92523461468919, "learning_rate": 2.528659354848277e-06, "loss": 1.0438, "step": 7225 }, { "epoch": 1.0230055921285481, "grad_norm": 8.24513563610174, "learning_rate": 2.5280861921132677e-06, "loss": 1.0239, "step": 7226 }, { "epoch": 1.0231471650031854, "grad_norm": 8.231844257983463, "learning_rate": 2.5275130279017884e-06, "loss": 1.122, "step": 7227 }, { "epoch": 1.0232887378778226, "grad_norm": 12.310886955133261, "learning_rate": 2.52693986224397e-06, "loss": 1.0044, "step": 7228 }, { "epoch": 1.0234303107524598, "grad_norm": 11.021060203327586, "learning_rate": 2.526366695169943e-06, "loss": 1.1637, "step": 7229 }, { "epoch": 1.023571883627097, "grad_norm": 10.062609180308305, "learning_rate": 2.5257935267098395e-06, "loss": 1.1228, "step": 7230 }, { "epoch": 1.0237134565017343, "grad_norm": 9.810574287924572, "learning_rate": 2.5252203568937884e-06, "loss": 1.0034, "step": 7231 }, { "epoch": 1.0238550293763715, "grad_norm": 9.491493432058034, "learning_rate": 2.524647185751922e-06, "loss": 1.1027, "step": 7232 }, { "epoch": 1.0239966022510087, "grad_norm": 6.961238079667053, "learning_rate": 2.5240740133143714e-06, "loss": 0.9253, "step": 7233 }, { "epoch": 1.024138175125646, "grad_norm": 13.351635070567843, "learning_rate": 2.5235008396112688e-06, "loss": 1.1571, "step": 7234 }, { "epoch": 1.0242797480002832, "grad_norm": 8.834162067428565, "learning_rate": 2.5229276646727428e-06, "loss": 1.0838, "step": 7235 }, { "epoch": 1.0244213208749204, "grad_norm": 8.872081867855023, "learning_rate": 2.5223544885289287e-06, "loss": 1.1049, "step": 7236 }, { "epoch": 1.0245628937495577, "grad_norm": 11.348024009746421, "learning_rate": 2.5217813112099543e-06, "loss": 1.1046, "step": 7237 }, { "epoch": 1.024704466624195, "grad_norm": 9.17545364464128, "learning_rate": 2.521208132745953e-06, "loss": 1.0508, "step": 7238 }, { "epoch": 1.0248460394988321, "grad_norm": 8.577409002165496, "learning_rate": 2.520634953167056e-06, "loss": 0.9555, "step": 7239 }, { "epoch": 1.0249876123734691, "grad_norm": 9.250358780784786, "learning_rate": 2.5200617725033947e-06, "loss": 0.9257, "step": 7240 }, { "epoch": 1.0251291852481064, "grad_norm": 10.287810762886764, "learning_rate": 2.519488590785102e-06, "loss": 1.1022, "step": 7241 }, { "epoch": 1.0252707581227436, "grad_norm": 9.129629972382078, "learning_rate": 2.5189154080423073e-06, "loss": 1.0405, "step": 7242 }, { "epoch": 1.0254123309973808, "grad_norm": 8.782785518966518, "learning_rate": 2.518342224305144e-06, "loss": 0.9807, "step": 7243 }, { "epoch": 1.025553903872018, "grad_norm": 7.839211659402874, "learning_rate": 2.517769039603744e-06, "loss": 0.9566, "step": 7244 }, { "epoch": 1.0256954767466553, "grad_norm": 7.088152454088607, "learning_rate": 2.517195853968239e-06, "loss": 0.9241, "step": 7245 }, { "epoch": 1.0258370496212925, "grad_norm": 8.657126482166161, "learning_rate": 2.516622667428761e-06, "loss": 0.9712, "step": 7246 }, { "epoch": 1.0259786224959297, "grad_norm": 7.952885892080382, "learning_rate": 2.516049480015441e-06, "loss": 0.9556, "step": 7247 }, { "epoch": 1.026120195370567, "grad_norm": 8.929676820176248, "learning_rate": 2.5154762917584125e-06, "loss": 1.0468, "step": 7248 }, { "epoch": 1.0262617682452042, "grad_norm": 9.330052980066075, "learning_rate": 2.5149031026878063e-06, "loss": 1.1408, "step": 7249 }, { "epoch": 1.0264033411198414, "grad_norm": 8.860770409975691, "learning_rate": 2.5143299128337543e-06, "loss": 1.0949, "step": 7250 }, { "epoch": 1.0265449139944787, "grad_norm": 7.866402186241999, "learning_rate": 2.513756722226391e-06, "loss": 0.9972, "step": 7251 }, { "epoch": 1.026686486869116, "grad_norm": 9.913592292090142, "learning_rate": 2.5131835308958467e-06, "loss": 0.9892, "step": 7252 }, { "epoch": 1.0268280597437531, "grad_norm": 8.694672841195906, "learning_rate": 2.512610338872254e-06, "loss": 0.8639, "step": 7253 }, { "epoch": 1.0269696326183904, "grad_norm": 8.352250815736129, "learning_rate": 2.512037146185745e-06, "loss": 1.0022, "step": 7254 }, { "epoch": 1.0271112054930276, "grad_norm": 9.034648744123233, "learning_rate": 2.511463952866452e-06, "loss": 1.0295, "step": 7255 }, { "epoch": 1.0272527783676648, "grad_norm": 12.870532992337028, "learning_rate": 2.510890758944508e-06, "loss": 0.9877, "step": 7256 }, { "epoch": 1.027394351242302, "grad_norm": 9.056142537050043, "learning_rate": 2.5103175644500444e-06, "loss": 1.0977, "step": 7257 }, { "epoch": 1.0275359241169393, "grad_norm": 10.565357070138768, "learning_rate": 2.5097443694131947e-06, "loss": 1.1082, "step": 7258 }, { "epoch": 1.0276774969915765, "grad_norm": 9.135355468916195, "learning_rate": 2.50917117386409e-06, "loss": 1.0075, "step": 7259 }, { "epoch": 1.0278190698662137, "grad_norm": 10.615494369228642, "learning_rate": 2.508597977832864e-06, "loss": 1.0026, "step": 7260 }, { "epoch": 1.0279606427408507, "grad_norm": 8.119303892768455, "learning_rate": 2.508024781349649e-06, "loss": 0.9348, "step": 7261 }, { "epoch": 1.028102215615488, "grad_norm": 9.136560928788729, "learning_rate": 2.5074515844445774e-06, "loss": 1.1098, "step": 7262 }, { "epoch": 1.0282437884901252, "grad_norm": 8.891667149075104, "learning_rate": 2.5068783871477807e-06, "loss": 1.0721, "step": 7263 }, { "epoch": 1.0283853613647624, "grad_norm": 8.245032924866274, "learning_rate": 2.5063051894893925e-06, "loss": 0.9986, "step": 7264 }, { "epoch": 1.0285269342393997, "grad_norm": 9.597677315427635, "learning_rate": 2.5057319914995454e-06, "loss": 1.0767, "step": 7265 }, { "epoch": 1.028668507114037, "grad_norm": 9.33685109285128, "learning_rate": 2.5051587932083715e-06, "loss": 1.0223, "step": 7266 }, { "epoch": 1.0288100799886741, "grad_norm": 10.114580609484593, "learning_rate": 2.504585594646004e-06, "loss": 1.039, "step": 7267 }, { "epoch": 1.0289516528633114, "grad_norm": 8.56440078910748, "learning_rate": 2.504012395842576e-06, "loss": 1.0217, "step": 7268 }, { "epoch": 1.0290932257379486, "grad_norm": 8.636419725007807, "learning_rate": 2.5034391968282186e-06, "loss": 1.0884, "step": 7269 }, { "epoch": 1.0292347986125858, "grad_norm": 10.11137471769495, "learning_rate": 2.502865997633065e-06, "loss": 1.1048, "step": 7270 }, { "epoch": 1.029376371487223, "grad_norm": 10.473687763914702, "learning_rate": 2.502292798287248e-06, "loss": 1.0733, "step": 7271 }, { "epoch": 1.0295179443618603, "grad_norm": 8.3903635790572, "learning_rate": 2.5017195988208997e-06, "loss": 0.9721, "step": 7272 }, { "epoch": 1.0296595172364975, "grad_norm": 8.16401110181189, "learning_rate": 2.5011463992641548e-06, "loss": 1.0333, "step": 7273 }, { "epoch": 1.0298010901111347, "grad_norm": 8.631377143116778, "learning_rate": 2.500573199647144e-06, "loss": 1.0556, "step": 7274 }, { "epoch": 1.029942662985772, "grad_norm": 11.880383365495106, "learning_rate": 2.5e-06, "loss": 1.1122, "step": 7275 }, { "epoch": 1.0300842358604092, "grad_norm": 9.3504936863106, "learning_rate": 2.499426800352857e-06, "loss": 1.0004, "step": 7276 }, { "epoch": 1.0302258087350464, "grad_norm": 9.003241908788414, "learning_rate": 2.4988536007358456e-06, "loss": 0.955, "step": 7277 }, { "epoch": 1.0303673816096837, "grad_norm": 8.999557060362184, "learning_rate": 2.4982804011791003e-06, "loss": 1.1626, "step": 7278 }, { "epoch": 1.030508954484321, "grad_norm": 9.885799635023366, "learning_rate": 2.497707201712753e-06, "loss": 1.0781, "step": 7279 }, { "epoch": 1.0306505273589581, "grad_norm": 7.837037881128567, "learning_rate": 2.4971340023669356e-06, "loss": 0.9846, "step": 7280 }, { "epoch": 1.0307921002335951, "grad_norm": 9.463151642749464, "learning_rate": 2.4965608031717827e-06, "loss": 1.1419, "step": 7281 }, { "epoch": 1.0309336731082324, "grad_norm": 9.428022665923123, "learning_rate": 2.4959876041574256e-06, "loss": 0.9622, "step": 7282 }, { "epoch": 1.0310752459828696, "grad_norm": 10.522703783813123, "learning_rate": 2.4954144053539966e-06, "loss": 1.0655, "step": 7283 }, { "epoch": 1.0312168188575068, "grad_norm": 8.590322025822095, "learning_rate": 2.494841206791629e-06, "loss": 1.0003, "step": 7284 }, { "epoch": 1.031358391732144, "grad_norm": 9.29645605345438, "learning_rate": 2.4942680085004554e-06, "loss": 1.0324, "step": 7285 }, { "epoch": 1.0314999646067813, "grad_norm": 7.536115032638253, "learning_rate": 2.4936948105106084e-06, "loss": 0.9096, "step": 7286 }, { "epoch": 1.0316415374814185, "grad_norm": 8.864273251146768, "learning_rate": 2.4931216128522197e-06, "loss": 0.9755, "step": 7287 }, { "epoch": 1.0317831103560557, "grad_norm": 7.956715550852483, "learning_rate": 2.4925484155554235e-06, "loss": 0.9847, "step": 7288 }, { "epoch": 1.031924683230693, "grad_norm": 9.871921107615298, "learning_rate": 2.491975218650351e-06, "loss": 0.9689, "step": 7289 }, { "epoch": 1.0320662561053302, "grad_norm": 10.073551151420881, "learning_rate": 2.491402022167136e-06, "loss": 1.0183, "step": 7290 }, { "epoch": 1.0322078289799674, "grad_norm": 11.743574617628344, "learning_rate": 2.4908288261359108e-06, "loss": 1.0932, "step": 7291 }, { "epoch": 1.0323494018546047, "grad_norm": 9.75289287199232, "learning_rate": 2.4902556305868065e-06, "loss": 1.0286, "step": 7292 }, { "epoch": 1.032490974729242, "grad_norm": 10.267832804585868, "learning_rate": 2.4896824355499565e-06, "loss": 1.0464, "step": 7293 }, { "epoch": 1.0326325476038791, "grad_norm": 10.294089048970994, "learning_rate": 2.489109241055493e-06, "loss": 1.0827, "step": 7294 }, { "epoch": 1.0327741204785164, "grad_norm": 11.620779932737612, "learning_rate": 2.4885360471335483e-06, "loss": 1.0525, "step": 7295 }, { "epoch": 1.0329156933531536, "grad_norm": 10.42995339958336, "learning_rate": 2.4879628538142557e-06, "loss": 1.0662, "step": 7296 }, { "epoch": 1.0330572662277908, "grad_norm": 10.621031614727222, "learning_rate": 2.4873896611277467e-06, "loss": 1.1333, "step": 7297 }, { "epoch": 1.033198839102428, "grad_norm": 9.380692444967499, "learning_rate": 2.4868164691041537e-06, "loss": 1.1156, "step": 7298 }, { "epoch": 1.0333404119770653, "grad_norm": 8.694695655625061, "learning_rate": 2.4862432777736094e-06, "loss": 0.9902, "step": 7299 }, { "epoch": 1.0334819848517025, "grad_norm": 9.050077777338652, "learning_rate": 2.4856700871662452e-06, "loss": 0.9603, "step": 7300 }, { "epoch": 1.0336235577263397, "grad_norm": 9.74959309046729, "learning_rate": 2.4850968973121945e-06, "loss": 1.1073, "step": 7301 }, { "epoch": 1.033765130600977, "grad_norm": 8.970964430863843, "learning_rate": 2.4845237082415887e-06, "loss": 1.0156, "step": 7302 }, { "epoch": 1.033906703475614, "grad_norm": 8.618972662579965, "learning_rate": 2.48395051998456e-06, "loss": 1.0402, "step": 7303 }, { "epoch": 1.0340482763502512, "grad_norm": 9.235620818035528, "learning_rate": 2.48337733257124e-06, "loss": 1.0212, "step": 7304 }, { "epoch": 1.0341898492248884, "grad_norm": 9.57262248907578, "learning_rate": 2.482804146031762e-06, "loss": 0.9959, "step": 7305 }, { "epoch": 1.0343314220995257, "grad_norm": 8.340188627236834, "learning_rate": 2.482230960396256e-06, "loss": 0.9693, "step": 7306 }, { "epoch": 1.034472994974163, "grad_norm": 9.287712544490276, "learning_rate": 2.4816577756948564e-06, "loss": 1.0662, "step": 7307 }, { "epoch": 1.0346145678488001, "grad_norm": 8.462053304628322, "learning_rate": 2.481084591957693e-06, "loss": 1.1267, "step": 7308 }, { "epoch": 1.0347561407234374, "grad_norm": 11.214588636727413, "learning_rate": 2.480511409214899e-06, "loss": 1.1207, "step": 7309 }, { "epoch": 1.0348977135980746, "grad_norm": 9.54198214302319, "learning_rate": 2.479938227496605e-06, "loss": 1.0575, "step": 7310 }, { "epoch": 1.0350392864727118, "grad_norm": 9.402952712544899, "learning_rate": 2.479365046832944e-06, "loss": 1.0026, "step": 7311 }, { "epoch": 1.035180859347349, "grad_norm": 8.191436520121705, "learning_rate": 2.478791867254047e-06, "loss": 1.0332, "step": 7312 }, { "epoch": 1.0353224322219863, "grad_norm": 11.444758545681307, "learning_rate": 2.478218688790047e-06, "loss": 0.9679, "step": 7313 }, { "epoch": 1.0354640050966235, "grad_norm": 8.623665084365504, "learning_rate": 2.477645511471073e-06, "loss": 0.9727, "step": 7314 }, { "epoch": 1.0356055779712607, "grad_norm": 8.860463877777198, "learning_rate": 2.4770723353272576e-06, "loss": 1.066, "step": 7315 }, { "epoch": 1.035747150845898, "grad_norm": 11.712251453347701, "learning_rate": 2.4764991603887325e-06, "loss": 1.0889, "step": 7316 }, { "epoch": 1.0358887237205352, "grad_norm": 9.75037285507525, "learning_rate": 2.475925986685629e-06, "loss": 1.1371, "step": 7317 }, { "epoch": 1.0360302965951724, "grad_norm": 8.850869053127239, "learning_rate": 2.4753528142480784e-06, "loss": 0.991, "step": 7318 }, { "epoch": 1.0361718694698097, "grad_norm": 8.93911664519959, "learning_rate": 2.4747796431062124e-06, "loss": 1.0719, "step": 7319 }, { "epoch": 1.036313442344447, "grad_norm": 8.0298306290756, "learning_rate": 2.4742064732901618e-06, "loss": 1.0858, "step": 7320 }, { "epoch": 1.0364550152190841, "grad_norm": 8.31682640437022, "learning_rate": 2.473633304830057e-06, "loss": 1.0728, "step": 7321 }, { "epoch": 1.0365965880937213, "grad_norm": 9.66277914692689, "learning_rate": 2.4730601377560305e-06, "loss": 1.0537, "step": 7322 }, { "epoch": 1.0367381609683584, "grad_norm": 7.129602919726228, "learning_rate": 2.4724869720982124e-06, "loss": 0.9731, "step": 7323 }, { "epoch": 1.0368797338429956, "grad_norm": 9.141506785058443, "learning_rate": 2.471913807886733e-06, "loss": 1.0211, "step": 7324 }, { "epoch": 1.0370213067176328, "grad_norm": 8.8534637052443, "learning_rate": 2.4713406451517247e-06, "loss": 1.0429, "step": 7325 }, { "epoch": 1.03716287959227, "grad_norm": 9.690250129423944, "learning_rate": 2.4707674839233168e-06, "loss": 1.1006, "step": 7326 }, { "epoch": 1.0373044524669073, "grad_norm": 9.407754872742245, "learning_rate": 2.4701943242316405e-06, "loss": 1.0018, "step": 7327 }, { "epoch": 1.0374460253415445, "grad_norm": 8.519139451817402, "learning_rate": 2.469621166106826e-06, "loss": 1.0132, "step": 7328 }, { "epoch": 1.0375875982161817, "grad_norm": 10.000706075536733, "learning_rate": 2.4690480095790046e-06, "loss": 0.9392, "step": 7329 }, { "epoch": 1.037729171090819, "grad_norm": 10.873054768891961, "learning_rate": 2.468474854678306e-06, "loss": 0.9717, "step": 7330 }, { "epoch": 1.0378707439654562, "grad_norm": 8.878506679581594, "learning_rate": 2.4679017014348606e-06, "loss": 1.0104, "step": 7331 }, { "epoch": 1.0380123168400934, "grad_norm": 7.3275094170251185, "learning_rate": 2.467328549878799e-06, "loss": 0.9902, "step": 7332 }, { "epoch": 1.0381538897147307, "grad_norm": 7.872470510021129, "learning_rate": 2.4667554000402513e-06, "loss": 0.9466, "step": 7333 }, { "epoch": 1.038295462589368, "grad_norm": 10.171192058366787, "learning_rate": 2.4661822519493485e-06, "loss": 0.9781, "step": 7334 }, { "epoch": 1.0384370354640051, "grad_norm": 10.240633895446434, "learning_rate": 2.465609105636218e-06, "loss": 1.0264, "step": 7335 }, { "epoch": 1.0385786083386424, "grad_norm": 9.741452113634114, "learning_rate": 2.465035961130992e-06, "loss": 1.0165, "step": 7336 }, { "epoch": 1.0387201812132796, "grad_norm": 10.279005367980915, "learning_rate": 2.4644628184637987e-06, "loss": 1.0178, "step": 7337 }, { "epoch": 1.0388617540879168, "grad_norm": 9.043027807582533, "learning_rate": 2.4638896776647684e-06, "loss": 1.0256, "step": 7338 }, { "epoch": 1.039003326962554, "grad_norm": 9.66011202224004, "learning_rate": 2.463316538764031e-06, "loss": 1.1664, "step": 7339 }, { "epoch": 1.0391448998371913, "grad_norm": 8.868530064248995, "learning_rate": 2.462743401791716e-06, "loss": 1.0465, "step": 7340 }, { "epoch": 1.0392864727118285, "grad_norm": 8.067198338077409, "learning_rate": 2.462170266777953e-06, "loss": 1.0009, "step": 7341 }, { "epoch": 1.0394280455864657, "grad_norm": 9.768803779517471, "learning_rate": 2.4615971337528704e-06, "loss": 0.9289, "step": 7342 }, { "epoch": 1.039569618461103, "grad_norm": 10.1133755487589, "learning_rate": 2.461024002746598e-06, "loss": 0.9745, "step": 7343 }, { "epoch": 1.03971119133574, "grad_norm": 8.309729092769201, "learning_rate": 2.4604508737892653e-06, "loss": 0.9507, "step": 7344 }, { "epoch": 1.0398527642103772, "grad_norm": 10.395034015227385, "learning_rate": 2.459877746911e-06, "loss": 1.2991, "step": 7345 }, { "epoch": 1.0399943370850144, "grad_norm": 9.754591032104488, "learning_rate": 2.4593046221419317e-06, "loss": 1.159, "step": 7346 }, { "epoch": 1.0401359099596517, "grad_norm": 11.094382684423568, "learning_rate": 2.4587314995121893e-06, "loss": 1.0649, "step": 7347 }, { "epoch": 1.040277482834289, "grad_norm": 8.311428373857861, "learning_rate": 2.458158379051901e-06, "loss": 1.0387, "step": 7348 }, { "epoch": 1.0404190557089261, "grad_norm": 10.406479841563673, "learning_rate": 2.4575852607911956e-06, "loss": 1.1433, "step": 7349 }, { "epoch": 1.0405606285835634, "grad_norm": 8.129972843439184, "learning_rate": 2.457012144760201e-06, "loss": 0.946, "step": 7350 }, { "epoch": 1.0407022014582006, "grad_norm": 7.983931377949258, "learning_rate": 2.4564390309890463e-06, "loss": 1.0915, "step": 7351 }, { "epoch": 1.0408437743328378, "grad_norm": 8.98471212003383, "learning_rate": 2.455865919507859e-06, "loss": 1.0293, "step": 7352 }, { "epoch": 1.040985347207475, "grad_norm": 9.251211550987946, "learning_rate": 2.4552928103467677e-06, "loss": 1.1346, "step": 7353 }, { "epoch": 1.0411269200821123, "grad_norm": 10.349844344438305, "learning_rate": 2.4547197035359e-06, "loss": 1.0218, "step": 7354 }, { "epoch": 1.0412684929567495, "grad_norm": 8.896466580319744, "learning_rate": 2.454146599105384e-06, "loss": 1.0052, "step": 7355 }, { "epoch": 1.0414100658313867, "grad_norm": 10.809884697707666, "learning_rate": 2.4535734970853466e-06, "loss": 1.1439, "step": 7356 }, { "epoch": 1.041551638706024, "grad_norm": 9.177263227093112, "learning_rate": 2.453000397505916e-06, "loss": 1.0017, "step": 7357 }, { "epoch": 1.0416932115806612, "grad_norm": 8.239271181336937, "learning_rate": 2.4524273003972194e-06, "loss": 1.0709, "step": 7358 }, { "epoch": 1.0418347844552984, "grad_norm": 7.7441253546507705, "learning_rate": 2.451854205789384e-06, "loss": 0.9886, "step": 7359 }, { "epoch": 1.0419763573299357, "grad_norm": 10.203309692767512, "learning_rate": 2.4512811137125374e-06, "loss": 1.0331, "step": 7360 }, { "epoch": 1.0421179302045729, "grad_norm": 10.221731888257008, "learning_rate": 2.4507080241968055e-06, "loss": 0.9936, "step": 7361 }, { "epoch": 1.0422595030792101, "grad_norm": 11.09669133267727, "learning_rate": 2.450134937272317e-06, "loss": 1.1567, "step": 7362 }, { "epoch": 1.0424010759538473, "grad_norm": 9.745252186862425, "learning_rate": 2.449561852969197e-06, "loss": 0.9645, "step": 7363 }, { "epoch": 1.0425426488284844, "grad_norm": 8.504579544573645, "learning_rate": 2.448988771317573e-06, "loss": 1.0211, "step": 7364 }, { "epoch": 1.0426842217031216, "grad_norm": 8.022961090712451, "learning_rate": 2.448415692347572e-06, "loss": 1.0442, "step": 7365 }, { "epoch": 1.0428257945777588, "grad_norm": 10.282160107063358, "learning_rate": 2.4478426160893197e-06, "loss": 1.1275, "step": 7366 }, { "epoch": 1.042967367452396, "grad_norm": 8.16393120034968, "learning_rate": 2.4472695425729412e-06, "loss": 1.1157, "step": 7367 }, { "epoch": 1.0431089403270333, "grad_norm": 8.89120765747817, "learning_rate": 2.4466964718285636e-06, "loss": 1.0701, "step": 7368 }, { "epoch": 1.0432505132016705, "grad_norm": 9.635308628335745, "learning_rate": 2.446123403886313e-06, "loss": 1.081, "step": 7369 }, { "epoch": 1.0433920860763077, "grad_norm": 8.584886231421201, "learning_rate": 2.445550338776315e-06, "loss": 0.8563, "step": 7370 }, { "epoch": 1.043533658950945, "grad_norm": 7.9834294365797245, "learning_rate": 2.4449772765286947e-06, "loss": 0.9142, "step": 7371 }, { "epoch": 1.0436752318255822, "grad_norm": 10.942927643243989, "learning_rate": 2.4444042171735784e-06, "loss": 1.1677, "step": 7372 }, { "epoch": 1.0438168047002194, "grad_norm": 8.37626160186342, "learning_rate": 2.4438311607410905e-06, "loss": 1.0234, "step": 7373 }, { "epoch": 1.0439583775748567, "grad_norm": 9.574424339897918, "learning_rate": 2.443258107261357e-06, "loss": 0.9915, "step": 7374 }, { "epoch": 1.0440999504494939, "grad_norm": 13.070961579818592, "learning_rate": 2.4426850567645022e-06, "loss": 1.0307, "step": 7375 }, { "epoch": 1.0442415233241311, "grad_norm": 10.370333437879081, "learning_rate": 2.442112009280652e-06, "loss": 1.0932, "step": 7376 }, { "epoch": 1.0443830961987683, "grad_norm": 9.21237117102722, "learning_rate": 2.4415389648399294e-06, "loss": 1.0574, "step": 7377 }, { "epoch": 1.0445246690734056, "grad_norm": 7.347513735908579, "learning_rate": 2.44096592347246e-06, "loss": 0.952, "step": 7378 }, { "epoch": 1.0446662419480428, "grad_norm": 8.85267819328409, "learning_rate": 2.440392885208368e-06, "loss": 1.0987, "step": 7379 }, { "epoch": 1.04480781482268, "grad_norm": 9.278448125057066, "learning_rate": 2.4398198500777777e-06, "loss": 1.0547, "step": 7380 }, { "epoch": 1.0449493876973173, "grad_norm": 8.58921844301995, "learning_rate": 2.4392468181108127e-06, "loss": 0.9522, "step": 7381 }, { "epoch": 1.0450909605719545, "grad_norm": 12.089131895440282, "learning_rate": 2.438673789337598e-06, "loss": 1.078, "step": 7382 }, { "epoch": 1.0452325334465917, "grad_norm": 8.988046549385789, "learning_rate": 2.4381007637882555e-06, "loss": 1.1976, "step": 7383 }, { "epoch": 1.045374106321229, "grad_norm": 8.456043804217646, "learning_rate": 2.4375277414929098e-06, "loss": 1.1808, "step": 7384 }, { "epoch": 1.045515679195866, "grad_norm": 9.133745157280476, "learning_rate": 2.4369547224816843e-06, "loss": 1.0313, "step": 7385 }, { "epoch": 1.0456572520705032, "grad_norm": 8.002178372394187, "learning_rate": 2.436381706784702e-06, "loss": 0.9878, "step": 7386 }, { "epoch": 1.0457988249451404, "grad_norm": 9.547720064761984, "learning_rate": 2.435808694432087e-06, "loss": 0.9941, "step": 7387 }, { "epoch": 1.0459403978197777, "grad_norm": 8.642122825224687, "learning_rate": 2.4352356854539607e-06, "loss": 1.0064, "step": 7388 }, { "epoch": 1.0460819706944149, "grad_norm": 10.086612977235673, "learning_rate": 2.434662679880445e-06, "loss": 1.1187, "step": 7389 }, { "epoch": 1.0462235435690521, "grad_norm": 10.648991324785252, "learning_rate": 2.4340896777416636e-06, "loss": 1.049, "step": 7390 }, { "epoch": 1.0463651164436893, "grad_norm": 8.315536059305694, "learning_rate": 2.4335166790677395e-06, "loss": 0.9957, "step": 7391 }, { "epoch": 1.0465066893183266, "grad_norm": 8.231352104648611, "learning_rate": 2.4329436838887936e-06, "loss": 1.0116, "step": 7392 }, { "epoch": 1.0466482621929638, "grad_norm": 7.912954028754473, "learning_rate": 2.432370692234948e-06, "loss": 1.0076, "step": 7393 }, { "epoch": 1.046789835067601, "grad_norm": 10.820444352761625, "learning_rate": 2.431797704136325e-06, "loss": 1.0035, "step": 7394 }, { "epoch": 1.0469314079422383, "grad_norm": 7.989490997563392, "learning_rate": 2.431224719623046e-06, "loss": 0.9078, "step": 7395 }, { "epoch": 1.0470729808168755, "grad_norm": 7.90563407579562, "learning_rate": 2.430651738725232e-06, "loss": 1.0209, "step": 7396 }, { "epoch": 1.0472145536915127, "grad_norm": 9.444083841458216, "learning_rate": 2.430078761473005e-06, "loss": 1.1117, "step": 7397 }, { "epoch": 1.04735612656615, "grad_norm": 8.365569769499228, "learning_rate": 2.429505787896485e-06, "loss": 1.0587, "step": 7398 }, { "epoch": 1.0474976994407872, "grad_norm": 8.880494472036515, "learning_rate": 2.428932818025793e-06, "loss": 1.115, "step": 7399 }, { "epoch": 1.0476392723154244, "grad_norm": 10.051198358401917, "learning_rate": 2.42835985189105e-06, "loss": 1.1142, "step": 7400 }, { "epoch": 1.0477808451900616, "grad_norm": 10.293889679508263, "learning_rate": 2.427786889522376e-06, "loss": 1.0585, "step": 7401 }, { "epoch": 1.0479224180646989, "grad_norm": 8.739877131316424, "learning_rate": 2.427213930949891e-06, "loss": 0.9236, "step": 7402 }, { "epoch": 1.048063990939336, "grad_norm": 7.40055248156992, "learning_rate": 2.426640976203716e-06, "loss": 0.9182, "step": 7403 }, { "epoch": 1.0482055638139733, "grad_norm": 7.439979460493059, "learning_rate": 2.4260680253139696e-06, "loss": 1.0562, "step": 7404 }, { "epoch": 1.0483471366886103, "grad_norm": 10.835450968033774, "learning_rate": 2.425495078310772e-06, "loss": 1.0225, "step": 7405 }, { "epoch": 1.0484887095632476, "grad_norm": 8.07585563732428, "learning_rate": 2.424922135224243e-06, "loss": 0.9842, "step": 7406 }, { "epoch": 1.0486302824378848, "grad_norm": 9.526546530939338, "learning_rate": 2.4243491960845004e-06, "loss": 1.0997, "step": 7407 }, { "epoch": 1.048771855312522, "grad_norm": 10.040784826113764, "learning_rate": 2.4237762609216666e-06, "loss": 1.0446, "step": 7408 }, { "epoch": 1.0489134281871593, "grad_norm": 8.212250318752563, "learning_rate": 2.423203329765856e-06, "loss": 0.9347, "step": 7409 }, { "epoch": 1.0490550010617965, "grad_norm": 10.446095838825421, "learning_rate": 2.4226304026471894e-06, "loss": 1.051, "step": 7410 }, { "epoch": 1.0491965739364337, "grad_norm": 9.696006928318683, "learning_rate": 2.4220574795957844e-06, "loss": 1.1205, "step": 7411 }, { "epoch": 1.049338146811071, "grad_norm": 9.253727754583934, "learning_rate": 2.4214845606417604e-06, "loss": 1.081, "step": 7412 }, { "epoch": 1.0494797196857082, "grad_norm": 7.838217436885164, "learning_rate": 2.4209116458152334e-06, "loss": 0.9763, "step": 7413 }, { "epoch": 1.0496212925603454, "grad_norm": 8.383122391318883, "learning_rate": 2.4203387351463228e-06, "loss": 0.9703, "step": 7414 }, { "epoch": 1.0497628654349827, "grad_norm": 8.015021050438037, "learning_rate": 2.4197658286651456e-06, "loss": 1.024, "step": 7415 }, { "epoch": 1.0499044383096199, "grad_norm": 8.809010782118085, "learning_rate": 2.419192926401819e-06, "loss": 1.0705, "step": 7416 }, { "epoch": 1.050046011184257, "grad_norm": 11.197766926082155, "learning_rate": 2.41862002838646e-06, "loss": 1.0028, "step": 7417 }, { "epoch": 1.0501875840588943, "grad_norm": 9.275878173712432, "learning_rate": 2.4180471346491864e-06, "loss": 1.0607, "step": 7418 }, { "epoch": 1.0503291569335316, "grad_norm": 9.164316835638369, "learning_rate": 2.4174742452201123e-06, "loss": 1.0635, "step": 7419 }, { "epoch": 1.0504707298081688, "grad_norm": 8.922479558928234, "learning_rate": 2.4169013601293563e-06, "loss": 1.0073, "step": 7420 }, { "epoch": 1.050612302682806, "grad_norm": 8.78936588018064, "learning_rate": 2.4163284794070333e-06, "loss": 1.0175, "step": 7421 }, { "epoch": 1.0507538755574433, "grad_norm": 8.696424990603179, "learning_rate": 2.4157556030832606e-06, "loss": 0.9943, "step": 7422 }, { "epoch": 1.0508954484320805, "grad_norm": 8.219926525075314, "learning_rate": 2.415182731188152e-06, "loss": 0.9774, "step": 7423 }, { "epoch": 1.0510370213067177, "grad_norm": 9.341655789952293, "learning_rate": 2.4146098637518248e-06, "loss": 1.0768, "step": 7424 }, { "epoch": 1.051178594181355, "grad_norm": 11.264832954234892, "learning_rate": 2.414037000804393e-06, "loss": 0.9999, "step": 7425 }, { "epoch": 1.0513201670559922, "grad_norm": 9.55175376886544, "learning_rate": 2.413464142375972e-06, "loss": 0.9889, "step": 7426 }, { "epoch": 1.0514617399306292, "grad_norm": 11.649746714445863, "learning_rate": 2.412891288496677e-06, "loss": 0.9963, "step": 7427 }, { "epoch": 1.0516033128052664, "grad_norm": 10.661961789427814, "learning_rate": 2.4123184391966216e-06, "loss": 1.0136, "step": 7428 }, { "epoch": 1.0517448856799037, "grad_norm": 7.904336931576289, "learning_rate": 2.411745594505921e-06, "loss": 0.9287, "step": 7429 }, { "epoch": 1.0518864585545409, "grad_norm": 10.12675140680247, "learning_rate": 2.411172754454688e-06, "loss": 0.9755, "step": 7430 }, { "epoch": 1.0520280314291781, "grad_norm": 8.39187561603367, "learning_rate": 2.410599919073037e-06, "loss": 1.0251, "step": 7431 }, { "epoch": 1.0521696043038153, "grad_norm": 9.198270751312615, "learning_rate": 2.410027088391082e-06, "loss": 1.0477, "step": 7432 }, { "epoch": 1.0523111771784526, "grad_norm": 10.456864901086286, "learning_rate": 2.4094542624389357e-06, "loss": 1.016, "step": 7433 }, { "epoch": 1.0524527500530898, "grad_norm": 10.90719294365678, "learning_rate": 2.4088814412467117e-06, "loss": 1.144, "step": 7434 }, { "epoch": 1.052594322927727, "grad_norm": 9.978391570781524, "learning_rate": 2.4083086248445213e-06, "loss": 1.103, "step": 7435 }, { "epoch": 1.0527358958023643, "grad_norm": 8.334473442923722, "learning_rate": 2.4077358132624786e-06, "loss": 1.034, "step": 7436 }, { "epoch": 1.0528774686770015, "grad_norm": 9.319795144903601, "learning_rate": 2.4071630065306956e-06, "loss": 1.109, "step": 7437 }, { "epoch": 1.0530190415516387, "grad_norm": 10.292705241527363, "learning_rate": 2.406590204679284e-06, "loss": 1.0838, "step": 7438 }, { "epoch": 1.053160614426276, "grad_norm": 12.020376072235376, "learning_rate": 2.406017407738356e-06, "loss": 0.9354, "step": 7439 }, { "epoch": 1.0533021873009132, "grad_norm": 10.526694379322697, "learning_rate": 2.4054446157380237e-06, "loss": 1.0142, "step": 7440 }, { "epoch": 1.0534437601755504, "grad_norm": 9.033335936523663, "learning_rate": 2.404871828708396e-06, "loss": 1.102, "step": 7441 }, { "epoch": 1.0535853330501876, "grad_norm": 9.269411493774454, "learning_rate": 2.4042990466795857e-06, "loss": 1.065, "step": 7442 }, { "epoch": 1.0537269059248249, "grad_norm": 10.041772950006996, "learning_rate": 2.4037262696817034e-06, "loss": 1.0405, "step": 7443 }, { "epoch": 1.053868478799462, "grad_norm": 7.9139371750935785, "learning_rate": 2.403153497744859e-06, "loss": 1.0156, "step": 7444 }, { "epoch": 1.0540100516740993, "grad_norm": 11.498725488775307, "learning_rate": 2.402580730899163e-06, "loss": 0.9957, "step": 7445 }, { "epoch": 1.0541516245487366, "grad_norm": 9.634300197492818, "learning_rate": 2.4020079691747256e-06, "loss": 1.0286, "step": 7446 }, { "epoch": 1.0542931974233736, "grad_norm": 9.790758672508519, "learning_rate": 2.4014352126016562e-06, "loss": 1.1437, "step": 7447 }, { "epoch": 1.0544347702980108, "grad_norm": 11.123388966699501, "learning_rate": 2.4008624612100636e-06, "loss": 1.0142, "step": 7448 }, { "epoch": 1.054576343172648, "grad_norm": 8.86699003428257, "learning_rate": 2.400289715030058e-06, "loss": 1.0012, "step": 7449 }, { "epoch": 1.0547179160472853, "grad_norm": 9.74539271338942, "learning_rate": 2.3997169740917485e-06, "loss": 1.0673, "step": 7450 }, { "epoch": 1.0548594889219225, "grad_norm": 9.064014808070317, "learning_rate": 2.3991442384252417e-06, "loss": 0.9631, "step": 7451 }, { "epoch": 1.0550010617965597, "grad_norm": 8.205789652780748, "learning_rate": 2.3985715080606473e-06, "loss": 0.9411, "step": 7452 }, { "epoch": 1.055142634671197, "grad_norm": 10.336576342573002, "learning_rate": 2.3979987830280733e-06, "loss": 1.0801, "step": 7453 }, { "epoch": 1.0552842075458342, "grad_norm": 10.655507528890503, "learning_rate": 2.3974260633576274e-06, "loss": 0.9103, "step": 7454 }, { "epoch": 1.0554257804204714, "grad_norm": 10.800628672068774, "learning_rate": 2.3968533490794165e-06, "loss": 1.0545, "step": 7455 }, { "epoch": 1.0555673532951086, "grad_norm": 8.01789808854545, "learning_rate": 2.3962806402235484e-06, "loss": 0.9241, "step": 7456 }, { "epoch": 1.0557089261697459, "grad_norm": 9.37761722907368, "learning_rate": 2.3957079368201293e-06, "loss": 1.0685, "step": 7457 }, { "epoch": 1.055850499044383, "grad_norm": 9.14335813171532, "learning_rate": 2.395135238899266e-06, "loss": 0.9655, "step": 7458 }, { "epoch": 1.0559920719190203, "grad_norm": 10.899534415004487, "learning_rate": 2.3945625464910654e-06, "loss": 1.048, "step": 7459 }, { "epoch": 1.0561336447936576, "grad_norm": 8.692197997645557, "learning_rate": 2.3939898596256334e-06, "loss": 1.0757, "step": 7460 }, { "epoch": 1.0562752176682948, "grad_norm": 9.893519166433249, "learning_rate": 2.3934171783330763e-06, "loss": 1.0491, "step": 7461 }, { "epoch": 1.056416790542932, "grad_norm": 8.223221291452829, "learning_rate": 2.3928445026434973e-06, "loss": 0.9495, "step": 7462 }, { "epoch": 1.0565583634175693, "grad_norm": 10.593648454297613, "learning_rate": 2.3922718325870034e-06, "loss": 1.1302, "step": 7463 }, { "epoch": 1.0566999362922065, "grad_norm": 10.237682477840863, "learning_rate": 2.391699168193698e-06, "loss": 1.0432, "step": 7464 }, { "epoch": 1.0568415091668437, "grad_norm": 9.583816206556483, "learning_rate": 2.3911265094936874e-06, "loss": 1.1287, "step": 7465 }, { "epoch": 1.056983082041481, "grad_norm": 9.153815769374454, "learning_rate": 2.390553856517075e-06, "loss": 0.9714, "step": 7466 }, { "epoch": 1.0571246549161182, "grad_norm": 7.5271466105152625, "learning_rate": 2.3899812092939644e-06, "loss": 0.9705, "step": 7467 }, { "epoch": 1.0572662277907552, "grad_norm": 11.066749575507353, "learning_rate": 2.38940856785446e-06, "loss": 0.969, "step": 7468 }, { "epoch": 1.0574078006653924, "grad_norm": 11.928444828245961, "learning_rate": 2.3888359322286644e-06, "loss": 1.1624, "step": 7469 }, { "epoch": 1.0575493735400296, "grad_norm": 9.401907187621479, "learning_rate": 2.3882633024466813e-06, "loss": 1.1833, "step": 7470 }, { "epoch": 1.0576909464146669, "grad_norm": 9.678072372061866, "learning_rate": 2.3876906785386133e-06, "loss": 0.9596, "step": 7471 }, { "epoch": 1.057832519289304, "grad_norm": 10.176842480050949, "learning_rate": 2.3871180605345623e-06, "loss": 1.1021, "step": 7472 }, { "epoch": 1.0579740921639413, "grad_norm": 8.927904219629756, "learning_rate": 2.3865454484646307e-06, "loss": 0.938, "step": 7473 }, { "epoch": 1.0581156650385786, "grad_norm": 8.439479567405346, "learning_rate": 2.3859728423589197e-06, "loss": 1.0449, "step": 7474 }, { "epoch": 1.0582572379132158, "grad_norm": 11.674893848010125, "learning_rate": 2.385400242247532e-06, "loss": 1.0956, "step": 7475 }, { "epoch": 1.058398810787853, "grad_norm": 10.11644882981521, "learning_rate": 2.384827648160568e-06, "loss": 1.0673, "step": 7476 }, { "epoch": 1.0585403836624903, "grad_norm": 7.42717584139516, "learning_rate": 2.3842550601281288e-06, "loss": 0.9683, "step": 7477 }, { "epoch": 1.0586819565371275, "grad_norm": 9.009872531814796, "learning_rate": 2.3836824781803146e-06, "loss": 1.0957, "step": 7478 }, { "epoch": 1.0588235294117647, "grad_norm": 8.403560846444936, "learning_rate": 2.3831099023472253e-06, "loss": 0.9979, "step": 7479 }, { "epoch": 1.058965102286402, "grad_norm": 9.178549630275455, "learning_rate": 2.382537332658962e-06, "loss": 1.1397, "step": 7480 }, { "epoch": 1.0591066751610392, "grad_norm": 8.419751750702453, "learning_rate": 2.3819647691456226e-06, "loss": 1.0023, "step": 7481 }, { "epoch": 1.0592482480356764, "grad_norm": 8.335860619373683, "learning_rate": 2.3813922118373094e-06, "loss": 0.9092, "step": 7482 }, { "epoch": 1.0593898209103136, "grad_norm": 9.399728941560213, "learning_rate": 2.3808196607641176e-06, "loss": 1.0917, "step": 7483 }, { "epoch": 1.0595313937849509, "grad_norm": 8.07240006957663, "learning_rate": 2.3802471159561473e-06, "loss": 0.9735, "step": 7484 }, { "epoch": 1.059672966659588, "grad_norm": 9.443841887556566, "learning_rate": 2.379674577443497e-06, "loss": 0.9612, "step": 7485 }, { "epoch": 1.0598145395342253, "grad_norm": 9.864320799178298, "learning_rate": 2.3791020452562647e-06, "loss": 1.0167, "step": 7486 }, { "epoch": 1.0599561124088626, "grad_norm": 8.57554935219281, "learning_rate": 2.378529519424547e-06, "loss": 1.014, "step": 7487 }, { "epoch": 1.0600976852834996, "grad_norm": 9.815218992195033, "learning_rate": 2.377956999978442e-06, "loss": 1.0771, "step": 7488 }, { "epoch": 1.0602392581581368, "grad_norm": 7.617897853495991, "learning_rate": 2.3773844869480473e-06, "loss": 1.0386, "step": 7489 }, { "epoch": 1.060380831032774, "grad_norm": 8.427827391216127, "learning_rate": 2.376811980363458e-06, "loss": 0.9884, "step": 7490 }, { "epoch": 1.0605224039074113, "grad_norm": 44.78719893537173, "learning_rate": 2.3762394802547717e-06, "loss": 1.0641, "step": 7491 }, { "epoch": 1.0606639767820485, "grad_norm": 9.825761714071778, "learning_rate": 2.375666986652083e-06, "loss": 1.0613, "step": 7492 }, { "epoch": 1.0608055496566857, "grad_norm": 8.474914784410952, "learning_rate": 2.375094499585489e-06, "loss": 0.9479, "step": 7493 }, { "epoch": 1.060947122531323, "grad_norm": 9.035022831915931, "learning_rate": 2.3745220190850834e-06, "loss": 0.8919, "step": 7494 }, { "epoch": 1.0610886954059602, "grad_norm": 8.373772374607414, "learning_rate": 2.3739495451809617e-06, "loss": 0.9939, "step": 7495 }, { "epoch": 1.0612302682805974, "grad_norm": 9.37769289123942, "learning_rate": 2.3733770779032185e-06, "loss": 0.9442, "step": 7496 }, { "epoch": 1.0613718411552346, "grad_norm": 11.435227131791658, "learning_rate": 2.372804617281948e-06, "loss": 1.1097, "step": 7497 }, { "epoch": 1.0615134140298719, "grad_norm": 10.723540278335053, "learning_rate": 2.3722321633472435e-06, "loss": 1.1448, "step": 7498 }, { "epoch": 1.061654986904509, "grad_norm": 8.292190867456213, "learning_rate": 2.3716597161291993e-06, "loss": 0.9924, "step": 7499 }, { "epoch": 1.0617965597791463, "grad_norm": 9.894225900835185, "learning_rate": 2.371087275657908e-06, "loss": 1.0008, "step": 7500 }, { "epoch": 1.0619381326537836, "grad_norm": 9.736379254818104, "learning_rate": 2.3705148419634627e-06, "loss": 1.1855, "step": 7501 }, { "epoch": 1.0620797055284208, "grad_norm": 10.372292406602622, "learning_rate": 2.3699424150759553e-06, "loss": 1.0181, "step": 7502 }, { "epoch": 1.062221278403058, "grad_norm": 9.725822009965597, "learning_rate": 2.369369995025479e-06, "loss": 1.0559, "step": 7503 }, { "epoch": 1.0623628512776953, "grad_norm": 9.425636363111478, "learning_rate": 2.3687975818421236e-06, "loss": 0.9781, "step": 7504 }, { "epoch": 1.0625044241523325, "grad_norm": 7.006820081101397, "learning_rate": 2.3682251755559823e-06, "loss": 0.9362, "step": 7505 }, { "epoch": 1.0626459970269697, "grad_norm": 7.867854918701063, "learning_rate": 2.367652776197145e-06, "loss": 1.0182, "step": 7506 }, { "epoch": 1.062787569901607, "grad_norm": 10.38405205049092, "learning_rate": 2.3670803837957017e-06, "loss": 1.0864, "step": 7507 }, { "epoch": 1.0629291427762442, "grad_norm": 10.040905260103932, "learning_rate": 2.3665079983817443e-06, "loss": 0.9872, "step": 7508 }, { "epoch": 1.0630707156508814, "grad_norm": 8.5514905556082, "learning_rate": 2.3659356199853617e-06, "loss": 0.9157, "step": 7509 }, { "epoch": 1.0632122885255184, "grad_norm": 9.411094254567818, "learning_rate": 2.365363248636643e-06, "loss": 1.0827, "step": 7510 }, { "epoch": 1.0633538614001556, "grad_norm": 11.395897083464472, "learning_rate": 2.3647908843656787e-06, "loss": 1.0242, "step": 7511 }, { "epoch": 1.0634954342747929, "grad_norm": 10.519360087263284, "learning_rate": 2.364218527202557e-06, "loss": 1.08, "step": 7512 }, { "epoch": 1.06363700714943, "grad_norm": 11.938757066298232, "learning_rate": 2.3636461771773655e-06, "loss": 1.1544, "step": 7513 }, { "epoch": 1.0637785800240673, "grad_norm": 10.222980894969249, "learning_rate": 2.363073834320194e-06, "loss": 1.0384, "step": 7514 }, { "epoch": 1.0639201528987046, "grad_norm": 11.570729888867548, "learning_rate": 2.3625014986611282e-06, "loss": 1.0998, "step": 7515 }, { "epoch": 1.0640617257733418, "grad_norm": 8.366500869060625, "learning_rate": 2.3619291702302557e-06, "loss": 1.0219, "step": 7516 }, { "epoch": 1.064203298647979, "grad_norm": 8.409324792707098, "learning_rate": 2.3613568490576635e-06, "loss": 1.0752, "step": 7517 }, { "epoch": 1.0643448715226163, "grad_norm": 8.350524209750949, "learning_rate": 2.360784535173439e-06, "loss": 1.0525, "step": 7518 }, { "epoch": 1.0644864443972535, "grad_norm": 8.792160912191006, "learning_rate": 2.3602122286076675e-06, "loss": 1.0706, "step": 7519 }, { "epoch": 1.0646280172718907, "grad_norm": 9.499224179361471, "learning_rate": 2.359639929390435e-06, "loss": 1.0254, "step": 7520 }, { "epoch": 1.064769590146528, "grad_norm": 9.421575791398507, "learning_rate": 2.359067637551827e-06, "loss": 0.9523, "step": 7521 }, { "epoch": 1.0649111630211652, "grad_norm": 8.576775672280183, "learning_rate": 2.3584953531219278e-06, "loss": 1.0711, "step": 7522 }, { "epoch": 1.0650527358958024, "grad_norm": 9.480943942175166, "learning_rate": 2.3579230761308223e-06, "loss": 1.0658, "step": 7523 }, { "epoch": 1.0651943087704396, "grad_norm": 8.876783164970865, "learning_rate": 2.3573508066085954e-06, "loss": 1.0383, "step": 7524 }, { "epoch": 1.0653358816450769, "grad_norm": 9.358605456677292, "learning_rate": 2.3567785445853295e-06, "loss": 1.0861, "step": 7525 }, { "epoch": 1.065477454519714, "grad_norm": 8.960908977858923, "learning_rate": 2.356206290091109e-06, "loss": 0.9274, "step": 7526 }, { "epoch": 1.0656190273943513, "grad_norm": 9.123749111607712, "learning_rate": 2.355634043156017e-06, "loss": 1.053, "step": 7527 }, { "epoch": 1.0657606002689886, "grad_norm": 9.29662264954259, "learning_rate": 2.355061803810135e-06, "loss": 0.9494, "step": 7528 }, { "epoch": 1.0659021731436256, "grad_norm": 10.611773707111244, "learning_rate": 2.354489572083546e-06, "loss": 1.1368, "step": 7529 }, { "epoch": 1.0660437460182628, "grad_norm": 8.31471165655434, "learning_rate": 2.3539173480063323e-06, "loss": 1.0472, "step": 7530 }, { "epoch": 1.0661853188929, "grad_norm": 8.195277123970358, "learning_rate": 2.3533451316085744e-06, "loss": 1.0943, "step": 7531 }, { "epoch": 1.0663268917675373, "grad_norm": 8.93126882733976, "learning_rate": 2.352772922920353e-06, "loss": 0.9857, "step": 7532 }, { "epoch": 1.0664684646421745, "grad_norm": 7.963243444400351, "learning_rate": 2.3522007219717493e-06, "loss": 1.0022, "step": 7533 }, { "epoch": 1.0666100375168117, "grad_norm": 8.245094459228564, "learning_rate": 2.351628528792844e-06, "loss": 0.9591, "step": 7534 }, { "epoch": 1.066751610391449, "grad_norm": 8.538979853642822, "learning_rate": 2.3510563434137175e-06, "loss": 1.0668, "step": 7535 }, { "epoch": 1.0668931832660862, "grad_norm": 9.144042329945544, "learning_rate": 2.3504841658644465e-06, "loss": 1.0432, "step": 7536 }, { "epoch": 1.0670347561407234, "grad_norm": 7.936675261656128, "learning_rate": 2.3499119961751114e-06, "loss": 0.9805, "step": 7537 }, { "epoch": 1.0671763290153606, "grad_norm": 10.3025509092077, "learning_rate": 2.3493398343757904e-06, "loss": 1.1458, "step": 7538 }, { "epoch": 1.0673179018899979, "grad_norm": 9.632969714983286, "learning_rate": 2.3487676804965624e-06, "loss": 1.081, "step": 7539 }, { "epoch": 1.067459474764635, "grad_norm": 9.041277008332742, "learning_rate": 2.3481955345675052e-06, "loss": 1.0794, "step": 7540 }, { "epoch": 1.0676010476392723, "grad_norm": 9.211785637372715, "learning_rate": 2.347623396618695e-06, "loss": 1.0748, "step": 7541 }, { "epoch": 1.0677426205139096, "grad_norm": 9.046387127254302, "learning_rate": 2.3470512666802094e-06, "loss": 1.0845, "step": 7542 }, { "epoch": 1.0678841933885468, "grad_norm": 8.90225933414891, "learning_rate": 2.3464791447821244e-06, "loss": 0.9886, "step": 7543 }, { "epoch": 1.068025766263184, "grad_norm": 8.421351372288587, "learning_rate": 2.3459070309545165e-06, "loss": 1.0193, "step": 7544 }, { "epoch": 1.0681673391378212, "grad_norm": 10.534755044270058, "learning_rate": 2.345334925227461e-06, "loss": 1.1358, "step": 7545 }, { "epoch": 1.0683089120124585, "grad_norm": 9.497812922846123, "learning_rate": 2.344762827631034e-06, "loss": 1.0011, "step": 7546 }, { "epoch": 1.0684504848870957, "grad_norm": 8.24182926502297, "learning_rate": 2.3441907381953084e-06, "loss": 1.0364, "step": 7547 }, { "epoch": 1.068592057761733, "grad_norm": 9.148699775408875, "learning_rate": 2.3436186569503598e-06, "loss": 1.0329, "step": 7548 }, { "epoch": 1.0687336306363702, "grad_norm": 9.043820407385098, "learning_rate": 2.343046583926262e-06, "loss": 1.0228, "step": 7549 }, { "epoch": 1.0688752035110074, "grad_norm": 8.594758907396386, "learning_rate": 2.3424745191530877e-06, "loss": 1.0809, "step": 7550 }, { "epoch": 1.0690167763856444, "grad_norm": 9.344665686606108, "learning_rate": 2.3419024626609112e-06, "loss": 1.1405, "step": 7551 }, { "epoch": 1.0691583492602816, "grad_norm": 9.549499850133268, "learning_rate": 2.341330414479804e-06, "loss": 1.0882, "step": 7552 }, { "epoch": 1.0692999221349189, "grad_norm": 10.230383361017264, "learning_rate": 2.340758374639838e-06, "loss": 1.0373, "step": 7553 }, { "epoch": 1.069441495009556, "grad_norm": 8.692472283640338, "learning_rate": 2.3401863431710864e-06, "loss": 1.0514, "step": 7554 }, { "epoch": 1.0695830678841933, "grad_norm": 9.637445503605244, "learning_rate": 2.3396143201036187e-06, "loss": 1.1465, "step": 7555 }, { "epoch": 1.0697246407588306, "grad_norm": 8.662155395523184, "learning_rate": 2.3390423054675084e-06, "loss": 1.0576, "step": 7556 }, { "epoch": 1.0698662136334678, "grad_norm": 8.536790103916127, "learning_rate": 2.3384702992928228e-06, "loss": 1.0677, "step": 7557 }, { "epoch": 1.070007786508105, "grad_norm": 10.436694451044024, "learning_rate": 2.337898301609633e-06, "loss": 1.0987, "step": 7558 }, { "epoch": 1.0701493593827422, "grad_norm": 10.19896029988991, "learning_rate": 2.3373263124480086e-06, "loss": 0.9822, "step": 7559 }, { "epoch": 1.0702909322573795, "grad_norm": 9.661233054496758, "learning_rate": 2.336754331838019e-06, "loss": 0.9748, "step": 7560 }, { "epoch": 1.0704325051320167, "grad_norm": 8.559019397648099, "learning_rate": 2.3361823598097316e-06, "loss": 0.9739, "step": 7561 }, { "epoch": 1.070574078006654, "grad_norm": 9.554060259588468, "learning_rate": 2.335610396393216e-06, "loss": 1.1289, "step": 7562 }, { "epoch": 1.0707156508812912, "grad_norm": 8.463159496579914, "learning_rate": 2.3350384416185395e-06, "loss": 1.032, "step": 7563 }, { "epoch": 1.0708572237559284, "grad_norm": 10.191357590196215, "learning_rate": 2.334466495515769e-06, "loss": 0.998, "step": 7564 }, { "epoch": 1.0709987966305656, "grad_norm": 9.053430267549146, "learning_rate": 2.3338945581149713e-06, "loss": 0.9941, "step": 7565 }, { "epoch": 1.0711403695052029, "grad_norm": 8.129001981656385, "learning_rate": 2.333322629446213e-06, "loss": 1.0167, "step": 7566 }, { "epoch": 1.07128194237984, "grad_norm": 9.492179864323061, "learning_rate": 2.33275070953956e-06, "loss": 1.0354, "step": 7567 }, { "epoch": 1.0714235152544773, "grad_norm": 8.602694209156223, "learning_rate": 2.3321787984250774e-06, "loss": 0.9789, "step": 7568 }, { "epoch": 1.0715650881291146, "grad_norm": 8.797910476912437, "learning_rate": 2.33160689613283e-06, "loss": 0.9363, "step": 7569 }, { "epoch": 1.0717066610037518, "grad_norm": 6.969957460905765, "learning_rate": 2.3310350026928826e-06, "loss": 1.0095, "step": 7570 }, { "epoch": 1.0718482338783888, "grad_norm": 9.413726167487841, "learning_rate": 2.330463118135299e-06, "loss": 1.0979, "step": 7571 }, { "epoch": 1.071989806753026, "grad_norm": 8.387036683977179, "learning_rate": 2.3298912424901434e-06, "loss": 1.0238, "step": 7572 }, { "epoch": 1.0721313796276633, "grad_norm": 9.939060688423432, "learning_rate": 2.3293193757874776e-06, "loss": 1.0362, "step": 7573 }, { "epoch": 1.0722729525023005, "grad_norm": 10.570062762663268, "learning_rate": 2.3287475180573653e-06, "loss": 1.0544, "step": 7574 }, { "epoch": 1.0724145253769377, "grad_norm": 9.105055241173675, "learning_rate": 2.328175669329868e-06, "loss": 1.025, "step": 7575 }, { "epoch": 1.072556098251575, "grad_norm": 11.088854416067612, "learning_rate": 2.327603829635048e-06, "loss": 1.1367, "step": 7576 }, { "epoch": 1.0726976711262122, "grad_norm": 9.655531378119365, "learning_rate": 2.3270319990029668e-06, "loss": 1.0396, "step": 7577 }, { "epoch": 1.0728392440008494, "grad_norm": 10.027969156577688, "learning_rate": 2.326460177463683e-06, "loss": 0.9739, "step": 7578 }, { "epoch": 1.0729808168754866, "grad_norm": 10.362189783622785, "learning_rate": 2.325888365047259e-06, "loss": 1.1163, "step": 7579 }, { "epoch": 1.0731223897501239, "grad_norm": 11.000120682487847, "learning_rate": 2.325316561783754e-06, "loss": 0.9568, "step": 7580 }, { "epoch": 1.073263962624761, "grad_norm": 9.672524573678682, "learning_rate": 2.324744767703227e-06, "loss": 1.1665, "step": 7581 }, { "epoch": 1.0734055354993983, "grad_norm": 8.545132809896494, "learning_rate": 2.3241729828357367e-06, "loss": 0.9102, "step": 7582 }, { "epoch": 1.0735471083740356, "grad_norm": 8.571139912058946, "learning_rate": 2.3236012072113414e-06, "loss": 0.9142, "step": 7583 }, { "epoch": 1.0736886812486728, "grad_norm": 10.062648606239625, "learning_rate": 2.323029440860099e-06, "loss": 1.0738, "step": 7584 }, { "epoch": 1.07383025412331, "grad_norm": 8.952926021049146, "learning_rate": 2.322457683812067e-06, "loss": 1.0582, "step": 7585 }, { "epoch": 1.0739718269979472, "grad_norm": 11.544865537895447, "learning_rate": 2.3218859360973025e-06, "loss": 0.981, "step": 7586 }, { "epoch": 1.0741133998725845, "grad_norm": 9.927080173765455, "learning_rate": 2.3213141977458615e-06, "loss": 1.0019, "step": 7587 }, { "epoch": 1.0742549727472217, "grad_norm": 9.426869853993988, "learning_rate": 2.320742468787801e-06, "loss": 0.9743, "step": 7588 }, { "epoch": 1.074396545621859, "grad_norm": 8.573862375139457, "learning_rate": 2.3201707492531743e-06, "loss": 0.9915, "step": 7589 }, { "epoch": 1.0745381184964962, "grad_norm": 8.825072930658722, "learning_rate": 2.3195990391720364e-06, "loss": 0.9889, "step": 7590 }, { "epoch": 1.0746796913711334, "grad_norm": 9.107280091178687, "learning_rate": 2.319027338574443e-06, "loss": 1.1608, "step": 7591 }, { "epoch": 1.0748212642457706, "grad_norm": 8.363964040466389, "learning_rate": 2.318455647490448e-06, "loss": 0.9965, "step": 7592 }, { "epoch": 1.0749628371204076, "grad_norm": 10.239768900051644, "learning_rate": 2.3178839659501033e-06, "loss": 1.1727, "step": 7593 }, { "epoch": 1.0751044099950449, "grad_norm": 8.010476881444953, "learning_rate": 2.3173122939834635e-06, "loss": 1.0997, "step": 7594 }, { "epoch": 1.075245982869682, "grad_norm": 10.515513630655608, "learning_rate": 2.31674063162058e-06, "loss": 0.9931, "step": 7595 }, { "epoch": 1.0753875557443193, "grad_norm": 8.430326902447431, "learning_rate": 2.316168978891505e-06, "loss": 0.9655, "step": 7596 }, { "epoch": 1.0755291286189566, "grad_norm": 8.19697872420553, "learning_rate": 2.315597335826289e-06, "loss": 1.0555, "step": 7597 }, { "epoch": 1.0756707014935938, "grad_norm": 8.090950382271474, "learning_rate": 2.3150257024549847e-06, "loss": 1.0724, "step": 7598 }, { "epoch": 1.075812274368231, "grad_norm": 10.104813031369051, "learning_rate": 2.314454078807641e-06, "loss": 1.0371, "step": 7599 }, { "epoch": 1.0759538472428682, "grad_norm": 10.284888832333436, "learning_rate": 2.3138824649143076e-06, "loss": 1.1612, "step": 7600 }, { "epoch": 1.0760954201175055, "grad_norm": 10.839606909011716, "learning_rate": 2.313310860805034e-06, "loss": 1.1999, "step": 7601 }, { "epoch": 1.0762369929921427, "grad_norm": 9.432213521570846, "learning_rate": 2.31273926650987e-06, "loss": 1.0189, "step": 7602 }, { "epoch": 1.07637856586678, "grad_norm": 10.746136348061698, "learning_rate": 2.312167682058863e-06, "loss": 1.1511, "step": 7603 }, { "epoch": 1.0765201387414172, "grad_norm": 8.736116566645366, "learning_rate": 2.3115961074820604e-06, "loss": 1.01, "step": 7604 }, { "epoch": 1.0766617116160544, "grad_norm": 8.468705405930791, "learning_rate": 2.31102454280951e-06, "loss": 1.0307, "step": 7605 }, { "epoch": 1.0768032844906916, "grad_norm": 9.659659860755362, "learning_rate": 2.3104529880712586e-06, "loss": 1.1288, "step": 7606 }, { "epoch": 1.0769448573653289, "grad_norm": 9.13959955185465, "learning_rate": 2.309881443297352e-06, "loss": 0.9819, "step": 7607 }, { "epoch": 1.077086430239966, "grad_norm": 9.486434888798534, "learning_rate": 2.3093099085178366e-06, "loss": 1.0496, "step": 7608 }, { "epoch": 1.0772280031146033, "grad_norm": 10.195095857971255, "learning_rate": 2.308738383762758e-06, "loss": 1.0185, "step": 7609 }, { "epoch": 1.0773695759892405, "grad_norm": 10.244302049788686, "learning_rate": 2.308166869062159e-06, "loss": 1.034, "step": 7610 }, { "epoch": 1.0775111488638778, "grad_norm": 8.815227546373148, "learning_rate": 2.3075953644460847e-06, "loss": 1.0702, "step": 7611 }, { "epoch": 1.0776527217385148, "grad_norm": 8.303317167781172, "learning_rate": 2.3070238699445783e-06, "loss": 1.0735, "step": 7612 }, { "epoch": 1.077794294613152, "grad_norm": 8.720823636135416, "learning_rate": 2.306452385587683e-06, "loss": 1.0025, "step": 7613 }, { "epoch": 1.0779358674877892, "grad_norm": 9.55585563050503, "learning_rate": 2.305880911405442e-06, "loss": 1.0346, "step": 7614 }, { "epoch": 1.0780774403624265, "grad_norm": 10.31422141764217, "learning_rate": 2.3053094474278967e-06, "loss": 1.0165, "step": 7615 }, { "epoch": 1.0782190132370637, "grad_norm": 8.179505616709916, "learning_rate": 2.3047379936850885e-06, "loss": 0.9981, "step": 7616 }, { "epoch": 1.078360586111701, "grad_norm": 12.459715359346598, "learning_rate": 2.3041665502070584e-06, "loss": 1.0345, "step": 7617 }, { "epoch": 1.0785021589863382, "grad_norm": 9.739345108355511, "learning_rate": 2.3035951170238468e-06, "loss": 1.1414, "step": 7618 }, { "epoch": 1.0786437318609754, "grad_norm": 8.550311469935476, "learning_rate": 2.3030236941654933e-06, "loss": 0.9981, "step": 7619 }, { "epoch": 1.0787853047356126, "grad_norm": 11.159426453321418, "learning_rate": 2.302452281662038e-06, "loss": 1.1189, "step": 7620 }, { "epoch": 1.0789268776102499, "grad_norm": 9.114550734688729, "learning_rate": 2.3018808795435187e-06, "loss": 0.9502, "step": 7621 }, { "epoch": 1.079068450484887, "grad_norm": 10.070544326753613, "learning_rate": 2.3013094878399735e-06, "loss": 1.0786, "step": 7622 }, { "epoch": 1.0792100233595243, "grad_norm": 8.176592601135354, "learning_rate": 2.3007381065814405e-06, "loss": 0.9752, "step": 7623 }, { "epoch": 1.0793515962341615, "grad_norm": 12.16553760323732, "learning_rate": 2.3001667357979564e-06, "loss": 1.0666, "step": 7624 }, { "epoch": 1.0794931691087988, "grad_norm": 7.9957342695971185, "learning_rate": 2.2995953755195584e-06, "loss": 1.0594, "step": 7625 }, { "epoch": 1.079634741983436, "grad_norm": 8.24833801174847, "learning_rate": 2.2990240257762817e-06, "loss": 1.0021, "step": 7626 }, { "epoch": 1.0797763148580732, "grad_norm": 9.774932673776094, "learning_rate": 2.298452686598162e-06, "loss": 1.0486, "step": 7627 }, { "epoch": 1.0799178877327105, "grad_norm": 8.911125481589451, "learning_rate": 2.2978813580152347e-06, "loss": 1.1131, "step": 7628 }, { "epoch": 1.0800594606073477, "grad_norm": 11.145918135751186, "learning_rate": 2.297310040057533e-06, "loss": 1.0947, "step": 7629 }, { "epoch": 1.080201033481985, "grad_norm": 9.426859737435226, "learning_rate": 2.296738732755093e-06, "loss": 1.0032, "step": 7630 }, { "epoch": 1.0803426063566222, "grad_norm": 7.989684607018534, "learning_rate": 2.296167436137945e-06, "loss": 1.0034, "step": 7631 }, { "epoch": 1.0804841792312594, "grad_norm": 8.108459538584823, "learning_rate": 2.2955961502361235e-06, "loss": 0.9895, "step": 7632 }, { "epoch": 1.0806257521058966, "grad_norm": 10.055539584864327, "learning_rate": 2.2950248750796594e-06, "loss": 1.1081, "step": 7633 }, { "epoch": 1.0807673249805336, "grad_norm": 8.766694627531109, "learning_rate": 2.2944536106985848e-06, "loss": 1.0177, "step": 7634 }, { "epoch": 1.0809088978551709, "grad_norm": 8.586219179012323, "learning_rate": 2.2938823571229303e-06, "loss": 0.9885, "step": 7635 }, { "epoch": 1.081050470729808, "grad_norm": 9.754665725588383, "learning_rate": 2.2933111143827268e-06, "loss": 1.0543, "step": 7636 }, { "epoch": 1.0811920436044453, "grad_norm": 9.79943405385632, "learning_rate": 2.2927398825080043e-06, "loss": 1.0073, "step": 7637 }, { "epoch": 1.0813336164790825, "grad_norm": 9.57366491028661, "learning_rate": 2.2921686615287916e-06, "loss": 0.9666, "step": 7638 }, { "epoch": 1.0814751893537198, "grad_norm": 9.657501040899787, "learning_rate": 2.2915974514751173e-06, "loss": 1.0884, "step": 7639 }, { "epoch": 1.081616762228357, "grad_norm": 9.903028283169048, "learning_rate": 2.29102625237701e-06, "loss": 1.0415, "step": 7640 }, { "epoch": 1.0817583351029942, "grad_norm": 11.397057913213763, "learning_rate": 2.290455064264497e-06, "loss": 1.0836, "step": 7641 }, { "epoch": 1.0818999079776315, "grad_norm": 9.603969436793887, "learning_rate": 2.2898838871676037e-06, "loss": 1.0682, "step": 7642 }, { "epoch": 1.0820414808522687, "grad_norm": 8.117496253301463, "learning_rate": 2.2893127211163583e-06, "loss": 1.0038, "step": 7643 }, { "epoch": 1.082183053726906, "grad_norm": 11.419017095004964, "learning_rate": 2.2887415661407866e-06, "loss": 1.08, "step": 7644 }, { "epoch": 1.0823246266015432, "grad_norm": 9.815669428674125, "learning_rate": 2.288170422270913e-06, "loss": 0.9932, "step": 7645 }, { "epoch": 1.0824661994761804, "grad_norm": 9.353419171367896, "learning_rate": 2.287599289536762e-06, "loss": 1.0943, "step": 7646 }, { "epoch": 1.0826077723508176, "grad_norm": 9.570405771229169, "learning_rate": 2.2870281679683582e-06, "loss": 1.0414, "step": 7647 }, { "epoch": 1.0827493452254549, "grad_norm": 9.470628810094594, "learning_rate": 2.2864570575957246e-06, "loss": 0.9579, "step": 7648 }, { "epoch": 1.082890918100092, "grad_norm": 7.667450602160556, "learning_rate": 2.2858859584488848e-06, "loss": 0.9566, "step": 7649 }, { "epoch": 1.0830324909747293, "grad_norm": 8.345708838562869, "learning_rate": 2.28531487055786e-06, "loss": 0.9589, "step": 7650 }, { "epoch": 1.0831740638493665, "grad_norm": 8.064585807745384, "learning_rate": 2.2847437939526735e-06, "loss": 1.0519, "step": 7651 }, { "epoch": 1.0833156367240038, "grad_norm": 9.78636898360678, "learning_rate": 2.2841727286633444e-06, "loss": 1.1408, "step": 7652 }, { "epoch": 1.083457209598641, "grad_norm": 9.294435325243633, "learning_rate": 2.2836016747198937e-06, "loss": 0.9799, "step": 7653 }, { "epoch": 1.083598782473278, "grad_norm": 10.168030277378195, "learning_rate": 2.283030632152342e-06, "loss": 1.1197, "step": 7654 }, { "epoch": 1.0837403553479152, "grad_norm": 10.03081456840678, "learning_rate": 2.282459600990708e-06, "loss": 0.9611, "step": 7655 }, { "epoch": 1.0838819282225525, "grad_norm": 9.644854392242456, "learning_rate": 2.2818885812650105e-06, "loss": 1.0302, "step": 7656 }, { "epoch": 1.0840235010971897, "grad_norm": 7.801215483031815, "learning_rate": 2.281317573005268e-06, "loss": 0.9302, "step": 7657 }, { "epoch": 1.084165073971827, "grad_norm": 9.939059920806143, "learning_rate": 2.2807465762414967e-06, "loss": 1.126, "step": 7658 }, { "epoch": 1.0843066468464642, "grad_norm": 11.202817862116559, "learning_rate": 2.280175591003715e-06, "loss": 1.1611, "step": 7659 }, { "epoch": 1.0844482197211014, "grad_norm": 7.416443067744781, "learning_rate": 2.279604617321939e-06, "loss": 1.0521, "step": 7660 }, { "epoch": 1.0845897925957386, "grad_norm": 9.568454060628161, "learning_rate": 2.279033655226183e-06, "loss": 1.0449, "step": 7661 }, { "epoch": 1.0847313654703759, "grad_norm": 9.001526597144595, "learning_rate": 2.278462704746465e-06, "loss": 1.0712, "step": 7662 }, { "epoch": 1.084872938345013, "grad_norm": 9.36044116944916, "learning_rate": 2.277891765912796e-06, "loss": 1.1602, "step": 7663 }, { "epoch": 1.0850145112196503, "grad_norm": 11.156793319337334, "learning_rate": 2.2773208387551906e-06, "loss": 1.0423, "step": 7664 }, { "epoch": 1.0851560840942875, "grad_norm": 7.3507467500386054, "learning_rate": 2.2767499233036635e-06, "loss": 0.9809, "step": 7665 }, { "epoch": 1.0852976569689248, "grad_norm": 8.960702083192503, "learning_rate": 2.2761790195882264e-06, "loss": 1.0466, "step": 7666 }, { "epoch": 1.085439229843562, "grad_norm": 7.927822431144372, "learning_rate": 2.275608127638891e-06, "loss": 1.0564, "step": 7667 }, { "epoch": 1.0855808027181992, "grad_norm": 9.043047633970193, "learning_rate": 2.2750372474856696e-06, "loss": 1.0261, "step": 7668 }, { "epoch": 1.0857223755928365, "grad_norm": 10.09663279085552, "learning_rate": 2.274466379158572e-06, "loss": 1.0218, "step": 7669 }, { "epoch": 1.0858639484674737, "grad_norm": 8.525699137101144, "learning_rate": 2.2738955226876086e-06, "loss": 0.9961, "step": 7670 }, { "epoch": 1.086005521342111, "grad_norm": 11.99271807661776, "learning_rate": 2.273324678102789e-06, "loss": 1.0604, "step": 7671 }, { "epoch": 1.0861470942167482, "grad_norm": 9.177452354213951, "learning_rate": 2.272753845434122e-06, "loss": 1.0426, "step": 7672 }, { "epoch": 1.0862886670913854, "grad_norm": 10.419361915307864, "learning_rate": 2.272183024711617e-06, "loss": 1.0957, "step": 7673 }, { "epoch": 1.0864302399660226, "grad_norm": 10.99726105316168, "learning_rate": 2.2716122159652795e-06, "loss": 1.0077, "step": 7674 }, { "epoch": 1.0865718128406598, "grad_norm": 8.882301872828833, "learning_rate": 2.2710414192251176e-06, "loss": 0.9087, "step": 7675 }, { "epoch": 1.0867133857152969, "grad_norm": 10.913501503902568, "learning_rate": 2.2704706345211375e-06, "loss": 1.0816, "step": 7676 }, { "epoch": 1.086854958589934, "grad_norm": 10.37287347900406, "learning_rate": 2.269899861883345e-06, "loss": 1.0579, "step": 7677 }, { "epoch": 1.0869965314645713, "grad_norm": 12.051901791501297, "learning_rate": 2.269329101341745e-06, "loss": 1.0264, "step": 7678 }, { "epoch": 1.0871381043392085, "grad_norm": 11.378512668055935, "learning_rate": 2.268758352926343e-06, "loss": 1.0168, "step": 7679 }, { "epoch": 1.0872796772138458, "grad_norm": 9.31274577590942, "learning_rate": 2.268187616667141e-06, "loss": 1.0531, "step": 7680 }, { "epoch": 1.087421250088483, "grad_norm": 10.252377490204902, "learning_rate": 2.267616892594143e-06, "loss": 1.0819, "step": 7681 }, { "epoch": 1.0875628229631202, "grad_norm": 8.939547644314054, "learning_rate": 2.2670461807373526e-06, "loss": 1.0136, "step": 7682 }, { "epoch": 1.0877043958377575, "grad_norm": 10.368618760706887, "learning_rate": 2.2664754811267713e-06, "loss": 0.9791, "step": 7683 }, { "epoch": 1.0878459687123947, "grad_norm": 11.510761698849269, "learning_rate": 2.265904793792399e-06, "loss": 1.1665, "step": 7684 }, { "epoch": 1.087987541587032, "grad_norm": 9.947644699666387, "learning_rate": 2.2653341187642368e-06, "loss": 0.9754, "step": 7685 }, { "epoch": 1.0881291144616692, "grad_norm": 10.969700090173813, "learning_rate": 2.2647634560722857e-06, "loss": 1.1402, "step": 7686 }, { "epoch": 1.0882706873363064, "grad_norm": 10.189176643807434, "learning_rate": 2.264192805746543e-06, "loss": 1.0696, "step": 7687 }, { "epoch": 1.0884122602109436, "grad_norm": 9.120044355451904, "learning_rate": 2.2636221678170097e-06, "loss": 1.0583, "step": 7688 }, { "epoch": 1.0885538330855808, "grad_norm": 9.18988961005092, "learning_rate": 2.2630515423136827e-06, "loss": 0.945, "step": 7689 }, { "epoch": 1.088695405960218, "grad_norm": 11.108340015299667, "learning_rate": 2.2624809292665593e-06, "loss": 0.997, "step": 7690 }, { "epoch": 1.0888369788348553, "grad_norm": 7.679130153669242, "learning_rate": 2.2619103287056366e-06, "loss": 0.9474, "step": 7691 }, { "epoch": 1.0889785517094925, "grad_norm": 8.779118506600176, "learning_rate": 2.26133974066091e-06, "loss": 0.8826, "step": 7692 }, { "epoch": 1.0891201245841298, "grad_norm": 9.125149242278049, "learning_rate": 2.2607691651623757e-06, "loss": 0.962, "step": 7693 }, { "epoch": 1.089261697458767, "grad_norm": 8.997584548641981, "learning_rate": 2.260198602240028e-06, "loss": 0.9691, "step": 7694 }, { "epoch": 1.089403270333404, "grad_norm": 8.385621580538452, "learning_rate": 2.259628051923861e-06, "loss": 0.9728, "step": 7695 }, { "epoch": 1.0895448432080412, "grad_norm": 9.623169960332676, "learning_rate": 2.259057514243868e-06, "loss": 0.9338, "step": 7696 }, { "epoch": 1.0896864160826785, "grad_norm": 10.274288522261378, "learning_rate": 2.2584869892300416e-06, "loss": 1.0994, "step": 7697 }, { "epoch": 1.0898279889573157, "grad_norm": 8.513218642945667, "learning_rate": 2.2579164769123744e-06, "loss": 0.9245, "step": 7698 }, { "epoch": 1.089969561831953, "grad_norm": 9.786391981602726, "learning_rate": 2.257345977320857e-06, "loss": 0.9919, "step": 7699 }, { "epoch": 1.0901111347065902, "grad_norm": 11.641735213416291, "learning_rate": 2.256775490485481e-06, "loss": 1.1819, "step": 7700 }, { "epoch": 1.0902527075812274, "grad_norm": 9.37426510791521, "learning_rate": 2.256205016436236e-06, "loss": 1.0364, "step": 7701 }, { "epoch": 1.0903942804558646, "grad_norm": 8.332175924669883, "learning_rate": 2.255634555203112e-06, "loss": 0.9456, "step": 7702 }, { "epoch": 1.0905358533305018, "grad_norm": 10.42595504667337, "learning_rate": 2.2550641068160966e-06, "loss": 1.0606, "step": 7703 }, { "epoch": 1.090677426205139, "grad_norm": 9.828479366251528, "learning_rate": 2.254493671305179e-06, "loss": 0.9927, "step": 7704 }, { "epoch": 1.0908189990797763, "grad_norm": 9.255296170417694, "learning_rate": 2.253923248700346e-06, "loss": 0.9486, "step": 7705 }, { "epoch": 1.0909605719544135, "grad_norm": 8.101819114336346, "learning_rate": 2.2533528390315838e-06, "loss": 1.0565, "step": 7706 }, { "epoch": 1.0911021448290508, "grad_norm": 9.014622148242594, "learning_rate": 2.252782442328879e-06, "loss": 1.0009, "step": 7707 }, { "epoch": 1.091243717703688, "grad_norm": 7.9628506234431855, "learning_rate": 2.2522120586222174e-06, "loss": 0.9535, "step": 7708 }, { "epoch": 1.0913852905783252, "grad_norm": 8.272793810535871, "learning_rate": 2.2516416879415825e-06, "loss": 0.9023, "step": 7709 }, { "epoch": 1.0915268634529625, "grad_norm": 8.443421497067906, "learning_rate": 2.2510713303169588e-06, "loss": 0.8847, "step": 7710 }, { "epoch": 1.0916684363275997, "grad_norm": 8.624934541757947, "learning_rate": 2.25050098577833e-06, "loss": 1.0302, "step": 7711 }, { "epoch": 1.091810009202237, "grad_norm": 8.205256419525002, "learning_rate": 2.2499306543556783e-06, "loss": 0.9775, "step": 7712 }, { "epoch": 1.0919515820768741, "grad_norm": 8.186967439293428, "learning_rate": 2.2493603360789855e-06, "loss": 0.9296, "step": 7713 }, { "epoch": 1.0920931549515114, "grad_norm": 8.639929193277208, "learning_rate": 2.2487900309782333e-06, "loss": 1.0713, "step": 7714 }, { "epoch": 1.0922347278261486, "grad_norm": 8.928655342797635, "learning_rate": 2.2482197390834027e-06, "loss": 1.0599, "step": 7715 }, { "epoch": 1.0923763007007858, "grad_norm": 10.45201736762921, "learning_rate": 2.2476494604244712e-06, "loss": 1.112, "step": 7716 }, { "epoch": 1.0925178735754228, "grad_norm": 8.775411273414946, "learning_rate": 2.24707919503142e-06, "loss": 1.056, "step": 7717 }, { "epoch": 1.09265944645006, "grad_norm": 8.390255371135678, "learning_rate": 2.246508942934227e-06, "loss": 0.9532, "step": 7718 }, { "epoch": 1.0928010193246973, "grad_norm": 7.990181381210233, "learning_rate": 2.2459387041628694e-06, "loss": 1.1127, "step": 7719 }, { "epoch": 1.0929425921993345, "grad_norm": 8.45343141123838, "learning_rate": 2.2453684787473252e-06, "loss": 1.077, "step": 7720 }, { "epoch": 1.0930841650739718, "grad_norm": 9.221113723051952, "learning_rate": 2.24479826671757e-06, "loss": 1.1885, "step": 7721 }, { "epoch": 1.093225737948609, "grad_norm": 11.063909925710634, "learning_rate": 2.2442280681035792e-06, "loss": 1.1772, "step": 7722 }, { "epoch": 1.0933673108232462, "grad_norm": 8.349031184293695, "learning_rate": 2.2436578829353286e-06, "loss": 1.0305, "step": 7723 }, { "epoch": 1.0935088836978835, "grad_norm": 7.962855414059384, "learning_rate": 2.243087711242792e-06, "loss": 0.939, "step": 7724 }, { "epoch": 1.0936504565725207, "grad_norm": 8.687542153674709, "learning_rate": 2.242517553055943e-06, "loss": 0.9381, "step": 7725 }, { "epoch": 1.093792029447158, "grad_norm": 9.488402264177855, "learning_rate": 2.2419474084047544e-06, "loss": 1.0553, "step": 7726 }, { "epoch": 1.0939336023217952, "grad_norm": 9.454007900401786, "learning_rate": 2.241377277319198e-06, "loss": 0.9759, "step": 7727 }, { "epoch": 1.0940751751964324, "grad_norm": 7.6079789590634865, "learning_rate": 2.240807159829245e-06, "loss": 0.9607, "step": 7728 }, { "epoch": 1.0942167480710696, "grad_norm": 9.27520493437784, "learning_rate": 2.2402370559648663e-06, "loss": 0.9963, "step": 7729 }, { "epoch": 1.0943583209457068, "grad_norm": 8.21726924190423, "learning_rate": 2.239666965756032e-06, "loss": 0.9653, "step": 7730 }, { "epoch": 1.094499893820344, "grad_norm": 9.022025812868549, "learning_rate": 2.2390968892327108e-06, "loss": 0.8999, "step": 7731 }, { "epoch": 1.0946414666949813, "grad_norm": 9.147770727863199, "learning_rate": 2.2385268264248717e-06, "loss": 1.089, "step": 7732 }, { "epoch": 1.0947830395696185, "grad_norm": 7.946780328940491, "learning_rate": 2.2379567773624825e-06, "loss": 0.8407, "step": 7733 }, { "epoch": 1.0949246124442558, "grad_norm": 8.03118492723516, "learning_rate": 2.2373867420755104e-06, "loss": 0.9642, "step": 7734 }, { "epoch": 1.095066185318893, "grad_norm": 7.612126473719699, "learning_rate": 2.2368167205939213e-06, "loss": 0.9955, "step": 7735 }, { "epoch": 1.0952077581935302, "grad_norm": 8.96515947837272, "learning_rate": 2.236246712947682e-06, "loss": 1.0186, "step": 7736 }, { "epoch": 1.0953493310681672, "grad_norm": 8.014281876079592, "learning_rate": 2.2356767191667554e-06, "loss": 0.9941, "step": 7737 }, { "epoch": 1.0954909039428045, "grad_norm": 8.536370050991284, "learning_rate": 2.235106739281106e-06, "loss": 0.9696, "step": 7738 }, { "epoch": 1.0956324768174417, "grad_norm": 8.662389678032387, "learning_rate": 2.2345367733206984e-06, "loss": 1.0061, "step": 7739 }, { "epoch": 1.095774049692079, "grad_norm": 9.343277220741811, "learning_rate": 2.2339668213154943e-06, "loss": 1.041, "step": 7740 }, { "epoch": 1.0959156225667162, "grad_norm": 9.206414714720061, "learning_rate": 2.2333968832954564e-06, "loss": 1.1017, "step": 7741 }, { "epoch": 1.0960571954413534, "grad_norm": 10.134804958429648, "learning_rate": 2.2328269592905455e-06, "loss": 1.006, "step": 7742 }, { "epoch": 1.0961987683159906, "grad_norm": 9.102404484256867, "learning_rate": 2.232257049330722e-06, "loss": 0.9261, "step": 7743 }, { "epoch": 1.0963403411906278, "grad_norm": 9.874009481078184, "learning_rate": 2.231687153445946e-06, "loss": 1.0771, "step": 7744 }, { "epoch": 1.096481914065265, "grad_norm": 9.080172860941047, "learning_rate": 2.231117271666176e-06, "loss": 1.0088, "step": 7745 }, { "epoch": 1.0966234869399023, "grad_norm": 10.036347897206909, "learning_rate": 2.2305474040213707e-06, "loss": 1.0919, "step": 7746 }, { "epoch": 1.0967650598145395, "grad_norm": 8.800795120384098, "learning_rate": 2.229977550541488e-06, "loss": 1.1106, "step": 7747 }, { "epoch": 1.0969066326891768, "grad_norm": 9.406771116300888, "learning_rate": 2.2294077112564836e-06, "loss": 1.0828, "step": 7748 }, { "epoch": 1.097048205563814, "grad_norm": 10.89126177859493, "learning_rate": 2.2288378861963144e-06, "loss": 1.0741, "step": 7749 }, { "epoch": 1.0971897784384512, "grad_norm": 9.505126373077008, "learning_rate": 2.228268075390935e-06, "loss": 1.0401, "step": 7750 }, { "epoch": 1.0973313513130885, "grad_norm": 9.297048563899821, "learning_rate": 2.2276982788703003e-06, "loss": 1.0652, "step": 7751 }, { "epoch": 1.0974729241877257, "grad_norm": 6.338555073991658, "learning_rate": 2.227128496664364e-06, "loss": 0.9475, "step": 7752 }, { "epoch": 1.097614497062363, "grad_norm": 8.560655826864853, "learning_rate": 2.226558728803079e-06, "loss": 0.9878, "step": 7753 }, { "epoch": 1.0977560699370001, "grad_norm": 6.600506976914343, "learning_rate": 2.225988975316398e-06, "loss": 0.9095, "step": 7754 }, { "epoch": 1.0978976428116374, "grad_norm": 10.377256217046888, "learning_rate": 2.2254192362342718e-06, "loss": 1.1227, "step": 7755 }, { "epoch": 1.0980392156862746, "grad_norm": 10.846653266559272, "learning_rate": 2.224849511586652e-06, "loss": 1.0037, "step": 7756 }, { "epoch": 1.0981807885609118, "grad_norm": 8.596047721946888, "learning_rate": 2.224279801403489e-06, "loss": 1.0701, "step": 7757 }, { "epoch": 1.098322361435549, "grad_norm": 8.894401726933529, "learning_rate": 2.2237101057147308e-06, "loss": 1.0191, "step": 7758 }, { "epoch": 1.098463934310186, "grad_norm": 11.463059512993793, "learning_rate": 2.223140424550326e-06, "loss": 0.9573, "step": 7759 }, { "epoch": 1.0986055071848233, "grad_norm": 8.979626802365766, "learning_rate": 2.2225707579402225e-06, "loss": 0.993, "step": 7760 }, { "epoch": 1.0987470800594605, "grad_norm": 9.261402422371114, "learning_rate": 2.222001105914367e-06, "loss": 0.9979, "step": 7761 }, { "epoch": 1.0988886529340978, "grad_norm": 9.639204764587811, "learning_rate": 2.2214314685027067e-06, "loss": 1.0512, "step": 7762 }, { "epoch": 1.099030225808735, "grad_norm": 8.806966139817424, "learning_rate": 2.2208618457351862e-06, "loss": 1.1104, "step": 7763 }, { "epoch": 1.0991717986833722, "grad_norm": 8.827257718045892, "learning_rate": 2.2202922376417505e-06, "loss": 1.0458, "step": 7764 }, { "epoch": 1.0993133715580095, "grad_norm": 9.11874175041929, "learning_rate": 2.219722644252343e-06, "loss": 1.0584, "step": 7765 }, { "epoch": 1.0994549444326467, "grad_norm": 8.84603755198302, "learning_rate": 2.2191530655969077e-06, "loss": 0.9996, "step": 7766 }, { "epoch": 1.099596517307284, "grad_norm": 9.130026491091215, "learning_rate": 2.2185835017053857e-06, "loss": 1.011, "step": 7767 }, { "epoch": 1.0997380901819211, "grad_norm": 7.634757630623852, "learning_rate": 2.2180139526077203e-06, "loss": 0.9961, "step": 7768 }, { "epoch": 1.0998796630565584, "grad_norm": 7.979406075998614, "learning_rate": 2.21744441833385e-06, "loss": 0.8946, "step": 7769 }, { "epoch": 1.1000212359311956, "grad_norm": 9.563863021348292, "learning_rate": 2.2168748989137166e-06, "loss": 1.0558, "step": 7770 }, { "epoch": 1.1001628088058328, "grad_norm": 8.542379755787602, "learning_rate": 2.2163053943772585e-06, "loss": 0.9893, "step": 7771 }, { "epoch": 1.10030438168047, "grad_norm": 7.921303632547718, "learning_rate": 2.2157359047544137e-06, "loss": 1.0239, "step": 7772 }, { "epoch": 1.1004459545551073, "grad_norm": 9.18988794966205, "learning_rate": 2.215166430075121e-06, "loss": 1.0612, "step": 7773 }, { "epoch": 1.1005875274297445, "grad_norm": 8.202046059997011, "learning_rate": 2.2145969703693167e-06, "loss": 0.9253, "step": 7774 }, { "epoch": 1.1007291003043818, "grad_norm": 8.514714687582755, "learning_rate": 2.2140275256669365e-06, "loss": 1.0718, "step": 7775 }, { "epoch": 1.100870673179019, "grad_norm": 9.586372048524634, "learning_rate": 2.2134580959979164e-06, "loss": 1.065, "step": 7776 }, { "epoch": 1.1010122460536562, "grad_norm": 10.079152230836819, "learning_rate": 2.2128886813921906e-06, "loss": 1.0661, "step": 7777 }, { "epoch": 1.1011538189282932, "grad_norm": 8.512827450452162, "learning_rate": 2.2123192818796928e-06, "loss": 1.049, "step": 7778 }, { "epoch": 1.1012953918029305, "grad_norm": 8.659169940592378, "learning_rate": 2.211749897490356e-06, "loss": 1.097, "step": 7779 }, { "epoch": 1.1014369646775677, "grad_norm": 9.101764935382853, "learning_rate": 2.2111805282541114e-06, "loss": 0.9961, "step": 7780 }, { "epoch": 1.101578537552205, "grad_norm": 8.450917066660761, "learning_rate": 2.2106111742008914e-06, "loss": 1.0164, "step": 7781 }, { "epoch": 1.1017201104268421, "grad_norm": 9.923255163603937, "learning_rate": 2.2100418353606262e-06, "loss": 1.0182, "step": 7782 }, { "epoch": 1.1018616833014794, "grad_norm": 8.211094064426694, "learning_rate": 2.2094725117632454e-06, "loss": 0.959, "step": 7783 }, { "epoch": 1.1020032561761166, "grad_norm": 8.42353807211339, "learning_rate": 2.2089032034386775e-06, "loss": 1.0588, "step": 7784 }, { "epoch": 1.1021448290507538, "grad_norm": 8.487069111421844, "learning_rate": 2.208333910416852e-06, "loss": 0.9729, "step": 7785 }, { "epoch": 1.102286401925391, "grad_norm": 7.11172935990434, "learning_rate": 2.2077646327276948e-06, "loss": 1.0025, "step": 7786 }, { "epoch": 1.1024279748000283, "grad_norm": 8.418096993657706, "learning_rate": 2.207195370401134e-06, "loss": 1.0769, "step": 7787 }, { "epoch": 1.1025695476746655, "grad_norm": 9.769274318142827, "learning_rate": 2.206626123467093e-06, "loss": 1.0167, "step": 7788 }, { "epoch": 1.1027111205493028, "grad_norm": 9.893347583907866, "learning_rate": 2.2060568919554997e-06, "loss": 1.031, "step": 7789 }, { "epoch": 1.10285269342394, "grad_norm": 9.281943760897754, "learning_rate": 2.205487675896275e-06, "loss": 1.005, "step": 7790 }, { "epoch": 1.1029942662985772, "grad_norm": 10.364650850060139, "learning_rate": 2.2049184753193438e-06, "loss": 1.1171, "step": 7791 }, { "epoch": 1.1031358391732144, "grad_norm": 8.53594461462041, "learning_rate": 2.2043492902546284e-06, "loss": 1.0729, "step": 7792 }, { "epoch": 1.1032774120478517, "grad_norm": 10.022475733890593, "learning_rate": 2.20378012073205e-06, "loss": 1.0861, "step": 7793 }, { "epoch": 1.103418984922489, "grad_norm": 6.9351247209419045, "learning_rate": 2.20321096678153e-06, "loss": 0.946, "step": 7794 }, { "epoch": 1.1035605577971261, "grad_norm": 11.310217189710283, "learning_rate": 2.202641828432988e-06, "loss": 1.1024, "step": 7795 }, { "epoch": 1.1037021306717634, "grad_norm": 11.556890003066082, "learning_rate": 2.202072705716344e-06, "loss": 1.0444, "step": 7796 }, { "epoch": 1.1038437035464006, "grad_norm": 9.071852915464635, "learning_rate": 2.201503598661515e-06, "loss": 1.0538, "step": 7797 }, { "epoch": 1.1039852764210378, "grad_norm": 9.933639832276123, "learning_rate": 2.2009345072984198e-06, "loss": 1.0406, "step": 7798 }, { "epoch": 1.104126849295675, "grad_norm": 10.296545275212079, "learning_rate": 2.2003654316569746e-06, "loss": 1.1622, "step": 7799 }, { "epoch": 1.104268422170312, "grad_norm": 8.596945428646444, "learning_rate": 2.1997963717670952e-06, "loss": 0.9745, "step": 7800 }, { "epoch": 1.1044099950449493, "grad_norm": 8.02059146136597, "learning_rate": 2.1992273276586966e-06, "loss": 1.0064, "step": 7801 }, { "epoch": 1.1045515679195865, "grad_norm": 8.61662881376805, "learning_rate": 2.1986582993616926e-06, "loss": 0.9164, "step": 7802 }, { "epoch": 1.1046931407942238, "grad_norm": 9.493205803146713, "learning_rate": 2.198089286905998e-06, "loss": 1.0304, "step": 7803 }, { "epoch": 1.104834713668861, "grad_norm": 9.839652622573125, "learning_rate": 2.197520290321524e-06, "loss": 1.0535, "step": 7804 }, { "epoch": 1.1049762865434982, "grad_norm": 11.799986655017612, "learning_rate": 2.1969513096381823e-06, "loss": 0.9748, "step": 7805 }, { "epoch": 1.1051178594181355, "grad_norm": 11.471650715638162, "learning_rate": 2.1963823448858852e-06, "loss": 1.0501, "step": 7806 }, { "epoch": 1.1052594322927727, "grad_norm": 10.218384155817365, "learning_rate": 2.195813396094541e-06, "loss": 1.0938, "step": 7807 }, { "epoch": 1.10540100516741, "grad_norm": 8.75339158948391, "learning_rate": 2.19524446329406e-06, "loss": 0.8999, "step": 7808 }, { "epoch": 1.1055425780420471, "grad_norm": 8.846795625913355, "learning_rate": 2.1946755465143505e-06, "loss": 1.0111, "step": 7809 }, { "epoch": 1.1056841509166844, "grad_norm": 11.613022407138933, "learning_rate": 2.1941066457853213e-06, "loss": 1.0618, "step": 7810 }, { "epoch": 1.1058257237913216, "grad_norm": 10.392189584977613, "learning_rate": 2.1935377611368758e-06, "loss": 0.9997, "step": 7811 }, { "epoch": 1.1059672966659588, "grad_norm": 10.425432916155463, "learning_rate": 2.192968892598922e-06, "loss": 0.9846, "step": 7812 }, { "epoch": 1.106108869540596, "grad_norm": 7.836066506446779, "learning_rate": 2.1924000402013644e-06, "loss": 0.886, "step": 7813 }, { "epoch": 1.1062504424152333, "grad_norm": 8.872744851301038, "learning_rate": 2.1918312039741075e-06, "loss": 1.0536, "step": 7814 }, { "epoch": 1.1063920152898705, "grad_norm": 9.285995143792984, "learning_rate": 2.1912623839470545e-06, "loss": 1.1311, "step": 7815 }, { "epoch": 1.1065335881645078, "grad_norm": 10.056498947523584, "learning_rate": 2.190693580150108e-06, "loss": 1.0296, "step": 7816 }, { "epoch": 1.106675161039145, "grad_norm": 9.135165052671505, "learning_rate": 2.190124792613169e-06, "loss": 0.929, "step": 7817 }, { "epoch": 1.1068167339137822, "grad_norm": 9.019150175335474, "learning_rate": 2.1895560213661387e-06, "loss": 1.0635, "step": 7818 }, { "epoch": 1.1069583067884192, "grad_norm": 11.394734796347898, "learning_rate": 2.188987266438917e-06, "loss": 1.1556, "step": 7819 }, { "epoch": 1.1070998796630565, "grad_norm": 8.757656126642535, "learning_rate": 2.188418527861403e-06, "loss": 0.9113, "step": 7820 }, { "epoch": 1.1072414525376937, "grad_norm": 7.98163117146029, "learning_rate": 2.1878498056634946e-06, "loss": 0.931, "step": 7821 }, { "epoch": 1.107383025412331, "grad_norm": 10.444348675263972, "learning_rate": 2.187281099875089e-06, "loss": 1.0304, "step": 7822 }, { "epoch": 1.1075245982869681, "grad_norm": 8.888515872546098, "learning_rate": 2.186712410526083e-06, "loss": 0.9608, "step": 7823 }, { "epoch": 1.1076661711616054, "grad_norm": 8.011196407734664, "learning_rate": 2.186143737646372e-06, "loss": 1.0289, "step": 7824 }, { "epoch": 1.1078077440362426, "grad_norm": 10.80088720562857, "learning_rate": 2.18557508126585e-06, "loss": 1.1047, "step": 7825 }, { "epoch": 1.1079493169108798, "grad_norm": 10.38545564257094, "learning_rate": 2.1850064414144124e-06, "loss": 1.0457, "step": 7826 }, { "epoch": 1.108090889785517, "grad_norm": 10.136529080595478, "learning_rate": 2.1844378181219507e-06, "loss": 0.9977, "step": 7827 }, { "epoch": 1.1082324626601543, "grad_norm": 9.23349175529589, "learning_rate": 2.183869211418358e-06, "loss": 0.8995, "step": 7828 }, { "epoch": 1.1083740355347915, "grad_norm": 9.801688488673975, "learning_rate": 2.1833006213335243e-06, "loss": 1.0122, "step": 7829 }, { "epoch": 1.1085156084094288, "grad_norm": 10.161674482874288, "learning_rate": 2.1827320478973414e-06, "loss": 1.0053, "step": 7830 }, { "epoch": 1.108657181284066, "grad_norm": 8.761062168889595, "learning_rate": 2.1821634911396993e-06, "loss": 1.0247, "step": 7831 }, { "epoch": 1.1087987541587032, "grad_norm": 7.865806905456412, "learning_rate": 2.1815949510904843e-06, "loss": 0.9465, "step": 7832 }, { "epoch": 1.1089403270333404, "grad_norm": 10.398850935033302, "learning_rate": 2.1810264277795856e-06, "loss": 1.0947, "step": 7833 }, { "epoch": 1.1090818999079777, "grad_norm": 10.371144509056576, "learning_rate": 2.180457921236889e-06, "loss": 0.9703, "step": 7834 }, { "epoch": 1.109223472782615, "grad_norm": 9.485849784303518, "learning_rate": 2.1798894314922824e-06, "loss": 1.0218, "step": 7835 }, { "epoch": 1.1093650456572521, "grad_norm": 9.205507238897301, "learning_rate": 2.1793209585756483e-06, "loss": 0.9762, "step": 7836 }, { "epoch": 1.1095066185318894, "grad_norm": 8.52784431865079, "learning_rate": 2.178752502516873e-06, "loss": 1.029, "step": 7837 }, { "epoch": 1.1096481914065266, "grad_norm": 8.883766250606223, "learning_rate": 2.1781840633458394e-06, "loss": 0.8951, "step": 7838 }, { "epoch": 1.1097897642811638, "grad_norm": 8.201810720607961, "learning_rate": 2.177615641092429e-06, "loss": 0.9395, "step": 7839 }, { "epoch": 1.109931337155801, "grad_norm": 9.167014190559643, "learning_rate": 2.1770472357865247e-06, "loss": 1.0432, "step": 7840 }, { "epoch": 1.110072910030438, "grad_norm": 8.607058232405286, "learning_rate": 2.1764788474580062e-06, "loss": 1.0063, "step": 7841 }, { "epoch": 1.1102144829050753, "grad_norm": 8.790181352395855, "learning_rate": 2.175910476136754e-06, "loss": 1.0843, "step": 7842 }, { "epoch": 1.1103560557797125, "grad_norm": 7.807874118759506, "learning_rate": 2.1753421218526458e-06, "loss": 0.9903, "step": 7843 }, { "epoch": 1.1104976286543498, "grad_norm": 9.813986890356132, "learning_rate": 2.1747737846355603e-06, "loss": 1.0126, "step": 7844 }, { "epoch": 1.110639201528987, "grad_norm": 8.732767192554459, "learning_rate": 2.1742054645153744e-06, "loss": 1.0369, "step": 7845 }, { "epoch": 1.1107807744036242, "grad_norm": 12.243096099419025, "learning_rate": 2.173637161521964e-06, "loss": 1.1151, "step": 7846 }, { "epoch": 1.1109223472782614, "grad_norm": 11.19269019412841, "learning_rate": 2.1730688756852046e-06, "loss": 1.1045, "step": 7847 }, { "epoch": 1.1110639201528987, "grad_norm": 9.477838064014165, "learning_rate": 2.172500607034971e-06, "loss": 1.1106, "step": 7848 }, { "epoch": 1.111205493027536, "grad_norm": 8.211750952064394, "learning_rate": 2.1719323556011364e-06, "loss": 0.9405, "step": 7849 }, { "epoch": 1.1113470659021731, "grad_norm": 9.046782656183504, "learning_rate": 2.171364121413573e-06, "loss": 1.0377, "step": 7850 }, { "epoch": 1.1114886387768104, "grad_norm": 9.894427540292448, "learning_rate": 2.170795904502153e-06, "loss": 1.0051, "step": 7851 }, { "epoch": 1.1116302116514476, "grad_norm": 10.025012207023819, "learning_rate": 2.170227704896746e-06, "loss": 1.1424, "step": 7852 }, { "epoch": 1.1117717845260848, "grad_norm": 8.998719442182932, "learning_rate": 2.169659522627224e-06, "loss": 1.0936, "step": 7853 }, { "epoch": 1.111913357400722, "grad_norm": 9.21873303427994, "learning_rate": 2.1690913577234542e-06, "loss": 1.0375, "step": 7854 }, { "epoch": 1.1120549302753593, "grad_norm": 9.00291226210823, "learning_rate": 2.1685232102153045e-06, "loss": 1.0632, "step": 7855 }, { "epoch": 1.1121965031499965, "grad_norm": 9.883803680532738, "learning_rate": 2.1679550801326428e-06, "loss": 1.0728, "step": 7856 }, { "epoch": 1.1123380760246337, "grad_norm": 9.28426345557889, "learning_rate": 2.167386967505335e-06, "loss": 1.0415, "step": 7857 }, { "epoch": 1.112479648899271, "grad_norm": 8.567408136521475, "learning_rate": 2.1668188723632454e-06, "loss": 0.8916, "step": 7858 }, { "epoch": 1.1126212217739082, "grad_norm": 10.247226246743708, "learning_rate": 2.1662507947362397e-06, "loss": 1.1484, "step": 7859 }, { "epoch": 1.1127627946485454, "grad_norm": 9.908659921647887, "learning_rate": 2.165682734654181e-06, "loss": 1.0896, "step": 7860 }, { "epoch": 1.1129043675231824, "grad_norm": 9.299683218862425, "learning_rate": 2.165114692146932e-06, "loss": 1.1774, "step": 7861 }, { "epoch": 1.1130459403978197, "grad_norm": 9.145647908780806, "learning_rate": 2.1645466672443535e-06, "loss": 1.0031, "step": 7862 }, { "epoch": 1.113187513272457, "grad_norm": 9.504962227478908, "learning_rate": 2.163978659976308e-06, "loss": 0.9978, "step": 7863 }, { "epoch": 1.1133290861470941, "grad_norm": 10.092625227579543, "learning_rate": 2.163410670372652e-06, "loss": 1.0091, "step": 7864 }, { "epoch": 1.1134706590217314, "grad_norm": 8.239877675454006, "learning_rate": 2.1628426984632465e-06, "loss": 1.0185, "step": 7865 }, { "epoch": 1.1136122318963686, "grad_norm": 8.629191831229402, "learning_rate": 2.1622747442779495e-06, "loss": 1.0476, "step": 7866 }, { "epoch": 1.1137538047710058, "grad_norm": 9.088570814259015, "learning_rate": 2.161706807846617e-06, "loss": 1.0619, "step": 7867 }, { "epoch": 1.113895377645643, "grad_norm": 8.060916715603657, "learning_rate": 2.161138889199105e-06, "loss": 1.0739, "step": 7868 }, { "epoch": 1.1140369505202803, "grad_norm": 8.998134843625234, "learning_rate": 2.1605709883652693e-06, "loss": 1.0203, "step": 7869 }, { "epoch": 1.1141785233949175, "grad_norm": 8.444906869016803, "learning_rate": 2.160003105374964e-06, "loss": 0.9814, "step": 7870 }, { "epoch": 1.1143200962695547, "grad_norm": 10.015457794650516, "learning_rate": 2.1594352402580413e-06, "loss": 1.0041, "step": 7871 }, { "epoch": 1.114461669144192, "grad_norm": 7.8688240659832065, "learning_rate": 2.1588673930443544e-06, "loss": 1.1133, "step": 7872 }, { "epoch": 1.1146032420188292, "grad_norm": 11.050582069497834, "learning_rate": 2.1582995637637543e-06, "loss": 0.9915, "step": 7873 }, { "epoch": 1.1147448148934664, "grad_norm": 7.951269026902787, "learning_rate": 2.1577317524460917e-06, "loss": 1.0291, "step": 7874 }, { "epoch": 1.1148863877681037, "grad_norm": 9.644200186070584, "learning_rate": 2.157163959121215e-06, "loss": 0.9561, "step": 7875 }, { "epoch": 1.115027960642741, "grad_norm": 8.964656094849694, "learning_rate": 2.1565961838189738e-06, "loss": 0.9856, "step": 7876 }, { "epoch": 1.1151695335173781, "grad_norm": 9.878142424257014, "learning_rate": 2.156028426569215e-06, "loss": 1.0686, "step": 7877 }, { "epoch": 1.1153111063920154, "grad_norm": 9.01360818131613, "learning_rate": 2.155460687401785e-06, "loss": 0.9961, "step": 7878 }, { "epoch": 1.1154526792666526, "grad_norm": 9.991307486048147, "learning_rate": 2.1548929663465305e-06, "loss": 1.1562, "step": 7879 }, { "epoch": 1.1155942521412898, "grad_norm": 10.60197844845406, "learning_rate": 2.154325263433295e-06, "loss": 0.9516, "step": 7880 }, { "epoch": 1.115735825015927, "grad_norm": 9.548024509483174, "learning_rate": 2.1537575786919222e-06, "loss": 1.0218, "step": 7881 }, { "epoch": 1.1158773978905643, "grad_norm": 11.71265727032219, "learning_rate": 2.1531899121522557e-06, "loss": 1.1345, "step": 7882 }, { "epoch": 1.1160189707652013, "grad_norm": 8.893298131061908, "learning_rate": 2.152622263844137e-06, "loss": 1.0003, "step": 7883 }, { "epoch": 1.1161605436398385, "grad_norm": 9.550234039794194, "learning_rate": 2.152054633797408e-06, "loss": 1.0498, "step": 7884 }, { "epoch": 1.1163021165144758, "grad_norm": 10.2149105966981, "learning_rate": 2.1514870220419063e-06, "loss": 1.0762, "step": 7885 }, { "epoch": 1.116443689389113, "grad_norm": 8.609192889549673, "learning_rate": 2.150919428607472e-06, "loss": 1.038, "step": 7886 }, { "epoch": 1.1165852622637502, "grad_norm": 10.285628386547529, "learning_rate": 2.1503518535239427e-06, "loss": 1.0682, "step": 7887 }, { "epoch": 1.1167268351383874, "grad_norm": 11.22115340561929, "learning_rate": 2.149784296821156e-06, "loss": 1.0644, "step": 7888 }, { "epoch": 1.1168684080130247, "grad_norm": 8.99617262096342, "learning_rate": 2.1492167585289476e-06, "loss": 1.077, "step": 7889 }, { "epoch": 1.117009980887662, "grad_norm": 9.741669837640051, "learning_rate": 2.148649238677153e-06, "loss": 1.0438, "step": 7890 }, { "epoch": 1.1171515537622991, "grad_norm": 8.135227749853852, "learning_rate": 2.148081737295606e-06, "loss": 1.0089, "step": 7891 }, { "epoch": 1.1172931266369364, "grad_norm": 9.527539540961762, "learning_rate": 2.147514254414139e-06, "loss": 1.0526, "step": 7892 }, { "epoch": 1.1174346995115736, "grad_norm": 8.062439910901324, "learning_rate": 2.146946790062586e-06, "loss": 0.9349, "step": 7893 }, { "epoch": 1.1175762723862108, "grad_norm": 7.62080943536809, "learning_rate": 2.146379344270776e-06, "loss": 1.0176, "step": 7894 }, { "epoch": 1.117717845260848, "grad_norm": 7.9262274047356245, "learning_rate": 2.145811917068541e-06, "loss": 0.9441, "step": 7895 }, { "epoch": 1.1178594181354853, "grad_norm": 8.196296916418405, "learning_rate": 2.145244508485709e-06, "loss": 0.932, "step": 7896 }, { "epoch": 1.1180009910101225, "grad_norm": 9.486013858483439, "learning_rate": 2.1446771185521086e-06, "loss": 1.0541, "step": 7897 }, { "epoch": 1.1181425638847597, "grad_norm": 9.564372758175217, "learning_rate": 2.1441097472975667e-06, "loss": 1.0544, "step": 7898 }, { "epoch": 1.118284136759397, "grad_norm": 9.37944149347349, "learning_rate": 2.143542394751911e-06, "loss": 1.1405, "step": 7899 }, { "epoch": 1.1184257096340342, "grad_norm": 8.286115721199963, "learning_rate": 2.142975060944965e-06, "loss": 1.0873, "step": 7900 }, { "epoch": 1.1185672825086714, "grad_norm": 10.514465177378666, "learning_rate": 2.1424077459065544e-06, "loss": 0.9992, "step": 7901 }, { "epoch": 1.1187088553833084, "grad_norm": 9.345594345304754, "learning_rate": 2.1418404496665015e-06, "loss": 0.9512, "step": 7902 }, { "epoch": 1.1188504282579457, "grad_norm": 10.627173347343694, "learning_rate": 2.1412731722546294e-06, "loss": 1.1375, "step": 7903 }, { "epoch": 1.118992001132583, "grad_norm": 7.8341594287503575, "learning_rate": 2.1407059137007587e-06, "loss": 0.9872, "step": 7904 }, { "epoch": 1.1191335740072201, "grad_norm": 7.246573855388598, "learning_rate": 2.14013867403471e-06, "loss": 0.9653, "step": 7905 }, { "epoch": 1.1192751468818574, "grad_norm": 7.9611737307014065, "learning_rate": 2.139571453286305e-06, "loss": 0.9162, "step": 7906 }, { "epoch": 1.1194167197564946, "grad_norm": 10.936081799796302, "learning_rate": 2.139004251485358e-06, "loss": 1.0329, "step": 7907 }, { "epoch": 1.1195582926311318, "grad_norm": 9.701249535689252, "learning_rate": 2.138437068661689e-06, "loss": 0.9006, "step": 7908 }, { "epoch": 1.119699865505769, "grad_norm": 10.442739667710677, "learning_rate": 2.1378699048451136e-06, "loss": 1.1187, "step": 7909 }, { "epoch": 1.1198414383804063, "grad_norm": 9.723252998083826, "learning_rate": 2.1373027600654465e-06, "loss": 0.9159, "step": 7910 }, { "epoch": 1.1199830112550435, "grad_norm": 8.813965587627559, "learning_rate": 2.1367356343525035e-06, "loss": 1.0564, "step": 7911 }, { "epoch": 1.1201245841296807, "grad_norm": 8.378785601549803, "learning_rate": 2.1361685277360973e-06, "loss": 0.9797, "step": 7912 }, { "epoch": 1.120266157004318, "grad_norm": 8.817735347867902, "learning_rate": 2.1356014402460403e-06, "loss": 0.9677, "step": 7913 }, { "epoch": 1.1204077298789552, "grad_norm": 10.871655881756382, "learning_rate": 2.1350343719121437e-06, "loss": 1.0749, "step": 7914 }, { "epoch": 1.1205493027535924, "grad_norm": 7.10053939919507, "learning_rate": 2.134467322764218e-06, "loss": 0.9512, "step": 7915 }, { "epoch": 1.1206908756282297, "grad_norm": 9.676013461711928, "learning_rate": 2.1339002928320737e-06, "loss": 1.0426, "step": 7916 }, { "epoch": 1.120832448502867, "grad_norm": 10.29967316044126, "learning_rate": 2.133333282145517e-06, "loss": 0.9794, "step": 7917 }, { "epoch": 1.1209740213775041, "grad_norm": 10.862816661788816, "learning_rate": 2.1327662907343564e-06, "loss": 1.0977, "step": 7918 }, { "epoch": 1.1211155942521414, "grad_norm": 9.01062825272406, "learning_rate": 2.1321993186283985e-06, "loss": 0.9615, "step": 7919 }, { "epoch": 1.1212571671267786, "grad_norm": 11.193598784464207, "learning_rate": 2.1316323658574477e-06, "loss": 1.0648, "step": 7920 }, { "epoch": 1.1213987400014158, "grad_norm": 10.931330221327816, "learning_rate": 2.1310654324513087e-06, "loss": 1.0338, "step": 7921 }, { "epoch": 1.121540312876053, "grad_norm": 9.76840038686806, "learning_rate": 2.130498518439785e-06, "loss": 1.0693, "step": 7922 }, { "epoch": 1.1216818857506903, "grad_norm": 10.560737056441324, "learning_rate": 2.1299316238526786e-06, "loss": 1.1267, "step": 7923 }, { "epoch": 1.1218234586253273, "grad_norm": 8.199281558686302, "learning_rate": 2.129364748719791e-06, "loss": 1.1759, "step": 7924 }, { "epoch": 1.1219650314999645, "grad_norm": 7.599564449478493, "learning_rate": 2.128797893070922e-06, "loss": 0.919, "step": 7925 }, { "epoch": 1.1221066043746017, "grad_norm": 8.951948105185245, "learning_rate": 2.1282310569358704e-06, "loss": 1.0479, "step": 7926 }, { "epoch": 1.122248177249239, "grad_norm": 9.727749205970403, "learning_rate": 2.127664240344437e-06, "loss": 1.0551, "step": 7927 }, { "epoch": 1.1223897501238762, "grad_norm": 7.470684232002254, "learning_rate": 2.1270974433264152e-06, "loss": 0.9578, "step": 7928 }, { "epoch": 1.1225313229985134, "grad_norm": 8.669891295494056, "learning_rate": 2.126530665911603e-06, "loss": 1.0469, "step": 7929 }, { "epoch": 1.1226728958731507, "grad_norm": 8.567713577284362, "learning_rate": 2.125963908129795e-06, "loss": 0.9389, "step": 7930 }, { "epoch": 1.122814468747788, "grad_norm": 9.16401837558644, "learning_rate": 2.125397170010786e-06, "loss": 1.0092, "step": 7931 }, { "epoch": 1.1229560416224251, "grad_norm": 10.13174326953104, "learning_rate": 2.124830451584368e-06, "loss": 1.0343, "step": 7932 }, { "epoch": 1.1230976144970624, "grad_norm": 9.618365094054163, "learning_rate": 2.124263752880333e-06, "loss": 1.0939, "step": 7933 }, { "epoch": 1.1232391873716996, "grad_norm": 11.733338575650981, "learning_rate": 2.123697073928473e-06, "loss": 1.0159, "step": 7934 }, { "epoch": 1.1233807602463368, "grad_norm": 8.545054686467873, "learning_rate": 2.123130414758577e-06, "loss": 0.9889, "step": 7935 }, { "epoch": 1.123522333120974, "grad_norm": 8.334928843029157, "learning_rate": 2.122563775400434e-06, "loss": 0.896, "step": 7936 }, { "epoch": 1.1236639059956113, "grad_norm": 9.977700738342767, "learning_rate": 2.1219971558838333e-06, "loss": 1.12, "step": 7937 }, { "epoch": 1.1238054788702485, "grad_norm": 8.392824705113393, "learning_rate": 2.1214305562385592e-06, "loss": 0.8673, "step": 7938 }, { "epoch": 1.1239470517448857, "grad_norm": 8.691418978221018, "learning_rate": 2.120863976494398e-06, "loss": 0.9854, "step": 7939 }, { "epoch": 1.124088624619523, "grad_norm": 9.860537597980722, "learning_rate": 2.1202974166811354e-06, "loss": 1.115, "step": 7940 }, { "epoch": 1.1242301974941602, "grad_norm": 8.813308570507694, "learning_rate": 2.119730876828554e-06, "loss": 1.0559, "step": 7941 }, { "epoch": 1.1243717703687974, "grad_norm": 8.64098436046342, "learning_rate": 2.1191643569664373e-06, "loss": 1.0031, "step": 7942 }, { "epoch": 1.1245133432434347, "grad_norm": 9.451181360103217, "learning_rate": 2.1185978571245665e-06, "loss": 1.0964, "step": 7943 }, { "epoch": 1.1246549161180717, "grad_norm": 8.222707745533526, "learning_rate": 2.1180313773327214e-06, "loss": 1.0209, "step": 7944 }, { "epoch": 1.124796488992709, "grad_norm": 8.393217854734337, "learning_rate": 2.1174649176206826e-06, "loss": 0.9588, "step": 7945 }, { "epoch": 1.1249380618673461, "grad_norm": 8.402324681812393, "learning_rate": 2.116898478018227e-06, "loss": 1.0144, "step": 7946 }, { "epoch": 1.1250796347419834, "grad_norm": 9.69474472362979, "learning_rate": 2.1163320585551335e-06, "loss": 1.0302, "step": 7947 }, { "epoch": 1.1252212076166206, "grad_norm": 9.926973345621892, "learning_rate": 2.115765659261178e-06, "loss": 1.2149, "step": 7948 }, { "epoch": 1.1253627804912578, "grad_norm": 8.64509651690605, "learning_rate": 2.115199280166135e-06, "loss": 0.9183, "step": 7949 }, { "epoch": 1.125504353365895, "grad_norm": 8.126274126360604, "learning_rate": 2.1146329212997784e-06, "loss": 0.994, "step": 7950 }, { "epoch": 1.1256459262405323, "grad_norm": 8.090560932870853, "learning_rate": 2.1140665826918823e-06, "loss": 0.9284, "step": 7951 }, { "epoch": 1.1257874991151695, "grad_norm": 8.756191134203897, "learning_rate": 2.113500264372218e-06, "loss": 1.1091, "step": 7952 }, { "epoch": 1.1259290719898067, "grad_norm": 9.883311962428369, "learning_rate": 2.1129339663705565e-06, "loss": 1.1051, "step": 7953 }, { "epoch": 1.126070644864444, "grad_norm": 9.553291624323785, "learning_rate": 2.1123676887166678e-06, "loss": 1.0834, "step": 7954 }, { "epoch": 1.1262122177390812, "grad_norm": 8.609951436152828, "learning_rate": 2.11180143144032e-06, "loss": 1.0195, "step": 7955 }, { "epoch": 1.1263537906137184, "grad_norm": 9.452036594754286, "learning_rate": 2.1112351945712824e-06, "loss": 1.1198, "step": 7956 }, { "epoch": 1.1264953634883557, "grad_norm": 10.486633148560145, "learning_rate": 2.1106689781393203e-06, "loss": 1.1534, "step": 7957 }, { "epoch": 1.126636936362993, "grad_norm": 8.843902694408394, "learning_rate": 2.1101027821742013e-06, "loss": 1.0617, "step": 7958 }, { "epoch": 1.1267785092376301, "grad_norm": 9.135781803177736, "learning_rate": 2.1095366067056868e-06, "loss": 0.9613, "step": 7959 }, { "epoch": 1.1269200821122674, "grad_norm": 8.07449418600715, "learning_rate": 2.108970451763542e-06, "loss": 0.9439, "step": 7960 }, { "epoch": 1.1270616549869046, "grad_norm": 9.810047013134154, "learning_rate": 2.1084043173775284e-06, "loss": 1.1328, "step": 7961 }, { "epoch": 1.1272032278615418, "grad_norm": 8.86066751622065, "learning_rate": 2.1078382035774085e-06, "loss": 0.8801, "step": 7962 }, { "epoch": 1.127344800736179, "grad_norm": 8.685554121806623, "learning_rate": 2.1072721103929415e-06, "loss": 1.0805, "step": 7963 }, { "epoch": 1.1274863736108163, "grad_norm": 8.932267798270466, "learning_rate": 2.106706037853887e-06, "loss": 0.9953, "step": 7964 }, { "epoch": 1.1276279464854535, "grad_norm": 12.640280361687802, "learning_rate": 2.106139985990003e-06, "loss": 1.235, "step": 7965 }, { "epoch": 1.1277695193600905, "grad_norm": 9.746966941720295, "learning_rate": 2.105573954831046e-06, "loss": 1.0546, "step": 7966 }, { "epoch": 1.1279110922347277, "grad_norm": 9.411191535775206, "learning_rate": 2.105007944406772e-06, "loss": 1.0139, "step": 7967 }, { "epoch": 1.128052665109365, "grad_norm": 7.278335184327521, "learning_rate": 2.104441954746936e-06, "loss": 0.8612, "step": 7968 }, { "epoch": 1.1281942379840022, "grad_norm": 11.216958925729553, "learning_rate": 2.1038759858812924e-06, "loss": 1.0682, "step": 7969 }, { "epoch": 1.1283358108586394, "grad_norm": 10.717381159179364, "learning_rate": 2.103310037839592e-06, "loss": 0.9938, "step": 7970 }, { "epoch": 1.1284773837332767, "grad_norm": 8.87819173667609, "learning_rate": 2.1027441106515872e-06, "loss": 0.9956, "step": 7971 }, { "epoch": 1.128618956607914, "grad_norm": 9.42883468854897, "learning_rate": 2.1021782043470282e-06, "loss": 1.0717, "step": 7972 }, { "epoch": 1.1287605294825511, "grad_norm": 9.333884449990864, "learning_rate": 2.1016123189556644e-06, "loss": 1.0782, "step": 7973 }, { "epoch": 1.1289021023571884, "grad_norm": 8.99169411453239, "learning_rate": 2.101046454507244e-06, "loss": 0.9551, "step": 7974 }, { "epoch": 1.1290436752318256, "grad_norm": 10.829521760675371, "learning_rate": 2.100480611031514e-06, "loss": 1.1676, "step": 7975 }, { "epoch": 1.1291852481064628, "grad_norm": 8.839318556951635, "learning_rate": 2.0999147885582204e-06, "loss": 1.0783, "step": 7976 }, { "epoch": 1.1293268209811, "grad_norm": 11.716345456433597, "learning_rate": 2.099348987117108e-06, "loss": 1.0465, "step": 7977 }, { "epoch": 1.1294683938557373, "grad_norm": 8.572201321600355, "learning_rate": 2.09878320673792e-06, "loss": 1.0768, "step": 7978 }, { "epoch": 1.1296099667303745, "grad_norm": 9.357807722359567, "learning_rate": 2.0982174474504004e-06, "loss": 0.9845, "step": 7979 }, { "epoch": 1.1297515396050117, "grad_norm": 10.147456379392803, "learning_rate": 2.097651709284291e-06, "loss": 1.0364, "step": 7980 }, { "epoch": 1.129893112479649, "grad_norm": 8.505086386733575, "learning_rate": 2.09708599226933e-06, "loss": 0.9784, "step": 7981 }, { "epoch": 1.1300346853542862, "grad_norm": 9.681298812240769, "learning_rate": 2.096520296435258e-06, "loss": 1.2178, "step": 7982 }, { "epoch": 1.1301762582289234, "grad_norm": 10.40769274908589, "learning_rate": 2.0959546218118133e-06, "loss": 1.0085, "step": 7983 }, { "epoch": 1.1303178311035604, "grad_norm": 10.107909295312538, "learning_rate": 2.095388968428732e-06, "loss": 1.0502, "step": 7984 }, { "epoch": 1.1304594039781977, "grad_norm": 9.561202615726977, "learning_rate": 2.094823336315752e-06, "loss": 1.1084, "step": 7985 }, { "epoch": 1.130600976852835, "grad_norm": 8.687706814506589, "learning_rate": 2.0942577255026068e-06, "loss": 0.9856, "step": 7986 }, { "epoch": 1.1307425497274721, "grad_norm": 10.15867872102143, "learning_rate": 2.0936921360190305e-06, "loss": 1.0634, "step": 7987 }, { "epoch": 1.1308841226021094, "grad_norm": 9.78312728465563, "learning_rate": 2.0931265678947555e-06, "loss": 1.1421, "step": 7988 }, { "epoch": 1.1310256954767466, "grad_norm": 9.323734342779536, "learning_rate": 2.0925610211595137e-06, "loss": 0.9878, "step": 7989 }, { "epoch": 1.1311672683513838, "grad_norm": 9.468234309389475, "learning_rate": 2.0919954958430357e-06, "loss": 1.1031, "step": 7990 }, { "epoch": 1.131308841226021, "grad_norm": 7.9022341695627745, "learning_rate": 2.0914299919750497e-06, "loss": 1.0088, "step": 7991 }, { "epoch": 1.1314504141006583, "grad_norm": 8.476129561236496, "learning_rate": 2.090864509585284e-06, "loss": 0.9829, "step": 7992 }, { "epoch": 1.1315919869752955, "grad_norm": 8.070563017953445, "learning_rate": 2.0902990487034664e-06, "loss": 1.0507, "step": 7993 }, { "epoch": 1.1317335598499327, "grad_norm": 9.435109625231533, "learning_rate": 2.0897336093593223e-06, "loss": 1.1032, "step": 7994 }, { "epoch": 1.13187513272457, "grad_norm": 8.201140943097776, "learning_rate": 2.0891681915825763e-06, "loss": 1.014, "step": 7995 }, { "epoch": 1.1320167055992072, "grad_norm": 10.103534314270846, "learning_rate": 2.0886027954029525e-06, "loss": 1.0338, "step": 7996 }, { "epoch": 1.1321582784738444, "grad_norm": 8.456362740610247, "learning_rate": 2.0880374208501724e-06, "loss": 1.0103, "step": 7997 }, { "epoch": 1.1322998513484817, "grad_norm": 8.061569751627257, "learning_rate": 2.0874720679539585e-06, "loss": 1.0092, "step": 7998 }, { "epoch": 1.1324414242231189, "grad_norm": 8.685051662223334, "learning_rate": 2.0869067367440297e-06, "loss": 1.0877, "step": 7999 }, { "epoch": 1.1325829970977561, "grad_norm": 11.152793199130796, "learning_rate": 2.0863414272501067e-06, "loss": 0.9724, "step": 8000 }, { "epoch": 1.1327245699723933, "grad_norm": 8.752993262435776, "learning_rate": 2.0857761395019064e-06, "loss": 1.1512, "step": 8001 }, { "epoch": 1.1328661428470306, "grad_norm": 7.755569917555624, "learning_rate": 2.0852108735291448e-06, "loss": 1.0533, "step": 8002 }, { "epoch": 1.1330077157216678, "grad_norm": 9.39111570577313, "learning_rate": 2.0846456293615384e-06, "loss": 1.0496, "step": 8003 }, { "epoch": 1.133149288596305, "grad_norm": 8.627694413971724, "learning_rate": 2.084080407028802e-06, "loss": 0.9824, "step": 8004 }, { "epoch": 1.1332908614709423, "grad_norm": 9.60940636110228, "learning_rate": 2.083515206560648e-06, "loss": 0.9822, "step": 8005 }, { "epoch": 1.1334324343455795, "grad_norm": 7.814035249543191, "learning_rate": 2.0829500279867895e-06, "loss": 0.9972, "step": 8006 }, { "epoch": 1.1335740072202167, "grad_norm": 7.174974224828485, "learning_rate": 2.082384871336936e-06, "loss": 0.8575, "step": 8007 }, { "epoch": 1.1337155800948537, "grad_norm": 9.122722602819199, "learning_rate": 2.081819736640799e-06, "loss": 1.045, "step": 8008 }, { "epoch": 1.133857152969491, "grad_norm": 8.935204698024856, "learning_rate": 2.0812546239280873e-06, "loss": 1.1077, "step": 8009 }, { "epoch": 1.1339987258441282, "grad_norm": 8.292838111312054, "learning_rate": 2.0806895332285078e-06, "loss": 0.9519, "step": 8010 }, { "epoch": 1.1341402987187654, "grad_norm": 8.12898837281319, "learning_rate": 2.080124464571767e-06, "loss": 1.071, "step": 8011 }, { "epoch": 1.1342818715934027, "grad_norm": 8.235889273863974, "learning_rate": 2.0795594179875697e-06, "loss": 1.0817, "step": 8012 }, { "epoch": 1.1344234444680399, "grad_norm": 8.647352362478319, "learning_rate": 2.0789943935056196e-06, "loss": 0.9052, "step": 8013 }, { "epoch": 1.1345650173426771, "grad_norm": 10.492335110419221, "learning_rate": 2.078429391155621e-06, "loss": 1.0947, "step": 8014 }, { "epoch": 1.1347065902173143, "grad_norm": 10.051136115844729, "learning_rate": 2.0778644109672747e-06, "loss": 1.0322, "step": 8015 }, { "epoch": 1.1348481630919516, "grad_norm": 8.969945954124912, "learning_rate": 2.077299452970282e-06, "loss": 0.9908, "step": 8016 }, { "epoch": 1.1349897359665888, "grad_norm": 9.964929117504777, "learning_rate": 2.0767345171943412e-06, "loss": 1.0698, "step": 8017 }, { "epoch": 1.135131308841226, "grad_norm": 7.516043732650395, "learning_rate": 2.0761696036691515e-06, "loss": 1.0633, "step": 8018 }, { "epoch": 1.1352728817158633, "grad_norm": 9.581862051986596, "learning_rate": 2.07560471242441e-06, "loss": 0.9925, "step": 8019 }, { "epoch": 1.1354144545905005, "grad_norm": 8.488256981960246, "learning_rate": 2.075039843489812e-06, "loss": 1.1314, "step": 8020 }, { "epoch": 1.1355560274651377, "grad_norm": 9.457431386103288, "learning_rate": 2.0744749968950527e-06, "loss": 1.0696, "step": 8021 }, { "epoch": 1.135697600339775, "grad_norm": 9.089939013655298, "learning_rate": 2.073910172669826e-06, "loss": 1.1089, "step": 8022 }, { "epoch": 1.1358391732144122, "grad_norm": 7.467943730588255, "learning_rate": 2.0733453708438233e-06, "loss": 0.9835, "step": 8023 }, { "epoch": 1.1359807460890494, "grad_norm": 8.916624645478768, "learning_rate": 2.072780591446736e-06, "loss": 0.9835, "step": 8024 }, { "epoch": 1.1361223189636867, "grad_norm": 10.029849706343358, "learning_rate": 2.072215834508255e-06, "loss": 1.1443, "step": 8025 }, { "epoch": 1.1362638918383237, "grad_norm": 9.251054033591085, "learning_rate": 2.0716511000580684e-06, "loss": 0.9697, "step": 8026 }, { "epoch": 1.1364054647129609, "grad_norm": 8.842683222293118, "learning_rate": 2.071086388125864e-06, "loss": 1.0331, "step": 8027 }, { "epoch": 1.1365470375875981, "grad_norm": 9.931943863672046, "learning_rate": 2.0705216987413284e-06, "loss": 1.1126, "step": 8028 }, { "epoch": 1.1366886104622353, "grad_norm": 8.02191355643612, "learning_rate": 2.069957031934147e-06, "loss": 1.028, "step": 8029 }, { "epoch": 1.1368301833368726, "grad_norm": 10.592853699523642, "learning_rate": 2.0693923877340032e-06, "loss": 1.0255, "step": 8030 }, { "epoch": 1.1369717562115098, "grad_norm": 8.438373887759946, "learning_rate": 2.0688277661705807e-06, "loss": 0.923, "step": 8031 }, { "epoch": 1.137113329086147, "grad_norm": 9.124596573782172, "learning_rate": 2.0682631672735616e-06, "loss": 1.0087, "step": 8032 }, { "epoch": 1.1372549019607843, "grad_norm": 10.565259945330373, "learning_rate": 2.0676985910726265e-06, "loss": 0.9799, "step": 8033 }, { "epoch": 1.1373964748354215, "grad_norm": 12.358729908694286, "learning_rate": 2.0671340375974536e-06, "loss": 1.1541, "step": 8034 }, { "epoch": 1.1375380477100587, "grad_norm": 8.40083450531804, "learning_rate": 2.066569506877721e-06, "loss": 0.9223, "step": 8035 }, { "epoch": 1.137679620584696, "grad_norm": 7.7088455717330415, "learning_rate": 2.066004998943106e-06, "loss": 0.989, "step": 8036 }, { "epoch": 1.1378211934593332, "grad_norm": 9.187298208083691, "learning_rate": 2.065440513823285e-06, "loss": 0.9814, "step": 8037 }, { "epoch": 1.1379627663339704, "grad_norm": 7.9804340946231145, "learning_rate": 2.064876051547932e-06, "loss": 0.9448, "step": 8038 }, { "epoch": 1.1381043392086077, "grad_norm": 9.854569172182817, "learning_rate": 2.064311612146721e-06, "loss": 1.0189, "step": 8039 }, { "epoch": 1.1382459120832449, "grad_norm": 7.092473793344417, "learning_rate": 2.0637471956493236e-06, "loss": 0.9163, "step": 8040 }, { "epoch": 1.1383874849578821, "grad_norm": 9.049028578410537, "learning_rate": 2.0631828020854106e-06, "loss": 0.9935, "step": 8041 }, { "epoch": 1.1385290578325193, "grad_norm": 8.806393502922766, "learning_rate": 2.062618431484652e-06, "loss": 1.1103, "step": 8042 }, { "epoch": 1.1386706307071566, "grad_norm": 11.133159945236166, "learning_rate": 2.062054083876717e-06, "loss": 0.9773, "step": 8043 }, { "epoch": 1.1388122035817938, "grad_norm": 7.561351342098338, "learning_rate": 2.0614897592912716e-06, "loss": 0.9507, "step": 8044 }, { "epoch": 1.138953776456431, "grad_norm": 9.866084069587288, "learning_rate": 2.060925457757983e-06, "loss": 1.0788, "step": 8045 }, { "epoch": 1.1390953493310683, "grad_norm": 9.291410799024574, "learning_rate": 2.060361179306515e-06, "loss": 0.9243, "step": 8046 }, { "epoch": 1.1392369222057055, "grad_norm": 7.633834973417174, "learning_rate": 2.0597969239665325e-06, "loss": 1.0265, "step": 8047 }, { "epoch": 1.1393784950803427, "grad_norm": 9.38968291928471, "learning_rate": 2.0592326917676975e-06, "loss": 1.0203, "step": 8048 }, { "epoch": 1.1395200679549797, "grad_norm": 9.676870113086508, "learning_rate": 2.0586684827396708e-06, "loss": 0.9761, "step": 8049 }, { "epoch": 1.139661640829617, "grad_norm": 9.849908555279246, "learning_rate": 2.0581042969121136e-06, "loss": 0.9953, "step": 8050 }, { "epoch": 1.1398032137042542, "grad_norm": 9.767549419761359, "learning_rate": 2.0575401343146832e-06, "loss": 1.0537, "step": 8051 }, { "epoch": 1.1399447865788914, "grad_norm": 8.768162215660515, "learning_rate": 2.056975994977038e-06, "loss": 0.9714, "step": 8052 }, { "epoch": 1.1400863594535287, "grad_norm": 12.742974832933378, "learning_rate": 2.0564118789288347e-06, "loss": 0.9571, "step": 8053 }, { "epoch": 1.1402279323281659, "grad_norm": 8.553929404195076, "learning_rate": 2.0558477861997293e-06, "loss": 0.9957, "step": 8054 }, { "epoch": 1.1403695052028031, "grad_norm": 7.878918838301793, "learning_rate": 2.0552837168193738e-06, "loss": 1.0468, "step": 8055 }, { "epoch": 1.1405110780774403, "grad_norm": 8.437293834286782, "learning_rate": 2.0547196708174215e-06, "loss": 0.9866, "step": 8056 }, { "epoch": 1.1406526509520776, "grad_norm": 10.676700244016684, "learning_rate": 2.054155648223524e-06, "loss": 1.0787, "step": 8057 }, { "epoch": 1.1407942238267148, "grad_norm": 8.643597441655785, "learning_rate": 2.0535916490673313e-06, "loss": 0.9822, "step": 8058 }, { "epoch": 1.140935796701352, "grad_norm": 10.821220629761171, "learning_rate": 2.0530276733784933e-06, "loss": 1.0839, "step": 8059 }, { "epoch": 1.1410773695759893, "grad_norm": 8.0929341144338, "learning_rate": 2.052463721186657e-06, "loss": 0.9958, "step": 8060 }, { "epoch": 1.1412189424506265, "grad_norm": 10.852548509132149, "learning_rate": 2.0518997925214694e-06, "loss": 1.0119, "step": 8061 }, { "epoch": 1.1413605153252637, "grad_norm": 9.942020947351606, "learning_rate": 2.0513358874125754e-06, "loss": 0.9566, "step": 8062 }, { "epoch": 1.141502088199901, "grad_norm": 8.28661014618425, "learning_rate": 2.0507720058896195e-06, "loss": 0.9255, "step": 8063 }, { "epoch": 1.1416436610745382, "grad_norm": 10.33645603208482, "learning_rate": 2.0502081479822447e-06, "loss": 1.0833, "step": 8064 }, { "epoch": 1.1417852339491754, "grad_norm": 10.449865634182142, "learning_rate": 2.0496443137200915e-06, "loss": 1.078, "step": 8065 }, { "epoch": 1.1419268068238126, "grad_norm": 7.924155243817792, "learning_rate": 2.0490805031328013e-06, "loss": 0.9409, "step": 8066 }, { "epoch": 1.1420683796984497, "grad_norm": 9.61418356568906, "learning_rate": 2.0485167162500124e-06, "loss": 0.9887, "step": 8067 }, { "epoch": 1.1422099525730869, "grad_norm": 9.63394660801552, "learning_rate": 2.047952953101363e-06, "loss": 1.101, "step": 8068 }, { "epoch": 1.1423515254477241, "grad_norm": 10.812077332101891, "learning_rate": 2.0473892137164906e-06, "loss": 1.008, "step": 8069 }, { "epoch": 1.1424930983223613, "grad_norm": 10.885017078615808, "learning_rate": 2.0468254981250293e-06, "loss": 1.0406, "step": 8070 }, { "epoch": 1.1426346711969986, "grad_norm": 11.843984475746275, "learning_rate": 2.0462618063566135e-06, "loss": 0.9937, "step": 8071 }, { "epoch": 1.1427762440716358, "grad_norm": 8.070204727620181, "learning_rate": 2.045698138440876e-06, "loss": 1.0747, "step": 8072 }, { "epoch": 1.142917816946273, "grad_norm": 8.8077847466633, "learning_rate": 2.045134494407449e-06, "loss": 0.9604, "step": 8073 }, { "epoch": 1.1430593898209103, "grad_norm": 9.887253510577235, "learning_rate": 2.044570874285963e-06, "loss": 1.1546, "step": 8074 }, { "epoch": 1.1432009626955475, "grad_norm": 10.890931134730645, "learning_rate": 2.044007278106046e-06, "loss": 1.0558, "step": 8075 }, { "epoch": 1.1433425355701847, "grad_norm": 13.41527788702615, "learning_rate": 2.043443705897326e-06, "loss": 0.9699, "step": 8076 }, { "epoch": 1.143484108444822, "grad_norm": 10.238131467093984, "learning_rate": 2.042880157689431e-06, "loss": 1.0226, "step": 8077 }, { "epoch": 1.1436256813194592, "grad_norm": 9.233603714749467, "learning_rate": 2.0423166335119844e-06, "loss": 1.0827, "step": 8078 }, { "epoch": 1.1437672541940964, "grad_norm": 9.626435804407919, "learning_rate": 2.0417531333946113e-06, "loss": 1.1515, "step": 8079 }, { "epoch": 1.1439088270687336, "grad_norm": 8.869090087047711, "learning_rate": 2.041189657366934e-06, "loss": 1.052, "step": 8080 }, { "epoch": 1.1440503999433709, "grad_norm": 9.034394980493946, "learning_rate": 2.040626205458574e-06, "loss": 0.991, "step": 8081 }, { "epoch": 1.144191972818008, "grad_norm": 10.058958958782597, "learning_rate": 2.0400627776991526e-06, "loss": 1.0415, "step": 8082 }, { "epoch": 1.1443335456926453, "grad_norm": 13.921460694592854, "learning_rate": 2.039499374118288e-06, "loss": 1.0467, "step": 8083 }, { "epoch": 1.1444751185672826, "grad_norm": 8.230305138234975, "learning_rate": 2.0389359947455978e-06, "loss": 0.9296, "step": 8084 }, { "epoch": 1.1446166914419198, "grad_norm": 10.088149084600454, "learning_rate": 2.0383726396106983e-06, "loss": 1.0213, "step": 8085 }, { "epoch": 1.144758264316557, "grad_norm": 10.705583944273691, "learning_rate": 2.0378093087432067e-06, "loss": 1.101, "step": 8086 }, { "epoch": 1.1448998371911943, "grad_norm": 8.217631797000534, "learning_rate": 2.037246002172733e-06, "loss": 1.2336, "step": 8087 }, { "epoch": 1.1450414100658315, "grad_norm": 7.678301758957597, "learning_rate": 2.0366827199288923e-06, "loss": 0.9199, "step": 8088 }, { "epoch": 1.1451829829404687, "grad_norm": 10.33578802506713, "learning_rate": 2.036119462041296e-06, "loss": 1.0549, "step": 8089 }, { "epoch": 1.145324555815106, "grad_norm": 9.325724588423201, "learning_rate": 2.0355562285395537e-06, "loss": 1.04, "step": 8090 }, { "epoch": 1.145466128689743, "grad_norm": 11.812436704112825, "learning_rate": 2.0349930194532734e-06, "loss": 1.1286, "step": 8091 }, { "epoch": 1.1456077015643802, "grad_norm": 9.149119650592585, "learning_rate": 2.034429834812064e-06, "loss": 0.9491, "step": 8092 }, { "epoch": 1.1457492744390174, "grad_norm": 7.924789705103675, "learning_rate": 2.033866674645531e-06, "loss": 0.9858, "step": 8093 }, { "epoch": 1.1458908473136546, "grad_norm": 8.853859235624757, "learning_rate": 2.0333035389832795e-06, "loss": 1.0049, "step": 8094 }, { "epoch": 1.1460324201882919, "grad_norm": 10.188231646704645, "learning_rate": 2.0327404278549127e-06, "loss": 1.0459, "step": 8095 }, { "epoch": 1.146173993062929, "grad_norm": 8.818815524954788, "learning_rate": 2.032177341290034e-06, "loss": 1.0456, "step": 8096 }, { "epoch": 1.1463155659375663, "grad_norm": 10.423848069764974, "learning_rate": 2.031614279318243e-06, "loss": 1.0331, "step": 8097 }, { "epoch": 1.1464571388122036, "grad_norm": 10.372402003761556, "learning_rate": 2.03105124196914e-06, "loss": 1.0609, "step": 8098 }, { "epoch": 1.1465987116868408, "grad_norm": 12.180992854199632, "learning_rate": 2.030488229272323e-06, "loss": 1.0503, "step": 8099 }, { "epoch": 1.146740284561478, "grad_norm": 8.213922859849525, "learning_rate": 2.0299252412573907e-06, "loss": 0.9891, "step": 8100 }, { "epoch": 1.1468818574361153, "grad_norm": 8.187037331124762, "learning_rate": 2.0293622779539372e-06, "loss": 1.0672, "step": 8101 }, { "epoch": 1.1470234303107525, "grad_norm": 9.492834098990514, "learning_rate": 2.0287993393915585e-06, "loss": 1.0816, "step": 8102 }, { "epoch": 1.1471650031853897, "grad_norm": 8.328336158351433, "learning_rate": 2.0282364255998465e-06, "loss": 0.9507, "step": 8103 }, { "epoch": 1.147306576060027, "grad_norm": 11.574823853571637, "learning_rate": 2.027673536608394e-06, "loss": 1.179, "step": 8104 }, { "epoch": 1.1474481489346642, "grad_norm": 10.101774350664257, "learning_rate": 2.0271106724467915e-06, "loss": 1.0451, "step": 8105 }, { "epoch": 1.1475897218093014, "grad_norm": 10.570831804848034, "learning_rate": 2.0265478331446285e-06, "loss": 1.0749, "step": 8106 }, { "epoch": 1.1477312946839386, "grad_norm": 9.13600059971582, "learning_rate": 2.025985018731494e-06, "loss": 0.9994, "step": 8107 }, { "epoch": 1.1478728675585759, "grad_norm": 8.742297897119078, "learning_rate": 2.0254222292369725e-06, "loss": 0.9962, "step": 8108 }, { "epoch": 1.1480144404332129, "grad_norm": 8.370113484514553, "learning_rate": 2.024859464690651e-06, "loss": 1.0271, "step": 8109 }, { "epoch": 1.14815601330785, "grad_norm": 9.431426462351604, "learning_rate": 2.0242967251221118e-06, "loss": 0.97, "step": 8110 }, { "epoch": 1.1482975861824873, "grad_norm": 8.517266183593625, "learning_rate": 2.02373401056094e-06, "loss": 0.9592, "step": 8111 }, { "epoch": 1.1484391590571246, "grad_norm": 7.77563126629314, "learning_rate": 2.0231713210367163e-06, "loss": 1.0124, "step": 8112 }, { "epoch": 1.1485807319317618, "grad_norm": 9.49314351860047, "learning_rate": 2.0226086565790207e-06, "loss": 0.9668, "step": 8113 }, { "epoch": 1.148722304806399, "grad_norm": 10.58981132994853, "learning_rate": 2.022046017217432e-06, "loss": 1.0644, "step": 8114 }, { "epoch": 1.1488638776810363, "grad_norm": 9.590851684556291, "learning_rate": 2.0214834029815276e-06, "loss": 1.0311, "step": 8115 }, { "epoch": 1.1490054505556735, "grad_norm": 9.89141717182084, "learning_rate": 2.020920813900884e-06, "loss": 1.1153, "step": 8116 }, { "epoch": 1.1491470234303107, "grad_norm": 9.09175386838647, "learning_rate": 2.020358250005077e-06, "loss": 1.0654, "step": 8117 }, { "epoch": 1.149288596304948, "grad_norm": 9.520371930205938, "learning_rate": 2.019795711323678e-06, "loss": 0.9853, "step": 8118 }, { "epoch": 1.1494301691795852, "grad_norm": 9.488998065631101, "learning_rate": 2.0192331978862604e-06, "loss": 1.0678, "step": 8119 }, { "epoch": 1.1495717420542224, "grad_norm": 8.176968622719155, "learning_rate": 2.0186707097223952e-06, "loss": 0.9608, "step": 8120 }, { "epoch": 1.1497133149288596, "grad_norm": 8.470760092830687, "learning_rate": 2.018108246861652e-06, "loss": 1.0261, "step": 8121 }, { "epoch": 1.1498548878034969, "grad_norm": 8.87064179344633, "learning_rate": 2.017545809333599e-06, "loss": 1.0997, "step": 8122 }, { "epoch": 1.149996460678134, "grad_norm": 10.86678661756094, "learning_rate": 2.0169833971678033e-06, "loss": 1.0236, "step": 8123 }, { "epoch": 1.1501380335527713, "grad_norm": 9.423436884689501, "learning_rate": 2.0164210103938297e-06, "loss": 1.117, "step": 8124 }, { "epoch": 1.1502796064274086, "grad_norm": 9.930916770880534, "learning_rate": 2.0158586490412436e-06, "loss": 1.0899, "step": 8125 }, { "epoch": 1.1504211793020458, "grad_norm": 9.290606063274302, "learning_rate": 2.0152963131396068e-06, "loss": 1.0653, "step": 8126 }, { "epoch": 1.150562752176683, "grad_norm": 10.575052781570776, "learning_rate": 2.0147340027184816e-06, "loss": 0.9412, "step": 8127 }, { "epoch": 1.1507043250513203, "grad_norm": 9.185759710416963, "learning_rate": 2.014171717807429e-06, "loss": 1.129, "step": 8128 }, { "epoch": 1.1508458979259575, "grad_norm": 10.477013631576055, "learning_rate": 2.013609458436006e-06, "loss": 1.1289, "step": 8129 }, { "epoch": 1.1509874708005947, "grad_norm": 11.419735312793279, "learning_rate": 2.013047224633771e-06, "loss": 1.1081, "step": 8130 }, { "epoch": 1.151129043675232, "grad_norm": 7.320833853249558, "learning_rate": 2.0124850164302805e-06, "loss": 0.9368, "step": 8131 }, { "epoch": 1.151270616549869, "grad_norm": 8.932787526475936, "learning_rate": 2.0119228338550894e-06, "loss": 1.057, "step": 8132 }, { "epoch": 1.1514121894245062, "grad_norm": 9.652750411936315, "learning_rate": 2.0113606769377497e-06, "loss": 1.0295, "step": 8133 }, { "epoch": 1.1515537622991434, "grad_norm": 8.514669438216016, "learning_rate": 2.010798545707816e-06, "loss": 1.1143, "step": 8134 }, { "epoch": 1.1516953351737806, "grad_norm": 10.096614277719382, "learning_rate": 2.0102364401948378e-06, "loss": 1.0335, "step": 8135 }, { "epoch": 1.1518369080484179, "grad_norm": 11.770738845490966, "learning_rate": 2.009674360428365e-06, "loss": 1.1081, "step": 8136 }, { "epoch": 1.151978480923055, "grad_norm": 9.479871205287953, "learning_rate": 2.009112306437945e-06, "loss": 1.016, "step": 8137 }, { "epoch": 1.1521200537976923, "grad_norm": 10.886039655218973, "learning_rate": 2.008550278253127e-06, "loss": 1.0603, "step": 8138 }, { "epoch": 1.1522616266723296, "grad_norm": 10.827472177177764, "learning_rate": 2.0079882759034517e-06, "loss": 1.1815, "step": 8139 }, { "epoch": 1.1524031995469668, "grad_norm": 10.517367577105647, "learning_rate": 2.007426299418467e-06, "loss": 1.112, "step": 8140 }, { "epoch": 1.152544772421604, "grad_norm": 10.999976765001206, "learning_rate": 2.0068643488277147e-06, "loss": 1.0547, "step": 8141 }, { "epoch": 1.1526863452962413, "grad_norm": 9.282788287267488, "learning_rate": 2.0063024241607356e-06, "loss": 0.9826, "step": 8142 }, { "epoch": 1.1528279181708785, "grad_norm": 11.79943835974699, "learning_rate": 2.00574052544707e-06, "loss": 1.0112, "step": 8143 }, { "epoch": 1.1529694910455157, "grad_norm": 11.938208753706325, "learning_rate": 2.005178652716256e-06, "loss": 1.0901, "step": 8144 }, { "epoch": 1.153111063920153, "grad_norm": 7.917425286603298, "learning_rate": 2.004616805997832e-06, "loss": 0.88, "step": 8145 }, { "epoch": 1.1532526367947902, "grad_norm": 8.715347993592342, "learning_rate": 2.0040549853213326e-06, "loss": 1.0154, "step": 8146 }, { "epoch": 1.1533942096694274, "grad_norm": 11.061114380692997, "learning_rate": 2.003493190716293e-06, "loss": 1.1288, "step": 8147 }, { "epoch": 1.1535357825440646, "grad_norm": 7.1519365262451435, "learning_rate": 2.0029314222122463e-06, "loss": 0.9903, "step": 8148 }, { "epoch": 1.1536773554187019, "grad_norm": 9.228278560116479, "learning_rate": 2.0023696798387247e-06, "loss": 0.9655, "step": 8149 }, { "epoch": 1.1538189282933389, "grad_norm": 9.402343344282293, "learning_rate": 2.001807963625257e-06, "loss": 0.9923, "step": 8150 }, { "epoch": 1.153960501167976, "grad_norm": 8.887664355597252, "learning_rate": 2.0012462736013735e-06, "loss": 1.0333, "step": 8151 }, { "epoch": 1.1541020740426133, "grad_norm": 9.05291030202572, "learning_rate": 2.0006846097966016e-06, "loss": 0.917, "step": 8152 }, { "epoch": 1.1542436469172506, "grad_norm": 10.581778905259961, "learning_rate": 2.000122972240467e-06, "loss": 1.0396, "step": 8153 }, { "epoch": 1.1543852197918878, "grad_norm": 10.856446674082964, "learning_rate": 1.9995613609624957e-06, "loss": 1.0469, "step": 8154 }, { "epoch": 1.154526792666525, "grad_norm": 9.356366165856798, "learning_rate": 1.9989997759922093e-06, "loss": 1.0474, "step": 8155 }, { "epoch": 1.1546683655411623, "grad_norm": 9.25391696785998, "learning_rate": 1.998438217359132e-06, "loss": 1.1443, "step": 8156 }, { "epoch": 1.1548099384157995, "grad_norm": 9.756099651382966, "learning_rate": 1.997876685092784e-06, "loss": 0.9722, "step": 8157 }, { "epoch": 1.1549515112904367, "grad_norm": 10.962179884374404, "learning_rate": 1.9973151792226837e-06, "loss": 1.034, "step": 8158 }, { "epoch": 1.155093084165074, "grad_norm": 7.806330327520931, "learning_rate": 1.9967536997783495e-06, "loss": 0.8793, "step": 8159 }, { "epoch": 1.1552346570397112, "grad_norm": 10.1640486134369, "learning_rate": 1.9961922467892997e-06, "loss": 0.9333, "step": 8160 }, { "epoch": 1.1553762299143484, "grad_norm": 9.569250180198576, "learning_rate": 1.9956308202850456e-06, "loss": 1.0597, "step": 8161 }, { "epoch": 1.1555178027889856, "grad_norm": 9.741296649450609, "learning_rate": 1.9950694202951044e-06, "loss": 1.0386, "step": 8162 }, { "epoch": 1.1556593756636229, "grad_norm": 9.58523111764204, "learning_rate": 1.994508046848987e-06, "loss": 0.8913, "step": 8163 }, { "epoch": 1.15580094853826, "grad_norm": 8.838468774252558, "learning_rate": 1.9939466999762044e-06, "loss": 1.1497, "step": 8164 }, { "epoch": 1.1559425214128973, "grad_norm": 8.207762361086864, "learning_rate": 1.993385379706267e-06, "loss": 0.9558, "step": 8165 }, { "epoch": 1.1560840942875346, "grad_norm": 9.103413589255998, "learning_rate": 1.9928240860686822e-06, "loss": 0.9657, "step": 8166 }, { "epoch": 1.1562256671621718, "grad_norm": 8.489973100635344, "learning_rate": 1.9922628190929567e-06, "loss": 1.0642, "step": 8167 }, { "epoch": 1.156367240036809, "grad_norm": 9.767981356341341, "learning_rate": 1.9917015788085962e-06, "loss": 0.9958, "step": 8168 }, { "epoch": 1.1565088129114462, "grad_norm": 11.305075553766423, "learning_rate": 1.991140365245105e-06, "loss": 0.9525, "step": 8169 }, { "epoch": 1.1566503857860835, "grad_norm": 8.633453649126814, "learning_rate": 1.990579178431986e-06, "loss": 0.9846, "step": 8170 }, { "epoch": 1.1567919586607207, "grad_norm": 9.214477790528795, "learning_rate": 1.990018018398739e-06, "loss": 1.024, "step": 8171 }, { "epoch": 1.156933531535358, "grad_norm": 11.652287441198174, "learning_rate": 1.989456885174865e-06, "loss": 1.151, "step": 8172 }, { "epoch": 1.157075104409995, "grad_norm": 8.892464660282604, "learning_rate": 1.988895778789861e-06, "loss": 1.0767, "step": 8173 }, { "epoch": 1.1572166772846322, "grad_norm": 10.10388543960008, "learning_rate": 1.9883346992732256e-06, "loss": 1.1081, "step": 8174 }, { "epoch": 1.1573582501592694, "grad_norm": 9.928377006485508, "learning_rate": 1.987773646654453e-06, "loss": 1.0217, "step": 8175 }, { "epoch": 1.1574998230339066, "grad_norm": 8.066123916820514, "learning_rate": 1.987212620963038e-06, "loss": 0.9915, "step": 8176 }, { "epoch": 1.1576413959085439, "grad_norm": 8.719700259035392, "learning_rate": 1.9866516222284736e-06, "loss": 0.957, "step": 8177 }, { "epoch": 1.157782968783181, "grad_norm": 9.732962957503172, "learning_rate": 1.9860906504802496e-06, "loss": 1.0144, "step": 8178 }, { "epoch": 1.1579245416578183, "grad_norm": 8.930663154814743, "learning_rate": 1.985529705747858e-06, "loss": 1.051, "step": 8179 }, { "epoch": 1.1580661145324556, "grad_norm": 8.170725637682926, "learning_rate": 1.9849687880607855e-06, "loss": 1.0091, "step": 8180 }, { "epoch": 1.1582076874070928, "grad_norm": 8.84925266709643, "learning_rate": 1.984407897448521e-06, "loss": 1.0475, "step": 8181 }, { "epoch": 1.15834926028173, "grad_norm": 9.29124164607097, "learning_rate": 1.983847033940548e-06, "loss": 1.0372, "step": 8182 }, { "epoch": 1.1584908331563673, "grad_norm": 9.621220230466847, "learning_rate": 1.9832861975663516e-06, "loss": 1.26, "step": 8183 }, { "epoch": 1.1586324060310045, "grad_norm": 8.51372676305718, "learning_rate": 1.982725388355414e-06, "loss": 1.0401, "step": 8184 }, { "epoch": 1.1587739789056417, "grad_norm": 7.903960970901932, "learning_rate": 1.9821646063372174e-06, "loss": 0.9476, "step": 8185 }, { "epoch": 1.158915551780279, "grad_norm": 9.918419520083187, "learning_rate": 1.9816038515412412e-06, "loss": 1.0303, "step": 8186 }, { "epoch": 1.1590571246549162, "grad_norm": 8.684948443587528, "learning_rate": 1.9810431239969646e-06, "loss": 1.0609, "step": 8187 }, { "epoch": 1.1591986975295534, "grad_norm": 8.763155393809622, "learning_rate": 1.9804824237338636e-06, "loss": 0.9426, "step": 8188 }, { "epoch": 1.1593402704041906, "grad_norm": 7.973989641529681, "learning_rate": 1.9799217507814144e-06, "loss": 0.9821, "step": 8189 }, { "epoch": 1.1594818432788279, "grad_norm": 8.14394622823954, "learning_rate": 1.9793611051690905e-06, "loss": 0.99, "step": 8190 }, { "epoch": 1.159623416153465, "grad_norm": 10.333581818905186, "learning_rate": 1.978800486926366e-06, "loss": 0.926, "step": 8191 }, { "epoch": 1.159764989028102, "grad_norm": 8.857447930984277, "learning_rate": 1.9782398960827105e-06, "loss": 0.999, "step": 8192 }, { "epoch": 1.1599065619027393, "grad_norm": 8.968933316261358, "learning_rate": 1.977679332667595e-06, "loss": 1.0229, "step": 8193 }, { "epoch": 1.1600481347773766, "grad_norm": 8.399780506717729, "learning_rate": 1.9771187967104875e-06, "loss": 1.0581, "step": 8194 }, { "epoch": 1.1601897076520138, "grad_norm": 10.551125084063347, "learning_rate": 1.9765582882408544e-06, "loss": 1.0374, "step": 8195 }, { "epoch": 1.160331280526651, "grad_norm": 8.358396554887188, "learning_rate": 1.9759978072881623e-06, "loss": 0.9252, "step": 8196 }, { "epoch": 1.1604728534012883, "grad_norm": 9.42847420283139, "learning_rate": 1.975437353881875e-06, "loss": 1.0217, "step": 8197 }, { "epoch": 1.1606144262759255, "grad_norm": 8.353202123603465, "learning_rate": 1.9748769280514544e-06, "loss": 1.0278, "step": 8198 }, { "epoch": 1.1607559991505627, "grad_norm": 10.79610615931156, "learning_rate": 1.9743165298263624e-06, "loss": 1.1285, "step": 8199 }, { "epoch": 1.1608975720252, "grad_norm": 9.563454175229719, "learning_rate": 1.9737561592360583e-06, "loss": 1.1077, "step": 8200 }, { "epoch": 1.1610391448998372, "grad_norm": 8.543681829664713, "learning_rate": 1.97319581631e-06, "loss": 1.0679, "step": 8201 }, { "epoch": 1.1611807177744744, "grad_norm": 11.307339834950854, "learning_rate": 1.9726355010776466e-06, "loss": 0.9842, "step": 8202 }, { "epoch": 1.1613222906491116, "grad_norm": 7.635242524357803, "learning_rate": 1.9720752135684505e-06, "loss": 0.9919, "step": 8203 }, { "epoch": 1.1614638635237489, "grad_norm": 9.283479878646077, "learning_rate": 1.9715149538118667e-06, "loss": 0.9529, "step": 8204 }, { "epoch": 1.161605436398386, "grad_norm": 8.75077598400654, "learning_rate": 1.970954721837348e-06, "loss": 1.0419, "step": 8205 }, { "epoch": 1.1617470092730233, "grad_norm": 9.449460162532104, "learning_rate": 1.970394517674345e-06, "loss": 1.1112, "step": 8206 }, { "epoch": 1.1618885821476606, "grad_norm": 8.503376009824164, "learning_rate": 1.9698343413523065e-06, "loss": 0.918, "step": 8207 }, { "epoch": 1.1620301550222978, "grad_norm": 8.992479785242127, "learning_rate": 1.969274192900682e-06, "loss": 1.017, "step": 8208 }, { "epoch": 1.162171727896935, "grad_norm": 9.634526677999393, "learning_rate": 1.9687140723489175e-06, "loss": 1.0779, "step": 8209 }, { "epoch": 1.1623133007715722, "grad_norm": 11.338430118786011, "learning_rate": 1.9681539797264583e-06, "loss": 0.98, "step": 8210 }, { "epoch": 1.1624548736462095, "grad_norm": 9.068451709429235, "learning_rate": 1.967593915062748e-06, "loss": 1.0472, "step": 8211 }, { "epoch": 1.1625964465208467, "grad_norm": 9.000028398257063, "learning_rate": 1.9670338783872277e-06, "loss": 0.9681, "step": 8212 }, { "epoch": 1.162738019395484, "grad_norm": 8.961002206610319, "learning_rate": 1.9664738697293404e-06, "loss": 0.9444, "step": 8213 }, { "epoch": 1.1628795922701212, "grad_norm": 10.005258322092283, "learning_rate": 1.965913889118523e-06, "loss": 1.0289, "step": 8214 }, { "epoch": 1.1630211651447582, "grad_norm": 11.098524844462192, "learning_rate": 1.9653539365842143e-06, "loss": 1.1094, "step": 8215 }, { "epoch": 1.1631627380193954, "grad_norm": 9.30545822269785, "learning_rate": 1.9647940121558508e-06, "loss": 0.998, "step": 8216 }, { "epoch": 1.1633043108940326, "grad_norm": 10.213211285066905, "learning_rate": 1.9642341158628665e-06, "loss": 1.148, "step": 8217 }, { "epoch": 1.1634458837686699, "grad_norm": 8.964799921597656, "learning_rate": 1.963674247734696e-06, "loss": 0.9944, "step": 8218 }, { "epoch": 1.163587456643307, "grad_norm": 8.873143391437981, "learning_rate": 1.96311440780077e-06, "loss": 1.1245, "step": 8219 }, { "epoch": 1.1637290295179443, "grad_norm": 9.669562684262551, "learning_rate": 1.9625545960905187e-06, "loss": 1.0586, "step": 8220 }, { "epoch": 1.1638706023925816, "grad_norm": 10.023129227284059, "learning_rate": 1.961994812633372e-06, "loss": 1.049, "step": 8221 }, { "epoch": 1.1640121752672188, "grad_norm": 9.857345831698467, "learning_rate": 1.961435057458757e-06, "loss": 1.0398, "step": 8222 }, { "epoch": 1.164153748141856, "grad_norm": 8.250859996014242, "learning_rate": 1.9608753305960997e-06, "loss": 1.0934, "step": 8223 }, { "epoch": 1.1642953210164932, "grad_norm": 8.52998404140254, "learning_rate": 1.960315632074824e-06, "loss": 1.0028, "step": 8224 }, { "epoch": 1.1644368938911305, "grad_norm": 9.504587370912459, "learning_rate": 1.9597559619243527e-06, "loss": 1.0834, "step": 8225 }, { "epoch": 1.1645784667657677, "grad_norm": 8.651360518343886, "learning_rate": 1.959196320174108e-06, "loss": 1.0145, "step": 8226 }, { "epoch": 1.164720039640405, "grad_norm": 9.41574519777349, "learning_rate": 1.95863670685351e-06, "loss": 1.0803, "step": 8227 }, { "epoch": 1.1648616125150422, "grad_norm": 9.788829462758894, "learning_rate": 1.958077121991976e-06, "loss": 1.0567, "step": 8228 }, { "epoch": 1.1650031853896794, "grad_norm": 10.22073055754249, "learning_rate": 1.9575175656189236e-06, "loss": 1.0336, "step": 8229 }, { "epoch": 1.1651447582643166, "grad_norm": 8.063585311823438, "learning_rate": 1.9569580377637677e-06, "loss": 0.997, "step": 8230 }, { "epoch": 1.1652863311389539, "grad_norm": 10.025624441738147, "learning_rate": 1.956398538455924e-06, "loss": 1.0174, "step": 8231 }, { "epoch": 1.165427904013591, "grad_norm": 9.388625352560522, "learning_rate": 1.955839067724803e-06, "loss": 1.0907, "step": 8232 }, { "epoch": 1.165569476888228, "grad_norm": 9.138107709597596, "learning_rate": 1.9552796255998173e-06, "loss": 0.9516, "step": 8233 }, { "epoch": 1.1657110497628653, "grad_norm": 8.052065699180911, "learning_rate": 1.9547202121103757e-06, "loss": 1.0289, "step": 8234 }, { "epoch": 1.1658526226375026, "grad_norm": 8.092969937825716, "learning_rate": 1.9541608272858856e-06, "loss": 1.0886, "step": 8235 }, { "epoch": 1.1659941955121398, "grad_norm": 10.15959267408013, "learning_rate": 1.953601471155753e-06, "loss": 1.1149, "step": 8236 }, { "epoch": 1.166135768386777, "grad_norm": 10.101014160001306, "learning_rate": 1.9530421437493843e-06, "loss": 0.9729, "step": 8237 }, { "epoch": 1.1662773412614142, "grad_norm": 9.660260500347302, "learning_rate": 1.952482845096182e-06, "loss": 1.002, "step": 8238 }, { "epoch": 1.1664189141360515, "grad_norm": 9.55416247343783, "learning_rate": 1.9519235752255487e-06, "loss": 1.0966, "step": 8239 }, { "epoch": 1.1665604870106887, "grad_norm": 8.363809881558959, "learning_rate": 1.951364334166884e-06, "loss": 0.9236, "step": 8240 }, { "epoch": 1.166702059885326, "grad_norm": 9.819764382202491, "learning_rate": 1.9508051219495877e-06, "loss": 0.8954, "step": 8241 }, { "epoch": 1.1668436327599632, "grad_norm": 9.370341046130646, "learning_rate": 1.950245938603056e-06, "loss": 1.1683, "step": 8242 }, { "epoch": 1.1669852056346004, "grad_norm": 9.07307774385813, "learning_rate": 1.949686784156686e-06, "loss": 0.9998, "step": 8243 }, { "epoch": 1.1671267785092376, "grad_norm": 9.877421794360659, "learning_rate": 1.949127658639872e-06, "loss": 1.0633, "step": 8244 }, { "epoch": 1.1672683513838749, "grad_norm": 7.359038067857761, "learning_rate": 1.948568562082005e-06, "loss": 0.985, "step": 8245 }, { "epoch": 1.167409924258512, "grad_norm": 8.076292084579979, "learning_rate": 1.948009494512478e-06, "loss": 0.934, "step": 8246 }, { "epoch": 1.1675514971331493, "grad_norm": 10.808149399634713, "learning_rate": 1.94745045596068e-06, "loss": 1.0898, "step": 8247 }, { "epoch": 1.1676930700077865, "grad_norm": 8.931381158419601, "learning_rate": 1.9468914464559994e-06, "loss": 1.0936, "step": 8248 }, { "epoch": 1.1678346428824238, "grad_norm": 8.703561549307164, "learning_rate": 1.9463324660278235e-06, "loss": 1.0634, "step": 8249 }, { "epoch": 1.167976215757061, "grad_norm": 10.220534609195054, "learning_rate": 1.945773514705537e-06, "loss": 1.0458, "step": 8250 }, { "epoch": 1.1681177886316982, "grad_norm": 8.020000252664234, "learning_rate": 1.9452145925185235e-06, "loss": 1.1703, "step": 8251 }, { "epoch": 1.1682593615063355, "grad_norm": 11.452819237264041, "learning_rate": 1.9446556994961645e-06, "loss": 1.0633, "step": 8252 }, { "epoch": 1.1684009343809727, "grad_norm": 8.470229128786341, "learning_rate": 1.944096835667842e-06, "loss": 1.005, "step": 8253 }, { "epoch": 1.16854250725561, "grad_norm": 9.479784688959104, "learning_rate": 1.9435380010629343e-06, "loss": 0.9811, "step": 8254 }, { "epoch": 1.1686840801302472, "grad_norm": 8.797017233530422, "learning_rate": 1.94297919571082e-06, "loss": 0.9382, "step": 8255 }, { "epoch": 1.1688256530048842, "grad_norm": 9.684815262434306, "learning_rate": 1.942420419640873e-06, "loss": 1.1246, "step": 8256 }, { "epoch": 1.1689672258795214, "grad_norm": 8.037044586995854, "learning_rate": 1.941861672882469e-06, "loss": 0.9783, "step": 8257 }, { "epoch": 1.1691087987541586, "grad_norm": 10.474114617778783, "learning_rate": 1.9413029554649798e-06, "loss": 1.067, "step": 8258 }, { "epoch": 1.1692503716287959, "grad_norm": 10.112779188604904, "learning_rate": 1.9407442674177783e-06, "loss": 0.9603, "step": 8259 }, { "epoch": 1.169391944503433, "grad_norm": 8.073101318872764, "learning_rate": 1.9401856087702337e-06, "loss": 0.9985, "step": 8260 }, { "epoch": 1.1695335173780703, "grad_norm": 9.351506614677417, "learning_rate": 1.9396269795517147e-06, "loss": 1.0915, "step": 8261 }, { "epoch": 1.1696750902527075, "grad_norm": 7.6305914217416095, "learning_rate": 1.939068379791587e-06, "loss": 0.9656, "step": 8262 }, { "epoch": 1.1698166631273448, "grad_norm": 9.132417773094057, "learning_rate": 1.938509809519216e-06, "loss": 1.0164, "step": 8263 }, { "epoch": 1.169958236001982, "grad_norm": 11.329536549987383, "learning_rate": 1.9379512687639663e-06, "loss": 1.0681, "step": 8264 }, { "epoch": 1.1700998088766192, "grad_norm": 16.62076371220508, "learning_rate": 1.937392757555199e-06, "loss": 0.9696, "step": 8265 }, { "epoch": 1.1702413817512565, "grad_norm": 8.682007734434544, "learning_rate": 1.936834275922276e-06, "loss": 1.0164, "step": 8266 }, { "epoch": 1.1703829546258937, "grad_norm": 7.334470429549334, "learning_rate": 1.936275823894554e-06, "loss": 0.9907, "step": 8267 }, { "epoch": 1.170524527500531, "grad_norm": 8.534920709707624, "learning_rate": 1.9357174015013917e-06, "loss": 0.8614, "step": 8268 }, { "epoch": 1.1706661003751682, "grad_norm": 8.875687693426915, "learning_rate": 1.935159008772145e-06, "loss": 1.0155, "step": 8269 }, { "epoch": 1.1708076732498054, "grad_norm": 8.967858884982359, "learning_rate": 1.9346006457361684e-06, "loss": 1.0998, "step": 8270 }, { "epoch": 1.1709492461244426, "grad_norm": 8.562670183926912, "learning_rate": 1.9340423124228136e-06, "loss": 1.0179, "step": 8271 }, { "epoch": 1.1710908189990799, "grad_norm": 9.451200733901487, "learning_rate": 1.9334840088614327e-06, "loss": 1.0354, "step": 8272 }, { "epoch": 1.171232391873717, "grad_norm": 9.87119499972806, "learning_rate": 1.9329257350813753e-06, "loss": 1.0118, "step": 8273 }, { "epoch": 1.171373964748354, "grad_norm": 9.25532460967043, "learning_rate": 1.932367491111989e-06, "loss": 1.0498, "step": 8274 }, { "epoch": 1.1715155376229913, "grad_norm": 8.689474855628593, "learning_rate": 1.9318092769826197e-06, "loss": 1.011, "step": 8275 }, { "epoch": 1.1716571104976286, "grad_norm": 8.906216591220234, "learning_rate": 1.931251092722615e-06, "loss": 1.0057, "step": 8276 }, { "epoch": 1.1717986833722658, "grad_norm": 11.223931521260512, "learning_rate": 1.930692938361315e-06, "loss": 0.9597, "step": 8277 }, { "epoch": 1.171940256246903, "grad_norm": 9.329796620530844, "learning_rate": 1.930134813928063e-06, "loss": 1.0189, "step": 8278 }, { "epoch": 1.1720818291215402, "grad_norm": 10.294401065547225, "learning_rate": 1.9295767194521988e-06, "loss": 1.0459, "step": 8279 }, { "epoch": 1.1722234019961775, "grad_norm": 10.893253485372588, "learning_rate": 1.9290186549630606e-06, "loss": 1.003, "step": 8280 }, { "epoch": 1.1723649748708147, "grad_norm": 9.257567738061908, "learning_rate": 1.9284606204899862e-06, "loss": 1.104, "step": 8281 }, { "epoch": 1.172506547745452, "grad_norm": 8.310958375167663, "learning_rate": 1.927902616062311e-06, "loss": 1.0259, "step": 8282 }, { "epoch": 1.1726481206200892, "grad_norm": 12.904545040986287, "learning_rate": 1.9273446417093687e-06, "loss": 0.8959, "step": 8283 }, { "epoch": 1.1727896934947264, "grad_norm": 8.514201697998422, "learning_rate": 1.9267866974604914e-06, "loss": 1.003, "step": 8284 }, { "epoch": 1.1729312663693636, "grad_norm": 10.113325759123764, "learning_rate": 1.9262287833450107e-06, "loss": 1.0643, "step": 8285 }, { "epoch": 1.1730728392440009, "grad_norm": 8.731235547282465, "learning_rate": 1.9256708993922542e-06, "loss": 0.939, "step": 8286 }, { "epoch": 1.173214412118638, "grad_norm": 10.466396511341292, "learning_rate": 1.9251130456315514e-06, "loss": 0.9995, "step": 8287 }, { "epoch": 1.1733559849932753, "grad_norm": 11.27555386352259, "learning_rate": 1.9245552220922264e-06, "loss": 1.0218, "step": 8288 }, { "epoch": 1.1734975578679125, "grad_norm": 9.381414634891575, "learning_rate": 1.9239974288036044e-06, "loss": 1.132, "step": 8289 }, { "epoch": 1.1736391307425498, "grad_norm": 8.518131888026646, "learning_rate": 1.9234396657950076e-06, "loss": 0.9822, "step": 8290 }, { "epoch": 1.173780703617187, "grad_norm": 9.072841032277191, "learning_rate": 1.922881933095758e-06, "loss": 1.1083, "step": 8291 }, { "epoch": 1.1739222764918242, "grad_norm": 9.200439475799596, "learning_rate": 1.9223242307351753e-06, "loss": 1.009, "step": 8292 }, { "epoch": 1.1740638493664615, "grad_norm": 9.066936380451141, "learning_rate": 1.9217665587425764e-06, "loss": 1.0801, "step": 8293 }, { "epoch": 1.1742054222410987, "grad_norm": 8.946575118997396, "learning_rate": 1.9212089171472787e-06, "loss": 1.1177, "step": 8294 }, { "epoch": 1.174346995115736, "grad_norm": 11.718400385409838, "learning_rate": 1.9206513059785966e-06, "loss": 1.0042, "step": 8295 }, { "epoch": 1.1744885679903732, "grad_norm": 11.230239467768172, "learning_rate": 1.9200937252658435e-06, "loss": 1.0636, "step": 8296 }, { "epoch": 1.1746301408650104, "grad_norm": 10.215910257421768, "learning_rate": 1.9195361750383312e-06, "loss": 1.0209, "step": 8297 }, { "epoch": 1.1747717137396474, "grad_norm": 8.457402019874735, "learning_rate": 1.918978655325369e-06, "loss": 0.9838, "step": 8298 }, { "epoch": 1.1749132866142846, "grad_norm": 10.46016329577681, "learning_rate": 1.9184211661562653e-06, "loss": 0.9851, "step": 8299 }, { "epoch": 1.1750548594889219, "grad_norm": 9.749591134130439, "learning_rate": 1.9178637075603276e-06, "loss": 1.1274, "step": 8300 }, { "epoch": 1.175196432363559, "grad_norm": 10.010009715011714, "learning_rate": 1.9173062795668606e-06, "loss": 1.0226, "step": 8301 }, { "epoch": 1.1753380052381963, "grad_norm": 12.891804515164917, "learning_rate": 1.916748882205168e-06, "loss": 1.0584, "step": 8302 }, { "epoch": 1.1754795781128335, "grad_norm": 9.923102163283856, "learning_rate": 1.916191515504552e-06, "loss": 0.9875, "step": 8303 }, { "epoch": 1.1756211509874708, "grad_norm": 9.292627213669263, "learning_rate": 1.915634179494312e-06, "loss": 0.9438, "step": 8304 }, { "epoch": 1.175762723862108, "grad_norm": 10.919043090589952, "learning_rate": 1.9150768742037477e-06, "loss": 1.0707, "step": 8305 }, { "epoch": 1.1759042967367452, "grad_norm": 6.787656227969401, "learning_rate": 1.9145195996621567e-06, "loss": 1.0433, "step": 8306 }, { "epoch": 1.1760458696113825, "grad_norm": 8.071584861582908, "learning_rate": 1.9139623558988334e-06, "loss": 0.9602, "step": 8307 }, { "epoch": 1.1761874424860197, "grad_norm": 8.913886186358864, "learning_rate": 1.913405142943073e-06, "loss": 0.9386, "step": 8308 }, { "epoch": 1.176329015360657, "grad_norm": 8.853715330102068, "learning_rate": 1.9128479608241656e-06, "loss": 1.0128, "step": 8309 }, { "epoch": 1.1764705882352942, "grad_norm": 9.302941658999167, "learning_rate": 1.9122908095714032e-06, "loss": 1.0369, "step": 8310 }, { "epoch": 1.1766121611099314, "grad_norm": 9.123405003608207, "learning_rate": 1.911733689214075e-06, "loss": 1.0642, "step": 8311 }, { "epoch": 1.1767537339845686, "grad_norm": 8.592442638482114, "learning_rate": 1.911176599781468e-06, "loss": 1.0919, "step": 8312 }, { "epoch": 1.1768953068592058, "grad_norm": 9.676716370839836, "learning_rate": 1.910619541302868e-06, "loss": 1.0415, "step": 8313 }, { "epoch": 1.177036879733843, "grad_norm": 9.64569403683055, "learning_rate": 1.9100625138075595e-06, "loss": 1.1778, "step": 8314 }, { "epoch": 1.1771784526084803, "grad_norm": 9.321638500603717, "learning_rate": 1.909505517324825e-06, "loss": 1.0323, "step": 8315 }, { "epoch": 1.1773200254831173, "grad_norm": 7.659545189325814, "learning_rate": 1.9089485518839446e-06, "loss": 1.0098, "step": 8316 }, { "epoch": 1.1774615983577545, "grad_norm": 9.160284930466094, "learning_rate": 1.9083916175141983e-06, "loss": 0.9559, "step": 8317 }, { "epoch": 1.1776031712323918, "grad_norm": 8.330857277819518, "learning_rate": 1.9078347142448638e-06, "loss": 1.0233, "step": 8318 }, { "epoch": 1.177744744107029, "grad_norm": 10.933505827620763, "learning_rate": 1.9072778421052172e-06, "loss": 1.0061, "step": 8319 }, { "epoch": 1.1778863169816662, "grad_norm": 8.950870782697887, "learning_rate": 1.9067210011245318e-06, "loss": 1.1157, "step": 8320 }, { "epoch": 1.1780278898563035, "grad_norm": 10.323134239411127, "learning_rate": 1.906164191332081e-06, "loss": 1.0099, "step": 8321 }, { "epoch": 1.1781694627309407, "grad_norm": 8.775123930053237, "learning_rate": 1.905607412757136e-06, "loss": 1.0073, "step": 8322 }, { "epoch": 1.178311035605578, "grad_norm": 10.135748162227861, "learning_rate": 1.9050506654289663e-06, "loss": 1.0394, "step": 8323 }, { "epoch": 1.1784526084802152, "grad_norm": 9.710812188074218, "learning_rate": 1.9044939493768394e-06, "loss": 1.175, "step": 8324 }, { "epoch": 1.1785941813548524, "grad_norm": 8.534961829164379, "learning_rate": 1.9039372646300216e-06, "loss": 1.0643, "step": 8325 }, { "epoch": 1.1787357542294896, "grad_norm": 9.561868484687343, "learning_rate": 1.9033806112177772e-06, "loss": 1.0531, "step": 8326 }, { "epoch": 1.1788773271041268, "grad_norm": 10.71471308991151, "learning_rate": 1.902823989169369e-06, "loss": 1.0761, "step": 8327 }, { "epoch": 1.179018899978764, "grad_norm": 9.73037622898607, "learning_rate": 1.9022673985140585e-06, "loss": 1.034, "step": 8328 }, { "epoch": 1.1791604728534013, "grad_norm": 10.13175757688071, "learning_rate": 1.9017108392811065e-06, "loss": 1.068, "step": 8329 }, { "epoch": 1.1793020457280385, "grad_norm": 8.062472084647652, "learning_rate": 1.9011543114997684e-06, "loss": 1.0828, "step": 8330 }, { "epoch": 1.1794436186026758, "grad_norm": 9.247995932739842, "learning_rate": 1.9005978151993014e-06, "loss": 0.9995, "step": 8331 }, { "epoch": 1.179585191477313, "grad_norm": 11.0463366343576, "learning_rate": 1.9000413504089607e-06, "loss": 1.1252, "step": 8332 }, { "epoch": 1.1797267643519502, "grad_norm": 8.668194073795105, "learning_rate": 1.8994849171579981e-06, "loss": 1.071, "step": 8333 }, { "epoch": 1.1798683372265875, "grad_norm": 10.698599779010584, "learning_rate": 1.8989285154756665e-06, "loss": 1.1295, "step": 8334 }, { "epoch": 1.1800099101012247, "grad_norm": 10.118423398209313, "learning_rate": 1.8983721453912146e-06, "loss": 1.1028, "step": 8335 }, { "epoch": 1.180151482975862, "grad_norm": 8.915617503775765, "learning_rate": 1.89781580693389e-06, "loss": 0.8722, "step": 8336 }, { "epoch": 1.1802930558504992, "grad_norm": 6.772450728295297, "learning_rate": 1.8972595001329398e-06, "loss": 0.9287, "step": 8337 }, { "epoch": 1.1804346287251364, "grad_norm": 8.475731256045524, "learning_rate": 1.8967032250176083e-06, "loss": 0.9448, "step": 8338 }, { "epoch": 1.1805762015997734, "grad_norm": 8.95460846812315, "learning_rate": 1.8961469816171383e-06, "loss": 1.0601, "step": 8339 }, { "epoch": 1.1807177744744106, "grad_norm": 8.223276494570296, "learning_rate": 1.8955907699607717e-06, "loss": 0.999, "step": 8340 }, { "epoch": 1.1808593473490478, "grad_norm": 9.896891219496498, "learning_rate": 1.895034590077747e-06, "loss": 1.1173, "step": 8341 }, { "epoch": 1.181000920223685, "grad_norm": 7.756800467409932, "learning_rate": 1.894478441997303e-06, "loss": 0.9485, "step": 8342 }, { "epoch": 1.1811424930983223, "grad_norm": 8.839752695758404, "learning_rate": 1.8939223257486759e-06, "loss": 0.9065, "step": 8343 }, { "epoch": 1.1812840659729595, "grad_norm": 8.829082929914113, "learning_rate": 1.8933662413611e-06, "loss": 0.9803, "step": 8344 }, { "epoch": 1.1814256388475968, "grad_norm": 9.662650446935766, "learning_rate": 1.8928101888638087e-06, "loss": 0.9456, "step": 8345 }, { "epoch": 1.181567211722234, "grad_norm": 9.981907690423546, "learning_rate": 1.892254168286033e-06, "loss": 0.9484, "step": 8346 }, { "epoch": 1.1817087845968712, "grad_norm": 8.86293477658523, "learning_rate": 1.8916981796570023e-06, "loss": 0.9827, "step": 8347 }, { "epoch": 1.1818503574715085, "grad_norm": 8.293653189490085, "learning_rate": 1.8911422230059448e-06, "loss": 0.9345, "step": 8348 }, { "epoch": 1.1819919303461457, "grad_norm": 9.77966685782791, "learning_rate": 1.8905862983620863e-06, "loss": 1.0124, "step": 8349 }, { "epoch": 1.182133503220783, "grad_norm": 10.524728991718556, "learning_rate": 1.8900304057546532e-06, "loss": 1.0671, "step": 8350 }, { "epoch": 1.1822750760954202, "grad_norm": 9.560883828012654, "learning_rate": 1.8894745452128657e-06, "loss": 0.9069, "step": 8351 }, { "epoch": 1.1824166489700574, "grad_norm": 9.5882930581794, "learning_rate": 1.8889187167659462e-06, "loss": 1.1039, "step": 8352 }, { "epoch": 1.1825582218446946, "grad_norm": 9.810993443387444, "learning_rate": 1.888362920443114e-06, "loss": 1.0151, "step": 8353 }, { "epoch": 1.1826997947193318, "grad_norm": 10.264292730686828, "learning_rate": 1.8878071562735873e-06, "loss": 0.9497, "step": 8354 }, { "epoch": 1.182841367593969, "grad_norm": 8.313834864771575, "learning_rate": 1.887251424286581e-06, "loss": 0.9836, "step": 8355 }, { "epoch": 1.1829829404686063, "grad_norm": 8.42169608242812, "learning_rate": 1.8866957245113113e-06, "loss": 1.072, "step": 8356 }, { "epoch": 1.1831245133432433, "grad_norm": 8.048861064070552, "learning_rate": 1.88614005697699e-06, "loss": 0.9713, "step": 8357 }, { "epoch": 1.1832660862178805, "grad_norm": 8.750900658166653, "learning_rate": 1.8855844217128281e-06, "loss": 0.999, "step": 8358 }, { "epoch": 1.1834076590925178, "grad_norm": 12.41614205363702, "learning_rate": 1.885028818748035e-06, "loss": 1.0176, "step": 8359 }, { "epoch": 1.183549231967155, "grad_norm": 8.357002163961491, "learning_rate": 1.8844732481118184e-06, "loss": 1.097, "step": 8360 }, { "epoch": 1.1836908048417922, "grad_norm": 9.426303714669467, "learning_rate": 1.8839177098333856e-06, "loss": 0.9959, "step": 8361 }, { "epoch": 1.1838323777164295, "grad_norm": 9.17443042619102, "learning_rate": 1.8833622039419371e-06, "loss": 1.0802, "step": 8362 }, { "epoch": 1.1839739505910667, "grad_norm": 9.988328225715698, "learning_rate": 1.8828067304666788e-06, "loss": 1.1025, "step": 8363 }, { "epoch": 1.184115523465704, "grad_norm": 9.561505035298525, "learning_rate": 1.8822512894368106e-06, "loss": 0.936, "step": 8364 }, { "epoch": 1.1842570963403412, "grad_norm": 9.499761076482603, "learning_rate": 1.8816958808815311e-06, "loss": 0.9903, "step": 8365 }, { "epoch": 1.1843986692149784, "grad_norm": 7.860068076423742, "learning_rate": 1.8811405048300383e-06, "loss": 1.0492, "step": 8366 }, { "epoch": 1.1845402420896156, "grad_norm": 8.479740007841393, "learning_rate": 1.8805851613115278e-06, "loss": 1.0333, "step": 8367 }, { "epoch": 1.1846818149642528, "grad_norm": 9.510103173884586, "learning_rate": 1.8800298503551934e-06, "loss": 1.0087, "step": 8368 }, { "epoch": 1.18482338783889, "grad_norm": 11.688785257915557, "learning_rate": 1.8794745719902274e-06, "loss": 1.0439, "step": 8369 }, { "epoch": 1.1849649607135273, "grad_norm": 8.970360588047162, "learning_rate": 1.8789193262458205e-06, "loss": 0.9948, "step": 8370 }, { "epoch": 1.1851065335881645, "grad_norm": 8.033033358511892, "learning_rate": 1.8783641131511624e-06, "loss": 1.0166, "step": 8371 }, { "epoch": 1.1852481064628018, "grad_norm": 10.647636086749014, "learning_rate": 1.8778089327354385e-06, "loss": 1.0928, "step": 8372 }, { "epoch": 1.185389679337439, "grad_norm": 9.894013461703551, "learning_rate": 1.8772537850278352e-06, "loss": 1.0793, "step": 8373 }, { "epoch": 1.1855312522120762, "grad_norm": 9.6374740026299, "learning_rate": 1.876698670057536e-06, "loss": 0.9504, "step": 8374 }, { "epoch": 1.1856728250867135, "grad_norm": 9.380184113539896, "learning_rate": 1.876143587853723e-06, "loss": 1.0639, "step": 8375 }, { "epoch": 1.1858143979613507, "grad_norm": 10.188881622426493, "learning_rate": 1.8755885384455764e-06, "loss": 1.044, "step": 8376 }, { "epoch": 1.185955970835988, "grad_norm": 7.297895143741977, "learning_rate": 1.8750335218622749e-06, "loss": 1.0381, "step": 8377 }, { "epoch": 1.1860975437106251, "grad_norm": 9.19722228203049, "learning_rate": 1.8744785381329944e-06, "loss": 1.1543, "step": 8378 }, { "epoch": 1.1862391165852624, "grad_norm": 7.208429530683373, "learning_rate": 1.8739235872869113e-06, "loss": 0.8362, "step": 8379 }, { "epoch": 1.1863806894598996, "grad_norm": 9.043787085003007, "learning_rate": 1.8733686693531986e-06, "loss": 0.9393, "step": 8380 }, { "epoch": 1.1865222623345366, "grad_norm": 6.612694594290839, "learning_rate": 1.8728137843610276e-06, "loss": 0.9442, "step": 8381 }, { "epoch": 1.1866638352091738, "grad_norm": 9.276539027598947, "learning_rate": 1.8722589323395693e-06, "loss": 1.0456, "step": 8382 }, { "epoch": 1.186805408083811, "grad_norm": 9.520242907847155, "learning_rate": 1.8717041133179897e-06, "loss": 0.9579, "step": 8383 }, { "epoch": 1.1869469809584483, "grad_norm": 10.348520337176002, "learning_rate": 1.871149327325456e-06, "loss": 0.9645, "step": 8384 }, { "epoch": 1.1870885538330855, "grad_norm": 9.312432000852828, "learning_rate": 1.8705945743911341e-06, "loss": 1.0256, "step": 8385 }, { "epoch": 1.1872301267077228, "grad_norm": 9.060814852080988, "learning_rate": 1.8700398545441857e-06, "loss": 1.1009, "step": 8386 }, { "epoch": 1.18737169958236, "grad_norm": 8.665480556996906, "learning_rate": 1.8694851678137726e-06, "loss": 1.0255, "step": 8387 }, { "epoch": 1.1875132724569972, "grad_norm": 8.780947642703847, "learning_rate": 1.868930514229054e-06, "loss": 0.9973, "step": 8388 }, { "epoch": 1.1876548453316345, "grad_norm": 7.6794806110071, "learning_rate": 1.8683758938191877e-06, "loss": 1.0364, "step": 8389 }, { "epoch": 1.1877964182062717, "grad_norm": 11.074094396898817, "learning_rate": 1.86782130661333e-06, "loss": 1.0479, "step": 8390 }, { "epoch": 1.187937991080909, "grad_norm": 11.867593804762773, "learning_rate": 1.8672667526406345e-06, "loss": 0.9321, "step": 8391 }, { "epoch": 1.1880795639555461, "grad_norm": 7.542829420402081, "learning_rate": 1.8667122319302542e-06, "loss": 0.857, "step": 8392 }, { "epoch": 1.1882211368301834, "grad_norm": 9.209379341013143, "learning_rate": 1.8661577445113399e-06, "loss": 1.1485, "step": 8393 }, { "epoch": 1.1883627097048206, "grad_norm": 9.677958065338634, "learning_rate": 1.8656032904130402e-06, "loss": 0.9217, "step": 8394 }, { "epoch": 1.1885042825794578, "grad_norm": 9.22545770269523, "learning_rate": 1.8650488696645025e-06, "loss": 1.0739, "step": 8395 }, { "epoch": 1.188645855454095, "grad_norm": 8.190870683195293, "learning_rate": 1.864494482294872e-06, "loss": 1.1016, "step": 8396 }, { "epoch": 1.1887874283287323, "grad_norm": 8.698054865730937, "learning_rate": 1.863940128333293e-06, "loss": 0.9625, "step": 8397 }, { "epoch": 1.1889290012033695, "grad_norm": 8.979196877366038, "learning_rate": 1.863385807808907e-06, "loss": 1.0781, "step": 8398 }, { "epoch": 1.1890705740780065, "grad_norm": 8.909527734298951, "learning_rate": 1.8628315207508547e-06, "loss": 1.1176, "step": 8399 }, { "epoch": 1.1892121469526438, "grad_norm": 8.61957102759221, "learning_rate": 1.8622772671882738e-06, "loss": 1.043, "step": 8400 }, { "epoch": 1.189353719827281, "grad_norm": 9.325931974990405, "learning_rate": 1.861723047150301e-06, "loss": 1.0411, "step": 8401 }, { "epoch": 1.1894952927019182, "grad_norm": 9.243022039403478, "learning_rate": 1.8611688606660728e-06, "loss": 1.0553, "step": 8402 }, { "epoch": 1.1896368655765555, "grad_norm": 8.484165027471997, "learning_rate": 1.8606147077647216e-06, "loss": 1.0197, "step": 8403 }, { "epoch": 1.1897784384511927, "grad_norm": 10.940996581160459, "learning_rate": 1.8600605884753775e-06, "loss": 1.1583, "step": 8404 }, { "epoch": 1.18992001132583, "grad_norm": 10.398142179197619, "learning_rate": 1.8595065028271713e-06, "loss": 1.0532, "step": 8405 }, { "epoch": 1.1900615842004671, "grad_norm": 10.541481187317018, "learning_rate": 1.8589524508492308e-06, "loss": 1.1411, "step": 8406 }, { "epoch": 1.1902031570751044, "grad_norm": 9.975609216275457, "learning_rate": 1.8583984325706813e-06, "loss": 1.0064, "step": 8407 }, { "epoch": 1.1903447299497416, "grad_norm": 9.032848599461898, "learning_rate": 1.8578444480206487e-06, "loss": 1.1194, "step": 8408 }, { "epoch": 1.1904863028243788, "grad_norm": 8.593717595819589, "learning_rate": 1.8572904972282541e-06, "loss": 0.9969, "step": 8409 }, { "epoch": 1.190627875699016, "grad_norm": 9.972404934039584, "learning_rate": 1.856736580222619e-06, "loss": 0.9462, "step": 8410 }, { "epoch": 1.1907694485736533, "grad_norm": 9.86751340205556, "learning_rate": 1.8561826970328623e-06, "loss": 1.0164, "step": 8411 }, { "epoch": 1.1909110214482905, "grad_norm": 10.392440660209543, "learning_rate": 1.8556288476881012e-06, "loss": 1.1156, "step": 8412 }, { "epoch": 1.1910525943229278, "grad_norm": 9.3944788687469, "learning_rate": 1.855075032217451e-06, "loss": 0.9336, "step": 8413 }, { "epoch": 1.191194167197565, "grad_norm": 9.643858826295801, "learning_rate": 1.854521250650026e-06, "loss": 1.026, "step": 8414 }, { "epoch": 1.1913357400722022, "grad_norm": 10.207709551586525, "learning_rate": 1.8539675030149373e-06, "loss": 1.0191, "step": 8415 }, { "epoch": 1.1914773129468395, "grad_norm": 11.684901637135473, "learning_rate": 1.853413789341295e-06, "loss": 1.1757, "step": 8416 }, { "epoch": 1.1916188858214767, "grad_norm": 10.041383942440016, "learning_rate": 1.8528601096582078e-06, "loss": 1.0263, "step": 8417 }, { "epoch": 1.191760458696114, "grad_norm": 8.218628392933507, "learning_rate": 1.8523064639947818e-06, "loss": 1.0122, "step": 8418 }, { "epoch": 1.1919020315707511, "grad_norm": 7.842926103704157, "learning_rate": 1.8517528523801226e-06, "loss": 0.9544, "step": 8419 }, { "epoch": 1.1920436044453884, "grad_norm": 9.093552420131298, "learning_rate": 1.8511992748433321e-06, "loss": 1.0281, "step": 8420 }, { "epoch": 1.1921851773200256, "grad_norm": 10.367519761419217, "learning_rate": 1.8506457314135123e-06, "loss": 0.9604, "step": 8421 }, { "epoch": 1.1923267501946626, "grad_norm": 8.051776230364775, "learning_rate": 1.850092222119762e-06, "loss": 0.9799, "step": 8422 }, { "epoch": 1.1924683230692998, "grad_norm": 8.95352166646921, "learning_rate": 1.849538746991179e-06, "loss": 0.9513, "step": 8423 }, { "epoch": 1.192609895943937, "grad_norm": 8.71759222403585, "learning_rate": 1.8489853060568597e-06, "loss": 0.9722, "step": 8424 }, { "epoch": 1.1927514688185743, "grad_norm": 9.17758993654963, "learning_rate": 1.848431899345897e-06, "loss": 0.9069, "step": 8425 }, { "epoch": 1.1928930416932115, "grad_norm": 9.466395321391316, "learning_rate": 1.8478785268873834e-06, "loss": 1.0003, "step": 8426 }, { "epoch": 1.1930346145678488, "grad_norm": 8.887607270142018, "learning_rate": 1.8473251887104093e-06, "loss": 0.9416, "step": 8427 }, { "epoch": 1.193176187442486, "grad_norm": 7.427507628260714, "learning_rate": 1.8467718848440636e-06, "loss": 0.9518, "step": 8428 }, { "epoch": 1.1933177603171232, "grad_norm": 9.840454324918174, "learning_rate": 1.8462186153174327e-06, "loss": 1.0905, "step": 8429 }, { "epoch": 1.1934593331917605, "grad_norm": 8.325285064123978, "learning_rate": 1.8456653801596013e-06, "loss": 0.8983, "step": 8430 }, { "epoch": 1.1936009060663977, "grad_norm": 8.662418742713902, "learning_rate": 1.8451121793996534e-06, "loss": 1.0154, "step": 8431 }, { "epoch": 1.193742478941035, "grad_norm": 9.345975987069457, "learning_rate": 1.84455901306667e-06, "loss": 1.0191, "step": 8432 }, { "epoch": 1.1938840518156721, "grad_norm": 8.995095506111234, "learning_rate": 1.8440058811897304e-06, "loss": 0.9838, "step": 8433 }, { "epoch": 1.1940256246903094, "grad_norm": 10.97945774761398, "learning_rate": 1.8434527837979128e-06, "loss": 0.916, "step": 8434 }, { "epoch": 1.1941671975649466, "grad_norm": 8.824577118248667, "learning_rate": 1.8428997209202935e-06, "loss": 1.1356, "step": 8435 }, { "epoch": 1.1943087704395838, "grad_norm": 7.489718447029333, "learning_rate": 1.8423466925859445e-06, "loss": 0.9395, "step": 8436 }, { "epoch": 1.194450343314221, "grad_norm": 8.700074680051692, "learning_rate": 1.84179369882394e-06, "loss": 0.9982, "step": 8437 }, { "epoch": 1.1945919161888583, "grad_norm": 9.770196976642914, "learning_rate": 1.84124073966335e-06, "loss": 1.0072, "step": 8438 }, { "epoch": 1.1947334890634955, "grad_norm": 9.896742436997025, "learning_rate": 1.8406878151332431e-06, "loss": 1.0414, "step": 8439 }, { "epoch": 1.1948750619381325, "grad_norm": 9.428187341834823, "learning_rate": 1.840134925262686e-06, "loss": 1.0463, "step": 8440 }, { "epoch": 1.1950166348127698, "grad_norm": 12.400834676891462, "learning_rate": 1.8395820700807444e-06, "loss": 1.2321, "step": 8441 }, { "epoch": 1.195158207687407, "grad_norm": 10.067449833129446, "learning_rate": 1.8390292496164805e-06, "loss": 1.0776, "step": 8442 }, { "epoch": 1.1952997805620442, "grad_norm": 9.489550012948042, "learning_rate": 1.838476463898956e-06, "loss": 1.0913, "step": 8443 }, { "epoch": 1.1954413534366815, "grad_norm": 9.41557220130193, "learning_rate": 1.8379237129572307e-06, "loss": 0.969, "step": 8444 }, { "epoch": 1.1955829263113187, "grad_norm": 8.100176171164813, "learning_rate": 1.8373709968203624e-06, "loss": 1.047, "step": 8445 }, { "epoch": 1.195724499185956, "grad_norm": 11.822466808392692, "learning_rate": 1.8368183155174069e-06, "loss": 0.9243, "step": 8446 }, { "epoch": 1.1958660720605931, "grad_norm": 8.991372105461279, "learning_rate": 1.8362656690774177e-06, "loss": 1.0711, "step": 8447 }, { "epoch": 1.1960076449352304, "grad_norm": 9.87166606782208, "learning_rate": 1.8357130575294474e-06, "loss": 1.0547, "step": 8448 }, { "epoch": 1.1961492178098676, "grad_norm": 9.613479655612071, "learning_rate": 1.8351604809025465e-06, "loss": 0.9087, "step": 8449 }, { "epoch": 1.1962907906845048, "grad_norm": 9.315114313592344, "learning_rate": 1.8346079392257632e-06, "loss": 0.9894, "step": 8450 }, { "epoch": 1.196432363559142, "grad_norm": 11.338184515194042, "learning_rate": 1.834055432528144e-06, "loss": 0.9865, "step": 8451 }, { "epoch": 1.1965739364337793, "grad_norm": 8.795858049982913, "learning_rate": 1.8335029608387342e-06, "loss": 1.063, "step": 8452 }, { "epoch": 1.1967155093084165, "grad_norm": 9.493275320254456, "learning_rate": 1.8329505241865772e-06, "loss": 0.9676, "step": 8453 }, { "epoch": 1.1968570821830538, "grad_norm": 8.69973880770369, "learning_rate": 1.8323981226007136e-06, "loss": 0.9026, "step": 8454 }, { "epoch": 1.196998655057691, "grad_norm": 10.845848562020098, "learning_rate": 1.8318457561101833e-06, "loss": 1.0866, "step": 8455 }, { "epoch": 1.1971402279323282, "grad_norm": 9.405958811159369, "learning_rate": 1.831293424744024e-06, "loss": 0.9801, "step": 8456 }, { "epoch": 1.1972818008069654, "grad_norm": 11.560154032489482, "learning_rate": 1.8307411285312699e-06, "loss": 1.0029, "step": 8457 }, { "epoch": 1.1974233736816027, "grad_norm": 9.449168286734825, "learning_rate": 1.8301888675009554e-06, "loss": 0.9909, "step": 8458 }, { "epoch": 1.19756494655624, "grad_norm": 10.470588545411603, "learning_rate": 1.8296366416821127e-06, "loss": 1.0513, "step": 8459 }, { "epoch": 1.1977065194308771, "grad_norm": 10.318803929650675, "learning_rate": 1.829084451103772e-06, "loss": 1.0241, "step": 8460 }, { "epoch": 1.1978480923055144, "grad_norm": 7.5630273556283285, "learning_rate": 1.8285322957949615e-06, "loss": 0.8926, "step": 8461 }, { "epoch": 1.1979896651801516, "grad_norm": 9.24844347255467, "learning_rate": 1.8279801757847077e-06, "loss": 0.9094, "step": 8462 }, { "epoch": 1.1981312380547886, "grad_norm": 11.606314131108158, "learning_rate": 1.8274280911020349e-06, "loss": 1.0174, "step": 8463 }, { "epoch": 1.1982728109294258, "grad_norm": 10.973098110429582, "learning_rate": 1.8268760417759659e-06, "loss": 1.1189, "step": 8464 }, { "epoch": 1.198414383804063, "grad_norm": 9.963640105177817, "learning_rate": 1.8263240278355216e-06, "loss": 1.0906, "step": 8465 }, { "epoch": 1.1985559566787003, "grad_norm": 10.2152918759256, "learning_rate": 1.8257720493097207e-06, "loss": 0.9286, "step": 8466 }, { "epoch": 1.1986975295533375, "grad_norm": 9.900023227240547, "learning_rate": 1.825220106227581e-06, "loss": 1.0565, "step": 8467 }, { "epoch": 1.1988391024279748, "grad_norm": 9.160413192691683, "learning_rate": 1.8246681986181165e-06, "loss": 0.9151, "step": 8468 }, { "epoch": 1.198980675302612, "grad_norm": 9.330252502230103, "learning_rate": 1.8241163265103411e-06, "loss": 0.9403, "step": 8469 }, { "epoch": 1.1991222481772492, "grad_norm": 10.2582554020673, "learning_rate": 1.8235644899332669e-06, "loss": 1.0111, "step": 8470 }, { "epoch": 1.1992638210518864, "grad_norm": 9.780355296789851, "learning_rate": 1.8230126889159027e-06, "loss": 1.0563, "step": 8471 }, { "epoch": 1.1994053939265237, "grad_norm": 9.748936130484676, "learning_rate": 1.822460923487257e-06, "loss": 0.9082, "step": 8472 }, { "epoch": 1.199546966801161, "grad_norm": 9.80572236819447, "learning_rate": 1.8219091936763353e-06, "loss": 0.972, "step": 8473 }, { "epoch": 1.1996885396757981, "grad_norm": 9.567326941837607, "learning_rate": 1.8213574995121417e-06, "loss": 0.9879, "step": 8474 }, { "epoch": 1.1998301125504354, "grad_norm": 8.190758908314944, "learning_rate": 1.8208058410236777e-06, "loss": 0.8931, "step": 8475 }, { "epoch": 1.1999716854250726, "grad_norm": 10.712040082746844, "learning_rate": 1.8202542182399446e-06, "loss": 1.0079, "step": 8476 }, { "epoch": 1.2001132582997098, "grad_norm": 7.9683557749452625, "learning_rate": 1.8197026311899419e-06, "loss": 1.0501, "step": 8477 }, { "epoch": 1.200254831174347, "grad_norm": 9.198911469788364, "learning_rate": 1.8191510799026629e-06, "loss": 1.0688, "step": 8478 }, { "epoch": 1.2003964040489843, "grad_norm": 8.849491910843138, "learning_rate": 1.8185995644071047e-06, "loss": 1.096, "step": 8479 }, { "epoch": 1.2005379769236215, "grad_norm": 9.582531262433106, "learning_rate": 1.818048084732259e-06, "loss": 0.955, "step": 8480 }, { "epoch": 1.2006795497982587, "grad_norm": 8.406893000065528, "learning_rate": 1.8174966409071162e-06, "loss": 0.92, "step": 8481 }, { "epoch": 1.2008211226728958, "grad_norm": 9.2377192449297, "learning_rate": 1.8169452329606667e-06, "loss": 1.1017, "step": 8482 }, { "epoch": 1.200962695547533, "grad_norm": 8.975613934868884, "learning_rate": 1.8163938609218974e-06, "loss": 1.0914, "step": 8483 }, { "epoch": 1.2011042684221702, "grad_norm": 9.429306414472265, "learning_rate": 1.8158425248197931e-06, "loss": 1.0969, "step": 8484 }, { "epoch": 1.2012458412968074, "grad_norm": 8.458118254333932, "learning_rate": 1.8152912246833368e-06, "loss": 1.0557, "step": 8485 }, { "epoch": 1.2013874141714447, "grad_norm": 8.710235047349649, "learning_rate": 1.8147399605415104e-06, "loss": 1.0271, "step": 8486 }, { "epoch": 1.201528987046082, "grad_norm": 8.863907450196248, "learning_rate": 1.8141887324232932e-06, "loss": 0.9685, "step": 8487 }, { "epoch": 1.2016705599207191, "grad_norm": 8.269614744057714, "learning_rate": 1.8136375403576636e-06, "loss": 1.0167, "step": 8488 }, { "epoch": 1.2018121327953564, "grad_norm": 8.845035742584518, "learning_rate": 1.8130863843735964e-06, "loss": 1.0011, "step": 8489 }, { "epoch": 1.2019537056699936, "grad_norm": 9.690662679934203, "learning_rate": 1.8125352645000654e-06, "loss": 1.0633, "step": 8490 }, { "epoch": 1.2020952785446308, "grad_norm": 8.444928099629395, "learning_rate": 1.8119841807660432e-06, "loss": 1.0055, "step": 8491 }, { "epoch": 1.202236851419268, "grad_norm": 11.033103030779372, "learning_rate": 1.8114331332004998e-06, "loss": 1.0047, "step": 8492 }, { "epoch": 1.2023784242939053, "grad_norm": 9.103293323892201, "learning_rate": 1.810882121832403e-06, "loss": 1.0614, "step": 8493 }, { "epoch": 1.2025199971685425, "grad_norm": 8.737042669680422, "learning_rate": 1.8103311466907191e-06, "loss": 0.9373, "step": 8494 }, { "epoch": 1.2026615700431798, "grad_norm": 8.128472627790671, "learning_rate": 1.8097802078044125e-06, "loss": 1.0737, "step": 8495 }, { "epoch": 1.202803142917817, "grad_norm": 10.080761373920607, "learning_rate": 1.809229305202446e-06, "loss": 1.0834, "step": 8496 }, { "epoch": 1.2029447157924542, "grad_norm": 9.147597250722496, "learning_rate": 1.8086784389137796e-06, "loss": 1.0507, "step": 8497 }, { "epoch": 1.2030862886670914, "grad_norm": 10.06873654270516, "learning_rate": 1.8081276089673719e-06, "loss": 1.0707, "step": 8498 }, { "epoch": 1.2032278615417287, "grad_norm": 10.906791782858464, "learning_rate": 1.8075768153921813e-06, "loss": 1.0575, "step": 8499 }, { "epoch": 1.203369434416366, "grad_norm": 8.851997762080762, "learning_rate": 1.8070260582171605e-06, "loss": 1.0014, "step": 8500 }, { "epoch": 1.2035110072910031, "grad_norm": 9.820446902670167, "learning_rate": 1.8064753374712629e-06, "loss": 1.1144, "step": 8501 }, { "epoch": 1.2036525801656404, "grad_norm": 8.156937493475167, "learning_rate": 1.8059246531834395e-06, "loss": 0.8863, "step": 8502 }, { "epoch": 1.2037941530402776, "grad_norm": 9.224302733827255, "learning_rate": 1.8053740053826399e-06, "loss": 0.9502, "step": 8503 }, { "epoch": 1.2039357259149148, "grad_norm": 8.327541423913313, "learning_rate": 1.8048233940978103e-06, "loss": 1.0274, "step": 8504 }, { "epoch": 1.2040772987895518, "grad_norm": 9.378290844944152, "learning_rate": 1.8042728193578968e-06, "loss": 1.096, "step": 8505 }, { "epoch": 1.204218871664189, "grad_norm": 9.622919031079444, "learning_rate": 1.8037222811918426e-06, "loss": 1.0316, "step": 8506 }, { "epoch": 1.2043604445388263, "grad_norm": 10.332640060631096, "learning_rate": 1.803171779628589e-06, "loss": 1.0074, "step": 8507 }, { "epoch": 1.2045020174134635, "grad_norm": 10.166267220409729, "learning_rate": 1.8026213146970752e-06, "loss": 1.0473, "step": 8508 }, { "epoch": 1.2046435902881008, "grad_norm": 9.188669143298174, "learning_rate": 1.80207088642624e-06, "loss": 1.0144, "step": 8509 }, { "epoch": 1.204785163162738, "grad_norm": 9.203208314165543, "learning_rate": 1.8015204948450166e-06, "loss": 1.0924, "step": 8510 }, { "epoch": 1.2049267360373752, "grad_norm": 8.525166672674207, "learning_rate": 1.80097013998234e-06, "loss": 1.0815, "step": 8511 }, { "epoch": 1.2050683089120124, "grad_norm": 9.435434683857434, "learning_rate": 1.8004198218671423e-06, "loss": 1.005, "step": 8512 }, { "epoch": 1.2052098817866497, "grad_norm": 7.970364754824661, "learning_rate": 1.7998695405283528e-06, "loss": 0.953, "step": 8513 }, { "epoch": 1.205351454661287, "grad_norm": 8.208617023142034, "learning_rate": 1.7993192959948996e-06, "loss": 1.0734, "step": 8514 }, { "epoch": 1.2054930275359241, "grad_norm": 10.46516270452251, "learning_rate": 1.7987690882957084e-06, "loss": 1.0637, "step": 8515 }, { "epoch": 1.2056346004105614, "grad_norm": 8.247452313625544, "learning_rate": 1.7982189174597037e-06, "loss": 0.9523, "step": 8516 }, { "epoch": 1.2057761732851986, "grad_norm": 9.579190716098653, "learning_rate": 1.797668783515807e-06, "loss": 0.9919, "step": 8517 }, { "epoch": 1.2059177461598358, "grad_norm": 9.890379699268534, "learning_rate": 1.7971186864929386e-06, "loss": 1.1501, "step": 8518 }, { "epoch": 1.206059319034473, "grad_norm": 11.894086095939326, "learning_rate": 1.7965686264200165e-06, "loss": 1.0604, "step": 8519 }, { "epoch": 1.2062008919091103, "grad_norm": 8.485550210677543, "learning_rate": 1.7960186033259585e-06, "loss": 1.0043, "step": 8520 }, { "epoch": 1.2063424647837475, "grad_norm": 10.136129784792624, "learning_rate": 1.7954686172396764e-06, "loss": 0.9925, "step": 8521 }, { "epoch": 1.2064840376583847, "grad_norm": 8.920235558380293, "learning_rate": 1.7949186681900843e-06, "loss": 0.9819, "step": 8522 }, { "epoch": 1.2066256105330218, "grad_norm": 9.14431849789888, "learning_rate": 1.7943687562060919e-06, "loss": 0.9472, "step": 8523 }, { "epoch": 1.206767183407659, "grad_norm": 9.374221159054612, "learning_rate": 1.7938188813166074e-06, "loss": 1.0098, "step": 8524 }, { "epoch": 1.2069087562822962, "grad_norm": 11.049859880620552, "learning_rate": 1.7932690435505385e-06, "loss": 1.0082, "step": 8525 }, { "epoch": 1.2070503291569334, "grad_norm": 9.894195828059216, "learning_rate": 1.7927192429367885e-06, "loss": 1.0714, "step": 8526 }, { "epoch": 1.2071919020315707, "grad_norm": 9.27727878266726, "learning_rate": 1.79216947950426e-06, "loss": 1.0928, "step": 8527 }, { "epoch": 1.207333474906208, "grad_norm": 10.034445755672564, "learning_rate": 1.7916197532818548e-06, "loss": 1.168, "step": 8528 }, { "epoch": 1.2074750477808451, "grad_norm": 9.146396581964414, "learning_rate": 1.791070064298471e-06, "loss": 0.9499, "step": 8529 }, { "epoch": 1.2076166206554824, "grad_norm": 11.37497987850212, "learning_rate": 1.7905204125830066e-06, "loss": 1.064, "step": 8530 }, { "epoch": 1.2077581935301196, "grad_norm": 9.93778828136789, "learning_rate": 1.7899707981643538e-06, "loss": 1.022, "step": 8531 }, { "epoch": 1.2078997664047568, "grad_norm": 12.245645177191117, "learning_rate": 1.7894212210714068e-06, "loss": 1.0483, "step": 8532 }, { "epoch": 1.208041339279394, "grad_norm": 9.274805985141237, "learning_rate": 1.788871681333056e-06, "loss": 1.119, "step": 8533 }, { "epoch": 1.2081829121540313, "grad_norm": 9.192986958345399, "learning_rate": 1.7883221789781908e-06, "loss": 0.9025, "step": 8534 }, { "epoch": 1.2083244850286685, "grad_norm": 8.738554089532409, "learning_rate": 1.7877727140356982e-06, "loss": 1.0451, "step": 8535 }, { "epoch": 1.2084660579033057, "grad_norm": 8.847016826370018, "learning_rate": 1.787223286534463e-06, "loss": 1.0872, "step": 8536 }, { "epoch": 1.208607630777943, "grad_norm": 10.104723182899537, "learning_rate": 1.7866738965033681e-06, "loss": 1.0752, "step": 8537 }, { "epoch": 1.2087492036525802, "grad_norm": 11.668166218616935, "learning_rate": 1.7861245439712945e-06, "loss": 0.9805, "step": 8538 }, { "epoch": 1.2088907765272174, "grad_norm": 10.609738867563642, "learning_rate": 1.7855752289671215e-06, "loss": 1.0796, "step": 8539 }, { "epoch": 1.2090323494018547, "grad_norm": 9.241418930691275, "learning_rate": 1.785025951519726e-06, "loss": 1.0158, "step": 8540 }, { "epoch": 1.209173922276492, "grad_norm": 9.02490645172582, "learning_rate": 1.7844767116579836e-06, "loss": 1.0432, "step": 8541 }, { "epoch": 1.2093154951511291, "grad_norm": 9.838670176765149, "learning_rate": 1.7839275094107666e-06, "loss": 0.8862, "step": 8542 }, { "epoch": 1.2094570680257664, "grad_norm": 9.365678145442722, "learning_rate": 1.7833783448069464e-06, "loss": 0.9995, "step": 8543 }, { "epoch": 1.2095986409004036, "grad_norm": 9.299446122278054, "learning_rate": 1.782829217875392e-06, "loss": 1.0153, "step": 8544 }, { "epoch": 1.2097402137750408, "grad_norm": 8.863947043498252, "learning_rate": 1.782280128644971e-06, "loss": 1.0726, "step": 8545 }, { "epoch": 1.2098817866496778, "grad_norm": 13.122033937549586, "learning_rate": 1.781731077144549e-06, "loss": 1.0847, "step": 8546 }, { "epoch": 1.210023359524315, "grad_norm": 9.897953446135974, "learning_rate": 1.781182063402988e-06, "loss": 1.1131, "step": 8547 }, { "epoch": 1.2101649323989523, "grad_norm": 8.50266089583551, "learning_rate": 1.7806330874491504e-06, "loss": 0.9699, "step": 8548 }, { "epoch": 1.2103065052735895, "grad_norm": 9.644932704154849, "learning_rate": 1.7800841493118942e-06, "loss": 1.0342, "step": 8549 }, { "epoch": 1.2104480781482267, "grad_norm": 9.084491014346709, "learning_rate": 1.7795352490200782e-06, "loss": 1.0142, "step": 8550 }, { "epoch": 1.210589651022864, "grad_norm": 8.618865996998604, "learning_rate": 1.778986386602558e-06, "loss": 0.9499, "step": 8551 }, { "epoch": 1.2107312238975012, "grad_norm": 11.022566795432253, "learning_rate": 1.7784375620881847e-06, "loss": 1.1112, "step": 8552 }, { "epoch": 1.2108727967721384, "grad_norm": 10.354829599405045, "learning_rate": 1.7778887755058108e-06, "loss": 1.0095, "step": 8553 }, { "epoch": 1.2110143696467757, "grad_norm": 8.866830423702448, "learning_rate": 1.7773400268842855e-06, "loss": 0.8931, "step": 8554 }, { "epoch": 1.211155942521413, "grad_norm": 8.20765871738669, "learning_rate": 1.7767913162524562e-06, "loss": 0.9572, "step": 8555 }, { "epoch": 1.2112975153960501, "grad_norm": 9.192239589825638, "learning_rate": 1.7762426436391675e-06, "loss": 0.9966, "step": 8556 }, { "epoch": 1.2114390882706874, "grad_norm": 8.476657005521565, "learning_rate": 1.775694009073264e-06, "loss": 1.0065, "step": 8557 }, { "epoch": 1.2115806611453246, "grad_norm": 10.03553181469278, "learning_rate": 1.7751454125835866e-06, "loss": 1.1265, "step": 8558 }, { "epoch": 1.2117222340199618, "grad_norm": 8.641070004363609, "learning_rate": 1.774596854198974e-06, "loss": 1.0001, "step": 8559 }, { "epoch": 1.211863806894599, "grad_norm": 10.427752483185719, "learning_rate": 1.774048333948264e-06, "loss": 0.9759, "step": 8560 }, { "epoch": 1.2120053797692363, "grad_norm": 9.096224209024305, "learning_rate": 1.773499851860292e-06, "loss": 1.0225, "step": 8561 }, { "epoch": 1.2121469526438735, "grad_norm": 9.51999768040733, "learning_rate": 1.7729514079638915e-06, "loss": 1.106, "step": 8562 }, { "epoch": 1.2122885255185107, "grad_norm": 9.808528023351071, "learning_rate": 1.7724030022878928e-06, "loss": 1.0409, "step": 8563 }, { "epoch": 1.2124300983931477, "grad_norm": 10.022798489593027, "learning_rate": 1.7718546348611254e-06, "loss": 0.9684, "step": 8564 }, { "epoch": 1.212571671267785, "grad_norm": 9.123126111904348, "learning_rate": 1.7713063057124174e-06, "loss": 1.0391, "step": 8565 }, { "epoch": 1.2127132441424222, "grad_norm": 9.268636127371765, "learning_rate": 1.7707580148705936e-06, "loss": 0.9213, "step": 8566 }, { "epoch": 1.2128548170170594, "grad_norm": 8.495203796780165, "learning_rate": 1.770209762364477e-06, "loss": 1.124, "step": 8567 }, { "epoch": 1.2129963898916967, "grad_norm": 9.186356453864663, "learning_rate": 1.7696615482228891e-06, "loss": 0.986, "step": 8568 }, { "epoch": 1.213137962766334, "grad_norm": 7.326407745803101, "learning_rate": 1.769113372474649e-06, "loss": 1.1681, "step": 8569 }, { "epoch": 1.2132795356409711, "grad_norm": 8.727630358650972, "learning_rate": 1.768565235148574e-06, "loss": 0.9633, "step": 8570 }, { "epoch": 1.2134211085156084, "grad_norm": 9.583175348278846, "learning_rate": 1.7680171362734794e-06, "loss": 0.9056, "step": 8571 }, { "epoch": 1.2135626813902456, "grad_norm": 11.054470445638547, "learning_rate": 1.767469075878177e-06, "loss": 1.1372, "step": 8572 }, { "epoch": 1.2137042542648828, "grad_norm": 11.058600304108795, "learning_rate": 1.7669210539914813e-06, "loss": 1.1413, "step": 8573 }, { "epoch": 1.21384582713952, "grad_norm": 8.171315277945284, "learning_rate": 1.7663730706421978e-06, "loss": 1.0095, "step": 8574 }, { "epoch": 1.2139874000141573, "grad_norm": 9.939600308708343, "learning_rate": 1.7658251258591352e-06, "loss": 0.989, "step": 8575 }, { "epoch": 1.2141289728887945, "grad_norm": 8.998667724334991, "learning_rate": 1.7652772196710982e-06, "loss": 1.0257, "step": 8576 }, { "epoch": 1.2142705457634317, "grad_norm": 9.994379753038666, "learning_rate": 1.7647293521068898e-06, "loss": 1.0341, "step": 8577 }, { "epoch": 1.214412118638069, "grad_norm": 9.601670962187429, "learning_rate": 1.7641815231953107e-06, "loss": 1.0089, "step": 8578 }, { "epoch": 1.2145536915127062, "grad_norm": 9.580979785231754, "learning_rate": 1.763633732965161e-06, "loss": 1.0846, "step": 8579 }, { "epoch": 1.2146952643873434, "grad_norm": 9.809235047857763, "learning_rate": 1.7630859814452367e-06, "loss": 0.9812, "step": 8580 }, { "epoch": 1.2148368372619807, "grad_norm": 8.940670704783125, "learning_rate": 1.7625382686643328e-06, "loss": 1.0715, "step": 8581 }, { "epoch": 1.214978410136618, "grad_norm": 9.988050187369412, "learning_rate": 1.7619905946512421e-06, "loss": 1.0506, "step": 8582 }, { "epoch": 1.2151199830112551, "grad_norm": 10.456023996011858, "learning_rate": 1.761442959434757e-06, "loss": 1.0251, "step": 8583 }, { "epoch": 1.2152615558858924, "grad_norm": 10.251496740710826, "learning_rate": 1.7608953630436632e-06, "loss": 1.0419, "step": 8584 }, { "epoch": 1.2154031287605296, "grad_norm": 7.841179293473804, "learning_rate": 1.7603478055067493e-06, "loss": 0.8789, "step": 8585 }, { "epoch": 1.2155447016351668, "grad_norm": 8.576379372561002, "learning_rate": 1.7598002868528002e-06, "loss": 0.9773, "step": 8586 }, { "epoch": 1.215686274509804, "grad_norm": 9.489883658175327, "learning_rate": 1.7592528071105978e-06, "loss": 1.0412, "step": 8587 }, { "epoch": 1.215827847384441, "grad_norm": 8.580562502333125, "learning_rate": 1.7587053663089233e-06, "loss": 1.0493, "step": 8588 }, { "epoch": 1.2159694202590783, "grad_norm": 9.918633359476827, "learning_rate": 1.7581579644765544e-06, "loss": 1.0383, "step": 8589 }, { "epoch": 1.2161109931337155, "grad_norm": 10.075762425958418, "learning_rate": 1.7576106016422684e-06, "loss": 0.9581, "step": 8590 }, { "epoch": 1.2162525660083527, "grad_norm": 8.661514609301795, "learning_rate": 1.7570632778348394e-06, "loss": 1.0034, "step": 8591 }, { "epoch": 1.21639413888299, "grad_norm": 10.765718899830878, "learning_rate": 1.7565159930830405e-06, "loss": 0.9802, "step": 8592 }, { "epoch": 1.2165357117576272, "grad_norm": 9.585778319786753, "learning_rate": 1.7559687474156412e-06, "loss": 1.0421, "step": 8593 }, { "epoch": 1.2166772846322644, "grad_norm": 9.661375592778006, "learning_rate": 1.7554215408614102e-06, "loss": 1.0471, "step": 8594 }, { "epoch": 1.2168188575069017, "grad_norm": 9.330758239093234, "learning_rate": 1.7548743734491136e-06, "loss": 1.072, "step": 8595 }, { "epoch": 1.216960430381539, "grad_norm": 8.369968098314168, "learning_rate": 1.7543272452075156e-06, "loss": 0.9833, "step": 8596 }, { "epoch": 1.2171020032561761, "grad_norm": 9.237868730879695, "learning_rate": 1.7537801561653777e-06, "loss": 1.0365, "step": 8597 }, { "epoch": 1.2172435761308134, "grad_norm": 13.367312494849552, "learning_rate": 1.7532331063514613e-06, "loss": 1.0782, "step": 8598 }, { "epoch": 1.2173851490054506, "grad_norm": 8.40059973982629, "learning_rate": 1.7526860957945233e-06, "loss": 0.9536, "step": 8599 }, { "epoch": 1.2175267218800878, "grad_norm": 10.0680621394656, "learning_rate": 1.7521391245233202e-06, "loss": 1.099, "step": 8600 }, { "epoch": 1.217668294754725, "grad_norm": 10.697476911312611, "learning_rate": 1.7515921925666053e-06, "loss": 1.0487, "step": 8601 }, { "epoch": 1.2178098676293623, "grad_norm": 9.586556287969229, "learning_rate": 1.7510452999531308e-06, "loss": 1.0333, "step": 8602 }, { "epoch": 1.2179514405039995, "grad_norm": 9.122157660114002, "learning_rate": 1.7504984467116467e-06, "loss": 1.0818, "step": 8603 }, { "epoch": 1.2180930133786367, "grad_norm": 9.224444166356186, "learning_rate": 1.7499516328709016e-06, "loss": 0.925, "step": 8604 }, { "epoch": 1.218234586253274, "grad_norm": 11.961459257480225, "learning_rate": 1.7494048584596388e-06, "loss": 1.1279, "step": 8605 }, { "epoch": 1.218376159127911, "grad_norm": 8.809055385630707, "learning_rate": 1.7488581235066027e-06, "loss": 1.0624, "step": 8606 }, { "epoch": 1.2185177320025482, "grad_norm": 8.748686337632487, "learning_rate": 1.7483114280405348e-06, "loss": 1.1313, "step": 8607 }, { "epoch": 1.2186593048771854, "grad_norm": 10.152938161946643, "learning_rate": 1.747764772090175e-06, "loss": 0.9986, "step": 8608 }, { "epoch": 1.2188008777518227, "grad_norm": 8.851685323645867, "learning_rate": 1.7472181556842602e-06, "loss": 1.07, "step": 8609 }, { "epoch": 1.21894245062646, "grad_norm": 9.823316310440225, "learning_rate": 1.7466715788515256e-06, "loss": 1.1456, "step": 8610 }, { "epoch": 1.2190840235010971, "grad_norm": 7.834198139691347, "learning_rate": 1.7461250416207045e-06, "loss": 1.1363, "step": 8611 }, { "epoch": 1.2192255963757344, "grad_norm": 7.741813519757954, "learning_rate": 1.745578544020528e-06, "loss": 1.0277, "step": 8612 }, { "epoch": 1.2193671692503716, "grad_norm": 8.287272095459379, "learning_rate": 1.7450320860797248e-06, "loss": 1.0458, "step": 8613 }, { "epoch": 1.2195087421250088, "grad_norm": 8.652380784345121, "learning_rate": 1.7444856678270218e-06, "loss": 1.0445, "step": 8614 }, { "epoch": 1.219650314999646, "grad_norm": 9.792309246216398, "learning_rate": 1.7439392892911443e-06, "loss": 1.041, "step": 8615 }, { "epoch": 1.2197918878742833, "grad_norm": 7.264240617018641, "learning_rate": 1.7433929505008145e-06, "loss": 0.9001, "step": 8616 }, { "epoch": 1.2199334607489205, "grad_norm": 9.312582335652168, "learning_rate": 1.7428466514847531e-06, "loss": 1.128, "step": 8617 }, { "epoch": 1.2200750336235577, "grad_norm": 7.419082790967392, "learning_rate": 1.7423003922716784e-06, "loss": 0.9174, "step": 8618 }, { "epoch": 1.220216606498195, "grad_norm": 10.691218928186354, "learning_rate": 1.741754172890307e-06, "loss": 1.0832, "step": 8619 }, { "epoch": 1.2203581793728322, "grad_norm": 10.591104812986602, "learning_rate": 1.7412079933693538e-06, "loss": 1.0712, "step": 8620 }, { "epoch": 1.2204997522474694, "grad_norm": 8.788115834434228, "learning_rate": 1.7406618537375303e-06, "loss": 1.0147, "step": 8621 }, { "epoch": 1.2206413251221067, "grad_norm": 8.346845531233079, "learning_rate": 1.740115754023547e-06, "loss": 1.0001, "step": 8622 }, { "epoch": 1.2207828979967439, "grad_norm": 7.387309776477671, "learning_rate": 1.7395696942561119e-06, "loss": 1.019, "step": 8623 }, { "epoch": 1.2209244708713811, "grad_norm": 8.6040190775654, "learning_rate": 1.7390236744639304e-06, "loss": 1.0948, "step": 8624 }, { "epoch": 1.2210660437460183, "grad_norm": 8.584652055588936, "learning_rate": 1.7384776946757075e-06, "loss": 0.9884, "step": 8625 }, { "epoch": 1.2212076166206556, "grad_norm": 10.459448117928623, "learning_rate": 1.7379317549201458e-06, "loss": 1.1494, "step": 8626 }, { "epoch": 1.2213491894952928, "grad_norm": 11.056462078745193, "learning_rate": 1.7373858552259421e-06, "loss": 1.0511, "step": 8627 }, { "epoch": 1.22149076236993, "grad_norm": 11.920542551276636, "learning_rate": 1.7368399956217954e-06, "loss": 0.9935, "step": 8628 }, { "epoch": 1.221632335244567, "grad_norm": 12.494870773813425, "learning_rate": 1.7362941761364012e-06, "loss": 1.1138, "step": 8629 }, { "epoch": 1.2217739081192043, "grad_norm": 9.845198461141239, "learning_rate": 1.7357483967984524e-06, "loss": 0.9648, "step": 8630 }, { "epoch": 1.2219154809938415, "grad_norm": 9.309171965229918, "learning_rate": 1.7352026576366405e-06, "loss": 1.0357, "step": 8631 }, { "epoch": 1.2220570538684787, "grad_norm": 8.491342062044636, "learning_rate": 1.734656958679655e-06, "loss": 1.094, "step": 8632 }, { "epoch": 1.222198626743116, "grad_norm": 9.349708315945568, "learning_rate": 1.7341112999561823e-06, "loss": 0.8822, "step": 8633 }, { "epoch": 1.2223401996177532, "grad_norm": 10.299381674567554, "learning_rate": 1.7335656814949075e-06, "loss": 0.974, "step": 8634 }, { "epoch": 1.2224817724923904, "grad_norm": 7.510213382701648, "learning_rate": 1.7330201033245137e-06, "loss": 0.9435, "step": 8635 }, { "epoch": 1.2226233453670277, "grad_norm": 9.480929055059441, "learning_rate": 1.7324745654736812e-06, "loss": 1.1537, "step": 8636 }, { "epoch": 1.2227649182416649, "grad_norm": 10.186765304940018, "learning_rate": 1.7319290679710885e-06, "loss": 1.1243, "step": 8637 }, { "epoch": 1.2229064911163021, "grad_norm": 10.51043654793761, "learning_rate": 1.7313836108454118e-06, "loss": 0.9928, "step": 8638 }, { "epoch": 1.2230480639909393, "grad_norm": 10.41200734011391, "learning_rate": 1.7308381941253256e-06, "loss": 0.9867, "step": 8639 }, { "epoch": 1.2231896368655766, "grad_norm": 11.81033612166227, "learning_rate": 1.7302928178395018e-06, "loss": 1.0656, "step": 8640 }, { "epoch": 1.2233312097402138, "grad_norm": 7.949007838495008, "learning_rate": 1.7297474820166108e-06, "loss": 0.9244, "step": 8641 }, { "epoch": 1.223472782614851, "grad_norm": 9.447779431900793, "learning_rate": 1.7292021866853204e-06, "loss": 1.0183, "step": 8642 }, { "epoch": 1.2236143554894883, "grad_norm": 10.851624721406388, "learning_rate": 1.7286569318742962e-06, "loss": 0.9978, "step": 8643 }, { "epoch": 1.2237559283641255, "grad_norm": 9.88903776573479, "learning_rate": 1.728111717612202e-06, "loss": 1.0194, "step": 8644 }, { "epoch": 1.2238975012387627, "grad_norm": 11.788957659822099, "learning_rate": 1.727566543927699e-06, "loss": 1.0137, "step": 8645 }, { "epoch": 1.2240390741134, "grad_norm": 9.825769090519827, "learning_rate": 1.7270214108494469e-06, "loss": 0.911, "step": 8646 }, { "epoch": 1.224180646988037, "grad_norm": 9.66785274551616, "learning_rate": 1.726476318406104e-06, "loss": 0.9926, "step": 8647 }, { "epoch": 1.2243222198626742, "grad_norm": 9.536038756016568, "learning_rate": 1.7259312666263235e-06, "loss": 1.1147, "step": 8648 }, { "epoch": 1.2244637927373114, "grad_norm": 10.841015209785871, "learning_rate": 1.7253862555387587e-06, "loss": 1.1747, "step": 8649 }, { "epoch": 1.2246053656119487, "grad_norm": 9.420405183084036, "learning_rate": 1.7248412851720613e-06, "loss": 0.9412, "step": 8650 }, { "epoch": 1.224746938486586, "grad_norm": 8.763618117104453, "learning_rate": 1.7242963555548794e-06, "loss": 1.0978, "step": 8651 }, { "epoch": 1.2248885113612231, "grad_norm": 8.739042560152917, "learning_rate": 1.7237514667158598e-06, "loss": 0.9239, "step": 8652 }, { "epoch": 1.2250300842358604, "grad_norm": 9.570629379320286, "learning_rate": 1.723206618683646e-06, "loss": 1.0463, "step": 8653 }, { "epoch": 1.2251716571104976, "grad_norm": 8.831206251550343, "learning_rate": 1.722661811486882e-06, "loss": 1.0051, "step": 8654 }, { "epoch": 1.2253132299851348, "grad_norm": 8.658091875906644, "learning_rate": 1.7221170451542067e-06, "loss": 1.0169, "step": 8655 }, { "epoch": 1.225454802859772, "grad_norm": 7.5333816234199915, "learning_rate": 1.721572319714258e-06, "loss": 1.0178, "step": 8656 }, { "epoch": 1.2255963757344093, "grad_norm": 11.068206177528554, "learning_rate": 1.7210276351956736e-06, "loss": 1.0446, "step": 8657 }, { "epoch": 1.2257379486090465, "grad_norm": 7.474674444859831, "learning_rate": 1.7204829916270842e-06, "loss": 1.0098, "step": 8658 }, { "epoch": 1.2258795214836837, "grad_norm": 9.359707579650259, "learning_rate": 1.7199383890371228e-06, "loss": 1.0338, "step": 8659 }, { "epoch": 1.226021094358321, "grad_norm": 7.939195346753111, "learning_rate": 1.7193938274544187e-06, "loss": 0.9867, "step": 8660 }, { "epoch": 1.2261626672329582, "grad_norm": 11.662170052230778, "learning_rate": 1.718849306907599e-06, "loss": 1.0529, "step": 8661 }, { "epoch": 1.2263042401075954, "grad_norm": 10.208979704238935, "learning_rate": 1.7183048274252889e-06, "loss": 1.1053, "step": 8662 }, { "epoch": 1.2264458129822327, "grad_norm": 11.355073011895113, "learning_rate": 1.717760389036111e-06, "loss": 1.0011, "step": 8663 }, { "epoch": 1.2265873858568699, "grad_norm": 10.777964923540361, "learning_rate": 1.7172159917686866e-06, "loss": 1.1294, "step": 8664 }, { "epoch": 1.2267289587315071, "grad_norm": 10.705588220206538, "learning_rate": 1.7166716356516334e-06, "loss": 0.9895, "step": 8665 }, { "epoch": 1.2268705316061443, "grad_norm": 8.887251014279968, "learning_rate": 1.716127320713568e-06, "loss": 0.9558, "step": 8666 }, { "epoch": 1.2270121044807816, "grad_norm": 11.153825768975771, "learning_rate": 1.7155830469831057e-06, "loss": 1.0899, "step": 8667 }, { "epoch": 1.2271536773554188, "grad_norm": 9.15444293108095, "learning_rate": 1.7150388144888577e-06, "loss": 1.0003, "step": 8668 }, { "epoch": 1.227295250230056, "grad_norm": 8.376280729382335, "learning_rate": 1.7144946232594334e-06, "loss": 0.9502, "step": 8669 }, { "epoch": 1.2274368231046933, "grad_norm": 9.785638476765286, "learning_rate": 1.7139504733234413e-06, "loss": 1.0415, "step": 8670 }, { "epoch": 1.2275783959793303, "grad_norm": 9.496358474688344, "learning_rate": 1.7134063647094866e-06, "loss": 0.9669, "step": 8671 }, { "epoch": 1.2277199688539675, "grad_norm": 10.285438496056079, "learning_rate": 1.7128622974461728e-06, "loss": 1.0216, "step": 8672 }, { "epoch": 1.2278615417286047, "grad_norm": 9.75535983925051, "learning_rate": 1.7123182715621012e-06, "loss": 1.0014, "step": 8673 }, { "epoch": 1.228003114603242, "grad_norm": 7.941786682353525, "learning_rate": 1.7117742870858706e-06, "loss": 0.9778, "step": 8674 }, { "epoch": 1.2281446874778792, "grad_norm": 11.374412437606667, "learning_rate": 1.7112303440460775e-06, "loss": 1.1064, "step": 8675 }, { "epoch": 1.2282862603525164, "grad_norm": 8.1556391578614, "learning_rate": 1.7106864424713177e-06, "loss": 0.989, "step": 8676 }, { "epoch": 1.2284278332271537, "grad_norm": 10.749336044743975, "learning_rate": 1.710142582390183e-06, "loss": 1.0834, "step": 8677 }, { "epoch": 1.2285694061017909, "grad_norm": 11.605226168156419, "learning_rate": 1.709598763831264e-06, "loss": 1.1054, "step": 8678 }, { "epoch": 1.2287109789764281, "grad_norm": 9.758085370734316, "learning_rate": 1.7090549868231492e-06, "loss": 1.0601, "step": 8679 }, { "epoch": 1.2288525518510653, "grad_norm": 9.336006962346191, "learning_rate": 1.7085112513944235e-06, "loss": 1.0468, "step": 8680 }, { "epoch": 1.2289941247257026, "grad_norm": 8.821702845883793, "learning_rate": 1.7079675575736704e-06, "loss": 0.9766, "step": 8681 }, { "epoch": 1.2291356976003398, "grad_norm": 9.93721554906868, "learning_rate": 1.7074239053894725e-06, "loss": 0.9999, "step": 8682 }, { "epoch": 1.229277270474977, "grad_norm": 8.131012555231965, "learning_rate": 1.7068802948704094e-06, "loss": 0.9935, "step": 8683 }, { "epoch": 1.2294188433496143, "grad_norm": 7.9759558317134225, "learning_rate": 1.7063367260450576e-06, "loss": 1.0255, "step": 8684 }, { "epoch": 1.2295604162242515, "grad_norm": 9.014266680333996, "learning_rate": 1.7057931989419923e-06, "loss": 0.9762, "step": 8685 }, { "epoch": 1.2297019890988887, "grad_norm": 8.787499426981444, "learning_rate": 1.705249713589786e-06, "loss": 0.9686, "step": 8686 }, { "epoch": 1.229843561973526, "grad_norm": 10.1570515008259, "learning_rate": 1.7047062700170104e-06, "loss": 1.0672, "step": 8687 }, { "epoch": 1.2299851348481632, "grad_norm": 10.057198022096616, "learning_rate": 1.7041628682522326e-06, "loss": 1.0043, "step": 8688 }, { "epoch": 1.2301267077228002, "grad_norm": 9.919647879965297, "learning_rate": 1.7036195083240203e-06, "loss": 1.0074, "step": 8689 }, { "epoch": 1.2302682805974374, "grad_norm": 9.245708810507251, "learning_rate": 1.703076190260936e-06, "loss": 0.9921, "step": 8690 }, { "epoch": 1.2304098534720747, "grad_norm": 12.81088916608, "learning_rate": 1.702532914091542e-06, "loss": 1.0534, "step": 8691 }, { "epoch": 1.2305514263467119, "grad_norm": 7.594916611214295, "learning_rate": 1.7019896798443984e-06, "loss": 0.9018, "step": 8692 }, { "epoch": 1.2306929992213491, "grad_norm": 9.34364017689953, "learning_rate": 1.7014464875480618e-06, "loss": 1.0415, "step": 8693 }, { "epoch": 1.2308345720959863, "grad_norm": 9.160263692067119, "learning_rate": 1.7009033372310884e-06, "loss": 1.1074, "step": 8694 }, { "epoch": 1.2309761449706236, "grad_norm": 9.367250622050665, "learning_rate": 1.7003602289220305e-06, "loss": 1.023, "step": 8695 }, { "epoch": 1.2311177178452608, "grad_norm": 8.565882996017681, "learning_rate": 1.6998171626494392e-06, "loss": 0.9498, "step": 8696 }, { "epoch": 1.231259290719898, "grad_norm": 9.504865103320732, "learning_rate": 1.6992741384418632e-06, "loss": 1.0849, "step": 8697 }, { "epoch": 1.2314008635945353, "grad_norm": 8.96203063842467, "learning_rate": 1.698731156327848e-06, "loss": 1.0155, "step": 8698 }, { "epoch": 1.2315424364691725, "grad_norm": 11.649051192125384, "learning_rate": 1.6981882163359391e-06, "loss": 1.1641, "step": 8699 }, { "epoch": 1.2316840093438097, "grad_norm": 9.462701760563682, "learning_rate": 1.6976453184946786e-06, "loss": 1.0021, "step": 8700 }, { "epoch": 1.231825582218447, "grad_norm": 9.154689616801898, "learning_rate": 1.6971024628326046e-06, "loss": 1.0303, "step": 8701 }, { "epoch": 1.2319671550930842, "grad_norm": 8.423479652759257, "learning_rate": 1.6965596493782555e-06, "loss": 0.9856, "step": 8702 }, { "epoch": 1.2321087279677214, "grad_norm": 8.319599074460598, "learning_rate": 1.6960168781601665e-06, "loss": 0.9093, "step": 8703 }, { "epoch": 1.2322503008423586, "grad_norm": 10.187040166481902, "learning_rate": 1.6954741492068698e-06, "loss": 0.9491, "step": 8704 }, { "epoch": 1.2323918737169959, "grad_norm": 8.770810910254394, "learning_rate": 1.6949314625468985e-06, "loss": 1.0696, "step": 8705 }, { "epoch": 1.232533446591633, "grad_norm": 10.371032323852932, "learning_rate": 1.6943888182087796e-06, "loss": 1.0399, "step": 8706 }, { "epoch": 1.2326750194662703, "grad_norm": 10.821075743049597, "learning_rate": 1.6938462162210395e-06, "loss": 0.9808, "step": 8707 }, { "epoch": 1.2328165923409076, "grad_norm": 9.475562537411202, "learning_rate": 1.6933036566122029e-06, "loss": 0.991, "step": 8708 }, { "epoch": 1.2329581652155448, "grad_norm": 10.289991564214304, "learning_rate": 1.6927611394107918e-06, "loss": 1.0875, "step": 8709 }, { "epoch": 1.233099738090182, "grad_norm": 8.04447732407025, "learning_rate": 1.6922186646453263e-06, "loss": 0.8924, "step": 8710 }, { "epoch": 1.2332413109648193, "grad_norm": 11.623840930540801, "learning_rate": 1.6916762323443225e-06, "loss": 1.0923, "step": 8711 }, { "epoch": 1.2333828838394563, "grad_norm": 7.691024344568804, "learning_rate": 1.6911338425362967e-06, "loss": 0.9438, "step": 8712 }, { "epoch": 1.2335244567140935, "grad_norm": 8.834803284990144, "learning_rate": 1.6905914952497616e-06, "loss": 0.8803, "step": 8713 }, { "epoch": 1.2336660295887307, "grad_norm": 9.010364075141556, "learning_rate": 1.6900491905132277e-06, "loss": 0.9326, "step": 8714 }, { "epoch": 1.233807602463368, "grad_norm": 11.318523678973525, "learning_rate": 1.689506928355204e-06, "loss": 1.093, "step": 8715 }, { "epoch": 1.2339491753380052, "grad_norm": 9.593085946327845, "learning_rate": 1.6889647088041972e-06, "loss": 1.1622, "step": 8716 }, { "epoch": 1.2340907482126424, "grad_norm": 8.991357256279978, "learning_rate": 1.6884225318887107e-06, "loss": 1.0424, "step": 8717 }, { "epoch": 1.2342323210872796, "grad_norm": 9.399695663395338, "learning_rate": 1.6878803976372465e-06, "loss": 1.0407, "step": 8718 }, { "epoch": 1.2343738939619169, "grad_norm": 9.167272605520454, "learning_rate": 1.6873383060783043e-06, "loss": 0.9277, "step": 8719 }, { "epoch": 1.234515466836554, "grad_norm": 8.563188733990703, "learning_rate": 1.6867962572403811e-06, "loss": 0.9512, "step": 8720 }, { "epoch": 1.2346570397111913, "grad_norm": 9.27859325467199, "learning_rate": 1.6862542511519734e-06, "loss": 0.9198, "step": 8721 }, { "epoch": 1.2347986125858286, "grad_norm": 9.083289563368119, "learning_rate": 1.6857122878415721e-06, "loss": 0.8937, "step": 8722 }, { "epoch": 1.2349401854604658, "grad_norm": 9.132737732932833, "learning_rate": 1.6851703673376688e-06, "loss": 1.065, "step": 8723 }, { "epoch": 1.235081758335103, "grad_norm": 8.67471316520236, "learning_rate": 1.6846284896687514e-06, "loss": 1.0153, "step": 8724 }, { "epoch": 1.2352233312097403, "grad_norm": 9.02485065706487, "learning_rate": 1.6840866548633068e-06, "loss": 0.9634, "step": 8725 }, { "epoch": 1.2353649040843775, "grad_norm": 9.17366283349867, "learning_rate": 1.6835448629498182e-06, "loss": 0.997, "step": 8726 }, { "epoch": 1.2355064769590147, "grad_norm": 9.290059952886345, "learning_rate": 1.683003113956767e-06, "loss": 1.0071, "step": 8727 }, { "epoch": 1.235648049833652, "grad_norm": 10.389542280878034, "learning_rate": 1.6824614079126334e-06, "loss": 1.0603, "step": 8728 }, { "epoch": 1.2357896227082892, "grad_norm": 9.325243941074511, "learning_rate": 1.6819197448458935e-06, "loss": 1.0291, "step": 8729 }, { "epoch": 1.2359311955829262, "grad_norm": 11.063957161490144, "learning_rate": 1.681378124785023e-06, "loss": 1.0239, "step": 8730 }, { "epoch": 1.2360727684575634, "grad_norm": 8.260936077731815, "learning_rate": 1.6808365477584953e-06, "loss": 1.1004, "step": 8731 }, { "epoch": 1.2362143413322007, "grad_norm": 9.568194918787254, "learning_rate": 1.6802950137947783e-06, "loss": 1.0066, "step": 8732 }, { "epoch": 1.2363559142068379, "grad_norm": 10.772921233777758, "learning_rate": 1.6797535229223405e-06, "loss": 1.0425, "step": 8733 }, { "epoch": 1.236497487081475, "grad_norm": 10.869757079048249, "learning_rate": 1.6792120751696495e-06, "loss": 1.0203, "step": 8734 }, { "epoch": 1.2366390599561123, "grad_norm": 8.321681408809553, "learning_rate": 1.678670670565167e-06, "loss": 0.9615, "step": 8735 }, { "epoch": 1.2367806328307496, "grad_norm": 10.819762861152828, "learning_rate": 1.678129309137355e-06, "loss": 0.9188, "step": 8736 }, { "epoch": 1.2369222057053868, "grad_norm": 7.745114170757356, "learning_rate": 1.677587990914673e-06, "loss": 0.9706, "step": 8737 }, { "epoch": 1.237063778580024, "grad_norm": 8.183139251251387, "learning_rate": 1.6770467159255768e-06, "loss": 0.9432, "step": 8738 }, { "epoch": 1.2372053514546613, "grad_norm": 9.840091861203028, "learning_rate": 1.6765054841985212e-06, "loss": 1.0281, "step": 8739 }, { "epoch": 1.2373469243292985, "grad_norm": 9.792776318163206, "learning_rate": 1.6759642957619581e-06, "loss": 1.0371, "step": 8740 }, { "epoch": 1.2374884972039357, "grad_norm": 9.637561082460856, "learning_rate": 1.6754231506443375e-06, "loss": 0.9265, "step": 8741 }, { "epoch": 1.237630070078573, "grad_norm": 9.992718525153665, "learning_rate": 1.6748820488741077e-06, "loss": 1.0228, "step": 8742 }, { "epoch": 1.2377716429532102, "grad_norm": 7.957871615079616, "learning_rate": 1.674340990479713e-06, "loss": 0.9177, "step": 8743 }, { "epoch": 1.2379132158278474, "grad_norm": 9.587997053608225, "learning_rate": 1.6737999754895965e-06, "loss": 1.0801, "step": 8744 }, { "epoch": 1.2380547887024846, "grad_norm": 8.074687410805854, "learning_rate": 1.6732590039321993e-06, "loss": 0.8634, "step": 8745 }, { "epoch": 1.2381963615771219, "grad_norm": 8.604611389144958, "learning_rate": 1.6727180758359598e-06, "loss": 1.0435, "step": 8746 }, { "epoch": 1.238337934451759, "grad_norm": 9.899258718770428, "learning_rate": 1.6721771912293145e-06, "loss": 1.0139, "step": 8747 }, { "epoch": 1.2384795073263963, "grad_norm": 8.998889006812652, "learning_rate": 1.6716363501406966e-06, "loss": 0.9449, "step": 8748 }, { "epoch": 1.2386210802010336, "grad_norm": 11.056711870188673, "learning_rate": 1.6710955525985384e-06, "loss": 1.076, "step": 8749 }, { "epoch": 1.2387626530756708, "grad_norm": 8.986925164569593, "learning_rate": 1.6705547986312681e-06, "loss": 0.9852, "step": 8750 }, { "epoch": 1.238904225950308, "grad_norm": 9.850911565900855, "learning_rate": 1.6700140882673145e-06, "loss": 1.1623, "step": 8751 }, { "epoch": 1.2390457988249453, "grad_norm": 7.289272358480871, "learning_rate": 1.669473421535101e-06, "loss": 1.0276, "step": 8752 }, { "epoch": 1.2391873716995823, "grad_norm": 9.180777029757186, "learning_rate": 1.668932798463052e-06, "loss": 1.0978, "step": 8753 }, { "epoch": 1.2393289445742195, "grad_norm": 10.79965240484293, "learning_rate": 1.668392219079585e-06, "loss": 0.9198, "step": 8754 }, { "epoch": 1.2394705174488567, "grad_norm": 9.233302123456555, "learning_rate": 1.6678516834131184e-06, "loss": 0.997, "step": 8755 }, { "epoch": 1.239612090323494, "grad_norm": 8.296136108370769, "learning_rate": 1.667311191492068e-06, "loss": 1.0147, "step": 8756 }, { "epoch": 1.2397536631981312, "grad_norm": 7.944821562926888, "learning_rate": 1.6667707433448482e-06, "loss": 0.976, "step": 8757 }, { "epoch": 1.2398952360727684, "grad_norm": 9.926511050871117, "learning_rate": 1.666230338999869e-06, "loss": 1.0933, "step": 8758 }, { "epoch": 1.2400368089474056, "grad_norm": 8.549685056362502, "learning_rate": 1.6656899784855393e-06, "loss": 1.019, "step": 8759 }, { "epoch": 1.2401783818220429, "grad_norm": 10.862535722013149, "learning_rate": 1.6651496618302653e-06, "loss": 1.0167, "step": 8760 }, { "epoch": 1.24031995469668, "grad_norm": 8.11850843062835, "learning_rate": 1.6646093890624509e-06, "loss": 1.0101, "step": 8761 }, { "epoch": 1.2404615275713173, "grad_norm": 8.572726414834067, "learning_rate": 1.6640691602104983e-06, "loss": 1.0303, "step": 8762 }, { "epoch": 1.2406031004459546, "grad_norm": 7.1063569422368955, "learning_rate": 1.6635289753028073e-06, "loss": 1.0061, "step": 8763 }, { "epoch": 1.2407446733205918, "grad_norm": 9.088075105920911, "learning_rate": 1.6629888343677734e-06, "loss": 0.9873, "step": 8764 }, { "epoch": 1.240886246195229, "grad_norm": 8.616104182386508, "learning_rate": 1.6624487374337925e-06, "loss": 0.8955, "step": 8765 }, { "epoch": 1.2410278190698663, "grad_norm": 10.33086964760379, "learning_rate": 1.661908684529257e-06, "loss": 1.0941, "step": 8766 }, { "epoch": 1.2411693919445035, "grad_norm": 8.174448107834301, "learning_rate": 1.661368675682557e-06, "loss": 1.0201, "step": 8767 }, { "epoch": 1.2413109648191407, "grad_norm": 8.129361434576788, "learning_rate": 1.6608287109220805e-06, "loss": 1.0281, "step": 8768 }, { "epoch": 1.241452537693778, "grad_norm": 9.397235593737086, "learning_rate": 1.6602887902762132e-06, "loss": 1.0707, "step": 8769 }, { "epoch": 1.2415941105684152, "grad_norm": 9.50299025204534, "learning_rate": 1.6597489137733377e-06, "loss": 0.8212, "step": 8770 }, { "epoch": 1.2417356834430524, "grad_norm": 8.399652891707602, "learning_rate": 1.6592090814418354e-06, "loss": 0.8712, "step": 8771 }, { "epoch": 1.2418772563176894, "grad_norm": 8.10934207152515, "learning_rate": 1.6586692933100846e-06, "loss": 1.0564, "step": 8772 }, { "epoch": 1.2420188291923266, "grad_norm": 8.815467280802887, "learning_rate": 1.6581295494064615e-06, "loss": 1.0332, "step": 8773 }, { "epoch": 1.2421604020669639, "grad_norm": 8.369202386367794, "learning_rate": 1.6575898497593417e-06, "loss": 0.9394, "step": 8774 }, { "epoch": 1.242301974941601, "grad_norm": 10.03880410659681, "learning_rate": 1.6570501943970945e-06, "loss": 0.9963, "step": 8775 }, { "epoch": 1.2424435478162383, "grad_norm": 8.561945319482518, "learning_rate": 1.65651058334809e-06, "loss": 0.9828, "step": 8776 }, { "epoch": 1.2425851206908756, "grad_norm": 9.144326424046318, "learning_rate": 1.655971016640695e-06, "loss": 0.959, "step": 8777 }, { "epoch": 1.2427266935655128, "grad_norm": 10.274340502139601, "learning_rate": 1.655431494303274e-06, "loss": 0.9216, "step": 8778 }, { "epoch": 1.24286826644015, "grad_norm": 7.7529411887628825, "learning_rate": 1.65489201636419e-06, "loss": 0.9195, "step": 8779 }, { "epoch": 1.2430098393147873, "grad_norm": 9.348006793860732, "learning_rate": 1.6543525828518025e-06, "loss": 1.0151, "step": 8780 }, { "epoch": 1.2431514121894245, "grad_norm": 10.93981943877628, "learning_rate": 1.6538131937944693e-06, "loss": 0.9956, "step": 8781 }, { "epoch": 1.2432929850640617, "grad_norm": 9.474341425491133, "learning_rate": 1.6532738492205456e-06, "loss": 1.0112, "step": 8782 }, { "epoch": 1.243434557938699, "grad_norm": 8.99863211515215, "learning_rate": 1.652734549158384e-06, "loss": 1.0026, "step": 8783 }, { "epoch": 1.2435761308133362, "grad_norm": 9.639224551974028, "learning_rate": 1.652195293636336e-06, "loss": 1.0355, "step": 8784 }, { "epoch": 1.2437177036879734, "grad_norm": 13.708068164861889, "learning_rate": 1.6516560826827494e-06, "loss": 1.0775, "step": 8785 }, { "epoch": 1.2438592765626106, "grad_norm": 9.340270143968251, "learning_rate": 1.6511169163259693e-06, "loss": 1.0214, "step": 8786 }, { "epoch": 1.2440008494372479, "grad_norm": 10.622720271856974, "learning_rate": 1.6505777945943402e-06, "loss": 1.0345, "step": 8787 }, { "epoch": 1.244142422311885, "grad_norm": 8.936779926876671, "learning_rate": 1.650038717516203e-06, "loss": 1.0001, "step": 8788 }, { "epoch": 1.2442839951865223, "grad_norm": 9.117679115253534, "learning_rate": 1.6494996851198965e-06, "loss": 0.9364, "step": 8789 }, { "epoch": 1.2444255680611596, "grad_norm": 9.641461847669087, "learning_rate": 1.6489606974337574e-06, "loss": 0.9957, "step": 8790 }, { "epoch": 1.2445671409357968, "grad_norm": 9.989819112520452, "learning_rate": 1.6484217544861204e-06, "loss": 1.0333, "step": 8791 }, { "epoch": 1.244708713810434, "grad_norm": 8.28714136696924, "learning_rate": 1.6478828563053162e-06, "loss": 1.1241, "step": 8792 }, { "epoch": 1.2448502866850713, "grad_norm": 9.942063921025937, "learning_rate": 1.6473440029196752e-06, "loss": 1.0479, "step": 8793 }, { "epoch": 1.2449918595597085, "grad_norm": 10.453668007196685, "learning_rate": 1.6468051943575242e-06, "loss": 1.099, "step": 8794 }, { "epoch": 1.2451334324343455, "grad_norm": 9.438532286517477, "learning_rate": 1.6462664306471882e-06, "loss": 1.0963, "step": 8795 }, { "epoch": 1.2452750053089827, "grad_norm": 10.006041037231935, "learning_rate": 1.6457277118169893e-06, "loss": 1.156, "step": 8796 }, { "epoch": 1.24541657818362, "grad_norm": 7.823881114391928, "learning_rate": 1.6451890378952472e-06, "loss": 0.9324, "step": 8797 }, { "epoch": 1.2455581510582572, "grad_norm": 8.085876169755334, "learning_rate": 1.6446504089102803e-06, "loss": 0.9447, "step": 8798 }, { "epoch": 1.2456997239328944, "grad_norm": 10.008657522506804, "learning_rate": 1.6441118248904038e-06, "loss": 0.9152, "step": 8799 }, { "epoch": 1.2458412968075316, "grad_norm": 9.523822429284934, "learning_rate": 1.6435732858639298e-06, "loss": 1.0813, "step": 8800 }, { "epoch": 1.2459828696821689, "grad_norm": 10.861174114680884, "learning_rate": 1.6430347918591693e-06, "loss": 1.0717, "step": 8801 }, { "epoch": 1.246124442556806, "grad_norm": 9.142495718692386, "learning_rate": 1.6424963429044315e-06, "loss": 1.0366, "step": 8802 }, { "epoch": 1.2462660154314433, "grad_norm": 7.8440636803873955, "learning_rate": 1.6419579390280217e-06, "loss": 0.9698, "step": 8803 }, { "epoch": 1.2464075883060806, "grad_norm": 9.227006945132715, "learning_rate": 1.6414195802582434e-06, "loss": 0.9781, "step": 8804 }, { "epoch": 1.2465491611807178, "grad_norm": 8.921703971653201, "learning_rate": 1.640881266623397e-06, "loss": 1.0156, "step": 8805 }, { "epoch": 1.246690734055355, "grad_norm": 8.020624754204245, "learning_rate": 1.6403429981517831e-06, "loss": 0.8817, "step": 8806 }, { "epoch": 1.2468323069299923, "grad_norm": 9.447501232654888, "learning_rate": 1.6398047748716955e-06, "loss": 1.0614, "step": 8807 }, { "epoch": 1.2469738798046295, "grad_norm": 8.844165805706501, "learning_rate": 1.6392665968114297e-06, "loss": 1.0103, "step": 8808 }, { "epoch": 1.2471154526792667, "grad_norm": 8.223987607479035, "learning_rate": 1.6387284639992773e-06, "loss": 0.9303, "step": 8809 }, { "epoch": 1.247257025553904, "grad_norm": 9.946945212233542, "learning_rate": 1.6381903764635274e-06, "loss": 1.0362, "step": 8810 }, { "epoch": 1.2473985984285412, "grad_norm": 10.501276574416218, "learning_rate": 1.6376523342324668e-06, "loss": 1.102, "step": 8811 }, { "epoch": 1.2475401713031784, "grad_norm": 10.67203656768253, "learning_rate": 1.6371143373343798e-06, "loss": 1.0517, "step": 8812 }, { "epoch": 1.2476817441778154, "grad_norm": 8.919885737623863, "learning_rate": 1.6365763857975486e-06, "loss": 1.1911, "step": 8813 }, { "epoch": 1.2478233170524526, "grad_norm": 10.028873340314664, "learning_rate": 1.6360384796502532e-06, "loss": 1.0533, "step": 8814 }, { "epoch": 1.2479648899270899, "grad_norm": 9.900718709115578, "learning_rate": 1.635500618920771e-06, "loss": 0.9456, "step": 8815 }, { "epoch": 1.248106462801727, "grad_norm": 8.200680904495876, "learning_rate": 1.634962803637377e-06, "loss": 0.9934, "step": 8816 }, { "epoch": 1.2482480356763643, "grad_norm": 10.359654119307251, "learning_rate": 1.6344250338283426e-06, "loss": 0.9905, "step": 8817 }, { "epoch": 1.2483896085510016, "grad_norm": 8.130479578791915, "learning_rate": 1.6338873095219391e-06, "loss": 1.034, "step": 8818 }, { "epoch": 1.2485311814256388, "grad_norm": 10.92531059050604, "learning_rate": 1.6333496307464335e-06, "loss": 1.1075, "step": 8819 }, { "epoch": 1.248672754300276, "grad_norm": 10.057485906831452, "learning_rate": 1.6328119975300921e-06, "loss": 0.8949, "step": 8820 }, { "epoch": 1.2488143271749133, "grad_norm": 9.401855658940047, "learning_rate": 1.6322744099011772e-06, "loss": 0.9819, "step": 8821 }, { "epoch": 1.2489559000495505, "grad_norm": 9.37270113416047, "learning_rate": 1.6317368678879497e-06, "loss": 1.0554, "step": 8822 }, { "epoch": 1.2490974729241877, "grad_norm": 8.010880700317948, "learning_rate": 1.6311993715186674e-06, "loss": 1.05, "step": 8823 }, { "epoch": 1.249239045798825, "grad_norm": 10.601424326660368, "learning_rate": 1.6306619208215862e-06, "loss": 1.0944, "step": 8824 }, { "epoch": 1.2493806186734622, "grad_norm": 9.764888644113336, "learning_rate": 1.6301245158249599e-06, "loss": 1.0645, "step": 8825 }, { "epoch": 1.2495221915480994, "grad_norm": 10.293905243806018, "learning_rate": 1.6295871565570392e-06, "loss": 1.0033, "step": 8826 }, { "epoch": 1.2496637644227366, "grad_norm": 6.776894854099877, "learning_rate": 1.6290498430460736e-06, "loss": 0.9759, "step": 8827 }, { "epoch": 1.2498053372973739, "grad_norm": 10.10317789070404, "learning_rate": 1.6285125753203073e-06, "loss": 1.0398, "step": 8828 }, { "epoch": 1.249946910172011, "grad_norm": 8.975861285438278, "learning_rate": 1.6279753534079853e-06, "loss": 1.0212, "step": 8829 }, { "epoch": 1.2500884830466483, "grad_norm": 8.748991117897805, "learning_rate": 1.6274381773373482e-06, "loss": 0.8805, "step": 8830 }, { "epoch": 1.2502300559212856, "grad_norm": 10.321195138732572, "learning_rate": 1.6269010471366359e-06, "loss": 1.1054, "step": 8831 }, { "epoch": 1.2503716287959228, "grad_norm": 9.972636741681859, "learning_rate": 1.6263639628340847e-06, "loss": 1.2093, "step": 8832 }, { "epoch": 1.25051320167056, "grad_norm": 10.681455096155739, "learning_rate": 1.6258269244579283e-06, "loss": 1.0249, "step": 8833 }, { "epoch": 1.2506547745451972, "grad_norm": 8.826105963808445, "learning_rate": 1.6252899320363992e-06, "loss": 1.033, "step": 8834 }, { "epoch": 1.2507963474198345, "grad_norm": 9.1868717731377, "learning_rate": 1.6247529855977256e-06, "loss": 1.0192, "step": 8835 }, { "epoch": 1.2509379202944717, "grad_norm": 9.380442349347582, "learning_rate": 1.6242160851701353e-06, "loss": 1.0257, "step": 8836 }, { "epoch": 1.251079493169109, "grad_norm": 9.528161320753174, "learning_rate": 1.6236792307818528e-06, "loss": 1.0383, "step": 8837 }, { "epoch": 1.251221066043746, "grad_norm": 9.610224095489519, "learning_rate": 1.6231424224610992e-06, "loss": 0.9312, "step": 8838 }, { "epoch": 1.2513626389183832, "grad_norm": 7.098932062344395, "learning_rate": 1.6226056602360945e-06, "loss": 1.0178, "step": 8839 }, { "epoch": 1.2515042117930204, "grad_norm": 10.09002257693348, "learning_rate": 1.6220689441350561e-06, "loss": 1.0993, "step": 8840 }, { "epoch": 1.2516457846676576, "grad_norm": 9.286028418624612, "learning_rate": 1.6215322741861988e-06, "loss": 1.137, "step": 8841 }, { "epoch": 1.2517873575422949, "grad_norm": 8.553356774353272, "learning_rate": 1.6209956504177345e-06, "loss": 1.0672, "step": 8842 }, { "epoch": 1.251928930416932, "grad_norm": 10.283585767156744, "learning_rate": 1.6204590728578739e-06, "loss": 1.0152, "step": 8843 }, { "epoch": 1.2520705032915693, "grad_norm": 10.633817716306444, "learning_rate": 1.6199225415348239e-06, "loss": 1.0604, "step": 8844 }, { "epoch": 1.2522120761662066, "grad_norm": 10.227642704600544, "learning_rate": 1.6193860564767893e-06, "loss": 0.946, "step": 8845 }, { "epoch": 1.2523536490408438, "grad_norm": 9.056571757811811, "learning_rate": 1.6188496177119737e-06, "loss": 1.0804, "step": 8846 }, { "epoch": 1.252495221915481, "grad_norm": 9.372055608593367, "learning_rate": 1.6183132252685758e-06, "loss": 1.1075, "step": 8847 }, { "epoch": 1.2526367947901182, "grad_norm": 10.99200808033653, "learning_rate": 1.6177768791747957e-06, "loss": 1.1096, "step": 8848 }, { "epoch": 1.2527783676647555, "grad_norm": 10.292672997411344, "learning_rate": 1.6172405794588264e-06, "loss": 0.9507, "step": 8849 }, { "epoch": 1.2529199405393927, "grad_norm": 10.609561969081534, "learning_rate": 1.616704326148862e-06, "loss": 1.0453, "step": 8850 }, { "epoch": 1.25306151341403, "grad_norm": 8.939633414926012, "learning_rate": 1.6161681192730918e-06, "loss": 0.903, "step": 8851 }, { "epoch": 1.2532030862886672, "grad_norm": 8.703174967596686, "learning_rate": 1.615631958859705e-06, "loss": 0.9911, "step": 8852 }, { "epoch": 1.2533446591633042, "grad_norm": 10.71578751710841, "learning_rate": 1.6150958449368862e-06, "loss": 1.0148, "step": 8853 }, { "epoch": 1.2534862320379414, "grad_norm": 9.731651059744838, "learning_rate": 1.6145597775328192e-06, "loss": 0.9567, "step": 8854 }, { "epoch": 1.2536278049125786, "grad_norm": 10.028428295823462, "learning_rate": 1.614023756675685e-06, "loss": 1.1196, "step": 8855 }, { "epoch": 1.2537693777872159, "grad_norm": 10.95058055714738, "learning_rate": 1.613487782393661e-06, "loss": 1.0311, "step": 8856 }, { "epoch": 1.253910950661853, "grad_norm": 8.87108944985585, "learning_rate": 1.612951854714923e-06, "loss": 1.1054, "step": 8857 }, { "epoch": 1.2540525235364903, "grad_norm": 9.353306606886244, "learning_rate": 1.6124159736676452e-06, "loss": 0.9048, "step": 8858 }, { "epoch": 1.2541940964111276, "grad_norm": 8.260532476151328, "learning_rate": 1.611880139279998e-06, "loss": 0.9493, "step": 8859 }, { "epoch": 1.2543356692857648, "grad_norm": 9.14313408738718, "learning_rate": 1.6113443515801492e-06, "loss": 1.1572, "step": 8860 }, { "epoch": 1.254477242160402, "grad_norm": 7.534986653256002, "learning_rate": 1.610808610596265e-06, "loss": 1.0231, "step": 8861 }, { "epoch": 1.2546188150350392, "grad_norm": 9.239922885325278, "learning_rate": 1.6102729163565095e-06, "loss": 0.9575, "step": 8862 }, { "epoch": 1.2547603879096765, "grad_norm": 8.76466100495269, "learning_rate": 1.6097372688890433e-06, "loss": 1.0437, "step": 8863 }, { "epoch": 1.2549019607843137, "grad_norm": 9.810208387115576, "learning_rate": 1.6092016682220252e-06, "loss": 0.9984, "step": 8864 }, { "epoch": 1.255043533658951, "grad_norm": 7.799708522338977, "learning_rate": 1.6086661143836107e-06, "loss": 0.9667, "step": 8865 }, { "epoch": 1.2551851065335882, "grad_norm": 6.983371329678369, "learning_rate": 1.6081306074019543e-06, "loss": 0.9403, "step": 8866 }, { "epoch": 1.2553266794082254, "grad_norm": 10.813915005744443, "learning_rate": 1.607595147305207e-06, "loss": 1.058, "step": 8867 }, { "epoch": 1.2554682522828626, "grad_norm": 7.959432502721589, "learning_rate": 1.6070597341215171e-06, "loss": 0.9496, "step": 8868 }, { "epoch": 1.2556098251574999, "grad_norm": 9.12878464898151, "learning_rate": 1.6065243678790321e-06, "loss": 0.9087, "step": 8869 }, { "epoch": 1.255751398032137, "grad_norm": 9.716475536457956, "learning_rate": 1.6059890486058937e-06, "loss": 1.1533, "step": 8870 }, { "epoch": 1.2558929709067743, "grad_norm": 8.632725890402645, "learning_rate": 1.605453776330245e-06, "loss": 1.0051, "step": 8871 }, { "epoch": 1.2560345437814115, "grad_norm": 7.204282853745235, "learning_rate": 1.604918551080224e-06, "loss": 0.9868, "step": 8872 }, { "epoch": 1.2561761166560488, "grad_norm": 9.815623569822405, "learning_rate": 1.6043833728839675e-06, "loss": 1.1004, "step": 8873 }, { "epoch": 1.256317689530686, "grad_norm": 9.771339734150855, "learning_rate": 1.6038482417696095e-06, "loss": 1.0565, "step": 8874 }, { "epoch": 1.2564592624053232, "grad_norm": 8.332001490658294, "learning_rate": 1.60331315776528e-06, "loss": 0.9647, "step": 8875 }, { "epoch": 1.2566008352799605, "grad_norm": 10.994647024056468, "learning_rate": 1.6027781208991102e-06, "loss": 1.0955, "step": 8876 }, { "epoch": 1.2567424081545977, "grad_norm": 10.306529403239665, "learning_rate": 1.6022431311992257e-06, "loss": 0.9632, "step": 8877 }, { "epoch": 1.256883981029235, "grad_norm": 8.436610648296924, "learning_rate": 1.6017081886937502e-06, "loss": 0.9575, "step": 8878 }, { "epoch": 1.257025553903872, "grad_norm": 10.805594118687436, "learning_rate": 1.6011732934108055e-06, "loss": 1.0269, "step": 8879 }, { "epoch": 1.2571671267785092, "grad_norm": 9.445211127583592, "learning_rate": 1.6006384453785115e-06, "loss": 0.9981, "step": 8880 }, { "epoch": 1.2573086996531464, "grad_norm": 7.333696471962218, "learning_rate": 1.6001036446249824e-06, "loss": 0.9285, "step": 8881 }, { "epoch": 1.2574502725277836, "grad_norm": 9.89143298375761, "learning_rate": 1.5995688911783341e-06, "loss": 0.9571, "step": 8882 }, { "epoch": 1.2575918454024209, "grad_norm": 9.126702136836244, "learning_rate": 1.5990341850666779e-06, "loss": 0.9359, "step": 8883 }, { "epoch": 1.257733418277058, "grad_norm": 9.64327140604497, "learning_rate": 1.598499526318123e-06, "loss": 1.0468, "step": 8884 }, { "epoch": 1.2578749911516953, "grad_norm": 8.31581542934202, "learning_rate": 1.5979649149607755e-06, "loss": 1.0708, "step": 8885 }, { "epoch": 1.2580165640263326, "grad_norm": 9.6101466915752, "learning_rate": 1.59743035102274e-06, "loss": 1.0358, "step": 8886 }, { "epoch": 1.2581581369009698, "grad_norm": 9.610192737055765, "learning_rate": 1.5968958345321178e-06, "loss": 1.0129, "step": 8887 }, { "epoch": 1.258299709775607, "grad_norm": 7.921932062248259, "learning_rate": 1.5963613655170082e-06, "loss": 0.9886, "step": 8888 }, { "epoch": 1.2584412826502442, "grad_norm": 8.226647357557358, "learning_rate": 1.595826944005508e-06, "loss": 1.0401, "step": 8889 }, { "epoch": 1.2585828555248815, "grad_norm": 10.61108600376037, "learning_rate": 1.5952925700257116e-06, "loss": 1.0665, "step": 8890 }, { "epoch": 1.2587244283995187, "grad_norm": 11.388309949927821, "learning_rate": 1.5947582436057097e-06, "loss": 0.9522, "step": 8891 }, { "epoch": 1.258866001274156, "grad_norm": 10.65841375404621, "learning_rate": 1.5942239647735918e-06, "loss": 1.0386, "step": 8892 }, { "epoch": 1.2590075741487932, "grad_norm": 10.449691505076574, "learning_rate": 1.5936897335574453e-06, "loss": 0.9493, "step": 8893 }, { "epoch": 1.2591491470234304, "grad_norm": 12.93953434526396, "learning_rate": 1.5931555499853529e-06, "loss": 1.0549, "step": 8894 }, { "epoch": 1.2592907198980674, "grad_norm": 8.036450791288452, "learning_rate": 1.5926214140853976e-06, "loss": 0.9518, "step": 8895 }, { "epoch": 1.2594322927727046, "grad_norm": 9.039155344704676, "learning_rate": 1.592087325885658e-06, "loss": 0.9799, "step": 8896 }, { "epoch": 1.2595738656473419, "grad_norm": 10.216808636307988, "learning_rate": 1.5915532854142105e-06, "loss": 1.0795, "step": 8897 }, { "epoch": 1.259715438521979, "grad_norm": 9.087303996956713, "learning_rate": 1.5910192926991291e-06, "loss": 0.9737, "step": 8898 }, { "epoch": 1.2598570113966163, "grad_norm": 8.059129113791013, "learning_rate": 1.5904853477684863e-06, "loss": 0.8801, "step": 8899 }, { "epoch": 1.2599985842712536, "grad_norm": 10.930110506020885, "learning_rate": 1.5899514506503499e-06, "loss": 1.0074, "step": 8900 }, { "epoch": 1.2601401571458908, "grad_norm": 9.346966142123962, "learning_rate": 1.5894176013727891e-06, "loss": 1.03, "step": 8901 }, { "epoch": 1.260281730020528, "grad_norm": 9.829373569960195, "learning_rate": 1.5888837999638646e-06, "loss": 1.0743, "step": 8902 }, { "epoch": 1.2604233028951652, "grad_norm": 10.045950032751849, "learning_rate": 1.5883500464516394e-06, "loss": 0.9407, "step": 8903 }, { "epoch": 1.2605648757698025, "grad_norm": 10.372985276542297, "learning_rate": 1.5878163408641717e-06, "loss": 1.017, "step": 8904 }, { "epoch": 1.2607064486444397, "grad_norm": 9.615081432551149, "learning_rate": 1.5872826832295197e-06, "loss": 0.9163, "step": 8905 }, { "epoch": 1.260848021519077, "grad_norm": 8.766473576071563, "learning_rate": 1.5867490735757366e-06, "loss": 0.9634, "step": 8906 }, { "epoch": 1.2609895943937142, "grad_norm": 8.239803139270123, "learning_rate": 1.5862155119308737e-06, "loss": 0.9556, "step": 8907 }, { "epoch": 1.2611311672683514, "grad_norm": 9.102179850978048, "learning_rate": 1.5856819983229796e-06, "loss": 0.9512, "step": 8908 }, { "epoch": 1.2612727401429886, "grad_norm": 9.500906650044067, "learning_rate": 1.5851485327801014e-06, "loss": 1.0471, "step": 8909 }, { "epoch": 1.2614143130176259, "grad_norm": 9.06357205068019, "learning_rate": 1.5846151153302824e-06, "loss": 1.0671, "step": 8910 }, { "epoch": 1.261555885892263, "grad_norm": 6.916042927879601, "learning_rate": 1.584081746001565e-06, "loss": 0.8267, "step": 8911 }, { "epoch": 1.2616974587669003, "grad_norm": 8.92030440901608, "learning_rate": 1.583548424821987e-06, "loss": 0.9514, "step": 8912 }, { "epoch": 1.2618390316415375, "grad_norm": 8.638272894853289, "learning_rate": 1.5830151518195846e-06, "loss": 0.9517, "step": 8913 }, { "epoch": 1.2619806045161748, "grad_norm": 9.910499599885766, "learning_rate": 1.5824819270223922e-06, "loss": 1.0185, "step": 8914 }, { "epoch": 1.262122177390812, "grad_norm": 9.005139049280416, "learning_rate": 1.5819487504584408e-06, "loss": 0.9352, "step": 8915 }, { "epoch": 1.2622637502654492, "grad_norm": 8.489302242438807, "learning_rate": 1.5814156221557587e-06, "loss": 0.9836, "step": 8916 }, { "epoch": 1.2624053231400865, "grad_norm": 10.278624225391104, "learning_rate": 1.5808825421423729e-06, "loss": 1.0421, "step": 8917 }, { "epoch": 1.2625468960147237, "grad_norm": 9.670244366024718, "learning_rate": 1.5803495104463063e-06, "loss": 1.0334, "step": 8918 }, { "epoch": 1.262688468889361, "grad_norm": 8.751561924628396, "learning_rate": 1.57981652709558e-06, "loss": 1.1205, "step": 8919 }, { "epoch": 1.262830041763998, "grad_norm": 8.895334508223037, "learning_rate": 1.5792835921182128e-06, "loss": 0.9956, "step": 8920 }, { "epoch": 1.2629716146386352, "grad_norm": 9.795216494509777, "learning_rate": 1.5787507055422201e-06, "loss": 1.0085, "step": 8921 }, { "epoch": 1.2631131875132724, "grad_norm": 9.059920581865331, "learning_rate": 1.5782178673956179e-06, "loss": 1.0175, "step": 8922 }, { "epoch": 1.2632547603879096, "grad_norm": 10.968549677657936, "learning_rate": 1.5776850777064137e-06, "loss": 1.0882, "step": 8923 }, { "epoch": 1.2633963332625469, "grad_norm": 9.804524867924105, "learning_rate": 1.5771523365026175e-06, "loss": 1.0481, "step": 8924 }, { "epoch": 1.263537906137184, "grad_norm": 9.703983582183207, "learning_rate": 1.5766196438122344e-06, "loss": 1.0085, "step": 8925 }, { "epoch": 1.2636794790118213, "grad_norm": 8.909520455587671, "learning_rate": 1.5760869996632685e-06, "loss": 0.937, "step": 8926 }, { "epoch": 1.2638210518864585, "grad_norm": 9.392635999704726, "learning_rate": 1.5755544040837195e-06, "loss": 1.0018, "step": 8927 }, { "epoch": 1.2639626247610958, "grad_norm": 8.96889120914533, "learning_rate": 1.575021857101587e-06, "loss": 1.0181, "step": 8928 }, { "epoch": 1.264104197635733, "grad_norm": 8.637901056145626, "learning_rate": 1.5744893587448654e-06, "loss": 0.9094, "step": 8929 }, { "epoch": 1.2642457705103702, "grad_norm": 11.32488167471471, "learning_rate": 1.5739569090415482e-06, "loss": 1.0567, "step": 8930 }, { "epoch": 1.2643873433850075, "grad_norm": 10.094752589647326, "learning_rate": 1.573424508019626e-06, "loss": 0.9937, "step": 8931 }, { "epoch": 1.2645289162596447, "grad_norm": 10.092610486786274, "learning_rate": 1.5728921557070864e-06, "loss": 0.9859, "step": 8932 }, { "epoch": 1.264670489134282, "grad_norm": 12.360682364087323, "learning_rate": 1.5723598521319152e-06, "loss": 1.117, "step": 8933 }, { "epoch": 1.2648120620089192, "grad_norm": 8.71087487715719, "learning_rate": 1.5718275973220944e-06, "loss": 0.8706, "step": 8934 }, { "epoch": 1.2649536348835564, "grad_norm": 8.823171228356749, "learning_rate": 1.571295391305605e-06, "loss": 0.9591, "step": 8935 }, { "epoch": 1.2650952077581934, "grad_norm": 11.222777257440208, "learning_rate": 1.5707632341104246e-06, "loss": 1.1001, "step": 8936 }, { "epoch": 1.2652367806328306, "grad_norm": 8.078214722361986, "learning_rate": 1.5702311257645274e-06, "loss": 0.9835, "step": 8937 }, { "epoch": 1.2653783535074679, "grad_norm": 8.990455228590216, "learning_rate": 1.5696990662958872e-06, "loss": 1.0003, "step": 8938 }, { "epoch": 1.265519926382105, "grad_norm": 9.998543442505998, "learning_rate": 1.5691670557324734e-06, "loss": 1.0539, "step": 8939 }, { "epoch": 1.2656614992567423, "grad_norm": 9.111586235291005, "learning_rate": 1.5686350941022533e-06, "loss": 1.0081, "step": 8940 }, { "epoch": 1.2658030721313795, "grad_norm": 9.28938076029118, "learning_rate": 1.5681031814331918e-06, "loss": 1.1441, "step": 8941 }, { "epoch": 1.2659446450060168, "grad_norm": 9.675165014313668, "learning_rate": 1.5675713177532514e-06, "loss": 1.0411, "step": 8942 }, { "epoch": 1.266086217880654, "grad_norm": 8.630658933164112, "learning_rate": 1.5670395030903918e-06, "loss": 0.8984, "step": 8943 }, { "epoch": 1.2662277907552912, "grad_norm": 9.632667558976475, "learning_rate": 1.5665077374725696e-06, "loss": 0.976, "step": 8944 }, { "epoch": 1.2663693636299285, "grad_norm": 10.972935760946894, "learning_rate": 1.5659760209277395e-06, "loss": 1.0773, "step": 8945 }, { "epoch": 1.2665109365045657, "grad_norm": 9.422100108732891, "learning_rate": 1.5654443534838537e-06, "loss": 1.0603, "step": 8946 }, { "epoch": 1.266652509379203, "grad_norm": 8.830348776196631, "learning_rate": 1.564912735168861e-06, "loss": 0.8976, "step": 8947 }, { "epoch": 1.2667940822538402, "grad_norm": 9.602643888706918, "learning_rate": 1.564381166010709e-06, "loss": 1.0034, "step": 8948 }, { "epoch": 1.2669356551284774, "grad_norm": 10.02285672153162, "learning_rate": 1.5638496460373415e-06, "loss": 1.069, "step": 8949 }, { "epoch": 1.2670772280031146, "grad_norm": 8.561355848511598, "learning_rate": 1.563318175276699e-06, "loss": 0.9194, "step": 8950 }, { "epoch": 1.2672188008777518, "grad_norm": 9.900950268693633, "learning_rate": 1.5627867537567225e-06, "loss": 1.1824, "step": 8951 }, { "epoch": 1.267360373752389, "grad_norm": 9.879283121024095, "learning_rate": 1.5622553815053476e-06, "loss": 1.0229, "step": 8952 }, { "epoch": 1.2675019466270263, "grad_norm": 10.036782328601072, "learning_rate": 1.5617240585505084e-06, "loss": 1.0283, "step": 8953 }, { "epoch": 1.2676435195016635, "grad_norm": 9.577444329275483, "learning_rate": 1.5611927849201364e-06, "loss": 0.9504, "step": 8954 }, { "epoch": 1.2677850923763008, "grad_norm": 10.333198259352708, "learning_rate": 1.5606615606421588e-06, "loss": 1.069, "step": 8955 }, { "epoch": 1.267926665250938, "grad_norm": 9.47535439988489, "learning_rate": 1.5601303857445018e-06, "loss": 0.9476, "step": 8956 }, { "epoch": 1.2680682381255752, "grad_norm": 8.575246859631115, "learning_rate": 1.5595992602550903e-06, "loss": 1.028, "step": 8957 }, { "epoch": 1.2682098110002125, "grad_norm": 9.74507877668362, "learning_rate": 1.5590681842018446e-06, "loss": 0.9363, "step": 8958 }, { "epoch": 1.2683513838748497, "grad_norm": 10.355421229497718, "learning_rate": 1.5585371576126828e-06, "loss": 1.1036, "step": 8959 }, { "epoch": 1.268492956749487, "grad_norm": 10.159400803325815, "learning_rate": 1.5580061805155205e-06, "loss": 1.0512, "step": 8960 }, { "epoch": 1.2686345296241242, "grad_norm": 8.849039282081435, "learning_rate": 1.5574752529382714e-06, "loss": 0.9858, "step": 8961 }, { "epoch": 1.2687761024987612, "grad_norm": 8.416468652592851, "learning_rate": 1.5569443749088449e-06, "loss": 1.0487, "step": 8962 }, { "epoch": 1.2689176753733984, "grad_norm": 10.712452097622078, "learning_rate": 1.5564135464551496e-06, "loss": 1.0264, "step": 8963 }, { "epoch": 1.2690592482480356, "grad_norm": 7.79337453909043, "learning_rate": 1.5558827676050914e-06, "loss": 0.9627, "step": 8964 }, { "epoch": 1.2692008211226729, "grad_norm": 9.86887604317071, "learning_rate": 1.555352038386571e-06, "loss": 1.0898, "step": 8965 }, { "epoch": 1.26934239399731, "grad_norm": 9.015212871050332, "learning_rate": 1.55482135882749e-06, "loss": 1.0287, "step": 8966 }, { "epoch": 1.2694839668719473, "grad_norm": 8.942352898091414, "learning_rate": 1.5542907289557457e-06, "loss": 0.9464, "step": 8967 }, { "epoch": 1.2696255397465845, "grad_norm": 11.209024022788828, "learning_rate": 1.5537601487992325e-06, "loss": 1.071, "step": 8968 }, { "epoch": 1.2697671126212218, "grad_norm": 7.287650900794814, "learning_rate": 1.5532296183858424e-06, "loss": 0.8518, "step": 8969 }, { "epoch": 1.269908685495859, "grad_norm": 8.435972174318673, "learning_rate": 1.5526991377434655e-06, "loss": 1.0161, "step": 8970 }, { "epoch": 1.2700502583704962, "grad_norm": 9.424664592087076, "learning_rate": 1.5521687068999885e-06, "loss": 1.0293, "step": 8971 }, { "epoch": 1.2701918312451335, "grad_norm": 9.644297489193217, "learning_rate": 1.5516383258832956e-06, "loss": 1.0086, "step": 8972 }, { "epoch": 1.2703334041197707, "grad_norm": 11.233369530261967, "learning_rate": 1.551107994721269e-06, "loss": 1.0038, "step": 8973 }, { "epoch": 1.270474976994408, "grad_norm": 10.710991278470937, "learning_rate": 1.5505777134417876e-06, "loss": 1.0272, "step": 8974 }, { "epoch": 1.2706165498690452, "grad_norm": 9.004755671023185, "learning_rate": 1.550047482072729e-06, "loss": 0.9832, "step": 8975 }, { "epoch": 1.2707581227436824, "grad_norm": 8.989382096591267, "learning_rate": 1.549517300641965e-06, "loss": 1.0583, "step": 8976 }, { "epoch": 1.2708996956183194, "grad_norm": 8.85155646651383, "learning_rate": 1.5489871691773677e-06, "loss": 1.0322, "step": 8977 }, { "epoch": 1.2710412684929566, "grad_norm": 7.725302690915079, "learning_rate": 1.5484570877068055e-06, "loss": 0.9349, "step": 8978 }, { "epoch": 1.2711828413675939, "grad_norm": 10.013182153216633, "learning_rate": 1.547927056258145e-06, "loss": 1.0468, "step": 8979 }, { "epoch": 1.271324414242231, "grad_norm": 10.663231137465587, "learning_rate": 1.5473970748592493e-06, "loss": 1.0724, "step": 8980 }, { "epoch": 1.2714659871168683, "grad_norm": 7.466476030413822, "learning_rate": 1.5468671435379789e-06, "loss": 0.9101, "step": 8981 }, { "epoch": 1.2716075599915055, "grad_norm": 11.946372045676082, "learning_rate": 1.5463372623221923e-06, "loss": 0.9707, "step": 8982 }, { "epoch": 1.2717491328661428, "grad_norm": 9.396793110639404, "learning_rate": 1.5458074312397447e-06, "loss": 1.0128, "step": 8983 }, { "epoch": 1.27189070574078, "grad_norm": 7.8306015878748, "learning_rate": 1.5452776503184891e-06, "loss": 0.9315, "step": 8984 }, { "epoch": 1.2720322786154172, "grad_norm": 8.628946921588168, "learning_rate": 1.5447479195862752e-06, "loss": 1.1318, "step": 8985 }, { "epoch": 1.2721738514900545, "grad_norm": 9.458981753626004, "learning_rate": 1.5442182390709517e-06, "loss": 1.0379, "step": 8986 }, { "epoch": 1.2723154243646917, "grad_norm": 9.285196512060374, "learning_rate": 1.5436886088003622e-06, "loss": 0.9821, "step": 8987 }, { "epoch": 1.272456997239329, "grad_norm": 8.744887684122087, "learning_rate": 1.5431590288023496e-06, "loss": 1.0274, "step": 8988 }, { "epoch": 1.2725985701139662, "grad_norm": 10.172601770551454, "learning_rate": 1.542629499104753e-06, "loss": 1.0418, "step": 8989 }, { "epoch": 1.2727401429886034, "grad_norm": 7.672592471383685, "learning_rate": 1.5421000197354099e-06, "loss": 0.9632, "step": 8990 }, { "epoch": 1.2728817158632406, "grad_norm": 9.736906601107531, "learning_rate": 1.5415705907221545e-06, "loss": 1.0115, "step": 8991 }, { "epoch": 1.2730232887378778, "grad_norm": 9.286281878552346, "learning_rate": 1.5410412120928189e-06, "loss": 0.9723, "step": 8992 }, { "epoch": 1.273164861612515, "grad_norm": 8.209689523678426, "learning_rate": 1.5405118838752314e-06, "loss": 0.9597, "step": 8993 }, { "epoch": 1.2733064344871523, "grad_norm": 8.07410299793057, "learning_rate": 1.539982606097219e-06, "loss": 0.9347, "step": 8994 }, { "epoch": 1.2734480073617895, "grad_norm": 10.71467748743134, "learning_rate": 1.5394533787866045e-06, "loss": 1.0568, "step": 8995 }, { "epoch": 1.2735895802364268, "grad_norm": 10.87389113812929, "learning_rate": 1.5389242019712107e-06, "loss": 0.9972, "step": 8996 }, { "epoch": 1.273731153111064, "grad_norm": 8.205745024299048, "learning_rate": 1.5383950756788545e-06, "loss": 0.9743, "step": 8997 }, { "epoch": 1.2738727259857012, "grad_norm": 8.671661649261768, "learning_rate": 1.5378659999373524e-06, "loss": 1.0148, "step": 8998 }, { "epoch": 1.2740142988603385, "grad_norm": 9.017629099853217, "learning_rate": 1.5373369747745171e-06, "loss": 1.0632, "step": 8999 }, { "epoch": 1.2741558717349757, "grad_norm": 8.311830880905449, "learning_rate": 1.5368080002181591e-06, "loss": 0.8972, "step": 9000 }, { "epoch": 1.274297444609613, "grad_norm": 7.4279652232895135, "learning_rate": 1.536279076296086e-06, "loss": 0.8769, "step": 9001 }, { "epoch": 1.2744390174842501, "grad_norm": 8.925758598849814, "learning_rate": 1.5357502030361036e-06, "loss": 1.0983, "step": 9002 }, { "epoch": 1.2745805903588872, "grad_norm": 10.569272370064894, "learning_rate": 1.535221380466014e-06, "loss": 0.9836, "step": 9003 }, { "epoch": 1.2747221632335244, "grad_norm": 8.647961997147164, "learning_rate": 1.5346926086136171e-06, "loss": 1.0015, "step": 9004 }, { "epoch": 1.2748637361081616, "grad_norm": 8.75046777837231, "learning_rate": 1.5341638875067102e-06, "loss": 1.1112, "step": 9005 }, { "epoch": 1.2750053089827988, "grad_norm": 9.35690962938619, "learning_rate": 1.5336352171730876e-06, "loss": 1.0989, "step": 9006 }, { "epoch": 1.275146881857436, "grad_norm": 10.295016547954464, "learning_rate": 1.5331065976405412e-06, "loss": 1.0584, "step": 9007 }, { "epoch": 1.2752884547320733, "grad_norm": 9.868998575009277, "learning_rate": 1.53257802893686e-06, "loss": 1.0431, "step": 9008 }, { "epoch": 1.2754300276067105, "grad_norm": 10.371582571460399, "learning_rate": 1.5320495110898304e-06, "loss": 1.0983, "step": 9009 }, { "epoch": 1.2755716004813478, "grad_norm": 8.200415754245663, "learning_rate": 1.531521044127236e-06, "loss": 0.9556, "step": 9010 }, { "epoch": 1.275713173355985, "grad_norm": 8.101055839388508, "learning_rate": 1.5309926280768583e-06, "loss": 1.0405, "step": 9011 }, { "epoch": 1.2758547462306222, "grad_norm": 9.868137341085172, "learning_rate": 1.5304642629664756e-06, "loss": 0.9731, "step": 9012 }, { "epoch": 1.2759963191052595, "grad_norm": 12.58910562527521, "learning_rate": 1.5299359488238635e-06, "loss": 1.077, "step": 9013 }, { "epoch": 1.2761378919798967, "grad_norm": 9.762036840805816, "learning_rate": 1.5294076856767956e-06, "loss": 0.9745, "step": 9014 }, { "epoch": 1.276279464854534, "grad_norm": 10.59030446422668, "learning_rate": 1.5288794735530416e-06, "loss": 1.0658, "step": 9015 }, { "epoch": 1.2764210377291711, "grad_norm": 9.556955762290773, "learning_rate": 1.52835131248037e-06, "loss": 0.9936, "step": 9016 }, { "epoch": 1.2765626106038084, "grad_norm": 8.577999148809276, "learning_rate": 1.5278232024865458e-06, "loss": 1.0346, "step": 9017 }, { "epoch": 1.2767041834784456, "grad_norm": 9.347747253918696, "learning_rate": 1.5272951435993303e-06, "loss": 0.9903, "step": 9018 }, { "epoch": 1.2768457563530826, "grad_norm": 9.956080694010296, "learning_rate": 1.5267671358464837e-06, "loss": 1.0669, "step": 9019 }, { "epoch": 1.2769873292277198, "grad_norm": 9.59046944560128, "learning_rate": 1.5262391792557635e-06, "loss": 1.0304, "step": 9020 }, { "epoch": 1.277128902102357, "grad_norm": 8.4267876333093, "learning_rate": 1.5257112738549233e-06, "loss": 1.1083, "step": 9021 }, { "epoch": 1.2772704749769943, "grad_norm": 9.954272049901842, "learning_rate": 1.525183419671715e-06, "loss": 0.983, "step": 9022 }, { "epoch": 1.2774120478516315, "grad_norm": 9.258147079924559, "learning_rate": 1.5246556167338875e-06, "loss": 0.8739, "step": 9023 }, { "epoch": 1.2775536207262688, "grad_norm": 8.882453904508745, "learning_rate": 1.5241278650691866e-06, "loss": 0.8765, "step": 9024 }, { "epoch": 1.277695193600906, "grad_norm": 8.656643965256452, "learning_rate": 1.5236001647053564e-06, "loss": 1.0235, "step": 9025 }, { "epoch": 1.2778367664755432, "grad_norm": 9.398009686075575, "learning_rate": 1.5230725156701375e-06, "loss": 1.0261, "step": 9026 }, { "epoch": 1.2779783393501805, "grad_norm": 9.527575175284797, "learning_rate": 1.5225449179912683e-06, "loss": 0.9796, "step": 9027 }, { "epoch": 1.2781199122248177, "grad_norm": 8.015071024350977, "learning_rate": 1.5220173716964847e-06, "loss": 0.959, "step": 9028 }, { "epoch": 1.278261485099455, "grad_norm": 9.707914635357215, "learning_rate": 1.521489876813518e-06, "loss": 0.9286, "step": 9029 }, { "epoch": 1.2784030579740921, "grad_norm": 7.930372747870432, "learning_rate": 1.5209624333700985e-06, "loss": 1.0793, "step": 9030 }, { "epoch": 1.2785446308487294, "grad_norm": 10.72075240668897, "learning_rate": 1.520435041393954e-06, "loss": 0.9781, "step": 9031 }, { "epoch": 1.2786862037233666, "grad_norm": 9.879270378693562, "learning_rate": 1.519907700912809e-06, "loss": 1.0346, "step": 9032 }, { "epoch": 1.2788277765980038, "grad_norm": 8.187220910826566, "learning_rate": 1.5193804119543853e-06, "loss": 0.9894, "step": 9033 }, { "epoch": 1.278969349472641, "grad_norm": 8.226898215490872, "learning_rate": 1.5188531745464023e-06, "loss": 0.9669, "step": 9034 }, { "epoch": 1.2791109223472783, "grad_norm": 9.698977654449148, "learning_rate": 1.5183259887165763e-06, "loss": 1.0364, "step": 9035 }, { "epoch": 1.2792524952219155, "grad_norm": 8.499270688352427, "learning_rate": 1.5177988544926208e-06, "loss": 0.9655, "step": 9036 }, { "epoch": 1.2793940680965528, "grad_norm": 9.949517427176627, "learning_rate": 1.5172717719022475e-06, "loss": 0.9626, "step": 9037 }, { "epoch": 1.27953564097119, "grad_norm": 9.643827972789126, "learning_rate": 1.5167447409731645e-06, "loss": 0.9619, "step": 9038 }, { "epoch": 1.2796772138458272, "grad_norm": 10.26041497296676, "learning_rate": 1.5162177617330775e-06, "loss": 1.0353, "step": 9039 }, { "epoch": 1.2798187867204645, "grad_norm": 8.78889322753662, "learning_rate": 1.515690834209689e-06, "loss": 0.9741, "step": 9040 }, { "epoch": 1.2799603595951017, "grad_norm": 9.821980361300538, "learning_rate": 1.5151639584306993e-06, "loss": 1.0082, "step": 9041 }, { "epoch": 1.280101932469739, "grad_norm": 10.853516152206664, "learning_rate": 1.5146371344238063e-06, "loss": 1.0202, "step": 9042 }, { "epoch": 1.2802435053443761, "grad_norm": 10.796212514158793, "learning_rate": 1.5141103622167042e-06, "loss": 1.1095, "step": 9043 }, { "epoch": 1.2803850782190134, "grad_norm": 8.998704181209535, "learning_rate": 1.513583641837085e-06, "loss": 1.0678, "step": 9044 }, { "epoch": 1.2805266510936504, "grad_norm": 9.432769195977915, "learning_rate": 1.5130569733126382e-06, "loss": 1.0856, "step": 9045 }, { "epoch": 1.2806682239682876, "grad_norm": 9.88705674056799, "learning_rate": 1.5125303566710508e-06, "loss": 1.0256, "step": 9046 }, { "epoch": 1.2808097968429248, "grad_norm": 10.347724450629343, "learning_rate": 1.5120037919400054e-06, "loss": 0.9876, "step": 9047 }, { "epoch": 1.280951369717562, "grad_norm": 9.573682442394489, "learning_rate": 1.5114772791471848e-06, "loss": 1.145, "step": 9048 }, { "epoch": 1.2810929425921993, "grad_norm": 9.603942824352792, "learning_rate": 1.5109508183202675e-06, "loss": 1.1661, "step": 9049 }, { "epoch": 1.2812345154668365, "grad_norm": 10.43959508644122, "learning_rate": 1.5104244094869272e-06, "loss": 0.986, "step": 9050 }, { "epoch": 1.2813760883414738, "grad_norm": 9.067163569727597, "learning_rate": 1.509898052674838e-06, "loss": 1.0031, "step": 9051 }, { "epoch": 1.281517661216111, "grad_norm": 9.356119904922819, "learning_rate": 1.5093717479116696e-06, "loss": 1.1085, "step": 9052 }, { "epoch": 1.2816592340907482, "grad_norm": 8.994636632970767, "learning_rate": 1.508845495225089e-06, "loss": 1.0897, "step": 9053 }, { "epoch": 1.2818008069653855, "grad_norm": 7.887948184572462, "learning_rate": 1.5083192946427626e-06, "loss": 1.0325, "step": 9054 }, { "epoch": 1.2819423798400227, "grad_norm": 9.546357136118075, "learning_rate": 1.5077931461923518e-06, "loss": 1.0165, "step": 9055 }, { "epoch": 1.28208395271466, "grad_norm": 9.825534594408488, "learning_rate": 1.5072670499015151e-06, "loss": 0.9491, "step": 9056 }, { "epoch": 1.2822255255892971, "grad_norm": 10.306595655281372, "learning_rate": 1.5067410057979094e-06, "loss": 1.0766, "step": 9057 }, { "epoch": 1.2823670984639344, "grad_norm": 8.409972093606159, "learning_rate": 1.5062150139091882e-06, "loss": 0.9335, "step": 9058 }, { "epoch": 1.2825086713385716, "grad_norm": 10.138000054496965, "learning_rate": 1.505689074263003e-06, "loss": 1.1344, "step": 9059 }, { "epoch": 1.2826502442132086, "grad_norm": 9.880090487911353, "learning_rate": 1.505163186887002e-06, "loss": 0.9886, "step": 9060 }, { "epoch": 1.2827918170878458, "grad_norm": 8.738395188824674, "learning_rate": 1.5046373518088303e-06, "loss": 1.0736, "step": 9061 }, { "epoch": 1.282933389962483, "grad_norm": 8.123808788128523, "learning_rate": 1.5041115690561308e-06, "loss": 0.9611, "step": 9062 }, { "epoch": 1.2830749628371203, "grad_norm": 9.432304114100589, "learning_rate": 1.5035858386565433e-06, "loss": 1.0986, "step": 9063 }, { "epoch": 1.2832165357117575, "grad_norm": 8.980220676760695, "learning_rate": 1.5030601606377054e-06, "loss": 1.0519, "step": 9064 }, { "epoch": 1.2833581085863948, "grad_norm": 10.307144531515734, "learning_rate": 1.5025345350272518e-06, "loss": 0.9533, "step": 9065 }, { "epoch": 1.283499681461032, "grad_norm": 9.154347921969366, "learning_rate": 1.502008961852814e-06, "loss": 1.0107, "step": 9066 }, { "epoch": 1.2836412543356692, "grad_norm": 7.997523639783535, "learning_rate": 1.5014834411420204e-06, "loss": 1.0032, "step": 9067 }, { "epoch": 1.2837828272103065, "grad_norm": 8.026716919717385, "learning_rate": 1.5009579729224982e-06, "loss": 1.0337, "step": 9068 }, { "epoch": 1.2839244000849437, "grad_norm": 9.273985000555443, "learning_rate": 1.5004325572218698e-06, "loss": 0.9099, "step": 9069 }, { "epoch": 1.284065972959581, "grad_norm": 9.202535976348916, "learning_rate": 1.4999071940677578e-06, "loss": 0.9909, "step": 9070 }, { "epoch": 1.2842075458342181, "grad_norm": 8.255052810952883, "learning_rate": 1.4993818834877783e-06, "loss": 0.9638, "step": 9071 }, { "epoch": 1.2843491187088554, "grad_norm": 11.463235885976484, "learning_rate": 1.498856625509547e-06, "loss": 0.9906, "step": 9072 }, { "epoch": 1.2844906915834926, "grad_norm": 7.8055980254345645, "learning_rate": 1.4983314201606764e-06, "loss": 0.9826, "step": 9073 }, { "epoch": 1.2846322644581298, "grad_norm": 10.092170898877429, "learning_rate": 1.497806267468776e-06, "loss": 1.0812, "step": 9074 }, { "epoch": 1.284773837332767, "grad_norm": 10.614376726220566, "learning_rate": 1.4972811674614523e-06, "loss": 0.9293, "step": 9075 }, { "epoch": 1.2849154102074043, "grad_norm": 10.92283265986574, "learning_rate": 1.4967561201663108e-06, "loss": 0.9896, "step": 9076 }, { "epoch": 1.2850569830820415, "grad_norm": 11.434644665787062, "learning_rate": 1.4962311256109518e-06, "loss": 0.9402, "step": 9077 }, { "epoch": 1.2851985559566788, "grad_norm": 10.203216225185598, "learning_rate": 1.4957061838229743e-06, "loss": 1.011, "step": 9078 }, { "epoch": 1.285340128831316, "grad_norm": 7.50309206483882, "learning_rate": 1.4951812948299737e-06, "loss": 0.9378, "step": 9079 }, { "epoch": 1.2854817017059532, "grad_norm": 8.5307889820777, "learning_rate": 1.494656458659543e-06, "loss": 1.1479, "step": 9080 }, { "epoch": 1.2856232745805904, "grad_norm": 10.350780851974669, "learning_rate": 1.4941316753392738e-06, "loss": 1.0419, "step": 9081 }, { "epoch": 1.2857648474552277, "grad_norm": 9.136930844053897, "learning_rate": 1.493606944896751e-06, "loss": 1.0618, "step": 9082 }, { "epoch": 1.285906420329865, "grad_norm": 8.3414971099704, "learning_rate": 1.4930822673595613e-06, "loss": 1.0033, "step": 9083 }, { "epoch": 1.2860479932045021, "grad_norm": 10.110844642379005, "learning_rate": 1.4925576427552864e-06, "loss": 1.0289, "step": 9084 }, { "epoch": 1.2861895660791394, "grad_norm": 8.542506578290515, "learning_rate": 1.4920330711115043e-06, "loss": 0.9765, "step": 9085 }, { "epoch": 1.2863311389537764, "grad_norm": 8.86926169985901, "learning_rate": 1.4915085524557924e-06, "loss": 1.059, "step": 9086 }, { "epoch": 1.2864727118284136, "grad_norm": 8.738360701751, "learning_rate": 1.4909840868157237e-06, "loss": 0.9547, "step": 9087 }, { "epoch": 1.2866142847030508, "grad_norm": 9.088320235184588, "learning_rate": 1.4904596742188695e-06, "loss": 0.996, "step": 9088 }, { "epoch": 1.286755857577688, "grad_norm": 8.720699406844112, "learning_rate": 1.4899353146927975e-06, "loss": 0.9485, "step": 9089 }, { "epoch": 1.2868974304523253, "grad_norm": 9.00899522333551, "learning_rate": 1.4894110082650726e-06, "loss": 0.9032, "step": 9090 }, { "epoch": 1.2870390033269625, "grad_norm": 9.959538811113601, "learning_rate": 1.488886754963258e-06, "loss": 1.039, "step": 9091 }, { "epoch": 1.2871805762015998, "grad_norm": 7.344570162811766, "learning_rate": 1.4883625548149125e-06, "loss": 0.9606, "step": 9092 }, { "epoch": 1.287322149076237, "grad_norm": 7.50200499119665, "learning_rate": 1.4878384078475933e-06, "loss": 0.9493, "step": 9093 }, { "epoch": 1.2874637219508742, "grad_norm": 10.045265365444909, "learning_rate": 1.4873143140888537e-06, "loss": 1.1145, "step": 9094 }, { "epoch": 1.2876052948255114, "grad_norm": 7.869588536675362, "learning_rate": 1.486790273566246e-06, "loss": 0.9708, "step": 9095 }, { "epoch": 1.2877468677001487, "grad_norm": 11.044041293090862, "learning_rate": 1.486266286307318e-06, "loss": 1.0763, "step": 9096 }, { "epoch": 1.287888440574786, "grad_norm": 9.951878539984769, "learning_rate": 1.4857423523396157e-06, "loss": 1.0614, "step": 9097 }, { "epoch": 1.2880300134494231, "grad_norm": 10.076186071963734, "learning_rate": 1.4852184716906808e-06, "loss": 0.913, "step": 9098 }, { "epoch": 1.2881715863240604, "grad_norm": 9.206968272482957, "learning_rate": 1.484694644388055e-06, "loss": 0.9647, "step": 9099 }, { "epoch": 1.2883131591986976, "grad_norm": 8.749812096894255, "learning_rate": 1.4841708704592745e-06, "loss": 0.978, "step": 9100 }, { "epoch": 1.2884547320733348, "grad_norm": 10.78601853330418, "learning_rate": 1.4836471499318738e-06, "loss": 1.0866, "step": 9101 }, { "epoch": 1.2885963049479718, "grad_norm": 9.544145509210166, "learning_rate": 1.4831234828333856e-06, "loss": 1.0592, "step": 9102 }, { "epoch": 1.288737877822609, "grad_norm": 9.68810088232233, "learning_rate": 1.4825998691913372e-06, "loss": 0.983, "step": 9103 }, { "epoch": 1.2888794506972463, "grad_norm": 11.212577460626386, "learning_rate": 1.482076309033254e-06, "loss": 0.9137, "step": 9104 }, { "epoch": 1.2890210235718835, "grad_norm": 10.493197462792933, "learning_rate": 1.481552802386661e-06, "loss": 1.0005, "step": 9105 }, { "epoch": 1.2891625964465208, "grad_norm": 11.205216514473536, "learning_rate": 1.4810293492790778e-06, "loss": 1.1734, "step": 9106 }, { "epoch": 1.289304169321158, "grad_norm": 10.469701376430042, "learning_rate": 1.480505949738022e-06, "loss": 1.1547, "step": 9107 }, { "epoch": 1.2894457421957952, "grad_norm": 8.84934319251801, "learning_rate": 1.4799826037910082e-06, "loss": 1.026, "step": 9108 }, { "epoch": 1.2895873150704324, "grad_norm": 11.005087282977678, "learning_rate": 1.479459311465548e-06, "loss": 1.0625, "step": 9109 }, { "epoch": 1.2897288879450697, "grad_norm": 9.731920352257092, "learning_rate": 1.478936072789151e-06, "loss": 1.0354, "step": 9110 }, { "epoch": 1.289870460819707, "grad_norm": 9.047332488567633, "learning_rate": 1.4784128877893237e-06, "loss": 0.9962, "step": 9111 }, { "epoch": 1.2900120336943441, "grad_norm": 8.029109922035985, "learning_rate": 1.477889756493569e-06, "loss": 0.9668, "step": 9112 }, { "epoch": 1.2901536065689814, "grad_norm": 9.501184891541776, "learning_rate": 1.4773666789293881e-06, "loss": 1.0886, "step": 9113 }, { "epoch": 1.2902951794436186, "grad_norm": 7.738029341085241, "learning_rate": 1.4768436551242776e-06, "loss": 0.9879, "step": 9114 }, { "epoch": 1.2904367523182558, "grad_norm": 9.231305588520637, "learning_rate": 1.4763206851057338e-06, "loss": 0.9892, "step": 9115 }, { "epoch": 1.290578325192893, "grad_norm": 10.009828696447634, "learning_rate": 1.4757977689012482e-06, "loss": 0.9909, "step": 9116 }, { "epoch": 1.2907198980675303, "grad_norm": 9.002315223448033, "learning_rate": 1.4752749065383099e-06, "loss": 0.9638, "step": 9117 }, { "epoch": 1.2908614709421675, "grad_norm": 7.899176165926533, "learning_rate": 1.4747520980444058e-06, "loss": 0.9875, "step": 9118 }, { "epoch": 1.2910030438168048, "grad_norm": 10.919748080076033, "learning_rate": 1.4742293434470196e-06, "loss": 1.2018, "step": 9119 }, { "epoch": 1.291144616691442, "grad_norm": 10.299688715999352, "learning_rate": 1.4737066427736317e-06, "loss": 1.018, "step": 9120 }, { "epoch": 1.2912861895660792, "grad_norm": 11.274633945519861, "learning_rate": 1.4731839960517202e-06, "loss": 1.0342, "step": 9121 }, { "epoch": 1.2914277624407164, "grad_norm": 9.205652689672313, "learning_rate": 1.4726614033087604e-06, "loss": 1.06, "step": 9122 }, { "epoch": 1.2915693353153537, "grad_norm": 9.9017982468089, "learning_rate": 1.4721388645722262e-06, "loss": 1.0607, "step": 9123 }, { "epoch": 1.291710908189991, "grad_norm": 9.270207781885794, "learning_rate": 1.4716163798695842e-06, "loss": 1.0007, "step": 9124 }, { "epoch": 1.2918524810646281, "grad_norm": 10.55473773755161, "learning_rate": 1.4710939492283022e-06, "loss": 0.9933, "step": 9125 }, { "epoch": 1.2919940539392654, "grad_norm": 10.004961690224084, "learning_rate": 1.4705715726758444e-06, "loss": 1.0361, "step": 9126 }, { "epoch": 1.2921356268139026, "grad_norm": 10.01790655991868, "learning_rate": 1.4700492502396708e-06, "loss": 1.0062, "step": 9127 }, { "epoch": 1.2922771996885396, "grad_norm": 8.630361466247281, "learning_rate": 1.4695269819472406e-06, "loss": 0.9947, "step": 9128 }, { "epoch": 1.2924187725631768, "grad_norm": 8.957563169034652, "learning_rate": 1.4690047678260086e-06, "loss": 1.1213, "step": 9129 }, { "epoch": 1.292560345437814, "grad_norm": 9.154200405868378, "learning_rate": 1.468482607903427e-06, "loss": 0.8793, "step": 9130 }, { "epoch": 1.2927019183124513, "grad_norm": 9.352318754665454, "learning_rate": 1.467960502206946e-06, "loss": 0.9854, "step": 9131 }, { "epoch": 1.2928434911870885, "grad_norm": 9.35465892241334, "learning_rate": 1.4674384507640115e-06, "loss": 1.0034, "step": 9132 }, { "epoch": 1.2929850640617258, "grad_norm": 9.114934935554643, "learning_rate": 1.466916453602068e-06, "loss": 1.0388, "step": 9133 }, { "epoch": 1.293126636936363, "grad_norm": 9.00750292521484, "learning_rate": 1.4663945107485567e-06, "loss": 1.0283, "step": 9134 }, { "epoch": 1.2932682098110002, "grad_norm": 10.448381245642642, "learning_rate": 1.465872622230915e-06, "loss": 0.9898, "step": 9135 }, { "epoch": 1.2934097826856374, "grad_norm": 9.259431309549713, "learning_rate": 1.4653507880765783e-06, "loss": 0.8986, "step": 9136 }, { "epoch": 1.2935513555602747, "grad_norm": 9.549283736765492, "learning_rate": 1.464829008312979e-06, "loss": 1.0168, "step": 9137 }, { "epoch": 1.293692928434912, "grad_norm": 8.657490885787867, "learning_rate": 1.464307282967547e-06, "loss": 1.0379, "step": 9138 }, { "epoch": 1.2938345013095491, "grad_norm": 11.020997900115736, "learning_rate": 1.4637856120677088e-06, "loss": 1.0004, "step": 9139 }, { "epoch": 1.2939760741841864, "grad_norm": 10.764218530563678, "learning_rate": 1.4632639956408884e-06, "loss": 0.9795, "step": 9140 }, { "epoch": 1.2941176470588236, "grad_norm": 8.185205502082896, "learning_rate": 1.4627424337145069e-06, "loss": 0.9732, "step": 9141 }, { "epoch": 1.2942592199334608, "grad_norm": 10.681941856981693, "learning_rate": 1.462220926315982e-06, "loss": 1.0373, "step": 9142 }, { "epoch": 1.2944007928080978, "grad_norm": 8.820710381858424, "learning_rate": 1.4616994734727293e-06, "loss": 1.091, "step": 9143 }, { "epoch": 1.294542365682735, "grad_norm": 9.250143513983817, "learning_rate": 1.461178075212162e-06, "loss": 1.0615, "step": 9144 }, { "epoch": 1.2946839385573723, "grad_norm": 9.137071541526884, "learning_rate": 1.4606567315616876e-06, "loss": 1.0008, "step": 9145 }, { "epoch": 1.2948255114320095, "grad_norm": 11.30996355315659, "learning_rate": 1.4601354425487141e-06, "loss": 1.083, "step": 9146 }, { "epoch": 1.2949670843066468, "grad_norm": 10.576059880812847, "learning_rate": 1.4596142082006448e-06, "loss": 0.9081, "step": 9147 }, { "epoch": 1.295108657181284, "grad_norm": 10.067346768005244, "learning_rate": 1.4590930285448807e-06, "loss": 1.0478, "step": 9148 }, { "epoch": 1.2952502300559212, "grad_norm": 7.698905857178998, "learning_rate": 1.4585719036088205e-06, "loss": 1.1135, "step": 9149 }, { "epoch": 1.2953918029305584, "grad_norm": 10.41718402849331, "learning_rate": 1.458050833419858e-06, "loss": 1.0757, "step": 9150 }, { "epoch": 1.2955333758051957, "grad_norm": 9.83300621221372, "learning_rate": 1.4575298180053875e-06, "loss": 0.9724, "step": 9151 }, { "epoch": 1.295674948679833, "grad_norm": 10.378437254175948, "learning_rate": 1.4570088573927959e-06, "loss": 1.0005, "step": 9152 }, { "epoch": 1.2958165215544701, "grad_norm": 9.965427526632975, "learning_rate": 1.4564879516094721e-06, "loss": 0.933, "step": 9153 }, { "epoch": 1.2959580944291074, "grad_norm": 7.851423302170954, "learning_rate": 1.4559671006827977e-06, "loss": 0.8402, "step": 9154 }, { "epoch": 1.2960996673037446, "grad_norm": 13.233355738495149, "learning_rate": 1.4554463046401554e-06, "loss": 0.9269, "step": 9155 }, { "epoch": 1.2962412401783818, "grad_norm": 10.686651898915477, "learning_rate": 1.4549255635089219e-06, "loss": 1.0889, "step": 9156 }, { "epoch": 1.296382813053019, "grad_norm": 9.522585072596819, "learning_rate": 1.4544048773164712e-06, "loss": 0.9597, "step": 9157 }, { "epoch": 1.2965243859276563, "grad_norm": 9.273220300742011, "learning_rate": 1.4538842460901774e-06, "loss": 0.9552, "step": 9158 }, { "epoch": 1.2966659588022935, "grad_norm": 9.308501133876227, "learning_rate": 1.453363669857408e-06, "loss": 0.9467, "step": 9159 }, { "epoch": 1.2968075316769307, "grad_norm": 11.35312301085518, "learning_rate": 1.4528431486455311e-06, "loss": 1.0987, "step": 9160 }, { "epoch": 1.296949104551568, "grad_norm": 10.043320853293846, "learning_rate": 1.4523226824819081e-06, "loss": 1.0955, "step": 9161 }, { "epoch": 1.2970906774262052, "grad_norm": 9.464077542629823, "learning_rate": 1.4518022713939e-06, "loss": 0.9905, "step": 9162 }, { "epoch": 1.2972322503008424, "grad_norm": 10.0126807873962, "learning_rate": 1.4512819154088665e-06, "loss": 1.0091, "step": 9163 }, { "epoch": 1.2973738231754797, "grad_norm": 9.99697677689282, "learning_rate": 1.4507616145541595e-06, "loss": 1.0881, "step": 9164 }, { "epoch": 1.297515396050117, "grad_norm": 10.214304105606823, "learning_rate": 1.4502413688571332e-06, "loss": 0.9918, "step": 9165 }, { "epoch": 1.2976569689247541, "grad_norm": 9.662715191930841, "learning_rate": 1.4497211783451355e-06, "loss": 1.0239, "step": 9166 }, { "epoch": 1.2977985417993914, "grad_norm": 11.011281903693032, "learning_rate": 1.4492010430455108e-06, "loss": 1.0773, "step": 9167 }, { "epoch": 1.2979401146740286, "grad_norm": 8.968078375467911, "learning_rate": 1.4486809629856052e-06, "loss": 1.0759, "step": 9168 }, { "epoch": 1.2980816875486656, "grad_norm": 9.944452121194141, "learning_rate": 1.4481609381927565e-06, "loss": 0.9592, "step": 9169 }, { "epoch": 1.2982232604233028, "grad_norm": 9.039537264028171, "learning_rate": 1.4476409686943039e-06, "loss": 1.0487, "step": 9170 }, { "epoch": 1.29836483329794, "grad_norm": 10.204583383641461, "learning_rate": 1.4471210545175795e-06, "loss": 0.9428, "step": 9171 }, { "epoch": 1.2985064061725773, "grad_norm": 8.97496960138045, "learning_rate": 1.446601195689918e-06, "loss": 1.0442, "step": 9172 }, { "epoch": 1.2986479790472145, "grad_norm": 7.737389672498846, "learning_rate": 1.4460813922386446e-06, "loss": 1.0238, "step": 9173 }, { "epoch": 1.2987895519218517, "grad_norm": 9.66997414491781, "learning_rate": 1.4455616441910878e-06, "loss": 1.0838, "step": 9174 }, { "epoch": 1.298931124796489, "grad_norm": 9.31320167304628, "learning_rate": 1.445041951574568e-06, "loss": 0.9115, "step": 9175 }, { "epoch": 1.2990726976711262, "grad_norm": 9.305198315900752, "learning_rate": 1.4445223144164073e-06, "loss": 1.1832, "step": 9176 }, { "epoch": 1.2992142705457634, "grad_norm": 9.184878851227094, "learning_rate": 1.4440027327439215e-06, "loss": 0.9302, "step": 9177 }, { "epoch": 1.2993558434204007, "grad_norm": 9.717765144909661, "learning_rate": 1.443483206584424e-06, "loss": 1.1071, "step": 9178 }, { "epoch": 1.299497416295038, "grad_norm": 9.203429238127, "learning_rate": 1.4429637359652271e-06, "loss": 1.0073, "step": 9179 }, { "epoch": 1.2996389891696751, "grad_norm": 11.247666519604431, "learning_rate": 1.4424443209136375e-06, "loss": 1.0551, "step": 9180 }, { "epoch": 1.2997805620443124, "grad_norm": 9.597893134032317, "learning_rate": 1.4419249614569626e-06, "loss": 0.9677, "step": 9181 }, { "epoch": 1.2999221349189496, "grad_norm": 10.395505564892312, "learning_rate": 1.4414056576225025e-06, "loss": 0.9605, "step": 9182 }, { "epoch": 1.3000637077935868, "grad_norm": 9.236269683878868, "learning_rate": 1.4408864094375586e-06, "loss": 1.0012, "step": 9183 }, { "epoch": 1.300205280668224, "grad_norm": 9.170130081169319, "learning_rate": 1.4403672169294252e-06, "loss": 1.0954, "step": 9184 }, { "epoch": 1.300346853542861, "grad_norm": 11.050683558798488, "learning_rate": 1.4398480801253976e-06, "loss": 1.0856, "step": 9185 }, { "epoch": 1.3004884264174983, "grad_norm": 9.314138793120893, "learning_rate": 1.4393289990527665e-06, "loss": 0.9389, "step": 9186 }, { "epoch": 1.3006299992921355, "grad_norm": 8.797139084207691, "learning_rate": 1.4388099737388196e-06, "loss": 1.0298, "step": 9187 }, { "epoch": 1.3007715721667727, "grad_norm": 9.65269706076364, "learning_rate": 1.4382910042108405e-06, "loss": 0.9415, "step": 9188 }, { "epoch": 1.30091314504141, "grad_norm": 11.022749178434733, "learning_rate": 1.437772090496111e-06, "loss": 1.0113, "step": 9189 }, { "epoch": 1.3010547179160472, "grad_norm": 8.606818454781896, "learning_rate": 1.4372532326219104e-06, "loss": 1.0574, "step": 9190 }, { "epoch": 1.3011962907906844, "grad_norm": 9.597474608451083, "learning_rate": 1.4367344306155163e-06, "loss": 1.0117, "step": 9191 }, { "epoch": 1.3013378636653217, "grad_norm": 9.319865136869396, "learning_rate": 1.4362156845041992e-06, "loss": 0.9161, "step": 9192 }, { "epoch": 1.301479436539959, "grad_norm": 10.973525005101946, "learning_rate": 1.4356969943152315e-06, "loss": 0.9902, "step": 9193 }, { "epoch": 1.3016210094145961, "grad_norm": 7.999466401424506, "learning_rate": 1.435178360075878e-06, "loss": 0.8888, "step": 9194 }, { "epoch": 1.3017625822892334, "grad_norm": 7.896913828601488, "learning_rate": 1.4346597818134052e-06, "loss": 1.09, "step": 9195 }, { "epoch": 1.3019041551638706, "grad_norm": 8.401615496101684, "learning_rate": 1.4341412595550724e-06, "loss": 1.0049, "step": 9196 }, { "epoch": 1.3020457280385078, "grad_norm": 9.552512544385614, "learning_rate": 1.4336227933281398e-06, "loss": 1.1129, "step": 9197 }, { "epoch": 1.302187300913145, "grad_norm": 9.548521109108664, "learning_rate": 1.433104383159862e-06, "loss": 0.9204, "step": 9198 }, { "epoch": 1.3023288737877823, "grad_norm": 9.273727502442089, "learning_rate": 1.43258602907749e-06, "loss": 1.0326, "step": 9199 }, { "epoch": 1.3024704466624195, "grad_norm": 9.175675652595869, "learning_rate": 1.432067731108276e-06, "loss": 1.0182, "step": 9200 }, { "epoch": 1.3026120195370567, "grad_norm": 7.542113766844245, "learning_rate": 1.4315494892794635e-06, "loss": 0.9209, "step": 9201 }, { "epoch": 1.302753592411694, "grad_norm": 8.886170570317967, "learning_rate": 1.4310313036182994e-06, "loss": 1.0321, "step": 9202 }, { "epoch": 1.3028951652863312, "grad_norm": 10.825199144747291, "learning_rate": 1.4305131741520209e-06, "loss": 1.0001, "step": 9203 }, { "epoch": 1.3030367381609684, "grad_norm": 10.455914545692137, "learning_rate": 1.4299951009078688e-06, "loss": 1.034, "step": 9204 }, { "epoch": 1.3031783110356057, "grad_norm": 9.197619204753181, "learning_rate": 1.429477083913075e-06, "loss": 0.9569, "step": 9205 }, { "epoch": 1.303319883910243, "grad_norm": 10.162028854728565, "learning_rate": 1.4289591231948742e-06, "loss": 1.0533, "step": 9206 }, { "epoch": 1.3034614567848801, "grad_norm": 6.704818973927971, "learning_rate": 1.4284412187804925e-06, "loss": 0.8744, "step": 9207 }, { "epoch": 1.3036030296595174, "grad_norm": 10.607677745314358, "learning_rate": 1.4279233706971579e-06, "loss": 0.9951, "step": 9208 }, { "epoch": 1.3037446025341546, "grad_norm": 8.095617601704477, "learning_rate": 1.4274055789720923e-06, "loss": 0.9625, "step": 9209 }, { "epoch": 1.3038861754087916, "grad_norm": 8.76535561431538, "learning_rate": 1.4268878436325145e-06, "loss": 1.0731, "step": 9210 }, { "epoch": 1.3040277482834288, "grad_norm": 8.912609092183844, "learning_rate": 1.4263701647056439e-06, "loss": 0.9855, "step": 9211 }, { "epoch": 1.304169321158066, "grad_norm": 10.383608268743142, "learning_rate": 1.425852542218692e-06, "loss": 1.0372, "step": 9212 }, { "epoch": 1.3043108940327033, "grad_norm": 9.114651599446733, "learning_rate": 1.4253349761988714e-06, "loss": 1.0325, "step": 9213 }, { "epoch": 1.3044524669073405, "grad_norm": 8.892046823094308, "learning_rate": 1.4248174666733905e-06, "loss": 1.0282, "step": 9214 }, { "epoch": 1.3045940397819777, "grad_norm": 8.298699187410337, "learning_rate": 1.4243000136694527e-06, "loss": 0.9995, "step": 9215 }, { "epoch": 1.304735612656615, "grad_norm": 9.254085978626128, "learning_rate": 1.423782617214262e-06, "loss": 1.0137, "step": 9216 }, { "epoch": 1.3048771855312522, "grad_norm": 7.793641790328642, "learning_rate": 1.4232652773350159e-06, "loss": 0.9762, "step": 9217 }, { "epoch": 1.3050187584058894, "grad_norm": 8.887764361563582, "learning_rate": 1.4227479940589122e-06, "loss": 1.178, "step": 9218 }, { "epoch": 1.3051603312805267, "grad_norm": 7.492718277031045, "learning_rate": 1.422230767413143e-06, "loss": 0.9416, "step": 9219 }, { "epoch": 1.305301904155164, "grad_norm": 8.953247282514623, "learning_rate": 1.421713597424898e-06, "loss": 1.0597, "step": 9220 }, { "epoch": 1.3054434770298011, "grad_norm": 8.942958205473385, "learning_rate": 1.4211964841213663e-06, "loss": 1.0513, "step": 9221 }, { "epoch": 1.3055850499044384, "grad_norm": 9.439691756436911, "learning_rate": 1.4206794275297298e-06, "loss": 1.0112, "step": 9222 }, { "epoch": 1.3057266227790756, "grad_norm": 8.570162943088874, "learning_rate": 1.4201624276771723e-06, "loss": 0.9729, "step": 9223 }, { "epoch": 1.3058681956537128, "grad_norm": 10.52952566094652, "learning_rate": 1.4196454845908696e-06, "loss": 0.9895, "step": 9224 }, { "epoch": 1.30600976852835, "grad_norm": 9.617897088083895, "learning_rate": 1.4191285982979992e-06, "loss": 1.0664, "step": 9225 }, { "epoch": 1.306151341402987, "grad_norm": 10.127655434603351, "learning_rate": 1.4186117688257317e-06, "loss": 1.0758, "step": 9226 }, { "epoch": 1.3062929142776243, "grad_norm": 8.586346242545487, "learning_rate": 1.4180949962012377e-06, "loss": 0.9836, "step": 9227 }, { "epoch": 1.3064344871522615, "grad_norm": 9.65981347956372, "learning_rate": 1.4175782804516824e-06, "loss": 0.9318, "step": 9228 }, { "epoch": 1.3065760600268987, "grad_norm": 9.600701910425693, "learning_rate": 1.417061621604231e-06, "loss": 1.0413, "step": 9229 }, { "epoch": 1.306717632901536, "grad_norm": 9.748774524848143, "learning_rate": 1.4165450196860423e-06, "loss": 0.9689, "step": 9230 }, { "epoch": 1.3068592057761732, "grad_norm": 8.561893191032485, "learning_rate": 1.4160284747242731e-06, "loss": 0.994, "step": 9231 }, { "epoch": 1.3070007786508104, "grad_norm": 8.857102090768123, "learning_rate": 1.4155119867460799e-06, "loss": 0.9995, "step": 9232 }, { "epoch": 1.3071423515254477, "grad_norm": 8.661546319425574, "learning_rate": 1.4149955557786118e-06, "loss": 1.004, "step": 9233 }, { "epoch": 1.307283924400085, "grad_norm": 9.607962453344769, "learning_rate": 1.4144791818490194e-06, "loss": 1.1404, "step": 9234 }, { "epoch": 1.3074254972747221, "grad_norm": 9.657909460588549, "learning_rate": 1.4139628649844462e-06, "loss": 0.8855, "step": 9235 }, { "epoch": 1.3075670701493594, "grad_norm": 8.326682933896343, "learning_rate": 1.4134466052120349e-06, "loss": 0.9045, "step": 9236 }, { "epoch": 1.3077086430239966, "grad_norm": 8.465965798672759, "learning_rate": 1.412930402558927e-06, "loss": 0.9687, "step": 9237 }, { "epoch": 1.3078502158986338, "grad_norm": 7.793979757022779, "learning_rate": 1.412414257052256e-06, "loss": 0.9504, "step": 9238 }, { "epoch": 1.307991788773271, "grad_norm": 7.73239622138331, "learning_rate": 1.4118981687191573e-06, "loss": 0.8713, "step": 9239 }, { "epoch": 1.3081333616479083, "grad_norm": 8.682905779933531, "learning_rate": 1.411382137586761e-06, "loss": 0.9947, "step": 9240 }, { "epoch": 1.3082749345225455, "grad_norm": 8.909981143473356, "learning_rate": 1.4108661636821928e-06, "loss": 1.0371, "step": 9241 }, { "epoch": 1.3084165073971827, "grad_norm": 10.124347430262944, "learning_rate": 1.4103502470325791e-06, "loss": 1.107, "step": 9242 }, { "epoch": 1.30855808027182, "grad_norm": 9.545207425836596, "learning_rate": 1.4098343876650398e-06, "loss": 1.0248, "step": 9243 }, { "epoch": 1.3086996531464572, "grad_norm": 10.11932289240645, "learning_rate": 1.4093185856066945e-06, "loss": 0.9979, "step": 9244 }, { "epoch": 1.3088412260210944, "grad_norm": 9.836347818337877, "learning_rate": 1.4088028408846572e-06, "loss": 1.0582, "step": 9245 }, { "epoch": 1.3089827988957317, "grad_norm": 8.514058772250973, "learning_rate": 1.4082871535260418e-06, "loss": 0.9716, "step": 9246 }, { "epoch": 1.3091243717703689, "grad_norm": 9.825079174547797, "learning_rate": 1.4077715235579559e-06, "loss": 0.9755, "step": 9247 }, { "epoch": 1.3092659446450061, "grad_norm": 8.409707191622555, "learning_rate": 1.4072559510075073e-06, "loss": 1.0548, "step": 9248 }, { "epoch": 1.3094075175196433, "grad_norm": 8.690192594336455, "learning_rate": 1.4067404359017977e-06, "loss": 1.0918, "step": 9249 }, { "epoch": 1.3095490903942806, "grad_norm": 9.019883549989496, "learning_rate": 1.4062249782679294e-06, "loss": 1.0154, "step": 9250 }, { "epoch": 1.3096906632689178, "grad_norm": 9.698872247007357, "learning_rate": 1.4057095781329983e-06, "loss": 0.9101, "step": 9251 }, { "epoch": 1.3098322361435548, "grad_norm": 9.080068672357415, "learning_rate": 1.4051942355240977e-06, "loss": 0.923, "step": 9252 }, { "epoch": 1.309973809018192, "grad_norm": 8.085760584568222, "learning_rate": 1.404678950468321e-06, "loss": 1.1172, "step": 9253 }, { "epoch": 1.3101153818928293, "grad_norm": 9.168204253739184, "learning_rate": 1.4041637229927541e-06, "loss": 1.0795, "step": 9254 }, { "epoch": 1.3102569547674665, "grad_norm": 9.833545445455165, "learning_rate": 1.403648553124484e-06, "loss": 1.0066, "step": 9255 }, { "epoch": 1.3103985276421037, "grad_norm": 8.560290420454177, "learning_rate": 1.4031334408905911e-06, "loss": 1.0598, "step": 9256 }, { "epoch": 1.310540100516741, "grad_norm": 9.91338565521012, "learning_rate": 1.4026183863181563e-06, "loss": 0.9445, "step": 9257 }, { "epoch": 1.3106816733913782, "grad_norm": 9.923540783517186, "learning_rate": 1.4021033894342539e-06, "loss": 1.0357, "step": 9258 }, { "epoch": 1.3108232462660154, "grad_norm": 9.98873801263007, "learning_rate": 1.4015884502659574e-06, "loss": 1.0552, "step": 9259 }, { "epoch": 1.3109648191406527, "grad_norm": 8.925662010281217, "learning_rate": 1.4010735688403383e-06, "loss": 1.0466, "step": 9260 }, { "epoch": 1.31110639201529, "grad_norm": 8.653439723194976, "learning_rate": 1.4005587451844621e-06, "loss": 1.0211, "step": 9261 }, { "epoch": 1.3112479648899271, "grad_norm": 8.255595303840952, "learning_rate": 1.4000439793253931e-06, "loss": 0.9291, "step": 9262 }, { "epoch": 1.3113895377645644, "grad_norm": 9.138866183617655, "learning_rate": 1.3995292712901908e-06, "loss": 1.0579, "step": 9263 }, { "epoch": 1.3115311106392016, "grad_norm": 10.438035048973239, "learning_rate": 1.3990146211059141e-06, "loss": 0.9722, "step": 9264 }, { "epoch": 1.3116726835138388, "grad_norm": 9.06995921469606, "learning_rate": 1.398500028799619e-06, "loss": 0.9909, "step": 9265 }, { "epoch": 1.311814256388476, "grad_norm": 8.691410639043836, "learning_rate": 1.397985494398355e-06, "loss": 0.946, "step": 9266 }, { "epoch": 1.311955829263113, "grad_norm": 11.05422888569457, "learning_rate": 1.3974710179291729e-06, "loss": 1.0536, "step": 9267 }, { "epoch": 1.3120974021377503, "grad_norm": 8.46029139312252, "learning_rate": 1.3969565994191165e-06, "loss": 1.078, "step": 9268 }, { "epoch": 1.3122389750123875, "grad_norm": 10.602842676714035, "learning_rate": 1.3964422388952298e-06, "loss": 1.0038, "step": 9269 }, { "epoch": 1.3123805478870247, "grad_norm": 9.376588000272566, "learning_rate": 1.3959279363845508e-06, "loss": 0.9828, "step": 9270 }, { "epoch": 1.312522120761662, "grad_norm": 7.569749116355859, "learning_rate": 1.3954136919141182e-06, "loss": 0.954, "step": 9271 }, { "epoch": 1.3126636936362992, "grad_norm": 8.928598092175589, "learning_rate": 1.3948995055109641e-06, "loss": 1.0501, "step": 9272 }, { "epoch": 1.3128052665109364, "grad_norm": 7.536348128990452, "learning_rate": 1.3943853772021179e-06, "loss": 1.002, "step": 9273 }, { "epoch": 1.3129468393855737, "grad_norm": 8.032799715967993, "learning_rate": 1.3938713070146093e-06, "loss": 0.9542, "step": 9274 }, { "epoch": 1.313088412260211, "grad_norm": 8.395009747443297, "learning_rate": 1.3933572949754598e-06, "loss": 0.9093, "step": 9275 }, { "epoch": 1.3132299851348481, "grad_norm": 8.880803319663096, "learning_rate": 1.3928433411116938e-06, "loss": 1.0384, "step": 9276 }, { "epoch": 1.3133715580094854, "grad_norm": 10.846637088646991, "learning_rate": 1.3923294454503263e-06, "loss": 1.0619, "step": 9277 }, { "epoch": 1.3135131308841226, "grad_norm": 8.209033400475185, "learning_rate": 1.3918156080183754e-06, "loss": 0.8577, "step": 9278 }, { "epoch": 1.3136547037587598, "grad_norm": 9.779688701394917, "learning_rate": 1.3913018288428503e-06, "loss": 1.0353, "step": 9279 }, { "epoch": 1.313796276633397, "grad_norm": 8.836042840331405, "learning_rate": 1.3907881079507623e-06, "loss": 1.0757, "step": 9280 }, { "epoch": 1.3139378495080343, "grad_norm": 9.591035042477698, "learning_rate": 1.3902744453691158e-06, "loss": 0.959, "step": 9281 }, { "epoch": 1.3140794223826715, "grad_norm": 9.27924034889908, "learning_rate": 1.3897608411249153e-06, "loss": 1.0454, "step": 9282 }, { "epoch": 1.3142209952573087, "grad_norm": 9.314965657750871, "learning_rate": 1.3892472952451592e-06, "loss": 0.9813, "step": 9283 }, { "epoch": 1.314362568131946, "grad_norm": 9.19217692596175, "learning_rate": 1.3887338077568437e-06, "loss": 0.9924, "step": 9284 }, { "epoch": 1.3145041410065832, "grad_norm": 8.386554547618216, "learning_rate": 1.3882203786869644e-06, "loss": 0.9874, "step": 9285 }, { "epoch": 1.3146457138812204, "grad_norm": 9.15552463197064, "learning_rate": 1.3877070080625098e-06, "loss": 0.9185, "step": 9286 }, { "epoch": 1.3147872867558577, "grad_norm": 9.316207659702064, "learning_rate": 1.3871936959104684e-06, "loss": 1.0378, "step": 9287 }, { "epoch": 1.3149288596304949, "grad_norm": 8.982334601867286, "learning_rate": 1.3866804422578256e-06, "loss": 0.9764, "step": 9288 }, { "epoch": 1.3150704325051321, "grad_norm": 10.336124986489285, "learning_rate": 1.386167247131561e-06, "loss": 1.0397, "step": 9289 }, { "epoch": 1.3152120053797693, "grad_norm": 8.190121762368486, "learning_rate": 1.3856541105586545e-06, "loss": 0.9885, "step": 9290 }, { "epoch": 1.3153535782544066, "grad_norm": 9.599639377178447, "learning_rate": 1.3851410325660796e-06, "loss": 0.9801, "step": 9291 }, { "epoch": 1.3154951511290438, "grad_norm": 7.8240237275994415, "learning_rate": 1.3846280131808103e-06, "loss": 0.9057, "step": 9292 }, { "epoch": 1.3156367240036808, "grad_norm": 9.234415070370972, "learning_rate": 1.3841150524298148e-06, "loss": 1.1202, "step": 9293 }, { "epoch": 1.315778296878318, "grad_norm": 8.810141822807864, "learning_rate": 1.3836021503400583e-06, "loss": 1.1233, "step": 9294 }, { "epoch": 1.3159198697529553, "grad_norm": 8.75636365273538, "learning_rate": 1.3830893069385046e-06, "loss": 0.9229, "step": 9295 }, { "epoch": 1.3160614426275925, "grad_norm": 8.83292268002438, "learning_rate": 1.3825765222521127e-06, "loss": 0.9919, "step": 9296 }, { "epoch": 1.3162030155022297, "grad_norm": 8.868865996147264, "learning_rate": 1.3820637963078406e-06, "loss": 1.0244, "step": 9297 }, { "epoch": 1.316344588376867, "grad_norm": 10.118970794813524, "learning_rate": 1.3815511291326404e-06, "loss": 0.9589, "step": 9298 }, { "epoch": 1.3164861612515042, "grad_norm": 8.25273040902837, "learning_rate": 1.3810385207534641e-06, "loss": 1.0533, "step": 9299 }, { "epoch": 1.3166277341261414, "grad_norm": 10.682998155244666, "learning_rate": 1.3805259711972577e-06, "loss": 1.203, "step": 9300 }, { "epoch": 1.3167693070007787, "grad_norm": 8.243720699371023, "learning_rate": 1.380013480490967e-06, "loss": 0.9706, "step": 9301 }, { "epoch": 1.3169108798754159, "grad_norm": 7.677327939350389, "learning_rate": 1.3795010486615318e-06, "loss": 0.9221, "step": 9302 }, { "epoch": 1.3170524527500531, "grad_norm": 9.65815000143378, "learning_rate": 1.3789886757358916e-06, "loss": 1.0548, "step": 9303 }, { "epoch": 1.3171940256246903, "grad_norm": 8.047131462084263, "learning_rate": 1.3784763617409814e-06, "loss": 1.0044, "step": 9304 }, { "epoch": 1.3173355984993276, "grad_norm": 8.556701924640617, "learning_rate": 1.3779641067037313e-06, "loss": 1.0343, "step": 9305 }, { "epoch": 1.3174771713739648, "grad_norm": 12.655101582734764, "learning_rate": 1.3774519106510725e-06, "loss": 0.9896, "step": 9306 }, { "epoch": 1.317618744248602, "grad_norm": 8.221871715096977, "learning_rate": 1.3769397736099288e-06, "loss": 0.8893, "step": 9307 }, { "epoch": 1.3177603171232393, "grad_norm": 8.259836055430982, "learning_rate": 1.3764276956072248e-06, "loss": 1.0051, "step": 9308 }, { "epoch": 1.3179018899978763, "grad_norm": 10.77733631693745, "learning_rate": 1.3759156766698783e-06, "loss": 1.1019, "step": 9309 }, { "epoch": 1.3180434628725135, "grad_norm": 8.148011003489643, "learning_rate": 1.3754037168248063e-06, "loss": 0.9543, "step": 9310 }, { "epoch": 1.3181850357471507, "grad_norm": 9.128676000653234, "learning_rate": 1.3748918160989232e-06, "loss": 0.9453, "step": 9311 }, { "epoch": 1.318326608621788, "grad_norm": 8.165766867548708, "learning_rate": 1.3743799745191377e-06, "loss": 1.0606, "step": 9312 }, { "epoch": 1.3184681814964252, "grad_norm": 9.626503170594036, "learning_rate": 1.3738681921123586e-06, "loss": 0.9754, "step": 9313 }, { "epoch": 1.3186097543710624, "grad_norm": 7.257631691773667, "learning_rate": 1.373356468905489e-06, "loss": 1.094, "step": 9314 }, { "epoch": 1.3187513272456997, "grad_norm": 9.577877670007124, "learning_rate": 1.3728448049254296e-06, "loss": 0.9891, "step": 9315 }, { "epoch": 1.3188929001203369, "grad_norm": 7.746647263380603, "learning_rate": 1.3723332001990774e-06, "loss": 0.9637, "step": 9316 }, { "epoch": 1.3190344729949741, "grad_norm": 8.842785031138867, "learning_rate": 1.3718216547533282e-06, "loss": 0.9399, "step": 9317 }, { "epoch": 1.3191760458696113, "grad_norm": 9.136311665925895, "learning_rate": 1.3713101686150742e-06, "loss": 0.9162, "step": 9318 }, { "epoch": 1.3193176187442486, "grad_norm": 9.62765704054377, "learning_rate": 1.370798741811202e-06, "loss": 1.0525, "step": 9319 }, { "epoch": 1.3194591916188858, "grad_norm": 10.239314766845057, "learning_rate": 1.370287374368599e-06, "loss": 1.0594, "step": 9320 }, { "epoch": 1.319600764493523, "grad_norm": 9.852927347094816, "learning_rate": 1.3697760663141457e-06, "loss": 0.9849, "step": 9321 }, { "epoch": 1.3197423373681603, "grad_norm": 9.212379038623494, "learning_rate": 1.3692648176747224e-06, "loss": 1.0392, "step": 9322 }, { "epoch": 1.3198839102427975, "grad_norm": 9.552979361060693, "learning_rate": 1.368753628477204e-06, "loss": 1.0169, "step": 9323 }, { "epoch": 1.3200254831174347, "grad_norm": 8.470713707985665, "learning_rate": 1.3682424987484647e-06, "loss": 0.9959, "step": 9324 }, { "epoch": 1.320167055992072, "grad_norm": 9.446490117345668, "learning_rate": 1.367731428515373e-06, "loss": 0.9232, "step": 9325 }, { "epoch": 1.3203086288667092, "grad_norm": 10.46527679675111, "learning_rate": 1.3672204178047955e-06, "loss": 0.9771, "step": 9326 }, { "epoch": 1.3204502017413464, "grad_norm": 8.231684380889615, "learning_rate": 1.3667094666435964e-06, "loss": 1.0872, "step": 9327 }, { "epoch": 1.3205917746159836, "grad_norm": 9.701473273642474, "learning_rate": 1.3661985750586348e-06, "loss": 0.9733, "step": 9328 }, { "epoch": 1.3207333474906209, "grad_norm": 7.826571555959649, "learning_rate": 1.36568774307677e-06, "loss": 0.9767, "step": 9329 }, { "epoch": 1.320874920365258, "grad_norm": 8.151851008557667, "learning_rate": 1.3651769707248535e-06, "loss": 0.9511, "step": 9330 }, { "epoch": 1.3210164932398953, "grad_norm": 9.451989778814225, "learning_rate": 1.3646662580297385e-06, "loss": 1.0072, "step": 9331 }, { "epoch": 1.3211580661145326, "grad_norm": 8.948449743258168, "learning_rate": 1.364155605018271e-06, "loss": 0.994, "step": 9332 }, { "epoch": 1.3212996389891698, "grad_norm": 10.451080806636961, "learning_rate": 1.3636450117172962e-06, "loss": 1.0278, "step": 9333 }, { "epoch": 1.321441211863807, "grad_norm": 7.9436787263687645, "learning_rate": 1.3631344781536565e-06, "loss": 0.9729, "step": 9334 }, { "epoch": 1.321582784738444, "grad_norm": 8.738044636660174, "learning_rate": 1.3626240043541901e-06, "loss": 0.9645, "step": 9335 }, { "epoch": 1.3217243576130813, "grad_norm": 8.198241198723139, "learning_rate": 1.3621135903457318e-06, "loss": 0.8561, "step": 9336 }, { "epoch": 1.3218659304877185, "grad_norm": 8.960855764666467, "learning_rate": 1.3616032361551124e-06, "loss": 1.0485, "step": 9337 }, { "epoch": 1.3220075033623557, "grad_norm": 8.476913515197653, "learning_rate": 1.3610929418091618e-06, "loss": 0.9732, "step": 9338 }, { "epoch": 1.322149076236993, "grad_norm": 9.285453281297588, "learning_rate": 1.3605827073347074e-06, "loss": 0.9808, "step": 9339 }, { "epoch": 1.3222906491116302, "grad_norm": 11.979056201370204, "learning_rate": 1.3600725327585695e-06, "loss": 1.0286, "step": 9340 }, { "epoch": 1.3224322219862674, "grad_norm": 7.193608350378328, "learning_rate": 1.3595624181075695e-06, "loss": 0.9666, "step": 9341 }, { "epoch": 1.3225737948609047, "grad_norm": 8.192487522519679, "learning_rate": 1.3590523634085218e-06, "loss": 0.8864, "step": 9342 }, { "epoch": 1.3227153677355419, "grad_norm": 9.625489457801411, "learning_rate": 1.3585423686882415e-06, "loss": 1.1868, "step": 9343 }, { "epoch": 1.322856940610179, "grad_norm": 10.733526693304437, "learning_rate": 1.3580324339735369e-06, "loss": 0.974, "step": 9344 }, { "epoch": 1.3229985134848163, "grad_norm": 9.70799833273667, "learning_rate": 1.3575225592912166e-06, "loss": 1.0443, "step": 9345 }, { "epoch": 1.3231400863594536, "grad_norm": 10.05698978412857, "learning_rate": 1.3570127446680838e-06, "loss": 0.9424, "step": 9346 }, { "epoch": 1.3232816592340908, "grad_norm": 8.681707632966939, "learning_rate": 1.3565029901309378e-06, "loss": 0.9985, "step": 9347 }, { "epoch": 1.323423232108728, "grad_norm": 10.618530457973648, "learning_rate": 1.3559932957065777e-06, "loss": 1.0356, "step": 9348 }, { "epoch": 1.3235648049833653, "grad_norm": 8.556988132423552, "learning_rate": 1.3554836614217963e-06, "loss": 0.8964, "step": 9349 }, { "epoch": 1.3237063778580023, "grad_norm": 7.659447325447106, "learning_rate": 1.354974087303386e-06, "loss": 0.948, "step": 9350 }, { "epoch": 1.3238479507326395, "grad_norm": 9.586158358167838, "learning_rate": 1.3544645733781335e-06, "loss": 1.1109, "step": 9351 }, { "epoch": 1.3239895236072767, "grad_norm": 7.2331120025009055, "learning_rate": 1.3539551196728252e-06, "loss": 1.0244, "step": 9352 }, { "epoch": 1.324131096481914, "grad_norm": 8.801945852676955, "learning_rate": 1.3534457262142408e-06, "loss": 0.9099, "step": 9353 }, { "epoch": 1.3242726693565512, "grad_norm": 13.411445944573567, "learning_rate": 1.3529363930291606e-06, "loss": 1.0798, "step": 9354 }, { "epoch": 1.3244142422311884, "grad_norm": 10.660047106325658, "learning_rate": 1.3524271201443578e-06, "loss": 1.0275, "step": 9355 }, { "epoch": 1.3245558151058257, "grad_norm": 8.196150308700574, "learning_rate": 1.3519179075866067e-06, "loss": 0.9788, "step": 9356 }, { "epoch": 1.3246973879804629, "grad_norm": 8.902821091579447, "learning_rate": 1.3514087553826753e-06, "loss": 1.0657, "step": 9357 }, { "epoch": 1.3248389608551001, "grad_norm": 9.3787102033342, "learning_rate": 1.350899663559328e-06, "loss": 0.9677, "step": 9358 }, { "epoch": 1.3249805337297373, "grad_norm": 9.292871462684323, "learning_rate": 1.3503906321433298e-06, "loss": 1.0464, "step": 9359 }, { "epoch": 1.3251221066043746, "grad_norm": 11.598700851827989, "learning_rate": 1.3498816611614373e-06, "loss": 1.1955, "step": 9360 }, { "epoch": 1.3252636794790118, "grad_norm": 8.345531487747696, "learning_rate": 1.3493727506404092e-06, "loss": 0.9694, "step": 9361 }, { "epoch": 1.325405252353649, "grad_norm": 10.106713630077317, "learning_rate": 1.348863900606998e-06, "loss": 1.1043, "step": 9362 }, { "epoch": 1.3255468252282863, "grad_norm": 11.403742768104415, "learning_rate": 1.3483551110879525e-06, "loss": 1.042, "step": 9363 }, { "epoch": 1.3256883981029235, "grad_norm": 9.202439391110941, "learning_rate": 1.347846382110021e-06, "loss": 1.0678, "step": 9364 }, { "epoch": 1.3258299709775607, "grad_norm": 10.649148941063098, "learning_rate": 1.3473377136999452e-06, "loss": 0.9584, "step": 9365 }, { "epoch": 1.325971543852198, "grad_norm": 8.054489532909097, "learning_rate": 1.3468291058844673e-06, "loss": 0.9748, "step": 9366 }, { "epoch": 1.3261131167268352, "grad_norm": 10.857923056599146, "learning_rate": 1.3463205586903233e-06, "loss": 1.0231, "step": 9367 }, { "epoch": 1.3262546896014724, "grad_norm": 10.17665768178657, "learning_rate": 1.3458120721442464e-06, "loss": 1.0594, "step": 9368 }, { "epoch": 1.3263962624761096, "grad_norm": 9.710046532437904, "learning_rate": 1.3453036462729697e-06, "loss": 1.0389, "step": 9369 }, { "epoch": 1.3265378353507469, "grad_norm": 8.56545100906949, "learning_rate": 1.3447952811032177e-06, "loss": 0.9617, "step": 9370 }, { "epoch": 1.326679408225384, "grad_norm": 8.357991726234285, "learning_rate": 1.3442869766617178e-06, "loss": 0.9527, "step": 9371 }, { "epoch": 1.3268209811000213, "grad_norm": 9.588300219467099, "learning_rate": 1.3437787329751887e-06, "loss": 1.0988, "step": 9372 }, { "epoch": 1.3269625539746586, "grad_norm": 7.927296726263656, "learning_rate": 1.3432705500703501e-06, "loss": 1.0545, "step": 9373 }, { "epoch": 1.3271041268492958, "grad_norm": 9.336045779314551, "learning_rate": 1.342762427973916e-06, "loss": 0.9728, "step": 9374 }, { "epoch": 1.327245699723933, "grad_norm": 9.653399296670424, "learning_rate": 1.3422543667125988e-06, "loss": 1.0305, "step": 9375 }, { "epoch": 1.32738727259857, "grad_norm": 7.383084795739124, "learning_rate": 1.341746366313105e-06, "loss": 0.9894, "step": 9376 }, { "epoch": 1.3275288454732073, "grad_norm": 8.193428048356424, "learning_rate": 1.3412384268021421e-06, "loss": 0.9595, "step": 9377 }, { "epoch": 1.3276704183478445, "grad_norm": 9.461808382708549, "learning_rate": 1.3407305482064115e-06, "loss": 1.008, "step": 9378 }, { "epoch": 1.3278119912224817, "grad_norm": 10.332902919870332, "learning_rate": 1.3402227305526106e-06, "loss": 1.1508, "step": 9379 }, { "epoch": 1.327953564097119, "grad_norm": 10.554449320152148, "learning_rate": 1.3397149738674363e-06, "loss": 1.0234, "step": 9380 }, { "epoch": 1.3280951369717562, "grad_norm": 10.732479988753036, "learning_rate": 1.3392072781775806e-06, "loss": 1.0535, "step": 9381 }, { "epoch": 1.3282367098463934, "grad_norm": 10.630895796430812, "learning_rate": 1.3386996435097333e-06, "loss": 1.1745, "step": 9382 }, { "epoch": 1.3283782827210306, "grad_norm": 8.599214793000135, "learning_rate": 1.3381920698905788e-06, "loss": 0.9446, "step": 9383 }, { "epoch": 1.3285198555956679, "grad_norm": 9.641034134602487, "learning_rate": 1.3376845573468012e-06, "loss": 0.9709, "step": 9384 }, { "epoch": 1.328661428470305, "grad_norm": 7.310013568411575, "learning_rate": 1.3371771059050803e-06, "loss": 0.9258, "step": 9385 }, { "epoch": 1.3288030013449423, "grad_norm": 9.605888118641168, "learning_rate": 1.3366697155920913e-06, "loss": 1.0765, "step": 9386 }, { "epoch": 1.3289445742195796, "grad_norm": 9.534876599365967, "learning_rate": 1.3361623864345086e-06, "loss": 0.9905, "step": 9387 }, { "epoch": 1.3290861470942168, "grad_norm": 7.7414631740514865, "learning_rate": 1.3356551184590017e-06, "loss": 0.9322, "step": 9388 }, { "epoch": 1.329227719968854, "grad_norm": 8.800756976775226, "learning_rate": 1.3351479116922372e-06, "loss": 0.9767, "step": 9389 }, { "epoch": 1.3293692928434913, "grad_norm": 9.803192583993603, "learning_rate": 1.3346407661608771e-06, "loss": 0.9746, "step": 9390 }, { "epoch": 1.3295108657181285, "grad_norm": 9.819305586803543, "learning_rate": 1.3341336818915832e-06, "loss": 1.0583, "step": 9391 }, { "epoch": 1.3296524385927655, "grad_norm": 9.01611707590717, "learning_rate": 1.3336266589110131e-06, "loss": 1.0699, "step": 9392 }, { "epoch": 1.3297940114674027, "grad_norm": 12.285784711667599, "learning_rate": 1.333119697245819e-06, "loss": 1.1801, "step": 9393 }, { "epoch": 1.32993558434204, "grad_norm": 8.347209770318884, "learning_rate": 1.3326127969226535e-06, "loss": 0.9246, "step": 9394 }, { "epoch": 1.3300771572166772, "grad_norm": 8.026550580491685, "learning_rate": 1.3321059579681617e-06, "loss": 0.8929, "step": 9395 }, { "epoch": 1.3302187300913144, "grad_norm": 10.595450205794776, "learning_rate": 1.3315991804089897e-06, "loss": 1.0853, "step": 9396 }, { "epoch": 1.3303603029659516, "grad_norm": 8.406793172652335, "learning_rate": 1.3310924642717767e-06, "loss": 1.072, "step": 9397 }, { "epoch": 1.3305018758405889, "grad_norm": 10.009114879761823, "learning_rate": 1.3305858095831626e-06, "loss": 0.9873, "step": 9398 }, { "epoch": 1.330643448715226, "grad_norm": 7.470086014136233, "learning_rate": 1.33007921636978e-06, "loss": 0.9447, "step": 9399 }, { "epoch": 1.3307850215898633, "grad_norm": 9.488741176069949, "learning_rate": 1.3295726846582602e-06, "loss": 1.0638, "step": 9400 }, { "epoch": 1.3309265944645006, "grad_norm": 9.81334706920547, "learning_rate": 1.3290662144752322e-06, "loss": 0.9436, "step": 9401 }, { "epoch": 1.3310681673391378, "grad_norm": 8.184439283586796, "learning_rate": 1.3285598058473195e-06, "loss": 1.0645, "step": 9402 }, { "epoch": 1.331209740213775, "grad_norm": 8.650588845733548, "learning_rate": 1.3280534588011451e-06, "loss": 0.8941, "step": 9403 }, { "epoch": 1.3313513130884123, "grad_norm": 10.401473131751429, "learning_rate": 1.3275471733633258e-06, "loss": 1.0813, "step": 9404 }, { "epoch": 1.3314928859630495, "grad_norm": 10.403923439011107, "learning_rate": 1.3270409495604783e-06, "loss": 0.9888, "step": 9405 }, { "epoch": 1.3316344588376867, "grad_norm": 11.140258611831081, "learning_rate": 1.3265347874192125e-06, "loss": 1.0109, "step": 9406 }, { "epoch": 1.331776031712324, "grad_norm": 8.702329424616767, "learning_rate": 1.3260286869661378e-06, "loss": 0.9798, "step": 9407 }, { "epoch": 1.3319176045869612, "grad_norm": 9.538167475670853, "learning_rate": 1.325522648227861e-06, "loss": 0.9253, "step": 9408 }, { "epoch": 1.3320591774615984, "grad_norm": 8.79601894841185, "learning_rate": 1.3250166712309825e-06, "loss": 1.0069, "step": 9409 }, { "epoch": 1.3322007503362356, "grad_norm": 10.132074215948109, "learning_rate": 1.3245107560021015e-06, "loss": 1.0758, "step": 9410 }, { "epoch": 1.3323423232108729, "grad_norm": 9.552726588053972, "learning_rate": 1.324004902567813e-06, "loss": 0.8801, "step": 9411 }, { "epoch": 1.33248389608551, "grad_norm": 10.670493710937476, "learning_rate": 1.3234991109547104e-06, "loss": 1.1222, "step": 9412 }, { "epoch": 1.3326254689601473, "grad_norm": 8.623953935517838, "learning_rate": 1.3229933811893814e-06, "loss": 1.0817, "step": 9413 }, { "epoch": 1.3327670418347846, "grad_norm": 9.442576678719432, "learning_rate": 1.3224877132984131e-06, "loss": 1.0001, "step": 9414 }, { "epoch": 1.3329086147094218, "grad_norm": 8.629113142607975, "learning_rate": 1.3219821073083882e-06, "loss": 1.0904, "step": 9415 }, { "epoch": 1.333050187584059, "grad_norm": 8.78783845643103, "learning_rate": 1.3214765632458852e-06, "loss": 1.0422, "step": 9416 }, { "epoch": 1.3331917604586963, "grad_norm": 8.003245172339419, "learning_rate": 1.320971081137481e-06, "loss": 0.9466, "step": 9417 }, { "epoch": 1.3333333333333333, "grad_norm": 8.730388352538094, "learning_rate": 1.3204656610097472e-06, "loss": 1.084, "step": 9418 }, { "epoch": 1.3334749062079705, "grad_norm": 9.825378519391322, "learning_rate": 1.3199603028892548e-06, "loss": 0.9988, "step": 9419 }, { "epoch": 1.3336164790826077, "grad_norm": 9.253639123941067, "learning_rate": 1.3194550068025697e-06, "loss": 1.1228, "step": 9420 }, { "epoch": 1.333758051957245, "grad_norm": 9.277704762712732, "learning_rate": 1.3189497727762535e-06, "loss": 1.0576, "step": 9421 }, { "epoch": 1.3338996248318822, "grad_norm": 8.765102323292806, "learning_rate": 1.318444600836868e-06, "loss": 1.018, "step": 9422 }, { "epoch": 1.3340411977065194, "grad_norm": 9.03244570304289, "learning_rate": 1.3179394910109683e-06, "loss": 0.8907, "step": 9423 }, { "epoch": 1.3341827705811566, "grad_norm": 8.752172581841823, "learning_rate": 1.3174344433251086e-06, "loss": 1.0984, "step": 9424 }, { "epoch": 1.3343243434557939, "grad_norm": 10.83891817557439, "learning_rate": 1.3169294578058378e-06, "loss": 1.0141, "step": 9425 }, { "epoch": 1.334465916330431, "grad_norm": 10.005884727849493, "learning_rate": 1.3164245344797045e-06, "loss": 1.0761, "step": 9426 }, { "epoch": 1.3346074892050683, "grad_norm": 8.242028286376934, "learning_rate": 1.3159196733732494e-06, "loss": 0.9642, "step": 9427 }, { "epoch": 1.3347490620797056, "grad_norm": 10.58411106245687, "learning_rate": 1.3154148745130151e-06, "loss": 0.9854, "step": 9428 }, { "epoch": 1.3348906349543428, "grad_norm": 8.766264268272513, "learning_rate": 1.314910137925537e-06, "loss": 1.1182, "step": 9429 }, { "epoch": 1.33503220782898, "grad_norm": 10.438453493709307, "learning_rate": 1.3144054636373505e-06, "loss": 1.0181, "step": 9430 }, { "epoch": 1.3351737807036173, "grad_norm": 9.054239652023218, "learning_rate": 1.313900851674984e-06, "loss": 0.9111, "step": 9431 }, { "epoch": 1.3353153535782545, "grad_norm": 9.237233605369942, "learning_rate": 1.3133963020649648e-06, "loss": 0.9884, "step": 9432 }, { "epoch": 1.3354569264528915, "grad_norm": 9.368091530083149, "learning_rate": 1.3128918148338183e-06, "loss": 0.9712, "step": 9433 }, { "epoch": 1.3355984993275287, "grad_norm": 7.323106425952724, "learning_rate": 1.312387390008063e-06, "loss": 0.9256, "step": 9434 }, { "epoch": 1.335740072202166, "grad_norm": 9.439387050707605, "learning_rate": 1.3118830276142169e-06, "loss": 0.9979, "step": 9435 }, { "epoch": 1.3358816450768032, "grad_norm": 8.384746744315454, "learning_rate": 1.3113787276787951e-06, "loss": 0.9268, "step": 9436 }, { "epoch": 1.3360232179514404, "grad_norm": 9.42649310601944, "learning_rate": 1.3108744902283065e-06, "loss": 1.0394, "step": 9437 }, { "epoch": 1.3361647908260776, "grad_norm": 9.108342264304243, "learning_rate": 1.31037031528926e-06, "loss": 0.9926, "step": 9438 }, { "epoch": 1.3363063637007149, "grad_norm": 8.405221188350588, "learning_rate": 1.309866202888158e-06, "loss": 0.9916, "step": 9439 }, { "epoch": 1.336447936575352, "grad_norm": 9.704074782420657, "learning_rate": 1.3093621530515038e-06, "loss": 1.1328, "step": 9440 }, { "epoch": 1.3365895094499893, "grad_norm": 8.086962589762775, "learning_rate": 1.308858165805793e-06, "loss": 0.938, "step": 9441 }, { "epoch": 1.3367310823246266, "grad_norm": 9.323103839266937, "learning_rate": 1.3083542411775196e-06, "loss": 1.1214, "step": 9442 }, { "epoch": 1.3368726551992638, "grad_norm": 10.022230615304542, "learning_rate": 1.307850379193176e-06, "loss": 0.9719, "step": 9443 }, { "epoch": 1.337014228073901, "grad_norm": 8.893381774099367, "learning_rate": 1.3073465798792482e-06, "loss": 1.0309, "step": 9444 }, { "epoch": 1.3371558009485383, "grad_norm": 9.147037180670598, "learning_rate": 1.3068428432622221e-06, "loss": 1.0852, "step": 9445 }, { "epoch": 1.3372973738231755, "grad_norm": 9.189057302514046, "learning_rate": 1.3063391693685773e-06, "loss": 1.1068, "step": 9446 }, { "epoch": 1.3374389466978127, "grad_norm": 8.524202335213552, "learning_rate": 1.3058355582247933e-06, "loss": 0.9943, "step": 9447 }, { "epoch": 1.33758051957245, "grad_norm": 8.967926093879955, "learning_rate": 1.3053320098573428e-06, "loss": 0.9873, "step": 9448 }, { "epoch": 1.3377220924470872, "grad_norm": 8.441336204735997, "learning_rate": 1.3048285242926983e-06, "loss": 0.9806, "step": 9449 }, { "epoch": 1.3378636653217244, "grad_norm": 8.723296917946353, "learning_rate": 1.3043251015573266e-06, "loss": 1.0361, "step": 9450 }, { "epoch": 1.3380052381963616, "grad_norm": 9.5665274721801, "learning_rate": 1.3038217416776936e-06, "loss": 1.0757, "step": 9451 }, { "epoch": 1.3381468110709989, "grad_norm": 8.923215748645497, "learning_rate": 1.3033184446802596e-06, "loss": 0.8886, "step": 9452 }, { "epoch": 1.338288383945636, "grad_norm": 9.780112690968693, "learning_rate": 1.3028152105914818e-06, "loss": 1.0142, "step": 9453 }, { "epoch": 1.3384299568202733, "grad_norm": 10.318174338500368, "learning_rate": 1.3023120394378167e-06, "loss": 0.9736, "step": 9454 }, { "epoch": 1.3385715296949106, "grad_norm": 11.090079718562103, "learning_rate": 1.3018089312457137e-06, "loss": 1.0266, "step": 9455 }, { "epoch": 1.3387131025695478, "grad_norm": 7.6906635009099515, "learning_rate": 1.3013058860416229e-06, "loss": 0.8916, "step": 9456 }, { "epoch": 1.338854675444185, "grad_norm": 9.354037028365205, "learning_rate": 1.3008029038519866e-06, "loss": 0.9122, "step": 9457 }, { "epoch": 1.3389962483188222, "grad_norm": 8.990277018623065, "learning_rate": 1.3002999847032476e-06, "loss": 0.9772, "step": 9458 }, { "epoch": 1.3391378211934593, "grad_norm": 7.470868564344636, "learning_rate": 1.2997971286218448e-06, "loss": 0.9139, "step": 9459 }, { "epoch": 1.3392793940680965, "grad_norm": 11.42635081379536, "learning_rate": 1.2992943356342111e-06, "loss": 1.0055, "step": 9460 }, { "epoch": 1.3394209669427337, "grad_norm": 8.207432369867998, "learning_rate": 1.2987916057667799e-06, "loss": 0.9767, "step": 9461 }, { "epoch": 1.339562539817371, "grad_norm": 9.130757644820994, "learning_rate": 1.2982889390459781e-06, "loss": 0.9929, "step": 9462 }, { "epoch": 1.3397041126920082, "grad_norm": 9.835356897453847, "learning_rate": 1.297786335498231e-06, "loss": 1.0049, "step": 9463 }, { "epoch": 1.3398456855666454, "grad_norm": 7.288576819296154, "learning_rate": 1.297283795149959e-06, "loss": 0.9063, "step": 9464 }, { "epoch": 1.3399872584412826, "grad_norm": 9.843710084864512, "learning_rate": 1.2967813180275809e-06, "loss": 0.8993, "step": 9465 }, { "epoch": 1.3401288313159199, "grad_norm": 8.785986875055775, "learning_rate": 1.2962789041575127e-06, "loss": 1.0074, "step": 9466 }, { "epoch": 1.340270404190557, "grad_norm": 8.638432754080878, "learning_rate": 1.2957765535661644e-06, "loss": 0.9533, "step": 9467 }, { "epoch": 1.3404119770651943, "grad_norm": 9.691602269027326, "learning_rate": 1.295274266279945e-06, "loss": 1.0652, "step": 9468 }, { "epoch": 1.3405535499398316, "grad_norm": 10.276591721107916, "learning_rate": 1.2947720423252586e-06, "loss": 1.0174, "step": 9469 }, { "epoch": 1.3406951228144688, "grad_norm": 8.895270181619212, "learning_rate": 1.2942698817285082e-06, "loss": 1.0347, "step": 9470 }, { "epoch": 1.340836695689106, "grad_norm": 11.156279748187755, "learning_rate": 1.29376778451609e-06, "loss": 0.9454, "step": 9471 }, { "epoch": 1.3409782685637432, "grad_norm": 9.58115417472364, "learning_rate": 1.2932657507144014e-06, "loss": 1.0018, "step": 9472 }, { "epoch": 1.3411198414383805, "grad_norm": 8.603947695946893, "learning_rate": 1.2927637803498323e-06, "loss": 0.9447, "step": 9473 }, { "epoch": 1.3412614143130177, "grad_norm": 8.72061848188206, "learning_rate": 1.2922618734487697e-06, "loss": 0.9232, "step": 9474 }, { "epoch": 1.3414029871876547, "grad_norm": 14.20815303087731, "learning_rate": 1.2917600300376012e-06, "loss": 1.0186, "step": 9475 }, { "epoch": 1.341544560062292, "grad_norm": 10.973110625493412, "learning_rate": 1.2912582501427062e-06, "loss": 1.0393, "step": 9476 }, { "epoch": 1.3416861329369292, "grad_norm": 10.376254453999316, "learning_rate": 1.2907565337904642e-06, "loss": 1.0023, "step": 9477 }, { "epoch": 1.3418277058115664, "grad_norm": 8.532938695762704, "learning_rate": 1.290254881007249e-06, "loss": 1.0281, "step": 9478 }, { "epoch": 1.3419692786862036, "grad_norm": 9.83745303306371, "learning_rate": 1.2897532918194336e-06, "loss": 1.0012, "step": 9479 }, { "epoch": 1.3421108515608409, "grad_norm": 9.452290042542064, "learning_rate": 1.2892517662533844e-06, "loss": 1.0051, "step": 9480 }, { "epoch": 1.342252424435478, "grad_norm": 12.624333430152424, "learning_rate": 1.2887503043354668e-06, "loss": 1.0808, "step": 9481 }, { "epoch": 1.3423939973101153, "grad_norm": 8.884780867004402, "learning_rate": 1.2882489060920436e-06, "loss": 0.937, "step": 9482 }, { "epoch": 1.3425355701847526, "grad_norm": 10.775794376555298, "learning_rate": 1.287747571549472e-06, "loss": 1.0334, "step": 9483 }, { "epoch": 1.3426771430593898, "grad_norm": 10.045871809120074, "learning_rate": 1.2872463007341065e-06, "loss": 0.9463, "step": 9484 }, { "epoch": 1.342818715934027, "grad_norm": 8.30379127410641, "learning_rate": 1.286745093672298e-06, "loss": 1.066, "step": 9485 }, { "epoch": 1.3429602888086642, "grad_norm": 8.489353019227, "learning_rate": 1.2862439503903958e-06, "loss": 0.9741, "step": 9486 }, { "epoch": 1.3431018616833015, "grad_norm": 9.617685288066612, "learning_rate": 1.2857428709147434e-06, "loss": 0.8693, "step": 9487 }, { "epoch": 1.3432434345579387, "grad_norm": 8.86918471106363, "learning_rate": 1.285241855271683e-06, "loss": 0.9342, "step": 9488 }, { "epoch": 1.343385007432576, "grad_norm": 9.604067147512339, "learning_rate": 1.2847409034875536e-06, "loss": 1.0445, "step": 9489 }, { "epoch": 1.3435265803072132, "grad_norm": 8.732534798182304, "learning_rate": 1.2842400155886876e-06, "loss": 0.9979, "step": 9490 }, { "epoch": 1.3436681531818504, "grad_norm": 9.689647805630914, "learning_rate": 1.2837391916014182e-06, "loss": 1.0376, "step": 9491 }, { "epoch": 1.3438097260564876, "grad_norm": 8.384371851558997, "learning_rate": 1.2832384315520717e-06, "loss": 1.033, "step": 9492 }, { "epoch": 1.3439512989311249, "grad_norm": 8.84787225522748, "learning_rate": 1.2827377354669752e-06, "loss": 1.066, "step": 9493 }, { "epoch": 1.344092871805762, "grad_norm": 8.657782573885745, "learning_rate": 1.2822371033724478e-06, "loss": 1.0269, "step": 9494 }, { "epoch": 1.3442344446803993, "grad_norm": 10.392064779242697, "learning_rate": 1.2817365352948069e-06, "loss": 0.9845, "step": 9495 }, { "epoch": 1.3443760175550366, "grad_norm": 9.625305963582568, "learning_rate": 1.2812360312603689e-06, "loss": 0.9159, "step": 9496 }, { "epoch": 1.3445175904296738, "grad_norm": 8.6930813870264, "learning_rate": 1.2807355912954433e-06, "loss": 0.9879, "step": 9497 }, { "epoch": 1.344659163304311, "grad_norm": 9.742903645615813, "learning_rate": 1.2802352154263392e-06, "loss": 0.9942, "step": 9498 }, { "epoch": 1.3448007361789482, "grad_norm": 8.924119443223184, "learning_rate": 1.2797349036793595e-06, "loss": 0.9481, "step": 9499 }, { "epoch": 1.3449423090535852, "grad_norm": 9.979526160234888, "learning_rate": 1.2792346560808068e-06, "loss": 0.967, "step": 9500 }, { "epoch": 1.3450838819282225, "grad_norm": 8.428024283469638, "learning_rate": 1.2787344726569772e-06, "loss": 0.8941, "step": 9501 }, { "epoch": 1.3452254548028597, "grad_norm": 8.397973733561557, "learning_rate": 1.2782343534341667e-06, "loss": 0.9647, "step": 9502 }, { "epoch": 1.345367027677497, "grad_norm": 9.570126353419278, "learning_rate": 1.2777342984386648e-06, "loss": 1.0227, "step": 9503 }, { "epoch": 1.3455086005521342, "grad_norm": 9.174919805773934, "learning_rate": 1.2772343076967596e-06, "loss": 1.0053, "step": 9504 }, { "epoch": 1.3456501734267714, "grad_norm": 9.248931410642504, "learning_rate": 1.2767343812347356e-06, "loss": 1.0498, "step": 9505 }, { "epoch": 1.3457917463014086, "grad_norm": 9.638127477977156, "learning_rate": 1.2762345190788722e-06, "loss": 1.062, "step": 9506 }, { "epoch": 1.3459333191760459, "grad_norm": 10.344623263741894, "learning_rate": 1.2757347212554484e-06, "loss": 0.9909, "step": 9507 }, { "epoch": 1.346074892050683, "grad_norm": 8.10460932943794, "learning_rate": 1.2752349877907364e-06, "loss": 0.9645, "step": 9508 }, { "epoch": 1.3462164649253203, "grad_norm": 7.937997156628527, "learning_rate": 1.274735318711009e-06, "loss": 0.9582, "step": 9509 }, { "epoch": 1.3463580377999576, "grad_norm": 7.891002321191944, "learning_rate": 1.274235714042531e-06, "loss": 0.9381, "step": 9510 }, { "epoch": 1.3464996106745948, "grad_norm": 8.265307472634488, "learning_rate": 1.2737361738115681e-06, "loss": 0.9583, "step": 9511 }, { "epoch": 1.346641183549232, "grad_norm": 9.666125929167054, "learning_rate": 1.2732366980443808e-06, "loss": 1.1085, "step": 9512 }, { "epoch": 1.3467827564238692, "grad_norm": 8.023998504608107, "learning_rate": 1.2727372867672247e-06, "loss": 0.9111, "step": 9513 }, { "epoch": 1.3469243292985065, "grad_norm": 9.49374545149352, "learning_rate": 1.2722379400063553e-06, "loss": 1.0045, "step": 9514 }, { "epoch": 1.3470659021731437, "grad_norm": 9.993852633199149, "learning_rate": 1.271738657788022e-06, "loss": 1.0083, "step": 9515 }, { "epoch": 1.3472074750477807, "grad_norm": 8.854969039164978, "learning_rate": 1.2712394401384703e-06, "loss": 1.0628, "step": 9516 }, { "epoch": 1.347349047922418, "grad_norm": 8.861117398516564, "learning_rate": 1.2707402870839464e-06, "loss": 1.0443, "step": 9517 }, { "epoch": 1.3474906207970552, "grad_norm": 9.794592194414365, "learning_rate": 1.270241198650688e-06, "loss": 1.1491, "step": 9518 }, { "epoch": 1.3476321936716924, "grad_norm": 9.503009921640658, "learning_rate": 1.269742174864934e-06, "loss": 1.0519, "step": 9519 }, { "epoch": 1.3477737665463296, "grad_norm": 9.350716025939079, "learning_rate": 1.2692432157529153e-06, "loss": 0.9837, "step": 9520 }, { "epoch": 1.3479153394209669, "grad_norm": 8.134345680562076, "learning_rate": 1.268744321340864e-06, "loss": 0.939, "step": 9521 }, { "epoch": 1.348056912295604, "grad_norm": 9.102261155454167, "learning_rate": 1.2682454916550046e-06, "loss": 1.0031, "step": 9522 }, { "epoch": 1.3481984851702413, "grad_norm": 9.79039475857887, "learning_rate": 1.2677467267215626e-06, "loss": 0.9879, "step": 9523 }, { "epoch": 1.3483400580448786, "grad_norm": 9.953588741404168, "learning_rate": 1.2672480265667553e-06, "loss": 0.9561, "step": 9524 }, { "epoch": 1.3484816309195158, "grad_norm": 9.155759205862843, "learning_rate": 1.2667493912168008e-06, "loss": 1.1067, "step": 9525 }, { "epoch": 1.348623203794153, "grad_norm": 9.518413166541174, "learning_rate": 1.2662508206979113e-06, "loss": 0.9234, "step": 9526 }, { "epoch": 1.3487647766687902, "grad_norm": 8.92670135096259, "learning_rate": 1.2657523150362955e-06, "loss": 1.0793, "step": 9527 }, { "epoch": 1.3489063495434275, "grad_norm": 8.618548648023044, "learning_rate": 1.265253874258161e-06, "loss": 0.9885, "step": 9528 }, { "epoch": 1.3490479224180647, "grad_norm": 9.523669821304935, "learning_rate": 1.2647554983897087e-06, "loss": 0.9443, "step": 9529 }, { "epoch": 1.349189495292702, "grad_norm": 10.011396018135212, "learning_rate": 1.2642571874571396e-06, "loss": 1.064, "step": 9530 }, { "epoch": 1.3493310681673392, "grad_norm": 9.341578202476725, "learning_rate": 1.2637589414866483e-06, "loss": 0.9889, "step": 9531 }, { "epoch": 1.3494726410419764, "grad_norm": 8.332903380428613, "learning_rate": 1.2632607605044272e-06, "loss": 0.9862, "step": 9532 }, { "epoch": 1.3496142139166136, "grad_norm": 7.259294274448248, "learning_rate": 1.262762644536667e-06, "loss": 0.8609, "step": 9533 }, { "epoch": 1.3497557867912509, "grad_norm": 9.97834760666678, "learning_rate": 1.262264593609551e-06, "loss": 0.8963, "step": 9534 }, { "epoch": 1.349897359665888, "grad_norm": 9.663860001288912, "learning_rate": 1.2617666077492636e-06, "loss": 0.8603, "step": 9535 }, { "epoch": 1.3500389325405253, "grad_norm": 7.752447172622761, "learning_rate": 1.2612686869819818e-06, "loss": 0.9752, "step": 9536 }, { "epoch": 1.3501805054151625, "grad_norm": 7.515237207380957, "learning_rate": 1.2607708313338818e-06, "loss": 0.9382, "step": 9537 }, { "epoch": 1.3503220782897998, "grad_norm": 7.600907331815439, "learning_rate": 1.2602730408311342e-06, "loss": 1.0571, "step": 9538 }, { "epoch": 1.350463651164437, "grad_norm": 8.152781249490923, "learning_rate": 1.2597753154999088e-06, "loss": 0.9751, "step": 9539 }, { "epoch": 1.3506052240390742, "grad_norm": 8.64669768520763, "learning_rate": 1.259277655366371e-06, "loss": 1.0024, "step": 9540 }, { "epoch": 1.3507467969137115, "grad_norm": 9.515286243841858, "learning_rate": 1.2587800604566808e-06, "loss": 0.9883, "step": 9541 }, { "epoch": 1.3508883697883485, "grad_norm": 10.112799935437252, "learning_rate": 1.2582825307969981e-06, "loss": 1.0535, "step": 9542 }, { "epoch": 1.3510299426629857, "grad_norm": 9.132760288425391, "learning_rate": 1.257785066413476e-06, "loss": 0.9531, "step": 9543 }, { "epoch": 1.351171515537623, "grad_norm": 9.810043124571727, "learning_rate": 1.2572876673322676e-06, "loss": 1.0128, "step": 9544 }, { "epoch": 1.3513130884122602, "grad_norm": 8.559986051744167, "learning_rate": 1.2567903335795191e-06, "loss": 0.9778, "step": 9545 }, { "epoch": 1.3514546612868974, "grad_norm": 8.425263298289723, "learning_rate": 1.2562930651813772e-06, "loss": 1.0095, "step": 9546 }, { "epoch": 1.3515962341615346, "grad_norm": 7.041278384486854, "learning_rate": 1.255795862163981e-06, "loss": 0.9359, "step": 9547 }, { "epoch": 1.3517378070361719, "grad_norm": 8.500342418281004, "learning_rate": 1.2552987245534675e-06, "loss": 0.9564, "step": 9548 }, { "epoch": 1.351879379910809, "grad_norm": 10.954018984892564, "learning_rate": 1.2548016523759733e-06, "loss": 1.1077, "step": 9549 }, { "epoch": 1.3520209527854463, "grad_norm": 9.210803729016956, "learning_rate": 1.2543046456576267e-06, "loss": 0.9975, "step": 9550 }, { "epoch": 1.3521625256600835, "grad_norm": 11.844928129275218, "learning_rate": 1.253807704424557e-06, "loss": 1.0775, "step": 9551 }, { "epoch": 1.3523040985347208, "grad_norm": 8.5650198910956, "learning_rate": 1.2533108287028862e-06, "loss": 0.9736, "step": 9552 }, { "epoch": 1.352445671409358, "grad_norm": 10.914638495895947, "learning_rate": 1.2528140185187362e-06, "loss": 1.0348, "step": 9553 }, { "epoch": 1.3525872442839952, "grad_norm": 8.559936585235999, "learning_rate": 1.2523172738982225e-06, "loss": 0.9718, "step": 9554 }, { "epoch": 1.3527288171586325, "grad_norm": 11.018461514232602, "learning_rate": 1.2518205948674593e-06, "loss": 1.0722, "step": 9555 }, { "epoch": 1.3528703900332697, "grad_norm": 12.489548242735491, "learning_rate": 1.2513239814525583e-06, "loss": 1.0353, "step": 9556 }, { "epoch": 1.3530119629079067, "grad_norm": 7.4509307382344385, "learning_rate": 1.250827433679624e-06, "loss": 1.0113, "step": 9557 }, { "epoch": 1.353153535782544, "grad_norm": 8.811009923613947, "learning_rate": 1.2503309515747602e-06, "loss": 0.9508, "step": 9558 }, { "epoch": 1.3532951086571812, "grad_norm": 9.593952785672386, "learning_rate": 1.2498345351640655e-06, "loss": 0.9177, "step": 9559 }, { "epoch": 1.3534366815318184, "grad_norm": 10.88590088760529, "learning_rate": 1.2493381844736382e-06, "loss": 1.0201, "step": 9560 }, { "epoch": 1.3535782544064556, "grad_norm": 8.765791675526545, "learning_rate": 1.2488418995295689e-06, "loss": 0.9386, "step": 9561 }, { "epoch": 1.3537198272810929, "grad_norm": 9.325786354924464, "learning_rate": 1.2483456803579484e-06, "loss": 1.04, "step": 9562 }, { "epoch": 1.35386140015573, "grad_norm": 8.01180969216425, "learning_rate": 1.2478495269848626e-06, "loss": 0.9701, "step": 9563 }, { "epoch": 1.3540029730303673, "grad_norm": 9.28723198361495, "learning_rate": 1.247353439436393e-06, "loss": 1.0943, "step": 9564 }, { "epoch": 1.3541445459050045, "grad_norm": 7.534624918491532, "learning_rate": 1.2468574177386198e-06, "loss": 0.9001, "step": 9565 }, { "epoch": 1.3542861187796418, "grad_norm": 8.662442082463308, "learning_rate": 1.2463614619176167e-06, "loss": 0.9839, "step": 9566 }, { "epoch": 1.354427691654279, "grad_norm": 9.541936967733093, "learning_rate": 1.2458655719994582e-06, "loss": 1.0245, "step": 9567 }, { "epoch": 1.3545692645289162, "grad_norm": 8.377026668553949, "learning_rate": 1.2453697480102111e-06, "loss": 1.0731, "step": 9568 }, { "epoch": 1.3547108374035535, "grad_norm": 9.867591879827415, "learning_rate": 1.2448739899759398e-06, "loss": 1.065, "step": 9569 }, { "epoch": 1.3548524102781907, "grad_norm": 7.634324173010554, "learning_rate": 1.2443782979227084e-06, "loss": 1.0125, "step": 9570 }, { "epoch": 1.354993983152828, "grad_norm": 8.159361030394255, "learning_rate": 1.2438826718765724e-06, "loss": 0.9996, "step": 9571 }, { "epoch": 1.3551355560274652, "grad_norm": 8.10986844070163, "learning_rate": 1.2433871118635888e-06, "loss": 0.8906, "step": 9572 }, { "epoch": 1.3552771289021024, "grad_norm": 10.653616750461913, "learning_rate": 1.2428916179098065e-06, "loss": 1.0464, "step": 9573 }, { "epoch": 1.3554187017767396, "grad_norm": 9.540574012617604, "learning_rate": 1.2423961900412756e-06, "loss": 1.0449, "step": 9574 }, { "epoch": 1.3555602746513769, "grad_norm": 8.23614077663672, "learning_rate": 1.2419008282840387e-06, "loss": 0.9628, "step": 9575 }, { "epoch": 1.355701847526014, "grad_norm": 9.248226098778138, "learning_rate": 1.2414055326641378e-06, "loss": 1.0261, "step": 9576 }, { "epoch": 1.3558434204006513, "grad_norm": 7.94722458478897, "learning_rate": 1.2409103032076087e-06, "loss": 1.025, "step": 9577 }, { "epoch": 1.3559849932752885, "grad_norm": 10.081365303945768, "learning_rate": 1.2404151399404859e-06, "loss": 1.0034, "step": 9578 }, { "epoch": 1.3561265661499258, "grad_norm": 8.81529894810344, "learning_rate": 1.2399200428888023e-06, "loss": 0.949, "step": 9579 }, { "epoch": 1.356268139024563, "grad_norm": 8.313388404123344, "learning_rate": 1.2394250120785806e-06, "loss": 0.9819, "step": 9580 }, { "epoch": 1.3564097118992002, "grad_norm": 8.72720900064379, "learning_rate": 1.2389300475358468e-06, "loss": 1.0425, "step": 9581 }, { "epoch": 1.3565512847738375, "grad_norm": 8.885057365305281, "learning_rate": 1.2384351492866192e-06, "loss": 0.9193, "step": 9582 }, { "epoch": 1.3566928576484745, "grad_norm": 9.695965224678954, "learning_rate": 1.237940317356916e-06, "loss": 1.0027, "step": 9583 }, { "epoch": 1.3568344305231117, "grad_norm": 8.953456479738444, "learning_rate": 1.2374455517727485e-06, "loss": 0.9521, "step": 9584 }, { "epoch": 1.356976003397749, "grad_norm": 8.54277227426102, "learning_rate": 1.236950852560127e-06, "loss": 0.8983, "step": 9585 }, { "epoch": 1.3571175762723862, "grad_norm": 12.629437865782627, "learning_rate": 1.2364562197450583e-06, "loss": 1.0679, "step": 9586 }, { "epoch": 1.3572591491470234, "grad_norm": 10.743101878697521, "learning_rate": 1.235961653353543e-06, "loss": 1.1284, "step": 9587 }, { "epoch": 1.3574007220216606, "grad_norm": 9.918924112119832, "learning_rate": 1.235467153411582e-06, "loss": 0.9153, "step": 9588 }, { "epoch": 1.3575422948962979, "grad_norm": 9.8967316443971, "learning_rate": 1.2349727199451696e-06, "loss": 0.9983, "step": 9589 }, { "epoch": 1.357683867770935, "grad_norm": 7.551804474691768, "learning_rate": 1.2344783529802975e-06, "loss": 1.0082, "step": 9590 }, { "epoch": 1.3578254406455723, "grad_norm": 9.540479650055598, "learning_rate": 1.2339840525429559e-06, "loss": 1.1455, "step": 9591 }, { "epoch": 1.3579670135202095, "grad_norm": 8.77949131632565, "learning_rate": 1.2334898186591274e-06, "loss": 0.915, "step": 9592 }, { "epoch": 1.3581085863948468, "grad_norm": 10.535642528520485, "learning_rate": 1.2329956513547957e-06, "loss": 0.904, "step": 9593 }, { "epoch": 1.358250159269484, "grad_norm": 10.098988216068046, "learning_rate": 1.232501550655937e-06, "loss": 0.8354, "step": 9594 }, { "epoch": 1.3583917321441212, "grad_norm": 7.761559510193389, "learning_rate": 1.2320075165885278e-06, "loss": 0.9405, "step": 9595 }, { "epoch": 1.3585333050187585, "grad_norm": 11.872388050533933, "learning_rate": 1.2315135491785369e-06, "loss": 1.0809, "step": 9596 }, { "epoch": 1.3586748778933957, "grad_norm": 9.522017813582766, "learning_rate": 1.2310196484519339e-06, "loss": 1.0603, "step": 9597 }, { "epoch": 1.358816450768033, "grad_norm": 9.913695031729032, "learning_rate": 1.2305258144346807e-06, "loss": 0.943, "step": 9598 }, { "epoch": 1.35895802364267, "grad_norm": 8.870266793936104, "learning_rate": 1.23003204715274e-06, "loss": 1.0191, "step": 9599 }, { "epoch": 1.3590995965173072, "grad_norm": 8.412297339074273, "learning_rate": 1.2295383466320677e-06, "loss": 1.0343, "step": 9600 }, { "epoch": 1.3592411693919444, "grad_norm": 9.55969476380134, "learning_rate": 1.229044712898616e-06, "loss": 1.0051, "step": 9601 }, { "epoch": 1.3593827422665816, "grad_norm": 9.076386014326115, "learning_rate": 1.2285511459783373e-06, "loss": 1.0018, "step": 9602 }, { "epoch": 1.3595243151412189, "grad_norm": 10.530280637415812, "learning_rate": 1.2280576458971757e-06, "loss": 0.9578, "step": 9603 }, { "epoch": 1.359665888015856, "grad_norm": 10.626367099548887, "learning_rate": 1.2275642126810764e-06, "loss": 1.0179, "step": 9604 }, { "epoch": 1.3598074608904933, "grad_norm": 7.715285044736404, "learning_rate": 1.2270708463559766e-06, "loss": 1.0612, "step": 9605 }, { "epoch": 1.3599490337651305, "grad_norm": 11.790027374158147, "learning_rate": 1.226577546947814e-06, "loss": 1.0514, "step": 9606 }, { "epoch": 1.3600906066397678, "grad_norm": 10.371101106400404, "learning_rate": 1.2260843144825196e-06, "loss": 1.0323, "step": 9607 }, { "epoch": 1.360232179514405, "grad_norm": 8.446507146222716, "learning_rate": 1.2255911489860228e-06, "loss": 0.9512, "step": 9608 }, { "epoch": 1.3603737523890422, "grad_norm": 8.539988531243443, "learning_rate": 1.2250980504842503e-06, "loss": 0.9883, "step": 9609 }, { "epoch": 1.3605153252636795, "grad_norm": 8.174701967552137, "learning_rate": 1.2246050190031222e-06, "loss": 1.0503, "step": 9610 }, { "epoch": 1.3606568981383167, "grad_norm": 8.06895767760184, "learning_rate": 1.2241120545685575e-06, "loss": 0.9838, "step": 9611 }, { "epoch": 1.360798471012954, "grad_norm": 8.693459641566802, "learning_rate": 1.2236191572064697e-06, "loss": 0.9402, "step": 9612 }, { "epoch": 1.3609400438875912, "grad_norm": 9.575615558807597, "learning_rate": 1.2231263269427716e-06, "loss": 1.0395, "step": 9613 }, { "epoch": 1.3610816167622284, "grad_norm": 8.248159492260417, "learning_rate": 1.2226335638033708e-06, "loss": 0.9826, "step": 9614 }, { "epoch": 1.3612231896368656, "grad_norm": 10.0052899673894, "learning_rate": 1.2221408678141702e-06, "loss": 1.0039, "step": 9615 }, { "epoch": 1.3613647625115028, "grad_norm": 9.05581228858321, "learning_rate": 1.2216482390010726e-06, "loss": 0.9621, "step": 9616 }, { "epoch": 1.36150633538614, "grad_norm": 9.287766349331019, "learning_rate": 1.2211556773899728e-06, "loss": 0.9464, "step": 9617 }, { "epoch": 1.3616479082607773, "grad_norm": 8.360268461306562, "learning_rate": 1.2206631830067663e-06, "loss": 0.9416, "step": 9618 }, { "epoch": 1.3617894811354145, "grad_norm": 8.453319949230746, "learning_rate": 1.2201707558773416e-06, "loss": 1.1088, "step": 9619 }, { "epoch": 1.3619310540100518, "grad_norm": 8.564244447643025, "learning_rate": 1.2196783960275867e-06, "loss": 1.0565, "step": 9620 }, { "epoch": 1.362072626884689, "grad_norm": 8.489336393232449, "learning_rate": 1.2191861034833841e-06, "loss": 0.9496, "step": 9621 }, { "epoch": 1.3622141997593262, "grad_norm": 9.506762858042821, "learning_rate": 1.218693878270612e-06, "loss": 0.9779, "step": 9622 }, { "epoch": 1.3623557726339635, "grad_norm": 8.391391030121893, "learning_rate": 1.2182017204151484e-06, "loss": 1.0053, "step": 9623 }, { "epoch": 1.3624973455086007, "grad_norm": 8.313958291080146, "learning_rate": 1.2177096299428634e-06, "loss": 0.9859, "step": 9624 }, { "epoch": 1.3626389183832377, "grad_norm": 8.858195123703776, "learning_rate": 1.2172176068796281e-06, "loss": 1.013, "step": 9625 }, { "epoch": 1.362780491257875, "grad_norm": 9.755287497315157, "learning_rate": 1.216725651251306e-06, "loss": 1.0487, "step": 9626 }, { "epoch": 1.3629220641325122, "grad_norm": 8.21912084402769, "learning_rate": 1.2162337630837604e-06, "loss": 1.0427, "step": 9627 }, { "epoch": 1.3630636370071494, "grad_norm": 8.381917800867997, "learning_rate": 1.2157419424028473e-06, "loss": 0.9835, "step": 9628 }, { "epoch": 1.3632052098817866, "grad_norm": 10.330470847434182, "learning_rate": 1.2152501892344232e-06, "loss": 0.9817, "step": 9629 }, { "epoch": 1.3633467827564238, "grad_norm": 9.689132454442273, "learning_rate": 1.2147585036043397e-06, "loss": 0.9478, "step": 9630 }, { "epoch": 1.363488355631061, "grad_norm": 11.455617427895227, "learning_rate": 1.2142668855384421e-06, "loss": 1.075, "step": 9631 }, { "epoch": 1.3636299285056983, "grad_norm": 9.451136961665727, "learning_rate": 1.2137753350625774e-06, "loss": 0.9589, "step": 9632 }, { "epoch": 1.3637715013803355, "grad_norm": 8.820140799784737, "learning_rate": 1.2132838522025827e-06, "loss": 0.9763, "step": 9633 }, { "epoch": 1.3639130742549728, "grad_norm": 9.944736365136599, "learning_rate": 1.2127924369842975e-06, "loss": 1.0433, "step": 9634 }, { "epoch": 1.36405464712961, "grad_norm": 9.027407135487119, "learning_rate": 1.212301089433553e-06, "loss": 1.0213, "step": 9635 }, { "epoch": 1.3641962200042472, "grad_norm": 10.150102932412766, "learning_rate": 1.21180980957618e-06, "loss": 1.0499, "step": 9636 }, { "epoch": 1.3643377928788845, "grad_norm": 8.349325424630825, "learning_rate": 1.211318597438006e-06, "loss": 1.0617, "step": 9637 }, { "epoch": 1.3644793657535217, "grad_norm": 6.949561828412896, "learning_rate": 1.2108274530448513e-06, "loss": 0.9203, "step": 9638 }, { "epoch": 1.364620938628159, "grad_norm": 8.238000176718929, "learning_rate": 1.210336376422537e-06, "loss": 1.0096, "step": 9639 }, { "epoch": 1.364762511502796, "grad_norm": 11.087781391207134, "learning_rate": 1.2098453675968772e-06, "loss": 0.9337, "step": 9640 }, { "epoch": 1.3649040843774332, "grad_norm": 9.289278918184351, "learning_rate": 1.2093544265936848e-06, "loss": 0.9778, "step": 9641 }, { "epoch": 1.3650456572520704, "grad_norm": 9.652871340169373, "learning_rate": 1.2088635534387684e-06, "loss": 1.0751, "step": 9642 }, { "epoch": 1.3651872301267076, "grad_norm": 9.664252363358264, "learning_rate": 1.208372748157931e-06, "loss": 1.0104, "step": 9643 }, { "epoch": 1.3653288030013448, "grad_norm": 9.748744785988796, "learning_rate": 1.2078820107769762e-06, "loss": 1.0245, "step": 9644 }, { "epoch": 1.365470375875982, "grad_norm": 9.191264724128226, "learning_rate": 1.2073913413216998e-06, "loss": 1.0178, "step": 9645 }, { "epoch": 1.3656119487506193, "grad_norm": 7.81065603238614, "learning_rate": 1.2069007398178978e-06, "loss": 0.9117, "step": 9646 }, { "epoch": 1.3657535216252565, "grad_norm": 9.530141697217465, "learning_rate": 1.2064102062913585e-06, "loss": 0.9524, "step": 9647 }, { "epoch": 1.3658950944998938, "grad_norm": 8.12837548198127, "learning_rate": 1.2059197407678714e-06, "loss": 1.0036, "step": 9648 }, { "epoch": 1.366036667374531, "grad_norm": 8.38041398147947, "learning_rate": 1.2054293432732172e-06, "loss": 0.9733, "step": 9649 }, { "epoch": 1.3661782402491682, "grad_norm": 8.546024259247597, "learning_rate": 1.2049390138331785e-06, "loss": 0.9741, "step": 9650 }, { "epoch": 1.3663198131238055, "grad_norm": 9.159183800327604, "learning_rate": 1.204448752473529e-06, "loss": 0.946, "step": 9651 }, { "epoch": 1.3664613859984427, "grad_norm": 8.566475272607248, "learning_rate": 1.2039585592200428e-06, "loss": 0.8692, "step": 9652 }, { "epoch": 1.36660295887308, "grad_norm": 9.055903276607665, "learning_rate": 1.2034684340984907e-06, "loss": 1.0206, "step": 9653 }, { "epoch": 1.3667445317477172, "grad_norm": 10.227058602605776, "learning_rate": 1.2029783771346344e-06, "loss": 0.9963, "step": 9654 }, { "epoch": 1.3668861046223544, "grad_norm": 10.182769987899405, "learning_rate": 1.2024883883542384e-06, "loss": 1.1043, "step": 9655 }, { "epoch": 1.3670276774969916, "grad_norm": 10.413559188184427, "learning_rate": 1.2019984677830597e-06, "loss": 0.9897, "step": 9656 }, { "epoch": 1.3671692503716288, "grad_norm": 8.485006235150736, "learning_rate": 1.2015086154468544e-06, "loss": 1.0187, "step": 9657 }, { "epoch": 1.367310823246266, "grad_norm": 9.477925403076828, "learning_rate": 1.201018831371372e-06, "loss": 0.9543, "step": 9658 }, { "epoch": 1.3674523961209033, "grad_norm": 9.6507789890358, "learning_rate": 1.2005291155823612e-06, "loss": 0.99, "step": 9659 }, { "epoch": 1.3675939689955405, "grad_norm": 8.150358096253683, "learning_rate": 1.200039468105567e-06, "loss": 0.9918, "step": 9660 }, { "epoch": 1.3677355418701778, "grad_norm": 11.500409326317545, "learning_rate": 1.1995498889667276e-06, "loss": 1.0365, "step": 9661 }, { "epoch": 1.367877114744815, "grad_norm": 9.691115363051896, "learning_rate": 1.1990603781915816e-06, "loss": 0.9404, "step": 9662 }, { "epoch": 1.3680186876194522, "grad_norm": 9.51303451179268, "learning_rate": 1.1985709358058616e-06, "loss": 0.931, "step": 9663 }, { "epoch": 1.3681602604940895, "grad_norm": 9.224606687023217, "learning_rate": 1.1980815618352964e-06, "loss": 1.1028, "step": 9664 }, { "epoch": 1.3683018333687267, "grad_norm": 8.762761428568275, "learning_rate": 1.1975922563056136e-06, "loss": 1.0513, "step": 9665 }, { "epoch": 1.3684434062433637, "grad_norm": 8.052176556822763, "learning_rate": 1.1971030192425337e-06, "loss": 1.0296, "step": 9666 }, { "epoch": 1.368584979118001, "grad_norm": 7.58138420571134, "learning_rate": 1.1966138506717776e-06, "loss": 0.8276, "step": 9667 }, { "epoch": 1.3687265519926382, "grad_norm": 10.58930376309093, "learning_rate": 1.1961247506190588e-06, "loss": 0.9945, "step": 9668 }, { "epoch": 1.3688681248672754, "grad_norm": 9.26694071339333, "learning_rate": 1.1956357191100903e-06, "loss": 0.9149, "step": 9669 }, { "epoch": 1.3690096977419126, "grad_norm": 9.445864576891863, "learning_rate": 1.1951467561705784e-06, "loss": 1.0755, "step": 9670 }, { "epoch": 1.3691512706165498, "grad_norm": 11.453263556460666, "learning_rate": 1.19465786182623e-06, "loss": 0.999, "step": 9671 }, { "epoch": 1.369292843491187, "grad_norm": 9.69348628990812, "learning_rate": 1.1941690361027432e-06, "loss": 1.0317, "step": 9672 }, { "epoch": 1.3694344163658243, "grad_norm": 8.132649268895062, "learning_rate": 1.1936802790258176e-06, "loss": 0.987, "step": 9673 }, { "epoch": 1.3695759892404615, "grad_norm": 9.723799494607452, "learning_rate": 1.1931915906211456e-06, "loss": 0.9963, "step": 9674 }, { "epoch": 1.3697175621150988, "grad_norm": 8.64590530059154, "learning_rate": 1.1927029709144163e-06, "loss": 0.974, "step": 9675 }, { "epoch": 1.369859134989736, "grad_norm": 8.86614763657668, "learning_rate": 1.1922144199313181e-06, "loss": 1.0156, "step": 9676 }, { "epoch": 1.3700007078643732, "grad_norm": 8.127739547160967, "learning_rate": 1.1917259376975318e-06, "loss": 0.9515, "step": 9677 }, { "epoch": 1.3701422807390105, "grad_norm": 9.187866618827849, "learning_rate": 1.1912375242387384e-06, "loss": 1.0706, "step": 9678 }, { "epoch": 1.3702838536136477, "grad_norm": 13.043665983535421, "learning_rate": 1.1907491795806117e-06, "loss": 1.0553, "step": 9679 }, { "epoch": 1.370425426488285, "grad_norm": 9.772706416875993, "learning_rate": 1.190260903748825e-06, "loss": 0.9948, "step": 9680 }, { "epoch": 1.3705669993629221, "grad_norm": 9.954204219355006, "learning_rate": 1.1897726967690454e-06, "loss": 1.0427, "step": 9681 }, { "epoch": 1.3707085722375592, "grad_norm": 9.8477128060157, "learning_rate": 1.189284558666938e-06, "loss": 0.9792, "step": 9682 }, { "epoch": 1.3708501451121964, "grad_norm": 9.711006636922031, "learning_rate": 1.188796489468165e-06, "loss": 0.9786, "step": 9683 }, { "epoch": 1.3709917179868336, "grad_norm": 8.96455481900955, "learning_rate": 1.1883084891983828e-06, "loss": 1.0524, "step": 9684 }, { "epoch": 1.3711332908614708, "grad_norm": 8.458049700511822, "learning_rate": 1.1878205578832455e-06, "loss": 0.9422, "step": 9685 }, { "epoch": 1.371274863736108, "grad_norm": 8.684707741747578, "learning_rate": 1.187332695548402e-06, "loss": 0.9003, "step": 9686 }, { "epoch": 1.3714164366107453, "grad_norm": 8.684567622093505, "learning_rate": 1.1868449022194997e-06, "loss": 0.9642, "step": 9687 }, { "epoch": 1.3715580094853825, "grad_norm": 10.497125868029677, "learning_rate": 1.186357177922183e-06, "loss": 1.058, "step": 9688 }, { "epoch": 1.3716995823600198, "grad_norm": 8.273840011012542, "learning_rate": 1.185869522682089e-06, "loss": 1.0504, "step": 9689 }, { "epoch": 1.371841155234657, "grad_norm": 7.59743532776491, "learning_rate": 1.1853819365248553e-06, "loss": 0.9593, "step": 9690 }, { "epoch": 1.3719827281092942, "grad_norm": 9.697407436966602, "learning_rate": 1.184894419476112e-06, "loss": 1.1385, "step": 9691 }, { "epoch": 1.3721243009839315, "grad_norm": 7.157203577467698, "learning_rate": 1.1844069715614893e-06, "loss": 0.9424, "step": 9692 }, { "epoch": 1.3722658738585687, "grad_norm": 8.31811242185962, "learning_rate": 1.1839195928066101e-06, "loss": 0.9322, "step": 9693 }, { "epoch": 1.372407446733206, "grad_norm": 7.105316814792583, "learning_rate": 1.183432283237098e-06, "loss": 0.9271, "step": 9694 }, { "epoch": 1.3725490196078431, "grad_norm": 8.621172954297469, "learning_rate": 1.1829450428785689e-06, "loss": 1.0171, "step": 9695 }, { "epoch": 1.3726905924824804, "grad_norm": 9.458625239994662, "learning_rate": 1.1824578717566358e-06, "loss": 1.0915, "step": 9696 }, { "epoch": 1.3728321653571176, "grad_norm": 9.587562578735634, "learning_rate": 1.181970769896911e-06, "loss": 0.9648, "step": 9697 }, { "epoch": 1.3729737382317548, "grad_norm": 9.211468837383636, "learning_rate": 1.1814837373249991e-06, "loss": 0.968, "step": 9698 }, { "epoch": 1.373115311106392, "grad_norm": 8.697316284169215, "learning_rate": 1.180996774066505e-06, "loss": 0.9122, "step": 9699 }, { "epoch": 1.3732568839810293, "grad_norm": 8.937654934053722, "learning_rate": 1.1805098801470259e-06, "loss": 0.8791, "step": 9700 }, { "epoch": 1.3733984568556665, "grad_norm": 10.495242176215008, "learning_rate": 1.1800230555921597e-06, "loss": 1.0434, "step": 9701 }, { "epoch": 1.3735400297303038, "grad_norm": 7.978813488931929, "learning_rate": 1.179536300427496e-06, "loss": 1.0724, "step": 9702 }, { "epoch": 1.373681602604941, "grad_norm": 9.661300967735151, "learning_rate": 1.1790496146786257e-06, "loss": 1.0061, "step": 9703 }, { "epoch": 1.3738231754795782, "grad_norm": 10.29995167544982, "learning_rate": 1.1785629983711311e-06, "loss": 0.9946, "step": 9704 }, { "epoch": 1.3739647483542154, "grad_norm": 9.034517007488212, "learning_rate": 1.1780764515305942e-06, "loss": 0.9804, "step": 9705 }, { "epoch": 1.3741063212288527, "grad_norm": 8.330870556900992, "learning_rate": 1.1775899741825947e-06, "loss": 0.9625, "step": 9706 }, { "epoch": 1.37424789410349, "grad_norm": 8.359269129002472, "learning_rate": 1.1771035663527021e-06, "loss": 0.9642, "step": 9707 }, { "epoch": 1.374389466978127, "grad_norm": 9.823667355301541, "learning_rate": 1.17661722806649e-06, "loss": 0.9844, "step": 9708 }, { "epoch": 1.3745310398527641, "grad_norm": 8.830320696250665, "learning_rate": 1.1761309593495224e-06, "loss": 1.0695, "step": 9709 }, { "epoch": 1.3746726127274014, "grad_norm": 10.062235005366485, "learning_rate": 1.1756447602273629e-06, "loss": 1.0446, "step": 9710 }, { "epoch": 1.3748141856020386, "grad_norm": 10.406862533000123, "learning_rate": 1.1751586307255719e-06, "loss": 1.023, "step": 9711 }, { "epoch": 1.3749557584766758, "grad_norm": 9.293684211503237, "learning_rate": 1.174672570869703e-06, "loss": 1.049, "step": 9712 }, { "epoch": 1.375097331351313, "grad_norm": 7.480618675064165, "learning_rate": 1.1741865806853097e-06, "loss": 0.9429, "step": 9713 }, { "epoch": 1.3752389042259503, "grad_norm": 9.074196889771343, "learning_rate": 1.1737006601979384e-06, "loss": 1.0665, "step": 9714 }, { "epoch": 1.3753804771005875, "grad_norm": 9.071151497453126, "learning_rate": 1.1732148094331353e-06, "loss": 0.9454, "step": 9715 }, { "epoch": 1.3755220499752248, "grad_norm": 9.610676995128257, "learning_rate": 1.1727290284164406e-06, "loss": 1.0145, "step": 9716 }, { "epoch": 1.375663622849862, "grad_norm": 9.566191316108345, "learning_rate": 1.1722433171733903e-06, "loss": 1.0013, "step": 9717 }, { "epoch": 1.3758051957244992, "grad_norm": 9.642164110579717, "learning_rate": 1.1717576757295192e-06, "loss": 0.9149, "step": 9718 }, { "epoch": 1.3759467685991364, "grad_norm": 9.516907353427923, "learning_rate": 1.171272104110356e-06, "loss": 0.9135, "step": 9719 }, { "epoch": 1.3760883414737737, "grad_norm": 9.269676519220942, "learning_rate": 1.1707866023414288e-06, "loss": 1.0935, "step": 9720 }, { "epoch": 1.376229914348411, "grad_norm": 7.976430744274841, "learning_rate": 1.1703011704482577e-06, "loss": 0.8765, "step": 9721 }, { "epoch": 1.3763714872230481, "grad_norm": 9.126756890879413, "learning_rate": 1.1698158084563635e-06, "loss": 1.069, "step": 9722 }, { "epoch": 1.3765130600976851, "grad_norm": 10.105811881133326, "learning_rate": 1.1693305163912597e-06, "loss": 0.9654, "step": 9723 }, { "epoch": 1.3766546329723224, "grad_norm": 10.367817426227843, "learning_rate": 1.1688452942784592e-06, "loss": 1.0585, "step": 9724 }, { "epoch": 1.3767962058469596, "grad_norm": 8.603380612235192, "learning_rate": 1.168360142143468e-06, "loss": 0.9179, "step": 9725 }, { "epoch": 1.3769377787215968, "grad_norm": 10.101173529102402, "learning_rate": 1.1678750600117914e-06, "loss": 1.023, "step": 9726 }, { "epoch": 1.377079351596234, "grad_norm": 8.227128200516532, "learning_rate": 1.1673900479089314e-06, "loss": 1.0164, "step": 9727 }, { "epoch": 1.3772209244708713, "grad_norm": 8.463401992036571, "learning_rate": 1.1669051058603811e-06, "loss": 1.0171, "step": 9728 }, { "epoch": 1.3773624973455085, "grad_norm": 9.647844823016378, "learning_rate": 1.1664202338916364e-06, "loss": 1.0026, "step": 9729 }, { "epoch": 1.3775040702201458, "grad_norm": 9.715581543028614, "learning_rate": 1.1659354320281845e-06, "loss": 0.9628, "step": 9730 }, { "epoch": 1.377645643094783, "grad_norm": 9.444457464271498, "learning_rate": 1.1654507002955135e-06, "loss": 0.9623, "step": 9731 }, { "epoch": 1.3777872159694202, "grad_norm": 7.778744213672799, "learning_rate": 1.1649660387191027e-06, "loss": 0.951, "step": 9732 }, { "epoch": 1.3779287888440575, "grad_norm": 10.012297507535148, "learning_rate": 1.1644814473244322e-06, "loss": 1.0129, "step": 9733 }, { "epoch": 1.3780703617186947, "grad_norm": 9.614506140854013, "learning_rate": 1.163996926136977e-06, "loss": 0.905, "step": 9734 }, { "epoch": 1.378211934593332, "grad_norm": 8.793638565669664, "learning_rate": 1.1635124751822063e-06, "loss": 0.8931, "step": 9735 }, { "epoch": 1.3783535074679691, "grad_norm": 9.122764418061085, "learning_rate": 1.163028094485589e-06, "loss": 1.0382, "step": 9736 }, { "epoch": 1.3784950803426064, "grad_norm": 8.886169711748481, "learning_rate": 1.162543784072588e-06, "loss": 0.9906, "step": 9737 }, { "epoch": 1.3786366532172436, "grad_norm": 9.800161975378384, "learning_rate": 1.1620595439686632e-06, "loss": 0.9523, "step": 9738 }, { "epoch": 1.3787782260918808, "grad_norm": 8.633896813973163, "learning_rate": 1.1615753741992696e-06, "loss": 0.9608, "step": 9739 }, { "epoch": 1.378919798966518, "grad_norm": 9.44038801656586, "learning_rate": 1.1610912747898607e-06, "loss": 0.9882, "step": 9740 }, { "epoch": 1.3790613718411553, "grad_norm": 8.292505985380904, "learning_rate": 1.1606072457658856e-06, "loss": 0.9647, "step": 9741 }, { "epoch": 1.3792029447157925, "grad_norm": 9.385552735917656, "learning_rate": 1.1601232871527884e-06, "loss": 1.0268, "step": 9742 }, { "epoch": 1.3793445175904298, "grad_norm": 10.065547884314384, "learning_rate": 1.1596393989760118e-06, "loss": 1.0146, "step": 9743 }, { "epoch": 1.379486090465067, "grad_norm": 8.76177796179279, "learning_rate": 1.1591555812609914e-06, "loss": 0.8555, "step": 9744 }, { "epoch": 1.3796276633397042, "grad_norm": 9.949131714013689, "learning_rate": 1.1586718340331634e-06, "loss": 1.0407, "step": 9745 }, { "epoch": 1.3797692362143414, "grad_norm": 8.863758543368423, "learning_rate": 1.1581881573179562e-06, "loss": 0.9986, "step": 9746 }, { "epoch": 1.3799108090889787, "grad_norm": 9.490218095594702, "learning_rate": 1.1577045511407977e-06, "loss": 1.0722, "step": 9747 }, { "epoch": 1.380052381963616, "grad_norm": 7.9554710899903265, "learning_rate": 1.1572210155271105e-06, "loss": 1.1103, "step": 9748 }, { "epoch": 1.380193954838253, "grad_norm": 8.396837147668917, "learning_rate": 1.156737550502312e-06, "loss": 0.802, "step": 9749 }, { "epoch": 1.3803355277128901, "grad_norm": 9.293842648196364, "learning_rate": 1.15625415609182e-06, "loss": 1.0517, "step": 9750 }, { "epoch": 1.3804771005875274, "grad_norm": 10.495911665126041, "learning_rate": 1.155770832321044e-06, "loss": 1.0364, "step": 9751 }, { "epoch": 1.3806186734621646, "grad_norm": 10.972023152236904, "learning_rate": 1.1552875792153943e-06, "loss": 1.0104, "step": 9752 }, { "epoch": 1.3807602463368018, "grad_norm": 9.064813568504224, "learning_rate": 1.1548043968002725e-06, "loss": 0.9811, "step": 9753 }, { "epoch": 1.380901819211439, "grad_norm": 9.599470489521764, "learning_rate": 1.1543212851010819e-06, "loss": 1.0554, "step": 9754 }, { "epoch": 1.3810433920860763, "grad_norm": 9.406172945334307, "learning_rate": 1.1538382441432166e-06, "loss": 0.9954, "step": 9755 }, { "epoch": 1.3811849649607135, "grad_norm": 7.934399337100205, "learning_rate": 1.1533552739520715e-06, "loss": 0.9552, "step": 9756 }, { "epoch": 1.3813265378353508, "grad_norm": 9.24541834180268, "learning_rate": 1.1528723745530362e-06, "loss": 1.0918, "step": 9757 }, { "epoch": 1.381468110709988, "grad_norm": 8.691115251448737, "learning_rate": 1.1523895459714948e-06, "loss": 1.0263, "step": 9758 }, { "epoch": 1.3816096835846252, "grad_norm": 8.31832842027288, "learning_rate": 1.151906788232832e-06, "loss": 0.9928, "step": 9759 }, { "epoch": 1.3817512564592624, "grad_norm": 10.29538485637231, "learning_rate": 1.1514241013624225e-06, "loss": 1.0494, "step": 9760 }, { "epoch": 1.3818928293338997, "grad_norm": 9.329824423819394, "learning_rate": 1.1509414853856421e-06, "loss": 0.8418, "step": 9761 }, { "epoch": 1.382034402208537, "grad_norm": 9.049373828448669, "learning_rate": 1.1504589403278631e-06, "loss": 0.9206, "step": 9762 }, { "epoch": 1.3821759750831741, "grad_norm": 8.739762338859455, "learning_rate": 1.1499764662144505e-06, "loss": 0.8851, "step": 9763 }, { "epoch": 1.3823175479578114, "grad_norm": 7.533078806580503, "learning_rate": 1.1494940630707693e-06, "loss": 0.9249, "step": 9764 }, { "epoch": 1.3824591208324484, "grad_norm": 8.539247446247368, "learning_rate": 1.1490117309221772e-06, "loss": 0.9477, "step": 9765 }, { "epoch": 1.3826006937070856, "grad_norm": 9.21041027368857, "learning_rate": 1.148529469794032e-06, "loss": 0.9871, "step": 9766 }, { "epoch": 1.3827422665817228, "grad_norm": 9.611124316765663, "learning_rate": 1.148047279711684e-06, "loss": 0.9922, "step": 9767 }, { "epoch": 1.38288383945636, "grad_norm": 8.483281017609311, "learning_rate": 1.1475651607004834e-06, "loss": 0.9808, "step": 9768 }, { "epoch": 1.3830254123309973, "grad_norm": 11.95913541188512, "learning_rate": 1.1470831127857738e-06, "loss": 1.0263, "step": 9769 }, { "epoch": 1.3831669852056345, "grad_norm": 8.14955723969867, "learning_rate": 1.1466011359928951e-06, "loss": 0.8907, "step": 9770 }, { "epoch": 1.3833085580802718, "grad_norm": 8.61914748505562, "learning_rate": 1.146119230347187e-06, "loss": 0.9406, "step": 9771 }, { "epoch": 1.383450130954909, "grad_norm": 10.442629712856059, "learning_rate": 1.14563739587398e-06, "loss": 0.9875, "step": 9772 }, { "epoch": 1.3835917038295462, "grad_norm": 9.86896687922214, "learning_rate": 1.1451556325986065e-06, "loss": 1.1369, "step": 9773 }, { "epoch": 1.3837332767041834, "grad_norm": 6.605504266518207, "learning_rate": 1.14467394054639e-06, "loss": 0.9416, "step": 9774 }, { "epoch": 1.3838748495788207, "grad_norm": 8.930052742284364, "learning_rate": 1.144192319742655e-06, "loss": 0.9492, "step": 9775 }, { "epoch": 1.384016422453458, "grad_norm": 8.041633988019203, "learning_rate": 1.1437107702127178e-06, "loss": 0.9405, "step": 9776 }, { "epoch": 1.3841579953280951, "grad_norm": 10.02109515085357, "learning_rate": 1.1432292919818952e-06, "loss": 0.9252, "step": 9777 }, { "epoch": 1.3842995682027324, "grad_norm": 8.335754449724018, "learning_rate": 1.1427478850754959e-06, "loss": 1.0027, "step": 9778 }, { "epoch": 1.3844411410773696, "grad_norm": 9.429193137672971, "learning_rate": 1.1422665495188284e-06, "loss": 0.9538, "step": 9779 }, { "epoch": 1.3845827139520068, "grad_norm": 9.328256270668678, "learning_rate": 1.1417852853371978e-06, "loss": 1.0289, "step": 9780 }, { "epoch": 1.384724286826644, "grad_norm": 8.897042423839597, "learning_rate": 1.1413040925559e-06, "loss": 0.9004, "step": 9781 }, { "epoch": 1.3848658597012813, "grad_norm": 8.864002990253923, "learning_rate": 1.1408229712002345e-06, "loss": 0.9028, "step": 9782 }, { "epoch": 1.3850074325759185, "grad_norm": 11.159903988442284, "learning_rate": 1.1403419212954904e-06, "loss": 1.0192, "step": 9783 }, { "epoch": 1.3851490054505557, "grad_norm": 8.917373296480813, "learning_rate": 1.1398609428669582e-06, "loss": 0.9797, "step": 9784 }, { "epoch": 1.385290578325193, "grad_norm": 8.544977901602039, "learning_rate": 1.1393800359399225e-06, "loss": 0.9699, "step": 9785 }, { "epoch": 1.3854321511998302, "grad_norm": 10.59286630371215, "learning_rate": 1.1388992005396632e-06, "loss": 0.9813, "step": 9786 }, { "epoch": 1.3855737240744674, "grad_norm": 10.649876811429925, "learning_rate": 1.1384184366914588e-06, "loss": 1.0246, "step": 9787 }, { "epoch": 1.3857152969491047, "grad_norm": 10.233421502577428, "learning_rate": 1.1379377444205814e-06, "loss": 1.0216, "step": 9788 }, { "epoch": 1.385856869823742, "grad_norm": 9.35213601911976, "learning_rate": 1.1374571237523015e-06, "loss": 0.9766, "step": 9789 }, { "epoch": 1.385998442698379, "grad_norm": 9.97610212083811, "learning_rate": 1.1369765747118853e-06, "loss": 1.0166, "step": 9790 }, { "epoch": 1.3861400155730161, "grad_norm": 9.672279263184016, "learning_rate": 1.1364960973245927e-06, "loss": 1.0205, "step": 9791 }, { "epoch": 1.3862815884476534, "grad_norm": 9.677590303859503, "learning_rate": 1.136015691615685e-06, "loss": 0.9582, "step": 9792 }, { "epoch": 1.3864231613222906, "grad_norm": 8.60175319810181, "learning_rate": 1.135535357610414e-06, "loss": 0.9753, "step": 9793 }, { "epoch": 1.3865647341969278, "grad_norm": 8.119797669694568, "learning_rate": 1.1350550953340334e-06, "loss": 0.9292, "step": 9794 }, { "epoch": 1.386706307071565, "grad_norm": 9.624366640332187, "learning_rate": 1.1345749048117872e-06, "loss": 1.1083, "step": 9795 }, { "epoch": 1.3868478799462023, "grad_norm": 9.54689657705213, "learning_rate": 1.1340947860689214e-06, "loss": 0.9207, "step": 9796 }, { "epoch": 1.3869894528208395, "grad_norm": 9.718768447907532, "learning_rate": 1.133614739130673e-06, "loss": 1.0425, "step": 9797 }, { "epoch": 1.3871310256954767, "grad_norm": 6.998280995333671, "learning_rate": 1.13313476402228e-06, "loss": 0.8899, "step": 9798 }, { "epoch": 1.387272598570114, "grad_norm": 8.60782051245316, "learning_rate": 1.1326548607689724e-06, "loss": 0.9122, "step": 9799 }, { "epoch": 1.3874141714447512, "grad_norm": 9.762571006839039, "learning_rate": 1.1321750293959802e-06, "loss": 1.0628, "step": 9800 }, { "epoch": 1.3875557443193884, "grad_norm": 8.006145024560627, "learning_rate": 1.1316952699285268e-06, "loss": 0.851, "step": 9801 }, { "epoch": 1.3876973171940257, "grad_norm": 9.210354360251632, "learning_rate": 1.131215582391832e-06, "loss": 1.0349, "step": 9802 }, { "epoch": 1.387838890068663, "grad_norm": 9.645688895564668, "learning_rate": 1.1307359668111141e-06, "loss": 1.0336, "step": 9803 }, { "epoch": 1.3879804629433001, "grad_norm": 8.881161122746484, "learning_rate": 1.1302564232115848e-06, "loss": 0.9945, "step": 9804 }, { "epoch": 1.3881220358179374, "grad_norm": 9.031114834628669, "learning_rate": 1.1297769516184544e-06, "loss": 1.0237, "step": 9805 }, { "epoch": 1.3882636086925744, "grad_norm": 8.372949605438626, "learning_rate": 1.1292975520569278e-06, "loss": 1.0458, "step": 9806 }, { "epoch": 1.3884051815672116, "grad_norm": 8.000864935847854, "learning_rate": 1.1288182245522063e-06, "loss": 0.8501, "step": 9807 }, { "epoch": 1.3885467544418488, "grad_norm": 10.081596119775163, "learning_rate": 1.1283389691294894e-06, "loss": 0.9817, "step": 9808 }, { "epoch": 1.388688327316486, "grad_norm": 8.603436479843117, "learning_rate": 1.1278597858139692e-06, "loss": 1.0192, "step": 9809 }, { "epoch": 1.3888299001911233, "grad_norm": 9.729457243692268, "learning_rate": 1.127380674630838e-06, "loss": 1.0561, "step": 9810 }, { "epoch": 1.3889714730657605, "grad_norm": 9.09255396787346, "learning_rate": 1.1269016356052803e-06, "loss": 1.0057, "step": 9811 }, { "epoch": 1.3891130459403978, "grad_norm": 9.933985442533169, "learning_rate": 1.1264226687624815e-06, "loss": 0.9874, "step": 9812 }, { "epoch": 1.389254618815035, "grad_norm": 11.263769920296793, "learning_rate": 1.1259437741276172e-06, "loss": 1.0732, "step": 9813 }, { "epoch": 1.3893961916896722, "grad_norm": 8.128756549708793, "learning_rate": 1.125464951725864e-06, "loss": 1.0266, "step": 9814 }, { "epoch": 1.3895377645643094, "grad_norm": 9.225283618898722, "learning_rate": 1.1249862015823943e-06, "loss": 1.0911, "step": 9815 }, { "epoch": 1.3896793374389467, "grad_norm": 12.471955845703892, "learning_rate": 1.1245075237223741e-06, "loss": 0.9459, "step": 9816 }, { "epoch": 1.389820910313584, "grad_norm": 10.253222819486327, "learning_rate": 1.1240289181709681e-06, "loss": 1.0048, "step": 9817 }, { "epoch": 1.3899624831882211, "grad_norm": 10.2462638931022, "learning_rate": 1.1235503849533355e-06, "loss": 1.0934, "step": 9818 }, { "epoch": 1.3901040560628584, "grad_norm": 7.996184869870414, "learning_rate": 1.1230719240946336e-06, "loss": 1.0285, "step": 9819 }, { "epoch": 1.3902456289374956, "grad_norm": 8.849773822200293, "learning_rate": 1.1225935356200129e-06, "loss": 1.0109, "step": 9820 }, { "epoch": 1.3903872018121328, "grad_norm": 9.30454769718126, "learning_rate": 1.1221152195546241e-06, "loss": 1.0644, "step": 9821 }, { "epoch": 1.39052877468677, "grad_norm": 8.22108571328482, "learning_rate": 1.1216369759236108e-06, "loss": 0.9903, "step": 9822 }, { "epoch": 1.3906703475614073, "grad_norm": 10.58991075112519, "learning_rate": 1.121158804752113e-06, "loss": 0.9183, "step": 9823 }, { "epoch": 1.3908119204360445, "grad_norm": 8.383582884131203, "learning_rate": 1.1206807060652696e-06, "loss": 0.9565, "step": 9824 }, { "epoch": 1.3909534933106817, "grad_norm": 7.5930617534688984, "learning_rate": 1.120202679888212e-06, "loss": 0.9245, "step": 9825 }, { "epoch": 1.391095066185319, "grad_norm": 9.230753903498519, "learning_rate": 1.119724726246072e-06, "loss": 1.0103, "step": 9826 }, { "epoch": 1.3912366390599562, "grad_norm": 8.644806165844294, "learning_rate": 1.1192468451639727e-06, "loss": 0.9606, "step": 9827 }, { "epoch": 1.3913782119345934, "grad_norm": 8.826027734148859, "learning_rate": 1.1187690366670381e-06, "loss": 1.0395, "step": 9828 }, { "epoch": 1.3915197848092307, "grad_norm": 9.606955916925303, "learning_rate": 1.1182913007803847e-06, "loss": 0.9304, "step": 9829 }, { "epoch": 1.391661357683868, "grad_norm": 10.236502343310892, "learning_rate": 1.117813637529127e-06, "loss": 1.0783, "step": 9830 }, { "epoch": 1.3918029305585051, "grad_norm": 10.49549114966309, "learning_rate": 1.117336046938377e-06, "loss": 1.1029, "step": 9831 }, { "epoch": 1.3919445034331421, "grad_norm": 8.61517659135292, "learning_rate": 1.116858529033239e-06, "loss": 0.9538, "step": 9832 }, { "epoch": 1.3920860763077794, "grad_norm": 8.714745261517661, "learning_rate": 1.1163810838388187e-06, "loss": 0.9938, "step": 9833 }, { "epoch": 1.3922276491824166, "grad_norm": 8.848905213308655, "learning_rate": 1.1159037113802113e-06, "loss": 0.9565, "step": 9834 }, { "epoch": 1.3923692220570538, "grad_norm": 9.05775316820097, "learning_rate": 1.1154264116825147e-06, "loss": 1.0408, "step": 9835 }, { "epoch": 1.392510794931691, "grad_norm": 11.238404125805491, "learning_rate": 1.1149491847708186e-06, "loss": 1.1864, "step": 9836 }, { "epoch": 1.3926523678063283, "grad_norm": 9.696230000402378, "learning_rate": 1.1144720306702106e-06, "loss": 1.0287, "step": 9837 }, { "epoch": 1.3927939406809655, "grad_norm": 8.520940678922189, "learning_rate": 1.113994949405776e-06, "loss": 1.0714, "step": 9838 }, { "epoch": 1.3929355135556027, "grad_norm": 9.409201124213386, "learning_rate": 1.1135179410025925e-06, "loss": 1.0663, "step": 9839 }, { "epoch": 1.39307708643024, "grad_norm": 10.419871170506905, "learning_rate": 1.1130410054857382e-06, "loss": 1.0578, "step": 9840 }, { "epoch": 1.3932186593048772, "grad_norm": 10.943281465845729, "learning_rate": 1.1125641428802831e-06, "loss": 0.909, "step": 9841 }, { "epoch": 1.3933602321795144, "grad_norm": 7.777053375418068, "learning_rate": 1.1120873532112971e-06, "loss": 0.9003, "step": 9842 }, { "epoch": 1.3935018050541517, "grad_norm": 9.45067722376513, "learning_rate": 1.1116106365038443e-06, "loss": 1.013, "step": 9843 }, { "epoch": 1.393643377928789, "grad_norm": 12.175954818100381, "learning_rate": 1.1111339927829842e-06, "loss": 1.003, "step": 9844 }, { "epoch": 1.3937849508034261, "grad_norm": 10.173304116076723, "learning_rate": 1.1106574220737754e-06, "loss": 1.0761, "step": 9845 }, { "epoch": 1.3939265236780634, "grad_norm": 8.651016581340311, "learning_rate": 1.110180924401269e-06, "loss": 0.9594, "step": 9846 }, { "epoch": 1.3940680965527004, "grad_norm": 8.632203121167716, "learning_rate": 1.1097044997905162e-06, "loss": 0.9537, "step": 9847 }, { "epoch": 1.3942096694273376, "grad_norm": 11.34131102180693, "learning_rate": 1.1092281482665601e-06, "loss": 1.0468, "step": 9848 }, { "epoch": 1.3943512423019748, "grad_norm": 8.093532250026966, "learning_rate": 1.1087518698544444e-06, "loss": 1.061, "step": 9849 }, { "epoch": 1.394492815176612, "grad_norm": 8.832124543403774, "learning_rate": 1.1082756645792046e-06, "loss": 0.9748, "step": 9850 }, { "epoch": 1.3946343880512493, "grad_norm": 8.673499812591423, "learning_rate": 1.1077995324658762e-06, "loss": 0.9349, "step": 9851 }, { "epoch": 1.3947759609258865, "grad_norm": 9.57494706022204, "learning_rate": 1.1073234735394872e-06, "loss": 1.0039, "step": 9852 }, { "epoch": 1.3949175338005237, "grad_norm": 10.50893457937827, "learning_rate": 1.1068474878250649e-06, "loss": 1.0982, "step": 9853 }, { "epoch": 1.395059106675161, "grad_norm": 7.356896633096619, "learning_rate": 1.1063715753476334e-06, "loss": 0.9977, "step": 9854 }, { "epoch": 1.3952006795497982, "grad_norm": 8.159319888176812, "learning_rate": 1.105895736132207e-06, "loss": 0.9707, "step": 9855 }, { "epoch": 1.3953422524244354, "grad_norm": 7.928406079113397, "learning_rate": 1.1054199702038032e-06, "loss": 0.8649, "step": 9856 }, { "epoch": 1.3954838252990727, "grad_norm": 11.467022924285699, "learning_rate": 1.104944277587431e-06, "loss": 0.9684, "step": 9857 }, { "epoch": 1.39562539817371, "grad_norm": 8.042555158744348, "learning_rate": 1.1044686583080976e-06, "loss": 0.9052, "step": 9858 }, { "epoch": 1.3957669710483471, "grad_norm": 10.1371857581488, "learning_rate": 1.1039931123908074e-06, "loss": 0.9746, "step": 9859 }, { "epoch": 1.3959085439229844, "grad_norm": 11.208382153788792, "learning_rate": 1.1035176398605576e-06, "loss": 1.0338, "step": 9860 }, { "epoch": 1.3960501167976216, "grad_norm": 9.953044897023622, "learning_rate": 1.103042240742345e-06, "loss": 0.9811, "step": 9861 }, { "epoch": 1.3961916896722588, "grad_norm": 10.307088645926184, "learning_rate": 1.1025669150611594e-06, "loss": 1.0152, "step": 9862 }, { "epoch": 1.396333262546896, "grad_norm": 10.920247274540507, "learning_rate": 1.1020916628419898e-06, "loss": 0.9956, "step": 9863 }, { "epoch": 1.3964748354215333, "grad_norm": 8.833081175845834, "learning_rate": 1.1016164841098193e-06, "loss": 1.0007, "step": 9864 }, { "epoch": 1.3966164082961705, "grad_norm": 10.232605852576347, "learning_rate": 1.1011413788896263e-06, "loss": 1.0058, "step": 9865 }, { "epoch": 1.3967579811708077, "grad_norm": 9.528089655994835, "learning_rate": 1.1006663472063892e-06, "loss": 0.9644, "step": 9866 }, { "epoch": 1.396899554045445, "grad_norm": 9.422239786807468, "learning_rate": 1.100191389085078e-06, "loss": 1.1542, "step": 9867 }, { "epoch": 1.3970411269200822, "grad_norm": 10.938013729564638, "learning_rate": 1.0997165045506624e-06, "loss": 1.0074, "step": 9868 }, { "epoch": 1.3971826997947194, "grad_norm": 8.168814966925568, "learning_rate": 1.0992416936281054e-06, "loss": 1.0213, "step": 9869 }, { "epoch": 1.3973242726693567, "grad_norm": 9.124174315838161, "learning_rate": 1.098766956342369e-06, "loss": 0.9767, "step": 9870 }, { "epoch": 1.397465845543994, "grad_norm": 8.992548082746504, "learning_rate": 1.0982922927184077e-06, "loss": 1.0447, "step": 9871 }, { "epoch": 1.3976074184186311, "grad_norm": 9.776236812464134, "learning_rate": 1.0978177027811767e-06, "loss": 1.023, "step": 9872 }, { "epoch": 1.3977489912932681, "grad_norm": 8.64250066172912, "learning_rate": 1.0973431865556225e-06, "loss": 0.9801, "step": 9873 }, { "epoch": 1.3978905641679054, "grad_norm": 8.773451413613188, "learning_rate": 1.096868744066692e-06, "loss": 0.9677, "step": 9874 }, { "epoch": 1.3980321370425426, "grad_norm": 9.503667826078388, "learning_rate": 1.0963943753393252e-06, "loss": 0.9831, "step": 9875 }, { "epoch": 1.3981737099171798, "grad_norm": 8.556507547541392, "learning_rate": 1.095920080398459e-06, "loss": 0.9473, "step": 9876 }, { "epoch": 1.398315282791817, "grad_norm": 10.339206216178496, "learning_rate": 1.0954458592690278e-06, "loss": 0.9401, "step": 9877 }, { "epoch": 1.3984568556664543, "grad_norm": 10.41906242794626, "learning_rate": 1.0949717119759597e-06, "loss": 1.1009, "step": 9878 }, { "epoch": 1.3985984285410915, "grad_norm": 8.455068875962255, "learning_rate": 1.0944976385441822e-06, "loss": 0.9754, "step": 9879 }, { "epoch": 1.3987400014157287, "grad_norm": 8.946358085726665, "learning_rate": 1.0940236389986148e-06, "loss": 0.9684, "step": 9880 }, { "epoch": 1.398881574290366, "grad_norm": 9.346867784339826, "learning_rate": 1.0935497133641765e-06, "loss": 0.9694, "step": 9881 }, { "epoch": 1.3990231471650032, "grad_norm": 8.291549553994576, "learning_rate": 1.0930758616657816e-06, "loss": 0.9036, "step": 9882 }, { "epoch": 1.3991647200396404, "grad_norm": 9.770689311860563, "learning_rate": 1.0926020839283392e-06, "loss": 0.9508, "step": 9883 }, { "epoch": 1.3993062929142777, "grad_norm": 12.821933455569893, "learning_rate": 1.0921283801767562e-06, "loss": 1.0885, "step": 9884 }, { "epoch": 1.399447865788915, "grad_norm": 8.107208962288016, "learning_rate": 1.091654750435934e-06, "loss": 0.8427, "step": 9885 }, { "epoch": 1.3995894386635521, "grad_norm": 12.136076282480252, "learning_rate": 1.0911811947307732e-06, "loss": 1.1749, "step": 9886 }, { "epoch": 1.3997310115381894, "grad_norm": 10.145484077557303, "learning_rate": 1.0907077130861646e-06, "loss": 1.0787, "step": 9887 }, { "epoch": 1.3998725844128266, "grad_norm": 9.662377249862082, "learning_rate": 1.0902343055270006e-06, "loss": 0.9086, "step": 9888 }, { "epoch": 1.4000141572874636, "grad_norm": 9.38767088062786, "learning_rate": 1.0897609720781693e-06, "loss": 1.0273, "step": 9889 }, { "epoch": 1.4001557301621008, "grad_norm": 9.877597901697415, "learning_rate": 1.089287712764551e-06, "loss": 1.0592, "step": 9890 }, { "epoch": 1.400297303036738, "grad_norm": 10.235603084117978, "learning_rate": 1.0888145276110268e-06, "loss": 0.9977, "step": 9891 }, { "epoch": 1.4004388759113753, "grad_norm": 8.62502874839525, "learning_rate": 1.0883414166424697e-06, "loss": 0.9734, "step": 9892 }, { "epoch": 1.4005804487860125, "grad_norm": 9.127843126478542, "learning_rate": 1.0878683798837524e-06, "loss": 0.979, "step": 9893 }, { "epoch": 1.4007220216606497, "grad_norm": 9.891314972107672, "learning_rate": 1.087395417359741e-06, "loss": 1.0566, "step": 9894 }, { "epoch": 1.400863594535287, "grad_norm": 10.167665234497091, "learning_rate": 1.0869225290952997e-06, "loss": 1.1065, "step": 9895 }, { "epoch": 1.4010051674099242, "grad_norm": 8.431401061789558, "learning_rate": 1.0864497151152879e-06, "loss": 1.0283, "step": 9896 }, { "epoch": 1.4011467402845614, "grad_norm": 9.248481007167685, "learning_rate": 1.0859769754445592e-06, "loss": 0.9978, "step": 9897 }, { "epoch": 1.4012883131591987, "grad_norm": 10.478317398229875, "learning_rate": 1.0855043101079677e-06, "loss": 0.9315, "step": 9898 }, { "epoch": 1.401429886033836, "grad_norm": 8.179632469072285, "learning_rate": 1.085031719130359e-06, "loss": 0.9549, "step": 9899 }, { "epoch": 1.4015714589084731, "grad_norm": 10.750669236753865, "learning_rate": 1.0845592025365786e-06, "loss": 1.0403, "step": 9900 }, { "epoch": 1.4017130317831104, "grad_norm": 10.022738734899104, "learning_rate": 1.0840867603514648e-06, "loss": 1.0682, "step": 9901 }, { "epoch": 1.4018546046577476, "grad_norm": 10.798832187386196, "learning_rate": 1.083614392599855e-06, "loss": 0.9885, "step": 9902 }, { "epoch": 1.4019961775323848, "grad_norm": 10.530815681534694, "learning_rate": 1.0831420993065798e-06, "loss": 1.0795, "step": 9903 }, { "epoch": 1.402137750407022, "grad_norm": 9.921241752060679, "learning_rate": 1.0826698804964679e-06, "loss": 0.9896, "step": 9904 }, { "epoch": 1.4022793232816593, "grad_norm": 9.350815159151901, "learning_rate": 1.0821977361943441e-06, "loss": 1.0132, "step": 9905 }, { "epoch": 1.4024208961562965, "grad_norm": 8.765175003768018, "learning_rate": 1.0817256664250275e-06, "loss": 0.9847, "step": 9906 }, { "epoch": 1.4025624690309337, "grad_norm": 9.03395541630947, "learning_rate": 1.081253671213337e-06, "loss": 1.0261, "step": 9907 }, { "epoch": 1.402704041905571, "grad_norm": 8.119779817198564, "learning_rate": 1.0807817505840815e-06, "loss": 0.9404, "step": 9908 }, { "epoch": 1.4028456147802082, "grad_norm": 8.669083869516937, "learning_rate": 1.0803099045620716e-06, "loss": 0.9466, "step": 9909 }, { "epoch": 1.4029871876548454, "grad_norm": 8.702052380743696, "learning_rate": 1.079838133172111e-06, "loss": 0.952, "step": 9910 }, { "epoch": 1.4031287605294827, "grad_norm": 10.601527237183433, "learning_rate": 1.0793664364390004e-06, "loss": 0.9277, "step": 9911 }, { "epoch": 1.4032703334041199, "grad_norm": 7.9785318811033905, "learning_rate": 1.0788948143875383e-06, "loss": 1.0133, "step": 9912 }, { "epoch": 1.4034119062787571, "grad_norm": 9.908616418112524, "learning_rate": 1.0784232670425148e-06, "loss": 1.0181, "step": 9913 }, { "epoch": 1.4035534791533943, "grad_norm": 9.385296672995304, "learning_rate": 1.0779517944287216e-06, "loss": 1.127, "step": 9914 }, { "epoch": 1.4036950520280314, "grad_norm": 8.50803578822003, "learning_rate": 1.077480396570941e-06, "loss": 1.0146, "step": 9915 }, { "epoch": 1.4038366249026686, "grad_norm": 10.309075174079595, "learning_rate": 1.0770090734939564e-06, "loss": 0.9438, "step": 9916 }, { "epoch": 1.4039781977773058, "grad_norm": 7.906534182300036, "learning_rate": 1.0765378252225436e-06, "loss": 1.051, "step": 9917 }, { "epoch": 1.404119770651943, "grad_norm": 8.682100443039538, "learning_rate": 1.076066651781475e-06, "loss": 0.9939, "step": 9918 }, { "epoch": 1.4042613435265803, "grad_norm": 10.414361398561304, "learning_rate": 1.075595553195522e-06, "loss": 1.0599, "step": 9919 }, { "epoch": 1.4044029164012175, "grad_norm": 8.955649134943968, "learning_rate": 1.0751245294894474e-06, "loss": 0.9874, "step": 9920 }, { "epoch": 1.4045444892758547, "grad_norm": 7.882142476092889, "learning_rate": 1.074653580688015e-06, "loss": 0.9548, "step": 9921 }, { "epoch": 1.404686062150492, "grad_norm": 9.618622884226454, "learning_rate": 1.0741827068159803e-06, "loss": 0.9802, "step": 9922 }, { "epoch": 1.4048276350251292, "grad_norm": 8.36650132500956, "learning_rate": 1.0737119078980981e-06, "loss": 1.1444, "step": 9923 }, { "epoch": 1.4049692078997664, "grad_norm": 10.99576833998965, "learning_rate": 1.0732411839591167e-06, "loss": 1.0017, "step": 9924 }, { "epoch": 1.4051107807744037, "grad_norm": 11.120771311623564, "learning_rate": 1.0727705350237833e-06, "loss": 1.0354, "step": 9925 }, { "epoch": 1.4052523536490409, "grad_norm": 8.803277133741561, "learning_rate": 1.0722999611168377e-06, "loss": 0.8871, "step": 9926 }, { "epoch": 1.4053939265236781, "grad_norm": 7.775924637448923, "learning_rate": 1.0718294622630188e-06, "loss": 0.966, "step": 9927 }, { "epoch": 1.4055354993983153, "grad_norm": 9.316487322760453, "learning_rate": 1.071359038487062e-06, "loss": 1.008, "step": 9928 }, { "epoch": 1.4056770722729526, "grad_norm": 12.12770412219026, "learning_rate": 1.0708886898136932e-06, "loss": 1.133, "step": 9929 }, { "epoch": 1.4058186451475896, "grad_norm": 8.786018570117703, "learning_rate": 1.0704184162676417e-06, "loss": 0.9342, "step": 9930 }, { "epoch": 1.4059602180222268, "grad_norm": 8.761853282098013, "learning_rate": 1.069948217873627e-06, "loss": 1.0629, "step": 9931 }, { "epoch": 1.406101790896864, "grad_norm": 9.927528993117052, "learning_rate": 1.069478094656369e-06, "loss": 1.0585, "step": 9932 }, { "epoch": 1.4062433637715013, "grad_norm": 9.68641942058087, "learning_rate": 1.0690080466405803e-06, "loss": 1.0358, "step": 9933 }, { "epoch": 1.4063849366461385, "grad_norm": 8.489293255365014, "learning_rate": 1.0685380738509712e-06, "loss": 0.9752, "step": 9934 }, { "epoch": 1.4065265095207757, "grad_norm": 8.387986775356449, "learning_rate": 1.0680681763122493e-06, "loss": 0.9716, "step": 9935 }, { "epoch": 1.406668082395413, "grad_norm": 10.902521496222144, "learning_rate": 1.067598354049115e-06, "loss": 0.9811, "step": 9936 }, { "epoch": 1.4068096552700502, "grad_norm": 9.243556072065656, "learning_rate": 1.0671286070862678e-06, "loss": 0.992, "step": 9937 }, { "epoch": 1.4069512281446874, "grad_norm": 9.370126093094386, "learning_rate": 1.0666589354484005e-06, "loss": 0.9365, "step": 9938 }, { "epoch": 1.4070928010193247, "grad_norm": 9.997209541086875, "learning_rate": 1.066189339160205e-06, "loss": 1.0858, "step": 9939 }, { "epoch": 1.4072343738939619, "grad_norm": 9.726326004278487, "learning_rate": 1.065719818246367e-06, "loss": 1.0589, "step": 9940 }, { "epoch": 1.4073759467685991, "grad_norm": 8.251168832921492, "learning_rate": 1.065250372731568e-06, "loss": 0.9036, "step": 9941 }, { "epoch": 1.4075175196432363, "grad_norm": 7.201765945881455, "learning_rate": 1.0647810026404878e-06, "loss": 0.9568, "step": 9942 }, { "epoch": 1.4076590925178736, "grad_norm": 9.494388328323415, "learning_rate": 1.064311707997799e-06, "loss": 1.0038, "step": 9943 }, { "epoch": 1.4078006653925108, "grad_norm": 10.516295005006478, "learning_rate": 1.0638424888281744e-06, "loss": 1.1114, "step": 9944 }, { "epoch": 1.407942238267148, "grad_norm": 9.566196101331036, "learning_rate": 1.0633733451562787e-06, "loss": 1.0112, "step": 9945 }, { "epoch": 1.4080838111417853, "grad_norm": 8.960243577371115, "learning_rate": 1.0629042770067754e-06, "loss": 1.0299, "step": 9946 }, { "epoch": 1.4082253840164225, "grad_norm": 9.766705018403284, "learning_rate": 1.0624352844043224e-06, "loss": 1.0099, "step": 9947 }, { "epoch": 1.4083669568910597, "grad_norm": 9.192886953086422, "learning_rate": 1.061966367373575e-06, "loss": 1.109, "step": 9948 }, { "epoch": 1.408508529765697, "grad_norm": 8.795389649660523, "learning_rate": 1.0614975259391835e-06, "loss": 1.0161, "step": 9949 }, { "epoch": 1.4086501026403342, "grad_norm": 8.913048220943674, "learning_rate": 1.0610287601257937e-06, "loss": 1.1388, "step": 9950 }, { "epoch": 1.4087916755149714, "grad_norm": 11.112956525352512, "learning_rate": 1.06056006995805e-06, "loss": 1.082, "step": 9951 }, { "epoch": 1.4089332483896087, "grad_norm": 10.566218156251, "learning_rate": 1.060091455460589e-06, "loss": 1.037, "step": 9952 }, { "epoch": 1.4090748212642459, "grad_norm": 9.428144048937709, "learning_rate": 1.0596229166580477e-06, "loss": 0.9881, "step": 9953 }, { "epoch": 1.409216394138883, "grad_norm": 10.563663937695775, "learning_rate": 1.0591544535750545e-06, "loss": 1.0606, "step": 9954 }, { "epoch": 1.4093579670135203, "grad_norm": 7.9918879865828885, "learning_rate": 1.0586860662362375e-06, "loss": 0.9399, "step": 9955 }, { "epoch": 1.4094995398881573, "grad_norm": 8.641710540846491, "learning_rate": 1.0582177546662203e-06, "loss": 0.9624, "step": 9956 }, { "epoch": 1.4096411127627946, "grad_norm": 9.54186580622285, "learning_rate": 1.0577495188896198e-06, "loss": 1.0746, "step": 9957 }, { "epoch": 1.4097826856374318, "grad_norm": 7.907770907705898, "learning_rate": 1.0572813589310524e-06, "loss": 1.0465, "step": 9958 }, { "epoch": 1.409924258512069, "grad_norm": 12.086603463188787, "learning_rate": 1.0568132748151274e-06, "loss": 1.0087, "step": 9959 }, { "epoch": 1.4100658313867063, "grad_norm": 10.828175378107014, "learning_rate": 1.0563452665664542e-06, "loss": 0.9298, "step": 9960 }, { "epoch": 1.4102074042613435, "grad_norm": 9.217026785448324, "learning_rate": 1.055877334209632e-06, "loss": 1.0611, "step": 9961 }, { "epoch": 1.4103489771359807, "grad_norm": 9.700845693315987, "learning_rate": 1.055409477769262e-06, "loss": 1.1251, "step": 9962 }, { "epoch": 1.410490550010618, "grad_norm": 11.901439265559308, "learning_rate": 1.0549416972699392e-06, "loss": 1.0218, "step": 9963 }, { "epoch": 1.4106321228852552, "grad_norm": 9.372457744496012, "learning_rate": 1.054473992736253e-06, "loss": 1.0618, "step": 9964 }, { "epoch": 1.4107736957598924, "grad_norm": 9.326154900180887, "learning_rate": 1.0540063641927923e-06, "loss": 0.9688, "step": 9965 }, { "epoch": 1.4109152686345297, "grad_norm": 10.176788502811535, "learning_rate": 1.0535388116641376e-06, "loss": 1.0063, "step": 9966 }, { "epoch": 1.4110568415091669, "grad_norm": 10.154598723934916, "learning_rate": 1.0530713351748704e-06, "loss": 1.0217, "step": 9967 }, { "epoch": 1.4111984143838041, "grad_norm": 8.255454370019178, "learning_rate": 1.052603934749563e-06, "loss": 0.9024, "step": 9968 }, { "epoch": 1.4113399872584413, "grad_norm": 10.210583331176071, "learning_rate": 1.0521366104127885e-06, "loss": 1.03, "step": 9969 }, { "epoch": 1.4114815601330786, "grad_norm": 8.481595478035212, "learning_rate": 1.0516693621891127e-06, "loss": 0.9556, "step": 9970 }, { "epoch": 1.4116231330077158, "grad_norm": 8.85825842762591, "learning_rate": 1.0512021901030978e-06, "loss": 0.8219, "step": 9971 }, { "epoch": 1.4117647058823528, "grad_norm": 7.9993407454652505, "learning_rate": 1.0507350941793044e-06, "loss": 0.9926, "step": 9972 }, { "epoch": 1.41190627875699, "grad_norm": 11.715142673953816, "learning_rate": 1.0502680744422856e-06, "loss": 0.9621, "step": 9973 }, { "epoch": 1.4120478516316273, "grad_norm": 9.98393217486794, "learning_rate": 1.049801130916594e-06, "loss": 0.8474, "step": 9974 }, { "epoch": 1.4121894245062645, "grad_norm": 7.907567815305924, "learning_rate": 1.0493342636267747e-06, "loss": 1.0552, "step": 9975 }, { "epoch": 1.4123309973809017, "grad_norm": 8.542281065142872, "learning_rate": 1.0488674725973727e-06, "loss": 0.9288, "step": 9976 }, { "epoch": 1.412472570255539, "grad_norm": 9.341883648392205, "learning_rate": 1.0484007578529246e-06, "loss": 1.0488, "step": 9977 }, { "epoch": 1.4126141431301762, "grad_norm": 8.019366664470377, "learning_rate": 1.047934119417966e-06, "loss": 0.9763, "step": 9978 }, { "epoch": 1.4127557160048134, "grad_norm": 9.023462865224088, "learning_rate": 1.0474675573170293e-06, "loss": 1.0877, "step": 9979 }, { "epoch": 1.4128972888794507, "grad_norm": 8.664746242886359, "learning_rate": 1.047001071574639e-06, "loss": 1.042, "step": 9980 }, { "epoch": 1.4130388617540879, "grad_norm": 9.290147825446716, "learning_rate": 1.0465346622153209e-06, "loss": 0.9932, "step": 9981 }, { "epoch": 1.4131804346287251, "grad_norm": 9.108786614654479, "learning_rate": 1.04606832926359e-06, "loss": 1.0223, "step": 9982 }, { "epoch": 1.4133220075033623, "grad_norm": 8.996533892022667, "learning_rate": 1.0456020727439635e-06, "loss": 1.0207, "step": 9983 }, { "epoch": 1.4134635803779996, "grad_norm": 10.586047768106509, "learning_rate": 1.0451358926809513e-06, "loss": 0.984, "step": 9984 }, { "epoch": 1.4136051532526368, "grad_norm": 9.32829430201261, "learning_rate": 1.04466978909906e-06, "loss": 0.9047, "step": 9985 }, { "epoch": 1.413746726127274, "grad_norm": 9.881309714670728, "learning_rate": 1.0442037620227938e-06, "loss": 1.0072, "step": 9986 }, { "epoch": 1.4138882990019113, "grad_norm": 8.968860585664052, "learning_rate": 1.0437378114766495e-06, "loss": 0.9894, "step": 9987 }, { "epoch": 1.4140298718765485, "grad_norm": 9.371013556737948, "learning_rate": 1.0432719374851233e-06, "loss": 0.9621, "step": 9988 }, { "epoch": 1.4141714447511857, "grad_norm": 9.53251104298694, "learning_rate": 1.0428061400727045e-06, "loss": 0.9185, "step": 9989 }, { "epoch": 1.414313017625823, "grad_norm": 9.849703293412533, "learning_rate": 1.0423404192638812e-06, "loss": 0.914, "step": 9990 }, { "epoch": 1.4144545905004602, "grad_norm": 9.297636113146204, "learning_rate": 1.041874775083134e-06, "loss": 0.9649, "step": 9991 }, { "epoch": 1.4145961633750974, "grad_norm": 8.158087395561124, "learning_rate": 1.041409207554944e-06, "loss": 0.915, "step": 9992 }, { "epoch": 1.4147377362497346, "grad_norm": 9.718544715602237, "learning_rate": 1.0409437167037843e-06, "loss": 0.9845, "step": 9993 }, { "epoch": 1.4148793091243719, "grad_norm": 9.201212297260499, "learning_rate": 1.0404783025541244e-06, "loss": 0.9492, "step": 9994 }, { "epoch": 1.415020881999009, "grad_norm": 8.992170106819625, "learning_rate": 1.0400129651304328e-06, "loss": 1.1167, "step": 9995 }, { "epoch": 1.4151624548736463, "grad_norm": 9.82094759469344, "learning_rate": 1.03954770445717e-06, "loss": 0.9107, "step": 9996 }, { "epoch": 1.4153040277482836, "grad_norm": 11.001914464637826, "learning_rate": 1.0390825205587966e-06, "loss": 0.989, "step": 9997 }, { "epoch": 1.4154456006229206, "grad_norm": 9.779058728938253, "learning_rate": 1.0386174134597649e-06, "loss": 0.9825, "step": 9998 }, { "epoch": 1.4155871734975578, "grad_norm": 9.901383320657658, "learning_rate": 1.0381523831845266e-06, "loss": 1.0791, "step": 9999 }, { "epoch": 1.415728746372195, "grad_norm": 10.373147822129312, "learning_rate": 1.037687429757527e-06, "loss": 0.9561, "step": 10000 }, { "epoch": 1.4158703192468323, "grad_norm": 8.36939929041636, "learning_rate": 1.0372225532032087e-06, "loss": 0.9785, "step": 10001 }, { "epoch": 1.4160118921214695, "grad_norm": 8.230342681165473, "learning_rate": 1.0367577535460122e-06, "loss": 0.8896, "step": 10002 }, { "epoch": 1.4161534649961067, "grad_norm": 8.162064273998988, "learning_rate": 1.0362930308103675e-06, "loss": 1.0298, "step": 10003 }, { "epoch": 1.416295037870744, "grad_norm": 8.258453199926189, "learning_rate": 1.0358283850207077e-06, "loss": 0.9029, "step": 10004 }, { "epoch": 1.4164366107453812, "grad_norm": 8.922044742392515, "learning_rate": 1.035363816201457e-06, "loss": 1.0004, "step": 10005 }, { "epoch": 1.4165781836200184, "grad_norm": 9.112996188078089, "learning_rate": 1.0348993243770395e-06, "loss": 1.1048, "step": 10006 }, { "epoch": 1.4167197564946556, "grad_norm": 10.826057886318091, "learning_rate": 1.0344349095718712e-06, "loss": 0.969, "step": 10007 }, { "epoch": 1.4168613293692929, "grad_norm": 8.257863776301303, "learning_rate": 1.0339705718103666e-06, "loss": 0.8446, "step": 10008 }, { "epoch": 1.41700290224393, "grad_norm": 8.325638791798735, "learning_rate": 1.0335063111169372e-06, "loss": 0.9271, "step": 10009 }, { "epoch": 1.4171444751185673, "grad_norm": 8.0959361301662, "learning_rate": 1.0330421275159863e-06, "loss": 0.9124, "step": 10010 }, { "epoch": 1.4172860479932046, "grad_norm": 8.847889500926504, "learning_rate": 1.032578021031918e-06, "loss": 0.9488, "step": 10011 }, { "epoch": 1.4174276208678418, "grad_norm": 7.884875448207171, "learning_rate": 1.032113991689128e-06, "loss": 0.8708, "step": 10012 }, { "epoch": 1.4175691937424788, "grad_norm": 9.680814934467561, "learning_rate": 1.031650039512012e-06, "loss": 0.9081, "step": 10013 }, { "epoch": 1.417710766617116, "grad_norm": 8.111215489149933, "learning_rate": 1.0311861645249588e-06, "loss": 0.8486, "step": 10014 }, { "epoch": 1.4178523394917533, "grad_norm": 8.579129964469919, "learning_rate": 1.0307223667523524e-06, "loss": 0.9023, "step": 10015 }, { "epoch": 1.4179939123663905, "grad_norm": 9.977347083968983, "learning_rate": 1.0302586462185769e-06, "loss": 0.915, "step": 10016 }, { "epoch": 1.4181354852410277, "grad_norm": 9.090893693390724, "learning_rate": 1.0297950029480073e-06, "loss": 1.024, "step": 10017 }, { "epoch": 1.418277058115665, "grad_norm": 10.281589861878883, "learning_rate": 1.0293314369650193e-06, "loss": 0.9581, "step": 10018 }, { "epoch": 1.4184186309903022, "grad_norm": 10.077475451451587, "learning_rate": 1.0288679482939801e-06, "loss": 0.9837, "step": 10019 }, { "epoch": 1.4185602038649394, "grad_norm": 9.008164411355496, "learning_rate": 1.0284045369592567e-06, "loss": 1.0137, "step": 10020 }, { "epoch": 1.4187017767395766, "grad_norm": 9.917025605018166, "learning_rate": 1.0279412029852087e-06, "loss": 0.9903, "step": 10021 }, { "epoch": 1.4188433496142139, "grad_norm": 8.782629590249794, "learning_rate": 1.0274779463961947e-06, "loss": 0.8891, "step": 10022 }, { "epoch": 1.418984922488851, "grad_norm": 9.959990380830218, "learning_rate": 1.0270147672165677e-06, "loss": 1.1136, "step": 10023 }, { "epoch": 1.4191264953634883, "grad_norm": 10.056363906357499, "learning_rate": 1.0265516654706748e-06, "loss": 1.1713, "step": 10024 }, { "epoch": 1.4192680682381256, "grad_norm": 9.597767538528572, "learning_rate": 1.026088641182863e-06, "loss": 1.0733, "step": 10025 }, { "epoch": 1.4194096411127628, "grad_norm": 8.47833092872141, "learning_rate": 1.0256256943774718e-06, "loss": 0.9263, "step": 10026 }, { "epoch": 1.4195512139874, "grad_norm": 10.629847957639802, "learning_rate": 1.025162825078839e-06, "loss": 1.0289, "step": 10027 }, { "epoch": 1.4196927868620373, "grad_norm": 10.994680679076737, "learning_rate": 1.0247000333112962e-06, "loss": 0.9997, "step": 10028 }, { "epoch": 1.4198343597366745, "grad_norm": 8.488671776134899, "learning_rate": 1.0242373190991734e-06, "loss": 0.9207, "step": 10029 }, { "epoch": 1.4199759326113117, "grad_norm": 9.608594115248184, "learning_rate": 1.0237746824667932e-06, "loss": 0.9619, "step": 10030 }, { "epoch": 1.420117505485949, "grad_norm": 8.768962258823999, "learning_rate": 1.0233121234384777e-06, "loss": 0.9249, "step": 10031 }, { "epoch": 1.4202590783605862, "grad_norm": 11.00598640537826, "learning_rate": 1.0228496420385434e-06, "loss": 1.0945, "step": 10032 }, { "epoch": 1.4204006512352234, "grad_norm": 9.459851203377307, "learning_rate": 1.022387238291301e-06, "loss": 0.9718, "step": 10033 }, { "epoch": 1.4205422241098606, "grad_norm": 7.815255373481686, "learning_rate": 1.021924912221062e-06, "loss": 0.8313, "step": 10034 }, { "epoch": 1.4206837969844979, "grad_norm": 6.3814332702203656, "learning_rate": 1.021462663852126e-06, "loss": 0.8674, "step": 10035 }, { "epoch": 1.420825369859135, "grad_norm": 11.214812796565608, "learning_rate": 1.0210004932087956e-06, "loss": 1.0402, "step": 10036 }, { "epoch": 1.4209669427337723, "grad_norm": 11.859894792627273, "learning_rate": 1.0205384003153673e-06, "loss": 1.025, "step": 10037 }, { "epoch": 1.4211085156084096, "grad_norm": 8.933300819148016, "learning_rate": 1.0200763851961313e-06, "loss": 0.9491, "step": 10038 }, { "epoch": 1.4212500884830466, "grad_norm": 8.670522223455828, "learning_rate": 1.019614447875377e-06, "loss": 0.9547, "step": 10039 }, { "epoch": 1.4213916613576838, "grad_norm": 11.259904697482423, "learning_rate": 1.0191525883773867e-06, "loss": 1.0307, "step": 10040 }, { "epoch": 1.421533234232321, "grad_norm": 8.94266558136864, "learning_rate": 1.0186908067264415e-06, "loss": 0.9563, "step": 10041 }, { "epoch": 1.4216748071069583, "grad_norm": 10.100448039475502, "learning_rate": 1.018229102946815e-06, "loss": 0.9784, "step": 10042 }, { "epoch": 1.4218163799815955, "grad_norm": 9.801698996738079, "learning_rate": 1.0177674770627807e-06, "loss": 1.032, "step": 10043 }, { "epoch": 1.4219579528562327, "grad_norm": 10.391484045786152, "learning_rate": 1.0173059290986048e-06, "loss": 1.0662, "step": 10044 }, { "epoch": 1.42209952573087, "grad_norm": 7.816105125227864, "learning_rate": 1.01684445907855e-06, "loss": 0.9509, "step": 10045 }, { "epoch": 1.4222410986055072, "grad_norm": 9.409739914643255, "learning_rate": 1.0163830670268768e-06, "loss": 1.0095, "step": 10046 }, { "epoch": 1.4223826714801444, "grad_norm": 8.951594409962325, "learning_rate": 1.015921752967839e-06, "loss": 0.9509, "step": 10047 }, { "epoch": 1.4225242443547816, "grad_norm": 9.254385655981109, "learning_rate": 1.0154605169256884e-06, "loss": 1.0046, "step": 10048 }, { "epoch": 1.4226658172294189, "grad_norm": 9.18845326155843, "learning_rate": 1.014999358924671e-06, "loss": 1.0559, "step": 10049 }, { "epoch": 1.422807390104056, "grad_norm": 10.557515971745074, "learning_rate": 1.014538278989031e-06, "loss": 1.0383, "step": 10050 }, { "epoch": 1.4229489629786933, "grad_norm": 8.65421819425134, "learning_rate": 1.014077277143005e-06, "loss": 0.9338, "step": 10051 }, { "epoch": 1.4230905358533306, "grad_norm": 8.024176781757886, "learning_rate": 1.0136163534108284e-06, "loss": 0.916, "step": 10052 }, { "epoch": 1.4232321087279678, "grad_norm": 9.728950666906558, "learning_rate": 1.0131555078167328e-06, "loss": 1.118, "step": 10053 }, { "epoch": 1.423373681602605, "grad_norm": 8.713828608653953, "learning_rate": 1.012694740384943e-06, "loss": 0.9741, "step": 10054 }, { "epoch": 1.423515254477242, "grad_norm": 11.159987392623506, "learning_rate": 1.0122340511396833e-06, "loss": 1.0491, "step": 10055 }, { "epoch": 1.4236568273518793, "grad_norm": 9.162955995378466, "learning_rate": 1.0117734401051682e-06, "loss": 0.9688, "step": 10056 }, { "epoch": 1.4237984002265165, "grad_norm": 9.51898665155901, "learning_rate": 1.0113129073056149e-06, "loss": 0.8839, "step": 10057 }, { "epoch": 1.4239399731011537, "grad_norm": 12.051690352661959, "learning_rate": 1.0108524527652308e-06, "loss": 1.043, "step": 10058 }, { "epoch": 1.424081545975791, "grad_norm": 9.369276003230294, "learning_rate": 1.010392076508223e-06, "loss": 1.0495, "step": 10059 }, { "epoch": 1.4242231188504282, "grad_norm": 8.784590007874318, "learning_rate": 1.0099317785587941e-06, "loss": 0.9606, "step": 10060 }, { "epoch": 1.4243646917250654, "grad_norm": 9.328017446642308, "learning_rate": 1.0094715589411398e-06, "loss": 0.9156, "step": 10061 }, { "epoch": 1.4245062645997026, "grad_norm": 7.78077003806525, "learning_rate": 1.009011417679455e-06, "loss": 0.9424, "step": 10062 }, { "epoch": 1.4246478374743399, "grad_norm": 8.098811742962619, "learning_rate": 1.0085513547979272e-06, "loss": 0.9325, "step": 10063 }, { "epoch": 1.424789410348977, "grad_norm": 11.7426644010773, "learning_rate": 1.0080913703207434e-06, "loss": 1.0565, "step": 10064 }, { "epoch": 1.4249309832236143, "grad_norm": 11.11757454965729, "learning_rate": 1.0076314642720834e-06, "loss": 0.9146, "step": 10065 }, { "epoch": 1.4250725560982516, "grad_norm": 9.121701833316934, "learning_rate": 1.007171636676125e-06, "loss": 0.9089, "step": 10066 }, { "epoch": 1.4252141289728888, "grad_norm": 10.19945325695072, "learning_rate": 1.006711887557041e-06, "loss": 0.9306, "step": 10067 }, { "epoch": 1.425355701847526, "grad_norm": 9.742018342557088, "learning_rate": 1.0062522169389986e-06, "loss": 1.0949, "step": 10068 }, { "epoch": 1.4254972747221633, "grad_norm": 9.333642500343515, "learning_rate": 1.0057926248461638e-06, "loss": 0.9547, "step": 10069 }, { "epoch": 1.4256388475968005, "grad_norm": 9.013648386683915, "learning_rate": 1.0053331113026962e-06, "loss": 0.9283, "step": 10070 }, { "epoch": 1.4257804204714377, "grad_norm": 9.330328548489272, "learning_rate": 1.0048736763327532e-06, "loss": 1.0323, "step": 10071 }, { "epoch": 1.425921993346075, "grad_norm": 9.819019654357366, "learning_rate": 1.0044143199604856e-06, "loss": 1.0756, "step": 10072 }, { "epoch": 1.4260635662207122, "grad_norm": 9.929667523091627, "learning_rate": 1.0039550422100424e-06, "loss": 0.923, "step": 10073 }, { "epoch": 1.4262051390953494, "grad_norm": 11.535361944703448, "learning_rate": 1.0034958431055666e-06, "loss": 1.0485, "step": 10074 }, { "epoch": 1.4263467119699866, "grad_norm": 9.140357484733869, "learning_rate": 1.0030367226711984e-06, "loss": 0.906, "step": 10075 }, { "epoch": 1.4264882848446239, "grad_norm": 9.280017706755483, "learning_rate": 1.0025776809310752e-06, "loss": 1.1052, "step": 10076 }, { "epoch": 1.426629857719261, "grad_norm": 8.862522434050193, "learning_rate": 1.0021187179093254e-06, "loss": 0.9694, "step": 10077 }, { "epoch": 1.4267714305938983, "grad_norm": 10.64029911567321, "learning_rate": 1.0016598336300781e-06, "loss": 1.0069, "step": 10078 }, { "epoch": 1.4269130034685356, "grad_norm": 10.715097233148205, "learning_rate": 1.0012010281174555e-06, "loss": 1.0523, "step": 10079 }, { "epoch": 1.4270545763431726, "grad_norm": 11.586765869324662, "learning_rate": 1.0007423013955784e-06, "loss": 1.0422, "step": 10080 }, { "epoch": 1.4271961492178098, "grad_norm": 10.614584451412682, "learning_rate": 1.0002836534885594e-06, "loss": 0.9988, "step": 10081 }, { "epoch": 1.427337722092447, "grad_norm": 11.247589192101449, "learning_rate": 9.998250844205107e-07, "loss": 1.0537, "step": 10082 }, { "epoch": 1.4274792949670843, "grad_norm": 8.318097746604707, "learning_rate": 9.993665942155395e-07, "loss": 0.8959, "step": 10083 }, { "epoch": 1.4276208678417215, "grad_norm": 9.781726959592142, "learning_rate": 9.989081828977464e-07, "loss": 1.1051, "step": 10084 }, { "epoch": 1.4277624407163587, "grad_norm": 9.808014251350142, "learning_rate": 9.984498504912321e-07, "loss": 0.9045, "step": 10085 }, { "epoch": 1.427904013590996, "grad_norm": 9.60707821573561, "learning_rate": 9.979915970200888e-07, "loss": 0.9625, "step": 10086 }, { "epoch": 1.4280455864656332, "grad_norm": 8.437164526027557, "learning_rate": 9.97533422508408e-07, "loss": 0.9823, "step": 10087 }, { "epoch": 1.4281871593402704, "grad_norm": 9.959522341262822, "learning_rate": 9.970753269802746e-07, "loss": 0.9167, "step": 10088 }, { "epoch": 1.4283287322149076, "grad_norm": 9.0213086808368, "learning_rate": 9.966173104597701e-07, "loss": 0.9576, "step": 10089 }, { "epoch": 1.4284703050895449, "grad_norm": 8.547373532221252, "learning_rate": 9.961593729709734e-07, "loss": 0.9802, "step": 10090 }, { "epoch": 1.428611877964182, "grad_norm": 11.014074857685648, "learning_rate": 9.957015145379564e-07, "loss": 1.0474, "step": 10091 }, { "epoch": 1.4287534508388193, "grad_norm": 9.155254218729828, "learning_rate": 9.9524373518479e-07, "loss": 1.0277, "step": 10092 }, { "epoch": 1.4288950237134566, "grad_norm": 8.970045468014206, "learning_rate": 9.947860349355372e-07, "loss": 0.8895, "step": 10093 }, { "epoch": 1.4290365965880938, "grad_norm": 10.004422163702902, "learning_rate": 9.943284138142615e-07, "loss": 0.9628, "step": 10094 }, { "epoch": 1.429178169462731, "grad_norm": 10.070359471864851, "learning_rate": 9.938708718450175e-07, "loss": 0.957, "step": 10095 }, { "epoch": 1.429319742337368, "grad_norm": 8.205961656116859, "learning_rate": 9.934134090518593e-07, "loss": 0.8662, "step": 10096 }, { "epoch": 1.4294613152120053, "grad_norm": 7.687691042627463, "learning_rate": 9.929560254588353e-07, "loss": 0.8529, "step": 10097 }, { "epoch": 1.4296028880866425, "grad_norm": 9.962839508972477, "learning_rate": 9.92498721089988e-07, "loss": 0.9102, "step": 10098 }, { "epoch": 1.4297444609612797, "grad_norm": 10.164451691362164, "learning_rate": 9.9204149596936e-07, "loss": 0.985, "step": 10099 }, { "epoch": 1.429886033835917, "grad_norm": 7.39765244440643, "learning_rate": 9.91584350120985e-07, "loss": 0.8694, "step": 10100 }, { "epoch": 1.4300276067105542, "grad_norm": 9.36281149413503, "learning_rate": 9.911272835688973e-07, "loss": 1.0648, "step": 10101 }, { "epoch": 1.4301691795851914, "grad_norm": 9.22975582771073, "learning_rate": 9.906702963371222e-07, "loss": 0.9487, "step": 10102 }, { "epoch": 1.4303107524598286, "grad_norm": 8.889900271840046, "learning_rate": 9.902133884496853e-07, "loss": 0.913, "step": 10103 }, { "epoch": 1.4304523253344659, "grad_norm": 10.706660000176292, "learning_rate": 9.897565599306037e-07, "loss": 0.9916, "step": 10104 }, { "epoch": 1.430593898209103, "grad_norm": 8.835747107116521, "learning_rate": 9.892998108038937e-07, "loss": 1.0926, "step": 10105 }, { "epoch": 1.4307354710837403, "grad_norm": 7.628799992459572, "learning_rate": 9.88843141093567e-07, "loss": 0.9021, "step": 10106 }, { "epoch": 1.4308770439583776, "grad_norm": 9.151071587855418, "learning_rate": 9.88386550823629e-07, "loss": 1.0125, "step": 10107 }, { "epoch": 1.4310186168330148, "grad_norm": 9.271177226914151, "learning_rate": 9.879300400180844e-07, "loss": 1.0689, "step": 10108 }, { "epoch": 1.431160189707652, "grad_norm": 9.263903504575694, "learning_rate": 9.874736087009285e-07, "loss": 1.0114, "step": 10109 }, { "epoch": 1.4313017625822892, "grad_norm": 8.977241988398314, "learning_rate": 9.870172568961572e-07, "loss": 0.9999, "step": 10110 }, { "epoch": 1.4314433354569265, "grad_norm": 9.810798254481472, "learning_rate": 9.865609846277615e-07, "loss": 1.1067, "step": 10111 }, { "epoch": 1.4315849083315637, "grad_norm": 8.709787883080834, "learning_rate": 9.861047919197254e-07, "loss": 0.907, "step": 10112 }, { "epoch": 1.431726481206201, "grad_norm": 8.399370406034814, "learning_rate": 9.856486787960326e-07, "loss": 1.0108, "step": 10113 }, { "epoch": 1.4318680540808382, "grad_norm": 9.965451642571434, "learning_rate": 9.851926452806584e-07, "loss": 1.1269, "step": 10114 }, { "epoch": 1.4320096269554754, "grad_norm": 9.095986841653584, "learning_rate": 9.847366913975787e-07, "loss": 0.9848, "step": 10115 }, { "epoch": 1.4321511998301126, "grad_norm": 11.005586765978293, "learning_rate": 9.842808171707602e-07, "loss": 1.0114, "step": 10116 }, { "epoch": 1.4322927727047499, "grad_norm": 10.437210170353795, "learning_rate": 9.838250226241696e-07, "loss": 0.9445, "step": 10117 }, { "epoch": 1.432434345579387, "grad_norm": 7.588091716286473, "learning_rate": 9.833693077817666e-07, "loss": 0.8942, "step": 10118 }, { "epoch": 1.4325759184540243, "grad_norm": 9.773917182598037, "learning_rate": 9.82913672667509e-07, "loss": 0.9805, "step": 10119 }, { "epoch": 1.4327174913286616, "grad_norm": 10.75490121225896, "learning_rate": 9.824581173053483e-07, "loss": 0.9603, "step": 10120 }, { "epoch": 1.4328590642032988, "grad_norm": 9.059269612157578, "learning_rate": 9.820026417192322e-07, "loss": 1.0621, "step": 10121 }, { "epoch": 1.4330006370779358, "grad_norm": 8.892556890692306, "learning_rate": 9.815472459331061e-07, "loss": 1.0997, "step": 10122 }, { "epoch": 1.433142209952573, "grad_norm": 9.914791624464074, "learning_rate": 9.81091929970908e-07, "loss": 1.0054, "step": 10123 }, { "epoch": 1.4332837828272103, "grad_norm": 8.834583074104954, "learning_rate": 9.806366938565756e-07, "loss": 0.8961, "step": 10124 }, { "epoch": 1.4334253557018475, "grad_norm": 9.276139313264006, "learning_rate": 9.801815376140385e-07, "loss": 0.8764, "step": 10125 }, { "epoch": 1.4335669285764847, "grad_norm": 8.589605268261796, "learning_rate": 9.797264612672256e-07, "loss": 0.9172, "step": 10126 }, { "epoch": 1.433708501451122, "grad_norm": 8.905071622291677, "learning_rate": 9.792714648400584e-07, "loss": 0.9662, "step": 10127 }, { "epoch": 1.4338500743257592, "grad_norm": 12.152013516085193, "learning_rate": 9.78816548356456e-07, "loss": 0.9928, "step": 10128 }, { "epoch": 1.4339916472003964, "grad_norm": 8.583659736821996, "learning_rate": 9.783617118403354e-07, "loss": 0.9449, "step": 10129 }, { "epoch": 1.4341332200750336, "grad_norm": 8.487785089617848, "learning_rate": 9.779069553156031e-07, "loss": 0.9717, "step": 10130 }, { "epoch": 1.4342747929496709, "grad_norm": 8.896203729451031, "learning_rate": 9.774522788061685e-07, "loss": 1.0395, "step": 10131 }, { "epoch": 1.434416365824308, "grad_norm": 9.578796462750667, "learning_rate": 9.769976823359311e-07, "loss": 1.0201, "step": 10132 }, { "epoch": 1.4345579386989453, "grad_norm": 9.82446414951957, "learning_rate": 9.765431659287901e-07, "loss": 0.9283, "step": 10133 }, { "epoch": 1.4346995115735826, "grad_norm": 9.513966382926167, "learning_rate": 9.760887296086397e-07, "loss": 1.0969, "step": 10134 }, { "epoch": 1.4348410844482198, "grad_norm": 7.550855514863707, "learning_rate": 9.756343733993679e-07, "loss": 0.926, "step": 10135 }, { "epoch": 1.434982657322857, "grad_norm": 9.021198738103672, "learning_rate": 9.75180097324861e-07, "loss": 0.8644, "step": 10136 }, { "epoch": 1.435124230197494, "grad_norm": 9.308619567604776, "learning_rate": 9.747259014089988e-07, "loss": 1.0022, "step": 10137 }, { "epoch": 1.4352658030721313, "grad_norm": 8.338777467696431, "learning_rate": 9.742717856756595e-07, "loss": 0.9491, "step": 10138 }, { "epoch": 1.4354073759467685, "grad_norm": 10.04302914399869, "learning_rate": 9.738177501487137e-07, "loss": 0.9359, "step": 10139 }, { "epoch": 1.4355489488214057, "grad_norm": 10.10248690665911, "learning_rate": 9.73363794852032e-07, "loss": 1.049, "step": 10140 }, { "epoch": 1.435690521696043, "grad_norm": 8.272763838101273, "learning_rate": 9.729099198094771e-07, "loss": 1.0008, "step": 10141 }, { "epoch": 1.4358320945706802, "grad_norm": 7.930905704224874, "learning_rate": 9.724561250449082e-07, "loss": 0.908, "step": 10142 }, { "epoch": 1.4359736674453174, "grad_norm": 8.68058578553764, "learning_rate": 9.720024105821827e-07, "loss": 1.0109, "step": 10143 }, { "epoch": 1.4361152403199546, "grad_norm": 10.156219951878628, "learning_rate": 9.715487764451504e-07, "loss": 1.1139, "step": 10144 }, { "epoch": 1.4362568131945919, "grad_norm": 8.415691306628174, "learning_rate": 9.7109522265766e-07, "loss": 0.9177, "step": 10145 }, { "epoch": 1.436398386069229, "grad_norm": 8.948381109178221, "learning_rate": 9.70641749243553e-07, "loss": 0.9136, "step": 10146 }, { "epoch": 1.4365399589438663, "grad_norm": 9.348628272957349, "learning_rate": 9.701883562266696e-07, "loss": 1.0116, "step": 10147 }, { "epoch": 1.4366815318185036, "grad_norm": 9.945496994189169, "learning_rate": 9.697350436308428e-07, "loss": 0.9582, "step": 10148 }, { "epoch": 1.4368231046931408, "grad_norm": 8.456042450855131, "learning_rate": 9.692818114799038e-07, "loss": 0.9232, "step": 10149 }, { "epoch": 1.436964677567778, "grad_norm": 8.40226929302203, "learning_rate": 9.688286597976804e-07, "loss": 1.0238, "step": 10150 }, { "epoch": 1.4371062504424152, "grad_norm": 9.053736586581213, "learning_rate": 9.68375588607991e-07, "loss": 0.9766, "step": 10151 }, { "epoch": 1.4372478233170525, "grad_norm": 9.667902856467366, "learning_rate": 9.679225979346558e-07, "loss": 0.9836, "step": 10152 }, { "epoch": 1.4373893961916897, "grad_norm": 8.876065472327419, "learning_rate": 9.674696878014862e-07, "loss": 0.912, "step": 10153 }, { "epoch": 1.437530969066327, "grad_norm": 9.03167744717257, "learning_rate": 9.67016858232293e-07, "loss": 0.9116, "step": 10154 }, { "epoch": 1.4376725419409642, "grad_norm": 10.228876072212605, "learning_rate": 9.6656410925088e-07, "loss": 0.957, "step": 10155 }, { "epoch": 1.4378141148156014, "grad_norm": 9.11797030634144, "learning_rate": 9.661114408810485e-07, "loss": 1.0945, "step": 10156 }, { "epoch": 1.4379556876902386, "grad_norm": 8.17628514632075, "learning_rate": 9.656588531465954e-07, "loss": 0.9836, "step": 10157 }, { "epoch": 1.4380972605648759, "grad_norm": 9.925322358784184, "learning_rate": 9.652063460713117e-07, "loss": 1.0632, "step": 10158 }, { "epoch": 1.438238833439513, "grad_norm": 8.436118683950992, "learning_rate": 9.647539196789868e-07, "loss": 0.9628, "step": 10159 }, { "epoch": 1.4383804063141503, "grad_norm": 9.752125043860634, "learning_rate": 9.643015739934027e-07, "loss": 0.9117, "step": 10160 }, { "epoch": 1.4385219791887875, "grad_norm": 8.031487962336753, "learning_rate": 9.638493090383408e-07, "loss": 0.8945, "step": 10161 }, { "epoch": 1.4386635520634248, "grad_norm": 11.1454981863431, "learning_rate": 9.633971248375753e-07, "loss": 1.0763, "step": 10162 }, { "epoch": 1.4388051249380618, "grad_norm": 11.155185509767385, "learning_rate": 9.629450214148764e-07, "loss": 1.0194, "step": 10163 }, { "epoch": 1.438946697812699, "grad_norm": 11.185420178641207, "learning_rate": 9.624929987940124e-07, "loss": 1.1827, "step": 10164 }, { "epoch": 1.4390882706873362, "grad_norm": 10.7559400603588, "learning_rate": 9.62041056998744e-07, "loss": 1.0162, "step": 10165 }, { "epoch": 1.4392298435619735, "grad_norm": 9.541099387106732, "learning_rate": 9.615891960528314e-07, "loss": 1.0797, "step": 10166 }, { "epoch": 1.4393714164366107, "grad_norm": 8.998176813895196, "learning_rate": 9.611374159800272e-07, "loss": 1.0409, "step": 10167 }, { "epoch": 1.439512989311248, "grad_norm": 8.214182930060426, "learning_rate": 9.60685716804082e-07, "loss": 1.0057, "step": 10168 }, { "epoch": 1.4396545621858852, "grad_norm": 8.889231701704148, "learning_rate": 9.6023409854874e-07, "loss": 0.9224, "step": 10169 }, { "epoch": 1.4397961350605224, "grad_norm": 9.757438756353245, "learning_rate": 9.597825612377448e-07, "loss": 1.0457, "step": 10170 }, { "epoch": 1.4399377079351596, "grad_norm": 8.103614716419331, "learning_rate": 9.593311048948306e-07, "loss": 1.0097, "step": 10171 }, { "epoch": 1.4400792808097969, "grad_norm": 9.10573687132055, "learning_rate": 9.588797295437324e-07, "loss": 1.0008, "step": 10172 }, { "epoch": 1.440220853684434, "grad_norm": 11.122185854789361, "learning_rate": 9.584284352081777e-07, "loss": 1.0495, "step": 10173 }, { "epoch": 1.4403624265590713, "grad_norm": 8.380232813111645, "learning_rate": 9.579772219118899e-07, "loss": 0.9646, "step": 10174 }, { "epoch": 1.4405039994337085, "grad_norm": 9.520329056510509, "learning_rate": 9.575260896785907e-07, "loss": 1.0225, "step": 10175 }, { "epoch": 1.4406455723083458, "grad_norm": 8.251870145662673, "learning_rate": 9.570750385319939e-07, "loss": 1.036, "step": 10176 }, { "epoch": 1.440787145182983, "grad_norm": 10.475369219771078, "learning_rate": 9.566240684958128e-07, "loss": 0.9305, "step": 10177 }, { "epoch": 1.4409287180576202, "grad_norm": 9.138217498121215, "learning_rate": 9.561731795937526e-07, "loss": 1.0148, "step": 10178 }, { "epoch": 1.4410702909322572, "grad_norm": 10.359961583718025, "learning_rate": 9.557223718495173e-07, "loss": 1.0224, "step": 10179 }, { "epoch": 1.4412118638068945, "grad_norm": 9.289882152126077, "learning_rate": 9.552716452868064e-07, "loss": 1.0684, "step": 10180 }, { "epoch": 1.4413534366815317, "grad_norm": 9.263026781842575, "learning_rate": 9.548209999293122e-07, "loss": 1.0122, "step": 10181 }, { "epoch": 1.441495009556169, "grad_norm": 9.725376825950667, "learning_rate": 9.543704358007281e-07, "loss": 0.9418, "step": 10182 }, { "epoch": 1.4416365824308062, "grad_norm": 8.419540619630354, "learning_rate": 9.539199529247356e-07, "loss": 1.0594, "step": 10183 }, { "epoch": 1.4417781553054434, "grad_norm": 9.789206684143656, "learning_rate": 9.534695513250183e-07, "loss": 1.0601, "step": 10184 }, { "epoch": 1.4419197281800806, "grad_norm": 10.288702122105677, "learning_rate": 9.530192310252548e-07, "loss": 0.9696, "step": 10185 }, { "epoch": 1.4420613010547179, "grad_norm": 10.25360899866767, "learning_rate": 9.525689920491157e-07, "loss": 1.0402, "step": 10186 }, { "epoch": 1.442202873929355, "grad_norm": 8.224429644720479, "learning_rate": 9.521188344202717e-07, "loss": 1.0821, "step": 10187 }, { "epoch": 1.4423444468039923, "grad_norm": 8.729037651862702, "learning_rate": 9.516687581623857e-07, "loss": 0.9145, "step": 10188 }, { "epoch": 1.4424860196786295, "grad_norm": 8.20074556264265, "learning_rate": 9.512187632991193e-07, "loss": 0.9674, "step": 10189 }, { "epoch": 1.4426275925532668, "grad_norm": 7.856680982811624, "learning_rate": 9.50768849854127e-07, "loss": 0.9765, "step": 10190 }, { "epoch": 1.442769165427904, "grad_norm": 11.524594089203248, "learning_rate": 9.503190178510618e-07, "loss": 1.0368, "step": 10191 }, { "epoch": 1.4429107383025412, "grad_norm": 12.096201251500277, "learning_rate": 9.498692673135698e-07, "loss": 0.8879, "step": 10192 }, { "epoch": 1.4430523111771785, "grad_norm": 8.67950334877219, "learning_rate": 9.494195982652951e-07, "loss": 0.9537, "step": 10193 }, { "epoch": 1.4431938840518157, "grad_norm": 9.694411439544934, "learning_rate": 9.489700107298763e-07, "loss": 0.9988, "step": 10194 }, { "epoch": 1.443335456926453, "grad_norm": 10.172832016231702, "learning_rate": 9.485205047309465e-07, "loss": 1.0809, "step": 10195 }, { "epoch": 1.4434770298010902, "grad_norm": 11.031393163686804, "learning_rate": 9.480710802921377e-07, "loss": 0.9861, "step": 10196 }, { "epoch": 1.4436186026757274, "grad_norm": 9.438166512706905, "learning_rate": 9.476217374370741e-07, "loss": 0.9313, "step": 10197 }, { "epoch": 1.4437601755503646, "grad_norm": 8.82821012472787, "learning_rate": 9.471724761893794e-07, "loss": 0.927, "step": 10198 }, { "epoch": 1.4439017484250019, "grad_norm": 9.487641577989882, "learning_rate": 9.467232965726689e-07, "loss": 0.912, "step": 10199 }, { "epoch": 1.444043321299639, "grad_norm": 8.309961375893852, "learning_rate": 9.462741986105573e-07, "loss": 0.9259, "step": 10200 }, { "epoch": 1.4441848941742763, "grad_norm": 9.890988311603317, "learning_rate": 9.458251823266518e-07, "loss": 0.9539, "step": 10201 }, { "epoch": 1.4443264670489135, "grad_norm": 11.753212996754135, "learning_rate": 9.453762477445574e-07, "loss": 0.9804, "step": 10202 }, { "epoch": 1.4444680399235508, "grad_norm": 8.55331618931936, "learning_rate": 9.449273948878762e-07, "loss": 1.0172, "step": 10203 }, { "epoch": 1.444609612798188, "grad_norm": 10.043370610153278, "learning_rate": 9.444786237802009e-07, "loss": 0.9363, "step": 10204 }, { "epoch": 1.444751185672825, "grad_norm": 11.931448635967653, "learning_rate": 9.440299344451251e-07, "loss": 0.9206, "step": 10205 }, { "epoch": 1.4448927585474622, "grad_norm": 7.771798027096086, "learning_rate": 9.435813269062349e-07, "loss": 0.9382, "step": 10206 }, { "epoch": 1.4450343314220995, "grad_norm": 9.457513670058106, "learning_rate": 9.431328011871135e-07, "loss": 0.9953, "step": 10207 }, { "epoch": 1.4451759042967367, "grad_norm": 9.211657262219516, "learning_rate": 9.426843573113409e-07, "loss": 0.9446, "step": 10208 }, { "epoch": 1.445317477171374, "grad_norm": 9.189155273799441, "learning_rate": 9.422359953024895e-07, "loss": 0.9659, "step": 10209 }, { "epoch": 1.4454590500460112, "grad_norm": 9.934925441606165, "learning_rate": 9.417877151841315e-07, "loss": 1.0852, "step": 10210 }, { "epoch": 1.4456006229206484, "grad_norm": 9.33438263398897, "learning_rate": 9.413395169798303e-07, "loss": 0.8961, "step": 10211 }, { "epoch": 1.4457421957952856, "grad_norm": 10.786387405395624, "learning_rate": 9.408914007131495e-07, "loss": 1.0328, "step": 10212 }, { "epoch": 1.4458837686699229, "grad_norm": 10.68875096902496, "learning_rate": 9.404433664076442e-07, "loss": 1.0679, "step": 10213 }, { "epoch": 1.44602534154456, "grad_norm": 10.323479373708969, "learning_rate": 9.399954140868695e-07, "loss": 1.1236, "step": 10214 }, { "epoch": 1.4461669144191973, "grad_norm": 9.461111682954797, "learning_rate": 9.395475437743723e-07, "loss": 0.9808, "step": 10215 }, { "epoch": 1.4463084872938345, "grad_norm": 8.400734605910683, "learning_rate": 9.390997554936964e-07, "loss": 0.9493, "step": 10216 }, { "epoch": 1.4464500601684718, "grad_norm": 10.139695793569588, "learning_rate": 9.386520492683835e-07, "loss": 0.9693, "step": 10217 }, { "epoch": 1.446591633043109, "grad_norm": 8.921125871570597, "learning_rate": 9.382044251219672e-07, "loss": 0.9745, "step": 10218 }, { "epoch": 1.4467332059177462, "grad_norm": 9.114856673728731, "learning_rate": 9.377568830779807e-07, "loss": 1.0036, "step": 10219 }, { "epoch": 1.4468747787923832, "grad_norm": 8.685023112510626, "learning_rate": 9.373094231599491e-07, "loss": 0.964, "step": 10220 }, { "epoch": 1.4470163516670205, "grad_norm": 10.959902420166445, "learning_rate": 9.368620453913968e-07, "loss": 1.0837, "step": 10221 }, { "epoch": 1.4471579245416577, "grad_norm": 9.199577023278112, "learning_rate": 9.364147497958404e-07, "loss": 0.9574, "step": 10222 }, { "epoch": 1.447299497416295, "grad_norm": 9.696344878556365, "learning_rate": 9.359675363967958e-07, "loss": 1.0083, "step": 10223 }, { "epoch": 1.4474410702909322, "grad_norm": 10.3569196780358, "learning_rate": 9.355204052177705e-07, "loss": 0.9051, "step": 10224 }, { "epoch": 1.4475826431655694, "grad_norm": 10.30378937874428, "learning_rate": 9.350733562822717e-07, "loss": 1.0302, "step": 10225 }, { "epoch": 1.4477242160402066, "grad_norm": 9.498034223657521, "learning_rate": 9.346263896138e-07, "loss": 1.0057, "step": 10226 }, { "epoch": 1.4478657889148439, "grad_norm": 9.298256446076959, "learning_rate": 9.341795052358507e-07, "loss": 0.9409, "step": 10227 }, { "epoch": 1.448007361789481, "grad_norm": 10.521489994719587, "learning_rate": 9.337327031719185e-07, "loss": 1.047, "step": 10228 }, { "epoch": 1.4481489346641183, "grad_norm": 9.97744725545325, "learning_rate": 9.332859834454891e-07, "loss": 1.0758, "step": 10229 }, { "epoch": 1.4482905075387555, "grad_norm": 9.455967499420547, "learning_rate": 9.328393460800475e-07, "loss": 0.9993, "step": 10230 }, { "epoch": 1.4484320804133928, "grad_norm": 10.031317309572659, "learning_rate": 9.323927910990735e-07, "loss": 0.9294, "step": 10231 }, { "epoch": 1.44857365328803, "grad_norm": 9.12251728720058, "learning_rate": 9.31946318526041e-07, "loss": 0.8772, "step": 10232 }, { "epoch": 1.4487152261626672, "grad_norm": 12.111008584369184, "learning_rate": 9.314999283844223e-07, "loss": 1.0386, "step": 10233 }, { "epoch": 1.4488567990373045, "grad_norm": 7.74814534299038, "learning_rate": 9.310536206976819e-07, "loss": 0.9548, "step": 10234 }, { "epoch": 1.4489983719119417, "grad_norm": 8.937071890014378, "learning_rate": 9.306073954892844e-07, "loss": 1.0806, "step": 10235 }, { "epoch": 1.449139944786579, "grad_norm": 10.00324692465624, "learning_rate": 9.301612527826844e-07, "loss": 0.9316, "step": 10236 }, { "epoch": 1.4492815176612162, "grad_norm": 9.843357817088352, "learning_rate": 9.297151926013368e-07, "loss": 1.0034, "step": 10237 }, { "epoch": 1.4494230905358534, "grad_norm": 12.762827086857497, "learning_rate": 9.292692149686913e-07, "loss": 1.078, "step": 10238 }, { "epoch": 1.4495646634104906, "grad_norm": 9.387819197875094, "learning_rate": 9.288233199081914e-07, "loss": 1.0899, "step": 10239 }, { "epoch": 1.4497062362851278, "grad_norm": 11.101371103095884, "learning_rate": 9.283775074432788e-07, "loss": 0.9459, "step": 10240 }, { "epoch": 1.449847809159765, "grad_norm": 7.608120103949022, "learning_rate": 9.279317775973879e-07, "loss": 0.8648, "step": 10241 }, { "epoch": 1.4499893820344023, "grad_norm": 8.648416327887363, "learning_rate": 9.274861303939523e-07, "loss": 0.9715, "step": 10242 }, { "epoch": 1.4501309549090395, "grad_norm": 9.187961281335713, "learning_rate": 9.270405658563972e-07, "loss": 1.0015, "step": 10243 }, { "epoch": 1.4502725277836768, "grad_norm": 9.104924523230848, "learning_rate": 9.265950840081475e-07, "loss": 0.9995, "step": 10244 }, { "epoch": 1.450414100658314, "grad_norm": 10.651698418525505, "learning_rate": 9.261496848726204e-07, "loss": 1.0393, "step": 10245 }, { "epoch": 1.450555673532951, "grad_norm": 8.721940745396688, "learning_rate": 9.257043684732316e-07, "loss": 1.0426, "step": 10246 }, { "epoch": 1.4506972464075882, "grad_norm": 8.739087520777028, "learning_rate": 9.252591348333906e-07, "loss": 0.9123, "step": 10247 }, { "epoch": 1.4508388192822255, "grad_norm": 9.904786973614112, "learning_rate": 9.248139839765013e-07, "loss": 1.0077, "step": 10248 }, { "epoch": 1.4509803921568627, "grad_norm": 8.151563211316338, "learning_rate": 9.243689159259677e-07, "loss": 0.9692, "step": 10249 }, { "epoch": 1.4511219650315, "grad_norm": 11.138781979608803, "learning_rate": 9.239239307051842e-07, "loss": 1.0883, "step": 10250 }, { "epoch": 1.4512635379061372, "grad_norm": 10.237834875570767, "learning_rate": 9.234790283375456e-07, "loss": 1.025, "step": 10251 }, { "epoch": 1.4514051107807744, "grad_norm": 9.32133280052926, "learning_rate": 9.230342088464381e-07, "loss": 0.9356, "step": 10252 }, { "epoch": 1.4515466836554116, "grad_norm": 9.167182722882043, "learning_rate": 9.225894722552462e-07, "loss": 0.9286, "step": 10253 }, { "epoch": 1.4516882565300488, "grad_norm": 8.769486880455684, "learning_rate": 9.221448185873505e-07, "loss": 1.0286, "step": 10254 }, { "epoch": 1.451829829404686, "grad_norm": 9.619096010366079, "learning_rate": 9.217002478661244e-07, "loss": 0.9794, "step": 10255 }, { "epoch": 1.4519714022793233, "grad_norm": 8.309915929706452, "learning_rate": 9.212557601149411e-07, "loss": 0.9966, "step": 10256 }, { "epoch": 1.4521129751539605, "grad_norm": 7.411023368233429, "learning_rate": 9.208113553571638e-07, "loss": 0.8686, "step": 10257 }, { "epoch": 1.4522545480285978, "grad_norm": 10.867216284334633, "learning_rate": 9.203670336161558e-07, "loss": 1.1452, "step": 10258 }, { "epoch": 1.452396120903235, "grad_norm": 9.586642238637108, "learning_rate": 9.199227949152758e-07, "loss": 0.9755, "step": 10259 }, { "epoch": 1.4525376937778722, "grad_norm": 10.952414496392166, "learning_rate": 9.194786392778757e-07, "loss": 0.9568, "step": 10260 }, { "epoch": 1.4526792666525095, "grad_norm": 9.191006983731434, "learning_rate": 9.190345667273059e-07, "loss": 0.9026, "step": 10261 }, { "epoch": 1.4528208395271465, "grad_norm": 8.497243097545761, "learning_rate": 9.185905772869091e-07, "loss": 0.9793, "step": 10262 }, { "epoch": 1.4529624124017837, "grad_norm": 8.225017753976138, "learning_rate": 9.181466709800274e-07, "loss": 0.9565, "step": 10263 }, { "epoch": 1.453103985276421, "grad_norm": 8.998337062216214, "learning_rate": 9.177028478299948e-07, "loss": 1.0158, "step": 10264 }, { "epoch": 1.4532455581510582, "grad_norm": 8.125523007505905, "learning_rate": 9.172591078601448e-07, "loss": 0.9665, "step": 10265 }, { "epoch": 1.4533871310256954, "grad_norm": 8.93843724598381, "learning_rate": 9.168154510938024e-07, "loss": 1.0247, "step": 10266 }, { "epoch": 1.4535287039003326, "grad_norm": 8.863685380137678, "learning_rate": 9.163718775542921e-07, "loss": 1.0645, "step": 10267 }, { "epoch": 1.4536702767749698, "grad_norm": 10.802129634413387, "learning_rate": 9.159283872649313e-07, "loss": 0.944, "step": 10268 }, { "epoch": 1.453811849649607, "grad_norm": 8.078870137599397, "learning_rate": 9.154849802490332e-07, "loss": 1.0331, "step": 10269 }, { "epoch": 1.4539534225242443, "grad_norm": 11.273327864591632, "learning_rate": 9.150416565299092e-07, "loss": 0.9304, "step": 10270 }, { "epoch": 1.4540949953988815, "grad_norm": 9.075336073860605, "learning_rate": 9.145984161308627e-07, "loss": 1.0268, "step": 10271 }, { "epoch": 1.4542365682735188, "grad_norm": 10.688450821191594, "learning_rate": 9.14155259075196e-07, "loss": 1.0567, "step": 10272 }, { "epoch": 1.454378141148156, "grad_norm": 10.94368547195068, "learning_rate": 9.137121853862041e-07, "loss": 1.0636, "step": 10273 }, { "epoch": 1.4545197140227932, "grad_norm": 8.336908056930161, "learning_rate": 9.132691950871808e-07, "loss": 0.9923, "step": 10274 }, { "epoch": 1.4546612868974305, "grad_norm": 8.234006232847904, "learning_rate": 9.128262882014117e-07, "loss": 0.9921, "step": 10275 }, { "epoch": 1.4548028597720677, "grad_norm": 7.451211807723926, "learning_rate": 9.123834647521812e-07, "loss": 0.958, "step": 10276 }, { "epoch": 1.454944432646705, "grad_norm": 8.418975160981915, "learning_rate": 9.119407247627701e-07, "loss": 0.9784, "step": 10277 }, { "epoch": 1.4550860055213422, "grad_norm": 11.276553880482885, "learning_rate": 9.114980682564492e-07, "loss": 0.9813, "step": 10278 }, { "epoch": 1.4552275783959794, "grad_norm": 8.944725263885232, "learning_rate": 9.110554952564912e-07, "loss": 1.0743, "step": 10279 }, { "epoch": 1.4553691512706166, "grad_norm": 8.08056556535228, "learning_rate": 9.106130057861604e-07, "loss": 1.0248, "step": 10280 }, { "epoch": 1.4555107241452538, "grad_norm": 9.607122290574566, "learning_rate": 9.101705998687185e-07, "loss": 1.0776, "step": 10281 }, { "epoch": 1.455652297019891, "grad_norm": 11.489546295318437, "learning_rate": 9.097282775274238e-07, "loss": 0.9292, "step": 10282 }, { "epoch": 1.4557938698945283, "grad_norm": 7.6867799499228, "learning_rate": 9.092860387855271e-07, "loss": 0.82, "step": 10283 }, { "epoch": 1.4559354427691655, "grad_norm": 10.766239408946012, "learning_rate": 9.088438836662777e-07, "loss": 1.0324, "step": 10284 }, { "epoch": 1.4560770156438028, "grad_norm": 10.48399586567657, "learning_rate": 9.084018121929184e-07, "loss": 1.0342, "step": 10285 }, { "epoch": 1.45621858851844, "grad_norm": 9.449203409195897, "learning_rate": 9.079598243886897e-07, "loss": 0.971, "step": 10286 }, { "epoch": 1.4563601613930772, "grad_norm": 9.016401392994316, "learning_rate": 9.075179202768253e-07, "loss": 0.9794, "step": 10287 }, { "epoch": 1.4565017342677142, "grad_norm": 9.294997594708915, "learning_rate": 9.070760998805569e-07, "loss": 0.9699, "step": 10288 }, { "epoch": 1.4566433071423515, "grad_norm": 7.553346753603715, "learning_rate": 9.066343632231106e-07, "loss": 0.8835, "step": 10289 }, { "epoch": 1.4567848800169887, "grad_norm": 9.167247222086866, "learning_rate": 9.061927103277068e-07, "loss": 0.9342, "step": 10290 }, { "epoch": 1.456926452891626, "grad_norm": 9.547621377841152, "learning_rate": 9.057511412175646e-07, "loss": 0.9632, "step": 10291 }, { "epoch": 1.4570680257662632, "grad_norm": 10.595958558269029, "learning_rate": 9.053096559158956e-07, "loss": 1.0458, "step": 10292 }, { "epoch": 1.4572095986409004, "grad_norm": 7.651541122085717, "learning_rate": 9.048682544459094e-07, "loss": 0.9265, "step": 10293 }, { "epoch": 1.4573511715155376, "grad_norm": 8.34434798594515, "learning_rate": 9.044269368308089e-07, "loss": 1.0115, "step": 10294 }, { "epoch": 1.4574927443901748, "grad_norm": 9.884672425649843, "learning_rate": 9.039857030937957e-07, "loss": 0.9962, "step": 10295 }, { "epoch": 1.457634317264812, "grad_norm": 8.845834007987884, "learning_rate": 9.03544553258063e-07, "loss": 0.8919, "step": 10296 }, { "epoch": 1.4577758901394493, "grad_norm": 9.916428592506147, "learning_rate": 9.031034873468039e-07, "loss": 0.9861, "step": 10297 }, { "epoch": 1.4579174630140865, "grad_norm": 9.658824984719892, "learning_rate": 9.026625053832028e-07, "loss": 1.0353, "step": 10298 }, { "epoch": 1.4580590358887238, "grad_norm": 9.912510190479866, "learning_rate": 9.022216073904433e-07, "loss": 0.9936, "step": 10299 }, { "epoch": 1.458200608763361, "grad_norm": 8.7262958427813, "learning_rate": 9.017807933917027e-07, "loss": 0.9915, "step": 10300 }, { "epoch": 1.4583421816379982, "grad_norm": 8.889459999599113, "learning_rate": 9.013400634101535e-07, "loss": 0.9044, "step": 10301 }, { "epoch": 1.4584837545126355, "grad_norm": 8.784767613819309, "learning_rate": 9.008994174689659e-07, "loss": 0.9208, "step": 10302 }, { "epoch": 1.4586253273872725, "grad_norm": 8.889101243131643, "learning_rate": 9.004588555913027e-07, "loss": 0.9879, "step": 10303 }, { "epoch": 1.4587669002619097, "grad_norm": 8.08788425764963, "learning_rate": 9.000183778003246e-07, "loss": 0.9507, "step": 10304 }, { "epoch": 1.458908473136547, "grad_norm": 9.67694264704202, "learning_rate": 8.995779841191884e-07, "loss": 1.0461, "step": 10305 }, { "epoch": 1.4590500460111842, "grad_norm": 11.704667319516043, "learning_rate": 8.991376745710436e-07, "loss": 1.0189, "step": 10306 }, { "epoch": 1.4591916188858214, "grad_norm": 8.35185117017479, "learning_rate": 8.986974491790381e-07, "loss": 0.986, "step": 10307 }, { "epoch": 1.4593331917604586, "grad_norm": 9.958570107766908, "learning_rate": 8.982573079663132e-07, "loss": 0.9673, "step": 10308 }, { "epoch": 1.4594747646350958, "grad_norm": 10.258943331256187, "learning_rate": 8.978172509560087e-07, "loss": 1.1126, "step": 10309 }, { "epoch": 1.459616337509733, "grad_norm": 8.736102156911633, "learning_rate": 8.973772781712553e-07, "loss": 1.0102, "step": 10310 }, { "epoch": 1.4597579103843703, "grad_norm": 7.820611267785059, "learning_rate": 8.969373896351833e-07, "loss": 0.9388, "step": 10311 }, { "epoch": 1.4598994832590075, "grad_norm": 9.637515563556459, "learning_rate": 8.964975853709179e-07, "loss": 0.9051, "step": 10312 }, { "epoch": 1.4600410561336448, "grad_norm": 9.168353208811462, "learning_rate": 8.960578654015783e-07, "loss": 1.0697, "step": 10313 }, { "epoch": 1.460182629008282, "grad_norm": 8.883489712119445, "learning_rate": 8.956182297502817e-07, "loss": 0.9942, "step": 10314 }, { "epoch": 1.4603242018829192, "grad_norm": 10.36074069818087, "learning_rate": 8.951786784401376e-07, "loss": 1.0869, "step": 10315 }, { "epoch": 1.4604657747575565, "grad_norm": 10.428640660244643, "learning_rate": 8.947392114942547e-07, "loss": 1.0486, "step": 10316 }, { "epoch": 1.4606073476321937, "grad_norm": 9.440235272078102, "learning_rate": 8.942998289357333e-07, "loss": 0.9273, "step": 10317 }, { "epoch": 1.460748920506831, "grad_norm": 8.839606834402558, "learning_rate": 8.938605307876738e-07, "loss": 0.8906, "step": 10318 }, { "epoch": 1.4608904933814681, "grad_norm": 8.80439331321412, "learning_rate": 8.934213170731676e-07, "loss": 0.9555, "step": 10319 }, { "epoch": 1.4610320662561054, "grad_norm": 9.334208538255643, "learning_rate": 8.929821878153058e-07, "loss": 0.9483, "step": 10320 }, { "epoch": 1.4611736391307426, "grad_norm": 7.356755594333405, "learning_rate": 8.92543143037172e-07, "loss": 0.9561, "step": 10321 }, { "epoch": 1.4613152120053798, "grad_norm": 9.06419156828788, "learning_rate": 8.921041827618459e-07, "loss": 1.0116, "step": 10322 }, { "epoch": 1.461456784880017, "grad_norm": 9.710599271940678, "learning_rate": 8.916653070124048e-07, "loss": 1.0403, "step": 10323 }, { "epoch": 1.4615983577546543, "grad_norm": 9.747604075734978, "learning_rate": 8.912265158119185e-07, "loss": 1.0045, "step": 10324 }, { "epoch": 1.4617399306292915, "grad_norm": 10.119824252204571, "learning_rate": 8.907878091834554e-07, "loss": 0.9501, "step": 10325 }, { "epoch": 1.4618815035039288, "grad_norm": 9.323854628677925, "learning_rate": 8.903491871500767e-07, "loss": 0.9679, "step": 10326 }, { "epoch": 1.462023076378566, "grad_norm": 10.94358682472671, "learning_rate": 8.899106497348409e-07, "loss": 0.9778, "step": 10327 }, { "epoch": 1.4621646492532032, "grad_norm": 9.041386285000208, "learning_rate": 8.894721969608025e-07, "loss": 0.9216, "step": 10328 }, { "epoch": 1.4623062221278402, "grad_norm": 9.230552643900387, "learning_rate": 8.890338288510089e-07, "loss": 1.0482, "step": 10329 }, { "epoch": 1.4624477950024775, "grad_norm": 9.224304388021412, "learning_rate": 8.885955454285078e-07, "loss": 0.8981, "step": 10330 }, { "epoch": 1.4625893678771147, "grad_norm": 9.138755568047518, "learning_rate": 8.881573467163354e-07, "loss": 0.9902, "step": 10331 }, { "epoch": 1.462730940751752, "grad_norm": 9.338056686128, "learning_rate": 8.877192327375303e-07, "loss": 0.9726, "step": 10332 }, { "epoch": 1.4628725136263891, "grad_norm": 9.523112641224056, "learning_rate": 8.872812035151221e-07, "loss": 1.0659, "step": 10333 }, { "epoch": 1.4630140865010264, "grad_norm": 8.470444400663201, "learning_rate": 8.868432590721384e-07, "loss": 1.0284, "step": 10334 }, { "epoch": 1.4631556593756636, "grad_norm": 8.284688444478181, "learning_rate": 8.86405399431603e-07, "loss": 1.0402, "step": 10335 }, { "epoch": 1.4632972322503008, "grad_norm": 10.459992255651112, "learning_rate": 8.859676246165314e-07, "loss": 1.0138, "step": 10336 }, { "epoch": 1.463438805124938, "grad_norm": 8.252265850213362, "learning_rate": 8.855299346499394e-07, "loss": 0.8866, "step": 10337 }, { "epoch": 1.4635803779995753, "grad_norm": 10.923530420413682, "learning_rate": 8.850923295548339e-07, "loss": 1.1281, "step": 10338 }, { "epoch": 1.4637219508742125, "grad_norm": 7.998394089685505, "learning_rate": 8.846548093542215e-07, "loss": 0.8914, "step": 10339 }, { "epoch": 1.4638635237488498, "grad_norm": 9.980505442691847, "learning_rate": 8.842173740711002e-07, "loss": 1.0034, "step": 10340 }, { "epoch": 1.464005096623487, "grad_norm": 9.882645749581371, "learning_rate": 8.83780023728468e-07, "loss": 0.9885, "step": 10341 }, { "epoch": 1.4641466694981242, "grad_norm": 9.02029292813029, "learning_rate": 8.833427583493146e-07, "loss": 0.9547, "step": 10342 }, { "epoch": 1.4642882423727615, "grad_norm": 9.381727729237664, "learning_rate": 8.829055779566262e-07, "loss": 1.086, "step": 10343 }, { "epoch": 1.4644298152473987, "grad_norm": 8.556117888406664, "learning_rate": 8.824684825733865e-07, "loss": 0.9395, "step": 10344 }, { "epoch": 1.4645713881220357, "grad_norm": 10.116013671648554, "learning_rate": 8.82031472222572e-07, "loss": 0.9729, "step": 10345 }, { "epoch": 1.464712960996673, "grad_norm": 8.879713928838463, "learning_rate": 8.815945469271573e-07, "loss": 0.9355, "step": 10346 }, { "epoch": 1.4648545338713101, "grad_norm": 7.446436537986328, "learning_rate": 8.811577067101096e-07, "loss": 0.9128, "step": 10347 }, { "epoch": 1.4649961067459474, "grad_norm": 9.090977196871796, "learning_rate": 8.807209515943952e-07, "loss": 0.9786, "step": 10348 }, { "epoch": 1.4651376796205846, "grad_norm": 8.416903300468267, "learning_rate": 8.802842816029722e-07, "loss": 0.8487, "step": 10349 }, { "epoch": 1.4652792524952218, "grad_norm": 8.746043591442817, "learning_rate": 8.798476967587965e-07, "loss": 0.8342, "step": 10350 }, { "epoch": 1.465420825369859, "grad_norm": 9.934111011281919, "learning_rate": 8.794111970848205e-07, "loss": 1.1222, "step": 10351 }, { "epoch": 1.4655623982444963, "grad_norm": 11.50271508052046, "learning_rate": 8.789747826039893e-07, "loss": 1.0281, "step": 10352 }, { "epoch": 1.4657039711191335, "grad_norm": 9.524881405229747, "learning_rate": 8.785384533392452e-07, "loss": 1.014, "step": 10353 }, { "epoch": 1.4658455439937708, "grad_norm": 8.65966905626667, "learning_rate": 8.78102209313525e-07, "loss": 0.9687, "step": 10354 }, { "epoch": 1.465987116868408, "grad_norm": 10.459773801615263, "learning_rate": 8.776660505497619e-07, "loss": 0.9685, "step": 10355 }, { "epoch": 1.4661286897430452, "grad_norm": 10.21724136835375, "learning_rate": 8.772299770708859e-07, "loss": 1.0777, "step": 10356 }, { "epoch": 1.4662702626176825, "grad_norm": 10.119026588396716, "learning_rate": 8.767939888998192e-07, "loss": 1.0351, "step": 10357 }, { "epoch": 1.4664118354923197, "grad_norm": 10.735886291106404, "learning_rate": 8.763580860594828e-07, "loss": 1.0275, "step": 10358 }, { "epoch": 1.466553408366957, "grad_norm": 11.522107974149902, "learning_rate": 8.759222685727905e-07, "loss": 0.94, "step": 10359 }, { "epoch": 1.4666949812415941, "grad_norm": 12.404277039279133, "learning_rate": 8.754865364626544e-07, "loss": 0.9629, "step": 10360 }, { "epoch": 1.4668365541162314, "grad_norm": 8.64937034476151, "learning_rate": 8.750508897519791e-07, "loss": 1.0043, "step": 10361 }, { "epoch": 1.4669781269908686, "grad_norm": 9.215552030722687, "learning_rate": 8.746153284636677e-07, "loss": 0.9209, "step": 10362 }, { "epoch": 1.4671196998655058, "grad_norm": 9.704225732654434, "learning_rate": 8.741798526206164e-07, "loss": 1.0775, "step": 10363 }, { "epoch": 1.467261272740143, "grad_norm": 9.390654248082571, "learning_rate": 8.737444622457172e-07, "loss": 1.0019, "step": 10364 }, { "epoch": 1.4674028456147803, "grad_norm": 10.294863514525817, "learning_rate": 8.733091573618599e-07, "loss": 0.9004, "step": 10365 }, { "epoch": 1.4675444184894175, "grad_norm": 8.31510253549415, "learning_rate": 8.728739379919266e-07, "loss": 0.9733, "step": 10366 }, { "epoch": 1.4676859913640548, "grad_norm": 9.722558554024028, "learning_rate": 8.724388041587976e-07, "loss": 1.0151, "step": 10367 }, { "epoch": 1.467827564238692, "grad_norm": 10.901803845575389, "learning_rate": 8.720037558853464e-07, "loss": 1.0574, "step": 10368 }, { "epoch": 1.4679691371133292, "grad_norm": 9.430461760104278, "learning_rate": 8.71568793194445e-07, "loss": 1.0714, "step": 10369 }, { "epoch": 1.4681107099879662, "grad_norm": 6.6818410157029975, "learning_rate": 8.711339161089568e-07, "loss": 0.911, "step": 10370 }, { "epoch": 1.4682522828626035, "grad_norm": 9.494472300894923, "learning_rate": 8.706991246517449e-07, "loss": 0.8727, "step": 10371 }, { "epoch": 1.4683938557372407, "grad_norm": 10.185910715277716, "learning_rate": 8.702644188456646e-07, "loss": 0.9891, "step": 10372 }, { "epoch": 1.468535428611878, "grad_norm": 9.57090519529497, "learning_rate": 8.698297987135693e-07, "loss": 1.0787, "step": 10373 }, { "epoch": 1.4686770014865151, "grad_norm": 10.266002537768474, "learning_rate": 8.69395264278306e-07, "loss": 1.1111, "step": 10374 }, { "epoch": 1.4688185743611524, "grad_norm": 8.972606932820497, "learning_rate": 8.689608155627169e-07, "loss": 0.898, "step": 10375 }, { "epoch": 1.4689601472357896, "grad_norm": 7.956060860686356, "learning_rate": 8.685264525896426e-07, "loss": 0.9236, "step": 10376 }, { "epoch": 1.4691017201104268, "grad_norm": 8.994879219469906, "learning_rate": 8.680921753819152e-07, "loss": 1.0083, "step": 10377 }, { "epoch": 1.469243292985064, "grad_norm": 8.540548211041092, "learning_rate": 8.676579839623653e-07, "loss": 0.9582, "step": 10378 }, { "epoch": 1.4693848658597013, "grad_norm": 9.556756582361597, "learning_rate": 8.672238783538189e-07, "loss": 0.916, "step": 10379 }, { "epoch": 1.4695264387343385, "grad_norm": 8.609222133823225, "learning_rate": 8.667898585790951e-07, "loss": 0.9575, "step": 10380 }, { "epoch": 1.4696680116089758, "grad_norm": 9.063283741702959, "learning_rate": 8.663559246610115e-07, "loss": 0.9954, "step": 10381 }, { "epoch": 1.469809584483613, "grad_norm": 9.276208400923476, "learning_rate": 8.659220766223778e-07, "loss": 0.96, "step": 10382 }, { "epoch": 1.4699511573582502, "grad_norm": 6.880284723112391, "learning_rate": 8.654883144860038e-07, "loss": 0.9481, "step": 10383 }, { "epoch": 1.4700927302328874, "grad_norm": 9.816061551971332, "learning_rate": 8.650546382746888e-07, "loss": 1.0152, "step": 10384 }, { "epoch": 1.4702343031075247, "grad_norm": 8.871141051448088, "learning_rate": 8.646210480112325e-07, "loss": 0.889, "step": 10385 }, { "epoch": 1.4703758759821617, "grad_norm": 8.73000164787915, "learning_rate": 8.641875437184288e-07, "loss": 0.947, "step": 10386 }, { "epoch": 1.470517448856799, "grad_norm": 8.724167101435501, "learning_rate": 8.637541254190657e-07, "loss": 1.0007, "step": 10387 }, { "epoch": 1.4706590217314361, "grad_norm": 11.4024876072203, "learning_rate": 8.633207931359292e-07, "loss": 1.0317, "step": 10388 }, { "epoch": 1.4708005946060734, "grad_norm": 8.974138189820792, "learning_rate": 8.628875468917969e-07, "loss": 0.9475, "step": 10389 }, { "epoch": 1.4709421674807106, "grad_norm": 8.263005034324113, "learning_rate": 8.624543867094468e-07, "loss": 1.0664, "step": 10390 }, { "epoch": 1.4710837403553478, "grad_norm": 8.44344950832356, "learning_rate": 8.620213126116476e-07, "loss": 0.9228, "step": 10391 }, { "epoch": 1.471225313229985, "grad_norm": 9.277445311944085, "learning_rate": 8.615883246211676e-07, "loss": 0.9247, "step": 10392 }, { "epoch": 1.4713668861046223, "grad_norm": 9.906662411934857, "learning_rate": 8.611554227607665e-07, "loss": 1.0225, "step": 10393 }, { "epoch": 1.4715084589792595, "grad_norm": 8.911303134074346, "learning_rate": 8.607226070532041e-07, "loss": 0.9711, "step": 10394 }, { "epoch": 1.4716500318538968, "grad_norm": 9.36477388238307, "learning_rate": 8.602898775212317e-07, "loss": 0.9784, "step": 10395 }, { "epoch": 1.471791604728534, "grad_norm": 8.942006502214396, "learning_rate": 8.598572341875975e-07, "loss": 0.9156, "step": 10396 }, { "epoch": 1.4719331776031712, "grad_norm": 9.840032935358769, "learning_rate": 8.594246770750459e-07, "loss": 1.0393, "step": 10397 }, { "epoch": 1.4720747504778084, "grad_norm": 10.423681191289223, "learning_rate": 8.589922062063149e-07, "loss": 1.0821, "step": 10398 }, { "epoch": 1.4722163233524457, "grad_norm": 9.16582189193036, "learning_rate": 8.58559821604141e-07, "loss": 1.0729, "step": 10399 }, { "epoch": 1.472357896227083, "grad_norm": 8.560465995725048, "learning_rate": 8.581275232912526e-07, "loss": 1.0583, "step": 10400 }, { "epoch": 1.4724994691017201, "grad_norm": 10.531653150318485, "learning_rate": 8.57695311290376e-07, "loss": 1.0265, "step": 10401 }, { "epoch": 1.4726410419763574, "grad_norm": 9.577801597823251, "learning_rate": 8.572631856242333e-07, "loss": 0.9593, "step": 10402 }, { "epoch": 1.4727826148509946, "grad_norm": 10.177244676438784, "learning_rate": 8.56831146315539e-07, "loss": 0.9982, "step": 10403 }, { "epoch": 1.4729241877256318, "grad_norm": 7.634821835220937, "learning_rate": 8.563991933870072e-07, "loss": 0.8248, "step": 10404 }, { "epoch": 1.473065760600269, "grad_norm": 9.585858308174315, "learning_rate": 8.559673268613442e-07, "loss": 0.9436, "step": 10405 }, { "epoch": 1.4732073334749063, "grad_norm": 9.02773884561083, "learning_rate": 8.555355467612527e-07, "loss": 1.114, "step": 10406 }, { "epoch": 1.4733489063495435, "grad_norm": 9.723890508991726, "learning_rate": 8.551038531094308e-07, "loss": 1.0558, "step": 10407 }, { "epoch": 1.4734904792241807, "grad_norm": 6.903054443109315, "learning_rate": 8.546722459285727e-07, "loss": 0.9419, "step": 10408 }, { "epoch": 1.473632052098818, "grad_norm": 9.339635038576647, "learning_rate": 8.54240725241369e-07, "loss": 0.9194, "step": 10409 }, { "epoch": 1.4737736249734552, "grad_norm": 9.282442266776302, "learning_rate": 8.538092910705021e-07, "loss": 0.9418, "step": 10410 }, { "epoch": 1.4739151978480924, "grad_norm": 11.039017922216257, "learning_rate": 8.533779434386544e-07, "loss": 1.1073, "step": 10411 }, { "epoch": 1.4740567707227294, "grad_norm": 8.210254992893939, "learning_rate": 8.529466823684993e-07, "loss": 0.9286, "step": 10412 }, { "epoch": 1.4741983435973667, "grad_norm": 8.272097499781928, "learning_rate": 8.525155078827099e-07, "loss": 0.9146, "step": 10413 }, { "epoch": 1.474339916472004, "grad_norm": 9.190035723096884, "learning_rate": 8.520844200039511e-07, "loss": 1.0539, "step": 10414 }, { "epoch": 1.4744814893466411, "grad_norm": 9.72370965629787, "learning_rate": 8.516534187548864e-07, "loss": 0.9986, "step": 10415 }, { "epoch": 1.4746230622212784, "grad_norm": 8.633372790075839, "learning_rate": 8.512225041581726e-07, "loss": 0.9963, "step": 10416 }, { "epoch": 1.4747646350959156, "grad_norm": 7.768526145551748, "learning_rate": 8.507916762364613e-07, "loss": 0.8687, "step": 10417 }, { "epoch": 1.4749062079705528, "grad_norm": 9.609827939655588, "learning_rate": 8.503609350124029e-07, "loss": 0.9683, "step": 10418 }, { "epoch": 1.47504778084519, "grad_norm": 9.529401556160527, "learning_rate": 8.49930280508639e-07, "loss": 0.9046, "step": 10419 }, { "epoch": 1.4751893537198273, "grad_norm": 8.942743643837417, "learning_rate": 8.494997127478111e-07, "loss": 1.0498, "step": 10420 }, { "epoch": 1.4753309265944645, "grad_norm": 8.524479341688558, "learning_rate": 8.490692317525514e-07, "loss": 0.8731, "step": 10421 }, { "epoch": 1.4754724994691018, "grad_norm": 10.063448843438898, "learning_rate": 8.486388375454924e-07, "loss": 1.0862, "step": 10422 }, { "epoch": 1.475614072343739, "grad_norm": 9.220273890864554, "learning_rate": 8.482085301492574e-07, "loss": 0.9128, "step": 10423 }, { "epoch": 1.4757556452183762, "grad_norm": 10.681856505806682, "learning_rate": 8.477783095864683e-07, "loss": 1.0094, "step": 10424 }, { "epoch": 1.4758972180930134, "grad_norm": 9.500201976034468, "learning_rate": 8.473481758797425e-07, "loss": 1.0692, "step": 10425 }, { "epoch": 1.4760387909676507, "grad_norm": 7.965697178713785, "learning_rate": 8.469181290516906e-07, "loss": 0.9378, "step": 10426 }, { "epoch": 1.4761803638422877, "grad_norm": 8.014890164903619, "learning_rate": 8.464881691249202e-07, "loss": 0.9164, "step": 10427 }, { "epoch": 1.476321936716925, "grad_norm": 10.812572324654246, "learning_rate": 8.460582961220332e-07, "loss": 0.9901, "step": 10428 }, { "epoch": 1.4764635095915621, "grad_norm": 10.211993955963774, "learning_rate": 8.456285100656289e-07, "loss": 1.0194, "step": 10429 }, { "epoch": 1.4766050824661994, "grad_norm": 9.727104104524322, "learning_rate": 8.451988109782997e-07, "loss": 0.9091, "step": 10430 }, { "epoch": 1.4767466553408366, "grad_norm": 11.346547520060893, "learning_rate": 8.447691988826348e-07, "loss": 1.0239, "step": 10431 }, { "epoch": 1.4768882282154738, "grad_norm": 9.479307024129708, "learning_rate": 8.443396738012199e-07, "loss": 0.9626, "step": 10432 }, { "epoch": 1.477029801090111, "grad_norm": 8.429622335243955, "learning_rate": 8.439102357566331e-07, "loss": 1.1085, "step": 10433 }, { "epoch": 1.4771713739647483, "grad_norm": 8.075354449639018, "learning_rate": 8.434808847714512e-07, "loss": 1.0201, "step": 10434 }, { "epoch": 1.4773129468393855, "grad_norm": 8.24035682053788, "learning_rate": 8.430516208682429e-07, "loss": 0.9616, "step": 10435 }, { "epoch": 1.4774545197140228, "grad_norm": 9.040793052928427, "learning_rate": 8.426224440695765e-07, "loss": 0.9966, "step": 10436 }, { "epoch": 1.47759609258866, "grad_norm": 10.513864718473382, "learning_rate": 8.421933543980126e-07, "loss": 0.9605, "step": 10437 }, { "epoch": 1.4777376654632972, "grad_norm": 12.063112233491509, "learning_rate": 8.417643518761068e-07, "loss": 0.9655, "step": 10438 }, { "epoch": 1.4778792383379344, "grad_norm": 8.257205937248793, "learning_rate": 8.413354365264137e-07, "loss": 0.9317, "step": 10439 }, { "epoch": 1.4780208112125717, "grad_norm": 8.84472262692098, "learning_rate": 8.40906608371479e-07, "loss": 1.026, "step": 10440 }, { "epoch": 1.478162384087209, "grad_norm": 9.778704522266182, "learning_rate": 8.404778674338476e-07, "loss": 0.9935, "step": 10441 }, { "epoch": 1.4783039569618461, "grad_norm": 9.800218416232333, "learning_rate": 8.400492137360564e-07, "loss": 0.9632, "step": 10442 }, { "epoch": 1.4784455298364834, "grad_norm": 9.201218101467939, "learning_rate": 8.396206473006413e-07, "loss": 1.0215, "step": 10443 }, { "epoch": 1.4785871027111206, "grad_norm": 10.546452537835293, "learning_rate": 8.391921681501297e-07, "loss": 0.975, "step": 10444 }, { "epoch": 1.4787286755857578, "grad_norm": 8.409426858175758, "learning_rate": 8.387637763070486e-07, "loss": 0.9248, "step": 10445 }, { "epoch": 1.478870248460395, "grad_norm": 8.498572734606125, "learning_rate": 8.383354717939163e-07, "loss": 0.9985, "step": 10446 }, { "epoch": 1.4790118213350323, "grad_norm": 10.236644697034293, "learning_rate": 8.379072546332498e-07, "loss": 1.0653, "step": 10447 }, { "epoch": 1.4791533942096695, "grad_norm": 7.886642085359799, "learning_rate": 8.374791248475597e-07, "loss": 0.9279, "step": 10448 }, { "epoch": 1.4792949670843067, "grad_norm": 9.693318250277965, "learning_rate": 8.370510824593517e-07, "loss": 1.0287, "step": 10449 }, { "epoch": 1.479436539958944, "grad_norm": 8.755374674641821, "learning_rate": 8.366231274911291e-07, "loss": 0.9344, "step": 10450 }, { "epoch": 1.4795781128335812, "grad_norm": 11.543711640645864, "learning_rate": 8.361952599653875e-07, "loss": 1.011, "step": 10451 }, { "epoch": 1.4797196857082184, "grad_norm": 10.669356801632743, "learning_rate": 8.357674799046206e-07, "loss": 0.9875, "step": 10452 }, { "epoch": 1.4798612585828554, "grad_norm": 9.431236361120233, "learning_rate": 8.353397873313171e-07, "loss": 0.9505, "step": 10453 }, { "epoch": 1.4800028314574927, "grad_norm": 10.430918554453795, "learning_rate": 8.34912182267959e-07, "loss": 1.0267, "step": 10454 }, { "epoch": 1.48014440433213, "grad_norm": 9.047636062682557, "learning_rate": 8.34484664737027e-07, "loss": 0.9372, "step": 10455 }, { "epoch": 1.4802859772067671, "grad_norm": 10.109326700039178, "learning_rate": 8.340572347609932e-07, "loss": 1.065, "step": 10456 }, { "epoch": 1.4804275500814044, "grad_norm": 8.207386820742213, "learning_rate": 8.336298923623301e-07, "loss": 1.0411, "step": 10457 }, { "epoch": 1.4805691229560416, "grad_norm": 8.445042382648328, "learning_rate": 8.332026375634994e-07, "loss": 0.9299, "step": 10458 }, { "epoch": 1.4807106958306788, "grad_norm": 8.25118131560732, "learning_rate": 8.327754703869631e-07, "loss": 0.9254, "step": 10459 }, { "epoch": 1.480852268705316, "grad_norm": 8.641682289376725, "learning_rate": 8.323483908551783e-07, "loss": 0.967, "step": 10460 }, { "epoch": 1.4809938415799533, "grad_norm": 9.73434796031056, "learning_rate": 8.319213989905942e-07, "loss": 1.0437, "step": 10461 }, { "epoch": 1.4811354144545905, "grad_norm": 9.443567611781114, "learning_rate": 8.314944948156589e-07, "loss": 1.0333, "step": 10462 }, { "epoch": 1.4812769873292277, "grad_norm": 9.251327007062368, "learning_rate": 8.310676783528135e-07, "loss": 1.0128, "step": 10463 }, { "epoch": 1.481418560203865, "grad_norm": 8.582567298063049, "learning_rate": 8.306409496244966e-07, "loss": 0.9647, "step": 10464 }, { "epoch": 1.4815601330785022, "grad_norm": 8.23330247180833, "learning_rate": 8.302143086531392e-07, "loss": 0.991, "step": 10465 }, { "epoch": 1.4817017059531394, "grad_norm": 9.524105208211942, "learning_rate": 8.297877554611717e-07, "loss": 1.0912, "step": 10466 }, { "epoch": 1.4818432788277767, "grad_norm": 10.355008639226575, "learning_rate": 8.293612900710155e-07, "loss": 1.0355, "step": 10467 }, { "epoch": 1.481984851702414, "grad_norm": 9.605787249447108, "learning_rate": 8.289349125050914e-07, "loss": 0.9288, "step": 10468 }, { "epoch": 1.482126424577051, "grad_norm": 10.113531705570034, "learning_rate": 8.28508622785813e-07, "loss": 0.9754, "step": 10469 }, { "epoch": 1.4822679974516881, "grad_norm": 8.47260807458065, "learning_rate": 8.280824209355892e-07, "loss": 1.0291, "step": 10470 }, { "epoch": 1.4824095703263254, "grad_norm": 10.287273833707719, "learning_rate": 8.276563069768267e-07, "loss": 1.0163, "step": 10471 }, { "epoch": 1.4825511432009626, "grad_norm": 8.29844038684402, "learning_rate": 8.272302809319243e-07, "loss": 0.9279, "step": 10472 }, { "epoch": 1.4826927160755998, "grad_norm": 9.911695457798556, "learning_rate": 8.268043428232798e-07, "loss": 0.959, "step": 10473 }, { "epoch": 1.482834288950237, "grad_norm": 9.968109397275628, "learning_rate": 8.263784926732824e-07, "loss": 0.9986, "step": 10474 }, { "epoch": 1.4829758618248743, "grad_norm": 9.02579447031188, "learning_rate": 8.259527305043197e-07, "loss": 0.9676, "step": 10475 }, { "epoch": 1.4831174346995115, "grad_norm": 11.02156346191499, "learning_rate": 8.255270563387746e-07, "loss": 1.0002, "step": 10476 }, { "epoch": 1.4832590075741487, "grad_norm": 7.940016220037771, "learning_rate": 8.251014701990229e-07, "loss": 0.9783, "step": 10477 }, { "epoch": 1.483400580448786, "grad_norm": 8.306911339419518, "learning_rate": 8.246759721074388e-07, "loss": 0.8937, "step": 10478 }, { "epoch": 1.4835421533234232, "grad_norm": 9.11047084491868, "learning_rate": 8.242505620863894e-07, "loss": 1.0967, "step": 10479 }, { "epoch": 1.4836837261980604, "grad_norm": 9.764572599543609, "learning_rate": 8.238252401582389e-07, "loss": 0.935, "step": 10480 }, { "epoch": 1.4838252990726977, "grad_norm": 9.000075445918583, "learning_rate": 8.234000063453446e-07, "loss": 0.938, "step": 10481 }, { "epoch": 1.483966871947335, "grad_norm": 9.416395020294265, "learning_rate": 8.229748606700619e-07, "loss": 1.0043, "step": 10482 }, { "epoch": 1.4841084448219721, "grad_norm": 9.652604189500373, "learning_rate": 8.225498031547413e-07, "loss": 1.0149, "step": 10483 }, { "epoch": 1.4842500176966094, "grad_norm": 10.17565229044963, "learning_rate": 8.221248338217258e-07, "loss": 0.9503, "step": 10484 }, { "epoch": 1.4843915905712466, "grad_norm": 10.02510086731811, "learning_rate": 8.216999526933578e-07, "loss": 0.9297, "step": 10485 }, { "epoch": 1.4845331634458838, "grad_norm": 7.596432059150985, "learning_rate": 8.212751597919708e-07, "loss": 0.9475, "step": 10486 }, { "epoch": 1.484674736320521, "grad_norm": 9.267256851978738, "learning_rate": 8.208504551398977e-07, "loss": 1.0078, "step": 10487 }, { "epoch": 1.4848163091951583, "grad_norm": 7.784323701877092, "learning_rate": 8.204258387594635e-07, "loss": 0.8927, "step": 10488 }, { "epoch": 1.4849578820697955, "grad_norm": 8.506220616970284, "learning_rate": 8.200013106729915e-07, "loss": 1.0293, "step": 10489 }, { "epoch": 1.4850994549444327, "grad_norm": 10.65182913532801, "learning_rate": 8.195768709027979e-07, "loss": 0.9533, "step": 10490 }, { "epoch": 1.48524102781907, "grad_norm": 10.226723642733472, "learning_rate": 8.191525194711941e-07, "loss": 1.0558, "step": 10491 }, { "epoch": 1.4853826006937072, "grad_norm": 9.901512384972593, "learning_rate": 8.187282564004903e-07, "loss": 1.0268, "step": 10492 }, { "epoch": 1.4855241735683444, "grad_norm": 11.162133468641976, "learning_rate": 8.183040817129873e-07, "loss": 1.1875, "step": 10493 }, { "epoch": 1.4856657464429817, "grad_norm": 8.635739040147708, "learning_rate": 8.178799954309857e-07, "loss": 0.8267, "step": 10494 }, { "epoch": 1.4858073193176187, "grad_norm": 10.083787093448379, "learning_rate": 8.174559975767779e-07, "loss": 1.0018, "step": 10495 }, { "epoch": 1.485948892192256, "grad_norm": 9.367048629991258, "learning_rate": 8.170320881726542e-07, "loss": 0.9774, "step": 10496 }, { "epoch": 1.4860904650668931, "grad_norm": 34.758436756868825, "learning_rate": 8.166082672408985e-07, "loss": 1.1055, "step": 10497 }, { "epoch": 1.4862320379415304, "grad_norm": 9.108993496111797, "learning_rate": 8.161845348037906e-07, "loss": 0.986, "step": 10498 }, { "epoch": 1.4863736108161676, "grad_norm": 8.839085511421567, "learning_rate": 8.157608908836071e-07, "loss": 0.9973, "step": 10499 }, { "epoch": 1.4865151836908048, "grad_norm": 9.342254008585803, "learning_rate": 8.153373355026176e-07, "loss": 0.9969, "step": 10500 }, { "epoch": 1.486656756565442, "grad_norm": 9.060120579336093, "learning_rate": 8.149138686830882e-07, "loss": 0.9729, "step": 10501 }, { "epoch": 1.4867983294400793, "grad_norm": 10.294825718994598, "learning_rate": 8.144904904472798e-07, "loss": 0.9956, "step": 10502 }, { "epoch": 1.4869399023147165, "grad_norm": 7.886663367732119, "learning_rate": 8.1406720081745e-07, "loss": 0.9346, "step": 10503 }, { "epoch": 1.4870814751893537, "grad_norm": 8.975052058246915, "learning_rate": 8.136439998158499e-07, "loss": 0.9793, "step": 10504 }, { "epoch": 1.487223048063991, "grad_norm": 9.149411925715542, "learning_rate": 8.132208874647271e-07, "loss": 0.9917, "step": 10505 }, { "epoch": 1.4873646209386282, "grad_norm": 9.74097044071858, "learning_rate": 8.127978637863254e-07, "loss": 0.9173, "step": 10506 }, { "epoch": 1.4875061938132654, "grad_norm": 7.997345722471696, "learning_rate": 8.12374928802881e-07, "loss": 1.0369, "step": 10507 }, { "epoch": 1.4876477666879027, "grad_norm": 9.76863078742039, "learning_rate": 8.119520825366292e-07, "loss": 0.9024, "step": 10508 }, { "epoch": 1.48778933956254, "grad_norm": 9.611052079898409, "learning_rate": 8.115293250097969e-07, "loss": 0.9708, "step": 10509 }, { "epoch": 1.487930912437177, "grad_norm": 8.614166975434367, "learning_rate": 8.111066562446098e-07, "loss": 0.9121, "step": 10510 }, { "epoch": 1.4880724853118141, "grad_norm": 7.960165987824056, "learning_rate": 8.106840762632867e-07, "loss": 0.9867, "step": 10511 }, { "epoch": 1.4882140581864514, "grad_norm": 10.090476624315592, "learning_rate": 8.102615850880413e-07, "loss": 1.0913, "step": 10512 }, { "epoch": 1.4883556310610886, "grad_norm": 8.95052514267123, "learning_rate": 8.098391827410851e-07, "loss": 1.0337, "step": 10513 }, { "epoch": 1.4884972039357258, "grad_norm": 9.28909863849152, "learning_rate": 8.09416869244622e-07, "loss": 0.8979, "step": 10514 }, { "epoch": 1.488638776810363, "grad_norm": 8.903141590235508, "learning_rate": 8.089946446208543e-07, "loss": 1.0415, "step": 10515 }, { "epoch": 1.4887803496850003, "grad_norm": 9.062771497145487, "learning_rate": 8.085725088919766e-07, "loss": 0.9487, "step": 10516 }, { "epoch": 1.4889219225596375, "grad_norm": 9.606630705696196, "learning_rate": 8.081504620801816e-07, "loss": 1.0539, "step": 10517 }, { "epoch": 1.4890634954342747, "grad_norm": 12.114120164033078, "learning_rate": 8.077285042076546e-07, "loss": 0.9501, "step": 10518 }, { "epoch": 1.489205068308912, "grad_norm": 9.120058576830756, "learning_rate": 8.073066352965792e-07, "loss": 1.0354, "step": 10519 }, { "epoch": 1.4893466411835492, "grad_norm": 9.311548786098822, "learning_rate": 8.068848553691308e-07, "loss": 0.9304, "step": 10520 }, { "epoch": 1.4894882140581864, "grad_norm": 8.833995818240986, "learning_rate": 8.06463164447484e-07, "loss": 1.0128, "step": 10521 }, { "epoch": 1.4896297869328237, "grad_norm": 7.5269210847214945, "learning_rate": 8.060415625538059e-07, "loss": 0.9604, "step": 10522 }, { "epoch": 1.489771359807461, "grad_norm": 8.058759427829138, "learning_rate": 8.056200497102592e-07, "loss": 0.9084, "step": 10523 }, { "epoch": 1.4899129326820981, "grad_norm": 9.63302990744625, "learning_rate": 8.051986259390038e-07, "loss": 0.9889, "step": 10524 }, { "epoch": 1.4900545055567354, "grad_norm": 8.473751604074703, "learning_rate": 8.047772912621921e-07, "loss": 0.9771, "step": 10525 }, { "epoch": 1.4901960784313726, "grad_norm": 10.509447116700786, "learning_rate": 8.04356045701975e-07, "loss": 1.0418, "step": 10526 }, { "epoch": 1.4903376513060098, "grad_norm": 11.31912694900518, "learning_rate": 8.039348892804955e-07, "loss": 1.0416, "step": 10527 }, { "epoch": 1.490479224180647, "grad_norm": 10.522581613649663, "learning_rate": 8.03513822019894e-07, "loss": 1.0017, "step": 10528 }, { "epoch": 1.4906207970552843, "grad_norm": 9.495608469192236, "learning_rate": 8.030928439423069e-07, "loss": 1.0205, "step": 10529 }, { "epoch": 1.4907623699299215, "grad_norm": 9.48328012497316, "learning_rate": 8.026719550698628e-07, "loss": 1.0036, "step": 10530 }, { "epoch": 1.4909039428045587, "grad_norm": 9.327767165587387, "learning_rate": 8.022511554246895e-07, "loss": 0.9552, "step": 10531 }, { "epoch": 1.491045515679196, "grad_norm": 10.089853579892617, "learning_rate": 8.018304450289069e-07, "loss": 1.0296, "step": 10532 }, { "epoch": 1.4911870885538332, "grad_norm": 9.28913067021874, "learning_rate": 8.014098239046309e-07, "loss": 1.0625, "step": 10533 }, { "epoch": 1.4913286614284704, "grad_norm": 8.174666968978672, "learning_rate": 8.009892920739745e-07, "loss": 1.0985, "step": 10534 }, { "epoch": 1.4914702343031077, "grad_norm": 8.6892461325509, "learning_rate": 8.005688495590435e-07, "loss": 1.1311, "step": 10535 }, { "epoch": 1.4916118071777447, "grad_norm": 11.89184275342832, "learning_rate": 8.001484963819417e-07, "loss": 1.1134, "step": 10536 }, { "epoch": 1.491753380052382, "grad_norm": 10.364332482962075, "learning_rate": 7.997282325647654e-07, "loss": 0.9262, "step": 10537 }, { "epoch": 1.4918949529270191, "grad_norm": 9.775238237136394, "learning_rate": 7.993080581296087e-07, "loss": 1.026, "step": 10538 }, { "epoch": 1.4920365258016564, "grad_norm": 9.107303966304626, "learning_rate": 7.988879730985585e-07, "loss": 0.9686, "step": 10539 }, { "epoch": 1.4921780986762936, "grad_norm": 10.19158366916285, "learning_rate": 7.984679774937002e-07, "loss": 0.9419, "step": 10540 }, { "epoch": 1.4923196715509308, "grad_norm": 7.7921620039812485, "learning_rate": 7.980480713371106e-07, "loss": 0.9005, "step": 10541 }, { "epoch": 1.492461244425568, "grad_norm": 7.4057222975077, "learning_rate": 7.976282546508654e-07, "loss": 0.9272, "step": 10542 }, { "epoch": 1.4926028173002053, "grad_norm": 9.853321084861042, "learning_rate": 7.972085274570341e-07, "loss": 1.057, "step": 10543 }, { "epoch": 1.4927443901748425, "grad_norm": 8.176753555418843, "learning_rate": 7.967888897776796e-07, "loss": 0.9628, "step": 10544 }, { "epoch": 1.4928859630494797, "grad_norm": 9.019790083986416, "learning_rate": 7.963693416348642e-07, "loss": 0.958, "step": 10545 }, { "epoch": 1.493027535924117, "grad_norm": 7.90393756325961, "learning_rate": 7.959498830506412e-07, "loss": 0.9272, "step": 10546 }, { "epoch": 1.4931691087987542, "grad_norm": 9.597362919052296, "learning_rate": 7.955305140470635e-07, "loss": 0.9645, "step": 10547 }, { "epoch": 1.4933106816733914, "grad_norm": 11.705692595303457, "learning_rate": 7.951112346461745e-07, "loss": 1.0654, "step": 10548 }, { "epoch": 1.4934522545480287, "grad_norm": 8.241427967817863, "learning_rate": 7.946920448700168e-07, "loss": 0.9933, "step": 10549 }, { "epoch": 1.4935938274226659, "grad_norm": 8.614809069430057, "learning_rate": 7.942729447406278e-07, "loss": 0.9449, "step": 10550 }, { "epoch": 1.4937354002973031, "grad_norm": 8.402881727460144, "learning_rate": 7.938539342800373e-07, "loss": 1.0034, "step": 10551 }, { "epoch": 1.4938769731719401, "grad_norm": 10.174138018113432, "learning_rate": 7.934350135102742e-07, "loss": 0.9788, "step": 10552 }, { "epoch": 1.4940185460465774, "grad_norm": 7.963633132386017, "learning_rate": 7.930161824533597e-07, "loss": 0.9677, "step": 10553 }, { "epoch": 1.4941601189212146, "grad_norm": 10.01485331831199, "learning_rate": 7.92597441131312e-07, "loss": 1.0832, "step": 10554 }, { "epoch": 1.4943016917958518, "grad_norm": 9.011639591790399, "learning_rate": 7.921787895661429e-07, "loss": 0.9665, "step": 10555 }, { "epoch": 1.494443264670489, "grad_norm": 9.758059960458048, "learning_rate": 7.917602277798613e-07, "loss": 1.0582, "step": 10556 }, { "epoch": 1.4945848375451263, "grad_norm": 9.319824205959062, "learning_rate": 7.913417557944716e-07, "loss": 0.9557, "step": 10557 }, { "epoch": 1.4947264104197635, "grad_norm": 8.669997773307681, "learning_rate": 7.909233736319711e-07, "loss": 0.9677, "step": 10558 }, { "epoch": 1.4948679832944007, "grad_norm": 8.51559005870125, "learning_rate": 7.905050813143553e-07, "loss": 0.9711, "step": 10559 }, { "epoch": 1.495009556169038, "grad_norm": 9.80799597130548, "learning_rate": 7.900868788636118e-07, "loss": 0.9909, "step": 10560 }, { "epoch": 1.4951511290436752, "grad_norm": 8.373345510550875, "learning_rate": 7.896687663017269e-07, "loss": 0.9807, "step": 10561 }, { "epoch": 1.4952927019183124, "grad_norm": 9.46221476720864, "learning_rate": 7.892507436506791e-07, "loss": 1.0845, "step": 10562 }, { "epoch": 1.4954342747929497, "grad_norm": 10.804681851046968, "learning_rate": 7.888328109324448e-07, "loss": 0.9872, "step": 10563 }, { "epoch": 1.4955758476675869, "grad_norm": 9.036643137219325, "learning_rate": 7.884149681689937e-07, "loss": 0.9546, "step": 10564 }, { "epoch": 1.4957174205422241, "grad_norm": 10.05867073696266, "learning_rate": 7.879972153822907e-07, "loss": 0.9444, "step": 10565 }, { "epoch": 1.4958589934168613, "grad_norm": 10.47366081180698, "learning_rate": 7.875795525942984e-07, "loss": 1.0694, "step": 10566 }, { "epoch": 1.4960005662914986, "grad_norm": 6.953094002300989, "learning_rate": 7.871619798269711e-07, "loss": 0.8915, "step": 10567 }, { "epoch": 1.4961421391661358, "grad_norm": 9.371419400865275, "learning_rate": 7.867444971022626e-07, "loss": 0.93, "step": 10568 }, { "epoch": 1.496283712040773, "grad_norm": 9.244213872055573, "learning_rate": 7.863271044421175e-07, "loss": 0.9873, "step": 10569 }, { "epoch": 1.4964252849154103, "grad_norm": 7.860112968972505, "learning_rate": 7.859098018684793e-07, "loss": 1.0137, "step": 10570 }, { "epoch": 1.4965668577900475, "grad_norm": 9.585537554691564, "learning_rate": 7.854925894032842e-07, "loss": 0.8976, "step": 10571 }, { "epoch": 1.4967084306646847, "grad_norm": 9.296981682686221, "learning_rate": 7.850754670684654e-07, "loss": 0.9488, "step": 10572 }, { "epoch": 1.496850003539322, "grad_norm": 10.172228641485994, "learning_rate": 7.846584348859512e-07, "loss": 0.9739, "step": 10573 }, { "epoch": 1.4969915764139592, "grad_norm": 9.524886211206189, "learning_rate": 7.84241492877664e-07, "loss": 0.9419, "step": 10574 }, { "epoch": 1.4971331492885964, "grad_norm": 9.80198815867439, "learning_rate": 7.838246410655223e-07, "loss": 1.0354, "step": 10575 }, { "epoch": 1.4972747221632337, "grad_norm": 9.859460893791857, "learning_rate": 7.834078794714389e-07, "loss": 1.087, "step": 10576 }, { "epoch": 1.4974162950378709, "grad_norm": 7.8889622513250774, "learning_rate": 7.829912081173238e-07, "loss": 0.952, "step": 10577 }, { "epoch": 1.497557867912508, "grad_norm": 10.101764532371583, "learning_rate": 7.825746270250803e-07, "loss": 1.1303, "step": 10578 }, { "epoch": 1.4976994407871451, "grad_norm": 9.102801769841808, "learning_rate": 7.821581362166078e-07, "loss": 0.8476, "step": 10579 }, { "epoch": 1.4978410136617824, "grad_norm": 9.779454660637915, "learning_rate": 7.817417357138018e-07, "loss": 1.1036, "step": 10580 }, { "epoch": 1.4979825865364196, "grad_norm": 9.472504831264025, "learning_rate": 7.813254255385511e-07, "loss": 1.011, "step": 10581 }, { "epoch": 1.4981241594110568, "grad_norm": 8.855228806161513, "learning_rate": 7.809092057127421e-07, "loss": 1.0684, "step": 10582 }, { "epoch": 1.498265732285694, "grad_norm": 9.296929572450125, "learning_rate": 7.804930762582533e-07, "loss": 0.9821, "step": 10583 }, { "epoch": 1.4984073051603313, "grad_norm": 9.094597740903534, "learning_rate": 7.800770371969624e-07, "loss": 0.933, "step": 10584 }, { "epoch": 1.4985488780349685, "grad_norm": 6.545595030288469, "learning_rate": 7.796610885507391e-07, "loss": 0.9781, "step": 10585 }, { "epoch": 1.4986904509096057, "grad_norm": 8.14870668224484, "learning_rate": 7.792452303414489e-07, "loss": 0.9452, "step": 10586 }, { "epoch": 1.498832023784243, "grad_norm": 8.311998854136824, "learning_rate": 7.788294625909546e-07, "loss": 1.038, "step": 10587 }, { "epoch": 1.4989735966588802, "grad_norm": 8.353303048134004, "learning_rate": 7.784137853211113e-07, "loss": 0.9699, "step": 10588 }, { "epoch": 1.4991151695335174, "grad_norm": 9.462880748511358, "learning_rate": 7.779981985537724e-07, "loss": 0.935, "step": 10589 }, { "epoch": 1.4992567424081547, "grad_norm": 10.067726057615562, "learning_rate": 7.775827023107835e-07, "loss": 1.0534, "step": 10590 }, { "epoch": 1.4993983152827919, "grad_norm": 10.209726625195724, "learning_rate": 7.771672966139885e-07, "loss": 0.9276, "step": 10591 }, { "epoch": 1.4995398881574291, "grad_norm": 10.919600657891845, "learning_rate": 7.767519814852234e-07, "loss": 1.121, "step": 10592 }, { "epoch": 1.4996814610320661, "grad_norm": 9.45806422810033, "learning_rate": 7.763367569463224e-07, "loss": 1.0609, "step": 10593 }, { "epoch": 1.4998230339067034, "grad_norm": 8.325444976553337, "learning_rate": 7.759216230191119e-07, "loss": 0.9622, "step": 10594 }, { "epoch": 1.4999646067813406, "grad_norm": 10.491790104978922, "learning_rate": 7.755065797254172e-07, "loss": 1.0115, "step": 10595 }, { "epoch": 1.5001061796559778, "grad_norm": 10.923172465740372, "learning_rate": 7.750916270870556e-07, "loss": 1.0427, "step": 10596 }, { "epoch": 1.500247752530615, "grad_norm": 9.535002223126092, "learning_rate": 7.746767651258405e-07, "loss": 0.857, "step": 10597 }, { "epoch": 1.5003893254052523, "grad_norm": 8.473916817821117, "learning_rate": 7.74261993863582e-07, "loss": 0.9803, "step": 10598 }, { "epoch": 1.5005308982798895, "grad_norm": 7.635910732908503, "learning_rate": 7.738473133220828e-07, "loss": 0.8751, "step": 10599 }, { "epoch": 1.5006724711545267, "grad_norm": 7.6346032375925725, "learning_rate": 7.734327235231443e-07, "loss": 0.967, "step": 10600 }, { "epoch": 1.500814044029164, "grad_norm": 9.180900019654851, "learning_rate": 7.730182244885593e-07, "loss": 1.0112, "step": 10601 }, { "epoch": 1.5009556169038012, "grad_norm": 9.320557660622054, "learning_rate": 7.726038162401184e-07, "loss": 0.8991, "step": 10602 }, { "epoch": 1.5010971897784384, "grad_norm": 9.565536914358804, "learning_rate": 7.721894987996076e-07, "loss": 1.0257, "step": 10603 }, { "epoch": 1.5012387626530757, "grad_norm": 9.735909085453441, "learning_rate": 7.717752721888058e-07, "loss": 1.0535, "step": 10604 }, { "epoch": 1.5013803355277129, "grad_norm": 9.717706654959077, "learning_rate": 7.713611364294904e-07, "loss": 0.9466, "step": 10605 }, { "epoch": 1.5015219084023501, "grad_norm": 8.685568615401731, "learning_rate": 7.709470915434309e-07, "loss": 0.983, "step": 10606 }, { "epoch": 1.5016634812769873, "grad_norm": 12.07347121897388, "learning_rate": 7.705331375523928e-07, "loss": 1.0591, "step": 10607 }, { "epoch": 1.5018050541516246, "grad_norm": 10.532883152415648, "learning_rate": 7.701192744781389e-07, "loss": 1.1105, "step": 10608 }, { "epoch": 1.5019466270262618, "grad_norm": 7.991414469961678, "learning_rate": 7.69705502342424e-07, "loss": 0.9266, "step": 10609 }, { "epoch": 1.502088199900899, "grad_norm": 9.605052540291377, "learning_rate": 7.692918211670017e-07, "loss": 1.0391, "step": 10610 }, { "epoch": 1.5022297727755363, "grad_norm": 10.240100825528632, "learning_rate": 7.68878230973617e-07, "loss": 0.9847, "step": 10611 }, { "epoch": 1.5023713456501735, "grad_norm": 8.287339760452433, "learning_rate": 7.684647317840138e-07, "loss": 0.9959, "step": 10612 }, { "epoch": 1.5025129185248107, "grad_norm": 9.537718332173933, "learning_rate": 7.680513236199275e-07, "loss": 0.9851, "step": 10613 }, { "epoch": 1.502654491399448, "grad_norm": 10.588304048202787, "learning_rate": 7.676380065030928e-07, "loss": 0.972, "step": 10614 }, { "epoch": 1.5027960642740852, "grad_norm": 8.527191650040677, "learning_rate": 7.672247804552355e-07, "loss": 0.947, "step": 10615 }, { "epoch": 1.5029376371487224, "grad_norm": 9.449708027845276, "learning_rate": 7.668116454980804e-07, "loss": 1.0377, "step": 10616 }, { "epoch": 1.5030792100233596, "grad_norm": 10.406714443347195, "learning_rate": 7.663986016533446e-07, "loss": 1.0094, "step": 10617 }, { "epoch": 1.5032207828979969, "grad_norm": 9.080247220610369, "learning_rate": 7.659856489427409e-07, "loss": 1.0919, "step": 10618 }, { "epoch": 1.503362355772634, "grad_norm": 10.919145802495374, "learning_rate": 7.655727873879792e-07, "loss": 0.9483, "step": 10619 }, { "epoch": 1.5035039286472713, "grad_norm": 9.288801723762807, "learning_rate": 7.651600170107626e-07, "loss": 0.8591, "step": 10620 }, { "epoch": 1.5036455015219086, "grad_norm": 7.1614632753297345, "learning_rate": 7.647473378327908e-07, "loss": 0.897, "step": 10621 }, { "epoch": 1.5037870743965456, "grad_norm": 10.625561508600233, "learning_rate": 7.64334749875757e-07, "loss": 0.9727, "step": 10622 }, { "epoch": 1.5039286472711828, "grad_norm": 9.454243945269244, "learning_rate": 7.639222531613519e-07, "loss": 0.8509, "step": 10623 }, { "epoch": 1.50407022014582, "grad_norm": 9.409400995143226, "learning_rate": 7.635098477112588e-07, "loss": 0.899, "step": 10624 }, { "epoch": 1.5042117930204573, "grad_norm": 8.993203033717412, "learning_rate": 7.63097533547158e-07, "loss": 1.0293, "step": 10625 }, { "epoch": 1.5043533658950945, "grad_norm": 9.665854803610111, "learning_rate": 7.626853106907256e-07, "loss": 1.0635, "step": 10626 }, { "epoch": 1.5044949387697317, "grad_norm": 8.486619627832953, "learning_rate": 7.622731791636312e-07, "loss": 1.0205, "step": 10627 }, { "epoch": 1.504636511644369, "grad_norm": 9.444150488772923, "learning_rate": 7.6186113898754e-07, "loss": 0.9947, "step": 10628 }, { "epoch": 1.5047780845190062, "grad_norm": 8.394150431974005, "learning_rate": 7.614491901841118e-07, "loss": 1.0009, "step": 10629 }, { "epoch": 1.5049196573936434, "grad_norm": 8.613999137458555, "learning_rate": 7.610373327750034e-07, "loss": 0.9792, "step": 10630 }, { "epoch": 1.5050612302682806, "grad_norm": 8.341154573507232, "learning_rate": 7.606255667818668e-07, "loss": 0.9266, "step": 10631 }, { "epoch": 1.5052028031429177, "grad_norm": 11.444909202578824, "learning_rate": 7.602138922263461e-07, "loss": 0.9699, "step": 10632 }, { "epoch": 1.5053443760175549, "grad_norm": 8.896201156647216, "learning_rate": 7.598023091300849e-07, "loss": 0.9945, "step": 10633 }, { "epoch": 1.5054859488921921, "grad_norm": 9.849415532310788, "learning_rate": 7.593908175147177e-07, "loss": 1.0149, "step": 10634 }, { "epoch": 1.5056275217668293, "grad_norm": 11.076198910818265, "learning_rate": 7.589794174018786e-07, "loss": 1.037, "step": 10635 }, { "epoch": 1.5057690946414666, "grad_norm": 11.036541326239883, "learning_rate": 7.585681088131921e-07, "loss": 1.0438, "step": 10636 }, { "epoch": 1.5059106675161038, "grad_norm": 10.58448661067335, "learning_rate": 7.581568917702828e-07, "loss": 0.9594, "step": 10637 }, { "epoch": 1.506052240390741, "grad_norm": 8.7547507331957, "learning_rate": 7.577457662947668e-07, "loss": 0.9471, "step": 10638 }, { "epoch": 1.5061938132653783, "grad_norm": 8.741977174357766, "learning_rate": 7.573347324082558e-07, "loss": 0.8496, "step": 10639 }, { "epoch": 1.5063353861400155, "grad_norm": 7.305835262434812, "learning_rate": 7.569237901323595e-07, "loss": 0.8473, "step": 10640 }, { "epoch": 1.5064769590146527, "grad_norm": 10.05253382383252, "learning_rate": 7.565129394886792e-07, "loss": 1.0053, "step": 10641 }, { "epoch": 1.50661853188929, "grad_norm": 7.576724208082395, "learning_rate": 7.561021804988141e-07, "loss": 0.9412, "step": 10642 }, { "epoch": 1.5067601047639272, "grad_norm": 8.092350547902427, "learning_rate": 7.556915131843568e-07, "loss": 0.9467, "step": 10643 }, { "epoch": 1.5069016776385644, "grad_norm": 8.158801852506103, "learning_rate": 7.552809375668965e-07, "loss": 0.9285, "step": 10644 }, { "epoch": 1.5070432505132016, "grad_norm": 8.818206887178262, "learning_rate": 7.548704536680157e-07, "loss": 0.9113, "step": 10645 }, { "epoch": 1.5071848233878389, "grad_norm": 8.522519517620518, "learning_rate": 7.544600615092937e-07, "loss": 0.9896, "step": 10646 }, { "epoch": 1.507326396262476, "grad_norm": 10.635755054696215, "learning_rate": 7.540497611123058e-07, "loss": 0.9882, "step": 10647 }, { "epoch": 1.5074679691371133, "grad_norm": 9.286451121848247, "learning_rate": 7.536395524986201e-07, "loss": 0.9327, "step": 10648 }, { "epoch": 1.5076095420117506, "grad_norm": 8.951233882135766, "learning_rate": 7.532294356898006e-07, "loss": 0.9544, "step": 10649 }, { "epoch": 1.5077511148863878, "grad_norm": 10.42202177616378, "learning_rate": 7.528194107074069e-07, "loss": 0.9747, "step": 10650 }, { "epoch": 1.507892687761025, "grad_norm": 9.093262125148497, "learning_rate": 7.524094775729948e-07, "loss": 0.9765, "step": 10651 }, { "epoch": 1.5080342606356623, "grad_norm": 7.76767339880595, "learning_rate": 7.519996363081123e-07, "loss": 0.8716, "step": 10652 }, { "epoch": 1.5081758335102995, "grad_norm": 11.070592991109585, "learning_rate": 7.515898869343058e-07, "loss": 0.9953, "step": 10653 }, { "epoch": 1.5083174063849367, "grad_norm": 8.39990176188835, "learning_rate": 7.51180229473116e-07, "loss": 1.0624, "step": 10654 }, { "epoch": 1.508458979259574, "grad_norm": 9.487885229358978, "learning_rate": 7.507706639460768e-07, "loss": 0.884, "step": 10655 }, { "epoch": 1.5086005521342112, "grad_norm": 10.065443283757311, "learning_rate": 7.503611903747202e-07, "loss": 1.0184, "step": 10656 }, { "epoch": 1.5087421250088484, "grad_norm": 10.210285192199809, "learning_rate": 7.499518087805704e-07, "loss": 1.0307, "step": 10657 }, { "epoch": 1.5088836978834856, "grad_norm": 8.833630490852885, "learning_rate": 7.495425191851502e-07, "loss": 0.9901, "step": 10658 }, { "epoch": 1.5090252707581229, "grad_norm": 9.399595828193808, "learning_rate": 7.491333216099744e-07, "loss": 0.9351, "step": 10659 }, { "epoch": 1.50916684363276, "grad_norm": 9.980419443982905, "learning_rate": 7.487242160765535e-07, "loss": 1.0136, "step": 10660 }, { "epoch": 1.5093084165073973, "grad_norm": 10.219024374541851, "learning_rate": 7.483152026063955e-07, "loss": 0.9687, "step": 10661 }, { "epoch": 1.5094499893820346, "grad_norm": 9.34018029245195, "learning_rate": 7.479062812210005e-07, "loss": 0.9202, "step": 10662 }, { "epoch": 1.5095915622566716, "grad_norm": 7.183639418356739, "learning_rate": 7.474974519418668e-07, "loss": 0.9223, "step": 10663 }, { "epoch": 1.5097331351313088, "grad_norm": 9.708559047274424, "learning_rate": 7.470887147904845e-07, "loss": 1.005, "step": 10664 }, { "epoch": 1.509874708005946, "grad_norm": 10.298688297635552, "learning_rate": 7.466800697883422e-07, "loss": 0.9181, "step": 10665 }, { "epoch": 1.5100162808805833, "grad_norm": 9.09271842637545, "learning_rate": 7.462715169569204e-07, "loss": 0.9792, "step": 10666 }, { "epoch": 1.5101578537552205, "grad_norm": 9.169464470421183, "learning_rate": 7.458630563176983e-07, "loss": 0.9206, "step": 10667 }, { "epoch": 1.5102994266298577, "grad_norm": 8.237917751434189, "learning_rate": 7.454546878921465e-07, "loss": 0.9241, "step": 10668 }, { "epoch": 1.510440999504495, "grad_norm": 8.384871856492781, "learning_rate": 7.450464117017342e-07, "loss": 0.8389, "step": 10669 }, { "epoch": 1.5105825723791322, "grad_norm": 8.570435348367484, "learning_rate": 7.446382277679235e-07, "loss": 0.9227, "step": 10670 }, { "epoch": 1.5107241452537694, "grad_norm": 9.092783873319245, "learning_rate": 7.442301361121718e-07, "loss": 0.9956, "step": 10671 }, { "epoch": 1.5108657181284066, "grad_norm": 11.120334289795288, "learning_rate": 7.438221367559331e-07, "loss": 1.0708, "step": 10672 }, { "epoch": 1.5110072910030437, "grad_norm": 9.095335936798742, "learning_rate": 7.434142297206546e-07, "loss": 0.9131, "step": 10673 }, { "epoch": 1.5111488638776809, "grad_norm": 8.559528809885835, "learning_rate": 7.43006415027781e-07, "loss": 0.9403, "step": 10674 }, { "epoch": 1.5112904367523181, "grad_norm": 10.066337659229841, "learning_rate": 7.425986926987494e-07, "loss": 0.9394, "step": 10675 }, { "epoch": 1.5114320096269553, "grad_norm": 9.888729160842859, "learning_rate": 7.421910627549942e-07, "loss": 1.0814, "step": 10676 }, { "epoch": 1.5115735825015926, "grad_norm": 12.001372576734127, "learning_rate": 7.417835252179447e-07, "loss": 0.9898, "step": 10677 }, { "epoch": 1.5117151553762298, "grad_norm": 9.062702045161531, "learning_rate": 7.413760801090239e-07, "loss": 0.9495, "step": 10678 }, { "epoch": 1.511856728250867, "grad_norm": 8.937772306548815, "learning_rate": 7.409687274496516e-07, "loss": 0.8965, "step": 10679 }, { "epoch": 1.5119983011255043, "grad_norm": 8.748239830633208, "learning_rate": 7.405614672612421e-07, "loss": 0.9959, "step": 10680 }, { "epoch": 1.5121398740001415, "grad_norm": 10.523443662419316, "learning_rate": 7.401542995652033e-07, "loss": 0.9809, "step": 10681 }, { "epoch": 1.5122814468747787, "grad_norm": 9.450849174008825, "learning_rate": 7.397472243829418e-07, "loss": 0.9253, "step": 10682 }, { "epoch": 1.512423019749416, "grad_norm": 9.583970244983979, "learning_rate": 7.393402417358552e-07, "loss": 0.9436, "step": 10683 }, { "epoch": 1.5125645926240532, "grad_norm": 9.031679136646947, "learning_rate": 7.389333516453403e-07, "loss": 0.9714, "step": 10684 }, { "epoch": 1.5127061654986904, "grad_norm": 10.532465923986718, "learning_rate": 7.385265541327852e-07, "loss": 1.1005, "step": 10685 }, { "epoch": 1.5128477383733276, "grad_norm": 8.36445431987861, "learning_rate": 7.381198492195762e-07, "loss": 0.8993, "step": 10686 }, { "epoch": 1.5129893112479649, "grad_norm": 9.339066061830088, "learning_rate": 7.377132369270926e-07, "loss": 0.9379, "step": 10687 }, { "epoch": 1.513130884122602, "grad_norm": 11.927612044208532, "learning_rate": 7.373067172767107e-07, "loss": 0.9677, "step": 10688 }, { "epoch": 1.5132724569972393, "grad_norm": 10.170550704044258, "learning_rate": 7.369002902897998e-07, "loss": 0.9955, "step": 10689 }, { "epoch": 1.5134140298718766, "grad_norm": 11.74118945359269, "learning_rate": 7.364939559877265e-07, "loss": 0.9679, "step": 10690 }, { "epoch": 1.5135556027465138, "grad_norm": 7.227276938924113, "learning_rate": 7.360877143918512e-07, "loss": 0.9004, "step": 10691 }, { "epoch": 1.513697175621151, "grad_norm": 11.401673953648759, "learning_rate": 7.356815655235286e-07, "loss": 1.0292, "step": 10692 }, { "epoch": 1.5138387484957883, "grad_norm": 10.500263029164586, "learning_rate": 7.352755094041114e-07, "loss": 0.9596, "step": 10693 }, { "epoch": 1.5139803213704255, "grad_norm": 8.70595034859308, "learning_rate": 7.348695460549443e-07, "loss": 0.966, "step": 10694 }, { "epoch": 1.5141218942450627, "grad_norm": 8.879298928569227, "learning_rate": 7.344636754973695e-07, "loss": 0.9694, "step": 10695 }, { "epoch": 1.5142634671197, "grad_norm": 10.128686681118996, "learning_rate": 7.340578977527221e-07, "loss": 0.8536, "step": 10696 }, { "epoch": 1.5144050399943372, "grad_norm": 8.513803381712602, "learning_rate": 7.336522128423351e-07, "loss": 0.9073, "step": 10697 }, { "epoch": 1.5145466128689744, "grad_norm": 8.521825258552576, "learning_rate": 7.332466207875336e-07, "loss": 0.8711, "step": 10698 }, { "epoch": 1.5146881857436116, "grad_norm": 11.942192713102356, "learning_rate": 7.328411216096399e-07, "loss": 1.0558, "step": 10699 }, { "epoch": 1.5148297586182489, "grad_norm": 10.242694763611176, "learning_rate": 7.324357153299714e-07, "loss": 1.0306, "step": 10700 }, { "epoch": 1.514971331492886, "grad_norm": 8.546016224566092, "learning_rate": 7.320304019698393e-07, "loss": 1.0671, "step": 10701 }, { "epoch": 1.5151129043675233, "grad_norm": 7.765835734219396, "learning_rate": 7.31625181550551e-07, "loss": 0.9858, "step": 10702 }, { "epoch": 1.5152544772421606, "grad_norm": 9.464564037086408, "learning_rate": 7.312200540934073e-07, "loss": 0.9316, "step": 10703 }, { "epoch": 1.5153960501167976, "grad_norm": 8.797088349433258, "learning_rate": 7.308150196197064e-07, "loss": 0.9041, "step": 10704 }, { "epoch": 1.5155376229914348, "grad_norm": 11.905298390243823, "learning_rate": 7.30410078150742e-07, "loss": 1.0085, "step": 10705 }, { "epoch": 1.515679195866072, "grad_norm": 10.310290660590486, "learning_rate": 7.300052297077992e-07, "loss": 0.946, "step": 10706 }, { "epoch": 1.5158207687407093, "grad_norm": 10.043875001652903, "learning_rate": 7.296004743121627e-07, "loss": 1.0319, "step": 10707 }, { "epoch": 1.5159623416153465, "grad_norm": 9.019138755535451, "learning_rate": 7.291958119851081e-07, "loss": 0.9939, "step": 10708 }, { "epoch": 1.5161039144899837, "grad_norm": 8.698082934101953, "learning_rate": 7.287912427479105e-07, "loss": 0.9575, "step": 10709 }, { "epoch": 1.516245487364621, "grad_norm": 9.394357456812424, "learning_rate": 7.283867666218355e-07, "loss": 1.0526, "step": 10710 }, { "epoch": 1.5163870602392582, "grad_norm": 9.700225937561768, "learning_rate": 7.27982383628148e-07, "loss": 0.9654, "step": 10711 }, { "epoch": 1.5165286331138954, "grad_norm": 9.557991510834869, "learning_rate": 7.275780937881055e-07, "loss": 0.8925, "step": 10712 }, { "epoch": 1.5166702059885326, "grad_norm": 7.977222441480454, "learning_rate": 7.2717389712296e-07, "loss": 0.9986, "step": 10713 }, { "epoch": 1.5168117788631699, "grad_norm": 8.340699513325129, "learning_rate": 7.267697936539619e-07, "loss": 0.9223, "step": 10714 }, { "epoch": 1.5169533517378069, "grad_norm": 8.505039292053565, "learning_rate": 7.263657834023527e-07, "loss": 0.9138, "step": 10715 }, { "epoch": 1.517094924612444, "grad_norm": 8.75351448322932, "learning_rate": 7.259618663893725e-07, "loss": 0.9752, "step": 10716 }, { "epoch": 1.5172364974870813, "grad_norm": 9.41880999003685, "learning_rate": 7.255580426362535e-07, "loss": 1.0748, "step": 10717 }, { "epoch": 1.5173780703617186, "grad_norm": 9.784082946999659, "learning_rate": 7.25154312164226e-07, "loss": 0.9663, "step": 10718 }, { "epoch": 1.5175196432363558, "grad_norm": 8.499707834047012, "learning_rate": 7.247506749945118e-07, "loss": 1.0213, "step": 10719 }, { "epoch": 1.517661216110993, "grad_norm": 10.376849641576518, "learning_rate": 7.243471311483322e-07, "loss": 1.0541, "step": 10720 }, { "epoch": 1.5178027889856303, "grad_norm": 12.028954224956289, "learning_rate": 7.239436806468989e-07, "loss": 1.0823, "step": 10721 }, { "epoch": 1.5179443618602675, "grad_norm": 9.425691404202963, "learning_rate": 7.235403235114227e-07, "loss": 1.0524, "step": 10722 }, { "epoch": 1.5180859347349047, "grad_norm": 11.37050934065909, "learning_rate": 7.231370597631071e-07, "loss": 1.0289, "step": 10723 }, { "epoch": 1.518227507609542, "grad_norm": 7.8184447597136195, "learning_rate": 7.227338894231509e-07, "loss": 1.0203, "step": 10724 }, { "epoch": 1.5183690804841792, "grad_norm": 9.350526731617391, "learning_rate": 7.223308125127495e-07, "loss": 0.9015, "step": 10725 }, { "epoch": 1.5185106533588164, "grad_norm": 9.40655740127889, "learning_rate": 7.219278290530909e-07, "loss": 0.8697, "step": 10726 }, { "epoch": 1.5186522262334536, "grad_norm": 8.66796875, "learning_rate": 7.215249390653609e-07, "loss": 0.9373, "step": 10727 }, { "epoch": 1.5187937991080909, "grad_norm": 8.962354552978853, "learning_rate": 7.211221425707393e-07, "loss": 1.0046, "step": 10728 }, { "epoch": 1.518935371982728, "grad_norm": 11.435318535561903, "learning_rate": 7.207194395903997e-07, "loss": 1.0444, "step": 10729 }, { "epoch": 1.5190769448573653, "grad_norm": 12.227499714478759, "learning_rate": 7.203168301455129e-07, "loss": 1.0014, "step": 10730 }, { "epoch": 1.5192185177320026, "grad_norm": 8.944462978220102, "learning_rate": 7.199143142572429e-07, "loss": 0.9715, "step": 10731 }, { "epoch": 1.5193600906066398, "grad_norm": 9.774896770429782, "learning_rate": 7.195118919467506e-07, "loss": 1.0474, "step": 10732 }, { "epoch": 1.519501663481277, "grad_norm": 8.87468106408663, "learning_rate": 7.191095632351908e-07, "loss": 0.9354, "step": 10733 }, { "epoch": 1.5196432363559143, "grad_norm": 8.18551495199275, "learning_rate": 7.187073281437124e-07, "loss": 0.9651, "step": 10734 }, { "epoch": 1.5197848092305515, "grad_norm": 9.580693508841183, "learning_rate": 7.183051866934626e-07, "loss": 0.9761, "step": 10735 }, { "epoch": 1.5199263821051887, "grad_norm": 7.841893436127159, "learning_rate": 7.179031389055796e-07, "loss": 0.9616, "step": 10736 }, { "epoch": 1.520067954979826, "grad_norm": 8.763052659829524, "learning_rate": 7.175011848012009e-07, "loss": 1.0372, "step": 10737 }, { "epoch": 1.5202095278544632, "grad_norm": 9.059067069350585, "learning_rate": 7.170993244014548e-07, "loss": 0.9439, "step": 10738 }, { "epoch": 1.5203511007291004, "grad_norm": 9.133318727950323, "learning_rate": 7.166975577274687e-07, "loss": 1.0085, "step": 10739 }, { "epoch": 1.5204926736037376, "grad_norm": 9.942418063072884, "learning_rate": 7.162958848003615e-07, "loss": 1.0268, "step": 10740 }, { "epoch": 1.5206342464783749, "grad_norm": 9.668214169864825, "learning_rate": 7.158943056412504e-07, "loss": 1.033, "step": 10741 }, { "epoch": 1.520775819353012, "grad_norm": 8.543415715473662, "learning_rate": 7.154928202712447e-07, "loss": 0.996, "step": 10742 }, { "epoch": 1.5209173922276493, "grad_norm": 9.824541029753629, "learning_rate": 7.150914287114513e-07, "loss": 0.8967, "step": 10743 }, { "epoch": 1.5210589651022866, "grad_norm": 9.284115127526794, "learning_rate": 7.146901309829709e-07, "loss": 0.9664, "step": 10744 }, { "epoch": 1.5212005379769238, "grad_norm": 9.443670617608744, "learning_rate": 7.142889271068984e-07, "loss": 0.9824, "step": 10745 }, { "epoch": 1.5213421108515608, "grad_norm": 9.57077645559077, "learning_rate": 7.138878171043262e-07, "loss": 0.9877, "step": 10746 }, { "epoch": 1.521483683726198, "grad_norm": 10.485597087283846, "learning_rate": 7.134868009963389e-07, "loss": 0.9932, "step": 10747 }, { "epoch": 1.5216252566008353, "grad_norm": 8.780587493775108, "learning_rate": 7.13085878804019e-07, "loss": 1.1221, "step": 10748 }, { "epoch": 1.5217668294754725, "grad_norm": 9.542299163599763, "learning_rate": 7.126850505484415e-07, "loss": 0.9239, "step": 10749 }, { "epoch": 1.5219084023501097, "grad_norm": 10.037614271147683, "learning_rate": 7.122843162506781e-07, "loss": 1.1384, "step": 10750 }, { "epoch": 1.522049975224747, "grad_norm": 7.936968447691375, "learning_rate": 7.118836759317963e-07, "loss": 0.976, "step": 10751 }, { "epoch": 1.5221915480993842, "grad_norm": 8.135317311349795, "learning_rate": 7.114831296128552e-07, "loss": 0.9636, "step": 10752 }, { "epoch": 1.5223331209740214, "grad_norm": 9.869791323109053, "learning_rate": 7.110826773149135e-07, "loss": 0.9541, "step": 10753 }, { "epoch": 1.5224746938486586, "grad_norm": 9.005709638407659, "learning_rate": 7.106823190590214e-07, "loss": 1.1021, "step": 10754 }, { "epoch": 1.5226162667232959, "grad_norm": 10.129800670956008, "learning_rate": 7.102820548662257e-07, "loss": 0.9987, "step": 10755 }, { "epoch": 1.5227578395979329, "grad_norm": 9.787970138697561, "learning_rate": 7.09881884757567e-07, "loss": 0.9791, "step": 10756 }, { "epoch": 1.52289941247257, "grad_norm": 11.095652758257891, "learning_rate": 7.094818087540827e-07, "loss": 1.0703, "step": 10757 }, { "epoch": 1.5230409853472073, "grad_norm": 10.231261826719837, "learning_rate": 7.090818268768057e-07, "loss": 1.0468, "step": 10758 }, { "epoch": 1.5231825582218446, "grad_norm": 10.047821142755124, "learning_rate": 7.086819391467612e-07, "loss": 0.9785, "step": 10759 }, { "epoch": 1.5233241310964818, "grad_norm": 7.495455064147363, "learning_rate": 7.082821455849717e-07, "loss": 0.8861, "step": 10760 }, { "epoch": 1.523465703971119, "grad_norm": 10.224370035104661, "learning_rate": 7.078824462124534e-07, "loss": 0.9594, "step": 10761 }, { "epoch": 1.5236072768457563, "grad_norm": 8.077688651778931, "learning_rate": 7.074828410502193e-07, "loss": 0.9016, "step": 10762 }, { "epoch": 1.5237488497203935, "grad_norm": 8.325352878365962, "learning_rate": 7.07083330119275e-07, "loss": 0.9562, "step": 10763 }, { "epoch": 1.5238904225950307, "grad_norm": 9.847354095919862, "learning_rate": 7.066839134406239e-07, "loss": 0.9162, "step": 10764 }, { "epoch": 1.524031995469668, "grad_norm": 9.0896448261968, "learning_rate": 7.062845910352622e-07, "loss": 1.0289, "step": 10765 }, { "epoch": 1.5241735683443052, "grad_norm": 8.719110078109992, "learning_rate": 7.058853629241816e-07, "loss": 0.9337, "step": 10766 }, { "epoch": 1.5243151412189424, "grad_norm": 6.870059093237851, "learning_rate": 7.054862291283702e-07, "loss": 0.9769, "step": 10767 }, { "epoch": 1.5244567140935796, "grad_norm": 10.563303538800351, "learning_rate": 7.050871896688091e-07, "loss": 1.0638, "step": 10768 }, { "epoch": 1.5245982869682169, "grad_norm": 10.03375612130649, "learning_rate": 7.046882445664768e-07, "loss": 0.8844, "step": 10769 }, { "epoch": 1.524739859842854, "grad_norm": 9.752166116177449, "learning_rate": 7.042893938423442e-07, "loss": 0.9199, "step": 10770 }, { "epoch": 1.5248814327174913, "grad_norm": 9.479233782743737, "learning_rate": 7.038906375173799e-07, "loss": 1.0425, "step": 10771 }, { "epoch": 1.5250230055921286, "grad_norm": 8.761387417804515, "learning_rate": 7.034919756125447e-07, "loss": 0.8953, "step": 10772 }, { "epoch": 1.5251645784667658, "grad_norm": 8.58439965399058, "learning_rate": 7.030934081487969e-07, "loss": 0.9808, "step": 10773 }, { "epoch": 1.525306151341403, "grad_norm": 9.693266696552215, "learning_rate": 7.026949351470894e-07, "loss": 0.9943, "step": 10774 }, { "epoch": 1.5254477242160402, "grad_norm": 10.047520072482245, "learning_rate": 7.022965566283693e-07, "loss": 0.9828, "step": 10775 }, { "epoch": 1.5255892970906775, "grad_norm": 9.815073635546062, "learning_rate": 7.018982726135787e-07, "loss": 1.0293, "step": 10776 }, { "epoch": 1.5257308699653147, "grad_norm": 7.856622232800528, "learning_rate": 7.015000831236543e-07, "loss": 0.9221, "step": 10777 }, { "epoch": 1.525872442839952, "grad_norm": 10.840277651788773, "learning_rate": 7.011019881795298e-07, "loss": 1.0542, "step": 10778 }, { "epoch": 1.5260140157145892, "grad_norm": 9.794719550191038, "learning_rate": 7.00703987802133e-07, "loss": 1.0875, "step": 10779 }, { "epoch": 1.5261555885892264, "grad_norm": 8.869060409307478, "learning_rate": 7.003060820123852e-07, "loss": 1.0044, "step": 10780 }, { "epoch": 1.5262971614638636, "grad_norm": 9.647814773046772, "learning_rate": 6.999082708312055e-07, "loss": 1.0955, "step": 10781 }, { "epoch": 1.5264387343385009, "grad_norm": 8.266077270825965, "learning_rate": 6.99510554279505e-07, "loss": 0.8961, "step": 10782 }, { "epoch": 1.526580307213138, "grad_norm": 9.466331651517574, "learning_rate": 6.991129323781931e-07, "loss": 1.0153, "step": 10783 }, { "epoch": 1.5267218800877753, "grad_norm": 10.364976200089316, "learning_rate": 6.987154051481707e-07, "loss": 0.9216, "step": 10784 }, { "epoch": 1.5268634529624125, "grad_norm": 8.170569233481906, "learning_rate": 6.98317972610337e-07, "loss": 0.8875, "step": 10785 }, { "epoch": 1.5270050258370498, "grad_norm": 8.042422824023312, "learning_rate": 6.979206347855843e-07, "loss": 0.8476, "step": 10786 }, { "epoch": 1.5271465987116868, "grad_norm": 8.983192432362532, "learning_rate": 6.975233916947993e-07, "loss": 0.9151, "step": 10787 }, { "epoch": 1.527288171586324, "grad_norm": 11.776600696916296, "learning_rate": 6.971262433588663e-07, "loss": 0.9816, "step": 10788 }, { "epoch": 1.5274297444609612, "grad_norm": 10.66040995999388, "learning_rate": 6.967291897986617e-07, "loss": 0.9557, "step": 10789 }, { "epoch": 1.5275713173355985, "grad_norm": 10.69497472816253, "learning_rate": 6.963322310350598e-07, "loss": 0.9694, "step": 10790 }, { "epoch": 1.5277128902102357, "grad_norm": 10.802401198023817, "learning_rate": 6.959353670889269e-07, "loss": 1.0117, "step": 10791 }, { "epoch": 1.527854463084873, "grad_norm": 10.52829309263073, "learning_rate": 6.955385979811275e-07, "loss": 0.8988, "step": 10792 }, { "epoch": 1.5279960359595102, "grad_norm": 8.60872452454213, "learning_rate": 6.951419237325174e-07, "loss": 0.9314, "step": 10793 }, { "epoch": 1.5281376088341474, "grad_norm": 8.748328785017268, "learning_rate": 6.947453443639515e-07, "loss": 0.8901, "step": 10794 }, { "epoch": 1.5282791817087846, "grad_norm": 9.004135029631438, "learning_rate": 6.943488598962761e-07, "loss": 0.9501, "step": 10795 }, { "epoch": 1.5284207545834219, "grad_norm": 8.135778232669072, "learning_rate": 6.939524703503356e-07, "loss": 0.8712, "step": 10796 }, { "epoch": 1.528562327458059, "grad_norm": 10.540163880436815, "learning_rate": 6.93556175746967e-07, "loss": 1.0037, "step": 10797 }, { "epoch": 1.528703900332696, "grad_norm": 10.36177304505397, "learning_rate": 6.931599761070027e-07, "loss": 1.0155, "step": 10798 }, { "epoch": 1.5288454732073333, "grad_norm": 7.559920446391067, "learning_rate": 6.927638714512716e-07, "loss": 0.8965, "step": 10799 }, { "epoch": 1.5289870460819706, "grad_norm": 7.159913836765241, "learning_rate": 6.923678618005958e-07, "loss": 0.9671, "step": 10800 }, { "epoch": 1.5291286189566078, "grad_norm": 9.311738872786378, "learning_rate": 6.919719471757938e-07, "loss": 0.9886, "step": 10801 }, { "epoch": 1.529270191831245, "grad_norm": 8.97139730161679, "learning_rate": 6.915761275976787e-07, "loss": 1.0124, "step": 10802 }, { "epoch": 1.5294117647058822, "grad_norm": 10.403616173820884, "learning_rate": 6.911804030870578e-07, "loss": 1.0548, "step": 10803 }, { "epoch": 1.5295533375805195, "grad_norm": 9.188360264784395, "learning_rate": 6.90784773664735e-07, "loss": 1.0113, "step": 10804 }, { "epoch": 1.5296949104551567, "grad_norm": 9.433365276089383, "learning_rate": 6.903892393515068e-07, "loss": 0.9648, "step": 10805 }, { "epoch": 1.529836483329794, "grad_norm": 9.209918637988034, "learning_rate": 6.89993800168168e-07, "loss": 0.9521, "step": 10806 }, { "epoch": 1.5299780562044312, "grad_norm": 10.447614143735724, "learning_rate": 6.895984561355054e-07, "loss": 1.0176, "step": 10807 }, { "epoch": 1.5301196290790684, "grad_norm": 9.991995087084085, "learning_rate": 6.892032072743013e-07, "loss": 1.0487, "step": 10808 }, { "epoch": 1.5302612019537056, "grad_norm": 9.355513602449895, "learning_rate": 6.888080536053351e-07, "loss": 0.9612, "step": 10809 }, { "epoch": 1.5304027748283429, "grad_norm": 9.978983347144043, "learning_rate": 6.884129951493785e-07, "loss": 1.0236, "step": 10810 }, { "epoch": 1.53054434770298, "grad_norm": 10.853871835132225, "learning_rate": 6.880180319272006e-07, "loss": 1.0297, "step": 10811 }, { "epoch": 1.5306859205776173, "grad_norm": 9.319859815861218, "learning_rate": 6.876231639595629e-07, "loss": 0.9977, "step": 10812 }, { "epoch": 1.5308274934522546, "grad_norm": 10.813767904571714, "learning_rate": 6.872283912672247e-07, "loss": 0.9525, "step": 10813 }, { "epoch": 1.5309690663268918, "grad_norm": 8.379277745261017, "learning_rate": 6.868337138709377e-07, "loss": 1.0198, "step": 10814 }, { "epoch": 1.531110639201529, "grad_norm": 11.248833150750258, "learning_rate": 6.864391317914512e-07, "loss": 0.9318, "step": 10815 }, { "epoch": 1.5312522120761662, "grad_norm": 10.459301866971504, "learning_rate": 6.860446450495068e-07, "loss": 0.998, "step": 10816 }, { "epoch": 1.5313937849508035, "grad_norm": 8.926102206346538, "learning_rate": 6.856502536658433e-07, "loss": 0.9408, "step": 10817 }, { "epoch": 1.5315353578254407, "grad_norm": 9.941443854300195, "learning_rate": 6.852559576611931e-07, "loss": 1.0553, "step": 10818 }, { "epoch": 1.531676930700078, "grad_norm": 9.167251799432544, "learning_rate": 6.848617570562832e-07, "loss": 0.9191, "step": 10819 }, { "epoch": 1.5318185035747152, "grad_norm": 8.678535501718585, "learning_rate": 6.844676518718385e-07, "loss": 1.0511, "step": 10820 }, { "epoch": 1.5319600764493524, "grad_norm": 9.567731635422314, "learning_rate": 6.840736421285746e-07, "loss": 0.9951, "step": 10821 }, { "epoch": 1.5321016493239896, "grad_norm": 8.130437352052136, "learning_rate": 6.83679727847206e-07, "loss": 0.8765, "step": 10822 }, { "epoch": 1.5322432221986269, "grad_norm": 9.986415409786257, "learning_rate": 6.832859090484392e-07, "loss": 1.0369, "step": 10823 }, { "epoch": 1.532384795073264, "grad_norm": 8.939291607741756, "learning_rate": 6.828921857529774e-07, "loss": 1.0274, "step": 10824 }, { "epoch": 1.5325263679479013, "grad_norm": 9.380736363510046, "learning_rate": 6.824985579815194e-07, "loss": 1.0957, "step": 10825 }, { "epoch": 1.5326679408225385, "grad_norm": 9.147482153496401, "learning_rate": 6.821050257547562e-07, "loss": 0.9641, "step": 10826 }, { "epoch": 1.5328095136971758, "grad_norm": 10.0977639168583, "learning_rate": 6.817115890933773e-07, "loss": 0.961, "step": 10827 }, { "epoch": 1.532951086571813, "grad_norm": 8.902517721259741, "learning_rate": 6.813182480180641e-07, "loss": 1.1087, "step": 10828 }, { "epoch": 1.53309265944645, "grad_norm": 7.974924365794118, "learning_rate": 6.809250025494946e-07, "loss": 0.9622, "step": 10829 }, { "epoch": 1.5332342323210872, "grad_norm": 9.649258252034356, "learning_rate": 6.805318527083407e-07, "loss": 0.9455, "step": 10830 }, { "epoch": 1.5333758051957245, "grad_norm": 9.316166712722474, "learning_rate": 6.801387985152705e-07, "loss": 0.9668, "step": 10831 }, { "epoch": 1.5335173780703617, "grad_norm": 8.696836874326914, "learning_rate": 6.797458399909476e-07, "loss": 1.0449, "step": 10832 }, { "epoch": 1.533658950944999, "grad_norm": 8.97688504030521, "learning_rate": 6.793529771560278e-07, "loss": 0.9557, "step": 10833 }, { "epoch": 1.5338005238196362, "grad_norm": 8.28589289806259, "learning_rate": 6.789602100311654e-07, "loss": 0.9862, "step": 10834 }, { "epoch": 1.5339420966942734, "grad_norm": 10.620290632563554, "learning_rate": 6.785675386370061e-07, "loss": 1.0876, "step": 10835 }, { "epoch": 1.5340836695689106, "grad_norm": 9.628459754480586, "learning_rate": 6.781749629941938e-07, "loss": 1.0032, "step": 10836 }, { "epoch": 1.5342252424435479, "grad_norm": 10.801080748560029, "learning_rate": 6.777824831233645e-07, "loss": 1.0052, "step": 10837 }, { "epoch": 1.534366815318185, "grad_norm": 9.032645886528806, "learning_rate": 6.773900990451523e-07, "loss": 1.0284, "step": 10838 }, { "epoch": 1.534508388192822, "grad_norm": 9.605347622126674, "learning_rate": 6.769978107801837e-07, "loss": 1.0819, "step": 10839 }, { "epoch": 1.5346499610674593, "grad_norm": 9.996487954443793, "learning_rate": 6.766056183490799e-07, "loss": 0.9396, "step": 10840 }, { "epoch": 1.5347915339420966, "grad_norm": 9.401932748988576, "learning_rate": 6.7621352177246e-07, "loss": 1.0378, "step": 10841 }, { "epoch": 1.5349331068167338, "grad_norm": 9.557202436288724, "learning_rate": 6.758215210709345e-07, "loss": 1.0325, "step": 10842 }, { "epoch": 1.535074679691371, "grad_norm": 9.565265330469677, "learning_rate": 6.754296162651122e-07, "loss": 0.9008, "step": 10843 }, { "epoch": 1.5352162525660082, "grad_norm": 9.70160814306182, "learning_rate": 6.750378073755939e-07, "loss": 1.0011, "step": 10844 }, { "epoch": 1.5353578254406455, "grad_norm": 9.16278905061309, "learning_rate": 6.746460944229783e-07, "loss": 0.9835, "step": 10845 }, { "epoch": 1.5354993983152827, "grad_norm": 11.057128636719609, "learning_rate": 6.742544774278553e-07, "loss": 0.9851, "step": 10846 }, { "epoch": 1.53564097118992, "grad_norm": 8.2230519686549, "learning_rate": 6.738629564108134e-07, "loss": 0.9544, "step": 10847 }, { "epoch": 1.5357825440645572, "grad_norm": 10.111025738850595, "learning_rate": 6.734715313924348e-07, "loss": 0.9746, "step": 10848 }, { "epoch": 1.5359241169391944, "grad_norm": 10.205109337571885, "learning_rate": 6.730802023932962e-07, "loss": 1.0783, "step": 10849 }, { "epoch": 1.5360656898138316, "grad_norm": 8.928834355433935, "learning_rate": 6.726889694339689e-07, "loss": 0.8882, "step": 10850 }, { "epoch": 1.5362072626884689, "grad_norm": 9.359384781923103, "learning_rate": 6.72297832535019e-07, "loss": 0.9998, "step": 10851 }, { "epoch": 1.536348835563106, "grad_norm": 9.48416544700029, "learning_rate": 6.719067917170105e-07, "loss": 0.9895, "step": 10852 }, { "epoch": 1.5364904084377433, "grad_norm": 8.695427440558827, "learning_rate": 6.715158470004979e-07, "loss": 0.9843, "step": 10853 }, { "epoch": 1.5366319813123805, "grad_norm": 9.159891387428793, "learning_rate": 6.711249984060337e-07, "loss": 0.8669, "step": 10854 }, { "epoch": 1.5367735541870178, "grad_norm": 9.510289693084365, "learning_rate": 6.707342459541655e-07, "loss": 0.9905, "step": 10855 }, { "epoch": 1.536915127061655, "grad_norm": 9.248549888973304, "learning_rate": 6.703435896654334e-07, "loss": 1.0953, "step": 10856 }, { "epoch": 1.5370566999362922, "grad_norm": 8.837643081823298, "learning_rate": 6.699530295603751e-07, "loss": 0.9012, "step": 10857 }, { "epoch": 1.5371982728109295, "grad_norm": 10.532984921773346, "learning_rate": 6.695625656595209e-07, "loss": 1.0144, "step": 10858 }, { "epoch": 1.5373398456855667, "grad_norm": 9.231552699659035, "learning_rate": 6.691721979833984e-07, "loss": 1.0992, "step": 10859 }, { "epoch": 1.537481418560204, "grad_norm": 11.073272115625068, "learning_rate": 6.687819265525286e-07, "loss": 0.9751, "step": 10860 }, { "epoch": 1.5376229914348412, "grad_norm": 7.801448482121089, "learning_rate": 6.683917513874266e-07, "loss": 0.9307, "step": 10861 }, { "epoch": 1.5377645643094784, "grad_norm": 10.446319326125298, "learning_rate": 6.680016725086053e-07, "loss": 0.9453, "step": 10862 }, { "epoch": 1.5379061371841156, "grad_norm": 9.89363406722486, "learning_rate": 6.676116899365692e-07, "loss": 0.9793, "step": 10863 }, { "epoch": 1.5380477100587528, "grad_norm": 9.296043243386412, "learning_rate": 6.67221803691821e-07, "loss": 0.9917, "step": 10864 }, { "epoch": 1.53818928293339, "grad_norm": 7.130739543809631, "learning_rate": 6.668320137948556e-07, "loss": 0.9053, "step": 10865 }, { "epoch": 1.5383308558080273, "grad_norm": 8.084883026805576, "learning_rate": 6.664423202661649e-07, "loss": 0.9742, "step": 10866 }, { "epoch": 1.5384724286826645, "grad_norm": 9.289917055546072, "learning_rate": 6.660527231262334e-07, "loss": 0.9804, "step": 10867 }, { "epoch": 1.5386140015573018, "grad_norm": 9.503671037214712, "learning_rate": 6.656632223955437e-07, "loss": 0.8393, "step": 10868 }, { "epoch": 1.538755574431939, "grad_norm": 9.983010546561289, "learning_rate": 6.652738180945698e-07, "loss": 0.9962, "step": 10869 }, { "epoch": 1.538897147306576, "grad_norm": 9.00616540597741, "learning_rate": 6.648845102437839e-07, "loss": 0.9351, "step": 10870 }, { "epoch": 1.5390387201812132, "grad_norm": 9.373605853370192, "learning_rate": 6.644952988636514e-07, "loss": 0.9496, "step": 10871 }, { "epoch": 1.5391802930558505, "grad_norm": 9.84531547571769, "learning_rate": 6.641061839746313e-07, "loss": 0.9894, "step": 10872 }, { "epoch": 1.5393218659304877, "grad_norm": 10.18551897317607, "learning_rate": 6.637171655971811e-07, "loss": 1.055, "step": 10873 }, { "epoch": 1.539463438805125, "grad_norm": 9.850855028249967, "learning_rate": 6.633282437517496e-07, "loss": 0.8661, "step": 10874 }, { "epoch": 1.5396050116797622, "grad_norm": 9.547986154777163, "learning_rate": 6.629394184587826e-07, "loss": 0.9255, "step": 10875 }, { "epoch": 1.5397465845543994, "grad_norm": 9.376732017740483, "learning_rate": 6.625506897387215e-07, "loss": 1.1049, "step": 10876 }, { "epoch": 1.5398881574290366, "grad_norm": 11.034250515927313, "learning_rate": 6.621620576119999e-07, "loss": 0.9517, "step": 10877 }, { "epoch": 1.5400297303036738, "grad_norm": 7.289286226721266, "learning_rate": 6.617735220990495e-07, "loss": 0.9117, "step": 10878 }, { "epoch": 1.540171303178311, "grad_norm": 10.595234904689319, "learning_rate": 6.613850832202934e-07, "loss": 0.8983, "step": 10879 }, { "epoch": 1.5403128760529483, "grad_norm": 9.474598302856343, "learning_rate": 6.609967409961531e-07, "loss": 1.0366, "step": 10880 }, { "epoch": 1.5404544489275853, "grad_norm": 8.436438373605334, "learning_rate": 6.606084954470434e-07, "loss": 0.9311, "step": 10881 }, { "epoch": 1.5405960218022225, "grad_norm": 9.570835046321022, "learning_rate": 6.602203465933727e-07, "loss": 1.0173, "step": 10882 }, { "epoch": 1.5407375946768598, "grad_norm": 10.115352979790755, "learning_rate": 6.598322944555471e-07, "loss": 0.9074, "step": 10883 }, { "epoch": 1.540879167551497, "grad_norm": 10.001754225407122, "learning_rate": 6.594443390539651e-07, "loss": 1.0526, "step": 10884 }, { "epoch": 1.5410207404261342, "grad_norm": 7.6432781332723065, "learning_rate": 6.590564804090224e-07, "loss": 0.946, "step": 10885 }, { "epoch": 1.5411623133007715, "grad_norm": 9.991570925684723, "learning_rate": 6.586687185411073e-07, "loss": 1.0508, "step": 10886 }, { "epoch": 1.5413038861754087, "grad_norm": 8.987527894466231, "learning_rate": 6.582810534706055e-07, "loss": 0.9978, "step": 10887 }, { "epoch": 1.541445459050046, "grad_norm": 10.544540395546898, "learning_rate": 6.578934852178945e-07, "loss": 1.0092, "step": 10888 }, { "epoch": 1.5415870319246832, "grad_norm": 8.424059752568459, "learning_rate": 6.575060138033504e-07, "loss": 1.0183, "step": 10889 }, { "epoch": 1.5417286047993204, "grad_norm": 8.184353522308589, "learning_rate": 6.571186392473406e-07, "loss": 0.8266, "step": 10890 }, { "epoch": 1.5418701776739576, "grad_norm": 8.337625949257172, "learning_rate": 6.567313615702304e-07, "loss": 0.8927, "step": 10891 }, { "epoch": 1.5420117505485949, "grad_norm": 10.87657311998925, "learning_rate": 6.563441807923782e-07, "loss": 1.0264, "step": 10892 }, { "epoch": 1.542153323423232, "grad_norm": 8.732156051995764, "learning_rate": 6.559570969341369e-07, "loss": 0.9762, "step": 10893 }, { "epoch": 1.5422948962978693, "grad_norm": 11.47426478870896, "learning_rate": 6.555701100158571e-07, "loss": 0.9076, "step": 10894 }, { "epoch": 1.5424364691725065, "grad_norm": 11.34105269886175, "learning_rate": 6.551832200578803e-07, "loss": 0.9349, "step": 10895 }, { "epoch": 1.5425780420471438, "grad_norm": 9.906314308126243, "learning_rate": 6.547964270805468e-07, "loss": 0.9732, "step": 10896 }, { "epoch": 1.542719614921781, "grad_norm": 10.665167782693707, "learning_rate": 6.544097311041888e-07, "loss": 1.0259, "step": 10897 }, { "epoch": 1.5428611877964182, "grad_norm": 10.891444609133726, "learning_rate": 6.54023132149135e-07, "loss": 0.9673, "step": 10898 }, { "epoch": 1.5430027606710555, "grad_norm": 9.229732269338687, "learning_rate": 6.536366302357094e-07, "loss": 0.9385, "step": 10899 }, { "epoch": 1.5431443335456927, "grad_norm": 10.016895136287827, "learning_rate": 6.532502253842288e-07, "loss": 1.0921, "step": 10900 }, { "epoch": 1.54328590642033, "grad_norm": 9.635935726447055, "learning_rate": 6.528639176150072e-07, "loss": 1.0077, "step": 10901 }, { "epoch": 1.5434274792949672, "grad_norm": 10.056101405419211, "learning_rate": 6.524777069483526e-07, "loss": 1.0027, "step": 10902 }, { "epoch": 1.5435690521696044, "grad_norm": 8.812238107672114, "learning_rate": 6.520915934045674e-07, "loss": 0.9952, "step": 10903 }, { "epoch": 1.5437106250442416, "grad_norm": 6.97700948447367, "learning_rate": 6.517055770039482e-07, "loss": 0.9693, "step": 10904 }, { "epoch": 1.5438521979188788, "grad_norm": 9.002387683764184, "learning_rate": 6.51319657766789e-07, "loss": 0.9229, "step": 10905 }, { "epoch": 1.543993770793516, "grad_norm": 9.631455275559226, "learning_rate": 6.509338357133776e-07, "loss": 1.0499, "step": 10906 }, { "epoch": 1.5441353436681533, "grad_norm": 9.59222618700457, "learning_rate": 6.50548110863995e-07, "loss": 1.0389, "step": 10907 }, { "epoch": 1.5442769165427905, "grad_norm": 10.714349583026712, "learning_rate": 6.501624832389197e-07, "loss": 0.9874, "step": 10908 }, { "epoch": 1.5444184894174278, "grad_norm": 9.273804012212766, "learning_rate": 6.497769528584227e-07, "loss": 0.9108, "step": 10909 }, { "epoch": 1.544560062292065, "grad_norm": 8.57599595405711, "learning_rate": 6.493915197427727e-07, "loss": 1.0103, "step": 10910 }, { "epoch": 1.5447016351667022, "grad_norm": 9.205806425742516, "learning_rate": 6.490061839122297e-07, "loss": 0.937, "step": 10911 }, { "epoch": 1.5448432080413392, "grad_norm": 9.807208730245996, "learning_rate": 6.486209453870523e-07, "loss": 1.0145, "step": 10912 }, { "epoch": 1.5449847809159765, "grad_norm": 8.894613165932627, "learning_rate": 6.482358041874914e-07, "loss": 0.8484, "step": 10913 }, { "epoch": 1.5451263537906137, "grad_norm": 10.205772068212223, "learning_rate": 6.478507603337928e-07, "loss": 0.8997, "step": 10914 }, { "epoch": 1.545267926665251, "grad_norm": 10.777728138532954, "learning_rate": 6.474658138461992e-07, "loss": 1.0574, "step": 10915 }, { "epoch": 1.5454094995398882, "grad_norm": 8.553084271145492, "learning_rate": 6.470809647449458e-07, "loss": 0.971, "step": 10916 }, { "epoch": 1.5455510724145254, "grad_norm": 8.157687122701786, "learning_rate": 6.466962130502655e-07, "loss": 0.8774, "step": 10917 }, { "epoch": 1.5456926452891626, "grad_norm": 9.854077543752194, "learning_rate": 6.463115587823824e-07, "loss": 1.0206, "step": 10918 }, { "epoch": 1.5458342181637998, "grad_norm": 10.321133785214911, "learning_rate": 6.459270019615191e-07, "loss": 0.967, "step": 10919 }, { "epoch": 1.545975791038437, "grad_norm": 8.520796075442561, "learning_rate": 6.455425426078904e-07, "loss": 0.9514, "step": 10920 }, { "epoch": 1.5461173639130743, "grad_norm": 9.216913382991635, "learning_rate": 6.451581807417074e-07, "loss": 0.948, "step": 10921 }, { "epoch": 1.5462589367877113, "grad_norm": 9.156118763057995, "learning_rate": 6.447739163831765e-07, "loss": 0.9701, "step": 10922 }, { "epoch": 1.5464005096623485, "grad_norm": 8.932247725991289, "learning_rate": 6.443897495524976e-07, "loss": 1.0053, "step": 10923 }, { "epoch": 1.5465420825369858, "grad_norm": 9.691651863574473, "learning_rate": 6.440056802698658e-07, "loss": 0.9697, "step": 10924 }, { "epoch": 1.546683655411623, "grad_norm": 9.003681277693751, "learning_rate": 6.436217085554708e-07, "loss": 1.0305, "step": 10925 }, { "epoch": 1.5468252282862602, "grad_norm": 8.898332469611713, "learning_rate": 6.432378344294992e-07, "loss": 1.0101, "step": 10926 }, { "epoch": 1.5469668011608975, "grad_norm": 7.879788350782335, "learning_rate": 6.428540579121296e-07, "loss": 0.967, "step": 10927 }, { "epoch": 1.5471083740355347, "grad_norm": 8.794528682152952, "learning_rate": 6.424703790235374e-07, "loss": 0.9169, "step": 10928 }, { "epoch": 1.547249946910172, "grad_norm": 9.892397464108011, "learning_rate": 6.420867977838929e-07, "loss": 0.9802, "step": 10929 }, { "epoch": 1.5473915197848092, "grad_norm": 8.204289816853484, "learning_rate": 6.417033142133594e-07, "loss": 1.0065, "step": 10930 }, { "epoch": 1.5475330926594464, "grad_norm": 8.79675271201636, "learning_rate": 6.413199283320979e-07, "loss": 0.9693, "step": 10931 }, { "epoch": 1.5476746655340836, "grad_norm": 11.908062654217009, "learning_rate": 6.40936640160261e-07, "loss": 1.1407, "step": 10932 }, { "epoch": 1.5478162384087208, "grad_norm": 10.076148591936175, "learning_rate": 6.405534497179996e-07, "loss": 1.0452, "step": 10933 }, { "epoch": 1.547957811283358, "grad_norm": 10.339551551878124, "learning_rate": 6.401703570254569e-07, "loss": 1.1995, "step": 10934 }, { "epoch": 1.5480993841579953, "grad_norm": 9.921302118057561, "learning_rate": 6.397873621027711e-07, "loss": 1.0271, "step": 10935 }, { "epoch": 1.5482409570326325, "grad_norm": 8.08364909495889, "learning_rate": 6.394044649700773e-07, "loss": 0.8938, "step": 10936 }, { "epoch": 1.5483825299072698, "grad_norm": 10.234177821931665, "learning_rate": 6.390216656475027e-07, "loss": 1.0561, "step": 10937 }, { "epoch": 1.548524102781907, "grad_norm": 10.56781197326787, "learning_rate": 6.386389641551721e-07, "loss": 1.0073, "step": 10938 }, { "epoch": 1.5486656756565442, "grad_norm": 8.83129868976537, "learning_rate": 6.382563605132027e-07, "loss": 1.0433, "step": 10939 }, { "epoch": 1.5488072485311815, "grad_norm": 9.38195624085262, "learning_rate": 6.37873854741709e-07, "loss": 1.0003, "step": 10940 }, { "epoch": 1.5489488214058187, "grad_norm": 9.249675023324299, "learning_rate": 6.374914468607976e-07, "loss": 1.0807, "step": 10941 }, { "epoch": 1.549090394280456, "grad_norm": 9.058403825763426, "learning_rate": 6.37109136890573e-07, "loss": 0.9751, "step": 10942 }, { "epoch": 1.5492319671550931, "grad_norm": 8.783830908254364, "learning_rate": 6.367269248511309e-07, "loss": 0.9837, "step": 10943 }, { "epoch": 1.5493735400297304, "grad_norm": 8.533628027516299, "learning_rate": 6.363448107625653e-07, "loss": 0.9746, "step": 10944 }, { "epoch": 1.5495151129043676, "grad_norm": 10.13352174416442, "learning_rate": 6.359627946449648e-07, "loss": 1.0401, "step": 10945 }, { "epoch": 1.5496566857790048, "grad_norm": 7.127665506325922, "learning_rate": 6.355808765184088e-07, "loss": 0.977, "step": 10946 }, { "epoch": 1.549798258653642, "grad_norm": 10.614545278633841, "learning_rate": 6.351990564029767e-07, "loss": 1.0025, "step": 10947 }, { "epoch": 1.5499398315282793, "grad_norm": 9.664096052008247, "learning_rate": 6.348173343187392e-07, "loss": 1.0164, "step": 10948 }, { "epoch": 1.5500814044029165, "grad_norm": 8.408520019797768, "learning_rate": 6.344357102857643e-07, "loss": 0.9125, "step": 10949 }, { "epoch": 1.5502229772775538, "grad_norm": 9.244902159697679, "learning_rate": 6.340541843241124e-07, "loss": 0.9836, "step": 10950 }, { "epoch": 1.550364550152191, "grad_norm": 10.448257111124144, "learning_rate": 6.336727564538406e-07, "loss": 0.9796, "step": 10951 }, { "epoch": 1.5505061230268282, "grad_norm": 9.206780166018445, "learning_rate": 6.332914266950011e-07, "loss": 0.9816, "step": 10952 }, { "epoch": 1.5506476959014652, "grad_norm": 7.8665830650844235, "learning_rate": 6.329101950676389e-07, "loss": 1.0046, "step": 10953 }, { "epoch": 1.5507892687761025, "grad_norm": 8.8572067486968, "learning_rate": 6.325290615917961e-07, "loss": 1.0181, "step": 10954 }, { "epoch": 1.5509308416507397, "grad_norm": 11.650285028073414, "learning_rate": 6.321480262875082e-07, "loss": 1.142, "step": 10955 }, { "epoch": 1.551072414525377, "grad_norm": 8.922968648764598, "learning_rate": 6.317670891748051e-07, "loss": 0.9039, "step": 10956 }, { "epoch": 1.5512139874000141, "grad_norm": 8.629540616535818, "learning_rate": 6.313862502737139e-07, "loss": 0.9073, "step": 10957 }, { "epoch": 1.5513555602746514, "grad_norm": 8.99196096179674, "learning_rate": 6.310055096042533e-07, "loss": 1.0911, "step": 10958 }, { "epoch": 1.5514971331492886, "grad_norm": 9.309496094273678, "learning_rate": 6.306248671864404e-07, "loss": 1.0205, "step": 10959 }, { "epoch": 1.5516387060239258, "grad_norm": 10.592346278451116, "learning_rate": 6.302443230402836e-07, "loss": 1.0214, "step": 10960 }, { "epoch": 1.551780278898563, "grad_norm": 8.73609997361657, "learning_rate": 6.298638771857893e-07, "loss": 0.875, "step": 10961 }, { "epoch": 1.5519218517732003, "grad_norm": 9.10901276013792, "learning_rate": 6.294835296429558e-07, "loss": 0.9363, "step": 10962 }, { "epoch": 1.5520634246478373, "grad_norm": 10.19852529598719, "learning_rate": 6.291032804317789e-07, "loss": 1.085, "step": 10963 }, { "epoch": 1.5522049975224745, "grad_norm": 10.98751851189869, "learning_rate": 6.28723129572247e-07, "loss": 0.9899, "step": 10964 }, { "epoch": 1.5523465703971118, "grad_norm": 9.63959179848939, "learning_rate": 6.28343077084346e-07, "loss": 0.9675, "step": 10965 }, { "epoch": 1.552488143271749, "grad_norm": 8.740613862222524, "learning_rate": 6.279631229880534e-07, "loss": 0.9003, "step": 10966 }, { "epoch": 1.5526297161463862, "grad_norm": 7.939882176291325, "learning_rate": 6.27583267303343e-07, "loss": 0.9581, "step": 10967 }, { "epoch": 1.5527712890210235, "grad_norm": 10.028326731620426, "learning_rate": 6.272035100501849e-07, "loss": 1.0365, "step": 10968 }, { "epoch": 1.5529128618956607, "grad_norm": 8.035944772472648, "learning_rate": 6.268238512485412e-07, "loss": 0.9533, "step": 10969 }, { "epoch": 1.553054434770298, "grad_norm": 8.541992578258965, "learning_rate": 6.264442909183715e-07, "loss": 0.9482, "step": 10970 }, { "epoch": 1.5531960076449352, "grad_norm": 11.000963689032082, "learning_rate": 6.260648290796278e-07, "loss": 0.9909, "step": 10971 }, { "epoch": 1.5533375805195724, "grad_norm": 9.519800532198747, "learning_rate": 6.256854657522587e-07, "loss": 0.9916, "step": 10972 }, { "epoch": 1.5534791533942096, "grad_norm": 10.355162994118713, "learning_rate": 6.253062009562078e-07, "loss": 1.0223, "step": 10973 }, { "epoch": 1.5536207262688468, "grad_norm": 9.29872618124386, "learning_rate": 6.249270347114114e-07, "loss": 0.9936, "step": 10974 }, { "epoch": 1.553762299143484, "grad_norm": 10.249158126869265, "learning_rate": 6.245479670378036e-07, "loss": 0.9408, "step": 10975 }, { "epoch": 1.5539038720181213, "grad_norm": 10.087463878013857, "learning_rate": 6.241689979553106e-07, "loss": 1.0064, "step": 10976 }, { "epoch": 1.5540454448927585, "grad_norm": 8.777850922012862, "learning_rate": 6.237901274838546e-07, "loss": 0.9223, "step": 10977 }, { "epoch": 1.5541870177673958, "grad_norm": 9.546153738993317, "learning_rate": 6.234113556433522e-07, "loss": 1.0455, "step": 10978 }, { "epoch": 1.554328590642033, "grad_norm": 10.500862086191907, "learning_rate": 6.230326824537153e-07, "loss": 1.0624, "step": 10979 }, { "epoch": 1.5544701635166702, "grad_norm": 8.741090871092647, "learning_rate": 6.226541079348517e-07, "loss": 0.9611, "step": 10980 }, { "epoch": 1.5546117363913075, "grad_norm": 9.492475239980383, "learning_rate": 6.222756321066609e-07, "loss": 0.974, "step": 10981 }, { "epoch": 1.5547533092659447, "grad_norm": 8.71194684805301, "learning_rate": 6.218972549890409e-07, "loss": 0.9898, "step": 10982 }, { "epoch": 1.554894882140582, "grad_norm": 7.982474681361302, "learning_rate": 6.215189766018812e-07, "loss": 0.8735, "step": 10983 }, { "epoch": 1.5550364550152191, "grad_norm": 8.884531839430785, "learning_rate": 6.211407969650687e-07, "loss": 0.9459, "step": 10984 }, { "epoch": 1.5551780278898564, "grad_norm": 8.464244361854607, "learning_rate": 6.20762716098483e-07, "loss": 0.9777, "step": 10985 }, { "epoch": 1.5553196007644936, "grad_norm": 8.58006011870692, "learning_rate": 6.203847340220006e-07, "loss": 0.9469, "step": 10986 }, { "epoch": 1.5554611736391308, "grad_norm": 9.132858028249435, "learning_rate": 6.200068507554915e-07, "loss": 0.9551, "step": 10987 }, { "epoch": 1.555602746513768, "grad_norm": 8.668018040080677, "learning_rate": 6.196290663188198e-07, "loss": 0.9379, "step": 10988 }, { "epoch": 1.5557443193884053, "grad_norm": 9.875193147339733, "learning_rate": 6.192513807318468e-07, "loss": 0.9728, "step": 10989 }, { "epoch": 1.5558858922630425, "grad_norm": 9.083507524505823, "learning_rate": 6.188737940144254e-07, "loss": 0.9129, "step": 10990 }, { "epoch": 1.5560274651376798, "grad_norm": 10.715586381710814, "learning_rate": 6.184963061864069e-07, "loss": 0.9556, "step": 10991 }, { "epoch": 1.556169038012317, "grad_norm": 8.238400714816597, "learning_rate": 6.181189172676338e-07, "loss": 0.9852, "step": 10992 }, { "epoch": 1.5563106108869542, "grad_norm": 8.834214316527081, "learning_rate": 6.177416272779468e-07, "loss": 0.9749, "step": 10993 }, { "epoch": 1.5564521837615912, "grad_norm": 9.763395448614952, "learning_rate": 6.173644362371783e-07, "loss": 1.0192, "step": 10994 }, { "epoch": 1.5565937566362285, "grad_norm": 11.867027417246481, "learning_rate": 6.169873441651575e-07, "loss": 1.0293, "step": 10995 }, { "epoch": 1.5567353295108657, "grad_norm": 10.310375757779227, "learning_rate": 6.166103510817089e-07, "loss": 1.0274, "step": 10996 }, { "epoch": 1.556876902385503, "grad_norm": 9.074864443772785, "learning_rate": 6.162334570066497e-07, "loss": 1.0671, "step": 10997 }, { "epoch": 1.5570184752601401, "grad_norm": 9.03513007318949, "learning_rate": 6.158566619597933e-07, "loss": 0.9542, "step": 10998 }, { "epoch": 1.5571600481347774, "grad_norm": 9.855992433693693, "learning_rate": 6.154799659609464e-07, "loss": 1.0302, "step": 10999 }, { "epoch": 1.5573016210094146, "grad_norm": 8.771568092383278, "learning_rate": 6.151033690299133e-07, "loss": 1.0592, "step": 11000 }, { "epoch": 1.5574431938840518, "grad_norm": 8.328255543213501, "learning_rate": 6.147268711864898e-07, "loss": 0.8428, "step": 11001 }, { "epoch": 1.557584766758689, "grad_norm": 9.201323405738556, "learning_rate": 6.14350472450469e-07, "loss": 1.0473, "step": 11002 }, { "epoch": 1.5577263396333263, "grad_norm": 8.049450153029913, "learning_rate": 6.139741728416387e-07, "loss": 0.8713, "step": 11003 }, { "epoch": 1.5578679125079635, "grad_norm": 9.938605480964117, "learning_rate": 6.135979723797792e-07, "loss": 1.0513, "step": 11004 }, { "epoch": 1.5580094853826005, "grad_norm": 10.909325108759628, "learning_rate": 6.132218710846683e-07, "loss": 0.965, "step": 11005 }, { "epoch": 1.5581510582572378, "grad_norm": 13.225676469141616, "learning_rate": 6.12845868976076e-07, "loss": 1.0435, "step": 11006 }, { "epoch": 1.558292631131875, "grad_norm": 9.861021167422544, "learning_rate": 6.124699660737702e-07, "loss": 0.9165, "step": 11007 }, { "epoch": 1.5584342040065122, "grad_norm": 9.20830976618402, "learning_rate": 6.120941623975107e-07, "loss": 0.9988, "step": 11008 }, { "epoch": 1.5585757768811495, "grad_norm": 10.154019060863481, "learning_rate": 6.117184579670527e-07, "loss": 1.1105, "step": 11009 }, { "epoch": 1.5587173497557867, "grad_norm": 8.773323581462105, "learning_rate": 6.113428528021481e-07, "loss": 1.054, "step": 11010 }, { "epoch": 1.558858922630424, "grad_norm": 8.998008083742628, "learning_rate": 6.109673469225408e-07, "loss": 0.9344, "step": 11011 }, { "epoch": 1.5590004955050611, "grad_norm": 10.287137742924148, "learning_rate": 6.105919403479724e-07, "loss": 1.104, "step": 11012 }, { "epoch": 1.5591420683796984, "grad_norm": 8.855787085560783, "learning_rate": 6.10216633098176e-07, "loss": 1.0823, "step": 11013 }, { "epoch": 1.5592836412543356, "grad_norm": 13.98419823561468, "learning_rate": 6.098414251928831e-07, "loss": 1.0142, "step": 11014 }, { "epoch": 1.5594252141289728, "grad_norm": 9.6779308680229, "learning_rate": 6.094663166518161e-07, "loss": 0.9437, "step": 11015 }, { "epoch": 1.55956678700361, "grad_norm": 9.074045548282458, "learning_rate": 6.090913074946958e-07, "loss": 0.9625, "step": 11016 }, { "epoch": 1.5597083598782473, "grad_norm": 11.0922334735894, "learning_rate": 6.087163977412352e-07, "loss": 1.0534, "step": 11017 }, { "epoch": 1.5598499327528845, "grad_norm": 9.372061306995908, "learning_rate": 6.083415874111432e-07, "loss": 0.9916, "step": 11018 }, { "epoch": 1.5599915056275218, "grad_norm": 7.489868186941855, "learning_rate": 6.079668765241248e-07, "loss": 0.9538, "step": 11019 }, { "epoch": 1.560133078502159, "grad_norm": 8.356579008166355, "learning_rate": 6.075922650998756e-07, "loss": 0.9737, "step": 11020 }, { "epoch": 1.5602746513767962, "grad_norm": 8.543962669376729, "learning_rate": 6.072177531580909e-07, "loss": 1.0268, "step": 11021 }, { "epoch": 1.5604162242514334, "grad_norm": 8.1689984891612, "learning_rate": 6.068433407184566e-07, "loss": 0.9874, "step": 11022 }, { "epoch": 1.5605577971260707, "grad_norm": 9.28707754167783, "learning_rate": 6.064690278006572e-07, "loss": 0.8961, "step": 11023 }, { "epoch": 1.560699370000708, "grad_norm": 12.67719921768538, "learning_rate": 6.060948144243683e-07, "loss": 0.987, "step": 11024 }, { "epoch": 1.5608409428753451, "grad_norm": 8.385584277892663, "learning_rate": 6.057207006092628e-07, "loss": 0.875, "step": 11025 }, { "epoch": 1.5609825157499824, "grad_norm": 9.298806587679847, "learning_rate": 6.053466863750085e-07, "loss": 1.0237, "step": 11026 }, { "epoch": 1.5611240886246196, "grad_norm": 11.920957277505417, "learning_rate": 6.049727717412654e-07, "loss": 1.0502, "step": 11027 }, { "epoch": 1.5612656614992568, "grad_norm": 8.301438393105814, "learning_rate": 6.045989567276913e-07, "loss": 0.8897, "step": 11028 }, { "epoch": 1.561407234373894, "grad_norm": 8.768949208134561, "learning_rate": 6.042252413539368e-07, "loss": 0.918, "step": 11029 }, { "epoch": 1.5615488072485313, "grad_norm": 9.721813050394273, "learning_rate": 6.038516256396473e-07, "loss": 0.9015, "step": 11030 }, { "epoch": 1.5616903801231685, "grad_norm": 8.988845694739014, "learning_rate": 6.034781096044645e-07, "loss": 0.916, "step": 11031 }, { "epoch": 1.5618319529978058, "grad_norm": 9.614961219269004, "learning_rate": 6.031046932680229e-07, "loss": 0.9826, "step": 11032 }, { "epoch": 1.561973525872443, "grad_norm": 7.54438845027369, "learning_rate": 6.027313766499538e-07, "loss": 0.9196, "step": 11033 }, { "epoch": 1.5621150987470802, "grad_norm": 7.925006796656441, "learning_rate": 6.023581597698807e-07, "loss": 0.8265, "step": 11034 }, { "epoch": 1.5622566716217174, "grad_norm": 10.16637416064443, "learning_rate": 6.019850426474249e-07, "loss": 0.937, "step": 11035 }, { "epoch": 1.5623982444963544, "grad_norm": 9.547972970311388, "learning_rate": 6.016120253021998e-07, "loss": 0.9208, "step": 11036 }, { "epoch": 1.5625398173709917, "grad_norm": 7.672076624689526, "learning_rate": 6.012391077538154e-07, "loss": 0.9269, "step": 11037 }, { "epoch": 1.562681390245629, "grad_norm": 8.277648833747644, "learning_rate": 6.008662900218748e-07, "loss": 0.9484, "step": 11038 }, { "epoch": 1.5628229631202661, "grad_norm": 9.39203774081028, "learning_rate": 6.00493572125978e-07, "loss": 0.9529, "step": 11039 }, { "epoch": 1.5629645359949034, "grad_norm": 9.699096039799679, "learning_rate": 6.001209540857178e-07, "loss": 0.9926, "step": 11040 }, { "epoch": 1.5631061088695406, "grad_norm": 8.910852360740911, "learning_rate": 5.997484359206815e-07, "loss": 1.0085, "step": 11041 }, { "epoch": 1.5632476817441778, "grad_norm": 10.211391774516159, "learning_rate": 5.99376017650454e-07, "loss": 0.9389, "step": 11042 }, { "epoch": 1.563389254618815, "grad_norm": 9.64253717931098, "learning_rate": 5.990036992946114e-07, "loss": 0.9767, "step": 11043 }, { "epoch": 1.5635308274934523, "grad_norm": 8.56890406897868, "learning_rate": 5.986314808727273e-07, "loss": 0.8983, "step": 11044 }, { "epoch": 1.5636724003680895, "grad_norm": 9.598337474552071, "learning_rate": 5.982593624043682e-07, "loss": 0.9848, "step": 11045 }, { "epoch": 1.5638139732427265, "grad_norm": 9.780063545005008, "learning_rate": 5.978873439090968e-07, "loss": 1.0072, "step": 11046 }, { "epoch": 1.5639555461173638, "grad_norm": 9.11711762572303, "learning_rate": 5.975154254064688e-07, "loss": 1.0076, "step": 11047 }, { "epoch": 1.564097118992001, "grad_norm": 9.862162297296807, "learning_rate": 5.971436069160363e-07, "loss": 0.9937, "step": 11048 }, { "epoch": 1.5642386918666382, "grad_norm": 8.625865948094265, "learning_rate": 5.967718884573465e-07, "loss": 1.0984, "step": 11049 }, { "epoch": 1.5643802647412755, "grad_norm": 9.74060779970209, "learning_rate": 5.964002700499394e-07, "loss": 1.0331, "step": 11050 }, { "epoch": 1.5645218376159127, "grad_norm": 8.897739989261538, "learning_rate": 5.960287517133506e-07, "loss": 0.919, "step": 11051 }, { "epoch": 1.56466341049055, "grad_norm": 9.225484579948269, "learning_rate": 5.956573334671098e-07, "loss": 0.9276, "step": 11052 }, { "epoch": 1.5648049833651871, "grad_norm": 10.555822304372331, "learning_rate": 5.952860153307433e-07, "loss": 0.9588, "step": 11053 }, { "epoch": 1.5649465562398244, "grad_norm": 9.538732174614372, "learning_rate": 5.949147973237713e-07, "loss": 1.0093, "step": 11054 }, { "epoch": 1.5650881291144616, "grad_norm": 9.22195430606466, "learning_rate": 5.945436794657072e-07, "loss": 0.8985, "step": 11055 }, { "epoch": 1.5652297019890988, "grad_norm": 11.634272333266459, "learning_rate": 5.941726617760621e-07, "loss": 0.9922, "step": 11056 }, { "epoch": 1.565371274863736, "grad_norm": 9.199025508851468, "learning_rate": 5.938017442743382e-07, "loss": 1.015, "step": 11057 }, { "epoch": 1.5655128477383733, "grad_norm": 9.737952589846072, "learning_rate": 5.934309269800359e-07, "loss": 0.91, "step": 11058 }, { "epoch": 1.5656544206130105, "grad_norm": 7.956883829405718, "learning_rate": 5.930602099126476e-07, "loss": 0.9973, "step": 11059 }, { "epoch": 1.5657959934876478, "grad_norm": 7.96349493544851, "learning_rate": 5.926895930916629e-07, "loss": 0.8813, "step": 11060 }, { "epoch": 1.565937566362285, "grad_norm": 9.513022882875726, "learning_rate": 5.923190765365641e-07, "loss": 1.018, "step": 11061 }, { "epoch": 1.5660791392369222, "grad_norm": 8.503993722994077, "learning_rate": 5.919486602668281e-07, "loss": 0.8953, "step": 11062 }, { "epoch": 1.5662207121115594, "grad_norm": 9.945922736948818, "learning_rate": 5.915783443019293e-07, "loss": 0.9448, "step": 11063 }, { "epoch": 1.5663622849861967, "grad_norm": 9.23593389839847, "learning_rate": 5.912081286613334e-07, "loss": 0.9086, "step": 11064 }, { "epoch": 1.566503857860834, "grad_norm": 8.672067231118262, "learning_rate": 5.908380133645033e-07, "loss": 0.9862, "step": 11065 }, { "epoch": 1.5666454307354711, "grad_norm": 11.191813569703701, "learning_rate": 5.904679984308947e-07, "loss": 0.9571, "step": 11066 }, { "epoch": 1.5667870036101084, "grad_norm": 9.380826233220137, "learning_rate": 5.900980838799603e-07, "loss": 1.0357, "step": 11067 }, { "epoch": 1.5669285764847456, "grad_norm": 11.446422994334709, "learning_rate": 5.897282697311449e-07, "loss": 0.9655, "step": 11068 }, { "epoch": 1.5670701493593828, "grad_norm": 9.081607853034013, "learning_rate": 5.8935855600389e-07, "loss": 0.9448, "step": 11069 }, { "epoch": 1.56721172223402, "grad_norm": 8.919432831938197, "learning_rate": 5.889889427176318e-07, "loss": 1.0529, "step": 11070 }, { "epoch": 1.5673532951086573, "grad_norm": 9.816093807204368, "learning_rate": 5.886194298917994e-07, "loss": 0.9949, "step": 11071 }, { "epoch": 1.5674948679832945, "grad_norm": 8.070171639320067, "learning_rate": 5.882500175458198e-07, "loss": 0.9511, "step": 11072 }, { "epoch": 1.5676364408579317, "grad_norm": 10.178349229112998, "learning_rate": 5.878807056991098e-07, "loss": 1.1206, "step": 11073 }, { "epoch": 1.567778013732569, "grad_norm": 9.204555743518906, "learning_rate": 5.87511494371086e-07, "loss": 0.8781, "step": 11074 }, { "epoch": 1.5679195866072062, "grad_norm": 9.598216256683248, "learning_rate": 5.871423835811566e-07, "loss": 0.9609, "step": 11075 }, { "epoch": 1.5680611594818434, "grad_norm": 9.28617096501062, "learning_rate": 5.867733733487255e-07, "loss": 0.9459, "step": 11076 }, { "epoch": 1.5682027323564804, "grad_norm": 10.411943590700897, "learning_rate": 5.864044636931923e-07, "loss": 0.945, "step": 11077 }, { "epoch": 1.5683443052311177, "grad_norm": 8.041766335721267, "learning_rate": 5.86035654633949e-07, "loss": 0.9544, "step": 11078 }, { "epoch": 1.568485878105755, "grad_norm": 9.88264188958466, "learning_rate": 5.85666946190385e-07, "loss": 1.0569, "step": 11079 }, { "epoch": 1.5686274509803921, "grad_norm": 9.029974296254426, "learning_rate": 5.852983383818813e-07, "loss": 0.9961, "step": 11080 }, { "epoch": 1.5687690238550294, "grad_norm": 9.776297683628798, "learning_rate": 5.84929831227817e-07, "loss": 1.0524, "step": 11081 }, { "epoch": 1.5689105967296666, "grad_norm": 10.950808727760347, "learning_rate": 5.845614247475637e-07, "loss": 0.9419, "step": 11082 }, { "epoch": 1.5690521696043038, "grad_norm": 10.479963167431585, "learning_rate": 5.841931189604874e-07, "loss": 1.0495, "step": 11083 }, { "epoch": 1.569193742478941, "grad_norm": 7.880833962551964, "learning_rate": 5.838249138859509e-07, "loss": 0.9553, "step": 11084 }, { "epoch": 1.5693353153535783, "grad_norm": 8.388708936545022, "learning_rate": 5.834568095433093e-07, "loss": 1.0586, "step": 11085 }, { "epoch": 1.5694768882282155, "grad_norm": 10.851384270607086, "learning_rate": 5.830888059519149e-07, "loss": 0.9221, "step": 11086 }, { "epoch": 1.5696184611028527, "grad_norm": 9.622796140748605, "learning_rate": 5.827209031311121e-07, "loss": 1.0704, "step": 11087 }, { "epoch": 1.5697600339774898, "grad_norm": 9.412702508092954, "learning_rate": 5.823531011002423e-07, "loss": 0.9042, "step": 11088 }, { "epoch": 1.569901606852127, "grad_norm": 9.363416509244152, "learning_rate": 5.819853998786395e-07, "loss": 1.0665, "step": 11089 }, { "epoch": 1.5700431797267642, "grad_norm": 9.68866511075793, "learning_rate": 5.816177994856347e-07, "loss": 0.9937, "step": 11090 }, { "epoch": 1.5701847526014014, "grad_norm": 11.933577766969657, "learning_rate": 5.812502999405514e-07, "loss": 0.9598, "step": 11091 }, { "epoch": 1.5703263254760387, "grad_norm": 10.37265686784475, "learning_rate": 5.80882901262709e-07, "loss": 1.0108, "step": 11092 }, { "epoch": 1.570467898350676, "grad_norm": 9.248401400609561, "learning_rate": 5.805156034714227e-07, "loss": 1.032, "step": 11093 }, { "epoch": 1.5706094712253131, "grad_norm": 9.964100294443767, "learning_rate": 5.801484065859989e-07, "loss": 1.0037, "step": 11094 }, { "epoch": 1.5707510440999504, "grad_norm": 8.830775148404566, "learning_rate": 5.797813106257422e-07, "loss": 1.0082, "step": 11095 }, { "epoch": 1.5708926169745876, "grad_norm": 10.328624349691069, "learning_rate": 5.794143156099497e-07, "loss": 1.0765, "step": 11096 }, { "epoch": 1.5710341898492248, "grad_norm": 11.330444266892462, "learning_rate": 5.79047421557915e-07, "loss": 1.1613, "step": 11097 }, { "epoch": 1.571175762723862, "grad_norm": 8.442892428318174, "learning_rate": 5.786806284889246e-07, "loss": 0.9902, "step": 11098 }, { "epoch": 1.5713173355984993, "grad_norm": 9.229068890492107, "learning_rate": 5.783139364222609e-07, "loss": 0.9467, "step": 11099 }, { "epoch": 1.5714589084731365, "grad_norm": 9.392541369300613, "learning_rate": 5.779473453772017e-07, "loss": 0.9645, "step": 11100 }, { "epoch": 1.5716004813477737, "grad_norm": 11.099023559512313, "learning_rate": 5.775808553730164e-07, "loss": 1.0583, "step": 11101 }, { "epoch": 1.571742054222411, "grad_norm": 8.639161356737928, "learning_rate": 5.772144664289728e-07, "loss": 0.8095, "step": 11102 }, { "epoch": 1.5718836270970482, "grad_norm": 9.281950747562485, "learning_rate": 5.768481785643309e-07, "loss": 1.0259, "step": 11103 }, { "epoch": 1.5720251999716854, "grad_norm": 9.285387549049052, "learning_rate": 5.764819917983458e-07, "loss": 0.8707, "step": 11104 }, { "epoch": 1.5721667728463227, "grad_norm": 9.253768565617774, "learning_rate": 5.761159061502688e-07, "loss": 0.9377, "step": 11105 }, { "epoch": 1.57230834572096, "grad_norm": 9.173719802226664, "learning_rate": 5.757499216393433e-07, "loss": 0.9335, "step": 11106 }, { "epoch": 1.5724499185955971, "grad_norm": 9.01140041971358, "learning_rate": 5.753840382848105e-07, "loss": 0.9058, "step": 11107 }, { "epoch": 1.5725914914702344, "grad_norm": 8.461274285627496, "learning_rate": 5.750182561059031e-07, "loss": 0.9629, "step": 11108 }, { "epoch": 1.5727330643448716, "grad_norm": 10.001947213372105, "learning_rate": 5.746525751218512e-07, "loss": 0.9247, "step": 11109 }, { "epoch": 1.5728746372195088, "grad_norm": 9.800315338043855, "learning_rate": 5.742869953518773e-07, "loss": 0.9844, "step": 11110 }, { "epoch": 1.573016210094146, "grad_norm": 9.904868622264326, "learning_rate": 5.739215168152007e-07, "loss": 0.9594, "step": 11111 }, { "epoch": 1.5731577829687833, "grad_norm": 9.406599982458086, "learning_rate": 5.735561395310333e-07, "loss": 0.9334, "step": 11112 }, { "epoch": 1.5732993558434205, "grad_norm": 10.939382859698906, "learning_rate": 5.731908635185837e-07, "loss": 1.0812, "step": 11113 }, { "epoch": 1.5734409287180577, "grad_norm": 7.789863935464896, "learning_rate": 5.728256887970537e-07, "loss": 0.9819, "step": 11114 }, { "epoch": 1.573582501592695, "grad_norm": 8.965033103256255, "learning_rate": 5.724606153856396e-07, "loss": 1.0218, "step": 11115 }, { "epoch": 1.5737240744673322, "grad_norm": 11.510730546955278, "learning_rate": 5.720956433035346e-07, "loss": 1.1036, "step": 11116 }, { "epoch": 1.5738656473419694, "grad_norm": 9.010220552342515, "learning_rate": 5.717307725699234e-07, "loss": 0.9885, "step": 11117 }, { "epoch": 1.5740072202166067, "grad_norm": 10.508651075971809, "learning_rate": 5.713660032039884e-07, "loss": 1.0222, "step": 11118 }, { "epoch": 1.5741487930912437, "grad_norm": 9.015519642338244, "learning_rate": 5.710013352249039e-07, "loss": 0.8769, "step": 11119 }, { "epoch": 1.574290365965881, "grad_norm": 8.897133320527583, "learning_rate": 5.706367686518414e-07, "loss": 0.9199, "step": 11120 }, { "epoch": 1.5744319388405181, "grad_norm": 8.929684936843465, "learning_rate": 5.702723035039648e-07, "loss": 0.9325, "step": 11121 }, { "epoch": 1.5745735117151554, "grad_norm": 10.068794508965288, "learning_rate": 5.699079398004342e-07, "loss": 1.0588, "step": 11122 }, { "epoch": 1.5747150845897926, "grad_norm": 10.780247915386154, "learning_rate": 5.695436775604049e-07, "loss": 0.998, "step": 11123 }, { "epoch": 1.5748566574644298, "grad_norm": 10.213698698967052, "learning_rate": 5.691795168030242e-07, "loss": 0.9574, "step": 11124 }, { "epoch": 1.574998230339067, "grad_norm": 7.539129290507252, "learning_rate": 5.688154575474384e-07, "loss": 0.9325, "step": 11125 }, { "epoch": 1.5751398032137043, "grad_norm": 9.274784186389544, "learning_rate": 5.684514998127822e-07, "loss": 1.0004, "step": 11126 }, { "epoch": 1.5752813760883415, "grad_norm": 10.697803906543228, "learning_rate": 5.680876436181907e-07, "loss": 0.9314, "step": 11127 }, { "epoch": 1.5754229489629787, "grad_norm": 9.67513347208382, "learning_rate": 5.677238889827918e-07, "loss": 0.9936, "step": 11128 }, { "epoch": 1.5755645218376158, "grad_norm": 8.106537015447008, "learning_rate": 5.673602359257069e-07, "loss": 0.9865, "step": 11129 }, { "epoch": 1.575706094712253, "grad_norm": 8.982106965764412, "learning_rate": 5.669966844660538e-07, "loss": 1.026, "step": 11130 }, { "epoch": 1.5758476675868902, "grad_norm": 9.573313464442526, "learning_rate": 5.66633234622943e-07, "loss": 0.9691, "step": 11131 }, { "epoch": 1.5759892404615274, "grad_norm": 10.012807655132095, "learning_rate": 5.662698864154823e-07, "loss": 1.0006, "step": 11132 }, { "epoch": 1.5761308133361647, "grad_norm": 8.84602763363418, "learning_rate": 5.65906639862771e-07, "loss": 1.1026, "step": 11133 }, { "epoch": 1.576272386210802, "grad_norm": 9.214861550825324, "learning_rate": 5.655434949839061e-07, "loss": 0.9035, "step": 11134 }, { "epoch": 1.5764139590854391, "grad_norm": 8.920817137098748, "learning_rate": 5.651804517979775e-07, "loss": 0.9252, "step": 11135 }, { "epoch": 1.5765555319600764, "grad_norm": 11.138519302117869, "learning_rate": 5.648175103240694e-07, "loss": 0.9981, "step": 11136 }, { "epoch": 1.5766971048347136, "grad_norm": 9.852175833405722, "learning_rate": 5.64454670581262e-07, "loss": 0.8099, "step": 11137 }, { "epoch": 1.5768386777093508, "grad_norm": 8.4225566637687, "learning_rate": 5.64091932588629e-07, "loss": 0.8763, "step": 11138 }, { "epoch": 1.576980250583988, "grad_norm": 8.693180559762284, "learning_rate": 5.637292963652405e-07, "loss": 0.9491, "step": 11139 }, { "epoch": 1.5771218234586253, "grad_norm": 9.778824673103854, "learning_rate": 5.63366761930158e-07, "loss": 0.9834, "step": 11140 }, { "epoch": 1.5772633963332625, "grad_norm": 9.625173988255964, "learning_rate": 5.630043293024418e-07, "loss": 1.0759, "step": 11141 }, { "epoch": 1.5774049692078997, "grad_norm": 10.00867657947636, "learning_rate": 5.62641998501143e-07, "loss": 0.9334, "step": 11142 }, { "epoch": 1.577546542082537, "grad_norm": 8.80211140705274, "learning_rate": 5.622797695453106e-07, "loss": 1.0084, "step": 11143 }, { "epoch": 1.5776881149571742, "grad_norm": 8.079652033196668, "learning_rate": 5.619176424539849e-07, "loss": 0.9529, "step": 11144 }, { "epoch": 1.5778296878318114, "grad_norm": 9.771050836437306, "learning_rate": 5.615556172462039e-07, "loss": 0.9839, "step": 11145 }, { "epoch": 1.5779712607064487, "grad_norm": 9.260296014458211, "learning_rate": 5.611936939409998e-07, "loss": 0.9247, "step": 11146 }, { "epoch": 1.578112833581086, "grad_norm": 10.129004922629678, "learning_rate": 5.608318725573964e-07, "loss": 0.9801, "step": 11147 }, { "epoch": 1.5782544064557231, "grad_norm": 9.720928181102277, "learning_rate": 5.604701531144164e-07, "loss": 0.9917, "step": 11148 }, { "epoch": 1.5783959793303604, "grad_norm": 7.634676187439815, "learning_rate": 5.601085356310734e-07, "loss": 0.9388, "step": 11149 }, { "epoch": 1.5785375522049976, "grad_norm": 9.491779183604752, "learning_rate": 5.597470201263783e-07, "loss": 1.0232, "step": 11150 }, { "epoch": 1.5786791250796348, "grad_norm": 11.184632344041834, "learning_rate": 5.593856066193362e-07, "loss": 1.0255, "step": 11151 }, { "epoch": 1.578820697954272, "grad_norm": 8.855901666331356, "learning_rate": 5.590242951289451e-07, "loss": 1.0595, "step": 11152 }, { "epoch": 1.5789622708289093, "grad_norm": 9.82498754447167, "learning_rate": 5.586630856742004e-07, "loss": 0.9797, "step": 11153 }, { "epoch": 1.5791038437035465, "grad_norm": 9.798393460373843, "learning_rate": 5.58301978274089e-07, "loss": 1.0323, "step": 11154 }, { "epoch": 1.5792454165781837, "grad_norm": 8.385279481764737, "learning_rate": 5.579409729475954e-07, "loss": 0.9289, "step": 11155 }, { "epoch": 1.579386989452821, "grad_norm": 10.197781295319485, "learning_rate": 5.575800697136968e-07, "loss": 0.9545, "step": 11156 }, { "epoch": 1.5795285623274582, "grad_norm": 9.719406252964028, "learning_rate": 5.572192685913652e-07, "loss": 0.9169, "step": 11157 }, { "epoch": 1.5796701352020954, "grad_norm": 10.807401833336588, "learning_rate": 5.568585695995684e-07, "loss": 1.0688, "step": 11158 }, { "epoch": 1.5798117080767327, "grad_norm": 10.13357708119148, "learning_rate": 5.564979727572673e-07, "loss": 0.9404, "step": 11159 }, { "epoch": 1.5799532809513697, "grad_norm": 10.909803101382606, "learning_rate": 5.561374780834192e-07, "loss": 1.0362, "step": 11160 }, { "epoch": 1.580094853826007, "grad_norm": 8.785598275618279, "learning_rate": 5.557770855969738e-07, "loss": 1.0107, "step": 11161 }, { "epoch": 1.5802364267006441, "grad_norm": 9.536346773793362, "learning_rate": 5.554167953168779e-07, "loss": 0.8931, "step": 11162 }, { "epoch": 1.5803779995752814, "grad_norm": 10.065468676021437, "learning_rate": 5.550566072620705e-07, "loss": 0.9747, "step": 11163 }, { "epoch": 1.5805195724499186, "grad_norm": 9.601935557108842, "learning_rate": 5.54696521451488e-07, "loss": 0.9786, "step": 11164 }, { "epoch": 1.5806611453245558, "grad_norm": 10.314772846718174, "learning_rate": 5.54336537904058e-07, "loss": 0.9573, "step": 11165 }, { "epoch": 1.580802718199193, "grad_norm": 8.126901726811152, "learning_rate": 5.539766566387053e-07, "loss": 0.9301, "step": 11166 }, { "epoch": 1.5809442910738303, "grad_norm": 9.847776721669886, "learning_rate": 5.536168776743503e-07, "loss": 1.0656, "step": 11167 }, { "epoch": 1.5810858639484675, "grad_norm": 9.136289536733358, "learning_rate": 5.532572010299034e-07, "loss": 1.0218, "step": 11168 }, { "epoch": 1.5812274368231047, "grad_norm": 8.005679022197013, "learning_rate": 5.528976267242745e-07, "loss": 0.9856, "step": 11169 }, { "epoch": 1.581369009697742, "grad_norm": 10.354979904648951, "learning_rate": 5.525381547763647e-07, "loss": 1.0614, "step": 11170 }, { "epoch": 1.581510582572379, "grad_norm": 10.28582865787678, "learning_rate": 5.52178785205073e-07, "loss": 0.9737, "step": 11171 }, { "epoch": 1.5816521554470162, "grad_norm": 9.792181079960397, "learning_rate": 5.518195180292893e-07, "loss": 0.9766, "step": 11172 }, { "epoch": 1.5817937283216534, "grad_norm": 8.493543528895705, "learning_rate": 5.514603532679011e-07, "loss": 0.9312, "step": 11173 }, { "epoch": 1.5819353011962907, "grad_norm": 7.969560409924841, "learning_rate": 5.511012909397898e-07, "loss": 0.8787, "step": 11174 }, { "epoch": 1.582076874070928, "grad_norm": 7.570098338527742, "learning_rate": 5.507423310638299e-07, "loss": 0.9848, "step": 11175 }, { "epoch": 1.5822184469455651, "grad_norm": 10.85006626653871, "learning_rate": 5.503834736588929e-07, "loss": 1.0832, "step": 11176 }, { "epoch": 1.5823600198202024, "grad_norm": 9.534778179595056, "learning_rate": 5.500247187438429e-07, "loss": 0.9899, "step": 11177 }, { "epoch": 1.5825015926948396, "grad_norm": 10.468750364388987, "learning_rate": 5.496660663375389e-07, "loss": 0.9681, "step": 11178 }, { "epoch": 1.5826431655694768, "grad_norm": 10.400871284374261, "learning_rate": 5.49307516458836e-07, "loss": 0.9448, "step": 11179 }, { "epoch": 1.582784738444114, "grad_norm": 10.162120824217556, "learning_rate": 5.489490691265819e-07, "loss": 0.9497, "step": 11180 }, { "epoch": 1.5829263113187513, "grad_norm": 9.361855205339495, "learning_rate": 5.485907243596214e-07, "loss": 0.9158, "step": 11181 }, { "epoch": 1.5830678841933885, "grad_norm": 8.753280460675425, "learning_rate": 5.482324821767904e-07, "loss": 0.9433, "step": 11182 }, { "epoch": 1.5832094570680257, "grad_norm": 10.320180171606214, "learning_rate": 5.478743425969235e-07, "loss": 0.9081, "step": 11183 }, { "epoch": 1.583351029942663, "grad_norm": 10.198264583982038, "learning_rate": 5.47516305638846e-07, "loss": 1.026, "step": 11184 }, { "epoch": 1.5834926028173002, "grad_norm": 10.054265604007957, "learning_rate": 5.471583713213812e-07, "loss": 0.9333, "step": 11185 }, { "epoch": 1.5836341756919374, "grad_norm": 8.636410449325505, "learning_rate": 5.468005396633442e-07, "loss": 0.9974, "step": 11186 }, { "epoch": 1.5837757485665747, "grad_norm": 8.93012408026155, "learning_rate": 5.464428106835467e-07, "loss": 0.9067, "step": 11187 }, { "epoch": 1.583917321441212, "grad_norm": 10.313480954764831, "learning_rate": 5.460851844007945e-07, "loss": 1.0287, "step": 11188 }, { "epoch": 1.5840588943158491, "grad_norm": 6.38031801298159, "learning_rate": 5.457276608338862e-07, "loss": 0.8505, "step": 11189 }, { "epoch": 1.5842004671904863, "grad_norm": 9.25660469771639, "learning_rate": 5.453702400016186e-07, "loss": 0.8442, "step": 11190 }, { "epoch": 1.5843420400651236, "grad_norm": 8.9333674340048, "learning_rate": 5.450129219227792e-07, "loss": 0.9508, "step": 11191 }, { "epoch": 1.5844836129397608, "grad_norm": 8.687895621752869, "learning_rate": 5.446557066161537e-07, "loss": 0.9727, "step": 11192 }, { "epoch": 1.584625185814398, "grad_norm": 10.092963126014379, "learning_rate": 5.442985941005188e-07, "loss": 1.0242, "step": 11193 }, { "epoch": 1.5847667586890353, "grad_norm": 9.781708630421821, "learning_rate": 5.439415843946493e-07, "loss": 0.9958, "step": 11194 }, { "epoch": 1.5849083315636725, "grad_norm": 9.150573845884441, "learning_rate": 5.435846775173115e-07, "loss": 1.0655, "step": 11195 }, { "epoch": 1.5850499044383097, "grad_norm": 8.098283713027081, "learning_rate": 5.432278734872687e-07, "loss": 0.943, "step": 11196 }, { "epoch": 1.585191477312947, "grad_norm": 9.240468656206142, "learning_rate": 5.428711723232779e-07, "loss": 1.0024, "step": 11197 }, { "epoch": 1.5853330501875842, "grad_norm": 8.997964416881238, "learning_rate": 5.425145740440896e-07, "loss": 1.0445, "step": 11198 }, { "epoch": 1.5854746230622214, "grad_norm": 9.27547760836504, "learning_rate": 5.421580786684522e-07, "loss": 0.9562, "step": 11199 }, { "epoch": 1.5856161959368587, "grad_norm": 8.276230237159703, "learning_rate": 5.418016862151032e-07, "loss": 0.847, "step": 11200 }, { "epoch": 1.5857577688114959, "grad_norm": 10.652398540494406, "learning_rate": 5.414453967027797e-07, "loss": 0.9886, "step": 11201 }, { "epoch": 1.585899341686133, "grad_norm": 8.64873699162444, "learning_rate": 5.410892101502119e-07, "loss": 0.9343, "step": 11202 }, { "epoch": 1.5860409145607701, "grad_norm": 10.22407528303007, "learning_rate": 5.407331265761229e-07, "loss": 1.1703, "step": 11203 }, { "epoch": 1.5861824874354074, "grad_norm": 10.561376748937981, "learning_rate": 5.403771459992333e-07, "loss": 0.9298, "step": 11204 }, { "epoch": 1.5863240603100446, "grad_norm": 10.435657087254901, "learning_rate": 5.400212684382553e-07, "loss": 0.9945, "step": 11205 }, { "epoch": 1.5864656331846818, "grad_norm": 10.181535531121245, "learning_rate": 5.396654939118984e-07, "loss": 0.9668, "step": 11206 }, { "epoch": 1.586607206059319, "grad_norm": 9.506389276310639, "learning_rate": 5.393098224388643e-07, "loss": 1.0025, "step": 11207 }, { "epoch": 1.5867487789339563, "grad_norm": 9.035017765367582, "learning_rate": 5.389542540378515e-07, "loss": 0.9854, "step": 11208 }, { "epoch": 1.5868903518085935, "grad_norm": 10.773921875441877, "learning_rate": 5.385987887275512e-07, "loss": 1.0502, "step": 11209 }, { "epoch": 1.5870319246832307, "grad_norm": 10.252659522422212, "learning_rate": 5.382434265266495e-07, "loss": 0.9135, "step": 11210 }, { "epoch": 1.587173497557868, "grad_norm": 10.516694738609544, "learning_rate": 5.378881674538288e-07, "loss": 0.963, "step": 11211 }, { "epoch": 1.587315070432505, "grad_norm": 8.058010536942513, "learning_rate": 5.375330115277635e-07, "loss": 0.9602, "step": 11212 }, { "epoch": 1.5874566433071422, "grad_norm": 8.771874252009454, "learning_rate": 5.371779587671252e-07, "loss": 0.9035, "step": 11213 }, { "epoch": 1.5875982161817794, "grad_norm": 9.466826089224059, "learning_rate": 5.368230091905774e-07, "loss": 1.0542, "step": 11214 }, { "epoch": 1.5877397890564167, "grad_norm": 9.705233579769764, "learning_rate": 5.364681628167806e-07, "loss": 0.9635, "step": 11215 }, { "epoch": 1.587881361931054, "grad_norm": 7.910704070536425, "learning_rate": 5.36113419664388e-07, "loss": 1.0044, "step": 11216 }, { "epoch": 1.5880229348056911, "grad_norm": 9.362913758693624, "learning_rate": 5.357587797520491e-07, "loss": 0.874, "step": 11217 }, { "epoch": 1.5881645076803284, "grad_norm": 9.403184359494013, "learning_rate": 5.354042430984061e-07, "loss": 0.9357, "step": 11218 }, { "epoch": 1.5883060805549656, "grad_norm": 8.875045561337718, "learning_rate": 5.350498097220972e-07, "loss": 0.9519, "step": 11219 }, { "epoch": 1.5884476534296028, "grad_norm": 10.232624865267992, "learning_rate": 5.346954796417558e-07, "loss": 0.9909, "step": 11220 }, { "epoch": 1.58858922630424, "grad_norm": 9.540350099933276, "learning_rate": 5.343412528760064e-07, "loss": 0.9636, "step": 11221 }, { "epoch": 1.5887307991788773, "grad_norm": 9.99525148183169, "learning_rate": 5.339871294434724e-07, "loss": 1.1069, "step": 11222 }, { "epoch": 1.5888723720535145, "grad_norm": 12.597280759907882, "learning_rate": 5.336331093627683e-07, "loss": 1.0425, "step": 11223 }, { "epoch": 1.5890139449281517, "grad_norm": 10.035209848655033, "learning_rate": 5.332791926525055e-07, "loss": 1.0065, "step": 11224 }, { "epoch": 1.589155517802789, "grad_norm": 9.783819379110922, "learning_rate": 5.329253793312897e-07, "loss": 1.0242, "step": 11225 }, { "epoch": 1.5892970906774262, "grad_norm": 8.597456587016012, "learning_rate": 5.325716694177194e-07, "loss": 1.0197, "step": 11226 }, { "epoch": 1.5894386635520634, "grad_norm": 9.182444316758534, "learning_rate": 5.322180629303902e-07, "loss": 0.9704, "step": 11227 }, { "epoch": 1.5895802364267007, "grad_norm": 10.907915135802225, "learning_rate": 5.318645598878894e-07, "loss": 0.9941, "step": 11228 }, { "epoch": 1.5897218093013379, "grad_norm": 8.408914703847461, "learning_rate": 5.315111603088019e-07, "loss": 0.9859, "step": 11229 }, { "epoch": 1.5898633821759751, "grad_norm": 10.518445479846344, "learning_rate": 5.311578642117049e-07, "loss": 0.9922, "step": 11230 }, { "epoch": 1.5900049550506123, "grad_norm": 8.289833313447629, "learning_rate": 5.308046716151705e-07, "loss": 0.9331, "step": 11231 }, { "epoch": 1.5901465279252496, "grad_norm": 10.653384005060428, "learning_rate": 5.304515825377666e-07, "loss": 1.0366, "step": 11232 }, { "epoch": 1.5902881007998868, "grad_norm": 11.477373503509524, "learning_rate": 5.300985969980537e-07, "loss": 1.1637, "step": 11233 }, { "epoch": 1.590429673674524, "grad_norm": 9.388903264844172, "learning_rate": 5.297457150145898e-07, "loss": 1.0019, "step": 11234 }, { "epoch": 1.5905712465491613, "grad_norm": 10.681046528493717, "learning_rate": 5.293929366059236e-07, "loss": 1.1066, "step": 11235 }, { "epoch": 1.5907128194237985, "grad_norm": 7.654712008709361, "learning_rate": 5.290402617906021e-07, "loss": 0.9346, "step": 11236 }, { "epoch": 1.5908543922984357, "grad_norm": 8.588739661592344, "learning_rate": 5.286876905871638e-07, "loss": 0.9497, "step": 11237 }, { "epoch": 1.590995965173073, "grad_norm": 10.527886914906828, "learning_rate": 5.283352230141445e-07, "loss": 1.0091, "step": 11238 }, { "epoch": 1.5911375380477102, "grad_norm": 8.782564438155383, "learning_rate": 5.279828590900715e-07, "loss": 0.9532, "step": 11239 }, { "epoch": 1.5912791109223474, "grad_norm": 9.615361130905816, "learning_rate": 5.276305988334701e-07, "loss": 0.9314, "step": 11240 }, { "epoch": 1.5914206837969846, "grad_norm": 11.251063656856827, "learning_rate": 5.272784422628574e-07, "loss": 0.9833, "step": 11241 }, { "epoch": 1.5915622566716219, "grad_norm": 8.481464596354733, "learning_rate": 5.269263893967453e-07, "loss": 0.8925, "step": 11242 }, { "epoch": 1.5917038295462589, "grad_norm": 11.409375240730277, "learning_rate": 5.265744402536424e-07, "loss": 0.9975, "step": 11243 }, { "epoch": 1.5918454024208961, "grad_norm": 9.05790941450762, "learning_rate": 5.262225948520491e-07, "loss": 0.9307, "step": 11244 }, { "epoch": 1.5919869752955333, "grad_norm": 8.588534017219784, "learning_rate": 5.258708532104631e-07, "loss": 0.8719, "step": 11245 }, { "epoch": 1.5921285481701706, "grad_norm": 9.58837501481897, "learning_rate": 5.255192153473734e-07, "loss": 0.8895, "step": 11246 }, { "epoch": 1.5922701210448078, "grad_norm": 9.29730664701729, "learning_rate": 5.251676812812664e-07, "loss": 0.9659, "step": 11247 }, { "epoch": 1.592411693919445, "grad_norm": 10.74774785070364, "learning_rate": 5.248162510306229e-07, "loss": 0.9839, "step": 11248 }, { "epoch": 1.5925532667940823, "grad_norm": 8.056179200801687, "learning_rate": 5.244649246139152e-07, "loss": 0.8608, "step": 11249 }, { "epoch": 1.5926948396687195, "grad_norm": 12.011132480611346, "learning_rate": 5.241137020496142e-07, "loss": 1.0858, "step": 11250 }, { "epoch": 1.5928364125433567, "grad_norm": 10.291038421935749, "learning_rate": 5.237625833561821e-07, "loss": 1.0057, "step": 11251 }, { "epoch": 1.592977985417994, "grad_norm": 9.186534597317687, "learning_rate": 5.234115685520788e-07, "loss": 0.9673, "step": 11252 }, { "epoch": 1.593119558292631, "grad_norm": 9.336064983439222, "learning_rate": 5.23060657655754e-07, "loss": 1.0473, "step": 11253 }, { "epoch": 1.5932611311672682, "grad_norm": 9.525640719433138, "learning_rate": 5.227098506856563e-07, "loss": 0.9043, "step": 11254 }, { "epoch": 1.5934027040419054, "grad_norm": 11.648853399475028, "learning_rate": 5.223591476602283e-07, "loss": 0.9924, "step": 11255 }, { "epoch": 1.5935442769165427, "grad_norm": 9.203715229375446, "learning_rate": 5.220085485979046e-07, "loss": 0.9014, "step": 11256 }, { "epoch": 1.5936858497911799, "grad_norm": 9.795743788733539, "learning_rate": 5.216580535171173e-07, "loss": 0.9111, "step": 11257 }, { "epoch": 1.5938274226658171, "grad_norm": 12.134567418759916, "learning_rate": 5.213076624362903e-07, "loss": 1.0633, "step": 11258 }, { "epoch": 1.5939689955404543, "grad_norm": 7.855023918285414, "learning_rate": 5.209573753738448e-07, "loss": 0.8751, "step": 11259 }, { "epoch": 1.5941105684150916, "grad_norm": 9.692037983869117, "learning_rate": 5.206071923481937e-07, "loss": 0.954, "step": 11260 }, { "epoch": 1.5942521412897288, "grad_norm": 8.77519870109232, "learning_rate": 5.202571133777474e-07, "loss": 0.8821, "step": 11261 }, { "epoch": 1.594393714164366, "grad_norm": 10.124513214089141, "learning_rate": 5.199071384809085e-07, "loss": 0.9866, "step": 11262 }, { "epoch": 1.5945352870390033, "grad_norm": 11.395531538198906, "learning_rate": 5.19557267676074e-07, "loss": 0.9898, "step": 11263 }, { "epoch": 1.5946768599136405, "grad_norm": 10.089285698853312, "learning_rate": 5.192075009816381e-07, "loss": 1.0521, "step": 11264 }, { "epoch": 1.5948184327882777, "grad_norm": 9.968997964196614, "learning_rate": 5.188578384159862e-07, "loss": 0.9447, "step": 11265 }, { "epoch": 1.594960005662915, "grad_norm": 9.717972015937228, "learning_rate": 5.185082799975013e-07, "loss": 0.9334, "step": 11266 }, { "epoch": 1.5951015785375522, "grad_norm": 8.691522120014152, "learning_rate": 5.18158825744558e-07, "loss": 0.9523, "step": 11267 }, { "epoch": 1.5952431514121894, "grad_norm": 9.660882030206274, "learning_rate": 5.17809475675528e-07, "loss": 1.0605, "step": 11268 }, { "epoch": 1.5953847242868266, "grad_norm": 8.465747258685981, "learning_rate": 5.174602298087755e-07, "loss": 0.8618, "step": 11269 }, { "epoch": 1.5955262971614639, "grad_norm": 9.935156203924864, "learning_rate": 5.171110881626604e-07, "loss": 1.0468, "step": 11270 }, { "epoch": 1.595667870036101, "grad_norm": 8.902416595522498, "learning_rate": 5.167620507555373e-07, "loss": 0.9345, "step": 11271 }, { "epoch": 1.5958094429107383, "grad_norm": 9.262285064798105, "learning_rate": 5.164131176057541e-07, "loss": 0.9217, "step": 11272 }, { "epoch": 1.5959510157853756, "grad_norm": 9.349439030918973, "learning_rate": 5.160642887316555e-07, "loss": 1.0374, "step": 11273 }, { "epoch": 1.5960925886600128, "grad_norm": 8.78396249601213, "learning_rate": 5.157155641515766e-07, "loss": 0.9764, "step": 11274 }, { "epoch": 1.59623416153465, "grad_norm": 9.409799508078084, "learning_rate": 5.153669438838507e-07, "loss": 1.0268, "step": 11275 }, { "epoch": 1.5963757344092873, "grad_norm": 10.086089543054227, "learning_rate": 5.150184279468057e-07, "loss": 0.8831, "step": 11276 }, { "epoch": 1.5965173072839245, "grad_norm": 8.558972154032054, "learning_rate": 5.146700163587612e-07, "loss": 0.9182, "step": 11277 }, { "epoch": 1.5966588801585617, "grad_norm": 8.789026909650163, "learning_rate": 5.143217091380343e-07, "loss": 0.8448, "step": 11278 }, { "epoch": 1.596800453033199, "grad_norm": 10.156391976171106, "learning_rate": 5.139735063029338e-07, "loss": 1.0539, "step": 11279 }, { "epoch": 1.5969420259078362, "grad_norm": 8.790857455600534, "learning_rate": 5.136254078717659e-07, "loss": 1.0294, "step": 11280 }, { "epoch": 1.5970835987824734, "grad_norm": 8.03455566772874, "learning_rate": 5.132774138628286e-07, "loss": 0.9832, "step": 11281 }, { "epoch": 1.5972251716571106, "grad_norm": 9.905932689121517, "learning_rate": 5.129295242944168e-07, "loss": 1.0115, "step": 11282 }, { "epoch": 1.5973667445317479, "grad_norm": 12.404198926209485, "learning_rate": 5.125817391848187e-07, "loss": 1.0297, "step": 11283 }, { "epoch": 1.5975083174063849, "grad_norm": 9.829395691156213, "learning_rate": 5.122340585523156e-07, "loss": 0.9234, "step": 11284 }, { "epoch": 1.5976498902810221, "grad_norm": 7.888680579191782, "learning_rate": 5.118864824151868e-07, "loss": 0.8931, "step": 11285 }, { "epoch": 1.5977914631556593, "grad_norm": 8.766422228674948, "learning_rate": 5.115390107917024e-07, "loss": 1.0384, "step": 11286 }, { "epoch": 1.5979330360302966, "grad_norm": 10.005227057965516, "learning_rate": 5.111916437001302e-07, "loss": 0.9741, "step": 11287 }, { "epoch": 1.5980746089049338, "grad_norm": 8.884517670410693, "learning_rate": 5.1084438115873e-07, "loss": 0.9016, "step": 11288 }, { "epoch": 1.598216181779571, "grad_norm": 11.069797328895955, "learning_rate": 5.104972231857577e-07, "loss": 0.9878, "step": 11289 }, { "epoch": 1.5983577546542083, "grad_norm": 8.609414877686683, "learning_rate": 5.101501697994626e-07, "loss": 0.9294, "step": 11290 }, { "epoch": 1.5984993275288455, "grad_norm": 12.464193711505358, "learning_rate": 5.098032210180901e-07, "loss": 0.8722, "step": 11291 }, { "epoch": 1.5986409004034827, "grad_norm": 10.866138223854064, "learning_rate": 5.094563768598773e-07, "loss": 1.1141, "step": 11292 }, { "epoch": 1.59878247327812, "grad_norm": 9.075440316775786, "learning_rate": 5.091096373430588e-07, "loss": 0.9746, "step": 11293 }, { "epoch": 1.5989240461527572, "grad_norm": 10.018699709646473, "learning_rate": 5.087630024858637e-07, "loss": 0.9266, "step": 11294 }, { "epoch": 1.5990656190273942, "grad_norm": 8.79384548674948, "learning_rate": 5.084164723065111e-07, "loss": 0.9845, "step": 11295 }, { "epoch": 1.5992071919020314, "grad_norm": 10.403169559785539, "learning_rate": 5.080700468232206e-07, "loss": 1.0662, "step": 11296 }, { "epoch": 1.5993487647766687, "grad_norm": 10.309492932790148, "learning_rate": 5.077237260542014e-07, "loss": 1.0499, "step": 11297 }, { "epoch": 1.5994903376513059, "grad_norm": 7.694398839320402, "learning_rate": 5.073775100176609e-07, "loss": 0.8774, "step": 11298 }, { "epoch": 1.5996319105259431, "grad_norm": 9.912753019434934, "learning_rate": 5.070313987317992e-07, "loss": 0.9873, "step": 11299 }, { "epoch": 1.5997734834005803, "grad_norm": 9.46374218143437, "learning_rate": 5.066853922148104e-07, "loss": 1.0038, "step": 11300 }, { "epoch": 1.5999150562752176, "grad_norm": 10.346897533764349, "learning_rate": 5.063394904848851e-07, "loss": 0.9202, "step": 11301 }, { "epoch": 1.6000566291498548, "grad_norm": 9.359201369163648, "learning_rate": 5.059936935602052e-07, "loss": 1.0389, "step": 11302 }, { "epoch": 1.600198202024492, "grad_norm": 9.211528471155173, "learning_rate": 5.05648001458951e-07, "loss": 0.9754, "step": 11303 }, { "epoch": 1.6003397748991293, "grad_norm": 8.833129112713966, "learning_rate": 5.053024141992935e-07, "loss": 1.0471, "step": 11304 }, { "epoch": 1.6004813477737665, "grad_norm": 9.299638097140821, "learning_rate": 5.049569317994013e-07, "loss": 0.9084, "step": 11305 }, { "epoch": 1.6006229206484037, "grad_norm": 8.743616882139829, "learning_rate": 5.046115542774358e-07, "loss": 1.014, "step": 11306 }, { "epoch": 1.600764493523041, "grad_norm": 11.04486264213781, "learning_rate": 5.042662816515523e-07, "loss": 1.0566, "step": 11307 }, { "epoch": 1.6009060663976782, "grad_norm": 9.93695220397099, "learning_rate": 5.039211139399031e-07, "loss": 1.0597, "step": 11308 }, { "epoch": 1.6010476392723154, "grad_norm": 9.655500166831322, "learning_rate": 5.035760511606319e-07, "loss": 1.05, "step": 11309 }, { "epoch": 1.6011892121469526, "grad_norm": 9.2035606294128, "learning_rate": 5.032310933318798e-07, "loss": 1.0704, "step": 11310 }, { "epoch": 1.6013307850215899, "grad_norm": 8.402276103138894, "learning_rate": 5.028862404717796e-07, "loss": 0.9219, "step": 11311 }, { "epoch": 1.601472357896227, "grad_norm": 9.030832247988476, "learning_rate": 5.025414925984612e-07, "loss": 0.9958, "step": 11312 }, { "epoch": 1.6016139307708643, "grad_norm": 8.169416885764734, "learning_rate": 5.021968497300464e-07, "loss": 0.8955, "step": 11313 }, { "epoch": 1.6017555036455016, "grad_norm": 9.013702981054127, "learning_rate": 5.018523118846544e-07, "loss": 0.9711, "step": 11314 }, { "epoch": 1.6018970765201388, "grad_norm": 9.236943283133241, "learning_rate": 5.015078790803965e-07, "loss": 0.925, "step": 11315 }, { "epoch": 1.602038649394776, "grad_norm": 9.553379072180553, "learning_rate": 5.011635513353786e-07, "loss": 0.9241, "step": 11316 }, { "epoch": 1.6021802222694133, "grad_norm": 9.849223428699213, "learning_rate": 5.008193286677029e-07, "loss": 0.9637, "step": 11317 }, { "epoch": 1.6023217951440505, "grad_norm": 8.823651123703387, "learning_rate": 5.004752110954642e-07, "loss": 0.9366, "step": 11318 }, { "epoch": 1.6024633680186877, "grad_norm": 9.412184557245382, "learning_rate": 5.00131198636753e-07, "loss": 0.9206, "step": 11319 }, { "epoch": 1.602604940893325, "grad_norm": 10.108428203224578, "learning_rate": 4.997872913096529e-07, "loss": 0.9776, "step": 11320 }, { "epoch": 1.6027465137679622, "grad_norm": 8.22682077956972, "learning_rate": 4.994434891322436e-07, "loss": 0.8809, "step": 11321 }, { "epoch": 1.6028880866425994, "grad_norm": 8.309811723873802, "learning_rate": 4.99099792122599e-07, "loss": 0.9576, "step": 11322 }, { "epoch": 1.6030296595172366, "grad_norm": 8.869514166806194, "learning_rate": 4.987562002987858e-07, "loss": 1.0168, "step": 11323 }, { "epoch": 1.6031712323918739, "grad_norm": 9.008772918769411, "learning_rate": 4.984127136788675e-07, "loss": 0.9182, "step": 11324 }, { "epoch": 1.603312805266511, "grad_norm": 8.44566300729626, "learning_rate": 4.980693322808999e-07, "loss": 0.8853, "step": 11325 }, { "epoch": 1.603454378141148, "grad_norm": 8.858881968616961, "learning_rate": 4.97726056122936e-07, "loss": 0.9341, "step": 11326 }, { "epoch": 1.6035959510157853, "grad_norm": 8.803625521718887, "learning_rate": 4.97382885223019e-07, "loss": 0.9187, "step": 11327 }, { "epoch": 1.6037375238904226, "grad_norm": 9.351770129404521, "learning_rate": 4.970398195991908e-07, "loss": 0.9508, "step": 11328 }, { "epoch": 1.6038790967650598, "grad_norm": 10.09121681762668, "learning_rate": 4.96696859269486e-07, "loss": 0.9586, "step": 11329 }, { "epoch": 1.604020669639697, "grad_norm": 9.703777985400398, "learning_rate": 4.963540042519333e-07, "loss": 1.0117, "step": 11330 }, { "epoch": 1.6041622425143343, "grad_norm": 9.495740638359711, "learning_rate": 4.96011254564557e-07, "loss": 0.9882, "step": 11331 }, { "epoch": 1.6043038153889715, "grad_norm": 11.963181916096453, "learning_rate": 4.956686102253744e-07, "loss": 1.1031, "step": 11332 }, { "epoch": 1.6044453882636087, "grad_norm": 9.33105831575348, "learning_rate": 4.953260712523992e-07, "loss": 0.9343, "step": 11333 }, { "epoch": 1.604586961138246, "grad_norm": 9.495369033431754, "learning_rate": 4.949836376636366e-07, "loss": 0.9322, "step": 11334 }, { "epoch": 1.6047285340128832, "grad_norm": 7.8463937627783125, "learning_rate": 4.946413094770902e-07, "loss": 0.9708, "step": 11335 }, { "epoch": 1.6048701068875202, "grad_norm": 10.237867292399201, "learning_rate": 4.942990867107547e-07, "loss": 1.0268, "step": 11336 }, { "epoch": 1.6050116797621574, "grad_norm": 8.408273219437797, "learning_rate": 4.939569693826202e-07, "loss": 0.9802, "step": 11337 }, { "epoch": 1.6051532526367946, "grad_norm": 8.329230201202805, "learning_rate": 4.936149575106727e-07, "loss": 1.0269, "step": 11338 }, { "epoch": 1.6052948255114319, "grad_norm": 8.688890524254548, "learning_rate": 4.9327305111289e-07, "loss": 0.908, "step": 11339 }, { "epoch": 1.605436398386069, "grad_norm": 8.96168883415698, "learning_rate": 4.929312502072475e-07, "loss": 0.9499, "step": 11340 }, { "epoch": 1.6055779712607063, "grad_norm": 11.474000150050522, "learning_rate": 4.925895548117121e-07, "loss": 0.9098, "step": 11341 }, { "epoch": 1.6057195441353436, "grad_norm": 10.627873649193551, "learning_rate": 4.922479649442477e-07, "loss": 1.0664, "step": 11342 }, { "epoch": 1.6058611170099808, "grad_norm": 10.1515185793936, "learning_rate": 4.919064806228099e-07, "loss": 0.9648, "step": 11343 }, { "epoch": 1.606002689884618, "grad_norm": 8.309953112969175, "learning_rate": 4.915651018653511e-07, "loss": 1.0224, "step": 11344 }, { "epoch": 1.6061442627592553, "grad_norm": 9.082570968506067, "learning_rate": 4.91223828689818e-07, "loss": 0.8829, "step": 11345 }, { "epoch": 1.6062858356338925, "grad_norm": 8.30014933256451, "learning_rate": 4.908826611141498e-07, "loss": 0.9367, "step": 11346 }, { "epoch": 1.6064274085085297, "grad_norm": 9.900672858906209, "learning_rate": 4.905415991562834e-07, "loss": 1.1747, "step": 11347 }, { "epoch": 1.606568981383167, "grad_norm": 9.92376527576782, "learning_rate": 4.902006428341457e-07, "loss": 1.1159, "step": 11348 }, { "epoch": 1.6067105542578042, "grad_norm": 8.239395724611015, "learning_rate": 4.89859792165662e-07, "loss": 0.9263, "step": 11349 }, { "epoch": 1.6068521271324414, "grad_norm": 9.553602280204066, "learning_rate": 4.895190471687497e-07, "loss": 1.0716, "step": 11350 }, { "epoch": 1.6069937000070786, "grad_norm": 9.066718020948192, "learning_rate": 4.891784078613218e-07, "loss": 0.9621, "step": 11351 }, { "epoch": 1.6071352728817159, "grad_norm": 10.014013008927478, "learning_rate": 4.888378742612865e-07, "loss": 0.9182, "step": 11352 }, { "epoch": 1.607276845756353, "grad_norm": 9.622094050085533, "learning_rate": 4.884974463865438e-07, "loss": 1.0567, "step": 11353 }, { "epoch": 1.6074184186309903, "grad_norm": 9.650394775537967, "learning_rate": 4.881571242549915e-07, "loss": 0.9382, "step": 11354 }, { "epoch": 1.6075599915056276, "grad_norm": 9.391671374730779, "learning_rate": 4.87816907884518e-07, "loss": 0.9378, "step": 11355 }, { "epoch": 1.6077015643802648, "grad_norm": 9.69791172790948, "learning_rate": 4.874767972930103e-07, "loss": 1.0376, "step": 11356 }, { "epoch": 1.607843137254902, "grad_norm": 7.987924760917818, "learning_rate": 4.871367924983458e-07, "loss": 0.9377, "step": 11357 }, { "epoch": 1.6079847101295393, "grad_norm": 8.23484797992297, "learning_rate": 4.867968935184e-07, "loss": 0.9849, "step": 11358 }, { "epoch": 1.6081262830041765, "grad_norm": 8.848979360919566, "learning_rate": 4.864571003710405e-07, "loss": 0.9699, "step": 11359 }, { "epoch": 1.6082678558788137, "grad_norm": 8.76497175805111, "learning_rate": 4.861174130741292e-07, "loss": 0.8854, "step": 11360 }, { "epoch": 1.608409428753451, "grad_norm": 8.751930677763559, "learning_rate": 4.857778316455245e-07, "loss": 0.8753, "step": 11361 }, { "epoch": 1.6085510016280882, "grad_norm": 11.298734435382956, "learning_rate": 4.854383561030768e-07, "loss": 1.0169, "step": 11362 }, { "epoch": 1.6086925745027254, "grad_norm": 7.67847837474113, "learning_rate": 4.85098986464633e-07, "loss": 0.8503, "step": 11363 }, { "epoch": 1.6088341473773626, "grad_norm": 8.370690902579275, "learning_rate": 4.847597227480327e-07, "loss": 0.9254, "step": 11364 }, { "epoch": 1.6089757202519999, "grad_norm": 9.690032824764813, "learning_rate": 4.844205649711118e-07, "loss": 0.9781, "step": 11365 }, { "epoch": 1.609117293126637, "grad_norm": 9.490280801298768, "learning_rate": 4.840815131516979e-07, "loss": 0.9595, "step": 11366 }, { "epoch": 1.609258866001274, "grad_norm": 9.390661153866551, "learning_rate": 4.83742567307616e-07, "loss": 0.9854, "step": 11367 }, { "epoch": 1.6094004388759113, "grad_norm": 9.101740626609947, "learning_rate": 4.834037274566852e-07, "loss": 0.909, "step": 11368 }, { "epoch": 1.6095420117505486, "grad_norm": 10.278401545477688, "learning_rate": 4.830649936167156e-07, "loss": 0.9606, "step": 11369 }, { "epoch": 1.6096835846251858, "grad_norm": 10.950780511499405, "learning_rate": 4.827263658055161e-07, "loss": 0.9639, "step": 11370 }, { "epoch": 1.609825157499823, "grad_norm": 10.307453192161182, "learning_rate": 4.823878440408866e-07, "loss": 0.989, "step": 11371 }, { "epoch": 1.6099667303744603, "grad_norm": 8.325041753074256, "learning_rate": 4.820494283406238e-07, "loss": 0.9372, "step": 11372 }, { "epoch": 1.6101083032490975, "grad_norm": 12.914536298401673, "learning_rate": 4.817111187225184e-07, "loss": 1.0494, "step": 11373 }, { "epoch": 1.6102498761237347, "grad_norm": 10.549058295431406, "learning_rate": 4.813729152043542e-07, "loss": 0.8999, "step": 11374 }, { "epoch": 1.610391448998372, "grad_norm": 8.15344330300472, "learning_rate": 4.810348178039112e-07, "loss": 0.8271, "step": 11375 }, { "epoch": 1.6105330218730092, "grad_norm": 8.539288991592544, "learning_rate": 4.806968265389619e-07, "loss": 0.9128, "step": 11376 }, { "epoch": 1.6106745947476464, "grad_norm": 10.399498384556374, "learning_rate": 4.803589414272752e-07, "loss": 1.0517, "step": 11377 }, { "epoch": 1.6108161676222834, "grad_norm": 8.85862015564567, "learning_rate": 4.800211624866128e-07, "loss": 0.8699, "step": 11378 }, { "epoch": 1.6109577404969206, "grad_norm": 9.497095165857584, "learning_rate": 4.796834897347319e-07, "loss": 0.9267, "step": 11379 }, { "epoch": 1.6110993133715579, "grad_norm": 9.087491218321281, "learning_rate": 4.793459231893838e-07, "loss": 1.0007, "step": 11380 }, { "epoch": 1.611240886246195, "grad_norm": 9.674728539574282, "learning_rate": 4.790084628683131e-07, "loss": 1.0422, "step": 11381 }, { "epoch": 1.6113824591208323, "grad_norm": 10.932093041323796, "learning_rate": 4.786711087892613e-07, "loss": 0.9649, "step": 11382 }, { "epoch": 1.6115240319954696, "grad_norm": 10.646201847115771, "learning_rate": 4.783338609699614e-07, "loss": 0.9522, "step": 11383 }, { "epoch": 1.6116656048701068, "grad_norm": 9.423435265451152, "learning_rate": 4.779967194281438e-07, "loss": 0.9658, "step": 11384 }, { "epoch": 1.611807177744744, "grad_norm": 9.891471549351042, "learning_rate": 4.776596841815304e-07, "loss": 1.0657, "step": 11385 }, { "epoch": 1.6119487506193813, "grad_norm": 8.34245010136996, "learning_rate": 4.773227552478399e-07, "loss": 0.9944, "step": 11386 }, { "epoch": 1.6120903234940185, "grad_norm": 8.601757632891083, "learning_rate": 4.769859326447834e-07, "loss": 0.9776, "step": 11387 }, { "epoch": 1.6122318963686557, "grad_norm": 10.960385515992568, "learning_rate": 4.7664921639006877e-07, "loss": 0.951, "step": 11388 }, { "epoch": 1.612373469243293, "grad_norm": 9.074543284123433, "learning_rate": 4.7631260650139595e-07, "loss": 0.8431, "step": 11389 }, { "epoch": 1.6125150421179302, "grad_norm": 9.396881608925803, "learning_rate": 4.7597610299645993e-07, "loss": 0.9065, "step": 11390 }, { "epoch": 1.6126566149925674, "grad_norm": 9.818516921416025, "learning_rate": 4.7563970589295185e-07, "loss": 0.9826, "step": 11391 }, { "epoch": 1.6127981878672046, "grad_norm": 8.364049784427799, "learning_rate": 4.753034152085542e-07, "loss": 0.9191, "step": 11392 }, { "epoch": 1.6129397607418419, "grad_norm": 10.020688590566097, "learning_rate": 4.7496723096094684e-07, "loss": 0.9949, "step": 11393 }, { "epoch": 1.613081333616479, "grad_norm": 9.228297988505163, "learning_rate": 4.7463115316780163e-07, "loss": 0.9977, "step": 11394 }, { "epoch": 1.6132229064911163, "grad_norm": 10.657990702196425, "learning_rate": 4.7429518184678667e-07, "loss": 1.0057, "step": 11395 }, { "epoch": 1.6133644793657536, "grad_norm": 9.363614098302728, "learning_rate": 4.73959317015564e-07, "loss": 1.0551, "step": 11396 }, { "epoch": 1.6135060522403908, "grad_norm": 9.797453989445062, "learning_rate": 4.736235586917889e-07, "loss": 0.9832, "step": 11397 }, { "epoch": 1.613647625115028, "grad_norm": 11.52371225837695, "learning_rate": 4.732879068931132e-07, "loss": 1.037, "step": 11398 }, { "epoch": 1.6137891979896652, "grad_norm": 9.923147141009697, "learning_rate": 4.7295236163718006e-07, "loss": 1.089, "step": 11399 }, { "epoch": 1.6139307708643025, "grad_norm": 9.6617364697723, "learning_rate": 4.7261692294163134e-07, "loss": 0.9276, "step": 11400 }, { "epoch": 1.6140723437389397, "grad_norm": 9.063593515653281, "learning_rate": 4.72281590824098e-07, "loss": 0.9272, "step": 11401 }, { "epoch": 1.614213916613577, "grad_norm": 8.974785558322331, "learning_rate": 4.719463653022094e-07, "loss": 0.8934, "step": 11402 }, { "epoch": 1.6143554894882142, "grad_norm": 8.674196004850618, "learning_rate": 4.7161124639358873e-07, "loss": 0.8698, "step": 11403 }, { "epoch": 1.6144970623628514, "grad_norm": 9.496186946786606, "learning_rate": 4.7127623411585194e-07, "loss": 0.9746, "step": 11404 }, { "epoch": 1.6146386352374886, "grad_norm": 9.13165709249328, "learning_rate": 4.7094132848661154e-07, "loss": 0.9357, "step": 11405 }, { "epoch": 1.6147802081121259, "grad_norm": 8.356081875441506, "learning_rate": 4.706065295234719e-07, "loss": 1.0198, "step": 11406 }, { "epoch": 1.614921780986763, "grad_norm": 9.588722725974769, "learning_rate": 4.702718372440343e-07, "loss": 0.9291, "step": 11407 }, { "epoch": 1.6150633538614003, "grad_norm": 8.65789096303103, "learning_rate": 4.699372516658923e-07, "loss": 0.9814, "step": 11408 }, { "epoch": 1.6152049267360373, "grad_norm": 8.390442233519247, "learning_rate": 4.6960277280663574e-07, "loss": 0.8844, "step": 11409 }, { "epoch": 1.6153464996106746, "grad_norm": 8.388786242308278, "learning_rate": 4.692684006838477e-07, "loss": 0.9455, "step": 11410 }, { "epoch": 1.6154880724853118, "grad_norm": 9.472653028349967, "learning_rate": 4.68934135315105e-07, "loss": 0.9326, "step": 11411 }, { "epoch": 1.615629645359949, "grad_norm": 8.975414179433884, "learning_rate": 4.685999767179808e-07, "loss": 0.9623, "step": 11412 }, { "epoch": 1.6157712182345862, "grad_norm": 8.234745140370494, "learning_rate": 4.6826592491004075e-07, "loss": 0.9251, "step": 11413 }, { "epoch": 1.6159127911092235, "grad_norm": 10.148105207704571, "learning_rate": 4.679319799088466e-07, "loss": 1.0287, "step": 11414 }, { "epoch": 1.6160543639838607, "grad_norm": 8.542880783011894, "learning_rate": 4.675981417319528e-07, "loss": 1.0136, "step": 11415 }, { "epoch": 1.616195936858498, "grad_norm": 9.73275797288667, "learning_rate": 4.6726441039690955e-07, "loss": 1.0162, "step": 11416 }, { "epoch": 1.6163375097331352, "grad_norm": 8.711439342326793, "learning_rate": 4.669307859212599e-07, "loss": 0.9591, "step": 11417 }, { "epoch": 1.6164790826077724, "grad_norm": 10.176385913536812, "learning_rate": 4.665972683225431e-07, "loss": 1.0155, "step": 11418 }, { "epoch": 1.6166206554824094, "grad_norm": 8.846084124950606, "learning_rate": 4.6626385761829234e-07, "loss": 1.0258, "step": 11419 }, { "epoch": 1.6167622283570466, "grad_norm": 10.686125014347606, "learning_rate": 4.6593055382603334e-07, "loss": 1.0076, "step": 11420 }, { "epoch": 1.6169038012316839, "grad_norm": 10.481603583034182, "learning_rate": 4.655973569632899e-07, "loss": 0.9747, "step": 11421 }, { "epoch": 1.617045374106321, "grad_norm": 7.3674567487914375, "learning_rate": 4.6526426704757545e-07, "loss": 0.9419, "step": 11422 }, { "epoch": 1.6171869469809583, "grad_norm": 8.708907940416434, "learning_rate": 4.6493128409640153e-07, "loss": 0.9545, "step": 11423 }, { "epoch": 1.6173285198555956, "grad_norm": 9.879058004098358, "learning_rate": 4.6459840812727227e-07, "loss": 0.9471, "step": 11424 }, { "epoch": 1.6174700927302328, "grad_norm": 8.428970659974961, "learning_rate": 4.642656391576869e-07, "loss": 0.9959, "step": 11425 }, { "epoch": 1.61761166560487, "grad_norm": 9.97354020453248, "learning_rate": 4.6393297720513957e-07, "loss": 0.9806, "step": 11426 }, { "epoch": 1.6177532384795072, "grad_norm": 8.235891589762478, "learning_rate": 4.6360042228711684e-07, "loss": 0.8225, "step": 11427 }, { "epoch": 1.6178948113541445, "grad_norm": 8.145128406615449, "learning_rate": 4.6326797442110217e-07, "loss": 0.8719, "step": 11428 }, { "epoch": 1.6180363842287817, "grad_norm": 7.740241767596772, "learning_rate": 4.629356336245708e-07, "loss": 0.8522, "step": 11429 }, { "epoch": 1.618177957103419, "grad_norm": 9.060456565148652, "learning_rate": 4.626033999149948e-07, "loss": 1.0466, "step": 11430 }, { "epoch": 1.6183195299780562, "grad_norm": 9.579378279419538, "learning_rate": 4.622712733098386e-07, "loss": 0.9426, "step": 11431 }, { "epoch": 1.6184611028526934, "grad_norm": 10.342307552016052, "learning_rate": 4.619392538265624e-07, "loss": 0.9281, "step": 11432 }, { "epoch": 1.6186026757273306, "grad_norm": 10.25837477038578, "learning_rate": 4.6160734148262027e-07, "loss": 1.0415, "step": 11433 }, { "epoch": 1.6187442486019679, "grad_norm": 7.550301036769965, "learning_rate": 4.612755362954596e-07, "loss": 0.9419, "step": 11434 }, { "epoch": 1.618885821476605, "grad_norm": 10.505465583633418, "learning_rate": 4.609438382825246e-07, "loss": 1.0169, "step": 11435 }, { "epoch": 1.6190273943512423, "grad_norm": 9.967374222389774, "learning_rate": 4.6061224746125067e-07, "loss": 1.0317, "step": 11436 }, { "epoch": 1.6191689672258796, "grad_norm": 7.2381903389966835, "learning_rate": 4.602807638490711e-07, "loss": 0.8159, "step": 11437 }, { "epoch": 1.6193105401005168, "grad_norm": 9.635833192362513, "learning_rate": 4.5994938746341033e-07, "loss": 1.1217, "step": 11438 }, { "epoch": 1.619452112975154, "grad_norm": 8.879005495348327, "learning_rate": 4.5961811832168965e-07, "loss": 0.8694, "step": 11439 }, { "epoch": 1.6195936858497912, "grad_norm": 7.873746257465574, "learning_rate": 4.592869564413227e-07, "loss": 0.9164, "step": 11440 }, { "epoch": 1.6197352587244285, "grad_norm": 11.343053202600755, "learning_rate": 4.5895590183971854e-07, "loss": 0.9633, "step": 11441 }, { "epoch": 1.6198768315990657, "grad_norm": 8.777726196051661, "learning_rate": 4.5862495453428216e-07, "loss": 0.8506, "step": 11442 }, { "epoch": 1.620018404473703, "grad_norm": 10.144948264075166, "learning_rate": 4.5829411454240856e-07, "loss": 1.0048, "step": 11443 }, { "epoch": 1.6201599773483402, "grad_norm": 9.997256856424181, "learning_rate": 4.579633818814916e-07, "loss": 0.9553, "step": 11444 }, { "epoch": 1.6203015502229774, "grad_norm": 9.993348770723472, "learning_rate": 4.576327565689165e-07, "loss": 0.9843, "step": 11445 }, { "epoch": 1.6204431230976146, "grad_norm": 8.96919488634179, "learning_rate": 4.5730223862206493e-07, "loss": 0.9325, "step": 11446 }, { "epoch": 1.6205846959722519, "grad_norm": 9.113039303742879, "learning_rate": 4.569718280583113e-07, "loss": 0.9131, "step": 11447 }, { "epoch": 1.620726268846889, "grad_norm": 8.854542539509557, "learning_rate": 4.566415248950251e-07, "loss": 0.9468, "step": 11448 }, { "epoch": 1.6208678417215263, "grad_norm": 9.556914250183034, "learning_rate": 4.5631132914957076e-07, "loss": 0.9068, "step": 11449 }, { "epoch": 1.6210094145961633, "grad_norm": 9.363914751592821, "learning_rate": 4.5598124083930577e-07, "loss": 0.915, "step": 11450 }, { "epoch": 1.6211509874708006, "grad_norm": 9.780726994678545, "learning_rate": 4.556512599815832e-07, "loss": 0.8659, "step": 11451 }, { "epoch": 1.6212925603454378, "grad_norm": 7.984320056226072, "learning_rate": 4.553213865937492e-07, "loss": 0.9004, "step": 11452 }, { "epoch": 1.621434133220075, "grad_norm": 9.478394685826947, "learning_rate": 4.5499162069314567e-07, "loss": 0.9436, "step": 11453 }, { "epoch": 1.6215757060947122, "grad_norm": 9.433985984340573, "learning_rate": 4.5466196229710797e-07, "loss": 1.0098, "step": 11454 }, { "epoch": 1.6217172789693495, "grad_norm": 8.131816645471442, "learning_rate": 4.5433241142296524e-07, "loss": 0.8616, "step": 11455 }, { "epoch": 1.6218588518439867, "grad_norm": 10.441400769849853, "learning_rate": 4.5400296808804313e-07, "loss": 1.1071, "step": 11456 }, { "epoch": 1.622000424718624, "grad_norm": 8.757070246199639, "learning_rate": 4.536736323096586e-07, "loss": 0.8723, "step": 11457 }, { "epoch": 1.6221419975932612, "grad_norm": 13.747383007350209, "learning_rate": 4.5334440410512605e-07, "loss": 1.1655, "step": 11458 }, { "epoch": 1.6222835704678984, "grad_norm": 9.685422613074127, "learning_rate": 4.5301528349175144e-07, "loss": 0.9787, "step": 11459 }, { "epoch": 1.6224251433425356, "grad_norm": 11.49146584346163, "learning_rate": 4.526862704868376e-07, "loss": 1.0315, "step": 11460 }, { "epoch": 1.6225667162171726, "grad_norm": 11.617240695280024, "learning_rate": 4.5235736510767957e-07, "loss": 1.0132, "step": 11461 }, { "epoch": 1.6227082890918099, "grad_norm": 8.820932236448682, "learning_rate": 4.520285673715688e-07, "loss": 0.9522, "step": 11462 }, { "epoch": 1.622849861966447, "grad_norm": 9.293800371311882, "learning_rate": 4.5169987729578897e-07, "loss": 0.9272, "step": 11463 }, { "epoch": 1.6229914348410843, "grad_norm": 8.26176907859966, "learning_rate": 4.5137129489761874e-07, "loss": 0.9098, "step": 11464 }, { "epoch": 1.6231330077157216, "grad_norm": 10.384450997032324, "learning_rate": 4.510428201943326e-07, "loss": 0.9444, "step": 11465 }, { "epoch": 1.6232745805903588, "grad_norm": 8.137992916618783, "learning_rate": 4.5071445320319706e-07, "loss": 0.9438, "step": 11466 }, { "epoch": 1.623416153464996, "grad_norm": 12.835484576230094, "learning_rate": 4.5038619394147554e-07, "loss": 1.0152, "step": 11467 }, { "epoch": 1.6235577263396332, "grad_norm": 8.69029618964564, "learning_rate": 4.500580424264225e-07, "loss": 0.9012, "step": 11468 }, { "epoch": 1.6236992992142705, "grad_norm": 9.057193438959038, "learning_rate": 4.497299986752901e-07, "loss": 0.9961, "step": 11469 }, { "epoch": 1.6238408720889077, "grad_norm": 8.338007518871978, "learning_rate": 4.4940206270532333e-07, "loss": 0.8998, "step": 11470 }, { "epoch": 1.623982444963545, "grad_norm": 9.018242044611318, "learning_rate": 4.4907423453376034e-07, "loss": 0.9167, "step": 11471 }, { "epoch": 1.6241240178381822, "grad_norm": 11.259306724440854, "learning_rate": 4.487465141778366e-07, "loss": 0.927, "step": 11472 }, { "epoch": 1.6242655907128194, "grad_norm": 8.405133141274955, "learning_rate": 4.4841890165477825e-07, "loss": 0.9586, "step": 11473 }, { "epoch": 1.6244071635874566, "grad_norm": 9.443180219021862, "learning_rate": 4.480913969818099e-07, "loss": 1.0147, "step": 11474 }, { "epoch": 1.6245487364620939, "grad_norm": 8.613074419994028, "learning_rate": 4.4776400017614546e-07, "loss": 0.9353, "step": 11475 }, { "epoch": 1.624690309336731, "grad_norm": 9.994356470272752, "learning_rate": 4.474367112549974e-07, "loss": 0.9954, "step": 11476 }, { "epoch": 1.6248318822113683, "grad_norm": 8.45060243885546, "learning_rate": 4.471095302355716e-07, "loss": 0.8757, "step": 11477 }, { "epoch": 1.6249734550860055, "grad_norm": 8.95830073387661, "learning_rate": 4.467824571350665e-07, "loss": 1.0932, "step": 11478 }, { "epoch": 1.6251150279606428, "grad_norm": 9.097470076454892, "learning_rate": 4.4645549197067736e-07, "loss": 1.0106, "step": 11479 }, { "epoch": 1.62525660083528, "grad_norm": 9.29163126803328, "learning_rate": 4.461286347595911e-07, "loss": 0.9969, "step": 11480 }, { "epoch": 1.6253981737099172, "grad_norm": 9.472893843669816, "learning_rate": 4.4580188551899164e-07, "loss": 0.8249, "step": 11481 }, { "epoch": 1.6255397465845545, "grad_norm": 9.717664651891477, "learning_rate": 4.4547524426605484e-07, "loss": 0.9534, "step": 11482 }, { "epoch": 1.6256813194591917, "grad_norm": 9.690585918526079, "learning_rate": 4.451487110179531e-07, "loss": 0.992, "step": 11483 }, { "epoch": 1.625822892333829, "grad_norm": 8.982061098090139, "learning_rate": 4.448222857918508e-07, "loss": 0.9449, "step": 11484 }, { "epoch": 1.6259644652084662, "grad_norm": 11.30295999294215, "learning_rate": 4.444959686049094e-07, "loss": 1.0637, "step": 11485 }, { "epoch": 1.6261060380831034, "grad_norm": 9.4515256427255, "learning_rate": 4.441697594742819e-07, "loss": 0.8613, "step": 11486 }, { "epoch": 1.6262476109577406, "grad_norm": 9.1941371480933, "learning_rate": 4.4384365841711684e-07, "loss": 1.1167, "step": 11487 }, { "epoch": 1.6263891838323778, "grad_norm": 8.044027294960548, "learning_rate": 4.4351766545055826e-07, "loss": 0.8927, "step": 11488 }, { "epoch": 1.626530756707015, "grad_norm": 9.621147672910197, "learning_rate": 4.4319178059174186e-07, "loss": 1.0665, "step": 11489 }, { "epoch": 1.6266723295816523, "grad_norm": 9.480321479851204, "learning_rate": 4.428660038578006e-07, "loss": 0.9444, "step": 11490 }, { "epoch": 1.6268139024562895, "grad_norm": 10.016745470427804, "learning_rate": 4.4254033526585917e-07, "loss": 0.9305, "step": 11491 }, { "epoch": 1.6269554753309265, "grad_norm": 9.194946178052657, "learning_rate": 4.42214774833038e-07, "loss": 0.9987, "step": 11492 }, { "epoch": 1.6270970482055638, "grad_norm": 9.080506424414232, "learning_rate": 4.418893225764526e-07, "loss": 0.9358, "step": 11493 }, { "epoch": 1.627238621080201, "grad_norm": 7.81108214673092, "learning_rate": 4.4156397851321003e-07, "loss": 0.9413, "step": 11494 }, { "epoch": 1.6273801939548382, "grad_norm": 10.797722569278461, "learning_rate": 4.412387426604156e-07, "loss": 1.1375, "step": 11495 }, { "epoch": 1.6275217668294755, "grad_norm": 8.958439552801114, "learning_rate": 4.4091361503516424e-07, "loss": 0.9289, "step": 11496 }, { "epoch": 1.6276633397041127, "grad_norm": 9.5624652936717, "learning_rate": 4.405885956545494e-07, "loss": 0.9941, "step": 11497 }, { "epoch": 1.62780491257875, "grad_norm": 10.167661107524234, "learning_rate": 4.402636845356559e-07, "loss": 0.9828, "step": 11498 }, { "epoch": 1.6279464854533872, "grad_norm": 8.36342310350856, "learning_rate": 4.3993888169556463e-07, "loss": 0.9368, "step": 11499 }, { "epoch": 1.6280880583280244, "grad_norm": 10.071559831716618, "learning_rate": 4.3961418715135097e-07, "loss": 0.9633, "step": 11500 }, { "epoch": 1.6282296312026616, "grad_norm": 9.15973729718902, "learning_rate": 4.3928960092008254e-07, "loss": 0.9336, "step": 11501 }, { "epoch": 1.6283712040772986, "grad_norm": 9.77901386867062, "learning_rate": 4.389651230188241e-07, "loss": 1.0228, "step": 11502 }, { "epoch": 1.6285127769519359, "grad_norm": 9.691193301141901, "learning_rate": 4.386407534646314e-07, "loss": 0.9397, "step": 11503 }, { "epoch": 1.628654349826573, "grad_norm": 10.511985114843318, "learning_rate": 4.3831649227455806e-07, "loss": 0.9111, "step": 11504 }, { "epoch": 1.6287959227012103, "grad_norm": 9.18611560174554, "learning_rate": 4.3799233946564904e-07, "loss": 0.8326, "step": 11505 }, { "epoch": 1.6289374955758475, "grad_norm": 9.986271017035518, "learning_rate": 4.3766829505494574e-07, "loss": 1.0648, "step": 11506 }, { "epoch": 1.6290790684504848, "grad_norm": 9.91188327174386, "learning_rate": 4.3734435905948226e-07, "loss": 0.9953, "step": 11507 }, { "epoch": 1.629220641325122, "grad_norm": 9.267861519310335, "learning_rate": 4.370205314962872e-07, "loss": 1.0238, "step": 11508 }, { "epoch": 1.6293622141997592, "grad_norm": 9.128979376980322, "learning_rate": 4.366968123823856e-07, "loss": 1.1037, "step": 11509 }, { "epoch": 1.6295037870743965, "grad_norm": 11.118128336638364, "learning_rate": 4.36373201734793e-07, "loss": 0.9855, "step": 11510 }, { "epoch": 1.6296453599490337, "grad_norm": 10.061297421831187, "learning_rate": 4.360496995705235e-07, "loss": 0.9465, "step": 11511 }, { "epoch": 1.629786932823671, "grad_norm": 9.262439096433331, "learning_rate": 4.3572630590658136e-07, "loss": 0.9516, "step": 11512 }, { "epoch": 1.6299285056983082, "grad_norm": 9.609751326472534, "learning_rate": 4.354030207599691e-07, "loss": 1.0455, "step": 11513 }, { "epoch": 1.6300700785729454, "grad_norm": 8.79911230985298, "learning_rate": 4.3507984414767974e-07, "loss": 0.9234, "step": 11514 }, { "epoch": 1.6302116514475826, "grad_norm": 12.493082802892243, "learning_rate": 4.347567760867036e-07, "loss": 0.9793, "step": 11515 }, { "epoch": 1.6303532243222199, "grad_norm": 8.806242324035201, "learning_rate": 4.344338165940248e-07, "loss": 0.955, "step": 11516 }, { "epoch": 1.630494797196857, "grad_norm": 9.636883026442318, "learning_rate": 4.341109656866188e-07, "loss": 0.9097, "step": 11517 }, { "epoch": 1.6306363700714943, "grad_norm": 8.73089344604611, "learning_rate": 4.337882233814597e-07, "loss": 0.9364, "step": 11518 }, { "epoch": 1.6307779429461315, "grad_norm": 12.374488897076404, "learning_rate": 4.3346558969551253e-07, "loss": 1.0077, "step": 11519 }, { "epoch": 1.6309195158207688, "grad_norm": 9.941339866551449, "learning_rate": 4.331430646457391e-07, "loss": 0.8712, "step": 11520 }, { "epoch": 1.631061088695406, "grad_norm": 8.940941634799689, "learning_rate": 4.3282064824909265e-07, "loss": 0.9516, "step": 11521 }, { "epoch": 1.6312026615700432, "grad_norm": 10.741714833351105, "learning_rate": 4.324983405225236e-07, "loss": 0.9691, "step": 11522 }, { "epoch": 1.6313442344446805, "grad_norm": 8.281936789770016, "learning_rate": 4.321761414829759e-07, "loss": 0.9449, "step": 11523 }, { "epoch": 1.6314858073193177, "grad_norm": 9.028523492254717, "learning_rate": 4.3185405114738593e-07, "loss": 0.9834, "step": 11524 }, { "epoch": 1.631627380193955, "grad_norm": 7.593266978031143, "learning_rate": 4.3153206953268715e-07, "loss": 0.9946, "step": 11525 }, { "epoch": 1.6317689530685922, "grad_norm": 8.655081219041397, "learning_rate": 4.312101966558044e-07, "loss": 0.9267, "step": 11526 }, { "epoch": 1.6319105259432294, "grad_norm": 9.572231950371801, "learning_rate": 4.308884325336596e-07, "loss": 1.0216, "step": 11527 }, { "epoch": 1.6320520988178666, "grad_norm": 9.406144962167891, "learning_rate": 4.305667771831673e-07, "loss": 0.9139, "step": 11528 }, { "epoch": 1.6321936716925038, "grad_norm": 8.347900272813742, "learning_rate": 4.302452306212357e-07, "loss": 0.939, "step": 11529 }, { "epoch": 1.632335244567141, "grad_norm": 8.794555141351799, "learning_rate": 4.2992379286476984e-07, "loss": 0.9116, "step": 11530 }, { "epoch": 1.6324768174417783, "grad_norm": 7.83171879247364, "learning_rate": 4.296024639306659e-07, "loss": 0.8206, "step": 11531 }, { "epoch": 1.6326183903164155, "grad_norm": 9.112078152744234, "learning_rate": 4.292812438358174e-07, "loss": 0.977, "step": 11532 }, { "epoch": 1.6327599631910525, "grad_norm": 7.9875126177429925, "learning_rate": 4.2896013259710905e-07, "loss": 0.9885, "step": 11533 }, { "epoch": 1.6329015360656898, "grad_norm": 9.899222880987221, "learning_rate": 4.286391302314233e-07, "loss": 1.0573, "step": 11534 }, { "epoch": 1.633043108940327, "grad_norm": 9.564880872543945, "learning_rate": 4.2831823675563324e-07, "loss": 0.8901, "step": 11535 }, { "epoch": 1.6331846818149642, "grad_norm": 11.395161964032079, "learning_rate": 4.279974521866093e-07, "loss": 1.1026, "step": 11536 }, { "epoch": 1.6333262546896015, "grad_norm": 8.62727834491417, "learning_rate": 4.2767677654121375e-07, "loss": 0.932, "step": 11537 }, { "epoch": 1.6334678275642387, "grad_norm": 8.779311865601773, "learning_rate": 4.2735620983630543e-07, "loss": 0.9844, "step": 11538 }, { "epoch": 1.633609400438876, "grad_norm": 10.601274277009662, "learning_rate": 4.2703575208873585e-07, "loss": 0.9738, "step": 11539 }, { "epoch": 1.6337509733135132, "grad_norm": 9.661370854697184, "learning_rate": 4.267154033153503e-07, "loss": 0.9605, "step": 11540 }, { "epoch": 1.6338925461881504, "grad_norm": 9.394789903533924, "learning_rate": 4.26395163532991e-07, "loss": 1.1011, "step": 11541 }, { "epoch": 1.6340341190627876, "grad_norm": 7.960904197136549, "learning_rate": 4.2607503275849116e-07, "loss": 1.0098, "step": 11542 }, { "epoch": 1.6341756919374246, "grad_norm": 9.079834243673071, "learning_rate": 4.2575501100868085e-07, "loss": 1.0159, "step": 11543 }, { "epoch": 1.6343172648120619, "grad_norm": 9.548477962297676, "learning_rate": 4.2543509830038243e-07, "loss": 0.9385, "step": 11544 }, { "epoch": 1.634458837686699, "grad_norm": 9.33326076297703, "learning_rate": 4.2511529465041417e-07, "loss": 0.9722, "step": 11545 }, { "epoch": 1.6346004105613363, "grad_norm": 7.76870807565807, "learning_rate": 4.2479560007558845e-07, "loss": 0.9041, "step": 11546 }, { "epoch": 1.6347419834359735, "grad_norm": 8.215709283337642, "learning_rate": 4.2447601459270987e-07, "loss": 0.8449, "step": 11547 }, { "epoch": 1.6348835563106108, "grad_norm": 10.05191260210957, "learning_rate": 4.241565382185808e-07, "loss": 1.0691, "step": 11548 }, { "epoch": 1.635025129185248, "grad_norm": 9.54347280763038, "learning_rate": 4.238371709699937e-07, "loss": 1.1002, "step": 11549 }, { "epoch": 1.6351667020598852, "grad_norm": 11.58945930648197, "learning_rate": 4.2351791286373847e-07, "loss": 1.0213, "step": 11550 }, { "epoch": 1.6353082749345225, "grad_norm": 7.974556516555776, "learning_rate": 4.231987639165988e-07, "loss": 0.9436, "step": 11551 }, { "epoch": 1.6354498478091597, "grad_norm": 9.734138694087045, "learning_rate": 4.2287972414535084e-07, "loss": 0.8606, "step": 11552 }, { "epoch": 1.635591420683797, "grad_norm": 8.697380320445724, "learning_rate": 4.2256079356676776e-07, "loss": 0.9555, "step": 11553 }, { "epoch": 1.6357329935584342, "grad_norm": 9.583481453013508, "learning_rate": 4.222419721976143e-07, "loss": 0.9515, "step": 11554 }, { "epoch": 1.6358745664330714, "grad_norm": 10.580747113160594, "learning_rate": 4.2192326005465134e-07, "loss": 1.0036, "step": 11555 }, { "epoch": 1.6360161393077086, "grad_norm": 8.78975691701177, "learning_rate": 4.216046571546328e-07, "loss": 0.9879, "step": 11556 }, { "epoch": 1.6361577121823458, "grad_norm": 10.539547874409635, "learning_rate": 4.212861635143084e-07, "loss": 0.9782, "step": 11557 }, { "epoch": 1.636299285056983, "grad_norm": 10.659936172265637, "learning_rate": 4.2096777915041964e-07, "loss": 1.0941, "step": 11558 }, { "epoch": 1.6364408579316203, "grad_norm": 8.7957765154236, "learning_rate": 4.206495040797051e-07, "loss": 1.09, "step": 11559 }, { "epoch": 1.6365824308062575, "grad_norm": 9.141252649391111, "learning_rate": 4.203313383188959e-07, "loss": 1.0111, "step": 11560 }, { "epoch": 1.6367240036808948, "grad_norm": 11.19092460514619, "learning_rate": 4.200132818847169e-07, "loss": 0.8976, "step": 11561 }, { "epoch": 1.636865576555532, "grad_norm": 9.053854139551477, "learning_rate": 4.1969533479388925e-07, "loss": 1.0852, "step": 11562 }, { "epoch": 1.6370071494301692, "grad_norm": 10.16565069740066, "learning_rate": 4.193774970631262e-07, "loss": 0.9579, "step": 11563 }, { "epoch": 1.6371487223048065, "grad_norm": 9.14807305395983, "learning_rate": 4.1905976870913747e-07, "loss": 0.9838, "step": 11564 }, { "epoch": 1.6372902951794437, "grad_norm": 8.403408775772744, "learning_rate": 4.1874214974862436e-07, "loss": 0.9894, "step": 11565 }, { "epoch": 1.637431868054081, "grad_norm": 9.767178001515138, "learning_rate": 4.1842464019828444e-07, "loss": 0.8983, "step": 11566 }, { "epoch": 1.6375734409287181, "grad_norm": 10.677156853151352, "learning_rate": 4.1810724007480987e-07, "loss": 0.9388, "step": 11567 }, { "epoch": 1.6377150138033554, "grad_norm": 9.577426007452246, "learning_rate": 4.1778994939488476e-07, "loss": 0.98, "step": 11568 }, { "epoch": 1.6378565866779926, "grad_norm": 8.688252148898076, "learning_rate": 4.174727681751906e-07, "loss": 1.0108, "step": 11569 }, { "epoch": 1.6379981595526298, "grad_norm": 8.881540386867286, "learning_rate": 4.1715569643239916e-07, "loss": 0.8609, "step": 11570 }, { "epoch": 1.638139732427267, "grad_norm": 8.109498245390537, "learning_rate": 4.1683873418318007e-07, "loss": 0.9006, "step": 11571 }, { "epoch": 1.6382813053019043, "grad_norm": 9.50619706258888, "learning_rate": 4.1652188144419516e-07, "loss": 0.9928, "step": 11572 }, { "epoch": 1.6384228781765415, "grad_norm": 8.73434224404077, "learning_rate": 4.1620513823210115e-07, "loss": 0.8444, "step": 11573 }, { "epoch": 1.6385644510511785, "grad_norm": 9.126183002029022, "learning_rate": 4.1588850456354995e-07, "loss": 0.9435, "step": 11574 }, { "epoch": 1.6387060239258158, "grad_norm": 9.298865661353036, "learning_rate": 4.1557198045518554e-07, "loss": 0.9176, "step": 11575 }, { "epoch": 1.638847596800453, "grad_norm": 9.451054218566581, "learning_rate": 4.152555659236485e-07, "loss": 0.9513, "step": 11576 }, { "epoch": 1.6389891696750902, "grad_norm": 9.132972056722126, "learning_rate": 4.1493926098557127e-07, "loss": 0.8884, "step": 11577 }, { "epoch": 1.6391307425497275, "grad_norm": 8.264688074557114, "learning_rate": 4.146230656575831e-07, "loss": 0.8631, "step": 11578 }, { "epoch": 1.6392723154243647, "grad_norm": 10.203797953496592, "learning_rate": 4.1430697995630486e-07, "loss": 1.0625, "step": 11579 }, { "epoch": 1.639413888299002, "grad_norm": 9.344951845782834, "learning_rate": 4.139910038983541e-07, "loss": 0.958, "step": 11580 }, { "epoch": 1.6395554611736392, "grad_norm": 8.84428873207278, "learning_rate": 4.136751375003406e-07, "loss": 0.9541, "step": 11581 }, { "epoch": 1.6396970340482764, "grad_norm": 8.835416824015807, "learning_rate": 4.133593807788691e-07, "loss": 0.9606, "step": 11582 }, { "epoch": 1.6398386069229136, "grad_norm": 8.09323389477212, "learning_rate": 4.1304373375053995e-07, "loss": 0.805, "step": 11583 }, { "epoch": 1.6399801797975508, "grad_norm": 9.247755860983544, "learning_rate": 4.127281964319446e-07, "loss": 0.8859, "step": 11584 }, { "epoch": 1.6401217526721878, "grad_norm": 9.081806112800901, "learning_rate": 4.1241276883967256e-07, "loss": 0.9844, "step": 11585 }, { "epoch": 1.640263325546825, "grad_norm": 8.68935325121015, "learning_rate": 4.120974509903039e-07, "loss": 0.993, "step": 11586 }, { "epoch": 1.6404048984214623, "grad_norm": 10.103707235581588, "learning_rate": 4.117822429004159e-07, "loss": 0.9368, "step": 11587 }, { "epoch": 1.6405464712960995, "grad_norm": 8.321608522154602, "learning_rate": 4.114671445865781e-07, "loss": 0.9352, "step": 11588 }, { "epoch": 1.6406880441707368, "grad_norm": 9.95873252249451, "learning_rate": 4.11152156065355e-07, "loss": 1.0667, "step": 11589 }, { "epoch": 1.640829617045374, "grad_norm": 8.61970246759613, "learning_rate": 4.1083727735330677e-07, "loss": 0.9291, "step": 11590 }, { "epoch": 1.6409711899200112, "grad_norm": 9.458713563062707, "learning_rate": 4.105225084669839e-07, "loss": 0.9772, "step": 11591 }, { "epoch": 1.6411127627946485, "grad_norm": 7.615234375, "learning_rate": 4.1020784942293557e-07, "loss": 0.8925, "step": 11592 }, { "epoch": 1.6412543356692857, "grad_norm": 8.904904555034678, "learning_rate": 4.0989330023770146e-07, "loss": 1.0476, "step": 11593 }, { "epoch": 1.641395908543923, "grad_norm": 10.816011244593085, "learning_rate": 4.0957886092781897e-07, "loss": 1.0337, "step": 11594 }, { "epoch": 1.6415374814185602, "grad_norm": 11.89716142410331, "learning_rate": 4.092645315098165e-07, "loss": 1.1063, "step": 11595 }, { "epoch": 1.6416790542931974, "grad_norm": 10.279231003672185, "learning_rate": 4.0895031200021836e-07, "loss": 0.9769, "step": 11596 }, { "epoch": 1.6418206271678346, "grad_norm": 10.936800340344991, "learning_rate": 4.0863620241554407e-07, "loss": 0.9895, "step": 11597 }, { "epoch": 1.6419622000424718, "grad_norm": 8.501859573701362, "learning_rate": 4.0832220277230467e-07, "loss": 1.0537, "step": 11598 }, { "epoch": 1.642103772917109, "grad_norm": 10.540891314754775, "learning_rate": 4.0800831308700773e-07, "loss": 0.9357, "step": 11599 }, { "epoch": 1.6422453457917463, "grad_norm": 7.909820835867117, "learning_rate": 4.0769453337615367e-07, "loss": 0.9581, "step": 11600 }, { "epoch": 1.6423869186663835, "grad_norm": 10.32946454709003, "learning_rate": 4.073808636562382e-07, "loss": 1.0356, "step": 11601 }, { "epoch": 1.6425284915410208, "grad_norm": 9.52808164872579, "learning_rate": 4.070673039437506e-07, "loss": 0.9089, "step": 11602 }, { "epoch": 1.642670064415658, "grad_norm": 8.976209774673569, "learning_rate": 4.0675385425517356e-07, "loss": 0.9564, "step": 11603 }, { "epoch": 1.6428116372902952, "grad_norm": 10.59434125032417, "learning_rate": 4.0644051460698634e-07, "loss": 0.9773, "step": 11604 }, { "epoch": 1.6429532101649325, "grad_norm": 11.000929706472007, "learning_rate": 4.0612728501565973e-07, "loss": 0.973, "step": 11605 }, { "epoch": 1.6430947830395697, "grad_norm": 10.296449319610552, "learning_rate": 4.058141654976608e-07, "loss": 0.9689, "step": 11606 }, { "epoch": 1.643236355914207, "grad_norm": 10.412739697613395, "learning_rate": 4.055011560694494e-07, "loss": 0.9375, "step": 11607 }, { "epoch": 1.6433779287888441, "grad_norm": 8.980002332657339, "learning_rate": 4.0518825674748076e-07, "loss": 1.0587, "step": 11608 }, { "epoch": 1.6435195016634814, "grad_norm": 10.120878911507946, "learning_rate": 4.0487546754820304e-07, "loss": 0.9679, "step": 11609 }, { "epoch": 1.6436610745381186, "grad_norm": 11.652411189253385, "learning_rate": 4.0456278848806067e-07, "loss": 1.0362, "step": 11610 }, { "epoch": 1.6438026474127558, "grad_norm": 9.411394607055165, "learning_rate": 4.042502195834891e-07, "loss": 0.9734, "step": 11611 }, { "epoch": 1.643944220287393, "grad_norm": 8.947074829811914, "learning_rate": 4.039377608509218e-07, "loss": 0.905, "step": 11612 }, { "epoch": 1.6440857931620303, "grad_norm": 8.98279537744063, "learning_rate": 4.0362541230678316e-07, "loss": 1.0749, "step": 11613 }, { "epoch": 1.6442273660366675, "grad_norm": 14.14631705189773, "learning_rate": 4.033131739674931e-07, "loss": 0.9214, "step": 11614 }, { "epoch": 1.6443689389113048, "grad_norm": 8.669134913385786, "learning_rate": 4.0300104584946655e-07, "loss": 0.9345, "step": 11615 }, { "epoch": 1.6445105117859418, "grad_norm": 9.326536110361182, "learning_rate": 4.026890279691109e-07, "loss": 1.035, "step": 11616 }, { "epoch": 1.644652084660579, "grad_norm": 10.599539376093263, "learning_rate": 4.0237712034283004e-07, "loss": 1.0227, "step": 11617 }, { "epoch": 1.6447936575352162, "grad_norm": 10.475857181118842, "learning_rate": 4.020653229870192e-07, "loss": 0.8981, "step": 11618 }, { "epoch": 1.6449352304098535, "grad_norm": 9.482332364729436, "learning_rate": 4.0175363591806985e-07, "loss": 1.026, "step": 11619 }, { "epoch": 1.6450768032844907, "grad_norm": 9.703882553333315, "learning_rate": 4.0144205915236797e-07, "loss": 1.0134, "step": 11620 }, { "epoch": 1.645218376159128, "grad_norm": 10.542076454691575, "learning_rate": 4.0113059270629193e-07, "loss": 1.0175, "step": 11621 }, { "epoch": 1.6453599490337651, "grad_norm": 10.116560067417, "learning_rate": 4.008192365962166e-07, "loss": 1.0363, "step": 11622 }, { "epoch": 1.6455015219084024, "grad_norm": 9.93474113507234, "learning_rate": 4.0050799083850787e-07, "loss": 0.9697, "step": 11623 }, { "epoch": 1.6456430947830396, "grad_norm": 9.52131229812456, "learning_rate": 4.0019685544952835e-07, "loss": 0.9616, "step": 11624 }, { "epoch": 1.6457846676576768, "grad_norm": 11.080934843432413, "learning_rate": 3.998858304456352e-07, "loss": 1.0026, "step": 11625 }, { "epoch": 1.6459262405323138, "grad_norm": 9.6003609907307, "learning_rate": 3.995749158431772e-07, "loss": 0.9233, "step": 11626 }, { "epoch": 1.646067813406951, "grad_norm": 8.588203110430937, "learning_rate": 3.9926411165850054e-07, "loss": 1.0555, "step": 11627 }, { "epoch": 1.6462093862815883, "grad_norm": 8.91255687465417, "learning_rate": 3.989534179079427e-07, "loss": 0.9637, "step": 11628 }, { "epoch": 1.6463509591562255, "grad_norm": 10.752083155151515, "learning_rate": 3.986428346078375e-07, "loss": 1.0052, "step": 11629 }, { "epoch": 1.6464925320308628, "grad_norm": 7.467017830810673, "learning_rate": 3.983323617745111e-07, "loss": 0.9327, "step": 11630 }, { "epoch": 1.6466341049055, "grad_norm": 8.814458101940208, "learning_rate": 3.980219994242859e-07, "loss": 0.8851, "step": 11631 }, { "epoch": 1.6467756777801372, "grad_norm": 10.377646235145807, "learning_rate": 3.9771174757347626e-07, "loss": 1.0057, "step": 11632 }, { "epoch": 1.6469172506547745, "grad_norm": 10.576352758437023, "learning_rate": 3.9740160623839314e-07, "loss": 1.0271, "step": 11633 }, { "epoch": 1.6470588235294117, "grad_norm": 9.078375860480781, "learning_rate": 3.9709157543533996e-07, "loss": 1.0319, "step": 11634 }, { "epoch": 1.647200396404049, "grad_norm": 7.27365384051581, "learning_rate": 3.967816551806139e-07, "loss": 1.0377, "step": 11635 }, { "epoch": 1.6473419692786861, "grad_norm": 8.114111074107207, "learning_rate": 3.9647184549050865e-07, "loss": 0.8514, "step": 11636 }, { "epoch": 1.6474835421533234, "grad_norm": 9.357098386902212, "learning_rate": 3.9616214638130953e-07, "loss": 1.0231, "step": 11637 }, { "epoch": 1.6476251150279606, "grad_norm": 9.703203627902036, "learning_rate": 3.9585255786929816e-07, "loss": 0.9448, "step": 11638 }, { "epoch": 1.6477666879025978, "grad_norm": 9.964971609577596, "learning_rate": 3.9554307997074826e-07, "loss": 1.0127, "step": 11639 }, { "epoch": 1.647908260777235, "grad_norm": 9.276171389741323, "learning_rate": 3.952337127019301e-07, "loss": 0.9524, "step": 11640 }, { "epoch": 1.6480498336518723, "grad_norm": 9.26037675457747, "learning_rate": 3.9492445607910574e-07, "loss": 0.9289, "step": 11641 }, { "epoch": 1.6481914065265095, "grad_norm": 10.195049460806985, "learning_rate": 3.946153101185332e-07, "loss": 1.1107, "step": 11642 }, { "epoch": 1.6483329794011468, "grad_norm": 8.714160873626634, "learning_rate": 3.943062748364651e-07, "loss": 0.937, "step": 11643 }, { "epoch": 1.648474552275784, "grad_norm": 9.753829204116013, "learning_rate": 3.939973502491448e-07, "loss": 1.0663, "step": 11644 }, { "epoch": 1.6486161251504212, "grad_norm": 8.270744829320893, "learning_rate": 3.9368853637281404e-07, "loss": 0.9387, "step": 11645 }, { "epoch": 1.6487576980250584, "grad_norm": 9.090649892406406, "learning_rate": 3.93379833223706e-07, "loss": 0.9779, "step": 11646 }, { "epoch": 1.6488992708996957, "grad_norm": 9.759595404196492, "learning_rate": 3.9307124081804924e-07, "loss": 1.0579, "step": 11647 }, { "epoch": 1.649040843774333, "grad_norm": 9.442160964870459, "learning_rate": 3.92762759172067e-07, "loss": 0.963, "step": 11648 }, { "epoch": 1.6491824166489701, "grad_norm": 9.2332083389507, "learning_rate": 3.9245438830197464e-07, "loss": 0.9885, "step": 11649 }, { "epoch": 1.6493239895236074, "grad_norm": 9.48551318206603, "learning_rate": 3.9214612822398443e-07, "loss": 1.0123, "step": 11650 }, { "epoch": 1.6494655623982446, "grad_norm": 7.52971256756177, "learning_rate": 3.9183797895429973e-07, "loss": 0.8872, "step": 11651 }, { "epoch": 1.6496071352728818, "grad_norm": 9.372001066565199, "learning_rate": 3.9152994050912134e-07, "loss": 0.9581, "step": 11652 }, { "epoch": 1.649748708147519, "grad_norm": 10.593098578215582, "learning_rate": 3.9122201290464095e-07, "loss": 1.0242, "step": 11653 }, { "epoch": 1.6498902810221563, "grad_norm": 8.677590845721074, "learning_rate": 3.909141961570478e-07, "loss": 0.9353, "step": 11654 }, { "epoch": 1.6500318538967935, "grad_norm": 8.67052882287807, "learning_rate": 3.9060649028252265e-07, "loss": 0.8648, "step": 11655 }, { "epoch": 1.6501734267714308, "grad_norm": 9.363742834497618, "learning_rate": 3.9029889529724113e-07, "loss": 0.9038, "step": 11656 }, { "epoch": 1.6503149996460678, "grad_norm": 10.481659629974292, "learning_rate": 3.899914112173739e-07, "loss": 1.0738, "step": 11657 }, { "epoch": 1.650456572520705, "grad_norm": 10.052963002245244, "learning_rate": 3.896840380590844e-07, "loss": 0.9978, "step": 11658 }, { "epoch": 1.6505981453953422, "grad_norm": 9.51217553767132, "learning_rate": 3.8937677583853224e-07, "loss": 0.9839, "step": 11659 }, { "epoch": 1.6507397182699795, "grad_norm": 7.943286138477851, "learning_rate": 3.890696245718686e-07, "loss": 0.9163, "step": 11660 }, { "epoch": 1.6508812911446167, "grad_norm": 9.283142924339307, "learning_rate": 3.887625842752413e-07, "loss": 0.8638, "step": 11661 }, { "epoch": 1.651022864019254, "grad_norm": 9.453737551176436, "learning_rate": 3.8845565496479026e-07, "loss": 0.9768, "step": 11662 }, { "epoch": 1.6511644368938911, "grad_norm": 10.693761229606721, "learning_rate": 3.8814883665665076e-07, "loss": 0.9668, "step": 11663 }, { "epoch": 1.6513060097685284, "grad_norm": 9.229054010419816, "learning_rate": 3.878421293669532e-07, "loss": 1.082, "step": 11664 }, { "epoch": 1.6514475826431656, "grad_norm": 9.621221023442585, "learning_rate": 3.8753553311181966e-07, "loss": 1.0131, "step": 11665 }, { "epoch": 1.6515891555178028, "grad_norm": 10.261872649445495, "learning_rate": 3.8722904790736815e-07, "loss": 1.0331, "step": 11666 }, { "epoch": 1.65173072839244, "grad_norm": 10.050399423130065, "learning_rate": 3.869226737697099e-07, "loss": 0.9399, "step": 11667 }, { "epoch": 1.651872301267077, "grad_norm": 8.437630659433934, "learning_rate": 3.8661641071495145e-07, "loss": 0.9281, "step": 11668 }, { "epoch": 1.6520138741417143, "grad_norm": 8.254988116065748, "learning_rate": 3.863102587591919e-07, "loss": 0.9557, "step": 11669 }, { "epoch": 1.6521554470163515, "grad_norm": 8.695376989737268, "learning_rate": 3.860042179185261e-07, "loss": 0.987, "step": 11670 }, { "epoch": 1.6522970198909888, "grad_norm": 8.345280996154962, "learning_rate": 3.8569828820904265e-07, "loss": 0.9441, "step": 11671 }, { "epoch": 1.652438592765626, "grad_norm": 9.77012123056123, "learning_rate": 3.8539246964682336e-07, "loss": 0.9748, "step": 11672 }, { "epoch": 1.6525801656402632, "grad_norm": 8.815078249977288, "learning_rate": 3.850867622479457e-07, "loss": 0.9888, "step": 11673 }, { "epoch": 1.6527217385149005, "grad_norm": 9.22728476414662, "learning_rate": 3.847811660284795e-07, "loss": 0.9244, "step": 11674 }, { "epoch": 1.6528633113895377, "grad_norm": 9.72975286532564, "learning_rate": 3.844756810044914e-07, "loss": 0.9943, "step": 11675 }, { "epoch": 1.653004884264175, "grad_norm": 9.470685603652074, "learning_rate": 3.841703071920383e-07, "loss": 0.9572, "step": 11676 }, { "epoch": 1.6531464571388121, "grad_norm": 8.333353830948099, "learning_rate": 3.8386504460717426e-07, "loss": 0.9158, "step": 11677 }, { "epoch": 1.6532880300134494, "grad_norm": 9.215157122294455, "learning_rate": 3.835598932659476e-07, "loss": 0.9195, "step": 11678 }, { "epoch": 1.6534296028880866, "grad_norm": 10.989882237616547, "learning_rate": 3.8325485318439883e-07, "loss": 1.1058, "step": 11679 }, { "epoch": 1.6535711757627238, "grad_norm": 8.56531383752247, "learning_rate": 3.829499243785645e-07, "loss": 1.0172, "step": 11680 }, { "epoch": 1.653712748637361, "grad_norm": 10.054908684800075, "learning_rate": 3.8264510686447376e-07, "loss": 1.076, "step": 11681 }, { "epoch": 1.6538543215119983, "grad_norm": 8.656717996994715, "learning_rate": 3.823404006581513e-07, "loss": 1.1387, "step": 11682 }, { "epoch": 1.6539958943866355, "grad_norm": 9.928721263103832, "learning_rate": 3.820358057756146e-07, "loss": 0.9767, "step": 11683 }, { "epoch": 1.6541374672612728, "grad_norm": 9.105581445704358, "learning_rate": 3.8173132223287693e-07, "loss": 0.9741, "step": 11684 }, { "epoch": 1.65427904013591, "grad_norm": 9.912507881459666, "learning_rate": 3.814269500459436e-07, "loss": 0.8863, "step": 11685 }, { "epoch": 1.6544206130105472, "grad_norm": 9.847627197125925, "learning_rate": 3.8112268923081645e-07, "loss": 0.8754, "step": 11686 }, { "epoch": 1.6545621858851844, "grad_norm": 9.488793841006817, "learning_rate": 3.808185398034897e-07, "loss": 1.0081, "step": 11687 }, { "epoch": 1.6547037587598217, "grad_norm": 9.010872948767402, "learning_rate": 3.8051450177995136e-07, "loss": 0.89, "step": 11688 }, { "epoch": 1.654845331634459, "grad_norm": 8.739953513294608, "learning_rate": 3.802105751761859e-07, "loss": 0.9237, "step": 11689 }, { "epoch": 1.6549869045090961, "grad_norm": 11.258259093978449, "learning_rate": 3.799067600081696e-07, "loss": 0.9712, "step": 11690 }, { "epoch": 1.6551284773837334, "grad_norm": 8.093873483312374, "learning_rate": 3.7960305629187454e-07, "loss": 0.9498, "step": 11691 }, { "epoch": 1.6552700502583706, "grad_norm": 8.970968257907003, "learning_rate": 3.792994640432651e-07, "loss": 1.0105, "step": 11692 }, { "epoch": 1.6554116231330078, "grad_norm": 8.003020669956438, "learning_rate": 3.789959832783016e-07, "loss": 0.9366, "step": 11693 }, { "epoch": 1.655553196007645, "grad_norm": 9.412500275590343, "learning_rate": 3.786926140129385e-07, "loss": 1.0552, "step": 11694 }, { "epoch": 1.6556947688822823, "grad_norm": 9.5343128723701, "learning_rate": 3.7838935626312246e-07, "loss": 0.8943, "step": 11695 }, { "epoch": 1.6558363417569195, "grad_norm": 10.286814751868423, "learning_rate": 3.780862100447971e-07, "loss": 1.0287, "step": 11696 }, { "epoch": 1.6559779146315567, "grad_norm": 10.419785869250632, "learning_rate": 3.7778317537389613e-07, "loss": 1.1071, "step": 11697 }, { "epoch": 1.656119487506194, "grad_norm": 12.675113137246239, "learning_rate": 3.774802522663515e-07, "loss": 0.9939, "step": 11698 }, { "epoch": 1.656261060380831, "grad_norm": 8.122240742414274, "learning_rate": 3.771774407380879e-07, "loss": 0.981, "step": 11699 }, { "epoch": 1.6564026332554682, "grad_norm": 9.360995399169965, "learning_rate": 3.768747408050227e-07, "loss": 0.9692, "step": 11700 }, { "epoch": 1.6565442061301054, "grad_norm": 8.887222255653716, "learning_rate": 3.765721524830701e-07, "loss": 0.9145, "step": 11701 }, { "epoch": 1.6566857790047427, "grad_norm": 9.816707025087702, "learning_rate": 3.762696757881354e-07, "loss": 0.9241, "step": 11702 }, { "epoch": 1.65682735187938, "grad_norm": 8.703074155484336, "learning_rate": 3.7596731073612085e-07, "loss": 0.9322, "step": 11703 }, { "epoch": 1.6569689247540171, "grad_norm": 9.724535823619721, "learning_rate": 3.756650573429205e-07, "loss": 0.9802, "step": 11704 }, { "epoch": 1.6571104976286544, "grad_norm": 9.802769981816683, "learning_rate": 3.7536291562442483e-07, "loss": 0.9023, "step": 11705 }, { "epoch": 1.6572520705032916, "grad_norm": 8.75918837806794, "learning_rate": 3.750608855965157e-07, "loss": 0.9733, "step": 11706 }, { "epoch": 1.6573936433779288, "grad_norm": 9.235208593139035, "learning_rate": 3.747589672750723e-07, "loss": 0.9676, "step": 11707 }, { "epoch": 1.657535216252566, "grad_norm": 10.460917078956916, "learning_rate": 3.7445716067596506e-07, "loss": 0.9133, "step": 11708 }, { "epoch": 1.657676789127203, "grad_norm": 8.74570250562495, "learning_rate": 3.7415546581505954e-07, "loss": 0.9008, "step": 11709 }, { "epoch": 1.6578183620018403, "grad_norm": 8.102692483877274, "learning_rate": 3.7385388270821666e-07, "loss": 1.0909, "step": 11710 }, { "epoch": 1.6579599348764775, "grad_norm": 8.236062038104606, "learning_rate": 3.735524113712891e-07, "loss": 1.02, "step": 11711 }, { "epoch": 1.6581015077511148, "grad_norm": 12.679485928195998, "learning_rate": 3.7325105182012656e-07, "loss": 0.9559, "step": 11712 }, { "epoch": 1.658243080625752, "grad_norm": 9.680285714717035, "learning_rate": 3.729498040705698e-07, "loss": 0.9647, "step": 11713 }, { "epoch": 1.6583846535003892, "grad_norm": 8.052803773879347, "learning_rate": 3.726486681384564e-07, "loss": 0.9541, "step": 11714 }, { "epoch": 1.6585262263750264, "grad_norm": 7.454212805098951, "learning_rate": 3.723476440396157e-07, "loss": 0.8907, "step": 11715 }, { "epoch": 1.6586677992496637, "grad_norm": 10.998951948696138, "learning_rate": 3.7204673178987294e-07, "loss": 1.0297, "step": 11716 }, { "epoch": 1.658809372124301, "grad_norm": 8.884164296327517, "learning_rate": 3.717459314050473e-07, "loss": 0.8443, "step": 11717 }, { "epoch": 1.6589509449989381, "grad_norm": 11.04869986087606, "learning_rate": 3.714452429009513e-07, "loss": 1.1421, "step": 11718 }, { "epoch": 1.6590925178735754, "grad_norm": 9.638541072124113, "learning_rate": 3.711446662933915e-07, "loss": 1.0049, "step": 11719 }, { "epoch": 1.6592340907482126, "grad_norm": 9.513624760905204, "learning_rate": 3.708442015981689e-07, "loss": 0.9283, "step": 11720 }, { "epoch": 1.6593756636228498, "grad_norm": 9.16625305167407, "learning_rate": 3.705438488310792e-07, "loss": 0.8862, "step": 11721 }, { "epoch": 1.659517236497487, "grad_norm": 9.148985392699036, "learning_rate": 3.7024360800791195e-07, "loss": 0.907, "step": 11722 }, { "epoch": 1.6596588093721243, "grad_norm": 10.858618313340086, "learning_rate": 3.699434791444495e-07, "loss": 1.1472, "step": 11723 }, { "epoch": 1.6598003822467615, "grad_norm": 11.103263966200434, "learning_rate": 3.6964346225647097e-07, "loss": 1.0079, "step": 11724 }, { "epoch": 1.6599419551213987, "grad_norm": 10.84258446489664, "learning_rate": 3.6934355735974647e-07, "loss": 0.9794, "step": 11725 }, { "epoch": 1.660083527996036, "grad_norm": 8.480609542964164, "learning_rate": 3.690437644700431e-07, "loss": 0.882, "step": 11726 }, { "epoch": 1.6602251008706732, "grad_norm": 10.023330176500645, "learning_rate": 3.687440836031195e-07, "loss": 0.9927, "step": 11727 }, { "epoch": 1.6603666737453104, "grad_norm": 9.30161791438432, "learning_rate": 3.684445147747309e-07, "loss": 0.9795, "step": 11728 }, { "epoch": 1.6605082466199477, "grad_norm": 8.261616706668867, "learning_rate": 3.681450580006246e-07, "loss": 0.8965, "step": 11729 }, { "epoch": 1.660649819494585, "grad_norm": 8.205975137307057, "learning_rate": 3.6784571329654265e-07, "loss": 0.8972, "step": 11730 }, { "epoch": 1.6607913923692221, "grad_norm": 8.507657808941481, "learning_rate": 3.675464806782222e-07, "loss": 1.0148, "step": 11731 }, { "epoch": 1.6609329652438594, "grad_norm": 8.450299536457827, "learning_rate": 3.6724736016139293e-07, "loss": 0.9593, "step": 11732 }, { "epoch": 1.6610745381184966, "grad_norm": 8.536073320297348, "learning_rate": 3.6694835176178e-07, "loss": 1.0222, "step": 11733 }, { "epoch": 1.6612161109931338, "grad_norm": 9.20541565760065, "learning_rate": 3.666494554951014e-07, "loss": 1.0516, "step": 11734 }, { "epoch": 1.661357683867771, "grad_norm": 10.303812332529372, "learning_rate": 3.6635067137707063e-07, "loss": 1.0102, "step": 11735 }, { "epoch": 1.6614992567424083, "grad_norm": 9.334597592785576, "learning_rate": 3.660519994233935e-07, "loss": 0.969, "step": 11736 }, { "epoch": 1.6616408296170455, "grad_norm": 9.44091896627306, "learning_rate": 3.657534396497725e-07, "loss": 1.0129, "step": 11737 }, { "epoch": 1.6617824024916827, "grad_norm": 10.384851764912161, "learning_rate": 3.654549920719011e-07, "loss": 1.0494, "step": 11738 }, { "epoch": 1.66192397536632, "grad_norm": 8.798562140471937, "learning_rate": 3.6515665670546956e-07, "loss": 0.9682, "step": 11739 }, { "epoch": 1.662065548240957, "grad_norm": 8.852484282183651, "learning_rate": 3.6485843356616093e-07, "loss": 0.9699, "step": 11740 }, { "epoch": 1.6622071211155942, "grad_norm": 9.555789363080503, "learning_rate": 3.6456032266965173e-07, "loss": 0.9531, "step": 11741 }, { "epoch": 1.6623486939902314, "grad_norm": 9.691112607653404, "learning_rate": 3.6426232403161484e-07, "loss": 0.9689, "step": 11742 }, { "epoch": 1.6624902668648687, "grad_norm": 9.30383389110329, "learning_rate": 3.639644376677146e-07, "loss": 0.9443, "step": 11743 }, { "epoch": 1.662631839739506, "grad_norm": 9.738143754670142, "learning_rate": 3.636666635936112e-07, "loss": 0.9184, "step": 11744 }, { "epoch": 1.6627734126141431, "grad_norm": 8.585297689991958, "learning_rate": 3.633690018249586e-07, "loss": 0.8788, "step": 11745 }, { "epoch": 1.6629149854887804, "grad_norm": 7.417358823332096, "learning_rate": 3.6307145237740427e-07, "loss": 0.9379, "step": 11746 }, { "epoch": 1.6630565583634176, "grad_norm": 9.663223661932252, "learning_rate": 3.6277401526659067e-07, "loss": 1.0545, "step": 11747 }, { "epoch": 1.6631981312380548, "grad_norm": 8.800592263838608, "learning_rate": 3.624766905081528e-07, "loss": 0.9599, "step": 11748 }, { "epoch": 1.663339704112692, "grad_norm": 8.103774295778114, "learning_rate": 3.621794781177229e-07, "loss": 0.8686, "step": 11749 }, { "epoch": 1.6634812769873293, "grad_norm": 9.067293569919608, "learning_rate": 3.618823781109226e-07, "loss": 0.8172, "step": 11750 }, { "epoch": 1.6636228498619663, "grad_norm": 8.048862011955478, "learning_rate": 3.6158539050337146e-07, "loss": 1.0049, "step": 11751 }, { "epoch": 1.6637644227366035, "grad_norm": 9.410940629279946, "learning_rate": 3.6128851531068236e-07, "loss": 0.966, "step": 11752 }, { "epoch": 1.6639059956112408, "grad_norm": 10.14111138868684, "learning_rate": 3.609917525484608e-07, "loss": 1.0306, "step": 11753 }, { "epoch": 1.664047568485878, "grad_norm": 8.313226883162608, "learning_rate": 3.6069510223230854e-07, "loss": 0.9196, "step": 11754 }, { "epoch": 1.6641891413605152, "grad_norm": 9.203099715452321, "learning_rate": 3.603985643778188e-07, "loss": 1.0526, "step": 11755 }, { "epoch": 1.6643307142351524, "grad_norm": 9.2385806142035, "learning_rate": 3.601021390005821e-07, "loss": 0.9767, "step": 11756 }, { "epoch": 1.6644722871097897, "grad_norm": 8.479596951101215, "learning_rate": 3.5980582611617966e-07, "loss": 0.982, "step": 11757 }, { "epoch": 1.664613859984427, "grad_norm": 8.416275569276936, "learning_rate": 3.595096257401895e-07, "loss": 0.9726, "step": 11758 }, { "epoch": 1.6647554328590641, "grad_norm": 8.563547334869059, "learning_rate": 3.59213537888182e-07, "loss": 1.017, "step": 11759 }, { "epoch": 1.6648970057337014, "grad_norm": 9.206710971704494, "learning_rate": 3.58917562575723e-07, "loss": 0.9542, "step": 11760 }, { "epoch": 1.6650385786083386, "grad_norm": 10.454632067415814, "learning_rate": 3.586216998183714e-07, "loss": 0.9619, "step": 11761 }, { "epoch": 1.6651801514829758, "grad_norm": 9.41416988074132, "learning_rate": 3.583259496316796e-07, "loss": 0.9729, "step": 11762 }, { "epoch": 1.665321724357613, "grad_norm": 8.667397932844063, "learning_rate": 3.580303120311965e-07, "loss": 0.9786, "step": 11763 }, { "epoch": 1.6654632972322503, "grad_norm": 8.193052316579397, "learning_rate": 3.5773478703246213e-07, "loss": 0.9009, "step": 11764 }, { "epoch": 1.6656048701068875, "grad_norm": 10.221107141056924, "learning_rate": 3.5743937465101323e-07, "loss": 0.9233, "step": 11765 }, { "epoch": 1.6657464429815247, "grad_norm": 8.918686492415798, "learning_rate": 3.571440749023783e-07, "loss": 0.9677, "step": 11766 }, { "epoch": 1.665888015856162, "grad_norm": 8.180001822100673, "learning_rate": 3.568488878020815e-07, "loss": 0.9568, "step": 11767 }, { "epoch": 1.6660295887307992, "grad_norm": 10.693692025362465, "learning_rate": 3.5655381336564127e-07, "loss": 0.9725, "step": 11768 }, { "epoch": 1.6661711616054364, "grad_norm": 8.441714442256938, "learning_rate": 3.562588516085683e-07, "loss": 0.9594, "step": 11769 }, { "epoch": 1.6663127344800737, "grad_norm": 8.492914724928205, "learning_rate": 3.559640025463704e-07, "loss": 0.9117, "step": 11770 }, { "epoch": 1.666454307354711, "grad_norm": 7.981800119534024, "learning_rate": 3.556692661945446e-07, "loss": 0.9668, "step": 11771 }, { "epoch": 1.6665958802293481, "grad_norm": 8.435364403487789, "learning_rate": 3.553746425685875e-07, "loss": 0.9428, "step": 11772 }, { "epoch": 1.6667374531039854, "grad_norm": 10.16356031979067, "learning_rate": 3.550801316839858e-07, "loss": 1.0634, "step": 11773 }, { "epoch": 1.6668790259786226, "grad_norm": 9.008777576635632, "learning_rate": 3.5478573355622213e-07, "loss": 0.9641, "step": 11774 }, { "epoch": 1.6670205988532598, "grad_norm": 9.98134092935648, "learning_rate": 3.544914482007736e-07, "loss": 0.9653, "step": 11775 }, { "epoch": 1.667162171727897, "grad_norm": 10.130051847816631, "learning_rate": 3.541972756331091e-07, "loss": 1.028, "step": 11776 }, { "epoch": 1.6673037446025343, "grad_norm": 8.647387212506871, "learning_rate": 3.5390321586869473e-07, "loss": 0.9287, "step": 11777 }, { "epoch": 1.6674453174771715, "grad_norm": 7.847084096707565, "learning_rate": 3.5360926892298723e-07, "loss": 1.0373, "step": 11778 }, { "epoch": 1.6675868903518087, "grad_norm": 10.641364924714193, "learning_rate": 3.5331543481144094e-07, "loss": 1.0339, "step": 11779 }, { "epoch": 1.667728463226446, "grad_norm": 8.126349703850883, "learning_rate": 3.5302171354950065e-07, "loss": 0.8772, "step": 11780 }, { "epoch": 1.6678700361010832, "grad_norm": 11.108516182411687, "learning_rate": 3.527281051526088e-07, "loss": 0.9219, "step": 11781 }, { "epoch": 1.6680116089757202, "grad_norm": 10.322464631182406, "learning_rate": 3.5243460963619944e-07, "loss": 1.0464, "step": 11782 }, { "epoch": 1.6681531818503574, "grad_norm": 11.526090796231337, "learning_rate": 3.521412270157007e-07, "loss": 0.8959, "step": 11783 }, { "epoch": 1.6682947547249947, "grad_norm": 11.048757864755835, "learning_rate": 3.518479573065367e-07, "loss": 1.0008, "step": 11784 }, { "epoch": 1.668436327599632, "grad_norm": 10.011374299088278, "learning_rate": 3.5155480052412344e-07, "loss": 0.9487, "step": 11785 }, { "epoch": 1.6685779004742691, "grad_norm": 9.508640576100705, "learning_rate": 3.5126175668387275e-07, "loss": 0.8857, "step": 11786 }, { "epoch": 1.6687194733489064, "grad_norm": 8.06180492628255, "learning_rate": 3.5096882580118866e-07, "loss": 0.9324, "step": 11787 }, { "epoch": 1.6688610462235436, "grad_norm": 9.003364781712342, "learning_rate": 3.50676007891472e-07, "loss": 0.8743, "step": 11788 }, { "epoch": 1.6690026190981808, "grad_norm": 8.424945448520536, "learning_rate": 3.50383302970114e-07, "loss": 1.0064, "step": 11789 }, { "epoch": 1.669144191972818, "grad_norm": 9.92671510881432, "learning_rate": 3.5009071105250314e-07, "loss": 1.0441, "step": 11790 }, { "epoch": 1.6692857648474553, "grad_norm": 9.289647269230342, "learning_rate": 3.497982321540211e-07, "loss": 0.9393, "step": 11791 }, { "epoch": 1.6694273377220923, "grad_norm": 8.3265766472248, "learning_rate": 3.495058662900427e-07, "loss": 1.0154, "step": 11792 }, { "epoch": 1.6695689105967295, "grad_norm": 9.007117953492394, "learning_rate": 3.492136134759377e-07, "loss": 0.9564, "step": 11793 }, { "epoch": 1.6697104834713667, "grad_norm": 9.10543146375356, "learning_rate": 3.4892147372706854e-07, "loss": 0.908, "step": 11794 }, { "epoch": 1.669852056346004, "grad_norm": 9.78911510797416, "learning_rate": 3.4862944705879364e-07, "loss": 0.9495, "step": 11795 }, { "epoch": 1.6699936292206412, "grad_norm": 10.205367258164864, "learning_rate": 3.48337533486465e-07, "loss": 0.9097, "step": 11796 }, { "epoch": 1.6701352020952784, "grad_norm": 10.296903896725386, "learning_rate": 3.480457330254275e-07, "loss": 1.0547, "step": 11797 }, { "epoch": 1.6702767749699157, "grad_norm": 9.442829572457223, "learning_rate": 3.477540456910217e-07, "loss": 0.9873, "step": 11798 }, { "epoch": 1.670418347844553, "grad_norm": 8.792203431820912, "learning_rate": 3.474624714985805e-07, "loss": 0.9529, "step": 11799 }, { "epoch": 1.6705599207191901, "grad_norm": 9.504623091286781, "learning_rate": 3.4717101046343265e-07, "loss": 1.013, "step": 11800 }, { "epoch": 1.6707014935938274, "grad_norm": 8.156218663882571, "learning_rate": 3.4687966260089913e-07, "loss": 0.9721, "step": 11801 }, { "epoch": 1.6708430664684646, "grad_norm": 8.502459899395017, "learning_rate": 3.465884279262968e-07, "loss": 0.8711, "step": 11802 }, { "epoch": 1.6709846393431018, "grad_norm": 8.789533407523557, "learning_rate": 3.4629730645493493e-07, "loss": 1.0013, "step": 11803 }, { "epoch": 1.671126212217739, "grad_norm": 10.126917527962991, "learning_rate": 3.4600629820211755e-07, "loss": 1.0359, "step": 11804 }, { "epoch": 1.6712677850923763, "grad_norm": 9.076509998476364, "learning_rate": 3.4571540318314335e-07, "loss": 0.909, "step": 11805 }, { "epoch": 1.6714093579670135, "grad_norm": 9.500723961801008, "learning_rate": 3.4542462141330365e-07, "loss": 0.9654, "step": 11806 }, { "epoch": 1.6715509308416507, "grad_norm": 7.771400928648504, "learning_rate": 3.4513395290788566e-07, "loss": 1.0615, "step": 11807 }, { "epoch": 1.671692503716288, "grad_norm": 9.947456793948916, "learning_rate": 3.448433976821683e-07, "loss": 0.9564, "step": 11808 }, { "epoch": 1.6718340765909252, "grad_norm": 9.783364356889118, "learning_rate": 3.445529557514274e-07, "loss": 1.0065, "step": 11809 }, { "epoch": 1.6719756494655624, "grad_norm": 10.580934227582079, "learning_rate": 3.4426262713092963e-07, "loss": 0.9055, "step": 11810 }, { "epoch": 1.6721172223401997, "grad_norm": 9.559190763435879, "learning_rate": 3.4397241183593887e-07, "loss": 0.9582, "step": 11811 }, { "epoch": 1.672258795214837, "grad_norm": 9.978690903909277, "learning_rate": 3.436823098817102e-07, "loss": 0.8356, "step": 11812 }, { "epoch": 1.6724003680894741, "grad_norm": 9.799627912036021, "learning_rate": 3.4339232128349527e-07, "loss": 1.0757, "step": 11813 }, { "epoch": 1.6725419409641114, "grad_norm": 10.908411724449058, "learning_rate": 3.43102446056538e-07, "loss": 0.9505, "step": 11814 }, { "epoch": 1.6726835138387486, "grad_norm": 9.363446657122646, "learning_rate": 3.428126842160762e-07, "loss": 1.0524, "step": 11815 }, { "epoch": 1.6728250867133858, "grad_norm": 8.457961301342067, "learning_rate": 3.4252303577734376e-07, "loss": 0.8464, "step": 11816 }, { "epoch": 1.672966659588023, "grad_norm": 9.354485611834129, "learning_rate": 3.4223350075556605e-07, "loss": 0.9621, "step": 11817 }, { "epoch": 1.6731082324626603, "grad_norm": 9.05830486144569, "learning_rate": 3.419440791659645e-07, "loss": 0.9729, "step": 11818 }, { "epoch": 1.6732498053372975, "grad_norm": 10.678985594961834, "learning_rate": 3.4165477102375386e-07, "loss": 0.9458, "step": 11819 }, { "epoch": 1.6733913782119347, "grad_norm": 9.107981239859082, "learning_rate": 3.413655763441423e-07, "loss": 0.9884, "step": 11820 }, { "epoch": 1.673532951086572, "grad_norm": 10.18986399551389, "learning_rate": 3.4107649514233343e-07, "loss": 1.0302, "step": 11821 }, { "epoch": 1.6736745239612092, "grad_norm": 9.874161310434049, "learning_rate": 3.4078752743352263e-07, "loss": 1.065, "step": 11822 }, { "epoch": 1.6738160968358462, "grad_norm": 10.585462180864827, "learning_rate": 3.404986732329027e-07, "loss": 1.1469, "step": 11823 }, { "epoch": 1.6739576697104834, "grad_norm": 8.814066862589772, "learning_rate": 3.402099325556563e-07, "loss": 0.9319, "step": 11824 }, { "epoch": 1.6740992425851207, "grad_norm": 9.82492115095885, "learning_rate": 3.3992130541696336e-07, "loss": 0.9344, "step": 11825 }, { "epoch": 1.674240815459758, "grad_norm": 8.477562854998435, "learning_rate": 3.396327918319972e-07, "loss": 0.903, "step": 11826 }, { "epoch": 1.6743823883343951, "grad_norm": 9.440343567366702, "learning_rate": 3.3934439181592393e-07, "loss": 0.9049, "step": 11827 }, { "epoch": 1.6745239612090324, "grad_norm": 7.291975643332813, "learning_rate": 3.390561053839053e-07, "loss": 1.0322, "step": 11828 }, { "epoch": 1.6746655340836696, "grad_norm": 10.502238806963785, "learning_rate": 3.3876793255109565e-07, "loss": 0.9916, "step": 11829 }, { "epoch": 1.6748071069583068, "grad_norm": 8.01818640163639, "learning_rate": 3.3847987333264473e-07, "loss": 0.9518, "step": 11830 }, { "epoch": 1.674948679832944, "grad_norm": 11.019135217537846, "learning_rate": 3.381919277436946e-07, "loss": 0.9812, "step": 11831 }, { "epoch": 1.6750902527075813, "grad_norm": 12.173396161165531, "learning_rate": 3.3790409579938343e-07, "loss": 1.0364, "step": 11832 }, { "epoch": 1.6752318255822183, "grad_norm": 8.865432525255791, "learning_rate": 3.376163775148414e-07, "loss": 0.9214, "step": 11833 }, { "epoch": 1.6753733984568555, "grad_norm": 10.021925731182012, "learning_rate": 3.3732877290519437e-07, "loss": 1.0601, "step": 11834 }, { "epoch": 1.6755149713314927, "grad_norm": 9.153035611920558, "learning_rate": 3.370412819855615e-07, "loss": 0.971, "step": 11835 }, { "epoch": 1.67565654420613, "grad_norm": 8.932999338008011, "learning_rate": 3.3675390477105496e-07, "loss": 0.9648, "step": 11836 }, { "epoch": 1.6757981170807672, "grad_norm": 7.962786428908058, "learning_rate": 3.364666412767831e-07, "loss": 0.9761, "step": 11837 }, { "epoch": 1.6759396899554044, "grad_norm": 7.90513802047774, "learning_rate": 3.3617949151784623e-07, "loss": 0.8883, "step": 11838 }, { "epoch": 1.6760812628300417, "grad_norm": 9.45609417148436, "learning_rate": 3.358924555093407e-07, "loss": 0.8499, "step": 11839 }, { "epoch": 1.676222835704679, "grad_norm": 9.371575709261172, "learning_rate": 3.3560553326635467e-07, "loss": 0.9934, "step": 11840 }, { "epoch": 1.6763644085793161, "grad_norm": 11.193532670476266, "learning_rate": 3.353187248039716e-07, "loss": 1.1353, "step": 11841 }, { "epoch": 1.6765059814539534, "grad_norm": 9.245526443437853, "learning_rate": 3.3503203013727006e-07, "loss": 1.0195, "step": 11842 }, { "epoch": 1.6766475543285906, "grad_norm": 7.770699207556319, "learning_rate": 3.3474544928131956e-07, "loss": 0.9482, "step": 11843 }, { "epoch": 1.6767891272032278, "grad_norm": 8.884202511213593, "learning_rate": 3.3445898225118704e-07, "loss": 0.9994, "step": 11844 }, { "epoch": 1.676930700077865, "grad_norm": 9.815442075485294, "learning_rate": 3.3417262906193096e-07, "loss": 0.8812, "step": 11845 }, { "epoch": 1.6770722729525023, "grad_norm": 9.916383199620654, "learning_rate": 3.3388638972860515e-07, "loss": 0.9923, "step": 11846 }, { "epoch": 1.6772138458271395, "grad_norm": 8.512667473085285, "learning_rate": 3.3360026426625615e-07, "loss": 0.9772, "step": 11847 }, { "epoch": 1.6773554187017767, "grad_norm": 8.345683241770722, "learning_rate": 3.333142526899255e-07, "loss": 0.9373, "step": 11848 }, { "epoch": 1.677496991576414, "grad_norm": 10.53768624608355, "learning_rate": 3.330283550146499e-07, "loss": 0.9259, "step": 11849 }, { "epoch": 1.6776385644510512, "grad_norm": 8.952089153237893, "learning_rate": 3.3274257125545747e-07, "loss": 0.9924, "step": 11850 }, { "epoch": 1.6777801373256884, "grad_norm": 9.44404587247469, "learning_rate": 3.3245690142737236e-07, "loss": 0.903, "step": 11851 }, { "epoch": 1.6779217102003257, "grad_norm": 7.210224087390132, "learning_rate": 3.3217134554541145e-07, "loss": 0.8641, "step": 11852 }, { "epoch": 1.6780632830749629, "grad_norm": 9.015100738250283, "learning_rate": 3.3188590362458696e-07, "loss": 0.9772, "step": 11853 }, { "epoch": 1.6782048559496001, "grad_norm": 10.707020917885384, "learning_rate": 3.316005756799032e-07, "loss": 1.094, "step": 11854 }, { "epoch": 1.6783464288242373, "grad_norm": 7.877070079174525, "learning_rate": 3.313153617263612e-07, "loss": 0.8554, "step": 11855 }, { "epoch": 1.6784880016988746, "grad_norm": 10.108267816350649, "learning_rate": 3.310302617789532e-07, "loss": 0.99, "step": 11856 }, { "epoch": 1.6786295745735118, "grad_norm": 10.738785327724054, "learning_rate": 3.307452758526669e-07, "loss": 0.9674, "step": 11857 }, { "epoch": 1.678771147448149, "grad_norm": 7.415109478070931, "learning_rate": 3.3046040396248453e-07, "loss": 0.954, "step": 11858 }, { "epoch": 1.6789127203227863, "grad_norm": 10.065126064551427, "learning_rate": 3.3017564612338013e-07, "loss": 0.956, "step": 11859 }, { "epoch": 1.6790542931974235, "grad_norm": 10.2767472538081, "learning_rate": 3.298910023503249e-07, "loss": 1.0592, "step": 11860 }, { "epoch": 1.6791958660720607, "grad_norm": 7.79606523303107, "learning_rate": 3.296064726582812e-07, "loss": 0.8422, "step": 11861 }, { "epoch": 1.679337438946698, "grad_norm": 9.64881862860601, "learning_rate": 3.2932205706220714e-07, "loss": 1.002, "step": 11862 }, { "epoch": 1.6794790118213352, "grad_norm": 11.500534708611786, "learning_rate": 3.290377555770538e-07, "loss": 0.7866, "step": 11863 }, { "epoch": 1.6796205846959722, "grad_norm": 7.591650904259787, "learning_rate": 3.287535682177667e-07, "loss": 0.9144, "step": 11864 }, { "epoch": 1.6797621575706094, "grad_norm": 9.593241028844215, "learning_rate": 3.2846949499928616e-07, "loss": 0.9439, "step": 11865 }, { "epoch": 1.6799037304452467, "grad_norm": 9.866170291614905, "learning_rate": 3.281855359365452e-07, "loss": 0.9068, "step": 11866 }, { "epoch": 1.6800453033198839, "grad_norm": 8.397437713019547, "learning_rate": 3.27901691044471e-07, "loss": 0.9758, "step": 11867 }, { "epoch": 1.6801868761945211, "grad_norm": 9.565142497044649, "learning_rate": 3.27617960337985e-07, "loss": 0.9678, "step": 11868 }, { "epoch": 1.6803284490691583, "grad_norm": 11.643247659671811, "learning_rate": 3.273343438320034e-07, "loss": 1.0641, "step": 11869 }, { "epoch": 1.6804700219437956, "grad_norm": 8.784963453864915, "learning_rate": 3.2705084154143504e-07, "loss": 0.9687, "step": 11870 }, { "epoch": 1.6806115948184328, "grad_norm": 7.894906934947024, "learning_rate": 3.267674534811835e-07, "loss": 0.9253, "step": 11871 }, { "epoch": 1.68075316769307, "grad_norm": 9.751807020923858, "learning_rate": 3.264841796661469e-07, "loss": 1.0447, "step": 11872 }, { "epoch": 1.6808947405677073, "grad_norm": 10.262692664100072, "learning_rate": 3.2620102011121616e-07, "loss": 0.9982, "step": 11873 }, { "epoch": 1.6810363134423445, "grad_norm": 8.513578900544752, "learning_rate": 3.259179748312774e-07, "loss": 0.8752, "step": 11874 }, { "epoch": 1.6811778863169815, "grad_norm": 10.098826546060778, "learning_rate": 3.25635043841209e-07, "loss": 1.0218, "step": 11875 }, { "epoch": 1.6813194591916187, "grad_norm": 9.50536395777282, "learning_rate": 3.253522271558857e-07, "loss": 0.9274, "step": 11876 }, { "epoch": 1.681461032066256, "grad_norm": 9.854272649623868, "learning_rate": 3.2506952479017417e-07, "loss": 0.8875, "step": 11877 }, { "epoch": 1.6816026049408932, "grad_norm": 9.047955647758702, "learning_rate": 3.247869367589354e-07, "loss": 0.9565, "step": 11878 }, { "epoch": 1.6817441778155304, "grad_norm": 9.260458317860728, "learning_rate": 3.245044630770264e-07, "loss": 0.8994, "step": 11879 }, { "epoch": 1.6818857506901677, "grad_norm": 9.304597304873509, "learning_rate": 3.242221037592949e-07, "loss": 1.0018, "step": 11880 }, { "epoch": 1.6820273235648049, "grad_norm": 10.57726235865908, "learning_rate": 3.2393985882058555e-07, "loss": 0.9592, "step": 11881 }, { "epoch": 1.6821688964394421, "grad_norm": 8.249280493601207, "learning_rate": 3.2365772827573473e-07, "loss": 0.9758, "step": 11882 }, { "epoch": 1.6823104693140793, "grad_norm": 10.774387110297228, "learning_rate": 3.23375712139575e-07, "loss": 1.0518, "step": 11883 }, { "epoch": 1.6824520421887166, "grad_norm": 8.322223225440574, "learning_rate": 3.230938104269307e-07, "loss": 0.8852, "step": 11884 }, { "epoch": 1.6825936150633538, "grad_norm": 8.819556907544174, "learning_rate": 3.228120231526219e-07, "loss": 1.0592, "step": 11885 }, { "epoch": 1.682735187937991, "grad_norm": 10.29017506857364, "learning_rate": 3.225303503314614e-07, "loss": 1.012, "step": 11886 }, { "epoch": 1.6828767608126283, "grad_norm": 8.374196028195922, "learning_rate": 3.2224879197825717e-07, "loss": 0.9008, "step": 11887 }, { "epoch": 1.6830183336872655, "grad_norm": 8.906665885986172, "learning_rate": 3.2196734810781007e-07, "loss": 1.0331, "step": 11888 }, { "epoch": 1.6831599065619027, "grad_norm": 9.188012348862596, "learning_rate": 3.2168601873491493e-07, "loss": 0.9511, "step": 11889 }, { "epoch": 1.68330147943654, "grad_norm": 9.162084185440218, "learning_rate": 3.214048038743622e-07, "loss": 0.9543, "step": 11890 }, { "epoch": 1.6834430523111772, "grad_norm": 10.329737088752738, "learning_rate": 3.2112370354093397e-07, "loss": 0.9283, "step": 11891 }, { "epoch": 1.6835846251858144, "grad_norm": 9.523087004548021, "learning_rate": 3.208427177494081e-07, "loss": 0.962, "step": 11892 }, { "epoch": 1.6837261980604517, "grad_norm": 8.774238366441432, "learning_rate": 3.205618465145563e-07, "loss": 0.9373, "step": 11893 }, { "epoch": 1.6838677709350889, "grad_norm": 10.241426557473764, "learning_rate": 3.202810898511424e-07, "loss": 1.0365, "step": 11894 }, { "epoch": 1.6840093438097261, "grad_norm": 8.280929847142819, "learning_rate": 3.2000044777392684e-07, "loss": 0.9601, "step": 11895 }, { "epoch": 1.6841509166843633, "grad_norm": 9.903792885828672, "learning_rate": 3.1971992029766197e-07, "loss": 0.829, "step": 11896 }, { "epoch": 1.6842924895590006, "grad_norm": 9.615652722655641, "learning_rate": 3.194395074370957e-07, "loss": 1.0548, "step": 11897 }, { "epoch": 1.6844340624336378, "grad_norm": 10.079324056577406, "learning_rate": 3.191592092069684e-07, "loss": 1.0214, "step": 11898 }, { "epoch": 1.684575635308275, "grad_norm": 9.242922162791476, "learning_rate": 3.1887902562201506e-07, "loss": 0.9221, "step": 11899 }, { "epoch": 1.6847172081829123, "grad_norm": 10.130059002688446, "learning_rate": 3.185989566969655e-07, "loss": 0.8802, "step": 11900 }, { "epoch": 1.6848587810575495, "grad_norm": 9.94511956374618, "learning_rate": 3.1831900244654157e-07, "loss": 1.0212, "step": 11901 }, { "epoch": 1.6850003539321867, "grad_norm": 9.058780722520606, "learning_rate": 3.1803916288546176e-07, "loss": 0.8941, "step": 11902 }, { "epoch": 1.685141926806824, "grad_norm": 9.817534303345381, "learning_rate": 3.1775943802843546e-07, "loss": 1.0519, "step": 11903 }, { "epoch": 1.6852834996814612, "grad_norm": 9.346211086966147, "learning_rate": 3.174798278901692e-07, "loss": 0.828, "step": 11904 }, { "epoch": 1.6854250725560984, "grad_norm": 8.951270106036935, "learning_rate": 3.172003324853601e-07, "loss": 1.0545, "step": 11905 }, { "epoch": 1.6855666454307354, "grad_norm": 8.770281583295542, "learning_rate": 3.169209518287028e-07, "loss": 0.96, "step": 11906 }, { "epoch": 1.6857082183053727, "grad_norm": 7.704215879749045, "learning_rate": 3.166416859348825e-07, "loss": 0.981, "step": 11907 }, { "epoch": 1.6858497911800099, "grad_norm": 8.960388326404216, "learning_rate": 3.163625348185814e-07, "loss": 1.1275, "step": 11908 }, { "epoch": 1.6859913640546471, "grad_norm": 8.927863200845307, "learning_rate": 3.1608349849447385e-07, "loss": 0.9737, "step": 11909 }, { "epoch": 1.6861329369292843, "grad_norm": 9.827097537335263, "learning_rate": 3.1580457697722777e-07, "loss": 0.8913, "step": 11910 }, { "epoch": 1.6862745098039216, "grad_norm": 10.656798407351143, "learning_rate": 3.1552577028150677e-07, "loss": 1.065, "step": 11911 }, { "epoch": 1.6864160826785588, "grad_norm": 8.865094311724029, "learning_rate": 3.152470784219669e-07, "loss": 0.9225, "step": 11912 }, { "epoch": 1.686557655553196, "grad_norm": 8.526312995994463, "learning_rate": 3.1496850141325973e-07, "loss": 0.9968, "step": 11913 }, { "epoch": 1.6866992284278333, "grad_norm": 9.624221175749552, "learning_rate": 3.146900392700286e-07, "loss": 0.9439, "step": 11914 }, { "epoch": 1.6868408013024705, "grad_norm": 9.383336950632305, "learning_rate": 3.1441169200691265e-07, "loss": 1.0133, "step": 11915 }, { "epoch": 1.6869823741771075, "grad_norm": 8.485156843436357, "learning_rate": 3.141334596385448e-07, "loss": 0.9938, "step": 11916 }, { "epoch": 1.6871239470517447, "grad_norm": 10.424560566886996, "learning_rate": 3.138553421795507e-07, "loss": 1.026, "step": 11917 }, { "epoch": 1.687265519926382, "grad_norm": 9.278864184620907, "learning_rate": 3.1357733964455185e-07, "loss": 0.9525, "step": 11918 }, { "epoch": 1.6874070928010192, "grad_norm": 10.809381465242717, "learning_rate": 3.1329945204816166e-07, "loss": 1.0511, "step": 11919 }, { "epoch": 1.6875486656756564, "grad_norm": 8.735957621600866, "learning_rate": 3.1302167940498893e-07, "loss": 0.9766, "step": 11920 }, { "epoch": 1.6876902385502937, "grad_norm": 9.825362601153232, "learning_rate": 3.127440217296354e-07, "loss": 0.9797, "step": 11921 }, { "epoch": 1.6878318114249309, "grad_norm": 8.123700786300049, "learning_rate": 3.1246647903669794e-07, "loss": 0.9128, "step": 11922 }, { "epoch": 1.6879733842995681, "grad_norm": 8.544370741154944, "learning_rate": 3.121890513407669e-07, "loss": 0.8855, "step": 11923 }, { "epoch": 1.6881149571742053, "grad_norm": 9.659503080172364, "learning_rate": 3.119117386564255e-07, "loss": 0.989, "step": 11924 }, { "epoch": 1.6882565300488426, "grad_norm": 9.77817864993072, "learning_rate": 3.1163454099825326e-07, "loss": 0.9804, "step": 11925 }, { "epoch": 1.6883981029234798, "grad_norm": 8.29319506285641, "learning_rate": 3.113574583808207e-07, "loss": 1.0307, "step": 11926 }, { "epoch": 1.688539675798117, "grad_norm": 10.833348983362093, "learning_rate": 3.110804908186954e-07, "loss": 1.0891, "step": 11927 }, { "epoch": 1.6886812486727543, "grad_norm": 11.058976986361047, "learning_rate": 3.1080363832643593e-07, "loss": 0.9622, "step": 11928 }, { "epoch": 1.6888228215473915, "grad_norm": 9.028349836434284, "learning_rate": 3.105269009185974e-07, "loss": 0.8826, "step": 11929 }, { "epoch": 1.6889643944220287, "grad_norm": 10.346505619185253, "learning_rate": 3.102502786097272e-07, "loss": 1.0466, "step": 11930 }, { "epoch": 1.689105967296666, "grad_norm": 8.838653497807055, "learning_rate": 3.0997377141436665e-07, "loss": 0.9739, "step": 11931 }, { "epoch": 1.6892475401713032, "grad_norm": 9.445258380996735, "learning_rate": 3.096973793470523e-07, "loss": 0.9447, "step": 11932 }, { "epoch": 1.6893891130459404, "grad_norm": 9.04379847367905, "learning_rate": 3.0942110242231316e-07, "loss": 0.9191, "step": 11933 }, { "epoch": 1.6895306859205776, "grad_norm": 9.9251728494424, "learning_rate": 3.091449406546737e-07, "loss": 1.0237, "step": 11934 }, { "epoch": 1.6896722587952149, "grad_norm": 8.63080037000367, "learning_rate": 3.088688940586507e-07, "loss": 0.9055, "step": 11935 }, { "epoch": 1.689813831669852, "grad_norm": 8.147046973575668, "learning_rate": 3.0859296264875686e-07, "loss": 0.8484, "step": 11936 }, { "epoch": 1.6899554045444893, "grad_norm": 9.396829240774435, "learning_rate": 3.083171464394963e-07, "loss": 0.8984, "step": 11937 }, { "epoch": 1.6900969774191266, "grad_norm": 9.469124260987366, "learning_rate": 3.0804144544536897e-07, "loss": 0.9799, "step": 11938 }, { "epoch": 1.6902385502937638, "grad_norm": 8.977116633244496, "learning_rate": 3.0776585968086914e-07, "loss": 0.8645, "step": 11939 }, { "epoch": 1.690380123168401, "grad_norm": 9.467218155535035, "learning_rate": 3.0749038916048356e-07, "loss": 0.9405, "step": 11940 }, { "epoch": 1.6905216960430383, "grad_norm": 10.405462195099858, "learning_rate": 3.0721503389869344e-07, "loss": 1.0007, "step": 11941 }, { "epoch": 1.6906632689176755, "grad_norm": 10.236970016305904, "learning_rate": 3.0693979390997333e-07, "loss": 0.9135, "step": 11942 }, { "epoch": 1.6908048417923127, "grad_norm": 10.044982447136542, "learning_rate": 3.066646692087938e-07, "loss": 0.9734, "step": 11943 }, { "epoch": 1.69094641466695, "grad_norm": 9.71040167216401, "learning_rate": 3.063896598096164e-07, "loss": 1.0161, "step": 11944 }, { "epoch": 1.6910879875415872, "grad_norm": 8.556292657627411, "learning_rate": 3.0611476572689896e-07, "loss": 0.9038, "step": 11945 }, { "epoch": 1.6912295604162244, "grad_norm": 8.327576696156637, "learning_rate": 3.0583998697509305e-07, "loss": 0.8661, "step": 11946 }, { "epoch": 1.6913711332908614, "grad_norm": 8.480032861753886, "learning_rate": 3.055653235686426e-07, "loss": 0.9283, "step": 11947 }, { "epoch": 1.6915127061654986, "grad_norm": 8.909738814339402, "learning_rate": 3.0529077552198724e-07, "loss": 0.9836, "step": 11948 }, { "epoch": 1.6916542790401359, "grad_norm": 18.4314203098587, "learning_rate": 3.0501634284955867e-07, "loss": 0.9622, "step": 11949 }, { "epoch": 1.691795851914773, "grad_norm": 9.988615040130533, "learning_rate": 3.0474202556578513e-07, "loss": 0.9851, "step": 11950 }, { "epoch": 1.6919374247894103, "grad_norm": 9.341756652707158, "learning_rate": 3.044678236850862e-07, "loss": 0.9136, "step": 11951 }, { "epoch": 1.6920789976640476, "grad_norm": 9.85661013567541, "learning_rate": 3.0419373722187645e-07, "loss": 0.948, "step": 11952 }, { "epoch": 1.6922205705386848, "grad_norm": 9.357950398283712, "learning_rate": 3.039197661905652e-07, "loss": 0.9108, "step": 11953 }, { "epoch": 1.692362143413322, "grad_norm": 11.508190389671256, "learning_rate": 3.0364591060555363e-07, "loss": 0.8936, "step": 11954 }, { "epoch": 1.6925037162879593, "grad_norm": 8.14259616175443, "learning_rate": 3.033721704812395e-07, "loss": 0.8438, "step": 11955 }, { "epoch": 1.6926452891625965, "grad_norm": 8.693468417578877, "learning_rate": 3.030985458320118e-07, "loss": 0.9738, "step": 11956 }, { "epoch": 1.6927868620372337, "grad_norm": 10.39374769512433, "learning_rate": 3.028250366722563e-07, "loss": 1.0176, "step": 11957 }, { "epoch": 1.6929284349118707, "grad_norm": 10.352590603426199, "learning_rate": 3.025516430163497e-07, "loss": 1.0223, "step": 11958 }, { "epoch": 1.693070007786508, "grad_norm": 8.718775814113892, "learning_rate": 3.022783648786651e-07, "loss": 0.9748, "step": 11959 }, { "epoch": 1.6932115806611452, "grad_norm": 10.48356832252021, "learning_rate": 3.020052022735678e-07, "loss": 0.9554, "step": 11960 }, { "epoch": 1.6933531535357824, "grad_norm": 8.428494543135914, "learning_rate": 3.017321552154187e-07, "loss": 0.8462, "step": 11961 }, { "epoch": 1.6934947264104196, "grad_norm": 9.624102265738177, "learning_rate": 3.0145922371857097e-07, "loss": 1.1041, "step": 11962 }, { "epoch": 1.6936362992850569, "grad_norm": 9.509603765759028, "learning_rate": 3.0118640779737225e-07, "loss": 0.8537, "step": 11963 }, { "epoch": 1.693777872159694, "grad_norm": 8.902846371972798, "learning_rate": 3.009137074661647e-07, "loss": 1.0184, "step": 11964 }, { "epoch": 1.6939194450343313, "grad_norm": 7.659677664645526, "learning_rate": 3.006411227392836e-07, "loss": 0.9373, "step": 11965 }, { "epoch": 1.6940610179089686, "grad_norm": 8.979883812915233, "learning_rate": 3.003686536310593e-07, "loss": 1.0425, "step": 11966 }, { "epoch": 1.6942025907836058, "grad_norm": 10.390280260376048, "learning_rate": 3.000963001558141e-07, "loss": 1.0522, "step": 11967 }, { "epoch": 1.694344163658243, "grad_norm": 9.783505895631857, "learning_rate": 2.9982406232786614e-07, "loss": 1.0407, "step": 11968 }, { "epoch": 1.6944857365328803, "grad_norm": 10.16216361790011, "learning_rate": 2.995519401615274e-07, "loss": 0.9079, "step": 11969 }, { "epoch": 1.6946273094075175, "grad_norm": 10.80149960879637, "learning_rate": 2.9927993367110165e-07, "loss": 0.9625, "step": 11970 }, { "epoch": 1.6947688822821547, "grad_norm": 10.768702362008314, "learning_rate": 2.9900804287088944e-07, "loss": 1.0385, "step": 11971 }, { "epoch": 1.694910455156792, "grad_norm": 9.615118329395704, "learning_rate": 2.9873626777518343e-07, "loss": 1.004, "step": 11972 }, { "epoch": 1.6950520280314292, "grad_norm": 9.269698329983896, "learning_rate": 2.984646083982698e-07, "loss": 0.9411, "step": 11973 }, { "epoch": 1.6951936009060664, "grad_norm": 9.38706052033342, "learning_rate": 2.9819306475443096e-07, "loss": 0.9899, "step": 11974 }, { "epoch": 1.6953351737807036, "grad_norm": 9.115377709058938, "learning_rate": 2.9792163685794015e-07, "loss": 0.8801, "step": 11975 }, { "epoch": 1.6954767466553409, "grad_norm": 8.913787329369187, "learning_rate": 2.976503247230675e-07, "loss": 1.0147, "step": 11976 }, { "epoch": 1.695618319529978, "grad_norm": 9.408388217960782, "learning_rate": 2.9737912836407477e-07, "loss": 0.9077, "step": 11977 }, { "epoch": 1.6957598924046153, "grad_norm": 7.586003877799191, "learning_rate": 2.971080477952193e-07, "loss": 0.961, "step": 11978 }, { "epoch": 1.6959014652792526, "grad_norm": 8.617339782199318, "learning_rate": 2.968370830307507e-07, "loss": 1.0292, "step": 11979 }, { "epoch": 1.6960430381538898, "grad_norm": 10.755669806357874, "learning_rate": 2.965662340849146e-07, "loss": 1.0223, "step": 11980 }, { "epoch": 1.696184611028527, "grad_norm": 10.099490585322005, "learning_rate": 2.9629550097194787e-07, "loss": 0.9441, "step": 11981 }, { "epoch": 1.6963261839031643, "grad_norm": 9.816980590291307, "learning_rate": 2.960248837060842e-07, "loss": 0.9685, "step": 11982 }, { "epoch": 1.6964677567778015, "grad_norm": 8.860635227139078, "learning_rate": 2.957543823015491e-07, "loss": 1.0273, "step": 11983 }, { "epoch": 1.6966093296524387, "grad_norm": 10.997541673099606, "learning_rate": 2.9548399677256174e-07, "loss": 1.0662, "step": 11984 }, { "epoch": 1.696750902527076, "grad_norm": 9.260829051003698, "learning_rate": 2.9521372713333773e-07, "loss": 1.0339, "step": 11985 }, { "epoch": 1.6968924754017132, "grad_norm": 9.643650365427268, "learning_rate": 2.9494357339808347e-07, "loss": 0.9284, "step": 11986 }, { "epoch": 1.6970340482763504, "grad_norm": 10.350994972650124, "learning_rate": 2.946735355810018e-07, "loss": 0.8918, "step": 11987 }, { "epoch": 1.6971756211509876, "grad_norm": 11.027384915682108, "learning_rate": 2.9440361369628773e-07, "loss": 0.9987, "step": 11988 }, { "epoch": 1.6973171940256246, "grad_norm": 9.493851529169294, "learning_rate": 2.94133807758131e-07, "loss": 0.9964, "step": 11989 }, { "epoch": 1.6974587669002619, "grad_norm": 10.821883347166864, "learning_rate": 2.9386411778071584e-07, "loss": 0.9757, "step": 11990 }, { "epoch": 1.697600339774899, "grad_norm": 9.408181027130473, "learning_rate": 2.935945437782184e-07, "loss": 0.9536, "step": 11991 }, { "epoch": 1.6977419126495363, "grad_norm": 9.303503414372996, "learning_rate": 2.933250857648112e-07, "loss": 0.8363, "step": 11992 }, { "epoch": 1.6978834855241736, "grad_norm": 9.8638006515469, "learning_rate": 2.9305574375465884e-07, "loss": 1.0356, "step": 11993 }, { "epoch": 1.6980250583988108, "grad_norm": 9.129501277676757, "learning_rate": 2.9278651776192073e-07, "loss": 0.9285, "step": 11994 }, { "epoch": 1.698166631273448, "grad_norm": 11.198716321678033, "learning_rate": 2.925174078007487e-07, "loss": 1.0013, "step": 11995 }, { "epoch": 1.6983082041480853, "grad_norm": 9.177042504579532, "learning_rate": 2.922484138852907e-07, "loss": 1.0145, "step": 11996 }, { "epoch": 1.6984497770227225, "grad_norm": 9.908358472872948, "learning_rate": 2.9197953602968814e-07, "loss": 1.0366, "step": 11997 }, { "epoch": 1.6985913498973597, "grad_norm": 8.482830883602755, "learning_rate": 2.917107742480743e-07, "loss": 1.0254, "step": 11998 }, { "epoch": 1.6987329227719967, "grad_norm": 11.541415398865933, "learning_rate": 2.9144212855457906e-07, "loss": 0.9426, "step": 11999 }, { "epoch": 1.698874495646634, "grad_norm": 11.320241734380264, "learning_rate": 2.911735989633238e-07, "loss": 0.9643, "step": 12000 }, { "epoch": 1.6990160685212712, "grad_norm": 8.955992873467988, "learning_rate": 2.9090518548842594e-07, "loss": 0.9502, "step": 12001 }, { "epoch": 1.6991576413959084, "grad_norm": 8.02701299501068, "learning_rate": 2.906368881439947e-07, "loss": 0.9016, "step": 12002 }, { "epoch": 1.6992992142705456, "grad_norm": 9.313852096815133, "learning_rate": 2.903687069441358e-07, "loss": 1.0138, "step": 12003 }, { "epoch": 1.6994407871451829, "grad_norm": 9.14182559222672, "learning_rate": 2.901006419029459e-07, "loss": 0.9687, "step": 12004 }, { "epoch": 1.69958236001982, "grad_norm": 7.825072403350386, "learning_rate": 2.8983269303451715e-07, "loss": 0.9112, "step": 12005 }, { "epoch": 1.6997239328944573, "grad_norm": 8.889984805048003, "learning_rate": 2.8956486035293635e-07, "loss": 1.0099, "step": 12006 }, { "epoch": 1.6998655057690946, "grad_norm": 8.619095259978621, "learning_rate": 2.892971438722822e-07, "loss": 0.871, "step": 12007 }, { "epoch": 1.7000070786437318, "grad_norm": 9.662933505698302, "learning_rate": 2.8902954360662925e-07, "loss": 1.144, "step": 12008 }, { "epoch": 1.700148651518369, "grad_norm": 9.346347001396204, "learning_rate": 2.887620595700441e-07, "loss": 0.9412, "step": 12009 }, { "epoch": 1.7002902243930063, "grad_norm": 9.813455911713893, "learning_rate": 2.8849469177658933e-07, "loss": 0.9883, "step": 12010 }, { "epoch": 1.7004317972676435, "grad_norm": 10.6520483062027, "learning_rate": 2.8822744024031904e-07, "loss": 1.0134, "step": 12011 }, { "epoch": 1.7005733701422807, "grad_norm": 9.20529796808079, "learning_rate": 2.8796030497528325e-07, "loss": 0.914, "step": 12012 }, { "epoch": 1.700714943016918, "grad_norm": 9.299247580003406, "learning_rate": 2.8769328599552503e-07, "loss": 0.9374, "step": 12013 }, { "epoch": 1.7008565158915552, "grad_norm": 11.406695464332149, "learning_rate": 2.874263833150814e-07, "loss": 0.9508, "step": 12014 }, { "epoch": 1.7009980887661924, "grad_norm": 10.321290864275673, "learning_rate": 2.871595969479832e-07, "loss": 1.0107, "step": 12015 }, { "epoch": 1.7011396616408296, "grad_norm": 8.638283051743052, "learning_rate": 2.86892926908254e-07, "loss": 0.9346, "step": 12016 }, { "epoch": 1.7012812345154669, "grad_norm": 8.385240812807318, "learning_rate": 2.866263732099145e-07, "loss": 0.9799, "step": 12017 }, { "epoch": 1.701422807390104, "grad_norm": 9.186276308946754, "learning_rate": 2.8635993586697555e-07, "loss": 0.9613, "step": 12018 }, { "epoch": 1.7015643802647413, "grad_norm": 9.420476857161137, "learning_rate": 2.86093614893444e-07, "loss": 0.8663, "step": 12019 }, { "epoch": 1.7017059531393786, "grad_norm": 9.074739596411336, "learning_rate": 2.8582741030332095e-07, "loss": 1.0323, "step": 12020 }, { "epoch": 1.7018475260140158, "grad_norm": 9.222580556789127, "learning_rate": 2.8556132211059963e-07, "loss": 0.934, "step": 12021 }, { "epoch": 1.701989098888653, "grad_norm": 8.5696060877111, "learning_rate": 2.852953503292688e-07, "loss": 0.9121, "step": 12022 }, { "epoch": 1.7021306717632902, "grad_norm": 10.448011393448837, "learning_rate": 2.8502949497330954e-07, "loss": 0.9729, "step": 12023 }, { "epoch": 1.7022722446379275, "grad_norm": 9.35119944340155, "learning_rate": 2.8476375605669905e-07, "loss": 0.9201, "step": 12024 }, { "epoch": 1.7024138175125647, "grad_norm": 9.750005086261694, "learning_rate": 2.8449813359340576e-07, "loss": 1.002, "step": 12025 }, { "epoch": 1.702555390387202, "grad_norm": 11.052907114251415, "learning_rate": 2.8423262759739307e-07, "loss": 0.9178, "step": 12026 }, { "epoch": 1.7026969632618392, "grad_norm": 9.673185145765475, "learning_rate": 2.839672380826197e-07, "loss": 0.9864, "step": 12027 }, { "epoch": 1.7028385361364764, "grad_norm": 8.968428443168534, "learning_rate": 2.8370196506303573e-07, "loss": 0.9014, "step": 12028 }, { "epoch": 1.7029801090111136, "grad_norm": 8.592392914864751, "learning_rate": 2.8343680855258764e-07, "loss": 0.9967, "step": 12029 }, { "epoch": 1.7031216818857506, "grad_norm": 10.681531165157296, "learning_rate": 2.83171768565213e-07, "loss": 1.0126, "step": 12030 }, { "epoch": 1.7032632547603879, "grad_norm": 9.748983085615304, "learning_rate": 2.8290684511484615e-07, "loss": 0.9565, "step": 12031 }, { "epoch": 1.703404827635025, "grad_norm": 9.99420532181766, "learning_rate": 2.826420382154127e-07, "loss": 0.9809, "step": 12032 }, { "epoch": 1.7035464005096623, "grad_norm": 7.686636775483338, "learning_rate": 2.823773478808348e-07, "loss": 0.8995, "step": 12033 }, { "epoch": 1.7036879733842996, "grad_norm": 9.952128841069044, "learning_rate": 2.8211277412502543e-07, "loss": 1.0475, "step": 12034 }, { "epoch": 1.7038295462589368, "grad_norm": 7.732995359116743, "learning_rate": 2.818483169618941e-07, "loss": 0.8353, "step": 12035 }, { "epoch": 1.703971119133574, "grad_norm": 10.25273393596172, "learning_rate": 2.8158397640534326e-07, "loss": 1.0028, "step": 12036 }, { "epoch": 1.7041126920082112, "grad_norm": 13.185046216285551, "learning_rate": 2.813197524692679e-07, "loss": 0.9964, "step": 12037 }, { "epoch": 1.7042542648828485, "grad_norm": 10.003877269579265, "learning_rate": 2.810556451675592e-07, "loss": 1.0644, "step": 12038 }, { "epoch": 1.7043958377574857, "grad_norm": 11.282504447870204, "learning_rate": 2.807916545141004e-07, "loss": 0.9009, "step": 12039 }, { "epoch": 1.704537410632123, "grad_norm": 7.74158858091177, "learning_rate": 2.805277805227702e-07, "loss": 1.0147, "step": 12040 }, { "epoch": 1.70467898350676, "grad_norm": 9.274094003193957, "learning_rate": 2.8026402320743914e-07, "loss": 0.9509, "step": 12041 }, { "epoch": 1.7048205563813972, "grad_norm": 9.379501686117596, "learning_rate": 2.8000038258197334e-07, "loss": 1.1136, "step": 12042 }, { "epoch": 1.7049621292560344, "grad_norm": 9.063732826301868, "learning_rate": 2.7973685866023224e-07, "loss": 0.9883, "step": 12043 }, { "epoch": 1.7051037021306716, "grad_norm": 8.09028462931316, "learning_rate": 2.7947345145606877e-07, "loss": 0.884, "step": 12044 }, { "epoch": 1.7052452750053089, "grad_norm": 11.147768531194826, "learning_rate": 2.792101609833309e-07, "loss": 1.0315, "step": 12045 }, { "epoch": 1.705386847879946, "grad_norm": 9.497955503814566, "learning_rate": 2.7894698725585866e-07, "loss": 0.9971, "step": 12046 }, { "epoch": 1.7055284207545833, "grad_norm": 9.66149049052656, "learning_rate": 2.786839302874869e-07, "loss": 0.9509, "step": 12047 }, { "epoch": 1.7056699936292206, "grad_norm": 10.118308788008607, "learning_rate": 2.784209900920451e-07, "loss": 1.0315, "step": 12048 }, { "epoch": 1.7058115665038578, "grad_norm": 9.802703048677282, "learning_rate": 2.781581666833549e-07, "loss": 1.0331, "step": 12049 }, { "epoch": 1.705953139378495, "grad_norm": 10.471071993537608, "learning_rate": 2.778954600752337e-07, "loss": 0.9965, "step": 12050 }, { "epoch": 1.7060947122531323, "grad_norm": 9.596143392924999, "learning_rate": 2.776328702814909e-07, "loss": 0.9363, "step": 12051 }, { "epoch": 1.7062362851277695, "grad_norm": 8.950754008152225, "learning_rate": 2.773703973159314e-07, "loss": 1.0235, "step": 12052 }, { "epoch": 1.7063778580024067, "grad_norm": 10.851100925546067, "learning_rate": 2.771080411923524e-07, "loss": 0.9603, "step": 12053 }, { "epoch": 1.706519430877044, "grad_norm": 8.146767903316494, "learning_rate": 2.7684580192454653e-07, "loss": 0.9182, "step": 12054 }, { "epoch": 1.7066610037516812, "grad_norm": 8.946750362059714, "learning_rate": 2.7658367952629885e-07, "loss": 0.9693, "step": 12055 }, { "epoch": 1.7068025766263184, "grad_norm": 8.520854275280525, "learning_rate": 2.7632167401138996e-07, "loss": 0.9691, "step": 12056 }, { "epoch": 1.7069441495009556, "grad_norm": 9.204658523019233, "learning_rate": 2.760597853935923e-07, "loss": 0.9692, "step": 12057 }, { "epoch": 1.7070857223755929, "grad_norm": 8.509892261150355, "learning_rate": 2.757980136866731e-07, "loss": 0.9017, "step": 12058 }, { "epoch": 1.70722729525023, "grad_norm": 16.76415417593071, "learning_rate": 2.755363589043944e-07, "loss": 1.0981, "step": 12059 }, { "epoch": 1.7073688681248673, "grad_norm": 12.607053181334335, "learning_rate": 2.7527482106051025e-07, "loss": 0.995, "step": 12060 }, { "epoch": 1.7075104409995046, "grad_norm": 9.345797209231229, "learning_rate": 2.7501340016877044e-07, "loss": 1.0848, "step": 12061 }, { "epoch": 1.7076520138741418, "grad_norm": 9.252642692201551, "learning_rate": 2.7475209624291674e-07, "loss": 0.9408, "step": 12062 }, { "epoch": 1.707793586748779, "grad_norm": 9.156463724708056, "learning_rate": 2.744909092966863e-07, "loss": 0.9659, "step": 12063 }, { "epoch": 1.7079351596234162, "grad_norm": 10.196371323131562, "learning_rate": 2.742298393438092e-07, "loss": 1.0373, "step": 12064 }, { "epoch": 1.7080767324980535, "grad_norm": 9.98331547280711, "learning_rate": 2.739688863980097e-07, "loss": 1.011, "step": 12065 }, { "epoch": 1.7082183053726907, "grad_norm": 9.610222110781555, "learning_rate": 2.7370805047300633e-07, "loss": 1.1545, "step": 12066 }, { "epoch": 1.708359878247328, "grad_norm": 8.626259089908585, "learning_rate": 2.734473315825112e-07, "loss": 1.1023, "step": 12067 }, { "epoch": 1.7085014511219652, "grad_norm": 8.251690315711457, "learning_rate": 2.7318672974022936e-07, "loss": 0.9123, "step": 12068 }, { "epoch": 1.7086430239966024, "grad_norm": 8.94822039253389, "learning_rate": 2.729262449598602e-07, "loss": 0.8456, "step": 12069 }, { "epoch": 1.7087845968712396, "grad_norm": 10.8454778432505, "learning_rate": 2.7266587725509805e-07, "loss": 1.015, "step": 12070 }, { "epoch": 1.7089261697458769, "grad_norm": 8.787036658887278, "learning_rate": 2.724056266396302e-07, "loss": 0.9829, "step": 12071 }, { "epoch": 1.7090677426205139, "grad_norm": 9.193449209182395, "learning_rate": 2.7214549312713723e-07, "loss": 0.9137, "step": 12072 }, { "epoch": 1.709209315495151, "grad_norm": 8.079818695602556, "learning_rate": 2.7188547673129477e-07, "loss": 0.928, "step": 12073 }, { "epoch": 1.7093508883697883, "grad_norm": 9.765124596554335, "learning_rate": 2.716255774657714e-07, "loss": 0.9102, "step": 12074 }, { "epoch": 1.7094924612444256, "grad_norm": 7.891667484120934, "learning_rate": 2.7136579534423003e-07, "loss": 1.0628, "step": 12075 }, { "epoch": 1.7096340341190628, "grad_norm": 8.780524498777554, "learning_rate": 2.711061303803267e-07, "loss": 1.0141, "step": 12076 }, { "epoch": 1.7097756069937, "grad_norm": 8.359690323346081, "learning_rate": 2.7084658258771265e-07, "loss": 0.9683, "step": 12077 }, { "epoch": 1.7099171798683372, "grad_norm": 8.850489767714437, "learning_rate": 2.7058715198003155e-07, "loss": 0.9542, "step": 12078 }, { "epoch": 1.7100587527429745, "grad_norm": 9.839613078504051, "learning_rate": 2.7032783857092096e-07, "loss": 1.0401, "step": 12079 }, { "epoch": 1.7102003256176117, "grad_norm": 11.410734617103017, "learning_rate": 2.7006864237401426e-07, "loss": 1.1198, "step": 12080 }, { "epoch": 1.710341898492249, "grad_norm": 8.924129702238915, "learning_rate": 2.6980956340293543e-07, "loss": 0.8312, "step": 12081 }, { "epoch": 1.710483471366886, "grad_norm": 9.749738640827923, "learning_rate": 2.695506016713056e-07, "loss": 0.9657, "step": 12082 }, { "epoch": 1.7106250442415232, "grad_norm": 7.625617080483015, "learning_rate": 2.692917571927373e-07, "loss": 0.9213, "step": 12083 }, { "epoch": 1.7107666171161604, "grad_norm": 9.499928122800663, "learning_rate": 2.69033029980838e-07, "loss": 1.01, "step": 12084 }, { "epoch": 1.7109081899907976, "grad_norm": 8.416302311127206, "learning_rate": 2.6877442004920873e-07, "loss": 0.9777, "step": 12085 }, { "epoch": 1.7110497628654349, "grad_norm": 7.159396217078311, "learning_rate": 2.685159274114443e-07, "loss": 0.8677, "step": 12086 }, { "epoch": 1.711191335740072, "grad_norm": 10.250565676082832, "learning_rate": 2.6825755208113454e-07, "loss": 1.0206, "step": 12087 }, { "epoch": 1.7113329086147093, "grad_norm": 10.79203738799522, "learning_rate": 2.6799929407186095e-07, "loss": 0.9905, "step": 12088 }, { "epoch": 1.7114744814893466, "grad_norm": 9.565107800304464, "learning_rate": 2.677411533972002e-07, "loss": 1.0048, "step": 12089 }, { "epoch": 1.7116160543639838, "grad_norm": 9.963936818661063, "learning_rate": 2.674831300707223e-07, "loss": 0.935, "step": 12090 }, { "epoch": 1.711757627238621, "grad_norm": 12.44068875482125, "learning_rate": 2.6722522410599196e-07, "loss": 1.0228, "step": 12091 }, { "epoch": 1.7118992001132582, "grad_norm": 10.758758525488227, "learning_rate": 2.669674355165661e-07, "loss": 1.0827, "step": 12092 }, { "epoch": 1.7120407729878955, "grad_norm": 9.276773831178797, "learning_rate": 2.667097643159974e-07, "loss": 0.9878, "step": 12093 }, { "epoch": 1.7121823458625327, "grad_norm": 10.09114726142463, "learning_rate": 2.664522105178316e-07, "loss": 1.1429, "step": 12094 }, { "epoch": 1.71232391873717, "grad_norm": 9.14601995904823, "learning_rate": 2.661947741356072e-07, "loss": 0.9289, "step": 12095 }, { "epoch": 1.7124654916118072, "grad_norm": 10.779722774529182, "learning_rate": 2.6593745518285836e-07, "loss": 0.9055, "step": 12096 }, { "epoch": 1.7126070644864444, "grad_norm": 10.003286203207486, "learning_rate": 2.6568025367311125e-07, "loss": 0.9636, "step": 12097 }, { "epoch": 1.7127486373610816, "grad_norm": 8.607789933913741, "learning_rate": 2.654231696198878e-07, "loss": 0.941, "step": 12098 }, { "epoch": 1.7128902102357189, "grad_norm": 8.824245552471469, "learning_rate": 2.651662030367019e-07, "loss": 1.0006, "step": 12099 }, { "epoch": 1.713031783110356, "grad_norm": 9.269828370448836, "learning_rate": 2.64909353937062e-07, "loss": 0.921, "step": 12100 }, { "epoch": 1.7131733559849933, "grad_norm": 9.456549612115735, "learning_rate": 2.646526223344714e-07, "loss": 0.9015, "step": 12101 }, { "epoch": 1.7133149288596305, "grad_norm": 9.327881265136492, "learning_rate": 2.6439600824242515e-07, "loss": 0.9839, "step": 12102 }, { "epoch": 1.7134565017342678, "grad_norm": 10.068245142113854, "learning_rate": 2.6413951167441415e-07, "loss": 0.9787, "step": 12103 }, { "epoch": 1.713598074608905, "grad_norm": 9.488860978326937, "learning_rate": 2.6388313264392174e-07, "loss": 0.9184, "step": 12104 }, { "epoch": 1.7137396474835422, "grad_norm": 9.521299477371404, "learning_rate": 2.6362687116442605e-07, "loss": 1.0116, "step": 12105 }, { "epoch": 1.7138812203581795, "grad_norm": 8.18410928417935, "learning_rate": 2.633707272493977e-07, "loss": 0.8233, "step": 12106 }, { "epoch": 1.7140227932328167, "grad_norm": 9.159468674221392, "learning_rate": 2.631147009123028e-07, "loss": 0.9196, "step": 12107 }, { "epoch": 1.714164366107454, "grad_norm": 10.711256961524207, "learning_rate": 2.628587921666001e-07, "loss": 1.0642, "step": 12108 }, { "epoch": 1.7143059389820912, "grad_norm": 10.00290485152938, "learning_rate": 2.626030010257427e-07, "loss": 0.985, "step": 12109 }, { "epoch": 1.7144475118567284, "grad_norm": 11.243383199530486, "learning_rate": 2.6234732750317765e-07, "loss": 1.0022, "step": 12110 }, { "epoch": 1.7145890847313656, "grad_norm": 10.612345620909485, "learning_rate": 2.620917716123444e-07, "loss": 0.9637, "step": 12111 }, { "epoch": 1.7147306576060029, "grad_norm": 9.284949185217457, "learning_rate": 2.6183633336667845e-07, "loss": 0.9775, "step": 12112 }, { "epoch": 1.7148722304806399, "grad_norm": 8.473823632262096, "learning_rate": 2.615810127796073e-07, "loss": 1.023, "step": 12113 }, { "epoch": 1.715013803355277, "grad_norm": 8.274803560351288, "learning_rate": 2.613258098645538e-07, "loss": 0.9633, "step": 12114 }, { "epoch": 1.7151553762299143, "grad_norm": 8.698669717744112, "learning_rate": 2.610707246349328e-07, "loss": 0.988, "step": 12115 }, { "epoch": 1.7152969491045515, "grad_norm": 9.60770874463716, "learning_rate": 2.608157571041542e-07, "loss": 0.9395, "step": 12116 }, { "epoch": 1.7154385219791888, "grad_norm": 9.753629742401312, "learning_rate": 2.6056090728562216e-07, "loss": 0.9432, "step": 12117 }, { "epoch": 1.715580094853826, "grad_norm": 9.65956942591856, "learning_rate": 2.60306175192733e-07, "loss": 1.0883, "step": 12118 }, { "epoch": 1.7157216677284632, "grad_norm": 8.639549919697288, "learning_rate": 2.600515608388787e-07, "loss": 0.9243, "step": 12119 }, { "epoch": 1.7158632406031005, "grad_norm": 9.415764239345497, "learning_rate": 2.5979706423744396e-07, "loss": 0.9685, "step": 12120 }, { "epoch": 1.7160048134777377, "grad_norm": 8.309491294227804, "learning_rate": 2.595426854018063e-07, "loss": 0.937, "step": 12121 }, { "epoch": 1.716146386352375, "grad_norm": 9.918263368260973, "learning_rate": 2.592884243453397e-07, "loss": 1.05, "step": 12122 }, { "epoch": 1.716287959227012, "grad_norm": 9.628386459093967, "learning_rate": 2.590342810814095e-07, "loss": 0.9603, "step": 12123 }, { "epoch": 1.7164295321016492, "grad_norm": 9.147198991475392, "learning_rate": 2.587802556233765e-07, "loss": 0.9699, "step": 12124 }, { "epoch": 1.7165711049762864, "grad_norm": 11.302920843405264, "learning_rate": 2.5852634798459397e-07, "loss": 0.9498, "step": 12125 }, { "epoch": 1.7167126778509236, "grad_norm": 9.06531223203359, "learning_rate": 2.5827255817841067e-07, "loss": 0.9142, "step": 12126 }, { "epoch": 1.7168542507255609, "grad_norm": 10.099630715445931, "learning_rate": 2.580188862181668e-07, "loss": 0.9608, "step": 12127 }, { "epoch": 1.716995823600198, "grad_norm": 11.305644787668742, "learning_rate": 2.5776533211719883e-07, "loss": 1.119, "step": 12128 }, { "epoch": 1.7171373964748353, "grad_norm": 8.917911858529658, "learning_rate": 2.5751189588883506e-07, "loss": 0.9621, "step": 12129 }, { "epoch": 1.7172789693494726, "grad_norm": 9.498473195308401, "learning_rate": 2.572585775463993e-07, "loss": 1.0393, "step": 12130 }, { "epoch": 1.7174205422241098, "grad_norm": 9.93690268202871, "learning_rate": 2.57005377103208e-07, "loss": 0.9229, "step": 12131 }, { "epoch": 1.717562115098747, "grad_norm": 9.13678805686625, "learning_rate": 2.567522945725709e-07, "loss": 1.0497, "step": 12132 }, { "epoch": 1.7177036879733842, "grad_norm": 8.902534004110063, "learning_rate": 2.564993299677937e-07, "loss": 0.8511, "step": 12133 }, { "epoch": 1.7178452608480215, "grad_norm": 9.069568902551268, "learning_rate": 2.5624648330217327e-07, "loss": 0.8975, "step": 12134 }, { "epoch": 1.7179868337226587, "grad_norm": 8.959566205967848, "learning_rate": 2.559937545890029e-07, "loss": 1.0008, "step": 12135 }, { "epoch": 1.718128406597296, "grad_norm": 9.806759850089335, "learning_rate": 2.557411438415669e-07, "loss": 1.0788, "step": 12136 }, { "epoch": 1.7182699794719332, "grad_norm": 8.901460129194572, "learning_rate": 2.5548865107314606e-07, "loss": 0.9771, "step": 12137 }, { "epoch": 1.7184115523465704, "grad_norm": 9.317131375737594, "learning_rate": 2.552362762970129e-07, "loss": 1.0258, "step": 12138 }, { "epoch": 1.7185531252212076, "grad_norm": 10.75378954429499, "learning_rate": 2.54984019526435e-07, "loss": 1.0206, "step": 12139 }, { "epoch": 1.7186946980958449, "grad_norm": 12.236613530440493, "learning_rate": 2.547318807746738e-07, "loss": 1.1325, "step": 12140 }, { "epoch": 1.718836270970482, "grad_norm": 9.984272223527165, "learning_rate": 2.5447986005498303e-07, "loss": 1.0947, "step": 12141 }, { "epoch": 1.7189778438451193, "grad_norm": 7.89663702934933, "learning_rate": 2.542279573806122e-07, "loss": 0.9065, "step": 12142 }, { "epoch": 1.7191194167197565, "grad_norm": 7.848879895812936, "learning_rate": 2.539761727648024e-07, "loss": 0.8932, "step": 12143 }, { "epoch": 1.7192609895943938, "grad_norm": 8.00727846923577, "learning_rate": 2.537245062207905e-07, "loss": 0.8669, "step": 12144 }, { "epoch": 1.719402562469031, "grad_norm": 11.547144575675949, "learning_rate": 2.5347295776180697e-07, "loss": 1.07, "step": 12145 }, { "epoch": 1.7195441353436682, "grad_norm": 9.184399139902139, "learning_rate": 2.5322152740107436e-07, "loss": 0.9661, "step": 12146 }, { "epoch": 1.7196857082183055, "grad_norm": 9.884117456458437, "learning_rate": 2.5297021515181123e-07, "loss": 0.9294, "step": 12147 }, { "epoch": 1.7198272810929427, "grad_norm": 11.622147404964986, "learning_rate": 2.527190210272282e-07, "loss": 1.0718, "step": 12148 }, { "epoch": 1.71996885396758, "grad_norm": 9.558712676773446, "learning_rate": 2.5246794504053094e-07, "loss": 0.9124, "step": 12149 }, { "epoch": 1.7201104268422172, "grad_norm": 7.840757733920939, "learning_rate": 2.522169872049174e-07, "loss": 0.8814, "step": 12150 }, { "epoch": 1.7202519997168544, "grad_norm": 9.130891752950616, "learning_rate": 2.5196614753358136e-07, "loss": 0.9806, "step": 12151 }, { "epoch": 1.7203935725914916, "grad_norm": 9.275895857410191, "learning_rate": 2.5171542603970897e-07, "loss": 1.0379, "step": 12152 }, { "epoch": 1.7205351454661288, "grad_norm": 8.203238931500481, "learning_rate": 2.514648227364794e-07, "loss": 0.7677, "step": 12153 }, { "epoch": 1.7206767183407659, "grad_norm": 7.687570711136302, "learning_rate": 2.512143376370682e-07, "loss": 0.96, "step": 12154 }, { "epoch": 1.720818291215403, "grad_norm": 9.593227906574537, "learning_rate": 2.509639707546421e-07, "loss": 0.9974, "step": 12155 }, { "epoch": 1.7209598640900403, "grad_norm": 10.208988298437642, "learning_rate": 2.507137221023634e-07, "loss": 0.9181, "step": 12156 }, { "epoch": 1.7211014369646775, "grad_norm": 11.350415838076842, "learning_rate": 2.5046359169338677e-07, "loss": 1.0141, "step": 12157 }, { "epoch": 1.7212430098393148, "grad_norm": 9.74463408094845, "learning_rate": 2.502135795408622e-07, "loss": 0.9727, "step": 12158 }, { "epoch": 1.721384582713952, "grad_norm": 10.01222283102341, "learning_rate": 2.499636856579321e-07, "loss": 0.9704, "step": 12159 }, { "epoch": 1.7215261555885892, "grad_norm": 12.470723160956888, "learning_rate": 2.4971391005773337e-07, "loss": 0.9698, "step": 12160 }, { "epoch": 1.7216677284632265, "grad_norm": 8.063096135301214, "learning_rate": 2.4946425275339634e-07, "loss": 0.9714, "step": 12161 }, { "epoch": 1.7218093013378637, "grad_norm": 7.913806063836517, "learning_rate": 2.492147137580458e-07, "loss": 0.9555, "step": 12162 }, { "epoch": 1.721950874212501, "grad_norm": 9.027021322265542, "learning_rate": 2.4896529308479966e-07, "loss": 0.9772, "step": 12163 }, { "epoch": 1.7220924470871382, "grad_norm": 8.391706514184992, "learning_rate": 2.48715990746769e-07, "loss": 0.9307, "step": 12164 }, { "epoch": 1.7222340199617752, "grad_norm": 10.088188410718558, "learning_rate": 2.484668067570606e-07, "loss": 0.9819, "step": 12165 }, { "epoch": 1.7223755928364124, "grad_norm": 9.69594397935956, "learning_rate": 2.482177411287728e-07, "loss": 0.8441, "step": 12166 }, { "epoch": 1.7225171657110496, "grad_norm": 7.857270886257473, "learning_rate": 2.4796879387499947e-07, "loss": 0.8536, "step": 12167 }, { "epoch": 1.7226587385856869, "grad_norm": 8.82098023933919, "learning_rate": 2.47719965008828e-07, "loss": 0.9403, "step": 12168 }, { "epoch": 1.722800311460324, "grad_norm": 9.747772353565383, "learning_rate": 2.4747125454333805e-07, "loss": 0.9172, "step": 12169 }, { "epoch": 1.7229418843349613, "grad_norm": 8.990054675520481, "learning_rate": 2.4722266249160493e-07, "loss": 0.9912, "step": 12170 }, { "epoch": 1.7230834572095985, "grad_norm": 9.429926985359895, "learning_rate": 2.4697418886669654e-07, "loss": 0.8984, "step": 12171 }, { "epoch": 1.7232250300842358, "grad_norm": 8.851642658783117, "learning_rate": 2.467258336816755e-07, "loss": 0.9805, "step": 12172 }, { "epoch": 1.723366602958873, "grad_norm": 7.406729767144597, "learning_rate": 2.4647759694959724e-07, "loss": 0.9574, "step": 12173 }, { "epoch": 1.7235081758335102, "grad_norm": 10.004499567528587, "learning_rate": 2.462294786835109e-07, "loss": 1.0425, "step": 12174 }, { "epoch": 1.7236497487081475, "grad_norm": 8.835607224016346, "learning_rate": 2.4598147889646097e-07, "loss": 0.824, "step": 12175 }, { "epoch": 1.7237913215827847, "grad_norm": 10.918515890807344, "learning_rate": 2.4573359760148354e-07, "loss": 0.9977, "step": 12176 }, { "epoch": 1.723932894457422, "grad_norm": 10.580583069856997, "learning_rate": 2.4548583481161044e-07, "loss": 1.1313, "step": 12177 }, { "epoch": 1.7240744673320592, "grad_norm": 8.807427427635519, "learning_rate": 2.4523819053986544e-07, "loss": 0.9808, "step": 12178 }, { "epoch": 1.7242160402066964, "grad_norm": 10.115006778128329, "learning_rate": 2.4499066479926807e-07, "loss": 0.9967, "step": 12179 }, { "epoch": 1.7243576130813336, "grad_norm": 8.584625393798815, "learning_rate": 2.447432576028294e-07, "loss": 0.9582, "step": 12180 }, { "epoch": 1.7244991859559708, "grad_norm": 9.398164334532925, "learning_rate": 2.4449596896355677e-07, "loss": 0.9102, "step": 12181 }, { "epoch": 1.724640758830608, "grad_norm": 8.719394454928882, "learning_rate": 2.442487988944489e-07, "loss": 1.0387, "step": 12182 }, { "epoch": 1.7247823317052453, "grad_norm": 10.889150599061571, "learning_rate": 2.440017474084999e-07, "loss": 0.9324, "step": 12183 }, { "epoch": 1.7249239045798825, "grad_norm": 10.850629839217392, "learning_rate": 2.4375481451869713e-07, "loss": 1.026, "step": 12184 }, { "epoch": 1.7250654774545198, "grad_norm": 8.922090063492647, "learning_rate": 2.4350800023802106e-07, "loss": 0.9643, "step": 12185 }, { "epoch": 1.725207050329157, "grad_norm": 8.66935140645445, "learning_rate": 2.4326130457944713e-07, "loss": 0.9385, "step": 12186 }, { "epoch": 1.7253486232037942, "grad_norm": 9.36867177369464, "learning_rate": 2.430147275559433e-07, "loss": 0.9097, "step": 12187 }, { "epoch": 1.7254901960784315, "grad_norm": 8.988334725232061, "learning_rate": 2.4276826918047283e-07, "loss": 0.9835, "step": 12188 }, { "epoch": 1.7256317689530687, "grad_norm": 10.757576689840187, "learning_rate": 2.425219294659908e-07, "loss": 0.9332, "step": 12189 }, { "epoch": 1.725773341827706, "grad_norm": 8.260344060423998, "learning_rate": 2.422757084254479e-07, "loss": 0.9211, "step": 12190 }, { "epoch": 1.7259149147023432, "grad_norm": 10.785455475068613, "learning_rate": 2.4202960607178806e-07, "loss": 1.0413, "step": 12191 }, { "epoch": 1.7260564875769804, "grad_norm": 8.284356452290865, "learning_rate": 2.417836224179476e-07, "loss": 0.8873, "step": 12192 }, { "epoch": 1.7261980604516176, "grad_norm": 10.005483268416889, "learning_rate": 2.4153775747685906e-07, "loss": 0.9945, "step": 12193 }, { "epoch": 1.7263396333262548, "grad_norm": 11.739634385402711, "learning_rate": 2.412920112614464e-07, "loss": 0.9282, "step": 12194 }, { "epoch": 1.726481206200892, "grad_norm": 8.839734571105495, "learning_rate": 2.41046383784628e-07, "loss": 1.0704, "step": 12195 }, { "epoch": 1.726622779075529, "grad_norm": 9.123614480375824, "learning_rate": 2.4080087505931744e-07, "loss": 0.9999, "step": 12196 }, { "epoch": 1.7267643519501663, "grad_norm": 7.878575073604978, "learning_rate": 2.4055548509841984e-07, "loss": 0.9435, "step": 12197 }, { "epoch": 1.7269059248248035, "grad_norm": 8.010380209520283, "learning_rate": 2.403102139148361e-07, "loss": 0.9592, "step": 12198 }, { "epoch": 1.7270474976994408, "grad_norm": 10.278852838416752, "learning_rate": 2.400650615214592e-07, "loss": 0.9958, "step": 12199 }, { "epoch": 1.727189070574078, "grad_norm": 12.007539288124748, "learning_rate": 2.3982002793117744e-07, "loss": 1.0177, "step": 12200 }, { "epoch": 1.7273306434487152, "grad_norm": 9.113332317420141, "learning_rate": 2.3957511315687075e-07, "loss": 0.9752, "step": 12201 }, { "epoch": 1.7274722163233525, "grad_norm": 8.894944253010255, "learning_rate": 2.393303172114159e-07, "loss": 0.9205, "step": 12202 }, { "epoch": 1.7276137891979897, "grad_norm": 8.332982119470675, "learning_rate": 2.3908564010767966e-07, "loss": 0.9414, "step": 12203 }, { "epoch": 1.727755362072627, "grad_norm": 9.050476938399434, "learning_rate": 2.388410818585263e-07, "loss": 1.0249, "step": 12204 }, { "epoch": 1.7278969349472642, "grad_norm": 10.096028262744163, "learning_rate": 2.38596642476811e-07, "loss": 1.0624, "step": 12205 }, { "epoch": 1.7280385078219012, "grad_norm": 9.618764467581713, "learning_rate": 2.383523219753839e-07, "loss": 0.9188, "step": 12206 }, { "epoch": 1.7281800806965384, "grad_norm": 8.448015456243306, "learning_rate": 2.381081203670893e-07, "loss": 0.9047, "step": 12207 }, { "epoch": 1.7283216535711756, "grad_norm": 10.516788321958693, "learning_rate": 2.3786403766476368e-07, "loss": 0.9227, "step": 12208 }, { "epoch": 1.7284632264458129, "grad_norm": 8.149375152037875, "learning_rate": 2.3762007388123927e-07, "loss": 0.8539, "step": 12209 }, { "epoch": 1.72860479932045, "grad_norm": 10.269973275243649, "learning_rate": 2.3737622902934022e-07, "loss": 1.017, "step": 12210 }, { "epoch": 1.7287463721950873, "grad_norm": 10.378705936978344, "learning_rate": 2.371325031218863e-07, "loss": 1.0143, "step": 12211 }, { "epoch": 1.7288879450697245, "grad_norm": 7.824822313527717, "learning_rate": 2.368888961716889e-07, "loss": 0.8919, "step": 12212 }, { "epoch": 1.7290295179443618, "grad_norm": 9.005965904610122, "learning_rate": 2.366454081915548e-07, "loss": 0.9454, "step": 12213 }, { "epoch": 1.729171090818999, "grad_norm": 9.071487074707324, "learning_rate": 2.3640203919428451e-07, "loss": 0.9327, "step": 12214 }, { "epoch": 1.7293126636936362, "grad_norm": 7.791243053759388, "learning_rate": 2.3615878919267116e-07, "loss": 0.9724, "step": 12215 }, { "epoch": 1.7294542365682735, "grad_norm": 8.782151796999804, "learning_rate": 2.359156581995023e-07, "loss": 0.8735, "step": 12216 }, { "epoch": 1.7295958094429107, "grad_norm": 10.802114802394092, "learning_rate": 2.3567264622755853e-07, "loss": 0.9309, "step": 12217 }, { "epoch": 1.729737382317548, "grad_norm": 8.703023310671629, "learning_rate": 2.3542975328961548e-07, "loss": 0.988, "step": 12218 }, { "epoch": 1.7298789551921852, "grad_norm": 9.662769277356356, "learning_rate": 2.351869793984421e-07, "loss": 0.9193, "step": 12219 }, { "epoch": 1.7300205280668224, "grad_norm": 8.823522721830338, "learning_rate": 2.3494432456680038e-07, "loss": 0.8542, "step": 12220 }, { "epoch": 1.7301621009414596, "grad_norm": 7.468082158703825, "learning_rate": 2.3470178880744681e-07, "loss": 0.9079, "step": 12221 }, { "epoch": 1.7303036738160968, "grad_norm": 9.920179711836713, "learning_rate": 2.3445937213313062e-07, "loss": 1.0143, "step": 12222 }, { "epoch": 1.730445246690734, "grad_norm": 9.636862046715304, "learning_rate": 2.3421707455659664e-07, "loss": 0.9595, "step": 12223 }, { "epoch": 1.7305868195653713, "grad_norm": 11.479250557391584, "learning_rate": 2.3397489609058104e-07, "loss": 0.9513, "step": 12224 }, { "epoch": 1.7307283924400085, "grad_norm": 9.219488185487508, "learning_rate": 2.3373283674781588e-07, "loss": 0.9802, "step": 12225 }, { "epoch": 1.7308699653146458, "grad_norm": 8.449667062915621, "learning_rate": 2.3349089654102597e-07, "loss": 1.0481, "step": 12226 }, { "epoch": 1.731011538189283, "grad_norm": 10.54186151095768, "learning_rate": 2.332490754829289e-07, "loss": 0.9228, "step": 12227 }, { "epoch": 1.7311531110639202, "grad_norm": 9.46148382699543, "learning_rate": 2.3300737358623843e-07, "loss": 0.9697, "step": 12228 }, { "epoch": 1.7312946839385575, "grad_norm": 12.119096400251701, "learning_rate": 2.3276579086365937e-07, "loss": 1.0096, "step": 12229 }, { "epoch": 1.7314362568131947, "grad_norm": 8.90186595429642, "learning_rate": 2.3252432732789264e-07, "loss": 0.9813, "step": 12230 }, { "epoch": 1.731577829687832, "grad_norm": 7.933618512624452, "learning_rate": 2.3228298299163092e-07, "loss": 0.8693, "step": 12231 }, { "epoch": 1.7317194025624691, "grad_norm": 9.419376579761911, "learning_rate": 2.3204175786756238e-07, "loss": 0.9455, "step": 12232 }, { "epoch": 1.7318609754371064, "grad_norm": 8.318994265437313, "learning_rate": 2.3180065196836716e-07, "loss": 0.9362, "step": 12233 }, { "epoch": 1.7320025483117436, "grad_norm": 10.527287946280667, "learning_rate": 2.3155966530672092e-07, "loss": 1.1002, "step": 12234 }, { "epoch": 1.7321441211863808, "grad_norm": 10.13729300516939, "learning_rate": 2.3131879789529105e-07, "loss": 1.018, "step": 12235 }, { "epoch": 1.732285694061018, "grad_norm": 10.469155557093098, "learning_rate": 2.3107804974674074e-07, "loss": 0.8784, "step": 12236 }, { "epoch": 1.732427266935655, "grad_norm": 9.24604754160401, "learning_rate": 2.3083742087372574e-07, "loss": 0.9436, "step": 12237 }, { "epoch": 1.7325688398102923, "grad_norm": 8.750091552255414, "learning_rate": 2.3059691128889504e-07, "loss": 0.9452, "step": 12238 }, { "epoch": 1.7327104126849295, "grad_norm": 8.969116203461265, "learning_rate": 2.303565210048933e-07, "loss": 0.8833, "step": 12239 }, { "epoch": 1.7328519855595668, "grad_norm": 11.83836930979259, "learning_rate": 2.301162500343562e-07, "loss": 0.9921, "step": 12240 }, { "epoch": 1.732993558434204, "grad_norm": 10.620607433009363, "learning_rate": 2.2987609838991536e-07, "loss": 1.1429, "step": 12241 }, { "epoch": 1.7331351313088412, "grad_norm": 9.421730053041363, "learning_rate": 2.2963606608419593e-07, "loss": 0.9419, "step": 12242 }, { "epoch": 1.7332767041834785, "grad_norm": 10.49081855074932, "learning_rate": 2.29396153129815e-07, "loss": 1.081, "step": 12243 }, { "epoch": 1.7334182770581157, "grad_norm": 9.985702211523563, "learning_rate": 2.2915635953938587e-07, "loss": 0.9243, "step": 12244 }, { "epoch": 1.733559849932753, "grad_norm": 10.851564960740362, "learning_rate": 2.2891668532551315e-07, "loss": 0.9696, "step": 12245 }, { "epoch": 1.7337014228073901, "grad_norm": 10.077504220222076, "learning_rate": 2.2867713050079732e-07, "loss": 1.0455, "step": 12246 }, { "epoch": 1.7338429956820274, "grad_norm": 9.496560528171996, "learning_rate": 2.2843769507783137e-07, "loss": 0.9531, "step": 12247 }, { "epoch": 1.7339845685566644, "grad_norm": 9.374320450631759, "learning_rate": 2.2819837906920134e-07, "loss": 0.9467, "step": 12248 }, { "epoch": 1.7341261414313016, "grad_norm": 8.932510370333375, "learning_rate": 2.2795918248748939e-07, "loss": 0.9179, "step": 12249 }, { "epoch": 1.7342677143059388, "grad_norm": 6.96556942465352, "learning_rate": 2.2772010534526822e-07, "loss": 0.9226, "step": 12250 }, { "epoch": 1.734409287180576, "grad_norm": 9.41452362070874, "learning_rate": 2.2748114765510754e-07, "loss": 1.0146, "step": 12251 }, { "epoch": 1.7345508600552133, "grad_norm": 9.763071931895375, "learning_rate": 2.272423094295681e-07, "loss": 0.9774, "step": 12252 }, { "epoch": 1.7346924329298505, "grad_norm": 10.545343855871852, "learning_rate": 2.2700359068120624e-07, "loss": 0.9721, "step": 12253 }, { "epoch": 1.7348340058044878, "grad_norm": 10.53082401308327, "learning_rate": 2.2676499142257002e-07, "loss": 1.0, "step": 12254 }, { "epoch": 1.734975578679125, "grad_norm": 9.362664817917802, "learning_rate": 2.265265116662041e-07, "loss": 1.0022, "step": 12255 }, { "epoch": 1.7351171515537622, "grad_norm": 8.553162767231424, "learning_rate": 2.2628815142464344e-07, "loss": 0.9085, "step": 12256 }, { "epoch": 1.7352587244283995, "grad_norm": 9.355653051420552, "learning_rate": 2.2604991071041999e-07, "loss": 1.0007, "step": 12257 }, { "epoch": 1.7354002973030367, "grad_norm": 8.469102239849478, "learning_rate": 2.258117895360573e-07, "loss": 1.0184, "step": 12258 }, { "epoch": 1.735541870177674, "grad_norm": 9.617053826624826, "learning_rate": 2.2557378791407264e-07, "loss": 0.9521, "step": 12259 }, { "epoch": 1.7356834430523111, "grad_norm": 9.745099914884106, "learning_rate": 2.2533590585697817e-07, "loss": 0.9972, "step": 12260 }, { "epoch": 1.7358250159269484, "grad_norm": 9.693550435415567, "learning_rate": 2.2509814337727891e-07, "loss": 0.9244, "step": 12261 }, { "epoch": 1.7359665888015856, "grad_norm": 7.78806481433264, "learning_rate": 2.2486050048747459e-07, "loss": 0.933, "step": 12262 }, { "epoch": 1.7361081616762228, "grad_norm": 8.735058479659218, "learning_rate": 2.246229772000566e-07, "loss": 0.9756, "step": 12263 }, { "epoch": 1.73624973455086, "grad_norm": 10.337069379516304, "learning_rate": 2.2438557352751216e-07, "loss": 0.9255, "step": 12264 }, { "epoch": 1.7363913074254973, "grad_norm": 8.65993336006584, "learning_rate": 2.2414828948232186e-07, "loss": 0.9412, "step": 12265 }, { "epoch": 1.7365328803001345, "grad_norm": 9.56375452925266, "learning_rate": 2.2391112507695877e-07, "loss": 0.9092, "step": 12266 }, { "epoch": 1.7366744531747718, "grad_norm": 9.441838561937368, "learning_rate": 2.23674080323891e-07, "loss": 0.9191, "step": 12267 }, { "epoch": 1.736816026049409, "grad_norm": 8.047489831638934, "learning_rate": 2.2343715523557934e-07, "loss": 0.9328, "step": 12268 }, { "epoch": 1.7369575989240462, "grad_norm": 7.588184970561863, "learning_rate": 2.232003498244792e-07, "loss": 0.986, "step": 12269 }, { "epoch": 1.7370991717986835, "grad_norm": 10.082972580522586, "learning_rate": 2.229636641030386e-07, "loss": 0.8987, "step": 12270 }, { "epoch": 1.7372407446733207, "grad_norm": 9.431284897969107, "learning_rate": 2.2272709808370013e-07, "loss": 0.9064, "step": 12271 }, { "epoch": 1.737382317547958, "grad_norm": 8.444218426183774, "learning_rate": 2.2249065177890077e-07, "loss": 0.8806, "step": 12272 }, { "epoch": 1.7375238904225951, "grad_norm": 9.069862058825, "learning_rate": 2.222543252010692e-07, "loss": 0.9051, "step": 12273 }, { "epoch": 1.7376654632972324, "grad_norm": 10.40269725741801, "learning_rate": 2.2201811836262966e-07, "loss": 1.081, "step": 12274 }, { "epoch": 1.7378070361718696, "grad_norm": 10.606722920202877, "learning_rate": 2.2178203127599883e-07, "loss": 1.0522, "step": 12275 }, { "epoch": 1.7379486090465068, "grad_norm": 8.199219680502567, "learning_rate": 2.2154606395358824e-07, "loss": 0.8656, "step": 12276 }, { "epoch": 1.738090181921144, "grad_norm": 8.982366028786494, "learning_rate": 2.2131021640780182e-07, "loss": 0.9881, "step": 12277 }, { "epoch": 1.7382317547957813, "grad_norm": 9.095689494634945, "learning_rate": 2.2107448865103853e-07, "loss": 0.9375, "step": 12278 }, { "epoch": 1.7383733276704183, "grad_norm": 10.102710065689637, "learning_rate": 2.2083888069569042e-07, "loss": 0.9801, "step": 12279 }, { "epoch": 1.7385149005450555, "grad_norm": 9.454312941886629, "learning_rate": 2.2060339255414232e-07, "loss": 0.9014, "step": 12280 }, { "epoch": 1.7386564734196928, "grad_norm": 10.442186959332433, "learning_rate": 2.2036802423877458e-07, "loss": 0.9993, "step": 12281 }, { "epoch": 1.73879804629433, "grad_norm": 9.066329252285064, "learning_rate": 2.201327757619598e-07, "loss": 0.9165, "step": 12282 }, { "epoch": 1.7389396191689672, "grad_norm": 7.786863454701013, "learning_rate": 2.198976471360656e-07, "loss": 0.8719, "step": 12283 }, { "epoch": 1.7390811920436045, "grad_norm": 10.143712589103263, "learning_rate": 2.1966263837345125e-07, "loss": 1.1036, "step": 12284 }, { "epoch": 1.7392227649182417, "grad_norm": 9.293835260016827, "learning_rate": 2.1942774948647245e-07, "loss": 1.0191, "step": 12285 }, { "epoch": 1.739364337792879, "grad_norm": 10.09293364539332, "learning_rate": 2.1919298048747567e-07, "loss": 0.9622, "step": 12286 }, { "epoch": 1.7395059106675161, "grad_norm": 11.106666460102105, "learning_rate": 2.189583313888033e-07, "loss": 0.9584, "step": 12287 }, { "epoch": 1.7396474835421534, "grad_norm": 9.70579524009629, "learning_rate": 2.1872380220279127e-07, "loss": 0.9768, "step": 12288 }, { "epoch": 1.7397890564167904, "grad_norm": 10.345198515457243, "learning_rate": 2.184893929417678e-07, "loss": 1.0388, "step": 12289 }, { "epoch": 1.7399306292914276, "grad_norm": 10.453211123991249, "learning_rate": 2.182551036180558e-07, "loss": 1.0447, "step": 12290 }, { "epoch": 1.7400722021660648, "grad_norm": 9.929583004957648, "learning_rate": 2.1802093424397126e-07, "loss": 1.005, "step": 12291 }, { "epoch": 1.740213775040702, "grad_norm": 8.990832003748535, "learning_rate": 2.1778688483182486e-07, "loss": 0.99, "step": 12292 }, { "epoch": 1.7403553479153393, "grad_norm": 9.838467006827516, "learning_rate": 2.175529553939204e-07, "loss": 0.8765, "step": 12293 }, { "epoch": 1.7404969207899765, "grad_norm": 9.888808627523204, "learning_rate": 2.1731914594255498e-07, "loss": 0.9669, "step": 12294 }, { "epoch": 1.7406384936646138, "grad_norm": 9.535611114841885, "learning_rate": 2.1708545649002015e-07, "loss": 0.9844, "step": 12295 }, { "epoch": 1.740780066539251, "grad_norm": 10.29067255281964, "learning_rate": 2.1685188704860056e-07, "loss": 1.0723, "step": 12296 }, { "epoch": 1.7409216394138882, "grad_norm": 9.346861662451825, "learning_rate": 2.1661843763057522e-07, "loss": 1.0187, "step": 12297 }, { "epoch": 1.7410632122885255, "grad_norm": 9.420745731324955, "learning_rate": 2.1638510824821547e-07, "loss": 0.925, "step": 12298 }, { "epoch": 1.7412047851631627, "grad_norm": 9.901023858041148, "learning_rate": 2.161518989137884e-07, "loss": 0.916, "step": 12299 }, { "epoch": 1.7413463580378, "grad_norm": 11.000088431262972, "learning_rate": 2.1591880963955314e-07, "loss": 0.9787, "step": 12300 }, { "epoch": 1.7414879309124371, "grad_norm": 9.864137880498644, "learning_rate": 2.1568584043776237e-07, "loss": 0.9596, "step": 12301 }, { "epoch": 1.7416295037870744, "grad_norm": 7.916551475356103, "learning_rate": 2.1545299132066432e-07, "loss": 0.8779, "step": 12302 }, { "epoch": 1.7417710766617116, "grad_norm": 10.393419942472542, "learning_rate": 2.152202623004987e-07, "loss": 0.9432, "step": 12303 }, { "epoch": 1.7419126495363488, "grad_norm": 9.936213955945208, "learning_rate": 2.1498765338950067e-07, "loss": 0.9963, "step": 12304 }, { "epoch": 1.742054222410986, "grad_norm": 8.562193036668116, "learning_rate": 2.1475516459989743e-07, "loss": 0.9829, "step": 12305 }, { "epoch": 1.7421957952856233, "grad_norm": 8.039611975572791, "learning_rate": 2.1452279594391167e-07, "loss": 0.9642, "step": 12306 }, { "epoch": 1.7423373681602605, "grad_norm": 9.296388347294391, "learning_rate": 2.142905474337578e-07, "loss": 0.9541, "step": 12307 }, { "epoch": 1.7424789410348978, "grad_norm": 10.26251981967959, "learning_rate": 2.1405841908164636e-07, "loss": 0.9605, "step": 12308 }, { "epoch": 1.742620513909535, "grad_norm": 8.70093447171925, "learning_rate": 2.1382641089977867e-07, "loss": 0.8826, "step": 12309 }, { "epoch": 1.7427620867841722, "grad_norm": 8.715746290389452, "learning_rate": 2.1359452290035194e-07, "loss": 1.029, "step": 12310 }, { "epoch": 1.7429036596588094, "grad_norm": 9.697965617044382, "learning_rate": 2.1336275509555722e-07, "loss": 0.8008, "step": 12311 }, { "epoch": 1.7430452325334467, "grad_norm": 10.272527362845532, "learning_rate": 2.1313110749757672e-07, "loss": 1.0769, "step": 12312 }, { "epoch": 1.743186805408084, "grad_norm": 9.745127316186865, "learning_rate": 2.1289958011858903e-07, "loss": 0.9183, "step": 12313 }, { "epoch": 1.7433283782827211, "grad_norm": 8.778154255107667, "learning_rate": 2.1266817297076469e-07, "loss": 1.0178, "step": 12314 }, { "epoch": 1.7434699511573584, "grad_norm": 10.455905059943811, "learning_rate": 2.12436886066269e-07, "loss": 0.9821, "step": 12315 }, { "epoch": 1.7436115240319956, "grad_norm": 7.849382422384694, "learning_rate": 2.1220571941726082e-07, "loss": 0.9082, "step": 12316 }, { "epoch": 1.7437530969066328, "grad_norm": 10.6839775839413, "learning_rate": 2.119746730358918e-07, "loss": 0.9908, "step": 12317 }, { "epoch": 1.74389466978127, "grad_norm": 8.884692420079272, "learning_rate": 2.1174374693430865e-07, "loss": 0.9795, "step": 12318 }, { "epoch": 1.7440362426559073, "grad_norm": 9.698092274999969, "learning_rate": 2.1151294112464997e-07, "loss": 0.9567, "step": 12319 }, { "epoch": 1.7441778155305443, "grad_norm": 11.553140347104021, "learning_rate": 2.1128225561905024e-07, "loss": 1.1119, "step": 12320 }, { "epoch": 1.7443193884051815, "grad_norm": 8.926068017138455, "learning_rate": 2.1105169042963585e-07, "loss": 0.883, "step": 12321 }, { "epoch": 1.7444609612798188, "grad_norm": 8.518739575206846, "learning_rate": 2.1082124556852684e-07, "loss": 0.8321, "step": 12322 }, { "epoch": 1.744602534154456, "grad_norm": 8.611203423555587, "learning_rate": 2.1059092104783824e-07, "loss": 0.9984, "step": 12323 }, { "epoch": 1.7447441070290932, "grad_norm": 11.264313472005538, "learning_rate": 2.1036071687967785e-07, "loss": 1.1026, "step": 12324 }, { "epoch": 1.7448856799037304, "grad_norm": 9.310985467630049, "learning_rate": 2.101306330761474e-07, "loss": 0.89, "step": 12325 }, { "epoch": 1.7450272527783677, "grad_norm": 8.392973785981223, "learning_rate": 2.0990066964934193e-07, "loss": 0.9588, "step": 12326 }, { "epoch": 1.745168825653005, "grad_norm": 10.277823668597986, "learning_rate": 2.096708266113512e-07, "loss": 0.9316, "step": 12327 }, { "epoch": 1.7453103985276421, "grad_norm": 7.129743101566559, "learning_rate": 2.0944110397425693e-07, "loss": 0.8784, "step": 12328 }, { "epoch": 1.7454519714022794, "grad_norm": 8.593424620970884, "learning_rate": 2.0921150175013616e-07, "loss": 0.8837, "step": 12329 }, { "epoch": 1.7455935442769166, "grad_norm": 8.51852059786613, "learning_rate": 2.089820199510584e-07, "loss": 0.9867, "step": 12330 }, { "epoch": 1.7457351171515536, "grad_norm": 8.426033423005482, "learning_rate": 2.0875265858908782e-07, "loss": 0.9852, "step": 12331 }, { "epoch": 1.7458766900261908, "grad_norm": 9.863097925276508, "learning_rate": 2.0852341767628182e-07, "loss": 1.0821, "step": 12332 }, { "epoch": 1.746018262900828, "grad_norm": 9.521098349546563, "learning_rate": 2.082942972246907e-07, "loss": 0.9697, "step": 12333 }, { "epoch": 1.7461598357754653, "grad_norm": 7.831877579929474, "learning_rate": 2.0806529724635982e-07, "loss": 0.9711, "step": 12334 }, { "epoch": 1.7463014086501025, "grad_norm": 8.615965160784686, "learning_rate": 2.0783641775332708e-07, "loss": 0.9424, "step": 12335 }, { "epoch": 1.7464429815247398, "grad_norm": 9.47955169454705, "learning_rate": 2.0760765875762506e-07, "loss": 0.9354, "step": 12336 }, { "epoch": 1.746584554399377, "grad_norm": 9.997497626969938, "learning_rate": 2.0737902027127888e-07, "loss": 1.0258, "step": 12337 }, { "epoch": 1.7467261272740142, "grad_norm": 8.521427298614148, "learning_rate": 2.0715050230630807e-07, "loss": 1.0145, "step": 12338 }, { "epoch": 1.7468677001486514, "grad_norm": 9.355826340373707, "learning_rate": 2.069221048747261e-07, "loss": 1.0461, "step": 12339 }, { "epoch": 1.7470092730232887, "grad_norm": 10.560510933393157, "learning_rate": 2.0669382798853887e-07, "loss": 1.0186, "step": 12340 }, { "epoch": 1.747150845897926, "grad_norm": 8.786519597414108, "learning_rate": 2.064656716597474e-07, "loss": 0.9409, "step": 12341 }, { "epoch": 1.7472924187725631, "grad_norm": 9.293010209685784, "learning_rate": 2.0623763590034567e-07, "loss": 0.9905, "step": 12342 }, { "epoch": 1.7474339916472004, "grad_norm": 10.188206185982198, "learning_rate": 2.0600972072232105e-07, "loss": 0.9602, "step": 12343 }, { "epoch": 1.7475755645218376, "grad_norm": 7.577329110106747, "learning_rate": 2.0578192613765453e-07, "loss": 0.9612, "step": 12344 }, { "epoch": 1.7477171373964748, "grad_norm": 8.383972372140573, "learning_rate": 2.0555425215832176e-07, "loss": 0.9345, "step": 12345 }, { "epoch": 1.747858710271112, "grad_norm": 9.270085153857305, "learning_rate": 2.0532669879629124e-07, "loss": 1.0398, "step": 12346 }, { "epoch": 1.7480002831457493, "grad_norm": 8.842416184200411, "learning_rate": 2.050992660635248e-07, "loss": 0.8873, "step": 12347 }, { "epoch": 1.7481418560203865, "grad_norm": 10.696569687912985, "learning_rate": 2.0487195397197928e-07, "loss": 1.0581, "step": 12348 }, { "epoch": 1.7482834288950238, "grad_norm": 11.382266898814988, "learning_rate": 2.0464476253360344e-07, "loss": 1.0265, "step": 12349 }, { "epoch": 1.748425001769661, "grad_norm": 8.72554478734351, "learning_rate": 2.044176917603413e-07, "loss": 1.0172, "step": 12350 }, { "epoch": 1.7485665746442982, "grad_norm": 10.1369486817612, "learning_rate": 2.0419074166412893e-07, "loss": 0.9216, "step": 12351 }, { "epoch": 1.7487081475189354, "grad_norm": 9.943526833629669, "learning_rate": 2.0396391225689817e-07, "loss": 1.0162, "step": 12352 }, { "epoch": 1.7488497203935727, "grad_norm": 10.324966940468979, "learning_rate": 2.037372035505722e-07, "loss": 0.9928, "step": 12353 }, { "epoch": 1.74899129326821, "grad_norm": 8.738427493048707, "learning_rate": 2.0351061555706901e-07, "loss": 0.8993, "step": 12354 }, { "epoch": 1.7491328661428471, "grad_norm": 10.164612317589615, "learning_rate": 2.0328414828830078e-07, "loss": 1.0142, "step": 12355 }, { "epoch": 1.7492744390174844, "grad_norm": 8.13383496514617, "learning_rate": 2.0305780175617213e-07, "loss": 1.0036, "step": 12356 }, { "epoch": 1.7494160118921216, "grad_norm": 11.197431365150049, "learning_rate": 2.0283157597258241e-07, "loss": 1.0548, "step": 12357 }, { "epoch": 1.7495575847667588, "grad_norm": 8.80692369146154, "learning_rate": 2.026054709494235e-07, "loss": 0.939, "step": 12358 }, { "epoch": 1.749699157641396, "grad_norm": 8.067177059115847, "learning_rate": 2.0237948669858233e-07, "loss": 0.8955, "step": 12359 }, { "epoch": 1.7498407305160333, "grad_norm": 10.139649519120741, "learning_rate": 2.0215362323193822e-07, "loss": 1.0363, "step": 12360 }, { "epoch": 1.7499823033906705, "grad_norm": 8.740260780337207, "learning_rate": 2.0192788056136446e-07, "loss": 0.9306, "step": 12361 }, { "epoch": 1.7501238762653075, "grad_norm": 11.084040645506944, "learning_rate": 2.0170225869872912e-07, "loss": 1.064, "step": 12362 }, { "epoch": 1.7502654491399448, "grad_norm": 9.964633582039854, "learning_rate": 2.0147675765589236e-07, "loss": 0.8679, "step": 12363 }, { "epoch": 1.750407022014582, "grad_norm": 12.807701440403873, "learning_rate": 2.0125137744470863e-07, "loss": 0.9899, "step": 12364 }, { "epoch": 1.7505485948892192, "grad_norm": 8.951487446365627, "learning_rate": 2.0102611807702539e-07, "loss": 0.9016, "step": 12365 }, { "epoch": 1.7506901677638564, "grad_norm": 8.535669767875891, "learning_rate": 2.0080097956468537e-07, "loss": 0.9291, "step": 12366 }, { "epoch": 1.7508317406384937, "grad_norm": 8.08852143949365, "learning_rate": 2.0057596191952327e-07, "loss": 0.8867, "step": 12367 }, { "epoch": 1.750973313513131, "grad_norm": 9.331021930950484, "learning_rate": 2.0035106515336798e-07, "loss": 0.8127, "step": 12368 }, { "epoch": 1.7511148863877681, "grad_norm": 7.989335342663324, "learning_rate": 2.001262892780434e-07, "loss": 0.8885, "step": 12369 }, { "epoch": 1.7512564592624054, "grad_norm": 7.518790801612198, "learning_rate": 1.999016343053642e-07, "loss": 1.0252, "step": 12370 }, { "epoch": 1.7513980321370426, "grad_norm": 10.829953610364022, "learning_rate": 1.996771002471415e-07, "loss": 0.9558, "step": 12371 }, { "epoch": 1.7515396050116796, "grad_norm": 9.85114816857699, "learning_rate": 1.9945268711517807e-07, "loss": 0.9599, "step": 12372 }, { "epoch": 1.7516811778863168, "grad_norm": 10.188503847805048, "learning_rate": 1.9922839492127199e-07, "loss": 1.005, "step": 12373 }, { "epoch": 1.751822750760954, "grad_norm": 9.359130448599712, "learning_rate": 1.9900422367721355e-07, "loss": 1.0208, "step": 12374 }, { "epoch": 1.7519643236355913, "grad_norm": 9.584380604008794, "learning_rate": 1.9878017339478695e-07, "loss": 0.9632, "step": 12375 }, { "epoch": 1.7521058965102285, "grad_norm": 10.189281097299512, "learning_rate": 1.9855624408577136e-07, "loss": 1.0212, "step": 12376 }, { "epoch": 1.7522474693848658, "grad_norm": 8.257798641485532, "learning_rate": 1.9833243576193734e-07, "loss": 0.8773, "step": 12377 }, { "epoch": 1.752389042259503, "grad_norm": 9.36580074375797, "learning_rate": 1.9810874843505164e-07, "loss": 0.9013, "step": 12378 }, { "epoch": 1.7525306151341402, "grad_norm": 10.65577029941187, "learning_rate": 1.9788518211687202e-07, "loss": 0.9728, "step": 12379 }, { "epoch": 1.7526721880087774, "grad_norm": 10.304554970954062, "learning_rate": 1.9766173681915247e-07, "loss": 0.969, "step": 12380 }, { "epoch": 1.7528137608834147, "grad_norm": 9.954677107444347, "learning_rate": 1.9743841255363827e-07, "loss": 0.856, "step": 12381 }, { "epoch": 1.752955333758052, "grad_norm": 9.380156052842809, "learning_rate": 1.9721520933207006e-07, "loss": 0.953, "step": 12382 }, { "epoch": 1.7530969066326891, "grad_norm": 8.344602619855591, "learning_rate": 1.9699212716618123e-07, "loss": 0.9722, "step": 12383 }, { "epoch": 1.7532384795073264, "grad_norm": 8.679984155455864, "learning_rate": 1.9676916606769874e-07, "loss": 0.8481, "step": 12384 }, { "epoch": 1.7533800523819636, "grad_norm": 8.834665114095873, "learning_rate": 1.9654632604834494e-07, "loss": 1.037, "step": 12385 }, { "epoch": 1.7535216252566008, "grad_norm": 8.519684829971752, "learning_rate": 1.9632360711983212e-07, "loss": 0.9771, "step": 12386 }, { "epoch": 1.753663198131238, "grad_norm": 9.603078475725246, "learning_rate": 1.9610100929387006e-07, "loss": 0.9963, "step": 12387 }, { "epoch": 1.7538047710058753, "grad_norm": 9.66986644885398, "learning_rate": 1.9587853258215995e-07, "loss": 1.0042, "step": 12388 }, { "epoch": 1.7539463438805125, "grad_norm": 9.184303609980072, "learning_rate": 1.9565617699639717e-07, "loss": 0.9083, "step": 12389 }, { "epoch": 1.7540879167551497, "grad_norm": 9.70780775314748, "learning_rate": 1.9543394254827125e-07, "loss": 0.9259, "step": 12390 }, { "epoch": 1.754229489629787, "grad_norm": 8.31735615391774, "learning_rate": 1.9521182924946426e-07, "loss": 0.8734, "step": 12391 }, { "epoch": 1.7543710625044242, "grad_norm": 11.462866497940617, "learning_rate": 1.9498983711165347e-07, "loss": 0.9948, "step": 12392 }, { "epoch": 1.7545126353790614, "grad_norm": 9.77541189277157, "learning_rate": 1.9476796614650766e-07, "loss": 1.0848, "step": 12393 }, { "epoch": 1.7546542082536987, "grad_norm": 8.731459675307642, "learning_rate": 1.9454621636569138e-07, "loss": 0.9391, "step": 12394 }, { "epoch": 1.754795781128336, "grad_norm": 10.072779741641966, "learning_rate": 1.9432458778086167e-07, "loss": 1.0332, "step": 12395 }, { "epoch": 1.7549373540029731, "grad_norm": 10.563932603822568, "learning_rate": 1.9410308040366867e-07, "loss": 0.9264, "step": 12396 }, { "epoch": 1.7550789268776104, "grad_norm": 9.951325402090959, "learning_rate": 1.9388169424575802e-07, "loss": 1.051, "step": 12397 }, { "epoch": 1.7552204997522476, "grad_norm": 10.872396223178635, "learning_rate": 1.936604293187666e-07, "loss": 0.9026, "step": 12398 }, { "epoch": 1.7553620726268848, "grad_norm": 11.15368315074211, "learning_rate": 1.93439285634327e-07, "loss": 0.943, "step": 12399 }, { "epoch": 1.755503645501522, "grad_norm": 8.503365691793979, "learning_rate": 1.932182632040641e-07, "loss": 0.9681, "step": 12400 }, { "epoch": 1.7556452183761593, "grad_norm": 9.209925265096992, "learning_rate": 1.929973620395975e-07, "loss": 0.9726, "step": 12401 }, { "epoch": 1.7557867912507965, "grad_norm": 12.232169116870113, "learning_rate": 1.9277658215253904e-07, "loss": 0.9771, "step": 12402 }, { "epoch": 1.7559283641254335, "grad_norm": 8.69471978620614, "learning_rate": 1.925559235544955e-07, "loss": 1.0174, "step": 12403 }, { "epoch": 1.7560699370000707, "grad_norm": 9.712903389231005, "learning_rate": 1.9233538625706622e-07, "loss": 0.9774, "step": 12404 }, { "epoch": 1.756211509874708, "grad_norm": 9.307662219309934, "learning_rate": 1.9211497027184556e-07, "loss": 0.9663, "step": 12405 }, { "epoch": 1.7563530827493452, "grad_norm": 9.36346865686461, "learning_rate": 1.918946756104201e-07, "loss": 0.9344, "step": 12406 }, { "epoch": 1.7564946556239824, "grad_norm": 8.96054882480416, "learning_rate": 1.9167450228436995e-07, "loss": 1.0261, "step": 12407 }, { "epoch": 1.7566362284986197, "grad_norm": 8.639953918334143, "learning_rate": 1.9145445030527065e-07, "loss": 0.905, "step": 12408 }, { "epoch": 1.756777801373257, "grad_norm": 9.042775544642323, "learning_rate": 1.9123451968468903e-07, "loss": 0.97, "step": 12409 }, { "epoch": 1.7569193742478941, "grad_norm": 10.868188237418638, "learning_rate": 1.910147104341875e-07, "loss": 0.9893, "step": 12410 }, { "epoch": 1.7570609471225314, "grad_norm": 9.390671309422004, "learning_rate": 1.9079502256532073e-07, "loss": 0.9814, "step": 12411 }, { "epoch": 1.7572025199971686, "grad_norm": 9.51958414531878, "learning_rate": 1.9057545608963807e-07, "loss": 1.0014, "step": 12412 }, { "epoch": 1.7573440928718056, "grad_norm": 8.904201553078837, "learning_rate": 1.9035601101868168e-07, "loss": 1.0235, "step": 12413 }, { "epoch": 1.7574856657464428, "grad_norm": 9.121127208182745, "learning_rate": 1.9013668736398761e-07, "loss": 0.9992, "step": 12414 }, { "epoch": 1.75762723862108, "grad_norm": 10.116211267451607, "learning_rate": 1.899174851370858e-07, "loss": 0.9228, "step": 12415 }, { "epoch": 1.7577688114957173, "grad_norm": 8.694714082620292, "learning_rate": 1.8969840434949926e-07, "loss": 1.0065, "step": 12416 }, { "epoch": 1.7579103843703545, "grad_norm": 9.529635333262283, "learning_rate": 1.8947944501274517e-07, "loss": 0.9214, "step": 12417 }, { "epoch": 1.7580519572449917, "grad_norm": 10.533939188019577, "learning_rate": 1.892606071383332e-07, "loss": 0.888, "step": 12418 }, { "epoch": 1.758193530119629, "grad_norm": 8.74377263402293, "learning_rate": 1.8904189073776835e-07, "loss": 0.8965, "step": 12419 }, { "epoch": 1.7583351029942662, "grad_norm": 8.373653929985414, "learning_rate": 1.8882329582254833e-07, "loss": 0.938, "step": 12420 }, { "epoch": 1.7584766758689034, "grad_norm": 9.093476071770965, "learning_rate": 1.8860482240416424e-07, "loss": 0.94, "step": 12421 }, { "epoch": 1.7586182487435407, "grad_norm": 8.760951137379033, "learning_rate": 1.8838647049410157e-07, "loss": 0.8522, "step": 12422 }, { "epoch": 1.758759821618178, "grad_norm": 8.922718976883093, "learning_rate": 1.881682401038379e-07, "loss": 1.0468, "step": 12423 }, { "epoch": 1.7589013944928151, "grad_norm": 8.539686565856291, "learning_rate": 1.8795013124484674e-07, "loss": 0.8883, "step": 12424 }, { "epoch": 1.7590429673674524, "grad_norm": 9.015635578010475, "learning_rate": 1.8773214392859284e-07, "loss": 0.9723, "step": 12425 }, { "epoch": 1.7591845402420896, "grad_norm": 8.691114373610313, "learning_rate": 1.8751427816653623e-07, "loss": 0.9502, "step": 12426 }, { "epoch": 1.7593261131167268, "grad_norm": 9.233239738274596, "learning_rate": 1.8729653397012993e-07, "loss": 0.8738, "step": 12427 }, { "epoch": 1.759467685991364, "grad_norm": 11.790590342721725, "learning_rate": 1.870789113508198e-07, "loss": 0.9089, "step": 12428 }, { "epoch": 1.7596092588660013, "grad_norm": 9.453887252971368, "learning_rate": 1.8686141032004724e-07, "loss": 1.041, "step": 12429 }, { "epoch": 1.7597508317406385, "grad_norm": 9.755689086038824, "learning_rate": 1.8664403088924533e-07, "loss": 1.1162, "step": 12430 }, { "epoch": 1.7598924046152757, "grad_norm": 10.57415742155285, "learning_rate": 1.8642677306984213e-07, "loss": 0.9731, "step": 12431 }, { "epoch": 1.760033977489913, "grad_norm": 8.711987131969241, "learning_rate": 1.8620963687325772e-07, "loss": 0.9267, "step": 12432 }, { "epoch": 1.7601755503645502, "grad_norm": 10.041174236796756, "learning_rate": 1.859926223109082e-07, "loss": 0.915, "step": 12433 }, { "epoch": 1.7603171232391874, "grad_norm": 7.139809993919136, "learning_rate": 1.857757293942006e-07, "loss": 0.9165, "step": 12434 }, { "epoch": 1.7604586961138247, "grad_norm": 10.607759018360172, "learning_rate": 1.855589581345374e-07, "loss": 0.911, "step": 12435 }, { "epoch": 1.760600268988462, "grad_norm": 9.56832648398358, "learning_rate": 1.8534230854331454e-07, "loss": 0.8656, "step": 12436 }, { "epoch": 1.7607418418630991, "grad_norm": 9.249565732917334, "learning_rate": 1.851257806319201e-07, "loss": 0.9097, "step": 12437 }, { "epoch": 1.7608834147377364, "grad_norm": 9.979124404934542, "learning_rate": 1.8490937441173807e-07, "loss": 1.0198, "step": 12438 }, { "epoch": 1.7610249876123736, "grad_norm": 10.41489507233154, "learning_rate": 1.846930898941432e-07, "loss": 1.0692, "step": 12439 }, { "epoch": 1.7611665604870108, "grad_norm": 8.327076457649646, "learning_rate": 1.8447692709050668e-07, "loss": 0.9545, "step": 12440 }, { "epoch": 1.761308133361648, "grad_norm": 10.527462966045753, "learning_rate": 1.842608860121914e-07, "loss": 0.9721, "step": 12441 }, { "epoch": 1.7614497062362853, "grad_norm": 8.318852112886383, "learning_rate": 1.8404496667055433e-07, "loss": 0.9888, "step": 12442 }, { "epoch": 1.7615912791109225, "grad_norm": 10.55490362819983, "learning_rate": 1.8382916907694725e-07, "loss": 0.973, "step": 12443 }, { "epoch": 1.7617328519855595, "grad_norm": 10.341837634185497, "learning_rate": 1.8361349324271304e-07, "loss": 1.0644, "step": 12444 }, { "epoch": 1.7618744248601967, "grad_norm": 8.335766805744154, "learning_rate": 1.8339793917919096e-07, "loss": 0.9497, "step": 12445 }, { "epoch": 1.762015997734834, "grad_norm": 10.00767413363072, "learning_rate": 1.831825068977111e-07, "loss": 1.0624, "step": 12446 }, { "epoch": 1.7621575706094712, "grad_norm": 9.483345290066422, "learning_rate": 1.8296719640960025e-07, "loss": 0.9489, "step": 12447 }, { "epoch": 1.7622991434841084, "grad_norm": 7.897250757006705, "learning_rate": 1.8275200772617603e-07, "loss": 0.9576, "step": 12448 }, { "epoch": 1.7624407163587457, "grad_norm": 7.906454814926758, "learning_rate": 1.8253694085875047e-07, "loss": 0.9297, "step": 12449 }, { "epoch": 1.762582289233383, "grad_norm": 9.724506402911842, "learning_rate": 1.8232199581863036e-07, "loss": 0.9329, "step": 12450 }, { "epoch": 1.7627238621080201, "grad_norm": 9.362290783162852, "learning_rate": 1.8210717261711448e-07, "loss": 1.0372, "step": 12451 }, { "epoch": 1.7628654349826574, "grad_norm": 9.906541501157715, "learning_rate": 1.8189247126549653e-07, "loss": 0.9685, "step": 12452 }, { "epoch": 1.7630070078572946, "grad_norm": 9.74443443101605, "learning_rate": 1.816778917750625e-07, "loss": 0.9725, "step": 12453 }, { "epoch": 1.7631485807319318, "grad_norm": 10.087145082264087, "learning_rate": 1.8146343415709367e-07, "loss": 0.9901, "step": 12454 }, { "epoch": 1.7632901536065688, "grad_norm": 9.357457137991895, "learning_rate": 1.8124909842286293e-07, "loss": 0.9665, "step": 12455 }, { "epoch": 1.763431726481206, "grad_norm": 9.106301994934563, "learning_rate": 1.810348845836385e-07, "loss": 1.001, "step": 12456 }, { "epoch": 1.7635732993558433, "grad_norm": 9.313200444243023, "learning_rate": 1.8082079265068053e-07, "loss": 1.0108, "step": 12457 }, { "epoch": 1.7637148722304805, "grad_norm": 11.020884715046417, "learning_rate": 1.806068226352445e-07, "loss": 0.9816, "step": 12458 }, { "epoch": 1.7638564451051177, "grad_norm": 8.665196171745, "learning_rate": 1.8039297454857885e-07, "loss": 0.8374, "step": 12459 }, { "epoch": 1.763998017979755, "grad_norm": 7.833464303511236, "learning_rate": 1.8017924840192435e-07, "loss": 0.9802, "step": 12460 }, { "epoch": 1.7641395908543922, "grad_norm": 7.730068294721881, "learning_rate": 1.7996564420651758e-07, "loss": 0.8988, "step": 12461 }, { "epoch": 1.7642811637290294, "grad_norm": 10.202516686618193, "learning_rate": 1.7975216197358648e-07, "loss": 1.0772, "step": 12462 }, { "epoch": 1.7644227366036667, "grad_norm": 9.037809847299151, "learning_rate": 1.7953880171435455e-07, "loss": 0.9727, "step": 12463 }, { "epoch": 1.764564309478304, "grad_norm": 9.37498982746844, "learning_rate": 1.7932556344003703e-07, "loss": 0.8676, "step": 12464 }, { "epoch": 1.7647058823529411, "grad_norm": 9.135013468481365, "learning_rate": 1.7911244716184468e-07, "loss": 0.8966, "step": 12465 }, { "epoch": 1.7648474552275784, "grad_norm": 10.398555625972547, "learning_rate": 1.7889945289098042e-07, "loss": 0.9892, "step": 12466 }, { "epoch": 1.7649890281022156, "grad_norm": 8.96522202701374, "learning_rate": 1.786865806386412e-07, "loss": 0.8309, "step": 12467 }, { "epoch": 1.7651306009768528, "grad_norm": 10.535276101816407, "learning_rate": 1.7847383041601772e-07, "loss": 0.9689, "step": 12468 }, { "epoch": 1.76527217385149, "grad_norm": 7.707651661467421, "learning_rate": 1.7826120223429416e-07, "loss": 0.9354, "step": 12469 }, { "epoch": 1.7654137467261273, "grad_norm": 9.91549530377281, "learning_rate": 1.7804869610464766e-07, "loss": 0.918, "step": 12470 }, { "epoch": 1.7655553196007645, "grad_norm": 9.764364371758802, "learning_rate": 1.7783631203825007e-07, "loss": 0.9312, "step": 12471 }, { "epoch": 1.7656968924754017, "grad_norm": 10.057806780500743, "learning_rate": 1.7762405004626586e-07, "loss": 1.028, "step": 12472 }, { "epoch": 1.765838465350039, "grad_norm": 9.301919341199202, "learning_rate": 1.7741191013985387e-07, "loss": 0.9831, "step": 12473 }, { "epoch": 1.7659800382246762, "grad_norm": 10.96581122435114, "learning_rate": 1.7719989233016572e-07, "loss": 1.0514, "step": 12474 }, { "epoch": 1.7661216110993134, "grad_norm": 10.120258492647697, "learning_rate": 1.7698799662834776e-07, "loss": 0.9736, "step": 12475 }, { "epoch": 1.7662631839739507, "grad_norm": 8.022169389958766, "learning_rate": 1.7677622304553833e-07, "loss": 0.8292, "step": 12476 }, { "epoch": 1.7664047568485879, "grad_norm": 9.25098723220975, "learning_rate": 1.76564571592871e-07, "loss": 0.9318, "step": 12477 }, { "epoch": 1.7665463297232251, "grad_norm": 10.032099892807953, "learning_rate": 1.7635304228147104e-07, "loss": 1.0354, "step": 12478 }, { "epoch": 1.7666879025978623, "grad_norm": 9.860911689377566, "learning_rate": 1.7614163512245957e-07, "loss": 1.0525, "step": 12479 }, { "epoch": 1.7668294754724996, "grad_norm": 8.891245842091632, "learning_rate": 1.7593035012694992e-07, "loss": 0.9926, "step": 12480 }, { "epoch": 1.7669710483471368, "grad_norm": 9.35994274219704, "learning_rate": 1.757191873060482e-07, "loss": 0.9648, "step": 12481 }, { "epoch": 1.767112621221774, "grad_norm": 10.550930209981413, "learning_rate": 1.755081466708561e-07, "loss": 1.0819, "step": 12482 }, { "epoch": 1.7672541940964113, "grad_norm": 9.948267830980129, "learning_rate": 1.752972282324672e-07, "loss": 0.8663, "step": 12483 }, { "epoch": 1.7673957669710485, "grad_norm": 9.020363552438674, "learning_rate": 1.750864320019699e-07, "loss": 1.0163, "step": 12484 }, { "epoch": 1.7675373398456857, "grad_norm": 9.828382334046596, "learning_rate": 1.7487575799044505e-07, "loss": 0.9904, "step": 12485 }, { "epoch": 1.7676789127203227, "grad_norm": 9.335955069936396, "learning_rate": 1.746652062089685e-07, "loss": 0.9225, "step": 12486 }, { "epoch": 1.76782048559496, "grad_norm": 9.294074962401492, "learning_rate": 1.7445477666860749e-07, "loss": 0.9712, "step": 12487 }, { "epoch": 1.7679620584695972, "grad_norm": 11.081598552422426, "learning_rate": 1.7424446938042517e-07, "loss": 0.9865, "step": 12488 }, { "epoch": 1.7681036313442344, "grad_norm": 9.311886351163881, "learning_rate": 1.740342843554771e-07, "loss": 0.8697, "step": 12489 }, { "epoch": 1.7682452042188717, "grad_norm": 10.299715382615707, "learning_rate": 1.7382422160481193e-07, "loss": 0.9316, "step": 12490 }, { "epoch": 1.7683867770935089, "grad_norm": 10.15633000268971, "learning_rate": 1.7361428113947392e-07, "loss": 0.9108, "step": 12491 }, { "epoch": 1.7685283499681461, "grad_norm": 9.814894851688786, "learning_rate": 1.7340446297049784e-07, "loss": 1.093, "step": 12492 }, { "epoch": 1.7686699228427833, "grad_norm": 9.936924179956447, "learning_rate": 1.7319476710891431e-07, "loss": 0.9854, "step": 12493 }, { "epoch": 1.7688114957174206, "grad_norm": 7.727104083174177, "learning_rate": 1.7298519356574728e-07, "loss": 0.8401, "step": 12494 }, { "epoch": 1.7689530685920578, "grad_norm": 9.470192173072407, "learning_rate": 1.7277574235201295e-07, "loss": 0.9678, "step": 12495 }, { "epoch": 1.7690946414666948, "grad_norm": 10.036151009595379, "learning_rate": 1.7256641347872304e-07, "loss": 1.0291, "step": 12496 }, { "epoch": 1.769236214341332, "grad_norm": 9.75427387172661, "learning_rate": 1.723572069568813e-07, "loss": 0.9602, "step": 12497 }, { "epoch": 1.7693777872159693, "grad_norm": 9.283422350727784, "learning_rate": 1.7214812279748584e-07, "loss": 0.869, "step": 12498 }, { "epoch": 1.7695193600906065, "grad_norm": 10.687556751836711, "learning_rate": 1.719391610115273e-07, "loss": 1.0596, "step": 12499 }, { "epoch": 1.7696609329652437, "grad_norm": 10.21514474341437, "learning_rate": 1.7173032160999164e-07, "loss": 1.0173, "step": 12500 }, { "epoch": 1.769802505839881, "grad_norm": 11.686854380354758, "learning_rate": 1.7152160460385703e-07, "loss": 0.9274, "step": 12501 }, { "epoch": 1.7699440787145182, "grad_norm": 11.735647973146152, "learning_rate": 1.7131301000409496e-07, "loss": 1.0789, "step": 12502 }, { "epoch": 1.7700856515891554, "grad_norm": 10.824300864110224, "learning_rate": 1.7110453782167218e-07, "loss": 0.9453, "step": 12503 }, { "epoch": 1.7702272244637927, "grad_norm": 9.296369471549767, "learning_rate": 1.7089618806754692e-07, "loss": 1.0339, "step": 12504 }, { "epoch": 1.77036879733843, "grad_norm": 10.103789919616604, "learning_rate": 1.7068796075267264e-07, "loss": 0.9927, "step": 12505 }, { "epoch": 1.7705103702130671, "grad_norm": 7.815215836544589, "learning_rate": 1.7047985588799525e-07, "loss": 0.9494, "step": 12506 }, { "epoch": 1.7706519430877043, "grad_norm": 9.210628539930699, "learning_rate": 1.7027187348445522e-07, "loss": 1.0317, "step": 12507 }, { "epoch": 1.7707935159623416, "grad_norm": 11.137385642409708, "learning_rate": 1.700640135529852e-07, "loss": 1.012, "step": 12508 }, { "epoch": 1.7709350888369788, "grad_norm": 8.529586909354888, "learning_rate": 1.6985627610451278e-07, "loss": 1.023, "step": 12509 }, { "epoch": 1.771076661711616, "grad_norm": 8.461413594638383, "learning_rate": 1.6964866114995871e-07, "loss": 0.9703, "step": 12510 }, { "epoch": 1.7712182345862533, "grad_norm": 9.511791741249713, "learning_rate": 1.6944116870023675e-07, "loss": 0.8958, "step": 12511 }, { "epoch": 1.7713598074608905, "grad_norm": 10.360892390401913, "learning_rate": 1.6923379876625568e-07, "loss": 1.0038, "step": 12512 }, { "epoch": 1.7715013803355277, "grad_norm": 9.199278048508456, "learning_rate": 1.690265513589151e-07, "loss": 0.9102, "step": 12513 }, { "epoch": 1.771642953210165, "grad_norm": 7.59267540380844, "learning_rate": 1.6881942648911077e-07, "loss": 0.863, "step": 12514 }, { "epoch": 1.7717845260848022, "grad_norm": 7.37738567495103, "learning_rate": 1.6861242416773087e-07, "loss": 0.9862, "step": 12515 }, { "epoch": 1.7719260989594394, "grad_norm": 9.085900973298246, "learning_rate": 1.684055444056573e-07, "loss": 0.9686, "step": 12516 }, { "epoch": 1.7720676718340767, "grad_norm": 9.630536357326374, "learning_rate": 1.6819878721376637e-07, "loss": 0.9799, "step": 12517 }, { "epoch": 1.7722092447087139, "grad_norm": 9.335590997753904, "learning_rate": 1.67992152602926e-07, "loss": 1.0369, "step": 12518 }, { "epoch": 1.7723508175833511, "grad_norm": 9.650558819180418, "learning_rate": 1.6778564058399977e-07, "loss": 1.0652, "step": 12519 }, { "epoch": 1.7724923904579883, "grad_norm": 9.219898217026271, "learning_rate": 1.6757925116784313e-07, "loss": 0.957, "step": 12520 }, { "epoch": 1.7726339633326256, "grad_norm": 9.87358952360115, "learning_rate": 1.673729843653063e-07, "loss": 0.967, "step": 12521 }, { "epoch": 1.7727755362072628, "grad_norm": 9.889696990675843, "learning_rate": 1.6716684018723256e-07, "loss": 0.9599, "step": 12522 }, { "epoch": 1.7729171090819, "grad_norm": 8.309061586940272, "learning_rate": 1.6696081864445823e-07, "loss": 0.8866, "step": 12523 }, { "epoch": 1.7730586819565373, "grad_norm": 8.588240421364334, "learning_rate": 1.6675491974781438e-07, "loss": 0.8765, "step": 12524 }, { "epoch": 1.7732002548311745, "grad_norm": 8.687927674668652, "learning_rate": 1.665491435081243e-07, "loss": 0.8543, "step": 12525 }, { "epoch": 1.7733418277058117, "grad_norm": 8.530762151945535, "learning_rate": 1.6634348993620624e-07, "loss": 0.9192, "step": 12526 }, { "epoch": 1.7734834005804487, "grad_norm": 8.616976779495195, "learning_rate": 1.661379590428705e-07, "loss": 0.9594, "step": 12527 }, { "epoch": 1.773624973455086, "grad_norm": 7.656064401050784, "learning_rate": 1.6593255083892228e-07, "loss": 0.9885, "step": 12528 }, { "epoch": 1.7737665463297232, "grad_norm": 7.118070210594246, "learning_rate": 1.6572726533515936e-07, "loss": 0.8428, "step": 12529 }, { "epoch": 1.7739081192043604, "grad_norm": 9.256897287963302, "learning_rate": 1.6552210254237395e-07, "loss": 1.0063, "step": 12530 }, { "epoch": 1.7740496920789977, "grad_norm": 8.921041205839655, "learning_rate": 1.6531706247135042e-07, "loss": 0.951, "step": 12531 }, { "epoch": 1.7741912649536349, "grad_norm": 8.829435052133503, "learning_rate": 1.6511214513286826e-07, "loss": 0.9142, "step": 12532 }, { "epoch": 1.7743328378282721, "grad_norm": 10.886403736584363, "learning_rate": 1.6490735053770023e-07, "loss": 0.9697, "step": 12533 }, { "epoch": 1.7744744107029093, "grad_norm": 8.933378536432636, "learning_rate": 1.6470267869661105e-07, "loss": 0.9557, "step": 12534 }, { "epoch": 1.7746159835775466, "grad_norm": 10.132994333251887, "learning_rate": 1.6449812962036128e-07, "loss": 0.9591, "step": 12535 }, { "epoch": 1.7747575564521838, "grad_norm": 9.903411940043654, "learning_rate": 1.6429370331970285e-07, "loss": 1.0265, "step": 12536 }, { "epoch": 1.774899129326821, "grad_norm": 8.532740647873178, "learning_rate": 1.640893998053833e-07, "loss": 0.8891, "step": 12537 }, { "epoch": 1.775040702201458, "grad_norm": 9.38087096442913, "learning_rate": 1.6388521908814181e-07, "loss": 1.0177, "step": 12538 }, { "epoch": 1.7751822750760953, "grad_norm": 9.832302836932376, "learning_rate": 1.6368116117871257e-07, "loss": 0.9956, "step": 12539 }, { "epoch": 1.7753238479507325, "grad_norm": 9.364017004102726, "learning_rate": 1.6347722608782284e-07, "loss": 0.957, "step": 12540 }, { "epoch": 1.7754654208253697, "grad_norm": 11.464359284908886, "learning_rate": 1.6327341382619294e-07, "loss": 0.9263, "step": 12541 }, { "epoch": 1.775606993700007, "grad_norm": 8.787913987458944, "learning_rate": 1.6306972440453788e-07, "loss": 0.8976, "step": 12542 }, { "epoch": 1.7757485665746442, "grad_norm": 8.325463762625658, "learning_rate": 1.6286615783356468e-07, "loss": 0.971, "step": 12543 }, { "epoch": 1.7758901394492814, "grad_norm": 8.536892879675984, "learning_rate": 1.626627141239745e-07, "loss": 0.9654, "step": 12544 }, { "epoch": 1.7760317123239187, "grad_norm": 9.543082275215038, "learning_rate": 1.6245939328646322e-07, "loss": 1.0564, "step": 12545 }, { "epoch": 1.7761732851985559, "grad_norm": 9.858151125628105, "learning_rate": 1.622561953317181e-07, "loss": 0.9744, "step": 12546 }, { "epoch": 1.7763148580731931, "grad_norm": 9.036058459197093, "learning_rate": 1.620531202704223e-07, "loss": 0.9476, "step": 12547 }, { "epoch": 1.7764564309478303, "grad_norm": 8.958166596982714, "learning_rate": 1.6185016811325033e-07, "loss": 0.9078, "step": 12548 }, { "epoch": 1.7765980038224676, "grad_norm": 9.471883425304526, "learning_rate": 1.6164733887087168e-07, "loss": 1.0065, "step": 12549 }, { "epoch": 1.7767395766971048, "grad_norm": 10.187926113835758, "learning_rate": 1.614446325539487e-07, "loss": 1.0507, "step": 12550 }, { "epoch": 1.776881149571742, "grad_norm": 7.7425185978647315, "learning_rate": 1.6124204917313811e-07, "loss": 0.9452, "step": 12551 }, { "epoch": 1.7770227224463793, "grad_norm": 10.748651109389806, "learning_rate": 1.6103958873908893e-07, "loss": 0.9412, "step": 12552 }, { "epoch": 1.7771642953210165, "grad_norm": 9.392409372509313, "learning_rate": 1.608372512624448e-07, "loss": 0.9469, "step": 12553 }, { "epoch": 1.7773058681956537, "grad_norm": 9.326102952952134, "learning_rate": 1.6063503675384202e-07, "loss": 0.9053, "step": 12554 }, { "epoch": 1.777447441070291, "grad_norm": 10.04385980950784, "learning_rate": 1.604329452239109e-07, "loss": 0.9152, "step": 12555 }, { "epoch": 1.7775890139449282, "grad_norm": 7.997577777375834, "learning_rate": 1.6023097668327574e-07, "loss": 0.9349, "step": 12556 }, { "epoch": 1.7777305868195654, "grad_norm": 11.738411890911953, "learning_rate": 1.6002913114255309e-07, "loss": 1.121, "step": 12557 }, { "epoch": 1.7778721596942026, "grad_norm": 9.701798451239675, "learning_rate": 1.5982740861235468e-07, "loss": 1.0294, "step": 12558 }, { "epoch": 1.7780137325688399, "grad_norm": 8.342651294848743, "learning_rate": 1.5962580910328402e-07, "loss": 0.9253, "step": 12559 }, { "epoch": 1.778155305443477, "grad_norm": 8.264005852164516, "learning_rate": 1.594243326259401e-07, "loss": 0.9247, "step": 12560 }, { "epoch": 1.7782968783181143, "grad_norm": 9.316402564857762, "learning_rate": 1.5922297919091334e-07, "loss": 0.9678, "step": 12561 }, { "epoch": 1.7784384511927516, "grad_norm": 10.605980578694481, "learning_rate": 1.590217488087892e-07, "loss": 0.9553, "step": 12562 }, { "epoch": 1.7785800240673888, "grad_norm": 11.098878862277564, "learning_rate": 1.5882064149014637e-07, "loss": 1.0829, "step": 12563 }, { "epoch": 1.778721596942026, "grad_norm": 9.233737155424599, "learning_rate": 1.5861965724555673e-07, "loss": 0.907, "step": 12564 }, { "epoch": 1.7788631698166633, "grad_norm": 9.106547890256717, "learning_rate": 1.5841879608558652e-07, "loss": 0.932, "step": 12565 }, { "epoch": 1.7790047426913005, "grad_norm": 9.557161324397867, "learning_rate": 1.5821805802079343e-07, "loss": 1.0524, "step": 12566 }, { "epoch": 1.7791463155659377, "grad_norm": 9.401100954553932, "learning_rate": 1.5801744306173094e-07, "loss": 0.9568, "step": 12567 }, { "epoch": 1.779287888440575, "grad_norm": 8.914139529136948, "learning_rate": 1.5781695121894563e-07, "loss": 0.9691, "step": 12568 }, { "epoch": 1.779429461315212, "grad_norm": 10.55766050086215, "learning_rate": 1.5761658250297658e-07, "loss": 0.9681, "step": 12569 }, { "epoch": 1.7795710341898492, "grad_norm": 9.894473033903946, "learning_rate": 1.5741633692435725e-07, "loss": 0.9386, "step": 12570 }, { "epoch": 1.7797126070644864, "grad_norm": 9.836401724673518, "learning_rate": 1.572162144936143e-07, "loss": 0.8889, "step": 12571 }, { "epoch": 1.7798541799391236, "grad_norm": 8.499110455808834, "learning_rate": 1.5701621522126843e-07, "loss": 0.9562, "step": 12572 }, { "epoch": 1.7799957528137609, "grad_norm": 7.9116686922141355, "learning_rate": 1.568163391178326e-07, "loss": 0.8851, "step": 12573 }, { "epoch": 1.780137325688398, "grad_norm": 9.137572106365205, "learning_rate": 1.5661658619381515e-07, "loss": 0.9452, "step": 12574 }, { "epoch": 1.7802788985630353, "grad_norm": 9.397369552401061, "learning_rate": 1.564169564597165e-07, "loss": 0.8766, "step": 12575 }, { "epoch": 1.7804204714376726, "grad_norm": 8.951571824130028, "learning_rate": 1.5621744992603049e-07, "loss": 1.0462, "step": 12576 }, { "epoch": 1.7805620443123098, "grad_norm": 7.9498005871984425, "learning_rate": 1.5601806660324598e-07, "loss": 0.8393, "step": 12577 }, { "epoch": 1.780703617186947, "grad_norm": 9.85963654813544, "learning_rate": 1.558188065018437e-07, "loss": 1.0287, "step": 12578 }, { "epoch": 1.780845190061584, "grad_norm": 8.920173977237198, "learning_rate": 1.5561966963229925e-07, "loss": 0.9183, "step": 12579 }, { "epoch": 1.7809867629362213, "grad_norm": 10.610458655767063, "learning_rate": 1.5542065600508e-07, "loss": 0.9157, "step": 12580 }, { "epoch": 1.7811283358108585, "grad_norm": 9.03962081991668, "learning_rate": 1.5522176563064928e-07, "loss": 0.9413, "step": 12581 }, { "epoch": 1.7812699086854957, "grad_norm": 8.203066871073805, "learning_rate": 1.550229985194618e-07, "loss": 0.9795, "step": 12582 }, { "epoch": 1.781411481560133, "grad_norm": 9.653947771796075, "learning_rate": 1.5482435468196695e-07, "loss": 0.9558, "step": 12583 }, { "epoch": 1.7815530544347702, "grad_norm": 8.746791578346832, "learning_rate": 1.5462583412860692e-07, "loss": 0.9142, "step": 12584 }, { "epoch": 1.7816946273094074, "grad_norm": 8.851707733390393, "learning_rate": 1.5442743686981787e-07, "loss": 0.9191, "step": 12585 }, { "epoch": 1.7818362001840446, "grad_norm": 9.045767655405811, "learning_rate": 1.542291629160303e-07, "loss": 1.1069, "step": 12586 }, { "epoch": 1.7819777730586819, "grad_norm": 9.311652842654164, "learning_rate": 1.5403101227766587e-07, "loss": 1.0081, "step": 12587 }, { "epoch": 1.782119345933319, "grad_norm": 8.883965919783078, "learning_rate": 1.538329849651421e-07, "loss": 0.9805, "step": 12588 }, { "epoch": 1.7822609188079563, "grad_norm": 11.51143906599506, "learning_rate": 1.536350809888687e-07, "loss": 1.0494, "step": 12589 }, { "epoch": 1.7824024916825936, "grad_norm": 9.15228998263656, "learning_rate": 1.534373003592496e-07, "loss": 0.9368, "step": 12590 }, { "epoch": 1.7825440645572308, "grad_norm": 9.014593795959547, "learning_rate": 1.5323964308668227e-07, "loss": 0.8895, "step": 12591 }, { "epoch": 1.782685637431868, "grad_norm": 8.886274885893203, "learning_rate": 1.5304210918155677e-07, "loss": 1.0013, "step": 12592 }, { "epoch": 1.7828272103065053, "grad_norm": 10.250938930771285, "learning_rate": 1.5284469865425784e-07, "loss": 0.8964, "step": 12593 }, { "epoch": 1.7829687831811425, "grad_norm": 10.149388457418029, "learning_rate": 1.5264741151516272e-07, "loss": 0.9657, "step": 12594 }, { "epoch": 1.7831103560557797, "grad_norm": 9.014827382259385, "learning_rate": 1.524502477746434e-07, "loss": 0.9409, "step": 12595 }, { "epoch": 1.783251928930417, "grad_norm": 10.78189500731344, "learning_rate": 1.522532074430641e-07, "loss": 0.91, "step": 12596 }, { "epoch": 1.7833935018050542, "grad_norm": 9.192396040124473, "learning_rate": 1.5205629053078262e-07, "loss": 0.9149, "step": 12597 }, { "epoch": 1.7835350746796914, "grad_norm": 10.069513187100721, "learning_rate": 1.5185949704815185e-07, "loss": 0.9813, "step": 12598 }, { "epoch": 1.7836766475543286, "grad_norm": 9.143010172244457, "learning_rate": 1.5166282700551594e-07, "loss": 0.9468, "step": 12599 }, { "epoch": 1.7838182204289659, "grad_norm": 9.110506016970403, "learning_rate": 1.5146628041321443e-07, "loss": 0.9728, "step": 12600 }, { "epoch": 1.783959793303603, "grad_norm": 8.866250896983557, "learning_rate": 1.5126985728157934e-07, "loss": 1.0479, "step": 12601 }, { "epoch": 1.7841013661782403, "grad_norm": 8.852124889046106, "learning_rate": 1.5107355762093685e-07, "loss": 0.9614, "step": 12602 }, { "epoch": 1.7842429390528776, "grad_norm": 9.054288524563107, "learning_rate": 1.5087738144160562e-07, "loss": 0.9686, "step": 12603 }, { "epoch": 1.7843845119275148, "grad_norm": 10.4861958908011, "learning_rate": 1.5068132875389913e-07, "loss": 0.8913, "step": 12604 }, { "epoch": 1.784526084802152, "grad_norm": 7.7628724038054155, "learning_rate": 1.5048539956812324e-07, "loss": 0.9005, "step": 12605 }, { "epoch": 1.7846676576767893, "grad_norm": 9.160719682165526, "learning_rate": 1.5028959389457782e-07, "loss": 0.9451, "step": 12606 }, { "epoch": 1.7848092305514265, "grad_norm": 7.526334686030933, "learning_rate": 1.5009391174355735e-07, "loss": 0.9281, "step": 12607 }, { "epoch": 1.7849508034260637, "grad_norm": 9.877468524902989, "learning_rate": 1.49898353125347e-07, "loss": 0.9212, "step": 12608 }, { "epoch": 1.785092376300701, "grad_norm": 8.102197193677426, "learning_rate": 1.4970291805022825e-07, "loss": 0.9627, "step": 12609 }, { "epoch": 1.785233949175338, "grad_norm": 9.59583967978604, "learning_rate": 1.4950760652847422e-07, "loss": 0.947, "step": 12610 }, { "epoch": 1.7853755220499752, "grad_norm": 9.879450314323343, "learning_rate": 1.4931241857035343e-07, "loss": 0.947, "step": 12611 }, { "epoch": 1.7855170949246124, "grad_norm": 8.572142135345997, "learning_rate": 1.4911735418612515e-07, "loss": 0.9738, "step": 12612 }, { "epoch": 1.7856586677992496, "grad_norm": 9.219618520395574, "learning_rate": 1.4892241338604506e-07, "loss": 0.9884, "step": 12613 }, { "epoch": 1.7858002406738869, "grad_norm": 9.88727125826789, "learning_rate": 1.4872759618036081e-07, "loss": 0.9184, "step": 12614 }, { "epoch": 1.785941813548524, "grad_norm": 10.729807496917102, "learning_rate": 1.4853290257931364e-07, "loss": 0.839, "step": 12615 }, { "epoch": 1.7860833864231613, "grad_norm": 9.127464066254204, "learning_rate": 1.483383325931384e-07, "loss": 0.965, "step": 12616 }, { "epoch": 1.7862249592977986, "grad_norm": 8.551686384968491, "learning_rate": 1.4814388623206333e-07, "loss": 0.8766, "step": 12617 }, { "epoch": 1.7863665321724358, "grad_norm": 8.829272602400495, "learning_rate": 1.4794956350631106e-07, "loss": 1.0021, "step": 12618 }, { "epoch": 1.786508105047073, "grad_norm": 11.843237873368723, "learning_rate": 1.4775536442609623e-07, "loss": 1.0581, "step": 12619 }, { "epoch": 1.7866496779217103, "grad_norm": 8.936465757056913, "learning_rate": 1.4756128900162757e-07, "loss": 0.9166, "step": 12620 }, { "epoch": 1.7867912507963473, "grad_norm": 9.425380984430292, "learning_rate": 1.4736733724310865e-07, "loss": 1.0655, "step": 12621 }, { "epoch": 1.7869328236709845, "grad_norm": 8.558954771881282, "learning_rate": 1.4717350916073375e-07, "loss": 1.0835, "step": 12622 }, { "epoch": 1.7870743965456217, "grad_norm": 10.882001705596515, "learning_rate": 1.4697980476469392e-07, "loss": 1.0308, "step": 12623 }, { "epoch": 1.787215969420259, "grad_norm": 8.566444546483025, "learning_rate": 1.4678622406517074e-07, "loss": 0.9314, "step": 12624 }, { "epoch": 1.7873575422948962, "grad_norm": 10.90329018027912, "learning_rate": 1.4659276707234132e-07, "loss": 0.9438, "step": 12625 }, { "epoch": 1.7874991151695334, "grad_norm": 9.749796938615608, "learning_rate": 1.4639943379637534e-07, "loss": 1.0804, "step": 12626 }, { "epoch": 1.7876406880441706, "grad_norm": 9.351774208521503, "learning_rate": 1.462062242474363e-07, "loss": 0.9972, "step": 12627 }, { "epoch": 1.7877822609188079, "grad_norm": 9.540375290372614, "learning_rate": 1.460131384356811e-07, "loss": 0.9448, "step": 12628 }, { "epoch": 1.787923833793445, "grad_norm": 10.093671768877371, "learning_rate": 1.4582017637125967e-07, "loss": 0.932, "step": 12629 }, { "epoch": 1.7880654066680823, "grad_norm": 9.077991793208374, "learning_rate": 1.4562733806431666e-07, "loss": 1.1062, "step": 12630 }, { "epoch": 1.7882069795427196, "grad_norm": 10.473737297337221, "learning_rate": 1.4543462352498844e-07, "loss": 0.942, "step": 12631 }, { "epoch": 1.7883485524173568, "grad_norm": 7.959071845761671, "learning_rate": 1.4524203276340687e-07, "loss": 0.9148, "step": 12632 }, { "epoch": 1.788490125291994, "grad_norm": 8.79076285635551, "learning_rate": 1.4504956578969554e-07, "loss": 0.9902, "step": 12633 }, { "epoch": 1.7886316981666313, "grad_norm": 9.156462058257896, "learning_rate": 1.4485722261397273e-07, "loss": 0.8934, "step": 12634 }, { "epoch": 1.7887732710412685, "grad_norm": 10.864472303009087, "learning_rate": 1.4466500324634952e-07, "loss": 1.0181, "step": 12635 }, { "epoch": 1.7889148439159057, "grad_norm": 9.159515735868627, "learning_rate": 1.444729076969309e-07, "loss": 0.8702, "step": 12636 }, { "epoch": 1.789056416790543, "grad_norm": 9.660177968915132, "learning_rate": 1.4428093597581544e-07, "loss": 1.0226, "step": 12637 }, { "epoch": 1.7891979896651802, "grad_norm": 9.814752599597638, "learning_rate": 1.4408908809309423e-07, "loss": 1.1138, "step": 12638 }, { "epoch": 1.7893395625398174, "grad_norm": 11.569250167512005, "learning_rate": 1.4389736405885397e-07, "loss": 1.0313, "step": 12639 }, { "epoch": 1.7894811354144546, "grad_norm": 9.630102613304462, "learning_rate": 1.4370576388317155e-07, "loss": 0.9918, "step": 12640 }, { "epoch": 1.7896227082890919, "grad_norm": 8.06076859210151, "learning_rate": 1.435142875761203e-07, "loss": 0.9797, "step": 12641 }, { "epoch": 1.789764281163729, "grad_norm": 10.51197096211271, "learning_rate": 1.4332293514776635e-07, "loss": 0.9488, "step": 12642 }, { "epoch": 1.7899058540383663, "grad_norm": 9.635390976880167, "learning_rate": 1.4313170660816805e-07, "loss": 0.9343, "step": 12643 }, { "epoch": 1.7900474269130036, "grad_norm": 10.445537830053656, "learning_rate": 1.4294060196737874e-07, "loss": 1.0301, "step": 12644 }, { "epoch": 1.7901889997876408, "grad_norm": 8.948073315091138, "learning_rate": 1.4274962123544457e-07, "loss": 0.9848, "step": 12645 }, { "epoch": 1.790330572662278, "grad_norm": 11.10813774591083, "learning_rate": 1.4255876442240524e-07, "loss": 0.9293, "step": 12646 }, { "epoch": 1.7904721455369152, "grad_norm": 9.465496246983575, "learning_rate": 1.423680315382933e-07, "loss": 0.8675, "step": 12647 }, { "epoch": 1.7906137184115525, "grad_norm": 11.748476619581766, "learning_rate": 1.421774225931366e-07, "loss": 1.0237, "step": 12648 }, { "epoch": 1.7907552912861897, "grad_norm": 7.2275975821210485, "learning_rate": 1.4198693759695486e-07, "loss": 0.9596, "step": 12649 }, { "epoch": 1.790896864160827, "grad_norm": 10.350919791368483, "learning_rate": 1.417965765597612e-07, "loss": 0.8623, "step": 12650 }, { "epoch": 1.7910384370354642, "grad_norm": 9.361852353032516, "learning_rate": 1.4160633949156344e-07, "loss": 1.0354, "step": 12651 }, { "epoch": 1.7911800099101012, "grad_norm": 12.292564092084605, "learning_rate": 1.4141622640236164e-07, "loss": 0.974, "step": 12652 }, { "epoch": 1.7913215827847384, "grad_norm": 8.986023116495712, "learning_rate": 1.412262373021503e-07, "loss": 0.9764, "step": 12653 }, { "epoch": 1.7914631556593756, "grad_norm": 8.418841946445976, "learning_rate": 1.410363722009167e-07, "loss": 0.9389, "step": 12654 }, { "epoch": 1.7916047285340129, "grad_norm": 9.128332914173988, "learning_rate": 1.4084663110864262e-07, "loss": 0.9, "step": 12655 }, { "epoch": 1.79174630140865, "grad_norm": 10.091144993279535, "learning_rate": 1.406570140353014e-07, "loss": 0.887, "step": 12656 }, { "epoch": 1.7918878742832873, "grad_norm": 9.595603142607043, "learning_rate": 1.4046752099086236e-07, "loss": 0.9697, "step": 12657 }, { "epoch": 1.7920294471579246, "grad_norm": 8.222716096130195, "learning_rate": 1.4027815198528582e-07, "loss": 0.8758, "step": 12658 }, { "epoch": 1.7921710200325618, "grad_norm": 9.507660838714402, "learning_rate": 1.4008890702852774e-07, "loss": 1.0101, "step": 12659 }, { "epoch": 1.792312592907199, "grad_norm": 12.350415402735132, "learning_rate": 1.398997861305365e-07, "loss": 1.0039, "step": 12660 }, { "epoch": 1.7924541657818363, "grad_norm": 9.49739239693236, "learning_rate": 1.397107893012531e-07, "loss": 0.9502, "step": 12661 }, { "epoch": 1.7925957386564733, "grad_norm": 8.470320102087992, "learning_rate": 1.3952191655061425e-07, "loss": 0.8519, "step": 12662 }, { "epoch": 1.7927373115311105, "grad_norm": 9.31463557596785, "learning_rate": 1.393331678885476e-07, "loss": 1.001, "step": 12663 }, { "epoch": 1.7928788844057477, "grad_norm": 8.47822609323367, "learning_rate": 1.3914454332497608e-07, "loss": 1.1078, "step": 12664 }, { "epoch": 1.793020457280385, "grad_norm": 10.639459442518868, "learning_rate": 1.3895604286981613e-07, "loss": 1.0103, "step": 12665 }, { "epoch": 1.7931620301550222, "grad_norm": 9.353984012533818, "learning_rate": 1.3876766653297597e-07, "loss": 0.9169, "step": 12666 }, { "epoch": 1.7933036030296594, "grad_norm": 8.413548814258009, "learning_rate": 1.3857941432435934e-07, "loss": 0.841, "step": 12667 }, { "epoch": 1.7934451759042966, "grad_norm": 11.662908620922998, "learning_rate": 1.3839128625386193e-07, "loss": 1.0956, "step": 12668 }, { "epoch": 1.7935867487789339, "grad_norm": 9.950777983801576, "learning_rate": 1.3820328233137393e-07, "loss": 0.8704, "step": 12669 }, { "epoch": 1.793728321653571, "grad_norm": 8.675387714087831, "learning_rate": 1.380154025667782e-07, "loss": 0.9661, "step": 12670 }, { "epoch": 1.7938698945282083, "grad_norm": 9.588800302830307, "learning_rate": 1.3782764696995188e-07, "loss": 1.0735, "step": 12671 }, { "epoch": 1.7940114674028456, "grad_norm": 10.624643476899847, "learning_rate": 1.3764001555076484e-07, "loss": 1.0172, "step": 12672 }, { "epoch": 1.7941530402774828, "grad_norm": 7.423842769569218, "learning_rate": 1.374525083190803e-07, "loss": 0.8711, "step": 12673 }, { "epoch": 1.79429461315212, "grad_norm": 7.340616799614273, "learning_rate": 1.372651252847562e-07, "loss": 0.9064, "step": 12674 }, { "epoch": 1.7944361860267573, "grad_norm": 8.55724089675736, "learning_rate": 1.370778664576422e-07, "loss": 0.9706, "step": 12675 }, { "epoch": 1.7945777589013945, "grad_norm": 9.547827140914993, "learning_rate": 1.3689073184758345e-07, "loss": 0.9111, "step": 12676 }, { "epoch": 1.7947193317760317, "grad_norm": 9.688080407260205, "learning_rate": 1.3670372146441652e-07, "loss": 0.9627, "step": 12677 }, { "epoch": 1.794860904650669, "grad_norm": 8.558171201344562, "learning_rate": 1.3651683531797327e-07, "loss": 1.1064, "step": 12678 }, { "epoch": 1.7950024775253062, "grad_norm": 9.215722986879458, "learning_rate": 1.3633007341807726e-07, "loss": 0.9271, "step": 12679 }, { "epoch": 1.7951440503999434, "grad_norm": 9.286408811313438, "learning_rate": 1.3614343577454725e-07, "loss": 0.9678, "step": 12680 }, { "epoch": 1.7952856232745806, "grad_norm": 9.342968554017892, "learning_rate": 1.3595692239719404e-07, "loss": 0.9355, "step": 12681 }, { "epoch": 1.7954271961492179, "grad_norm": 9.927989703521185, "learning_rate": 1.3577053329582258e-07, "loss": 1.0076, "step": 12682 }, { "epoch": 1.795568769023855, "grad_norm": 9.80749889660629, "learning_rate": 1.3558426848023165e-07, "loss": 0.9897, "step": 12683 }, { "epoch": 1.7957103418984923, "grad_norm": 8.88616069676387, "learning_rate": 1.3539812796021234e-07, "loss": 0.9749, "step": 12684 }, { "epoch": 1.7958519147731296, "grad_norm": 11.219741479951894, "learning_rate": 1.352121117455507e-07, "loss": 0.9479, "step": 12685 }, { "epoch": 1.7959934876477668, "grad_norm": 10.19653144652823, "learning_rate": 1.3502621984602477e-07, "loss": 0.994, "step": 12686 }, { "epoch": 1.796135060522404, "grad_norm": 10.728476690433174, "learning_rate": 1.3484045227140697e-07, "loss": 0.9711, "step": 12687 }, { "epoch": 1.7962766333970412, "grad_norm": 8.5761801038306, "learning_rate": 1.3465480903146365e-07, "loss": 0.8826, "step": 12688 }, { "epoch": 1.7964182062716785, "grad_norm": 8.375671815259572, "learning_rate": 1.344692901359529e-07, "loss": 0.9202, "step": 12689 }, { "epoch": 1.7965597791463157, "grad_norm": 8.325220456798538, "learning_rate": 1.3428389559462796e-07, "loss": 0.9551, "step": 12690 }, { "epoch": 1.796701352020953, "grad_norm": 10.627887288648429, "learning_rate": 1.340986254172344e-07, "loss": 0.9938, "step": 12691 }, { "epoch": 1.7968429248955902, "grad_norm": 9.560658395991553, "learning_rate": 1.3391347961351275e-07, "loss": 1.053, "step": 12692 }, { "epoch": 1.7969844977702272, "grad_norm": 8.685457497221154, "learning_rate": 1.337284581931944e-07, "loss": 1.004, "step": 12693 }, { "epoch": 1.7971260706448644, "grad_norm": 8.135353885917999, "learning_rate": 1.3354356116600685e-07, "loss": 0.8838, "step": 12694 }, { "epoch": 1.7972676435195016, "grad_norm": 8.710111104998935, "learning_rate": 1.3335878854166984e-07, "loss": 0.9343, "step": 12695 }, { "epoch": 1.7974092163941389, "grad_norm": 11.489811903938664, "learning_rate": 1.3317414032989668e-07, "loss": 1.0081, "step": 12696 }, { "epoch": 1.797550789268776, "grad_norm": 7.888883190720102, "learning_rate": 1.3298961654039433e-07, "loss": 0.9216, "step": 12697 }, { "epoch": 1.7976923621434133, "grad_norm": 9.422560836097635, "learning_rate": 1.3280521718286255e-07, "loss": 0.9315, "step": 12698 }, { "epoch": 1.7978339350180506, "grad_norm": 10.698713521180983, "learning_rate": 1.3262094226699578e-07, "loss": 0.947, "step": 12699 }, { "epoch": 1.7979755078926878, "grad_norm": 9.7665522997239, "learning_rate": 1.3243679180248075e-07, "loss": 0.953, "step": 12700 }, { "epoch": 1.798117080767325, "grad_norm": 11.558292097491632, "learning_rate": 1.3225276579899833e-07, "loss": 0.9635, "step": 12701 }, { "epoch": 1.7982586536419622, "grad_norm": 9.820504780710063, "learning_rate": 1.3206886426622267e-07, "loss": 0.8741, "step": 12702 }, { "epoch": 1.7984002265165993, "grad_norm": 9.507022069393217, "learning_rate": 1.318850872138211e-07, "loss": 0.995, "step": 12703 }, { "epoch": 1.7985417993912365, "grad_norm": 9.139521918616909, "learning_rate": 1.3170143465145474e-07, "loss": 0.9728, "step": 12704 }, { "epoch": 1.7986833722658737, "grad_norm": 9.769362175541172, "learning_rate": 1.3151790658877785e-07, "loss": 0.9252, "step": 12705 }, { "epoch": 1.798824945140511, "grad_norm": 9.992434500832015, "learning_rate": 1.3133450303543904e-07, "loss": 0.8868, "step": 12706 }, { "epoch": 1.7989665180151482, "grad_norm": 10.138430506361635, "learning_rate": 1.3115122400107872e-07, "loss": 1.0271, "step": 12707 }, { "epoch": 1.7991080908897854, "grad_norm": 9.599655669712888, "learning_rate": 1.3096806949533274e-07, "loss": 0.9676, "step": 12708 }, { "epoch": 1.7992496637644226, "grad_norm": 8.9522574705132, "learning_rate": 1.3078503952782845e-07, "loss": 0.949, "step": 12709 }, { "epoch": 1.7993912366390599, "grad_norm": 7.830652738721922, "learning_rate": 1.306021341081881e-07, "loss": 0.8734, "step": 12710 }, { "epoch": 1.799532809513697, "grad_norm": 8.38970885434319, "learning_rate": 1.304193532460274e-07, "loss": 0.8736, "step": 12711 }, { "epoch": 1.7996743823883343, "grad_norm": 10.97262913288746, "learning_rate": 1.3023669695095413e-07, "loss": 1.0721, "step": 12712 }, { "epoch": 1.7998159552629716, "grad_norm": 8.965218197517647, "learning_rate": 1.3005416523257126e-07, "loss": 0.963, "step": 12713 }, { "epoch": 1.7999575281376088, "grad_norm": 10.907585696212285, "learning_rate": 1.2987175810047297e-07, "loss": 1.0495, "step": 12714 }, { "epoch": 1.800099101012246, "grad_norm": 10.545016112227088, "learning_rate": 1.2968947556424943e-07, "loss": 0.9067, "step": 12715 }, { "epoch": 1.8002406738868832, "grad_norm": 10.432243805405896, "learning_rate": 1.2950731763348295e-07, "loss": 1.0686, "step": 12716 }, { "epoch": 1.8003822467615205, "grad_norm": 10.90906040302982, "learning_rate": 1.2932528431774892e-07, "loss": 1.0509, "step": 12717 }, { "epoch": 1.8005238196361577, "grad_norm": 6.835618575149702, "learning_rate": 1.291433756266175e-07, "loss": 0.9148, "step": 12718 }, { "epoch": 1.800665392510795, "grad_norm": 8.181315408536994, "learning_rate": 1.289615915696507e-07, "loss": 0.9009, "step": 12719 }, { "epoch": 1.8008069653854322, "grad_norm": 10.349441852354813, "learning_rate": 1.2877993215640539e-07, "loss": 0.9566, "step": 12720 }, { "epoch": 1.8009485382600694, "grad_norm": 10.784770711372357, "learning_rate": 1.2859839739643054e-07, "loss": 1.012, "step": 12721 }, { "epoch": 1.8010901111347066, "grad_norm": 9.696950720277194, "learning_rate": 1.2841698729927022e-07, "loss": 1.0576, "step": 12722 }, { "epoch": 1.8012316840093439, "grad_norm": 8.897907191040549, "learning_rate": 1.2823570187446065e-07, "loss": 1.0437, "step": 12723 }, { "epoch": 1.801373256883981, "grad_norm": 8.573869938804709, "learning_rate": 1.2805454113153121e-07, "loss": 0.9707, "step": 12724 }, { "epoch": 1.8015148297586183, "grad_norm": 9.269937421964018, "learning_rate": 1.2787350508000645e-07, "loss": 0.9418, "step": 12725 }, { "epoch": 1.8016564026332555, "grad_norm": 8.463997383564363, "learning_rate": 1.276925937294024e-07, "loss": 0.8227, "step": 12726 }, { "epoch": 1.8017979755078928, "grad_norm": 10.972424361758513, "learning_rate": 1.2751180708923005e-07, "loss": 1.0389, "step": 12727 }, { "epoch": 1.80193954838253, "grad_norm": 10.344878443450442, "learning_rate": 1.2733114516899293e-07, "loss": 0.9716, "step": 12728 }, { "epoch": 1.8020811212571672, "grad_norm": 9.468590058338734, "learning_rate": 1.271506079781884e-07, "loss": 0.9114, "step": 12729 }, { "epoch": 1.8022226941318045, "grad_norm": 10.22380365587327, "learning_rate": 1.2697019552630696e-07, "loss": 1.0108, "step": 12730 }, { "epoch": 1.8023642670064417, "grad_norm": 10.803898030178015, "learning_rate": 1.2678990782283324e-07, "loss": 0.9444, "step": 12731 }, { "epoch": 1.802505839881079, "grad_norm": 10.420665207495116, "learning_rate": 1.266097448772441e-07, "loss": 1.0263, "step": 12732 }, { "epoch": 1.8026474127557162, "grad_norm": 7.577629403494802, "learning_rate": 1.264297066990111e-07, "loss": 0.8934, "step": 12733 }, { "epoch": 1.8027889856303532, "grad_norm": 9.447722096877332, "learning_rate": 1.2624979329759952e-07, "loss": 1.0135, "step": 12734 }, { "epoch": 1.8029305585049904, "grad_norm": 9.627653474537876, "learning_rate": 1.2607000468246533e-07, "loss": 1.0593, "step": 12735 }, { "epoch": 1.8030721313796276, "grad_norm": 9.499057120664167, "learning_rate": 1.2589034086306129e-07, "loss": 0.9265, "step": 12736 }, { "epoch": 1.8032137042542649, "grad_norm": 9.199498237527177, "learning_rate": 1.2571080184883178e-07, "loss": 0.99, "step": 12737 }, { "epoch": 1.803355277128902, "grad_norm": 9.688043000784967, "learning_rate": 1.255313876492148e-07, "loss": 1.1023, "step": 12738 }, { "epoch": 1.8034968500035393, "grad_norm": 11.56542865676186, "learning_rate": 1.2535209827364282e-07, "loss": 1.0495, "step": 12739 }, { "epoch": 1.8036384228781766, "grad_norm": 11.597613490364084, "learning_rate": 1.2517293373153993e-07, "loss": 1.1008, "step": 12740 }, { "epoch": 1.8037799957528138, "grad_norm": 9.885979451813547, "learning_rate": 1.2499389403232532e-07, "loss": 0.863, "step": 12741 }, { "epoch": 1.803921568627451, "grad_norm": 9.253165450904701, "learning_rate": 1.2481497918541085e-07, "loss": 0.9011, "step": 12742 }, { "epoch": 1.8040631415020882, "grad_norm": 8.077847326586497, "learning_rate": 1.246361892002021e-07, "loss": 0.858, "step": 12743 }, { "epoch": 1.8042047143767255, "grad_norm": 9.257869775007428, "learning_rate": 1.2445752408609733e-07, "loss": 0.9319, "step": 12744 }, { "epoch": 1.8043462872513625, "grad_norm": 9.183058307311802, "learning_rate": 1.2427898385248965e-07, "loss": 0.8931, "step": 12745 }, { "epoch": 1.8044878601259997, "grad_norm": 9.798510644585267, "learning_rate": 1.2410056850876428e-07, "loss": 0.93, "step": 12746 }, { "epoch": 1.804629433000637, "grad_norm": 10.109708187974373, "learning_rate": 1.239222780643004e-07, "loss": 0.9574, "step": 12747 }, { "epoch": 1.8047710058752742, "grad_norm": 10.859045141685348, "learning_rate": 1.237441125284708e-07, "loss": 0.9982, "step": 12748 }, { "epoch": 1.8049125787499114, "grad_norm": 11.203166200782851, "learning_rate": 1.2356607191064102e-07, "loss": 1.059, "step": 12749 }, { "epoch": 1.8050541516245486, "grad_norm": 8.393706879892035, "learning_rate": 1.2338815622017137e-07, "loss": 0.984, "step": 12750 }, { "epoch": 1.8051957244991859, "grad_norm": 7.780154966953708, "learning_rate": 1.2321036546641406e-07, "loss": 0.8634, "step": 12751 }, { "epoch": 1.805337297373823, "grad_norm": 10.602328897546888, "learning_rate": 1.2303269965871583e-07, "loss": 1.0428, "step": 12752 }, { "epoch": 1.8054788702484603, "grad_norm": 8.059662075484065, "learning_rate": 1.2285515880641585e-07, "loss": 0.9367, "step": 12753 }, { "epoch": 1.8056204431230976, "grad_norm": 10.009833269588507, "learning_rate": 1.2267774291884805e-07, "loss": 1.0259, "step": 12754 }, { "epoch": 1.8057620159977348, "grad_norm": 8.380830471795301, "learning_rate": 1.2250045200533855e-07, "loss": 0.9226, "step": 12755 }, { "epoch": 1.805903588872372, "grad_norm": 8.804443572624857, "learning_rate": 1.2232328607520743e-07, "loss": 0.9278, "step": 12756 }, { "epoch": 1.8060451617470092, "grad_norm": 9.697855084992337, "learning_rate": 1.2214624513776861e-07, "loss": 0.9645, "step": 12757 }, { "epoch": 1.8061867346216465, "grad_norm": 10.28946550024644, "learning_rate": 1.219693292023283e-07, "loss": 1.0778, "step": 12758 }, { "epoch": 1.8063283074962837, "grad_norm": 7.05528282112313, "learning_rate": 1.217925382781876e-07, "loss": 0.8354, "step": 12759 }, { "epoch": 1.806469880370921, "grad_norm": 8.868912449236747, "learning_rate": 1.216158723746394e-07, "loss": 0.9305, "step": 12760 }, { "epoch": 1.8066114532455582, "grad_norm": 8.962233671649825, "learning_rate": 1.2143933150097154e-07, "loss": 0.9825, "step": 12761 }, { "epoch": 1.8067530261201954, "grad_norm": 9.053137421920413, "learning_rate": 1.2126291566646464e-07, "loss": 0.9008, "step": 12762 }, { "epoch": 1.8068945989948326, "grad_norm": 9.487512914676445, "learning_rate": 1.210866248803924e-07, "loss": 0.9971, "step": 12763 }, { "epoch": 1.8070361718694699, "grad_norm": 10.052365334990816, "learning_rate": 1.20910459152023e-07, "loss": 1.069, "step": 12764 }, { "epoch": 1.807177744744107, "grad_norm": 10.393172194247402, "learning_rate": 1.2073441849061645e-07, "loss": 1.1262, "step": 12765 }, { "epoch": 1.8073193176187443, "grad_norm": 8.870035421651238, "learning_rate": 1.205585029054279e-07, "loss": 0.9298, "step": 12766 }, { "epoch": 1.8074608904933815, "grad_norm": 10.305823181826774, "learning_rate": 1.2038271240570415e-07, "loss": 0.9763, "step": 12767 }, { "epoch": 1.8076024633680188, "grad_norm": 10.582236726271974, "learning_rate": 1.2020704700068691e-07, "loss": 1.0406, "step": 12768 }, { "epoch": 1.807744036242656, "grad_norm": 8.623280229359503, "learning_rate": 1.2003150669961105e-07, "loss": 0.9175, "step": 12769 }, { "epoch": 1.8078856091172932, "grad_norm": 8.374850143686107, "learning_rate": 1.198560915117039e-07, "loss": 0.9899, "step": 12770 }, { "epoch": 1.8080271819919305, "grad_norm": 8.840743885070207, "learning_rate": 1.1968080144618783e-07, "loss": 0.9166, "step": 12771 }, { "epoch": 1.8081687548665677, "grad_norm": 11.034148875479703, "learning_rate": 1.195056365122768e-07, "loss": 1.0506, "step": 12772 }, { "epoch": 1.808310327741205, "grad_norm": 10.59289655370338, "learning_rate": 1.193305967191796e-07, "loss": 1.0595, "step": 12773 }, { "epoch": 1.8084519006158422, "grad_norm": 9.592673573696954, "learning_rate": 1.191556820760978e-07, "loss": 0.9549, "step": 12774 }, { "epoch": 1.8085934734904794, "grad_norm": 10.153403296961182, "learning_rate": 1.1898089259222673e-07, "loss": 0.9528, "step": 12775 }, { "epoch": 1.8087350463651164, "grad_norm": 9.231878314269132, "learning_rate": 1.1880622827675464e-07, "loss": 0.9242, "step": 12776 }, { "epoch": 1.8088766192397536, "grad_norm": 8.936455085333753, "learning_rate": 1.1863168913886364e-07, "loss": 0.9109, "step": 12777 }, { "epoch": 1.8090181921143909, "grad_norm": 8.972343336402652, "learning_rate": 1.1845727518772915e-07, "loss": 0.8671, "step": 12778 }, { "epoch": 1.809159764989028, "grad_norm": 10.627238677299804, "learning_rate": 1.1828298643251967e-07, "loss": 0.9133, "step": 12779 }, { "epoch": 1.8093013378636653, "grad_norm": 8.428178625675246, "learning_rate": 1.1810882288239817e-07, "loss": 0.8308, "step": 12780 }, { "epoch": 1.8094429107383025, "grad_norm": 8.902147492626076, "learning_rate": 1.1793478454651952e-07, "loss": 0.8512, "step": 12781 }, { "epoch": 1.8095844836129398, "grad_norm": 9.65078689450272, "learning_rate": 1.1776087143403337e-07, "loss": 0.9325, "step": 12782 }, { "epoch": 1.809726056487577, "grad_norm": 9.154911711604084, "learning_rate": 1.1758708355408155e-07, "loss": 1.009, "step": 12783 }, { "epoch": 1.8098676293622142, "grad_norm": 8.635541141166618, "learning_rate": 1.1741342091580038e-07, "loss": 0.8576, "step": 12784 }, { "epoch": 1.8100092022368515, "grad_norm": 8.596859788898529, "learning_rate": 1.172398835283195e-07, "loss": 1.0073, "step": 12785 }, { "epoch": 1.8101507751114885, "grad_norm": 10.054649939833691, "learning_rate": 1.1706647140076105e-07, "loss": 1.0687, "step": 12786 }, { "epoch": 1.8102923479861257, "grad_norm": 9.225854237607173, "learning_rate": 1.1689318454224191e-07, "loss": 0.9986, "step": 12787 }, { "epoch": 1.810433920860763, "grad_norm": 8.89957217784824, "learning_rate": 1.1672002296187063e-07, "loss": 0.9835, "step": 12788 }, { "epoch": 1.8105754937354002, "grad_norm": 10.459002793809645, "learning_rate": 1.1654698666875076e-07, "loss": 0.9087, "step": 12789 }, { "epoch": 1.8107170666100374, "grad_norm": 9.58827396138605, "learning_rate": 1.1637407567197862e-07, "loss": 1.0319, "step": 12790 }, { "epoch": 1.8108586394846746, "grad_norm": 9.2888752346557, "learning_rate": 1.162012899806439e-07, "loss": 0.9258, "step": 12791 }, { "epoch": 1.8110002123593119, "grad_norm": 8.752637193264329, "learning_rate": 1.1602862960383015e-07, "loss": 0.9687, "step": 12792 }, { "epoch": 1.811141785233949, "grad_norm": 9.554831228477429, "learning_rate": 1.1585609455061348e-07, "loss": 0.9609, "step": 12793 }, { "epoch": 1.8112833581085863, "grad_norm": 8.960098400256838, "learning_rate": 1.1568368483006465e-07, "loss": 1.0632, "step": 12794 }, { "epoch": 1.8114249309832235, "grad_norm": 7.478579895747593, "learning_rate": 1.1551140045124615e-07, "loss": 0.9471, "step": 12795 }, { "epoch": 1.8115665038578608, "grad_norm": 9.025846878007314, "learning_rate": 1.1533924142321601e-07, "loss": 1.0184, "step": 12796 }, { "epoch": 1.811708076732498, "grad_norm": 9.34101465305216, "learning_rate": 1.1516720775502338e-07, "loss": 0.9815, "step": 12797 }, { "epoch": 1.8118496496071352, "grad_norm": 8.542460583022903, "learning_rate": 1.1499529945571269e-07, "loss": 0.9997, "step": 12798 }, { "epoch": 1.8119912224817725, "grad_norm": 9.45359268923126, "learning_rate": 1.1482351653432089e-07, "loss": 1.0937, "step": 12799 }, { "epoch": 1.8121327953564097, "grad_norm": 10.075776434198662, "learning_rate": 1.1465185899987797e-07, "loss": 0.9933, "step": 12800 }, { "epoch": 1.812274368231047, "grad_norm": 10.632498933427454, "learning_rate": 1.1448032686140864e-07, "loss": 0.9833, "step": 12801 }, { "epoch": 1.8124159411056842, "grad_norm": 8.387885813360167, "learning_rate": 1.1430892012792933e-07, "loss": 0.8388, "step": 12802 }, { "epoch": 1.8125575139803214, "grad_norm": 9.332236861354613, "learning_rate": 1.1413763880845169e-07, "loss": 0.933, "step": 12803 }, { "epoch": 1.8126990868549586, "grad_norm": 8.657103568729383, "learning_rate": 1.139664829119791e-07, "loss": 0.9381, "step": 12804 }, { "epoch": 1.8128406597295958, "grad_norm": 8.895541207894416, "learning_rate": 1.1379545244750961e-07, "loss": 0.9346, "step": 12805 }, { "epoch": 1.812982232604233, "grad_norm": 8.605339309058268, "learning_rate": 1.1362454742403356e-07, "loss": 0.9154, "step": 12806 }, { "epoch": 1.8131238054788703, "grad_norm": 10.262417226775852, "learning_rate": 1.1345376785053596e-07, "loss": 1.0922, "step": 12807 }, { "epoch": 1.8132653783535075, "grad_norm": 8.765467025230896, "learning_rate": 1.1328311373599493e-07, "loss": 0.9646, "step": 12808 }, { "epoch": 1.8134069512281448, "grad_norm": 8.186546211889024, "learning_rate": 1.1311258508938022e-07, "loss": 0.9584, "step": 12809 }, { "epoch": 1.813548524102782, "grad_norm": 7.499659975609285, "learning_rate": 1.1294218191965745e-07, "loss": 0.8604, "step": 12810 }, { "epoch": 1.8136900969774192, "grad_norm": 9.243726510307587, "learning_rate": 1.1277190423578416e-07, "loss": 1.0135, "step": 12811 }, { "epoch": 1.8138316698520565, "grad_norm": 8.879775360977652, "learning_rate": 1.1260175204671181e-07, "loss": 1.0202, "step": 12812 }, { "epoch": 1.8139732427266937, "grad_norm": 10.790577797961848, "learning_rate": 1.1243172536138547e-07, "loss": 0.8829, "step": 12813 }, { "epoch": 1.814114815601331, "grad_norm": 7.490636702851856, "learning_rate": 1.1226182418874271e-07, "loss": 0.9443, "step": 12814 }, { "epoch": 1.8142563884759682, "grad_norm": 9.306364560110655, "learning_rate": 1.1209204853771582e-07, "loss": 0.9576, "step": 12815 }, { "epoch": 1.8143979613506054, "grad_norm": 11.38716095839445, "learning_rate": 1.1192239841722935e-07, "loss": 0.9607, "step": 12816 }, { "epoch": 1.8145395342252424, "grad_norm": 8.039465357526163, "learning_rate": 1.1175287383620197e-07, "loss": 0.9414, "step": 12817 }, { "epoch": 1.8146811070998796, "grad_norm": 9.117722208773811, "learning_rate": 1.1158347480354493e-07, "loss": 0.9659, "step": 12818 }, { "epoch": 1.8148226799745169, "grad_norm": 11.238124428650366, "learning_rate": 1.1141420132816383e-07, "loss": 1.0027, "step": 12819 }, { "epoch": 1.814964252849154, "grad_norm": 11.24945982589883, "learning_rate": 1.1124505341895742e-07, "loss": 1.0878, "step": 12820 }, { "epoch": 1.8151058257237913, "grad_norm": 8.361985299959471, "learning_rate": 1.1107603108481718e-07, "loss": 0.9682, "step": 12821 }, { "epoch": 1.8152473985984285, "grad_norm": 9.996223881162537, "learning_rate": 1.109071343346288e-07, "loss": 0.9964, "step": 12822 }, { "epoch": 1.8153889714730658, "grad_norm": 10.264149273804499, "learning_rate": 1.1073836317727071e-07, "loss": 0.9917, "step": 12823 }, { "epoch": 1.815530544347703, "grad_norm": 9.643708908972528, "learning_rate": 1.1056971762161584e-07, "loss": 0.9702, "step": 12824 }, { "epoch": 1.8156721172223402, "grad_norm": 9.680010256801246, "learning_rate": 1.1040119767652901e-07, "loss": 1.04, "step": 12825 }, { "epoch": 1.8158136900969775, "grad_norm": 8.609826050174174, "learning_rate": 1.1023280335086956e-07, "loss": 1.0025, "step": 12826 }, { "epoch": 1.8159552629716147, "grad_norm": 10.01825459398309, "learning_rate": 1.1006453465348954e-07, "loss": 0.9532, "step": 12827 }, { "epoch": 1.8160968358462517, "grad_norm": 9.11556393534569, "learning_rate": 1.0989639159323523e-07, "loss": 1.021, "step": 12828 }, { "epoch": 1.816238408720889, "grad_norm": 10.794804729269835, "learning_rate": 1.0972837417894538e-07, "loss": 0.9194, "step": 12829 }, { "epoch": 1.8163799815955262, "grad_norm": 8.650099349140447, "learning_rate": 1.0956048241945238e-07, "loss": 0.9851, "step": 12830 }, { "epoch": 1.8165215544701634, "grad_norm": 8.125443666522587, "learning_rate": 1.0939271632358278e-07, "loss": 0.9918, "step": 12831 }, { "epoch": 1.8166631273448006, "grad_norm": 8.267134934155317, "learning_rate": 1.0922507590015535e-07, "loss": 0.8619, "step": 12832 }, { "epoch": 1.8168047002194379, "grad_norm": 9.259668194818051, "learning_rate": 1.0905756115798332e-07, "loss": 0.9139, "step": 12833 }, { "epoch": 1.816946273094075, "grad_norm": 10.060883765411132, "learning_rate": 1.0889017210587216e-07, "loss": 0.909, "step": 12834 }, { "epoch": 1.8170878459687123, "grad_norm": 7.865215217182405, "learning_rate": 1.0872290875262175e-07, "loss": 0.9594, "step": 12835 }, { "epoch": 1.8172294188433495, "grad_norm": 7.792516434241874, "learning_rate": 1.0855577110702536e-07, "loss": 0.8817, "step": 12836 }, { "epoch": 1.8173709917179868, "grad_norm": 8.588903107603256, "learning_rate": 1.0838875917786845e-07, "loss": 0.9011, "step": 12837 }, { "epoch": 1.817512564592624, "grad_norm": 12.26359457716721, "learning_rate": 1.0822187297393177e-07, "loss": 1.0606, "step": 12838 }, { "epoch": 1.8176541374672612, "grad_norm": 9.266538938378469, "learning_rate": 1.0805511250398748e-07, "loss": 0.9557, "step": 12839 }, { "epoch": 1.8177957103418985, "grad_norm": 10.0745637027749, "learning_rate": 1.07888477776803e-07, "loss": 0.9889, "step": 12840 }, { "epoch": 1.8179372832165357, "grad_norm": 8.269974081789892, "learning_rate": 1.0772196880113716e-07, "loss": 0.846, "step": 12841 }, { "epoch": 1.818078856091173, "grad_norm": 8.05275356041554, "learning_rate": 1.0755558558574325e-07, "loss": 0.9247, "step": 12842 }, { "epoch": 1.8182204289658102, "grad_norm": 9.486099915864767, "learning_rate": 1.0738932813936897e-07, "loss": 1.0297, "step": 12843 }, { "epoch": 1.8183620018404474, "grad_norm": 7.422564215531679, "learning_rate": 1.0722319647075347e-07, "loss": 0.8692, "step": 12844 }, { "epoch": 1.8185035747150846, "grad_norm": 9.844759450349912, "learning_rate": 1.0705719058863057e-07, "loss": 0.9411, "step": 12845 }, { "epoch": 1.8186451475897218, "grad_norm": 12.219640987135923, "learning_rate": 1.0689131050172635e-07, "loss": 1.0461, "step": 12846 }, { "epoch": 1.818786720464359, "grad_norm": 8.879173909158556, "learning_rate": 1.0672555621876218e-07, "loss": 0.7361, "step": 12847 }, { "epoch": 1.8189282933389963, "grad_norm": 9.606599335532685, "learning_rate": 1.0655992774845054e-07, "loss": 0.9272, "step": 12848 }, { "epoch": 1.8190698662136335, "grad_norm": 11.295104419619044, "learning_rate": 1.0639442509949944e-07, "loss": 1.0664, "step": 12849 }, { "epoch": 1.8192114390882708, "grad_norm": 8.220000777418559, "learning_rate": 1.0622904828060803e-07, "loss": 0.9969, "step": 12850 }, { "epoch": 1.819353011962908, "grad_norm": 10.705174515792548, "learning_rate": 1.0606379730047134e-07, "loss": 0.9877, "step": 12851 }, { "epoch": 1.8194945848375452, "grad_norm": 9.865241528593229, "learning_rate": 1.0589867216777544e-07, "loss": 0.9762, "step": 12852 }, { "epoch": 1.8196361577121825, "grad_norm": 8.278983788533516, "learning_rate": 1.0573367289120118e-07, "loss": 0.9783, "step": 12853 }, { "epoch": 1.8197777305868197, "grad_norm": 10.53610814723049, "learning_rate": 1.0556879947942272e-07, "loss": 1.0008, "step": 12854 }, { "epoch": 1.819919303461457, "grad_norm": 10.000548919870873, "learning_rate": 1.0540405194110703e-07, "loss": 0.9116, "step": 12855 }, { "epoch": 1.8200608763360941, "grad_norm": 9.076320869432854, "learning_rate": 1.0523943028491496e-07, "loss": 0.9242, "step": 12856 }, { "epoch": 1.8202024492107314, "grad_norm": 7.059311408591957, "learning_rate": 1.0507493451949984e-07, "loss": 0.8606, "step": 12857 }, { "epoch": 1.8203440220853686, "grad_norm": 9.976577029893491, "learning_rate": 1.0491056465351007e-07, "loss": 0.9011, "step": 12858 }, { "epoch": 1.8204855949600056, "grad_norm": 10.517163735162944, "learning_rate": 1.0474632069558621e-07, "loss": 0.9802, "step": 12859 }, { "epoch": 1.8206271678346428, "grad_norm": 9.47819948956366, "learning_rate": 1.045822026543622e-07, "loss": 0.9208, "step": 12860 }, { "epoch": 1.82076874070928, "grad_norm": 10.315154872513807, "learning_rate": 1.0441821053846612e-07, "loss": 0.9776, "step": 12861 }, { "epoch": 1.8209103135839173, "grad_norm": 10.137760363150436, "learning_rate": 1.0425434435651776e-07, "loss": 1.0228, "step": 12862 }, { "epoch": 1.8210518864585545, "grad_norm": 10.693543627122667, "learning_rate": 1.0409060411713273e-07, "loss": 1.0402, "step": 12863 }, { "epoch": 1.8211934593331918, "grad_norm": 10.315760981216172, "learning_rate": 1.0392698982891775e-07, "loss": 0.9623, "step": 12864 }, { "epoch": 1.821335032207829, "grad_norm": 7.736159979869267, "learning_rate": 1.0376350150047427e-07, "loss": 0.9462, "step": 12865 }, { "epoch": 1.8214766050824662, "grad_norm": 11.207708934612295, "learning_rate": 1.0360013914039708e-07, "loss": 0.9996, "step": 12866 }, { "epoch": 1.8216181779571035, "grad_norm": 9.608478187953539, "learning_rate": 1.0343690275727374e-07, "loss": 0.9923, "step": 12867 }, { "epoch": 1.8217597508317407, "grad_norm": 9.078351909276918, "learning_rate": 1.0327379235968549e-07, "loss": 0.8762, "step": 12868 }, { "epoch": 1.8219013237063777, "grad_norm": 8.846694725622637, "learning_rate": 1.0311080795620654e-07, "loss": 0.8426, "step": 12869 }, { "epoch": 1.822042896581015, "grad_norm": 8.693825154970355, "learning_rate": 1.0294794955540587e-07, "loss": 0.9012, "step": 12870 }, { "epoch": 1.8221844694556522, "grad_norm": 10.041853484846948, "learning_rate": 1.0278521716584361e-07, "loss": 1.0188, "step": 12871 }, { "epoch": 1.8223260423302894, "grad_norm": 10.3457906963091, "learning_rate": 1.0262261079607539e-07, "loss": 0.974, "step": 12872 }, { "epoch": 1.8224676152049266, "grad_norm": 9.995689989159077, "learning_rate": 1.0246013045464881e-07, "loss": 0.9451, "step": 12873 }, { "epoch": 1.8226091880795638, "grad_norm": 8.591487184333934, "learning_rate": 1.0229777615010538e-07, "loss": 1.0158, "step": 12874 }, { "epoch": 1.822750760954201, "grad_norm": 9.990116194010168, "learning_rate": 1.0213554789098052e-07, "loss": 0.9816, "step": 12875 }, { "epoch": 1.8228923338288383, "grad_norm": 11.316094770480326, "learning_rate": 1.0197344568580153e-07, "loss": 0.9488, "step": 12876 }, { "epoch": 1.8230339067034755, "grad_norm": 9.3117237151541, "learning_rate": 1.0181146954309052e-07, "loss": 0.9755, "step": 12877 }, { "epoch": 1.8231754795781128, "grad_norm": 9.090930619647803, "learning_rate": 1.0164961947136232e-07, "loss": 0.9468, "step": 12878 }, { "epoch": 1.82331705245275, "grad_norm": 8.898374910611752, "learning_rate": 1.0148789547912569e-07, "loss": 0.982, "step": 12879 }, { "epoch": 1.8234586253273872, "grad_norm": 10.439095563626354, "learning_rate": 1.013262975748816e-07, "loss": 0.9876, "step": 12880 }, { "epoch": 1.8236001982020245, "grad_norm": 7.7776603538489635, "learning_rate": 1.011648257671255e-07, "loss": 0.9738, "step": 12881 }, { "epoch": 1.8237417710766617, "grad_norm": 10.684332127135027, "learning_rate": 1.0100348006434641e-07, "loss": 1.046, "step": 12882 }, { "epoch": 1.823883343951299, "grad_norm": 9.463692601787782, "learning_rate": 1.0084226047502505e-07, "loss": 0.9212, "step": 12883 }, { "epoch": 1.8240249168259361, "grad_norm": 10.097237282773389, "learning_rate": 1.0068116700763769e-07, "loss": 0.9336, "step": 12884 }, { "epoch": 1.8241664897005734, "grad_norm": 10.976110005130531, "learning_rate": 1.0052019967065174e-07, "loss": 0.9587, "step": 12885 }, { "epoch": 1.8243080625752106, "grad_norm": 9.689269070452575, "learning_rate": 1.0035935847253015e-07, "loss": 1.0792, "step": 12886 }, { "epoch": 1.8244496354498478, "grad_norm": 7.2092174656674874, "learning_rate": 1.001986434217278e-07, "loss": 0.8763, "step": 12887 }, { "epoch": 1.824591208324485, "grad_norm": 8.037325568553378, "learning_rate": 1.0003805452669296e-07, "loss": 0.9538, "step": 12888 }, { "epoch": 1.8247327811991223, "grad_norm": 9.762523726313598, "learning_rate": 9.987759179586886e-08, "loss": 0.9521, "step": 12889 }, { "epoch": 1.8248743540737595, "grad_norm": 10.017758432343488, "learning_rate": 9.97172552376896e-08, "loss": 0.9503, "step": 12890 }, { "epoch": 1.8250159269483968, "grad_norm": 8.960716557460376, "learning_rate": 9.955704486058482e-08, "loss": 0.9259, "step": 12891 }, { "epoch": 1.825157499823034, "grad_norm": 8.885596168794885, "learning_rate": 9.939696067297611e-08, "loss": 0.8666, "step": 12892 }, { "epoch": 1.8252990726976712, "grad_norm": 10.766528176124611, "learning_rate": 9.923700268327952e-08, "loss": 1.031, "step": 12893 }, { "epoch": 1.8254406455723085, "grad_norm": 11.742605926510201, "learning_rate": 9.90771708999036e-08, "loss": 0.9069, "step": 12894 }, { "epoch": 1.8255822184469457, "grad_norm": 8.946917074293097, "learning_rate": 9.891746533125024e-08, "loss": 0.8929, "step": 12895 }, { "epoch": 1.825723791321583, "grad_norm": 8.784639946339087, "learning_rate": 9.87578859857155e-08, "loss": 0.9612, "step": 12896 }, { "epoch": 1.8258653641962201, "grad_norm": 9.244115658969562, "learning_rate": 9.859843287168825e-08, "loss": 0.966, "step": 12897 }, { "epoch": 1.8260069370708574, "grad_norm": 8.50334595291874, "learning_rate": 9.843910599755119e-08, "loss": 0.9786, "step": 12898 }, { "epoch": 1.8261485099454946, "grad_norm": 11.001975835732374, "learning_rate": 9.827990537167903e-08, "loss": 0.9132, "step": 12899 }, { "epoch": 1.8262900828201316, "grad_norm": 8.90182995788625, "learning_rate": 9.812083100244201e-08, "loss": 0.9185, "step": 12900 }, { "epoch": 1.8264316556947688, "grad_norm": 8.718808191030048, "learning_rate": 9.796188289820152e-08, "loss": 0.8114, "step": 12901 }, { "epoch": 1.826573228569406, "grad_norm": 9.735792715030403, "learning_rate": 9.780306106731419e-08, "loss": 1.0164, "step": 12902 }, { "epoch": 1.8267148014440433, "grad_norm": 11.095371181610593, "learning_rate": 9.764436551812889e-08, "loss": 1.0259, "step": 12903 }, { "epoch": 1.8268563743186805, "grad_norm": 8.989857786961101, "learning_rate": 9.748579625898758e-08, "loss": 0.9497, "step": 12904 }, { "epoch": 1.8269979471933178, "grad_norm": 10.68449564863148, "learning_rate": 9.73273532982269e-08, "loss": 1.0219, "step": 12905 }, { "epoch": 1.827139520067955, "grad_norm": 9.110273627684794, "learning_rate": 9.716903664417549e-08, "loss": 0.9936, "step": 12906 }, { "epoch": 1.8272810929425922, "grad_norm": 10.994308039396419, "learning_rate": 9.701084630515667e-08, "loss": 0.9856, "step": 12907 }, { "epoch": 1.8274226658172295, "grad_norm": 9.4068927735375, "learning_rate": 9.685278228948519e-08, "loss": 0.8979, "step": 12908 }, { "epoch": 1.8275642386918667, "grad_norm": 9.384677622181043, "learning_rate": 9.669484460547135e-08, "loss": 0.8183, "step": 12909 }, { "epoch": 1.827705811566504, "grad_norm": 9.841055171185873, "learning_rate": 9.653703326141794e-08, "loss": 1.0055, "step": 12910 }, { "epoch": 1.827847384441141, "grad_norm": 8.97906900044465, "learning_rate": 9.637934826562001e-08, "loss": 0.925, "step": 12911 }, { "epoch": 1.8279889573157782, "grad_norm": 10.131673991552123, "learning_rate": 9.622178962636813e-08, "loss": 0.9977, "step": 12912 }, { "epoch": 1.8281305301904154, "grad_norm": 8.550265962803026, "learning_rate": 9.606435735194403e-08, "loss": 0.9441, "step": 12913 }, { "epoch": 1.8282721030650526, "grad_norm": 8.451402301789877, "learning_rate": 9.590705145062468e-08, "loss": 0.9574, "step": 12914 }, { "epoch": 1.8284136759396898, "grad_norm": 9.011072764829217, "learning_rate": 9.574987193067847e-08, "loss": 0.9782, "step": 12915 }, { "epoch": 1.828555248814327, "grad_norm": 9.845353059574643, "learning_rate": 9.559281880036908e-08, "loss": 1.0511, "step": 12916 }, { "epoch": 1.8286968216889643, "grad_norm": 10.323618680537015, "learning_rate": 9.54358920679524e-08, "loss": 0.9772, "step": 12917 }, { "epoch": 1.8288383945636015, "grad_norm": 10.84962150511248, "learning_rate": 9.527909174167793e-08, "loss": 1.0066, "step": 12918 }, { "epoch": 1.8289799674382388, "grad_norm": 9.115421650435836, "learning_rate": 9.512241782978853e-08, "loss": 0.8382, "step": 12919 }, { "epoch": 1.829121540312876, "grad_norm": 9.255342332638806, "learning_rate": 9.496587034052041e-08, "loss": 0.9287, "step": 12920 }, { "epoch": 1.8292631131875132, "grad_norm": 9.08009429949893, "learning_rate": 9.480944928210362e-08, "loss": 1.0741, "step": 12921 }, { "epoch": 1.8294046860621505, "grad_norm": 9.223901582985242, "learning_rate": 9.46531546627602e-08, "loss": 0.8933, "step": 12922 }, { "epoch": 1.8295462589367877, "grad_norm": 9.656981999080582, "learning_rate": 9.449698649070721e-08, "loss": 0.9359, "step": 12923 }, { "epoch": 1.829687831811425, "grad_norm": 8.192485194351466, "learning_rate": 9.43409447741539e-08, "loss": 0.9928, "step": 12924 }, { "epoch": 1.8298294046860621, "grad_norm": 11.437982267624486, "learning_rate": 9.418502952130343e-08, "loss": 0.9966, "step": 12925 }, { "epoch": 1.8299709775606994, "grad_norm": 8.922810039525936, "learning_rate": 9.40292407403523e-08, "loss": 1.0666, "step": 12926 }, { "epoch": 1.8301125504353366, "grad_norm": 10.760340485698585, "learning_rate": 9.38735784394898e-08, "loss": 1.0672, "step": 12927 }, { "epoch": 1.8302541233099738, "grad_norm": 8.760851425529102, "learning_rate": 9.371804262689938e-08, "loss": 0.927, "step": 12928 }, { "epoch": 1.830395696184611, "grad_norm": 9.815515139990305, "learning_rate": 9.3562633310757e-08, "loss": 0.9425, "step": 12929 }, { "epoch": 1.8305372690592483, "grad_norm": 11.66651705691368, "learning_rate": 9.340735049923277e-08, "loss": 1.1048, "step": 12930 }, { "epoch": 1.8306788419338855, "grad_norm": 8.984788968044434, "learning_rate": 9.325219420048964e-08, "loss": 1.0651, "step": 12931 }, { "epoch": 1.8308204148085228, "grad_norm": 7.6648695401251725, "learning_rate": 9.309716442268413e-08, "loss": 0.9509, "step": 12932 }, { "epoch": 1.83096198768316, "grad_norm": 9.459430602924416, "learning_rate": 9.29422611739661e-08, "loss": 0.9915, "step": 12933 }, { "epoch": 1.8311035605577972, "grad_norm": 8.784496209642677, "learning_rate": 9.278748446247848e-08, "loss": 1.033, "step": 12934 }, { "epoch": 1.8312451334324344, "grad_norm": 10.471459610275394, "learning_rate": 9.263283429635839e-08, "loss": 0.9813, "step": 12935 }, { "epoch": 1.8313867063070717, "grad_norm": 10.152111536638264, "learning_rate": 9.247831068373458e-08, "loss": 0.9652, "step": 12936 }, { "epoch": 1.831528279181709, "grad_norm": 8.65816897939071, "learning_rate": 9.23239136327314e-08, "loss": 0.9265, "step": 12937 }, { "epoch": 1.8316698520563461, "grad_norm": 9.238120208691505, "learning_rate": 9.216964315146431e-08, "loss": 0.9648, "step": 12938 }, { "epoch": 1.8318114249309834, "grad_norm": 8.980586412479516, "learning_rate": 9.201549924804376e-08, "loss": 0.9927, "step": 12939 }, { "epoch": 1.8319529978056206, "grad_norm": 11.047787986052485, "learning_rate": 9.186148193057325e-08, "loss": 0.851, "step": 12940 }, { "epoch": 1.8320945706802578, "grad_norm": 11.963410862412552, "learning_rate": 9.170759120714884e-08, "loss": 0.909, "step": 12941 }, { "epoch": 1.8322361435548948, "grad_norm": 9.748229428452435, "learning_rate": 9.155382708586097e-08, "loss": 1.0846, "step": 12942 }, { "epoch": 1.832377716429532, "grad_norm": 10.657799213816672, "learning_rate": 9.140018957479236e-08, "loss": 1.0612, "step": 12943 }, { "epoch": 1.8325192893041693, "grad_norm": 9.623194933976173, "learning_rate": 9.124667868201986e-08, "loss": 0.9854, "step": 12944 }, { "epoch": 1.8326608621788065, "grad_norm": 7.671886187698691, "learning_rate": 9.109329441561343e-08, "loss": 0.8384, "step": 12945 }, { "epoch": 1.8328024350534438, "grad_norm": 9.724760987156662, "learning_rate": 9.094003678363633e-08, "loss": 0.9189, "step": 12946 }, { "epoch": 1.832944007928081, "grad_norm": 8.894694652237533, "learning_rate": 9.078690579414546e-08, "loss": 1.079, "step": 12947 }, { "epoch": 1.8330855808027182, "grad_norm": 10.050130693335534, "learning_rate": 9.063390145519019e-08, "loss": 1.0162, "step": 12948 }, { "epoch": 1.8332271536773554, "grad_norm": 10.060086736892908, "learning_rate": 9.048102377481466e-08, "loss": 0.942, "step": 12949 }, { "epoch": 1.8333687265519927, "grad_norm": 11.037817364425367, "learning_rate": 9.032827276105466e-08, "loss": 0.9777, "step": 12950 }, { "epoch": 1.83351029942663, "grad_norm": 9.475096335210711, "learning_rate": 9.017564842194099e-08, "loss": 1.0715, "step": 12951 }, { "epoch": 1.833651872301267, "grad_norm": 10.532570957005486, "learning_rate": 9.002315076549639e-08, "loss": 0.9719, "step": 12952 }, { "epoch": 1.8337934451759041, "grad_norm": 11.80842382754852, "learning_rate": 8.987077979973807e-08, "loss": 1.0606, "step": 12953 }, { "epoch": 1.8339350180505414, "grad_norm": 10.010242938314835, "learning_rate": 8.971853553267545e-08, "loss": 0.9095, "step": 12954 }, { "epoch": 1.8340765909251786, "grad_norm": 8.771740308487864, "learning_rate": 8.956641797231214e-08, "loss": 0.9323, "step": 12955 }, { "epoch": 1.8342181637998158, "grad_norm": 9.542391509353063, "learning_rate": 8.941442712664561e-08, "loss": 1.1305, "step": 12956 }, { "epoch": 1.834359736674453, "grad_norm": 12.194534228049159, "learning_rate": 8.926256300366475e-08, "loss": 1.0773, "step": 12957 }, { "epoch": 1.8345013095490903, "grad_norm": 8.674930398535402, "learning_rate": 8.911082561135348e-08, "loss": 0.9571, "step": 12958 }, { "epoch": 1.8346428824237275, "grad_norm": 11.52975991812194, "learning_rate": 8.895921495768845e-08, "loss": 1.0264, "step": 12959 }, { "epoch": 1.8347844552983648, "grad_norm": 7.742597921292361, "learning_rate": 8.880773105063994e-08, "loss": 0.9414, "step": 12960 }, { "epoch": 1.834926028173002, "grad_norm": 10.590726617914852, "learning_rate": 8.865637389817077e-08, "loss": 0.9761, "step": 12961 }, { "epoch": 1.8350676010476392, "grad_norm": 10.297962646464045, "learning_rate": 8.850514350823819e-08, "loss": 0.932, "step": 12962 }, { "epoch": 1.8352091739222764, "grad_norm": 8.705270280229987, "learning_rate": 8.835403988879221e-08, "loss": 0.9841, "step": 12963 }, { "epoch": 1.8353507467969137, "grad_norm": 9.985411111145787, "learning_rate": 8.820306304777593e-08, "loss": 0.9991, "step": 12964 }, { "epoch": 1.835492319671551, "grad_norm": 7.9582423925612975, "learning_rate": 8.805221299312689e-08, "loss": 0.8871, "step": 12965 }, { "epoch": 1.8356338925461881, "grad_norm": 8.877767641743123, "learning_rate": 8.790148973277401e-08, "loss": 0.9625, "step": 12966 }, { "epoch": 1.8357754654208254, "grad_norm": 8.578257075059058, "learning_rate": 8.775089327464154e-08, "loss": 0.9333, "step": 12967 }, { "epoch": 1.8359170382954626, "grad_norm": 8.938028813436675, "learning_rate": 8.760042362664617e-08, "loss": 0.9849, "step": 12968 }, { "epoch": 1.8360586111700998, "grad_norm": 10.112253713732583, "learning_rate": 8.745008079669742e-08, "loss": 0.9212, "step": 12969 }, { "epoch": 1.836200184044737, "grad_norm": 8.17063879870517, "learning_rate": 8.729986479269926e-08, "loss": 0.866, "step": 12970 }, { "epoch": 1.8363417569193743, "grad_norm": 10.897008614887376, "learning_rate": 8.714977562254784e-08, "loss": 1.0781, "step": 12971 }, { "epoch": 1.8364833297940115, "grad_norm": 11.39893547575688, "learning_rate": 8.699981329413409e-08, "loss": 1.0536, "step": 12972 }, { "epoch": 1.8366249026686488, "grad_norm": 10.03693587640717, "learning_rate": 8.68499778153406e-08, "loss": 0.9397, "step": 12973 }, { "epoch": 1.836766475543286, "grad_norm": 8.868278859738345, "learning_rate": 8.670026919404467e-08, "loss": 0.9075, "step": 12974 }, { "epoch": 1.8369080484179232, "grad_norm": 9.231602699603158, "learning_rate": 8.655068743811613e-08, "loss": 0.9314, "step": 12975 }, { "epoch": 1.8370496212925604, "grad_norm": 9.424937798980343, "learning_rate": 8.640123255541838e-08, "loss": 1.0249, "step": 12976 }, { "epoch": 1.8371911941671977, "grad_norm": 9.27009873355107, "learning_rate": 8.625190455380821e-08, "loss": 0.9314, "step": 12977 }, { "epoch": 1.837332767041835, "grad_norm": 8.692598671713798, "learning_rate": 8.610270344113575e-08, "loss": 0.9824, "step": 12978 }, { "epoch": 1.8374743399164721, "grad_norm": 9.535720727223383, "learning_rate": 8.595362922524413e-08, "loss": 0.9426, "step": 12979 }, { "epoch": 1.8376159127911094, "grad_norm": 8.897706977256837, "learning_rate": 8.580468191397018e-08, "loss": 0.9774, "step": 12980 }, { "epoch": 1.8377574856657466, "grad_norm": 8.961154180166897, "learning_rate": 8.565586151514427e-08, "loss": 0.9543, "step": 12981 }, { "epoch": 1.8378990585403838, "grad_norm": 9.771058644595929, "learning_rate": 8.550716803658904e-08, "loss": 0.9443, "step": 12982 }, { "epoch": 1.8380406314150208, "grad_norm": 8.017293834408596, "learning_rate": 8.535860148612213e-08, "loss": 0.8564, "step": 12983 }, { "epoch": 1.838182204289658, "grad_norm": 9.278438668941062, "learning_rate": 8.521016187155284e-08, "loss": 1.0057, "step": 12984 }, { "epoch": 1.8383237771642953, "grad_norm": 8.994430090008091, "learning_rate": 8.506184920068466e-08, "loss": 0.9332, "step": 12985 }, { "epoch": 1.8384653500389325, "grad_norm": 8.257211019071546, "learning_rate": 8.491366348131469e-08, "loss": 0.8506, "step": 12986 }, { "epoch": 1.8386069229135698, "grad_norm": 11.278641924978158, "learning_rate": 8.476560472123251e-08, "loss": 1.0233, "step": 12987 }, { "epoch": 1.838748495788207, "grad_norm": 7.954259525461102, "learning_rate": 8.46176729282222e-08, "loss": 0.9463, "step": 12988 }, { "epoch": 1.8388900686628442, "grad_norm": 10.64621725466356, "learning_rate": 8.44698681100592e-08, "loss": 0.9825, "step": 12989 }, { "epoch": 1.8390316415374814, "grad_norm": 9.534412897335368, "learning_rate": 8.432219027451421e-08, "loss": 0.9314, "step": 12990 }, { "epoch": 1.8391732144121187, "grad_norm": 8.572839439170938, "learning_rate": 8.41746394293505e-08, "loss": 0.9599, "step": 12991 }, { "epoch": 1.839314787286756, "grad_norm": 9.203549023948884, "learning_rate": 8.402721558232463e-08, "loss": 0.8987, "step": 12992 }, { "epoch": 1.839456360161393, "grad_norm": 11.394045804066097, "learning_rate": 8.387991874118678e-08, "loss": 1.0777, "step": 12993 }, { "epoch": 1.8395979330360301, "grad_norm": 10.181777938540307, "learning_rate": 8.373274891367993e-08, "loss": 0.9484, "step": 12994 }, { "epoch": 1.8397395059106674, "grad_norm": 9.813767661075252, "learning_rate": 8.358570610754097e-08, "loss": 0.9506, "step": 12995 }, { "epoch": 1.8398810787853046, "grad_norm": 7.955160363940856, "learning_rate": 8.343879033049951e-08, "loss": 0.9218, "step": 12996 }, { "epoch": 1.8400226516599418, "grad_norm": 9.680444917148453, "learning_rate": 8.329200159027939e-08, "loss": 0.953, "step": 12997 }, { "epoch": 1.840164224534579, "grad_norm": 9.578605702022827, "learning_rate": 8.314533989459612e-08, "loss": 0.9916, "step": 12998 }, { "epoch": 1.8403057974092163, "grad_norm": 12.50054075978753, "learning_rate": 8.299880525116072e-08, "loss": 0.9955, "step": 12999 }, { "epoch": 1.8404473702838535, "grad_norm": 8.974499072703559, "learning_rate": 8.285239766767595e-08, "loss": 0.9163, "step": 13000 }, { "epoch": 1.8405889431584908, "grad_norm": 11.357965814844306, "learning_rate": 8.270611715183813e-08, "loss": 1.0176, "step": 13001 }, { "epoch": 1.840730516033128, "grad_norm": 9.909857156380372, "learning_rate": 8.25599637113375e-08, "loss": 0.8938, "step": 13002 }, { "epoch": 1.8408720889077652, "grad_norm": 9.262306069262834, "learning_rate": 8.241393735385684e-08, "loss": 0.9781, "step": 13003 }, { "epoch": 1.8410136617824024, "grad_norm": 10.71959912020275, "learning_rate": 8.226803808707301e-08, "loss": 0.9523, "step": 13004 }, { "epoch": 1.8411552346570397, "grad_norm": 8.311560097238063, "learning_rate": 8.212226591865547e-08, "loss": 0.935, "step": 13005 }, { "epoch": 1.841296807531677, "grad_norm": 9.355656721102237, "learning_rate": 8.197662085626778e-08, "loss": 0.9374, "step": 13006 }, { "epoch": 1.8414383804063141, "grad_norm": 8.672278812388626, "learning_rate": 8.183110290756608e-08, "loss": 1.0015, "step": 13007 }, { "epoch": 1.8415799532809514, "grad_norm": 9.393771896286024, "learning_rate": 8.168571208020032e-08, "loss": 0.9502, "step": 13008 }, { "epoch": 1.8417215261555886, "grad_norm": 8.708969263344706, "learning_rate": 8.154044838181385e-08, "loss": 0.847, "step": 13009 }, { "epoch": 1.8418630990302258, "grad_norm": 9.951260618164163, "learning_rate": 8.139531182004223e-08, "loss": 0.8045, "step": 13010 }, { "epoch": 1.842004671904863, "grad_norm": 9.291207158197635, "learning_rate": 8.125030240251575e-08, "loss": 0.9115, "step": 13011 }, { "epoch": 1.8421462447795003, "grad_norm": 10.611929719337763, "learning_rate": 8.110542013685745e-08, "loss": 1.007, "step": 13012 }, { "epoch": 1.8422878176541375, "grad_norm": 12.28694156840785, "learning_rate": 8.09606650306835e-08, "loss": 0.9937, "step": 13013 }, { "epoch": 1.8424293905287747, "grad_norm": 9.095824539196629, "learning_rate": 8.081603709160362e-08, "loss": 0.9475, "step": 13014 }, { "epoch": 1.842570963403412, "grad_norm": 9.112774746158205, "learning_rate": 8.067153632722092e-08, "loss": 1.1256, "step": 13015 }, { "epoch": 1.8427125362780492, "grad_norm": 10.620747871000209, "learning_rate": 8.052716274513178e-08, "loss": 0.9661, "step": 13016 }, { "epoch": 1.8428541091526864, "grad_norm": 8.323687145742284, "learning_rate": 8.038291635292545e-08, "loss": 0.8972, "step": 13017 }, { "epoch": 1.8429956820273237, "grad_norm": 8.970315085568343, "learning_rate": 8.023879715818556e-08, "loss": 0.8606, "step": 13018 }, { "epoch": 1.843137254901961, "grad_norm": 8.889796427915837, "learning_rate": 8.009480516848717e-08, "loss": 1.0637, "step": 13019 }, { "epoch": 1.8432788277765981, "grad_norm": 8.73340406122275, "learning_rate": 7.995094039140116e-08, "loss": 0.9445, "step": 13020 }, { "epoch": 1.8434204006512354, "grad_norm": 7.806131681793236, "learning_rate": 7.980720283448957e-08, "loss": 0.9272, "step": 13021 }, { "epoch": 1.8435619735258726, "grad_norm": 10.588864620679505, "learning_rate": 7.966359250530824e-08, "loss": 1.0178, "step": 13022 }, { "epoch": 1.8437035464005098, "grad_norm": 8.482839877523052, "learning_rate": 7.952010941140786e-08, "loss": 0.9035, "step": 13023 }, { "epoch": 1.8438451192751468, "grad_norm": 9.067777373028983, "learning_rate": 7.937675356032987e-08, "loss": 0.916, "step": 13024 }, { "epoch": 1.843986692149784, "grad_norm": 10.669700389827796, "learning_rate": 7.923352495961157e-08, "loss": 1.0147, "step": 13025 }, { "epoch": 1.8441282650244213, "grad_norm": 9.504766372787367, "learning_rate": 7.909042361678165e-08, "loss": 0.9642, "step": 13026 }, { "epoch": 1.8442698378990585, "grad_norm": 10.146428920385032, "learning_rate": 7.894744953936329e-08, "loss": 0.8746, "step": 13027 }, { "epoch": 1.8444114107736957, "grad_norm": 9.618806109348409, "learning_rate": 7.880460273487184e-08, "loss": 0.9346, "step": 13028 }, { "epoch": 1.844552983648333, "grad_norm": 10.186998975170125, "learning_rate": 7.866188321081741e-08, "loss": 0.9592, "step": 13029 }, { "epoch": 1.8446945565229702, "grad_norm": 8.145782184358941, "learning_rate": 7.851929097470234e-08, "loss": 0.9785, "step": 13030 }, { "epoch": 1.8448361293976074, "grad_norm": 9.848632376750734, "learning_rate": 7.837682603402258e-08, "loss": 0.9813, "step": 13031 }, { "epoch": 1.8449777022722447, "grad_norm": 9.158348285731106, "learning_rate": 7.823448839626768e-08, "loss": 0.9978, "step": 13032 }, { "epoch": 1.845119275146882, "grad_norm": 11.895150845931335, "learning_rate": 7.809227806891972e-08, "loss": 0.988, "step": 13033 }, { "epoch": 1.8452608480215191, "grad_norm": 9.049581646601991, "learning_rate": 7.795019505945495e-08, "loss": 1.0043, "step": 13034 }, { "epoch": 1.8454024208961561, "grad_norm": 9.594776210510862, "learning_rate": 7.78082393753421e-08, "loss": 1.0956, "step": 13035 }, { "epoch": 1.8455439937707934, "grad_norm": 11.50632999191369, "learning_rate": 7.766641102404438e-08, "loss": 0.9045, "step": 13036 }, { "epoch": 1.8456855666454306, "grad_norm": 8.310621465220493, "learning_rate": 7.75247100130172e-08, "loss": 0.9002, "step": 13037 }, { "epoch": 1.8458271395200678, "grad_norm": 10.403922339032578, "learning_rate": 7.738313634970962e-08, "loss": 1.0398, "step": 13038 }, { "epoch": 1.845968712394705, "grad_norm": 8.788950520119975, "learning_rate": 7.724169004156457e-08, "loss": 1.0229, "step": 13039 }, { "epoch": 1.8461102852693423, "grad_norm": 10.149523388426031, "learning_rate": 7.710037109601692e-08, "loss": 0.9554, "step": 13040 }, { "epoch": 1.8462518581439795, "grad_norm": 9.50049348854463, "learning_rate": 7.695917952049658e-08, "loss": 0.8966, "step": 13041 }, { "epoch": 1.8463934310186167, "grad_norm": 9.207439350443197, "learning_rate": 7.68181153224254e-08, "loss": 0.9867, "step": 13042 }, { "epoch": 1.846535003893254, "grad_norm": 9.527365771679142, "learning_rate": 7.66771785092188e-08, "loss": 0.9256, "step": 13043 }, { "epoch": 1.8466765767678912, "grad_norm": 8.36289444750076, "learning_rate": 7.653636908828644e-08, "loss": 0.9432, "step": 13044 }, { "epoch": 1.8468181496425284, "grad_norm": 9.657787015492788, "learning_rate": 7.639568706702988e-08, "loss": 0.9931, "step": 13045 }, { "epoch": 1.8469597225171657, "grad_norm": 9.609125298457963, "learning_rate": 7.625513245284515e-08, "loss": 0.9211, "step": 13046 }, { "epoch": 1.847101295391803, "grad_norm": 10.58248257158639, "learning_rate": 7.611470525312054e-08, "loss": 1.0482, "step": 13047 }, { "epoch": 1.8472428682664401, "grad_norm": 8.186145932579763, "learning_rate": 7.597440547523872e-08, "loss": 0.8642, "step": 13048 }, { "epoch": 1.8473844411410774, "grad_norm": 9.991028766986606, "learning_rate": 7.58342331265749e-08, "loss": 0.9589, "step": 13049 }, { "epoch": 1.8475260140157146, "grad_norm": 10.968936407782149, "learning_rate": 7.56941882144982e-08, "loss": 1.0033, "step": 13050 }, { "epoch": 1.8476675868903518, "grad_norm": 8.697067590870832, "learning_rate": 7.555427074636995e-08, "loss": 0.9412, "step": 13051 }, { "epoch": 1.847809159764989, "grad_norm": 10.176391911259264, "learning_rate": 7.541448072954622e-08, "loss": 0.9544, "step": 13052 }, { "epoch": 1.8479507326396263, "grad_norm": 8.703347660414748, "learning_rate": 7.527481817137555e-08, "loss": 0.9829, "step": 13053 }, { "epoch": 1.8480923055142635, "grad_norm": 9.00740255463527, "learning_rate": 7.513528307919931e-08, "loss": 1.0087, "step": 13054 }, { "epoch": 1.8482338783889007, "grad_norm": 10.014287279034452, "learning_rate": 7.499587546035358e-08, "loss": 0.9304, "step": 13055 }, { "epoch": 1.848375451263538, "grad_norm": 10.069911715017277, "learning_rate": 7.48565953221661e-08, "loss": 0.9799, "step": 13056 }, { "epoch": 1.8485170241381752, "grad_norm": 9.758388334315292, "learning_rate": 7.471744267195962e-08, "loss": 1.0266, "step": 13057 }, { "epoch": 1.8486585970128124, "grad_norm": 8.514414962050893, "learning_rate": 7.457841751704831e-08, "loss": 0.9963, "step": 13058 }, { "epoch": 1.8488001698874497, "grad_norm": 9.618668492537621, "learning_rate": 7.44395198647413e-08, "loss": 1.0008, "step": 13059 }, { "epoch": 1.848941742762087, "grad_norm": 9.376238931807768, "learning_rate": 7.430074972234053e-08, "loss": 1.0124, "step": 13060 }, { "epoch": 1.8490833156367241, "grad_norm": 8.033866248387053, "learning_rate": 7.416210709714016e-08, "loss": 0.9281, "step": 13061 }, { "epoch": 1.8492248885113614, "grad_norm": 11.12034423789292, "learning_rate": 7.402359199642967e-08, "loss": 0.9565, "step": 13062 }, { "epoch": 1.8493664613859986, "grad_norm": 10.652145714032844, "learning_rate": 7.388520442748959e-08, "loss": 1.0056, "step": 13063 }, { "epoch": 1.8495080342606358, "grad_norm": 8.784513145508594, "learning_rate": 7.374694439759523e-08, "loss": 0.8987, "step": 13064 }, { "epoch": 1.849649607135273, "grad_norm": 9.246618116600159, "learning_rate": 7.36088119140152e-08, "loss": 0.9565, "step": 13065 }, { "epoch": 1.84979118000991, "grad_norm": 8.750112042390915, "learning_rate": 7.347080698401038e-08, "loss": 0.9175, "step": 13066 }, { "epoch": 1.8499327528845473, "grad_norm": 11.31989868334394, "learning_rate": 7.333292961483634e-08, "loss": 0.9148, "step": 13067 }, { "epoch": 1.8500743257591845, "grad_norm": 10.13199100973195, "learning_rate": 7.319517981374036e-08, "loss": 0.9726, "step": 13068 }, { "epoch": 1.8502158986338217, "grad_norm": 7.541616815504121, "learning_rate": 7.305755758796468e-08, "loss": 0.9055, "step": 13069 }, { "epoch": 1.850357471508459, "grad_norm": 9.850850768550796, "learning_rate": 7.292006294474325e-08, "loss": 0.9288, "step": 13070 }, { "epoch": 1.8504990443830962, "grad_norm": 9.684367405561318, "learning_rate": 7.278269589130472e-08, "loss": 0.8569, "step": 13071 }, { "epoch": 1.8506406172577334, "grad_norm": 10.889882745172846, "learning_rate": 7.264545643486997e-08, "loss": 0.8868, "step": 13072 }, { "epoch": 1.8507821901323707, "grad_norm": 8.861116537519631, "learning_rate": 7.250834458265355e-08, "loss": 1.0099, "step": 13073 }, { "epoch": 1.850923763007008, "grad_norm": 10.910132823663535, "learning_rate": 7.237136034186382e-08, "loss": 1.0168, "step": 13074 }, { "epoch": 1.8510653358816451, "grad_norm": 8.902057504173808, "learning_rate": 7.223450371970114e-08, "loss": 0.9637, "step": 13075 }, { "epoch": 1.8512069087562821, "grad_norm": 9.516098035758768, "learning_rate": 7.209777472336061e-08, "loss": 0.8819, "step": 13076 }, { "epoch": 1.8513484816309194, "grad_norm": 9.765888277701064, "learning_rate": 7.19611733600295e-08, "loss": 0.9684, "step": 13077 }, { "epoch": 1.8514900545055566, "grad_norm": 8.471118553552408, "learning_rate": 7.182469963688932e-08, "loss": 0.9412, "step": 13078 }, { "epoch": 1.8516316273801938, "grad_norm": 8.240771594170363, "learning_rate": 7.168835356111376e-08, "loss": 0.9273, "step": 13079 }, { "epoch": 1.851773200254831, "grad_norm": 9.998574727531482, "learning_rate": 7.155213513987124e-08, "loss": 0.946, "step": 13080 }, { "epoch": 1.8519147731294683, "grad_norm": 8.346526523343876, "learning_rate": 7.141604438032218e-08, "loss": 0.9134, "step": 13081 }, { "epoch": 1.8520563460041055, "grad_norm": 8.766702895095676, "learning_rate": 7.128008128962055e-08, "loss": 0.9002, "step": 13082 }, { "epoch": 1.8521979188787427, "grad_norm": 9.53327975313651, "learning_rate": 7.11442458749148e-08, "loss": 0.9651, "step": 13083 }, { "epoch": 1.85233949175338, "grad_norm": 8.867388402872761, "learning_rate": 7.100853814334451e-08, "loss": 0.9324, "step": 13084 }, { "epoch": 1.8524810646280172, "grad_norm": 10.591889614717081, "learning_rate": 7.087295810204425e-08, "loss": 0.933, "step": 13085 }, { "epoch": 1.8526226375026544, "grad_norm": 9.105956808697012, "learning_rate": 7.073750575814136e-08, "loss": 0.8856, "step": 13086 }, { "epoch": 1.8527642103772917, "grad_norm": 8.476567000286824, "learning_rate": 7.060218111875628e-08, "loss": 0.9151, "step": 13087 }, { "epoch": 1.852905783251929, "grad_norm": 8.323353959214463, "learning_rate": 7.046698419100356e-08, "loss": 0.974, "step": 13088 }, { "epoch": 1.8530473561265661, "grad_norm": 9.058255589446043, "learning_rate": 7.033191498198949e-08, "loss": 1.0201, "step": 13089 }, { "epoch": 1.8531889290012034, "grad_norm": 7.393273941411471, "learning_rate": 7.019697349881532e-08, "loss": 0.9247, "step": 13090 }, { "epoch": 1.8533305018758406, "grad_norm": 10.97521642936424, "learning_rate": 7.006215974857428e-08, "loss": 0.9166, "step": 13091 }, { "epoch": 1.8534720747504778, "grad_norm": 9.561019882642702, "learning_rate": 6.992747373835401e-08, "loss": 1.006, "step": 13092 }, { "epoch": 1.853613647625115, "grad_norm": 9.265343390008038, "learning_rate": 6.979291547523415e-08, "loss": 0.8939, "step": 13093 }, { "epoch": 1.8537552204997523, "grad_norm": 9.89323884917158, "learning_rate": 6.965848496628902e-08, "loss": 1.0408, "step": 13094 }, { "epoch": 1.8538967933743895, "grad_norm": 7.123465121735079, "learning_rate": 6.952418221858492e-08, "loss": 0.9842, "step": 13095 }, { "epoch": 1.8540383662490267, "grad_norm": 8.85638926416064, "learning_rate": 6.939000723918232e-08, "loss": 0.8615, "step": 13096 }, { "epoch": 1.854179939123664, "grad_norm": 8.404809991525825, "learning_rate": 6.925596003513501e-08, "loss": 0.8953, "step": 13097 }, { "epoch": 1.8543215119983012, "grad_norm": 11.017442225994426, "learning_rate": 6.912204061348904e-08, "loss": 1.019, "step": 13098 }, { "epoch": 1.8544630848729384, "grad_norm": 8.331790883048352, "learning_rate": 6.898824898128515e-08, "loss": 0.9597, "step": 13099 }, { "epoch": 1.8546046577475757, "grad_norm": 9.624311942736153, "learning_rate": 6.885458514555632e-08, "loss": 0.8911, "step": 13100 }, { "epoch": 1.8547462306222129, "grad_norm": 8.739358152656873, "learning_rate": 6.872104911332916e-08, "loss": 0.948, "step": 13101 }, { "epoch": 1.8548878034968501, "grad_norm": 8.46199695438699, "learning_rate": 6.858764089162334e-08, "loss": 0.8473, "step": 13102 }, { "epoch": 1.8550293763714873, "grad_norm": 12.582404769758405, "learning_rate": 6.845436048745241e-08, "loss": 1.0742, "step": 13103 }, { "epoch": 1.8551709492461246, "grad_norm": 9.67196058688404, "learning_rate": 6.832120790782326e-08, "loss": 1.0029, "step": 13104 }, { "epoch": 1.8553125221207618, "grad_norm": 10.604959414564975, "learning_rate": 6.818818315973475e-08, "loss": 1.0182, "step": 13105 }, { "epoch": 1.855454094995399, "grad_norm": 11.162167302179085, "learning_rate": 6.805528625018016e-08, "loss": 1.0216, "step": 13106 }, { "epoch": 1.855595667870036, "grad_norm": 8.546121121202397, "learning_rate": 6.792251718614584e-08, "loss": 0.8667, "step": 13107 }, { "epoch": 1.8557372407446733, "grad_norm": 9.910491131758116, "learning_rate": 6.778987597461123e-08, "loss": 0.9804, "step": 13108 }, { "epoch": 1.8558788136193105, "grad_norm": 9.757001661417222, "learning_rate": 6.765736262254935e-08, "loss": 0.9271, "step": 13109 }, { "epoch": 1.8560203864939477, "grad_norm": 9.900943718829913, "learning_rate": 6.752497713692629e-08, "loss": 0.9063, "step": 13110 }, { "epoch": 1.856161959368585, "grad_norm": 10.366774274097882, "learning_rate": 6.73927195247015e-08, "loss": 0.889, "step": 13111 }, { "epoch": 1.8563035322432222, "grad_norm": 8.834075704604727, "learning_rate": 6.726058979282774e-08, "loss": 0.9885, "step": 13112 }, { "epoch": 1.8564451051178594, "grad_norm": 7.936539719533101, "learning_rate": 6.712858794825083e-08, "loss": 1.0252, "step": 13113 }, { "epoch": 1.8565866779924967, "grad_norm": 10.335155777553092, "learning_rate": 6.699671399790969e-08, "loss": 0.9567, "step": 13114 }, { "epoch": 1.856728250867134, "grad_norm": 10.39396937190981, "learning_rate": 6.686496794873792e-08, "loss": 1.0478, "step": 13115 }, { "epoch": 1.8568698237417711, "grad_norm": 9.59702029079412, "learning_rate": 6.673334980765972e-08, "loss": 0.9651, "step": 13116 }, { "epoch": 1.8570113966164083, "grad_norm": 10.175292394687379, "learning_rate": 6.660185958159537e-08, "loss": 0.9328, "step": 13117 }, { "epoch": 1.8571529694910454, "grad_norm": 9.272263000207637, "learning_rate": 6.647049727745685e-08, "loss": 0.9154, "step": 13118 }, { "epoch": 1.8572945423656826, "grad_norm": 9.10779695272305, "learning_rate": 6.633926290214976e-08, "loss": 0.8968, "step": 13119 }, { "epoch": 1.8574361152403198, "grad_norm": 8.995993252125368, "learning_rate": 6.620815646257301e-08, "loss": 0.9513, "step": 13120 }, { "epoch": 1.857577688114957, "grad_norm": 11.671939058140817, "learning_rate": 6.607717796561858e-08, "loss": 1.0231, "step": 13121 }, { "epoch": 1.8577192609895943, "grad_norm": 9.819108232146613, "learning_rate": 6.594632741817237e-08, "loss": 0.9822, "step": 13122 }, { "epoch": 1.8578608338642315, "grad_norm": 8.408019152676648, "learning_rate": 6.581560482711247e-08, "loss": 0.8869, "step": 13123 }, { "epoch": 1.8580024067388687, "grad_norm": 10.087067556270535, "learning_rate": 6.568501019931173e-08, "loss": 0.9617, "step": 13124 }, { "epoch": 1.858143979613506, "grad_norm": 9.660540865018246, "learning_rate": 6.555454354163437e-08, "loss": 0.963, "step": 13125 }, { "epoch": 1.8582855524881432, "grad_norm": 8.703024187308847, "learning_rate": 6.542420486093992e-08, "loss": 0.901, "step": 13126 }, { "epoch": 1.8584271253627804, "grad_norm": 9.044448869825443, "learning_rate": 6.529399416407955e-08, "loss": 1.0308, "step": 13127 }, { "epoch": 1.8585686982374177, "grad_norm": 10.277133662571455, "learning_rate": 6.516391145789836e-08, "loss": 1.0744, "step": 13128 }, { "epoch": 1.858710271112055, "grad_norm": 10.794214917426546, "learning_rate": 6.503395674923529e-08, "loss": 1.0107, "step": 13129 }, { "epoch": 1.8588518439866921, "grad_norm": 8.803718249528993, "learning_rate": 6.490413004492102e-08, "loss": 1.1059, "step": 13130 }, { "epoch": 1.8589934168613294, "grad_norm": 10.4164680970657, "learning_rate": 6.477443135178118e-08, "loss": 1.0361, "step": 13131 }, { "epoch": 1.8591349897359666, "grad_norm": 9.898872588726054, "learning_rate": 6.464486067663366e-08, "loss": 1.0363, "step": 13132 }, { "epoch": 1.8592765626106038, "grad_norm": 7.72906126651377, "learning_rate": 6.451541802628969e-08, "loss": 0.9623, "step": 13133 }, { "epoch": 1.859418135485241, "grad_norm": 9.461080233491856, "learning_rate": 6.438610340755464e-08, "loss": 0.9553, "step": 13134 }, { "epoch": 1.8595597083598783, "grad_norm": 10.32943869588143, "learning_rate": 6.425691682722584e-08, "loss": 0.9445, "step": 13135 }, { "epoch": 1.8597012812345155, "grad_norm": 9.319100927119727, "learning_rate": 6.412785829209511e-08, "loss": 0.9638, "step": 13136 }, { "epoch": 1.8598428541091527, "grad_norm": 8.89988679278093, "learning_rate": 6.399892780894618e-08, "loss": 0.8734, "step": 13137 }, { "epoch": 1.85998442698379, "grad_norm": 9.4174486583893, "learning_rate": 6.387012538455723e-08, "loss": 0.9438, "step": 13138 }, { "epoch": 1.8601259998584272, "grad_norm": 9.57461398867999, "learning_rate": 6.37414510256995e-08, "loss": 0.9536, "step": 13139 }, { "epoch": 1.8602675727330644, "grad_norm": 9.097639897070962, "learning_rate": 6.361290473913705e-08, "loss": 0.9459, "step": 13140 }, { "epoch": 1.8604091456077017, "grad_norm": 9.249057618671001, "learning_rate": 6.34844865316278e-08, "loss": 0.9651, "step": 13141 }, { "epoch": 1.8605507184823389, "grad_norm": 8.672152128173241, "learning_rate": 6.335619640992191e-08, "loss": 0.8411, "step": 13142 }, { "epoch": 1.8606922913569761, "grad_norm": 9.00859676497001, "learning_rate": 6.322803438076453e-08, "loss": 0.9454, "step": 13143 }, { "epoch": 1.8608338642316133, "grad_norm": 8.298758485160267, "learning_rate": 6.310000045089193e-08, "loss": 1.0268, "step": 13144 }, { "epoch": 1.8609754371062506, "grad_norm": 9.915302172051335, "learning_rate": 6.297209462703569e-08, "loss": 1.0177, "step": 13145 }, { "epoch": 1.8611170099808878, "grad_norm": 7.44745079308549, "learning_rate": 6.284431691591875e-08, "loss": 0.8253, "step": 13146 }, { "epoch": 1.861258582855525, "grad_norm": 9.71640447543101, "learning_rate": 6.271666732425935e-08, "loss": 0.9073, "step": 13147 }, { "epoch": 1.8614001557301623, "grad_norm": 10.034096002160155, "learning_rate": 6.258914585876741e-08, "loss": 0.8306, "step": 13148 }, { "epoch": 1.8615417286047993, "grad_norm": 8.227819044290287, "learning_rate": 6.246175252614645e-08, "loss": 0.9639, "step": 13149 }, { "epoch": 1.8616833014794365, "grad_norm": 10.147896203809006, "learning_rate": 6.233448733309388e-08, "loss": 1.0243, "step": 13150 }, { "epoch": 1.8618248743540737, "grad_norm": 10.211489276527146, "learning_rate": 6.220735028629937e-08, "loss": 1.0547, "step": 13151 }, { "epoch": 1.861966447228711, "grad_norm": 9.224214647559856, "learning_rate": 6.2080341392447e-08, "loss": 0.9154, "step": 13152 }, { "epoch": 1.8621080201033482, "grad_norm": 10.40983341138714, "learning_rate": 6.195346065821312e-08, "loss": 0.9972, "step": 13153 }, { "epoch": 1.8622495929779854, "grad_norm": 9.637994095027308, "learning_rate": 6.18267080902682e-08, "loss": 0.9512, "step": 13154 }, { "epoch": 1.8623911658526227, "grad_norm": 11.425861377924164, "learning_rate": 6.170008369527496e-08, "loss": 1.0542, "step": 13155 }, { "epoch": 1.8625327387272599, "grad_norm": 9.46062097878354, "learning_rate": 6.157358747989034e-08, "loss": 0.96, "step": 13156 }, { "epoch": 1.8626743116018971, "grad_norm": 9.566724853693113, "learning_rate": 6.144721945076426e-08, "loss": 0.9594, "step": 13157 }, { "epoch": 1.8628158844765343, "grad_norm": 10.108588964925639, "learning_rate": 6.132097961453948e-08, "loss": 0.9837, "step": 13158 }, { "epoch": 1.8629574573511714, "grad_norm": 8.910304809562712, "learning_rate": 6.119486797785263e-08, "loss": 0.9431, "step": 13159 }, { "epoch": 1.8630990302258086, "grad_norm": 10.404625201608058, "learning_rate": 6.106888454733284e-08, "loss": 0.9828, "step": 13160 }, { "epoch": 1.8632406031004458, "grad_norm": 8.827154865804856, "learning_rate": 6.094302932960317e-08, "loss": 0.9122, "step": 13161 }, { "epoch": 1.863382175975083, "grad_norm": 10.316580040423704, "learning_rate": 6.081730233127996e-08, "loss": 0.991, "step": 13162 }, { "epoch": 1.8635237488497203, "grad_norm": 9.8298516871769, "learning_rate": 6.069170355897241e-08, "loss": 0.9367, "step": 13163 }, { "epoch": 1.8636653217243575, "grad_norm": 9.032865914314048, "learning_rate": 6.056623301928327e-08, "loss": 0.8885, "step": 13164 }, { "epoch": 1.8638068945989947, "grad_norm": 8.356191895424857, "learning_rate": 6.04408907188081e-08, "loss": 0.9255, "step": 13165 }, { "epoch": 1.863948467473632, "grad_norm": 9.652427534035398, "learning_rate": 6.031567666413663e-08, "loss": 0.914, "step": 13166 }, { "epoch": 1.8640900403482692, "grad_norm": 9.536217967569742, "learning_rate": 6.019059086185053e-08, "loss": 0.9193, "step": 13167 }, { "epoch": 1.8642316132229064, "grad_norm": 9.854806848314082, "learning_rate": 6.006563331852622e-08, "loss": 0.9535, "step": 13168 }, { "epoch": 1.8643731860975437, "grad_norm": 10.001856822239464, "learning_rate": 5.994080404073233e-08, "loss": 0.939, "step": 13169 }, { "epoch": 1.8645147589721809, "grad_norm": 8.67311189095626, "learning_rate": 5.981610303503054e-08, "loss": 0.9379, "step": 13170 }, { "epoch": 1.8646563318468181, "grad_norm": 9.865987407225454, "learning_rate": 5.969153030797731e-08, "loss": 1.0385, "step": 13171 }, { "epoch": 1.8647979047214553, "grad_norm": 9.573757351948313, "learning_rate": 5.9567085866120144e-08, "loss": 1.0238, "step": 13172 }, { "epoch": 1.8649394775960926, "grad_norm": 8.419898542576695, "learning_rate": 5.944276971600216e-08, "loss": 0.8576, "step": 13173 }, { "epoch": 1.8650810504707298, "grad_norm": 11.405991141112324, "learning_rate": 5.9318581864157563e-08, "loss": 1.0316, "step": 13174 }, { "epoch": 1.865222623345367, "grad_norm": 11.364754894347424, "learning_rate": 5.919452231711559e-08, "loss": 1.0169, "step": 13175 }, { "epoch": 1.8653641962200043, "grad_norm": 9.476643812769453, "learning_rate": 5.9070591081397397e-08, "loss": 0.8696, "step": 13176 }, { "epoch": 1.8655057690946415, "grad_norm": 9.397357780352579, "learning_rate": 5.894678816351862e-08, "loss": 0.9797, "step": 13177 }, { "epoch": 1.8656473419692787, "grad_norm": 8.829254024182063, "learning_rate": 5.8823113569986545e-08, "loss": 0.8938, "step": 13178 }, { "epoch": 1.865788914843916, "grad_norm": 9.301128638908265, "learning_rate": 5.8699567307303474e-08, "loss": 0.8945, "step": 13179 }, { "epoch": 1.8659304877185532, "grad_norm": 8.03930972120474, "learning_rate": 5.8576149381963935e-08, "loss": 0.9437, "step": 13180 }, { "epoch": 1.8660720605931904, "grad_norm": 8.77202558833268, "learning_rate": 5.845285980045551e-08, "loss": 0.9612, "step": 13181 }, { "epoch": 1.8662136334678276, "grad_norm": 10.103910357708127, "learning_rate": 5.8329698569259963e-08, "loss": 0.9519, "step": 13182 }, { "epoch": 1.8663552063424649, "grad_norm": 10.04034372903964, "learning_rate": 5.820666569485156e-08, "loss": 1.0175, "step": 13183 }, { "epoch": 1.866496779217102, "grad_norm": 9.829998765636844, "learning_rate": 5.808376118369791e-08, "loss": 0.9274, "step": 13184 }, { "epoch": 1.8666383520917393, "grad_norm": 7.857430128320299, "learning_rate": 5.796098504226022e-08, "loss": 0.9941, "step": 13185 }, { "epoch": 1.8667799249663766, "grad_norm": 9.923344733048326, "learning_rate": 5.7838337276992787e-08, "loss": 0.9607, "step": 13186 }, { "epoch": 1.8669214978410138, "grad_norm": 9.198758447878815, "learning_rate": 5.7715817894342944e-08, "loss": 0.9882, "step": 13187 }, { "epoch": 1.867063070715651, "grad_norm": 10.3930785989365, "learning_rate": 5.759342690075137e-08, "loss": 0.9949, "step": 13188 }, { "epoch": 1.8672046435902883, "grad_norm": 9.879566923217435, "learning_rate": 5.7471164302652646e-08, "loss": 1.0297, "step": 13189 }, { "epoch": 1.8673462164649253, "grad_norm": 10.355206463577648, "learning_rate": 5.73490301064733e-08, "loss": 1.0071, "step": 13190 }, { "epoch": 1.8674877893395625, "grad_norm": 8.1367267200213, "learning_rate": 5.722702431863403e-08, "loss": 0.9257, "step": 13191 }, { "epoch": 1.8676293622141997, "grad_norm": 10.68796435680271, "learning_rate": 5.710514694554886e-08, "loss": 1.1202, "step": 13192 }, { "epoch": 1.867770935088837, "grad_norm": 9.41941059836059, "learning_rate": 5.6983397993624346e-08, "loss": 0.9225, "step": 13193 }, { "epoch": 1.8679125079634742, "grad_norm": 7.770646434782907, "learning_rate": 5.686177746926147e-08, "loss": 0.8764, "step": 13194 }, { "epoch": 1.8680540808381114, "grad_norm": 9.625136733632061, "learning_rate": 5.67402853788529e-08, "loss": 0.952, "step": 13195 }, { "epoch": 1.8681956537127486, "grad_norm": 10.000700735336194, "learning_rate": 5.6618921728786026e-08, "loss": 1.1599, "step": 13196 }, { "epoch": 1.8683372265873859, "grad_norm": 9.374472641417622, "learning_rate": 5.6497686525440186e-08, "loss": 1.0372, "step": 13197 }, { "epoch": 1.868478799462023, "grad_norm": 10.731444201658988, "learning_rate": 5.6376579775189456e-08, "loss": 0.9827, "step": 13198 }, { "epoch": 1.8686203723366603, "grad_norm": 8.170436170834437, "learning_rate": 5.625560148439929e-08, "loss": 0.9443, "step": 13199 }, { "epoch": 1.8687619452112976, "grad_norm": 11.406104852458329, "learning_rate": 5.6134751659430716e-08, "loss": 1.1051, "step": 13200 }, { "epoch": 1.8689035180859346, "grad_norm": 10.678383312946893, "learning_rate": 5.6014030306635606e-08, "loss": 1.012, "step": 13201 }, { "epoch": 1.8690450909605718, "grad_norm": 9.765119908813004, "learning_rate": 5.589343743236081e-08, "loss": 0.9728, "step": 13202 }, { "epoch": 1.869186663835209, "grad_norm": 9.20505346743727, "learning_rate": 5.577297304294543e-08, "loss": 0.9947, "step": 13203 }, { "epoch": 1.8693282367098463, "grad_norm": 9.614592635183632, "learning_rate": 5.5652637144722463e-08, "loss": 1.0008, "step": 13204 }, { "epoch": 1.8694698095844835, "grad_norm": 9.565069912915684, "learning_rate": 5.5532429744017957e-08, "loss": 0.9796, "step": 13205 }, { "epoch": 1.8696113824591207, "grad_norm": 10.25259031734576, "learning_rate": 5.5412350847150466e-08, "loss": 1.0645, "step": 13206 }, { "epoch": 1.869752955333758, "grad_norm": 8.77164289373295, "learning_rate": 5.5292400460432993e-08, "loss": 0.9238, "step": 13207 }, { "epoch": 1.8698945282083952, "grad_norm": 9.03517060499292, "learning_rate": 5.517257859017161e-08, "loss": 1.0229, "step": 13208 }, { "epoch": 1.8700361010830324, "grad_norm": 8.555108883029384, "learning_rate": 5.505288524266461e-08, "loss": 0.9477, "step": 13209 }, { "epoch": 1.8701776739576697, "grad_norm": 11.169984542849646, "learning_rate": 5.493332042420446e-08, "loss": 0.9355, "step": 13210 }, { "epoch": 1.8703192468323069, "grad_norm": 13.07381784846208, "learning_rate": 5.481388414107669e-08, "loss": 0.9079, "step": 13211 }, { "epoch": 1.8704608197069441, "grad_norm": 10.264490816816762, "learning_rate": 5.469457639955961e-08, "loss": 0.8875, "step": 13212 }, { "epoch": 1.8706023925815813, "grad_norm": 8.38332442882917, "learning_rate": 5.457539720592514e-08, "loss": 0.8591, "step": 13213 }, { "epoch": 1.8707439654562186, "grad_norm": 12.444692094548785, "learning_rate": 5.445634656643884e-08, "loss": 1.017, "step": 13214 }, { "epoch": 1.8708855383308558, "grad_norm": 12.509947823858582, "learning_rate": 5.4337424487359016e-08, "loss": 0.9404, "step": 13215 }, { "epoch": 1.871027111205493, "grad_norm": 8.536342344646213, "learning_rate": 5.421863097493707e-08, "loss": 0.8926, "step": 13216 }, { "epoch": 1.8711686840801303, "grad_norm": 8.701102824852281, "learning_rate": 5.409996603541828e-08, "loss": 0.8891, "step": 13217 }, { "epoch": 1.8713102569547675, "grad_norm": 8.755106716409724, "learning_rate": 5.398142967504017e-08, "loss": 0.9509, "step": 13218 }, { "epoch": 1.8714518298294047, "grad_norm": 8.799613025132972, "learning_rate": 5.386302190003495e-08, "loss": 0.8506, "step": 13219 }, { "epoch": 1.871593402704042, "grad_norm": 8.32791154598639, "learning_rate": 5.3744742716626276e-08, "loss": 0.9666, "step": 13220 }, { "epoch": 1.8717349755786792, "grad_norm": 8.370125789813716, "learning_rate": 5.362659213103277e-08, "loss": 0.9233, "step": 13221 }, { "epoch": 1.8718765484533164, "grad_norm": 10.341494588063245, "learning_rate": 5.350857014946531e-08, "loss": 0.9327, "step": 13222 }, { "epoch": 1.8720181213279536, "grad_norm": 11.31799452739178, "learning_rate": 5.339067677812782e-08, "loss": 0.8916, "step": 13223 }, { "epoch": 1.8721596942025909, "grad_norm": 11.420302505895886, "learning_rate": 5.327291202321866e-08, "loss": 1.0188, "step": 13224 }, { "epoch": 1.872301267077228, "grad_norm": 8.975636460202844, "learning_rate": 5.315527589092762e-08, "loss": 0.9627, "step": 13225 }, { "epoch": 1.8724428399518653, "grad_norm": 8.845316502163023, "learning_rate": 5.303776838743946e-08, "loss": 0.982, "step": 13226 }, { "epoch": 1.8725844128265026, "grad_norm": 6.9905358777357085, "learning_rate": 5.292038951893119e-08, "loss": 0.8401, "step": 13227 }, { "epoch": 1.8727259857011398, "grad_norm": 9.857518427863276, "learning_rate": 5.2803139291573716e-08, "loss": 0.9373, "step": 13228 }, { "epoch": 1.872867558575777, "grad_norm": 9.684638012850113, "learning_rate": 5.268601771153042e-08, "loss": 0.9604, "step": 13229 }, { "epoch": 1.8730091314504143, "grad_norm": 9.080521127811814, "learning_rate": 5.2569024784958065e-08, "loss": 1.0529, "step": 13230 }, { "epoch": 1.8731507043250515, "grad_norm": 9.921502438256224, "learning_rate": 5.2452160518007555e-08, "loss": 0.9141, "step": 13231 }, { "epoch": 1.8732922771996885, "grad_norm": 10.730630857439502, "learning_rate": 5.233542491682203e-08, "loss": 1.0272, "step": 13232 }, { "epoch": 1.8734338500743257, "grad_norm": 10.524612281888679, "learning_rate": 5.2218817987537976e-08, "loss": 0.9855, "step": 13233 }, { "epoch": 1.873575422948963, "grad_norm": 7.96389562811062, "learning_rate": 5.210233973628548e-08, "loss": 0.9537, "step": 13234 }, { "epoch": 1.8737169958236002, "grad_norm": 10.320140250975502, "learning_rate": 5.198599016918771e-08, "loss": 0.9, "step": 13235 }, { "epoch": 1.8738585686982374, "grad_norm": 9.625916672688932, "learning_rate": 5.1869769292361425e-08, "loss": 1.0395, "step": 13236 }, { "epoch": 1.8740001415728746, "grad_norm": 9.380123518596175, "learning_rate": 5.1753677111915645e-08, "loss": 0.9394, "step": 13237 }, { "epoch": 1.8741417144475119, "grad_norm": 10.04042275560786, "learning_rate": 5.163771363395381e-08, "loss": 0.954, "step": 13238 }, { "epoch": 1.874283287322149, "grad_norm": 9.243040198671528, "learning_rate": 5.152187886457161e-08, "loss": 0.9706, "step": 13239 }, { "epoch": 1.8744248601967863, "grad_norm": 8.896870489357172, "learning_rate": 5.14061728098586e-08, "loss": 0.9961, "step": 13240 }, { "epoch": 1.8745664330714236, "grad_norm": 9.231353110407133, "learning_rate": 5.1290595475897434e-08, "loss": 0.931, "step": 13241 }, { "epoch": 1.8747080059460606, "grad_norm": 8.674727237161994, "learning_rate": 5.117514686876379e-08, "loss": 0.9229, "step": 13242 }, { "epoch": 1.8748495788206978, "grad_norm": 9.290793293731998, "learning_rate": 5.105982699452699e-08, "loss": 0.9451, "step": 13243 }, { "epoch": 1.874991151695335, "grad_norm": 9.217015196940114, "learning_rate": 5.094463585924858e-08, "loss": 1.0649, "step": 13244 }, { "epoch": 1.8751327245699723, "grad_norm": 9.55927815732758, "learning_rate": 5.082957346898482e-08, "loss": 0.9931, "step": 13245 }, { "epoch": 1.8752742974446095, "grad_norm": 12.106163258554439, "learning_rate": 5.0714639829784195e-08, "loss": 0.984, "step": 13246 }, { "epoch": 1.8754158703192467, "grad_norm": 9.476625296061522, "learning_rate": 5.0599834947688834e-08, "loss": 0.9971, "step": 13247 }, { "epoch": 1.875557443193884, "grad_norm": 9.128382225813448, "learning_rate": 5.048515882873362e-08, "loss": 0.9432, "step": 13248 }, { "epoch": 1.8756990160685212, "grad_norm": 10.449945944322288, "learning_rate": 5.037061147894734e-08, "loss": 0.9985, "step": 13249 }, { "epoch": 1.8758405889431584, "grad_norm": 9.169764001236832, "learning_rate": 5.0256192904351295e-08, "loss": 0.9875, "step": 13250 }, { "epoch": 1.8759821618177956, "grad_norm": 9.817136798820611, "learning_rate": 5.014190311096068e-08, "loss": 1.0201, "step": 13251 }, { "epoch": 1.8761237346924329, "grad_norm": 8.931950480151736, "learning_rate": 5.002774210478345e-08, "loss": 0.9613, "step": 13252 }, { "epoch": 1.87626530756707, "grad_norm": 8.06730756919653, "learning_rate": 4.9913709891821207e-08, "loss": 0.9179, "step": 13253 }, { "epoch": 1.8764068804417073, "grad_norm": 8.356303283475874, "learning_rate": 4.9799806478068314e-08, "loss": 0.921, "step": 13254 }, { "epoch": 1.8765484533163446, "grad_norm": 8.775454308902573, "learning_rate": 4.9686031869512486e-08, "loss": 0.9654, "step": 13255 }, { "epoch": 1.8766900261909818, "grad_norm": 9.1686719146087, "learning_rate": 4.9572386072135046e-08, "loss": 0.922, "step": 13256 }, { "epoch": 1.876831599065619, "grad_norm": 9.622760859041078, "learning_rate": 4.945886909191011e-08, "loss": 1.0163, "step": 13257 }, { "epoch": 1.8769731719402563, "grad_norm": 7.829734651982721, "learning_rate": 4.9345480934805125e-08, "loss": 0.9285, "step": 13258 }, { "epoch": 1.8771147448148935, "grad_norm": 8.500813164803155, "learning_rate": 4.923222160678115e-08, "loss": 0.8954, "step": 13259 }, { "epoch": 1.8772563176895307, "grad_norm": 10.27612993473452, "learning_rate": 4.911909111379176e-08, "loss": 0.9429, "step": 13260 }, { "epoch": 1.877397890564168, "grad_norm": 8.52976893050524, "learning_rate": 4.9006089461784424e-08, "loss": 0.9801, "step": 13261 }, { "epoch": 1.8775394634388052, "grad_norm": 8.46638303667114, "learning_rate": 4.8893216656699386e-08, "loss": 0.9552, "step": 13262 }, { "epoch": 1.8776810363134424, "grad_norm": 10.000746889832389, "learning_rate": 4.878047270447051e-08, "loss": 0.9927, "step": 13263 }, { "epoch": 1.8778226091880796, "grad_norm": 9.376646583916234, "learning_rate": 4.8667857611024164e-08, "loss": 0.9315, "step": 13264 }, { "epoch": 1.8779641820627169, "grad_norm": 10.263740076188723, "learning_rate": 4.8555371382280894e-08, "loss": 1.0413, "step": 13265 }, { "epoch": 1.878105754937354, "grad_norm": 9.480995845720138, "learning_rate": 4.844301402415402e-08, "loss": 0.9604, "step": 13266 }, { "epoch": 1.8782473278119913, "grad_norm": 10.80554398833252, "learning_rate": 4.833078554254966e-08, "loss": 0.9783, "step": 13267 }, { "epoch": 1.8783889006866286, "grad_norm": 8.416308656638526, "learning_rate": 4.8218685943368094e-08, "loss": 0.8969, "step": 13268 }, { "epoch": 1.8785304735612658, "grad_norm": 10.847486748090546, "learning_rate": 4.810671523250182e-08, "loss": 1.0563, "step": 13269 }, { "epoch": 1.878672046435903, "grad_norm": 9.085369850066645, "learning_rate": 4.799487341583753e-08, "loss": 0.9245, "step": 13270 }, { "epoch": 1.8788136193105403, "grad_norm": 8.838085072101265, "learning_rate": 4.788316049925412e-08, "loss": 1.0472, "step": 13271 }, { "epoch": 1.8789551921851775, "grad_norm": 9.80652023147319, "learning_rate": 4.777157648862496e-08, "loss": 0.998, "step": 13272 }, { "epoch": 1.8790967650598145, "grad_norm": 8.683869625622249, "learning_rate": 4.766012138981535e-08, "loss": 0.9783, "step": 13273 }, { "epoch": 1.8792383379344517, "grad_norm": 10.044013249116983, "learning_rate": 4.754879520868477e-08, "loss": 0.9726, "step": 13274 }, { "epoch": 1.879379910809089, "grad_norm": 9.526731126521376, "learning_rate": 4.743759795108549e-08, "loss": 0.9683, "step": 13275 }, { "epoch": 1.8795214836837262, "grad_norm": 9.313737415821798, "learning_rate": 4.732652962286283e-08, "loss": 1.0732, "step": 13276 }, { "epoch": 1.8796630565583634, "grad_norm": 9.767019431694685, "learning_rate": 4.7215590229855723e-08, "loss": 0.9437, "step": 13277 }, { "epoch": 1.8798046294330006, "grad_norm": 10.718827583766231, "learning_rate": 4.710477977789618e-08, "loss": 1.0804, "step": 13278 }, { "epoch": 1.8799462023076379, "grad_norm": 8.788663619420875, "learning_rate": 4.699409827280954e-08, "loss": 0.9904, "step": 13279 }, { "epoch": 1.880087775182275, "grad_norm": 10.488288524206043, "learning_rate": 4.6883545720413925e-08, "loss": 0.9651, "step": 13280 }, { "epoch": 1.8802293480569123, "grad_norm": 8.592938083094882, "learning_rate": 4.677312212652108e-08, "loss": 0.8702, "step": 13281 }, { "epoch": 1.8803709209315496, "grad_norm": 10.43129413245701, "learning_rate": 4.666282749693607e-08, "loss": 0.948, "step": 13282 }, { "epoch": 1.8805124938061866, "grad_norm": 7.879288973801254, "learning_rate": 4.655266183745705e-08, "loss": 0.875, "step": 13283 }, { "epoch": 1.8806540666808238, "grad_norm": 10.694775341468945, "learning_rate": 4.644262515387521e-08, "loss": 1.1069, "step": 13284 }, { "epoch": 1.880795639555461, "grad_norm": 9.007284395431055, "learning_rate": 4.633271745197537e-08, "loss": 1.0015, "step": 13285 }, { "epoch": 1.8809372124300983, "grad_norm": 8.653023569512062, "learning_rate": 4.6222938737534864e-08, "loss": 0.9783, "step": 13286 }, { "epoch": 1.8810787853047355, "grad_norm": 8.561169151963718, "learning_rate": 4.6113289016324615e-08, "loss": 0.9322, "step": 13287 }, { "epoch": 1.8812203581793727, "grad_norm": 9.91877027580434, "learning_rate": 4.600376829410919e-08, "loss": 0.8596, "step": 13288 }, { "epoch": 1.88136193105401, "grad_norm": 8.815777973847782, "learning_rate": 4.589437657664592e-08, "loss": 0.903, "step": 13289 }, { "epoch": 1.8815035039286472, "grad_norm": 10.741114292981097, "learning_rate": 4.578511386968548e-08, "loss": 1.0245, "step": 13290 }, { "epoch": 1.8816450768032844, "grad_norm": 8.818082732025577, "learning_rate": 4.567598017897162e-08, "loss": 1.011, "step": 13291 }, { "epoch": 1.8817866496779216, "grad_norm": 8.86751573938178, "learning_rate": 4.556697551024142e-08, "loss": 0.9409, "step": 13292 }, { "epoch": 1.8819282225525589, "grad_norm": 10.017920649059832, "learning_rate": 4.545809986922528e-08, "loss": 1.0133, "step": 13293 }, { "epoch": 1.882069795427196, "grad_norm": 8.990634283618556, "learning_rate": 4.5349353261646414e-08, "loss": 0.9205, "step": 13294 }, { "epoch": 1.8822113683018333, "grad_norm": 8.427300964739123, "learning_rate": 4.524073569322218e-08, "loss": 0.9228, "step": 13295 }, { "epoch": 1.8823529411764706, "grad_norm": 10.202797853795309, "learning_rate": 4.5132247169661916e-08, "loss": 0.9682, "step": 13296 }, { "epoch": 1.8824945140511078, "grad_norm": 9.530050033325917, "learning_rate": 4.5023887696668824e-08, "loss": 0.9838, "step": 13297 }, { "epoch": 1.882636086925745, "grad_norm": 9.71190026590586, "learning_rate": 4.491565727993974e-08, "loss": 0.9801, "step": 13298 }, { "epoch": 1.8827776598003823, "grad_norm": 8.20485472834719, "learning_rate": 4.480755592516372e-08, "loss": 0.9881, "step": 13299 }, { "epoch": 1.8829192326750195, "grad_norm": 7.8061013835995725, "learning_rate": 4.469958363802401e-08, "loss": 0.9568, "step": 13300 }, { "epoch": 1.8830608055496567, "grad_norm": 9.424635449551012, "learning_rate": 4.459174042419634e-08, "loss": 1.0009, "step": 13301 }, { "epoch": 1.883202378424294, "grad_norm": 8.20200187623924, "learning_rate": 4.448402628935034e-08, "loss": 0.9783, "step": 13302 }, { "epoch": 1.8833439512989312, "grad_norm": 8.738539683818118, "learning_rate": 4.437644123914758e-08, "loss": 0.8802, "step": 13303 }, { "epoch": 1.8834855241735684, "grad_norm": 9.613350692430009, "learning_rate": 4.426898527924467e-08, "loss": 0.9048, "step": 13304 }, { "epoch": 1.8836270970482056, "grad_norm": 10.40065562309111, "learning_rate": 4.4161658415290135e-08, "loss": 0.9539, "step": 13305 }, { "epoch": 1.8837686699228429, "grad_norm": 10.8680608249011, "learning_rate": 4.405446065292612e-08, "loss": 0.9691, "step": 13306 }, { "epoch": 1.88391024279748, "grad_norm": 8.00131882287373, "learning_rate": 4.3947391997787857e-08, "loss": 0.9092, "step": 13307 }, { "epoch": 1.8840518156721173, "grad_norm": 9.624039639208204, "learning_rate": 4.384045245550389e-08, "loss": 0.9545, "step": 13308 }, { "epoch": 1.8841933885467546, "grad_norm": 10.888272309092619, "learning_rate": 4.373364203169583e-08, "loss": 0.9556, "step": 13309 }, { "epoch": 1.8843349614213918, "grad_norm": 10.6046112107264, "learning_rate": 4.362696073197864e-08, "loss": 0.9528, "step": 13310 }, { "epoch": 1.884476534296029, "grad_norm": 8.718632741707017, "learning_rate": 4.35204085619606e-08, "loss": 1.0298, "step": 13311 }, { "epoch": 1.8846181071706662, "grad_norm": 9.05153060530804, "learning_rate": 4.3413985527243353e-08, "loss": 1.0097, "step": 13312 }, { "epoch": 1.8847596800453035, "grad_norm": 7.848387301066319, "learning_rate": 4.330769163342102e-08, "loss": 0.9568, "step": 13313 }, { "epoch": 1.8849012529199405, "grad_norm": 10.560864564587622, "learning_rate": 4.320152688608165e-08, "loss": 0.921, "step": 13314 }, { "epoch": 1.8850428257945777, "grad_norm": 12.616120794691607, "learning_rate": 4.309549129080576e-08, "loss": 0.9689, "step": 13315 }, { "epoch": 1.885184398669215, "grad_norm": 7.705250166409467, "learning_rate": 4.298958485316834e-08, "loss": 0.869, "step": 13316 }, { "epoch": 1.8853259715438522, "grad_norm": 8.577057207420006, "learning_rate": 4.2883807578736337e-08, "loss": 0.9285, "step": 13317 }, { "epoch": 1.8854675444184894, "grad_norm": 11.701084993052929, "learning_rate": 4.277815947307029e-08, "loss": 1.0675, "step": 13318 }, { "epoch": 1.8856091172931266, "grad_norm": 8.645361707455297, "learning_rate": 4.267264054172465e-08, "loss": 0.9967, "step": 13319 }, { "epoch": 1.8857506901677639, "grad_norm": 10.413934293331234, "learning_rate": 4.256725079024554e-08, "loss": 0.8381, "step": 13320 }, { "epoch": 1.885892263042401, "grad_norm": 9.981143721060665, "learning_rate": 4.2461990224174076e-08, "loss": 1.0067, "step": 13321 }, { "epoch": 1.8860338359170383, "grad_norm": 9.85311744271039, "learning_rate": 4.235685884904306e-08, "loss": 0.9281, "step": 13322 }, { "epoch": 1.8861754087916756, "grad_norm": 10.451370251632977, "learning_rate": 4.2251856670379733e-08, "loss": 0.9824, "step": 13323 }, { "epoch": 1.8863169816663128, "grad_norm": 9.26018767329415, "learning_rate": 4.214698369370357e-08, "loss": 0.9729, "step": 13324 }, { "epoch": 1.8864585545409498, "grad_norm": 8.379081528615389, "learning_rate": 4.204223992452794e-08, "loss": 0.9223, "step": 13325 }, { "epoch": 1.886600127415587, "grad_norm": 8.240881764907417, "learning_rate": 4.193762536835871e-08, "loss": 0.901, "step": 13326 }, { "epoch": 1.8867417002902243, "grad_norm": 9.749712426272866, "learning_rate": 4.1833140030696216e-08, "loss": 1.0013, "step": 13327 }, { "epoch": 1.8868832731648615, "grad_norm": 11.04511614965082, "learning_rate": 4.172878391703245e-08, "loss": 1.005, "step": 13328 }, { "epoch": 1.8870248460394987, "grad_norm": 11.05504326669136, "learning_rate": 4.162455703285356e-08, "loss": 0.9638, "step": 13329 }, { "epoch": 1.887166418914136, "grad_norm": 9.028449551620275, "learning_rate": 4.152045938363852e-08, "loss": 0.922, "step": 13330 }, { "epoch": 1.8873079917887732, "grad_norm": 8.990171363903123, "learning_rate": 4.141649097485989e-08, "loss": 0.9562, "step": 13331 }, { "epoch": 1.8874495646634104, "grad_norm": 9.706085294074015, "learning_rate": 4.131265181198302e-08, "loss": 0.9813, "step": 13332 }, { "epoch": 1.8875911375380476, "grad_norm": 10.115879802069708, "learning_rate": 4.120894190046687e-08, "loss": 0.9894, "step": 13333 }, { "epoch": 1.8877327104126849, "grad_norm": 9.24253378934413, "learning_rate": 4.11053612457632e-08, "loss": 0.866, "step": 13334 }, { "epoch": 1.887874283287322, "grad_norm": 8.870302058326821, "learning_rate": 4.100190985331765e-08, "loss": 0.8875, "step": 13335 }, { "epoch": 1.8880158561619593, "grad_norm": 10.63363260866458, "learning_rate": 4.0898587728567805e-08, "loss": 0.9882, "step": 13336 }, { "epoch": 1.8881574290365966, "grad_norm": 9.626030408521533, "learning_rate": 4.0795394876945726e-08, "loss": 0.9881, "step": 13337 }, { "epoch": 1.8882990019112338, "grad_norm": 9.362831050790382, "learning_rate": 4.0692331303876234e-08, "loss": 0.9728, "step": 13338 }, { "epoch": 1.888440574785871, "grad_norm": 9.423776513783542, "learning_rate": 4.058939701477693e-08, "loss": 0.9001, "step": 13339 }, { "epoch": 1.8885821476605082, "grad_norm": 10.186580312206411, "learning_rate": 4.048659201505933e-08, "loss": 0.9023, "step": 13340 }, { "epoch": 1.8887237205351455, "grad_norm": 9.09367197534182, "learning_rate": 4.038391631012745e-08, "loss": 0.9923, "step": 13341 }, { "epoch": 1.8888652934097827, "grad_norm": 9.229102783900544, "learning_rate": 4.028136990537945e-08, "loss": 0.8629, "step": 13342 }, { "epoch": 1.88900686628442, "grad_norm": 11.820449334138592, "learning_rate": 4.0178952806205486e-08, "loss": 1.1086, "step": 13343 }, { "epoch": 1.8891484391590572, "grad_norm": 9.912805355799772, "learning_rate": 4.0076665017990124e-08, "loss": 0.9833, "step": 13344 }, { "epoch": 1.8892900120336944, "grad_norm": 10.397490609475014, "learning_rate": 3.997450654611018e-08, "loss": 1.0017, "step": 13345 }, { "epoch": 1.8894315849083316, "grad_norm": 10.316023900762044, "learning_rate": 3.987247739593636e-08, "loss": 0.9965, "step": 13346 }, { "epoch": 1.8895731577829689, "grad_norm": 10.592827770860124, "learning_rate": 3.9770577572831594e-08, "loss": 1.0287, "step": 13347 }, { "epoch": 1.889714730657606, "grad_norm": 8.677726242470305, "learning_rate": 3.966880708215354e-08, "loss": 0.9679, "step": 13348 }, { "epoch": 1.8898563035322433, "grad_norm": 11.224773012172882, "learning_rate": 3.9567165929251804e-08, "loss": 1.0072, "step": 13349 }, { "epoch": 1.8899978764068806, "grad_norm": 9.441440189223236, "learning_rate": 3.9465654119469345e-08, "loss": 0.9587, "step": 13350 }, { "epoch": 1.8901394492815178, "grad_norm": 10.030298110624045, "learning_rate": 3.9364271658142997e-08, "loss": 0.8941, "step": 13351 }, { "epoch": 1.890281022156155, "grad_norm": 12.141419923501084, "learning_rate": 3.926301855060183e-08, "loss": 1.0563, "step": 13352 }, { "epoch": 1.8904225950307922, "grad_norm": 8.887882822110692, "learning_rate": 3.916189480216937e-08, "loss": 0.9966, "step": 13353 }, { "epoch": 1.8905641679054295, "grad_norm": 10.01802688816184, "learning_rate": 3.906090041816107e-08, "loss": 0.9931, "step": 13354 }, { "epoch": 1.8907057407800667, "grad_norm": 9.875217483636556, "learning_rate": 3.896003540388604e-08, "loss": 1.0032, "step": 13355 }, { "epoch": 1.8908473136547037, "grad_norm": 10.023933380538823, "learning_rate": 3.885929976464725e-08, "loss": 0.9948, "step": 13356 }, { "epoch": 1.890988886529341, "grad_norm": 9.149410674914652, "learning_rate": 3.875869350573963e-08, "loss": 0.9835, "step": 13357 }, { "epoch": 1.8911304594039782, "grad_norm": 10.350117455152343, "learning_rate": 3.865821663245284e-08, "loss": 0.9881, "step": 13358 }, { "epoch": 1.8912720322786154, "grad_norm": 10.747214732744421, "learning_rate": 3.855786915006793e-08, "loss": 0.9584, "step": 13359 }, { "epoch": 1.8914136051532526, "grad_norm": 10.006125290788685, "learning_rate": 3.8457651063860954e-08, "loss": 0.9646, "step": 13360 }, { "epoch": 1.8915551780278899, "grad_norm": 9.493191337094988, "learning_rate": 3.835756237909938e-08, "loss": 0.9013, "step": 13361 }, { "epoch": 1.891696750902527, "grad_norm": 8.807313515613298, "learning_rate": 3.825760310104537e-08, "loss": 0.8218, "step": 13362 }, { "epoch": 1.8918383237771643, "grad_norm": 8.134579689126538, "learning_rate": 3.815777323495362e-08, "loss": 0.9365, "step": 13363 }, { "epoch": 1.8919798966518016, "grad_norm": 13.152618124365606, "learning_rate": 3.805807278607215e-08, "loss": 1.0082, "step": 13364 }, { "epoch": 1.8921214695264388, "grad_norm": 10.136924597500474, "learning_rate": 3.795850175964205e-08, "loss": 1.1796, "step": 13365 }, { "epoch": 1.8922630424010758, "grad_norm": 9.081924982577215, "learning_rate": 3.785906016089774e-08, "loss": 0.9606, "step": 13366 }, { "epoch": 1.892404615275713, "grad_norm": 10.561186759615813, "learning_rate": 3.775974799506699e-08, "loss": 0.9984, "step": 13367 }, { "epoch": 1.8925461881503503, "grad_norm": 10.39465529214789, "learning_rate": 3.766056526737005e-08, "loss": 1.0717, "step": 13368 }, { "epoch": 1.8926877610249875, "grad_norm": 9.02889614383574, "learning_rate": 3.756151198302138e-08, "loss": 0.8931, "step": 13369 }, { "epoch": 1.8928293338996247, "grad_norm": 8.794903874828904, "learning_rate": 3.7462588147228193e-08, "loss": 0.9827, "step": 13370 }, { "epoch": 1.892970906774262, "grad_norm": 9.490154987512676, "learning_rate": 3.736379376519023e-08, "loss": 0.9759, "step": 13371 }, { "epoch": 1.8931124796488992, "grad_norm": 8.538775244625112, "learning_rate": 3.726512884210165e-08, "loss": 1.0444, "step": 13372 }, { "epoch": 1.8932540525235364, "grad_norm": 8.785570052643275, "learning_rate": 3.7166593383148594e-08, "loss": 1.0032, "step": 13373 }, { "epoch": 1.8933956253981736, "grad_norm": 8.348268121439405, "learning_rate": 3.706818739351164e-08, "loss": 0.9337, "step": 13374 }, { "epoch": 1.8935371982728109, "grad_norm": 7.993705657525811, "learning_rate": 3.69699108783636e-08, "loss": 0.9573, "step": 13375 }, { "epoch": 1.893678771147448, "grad_norm": 9.333568524621684, "learning_rate": 3.687176384287089e-08, "loss": 0.9256, "step": 13376 }, { "epoch": 1.8938203440220853, "grad_norm": 9.757253443002112, "learning_rate": 3.677374629219271e-08, "loss": 0.9553, "step": 13377 }, { "epoch": 1.8939619168967226, "grad_norm": 9.3030921471592, "learning_rate": 3.667585823148218e-08, "loss": 0.9815, "step": 13378 }, { "epoch": 1.8941034897713598, "grad_norm": 8.0136954381356, "learning_rate": 3.657809966588516e-08, "loss": 1.01, "step": 13379 }, { "epoch": 1.894245062645997, "grad_norm": 7.691047656222978, "learning_rate": 3.6480470600540606e-08, "loss": 0.8806, "step": 13380 }, { "epoch": 1.8943866355206342, "grad_norm": 9.994045010352181, "learning_rate": 3.638297104058081e-08, "loss": 0.8653, "step": 13381 }, { "epoch": 1.8945282083952715, "grad_norm": 8.020049244260978, "learning_rate": 3.6285600991131095e-08, "loss": 0.9492, "step": 13382 }, { "epoch": 1.8946697812699087, "grad_norm": 10.33901509595072, "learning_rate": 3.618836045731072e-08, "loss": 0.9447, "step": 13383 }, { "epoch": 1.894811354144546, "grad_norm": 9.634850947638117, "learning_rate": 3.609124944423087e-08, "loss": 1.0634, "step": 13384 }, { "epoch": 1.8949529270191832, "grad_norm": 9.635057223377238, "learning_rate": 3.599426795699662e-08, "loss": 0.8922, "step": 13385 }, { "epoch": 1.8950944998938204, "grad_norm": 8.150998350440682, "learning_rate": 3.5897416000706956e-08, "loss": 0.8751, "step": 13386 }, { "epoch": 1.8952360727684576, "grad_norm": 9.417267186644352, "learning_rate": 3.580069358045252e-08, "loss": 0.9288, "step": 13387 }, { "epoch": 1.8953776456430949, "grad_norm": 9.091504635810772, "learning_rate": 3.570410070131841e-08, "loss": 0.8924, "step": 13388 }, { "epoch": 1.895519218517732, "grad_norm": 9.16706079733899, "learning_rate": 3.5607637368381965e-08, "loss": 0.9539, "step": 13389 }, { "epoch": 1.8956607913923693, "grad_norm": 8.10870934843291, "learning_rate": 3.5511303586714676e-08, "loss": 0.974, "step": 13390 }, { "epoch": 1.8958023642670065, "grad_norm": 11.078996502986342, "learning_rate": 3.541509936138082e-08, "loss": 0.9742, "step": 13391 }, { "epoch": 1.8959439371416438, "grad_norm": 7.956731372064616, "learning_rate": 3.5319024697437196e-08, "loss": 0.8735, "step": 13392 }, { "epoch": 1.896085510016281, "grad_norm": 11.938408782080083, "learning_rate": 3.522307959993476e-08, "loss": 0.9255, "step": 13393 }, { "epoch": 1.8962270828909182, "grad_norm": 10.41963137335208, "learning_rate": 3.5127264073917256e-08, "loss": 0.9519, "step": 13394 }, { "epoch": 1.8963686557655555, "grad_norm": 9.612170896613476, "learning_rate": 3.503157812442148e-08, "loss": 0.9151, "step": 13395 }, { "epoch": 1.8965102286401927, "grad_norm": 9.96809294158566, "learning_rate": 3.4936021756477865e-08, "loss": 1.0055, "step": 13396 }, { "epoch": 1.8966518015148297, "grad_norm": 7.6800046527371615, "learning_rate": 3.4840594975109607e-08, "loss": 0.9635, "step": 13397 }, { "epoch": 1.896793374389467, "grad_norm": 10.317194939205821, "learning_rate": 3.474529778533298e-08, "loss": 0.9365, "step": 13398 }, { "epoch": 1.8969349472641042, "grad_norm": 10.032948192753542, "learning_rate": 3.465013019215785e-08, "loss": 0.9516, "step": 13399 }, { "epoch": 1.8970765201387414, "grad_norm": 11.500851806938313, "learning_rate": 3.455509220058717e-08, "loss": 0.8813, "step": 13400 }, { "epoch": 1.8972180930133786, "grad_norm": 9.955698683633253, "learning_rate": 3.4460183815617224e-08, "loss": 1.1118, "step": 13401 }, { "epoch": 1.8973596658880159, "grad_norm": 8.555520881923856, "learning_rate": 3.4365405042236785e-08, "loss": 0.9273, "step": 13402 }, { "epoch": 1.897501238762653, "grad_norm": 9.017501344905423, "learning_rate": 3.4270755885428555e-08, "loss": 0.9484, "step": 13403 }, { "epoch": 1.8976428116372903, "grad_norm": 9.2968479188156, "learning_rate": 3.4176236350168255e-08, "loss": 0.9737, "step": 13404 }, { "epoch": 1.8977843845119275, "grad_norm": 9.144766523030656, "learning_rate": 3.408184644142443e-08, "loss": 0.9855, "step": 13405 }, { "epoch": 1.8979259573865648, "grad_norm": 9.329791714059558, "learning_rate": 3.398758616415948e-08, "loss": 0.9728, "step": 13406 }, { "epoch": 1.898067530261202, "grad_norm": 10.02258687264066, "learning_rate": 3.389345552332834e-08, "loss": 0.9495, "step": 13407 }, { "epoch": 1.898209103135839, "grad_norm": 8.39615657616417, "learning_rate": 3.379945452387928e-08, "loss": 0.9369, "step": 13408 }, { "epoch": 1.8983506760104762, "grad_norm": 12.578124393439426, "learning_rate": 3.370558317075417e-08, "loss": 1.0413, "step": 13409 }, { "epoch": 1.8984922488851135, "grad_norm": 7.878413111658907, "learning_rate": 3.3611841468887683e-08, "loss": 0.9447, "step": 13410 }, { "epoch": 1.8986338217597507, "grad_norm": 8.499323649705394, "learning_rate": 3.351822942320754e-08, "loss": 0.8958, "step": 13411 }, { "epoch": 1.898775394634388, "grad_norm": 7.8879394795724975, "learning_rate": 3.342474703863508e-08, "loss": 1.0375, "step": 13412 }, { "epoch": 1.8989169675090252, "grad_norm": 8.907440641356914, "learning_rate": 3.333139432008442e-08, "loss": 0.9485, "step": 13413 }, { "epoch": 1.8990585403836624, "grad_norm": 10.450964369377594, "learning_rate": 3.3238171272463316e-08, "loss": 0.9393, "step": 13414 }, { "epoch": 1.8992001132582996, "grad_norm": 10.309800413107205, "learning_rate": 3.314507790067201e-08, "loss": 0.9853, "step": 13415 }, { "epoch": 1.8993416861329369, "grad_norm": 9.7462980627382, "learning_rate": 3.3052114209604636e-08, "loss": 0.9594, "step": 13416 }, { "epoch": 1.899483259007574, "grad_norm": 9.692497293483227, "learning_rate": 3.295928020414812e-08, "loss": 1.0306, "step": 13417 }, { "epoch": 1.8996248318822113, "grad_norm": 8.523049014405181, "learning_rate": 3.286657588918302e-08, "loss": 0.999, "step": 13418 }, { "epoch": 1.8997664047568485, "grad_norm": 8.688285956747219, "learning_rate": 3.2774001269582354e-08, "loss": 0.9359, "step": 13419 }, { "epoch": 1.8999079776314858, "grad_norm": 10.30667078976304, "learning_rate": 3.2681556350212805e-08, "loss": 1.0489, "step": 13420 }, { "epoch": 1.900049550506123, "grad_norm": 12.170484039786393, "learning_rate": 3.2589241135933815e-08, "loss": 1.0389, "step": 13421 }, { "epoch": 1.9001911233807602, "grad_norm": 8.411764498749466, "learning_rate": 3.2497055631598995e-08, "loss": 0.8953, "step": 13422 }, { "epoch": 1.9003326962553975, "grad_norm": 10.861897259353631, "learning_rate": 3.2404999842054194e-08, "loss": 1.0661, "step": 13423 }, { "epoch": 1.9004742691300347, "grad_norm": 10.73791569892076, "learning_rate": 3.231307377213833e-08, "loss": 1.0241, "step": 13424 }, { "epoch": 1.900615842004672, "grad_norm": 9.571757703380637, "learning_rate": 3.222127742668446e-08, "loss": 1.0648, "step": 13425 }, { "epoch": 1.9007574148793092, "grad_norm": 10.330235621967734, "learning_rate": 3.2129610810517633e-08, "loss": 0.8834, "step": 13426 }, { "epoch": 1.9008989877539464, "grad_norm": 10.502178147870534, "learning_rate": 3.203807392845732e-08, "loss": 0.9516, "step": 13427 }, { "epoch": 1.9010405606285836, "grad_norm": 9.76614178320128, "learning_rate": 3.1946666785315216e-08, "loss": 0.9731, "step": 13428 }, { "epoch": 1.9011821335032209, "grad_norm": 10.113139800213006, "learning_rate": 3.1855389385896383e-08, "loss": 0.9995, "step": 13429 }, { "epoch": 1.901323706377858, "grad_norm": 9.299988810214622, "learning_rate": 3.176424173499976e-08, "loss": 0.9323, "step": 13430 }, { "epoch": 1.9014652792524953, "grad_norm": 9.380289442074261, "learning_rate": 3.167322383741622e-08, "loss": 0.9628, "step": 13431 }, { "epoch": 1.9016068521271325, "grad_norm": 9.16936753682557, "learning_rate": 3.158233569793112e-08, "loss": 0.9525, "step": 13432 }, { "epoch": 1.9017484250017698, "grad_norm": 10.687589232312066, "learning_rate": 3.149157732132202e-08, "loss": 0.9856, "step": 13433 }, { "epoch": 1.901889997876407, "grad_norm": 7.345346090681809, "learning_rate": 3.14009487123601e-08, "loss": 0.9087, "step": 13434 }, { "epoch": 1.9020315707510442, "grad_norm": 8.375568882993532, "learning_rate": 3.131044987580961e-08, "loss": 0.9363, "step": 13435 }, { "epoch": 1.9021731436256815, "grad_norm": 8.873277953806463, "learning_rate": 3.122008081642786e-08, "loss": 0.9464, "step": 13436 }, { "epoch": 1.9023147165003187, "grad_norm": 10.459277430826663, "learning_rate": 3.112984153896603e-08, "loss": 1.0055, "step": 13437 }, { "epoch": 1.902456289374956, "grad_norm": 10.514638233791024, "learning_rate": 3.1039732048167295e-08, "loss": 0.9384, "step": 13438 }, { "epoch": 1.902597862249593, "grad_norm": 10.2492042790888, "learning_rate": 3.0949752348768956e-08, "loss": 1.0037, "step": 13439 }, { "epoch": 1.9027394351242302, "grad_norm": 7.9313859595751675, "learning_rate": 3.0859902445501136e-08, "loss": 1.0063, "step": 13440 }, { "epoch": 1.9028810079988674, "grad_norm": 9.558512336187817, "learning_rate": 3.077018234308726e-08, "loss": 0.9287, "step": 13441 }, { "epoch": 1.9030225808735046, "grad_norm": 9.20081760256468, "learning_rate": 3.0680592046243576e-08, "loss": 0.9129, "step": 13442 }, { "epoch": 1.9031641537481419, "grad_norm": 9.249102987149323, "learning_rate": 3.059113155968019e-08, "loss": 0.8579, "step": 13443 }, { "epoch": 1.903305726622779, "grad_norm": 9.417917714243718, "learning_rate": 3.050180088809973e-08, "loss": 0.9487, "step": 13444 }, { "epoch": 1.9034472994974163, "grad_norm": 8.291625005211632, "learning_rate": 3.041260003619817e-08, "loss": 0.8918, "step": 13445 }, { "epoch": 1.9035888723720535, "grad_norm": 11.299716196462876, "learning_rate": 3.032352900866481e-08, "loss": 1.0546, "step": 13446 }, { "epoch": 1.9037304452466908, "grad_norm": 10.31118747565978, "learning_rate": 3.0234587810182014e-08, "loss": 1.0423, "step": 13447 }, { "epoch": 1.903872018121328, "grad_norm": 9.421724384679239, "learning_rate": 3.014577644542549e-08, "loss": 0.9485, "step": 13448 }, { "epoch": 1.904013590995965, "grad_norm": 10.778637024841371, "learning_rate": 3.0057094919064e-08, "loss": 0.9392, "step": 13449 }, { "epoch": 1.9041551638706022, "grad_norm": 10.037659875872036, "learning_rate": 2.996854323575937e-08, "loss": 0.9923, "step": 13450 }, { "epoch": 1.9042967367452395, "grad_norm": 8.843368683806775, "learning_rate": 2.98801214001665e-08, "loss": 0.9153, "step": 13451 }, { "epoch": 1.9044383096198767, "grad_norm": 8.412251538806268, "learning_rate": 2.9791829416933593e-08, "loss": 0.8796, "step": 13452 }, { "epoch": 1.904579882494514, "grad_norm": 9.533612668228791, "learning_rate": 2.970366729070279e-08, "loss": 0.9548, "step": 13453 }, { "epoch": 1.9047214553691512, "grad_norm": 9.586814933482739, "learning_rate": 2.9615635026108426e-08, "loss": 0.9628, "step": 13454 }, { "epoch": 1.9048630282437884, "grad_norm": 10.442315549748884, "learning_rate": 2.9527732627777915e-08, "loss": 0.9904, "step": 13455 }, { "epoch": 1.9050046011184256, "grad_norm": 10.311384660921552, "learning_rate": 2.9439960100332288e-08, "loss": 0.9419, "step": 13456 }, { "epoch": 1.9051461739930629, "grad_norm": 8.812790020438246, "learning_rate": 2.9352317448385902e-08, "loss": 0.9268, "step": 13457 }, { "epoch": 1.9052877468677, "grad_norm": 9.722057503409932, "learning_rate": 2.926480467654591e-08, "loss": 1.0103, "step": 13458 }, { "epoch": 1.9054293197423373, "grad_norm": 8.022694821701025, "learning_rate": 2.9177421789412795e-08, "loss": 0.9526, "step": 13459 }, { "epoch": 1.9055708926169745, "grad_norm": 8.972158814302302, "learning_rate": 2.9090168791580663e-08, "loss": 0.8934, "step": 13460 }, { "epoch": 1.9057124654916118, "grad_norm": 11.46659244542904, "learning_rate": 2.9003045687635845e-08, "loss": 1.0629, "step": 13461 }, { "epoch": 1.905854038366249, "grad_norm": 9.456416895188445, "learning_rate": 2.8916052482158284e-08, "loss": 0.9219, "step": 13462 }, { "epoch": 1.9059956112408862, "grad_norm": 7.862880281552141, "learning_rate": 2.8829189179721552e-08, "loss": 0.8812, "step": 13463 }, { "epoch": 1.9061371841155235, "grad_norm": 8.758839093628014, "learning_rate": 2.8742455784891708e-08, "loss": 0.8967, "step": 13464 }, { "epoch": 1.9062787569901607, "grad_norm": 9.049436216549754, "learning_rate": 2.865585230222817e-08, "loss": 0.8519, "step": 13465 }, { "epoch": 1.906420329864798, "grad_norm": 8.606751971818086, "learning_rate": 2.856937873628396e-08, "loss": 0.9475, "step": 13466 }, { "epoch": 1.9065619027394352, "grad_norm": 10.745886792013833, "learning_rate": 2.8483035091604604e-08, "loss": 1.0389, "step": 13467 }, { "epoch": 1.9067034756140724, "grad_norm": 8.566105215868875, "learning_rate": 2.8396821372729257e-08, "loss": 0.9655, "step": 13468 }, { "epoch": 1.9068450484887096, "grad_norm": 10.733680225082, "learning_rate": 2.8310737584190117e-08, "loss": 0.9589, "step": 13469 }, { "epoch": 1.9069866213633468, "grad_norm": 10.76119199595603, "learning_rate": 2.822478373051246e-08, "loss": 0.936, "step": 13470 }, { "epoch": 1.907128194237984, "grad_norm": 9.36042853588917, "learning_rate": 2.8138959816215174e-08, "loss": 0.9508, "step": 13471 }, { "epoch": 1.9072697671126213, "grad_norm": 9.906229205666266, "learning_rate": 2.8053265845809363e-08, "loss": 1.0017, "step": 13472 }, { "epoch": 1.9074113399872585, "grad_norm": 9.970163848969232, "learning_rate": 2.796770182380032e-08, "loss": 0.9964, "step": 13473 }, { "epoch": 1.9075529128618958, "grad_norm": 8.669436330104643, "learning_rate": 2.7882267754685832e-08, "loss": 0.9208, "step": 13474 }, { "epoch": 1.907694485736533, "grad_norm": 9.751788244337668, "learning_rate": 2.7796963642957586e-08, "loss": 0.9629, "step": 13475 }, { "epoch": 1.9078360586111702, "grad_norm": 9.7363283209002, "learning_rate": 2.7711789493099495e-08, "loss": 0.9324, "step": 13476 }, { "epoch": 1.9079776314858075, "grad_norm": 9.369634695188978, "learning_rate": 2.7626745309589088e-08, "loss": 1.1096, "step": 13477 }, { "epoch": 1.9081192043604447, "grad_norm": 9.391330584583091, "learning_rate": 2.7541831096897232e-08, "loss": 0.9645, "step": 13478 }, { "epoch": 1.908260777235082, "grad_norm": 9.887729986436117, "learning_rate": 2.7457046859487578e-08, "loss": 1.0036, "step": 13479 }, { "epoch": 1.908402350109719, "grad_norm": 7.995703974226224, "learning_rate": 2.7372392601817678e-08, "loss": 0.8994, "step": 13480 }, { "epoch": 1.9085439229843562, "grad_norm": 9.664088157428552, "learning_rate": 2.7287868328337297e-08, "loss": 0.9544, "step": 13481 }, { "epoch": 1.9086854958589934, "grad_norm": 8.151237964805512, "learning_rate": 2.720347404348983e-08, "loss": 0.8965, "step": 13482 }, { "epoch": 1.9088270687336306, "grad_norm": 8.072810241654011, "learning_rate": 2.7119209751712283e-08, "loss": 0.9081, "step": 13483 }, { "epoch": 1.9089686416082678, "grad_norm": 10.531480011281763, "learning_rate": 2.7035075457433613e-08, "loss": 1.0924, "step": 13484 }, { "epoch": 1.909110214482905, "grad_norm": 11.068224099502, "learning_rate": 2.6951071165077504e-08, "loss": 0.9029, "step": 13485 }, { "epoch": 1.9092517873575423, "grad_norm": 8.819364430889776, "learning_rate": 2.686719687905931e-08, "loss": 0.9274, "step": 13486 }, { "epoch": 1.9093933602321795, "grad_norm": 8.2774700246975, "learning_rate": 2.678345260378856e-08, "loss": 0.9829, "step": 13487 }, { "epoch": 1.9095349331068168, "grad_norm": 9.08104413318359, "learning_rate": 2.669983834366785e-08, "loss": 0.996, "step": 13488 }, { "epoch": 1.909676505981454, "grad_norm": 9.672864133084335, "learning_rate": 2.661635410309199e-08, "loss": 0.9142, "step": 13489 }, { "epoch": 1.9098180788560912, "grad_norm": 9.81485287587632, "learning_rate": 2.653299988645053e-08, "loss": 0.931, "step": 13490 }, { "epoch": 1.9099596517307282, "grad_norm": 10.18982730797761, "learning_rate": 2.644977569812496e-08, "loss": 1.0901, "step": 13491 }, { "epoch": 1.9101012246053655, "grad_norm": 9.047751586822853, "learning_rate": 2.6366681542490114e-08, "loss": 0.9596, "step": 13492 }, { "epoch": 1.9102427974800027, "grad_norm": 7.929456825847438, "learning_rate": 2.6283717423914445e-08, "loss": 0.9202, "step": 13493 }, { "epoch": 1.91038437035464, "grad_norm": 9.790774257385472, "learning_rate": 2.6200883346759466e-08, "loss": 0.9065, "step": 13494 }, { "epoch": 1.9105259432292772, "grad_norm": 9.142706009773711, "learning_rate": 2.6118179315379467e-08, "loss": 0.9257, "step": 13495 }, { "epoch": 1.9106675161039144, "grad_norm": 9.310116866184426, "learning_rate": 2.6035605334122084e-08, "loss": 0.9458, "step": 13496 }, { "epoch": 1.9108090889785516, "grad_norm": 9.207682959142014, "learning_rate": 2.5953161407328565e-08, "loss": 0.9008, "step": 13497 }, { "epoch": 1.9109506618531888, "grad_norm": 8.682439194501217, "learning_rate": 2.587084753933211e-08, "loss": 0.9587, "step": 13498 }, { "epoch": 1.911092234727826, "grad_norm": 7.53112970153209, "learning_rate": 2.578866373446065e-08, "loss": 0.8894, "step": 13499 }, { "epoch": 1.9112338076024633, "grad_norm": 9.195610774022802, "learning_rate": 2.5706609997034337e-08, "loss": 0.9316, "step": 13500 }, { "epoch": 1.9113753804771005, "grad_norm": 8.710095776317111, "learning_rate": 2.5624686331366666e-08, "loss": 0.8548, "step": 13501 }, { "epoch": 1.9115169533517378, "grad_norm": 9.753688016931884, "learning_rate": 2.554289274176419e-08, "loss": 0.9296, "step": 13502 }, { "epoch": 1.911658526226375, "grad_norm": 8.831449871676648, "learning_rate": 2.546122923252681e-08, "loss": 1.013, "step": 13503 }, { "epoch": 1.9118000991010122, "grad_norm": 7.783480282471151, "learning_rate": 2.5379695807947467e-08, "loss": 0.9543, "step": 13504 }, { "epoch": 1.9119416719756495, "grad_norm": 8.168520762372616, "learning_rate": 2.5298292472312192e-08, "loss": 0.9072, "step": 13505 }, { "epoch": 1.9120832448502867, "grad_norm": 9.012006168883168, "learning_rate": 2.5217019229900607e-08, "loss": 0.9077, "step": 13506 }, { "epoch": 1.912224817724924, "grad_norm": 9.214147006962607, "learning_rate": 2.513587608498541e-08, "loss": 1.0346, "step": 13507 }, { "epoch": 1.9123663905995612, "grad_norm": 10.087492996459194, "learning_rate": 2.5054863041831524e-08, "loss": 0.9347, "step": 13508 }, { "epoch": 1.9125079634741984, "grad_norm": 8.759677875466288, "learning_rate": 2.4973980104698036e-08, "loss": 0.9788, "step": 13509 }, { "epoch": 1.9126495363488356, "grad_norm": 10.179736965056335, "learning_rate": 2.4893227277837106e-08, "loss": 1.0745, "step": 13510 }, { "epoch": 1.9127911092234728, "grad_norm": 8.858605083939747, "learning_rate": 2.481260456549367e-08, "loss": 0.9596, "step": 13511 }, { "epoch": 1.91293268209811, "grad_norm": 10.432688443501977, "learning_rate": 2.4732111971906004e-08, "loss": 0.9665, "step": 13512 }, { "epoch": 1.9130742549727473, "grad_norm": 8.981306796070198, "learning_rate": 2.4651749501305446e-08, "loss": 0.9847, "step": 13513 }, { "epoch": 1.9132158278473845, "grad_norm": 10.477581250570415, "learning_rate": 2.4571517157916946e-08, "loss": 1.0264, "step": 13514 }, { "epoch": 1.9133574007220218, "grad_norm": 8.498278275190694, "learning_rate": 2.449141494595797e-08, "loss": 0.9464, "step": 13515 }, { "epoch": 1.913498973596659, "grad_norm": 10.565009366422618, "learning_rate": 2.441144286963931e-08, "loss": 0.9029, "step": 13516 }, { "epoch": 1.9136405464712962, "grad_norm": 8.631713023876692, "learning_rate": 2.433160093316539e-08, "loss": 0.9823, "step": 13517 }, { "epoch": 1.9137821193459335, "grad_norm": 8.811159721882884, "learning_rate": 2.4251889140733398e-08, "loss": 0.9887, "step": 13518 }, { "epoch": 1.9139236922205707, "grad_norm": 7.840119148438928, "learning_rate": 2.417230749653332e-08, "loss": 0.9025, "step": 13519 }, { "epoch": 1.914065265095208, "grad_norm": 8.5149696027893, "learning_rate": 2.409285600474931e-08, "loss": 0.8929, "step": 13520 }, { "epoch": 1.9142068379698451, "grad_norm": 9.815860634458764, "learning_rate": 2.401353466955747e-08, "loss": 1.056, "step": 13521 }, { "epoch": 1.9143484108444822, "grad_norm": 8.670836790329481, "learning_rate": 2.3934343495128075e-08, "loss": 0.9451, "step": 13522 }, { "epoch": 1.9144899837191194, "grad_norm": 9.22511532098905, "learning_rate": 2.385528248562391e-08, "loss": 1.0044, "step": 13523 }, { "epoch": 1.9146315565937566, "grad_norm": 7.764982692013898, "learning_rate": 2.3776351645201367e-08, "loss": 0.9039, "step": 13524 }, { "epoch": 1.9147731294683938, "grad_norm": 8.15212054602805, "learning_rate": 2.3697550978009632e-08, "loss": 1.0103, "step": 13525 }, { "epoch": 1.914914702343031, "grad_norm": 8.724063033808909, "learning_rate": 2.3618880488190942e-08, "loss": 0.9494, "step": 13526 }, { "epoch": 1.9150562752176683, "grad_norm": 9.891205058030089, "learning_rate": 2.3540340179881717e-08, "loss": 0.9424, "step": 13527 }, { "epoch": 1.9151978480923055, "grad_norm": 7.768758406446578, "learning_rate": 2.3461930057210037e-08, "loss": 0.9529, "step": 13528 }, { "epoch": 1.9153394209669428, "grad_norm": 11.215071986422759, "learning_rate": 2.338365012429816e-08, "loss": 1.0085, "step": 13529 }, { "epoch": 1.91548099384158, "grad_norm": 8.360241996511487, "learning_rate": 2.3305500385261137e-08, "loss": 0.8767, "step": 13530 }, { "epoch": 1.9156225667162172, "grad_norm": 8.427765380519443, "learning_rate": 2.322748084420734e-08, "loss": 1.0135, "step": 13531 }, { "epoch": 1.9157641395908542, "grad_norm": 6.944824201584801, "learning_rate": 2.3149591505237935e-08, "loss": 0.8237, "step": 13532 }, { "epoch": 1.9159057124654915, "grad_norm": 10.549319016593781, "learning_rate": 2.30718323724477e-08, "loss": 0.9596, "step": 13533 }, { "epoch": 1.9160472853401287, "grad_norm": 12.288853279171015, "learning_rate": 2.299420344992448e-08, "loss": 0.9355, "step": 13534 }, { "epoch": 1.916188858214766, "grad_norm": 11.593717096904118, "learning_rate": 2.2916704741748897e-08, "loss": 0.984, "step": 13535 }, { "epoch": 1.9163304310894032, "grad_norm": 7.100053985820284, "learning_rate": 2.283933625199547e-08, "loss": 0.8617, "step": 13536 }, { "epoch": 1.9164720039640404, "grad_norm": 8.839547280849347, "learning_rate": 2.2762097984730948e-08, "loss": 0.8901, "step": 13537 }, { "epoch": 1.9166135768386776, "grad_norm": 10.37417084471487, "learning_rate": 2.268498994401569e-08, "loss": 1.0135, "step": 13538 }, { "epoch": 1.9167551497133148, "grad_norm": 9.5260319668884, "learning_rate": 2.2608012133903402e-08, "loss": 0.953, "step": 13539 }, { "epoch": 1.916896722587952, "grad_norm": 9.844481232141208, "learning_rate": 2.2531164558440843e-08, "loss": 1.0099, "step": 13540 }, { "epoch": 1.9170382954625893, "grad_norm": 9.65059676622829, "learning_rate": 2.2454447221667563e-08, "loss": 0.8475, "step": 13541 }, { "epoch": 1.9171798683372265, "grad_norm": 8.326669648136596, "learning_rate": 2.2377860127616447e-08, "loss": 0.9752, "step": 13542 }, { "epoch": 1.9173214412118638, "grad_norm": 9.86911955921213, "learning_rate": 2.230140328031427e-08, "loss": 0.8638, "step": 13543 }, { "epoch": 1.917463014086501, "grad_norm": 7.976369528533197, "learning_rate": 2.222507668377949e-08, "loss": 0.9395, "step": 13544 }, { "epoch": 1.9176045869611382, "grad_norm": 10.425144580597776, "learning_rate": 2.214888034202528e-08, "loss": 0.9297, "step": 13545 }, { "epoch": 1.9177461598357755, "grad_norm": 8.780680899318916, "learning_rate": 2.2072814259056496e-08, "loss": 0.9802, "step": 13546 }, { "epoch": 1.9178877327104127, "grad_norm": 8.757851266222184, "learning_rate": 2.199687843887244e-08, "loss": 0.9671, "step": 13547 }, { "epoch": 1.91802930558505, "grad_norm": 9.389971362833021, "learning_rate": 2.1921072885464633e-08, "loss": 0.8864, "step": 13548 }, { "epoch": 1.9181708784596871, "grad_norm": 11.221667407162478, "learning_rate": 2.1845397602818508e-08, "loss": 0.9445, "step": 13549 }, { "epoch": 1.9183124513343244, "grad_norm": 8.983788194151886, "learning_rate": 2.1769852594912265e-08, "loss": 0.9098, "step": 13550 }, { "epoch": 1.9184540242089616, "grad_norm": 10.973209007303595, "learning_rate": 2.169443786571662e-08, "loss": 0.9304, "step": 13551 }, { "epoch": 1.9185955970835988, "grad_norm": 9.25946715170985, "learning_rate": 2.161915341919646e-08, "loss": 0.9023, "step": 13552 }, { "epoch": 1.918737169958236, "grad_norm": 10.403944338581065, "learning_rate": 2.1543999259309724e-08, "loss": 1.0713, "step": 13553 }, { "epoch": 1.9188787428328733, "grad_norm": 9.748850436788288, "learning_rate": 2.1468975390006587e-08, "loss": 0.8966, "step": 13554 }, { "epoch": 1.9190203157075105, "grad_norm": 12.352629816314671, "learning_rate": 2.139408181523167e-08, "loss": 1.0146, "step": 13555 }, { "epoch": 1.9191618885821478, "grad_norm": 9.298379112833405, "learning_rate": 2.1319318538921552e-08, "loss": 0.9287, "step": 13556 }, { "epoch": 1.919303461456785, "grad_norm": 11.054612618452216, "learning_rate": 2.1244685565006695e-08, "loss": 1.0996, "step": 13557 }, { "epoch": 1.9194450343314222, "grad_norm": 9.389960394015082, "learning_rate": 2.1170182897410353e-08, "loss": 0.9441, "step": 13558 }, { "epoch": 1.9195866072060594, "grad_norm": 8.86716340855758, "learning_rate": 2.109581054004939e-08, "loss": 0.9244, "step": 13559 }, { "epoch": 1.9197281800806967, "grad_norm": 10.903326216481272, "learning_rate": 2.1021568496833454e-08, "loss": 0.9612, "step": 13560 }, { "epoch": 1.919869752955334, "grad_norm": 9.516307687307705, "learning_rate": 2.0947456771664987e-08, "loss": 0.9381, "step": 13561 }, { "epoch": 1.9200113258299711, "grad_norm": 9.552953005893649, "learning_rate": 2.087347536844059e-08, "loss": 0.8864, "step": 13562 }, { "epoch": 1.9201528987046081, "grad_norm": 10.14313681694695, "learning_rate": 2.0799624291048816e-08, "loss": 1.0174, "step": 13563 }, { "epoch": 1.9202944715792454, "grad_norm": 9.521752199758891, "learning_rate": 2.0725903543372117e-08, "loss": 0.9353, "step": 13564 }, { "epoch": 1.9204360444538826, "grad_norm": 9.969346175109965, "learning_rate": 2.0652313129286284e-08, "loss": 0.9735, "step": 13565 }, { "epoch": 1.9205776173285198, "grad_norm": 12.481654017465743, "learning_rate": 2.057885305265961e-08, "loss": 1.0113, "step": 13566 }, { "epoch": 1.920719190203157, "grad_norm": 9.387675350496956, "learning_rate": 2.0505523317353727e-08, "loss": 0.931, "step": 13567 }, { "epoch": 1.9208607630777943, "grad_norm": 9.088182560658614, "learning_rate": 2.0432323927223883e-08, "loss": 1.0067, "step": 13568 }, { "epoch": 1.9210023359524315, "grad_norm": 9.604503258842755, "learning_rate": 2.0359254886117842e-08, "loss": 0.9189, "step": 13569 }, { "epoch": 1.9211439088270688, "grad_norm": 12.511807167772194, "learning_rate": 2.0286316197876964e-08, "loss": 1.0589, "step": 13570 }, { "epoch": 1.921285481701706, "grad_norm": 9.33005338892733, "learning_rate": 2.0213507866335412e-08, "loss": 0.9526, "step": 13571 }, { "epoch": 1.9214270545763432, "grad_norm": 10.256774896350903, "learning_rate": 2.0140829895320955e-08, "loss": 1.1206, "step": 13572 }, { "epoch": 1.9215686274509802, "grad_norm": 9.631391112564353, "learning_rate": 2.0068282288653872e-08, "loss": 0.9872, "step": 13573 }, { "epoch": 1.9217102003256175, "grad_norm": 9.431225844770053, "learning_rate": 1.9995865050147777e-08, "loss": 1.0159, "step": 13574 }, { "epoch": 1.9218517732002547, "grad_norm": 9.014297995927082, "learning_rate": 1.9923578183610182e-08, "loss": 0.9618, "step": 13575 }, { "epoch": 1.921993346074892, "grad_norm": 8.159490533519943, "learning_rate": 1.9851421692840822e-08, "loss": 0.8725, "step": 13576 }, { "epoch": 1.9221349189495291, "grad_norm": 9.90755648977812, "learning_rate": 1.9779395581633055e-08, "loss": 0.9635, "step": 13577 }, { "epoch": 1.9222764918241664, "grad_norm": 9.975791238174379, "learning_rate": 1.9707499853773016e-08, "loss": 0.9732, "step": 13578 }, { "epoch": 1.9224180646988036, "grad_norm": 8.839476506539738, "learning_rate": 1.9635734513040182e-08, "loss": 0.864, "step": 13579 }, { "epoch": 1.9225596375734408, "grad_norm": 10.260507919499565, "learning_rate": 1.956409956320737e-08, "loss": 0.9208, "step": 13580 }, { "epoch": 1.922701210448078, "grad_norm": 10.306949485790485, "learning_rate": 1.949259500804074e-08, "loss": 0.9467, "step": 13581 }, { "epoch": 1.9228427833227153, "grad_norm": 9.402322246937397, "learning_rate": 1.942122085129866e-08, "loss": 0.9597, "step": 13582 }, { "epoch": 1.9229843561973525, "grad_norm": 9.206092757509197, "learning_rate": 1.9349977096733142e-08, "loss": 1.0211, "step": 13583 }, { "epoch": 1.9231259290719898, "grad_norm": 8.628568049478005, "learning_rate": 1.9278863748089794e-08, "loss": 0.8718, "step": 13584 }, { "epoch": 1.923267501946627, "grad_norm": 8.085260010869122, "learning_rate": 1.9207880809107014e-08, "loss": 0.9589, "step": 13585 }, { "epoch": 1.9234090748212642, "grad_norm": 9.795137826320827, "learning_rate": 1.913702828351599e-08, "loss": 0.82, "step": 13586 }, { "epoch": 1.9235506476959014, "grad_norm": 9.899714579465552, "learning_rate": 1.9066306175041792e-08, "loss": 0.9791, "step": 13587 }, { "epoch": 1.9236922205705387, "grad_norm": 7.442230259085514, "learning_rate": 1.899571448740173e-08, "loss": 0.8969, "step": 13588 }, { "epoch": 1.923833793445176, "grad_norm": 8.690292677960466, "learning_rate": 1.892525322430755e-08, "loss": 1.0189, "step": 13589 }, { "epoch": 1.9239753663198131, "grad_norm": 10.618202458500546, "learning_rate": 1.8854922389462405e-08, "loss": 0.9264, "step": 13590 }, { "epoch": 1.9241169391944504, "grad_norm": 8.496900105412985, "learning_rate": 1.8784721986564168e-08, "loss": 1.0278, "step": 13591 }, { "epoch": 1.9242585120690876, "grad_norm": 8.736047137709662, "learning_rate": 1.871465201930295e-08, "loss": 0.9615, "step": 13592 }, { "epoch": 1.9244000849437248, "grad_norm": 9.683610295215797, "learning_rate": 1.864471249136218e-08, "loss": 1.0545, "step": 13593 }, { "epoch": 1.924541657818362, "grad_norm": 10.81474783580057, "learning_rate": 1.8574903406418933e-08, "loss": 0.9543, "step": 13594 }, { "epoch": 1.9246832306929993, "grad_norm": 7.668355285882844, "learning_rate": 1.850522476814276e-08, "loss": 0.9342, "step": 13595 }, { "epoch": 1.9248248035676365, "grad_norm": 8.640330966685662, "learning_rate": 1.843567658019657e-08, "loss": 1.036, "step": 13596 }, { "epoch": 1.9249663764422738, "grad_norm": 11.757155816379445, "learning_rate": 1.8366258846236607e-08, "loss": 1.1005, "step": 13597 }, { "epoch": 1.925107949316911, "grad_norm": 9.418567791893217, "learning_rate": 1.8296971569911893e-08, "loss": 0.9845, "step": 13598 }, { "epoch": 1.9252495221915482, "grad_norm": 8.455845308734894, "learning_rate": 1.822781475486507e-08, "loss": 0.8628, "step": 13599 }, { "epoch": 1.9253910950661854, "grad_norm": 11.125256138971132, "learning_rate": 1.8158788404731565e-08, "loss": 0.9828, "step": 13600 }, { "epoch": 1.9255326679408227, "grad_norm": 10.058811435751243, "learning_rate": 1.8089892523139864e-08, "loss": 1.033, "step": 13601 }, { "epoch": 1.92567424081546, "grad_norm": 10.272021201091052, "learning_rate": 1.8021127113712066e-08, "loss": 1.1036, "step": 13602 }, { "epoch": 1.9258158136900971, "grad_norm": 10.202681200268703, "learning_rate": 1.7952492180063064e-08, "loss": 0.9851, "step": 13603 }, { "epoch": 1.9259573865647341, "grad_norm": 8.687821855689227, "learning_rate": 1.7883987725800522e-08, "loss": 0.9394, "step": 13604 }, { "epoch": 1.9260989594393714, "grad_norm": 8.623108144955033, "learning_rate": 1.7815613754526283e-08, "loss": 0.88, "step": 13605 }, { "epoch": 1.9262405323140086, "grad_norm": 9.420156546747744, "learning_rate": 1.774737026983414e-08, "loss": 1.0313, "step": 13606 }, { "epoch": 1.9263821051886458, "grad_norm": 10.176434270073432, "learning_rate": 1.7679257275312057e-08, "loss": 0.9563, "step": 13607 }, { "epoch": 1.926523678063283, "grad_norm": 10.774447653081985, "learning_rate": 1.7611274774540777e-08, "loss": 0.8907, "step": 13608 }, { "epoch": 1.9266652509379203, "grad_norm": 9.36032298386678, "learning_rate": 1.7543422771093554e-08, "loss": 0.9141, "step": 13609 }, { "epoch": 1.9268068238125575, "grad_norm": 10.161540088621225, "learning_rate": 1.7475701268537814e-08, "loss": 0.8986, "step": 13610 }, { "epoch": 1.9269483966871948, "grad_norm": 9.83546665994783, "learning_rate": 1.7408110270432932e-08, "loss": 0.8716, "step": 13611 }, { "epoch": 1.927089969561832, "grad_norm": 9.605669302628778, "learning_rate": 1.7340649780333007e-08, "loss": 1.0537, "step": 13612 }, { "epoch": 1.9272315424364692, "grad_norm": 12.013680607062751, "learning_rate": 1.7273319801784094e-08, "loss": 1.0396, "step": 13613 }, { "epoch": 1.9273731153111064, "grad_norm": 9.932054095068315, "learning_rate": 1.7206120338325305e-08, "loss": 0.9809, "step": 13614 }, { "epoch": 1.9275146881857435, "grad_norm": 9.973693961017018, "learning_rate": 1.7139051393489647e-08, "loss": 1.0293, "step": 13615 }, { "epoch": 1.9276562610603807, "grad_norm": 9.146765265345774, "learning_rate": 1.7072112970802634e-08, "loss": 0.9192, "step": 13616 }, { "epoch": 1.927797833935018, "grad_norm": 8.232328504437882, "learning_rate": 1.7005305073783396e-08, "loss": 0.9257, "step": 13617 }, { "epoch": 1.9279394068096551, "grad_norm": 6.6878396776869105, "learning_rate": 1.6938627705943566e-08, "loss": 0.8649, "step": 13618 }, { "epoch": 1.9280809796842924, "grad_norm": 9.054470109016767, "learning_rate": 1.6872080870788955e-08, "loss": 0.8768, "step": 13619 }, { "epoch": 1.9282225525589296, "grad_norm": 8.256137761082991, "learning_rate": 1.6805664571817593e-08, "loss": 0.9167, "step": 13620 }, { "epoch": 1.9283641254335668, "grad_norm": 8.194541640342827, "learning_rate": 1.6739378812520858e-08, "loss": 0.8964, "step": 13621 }, { "epoch": 1.928505698308204, "grad_norm": 12.178555536918996, "learning_rate": 1.667322359638318e-08, "loss": 1.1302, "step": 13622 }, { "epoch": 1.9286472711828413, "grad_norm": 8.47970311922737, "learning_rate": 1.660719892688262e-08, "loss": 0.8946, "step": 13623 }, { "epoch": 1.9287888440574785, "grad_norm": 11.678057604710867, "learning_rate": 1.6541304807489998e-08, "loss": 1.023, "step": 13624 }, { "epoch": 1.9289304169321158, "grad_norm": 11.161742495884484, "learning_rate": 1.6475541241669224e-08, "loss": 0.9851, "step": 13625 }, { "epoch": 1.929071989806753, "grad_norm": 8.537938440997664, "learning_rate": 1.6409908232877246e-08, "loss": 0.9547, "step": 13626 }, { "epoch": 1.9292135626813902, "grad_norm": 8.04411882030685, "learning_rate": 1.6344405784564642e-08, "loss": 0.9545, "step": 13627 }, { "epoch": 1.9293551355560274, "grad_norm": 8.54940485081989, "learning_rate": 1.6279033900175047e-08, "loss": 0.9554, "step": 13628 }, { "epoch": 1.9294967084306647, "grad_norm": 10.817373952287719, "learning_rate": 1.6213792583144318e-08, "loss": 0.9284, "step": 13629 }, { "epoch": 1.929638281305302, "grad_norm": 9.156766596988193, "learning_rate": 1.614868183690249e-08, "loss": 0.9814, "step": 13630 }, { "epoch": 1.9297798541799391, "grad_norm": 9.561888432113376, "learning_rate": 1.6083701664872663e-08, "loss": 0.9981, "step": 13631 }, { "epoch": 1.9299214270545764, "grad_norm": 9.674565694361258, "learning_rate": 1.6018852070470437e-08, "loss": 0.9134, "step": 13632 }, { "epoch": 1.9300629999292136, "grad_norm": 8.445129561935158, "learning_rate": 1.5954133057105027e-08, "loss": 0.8493, "step": 13633 }, { "epoch": 1.9302045728038508, "grad_norm": 9.994027070575822, "learning_rate": 1.588954462817871e-08, "loss": 0.9652, "step": 13634 }, { "epoch": 1.930346145678488, "grad_norm": 10.3099210346502, "learning_rate": 1.582508678708683e-08, "loss": 0.8786, "step": 13635 }, { "epoch": 1.9304877185531253, "grad_norm": 11.286134798241692, "learning_rate": 1.5760759537217783e-08, "loss": 1.0369, "step": 13636 }, { "epoch": 1.9306292914277625, "grad_norm": 9.469540402499495, "learning_rate": 1.5696562881953314e-08, "loss": 0.9587, "step": 13637 }, { "epoch": 1.9307708643023997, "grad_norm": 10.13174101047398, "learning_rate": 1.563249682466822e-08, "loss": 1.0075, "step": 13638 }, { "epoch": 1.930912437177037, "grad_norm": 9.18923912974798, "learning_rate": 1.5568561368730082e-08, "loss": 0.9456, "step": 13639 }, { "epoch": 1.9310540100516742, "grad_norm": 8.781303867161293, "learning_rate": 1.5504756517500385e-08, "loss": 1.0188, "step": 13640 }, { "epoch": 1.9311955829263114, "grad_norm": 10.0418261334643, "learning_rate": 1.544108227433311e-08, "loss": 1.003, "step": 13641 }, { "epoch": 1.9313371558009487, "grad_norm": 11.284488287171923, "learning_rate": 1.5377538642575574e-08, "loss": 0.9307, "step": 13642 }, { "epoch": 1.931478728675586, "grad_norm": 8.939766549768258, "learning_rate": 1.5314125625568167e-08, "loss": 0.8804, "step": 13643 }, { "epoch": 1.9316203015502231, "grad_norm": 11.13943950236087, "learning_rate": 1.5250843226644608e-08, "loss": 0.9515, "step": 13644 }, { "epoch": 1.9317618744248604, "grad_norm": 8.443508241299847, "learning_rate": 1.518769144913168e-08, "loss": 0.9434, "step": 13645 }, { "epoch": 1.9319034472994974, "grad_norm": 10.151217202550606, "learning_rate": 1.5124670296348676e-08, "loss": 0.9593, "step": 13646 }, { "epoch": 1.9320450201741346, "grad_norm": 10.390291274598422, "learning_rate": 1.506177977160933e-08, "loss": 0.9302, "step": 13647 }, { "epoch": 1.9321865930487718, "grad_norm": 9.911481853596852, "learning_rate": 1.4999019878219056e-08, "loss": 1.1009, "step": 13648 }, { "epoch": 1.932328165923409, "grad_norm": 9.282128289277912, "learning_rate": 1.4936390619477715e-08, "loss": 0.9039, "step": 13649 }, { "epoch": 1.9324697387980463, "grad_norm": 10.488431097626014, "learning_rate": 1.4873891998677115e-08, "loss": 0.9798, "step": 13650 }, { "epoch": 1.9326113116726835, "grad_norm": 10.00091586687287, "learning_rate": 1.4811524019103241e-08, "loss": 1.0018, "step": 13651 }, { "epoch": 1.9327528845473207, "grad_norm": 7.832358306372628, "learning_rate": 1.4749286684034303e-08, "loss": 0.8955, "step": 13652 }, { "epoch": 1.932894457421958, "grad_norm": 10.88984141000495, "learning_rate": 1.468717999674213e-08, "loss": 1.0395, "step": 13653 }, { "epoch": 1.9330360302965952, "grad_norm": 10.370540901578307, "learning_rate": 1.4625203960492162e-08, "loss": 0.961, "step": 13654 }, { "epoch": 1.9331776031712324, "grad_norm": 9.069718215754202, "learning_rate": 1.4563358578542074e-08, "loss": 1.0121, "step": 13655 }, { "epoch": 1.9333191760458694, "grad_norm": 10.243410923427382, "learning_rate": 1.4501643854142877e-08, "loss": 1.1012, "step": 13656 }, { "epoch": 1.9334607489205067, "grad_norm": 8.143655809515016, "learning_rate": 1.4440059790538918e-08, "loss": 0.8827, "step": 13657 }, { "epoch": 1.933602321795144, "grad_norm": 10.726687681352583, "learning_rate": 1.4378606390967609e-08, "loss": 0.9516, "step": 13658 }, { "epoch": 1.9337438946697811, "grad_norm": 12.303515569191113, "learning_rate": 1.4317283658659698e-08, "loss": 0.9903, "step": 13659 }, { "epoch": 1.9338854675444184, "grad_norm": 9.335125160788051, "learning_rate": 1.4256091596838717e-08, "loss": 0.9285, "step": 13660 }, { "epoch": 1.9340270404190556, "grad_norm": 10.433409111877133, "learning_rate": 1.4195030208721816e-08, "loss": 0.9128, "step": 13661 }, { "epoch": 1.9341686132936928, "grad_norm": 12.326808612584568, "learning_rate": 1.4134099497518372e-08, "loss": 1.1485, "step": 13662 }, { "epoch": 1.93431018616833, "grad_norm": 9.43546096298344, "learning_rate": 1.4073299466431933e-08, "loss": 1.019, "step": 13663 }, { "epoch": 1.9344517590429673, "grad_norm": 9.60714055576388, "learning_rate": 1.4012630118658555e-08, "loss": 0.9873, "step": 13664 }, { "epoch": 1.9345933319176045, "grad_norm": 10.305968649628666, "learning_rate": 1.395209145738763e-08, "loss": 0.9453, "step": 13665 }, { "epoch": 1.9347349047922417, "grad_norm": 11.340310660384196, "learning_rate": 1.389168348580161e-08, "loss": 0.9932, "step": 13666 }, { "epoch": 1.934876477666879, "grad_norm": 8.41647817066992, "learning_rate": 1.3831406207076014e-08, "loss": 1.0044, "step": 13667 }, { "epoch": 1.9350180505415162, "grad_norm": 9.697713476252042, "learning_rate": 1.3771259624379696e-08, "loss": 0.9346, "step": 13668 }, { "epoch": 1.9351596234161534, "grad_norm": 8.242102802438042, "learning_rate": 1.3711243740874292e-08, "loss": 0.8208, "step": 13669 }, { "epoch": 1.9353011962907907, "grad_norm": 9.058756719507521, "learning_rate": 1.3651358559715056e-08, "loss": 0.9293, "step": 13670 }, { "epoch": 1.935442769165428, "grad_norm": 7.951378171451755, "learning_rate": 1.3591604084049747e-08, "loss": 0.9419, "step": 13671 }, { "epoch": 1.9355843420400651, "grad_norm": 8.467811222759803, "learning_rate": 1.3531980317020299e-08, "loss": 0.9163, "step": 13672 }, { "epoch": 1.9357259149147024, "grad_norm": 10.653150896492432, "learning_rate": 1.3472487261760313e-08, "loss": 0.9726, "step": 13673 }, { "epoch": 1.9358674877893396, "grad_norm": 8.118429490973137, "learning_rate": 1.3413124921397846e-08, "loss": 0.8981, "step": 13674 }, { "epoch": 1.9360090606639768, "grad_norm": 9.095953290995544, "learning_rate": 1.3353893299053178e-08, "loss": 0.8776, "step": 13675 }, { "epoch": 1.936150633538614, "grad_norm": 10.613565552757272, "learning_rate": 1.3294792397840206e-08, "loss": 0.964, "step": 13676 }, { "epoch": 1.9362922064132513, "grad_norm": 10.177767920269511, "learning_rate": 1.323582222086589e-08, "loss": 0.881, "step": 13677 }, { "epoch": 1.9364337792878885, "grad_norm": 10.732929959822757, "learning_rate": 1.3176982771230252e-08, "loss": 1.0284, "step": 13678 }, { "epoch": 1.9365753521625257, "grad_norm": 10.641606536273157, "learning_rate": 1.311827405202637e-08, "loss": 0.9083, "step": 13679 }, { "epoch": 1.936716925037163, "grad_norm": 9.347344054936015, "learning_rate": 1.3059696066340388e-08, "loss": 0.9217, "step": 13680 }, { "epoch": 1.9368584979118002, "grad_norm": 10.202488270355339, "learning_rate": 1.3001248817251788e-08, "loss": 1.0251, "step": 13681 }, { "epoch": 1.9370000707864374, "grad_norm": 10.131010555612391, "learning_rate": 1.294293230783339e-08, "loss": 0.9994, "step": 13682 }, { "epoch": 1.9371416436610747, "grad_norm": 9.855712984364743, "learning_rate": 1.2884746541150516e-08, "loss": 0.9747, "step": 13683 }, { "epoch": 1.937283216535712, "grad_norm": 8.284715611072455, "learning_rate": 1.2826691520262114e-08, "loss": 0.9769, "step": 13684 }, { "epoch": 1.9374247894103491, "grad_norm": 9.522612312998564, "learning_rate": 1.2768767248219903e-08, "loss": 0.9888, "step": 13685 }, { "epoch": 1.9375663622849864, "grad_norm": 8.876925407341215, "learning_rate": 1.2710973728069231e-08, "loss": 0.8996, "step": 13686 }, { "epoch": 1.9377079351596234, "grad_norm": 10.348796431732978, "learning_rate": 1.2653310962847943e-08, "loss": 1.0699, "step": 13687 }, { "epoch": 1.9378495080342606, "grad_norm": 9.85432955477546, "learning_rate": 1.2595778955587501e-08, "loss": 0.9452, "step": 13688 }, { "epoch": 1.9379910809088978, "grad_norm": 8.944010037547912, "learning_rate": 1.2538377709312155e-08, "loss": 0.9565, "step": 13689 }, { "epoch": 1.938132653783535, "grad_norm": 10.27869473969131, "learning_rate": 1.248110722703949e-08, "loss": 1.0607, "step": 13690 }, { "epoch": 1.9382742266581723, "grad_norm": 10.312219604379326, "learning_rate": 1.2423967511780432e-08, "loss": 0.9558, "step": 13691 }, { "epoch": 1.9384157995328095, "grad_norm": 11.230913714011095, "learning_rate": 1.2366958566538689e-08, "loss": 1.0498, "step": 13692 }, { "epoch": 1.9385573724074467, "grad_norm": 8.69438984954994, "learning_rate": 1.231008039431103e-08, "loss": 0.8978, "step": 13693 }, { "epoch": 1.938698945282084, "grad_norm": 9.492755737878408, "learning_rate": 1.2253332998087286e-08, "loss": 0.9783, "step": 13694 }, { "epoch": 1.9388405181567212, "grad_norm": 12.198099857630234, "learning_rate": 1.2196716380851181e-08, "loss": 1.0713, "step": 13695 }, { "epoch": 1.9389820910313584, "grad_norm": 9.201076311924142, "learning_rate": 1.214023054557839e-08, "loss": 0.9476, "step": 13696 }, { "epoch": 1.9391236639059957, "grad_norm": 8.98473037680306, "learning_rate": 1.2083875495238761e-08, "loss": 0.9134, "step": 13697 }, { "epoch": 1.9392652367806327, "grad_norm": 10.284840985720805, "learning_rate": 1.2027651232794924e-08, "loss": 1.0095, "step": 13698 }, { "epoch": 1.93940680965527, "grad_norm": 10.361701991664425, "learning_rate": 1.197155776120229e-08, "loss": 1.0064, "step": 13699 }, { "epoch": 1.9395483825299071, "grad_norm": 10.699633754273364, "learning_rate": 1.1915595083409615e-08, "loss": 0.9064, "step": 13700 }, { "epoch": 1.9396899554045444, "grad_norm": 8.374556800160896, "learning_rate": 1.1859763202358987e-08, "loss": 0.978, "step": 13701 }, { "epoch": 1.9398315282791816, "grad_norm": 10.109515747788572, "learning_rate": 1.1804062120985282e-08, "loss": 0.868, "step": 13702 }, { "epoch": 1.9399731011538188, "grad_norm": 9.984750182455977, "learning_rate": 1.1748491842216714e-08, "loss": 0.9517, "step": 13703 }, { "epoch": 1.940114674028456, "grad_norm": 8.657068317169765, "learning_rate": 1.1693052368974834e-08, "loss": 0.8934, "step": 13704 }, { "epoch": 1.9402562469030933, "grad_norm": 9.922645839273926, "learning_rate": 1.1637743704173698e-08, "loss": 1.0041, "step": 13705 }, { "epoch": 1.9403978197777305, "grad_norm": 9.640379273484061, "learning_rate": 1.1582565850720984e-08, "loss": 0.9761, "step": 13706 }, { "epoch": 1.9405393926523677, "grad_norm": 9.489729298468061, "learning_rate": 1.1527518811517146e-08, "loss": 0.8655, "step": 13707 }, { "epoch": 1.940680965527005, "grad_norm": 10.92898315167025, "learning_rate": 1.1472602589456538e-08, "loss": 0.9456, "step": 13708 }, { "epoch": 1.9408225384016422, "grad_norm": 8.588349688165412, "learning_rate": 1.1417817187425461e-08, "loss": 0.9265, "step": 13709 }, { "epoch": 1.9409641112762794, "grad_norm": 8.325538448298799, "learning_rate": 1.1363162608304112e-08, "loss": 0.9204, "step": 13710 }, { "epoch": 1.9411056841509167, "grad_norm": 10.654675856859564, "learning_rate": 1.1308638854965748e-08, "loss": 1.0429, "step": 13711 }, { "epoch": 1.941247257025554, "grad_norm": 9.155317135312382, "learning_rate": 1.1254245930276686e-08, "loss": 0.8282, "step": 13712 }, { "epoch": 1.9413888299001911, "grad_norm": 9.053248662238989, "learning_rate": 1.1199983837096307e-08, "loss": 0.8914, "step": 13713 }, { "epoch": 1.9415304027748284, "grad_norm": 10.804130357788448, "learning_rate": 1.1145852578276772e-08, "loss": 0.9621, "step": 13714 }, { "epoch": 1.9416719756494656, "grad_norm": 9.584896015524528, "learning_rate": 1.109185215666414e-08, "loss": 1.0916, "step": 13715 }, { "epoch": 1.9418135485241028, "grad_norm": 10.647490987601477, "learning_rate": 1.103798257509725e-08, "loss": 0.9903, "step": 13716 }, { "epoch": 1.94195512139874, "grad_norm": 9.885722845311637, "learning_rate": 1.0984243836407449e-08, "loss": 0.971, "step": 13717 }, { "epoch": 1.9420966942733773, "grad_norm": 9.514282733224526, "learning_rate": 1.0930635943420254e-08, "loss": 1.0164, "step": 13718 }, { "epoch": 1.9422382671480145, "grad_norm": 6.917131921524657, "learning_rate": 1.0877158898953411e-08, "loss": 0.9119, "step": 13719 }, { "epoch": 1.9423798400226517, "grad_norm": 8.996816283945689, "learning_rate": 1.082381270581856e-08, "loss": 0.959, "step": 13720 }, { "epoch": 1.942521412897289, "grad_norm": 8.978729119375474, "learning_rate": 1.0770597366819847e-08, "loss": 0.891, "step": 13721 }, { "epoch": 1.9426629857719262, "grad_norm": 9.112745024810419, "learning_rate": 1.0717512884754478e-08, "loss": 0.9314, "step": 13722 }, { "epoch": 1.9428045586465634, "grad_norm": 10.977076486520351, "learning_rate": 1.0664559262413831e-08, "loss": 0.8975, "step": 13723 }, { "epoch": 1.9429461315212007, "grad_norm": 9.648056751723253, "learning_rate": 1.061173650258096e-08, "loss": 1.0029, "step": 13724 }, { "epoch": 1.943087704395838, "grad_norm": 10.010176630281485, "learning_rate": 1.0559044608032809e-08, "loss": 0.8593, "step": 13725 }, { "epoch": 1.9432292772704751, "grad_norm": 10.0039115884894, "learning_rate": 1.0506483581539662e-08, "loss": 0.9526, "step": 13726 }, { "epoch": 1.9433708501451123, "grad_norm": 11.199740910939322, "learning_rate": 1.0454053425864308e-08, "loss": 1.0393, "step": 13727 }, { "epoch": 1.9435124230197496, "grad_norm": 9.634139836572075, "learning_rate": 1.0401754143763154e-08, "loss": 0.8627, "step": 13728 }, { "epoch": 1.9436539958943866, "grad_norm": 8.91179283728628, "learning_rate": 1.034958573798539e-08, "loss": 0.8972, "step": 13729 }, { "epoch": 1.9437955687690238, "grad_norm": 10.420643975389256, "learning_rate": 1.0297548211273544e-08, "loss": 1.0306, "step": 13730 }, { "epoch": 1.943937141643661, "grad_norm": 8.93627494472363, "learning_rate": 1.0245641566363208e-08, "loss": 0.9522, "step": 13731 }, { "epoch": 1.9440787145182983, "grad_norm": 11.264254884827118, "learning_rate": 1.0193865805983028e-08, "loss": 1.0337, "step": 13732 }, { "epoch": 1.9442202873929355, "grad_norm": 11.552909874248133, "learning_rate": 1.0142220932854995e-08, "loss": 0.9402, "step": 13733 }, { "epoch": 1.9443618602675727, "grad_norm": 10.870000281873487, "learning_rate": 1.0090706949693884e-08, "loss": 0.9694, "step": 13734 }, { "epoch": 1.94450343314221, "grad_norm": 7.858445706041044, "learning_rate": 1.0039323859207529e-08, "loss": 0.8285, "step": 13735 }, { "epoch": 1.9446450060168472, "grad_norm": 8.104369277840291, "learning_rate": 9.988071664097376e-09, "loss": 0.8609, "step": 13736 }, { "epoch": 1.9447865788914844, "grad_norm": 10.080878303046806, "learning_rate": 9.93695036705794e-09, "loss": 0.8424, "step": 13737 }, { "epoch": 1.9449281517661217, "grad_norm": 9.528250600675726, "learning_rate": 9.885959970775961e-09, "loss": 0.9862, "step": 13738 }, { "epoch": 1.9450697246407587, "grad_norm": 9.602059906257583, "learning_rate": 9.835100477932624e-09, "loss": 0.8722, "step": 13739 }, { "epoch": 1.945211297515396, "grad_norm": 8.8982351546353, "learning_rate": 9.784371891201349e-09, "loss": 0.7944, "step": 13740 }, { "epoch": 1.9453528703900331, "grad_norm": 8.990019032221625, "learning_rate": 9.733774213248615e-09, "loss": 0.9536, "step": 13741 }, { "epoch": 1.9454944432646704, "grad_norm": 8.328475400842667, "learning_rate": 9.683307446734792e-09, "loss": 0.8791, "step": 13742 }, { "epoch": 1.9456360161393076, "grad_norm": 9.937725712354128, "learning_rate": 9.632971594312478e-09, "loss": 1.0701, "step": 13743 }, { "epoch": 1.9457775890139448, "grad_norm": 7.810214265236037, "learning_rate": 9.582766658628173e-09, "loss": 0.9384, "step": 13744 }, { "epoch": 1.945919161888582, "grad_norm": 9.83854106353117, "learning_rate": 9.532692642320596e-09, "loss": 0.9371, "step": 13745 }, { "epoch": 1.9460607347632193, "grad_norm": 10.469037134588387, "learning_rate": 9.482749548022641e-09, "loss": 1.0148, "step": 13746 }, { "epoch": 1.9462023076378565, "grad_norm": 10.766990187501515, "learning_rate": 9.43293737835943e-09, "loss": 0.9663, "step": 13747 }, { "epoch": 1.9463438805124937, "grad_norm": 9.39430751097918, "learning_rate": 9.383256135949704e-09, "loss": 0.9559, "step": 13748 }, { "epoch": 1.946485453387131, "grad_norm": 8.069044192599822, "learning_rate": 9.333705823404981e-09, "loss": 0.9505, "step": 13749 }, { "epoch": 1.9466270262617682, "grad_norm": 9.77610336281533, "learning_rate": 9.284286443330127e-09, "loss": 1.0065, "step": 13750 }, { "epoch": 1.9467685991364054, "grad_norm": 8.388706208093303, "learning_rate": 9.234997998323613e-09, "loss": 1.0141, "step": 13751 }, { "epoch": 1.9469101720110427, "grad_norm": 10.366708406648494, "learning_rate": 9.185840490975594e-09, "loss": 1.0471, "step": 13752 }, { "epoch": 1.94705174488568, "grad_norm": 10.572082145842298, "learning_rate": 9.136813923871224e-09, "loss": 1.0055, "step": 13753 }, { "epoch": 1.9471933177603171, "grad_norm": 11.90420570745443, "learning_rate": 9.087918299586772e-09, "loss": 0.8845, "step": 13754 }, { "epoch": 1.9473348906349544, "grad_norm": 9.191859035144777, "learning_rate": 9.039153620693242e-09, "loss": 0.9258, "step": 13755 }, { "epoch": 1.9474764635095916, "grad_norm": 10.30461420202324, "learning_rate": 8.990519889754412e-09, "loss": 0.936, "step": 13756 }, { "epoch": 1.9476180363842288, "grad_norm": 9.210896085042602, "learning_rate": 8.942017109326295e-09, "loss": 0.8834, "step": 13757 }, { "epoch": 1.947759609258866, "grad_norm": 9.470335975495775, "learning_rate": 8.893645281959073e-09, "loss": 0.9056, "step": 13758 }, { "epoch": 1.9479011821335033, "grad_norm": 9.809262658891994, "learning_rate": 8.845404410195157e-09, "loss": 0.9402, "step": 13759 }, { "epoch": 1.9480427550081405, "grad_norm": 10.286170222758445, "learning_rate": 8.79729449657113e-09, "loss": 0.8577, "step": 13760 }, { "epoch": 1.9481843278827777, "grad_norm": 10.255685857475674, "learning_rate": 8.7493155436158e-09, "loss": 0.9612, "step": 13761 }, { "epoch": 1.948325900757415, "grad_norm": 12.217875183477013, "learning_rate": 8.701467553851317e-09, "loss": 1.0659, "step": 13762 }, { "epoch": 1.9484674736320522, "grad_norm": 10.68370943721642, "learning_rate": 8.65375052979317e-09, "loss": 0.8955, "step": 13763 }, { "epoch": 1.9486090465066894, "grad_norm": 8.398897153119085, "learning_rate": 8.60616447394963e-09, "loss": 0.838, "step": 13764 }, { "epoch": 1.9487506193813267, "grad_norm": 11.086734412424635, "learning_rate": 8.558709388822584e-09, "loss": 1.0533, "step": 13765 }, { "epoch": 1.9488921922559639, "grad_norm": 10.28119694194611, "learning_rate": 8.511385276906148e-09, "loss": 0.817, "step": 13766 }, { "epoch": 1.9490337651306011, "grad_norm": 24.162020251962584, "learning_rate": 8.464192140688888e-09, "loss": 0.9223, "step": 13767 }, { "epoch": 1.9491753380052383, "grad_norm": 7.417064384626397, "learning_rate": 8.417129982650762e-09, "loss": 0.9227, "step": 13768 }, { "epoch": 1.9493169108798756, "grad_norm": 9.449090774634623, "learning_rate": 8.370198805266739e-09, "loss": 0.876, "step": 13769 }, { "epoch": 1.9494584837545126, "grad_norm": 9.475861249043952, "learning_rate": 8.323398611003176e-09, "loss": 0.9234, "step": 13770 }, { "epoch": 1.9496000566291498, "grad_norm": 8.789326384927417, "learning_rate": 8.27672940232116e-09, "loss": 0.9823, "step": 13771 }, { "epoch": 1.949741629503787, "grad_norm": 8.151625451255633, "learning_rate": 8.230191181673175e-09, "loss": 0.951, "step": 13772 }, { "epoch": 1.9498832023784243, "grad_norm": 10.561589128393141, "learning_rate": 8.183783951506152e-09, "loss": 1.0485, "step": 13773 }, { "epoch": 1.9500247752530615, "grad_norm": 10.386156449263328, "learning_rate": 8.137507714259806e-09, "loss": 1.0371, "step": 13774 }, { "epoch": 1.9501663481276987, "grad_norm": 9.246924636568366, "learning_rate": 8.09136247236636e-09, "loss": 0.8736, "step": 13775 }, { "epoch": 1.950307921002336, "grad_norm": 10.732412811358776, "learning_rate": 8.045348228252204e-09, "loss": 1.0494, "step": 13776 }, { "epoch": 1.9504494938769732, "grad_norm": 8.858428958389847, "learning_rate": 7.999464984335959e-09, "loss": 1.0392, "step": 13777 }, { "epoch": 1.9505910667516104, "grad_norm": 8.878730097515705, "learning_rate": 7.953712743029585e-09, "loss": 0.9428, "step": 13778 }, { "epoch": 1.9507326396262477, "grad_norm": 9.430983965479365, "learning_rate": 7.908091506738658e-09, "loss": 0.9507, "step": 13779 }, { "epoch": 1.9508742125008849, "grad_norm": 8.568254082795617, "learning_rate": 7.862601277860982e-09, "loss": 0.838, "step": 13780 }, { "epoch": 1.951015785375522, "grad_norm": 9.992628050540324, "learning_rate": 7.817242058788255e-09, "loss": 1.0651, "step": 13781 }, { "epoch": 1.9511573582501591, "grad_norm": 9.539485587720142, "learning_rate": 7.772013851904681e-09, "loss": 0.9801, "step": 13782 }, { "epoch": 1.9512989311247964, "grad_norm": 11.955175605355315, "learning_rate": 7.72691665958808e-09, "loss": 1.1557, "step": 13783 }, { "epoch": 1.9514405039994336, "grad_norm": 7.9848424845562125, "learning_rate": 7.681950484209334e-09, "loss": 0.9984, "step": 13784 }, { "epoch": 1.9515820768740708, "grad_norm": 11.321755348224015, "learning_rate": 7.637115328131828e-09, "loss": 0.9237, "step": 13785 }, { "epoch": 1.951723649748708, "grad_norm": 10.538654927487856, "learning_rate": 7.592411193713123e-09, "loss": 1.0087, "step": 13786 }, { "epoch": 1.9518652226233453, "grad_norm": 8.751318260071594, "learning_rate": 7.547838083302728e-09, "loss": 1.016, "step": 13787 }, { "epoch": 1.9520067954979825, "grad_norm": 10.175766254787415, "learning_rate": 7.503395999244045e-09, "loss": 1.0605, "step": 13788 }, { "epoch": 1.9521483683726197, "grad_norm": 10.168454581050876, "learning_rate": 7.45908494387354e-09, "loss": 0.8283, "step": 13789 }, { "epoch": 1.952289941247257, "grad_norm": 9.438316865646525, "learning_rate": 7.414904919520183e-09, "loss": 0.8882, "step": 13790 }, { "epoch": 1.9524315141218942, "grad_norm": 10.898577158043349, "learning_rate": 7.3708559285068374e-09, "loss": 0.9557, "step": 13791 }, { "epoch": 1.9525730869965314, "grad_norm": 8.977022297025282, "learning_rate": 7.326937973148873e-09, "loss": 0.9214, "step": 13792 }, { "epoch": 1.9527146598711687, "grad_norm": 10.055503924701512, "learning_rate": 7.283151055755555e-09, "loss": 0.9461, "step": 13793 }, { "epoch": 1.9528562327458059, "grad_norm": 9.785105959825858, "learning_rate": 7.23949517862782e-09, "loss": 0.9942, "step": 13794 }, { "epoch": 1.9529978056204431, "grad_norm": 9.965077264757063, "learning_rate": 7.195970344061609e-09, "loss": 0.9645, "step": 13795 }, { "epoch": 1.9531393784950803, "grad_norm": 9.137408872637895, "learning_rate": 7.152576554344259e-09, "loss": 0.8788, "step": 13796 }, { "epoch": 1.9532809513697176, "grad_norm": 10.081632822788933, "learning_rate": 7.109313811757279e-09, "loss": 0.9177, "step": 13797 }, { "epoch": 1.9534225242443548, "grad_norm": 9.227769273721735, "learning_rate": 7.066182118574683e-09, "loss": 0.8425, "step": 13798 }, { "epoch": 1.953564097118992, "grad_norm": 9.434617972996401, "learning_rate": 7.023181477064378e-09, "loss": 0.8584, "step": 13799 }, { "epoch": 1.9537056699936293, "grad_norm": 9.109857405751475, "learning_rate": 6.980311889486502e-09, "loss": 0.978, "step": 13800 }, { "epoch": 1.9538472428682665, "grad_norm": 10.321083149808159, "learning_rate": 6.937573358094529e-09, "loss": 1.0533, "step": 13801 }, { "epoch": 1.9539888157429037, "grad_norm": 9.717962594961472, "learning_rate": 6.894965885135829e-09, "loss": 0.8945, "step": 13802 }, { "epoch": 1.954130388617541, "grad_norm": 8.943355322872526, "learning_rate": 6.852489472849444e-09, "loss": 0.9501, "step": 13803 }, { "epoch": 1.9542719614921782, "grad_norm": 9.617083972711457, "learning_rate": 6.810144123469142e-09, "loss": 0.9254, "step": 13804 }, { "epoch": 1.9544135343668154, "grad_norm": 9.826778835172318, "learning_rate": 6.7679298392200885e-09, "loss": 0.9469, "step": 13805 }, { "epoch": 1.9545551072414526, "grad_norm": 9.16758718781509, "learning_rate": 6.7258466223221745e-09, "loss": 0.9272, "step": 13806 }, { "epoch": 1.9546966801160899, "grad_norm": 9.260291895045496, "learning_rate": 6.683894474987518e-09, "loss": 0.9945, "step": 13807 }, { "epoch": 1.954838252990727, "grad_norm": 10.638949583017135, "learning_rate": 6.6420733994213006e-09, "loss": 1.0379, "step": 13808 }, { "epoch": 1.9549798258653643, "grad_norm": 8.033903284814057, "learning_rate": 6.600383397822319e-09, "loss": 0.872, "step": 13809 }, { "epoch": 1.9551213987400016, "grad_norm": 10.77882353525649, "learning_rate": 6.558824472381875e-09, "loss": 0.8931, "step": 13810 }, { "epoch": 1.9552629716146388, "grad_norm": 7.187725030444699, "learning_rate": 6.5173966252848885e-09, "loss": 0.7972, "step": 13811 }, { "epoch": 1.9554045444892758, "grad_norm": 8.944044584678391, "learning_rate": 6.476099858709062e-09, "loss": 0.9765, "step": 13812 }, { "epoch": 1.955546117363913, "grad_norm": 8.682859210385875, "learning_rate": 6.4349341748254354e-09, "loss": 0.9603, "step": 13813 }, { "epoch": 1.9556876902385503, "grad_norm": 8.655735262222818, "learning_rate": 6.3938995757981125e-09, "loss": 0.9303, "step": 13814 }, { "epoch": 1.9558292631131875, "grad_norm": 10.451945832805226, "learning_rate": 6.3529960637842555e-09, "loss": 1.0073, "step": 13815 }, { "epoch": 1.9559708359878247, "grad_norm": 8.77120146956656, "learning_rate": 6.3122236409338125e-09, "loss": 0.9591, "step": 13816 }, { "epoch": 1.956112408862462, "grad_norm": 9.748816393877924, "learning_rate": 6.271582309390622e-09, "loss": 1.0316, "step": 13817 }, { "epoch": 1.9562539817370992, "grad_norm": 9.084824839169903, "learning_rate": 6.231072071290756e-09, "loss": 1.0123, "step": 13818 }, { "epoch": 1.9563955546117364, "grad_norm": 8.16595886692485, "learning_rate": 6.190692928764175e-09, "loss": 0.9338, "step": 13819 }, { "epoch": 1.9565371274863737, "grad_norm": 9.845767634868508, "learning_rate": 6.150444883933348e-09, "loss": 0.9778, "step": 13820 }, { "epoch": 1.9566787003610109, "grad_norm": 8.271585604826244, "learning_rate": 6.110327938914085e-09, "loss": 0.9406, "step": 13821 }, { "epoch": 1.956820273235648, "grad_norm": 9.092766672565547, "learning_rate": 6.070342095815529e-09, "loss": 0.9238, "step": 13822 }, { "epoch": 1.9569618461102851, "grad_norm": 9.040899381881252, "learning_rate": 6.030487356739334e-09, "loss": 0.9167, "step": 13823 }, { "epoch": 1.9571034189849223, "grad_norm": 9.495610477855964, "learning_rate": 5.990763723780768e-09, "loss": 0.9738, "step": 13824 }, { "epoch": 1.9572449918595596, "grad_norm": 9.871816773950833, "learning_rate": 5.951171199028438e-09, "loss": 0.961, "step": 13825 }, { "epoch": 1.9573865647341968, "grad_norm": 10.29073038102264, "learning_rate": 5.91170978456318e-09, "loss": 1.0215, "step": 13826 }, { "epoch": 1.957528137608834, "grad_norm": 10.661246909669869, "learning_rate": 5.8723794824597226e-09, "loss": 1.0909, "step": 13827 }, { "epoch": 1.9576697104834713, "grad_norm": 9.723610401654524, "learning_rate": 5.833180294785579e-09, "loss": 1.0063, "step": 13828 }, { "epoch": 1.9578112833581085, "grad_norm": 10.891160905259724, "learning_rate": 5.794112223601322e-09, "loss": 0.861, "step": 13829 }, { "epoch": 1.9579528562327457, "grad_norm": 9.562276600740145, "learning_rate": 5.755175270961144e-09, "loss": 1.0269, "step": 13830 }, { "epoch": 1.958094429107383, "grad_norm": 7.807187158361429, "learning_rate": 5.716369438911185e-09, "loss": 0.8542, "step": 13831 }, { "epoch": 1.9582360019820202, "grad_norm": 7.8328241493904, "learning_rate": 5.6776947294923115e-09, "loss": 0.7993, "step": 13832 }, { "epoch": 1.9583775748566574, "grad_norm": 9.840396564215064, "learning_rate": 5.639151144736787e-09, "loss": 0.935, "step": 13833 }, { "epoch": 1.9585191477312947, "grad_norm": 9.633851972124171, "learning_rate": 5.6007386866713255e-09, "loss": 1.0628, "step": 13834 }, { "epoch": 1.9586607206059319, "grad_norm": 8.19446948491238, "learning_rate": 5.5624573573154205e-09, "loss": 0.9348, "step": 13835 }, { "epoch": 1.9588022934805691, "grad_norm": 9.157673487497426, "learning_rate": 5.524307158680797e-09, "loss": 0.8986, "step": 13836 }, { "epoch": 1.9589438663552063, "grad_norm": 9.908094360789297, "learning_rate": 5.486288092773628e-09, "loss": 1.0263, "step": 13837 }, { "epoch": 1.9590854392298436, "grad_norm": 8.20321010000501, "learning_rate": 5.4484001615920375e-09, "loss": 0.9121, "step": 13838 }, { "epoch": 1.9592270121044808, "grad_norm": 9.911197425721127, "learning_rate": 5.410643367128321e-09, "loss": 1.0319, "step": 13839 }, { "epoch": 1.959368584979118, "grad_norm": 8.734641410572564, "learning_rate": 5.373017711367001e-09, "loss": 1.0163, "step": 13840 }, { "epoch": 1.9595101578537553, "grad_norm": 10.011516424628462, "learning_rate": 5.335523196285941e-09, "loss": 1.0025, "step": 13841 }, { "epoch": 1.9596517307283925, "grad_norm": 10.729673819299201, "learning_rate": 5.2981598238563415e-09, "loss": 0.9081, "step": 13842 }, { "epoch": 1.9597933036030297, "grad_norm": 10.064005673939086, "learning_rate": 5.260927596042464e-09, "loss": 1.0215, "step": 13843 }, { "epoch": 1.959934876477667, "grad_norm": 8.797285216068168, "learning_rate": 5.223826514801356e-09, "loss": 0.8695, "step": 13844 }, { "epoch": 1.9600764493523042, "grad_norm": 10.665126291949955, "learning_rate": 5.186856582083677e-09, "loss": 1.032, "step": 13845 }, { "epoch": 1.9602180222269414, "grad_norm": 9.221206803191574, "learning_rate": 5.1500177998325965e-09, "loss": 0.8942, "step": 13846 }, { "epoch": 1.9603595951015786, "grad_norm": 8.230606867315245, "learning_rate": 5.1133101699848975e-09, "loss": 0.9032, "step": 13847 }, { "epoch": 1.9605011679762159, "grad_norm": 10.884976425881325, "learning_rate": 5.076733694470149e-09, "loss": 0.9242, "step": 13848 }, { "epoch": 1.960642740850853, "grad_norm": 9.698371152680059, "learning_rate": 5.040288375211255e-09, "loss": 0.9878, "step": 13849 }, { "epoch": 1.9607843137254903, "grad_norm": 9.419161125783791, "learning_rate": 5.003974214124186e-09, "loss": 0.8995, "step": 13850 }, { "epoch": 1.9609258866001276, "grad_norm": 10.36216806356368, "learning_rate": 4.96779121311769e-09, "loss": 0.9409, "step": 13851 }, { "epoch": 1.9610674594747648, "grad_norm": 8.850665620244994, "learning_rate": 4.931739374093858e-09, "loss": 0.9241, "step": 13852 }, { "epoch": 1.9612090323494018, "grad_norm": 11.053803727371937, "learning_rate": 4.895818698948396e-09, "loss": 0.981, "step": 13853 }, { "epoch": 1.961350605224039, "grad_norm": 9.448588141679885, "learning_rate": 4.860029189569237e-09, "loss": 0.9918, "step": 13854 }, { "epoch": 1.9614921780986763, "grad_norm": 9.552205846590857, "learning_rate": 4.824370847837933e-09, "loss": 0.9193, "step": 13855 }, { "epoch": 1.9616337509733135, "grad_norm": 9.754506561297426, "learning_rate": 4.788843675629096e-09, "loss": 0.9275, "step": 13856 }, { "epoch": 1.9617753238479507, "grad_norm": 8.4297775532435, "learning_rate": 4.7534476748098416e-09, "loss": 0.9393, "step": 13857 }, { "epoch": 1.961916896722588, "grad_norm": 11.92590024097205, "learning_rate": 4.7181828472417365e-09, "loss": 0.9861, "step": 13858 }, { "epoch": 1.9620584695972252, "grad_norm": 11.138815541605933, "learning_rate": 4.6830491947777445e-09, "loss": 0.9698, "step": 13859 }, { "epoch": 1.9622000424718624, "grad_norm": 11.023367250949974, "learning_rate": 4.648046719265553e-09, "loss": 0.9316, "step": 13860 }, { "epoch": 1.9623416153464996, "grad_norm": 8.790932526753997, "learning_rate": 4.61317542254508e-09, "loss": 0.973, "step": 13861 }, { "epoch": 1.9624831882211369, "grad_norm": 9.946284412201294, "learning_rate": 4.578435306449025e-09, "loss": 1.0141, "step": 13862 }, { "epoch": 1.9626247610957739, "grad_norm": 8.487072257723066, "learning_rate": 4.543826372803983e-09, "loss": 0.9251, "step": 13863 }, { "epoch": 1.9627663339704111, "grad_norm": 8.38865846004457, "learning_rate": 4.50934862342961e-09, "loss": 0.9073, "step": 13864 }, { "epoch": 1.9629079068450483, "grad_norm": 10.889355885186152, "learning_rate": 4.475002060137789e-09, "loss": 0.97, "step": 13865 }, { "epoch": 1.9630494797196856, "grad_norm": 10.12561899812623, "learning_rate": 4.440786684734577e-09, "loss": 1.0321, "step": 13866 }, { "epoch": 1.9631910525943228, "grad_norm": 9.60651515180278, "learning_rate": 4.406702499018256e-09, "loss": 0.8903, "step": 13867 }, { "epoch": 1.96333262546896, "grad_norm": 10.988155924343074, "learning_rate": 4.372749504780727e-09, "loss": 0.9591, "step": 13868 }, { "epoch": 1.9634741983435973, "grad_norm": 8.2348836491975, "learning_rate": 4.338927703807227e-09, "loss": 0.9118, "step": 13869 }, { "epoch": 1.9636157712182345, "grad_norm": 8.177189748926057, "learning_rate": 4.305237097875226e-09, "loss": 0.8775, "step": 13870 }, { "epoch": 1.9637573440928717, "grad_norm": 9.154900877836297, "learning_rate": 4.271677688756082e-09, "loss": 0.9028, "step": 13871 }, { "epoch": 1.963898916967509, "grad_norm": 8.564947293682238, "learning_rate": 4.23824947821394e-09, "loss": 0.982, "step": 13872 }, { "epoch": 1.9640404898421462, "grad_norm": 8.751250695352645, "learning_rate": 4.204952468006007e-09, "loss": 0.9335, "step": 13873 }, { "epoch": 1.9641820627167834, "grad_norm": 8.886438010675487, "learning_rate": 4.171786659882826e-09, "loss": 0.8991, "step": 13874 }, { "epoch": 1.9643236355914206, "grad_norm": 10.025728696752116, "learning_rate": 4.138752055588002e-09, "loss": 0.8841, "step": 13875 }, { "epoch": 1.9644652084660579, "grad_norm": 11.077379812295838, "learning_rate": 4.105848656857925e-09, "loss": 1.0422, "step": 13876 }, { "epoch": 1.964606781340695, "grad_norm": 9.957869472367618, "learning_rate": 4.073076465422321e-09, "loss": 1.0197, "step": 13877 }, { "epoch": 1.9647483542153323, "grad_norm": 8.851531470598745, "learning_rate": 4.0404354830042566e-09, "loss": 0.9124, "step": 13878 }, { "epoch": 1.9648899270899696, "grad_norm": 13.088989849237317, "learning_rate": 4.0079257113190275e-09, "loss": 0.973, "step": 13879 }, { "epoch": 1.9650314999646068, "grad_norm": 8.765031383086109, "learning_rate": 3.9755471520763754e-09, "loss": 0.9857, "step": 13880 }, { "epoch": 1.965173072839244, "grad_norm": 10.099395023772038, "learning_rate": 3.943299806977996e-09, "loss": 1.0255, "step": 13881 }, { "epoch": 1.9653146457138813, "grad_norm": 10.417015212267724, "learning_rate": 3.911183677719199e-09, "loss": 0.874, "step": 13882 }, { "epoch": 1.9654562185885185, "grad_norm": 9.590932028371984, "learning_rate": 3.8791987659883565e-09, "loss": 0.9618, "step": 13883 }, { "epoch": 1.9655977914631557, "grad_norm": 10.391239922476279, "learning_rate": 3.847345073466624e-09, "loss": 0.8776, "step": 13884 }, { "epoch": 1.965739364337793, "grad_norm": 10.551461314740477, "learning_rate": 3.81562260182905e-09, "loss": 0.9431, "step": 13885 }, { "epoch": 1.9658809372124302, "grad_norm": 9.254553421597562, "learning_rate": 3.784031352742912e-09, "loss": 0.8914, "step": 13886 }, { "epoch": 1.9660225100870674, "grad_norm": 9.810342928213093, "learning_rate": 3.752571327868826e-09, "loss": 0.9764, "step": 13887 }, { "epoch": 1.9661640829617046, "grad_norm": 9.382410401328906, "learning_rate": 3.721242528861024e-09, "loss": 0.8723, "step": 13888 }, { "epoch": 1.9663056558363419, "grad_norm": 8.474443499543385, "learning_rate": 3.6900449573659682e-09, "loss": 0.9809, "step": 13889 }, { "epoch": 1.966447228710979, "grad_norm": 7.888031123395883, "learning_rate": 3.6589786150240112e-09, "loss": 0.9752, "step": 13890 }, { "epoch": 1.9665888015856163, "grad_norm": 11.158437135159941, "learning_rate": 3.6280435034682927e-09, "loss": 0.8799, "step": 13891 }, { "epoch": 1.9667303744602536, "grad_norm": 10.97045189676961, "learning_rate": 3.597239624325011e-09, "loss": 1.0121, "step": 13892 }, { "epoch": 1.9668719473348908, "grad_norm": 11.429483908602005, "learning_rate": 3.5665669792131484e-09, "loss": 0.9647, "step": 13893 }, { "epoch": 1.9670135202095278, "grad_norm": 10.769157904074516, "learning_rate": 3.5360255697455826e-09, "loss": 0.9951, "step": 13894 }, { "epoch": 1.967155093084165, "grad_norm": 8.873195410998104, "learning_rate": 3.505615397527695e-09, "loss": 0.9311, "step": 13895 }, { "epoch": 1.9672966659588023, "grad_norm": 8.651866257281531, "learning_rate": 3.4753364641582076e-09, "loss": 0.8699, "step": 13896 }, { "epoch": 1.9674382388334395, "grad_norm": 10.649040042792956, "learning_rate": 3.445188771228625e-09, "loss": 1.1079, "step": 13897 }, { "epoch": 1.9675798117080767, "grad_norm": 10.470833733271874, "learning_rate": 3.4151723203240673e-09, "loss": 0.9937, "step": 13898 }, { "epoch": 1.967721384582714, "grad_norm": 9.04971990836854, "learning_rate": 3.385287113022717e-09, "loss": 0.9848, "step": 13899 }, { "epoch": 1.9678629574573512, "grad_norm": 8.548466898568302, "learning_rate": 3.3555331508947076e-09, "loss": 0.9521, "step": 13900 }, { "epoch": 1.9680045303319884, "grad_norm": 9.72188054044462, "learning_rate": 3.325910435505175e-09, "loss": 0.9954, "step": 13901 }, { "epoch": 1.9681461032066256, "grad_norm": 11.186989884494455, "learning_rate": 3.296418968410653e-09, "loss": 1.0185, "step": 13902 }, { "epoch": 1.9682876760812629, "grad_norm": 8.413211024987817, "learning_rate": 3.2670587511618448e-09, "loss": 0.9541, "step": 13903 }, { "epoch": 1.9684292489559, "grad_norm": 8.81890073954267, "learning_rate": 3.2378297853022377e-09, "loss": 0.9058, "step": 13904 }, { "epoch": 1.968570821830537, "grad_norm": 8.208235945018975, "learning_rate": 3.208732072368104e-09, "loss": 1.002, "step": 13905 }, { "epoch": 1.9687123947051743, "grad_norm": 8.674799795341551, "learning_rate": 3.179765613889052e-09, "loss": 0.8216, "step": 13906 }, { "epoch": 1.9688539675798116, "grad_norm": 11.089957263200313, "learning_rate": 3.150930411388309e-09, "loss": 0.9744, "step": 13907 }, { "epoch": 1.9689955404544488, "grad_norm": 10.246127257617722, "learning_rate": 3.1222264663813285e-09, "loss": 1.0275, "step": 13908 }, { "epoch": 1.969137113329086, "grad_norm": 9.76963003811009, "learning_rate": 3.0936537803771814e-09, "loss": 1.0527, "step": 13909 }, { "epoch": 1.9692786862037233, "grad_norm": 9.675072753001857, "learning_rate": 3.065212354878e-09, "loss": 1.0166, "step": 13910 }, { "epoch": 1.9694202590783605, "grad_norm": 10.993615118070025, "learning_rate": 3.036902191378699e-09, "loss": 0.9799, "step": 13911 }, { "epoch": 1.9695618319529977, "grad_norm": 10.570399834496934, "learning_rate": 3.0087232913675325e-09, "loss": 1.0347, "step": 13912 }, { "epoch": 1.969703404827635, "grad_norm": 10.214726860588382, "learning_rate": 2.980675656326093e-09, "loss": 1.0828, "step": 13913 }, { "epoch": 1.9698449777022722, "grad_norm": 8.17832980688452, "learning_rate": 2.9527592877284793e-09, "loss": 0.9624, "step": 13914 }, { "epoch": 1.9699865505769094, "grad_norm": 9.055543952835649, "learning_rate": 2.924974187042684e-09, "loss": 0.9055, "step": 13915 }, { "epoch": 1.9701281234515466, "grad_norm": 10.57999681959483, "learning_rate": 2.8973203557289274e-09, "loss": 1.0327, "step": 13916 }, { "epoch": 1.9702696963261839, "grad_norm": 11.447457403539852, "learning_rate": 2.869797795241325e-09, "loss": 1.0303, "step": 13917 }, { "epoch": 1.970411269200821, "grad_norm": 9.351236973500901, "learning_rate": 2.8424065070262186e-09, "loss": 0.9229, "step": 13918 }, { "epoch": 1.9705528420754583, "grad_norm": 8.598753869906638, "learning_rate": 2.8151464925241235e-09, "loss": 0.9216, "step": 13919 }, { "epoch": 1.9706944149500956, "grad_norm": 11.4853701205769, "learning_rate": 2.7880177531677822e-09, "loss": 0.9128, "step": 13920 }, { "epoch": 1.9708359878247328, "grad_norm": 8.765802119840444, "learning_rate": 2.7610202903829986e-09, "loss": 0.9528, "step": 13921 }, { "epoch": 1.97097756069937, "grad_norm": 9.691083479107144, "learning_rate": 2.734154105589748e-09, "loss": 0.9595, "step": 13922 }, { "epoch": 1.9711191335740073, "grad_norm": 9.500778166371509, "learning_rate": 2.7074192001996792e-09, "loss": 0.9791, "step": 13923 }, { "epoch": 1.9712607064486445, "grad_norm": 9.292547985298867, "learning_rate": 2.680815575618889e-09, "loss": 0.8868, "step": 13924 }, { "epoch": 1.9714022793232817, "grad_norm": 10.130296993439677, "learning_rate": 2.654343233245149e-09, "loss": 0.9465, "step": 13925 }, { "epoch": 1.971543852197919, "grad_norm": 9.832515057499634, "learning_rate": 2.6280021744706783e-09, "loss": 1.0062, "step": 13926 }, { "epoch": 1.9716854250725562, "grad_norm": 10.447995328496148, "learning_rate": 2.6017924006799254e-09, "loss": 0.9874, "step": 13927 }, { "epoch": 1.9718269979471934, "grad_norm": 9.833025997553078, "learning_rate": 2.5757139132509545e-09, "loss": 1.0468, "step": 13928 }, { "epoch": 1.9719685708218306, "grad_norm": 9.938483807311032, "learning_rate": 2.5497667135546135e-09, "loss": 0.8859, "step": 13929 }, { "epoch": 1.9721101436964679, "grad_norm": 10.559106767041113, "learning_rate": 2.5239508029545332e-09, "loss": 0.9626, "step": 13930 }, { "epoch": 1.972251716571105, "grad_norm": 9.69171287239149, "learning_rate": 2.4982661828085175e-09, "loss": 0.9002, "step": 13931 }, { "epoch": 1.9723932894457423, "grad_norm": 10.116733520219885, "learning_rate": 2.4727128544660415e-09, "loss": 0.9359, "step": 13932 }, { "epoch": 1.9725348623203796, "grad_norm": 11.23063484931517, "learning_rate": 2.447290819271031e-09, "loss": 0.9634, "step": 13933 }, { "epoch": 1.9726764351950168, "grad_norm": 8.947989756974355, "learning_rate": 2.4220000785599162e-09, "loss": 1.0433, "step": 13934 }, { "epoch": 1.972818008069654, "grad_norm": 12.025938610474054, "learning_rate": 2.3968406336616344e-09, "loss": 1.1168, "step": 13935 }, { "epoch": 1.972959580944291, "grad_norm": 9.070209459874457, "learning_rate": 2.3718124858992943e-09, "loss": 1.026, "step": 13936 }, { "epoch": 1.9731011538189283, "grad_norm": 10.176644936954986, "learning_rate": 2.3469156365885095e-09, "loss": 0.9297, "step": 13937 }, { "epoch": 1.9732427266935655, "grad_norm": 9.966343891271364, "learning_rate": 2.3221500870379552e-09, "loss": 1.0063, "step": 13938 }, { "epoch": 1.9733842995682027, "grad_norm": 9.893771329508555, "learning_rate": 2.2975158385496466e-09, "loss": 0.928, "step": 13939 }, { "epoch": 1.97352587244284, "grad_norm": 9.343549132978893, "learning_rate": 2.273012892418658e-09, "loss": 0.914, "step": 13940 }, { "epoch": 1.9736674453174772, "grad_norm": 10.759538189362026, "learning_rate": 2.248641249932848e-09, "loss": 0.9975, "step": 13941 }, { "epoch": 1.9738090181921144, "grad_norm": 8.794294015938377, "learning_rate": 2.2244009123734145e-09, "loss": 1.0449, "step": 13942 }, { "epoch": 1.9739505910667516, "grad_norm": 7.600852376420985, "learning_rate": 2.200291881015171e-09, "loss": 0.8593, "step": 13943 }, { "epoch": 1.9740921639413889, "grad_norm": 10.519513843747204, "learning_rate": 2.1763141571248813e-09, "loss": 1.0167, "step": 13944 }, { "epoch": 1.974233736816026, "grad_norm": 9.171049792519666, "learning_rate": 2.152467741963482e-09, "loss": 0.9663, "step": 13945 }, { "epoch": 1.974375309690663, "grad_norm": 9.481238863189734, "learning_rate": 2.1287526367844147e-09, "loss": 0.9002, "step": 13946 }, { "epoch": 1.9745168825653003, "grad_norm": 9.161704876423153, "learning_rate": 2.105168842834182e-09, "loss": 0.9645, "step": 13947 }, { "epoch": 1.9746584554399376, "grad_norm": 9.188942309460952, "learning_rate": 2.081716361352626e-09, "loss": 0.9103, "step": 13948 }, { "epoch": 1.9748000283145748, "grad_norm": 11.00366323336109, "learning_rate": 2.058395193572926e-09, "loss": 1.1364, "step": 13949 }, { "epoch": 1.974941601189212, "grad_norm": 11.168397759773235, "learning_rate": 2.0352053407207696e-09, "loss": 0.9418, "step": 13950 }, { "epoch": 1.9750831740638493, "grad_norm": 7.923587892278972, "learning_rate": 2.0121468040151803e-09, "loss": 0.9801, "step": 13951 }, { "epoch": 1.9752247469384865, "grad_norm": 10.772649635237109, "learning_rate": 1.9892195846685227e-09, "loss": 0.9814, "step": 13952 }, { "epoch": 1.9753663198131237, "grad_norm": 7.970676563221486, "learning_rate": 1.9664236838862204e-09, "loss": 1.0273, "step": 13953 }, { "epoch": 1.975507892687761, "grad_norm": 8.684703788559489, "learning_rate": 1.9437591028662053e-09, "loss": 0.935, "step": 13954 }, { "epoch": 1.9756494655623982, "grad_norm": 9.900083337298188, "learning_rate": 1.921225842800023e-09, "loss": 0.9843, "step": 13955 }, { "epoch": 1.9757910384370354, "grad_norm": 8.344774961917132, "learning_rate": 1.8988239048725598e-09, "loss": 0.9162, "step": 13956 }, { "epoch": 1.9759326113116726, "grad_norm": 9.249192073695587, "learning_rate": 1.876553290261207e-09, "loss": 1.0437, "step": 13957 }, { "epoch": 1.9760741841863099, "grad_norm": 9.35042514752421, "learning_rate": 1.854414000136695e-09, "loss": 0.9327, "step": 13958 }, { "epoch": 1.976215757060947, "grad_norm": 9.27804397028548, "learning_rate": 1.8324060356630925e-09, "loss": 0.9507, "step": 13959 }, { "epoch": 1.9763573299355843, "grad_norm": 9.012726157389114, "learning_rate": 1.8105293979972516e-09, "loss": 0.968, "step": 13960 }, { "epoch": 1.9764989028102216, "grad_norm": 8.62307540871553, "learning_rate": 1.7887840882888085e-09, "loss": 0.9067, "step": 13961 }, { "epoch": 1.9766404756848588, "grad_norm": 8.547023625450201, "learning_rate": 1.7671701076815706e-09, "loss": 0.9334, "step": 13962 }, { "epoch": 1.976782048559496, "grad_norm": 9.83787375761657, "learning_rate": 1.7456874573112958e-09, "loss": 0.9523, "step": 13963 }, { "epoch": 1.9769236214341332, "grad_norm": 9.910499599885766, "learning_rate": 1.7243361383076363e-09, "loss": 1.0004, "step": 13964 }, { "epoch": 1.9770651943087705, "grad_norm": 11.16983393450913, "learning_rate": 1.703116151792472e-09, "loss": 1.0169, "step": 13965 }, { "epoch": 1.9772067671834077, "grad_norm": 9.75306457872385, "learning_rate": 1.6820274988818552e-09, "loss": 0.9968, "step": 13966 }, { "epoch": 1.977348340058045, "grad_norm": 10.828173616638695, "learning_rate": 1.6610701806843432e-09, "loss": 0.9638, "step": 13967 }, { "epoch": 1.9774899129326822, "grad_norm": 9.898615545705352, "learning_rate": 1.6402441983015548e-09, "loss": 0.9042, "step": 13968 }, { "epoch": 1.9776314858073194, "grad_norm": 9.713250569881371, "learning_rate": 1.6195495528281701e-09, "loss": 1.0102, "step": 13969 }, { "epoch": 1.9777730586819566, "grad_norm": 9.268443921952326, "learning_rate": 1.5989862453522075e-09, "loss": 0.9719, "step": 13970 }, { "epoch": 1.9779146315565939, "grad_norm": 8.331816980367243, "learning_rate": 1.5785542769544692e-09, "loss": 0.8962, "step": 13971 }, { "epoch": 1.978056204431231, "grad_norm": 8.825753276544587, "learning_rate": 1.5582536487093737e-09, "loss": 1.0386, "step": 13972 }, { "epoch": 1.9781977773058683, "grad_norm": 9.455279245111026, "learning_rate": 1.5380843616841223e-09, "loss": 0.7992, "step": 13973 }, { "epoch": 1.9783393501805056, "grad_norm": 8.69152563120259, "learning_rate": 1.518046416938701e-09, "loss": 0.8825, "step": 13974 }, { "epoch": 1.9784809230551428, "grad_norm": 10.708690315757973, "learning_rate": 1.4981398155267112e-09, "loss": 0.932, "step": 13975 }, { "epoch": 1.97862249592978, "grad_norm": 9.222338168884805, "learning_rate": 1.4783645584942607e-09, "loss": 0.9793, "step": 13976 }, { "epoch": 1.978764068804417, "grad_norm": 9.207625371953096, "learning_rate": 1.4587206468816285e-09, "loss": 0.8506, "step": 13977 }, { "epoch": 1.9789056416790543, "grad_norm": 7.837309727648247, "learning_rate": 1.4392080817207666e-09, "loss": 0.8996, "step": 13978 }, { "epoch": 1.9790472145536915, "grad_norm": 9.450600531333023, "learning_rate": 1.4198268640377987e-09, "loss": 0.8772, "step": 13979 }, { "epoch": 1.9791887874283287, "grad_norm": 9.44429186030778, "learning_rate": 1.4005769948516324e-09, "loss": 0.8713, "step": 13980 }, { "epoch": 1.979330360302966, "grad_norm": 10.79476232326227, "learning_rate": 1.381458475173958e-09, "loss": 1.0343, "step": 13981 }, { "epoch": 1.9794719331776032, "grad_norm": 10.44694594148905, "learning_rate": 1.3624713060100825e-09, "loss": 1.0366, "step": 13982 }, { "epoch": 1.9796135060522404, "grad_norm": 7.4690697254394784, "learning_rate": 1.343615488357819e-09, "loss": 0.9388, "step": 13983 }, { "epoch": 1.9797550789268776, "grad_norm": 12.030246762149275, "learning_rate": 1.324891023208874e-09, "loss": 1.1146, "step": 13984 }, { "epoch": 1.9798966518015149, "grad_norm": 10.559021506781017, "learning_rate": 1.306297911547183e-09, "loss": 0.9715, "step": 13985 }, { "epoch": 1.980038224676152, "grad_norm": 9.692709820129073, "learning_rate": 1.287836154350297e-09, "loss": 0.9135, "step": 13986 }, { "epoch": 1.9801797975507893, "grad_norm": 9.254794553581615, "learning_rate": 1.2695057525888288e-09, "loss": 0.7929, "step": 13987 }, { "epoch": 1.9803213704254263, "grad_norm": 9.374559315814405, "learning_rate": 1.2513067072261742e-09, "loss": 0.9053, "step": 13988 }, { "epoch": 1.9804629433000636, "grad_norm": 8.804440539737517, "learning_rate": 1.2332390192193456e-09, "loss": 1.0364, "step": 13989 }, { "epoch": 1.9806045161747008, "grad_norm": 8.718092808172786, "learning_rate": 1.215302689517861e-09, "loss": 0.9212, "step": 13990 }, { "epoch": 1.980746089049338, "grad_norm": 9.299270552053692, "learning_rate": 1.1974977190645777e-09, "loss": 0.9643, "step": 13991 }, { "epoch": 1.9808876619239753, "grad_norm": 9.457992032204828, "learning_rate": 1.1798241087959684e-09, "loss": 0.945, "step": 13992 }, { "epoch": 1.9810292347986125, "grad_norm": 10.365968013006778, "learning_rate": 1.1622818596407348e-09, "loss": 1.0768, "step": 13993 }, { "epoch": 1.9811708076732497, "grad_norm": 8.329458276689156, "learning_rate": 1.1448709725209173e-09, "loss": 0.8813, "step": 13994 }, { "epoch": 1.981312380547887, "grad_norm": 8.947585171179306, "learning_rate": 1.1275914483521721e-09, "loss": 1.0332, "step": 13995 }, { "epoch": 1.9814539534225242, "grad_norm": 9.60698847715025, "learning_rate": 1.1104432880429394e-09, "loss": 0.9802, "step": 13996 }, { "epoch": 1.9815955262971614, "grad_norm": 9.721074945483299, "learning_rate": 1.0934264924941651e-09, "loss": 1.0361, "step": 13997 }, { "epoch": 1.9817370991717986, "grad_norm": 8.280262783874647, "learning_rate": 1.076541062600689e-09, "loss": 0.9591, "step": 13998 }, { "epoch": 1.9818786720464359, "grad_norm": 7.9795688564308715, "learning_rate": 1.059786999250134e-09, "loss": 0.9154, "step": 13999 }, { "epoch": 1.982020244921073, "grad_norm": 9.40698644853753, "learning_rate": 1.0431643033234629e-09, "loss": 1.0131, "step": 14000 }, { "epoch": 1.9821618177957103, "grad_norm": 9.742307709638274, "learning_rate": 1.0266729756944205e-09, "loss": 0.9872, "step": 14001 }, { "epoch": 1.9823033906703476, "grad_norm": 8.972413487818466, "learning_rate": 1.0103130172295362e-09, "loss": 0.8758, "step": 14002 }, { "epoch": 1.9824449635449848, "grad_norm": 10.217186111097742, "learning_rate": 9.940844287895101e-10, "loss": 1.0029, "step": 14003 }, { "epoch": 1.982586536419622, "grad_norm": 9.35798790130844, "learning_rate": 9.779872112267163e-10, "loss": 0.9376, "step": 14004 }, { "epoch": 1.9827281092942592, "grad_norm": 8.42385416360745, "learning_rate": 9.62021365388255e-10, "loss": 0.8877, "step": 14005 }, { "epoch": 1.9828696821688965, "grad_norm": 10.830056814958747, "learning_rate": 9.461868921126216e-10, "loss": 0.9722, "step": 14006 }, { "epoch": 1.9830112550435337, "grad_norm": 8.939988436766107, "learning_rate": 9.304837922327614e-10, "loss": 0.8681, "step": 14007 }, { "epoch": 1.983152827918171, "grad_norm": 8.393747327723188, "learning_rate": 9.149120665738476e-10, "loss": 0.9681, "step": 14008 }, { "epoch": 1.9832944007928082, "grad_norm": 11.694356121133417, "learning_rate": 8.994717159546695e-10, "loss": 1.0164, "step": 14009 }, { "epoch": 1.9834359736674454, "grad_norm": 10.365252592373759, "learning_rate": 8.841627411870779e-10, "loss": 1.123, "step": 14010 }, { "epoch": 1.9835775465420826, "grad_norm": 10.488184866293633, "learning_rate": 8.689851430754293e-10, "loss": 0.9634, "step": 14011 }, { "epoch": 1.9837191194167199, "grad_norm": 10.705136387190935, "learning_rate": 8.539389224176964e-10, "loss": 0.9358, "step": 14012 }, { "epoch": 1.983860692291357, "grad_norm": 8.901929376188756, "learning_rate": 8.390240800051907e-10, "loss": 0.8926, "step": 14013 }, { "epoch": 1.9840022651659943, "grad_norm": 8.39886990165939, "learning_rate": 8.242406166214522e-10, "loss": 0.8348, "step": 14014 }, { "epoch": 1.9841438380406315, "grad_norm": 9.018948423406274, "learning_rate": 8.095885330441921e-10, "loss": 1.0428, "step": 14015 }, { "epoch": 1.9842854109152688, "grad_norm": 10.493082947060255, "learning_rate": 7.950678300430725e-10, "loss": 1.0039, "step": 14016 }, { "epoch": 1.984426983789906, "grad_norm": 11.550248867507886, "learning_rate": 7.806785083819268e-10, "loss": 0.999, "step": 14017 }, { "epoch": 1.9845685566645432, "grad_norm": 10.760185561724942, "learning_rate": 7.664205688170945e-10, "loss": 0.9057, "step": 14018 }, { "epoch": 1.9847101295391802, "grad_norm": 9.608465483507443, "learning_rate": 7.52294012097976e-10, "loss": 0.9473, "step": 14019 }, { "epoch": 1.9848517024138175, "grad_norm": 9.820782124436146, "learning_rate": 7.382988389673107e-10, "loss": 0.9244, "step": 14020 }, { "epoch": 1.9849932752884547, "grad_norm": 9.620168290599345, "learning_rate": 7.244350501606212e-10, "loss": 0.9556, "step": 14021 }, { "epoch": 1.985134848163092, "grad_norm": 12.290730551725442, "learning_rate": 7.10702646406769e-10, "loss": 0.9837, "step": 14022 }, { "epoch": 1.9852764210377292, "grad_norm": 9.247564871612823, "learning_rate": 6.971016284279541e-10, "loss": 0.8975, "step": 14023 }, { "epoch": 1.9854179939123664, "grad_norm": 10.092975976514559, "learning_rate": 6.836319969388828e-10, "loss": 0.9068, "step": 14024 }, { "epoch": 1.9855595667870036, "grad_norm": 9.819925208030282, "learning_rate": 6.702937526475994e-10, "loss": 0.9037, "step": 14025 }, { "epoch": 1.9857011396616409, "grad_norm": 8.894354120299301, "learning_rate": 6.570868962554877e-10, "loss": 0.9689, "step": 14026 }, { "epoch": 1.985842712536278, "grad_norm": 9.814420670033403, "learning_rate": 6.440114284567145e-10, "loss": 0.9736, "step": 14027 }, { "epoch": 1.9859842854109153, "grad_norm": 9.140124602355376, "learning_rate": 6.310673499387854e-10, "loss": 0.9375, "step": 14028 }, { "epoch": 1.9861258582855523, "grad_norm": 11.806328993250823, "learning_rate": 6.182546613817119e-10, "loss": 1.0271, "step": 14029 }, { "epoch": 1.9862674311601896, "grad_norm": 10.289114034490341, "learning_rate": 6.055733634596772e-10, "loss": 1.0454, "step": 14030 }, { "epoch": 1.9864090040348268, "grad_norm": 10.499511344074802, "learning_rate": 5.930234568388149e-10, "loss": 1.0557, "step": 14031 }, { "epoch": 1.986550576909464, "grad_norm": 9.30812820122024, "learning_rate": 5.806049421791527e-10, "loss": 0.8842, "step": 14032 }, { "epoch": 1.9866921497841012, "grad_norm": 11.978519287307854, "learning_rate": 5.683178201335015e-10, "loss": 1.1155, "step": 14033 }, { "epoch": 1.9868337226587385, "grad_norm": 9.072371795328705, "learning_rate": 5.561620913477339e-10, "loss": 1.0163, "step": 14034 }, { "epoch": 1.9869752955333757, "grad_norm": 8.268525103381734, "learning_rate": 5.44137756460783e-10, "loss": 0.965, "step": 14035 }, { "epoch": 1.987116868408013, "grad_norm": 10.72045244315451, "learning_rate": 5.322448161049209e-10, "loss": 1.0096, "step": 14036 }, { "epoch": 1.9872584412826502, "grad_norm": 10.159443232982762, "learning_rate": 5.204832709052032e-10, "loss": 1.0069, "step": 14037 }, { "epoch": 1.9874000141572874, "grad_norm": 9.766401922220263, "learning_rate": 5.088531214800241e-10, "loss": 0.9671, "step": 14038 }, { "epoch": 1.9875415870319246, "grad_norm": 10.770727357521338, "learning_rate": 4.973543684408389e-10, "loss": 0.9431, "step": 14039 }, { "epoch": 1.9876831599065619, "grad_norm": 10.213340517378523, "learning_rate": 4.859870123918864e-10, "loss": 1.0877, "step": 14040 }, { "epoch": 1.987824732781199, "grad_norm": 10.704574063632288, "learning_rate": 4.747510539307442e-10, "loss": 1.0495, "step": 14041 }, { "epoch": 1.9879663056558363, "grad_norm": 9.597340660511444, "learning_rate": 4.636464936483287e-10, "loss": 0.8816, "step": 14042 }, { "epoch": 1.9881078785304735, "grad_norm": 10.459705966879481, "learning_rate": 4.5267333212833943e-10, "loss": 1.0029, "step": 14043 }, { "epoch": 1.9882494514051108, "grad_norm": 8.389996212036587, "learning_rate": 4.418315699475373e-10, "loss": 0.9831, "step": 14044 }, { "epoch": 1.988391024279748, "grad_norm": 8.748187504102413, "learning_rate": 4.311212076760218e-10, "loss": 0.963, "step": 14045 }, { "epoch": 1.9885325971543852, "grad_norm": 10.537162773555615, "learning_rate": 4.2054224587667615e-10, "loss": 0.9568, "step": 14046 }, { "epoch": 1.9886741700290225, "grad_norm": 9.078241816965699, "learning_rate": 4.1009468510544434e-10, "loss": 1.0877, "step": 14047 }, { "epoch": 1.9888157429036597, "grad_norm": 11.14841525871555, "learning_rate": 3.9977852591188694e-10, "loss": 0.97, "step": 14048 }, { "epoch": 1.988957315778297, "grad_norm": 11.62167868800991, "learning_rate": 3.8959376883834776e-10, "loss": 1.0277, "step": 14049 }, { "epoch": 1.9890988886529342, "grad_norm": 9.66497388904279, "learning_rate": 3.795404144199544e-10, "loss": 0.8741, "step": 14050 }, { "epoch": 1.9892404615275714, "grad_norm": 9.64908153531193, "learning_rate": 3.696184631851729e-10, "loss": 1.0663, "step": 14051 }, { "epoch": 1.9893820344022086, "grad_norm": 9.797268654247269, "learning_rate": 3.5982791565608575e-10, "loss": 0.9946, "step": 14052 }, { "epoch": 1.9895236072768459, "grad_norm": 11.365710814736524, "learning_rate": 3.501687723467262e-10, "loss": 0.9605, "step": 14053 }, { "epoch": 1.989665180151483, "grad_norm": 9.827415064655982, "learning_rate": 3.4064103376529876e-10, "loss": 1.0662, "step": 14054 }, { "epoch": 1.9898067530261203, "grad_norm": 10.506409278402064, "learning_rate": 3.3124470041251413e-10, "loss": 1.0051, "step": 14055 }, { "epoch": 1.9899483259007575, "grad_norm": 10.867001453071344, "learning_rate": 3.219797727824214e-10, "loss": 1.0141, "step": 14056 }, { "epoch": 1.9900898987753948, "grad_norm": 8.382054332851057, "learning_rate": 3.128462513618535e-10, "loss": 0.9268, "step": 14057 }, { "epoch": 1.990231471650032, "grad_norm": 10.234006732400957, "learning_rate": 3.0384413663125944e-10, "loss": 1.0637, "step": 14058 }, { "epoch": 1.9903730445246692, "grad_norm": 9.962880095502669, "learning_rate": 2.9497342906387173e-10, "loss": 0.949, "step": 14059 }, { "epoch": 1.9905146173993062, "grad_norm": 9.377295454657773, "learning_rate": 2.862341291257065e-10, "loss": 0.9723, "step": 14060 }, { "epoch": 1.9906561902739435, "grad_norm": 8.355732175750434, "learning_rate": 2.776262372761185e-10, "loss": 0.9262, "step": 14061 }, { "epoch": 1.9907977631485807, "grad_norm": 9.111438445540584, "learning_rate": 2.6914975396807873e-10, "loss": 1.0132, "step": 14062 }, { "epoch": 1.990939336023218, "grad_norm": 9.803736180815319, "learning_rate": 2.6080467964706424e-10, "loss": 0.8982, "step": 14063 }, { "epoch": 1.9910809088978552, "grad_norm": 9.478522265542182, "learning_rate": 2.525910147516131e-10, "loss": 0.9836, "step": 14064 }, { "epoch": 1.9912224817724924, "grad_norm": 9.085043603249751, "learning_rate": 2.4450875971332445e-10, "loss": 1.0378, "step": 14065 }, { "epoch": 1.9913640546471296, "grad_norm": 11.780454735260912, "learning_rate": 2.3655791495769134e-10, "loss": 1.1135, "step": 14066 }, { "epoch": 1.9915056275217669, "grad_norm": 8.86473930272478, "learning_rate": 2.2873848090188e-10, "loss": 0.912, "step": 14067 }, { "epoch": 1.991647200396404, "grad_norm": 8.987107261172982, "learning_rate": 2.2105045795778323e-10, "loss": 0.9194, "step": 14068 }, { "epoch": 1.9917887732710413, "grad_norm": 10.506981481428156, "learning_rate": 2.134938465289671e-10, "loss": 0.962, "step": 14069 }, { "epoch": 1.9919303461456785, "grad_norm": 9.224682777140208, "learning_rate": 2.0606864701289142e-10, "loss": 0.9546, "step": 14070 }, { "epoch": 1.9920719190203156, "grad_norm": 8.660313943365809, "learning_rate": 1.987748597997996e-10, "loss": 0.9418, "step": 14071 }, { "epoch": 1.9922134918949528, "grad_norm": 11.516909565183587, "learning_rate": 1.916124852732737e-10, "loss": 1.0818, "step": 14072 }, { "epoch": 1.99235506476959, "grad_norm": 8.699773447879755, "learning_rate": 1.845815238096793e-10, "loss": 0.9736, "step": 14073 }, { "epoch": 1.9924966376442272, "grad_norm": 7.231468193435274, "learning_rate": 1.776819757787207e-10, "loss": 1.0841, "step": 14074 }, { "epoch": 1.9926382105188645, "grad_norm": 11.735148032335182, "learning_rate": 1.7091384154288571e-10, "loss": 1.0571, "step": 14075 }, { "epoch": 1.9927797833935017, "grad_norm": 9.75933821076059, "learning_rate": 1.6427712145827834e-10, "loss": 0.9774, "step": 14076 }, { "epoch": 1.992921356268139, "grad_norm": 8.190988045178067, "learning_rate": 1.577718158737862e-10, "loss": 0.8637, "step": 14077 }, { "epoch": 1.9930629291427762, "grad_norm": 10.10490099149178, "learning_rate": 1.5139792513135799e-10, "loss": 0.9389, "step": 14078 }, { "epoch": 1.9932045020174134, "grad_norm": 8.639374626980388, "learning_rate": 1.451554495657259e-10, "loss": 0.9225, "step": 14079 }, { "epoch": 1.9933460748920506, "grad_norm": 9.344997973332152, "learning_rate": 1.39044389505516e-10, "loss": 0.9441, "step": 14080 }, { "epoch": 1.9934876477666879, "grad_norm": 8.03580568291897, "learning_rate": 1.3306474527158275e-10, "loss": 0.872, "step": 14081 }, { "epoch": 1.993629220641325, "grad_norm": 9.467412369420497, "learning_rate": 1.2721651717839678e-10, "loss": 0.9022, "step": 14082 }, { "epoch": 1.9937707935159623, "grad_norm": 8.939967955120098, "learning_rate": 1.2149970553376745e-10, "loss": 0.8157, "step": 14083 }, { "epoch": 1.9939123663905995, "grad_norm": 8.383081892223094, "learning_rate": 1.1591431063745495e-10, "loss": 1.0132, "step": 14084 }, { "epoch": 1.9940539392652368, "grad_norm": 10.664703864069587, "learning_rate": 1.1046033278394597e-10, "loss": 0.9679, "step": 14085 }, { "epoch": 1.994195512139874, "grad_norm": 8.761370001836587, "learning_rate": 1.0513777225940047e-10, "loss": 1.0198, "step": 14086 }, { "epoch": 1.9943370850145112, "grad_norm": 9.05917318381838, "learning_rate": 9.994662934387223e-11, "loss": 0.8921, "step": 14087 }, { "epoch": 1.9944786578891485, "grad_norm": 9.996249449256347, "learning_rate": 9.488690430992098e-11, "loss": 0.9482, "step": 14088 }, { "epoch": 1.9946202307637857, "grad_norm": 9.876373581783605, "learning_rate": 8.99585974237227e-11, "loss": 0.9781, "step": 14089 }, { "epoch": 1.994761803638423, "grad_norm": 10.231358393687424, "learning_rate": 8.516170894479203e-11, "loss": 0.9941, "step": 14090 }, { "epoch": 1.9949033765130602, "grad_norm": 9.65139006253297, "learning_rate": 8.049623912459448e-11, "loss": 0.9969, "step": 14091 }, { "epoch": 1.9950449493876974, "grad_norm": 8.456945997693857, "learning_rate": 7.596218820876688e-11, "loss": 0.9453, "step": 14092 }, { "epoch": 1.9951865222623346, "grad_norm": 10.373410758127529, "learning_rate": 7.15595564354521e-11, "loss": 0.9588, "step": 14093 }, { "epoch": 1.9953280951369718, "grad_norm": 9.539160475669041, "learning_rate": 6.728834403640916e-11, "loss": 0.9831, "step": 14094 }, { "epoch": 1.995469668011609, "grad_norm": 11.539020845600172, "learning_rate": 6.314855123590313e-11, "loss": 1.027, "step": 14095 }, { "epoch": 1.9956112408862463, "grad_norm": 9.496088126026718, "learning_rate": 5.914017825153773e-11, "loss": 0.9072, "step": 14096 }, { "epoch": 1.9957528137608835, "grad_norm": 8.727819176385564, "learning_rate": 5.526322529425532e-11, "loss": 0.9314, "step": 14097 }, { "epoch": 1.9958943866355208, "grad_norm": 9.016879042441216, "learning_rate": 5.151769256778183e-11, "loss": 0.9545, "step": 14098 }, { "epoch": 1.996035959510158, "grad_norm": 9.274147887043947, "learning_rate": 4.790358026890429e-11, "loss": 0.9317, "step": 14099 }, { "epoch": 1.9961775323847952, "grad_norm": 7.756148329813097, "learning_rate": 4.44208885877484e-11, "loss": 0.8546, "step": 14100 }, { "epoch": 1.9963191052594325, "grad_norm": 9.180174937191634, "learning_rate": 4.1069617707223396e-11, "loss": 0.9981, "step": 14101 }, { "epoch": 1.9964606781340695, "grad_norm": 10.210510104865598, "learning_rate": 3.7849767803854745e-11, "loss": 0.9232, "step": 14102 }, { "epoch": 1.9966022510087067, "grad_norm": 8.289224952019083, "learning_rate": 3.4761339046396336e-11, "loss": 0.9707, "step": 14103 }, { "epoch": 1.996743823883344, "grad_norm": 8.788531233943473, "learning_rate": 3.1804331597773406e-11, "loss": 1.0367, "step": 14104 }, { "epoch": 1.9968853967579812, "grad_norm": 10.15907450207791, "learning_rate": 2.897874561286207e-11, "loss": 0.9704, "step": 14105 }, { "epoch": 1.9970269696326184, "grad_norm": 8.538672044767027, "learning_rate": 2.6284581240709762e-11, "loss": 0.8485, "step": 14106 }, { "epoch": 1.9971685425072556, "grad_norm": 8.258352040084556, "learning_rate": 2.3721838622592362e-11, "loss": 0.9273, "step": 14107 }, { "epoch": 1.9973101153818928, "grad_norm": 8.326705382200853, "learning_rate": 2.1290517893401974e-11, "loss": 0.9373, "step": 14108 }, { "epoch": 1.99745168825653, "grad_norm": 11.450221590121766, "learning_rate": 1.899061918081424e-11, "loss": 1.0987, "step": 14109 }, { "epoch": 1.9975932611311673, "grad_norm": 9.136704972066354, "learning_rate": 1.682214260584347e-11, "loss": 0.9543, "step": 14110 }, { "epoch": 1.9977348340058045, "grad_norm": 9.715882298353002, "learning_rate": 1.4785088282565084e-11, "loss": 0.9831, "step": 14111 }, { "epoch": 1.9978764068804415, "grad_norm": 10.009648817916032, "learning_rate": 1.2879456318115602e-11, "loss": 0.9491, "step": 14112 }, { "epoch": 1.9980179797550788, "grad_norm": 11.14523532487212, "learning_rate": 1.1105246812137538e-11, "loss": 0.8261, "step": 14113 }, { "epoch": 1.998159552629716, "grad_norm": 8.472663003541568, "learning_rate": 9.462459858444739e-12, "loss": 0.9505, "step": 14114 }, { "epoch": 1.9983011255043532, "grad_norm": 8.70827278469032, "learning_rate": 7.951095543357046e-12, "loss": 0.9245, "step": 14115 }, { "epoch": 1.9984426983789905, "grad_norm": 9.630333945880572, "learning_rate": 6.5711539462554044e-12, "loss": 0.9342, "step": 14116 }, { "epoch": 1.9985842712536277, "grad_norm": 10.226881798873253, "learning_rate": 5.322635139304311e-12, "loss": 0.8568, "step": 14117 }, { "epoch": 1.998725844128265, "grad_norm": 10.778458297411692, "learning_rate": 4.205539188839591e-12, "loss": 1.0366, "step": 14118 }, { "epoch": 1.9988674170029022, "grad_norm": 8.40262976762149, "learning_rate": 3.219866153147955e-12, "loss": 0.9487, "step": 14119 }, { "epoch": 1.9990089898775394, "grad_norm": 8.21709933221857, "learning_rate": 2.3656160838547713e-12, "loss": 0.9008, "step": 14120 }, { "epoch": 1.9991505627521766, "grad_norm": 10.678906650084125, "learning_rate": 1.64278902620163e-12, "loss": 0.8971, "step": 14121 }, { "epoch": 1.9992921356268138, "grad_norm": 8.876277777871838, "learning_rate": 1.0513850182136687e-12, "loss": 1.0227, "step": 14122 }, { "epoch": 1.999433708501451, "grad_norm": 9.206272590852926, "learning_rate": 5.914040909771324e-13, "loss": 0.9777, "step": 14123 }, { "epoch": 1.9995752813760883, "grad_norm": 10.335507892148884, "learning_rate": 2.62846268361816e-13, "loss": 0.8741, "step": 14124 }, { "epoch": 1.9997168542507255, "grad_norm": 9.391732301344476, "learning_rate": 6.571156785373234e-14, "loss": 1.0245, "step": 14125 }, { "epoch": 1.9998584271253628, "grad_norm": 9.947442988490929, "learning_rate": 0.0, "loss": 0.8958, "step": 14126 }, { "epoch": 1.9998584271253628, "step": 14126, "total_flos": 3113833205456896.0, "train_loss": 1.16577873653365, "train_runtime": 406995.3187, "train_samples_per_second": 4.443, "train_steps_per_second": 0.035 } ], "logging_steps": 1.0, "max_steps": 14126, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3113833205456896.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }