{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.079734219269103,
  "eval_steps": 500,
  "global_step": 650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0016611295681063123,
      "grad_norm": 283.7961630862445,
      "learning_rate": 2.7027027027027028e-08,
      "loss": 1.0013,
      "step": 1
    },
    {
      "epoch": 0.0033222591362126247,
      "grad_norm": 403.0161196007378,
      "learning_rate": 5.4054054054054056e-08,
      "loss": 1.3486,
      "step": 2
    },
    {
      "epoch": 0.0049833887043189366,
      "grad_norm": 225.65685288846345,
      "learning_rate": 8.108108108108108e-08,
      "loss": 0.9601,
      "step": 3
    },
    {
      "epoch": 0.006644518272425249,
      "grad_norm": 309.07994612900893,
      "learning_rate": 1.0810810810810811e-07,
      "loss": 0.814,
      "step": 4
    },
    {
      "epoch": 0.008305647840531562,
      "grad_norm": 380.5732961356113,
      "learning_rate": 1.3513513513513515e-07,
      "loss": 0.9665,
      "step": 5
    },
    {
      "epoch": 0.009966777408637873,
      "grad_norm": 350.2306529649741,
      "learning_rate": 1.6216216216216215e-07,
      "loss": 0.9394,
      "step": 6
    },
    {
      "epoch": 0.011627906976744186,
      "grad_norm": 303.16533146413684,
      "learning_rate": 1.891891891891892e-07,
      "loss": 1.1157,
      "step": 7
    },
    {
      "epoch": 0.013289036544850499,
      "grad_norm": 330.3322177024234,
      "learning_rate": 2.1621621621621622e-07,
      "loss": 0.8766,
      "step": 8
    },
    {
      "epoch": 0.014950166112956811,
      "grad_norm": 232.1781635901737,
      "learning_rate": 2.4324324324324326e-07,
      "loss": 0.7901,
      "step": 9
    },
    {
      "epoch": 0.016611295681063124,
      "grad_norm": 303.49257454610745,
      "learning_rate": 2.702702702702703e-07,
      "loss": 0.875,
      "step": 10
    },
    {
      "epoch": 0.018272425249169437,
      "grad_norm": 191.62808346871842,
      "learning_rate": 2.972972972972973e-07,
      "loss": 0.6306,
      "step": 11
    },
    {
      "epoch": 0.019933554817275746,
      "grad_norm": 184.28128658857395,
      "learning_rate": 3.243243243243243e-07,
      "loss": 0.6915,
      "step": 12
    },
    {
      "epoch": 0.02159468438538206,
      "grad_norm": 466.93407676031165,
      "learning_rate": 3.5135135135135134e-07,
      "loss": 0.6333,
      "step": 13
    },
    {
      "epoch": 0.023255813953488372,
      "grad_norm": 146.99367069527102,
      "learning_rate": 3.783783783783784e-07,
      "loss": 0.4615,
      "step": 14
    },
    {
      "epoch": 0.024916943521594685,
      "grad_norm": 146.6363236745373,
      "learning_rate": 4.054054054054054e-07,
      "loss": 0.6328,
      "step": 15
    },
    {
      "epoch": 0.026578073089700997,
      "grad_norm": 196.3416905532426,
      "learning_rate": 4.3243243243243244e-07,
      "loss": 0.6118,
      "step": 16
    },
    {
      "epoch": 0.02823920265780731,
      "grad_norm": 302.44153365360745,
      "learning_rate": 4.594594594594595e-07,
      "loss": 0.5397,
      "step": 17
    },
    {
      "epoch": 0.029900332225913623,
      "grad_norm": 8753.401359110785,
      "learning_rate": 4.864864864864865e-07,
      "loss": 0.6609,
      "step": 18
    },
    {
      "epoch": 0.03156146179401993,
      "grad_norm": 139.31291333841494,
      "learning_rate": 5.135135135135134e-07,
      "loss": 0.4247,
      "step": 19
    },
    {
      "epoch": 0.03322259136212625,
      "grad_norm": 117.2303002306334,
      "learning_rate": 5.405405405405406e-07,
      "loss": 0.5025,
      "step": 20
    },
    {
      "epoch": 0.03488372093023256,
      "grad_norm": 159.0527667166082,
      "learning_rate": 5.675675675675675e-07,
      "loss": 0.4789,
      "step": 21
    },
    {
      "epoch": 0.036544850498338874,
      "grad_norm": 211.30553229145636,
      "learning_rate": 5.945945945945947e-07,
      "loss": 0.6005,
      "step": 22
    },
    {
      "epoch": 0.03820598006644518,
      "grad_norm": 74.75774440900648,
      "learning_rate": 6.216216216216216e-07,
      "loss": 0.4322,
      "step": 23
    },
    {
      "epoch": 0.03986710963455149,
      "grad_norm": 152.6630766770394,
      "learning_rate": 6.486486486486486e-07,
      "loss": 0.6084,
      "step": 24
    },
    {
      "epoch": 0.04152823920265781,
      "grad_norm": 68.59356856504525,
      "learning_rate": 6.756756756756756e-07,
      "loss": 0.3271,
      "step": 25
    },
    {
      "epoch": 0.04318936877076412,
      "grad_norm": 48.213900231519695,
      "learning_rate": 7.027027027027027e-07,
      "loss": 0.3455,
      "step": 26
    },
    {
      "epoch": 0.044850498338870434,
      "grad_norm": 50.0637038462766,
      "learning_rate": 7.297297297297297e-07,
      "loss": 0.3409,
      "step": 27
    },
    {
      "epoch": 0.046511627906976744,
      "grad_norm": 64.20573798423982,
      "learning_rate": 7.567567567567568e-07,
      "loss": 0.448,
      "step": 28
    },
    {
      "epoch": 0.04817275747508306,
      "grad_norm": 132.42160531061333,
      "learning_rate": 7.837837837837838e-07,
      "loss": 0.3915,
      "step": 29
    },
    {
      "epoch": 0.04983388704318937,
      "grad_norm": 101.32898633286817,
      "learning_rate": 8.108108108108108e-07,
      "loss": 0.3159,
      "step": 30
    },
    {
      "epoch": 0.05149501661129568,
      "grad_norm": 67.44357173798667,
      "learning_rate": 8.378378378378377e-07,
      "loss": 0.2786,
      "step": 31
    },
    {
      "epoch": 0.053156146179401995,
      "grad_norm": 94.87155550615857,
      "learning_rate": 8.648648648648649e-07,
      "loss": 0.4167,
      "step": 32
    },
    {
      "epoch": 0.054817275747508304,
      "grad_norm": 120.07807866412833,
      "learning_rate": 8.918918918918918e-07,
      "loss": 0.3996,
      "step": 33
    },
    {
      "epoch": 0.05647840531561462,
      "grad_norm": 53.547698067900235,
      "learning_rate": 9.18918918918919e-07,
      "loss": 0.3981,
      "step": 34
    },
    {
      "epoch": 0.05813953488372093,
      "grad_norm": 89.39620348618399,
      "learning_rate": 9.459459459459459e-07,
      "loss": 0.3523,
      "step": 35
    },
    {
      "epoch": 0.059800664451827246,
      "grad_norm": 125.63871272969014,
      "learning_rate": 9.72972972972973e-07,
      "loss": 0.343,
      "step": 36
    },
    {
      "epoch": 0.061461794019933555,
      "grad_norm": 85.85489825737757,
      "learning_rate": 1e-06,
      "loss": 0.3063,
      "step": 37
    },
    {
      "epoch": 0.06312292358803986,
      "grad_norm": 56.63108841849816,
      "learning_rate": 9.999981882520454e-07,
      "loss": 0.4455,
      "step": 38
    },
    {
      "epoch": 0.06478405315614617,
      "grad_norm": 40.40412484077134,
      "learning_rate": 9.999927530213112e-07,
      "loss": 0.3411,
      "step": 39
    },
    {
      "epoch": 0.0664451827242525,
      "grad_norm": 42.56638423746503,
      "learning_rate": 9.999836943471866e-07,
      "loss": 0.3422,
      "step": 40
    },
    {
      "epoch": 0.0681063122923588,
      "grad_norm": 149.52998978810905,
      "learning_rate": 9.999710122953198e-07,
      "loss": 0.3539,
      "step": 41
    },
    {
      "epoch": 0.06976744186046512,
      "grad_norm": 99.06075800406914,
      "learning_rate": 9.999547069576173e-07,
      "loss": 0.3705,
      "step": 42
    },
    {
      "epoch": 0.07142857142857142,
      "grad_norm": 28.75050949230183,
      "learning_rate": 9.99934778452244e-07,
      "loss": 0.2556,
      "step": 43
    },
    {
      "epoch": 0.07308970099667775,
      "grad_norm": 102.07599788982593,
      "learning_rate": 9.999112269236213e-07,
      "loss": 0.3375,
      "step": 44
    },
    {
      "epoch": 0.07475083056478406,
      "grad_norm": 94.98798632226429,
      "learning_rate": 9.99884052542427e-07,
      "loss": 0.325,
      "step": 45
    },
    {
      "epoch": 0.07641196013289037,
      "grad_norm": 45.49599727736315,
      "learning_rate": 9.99853255505594e-07,
      "loss": 0.3344,
      "step": 46
    },
    {
      "epoch": 0.07807308970099668,
      "grad_norm": 437.1776518783744,
      "learning_rate": 9.99818836036308e-07,
      "loss": 0.3195,
      "step": 47
    },
    {
      "epoch": 0.07973421926910298,
      "grad_norm": 40.386876297214194,
      "learning_rate": 9.997807943840063e-07,
      "loss": 0.2935,
      "step": 48
    },
    {
      "epoch": 0.08139534883720931,
      "grad_norm": 35.81499917192016,
      "learning_rate": 9.997391308243767e-07,
      "loss": 0.3221,
      "step": 49
    },
    {
      "epoch": 0.08305647840531562,
      "grad_norm": 135.69410164163278,
      "learning_rate": 9.996938456593547e-07,
      "loss": 0.3641,
      "step": 50
    },
    {
      "epoch": 0.08471760797342193,
      "grad_norm": 37.49737579141678,
      "learning_rate": 9.996449392171216e-07,
      "loss": 0.3116,
      "step": 51
    },
    {
      "epoch": 0.08637873754152824,
      "grad_norm": 53.54810233248991,
      "learning_rate": 9.995924118521016e-07,
      "loss": 0.2374,
      "step": 52
    },
    {
      "epoch": 0.08803986710963455,
      "grad_norm": 29.764129425896126,
      "learning_rate": 9.995362639449604e-07,
      "loss": 0.3214,
      "step": 53
    },
    {
      "epoch": 0.08970099667774087,
      "grad_norm": 86.36715802553283,
      "learning_rate": 9.994764959026014e-07,
      "loss": 0.2724,
      "step": 54
    },
    {
      "epoch": 0.09136212624584718,
      "grad_norm": 59.60483629138851,
      "learning_rate": 9.99413108158163e-07,
      "loss": 0.2682,
      "step": 55
    },
    {
      "epoch": 0.09302325581395349,
      "grad_norm": 1121.1809969504397,
      "learning_rate": 9.99346101171016e-07,
      "loss": 0.3457,
      "step": 56
    },
    {
      "epoch": 0.0946843853820598,
      "grad_norm": 36.4371245053598,
      "learning_rate": 9.99275475426759e-07,
      "loss": 0.3518,
      "step": 57
    },
    {
      "epoch": 0.09634551495016612,
      "grad_norm": 36.679857127723125,
      "learning_rate": 9.992012314372164e-07,
      "loss": 0.1912,
      "step": 58
    },
    {
      "epoch": 0.09800664451827243,
      "grad_norm": 168.93933955821868,
      "learning_rate": 9.991233697404337e-07,
      "loss": 0.2478,
      "step": 59
    },
    {
      "epoch": 0.09966777408637874,
      "grad_norm": 24.14622539640721,
      "learning_rate": 9.990418909006743e-07,
      "loss": 0.2118,
      "step": 60
    },
    {
      "epoch": 0.10132890365448505,
      "grad_norm": 43.18341622294434,
      "learning_rate": 9.989567955084143e-07,
      "loss": 0.3924,
      "step": 61
    },
    {
      "epoch": 0.10299003322259136,
      "grad_norm": 29.085536311011936,
      "learning_rate": 9.988680841803396e-07,
      "loss": 0.2878,
      "step": 62
    },
    {
      "epoch": 0.10465116279069768,
      "grad_norm": 24.284878773871643,
      "learning_rate": 9.987757575593402e-07,
      "loss": 0.1948,
      "step": 63
    },
    {
      "epoch": 0.10631229235880399,
      "grad_norm": 27.805278117108944,
      "learning_rate": 9.986798163145066e-07,
      "loss": 0.2563,
      "step": 64
    },
    {
      "epoch": 0.1079734219269103,
      "grad_norm": 22.56960935878,
      "learning_rate": 9.985802611411243e-07,
      "loss": 0.2298,
      "step": 65
    },
    {
      "epoch": 0.10963455149501661,
      "grad_norm": 37.482492845739586,
      "learning_rate": 9.984770927606686e-07,
      "loss": 0.2785,
      "step": 66
    },
    {
      "epoch": 0.11129568106312292,
      "grad_norm": 23.848350011819495,
      "learning_rate": 9.983703119207998e-07,
      "loss": 0.2113,
      "step": 67
    },
    {
      "epoch": 0.11295681063122924,
      "grad_norm": 30.649497045880594,
      "learning_rate": 9.98259919395358e-07,
      "loss": 0.255,
      "step": 68
    },
    {
      "epoch": 0.11461794019933555,
      "grad_norm": 23.006604632466246,
      "learning_rate": 9.98145915984357e-07,
      "loss": 0.224,
      "step": 69
    },
    {
      "epoch": 0.11627906976744186,
      "grad_norm": 21.882953775970815,
      "learning_rate": 9.98028302513978e-07,
      "loss": 0.2616,
      "step": 70
    },
    {
      "epoch": 0.11794019933554817,
      "grad_norm": 25.05081248063707,
      "learning_rate": 9.97907079836566e-07,
      "loss": 0.2128,
      "step": 71
    },
    {
      "epoch": 0.11960132890365449,
      "grad_norm": 31.598368685329934,
      "learning_rate": 9.977822488306195e-07,
      "loss": 0.3792,
      "step": 72
    },
    {
      "epoch": 0.1212624584717608,
      "grad_norm": 37.48759519077218,
      "learning_rate": 9.976538104007886e-07,
      "loss": 0.2736,
      "step": 73
    },
    {
      "epoch": 0.12292358803986711,
      "grad_norm": 19.638414416569248,
      "learning_rate": 9.975217654778651e-07,
      "loss": 0.1277,
      "step": 74
    },
    {
      "epoch": 0.12458471760797342,
      "grad_norm": 29.04555059332869,
      "learning_rate": 9.97386115018778e-07,
      "loss": 0.3141,
      "step": 75
    },
    {
      "epoch": 0.12624584717607973,
      "grad_norm": 21.430456032128323,
      "learning_rate": 9.972468600065845e-07,
      "loss": 0.2253,
      "step": 76
    },
    {
      "epoch": 0.12790697674418605,
      "grad_norm": 41.85901544277914,
      "learning_rate": 9.971040014504648e-07,
      "loss": 0.3621,
      "step": 77
    },
    {
      "epoch": 0.12956810631229235,
      "grad_norm": 14.223035223233115,
      "learning_rate": 9.969575403857135e-07,
      "loss": 0.1284,
      "step": 78
    },
    {
      "epoch": 0.13122923588039867,
      "grad_norm": 22.262799198485006,
      "learning_rate": 9.968074778737332e-07,
      "loss": 0.2524,
      "step": 79
    },
    {
      "epoch": 0.132890365448505,
      "grad_norm": 50.697373968189815,
      "learning_rate": 9.966538150020252e-07,
      "loss": 0.2189,
      "step": 80
    },
    {
      "epoch": 0.1345514950166113,
      "grad_norm": 21.86462698311471,
      "learning_rate": 9.964965528841833e-07,
      "loss": 0.2334,
      "step": 81
    },
    {
      "epoch": 0.1362126245847176,
      "grad_norm": 30.03903059222607,
      "learning_rate": 9.963356926598848e-07,
      "loss": 0.2619,
      "step": 82
    },
    {
      "epoch": 0.1378737541528239,
      "grad_norm": 44.39420516166669,
      "learning_rate": 9.961712354948822e-07,
      "loss": 0.3148,
      "step": 83
    },
    {
      "epoch": 0.13953488372093023,
      "grad_norm": 45.40184063467476,
      "learning_rate": 9.960031825809955e-07,
      "loss": 0.2719,
      "step": 84
    },
    {
      "epoch": 0.14119601328903655,
      "grad_norm": 41.30803088566475,
      "learning_rate": 9.95831535136103e-07,
      "loss": 0.2746,
      "step": 85
    },
    {
      "epoch": 0.14285714285714285,
      "grad_norm": 21.10123734223929,
      "learning_rate": 9.956562944041316e-07,
      "loss": 0.2082,
      "step": 86
    },
    {
      "epoch": 0.14451827242524917,
      "grad_norm": 38.37926428810434,
      "learning_rate": 9.954774616550499e-07,
      "loss": 0.221,
      "step": 87
    },
    {
      "epoch": 0.1461794019933555,
      "grad_norm": 21.615149648940918,
      "learning_rate": 9.952950381848576e-07,
      "loss": 0.1952,
      "step": 88
    },
    {
      "epoch": 0.1478405315614618,
      "grad_norm": 21.749889867769415,
      "learning_rate": 9.951090253155757e-07,
      "loss": 0.2139,
      "step": 89
    },
    {
      "epoch": 0.14950166112956811,
      "grad_norm": 16.24745315976571,
      "learning_rate": 9.949194243952382e-07,
      "loss": 0.1852,
      "step": 90
    },
    {
      "epoch": 0.1511627906976744,
      "grad_norm": 13.76793966078715,
      "learning_rate": 9.94726236797881e-07,
      "loss": 0.179,
      "step": 91
    },
    {
      "epoch": 0.15282392026578073,
      "grad_norm": 16.40584213825403,
      "learning_rate": 9.945294639235336e-07,
      "loss": 0.2484,
      "step": 92
    },
    {
      "epoch": 0.15448504983388706,
      "grad_norm": 19.45237506640772,
      "learning_rate": 9.943291071982072e-07,
      "loss": 0.2379,
      "step": 93
    },
    {
      "epoch": 0.15614617940199335,
      "grad_norm": 11.996023853804303,
      "learning_rate": 9.941251680738852e-07,
      "loss": 0.1372,
      "step": 94
    },
    {
      "epoch": 0.15780730897009967,
      "grad_norm": 16.66955200884909,
      "learning_rate": 9.939176480285128e-07,
      "loss": 0.1833,
      "step": 95
    },
    {
      "epoch": 0.15946843853820597,
      "grad_norm": 20.352150945455673,
      "learning_rate": 9.93706548565986e-07,
      "loss": 0.1878,
      "step": 96
    },
    {
      "epoch": 0.1611295681063123,
      "grad_norm": 30.86575710552868,
      "learning_rate": 9.934918712161414e-07,
      "loss": 0.2089,
      "step": 97
    },
    {
      "epoch": 0.16279069767441862,
      "grad_norm": 30.108048559073602,
      "learning_rate": 9.932736175347433e-07,
      "loss": 0.2334,
      "step": 98
    },
    {
      "epoch": 0.1644518272425249,
      "grad_norm": 16.137380650744188,
      "learning_rate": 9.930517891034748e-07,
      "loss": 0.1935,
      "step": 99
    },
    {
      "epoch": 0.16611295681063123,
      "grad_norm": 17.736914720083263,
      "learning_rate": 9.928263875299245e-07,
      "loss": 0.1772,
      "step": 100
    },
    {
      "epoch": 0.16777408637873753,
      "grad_norm": 11.972509757727426,
      "learning_rate": 9.92597414447576e-07,
      "loss": 0.1374,
      "step": 101
    },
    {
      "epoch": 0.16943521594684385,
      "grad_norm": 20.433799792275494,
      "learning_rate": 9.923648715157952e-07,
      "loss": 0.2198,
      "step": 102
    },
    {
      "epoch": 0.17109634551495018,
      "grad_norm": 19.404673471029323,
      "learning_rate": 9.921287604198185e-07,
      "loss": 0.152,
      "step": 103
    },
    {
      "epoch": 0.17275747508305647,
      "grad_norm": 18.73899376004668,
      "learning_rate": 9.918890828707416e-07,
      "loss": 0.2282,
      "step": 104
    },
    {
      "epoch": 0.1744186046511628,
      "grad_norm": 22.306238872760357,
      "learning_rate": 9.916458406055055e-07,
      "loss": 0.1895,
      "step": 105
    },
    {
      "epoch": 0.1760797342192691,
      "grad_norm": 19.021294987112594,
      "learning_rate": 9.91399035386885e-07,
      "loss": 0.2403,
      "step": 106
    },
    {
      "epoch": 0.1777408637873754,
      "grad_norm": 86.53948554137872,
      "learning_rate": 9.911486690034753e-07,
      "loss": 0.1723,
      "step": 107
    },
    {
      "epoch": 0.17940199335548174,
      "grad_norm": 18.7063649829218,
      "learning_rate": 9.908947432696798e-07,
      "loss": 0.2134,
      "step": 108
    },
    {
      "epoch": 0.18106312292358803,
      "grad_norm": 19.41271168055036,
      "learning_rate": 9.906372600256962e-07,
      "loss": 0.225,
      "step": 109
    },
    {
      "epoch": 0.18272425249169436,
      "grad_norm": 20.719404876758283,
      "learning_rate": 9.903762211375032e-07,
      "loss": 0.2158,
      "step": 110
    },
    {
      "epoch": 0.18438538205980065,
      "grad_norm": 21.610832850601305,
      "learning_rate": 9.901116284968478e-07,
      "loss": 0.2267,
      "step": 111
    },
    {
      "epoch": 0.18604651162790697,
      "grad_norm": 22.19654692004818,
      "learning_rate": 9.898434840212305e-07,
      "loss": 0.2376,
      "step": 112
    },
    {
      "epoch": 0.1877076411960133,
      "grad_norm": 14.838187787942,
      "learning_rate": 9.89571789653892e-07,
      "loss": 0.1743,
      "step": 113
    },
    {
      "epoch": 0.1893687707641196,
      "grad_norm": 17.480053388106516,
      "learning_rate": 9.892965473637992e-07,
      "loss": 0.239,
      "step": 114
    },
    {
      "epoch": 0.19102990033222592,
      "grad_norm": 18.66511685282266,
      "learning_rate": 9.890177591456311e-07,
      "loss": 0.2502,
      "step": 115
    },
    {
      "epoch": 0.19269102990033224,
      "grad_norm": 17.526724768165533,
      "learning_rate": 9.887354270197634e-07,
      "loss": 0.2557,
      "step": 116
    },
    {
      "epoch": 0.19435215946843853,
      "grad_norm": 13.92139359787026,
      "learning_rate": 9.884495530322548e-07,
      "loss": 0.2024,
      "step": 117
    },
    {
      "epoch": 0.19601328903654486,
      "grad_norm": 11.321726941564314,
      "learning_rate": 9.881601392548314e-07,
      "loss": 0.1411,
      "step": 118
    },
    {
      "epoch": 0.19767441860465115,
      "grad_norm": 13.326684770702352,
      "learning_rate": 9.878671877848728e-07,
      "loss": 0.1813,
      "step": 119
    },
    {
      "epoch": 0.19933554817275748,
      "grad_norm": 19.563367999650673,
      "learning_rate": 9.875707007453957e-07,
      "loss": 0.2395,
      "step": 120
    },
    {
      "epoch": 0.2009966777408638,
      "grad_norm": 19.412272467279955,
      "learning_rate": 9.872706802850395e-07,
      "loss": 0.1867,
      "step": 121
    },
    {
      "epoch": 0.2026578073089701,
      "grad_norm": 38.85770250068695,
      "learning_rate": 9.869671285780498e-07,
      "loss": 0.213,
      "step": 122
    },
    {
      "epoch": 0.20431893687707642,
      "grad_norm": 12.921432457309935,
      "learning_rate": 9.866600478242635e-07,
      "loss": 0.1208,
      "step": 123
    },
    {
      "epoch": 0.2059800664451827,
      "grad_norm": 23.926163027310174,
      "learning_rate": 9.863494402490922e-07,
      "loss": 0.2012,
      "step": 124
    },
    {
      "epoch": 0.20764119601328904,
      "grad_norm": 15.480133857657162,
      "learning_rate": 9.860353081035065e-07,
      "loss": 0.1231,
      "step": 125
    },
    {
      "epoch": 0.20930232558139536,
      "grad_norm": 22.15508037126096,
      "learning_rate": 9.857176536640195e-07,
      "loss": 0.2013,
      "step": 126
    },
    {
      "epoch": 0.21096345514950166,
      "grad_norm": 18.2661529166959,
      "learning_rate": 9.853964792326704e-07,
      "loss": 0.2317,
      "step": 127
    },
    {
      "epoch": 0.21262458471760798,
      "grad_norm": 16.111552087003037,
      "learning_rate": 9.850717871370073e-07,
      "loss": 0.1145,
      "step": 128
    },
    {
      "epoch": 0.21428571428571427,
      "grad_norm": 17.14067866125475,
      "learning_rate": 9.847435797300718e-07,
      "loss": 0.2102,
      "step": 129
    },
    {
      "epoch": 0.2159468438538206,
      "grad_norm": 15.60291480405653,
      "learning_rate": 9.844118593903797e-07,
      "loss": 0.1035,
      "step": 130
    },
    {
      "epoch": 0.21760797342192692,
      "grad_norm": 15.174859703173738,
      "learning_rate": 9.840766285219059e-07,
      "loss": 0.1183,
      "step": 131
    },
    {
      "epoch": 0.21926910299003322,
      "grad_norm": 25.54546397866724,
      "learning_rate": 9.837378895540655e-07,
      "loss": 0.2647,
      "step": 132
    },
    {
      "epoch": 0.22093023255813954,
      "grad_norm": 10.70010328252097,
      "learning_rate": 9.833956449416976e-07,
      "loss": 0.1388,
      "step": 133
    },
    {
      "epoch": 0.22259136212624583,
      "grad_norm": 13.281620643715373,
      "learning_rate": 9.830498971650454e-07,
      "loss": 0.1973,
      "step": 134
    },
    {
      "epoch": 0.22425249169435216,
      "grad_norm": 15.696504177542776,
      "learning_rate": 9.827006487297406e-07,
      "loss": 0.2341,
      "step": 135
    },
    {
      "epoch": 0.22591362126245848,
      "grad_norm": 11.52200533124044,
      "learning_rate": 9.823479021667838e-07,
      "loss": 0.1317,
      "step": 136
    },
    {
      "epoch": 0.22757475083056478,
      "grad_norm": 27.726328978283576,
      "learning_rate": 9.819916600325262e-07,
      "loss": 0.354,
      "step": 137
    },
    {
      "epoch": 0.2292358803986711,
      "grad_norm": 17.901230984984156,
      "learning_rate": 9.816319249086519e-07,
      "loss": 0.2298,
      "step": 138
    },
    {
      "epoch": 0.23089700996677742,
      "grad_norm": 14.384698945178693,
      "learning_rate": 9.812686994021582e-07,
      "loss": 0.1523,
      "step": 139
    },
    {
      "epoch": 0.23255813953488372,
      "grad_norm": 11.621939766839967,
      "learning_rate": 9.809019861453373e-07,
      "loss": 0.2313,
      "step": 140
    },
    {
      "epoch": 0.23421926910299004,
      "grad_norm": 12.969245765067623,
      "learning_rate": 9.805317877957576e-07,
      "loss": 0.2519,
      "step": 141
    },
    {
      "epoch": 0.23588039867109634,
      "grad_norm": 8.925285164861304,
      "learning_rate": 9.80158107036243e-07,
      "loss": 0.1042,
      "step": 142
    },
    {
      "epoch": 0.23754152823920266,
      "grad_norm": 13.756628040019768,
      "learning_rate": 9.797809465748553e-07,
      "loss": 0.1994,
      "step": 143
    },
    {
      "epoch": 0.23920265780730898,
      "grad_norm": 15.970803014352063,
      "learning_rate": 9.794003091448728e-07,
      "loss": 0.22,
      "step": 144
    },
    {
      "epoch": 0.24086378737541528,
      "grad_norm": 11.738958531546247,
      "learning_rate": 9.790161975047724e-07,
      "loss": 0.1279,
      "step": 145
    },
    {
      "epoch": 0.2425249169435216,
      "grad_norm": 13.897850888275721,
      "learning_rate": 9.786286144382077e-07,
      "loss": 0.1566,
      "step": 146
    },
    {
      "epoch": 0.2441860465116279,
      "grad_norm": 17.313264421559992,
      "learning_rate": 9.7823756275399e-07,
      "loss": 0.225,
      "step": 147
    },
    {
      "epoch": 0.24584717607973422,
      "grad_norm": 23.335482929522485,
      "learning_rate": 9.77843045286068e-07,
      "loss": 0.2193,
      "step": 148
    },
    {
      "epoch": 0.24750830564784054,
      "grad_norm": 13.474282765831358,
      "learning_rate": 9.774450648935062e-07,
      "loss": 0.1841,
      "step": 149
    },
    {
      "epoch": 0.24916943521594684,
      "grad_norm": 9.992948490443583,
      "learning_rate": 9.77043624460465e-07,
      "loss": 0.1319,
      "step": 150
    },
    {
      "epoch": 0.25083056478405313,
      "grad_norm": 12.069688114769054,
      "learning_rate": 9.766387268961807e-07,
      "loss": 0.2002,
      "step": 151
    },
    {
      "epoch": 0.25249169435215946,
      "grad_norm": 19.552574894536637,
      "learning_rate": 9.762303751349421e-07,
      "loss": 0.3202,
      "step": 152
    },
    {
      "epoch": 0.2541528239202658,
      "grad_norm": 15.59457607811674,
      "learning_rate": 9.758185721360713e-07,
      "loss": 0.134,
      "step": 153
    },
    {
      "epoch": 0.2558139534883721,
      "grad_norm": 14.504175942466917,
      "learning_rate": 9.754033208839009e-07,
      "loss": 0.1177,
      "step": 154
    },
    {
      "epoch": 0.2574750830564784,
      "grad_norm": 14.826282805181464,
      "learning_rate": 9.749846243877538e-07,
      "loss": 0.1866,
      "step": 155
    },
    {
      "epoch": 0.2591362126245847,
      "grad_norm": 10.496856702175336,
      "learning_rate": 9.745624856819197e-07,
      "loss": 0.1535,
      "step": 156
    },
    {
      "epoch": 0.260797342192691,
      "grad_norm": 17.600209366012557,
      "learning_rate": 9.741369078256344e-07,
      "loss": 0.1506,
      "step": 157
    },
    {
      "epoch": 0.26245847176079734,
      "grad_norm": 16.897725025587278,
      "learning_rate": 9.737078939030574e-07,
      "loss": 0.1118,
      "step": 158
    },
    {
      "epoch": 0.26411960132890366,
      "grad_norm": 14.824856178621472,
      "learning_rate": 9.73275447023249e-07,
      "loss": 0.1801,
      "step": 159
    },
    {
      "epoch": 0.26578073089701,
      "grad_norm": 10.323291152162106,
      "learning_rate": 9.728395703201482e-07,
      "loss": 0.1151,
      "step": 160
    },
    {
      "epoch": 0.26744186046511625,
      "grad_norm": 25.65889033205719,
      "learning_rate": 9.724002669525494e-07,
      "loss": 0.2601,
      "step": 161
    },
    {
      "epoch": 0.2691029900332226,
      "grad_norm": 15.331455485719754,
      "learning_rate": 9.719575401040814e-07,
      "loss": 0.2295,
      "step": 162
    },
    {
      "epoch": 0.2707641196013289,
      "grad_norm": 15.620975269391694,
      "learning_rate": 9.715113929831816e-07,
      "loss": 0.1661,
      "step": 163
    },
    {
      "epoch": 0.2724252491694352,
      "grad_norm": 21.27647184161525,
      "learning_rate": 9.710618288230743e-07,
      "loss": 0.1653,
      "step": 164
    },
    {
      "epoch": 0.27408637873754155,
      "grad_norm": 9.574800737047406,
      "learning_rate": 9.706088508817475e-07,
      "loss": 0.1149,
      "step": 165
    },
    {
      "epoch": 0.2757475083056478,
      "grad_norm": 10.020677235555565,
      "learning_rate": 9.701524624419288e-07,
      "loss": 0.114,
      "step": 166
    },
    {
      "epoch": 0.27740863787375414,
      "grad_norm": 26.463216054020872,
      "learning_rate": 9.696926668110612e-07,
      "loss": 0.2905,
      "step": 167
    },
    {
      "epoch": 0.27906976744186046,
      "grad_norm": 9.8357213518326,
      "learning_rate": 9.692294673212803e-07,
      "loss": 0.0852,
      "step": 168
    },
    {
      "epoch": 0.2807308970099668,
      "grad_norm": 11.574558850099985,
      "learning_rate": 9.687628673293887e-07,
      "loss": 0.2001,
      "step": 169
    },
    {
      "epoch": 0.2823920265780731,
      "grad_norm": 20.35819109924581,
      "learning_rate": 9.682928702168325e-07,
      "loss": 0.2113,
      "step": 170
    },
    {
      "epoch": 0.2840531561461794,
      "grad_norm": 10.618810483797844,
      "learning_rate": 9.678194793896772e-07,
      "loss": 0.157,
      "step": 171
    },
    {
      "epoch": 0.2857142857142857,
      "grad_norm": 12.604859535960964,
      "learning_rate": 9.673426982785825e-07,
      "loss": 0.1428,
      "step": 172
    },
    {
      "epoch": 0.287375415282392,
      "grad_norm": 14.237387799784342,
      "learning_rate": 9.668625303387768e-07,
      "loss": 0.1614,
      "step": 173
    },
    {
      "epoch": 0.28903654485049834,
      "grad_norm": 10.68448826627167,
      "learning_rate": 9.663789790500332e-07,
      "loss": 0.1228,
      "step": 174
    },
    {
      "epoch": 0.29069767441860467,
      "grad_norm": 14.01788767612421,
      "learning_rate": 9.658920479166444e-07,
      "loss": 0.1634,
      "step": 175
    },
    {
      "epoch": 0.292358803986711,
      "grad_norm": 12.150319124625579,
      "learning_rate": 9.65401740467396e-07,
      "loss": 0.1816,
      "step": 176
    },
    {
      "epoch": 0.29401993355481726,
      "grad_norm": 13.26166970387516,
      "learning_rate": 9.649080602555419e-07,
      "loss": 0.2212,
      "step": 177
    },
    {
      "epoch": 0.2956810631229236,
      "grad_norm": 10.456475792269867,
      "learning_rate": 9.644110108587791e-07,
      "loss": 0.162,
      "step": 178
    },
    {
      "epoch": 0.2973421926910299,
      "grad_norm": 12.01103083397816,
      "learning_rate": 9.6391059587922e-07,
      "loss": 0.1953,
      "step": 179
    },
    {
      "epoch": 0.29900332225913623,
      "grad_norm": 10.932107597417101,
      "learning_rate": 9.634068189433682e-07,
      "loss": 0.1792,
      "step": 180
    },
    {
      "epoch": 0.30066445182724255,
      "grad_norm": 9.912596130837807,
      "learning_rate": 9.628996837020907e-07,
      "loss": 0.171,
      "step": 181
    },
    {
      "epoch": 0.3023255813953488,
      "grad_norm": 9.446769528764404,
      "learning_rate": 9.623891938305928e-07,
      "loss": 0.1131,
      "step": 182
    },
    {
      "epoch": 0.30398671096345514,
      "grad_norm": 11.672641463794086,
      "learning_rate": 9.618753530283901e-07,
      "loss": 0.1384,
      "step": 183
    },
    {
      "epoch": 0.30564784053156147,
      "grad_norm": 10.856472785858744,
      "learning_rate": 9.613581650192831e-07,
      "loss": 0.1635,
      "step": 184
    },
    {
      "epoch": 0.3073089700996678,
      "grad_norm": 15.534434398535327,
      "learning_rate": 9.608376335513285e-07,
      "loss": 0.2019,
      "step": 185
    },
    {
      "epoch": 0.3089700996677741,
      "grad_norm": 9.808233191877529,
      "learning_rate": 9.60313762396814e-07,
      "loss": 0.0811,
      "step": 186
    },
    {
      "epoch": 0.3106312292358804,
      "grad_norm": 11.12760822568997,
      "learning_rate": 9.597865553522297e-07,
      "loss": 0.1293,
      "step": 187
    },
    {
      "epoch": 0.3122923588039867,
      "grad_norm": 16.610274735181868,
      "learning_rate": 9.592560162382403e-07,
      "loss": 0.1754,
      "step": 188
    },
    {
      "epoch": 0.313953488372093,
      "grad_norm": 31.037129088244438,
      "learning_rate": 9.587221488996586e-07,
      "loss": 0.3788,
      "step": 189
    },
    {
      "epoch": 0.31561461794019935,
      "grad_norm": 19.704030174639467,
      "learning_rate": 9.58184957205417e-07,
      "loss": 0.1908,
      "step": 190
    },
    {
      "epoch": 0.31727574750830567,
      "grad_norm": 11.615505292621943,
      "learning_rate": 9.576444450485391e-07,
      "loss": 0.1098,
      "step": 191
    },
    {
      "epoch": 0.31893687707641194,
      "grad_norm": 10.666388155171473,
      "learning_rate": 9.571006163461123e-07,
      "loss": 0.131,
      "step": 192
    },
    {
      "epoch": 0.32059800664451826,
      "grad_norm": 17.15366066076218,
      "learning_rate": 9.565534750392585e-07,
      "loss": 0.2124,
      "step": 193
    },
    {
      "epoch": 0.3222591362126246,
      "grad_norm": 12.172651785352677,
      "learning_rate": 9.560030250931064e-07,
      "loss": 0.1371,
      "step": 194
    },
    {
      "epoch": 0.3239202657807309,
      "grad_norm": 22.03825149725322,
      "learning_rate": 9.554492704967624e-07,
      "loss": 0.2334,
      "step": 195
    },
    {
      "epoch": 0.32558139534883723,
      "grad_norm": 13.245192413042442,
      "learning_rate": 9.548922152632811e-07,
      "loss": 0.1631,
      "step": 196
    },
    {
      "epoch": 0.3272425249169435,
      "grad_norm": 10.802335247206143,
      "learning_rate": 9.543318634296375e-07,
      "loss": 0.1568,
      "step": 197
    },
    {
      "epoch": 0.3289036544850498,
      "grad_norm": 12.17592621693887,
      "learning_rate": 9.53768219056697e-07,
      "loss": 0.1141,
      "step": 198
    },
    {
      "epoch": 0.33056478405315615,
      "grad_norm": 11.253478382590625,
      "learning_rate": 9.532012862291853e-07,
      "loss": 0.1163,
      "step": 199
    },
    {
      "epoch": 0.33222591362126247,
      "grad_norm": 21.815045663911672,
      "learning_rate": 9.526310690556605e-07,
      "loss": 0.1867,
      "step": 200
    },
    {
      "epoch": 0.3338870431893688,
      "grad_norm": 18.859610289112627,
      "learning_rate": 9.520575716684811e-07,
      "loss": 0.2125,
      "step": 201
    },
    {
      "epoch": 0.33554817275747506,
      "grad_norm": 11.205977833725816,
      "learning_rate": 9.514807982237785e-07,
      "loss": 0.1618,
      "step": 202
    },
    {
      "epoch": 0.3372093023255814,
      "grad_norm": 11.388470403679207,
      "learning_rate": 9.50900752901425e-07,
      "loss": 0.1184,
      "step": 203
    },
    {
      "epoch": 0.3388704318936877,
      "grad_norm": 10.075648460940602,
      "learning_rate": 9.503174399050043e-07,
      "loss": 0.1441,
      "step": 204
    },
    {
      "epoch": 0.34053156146179403,
      "grad_norm": 12.322759834617335,
      "learning_rate": 9.497308634617807e-07,
      "loss": 0.1244,
      "step": 205
    },
    {
      "epoch": 0.34219269102990035,
      "grad_norm": 14.94788430766923,
      "learning_rate": 9.491410278226692e-07,
      "loss": 0.2405,
      "step": 206
    },
    {
      "epoch": 0.3438538205980066,
      "grad_norm": 12.443492266603144,
      "learning_rate": 9.485479372622037e-07,
      "loss": 0.149,
      "step": 207
    },
    {
      "epoch": 0.34551495016611294,
      "grad_norm": 11.067400945275928,
      "learning_rate": 9.479515960785068e-07,
      "loss": 0.1404,
      "step": 208
    },
    {
      "epoch": 0.34717607973421927,
      "grad_norm": 11.525444371950433,
      "learning_rate": 9.473520085932579e-07,
      "loss": 0.1384,
      "step": 209
    },
    {
      "epoch": 0.3488372093023256,
      "grad_norm": 10.971783104668468,
      "learning_rate": 9.467491791516626e-07,
      "loss": 0.1349,
      "step": 210
    },
    {
      "epoch": 0.3504983388704319,
      "grad_norm": 12.635492557571897,
      "learning_rate": 9.461431121224214e-07,
      "loss": 0.1997,
      "step": 211
    },
    {
      "epoch": 0.3521594684385382,
      "grad_norm": 19.325558806797325,
      "learning_rate": 9.455338118976966e-07,
      "loss": 0.1585,
      "step": 212
    },
    {
      "epoch": 0.3538205980066445,
      "grad_norm": 8.353857534598658,
      "learning_rate": 9.449212828930822e-07,
      "loss": 0.1202,
      "step": 213
    },
    {
      "epoch": 0.3554817275747508,
      "grad_norm": 10.543973821466691,
      "learning_rate": 9.443055295475707e-07,
      "loss": 0.1858,
      "step": 214
    },
    {
      "epoch": 0.35714285714285715,
      "grad_norm": 9.95076123718523,
      "learning_rate": 9.436865563235217e-07,
      "loss": 0.179,
      "step": 215
    },
    {
      "epoch": 0.3588039867109635,
      "grad_norm": 13.713185473400454,
      "learning_rate": 9.430643677066291e-07,
      "loss": 0.1925,
      "step": 216
    },
    {
      "epoch": 0.36046511627906974,
      "grad_norm": 7.513881407573629,
      "learning_rate": 9.424389682058886e-07,
      "loss": 0.1222,
      "step": 217
    },
    {
      "epoch": 0.36212624584717606,
      "grad_norm": 11.36739084106459,
      "learning_rate": 9.418103623535653e-07,
      "loss": 0.1867,
      "step": 218
    },
    {
      "epoch": 0.3637873754152824,
      "grad_norm": 12.043749514925038,
      "learning_rate": 9.41178554705161e-07,
      "loss": 0.1931,
      "step": 219
    },
    {
      "epoch": 0.3654485049833887,
      "grad_norm": 6.233797219912333,
      "learning_rate": 9.405435498393799e-07,
      "loss": 0.0966,
      "step": 220
    },
    {
      "epoch": 0.36710963455149503,
      "grad_norm": 9.594573864750178,
      "learning_rate": 9.399053523580976e-07,
      "loss": 0.1386,
      "step": 221
    },
    {
      "epoch": 0.3687707641196013,
      "grad_norm": 11.240178079664732,
      "learning_rate": 9.392639668863258e-07,
      "loss": 0.1203,
      "step": 222
    },
    {
      "epoch": 0.3704318936877076,
      "grad_norm": 14.503931970774168,
      "learning_rate": 9.3861939807218e-07,
      "loss": 0.1463,
      "step": 223
    },
    {
      "epoch": 0.37209302325581395,
      "grad_norm": 10.257728339652989,
      "learning_rate": 9.379716505868447e-07,
      "loss": 0.1593,
      "step": 224
    },
    {
      "epoch": 0.37375415282392027,
      "grad_norm": 12.14682043932465,
      "learning_rate": 9.373207291245411e-07,
      "loss": 0.1257,
      "step": 225
    },
    {
      "epoch": 0.3754152823920266,
      "grad_norm": 9.99859322996326,
      "learning_rate": 9.366666384024913e-07,
      "loss": 0.1696,
      "step": 226
    },
    {
      "epoch": 0.3770764119601329,
      "grad_norm": 10.04625893529298,
      "learning_rate": 9.360093831608856e-07,
      "loss": 0.1625,
      "step": 227
    },
    {
      "epoch": 0.3787375415282392,
      "grad_norm": 16.19965207561594,
      "learning_rate": 9.353489681628475e-07,
      "loss": 0.1471,
      "step": 228
    },
    {
      "epoch": 0.3803986710963455,
      "grad_norm": 13.04710404485369,
      "learning_rate": 9.346853981943988e-07,
      "loss": 0.1499,
      "step": 229
    },
    {
      "epoch": 0.38205980066445183,
      "grad_norm": 11.06095587308583,
      "learning_rate": 9.340186780644259e-07,
      "loss": 0.0893,
      "step": 230
    },
    {
      "epoch": 0.38372093023255816,
      "grad_norm": 11.353482545061441,
      "learning_rate": 9.333488126046438e-07,
      "loss": 0.1214,
      "step": 231
    },
    {
      "epoch": 0.3853820598006645,
      "grad_norm": 17.31613403125687,
      "learning_rate": 9.326758066695624e-07,
      "loss": 0.1278,
      "step": 232
    },
    {
      "epoch": 0.38704318936877075,
      "grad_norm": 19.69366300321997,
      "learning_rate": 9.319996651364499e-07,
      "loss": 0.1722,
      "step": 233
    },
    {
      "epoch": 0.38870431893687707,
      "grad_norm": 13.58019477734271,
      "learning_rate": 9.313203929052986e-07,
      "loss": 0.1316,
      "step": 234
    },
    {
      "epoch": 0.3903654485049834,
      "grad_norm": 22.806246251767572,
      "learning_rate": 9.306379948987888e-07,
      "loss": 0.2574,
      "step": 235
    },
    {
      "epoch": 0.3920265780730897,
      "grad_norm": 16.293534036256254,
      "learning_rate": 9.299524760622533e-07,
      "loss": 0.1146,
      "step": 236
    },
    {
      "epoch": 0.39368770764119604,
      "grad_norm": 9.215117865221517,
      "learning_rate": 9.292638413636414e-07,
      "loss": 0.0652,
      "step": 237
    },
    {
      "epoch": 0.3953488372093023,
      "grad_norm": 19.243051206888683,
      "learning_rate": 9.285720957934831e-07,
      "loss": 0.2231,
      "step": 238
    },
    {
      "epoch": 0.39700996677740863,
      "grad_norm": 11.51135738383444,
      "learning_rate": 9.278772443648531e-07,
      "loss": 0.1822,
      "step": 239
    },
    {
      "epoch": 0.39867109634551495,
      "grad_norm": 12.885715889145898,
      "learning_rate": 9.271792921133337e-07,
      "loss": 0.1281,
      "step": 240
    },
    {
      "epoch": 0.4003322259136213,
      "grad_norm": 12.475318988797966,
      "learning_rate": 9.264782440969793e-07,
      "loss": 0.1822,
      "step": 241
    },
    {
      "epoch": 0.4019933554817276,
      "grad_norm": 9.33400580821295,
      "learning_rate": 9.257741053962794e-07,
      "loss": 0.1347,
      "step": 242
    },
    {
      "epoch": 0.40365448504983387,
      "grad_norm": 12.058606856026875,
      "learning_rate": 9.25066881114121e-07,
      "loss": 0.1706,
      "step": 243
    },
    {
      "epoch": 0.4053156146179402,
      "grad_norm": 10.824161359526528,
      "learning_rate": 9.243565763757529e-07,
      "loss": 0.1761,
      "step": 244
    },
    {
      "epoch": 0.4069767441860465,
      "grad_norm": 14.581271946544478,
      "learning_rate": 9.236431963287477e-07,
      "loss": 0.2583,
      "step": 245
    },
    {
      "epoch": 0.40863787375415284,
      "grad_norm": 6.398803338643679,
      "learning_rate": 9.229267461429647e-07,
      "loss": 0.1036,
      "step": 246
    },
    {
      "epoch": 0.41029900332225916,
      "grad_norm": 8.214273717131517,
      "learning_rate": 9.222072310105126e-07,
      "loss": 0.151,
      "step": 247
    },
    {
      "epoch": 0.4119601328903654,
      "grad_norm": 6.531248071881361,
      "learning_rate": 9.214846561457117e-07,
      "loss": 0.1343,
      "step": 248
    },
    {
      "epoch": 0.41362126245847175,
      "grad_norm": 7.946424836117755,
      "learning_rate": 9.207590267850562e-07,
      "loss": 0.1339,
      "step": 249
    },
    {
      "epoch": 0.4152823920265781,
      "grad_norm": 14.683229666391957,
      "learning_rate": 9.200303481871758e-07,
      "loss": 0.2346,
      "step": 250
    },
    {
      "epoch": 0.4169435215946844,
      "grad_norm": 6.7523422282754595,
      "learning_rate": 9.192986256327989e-07,
      "loss": 0.1082,
      "step": 251
    },
    {
      "epoch": 0.4186046511627907,
      "grad_norm": 12.807771031205872,
      "learning_rate": 9.185638644247122e-07,
      "loss": 0.172,
      "step": 252
    },
    {
      "epoch": 0.420265780730897,
      "grad_norm": 12.441325438265876,
      "learning_rate": 9.178260698877247e-07,
      "loss": 0.1524,
      "step": 253
    },
    {
      "epoch": 0.4219269102990033,
      "grad_norm": 11.924760374595467,
      "learning_rate": 9.170852473686272e-07,
      "loss": 0.145,
      "step": 254
    },
    {
      "epoch": 0.42358803986710963,
      "grad_norm": 15.295324115541575,
      "learning_rate": 9.163414022361542e-07,
      "loss": 0.2366,
      "step": 255
    },
    {
      "epoch": 0.42524916943521596,
      "grad_norm": 11.45834430193356,
      "learning_rate": 9.155945398809457e-07,
      "loss": 0.1714,
      "step": 256
    },
    {
      "epoch": 0.4269102990033223,
      "grad_norm": 11.860210597995017,
      "learning_rate": 9.148446657155069e-07,
      "loss": 0.1581,
      "step": 257
    },
    {
      "epoch": 0.42857142857142855,
      "grad_norm": 14.653302275353555,
      "learning_rate": 9.140917851741696e-07,
      "loss": 0.1782,
      "step": 258
    },
    {
      "epoch": 0.43023255813953487,
      "grad_norm": 11.340233977827461,
      "learning_rate": 9.13335903713053e-07,
      "loss": 0.135,
      "step": 259
    },
    {
      "epoch": 0.4318936877076412,
      "grad_norm": 12.33215015955027,
      "learning_rate": 9.125770268100241e-07,
      "loss": 0.1755,
      "step": 260
    },
    {
      "epoch": 0.4335548172757475,
      "grad_norm": 10.82579948342303,
      "learning_rate": 9.118151599646573e-07,
      "loss": 0.1775,
      "step": 261
    },
    {
      "epoch": 0.43521594684385384,
      "grad_norm": 8.893649702916877,
      "learning_rate": 9.110503086981955e-07,
      "loss": 0.134,
      "step": 262
    },
    {
      "epoch": 0.4368770764119601,
      "grad_norm": 11.851382147068934,
      "learning_rate": 9.102824785535096e-07,
      "loss": 0.248,
      "step": 263
    },
    {
      "epoch": 0.43853820598006643,
      "grad_norm": 8.085625713042226,
      "learning_rate": 9.095116750950583e-07,
      "loss": 0.1053,
      "step": 264
    },
    {
      "epoch": 0.44019933554817275,
      "grad_norm": 8.825530836149932,
      "learning_rate": 9.087379039088481e-07,
      "loss": 0.1699,
      "step": 265
    },
    {
      "epoch": 0.4418604651162791,
      "grad_norm": 8.37105680491955,
      "learning_rate": 9.079611706023925e-07,
      "loss": 0.1496,
      "step": 266
    },
    {
      "epoch": 0.4435215946843854,
      "grad_norm": 8.257776032135112,
      "learning_rate": 9.071814808046709e-07,
      "loss": 0.1492,
      "step": 267
    },
    {
      "epoch": 0.44518272425249167,
      "grad_norm": 9.848656586818922,
      "learning_rate": 9.063988401660895e-07,
      "loss": 0.1167,
      "step": 268
    },
    {
      "epoch": 0.446843853820598,
      "grad_norm": 13.15071986231609,
      "learning_rate": 9.056132543584385e-07,
      "loss": 0.2396,
      "step": 269
    },
    {
      "epoch": 0.4485049833887043,
      "grad_norm": 7.4908331870411065,
      "learning_rate": 9.048247290748516e-07,
      "loss": 0.1152,
      "step": 270
    },
    {
      "epoch": 0.45016611295681064,
      "grad_norm": 8.35585031303107,
      "learning_rate": 9.040332700297651e-07,
      "loss": 0.0845,
      "step": 271
    },
    {
      "epoch": 0.45182724252491696,
      "grad_norm": 18.299405153632886,
      "learning_rate": 9.032388829588764e-07,
      "loss": 0.1516,
      "step": 272
    },
    {
      "epoch": 0.45348837209302323,
      "grad_norm": 10.140601449856803,
      "learning_rate": 9.02441573619102e-07,
      "loss": 0.1274,
      "step": 273
    },
    {
      "epoch": 0.45514950166112955,
      "grad_norm": 12.172095204640925,
      "learning_rate": 9.01641347788536e-07,
      "loss": 0.182,
      "step": 274
    },
    {
      "epoch": 0.4568106312292359,
      "grad_norm": 9.563752668175168,
      "learning_rate": 9.008382112664088e-07,
      "loss": 0.0945,
      "step": 275
    },
    {
      "epoch": 0.4584717607973422,
      "grad_norm": 10.287796564887433,
      "learning_rate": 9.000321698730439e-07,
      "loss": 0.0976,
      "step": 276
    },
    {
      "epoch": 0.4601328903654485,
      "grad_norm": 13.189377107182274,
      "learning_rate": 8.992232294498169e-07,
      "loss": 0.1124,
      "step": 277
    },
    {
      "epoch": 0.46179401993355484,
      "grad_norm": 25.060289240642998,
      "learning_rate": 8.984113958591124e-07,
      "loss": 0.1806,
      "step": 278
    },
    {
      "epoch": 0.4634551495016611,
      "grad_norm": 14.670517988936247,
      "learning_rate": 8.975966749842816e-07,
      "loss": 0.1432,
      "step": 279
    },
    {
      "epoch": 0.46511627906976744,
      "grad_norm": 19.822372119676242,
      "learning_rate": 8.967790727296001e-07,
      "loss": 0.2261,
      "step": 280
    },
    {
      "epoch": 0.46677740863787376,
      "grad_norm": 5.631680487351567,
      "learning_rate": 8.959585950202248e-07,
      "loss": 0.0537,
      "step": 281
    },
    {
      "epoch": 0.4684385382059801,
      "grad_norm": 21.281978024222216,
      "learning_rate": 8.95135247802151e-07,
      "loss": 0.2133,
      "step": 282
    },
    {
      "epoch": 0.4700996677740864,
      "grad_norm": 16.409996226272277,
      "learning_rate": 8.943090370421691e-07,
      "loss": 0.1548,
      "step": 283
    },
    {
      "epoch": 0.4717607973421927,
      "grad_norm": 10.142872282405547,
      "learning_rate": 8.934799687278219e-07,
      "loss": 0.1067,
      "step": 284
    },
    {
      "epoch": 0.473421926910299,
      "grad_norm": 22.26876142574316,
      "learning_rate": 8.926480488673605e-07,
      "loss": 0.1667,
      "step": 285
    },
    {
      "epoch": 0.4750830564784053,
      "grad_norm": 9.876108544387835,
      "learning_rate": 8.918132834897015e-07,
      "loss": 0.1081,
      "step": 286
    },
    {
      "epoch": 0.47674418604651164,
      "grad_norm": 13.966378148098578,
      "learning_rate": 8.909756786443827e-07,
      "loss": 0.1993,
      "step": 287
    },
    {
      "epoch": 0.47840531561461797,
      "grad_norm": 14.484542043786044,
      "learning_rate": 8.901352404015194e-07,
      "loss": 0.1349,
      "step": 288
    },
    {
      "epoch": 0.48006644518272423,
      "grad_norm": 7.865469366519635,
      "learning_rate": 8.89291974851761e-07,
      "loss": 0.0748,
      "step": 289
    },
    {
      "epoch": 0.48172757475083056,
      "grad_norm": 12.981698322123203,
      "learning_rate": 8.884458881062457e-07,
      "loss": 0.1387,
      "step": 290
    },
    {
      "epoch": 0.4833887043189369,
      "grad_norm": 14.42068589553622,
      "learning_rate": 8.875969862965574e-07,
      "loss": 0.1887,
      "step": 291
    },
    {
      "epoch": 0.4850498338870432,
      "grad_norm": 19.32168616528694,
      "learning_rate": 8.867452755746805e-07,
      "loss": 0.1184,
      "step": 292
    },
    {
      "epoch": 0.4867109634551495,
      "grad_norm": 14.687720171684697,
      "learning_rate": 8.858907621129559e-07,
      "loss": 0.1596,
      "step": 293
    },
    {
      "epoch": 0.4883720930232558,
      "grad_norm": 10.32826671997461,
      "learning_rate": 8.850334521040352e-07,
      "loss": 0.1432,
      "step": 294
    },
    {
      "epoch": 0.4900332225913621,
      "grad_norm": 9.149170000100604,
      "learning_rate": 8.841733517608374e-07,
      "loss": 0.1725,
      "step": 295
    },
    {
      "epoch": 0.49169435215946844,
      "grad_norm": 7.682038024526175,
      "learning_rate": 8.833104673165024e-07,
      "loss": 0.1473,
      "step": 296
    },
    {
      "epoch": 0.49335548172757476,
      "grad_norm": 7.36387604217277,
      "learning_rate": 8.824448050243469e-07,
      "loss": 0.1065,
      "step": 297
    },
    {
      "epoch": 0.4950166112956811,
      "grad_norm": 10.351711544120024,
      "learning_rate": 8.815763711578183e-07,
      "loss": 0.1717,
      "step": 298
    },
    {
      "epoch": 0.49667774086378735,
      "grad_norm": 10.17527289922022,
      "learning_rate": 8.8070517201045e-07,
      "loss": 0.1498,
      "step": 299
    },
    {
      "epoch": 0.4983388704318937,
      "grad_norm": 17.205640014020535,
      "learning_rate": 8.798312138958146e-07,
      "loss": 0.1562,
      "step": 300
    },
    {
      "epoch": 0.5,
      "grad_norm": 13.741848852332474,
      "learning_rate": 8.789545031474799e-07,
      "loss": 0.1875,
      "step": 301
    },
    {
      "epoch": 0.5016611295681063,
      "grad_norm": 7.794673805015374,
      "learning_rate": 8.780750461189612e-07,
      "loss": 0.1141,
      "step": 302
    },
    {
      "epoch": 0.5033222591362126,
      "grad_norm": 11.818808426001134,
      "learning_rate": 8.771928491836764e-07,
      "loss": 0.1633,
      "step": 303
    },
    {
      "epoch": 0.5049833887043189,
      "grad_norm": 7.761679819092504,
      "learning_rate": 8.763079187348999e-07,
      "loss": 0.1248,
      "step": 304
    },
    {
      "epoch": 0.5066445182724253,
      "grad_norm": 8.882760685506408,
      "learning_rate": 8.754202611857149e-07,
      "loss": 0.1513,
      "step": 305
    },
    {
      "epoch": 0.5083056478405316,
      "grad_norm": 7.40135368569042,
      "learning_rate": 8.745298829689686e-07,
      "loss": 0.0891,
      "step": 306
    },
    {
      "epoch": 0.5099667774086378,
      "grad_norm": 10.925689760300747,
      "learning_rate": 8.736367905372246e-07,
      "loss": 0.1939,
      "step": 307
    },
    {
      "epoch": 0.5116279069767442,
      "grad_norm": 11.424412238662494,
      "learning_rate": 8.727409903627165e-07,
      "loss": 0.1181,
      "step": 308
    },
    {
      "epoch": 0.5132890365448505,
      "grad_norm": 11.026582956807562,
      "learning_rate": 8.71842488937301e-07,
      "loss": 0.1892,
      "step": 309
    },
    {
      "epoch": 0.5149501661129569,
      "grad_norm": 14.452286426871511,
      "learning_rate": 8.709412927724103e-07,
      "loss": 0.1648,
      "step": 310
    },
    {
      "epoch": 0.5166112956810631,
      "grad_norm": 8.879574876643009,
      "learning_rate": 8.700374083990057e-07,
      "loss": 0.1412,
      "step": 311
    },
    {
      "epoch": 0.5182724252491694,
      "grad_norm": 14.483071618844798,
      "learning_rate": 8.691308423675299e-07,
      "loss": 0.2708,
      "step": 312
    },
    {
      "epoch": 0.5199335548172758,
      "grad_norm": 10.234610377564792,
      "learning_rate": 8.682216012478596e-07,
      "loss": 0.1516,
      "step": 313
    },
    {
      "epoch": 0.521594684385382,
      "grad_norm": 17.446807434470152,
      "learning_rate": 8.673096916292576e-07,
      "loss": 0.1629,
      "step": 314
    },
    {
      "epoch": 0.5232558139534884,
      "grad_norm": 9.65924900517999,
      "learning_rate": 8.663951201203254e-07,
      "loss": 0.1413,
      "step": 315
    },
    {
      "epoch": 0.5249169435215947,
      "grad_norm": 9.522606938597889,
      "learning_rate": 8.654778933489556e-07,
      "loss": 0.1678,
      "step": 316
    },
    {
      "epoch": 0.526578073089701,
      "grad_norm": 10.287315185571302,
      "learning_rate": 8.645580179622828e-07,
      "loss": 0.1753,
      "step": 317
    },
    {
      "epoch": 0.5282392026578073,
      "grad_norm": 9.999270763772957,
      "learning_rate": 8.636355006266365e-07,
      "loss": 0.1578,
      "step": 318
    },
    {
      "epoch": 0.5299003322259136,
      "grad_norm": 9.32239950078714,
      "learning_rate": 8.627103480274921e-07,
      "loss": 0.1659,
      "step": 319
    },
    {
      "epoch": 0.53156146179402,
      "grad_norm": 6.923158524420186,
      "learning_rate": 8.617825668694232e-07,
      "loss": 0.1233,
      "step": 320
    },
    {
      "epoch": 0.5332225913621262,
      "grad_norm": 10.823618378270748,
      "learning_rate": 8.60852163876052e-07,
      "loss": 0.1538,
      "step": 321
    },
    {
      "epoch": 0.5348837209302325,
      "grad_norm": 11.047906207579194,
      "learning_rate": 8.599191457900016e-07,
      "loss": 0.1547,
      "step": 322
    },
    {
      "epoch": 0.5365448504983389,
      "grad_norm": 13.05735973202964,
      "learning_rate": 8.589835193728463e-07,
      "loss": 0.1444,
      "step": 323
    },
    {
      "epoch": 0.5382059800664452,
      "grad_norm": 9.104429009432321,
      "learning_rate": 8.580452914050631e-07,
      "loss": 0.1255,
      "step": 324
    },
    {
      "epoch": 0.5398671096345515,
      "grad_norm": 11.64252163137442,
      "learning_rate": 8.571044686859825e-07,
      "loss": 0.1912,
      "step": 325
    },
    {
      "epoch": 0.5415282392026578,
      "grad_norm": 14.944866982774602,
      "learning_rate": 8.561610580337391e-07,
      "loss": 0.1768,
      "step": 326
    },
    {
      "epoch": 0.5431893687707641,
      "grad_norm": 9.488326230185102,
      "learning_rate": 8.55215066285222e-07,
      "loss": 0.1118,
      "step": 327
    },
    {
      "epoch": 0.5448504983388704,
      "grad_norm": 9.85897825770774,
      "learning_rate": 8.542665002960257e-07,
      "loss": 0.1025,
      "step": 328
    },
    {
      "epoch": 0.5465116279069767,
      "grad_norm": 8.684946844670787,
      "learning_rate": 8.533153669404001e-07,
      "loss": 0.1264,
      "step": 329
    },
    {
      "epoch": 0.5481727574750831,
      "grad_norm": 10.062901574236582,
      "learning_rate": 8.523616731112011e-07,
      "loss": 0.1723,
      "step": 330
    },
    {
      "epoch": 0.5498338870431894,
      "grad_norm": 12.11357006155644,
      "learning_rate": 8.514054257198398e-07,
      "loss": 0.1531,
      "step": 331
    },
    {
      "epoch": 0.5514950166112956,
      "grad_norm": 11.839908028521032,
      "learning_rate": 8.504466316962336e-07,
      "loss": 0.1442,
      "step": 332
    },
    {
      "epoch": 0.553156146179402,
      "grad_norm": 11.844766776417758,
      "learning_rate": 8.494852979887544e-07,
      "loss": 0.1071,
      "step": 333
    },
    {
      "epoch": 0.5548172757475083,
      "grad_norm": 10.271319518938574,
      "learning_rate": 8.4852143156418e-07,
      "loss": 0.149,
      "step": 334
    },
    {
      "epoch": 0.5564784053156147,
      "grad_norm": 11.779914075239326,
      "learning_rate": 8.475550394076426e-07,
      "loss": 0.1389,
      "step": 335
    },
    {
      "epoch": 0.5581395348837209,
      "grad_norm": 10.435527692770954,
      "learning_rate": 8.465861285225781e-07,
      "loss": 0.149,
      "step": 336
    },
    {
      "epoch": 0.5598006644518272,
      "grad_norm": 9.38848130124771,
      "learning_rate": 8.456147059306757e-07,
      "loss": 0.0886,
      "step": 337
    },
    {
      "epoch": 0.5614617940199336,
      "grad_norm": 10.191781455117614,
      "learning_rate": 8.446407786718273e-07,
      "loss": 0.1092,
      "step": 338
    },
    {
      "epoch": 0.5631229235880398,
      "grad_norm": 10.76683247123338,
      "learning_rate": 8.436643538040753e-07,
      "loss": 0.1363,
      "step": 339
    },
    {
      "epoch": 0.5647840531561462,
      "grad_norm": 10.294295935493142,
      "learning_rate": 8.426854384035631e-07,
      "loss": 0.0882,
      "step": 340
    },
    {
      "epoch": 0.5664451827242525,
      "grad_norm": 16.910697465451545,
      "learning_rate": 8.417040395644825e-07,
      "loss": 0.1969,
      "step": 341
    },
    {
      "epoch": 0.5681063122923588,
      "grad_norm": 15.166734734046708,
      "learning_rate": 8.40720164399023e-07,
      "loss": 0.1724,
      "step": 342
    },
    {
      "epoch": 0.5697674418604651,
      "grad_norm": 11.95776400356682,
      "learning_rate": 8.397338200373194e-07,
      "loss": 0.1101,
      "step": 343
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 14.389186956170414,
      "learning_rate": 8.387450136274017e-07,
      "loss": 0.1589,
      "step": 344
    },
    {
      "epoch": 0.5730897009966778,
      "grad_norm": 14.678898341131756,
      "learning_rate": 8.377537523351417e-07,
      "loss": 0.1563,
      "step": 345
    },
    {
      "epoch": 0.574750830564784,
      "grad_norm": 10.867669749295963,
      "learning_rate": 8.367600433442018e-07,
      "loss": 0.1036,
      "step": 346
    },
    {
      "epoch": 0.5764119601328903,
      "grad_norm": 20.043379096999764,
      "learning_rate": 8.35763893855983e-07,
      "loss": 0.2066,
      "step": 347
    },
    {
      "epoch": 0.5780730897009967,
      "grad_norm": 13.690312486819662,
      "learning_rate": 8.347653110895725e-07,
      "loss": 0.156,
      "step": 348
    },
    {
      "epoch": 0.579734219269103,
      "grad_norm": 10.391292183316825,
      "learning_rate": 8.337643022816914e-07,
      "loss": 0.1022,
      "step": 349
    },
    {
      "epoch": 0.5813953488372093,
      "grad_norm": 9.001405280579482,
      "learning_rate": 8.327608746866423e-07,
      "loss": 0.101,
      "step": 350
    },
    {
      "epoch": 0.5830564784053156,
      "grad_norm": 8.24123642090196,
      "learning_rate": 8.31755035576257e-07,
      "loss": 0.0964,
      "step": 351
    },
    {
      "epoch": 0.584717607973422,
      "grad_norm": 9.492222500148317,
      "learning_rate": 8.307467922398432e-07,
      "loss": 0.1317,
      "step": 352
    },
    {
      "epoch": 0.5863787375415282,
      "grad_norm": 15.062860420034843,
      "learning_rate": 8.297361519841318e-07,
      "loss": 0.2075,
      "step": 353
    },
    {
      "epoch": 0.5880398671096345,
      "grad_norm": 14.016489579298407,
      "learning_rate": 8.28723122133225e-07,
      "loss": 0.2038,
      "step": 354
    },
    {
      "epoch": 0.5897009966777409,
      "grad_norm": 10.966278256892918,
      "learning_rate": 8.277077100285412e-07,
      "loss": 0.1182,
      "step": 355
    },
    {
      "epoch": 0.5913621262458472,
      "grad_norm": 10.72462410494972,
      "learning_rate": 8.266899230287642e-07,
      "loss": 0.1052,
      "step": 356
    },
    {
      "epoch": 0.5930232558139535,
      "grad_norm": 10.814041170004836,
      "learning_rate": 8.256697685097877e-07,
      "loss": 0.0989,
      "step": 357
    },
    {
      "epoch": 0.5946843853820598,
      "grad_norm": 15.163523649534604,
      "learning_rate": 8.246472538646634e-07,
      "loss": 0.1417,
      "step": 358
    },
    {
      "epoch": 0.5963455149501661,
      "grad_norm": 11.111867453534417,
      "learning_rate": 8.236223865035465e-07,
      "loss": 0.1706,
      "step": 359
    },
    {
      "epoch": 0.5980066445182725,
      "grad_norm": 10.54296261815159,
      "learning_rate": 8.225951738536423e-07,
      "loss": 0.1287,
      "step": 360
    },
    {
      "epoch": 0.5996677740863787,
      "grad_norm": 8.36851786714232,
      "learning_rate": 8.215656233591524e-07,
      "loss": 0.1091,
      "step": 361
    },
    {
      "epoch": 0.6013289036544851,
      "grad_norm": 14.223204073643483,
      "learning_rate": 8.205337424812208e-07,
      "loss": 0.1974,
      "step": 362
    },
    {
      "epoch": 0.6029900332225914,
      "grad_norm": 9.381500918095396,
      "learning_rate": 8.194995386978803e-07,
      "loss": 0.1167,
      "step": 363
    },
    {
      "epoch": 0.6046511627906976,
      "grad_norm": 13.697438543636968,
      "learning_rate": 8.184630195039965e-07,
      "loss": 0.1341,
      "step": 364
    },
    {
      "epoch": 0.606312292358804,
      "grad_norm": 11.51516985079088,
      "learning_rate": 8.17424192411216e-07,
      "loss": 0.1389,
      "step": 365
    },
    {
      "epoch": 0.6079734219269103,
      "grad_norm": 17.961936532222403,
      "learning_rate": 8.163830649479101e-07,
      "loss": 0.2059,
      "step": 366
    },
    {
      "epoch": 0.6096345514950167,
      "grad_norm": 11.255484209831073,
      "learning_rate": 8.15339644659121e-07,
      "loss": 0.1636,
      "step": 367
    },
    {
      "epoch": 0.6112956810631229,
      "grad_norm": 14.400184086382511,
      "learning_rate": 8.14293939106507e-07,
      "loss": 0.2286,
      "step": 368
    },
    {
      "epoch": 0.6129568106312292,
      "grad_norm": 11.730011163552305,
      "learning_rate": 8.132459558682878e-07,
      "loss": 0.1594,
      "step": 369
    },
    {
      "epoch": 0.6146179401993356,
      "grad_norm": 10.322545416211497,
      "learning_rate": 8.121957025391891e-07,
      "loss": 0.1497,
      "step": 370
    },
    {
      "epoch": 0.6162790697674418,
      "grad_norm": 10.109902353400317,
      "learning_rate": 8.111431867303884e-07,
|
"loss": 0.1422, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.6179401993355482, |
|
"grad_norm": 8.097191708903397, |
|
"learning_rate": 8.10088416069459e-07, |
|
"loss": 0.0915, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.6196013289036545, |
|
"grad_norm": 15.757926168628456, |
|
"learning_rate": 8.090313982003155e-07, |
|
"loss": 0.2464, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.6212624584717608, |
|
"grad_norm": 12.285328658201744, |
|
"learning_rate": 8.079721407831574e-07, |
|
"loss": 0.1759, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.6229235880398671, |
|
"grad_norm": 9.06127827692409, |
|
"learning_rate": 8.06910651494415e-07, |
|
"loss": 0.1211, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.6245847176079734, |
|
"grad_norm": 6.734374794139948, |
|
"learning_rate": 8.058469380266921e-07, |
|
"loss": 0.11, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.6262458471760798, |
|
"grad_norm": 9.929774756393966, |
|
"learning_rate": 8.047810080887116e-07, |
|
"loss": 0.146, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.627906976744186, |
|
"grad_norm": 16.089957357745977, |
|
"learning_rate": 8.037128694052588e-07, |
|
"loss": 0.2195, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.6295681063122923, |
|
"grad_norm": 15.96907916727788, |
|
"learning_rate": 8.026425297171266e-07, |
|
"loss": 0.1866, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.6312292358803987, |
|
"grad_norm": 10.24243255900198, |
|
"learning_rate": 8.015699967810576e-07, |
|
"loss": 0.1659, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.632890365448505, |
|
"grad_norm": 8.298998658122965, |
|
"learning_rate": 8.004952783696891e-07, |
|
"loss": 0.1212, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.6345514950166113, |
|
"grad_norm": 6.94281041135004, |
|
"learning_rate": 7.994183822714968e-07, |
|
"loss": 0.0888, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.6362126245847176, |
|
"grad_norm": 10.305910457713361, |
|
"learning_rate": 7.983393162907379e-07, |
|
"loss": 0.1903, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.6378737541528239, |
|
"grad_norm": 7.464775960157331, |
|
"learning_rate": 7.972580882473946e-07, |
|
"loss": 0.097, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.6395348837209303, |
|
"grad_norm": 7.4746993771307055, |
|
"learning_rate": 7.961747059771179e-07, |
|
"loss": 0.1109, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.6411960132890365, |
|
"grad_norm": 6.615341400751429, |
|
"learning_rate": 7.950891773311701e-07, |
|
"loss": 0.0779, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.6428571428571429, |
|
"grad_norm": 17.60157058625115, |
|
"learning_rate": 7.940015101763684e-07, |
|
"loss": 0.2216, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.6445182724252492, |
|
"grad_norm": 11.013583175188954, |
|
"learning_rate": 7.92911712395028e-07, |
|
"loss": 0.172, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.6461794019933554, |
|
"grad_norm": 8.0676059944362, |
|
"learning_rate": 7.918197918849042e-07, |
|
"loss": 0.1122, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.6478405315614618, |
|
"grad_norm": 10.8514146503401, |
|
"learning_rate": 7.907257565591362e-07, |
|
"loss": 0.082, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6495016611295681, |
|
"grad_norm": 12.550088112103355, |
|
"learning_rate": 7.896296143461889e-07, |
|
"loss": 0.1142, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.6511627906976745, |
|
"grad_norm": 12.146722322785989, |
|
"learning_rate": 7.885313731897962e-07, |
|
"loss": 0.1843, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.6528239202657807, |
|
"grad_norm": 11.011969414306924, |
|
"learning_rate": 7.874310410489027e-07, |
|
"loss": 0.1209, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.654485049833887, |
|
"grad_norm": 6.810537387951736, |
|
"learning_rate": 7.863286258976061e-07, |
|
"loss": 0.0608, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.6561461794019934, |
|
"grad_norm": 10.89167605967294, |
|
"learning_rate": 7.852241357251002e-07, |
|
"loss": 0.1189, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.6578073089700996, |
|
"grad_norm": 14.859678250628704, |
|
"learning_rate": 7.841175785356165e-07, |
|
"loss": 0.1324, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.659468438538206, |
|
"grad_norm": 14.58129989899506, |
|
"learning_rate": 7.830089623483656e-07, |
|
"loss": 0.1417, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.6611295681063123, |
|
"grad_norm": 16.34306018101847, |
|
"learning_rate": 7.818982951974798e-07, |
|
"loss": 0.1263, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.6627906976744186, |
|
"grad_norm": 9.974280701968183, |
|
"learning_rate": 7.807855851319554e-07, |
|
"loss": 0.1354, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.6644518272425249, |
|
"grad_norm": 14.425725799753549, |
|
"learning_rate": 7.796708402155925e-07, |
|
"loss": 0.1874, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6661129568106312, |
|
"grad_norm": 10.094566765127704, |
|
"learning_rate": 7.785540685269388e-07, |
|
"loss": 0.147, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.6677740863787376, |
|
"grad_norm": 15.97109522405515, |
|
"learning_rate": 7.774352781592295e-07, |
|
"loss": 0.1826, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.6694352159468439, |
|
"grad_norm": 13.187995357816002, |
|
"learning_rate": 7.763144772203291e-07, |
|
"loss": 0.1317, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.6710963455149501, |
|
"grad_norm": 15.41977600347847, |
|
"learning_rate": 7.751916738326732e-07, |
|
"loss": 0.1712, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.6727574750830565, |
|
"grad_norm": 6.680838078645473, |
|
"learning_rate": 7.740668761332084e-07, |
|
"loss": 0.0829, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.6744186046511628, |
|
"grad_norm": 16.868662883815464, |
|
"learning_rate": 7.729400922733345e-07, |
|
"loss": 0.1237, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.6760797342192691, |
|
"grad_norm": 10.871555946818436, |
|
"learning_rate": 7.71811330418845e-07, |
|
"loss": 0.1275, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.6777408637873754, |
|
"grad_norm": 14.978834275251021, |
|
"learning_rate": 7.706805987498677e-07, |
|
"loss": 0.1144, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.6794019933554817, |
|
"grad_norm": 9.534606751566304, |
|
"learning_rate": 7.69547905460806e-07, |
|
"loss": 0.1177, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.6810631229235881, |
|
"grad_norm": 9.950511975602048, |
|
"learning_rate": 7.684132587602786e-07, |
|
"loss": 0.1758, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6827242524916943, |
|
"grad_norm": 10.042396922065393, |
|
"learning_rate": 7.67276666871061e-07, |
|
"loss": 0.1446, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.6843853820598007, |
|
"grad_norm": 10.048910173755388, |
|
"learning_rate": 7.661381380300253e-07, |
|
"loss": 0.163, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.686046511627907, |
|
"grad_norm": 9.33358937868672, |
|
"learning_rate": 7.649976804880809e-07, |
|
"loss": 0.1048, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.6877076411960132, |
|
"grad_norm": 10.583010872327236, |
|
"learning_rate": 7.63855302510114e-07, |
|
"loss": 0.1344, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.6893687707641196, |
|
"grad_norm": 12.165380086835881, |
|
"learning_rate": 7.627110123749285e-07, |
|
"loss": 0.1494, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.6910299003322259, |
|
"grad_norm": 10.99633419748417, |
|
"learning_rate": 7.615648183751857e-07, |
|
"loss": 0.1329, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.6926910299003323, |
|
"grad_norm": 11.667588050351162, |
|
"learning_rate": 7.60416728817344e-07, |
|
"loss": 0.153, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.6943521594684385, |
|
"grad_norm": 8.985742528680719, |
|
"learning_rate": 7.592667520215994e-07, |
|
"loss": 0.1267, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.6960132890365448, |
|
"grad_norm": 9.25393580859672, |
|
"learning_rate": 7.581148963218241e-07, |
|
"loss": 0.1382, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.6976744186046512, |
|
"grad_norm": 8.89447816694309, |
|
"learning_rate": 7.569611700655068e-07, |
|
"loss": 0.1189, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6993355481727574, |
|
"grad_norm": 11.740985525599102, |
|
"learning_rate": 7.558055816136924e-07, |
|
"loss": 0.1677, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.7009966777408638, |
|
"grad_norm": 7.905842682651146, |
|
"learning_rate": 7.546481393409209e-07, |
|
"loss": 0.098, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.7026578073089701, |
|
"grad_norm": 15.390816785264915, |
|
"learning_rate": 7.53488851635167e-07, |
|
"loss": 0.1973, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.7043189368770764, |
|
"grad_norm": 10.964069409142427, |
|
"learning_rate": 7.523277268977792e-07, |
|
"loss": 0.1268, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.7059800664451827, |
|
"grad_norm": 11.34599369651872, |
|
"learning_rate": 7.51164773543419e-07, |
|
"loss": 0.1469, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.707641196013289, |
|
"grad_norm": 9.205102192725148, |
|
"learning_rate": 7.5e-07, |
|
"loss": 0.1199, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.7093023255813954, |
|
"grad_norm": 8.670727684942934, |
|
"learning_rate": 7.488334147086263e-07, |
|
"loss": 0.1012, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.7109634551495017, |
|
"grad_norm": 10.11416734093539, |
|
"learning_rate": 7.476650261235318e-07, |
|
"loss": 0.1354, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.7126245847176079, |
|
"grad_norm": 6.003475116109574, |
|
"learning_rate": 7.464948427120197e-07, |
|
"loss": 0.0826, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 11.188990918400444, |
|
"learning_rate": 7.453228729543988e-07, |
|
"loss": 0.1512, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7159468438538206, |
|
"grad_norm": 19.722212570905874, |
|
"learning_rate": 7.441491253439249e-07, |
|
"loss": 0.0985, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.717607973421927, |
|
"grad_norm": 9.220172905747987, |
|
"learning_rate": 7.429736083867371e-07, |
|
"loss": 0.1254, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.7192691029900332, |
|
"grad_norm": 10.857365036669531, |
|
"learning_rate": 7.417963306017972e-07, |
|
"loss": 0.1556, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.7209302325581395, |
|
"grad_norm": 9.12825370131944, |
|
"learning_rate": 7.406173005208277e-07, |
|
"loss": 0.109, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.7225913621262459, |
|
"grad_norm": 11.058921726765327, |
|
"learning_rate": 7.394365266882501e-07, |
|
"loss": 0.1443, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.7242524916943521, |
|
"grad_norm": 17.6871451943868, |
|
"learning_rate": 7.382540176611223e-07, |
|
"loss": 0.2528, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.7259136212624585, |
|
"grad_norm": 9.523326447398212, |
|
"learning_rate": 7.370697820090778e-07, |
|
"loss": 0.0873, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.7275747508305648, |
|
"grad_norm": 11.905512725983018, |
|
"learning_rate": 7.358838283142628e-07, |
|
"loss": 0.1576, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.729235880398671, |
|
"grad_norm": 13.862231389544819, |
|
"learning_rate": 7.346961651712739e-07, |
|
"loss": 0.2174, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.7308970099667774, |
|
"grad_norm": 20.52530465219098, |
|
"learning_rate": 7.335068011870962e-07, |
|
"loss": 0.2746, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7325581395348837, |
|
"grad_norm": 10.26969871122055, |
|
"learning_rate": 7.323157449810405e-07, |
|
"loss": 0.1119, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.7342192691029901, |
|
"grad_norm": 11.415857653085869, |
|
"learning_rate": 7.311230051846819e-07, |
|
"loss": 0.138, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.7358803986710963, |
|
"grad_norm": 11.650217063993916, |
|
"learning_rate": 7.299285904417955e-07, |
|
"loss": 0.1596, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.7375415282392026, |
|
"grad_norm": 9.256723737467732, |
|
"learning_rate": 7.287325094082954e-07, |
|
"loss": 0.1267, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.739202657807309, |
|
"grad_norm": 7.694604457260306, |
|
"learning_rate": 7.275347707521709e-07, |
|
"loss": 0.1038, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.7408637873754153, |
|
"grad_norm": 7.191980021296017, |
|
"learning_rate": 7.263353831534244e-07, |
|
"loss": 0.109, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.7425249169435216, |
|
"grad_norm": 10.691175165821143, |
|
"learning_rate": 7.25134355304008e-07, |
|
"loss": 0.1907, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.7441860465116279, |
|
"grad_norm": 6.947238283602248, |
|
"learning_rate": 7.239316959077607e-07, |
|
"loss": 0.0847, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.7458471760797342, |
|
"grad_norm": 13.005333968288324, |
|
"learning_rate": 7.227274136803452e-07, |
|
"loss": 0.2188, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.7475083056478405, |
|
"grad_norm": 7.289755042527808, |
|
"learning_rate": 7.215215173491849e-07, |
|
"loss": 0.1152, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7491694352159468, |
|
"grad_norm": 8.425252309131805, |
|
"learning_rate": 7.203140156534009e-07, |
|
"loss": 0.1461, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.7508305647840532, |
|
"grad_norm": 5.562081357930086, |
|
"learning_rate": 7.191049173437479e-07, |
|
"loss": 0.0852, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.7524916943521595, |
|
"grad_norm": 12.352790950896518, |
|
"learning_rate": 7.178942311825516e-07, |
|
"loss": 0.155, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.7541528239202658, |
|
"grad_norm": 9.171891575083187, |
|
"learning_rate": 7.166819659436445e-07, |
|
"loss": 0.1495, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.7558139534883721, |
|
"grad_norm": 9.41209858612358, |
|
"learning_rate": 7.15468130412303e-07, |
|
"loss": 0.1169, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.7574750830564784, |
|
"grad_norm": 10.230916740772512, |
|
"learning_rate": 7.142527333851833e-07, |
|
"loss": 0.2093, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.7591362126245847, |
|
"grad_norm": 7.965188922429285, |
|
"learning_rate": 7.130357836702577e-07, |
|
"loss": 0.114, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.760797342192691, |
|
"grad_norm": 6.820864692461988, |
|
"learning_rate": 7.118172900867508e-07, |
|
"loss": 0.1279, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.7624584717607974, |
|
"grad_norm": 9.896346871574128, |
|
"learning_rate": 7.105972614650756e-07, |
|
"loss": 0.1915, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.7641196013289037, |
|
"grad_norm": 13.402974322971838, |
|
"learning_rate": 7.093757066467696e-07, |
|
"loss": 0.1564, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7657807308970099, |
|
"grad_norm": 8.969620661819997, |
|
"learning_rate": 7.081526344844305e-07, |
|
"loss": 0.1348, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.7674418604651163, |
|
"grad_norm": 7.901573755091921, |
|
"learning_rate": 7.069280538416524e-07, |
|
"loss": 0.117, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.7691029900332226, |
|
"grad_norm": 8.676435505228378, |
|
"learning_rate": 7.05701973592961e-07, |
|
"loss": 0.1312, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.770764119601329, |
|
"grad_norm": 8.312745149516928, |
|
"learning_rate": 7.044744026237499e-07, |
|
"loss": 0.1163, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.7724252491694352, |
|
"grad_norm": 8.29093146988122, |
|
"learning_rate": 7.03245349830216e-07, |
|
"loss": 0.1253, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.7740863787375415, |
|
"grad_norm": 10.991996181440621, |
|
"learning_rate": 7.020148241192945e-07, |
|
"loss": 0.1426, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.7757475083056479, |
|
"grad_norm": 9.665978609673429, |
|
"learning_rate": 7.007828344085958e-07, |
|
"loss": 0.116, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.7774086378737541, |
|
"grad_norm": 12.673357083226641, |
|
"learning_rate": 6.995493896263385e-07, |
|
"loss": 0.1128, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.7790697674418605, |
|
"grad_norm": 16.485475962652067, |
|
"learning_rate": 6.983144987112875e-07, |
|
"loss": 0.2125, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.7807308970099668, |
|
"grad_norm": 9.751397196349588, |
|
"learning_rate": 6.970781706126864e-07, |
|
"loss": 0.1438, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.782392026578073, |
|
"grad_norm": 15.35224234314337, |
|
"learning_rate": 6.958404142901956e-07, |
|
"loss": 0.1653, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.7840531561461794, |
|
"grad_norm": 12.390405950401401, |
|
"learning_rate": 6.946012387138247e-07, |
|
"loss": 0.1534, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.7857142857142857, |
|
"grad_norm": 8.927360461817429, |
|
"learning_rate": 6.933606528638689e-07, |
|
"loss": 0.1109, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.7873754152823921, |
|
"grad_norm": 13.299787573189752, |
|
"learning_rate": 6.921186657308439e-07, |
|
"loss": 0.179, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.7890365448504983, |
|
"grad_norm": 8.06526614276188, |
|
"learning_rate": 6.9087528631542e-07, |
|
"loss": 0.1337, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.7906976744186046, |
|
"grad_norm": 8.042198670013263, |
|
"learning_rate": 6.89630523628358e-07, |
|
"loss": 0.1081, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.792358803986711, |
|
"grad_norm": 9.692237559854625, |
|
"learning_rate": 6.883843866904426e-07, |
|
"loss": 0.1177, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.7940199335548173, |
|
"grad_norm": 9.344382237225672, |
|
"learning_rate": 6.87136884532418e-07, |
|
"loss": 0.1255, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.7956810631229236, |
|
"grad_norm": 8.203203552366837, |
|
"learning_rate": 6.858880261949224e-07, |
|
"loss": 0.1308, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.7973421926910299, |
|
"grad_norm": 7.069561317398428, |
|
"learning_rate": 6.84637820728422e-07, |
|
"loss": 0.1177, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.7990033222591362, |
|
"grad_norm": 12.651184018556993, |
|
"learning_rate": 6.833862771931452e-07, |
|
"loss": 0.1717, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.8006644518272426, |
|
"grad_norm": 7.53322640696838, |
|
"learning_rate": 6.82133404659018e-07, |
|
"loss": 0.132, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.8023255813953488, |
|
"grad_norm": 8.74434892318003, |
|
"learning_rate": 6.808792122055973e-07, |
|
"loss": 0.144, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.8039867109634552, |
|
"grad_norm": 10.42589485523522, |
|
"learning_rate": 6.796237089220057e-07, |
|
"loss": 0.1394, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.8056478405315615, |
|
"grad_norm": 11.906093609013176, |
|
"learning_rate": 6.783669039068652e-07, |
|
"loss": 0.1599, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.8073089700996677, |
|
"grad_norm": 11.119016103511091, |
|
"learning_rate": 6.771088062682312e-07, |
|
"loss": 0.1454, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.8089700996677741, |
|
"grad_norm": 6.861287582124284, |
|
"learning_rate": 6.758494251235274e-07, |
|
"loss": 0.0874, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.8106312292358804, |
|
"grad_norm": 22.336812147687517, |
|
"learning_rate": 6.745887695994783e-07, |
|
"loss": 0.2066, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.8122923588039868, |
|
"grad_norm": 10.480117099044136, |
|
"learning_rate": 6.733268488320442e-07, |
|
"loss": 0.1989, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.813953488372093, |
|
"grad_norm": 7.915417258802982, |
|
"learning_rate": 6.720636719663549e-07, |
|
"loss": 0.0994, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8156146179401993, |
|
"grad_norm": 6.0203241786289015, |
|
"learning_rate": 6.707992481566426e-07, |
|
"loss": 0.0882, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.8172757475083057, |
|
"grad_norm": 10.519130887832892, |
|
"learning_rate": 6.695335865661763e-07, |
|
"loss": 0.1457, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.8189368770764119, |
|
"grad_norm": 10.88189785815634, |
|
"learning_rate": 6.682666963671953e-07, |
|
"loss": 0.1381, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.8205980066445183, |
|
"grad_norm": 8.410449806885296, |
|
"learning_rate": 6.669985867408421e-07, |
|
"loss": 0.1285, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.8222591362126246, |
|
"grad_norm": 10.63153721731556, |
|
"learning_rate": 6.657292668770973e-07, |
|
"loss": 0.1344, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.8239202657807309, |
|
"grad_norm": 7.858722492263171, |
|
"learning_rate": 6.644587459747113e-07, |
|
"loss": 0.1392, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.8255813953488372, |
|
"grad_norm": 12.495414541553474, |
|
"learning_rate": 6.631870332411387e-07, |
|
"loss": 0.1249, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.8272425249169435, |
|
"grad_norm": 8.69182573599952, |
|
"learning_rate": 6.619141378924714e-07, |
|
"loss": 0.1069, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.8289036544850499, |
|
"grad_norm": 11.19610726093013, |
|
"learning_rate": 6.606400691533715e-07, |
|
"loss": 0.1561, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.8305647840531561, |
|
"grad_norm": 11.60589510557093, |
|
"learning_rate": 6.593648362570045e-07, |
|
"loss": 0.1657, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8322259136212624, |
|
"grad_norm": 8.967078348213883, |
|
"learning_rate": 6.580884484449733e-07, |
|
"loss": 0.1476, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.8338870431893688, |
|
"grad_norm": 8.046078745336114, |
|
"learning_rate": 6.568109149672496e-07, |
|
"loss": 0.1536, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.8355481727574751, |
|
"grad_norm": 10.11126212003755, |
|
"learning_rate": 6.555322450821081e-07, |
|
"loss": 0.1772, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.8372093023255814, |
|
"grad_norm": 22.00811087279718, |
|
"learning_rate": 6.542524480560588e-07, |
|
"loss": 0.196, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.8388704318936877, |
|
"grad_norm": 9.729933636134202, |
|
"learning_rate": 6.529715331637804e-07, |
|
"loss": 0.1218, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.840531561461794, |
|
"grad_norm": 20.546621479120287, |
|
"learning_rate": 6.516895096880529e-07, |
|
"loss": 0.1806, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.8421926910299004, |
|
"grad_norm": 8.302645527868407, |
|
"learning_rate": 6.504063869196897e-07, |
|
"loss": 0.1164, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.8438538205980066, |
|
"grad_norm": 18.63430523045363, |
|
"learning_rate": 6.491221741574711e-07, |
|
"loss": 0.2653, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.845514950166113, |
|
"grad_norm": 5.795031890334442, |
|
"learning_rate": 6.478368807080763e-07, |
|
"loss": 0.0734, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.8471760797342193, |
|
"grad_norm": 7.3352460848157754, |
|
"learning_rate": 6.465505158860165e-07, |
|
"loss": 0.1188, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.8488372093023255, |
|
"grad_norm": 8.230664802958891, |
|
"learning_rate": 6.452630890135672e-07, |
|
"loss": 0.1376, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.8504983388704319, |
|
"grad_norm": 10.861378955756326, |
|
"learning_rate": 6.439746094207004e-07, |
|
"loss": 0.1895, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.8521594684385382, |
|
"grad_norm": 7.801899185147919, |
|
"learning_rate": 6.426850864450168e-07, |
|
"loss": 0.0992, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.8538205980066446, |
|
"grad_norm": 8.21231391824538, |
|
"learning_rate": 6.413945294316794e-07, |
|
"loss": 0.1277, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.8554817275747508, |
|
"grad_norm": 8.182824956563156, |
|
"learning_rate": 6.401029477333437e-07, |
|
"loss": 0.0903, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 8.908858082334962, |
|
"learning_rate": 6.388103507100922e-07, |
|
"loss": 0.1044, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.8588039867109635, |
|
"grad_norm": 8.858069259669536, |
|
"learning_rate": 6.375167477293648e-07, |
|
"loss": 0.143, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.8604651162790697, |
|
"grad_norm": 7.449561570396506, |
|
"learning_rate": 6.362221481658917e-07, |
|
"loss": 0.1143, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.8621262458471761, |
|
"grad_norm": 10.420657379070615, |
|
"learning_rate": 6.349265614016254e-07, |
|
"loss": 0.0923, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.8637873754152824, |
|
"grad_norm": 6.692843180236105, |
|
"learning_rate": 6.336299968256724e-07, |
|
"loss": 0.0929, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8654485049833887, |
|
"grad_norm": 13.432306584512084, |
|
"learning_rate": 6.323324638342257e-07, |
|
"loss": 0.1248, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.867109634551495, |
|
"grad_norm": 13.80099877387905, |
|
"learning_rate": 6.310339718304965e-07, |
|
"loss": 0.1533, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.8687707641196013, |
|
"grad_norm": 11.959387915414487, |
|
"learning_rate": 6.297345302246452e-07, |
|
"loss": 0.1385, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.8704318936877077, |
|
"grad_norm": 15.092125999647005, |
|
"learning_rate": 6.28434148433715e-07, |
|
"loss": 0.2109, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.872093023255814, |
|
"grad_norm": 10.776480947820303, |
|
"learning_rate": 6.271328358815618e-07, |
|
"loss": 0.171, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.8737541528239202, |
|
"grad_norm": 8.238618005353674, |
|
"learning_rate": 6.258306019987871e-07, |
|
"loss": 0.1164, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.8754152823920266, |
|
"grad_norm": 26.508653168688756, |
|
"learning_rate": 6.245274562226693e-07, |
|
"loss": 0.2546, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.8770764119601329, |
|
"grad_norm": 18.913079012630988, |
|
"learning_rate": 6.232234079970949e-07, |
|
"loss": 0.1723, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.8787375415282392, |
|
"grad_norm": 15.484934247028333, |
|
"learning_rate": 6.219184667724911e-07, |
|
"loss": 0.1934, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.8803986710963455, |
|
"grad_norm": 11.50624636520952, |
|
"learning_rate": 6.20612642005756e-07, |
|
"loss": 0.153, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.8820598006644518, |
|
"grad_norm": 9.765436764418478, |
|
"learning_rate": 6.193059431601909e-07, |
|
"loss": 0.1117, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.8837209302325582, |
|
"grad_norm": 10.442516383917948, |
|
"learning_rate": 6.179983797054321e-07, |
|
"loss": 0.1138, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.8853820598006644, |
|
"grad_norm": 9.29144855908764, |
|
"learning_rate": 6.166899611173808e-07, |
|
"loss": 0.1424, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.8870431893687708, |
|
"grad_norm": 8.897928846693906, |
|
"learning_rate": 6.15380696878136e-07, |
|
"loss": 0.1231, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.8887043189368771, |
|
"grad_norm": 6.957319586157739, |
|
"learning_rate": 6.14070596475925e-07, |
|
"loss": 0.1312, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.8903654485049833, |
|
"grad_norm": 10.179421163075975, |
|
"learning_rate": 6.127596694050345e-07, |
|
"loss": 0.1678, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.8920265780730897, |
|
"grad_norm": 7.020040317670267, |
|
"learning_rate": 6.114479251657425e-07, |
|
"loss": 0.0954, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.893687707641196, |
|
"grad_norm": 8.473993860366981, |
|
"learning_rate": 6.101353732642485e-07, |
|
"loss": 0.1449, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.8953488372093024, |
|
"grad_norm": 8.652250840430034, |
|
"learning_rate": 6.088220232126055e-07, |
|
"loss": 0.1063, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.8970099667774086, |
|
"grad_norm": 9.534760389427388, |
|
"learning_rate": 6.075078845286509e-07, |
|
"loss": 0.1728, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.8986710963455149, |
|
"grad_norm": 5.776374425659757, |
|
"learning_rate": 6.061929667359365e-07, |
|
"loss": 0.0742, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.9003322259136213, |
|
"grad_norm": 11.015148721611304, |
|
"learning_rate": 6.04877279363661e-07, |
|
"loss": 0.1788, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.9019933554817275, |
|
"grad_norm": 9.473756170184656, |
|
"learning_rate": 6.035608319466e-07, |
|
"loss": 0.1579, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.9036544850498339, |
|
"grad_norm": 11.179037463866795, |
|
"learning_rate": 6.02243634025037e-07, |
|
"loss": 0.1533, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.9053156146179402, |
|
"grad_norm": 7.8758415023645405, |
|
"learning_rate": 6.00925695144695e-07, |
|
"loss": 0.1146, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.9069767441860465, |
|
"grad_norm": 15.665873822759634, |
|
"learning_rate": 5.99607024856666e-07, |
|
"loss": 0.1047, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.9086378737541528, |
|
"grad_norm": 9.200391667118774, |
|
"learning_rate": 5.982876327173427e-07, |
|
"loss": 0.1272, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.9102990033222591, |
|
"grad_norm": 8.77578098706721, |
|
"learning_rate": 5.969675282883493e-07, |
|
"loss": 0.1516, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.9119601328903655, |
|
"grad_norm": 9.320250348637398, |
|
"learning_rate": 5.956467211364717e-07, |
|
"loss": 0.1387, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.9136212624584718, |
|
"grad_norm": 9.952052907950817, |
|
"learning_rate": 5.943252208335884e-07, |
|
"loss": 0.1403, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.915282392026578, |
|
"grad_norm": 5.2002513589385275, |
|
"learning_rate": 5.930030369566017e-07, |
|
"loss": 0.0565, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.9169435215946844, |
|
"grad_norm": 15.488056016488622, |
|
"learning_rate": 5.916801790873669e-07, |
|
"loss": 0.1978, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.9186046511627907, |
|
"grad_norm": 12.637431729375692, |
|
"learning_rate": 5.903566568126245e-07, |
|
"loss": 0.1326, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.920265780730897, |
|
"grad_norm": 9.074527532983787, |
|
"learning_rate": 5.890324797239294e-07, |
|
"loss": 0.1423, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.9219269102990033, |
|
"grad_norm": 9.091722076710525, |
|
"learning_rate": 5.877076574175819e-07, |
|
"loss": 0.1073, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.9235880398671097, |
|
"grad_norm": 8.454862363294326, |
|
"learning_rate": 5.86382199494559e-07, |
|
"loss": 0.0991, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.925249169435216, |
|
"grad_norm": 7.602236450189799, |
|
"learning_rate": 5.850561155604429e-07, |
|
"loss": 0.1149, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.9269102990033222, |
|
"grad_norm": 10.529531993339845, |
|
"learning_rate": 5.837294152253533e-07, |
|
"loss": 0.1796, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.9285714285714286, |
|
"grad_norm": 8.976880430709578, |
|
"learning_rate": 5.824021081038767e-07, |
|
"loss": 0.1138, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.9302325581395349, |
|
"grad_norm": 14.33254707392304, |
|
"learning_rate": 5.810742038149966e-07, |
|
"loss": 0.1308, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9318936877076412, |
|
"grad_norm": 7.258067172250153, |
|
"learning_rate": 5.79745711982025e-07, |
|
"loss": 0.09, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.9335548172757475, |
|
"grad_norm": 8.172499836134483, |
|
"learning_rate": 5.78416642232531e-07, |
|
"loss": 0.1044, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.9352159468438538, |
|
"grad_norm": 6.594794551088574, |
|
"learning_rate": 5.770870041982722e-07, |
|
"loss": 0.1254, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.9368770764119602, |
|
"grad_norm": 5.9566914133383255, |
|
"learning_rate": 5.757568075151249e-07, |
|
"loss": 0.0921, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.9385382059800664, |
|
"grad_norm": 8.378099614968635, |
|
"learning_rate": 5.744260618230133e-07, |
|
"loss": 0.1151, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.9401993355481728, |
|
"grad_norm": 7.594866350018622, |
|
"learning_rate": 5.730947767658404e-07, |
|
"loss": 0.0926, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.9418604651162791, |
|
"grad_norm": 5.975799246097571, |
|
"learning_rate": 5.717629619914185e-07, |
|
"loss": 0.0634, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.9435215946843853, |
|
"grad_norm": 7.98709361058564, |
|
"learning_rate": 5.704306271513981e-07, |
|
"loss": 0.0739, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.9451827242524917, |
|
"grad_norm": 5.107038268623648, |
|
"learning_rate": 5.69097781901199e-07, |
|
"loss": 0.0742, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.946843853820598, |
|
"grad_norm": 7.691173623087892, |
|
"learning_rate": 5.677644358999398e-07, |
|
"loss": 0.1137, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.9485049833887044, |
|
"grad_norm": 15.079146650209767, |
|
"learning_rate": 5.664305988103678e-07, |
|
"loss": 0.1334, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.9501661129568106, |
|
"grad_norm": 10.468995649492486, |
|
"learning_rate": 5.6509628029879e-07, |
|
"loss": 0.0933, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.9518272425249169, |
|
"grad_norm": 12.307929483854858, |
|
"learning_rate": 5.637614900350014e-07, |
|
"loss": 0.1288, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.9534883720930233, |
|
"grad_norm": 11.854140397945446, |
|
"learning_rate": 5.624262376922162e-07, |
|
"loss": 0.1043, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.9551495016611296, |
|
"grad_norm": 10.655229778108161, |
|
"learning_rate": 5.610905329469973e-07, |
|
"loss": 0.0992, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.9568106312292359, |
|
"grad_norm": 12.54430451998764, |
|
"learning_rate": 5.597543854791856e-07, |
|
"loss": 0.187, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.9584717607973422, |
|
"grad_norm": 10.536342105186373, |
|
"learning_rate": 5.584178049718314e-07, |
|
"loss": 0.1524, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.9601328903654485, |
|
"grad_norm": 13.313735426136686, |
|
"learning_rate": 5.570808011111226e-07, |
|
"loss": 0.1978, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.9617940199335548, |
|
"grad_norm": 7.220442413364977, |
|
"learning_rate": 5.557433835863151e-07, |
|
"loss": 0.0943, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.9634551495016611, |
|
"grad_norm": 23.093365160299122, |
|
"learning_rate": 5.544055620896629e-07, |
|
"loss": 0.1533, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9651162790697675, |
|
"grad_norm": 11.087199919198286, |
|
"learning_rate": 5.530673463163471e-07, |
|
"loss": 0.1455, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.9667774086378738, |
|
"grad_norm": 10.58290498744938, |
|
"learning_rate": 5.517287459644069e-07, |
|
"loss": 0.1665, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.96843853820598, |
|
"grad_norm": 7.202020662641881, |
|
"learning_rate": 5.50389770734668e-07, |
|
"loss": 0.0956, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.9700996677740864, |
|
"grad_norm": 12.46472530295776, |
|
"learning_rate": 5.490504303306727e-07, |
|
"loss": 0.1617, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.9717607973421927, |
|
"grad_norm": 16.76493326267004, |
|
"learning_rate": 5.477107344586101e-07, |
|
"loss": 0.1507, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.973421926910299, |
|
"grad_norm": 9.237732865929152, |
|
"learning_rate": 5.463706928272453e-07, |
|
"loss": 0.1412, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.9750830564784053, |
|
"grad_norm": 12.383497236276556, |
|
"learning_rate": 5.450303151478489e-07, |
|
"loss": 0.1493, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.9767441860465116, |
|
"grad_norm": 11.151722325366409, |
|
"learning_rate": 5.43689611134127e-07, |
|
"loss": 0.1412, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.978405315614618, |
|
"grad_norm": 7.21013726279382, |
|
"learning_rate": 5.423485905021507e-07, |
|
"loss": 0.1246, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.9800664451827242, |
|
"grad_norm": 8.579739495792525, |
|
"learning_rate": 5.410072629702856e-07, |
|
"loss": 0.1234, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.9817275747508306, |
|
"grad_norm": 7.602281673588693, |
|
"learning_rate": 5.396656382591213e-07, |
|
"loss": 0.1116, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.9833887043189369, |
|
"grad_norm": 14.100145982497999, |
|
"learning_rate": 5.38323726091401e-07, |
|
"loss": 0.1388, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.9850498338870431, |
|
"grad_norm": 7.492618519081229, |
|
"learning_rate": 5.369815361919511e-07, |
|
"loss": 0.0761, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.9867109634551495, |
|
"grad_norm": 6.371586229880432, |
|
"learning_rate": 5.356390782876111e-07, |
|
"loss": 0.1078, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.9883720930232558, |
|
"grad_norm": 9.371673513904842, |
|
"learning_rate": 5.342963621071623e-07, |
|
"loss": 0.1745, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.9900332225913622, |
|
"grad_norm": 5.206481672409249, |
|
"learning_rate": 5.329533973812581e-07, |
|
"loss": 0.0683, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.9916943521594684, |
|
"grad_norm": 14.826520082325914, |
|
"learning_rate": 5.316101938423524e-07, |
|
"loss": 0.1577, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.9933554817275747, |
|
"grad_norm": 10.961715205667316, |
|
"learning_rate": 5.302667612246308e-07, |
|
"loss": 0.1665, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.9950166112956811, |
|
"grad_norm": 14.397029548851924, |
|
"learning_rate": 5.28923109263938e-07, |
|
"loss": 0.1731, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.9966777408637874, |
|
"grad_norm": 8.375566282055818, |
|
"learning_rate": 5.275792476977091e-07, |
|
"loss": 0.1293, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9983388704318937, |
|
"grad_norm": 11.268520674866318, |
|
"learning_rate": 5.262351862648978e-07, |
|
"loss": 0.1419, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 6.784664762230538, |
|
"learning_rate": 5.248909347059061e-07, |
|
"loss": 0.075, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.0016611295681064, |
|
"grad_norm": 4.877956718125895, |
|
"learning_rate": 5.235465027625146e-07, |
|
"loss": 0.0621, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.0033222591362125, |
|
"grad_norm": 4.203005899244808, |
|
"learning_rate": 5.2220190017781e-07, |
|
"loss": 0.0457, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.004983388704319, |
|
"grad_norm": 3.8123959854058103, |
|
"learning_rate": 5.208571366961165e-07, |
|
"loss": 0.0378, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.0066445182724253, |
|
"grad_norm": 5.315954696600741, |
|
"learning_rate": 5.195122220629239e-07, |
|
"loss": 0.0723, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.0083056478405317, |
|
"grad_norm": 3.7805372757538094, |
|
"learning_rate": 5.181671660248178e-07, |
|
"loss": 0.0298, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.0099667774086378, |
|
"grad_norm": 4.0974547093630225, |
|
"learning_rate": 5.16821978329408e-07, |
|
"loss": 0.0396, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.0116279069767442, |
|
"grad_norm": 4.948296436070784, |
|
"learning_rate": 5.154766687252591e-07, |
|
"loss": 0.0263, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.0132890365448506, |
|
"grad_norm": 9.792467737742347, |
|
"learning_rate": 5.141312469618183e-07, |
|
"loss": 0.0942, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.0149501661129567, |
|
"grad_norm": 7.834675461013419, |
|
"learning_rate": 5.127857227893465e-07, |
|
"loss": 0.0447, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.0166112956810631, |
|
"grad_norm": 7.985753475492194, |
|
"learning_rate": 5.114401059588464e-07, |
|
"loss": 0.0646, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.0182724252491695, |
|
"grad_norm": 10.331188978857707, |
|
"learning_rate": 5.100944062219917e-07, |
|
"loss": 0.0382, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.0199335548172757, |
|
"grad_norm": 3.83376887835134, |
|
"learning_rate": 5.08748633331058e-07, |
|
"loss": 0.0345, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.021594684385382, |
|
"grad_norm": 3.8639360019028355, |
|
"learning_rate": 5.074027970388499e-07, |
|
"loss": 0.0243, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.0232558139534884, |
|
"grad_norm": 10.400286609821283, |
|
"learning_rate": 5.060569070986324e-07, |
|
"loss": 0.0734, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.0249169435215948, |
|
"grad_norm": 5.442828090266754, |
|
"learning_rate": 5.047109732640586e-07, |
|
"loss": 0.0294, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.026578073089701, |
|
"grad_norm": 7.870712026341651, |
|
"learning_rate": 5.033650052891001e-07, |
|
"loss": 0.0301, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.0282392026578073, |
|
"grad_norm": 13.585823168789453, |
|
"learning_rate": 5.020190129279759e-07, |
|
"loss": 0.0988, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.0299003322259137, |
|
"grad_norm": 8.796258559095701, |
|
"learning_rate": 5.006730059350815e-07, |
|
"loss": 0.0468, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.0315614617940199, |
|
"grad_norm": 10.44576092302698, |
|
"learning_rate": 4.993269940649184e-07, |
|
"loss": 0.0714, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.0332225913621262, |
|
"grad_norm": 12.352450302810853, |
|
"learning_rate": 4.979809870720242e-07, |
|
"loss": 0.0478, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.0348837209302326, |
|
"grad_norm": 17.75211377150514, |
|
"learning_rate": 4.966349947108999e-07, |
|
"loss": 0.1147, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.0365448504983388, |
|
"grad_norm": 6.156505538276604, |
|
"learning_rate": 4.952890267359412e-07, |
|
"loss": 0.0478, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.0382059800664452, |
|
"grad_norm": 9.218532704370334, |
|
"learning_rate": 4.939430929013677e-07, |
|
"loss": 0.027, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.0398671096345515, |
|
"grad_norm": 7.120527183593547, |
|
"learning_rate": 4.925972029611501e-07, |
|
"loss": 0.0544, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.041528239202658, |
|
"grad_norm": 9.198383686246443, |
|
"learning_rate": 4.912513666689421e-07, |
|
"loss": 0.0323, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.043189368770764, |
|
"grad_norm": 12.663439974096635, |
|
"learning_rate": 4.899055937780083e-07, |
|
"loss": 0.0445, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.0448504983388704, |
|
"grad_norm": 24.085803314752685, |
|
"learning_rate": 4.885598940411536e-07, |
|
"loss": 0.0655, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.0465116279069768, |
|
"grad_norm": 6.902292684354512, |
|
"learning_rate": 4.872142772106535e-07, |
|
"loss": 0.0326, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.048172757475083, |
|
"grad_norm": 6.121287974432574, |
|
"learning_rate": 4.858687530381817e-07, |
|
"loss": 0.0369, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.0498338870431894, |
|
"grad_norm": 8.86764220454998, |
|
"learning_rate": 4.845233312747411e-07, |
|
"loss": 0.0607, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.0514950166112957, |
|
"grad_norm": 9.943461616121928, |
|
"learning_rate": 4.831780216705919e-07, |
|
"loss": 0.0529, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.053156146179402, |
|
"grad_norm": 3.7501260677915313, |
|
"learning_rate": 4.818328339751823e-07, |
|
"loss": 0.0177, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.0548172757475083, |
|
"grad_norm": 3.303220265814859, |
|
"learning_rate": 4.804877779370762e-07, |
|
"loss": 0.0139, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.0564784053156147, |
|
"grad_norm": 7.432956410407962, |
|
"learning_rate": 4.791428633038835e-07, |
|
"loss": 0.0463, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.058139534883721, |
|
"grad_norm": 8.712378765440459, |
|
"learning_rate": 4.777980998221901e-07, |
|
"loss": 0.0424, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.0598006644518272, |
|
"grad_norm": 11.111774138122533, |
|
"learning_rate": 4.764534972374855e-07, |
|
"loss": 0.0522, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.0614617940199336, |
|
"grad_norm": 4.983601758483826, |
|
"learning_rate": 4.751090652940938e-07, |
|
"loss": 0.0182, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.06312292358804, |
|
"grad_norm": 9.89938771331774, |
|
"learning_rate": 4.7376481373510217e-07, |
|
"loss": 0.0418, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.064784053156146, |
|
"grad_norm": 3.1865048946418626, |
|
"learning_rate": 4.7242075230229083e-07, |
|
"loss": 0.0155, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.0664451827242525, |
|
"grad_norm": 10.924670279472195, |
|
"learning_rate": 4.71076890736062e-07, |
|
"loss": 0.0586, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.0681063122923589, |
|
"grad_norm": 4.291225390159846, |
|
"learning_rate": 4.6973323877536925e-07, |
|
"loss": 0.0206, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.069767441860465, |
|
"grad_norm": 7.729879396540719, |
|
"learning_rate": 4.6838980615764756e-07, |
|
"loss": 0.0442, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.0714285714285714, |
|
"grad_norm": 10.4730457424228, |
|
"learning_rate": 4.6704660261874195e-07, |
|
"loss": 0.0297, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.0730897009966778, |
|
"grad_norm": 2.9531809499425465, |
|
"learning_rate": 4.657036378928376e-07, |
|
"loss": 0.0126, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.0747508305647842, |
|
"grad_norm": 6.896419676230295, |
|
"learning_rate": 4.643609217123888e-07, |
|
"loss": 0.024, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.0764119601328903, |
|
"grad_norm": 2.4703995052788392, |
|
"learning_rate": 4.630184638080488e-07, |
|
"loss": 0.0102, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.0780730897009967, |
|
"grad_norm": 10.728604189023233, |
|
"learning_rate": 4.616762739085992e-07, |
|
"loss": 0.0538, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.079734219269103, |
|
"grad_norm": 12.945045897775847, |
|
"learning_rate": 4.603343617408787e-07, |
|
"loss": 0.0504, |
|
"step": 650 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1204, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 689596271034368.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|