|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.666975365808483, |
|
"eval_steps": 500, |
|
"global_step": 9000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0018521948508983144, |
|
"grad_norm": 0.11988232185850008, |
|
"learning_rate": 7.692307692307693e-05, |
|
"loss": 0.5464, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.003704389701796629, |
|
"grad_norm": 0.1595727597726012, |
|
"learning_rate": 0.00015384615384615385, |
|
"loss": 0.3714, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0055565845526949435, |
|
"grad_norm": 0.09668305340799446, |
|
"learning_rate": 0.0002307692307692308, |
|
"loss": 0.3157, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.007408779403593258, |
|
"grad_norm": 0.16023475106511212, |
|
"learning_rate": 0.0003076923076923077, |
|
"loss": 0.3987, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.009260974254491572, |
|
"grad_norm": 0.169453827168658, |
|
"learning_rate": 0.00038461538461538467, |
|
"loss": 0.3192, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.011113169105389887, |
|
"grad_norm": 0.3012465198223198, |
|
"learning_rate": 0.0004615384615384616, |
|
"loss": 0.3793, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.012965363956288202, |
|
"grad_norm": 0.3048336774601151, |
|
"learning_rate": 0.0004999997322635931, |
|
"loss": 0.3902, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.014817558807186515, |
|
"grad_norm": 0.3627728524819501, |
|
"learning_rate": 0.0004999979009491321, |
|
"loss": 0.3932, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.016669753658084832, |
|
"grad_norm": 0.4114314981691849, |
|
"learning_rate": 0.0004999938313774507, |
|
"loss": 0.4758, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.018521948508983144, |
|
"grad_norm": 0.44568863103845946, |
|
"learning_rate": 0.0004999876199685106, |
|
"loss": 0.4944, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02037414335988146, |
|
"grad_norm": 0.3944831575252504, |
|
"learning_rate": 0.0004999792667755284, |
|
"loss": 0.4604, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.022226338210779774, |
|
"grad_norm": 0.4390978457678222, |
|
"learning_rate": 0.0004999687718700706, |
|
"loss": 0.5137, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02407853306167809, |
|
"grad_norm": 0.43510394816525627, |
|
"learning_rate": 0.000499956135342053, |
|
"loss": 0.4757, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.025930727912576404, |
|
"grad_norm": 0.44811558740355373, |
|
"learning_rate": 0.0004999413572997397, |
|
"loss": 0.5541, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02778292276347472, |
|
"grad_norm": 0.3691107778538866, |
|
"learning_rate": 0.0004999262261712005, |
|
"loss": 0.465, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02963511761437303, |
|
"grad_norm": 0.38877053127394406, |
|
"learning_rate": 0.0004999073796157043, |
|
"loss": 0.4877, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.031487312465271346, |
|
"grad_norm": 0.43039217557165316, |
|
"learning_rate": 0.00049988639196363, |
|
"loss": 0.4371, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.033339507316169664, |
|
"grad_norm": 0.31896475378695344, |
|
"learning_rate": 0.0004998632633947908, |
|
"loss": 0.5814, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.035191702167067976, |
|
"grad_norm": 0.4333729012961258, |
|
"learning_rate": 0.000499837994107342, |
|
"loss": 0.5196, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.03704389701796629, |
|
"grad_norm": 0.34656949570118684, |
|
"learning_rate": 0.0004998105843177797, |
|
"loss": 0.4593, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.038896091868864606, |
|
"grad_norm": 0.3679842761186855, |
|
"learning_rate": 0.000499781034260939, |
|
"loss": 0.4526, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04074828671976292, |
|
"grad_norm": 0.3877757158501542, |
|
"learning_rate": 0.0004997493441899917, |
|
"loss": 0.4261, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.042600481570661236, |
|
"grad_norm": 0.3289617422897924, |
|
"learning_rate": 0.0004997155143764444, |
|
"loss": 0.4934, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.04445267642155955, |
|
"grad_norm": 0.3826208484059836, |
|
"learning_rate": 0.0004996795451101361, |
|
"loss": 0.5347, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.04630487127245786, |
|
"grad_norm": 0.38157790109545875, |
|
"learning_rate": 0.0004996414366992357, |
|
"loss": 0.4789, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04815706612335618, |
|
"grad_norm": 0.3154636026466987, |
|
"learning_rate": 0.0004996011894702393, |
|
"loss": 0.5096, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.05000926097425449, |
|
"grad_norm": 0.45302017894233715, |
|
"learning_rate": 0.0004995588037679675, |
|
"loss": 0.4752, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.05186145582515281, |
|
"grad_norm": 0.3690345364974773, |
|
"learning_rate": 0.0004995142799555624, |
|
"loss": 0.4454, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05371365067605112, |
|
"grad_norm": 0.2843117768216058, |
|
"learning_rate": 0.0004994676184144843, |
|
"loss": 0.5058, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.05556584552694944, |
|
"grad_norm": 0.3278296958742726, |
|
"learning_rate": 0.0004994188195445089, |
|
"loss": 0.514, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05741804037784775, |
|
"grad_norm": 0.3702152478433116, |
|
"learning_rate": 0.0004993678837637235, |
|
"loss": 0.4938, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.05927023522874606, |
|
"grad_norm": 0.3157950493567883, |
|
"learning_rate": 0.0004993148115085233, |
|
"loss": 0.4744, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.06112243007964438, |
|
"grad_norm": 0.2864560430174496, |
|
"learning_rate": 0.0004992596032336082, |
|
"loss": 0.4614, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.06297462493054269, |
|
"grad_norm": 0.27306121969283015, |
|
"learning_rate": 0.0004992022594119784, |
|
"loss": 0.5571, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.064826819781441, |
|
"grad_norm": 0.28239603992719803, |
|
"learning_rate": 0.0004991427805349305, |
|
"loss": 0.5352, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.06667901463233933, |
|
"grad_norm": 0.3067959635308188, |
|
"learning_rate": 0.0004990811671120534, |
|
"loss": 0.4366, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.06853120948323764, |
|
"grad_norm": 0.28026509474367334, |
|
"learning_rate": 0.0004990174196712239, |
|
"loss": 0.4413, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.07038340433413595, |
|
"grad_norm": 0.3553036031847406, |
|
"learning_rate": 0.0004989515387586022, |
|
"loss": 0.4771, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.07223559918503426, |
|
"grad_norm": 0.3078977949859338, |
|
"learning_rate": 0.0004988904222849908, |
|
"loss": 0.4456, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.07408779403593257, |
|
"grad_norm": 0.3110301739864855, |
|
"learning_rate": 0.0004988204893460954, |
|
"loss": 0.4383, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0759399888868309, |
|
"grad_norm": 0.36180927604524143, |
|
"learning_rate": 0.0004987484246226201, |
|
"loss": 0.4467, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.07779218373772921, |
|
"grad_norm": 0.2902432894644559, |
|
"learning_rate": 0.0004986742287319836, |
|
"loss": 0.5027, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.07964437858862752, |
|
"grad_norm": 0.3038323599185379, |
|
"learning_rate": 0.0004985979023098639, |
|
"loss": 0.4896, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.08149657343952584, |
|
"grad_norm": 0.37728379802123757, |
|
"learning_rate": 0.0004985194460101922, |
|
"loss": 0.446, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.08334876829042415, |
|
"grad_norm": 0.4090633364935015, |
|
"learning_rate": 0.0004984388605051474, |
|
"loss": 0.4457, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.08520096314132247, |
|
"grad_norm": 0.2905031001353468, |
|
"learning_rate": 0.000498356146485151, |
|
"loss": 0.4807, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.08705315799222078, |
|
"grad_norm": 0.33050891942743665, |
|
"learning_rate": 0.00049827130465886, |
|
"loss": 0.4457, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.0889053528431191, |
|
"grad_norm": 0.3454971392885025, |
|
"learning_rate": 0.0004981843357531622, |
|
"loss": 0.4441, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.09075754769401741, |
|
"grad_norm": 0.2560611824951078, |
|
"learning_rate": 0.0004980952405131687, |
|
"loss": 0.4601, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.09260974254491572, |
|
"grad_norm": 0.3071403176866605, |
|
"learning_rate": 0.0004980040197022085, |
|
"loss": 0.422, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09446193739581404, |
|
"grad_norm": 0.3103572812280149, |
|
"learning_rate": 0.0004979106741018214, |
|
"loss": 0.4556, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.09631413224671236, |
|
"grad_norm": 0.22158829723284448, |
|
"learning_rate": 0.0004978152045117515, |
|
"loss": 0.5279, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.09816632709761067, |
|
"grad_norm": 0.2643825421503944, |
|
"learning_rate": 0.0004977176117499402, |
|
"loss": 0.4332, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.10001852194850898, |
|
"grad_norm": 0.4119060845568467, |
|
"learning_rate": 0.0004976178966525194, |
|
"loss": 0.4748, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1018707167994073, |
|
"grad_norm": 0.23946695979831795, |
|
"learning_rate": 0.0004975160600738043, |
|
"loss": 0.4564, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.10372291165030562, |
|
"grad_norm": 0.32293396447938405, |
|
"learning_rate": 0.0004974121028862858, |
|
"loss": 0.4037, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.10557510650120393, |
|
"grad_norm": 0.2737410631384409, |
|
"learning_rate": 0.0004973060259806235, |
|
"loss": 0.4471, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.10742730135210224, |
|
"grad_norm": 0.2639287107206222, |
|
"learning_rate": 0.0004971978302656376, |
|
"loss": 0.492, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.10927949620300055, |
|
"grad_norm": 0.3304530971496624, |
|
"learning_rate": 0.0004970875166683017, |
|
"loss": 0.4433, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.11113169105389888, |
|
"grad_norm": 0.3383662002406531, |
|
"learning_rate": 0.0004969750861337338, |
|
"loss": 0.5059, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.11298388590479719, |
|
"grad_norm": 0.3718630666323684, |
|
"learning_rate": 0.0004968605396251896, |
|
"loss": 0.4944, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.1148360807556955, |
|
"grad_norm": 0.3089667090694828, |
|
"learning_rate": 0.0004967438781240532, |
|
"loss": 0.5117, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.11668827560659381, |
|
"grad_norm": 0.3055449117119714, |
|
"learning_rate": 0.000496625102629829, |
|
"loss": 0.4504, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.11854047045749212, |
|
"grad_norm": 0.3104727563565212, |
|
"learning_rate": 0.0004965042141601331, |
|
"loss": 0.4279, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.12039266530839045, |
|
"grad_norm": 0.3356499915029813, |
|
"learning_rate": 0.000496381213750685, |
|
"loss": 0.4227, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.12224486015928876, |
|
"grad_norm": 0.27125317345626304, |
|
"learning_rate": 0.0004962561024552981, |
|
"loss": 0.4373, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.12409705501018707, |
|
"grad_norm": 0.30382841565038987, |
|
"learning_rate": 0.0004961288813458708, |
|
"loss": 0.4621, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.12594924986108538, |
|
"grad_norm": 0.24989596364522906, |
|
"learning_rate": 0.0004959995515123779, |
|
"loss": 0.4213, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.1278014447119837, |
|
"grad_norm": 0.38205278522841757, |
|
"learning_rate": 0.0004958681140628603, |
|
"loss": 0.4367, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.129653639562882, |
|
"grad_norm": 0.36439745638468385, |
|
"learning_rate": 0.0004957345701234165, |
|
"loss": 0.4427, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.13150583441378033, |
|
"grad_norm": 0.35080175318468465, |
|
"learning_rate": 0.0004955989208381922, |
|
"loss": 0.4133, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.13335802926467866, |
|
"grad_norm": 0.3137679102742871, |
|
"learning_rate": 0.0004954611673693708, |
|
"loss": 0.4044, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.13521022411557695, |
|
"grad_norm": 0.329188444759587, |
|
"learning_rate": 0.0004953213108971637, |
|
"loss": 0.4922, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.13706241896647528, |
|
"grad_norm": 0.21687503014556075, |
|
"learning_rate": 0.0004951793526197992, |
|
"loss": 0.4667, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.13891461381737358, |
|
"grad_norm": 0.35060249170961755, |
|
"learning_rate": 0.0004950352937535139, |
|
"loss": 0.4678, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.1407668086682719, |
|
"grad_norm": 0.2424350783919833, |
|
"learning_rate": 0.0004948891355325407, |
|
"loss": 0.5452, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.14261900351917023, |
|
"grad_norm": 0.29988592081373705, |
|
"learning_rate": 0.0004947408792090989, |
|
"loss": 0.4472, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.14447119837006853, |
|
"grad_norm": 0.25092463767440515, |
|
"learning_rate": 0.0004945905260533836, |
|
"loss": 0.4379, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.14632339322096685, |
|
"grad_norm": 0.2707811618812939, |
|
"learning_rate": 0.0004944380773535545, |
|
"loss": 0.4489, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.14817558807186515, |
|
"grad_norm": 0.29945644041990244, |
|
"learning_rate": 0.000494283534415725, |
|
"loss": 0.4627, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.15002778292276348, |
|
"grad_norm": 0.3269089383236662, |
|
"learning_rate": 0.0004941268985639511, |
|
"loss": 0.4559, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.1518799777736618, |
|
"grad_norm": 0.34167018575418623, |
|
"learning_rate": 0.0004939681711402201, |
|
"loss": 0.4502, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.1537321726245601, |
|
"grad_norm": 0.23233347955254757, |
|
"learning_rate": 0.0004938073535044385, |
|
"loss": 0.4848, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.15558436747545842, |
|
"grad_norm": 0.28810200476716363, |
|
"learning_rate": 0.0004936444470344212, |
|
"loss": 0.4334, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.15743656232635672, |
|
"grad_norm": 0.2502390156217485, |
|
"learning_rate": 0.0004934794531258794, |
|
"loss": 0.4756, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.15928875717725505, |
|
"grad_norm": 0.306502262394428, |
|
"learning_rate": 0.0004933123731924083, |
|
"loss": 0.4009, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.16114095202815337, |
|
"grad_norm": 0.2866551052549121, |
|
"learning_rate": 0.0004931432086654751, |
|
"loss": 0.411, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.16299314687905167, |
|
"grad_norm": 0.2975618645144025, |
|
"learning_rate": 0.0004929719609944075, |
|
"loss": 0.4386, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.16484534172995, |
|
"grad_norm": 0.3269989409370364, |
|
"learning_rate": 0.00049279863164638, |
|
"loss": 0.4811, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.1666975365808483, |
|
"grad_norm": 0.2764720769175588, |
|
"learning_rate": 0.0004926232221064024, |
|
"loss": 0.4319, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.16854973143174662, |
|
"grad_norm": 0.31817437091747597, |
|
"learning_rate": 0.0004924457338773062, |
|
"loss": 0.5039, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.17040192628264494, |
|
"grad_norm": 0.27931435921536724, |
|
"learning_rate": 0.0004922661684797332, |
|
"loss": 0.447, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.17225412113354324, |
|
"grad_norm": 0.31719086644687416, |
|
"learning_rate": 0.0004920845274521201, |
|
"loss": 0.4486, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.17410631598444157, |
|
"grad_norm": 0.2554455359026809, |
|
"learning_rate": 0.0004919008123506878, |
|
"loss": 0.4683, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.17595851083533987, |
|
"grad_norm": 0.33286076816889937, |
|
"learning_rate": 0.0004917150247494265, |
|
"loss": 0.4438, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.1778107056862382, |
|
"grad_norm": 0.2611238399418209, |
|
"learning_rate": 0.0004915271662400824, |
|
"loss": 0.3582, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.17966290053713652, |
|
"grad_norm": 0.2652458587080694, |
|
"learning_rate": 0.0004913372384321449, |
|
"loss": 0.4845, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.18151509538803481, |
|
"grad_norm": 0.2794832294188891, |
|
"learning_rate": 0.000491145242952832, |
|
"loss": 0.4398, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.18336729023893314, |
|
"grad_norm": 0.21029714010049572, |
|
"learning_rate": 0.0004909511814470764, |
|
"loss": 0.4408, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.18521948508983144, |
|
"grad_norm": 0.2781493608292439, |
|
"learning_rate": 0.0004907550555775119, |
|
"loss": 0.4999, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.18707167994072976, |
|
"grad_norm": 0.3287877830017298, |
|
"learning_rate": 0.0004905568670244588, |
|
"loss": 0.4389, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.1889238747916281, |
|
"grad_norm": 0.34207107261927205, |
|
"learning_rate": 0.0004903566174859094, |
|
"loss": 0.4537, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.19077606964252639, |
|
"grad_norm": 0.24403509336935494, |
|
"learning_rate": 0.0004901543086775137, |
|
"loss": 0.3921, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.1926282644934247, |
|
"grad_norm": 0.2671352359873941, |
|
"learning_rate": 0.0004899499423325647, |
|
"loss": 0.4023, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.194480459344323, |
|
"grad_norm": 0.36145293617111, |
|
"learning_rate": 0.0004897435202019832, |
|
"loss": 0.4346, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.19633265419522133, |
|
"grad_norm": 0.3104045357811312, |
|
"learning_rate": 0.0004895350440543036, |
|
"loss": 0.4299, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.19818484904611966, |
|
"grad_norm": 0.2530391260727553, |
|
"learning_rate": 0.0004893245156756578, |
|
"loss": 0.4477, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.20003704389701796, |
|
"grad_norm": 0.26339622262916945, |
|
"learning_rate": 0.0004891119368697605, |
|
"loss": 0.4907, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.20188923874791628, |
|
"grad_norm": 0.24758807862533388, |
|
"learning_rate": 0.0004888973094578931, |
|
"loss": 0.4215, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.2037414335988146, |
|
"grad_norm": 0.24646474329045825, |
|
"learning_rate": 0.0004886806352788893, |
|
"loss": 0.4727, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2055936284497129, |
|
"grad_norm": 0.30101780230375413, |
|
"learning_rate": 0.0004884619161891181, |
|
"loss": 0.4835, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.20744582330061123, |
|
"grad_norm": 0.38338135072675056, |
|
"learning_rate": 0.0004882411540624684, |
|
"loss": 0.4713, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.20929801815150953, |
|
"grad_norm": 0.30051618582402373, |
|
"learning_rate": 0.00048801835079033325, |
|
"loss": 0.4318, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.21115021300240785, |
|
"grad_norm": 0.3169294143209614, |
|
"learning_rate": 0.00048779350828159307, |
|
"loss": 0.4414, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.21300240785330618, |
|
"grad_norm": 0.2243691219456984, |
|
"learning_rate": 0.0004875666284625996, |
|
"loss": 0.4732, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.21485460270420448, |
|
"grad_norm": 0.32093479593839086, |
|
"learning_rate": 0.0004873377132771594, |
|
"loss": 0.4477, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2167067975551028, |
|
"grad_norm": 0.30480291068654214, |
|
"learning_rate": 0.00048710676468651724, |
|
"loss": 0.4159, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.2185589924060011, |
|
"grad_norm": 0.31550505987353533, |
|
"learning_rate": 0.00048687378466933913, |
|
"loss": 0.4121, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.22041118725689943, |
|
"grad_norm": 0.2825917386970882, |
|
"learning_rate": 0.0004866387752216953, |
|
"loss": 0.4531, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.22226338210779775, |
|
"grad_norm": 0.2507091074214277, |
|
"learning_rate": 0.0004864017383570436, |
|
"loss": 0.373, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.22411557695869605, |
|
"grad_norm": 0.2533897084759911, |
|
"learning_rate": 0.00048616267610621154, |
|
"loss": 0.466, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.22596777180959438, |
|
"grad_norm": 0.30135005574304485, |
|
"learning_rate": 0.00048592159051737946, |
|
"loss": 0.4678, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.22781996666049267, |
|
"grad_norm": 0.2900534769133878, |
|
"learning_rate": 0.0004856784836560627, |
|
"loss": 0.4412, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.229672161511391, |
|
"grad_norm": 0.3356512247856666, |
|
"learning_rate": 0.000485433357605094, |
|
"loss": 0.4381, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.23152435636228932, |
|
"grad_norm": 0.28373492782986676, |
|
"learning_rate": 0.00048518621446460555, |
|
"loss": 0.4332, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.23337655121318762, |
|
"grad_norm": 0.27681961152835116, |
|
"learning_rate": 0.00048493705635201123, |
|
"loss": 0.3954, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.23522874606408595, |
|
"grad_norm": 0.3183042306103447, |
|
"learning_rate": 0.0004846858854019882, |
|
"loss": 0.4898, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.23708094091498425, |
|
"grad_norm": 0.2806922738056069, |
|
"learning_rate": 0.00048443270376645876, |
|
"loss": 0.4621, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.23893313576588257, |
|
"grad_norm": 0.32027034011519323, |
|
"learning_rate": 0.00048417751361457185, |
|
"loss": 0.4264, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.2407853306167809, |
|
"grad_norm": 0.25756897907173815, |
|
"learning_rate": 0.00048392031713268447, |
|
"loss": 0.4213, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.2426375254676792, |
|
"grad_norm": 0.29761680785972183, |
|
"learning_rate": 0.0004836611165243432, |
|
"loss": 0.41, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.24448972031857752, |
|
"grad_norm": 0.28775863303393384, |
|
"learning_rate": 0.00048339991401026474, |
|
"loss": 0.4237, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.24634191516947582, |
|
"grad_norm": 0.20527409355092, |
|
"learning_rate": 0.00048313671182831743, |
|
"loss": 0.4227, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.24819411002037414, |
|
"grad_norm": 0.3049894888864481, |
|
"learning_rate": 0.00048287151223350193, |
|
"loss": 0.4188, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.25004630487127244, |
|
"grad_norm": 0.28816158479568416, |
|
"learning_rate": 0.00048260431749793184, |
|
"loss": 0.4193, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.25189849972217077, |
|
"grad_norm": 0.2810466941829626, |
|
"learning_rate": 0.00048233512991081406, |
|
"loss": 0.431, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.2537506945730691, |
|
"grad_norm": 0.34419272070908224, |
|
"learning_rate": 0.0004820639517784297, |
|
"loss": 0.4802, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.2556028894239674, |
|
"grad_norm": 0.2614191417571005, |
|
"learning_rate": 0.00048179078542411367, |
|
"loss": 0.4218, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.25745508427486574, |
|
"grad_norm": 0.3620169455808058, |
|
"learning_rate": 0.0004815156331882352, |
|
"loss": 0.4259, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.259307279125764, |
|
"grad_norm": 0.3495069978116607, |
|
"learning_rate": 0.0004812384974281778, |
|
"loss": 0.414, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.26115947397666234, |
|
"grad_norm": 0.23822327577745042, |
|
"learning_rate": 0.0004809593805183187, |
|
"loss": 0.4885, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.26301166882756066, |
|
"grad_norm": 0.31188479403470154, |
|
"learning_rate": 0.00048067828485000904, |
|
"loss": 0.438, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.264863863678459, |
|
"grad_norm": 0.30908266150851776, |
|
"learning_rate": 0.00048039521283155283, |
|
"loss": 0.4224, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.2667160585293573, |
|
"grad_norm": 0.3926396606462005, |
|
"learning_rate": 0.0004801101668881869, |
|
"loss": 0.4481, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.2685682533802556, |
|
"grad_norm": 0.2937266710438928, |
|
"learning_rate": 0.0004798231494620593, |
|
"loss": 0.4785, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.2704204482311539, |
|
"grad_norm": 0.29097772272918393, |
|
"learning_rate": 0.00047953416301220936, |
|
"loss": 0.5, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.27227264308205223, |
|
"grad_norm": 0.2552279327553987, |
|
"learning_rate": 0.000479243210014546, |
|
"loss": 0.32, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.27412483793295056, |
|
"grad_norm": 0.2699430209822517, |
|
"learning_rate": 0.00047895029296182636, |
|
"loss": 0.3985, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.2759770327838489, |
|
"grad_norm": 0.31833186888024984, |
|
"learning_rate": 0.0004786554143636353, |
|
"loss": 0.4375, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.27782922763474716, |
|
"grad_norm": 0.2751779388841223, |
|
"learning_rate": 0.00047835857674636287, |
|
"loss": 0.4001, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.2796814224856455, |
|
"grad_norm": 0.2940862163328187, |
|
"learning_rate": 0.0004780597826531833, |
|
"loss": 0.4308, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.2815336173365438, |
|
"grad_norm": 0.3386550227204627, |
|
"learning_rate": 0.00047775903464403305, |
|
"loss": 0.5353, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.28338581218744213, |
|
"grad_norm": 0.31240154547554955, |
|
"learning_rate": 0.00047745633529558884, |
|
"loss": 0.3715, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.28523800703834046, |
|
"grad_norm": 0.32759929614793354, |
|
"learning_rate": 0.0004771516872012457, |
|
"loss": 0.3929, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.2870902018892387, |
|
"grad_norm": 0.29742817791928194, |
|
"learning_rate": 0.0004768450929710945, |
|
"loss": 0.4812, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.28894239674013705, |
|
"grad_norm": 0.32461600905212035, |
|
"learning_rate": 0.00047653655523189996, |
|
"loss": 0.4181, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.2907945915910354, |
|
"grad_norm": 0.26208477940948965, |
|
"learning_rate": 0.00047622607662707773, |
|
"loss": 0.3872, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.2926467864419337, |
|
"grad_norm": 0.315046477208, |
|
"learning_rate": 0.000475913659816672, |
|
"loss": 0.4267, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.29449898129283203, |
|
"grad_norm": 0.2451451562089501, |
|
"learning_rate": 0.0004755993074773327, |
|
"loss": 0.4525, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.2963511761437303, |
|
"grad_norm": 0.2936495362556869, |
|
"learning_rate": 0.00047528302230229246, |
|
"loss": 0.4167, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2982033709946286, |
|
"grad_norm": 0.3551639863299712, |
|
"learning_rate": 0.00047496480700134376, |
|
"loss": 0.4214, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.30005556584552695, |
|
"grad_norm": 0.21422448887216472, |
|
"learning_rate": 0.0004746446643008153, |
|
"loss": 0.4111, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.3019077606964253, |
|
"grad_norm": 0.2593924521965729, |
|
"learning_rate": 0.00047432259694354896, |
|
"loss": 0.5274, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.3037599555473236, |
|
"grad_norm": 0.30074263766274656, |
|
"learning_rate": 0.0004739986076888765, |
|
"loss": 0.4424, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.30561215039822187, |
|
"grad_norm": 0.291226317138353, |
|
"learning_rate": 0.0004736726993125952, |
|
"loss": 0.4802, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3074643452491202, |
|
"grad_norm": 0.23749441719859632, |
|
"learning_rate": 0.0004733448746069449, |
|
"loss": 0.4288, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.3093165401000185, |
|
"grad_norm": 0.2740636498957509, |
|
"learning_rate": 0.00047301513638058355, |
|
"loss": 0.4742, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.31116873495091685, |
|
"grad_norm": 0.3263090001341323, |
|
"learning_rate": 0.0004726834874585634, |
|
"loss": 0.4945, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.3130209298018152, |
|
"grad_norm": 0.23702905590165377, |
|
"learning_rate": 0.00047234993068230656, |
|
"loss": 0.3995, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.31487312465271344, |
|
"grad_norm": 0.35028858247208006, |
|
"learning_rate": 0.0004720144689095809, |
|
"loss": 0.3937, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.31672531950361177, |
|
"grad_norm": 0.35160376937763926, |
|
"learning_rate": 0.00047167710501447535, |
|
"loss": 0.4388, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.3185775143545101, |
|
"grad_norm": 0.2769519878263511, |
|
"learning_rate": 0.0004713378418873756, |
|
"loss": 0.43, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.3204297092054084, |
|
"grad_norm": 0.2723567337414344, |
|
"learning_rate": 0.00047099668243493886, |
|
"loss": 0.4546, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.32228190405630674, |
|
"grad_norm": 0.4145209498456788, |
|
"learning_rate": 0.0004706536295800695, |
|
"loss": 0.4331, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.324134098907205, |
|
"grad_norm": 0.3793519870853873, |
|
"learning_rate": 0.0004703086862618935, |
|
"loss": 0.3716, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.32598629375810334, |
|
"grad_norm": 0.2962260082256936, |
|
"learning_rate": 0.00046996185543573356, |
|
"loss": 0.4161, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.32783848860900167, |
|
"grad_norm": 0.24861664813452802, |
|
"learning_rate": 0.00046961314007308374, |
|
"loss": 0.4772, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.3296906834599, |
|
"grad_norm": 0.30394710320503215, |
|
"learning_rate": 0.00046926254316158414, |
|
"loss": 0.4521, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.3315428783107983, |
|
"grad_norm": 0.2835284077342044, |
|
"learning_rate": 0.0004689100677049948, |
|
"loss": 0.439, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.3333950731616966, |
|
"grad_norm": 0.2936297703950855, |
|
"learning_rate": 0.00046855571672317056, |
|
"loss": 0.4539, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.3352472680125949, |
|
"grad_norm": 0.31076414372805394, |
|
"learning_rate": 0.00046819949325203485, |
|
"loss": 0.5226, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.33709946286349324, |
|
"grad_norm": 0.3151990506296693, |
|
"learning_rate": 0.00046784140034355386, |
|
"loss": 0.4502, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.33895165771439156, |
|
"grad_norm": 0.2999740764164084, |
|
"learning_rate": 0.0004674814410657102, |
|
"loss": 0.405, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.3408038525652899, |
|
"grad_norm": 0.2848528621693946, |
|
"learning_rate": 0.00046711961850247677, |
|
"loss": 0.4686, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.34265604741618816, |
|
"grad_norm": 0.3304960024436658, |
|
"learning_rate": 0.0004667559357537901, |
|
"loss": 0.3961, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.3445082422670865, |
|
"grad_norm": 0.29714447800492894, |
|
"learning_rate": 0.00046639039593552423, |
|
"loss": 0.4121, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.3463604371179848, |
|
"grad_norm": 0.3737053983821796, |
|
"learning_rate": 0.0004660230021794637, |
|
"loss": 0.4899, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.34821263196888314, |
|
"grad_norm": 0.2715803166164925, |
|
"learning_rate": 0.00046565375763327655, |
|
"loss": 0.418, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.35006482681978146, |
|
"grad_norm": 0.2962801885853028, |
|
"learning_rate": 0.0004652826654604879, |
|
"loss": 0.4675, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.35191702167067973, |
|
"grad_norm": 0.30660107375890056, |
|
"learning_rate": 0.0004649097288404523, |
|
"loss": 0.4536, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.35376921652157806, |
|
"grad_norm": 0.28266003520813626, |
|
"learning_rate": 0.00046453495096832677, |
|
"loss": 0.44, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.3556214113724764, |
|
"grad_norm": 0.3422119367179134, |
|
"learning_rate": 0.00046415833505504344, |
|
"loss": 0.4584, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3574736062233747, |
|
"grad_norm": 0.2749096084932521, |
|
"learning_rate": 0.0004637798843272819, |
|
"loss": 0.3907, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.35932580107427303, |
|
"grad_norm": 0.26388805864831494, |
|
"learning_rate": 0.00046339960202744154, |
|
"loss": 0.5757, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.3611779959251713, |
|
"grad_norm": 0.2738001016444935, |
|
"learning_rate": 0.000463017491413614, |
|
"loss": 0.4938, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.36303019077606963, |
|
"grad_norm": 0.27217682271594046, |
|
"learning_rate": 0.00046263355575955513, |
|
"loss": 0.4063, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.36488238562696795, |
|
"grad_norm": 0.23291262129921603, |
|
"learning_rate": 0.0004622477983546567, |
|
"loss": 0.419, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.3667345804778663, |
|
"grad_norm": 0.304942976924537, |
|
"learning_rate": 0.0004618602225039187, |
|
"loss": 0.4168, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.3685867753287646, |
|
"grad_norm": 0.24084297499524615, |
|
"learning_rate": 0.00046147083152792064, |
|
"loss": 0.3846, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.3704389701796629, |
|
"grad_norm": 0.27930179036055947, |
|
"learning_rate": 0.00046107962876279317, |
|
"loss": 0.4226, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3722911650305612, |
|
"grad_norm": 0.22286791279607676, |
|
"learning_rate": 0.00046068661756018975, |
|
"loss": 0.3928, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.3741433598814595, |
|
"grad_norm": 0.22400156451080455, |
|
"learning_rate": 0.00046029180128725756, |
|
"loss": 0.4584, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.37599555473235785, |
|
"grad_norm": 0.3152682221415501, |
|
"learning_rate": 0.0004598951833266087, |
|
"loss": 0.4314, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.3778477495832562, |
|
"grad_norm": 0.31019682799358195, |
|
"learning_rate": 0.00045949676707629186, |
|
"loss": 0.4237, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.37969994443415445, |
|
"grad_norm": 0.32258613660465024, |
|
"learning_rate": 0.00045909655594976207, |
|
"loss": 0.3827, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.38155213928505277, |
|
"grad_norm": 0.2506911135234745, |
|
"learning_rate": 0.00045869455337585246, |
|
"loss": 0.4037, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.3834043341359511, |
|
"grad_norm": 0.35915658848471477, |
|
"learning_rate": 0.0004582907627987444, |
|
"loss": 0.4242, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.3852565289868494, |
|
"grad_norm": 0.28180517097875335, |
|
"learning_rate": 0.00045788518767793786, |
|
"loss": 0.4342, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.38710872383774775, |
|
"grad_norm": 0.22401926241944572, |
|
"learning_rate": 0.0004574778314882225, |
|
"loss": 0.4546, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.388960918688646, |
|
"grad_norm": 0.3007971129642205, |
|
"learning_rate": 0.0004570686977196468, |
|
"loss": 0.4364, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.39081311353954434, |
|
"grad_norm": 0.24088799894015317, |
|
"learning_rate": 0.0004566577898774893, |
|
"loss": 0.4313, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.39266530839044267, |
|
"grad_norm": 0.30698196088504776, |
|
"learning_rate": 0.0004562451114822276, |
|
"loss": 0.3996, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.394517503241341, |
|
"grad_norm": 0.2516817084212753, |
|
"learning_rate": 0.0004558306660695089, |
|
"loss": 0.4434, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.3963696980922393, |
|
"grad_norm": 0.24923810797995163, |
|
"learning_rate": 0.00045541445719011933, |
|
"loss": 0.3827, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.39822189294313765, |
|
"grad_norm": 0.2838748265882661, |
|
"learning_rate": 0.0004549964884099534, |
|
"loss": 0.4097, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.4000740877940359, |
|
"grad_norm": 0.2520366270233344, |
|
"learning_rate": 0.0004545767633099842, |
|
"loss": 0.4257, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.40192628264493424, |
|
"grad_norm": 0.29635595927178765, |
|
"learning_rate": 0.0004541552854862317, |
|
"loss": 0.4305, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.40377847749583257, |
|
"grad_norm": 0.3136173166936259, |
|
"learning_rate": 0.00045373205854973265, |
|
"loss": 0.4592, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.4056306723467309, |
|
"grad_norm": 0.2849443744452371, |
|
"learning_rate": 0.0004533070861265094, |
|
"loss": 0.4604, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.4074828671976292, |
|
"grad_norm": 0.27436502832510207, |
|
"learning_rate": 0.000452880371857539, |
|
"loss": 0.3709, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.4093350620485275, |
|
"grad_norm": 0.31084213819654966, |
|
"learning_rate": 0.0004524519193987215, |
|
"loss": 0.4707, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.4111872568994258, |
|
"grad_norm": 0.27171948513912497, |
|
"learning_rate": 0.00045202173242084954, |
|
"loss": 0.4131, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.41303945175032414, |
|
"grad_norm": 0.2720258760965373, |
|
"learning_rate": 0.0004515898146095758, |
|
"loss": 0.3954, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.41489164660122246, |
|
"grad_norm": 0.21964829380379464, |
|
"learning_rate": 0.0004511561696653823, |
|
"loss": 0.432, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.4167438414521208, |
|
"grad_norm": 0.22147147407497397, |
|
"learning_rate": 0.0004507208013035483, |
|
"loss": 0.406, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.41859603630301906, |
|
"grad_norm": 0.2592943907855162, |
|
"learning_rate": 0.0004502837132541186, |
|
"loss": 0.4092, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.4204482311539174, |
|
"grad_norm": 0.2697288980975384, |
|
"learning_rate": 0.0004498449092618715, |
|
"loss": 0.3643, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.4223004260048157, |
|
"grad_norm": 0.2502930773158984, |
|
"learning_rate": 0.00044940439308628654, |
|
"loss": 0.344, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.42415262085571404, |
|
"grad_norm": 0.28445457893318615, |
|
"learning_rate": 0.00044896216850151294, |
|
"loss": 0.4511, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.42600481570661236, |
|
"grad_norm": 0.3361734430502526, |
|
"learning_rate": 0.0004485182392963364, |
|
"loss": 0.3547, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.42785701055751063, |
|
"grad_norm": 0.2326479256523765, |
|
"learning_rate": 0.0004480726092741472, |
|
"loss": 0.3731, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.42970920540840896, |
|
"grad_norm": 0.2646729222942232, |
|
"learning_rate": 0.00044762528225290757, |
|
"loss": 0.4015, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.4315614002593073, |
|
"grad_norm": 0.33778964570201236, |
|
"learning_rate": 0.0004471762620651187, |
|
"loss": 0.4, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.4334135951102056, |
|
"grad_norm": 0.31289509233278756, |
|
"learning_rate": 0.00044672555255778824, |
|
"loss": 0.4377, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.43526578996110393, |
|
"grad_norm": 0.27440247092572545, |
|
"learning_rate": 0.00044627315759239715, |
|
"loss": 0.3972, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.4371179848120022, |
|
"grad_norm": 0.2641845623874125, |
|
"learning_rate": 0.0004458190810448667, |
|
"loss": 0.3864, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.43897017966290053, |
|
"grad_norm": 0.3042810996664228, |
|
"learning_rate": 0.0004453633268055249, |
|
"loss": 0.4277, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.44082237451379885, |
|
"grad_norm": 0.2497842382086681, |
|
"learning_rate": 0.00044490589877907406, |
|
"loss": 0.3926, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.4426745693646972, |
|
"grad_norm": 0.2259561601883072, |
|
"learning_rate": 0.00044444680088455624, |
|
"loss": 0.4567, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.4445267642155955, |
|
"grad_norm": 0.2644522169590116, |
|
"learning_rate": 0.00044398603705532046, |
|
"loss": 0.4257, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.4463789590664938, |
|
"grad_norm": 0.24862008909243488, |
|
"learning_rate": 0.0004435236112389887, |
|
"loss": 0.3187, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.4482311539173921, |
|
"grad_norm": 0.2838495721029593, |
|
"learning_rate": 0.000443059527397422, |
|
"loss": 0.4659, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.4500833487682904, |
|
"grad_norm": 0.219358259027201, |
|
"learning_rate": 0.00044259378950668683, |
|
"loss": 0.3919, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.45193554361918875, |
|
"grad_norm": 0.31146983163040265, |
|
"learning_rate": 0.00044212640155702053, |
|
"loss": 0.4584, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.4537877384700871, |
|
"grad_norm": 0.26979102938650734, |
|
"learning_rate": 0.00044165736755279785, |
|
"loss": 0.3086, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.45563993332098535, |
|
"grad_norm": 0.29314640181084967, |
|
"learning_rate": 0.00044118669151249585, |
|
"loss": 0.4357, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.45749212817188367, |
|
"grad_norm": 0.2523855052206998, |
|
"learning_rate": 0.00044071437746865994, |
|
"loss": 0.4024, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.459344323022782, |
|
"grad_norm": 0.24148640334233432, |
|
"learning_rate": 0.0004402404294678692, |
|
"loss": 0.396, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.4611965178736803, |
|
"grad_norm": 0.22896761800287638, |
|
"learning_rate": 0.00043976485157070185, |
|
"loss": 0.4293, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.46304871272457865, |
|
"grad_norm": 0.24737906716097793, |
|
"learning_rate": 0.0004392876478517002, |
|
"loss": 0.4756, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.4649009075754769, |
|
"grad_norm": 0.305490554690619, |
|
"learning_rate": 0.000438808822399336, |
|
"loss": 0.405, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.46675310242637524, |
|
"grad_norm": 0.2802043380804828, |
|
"learning_rate": 0.00043832837931597526, |
|
"loss": 0.3876, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.46860529727727357, |
|
"grad_norm": 0.2860415378563156, |
|
"learning_rate": 0.00043784632271784304, |
|
"loss": 0.4161, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.4704574921281719, |
|
"grad_norm": 0.28267000501834966, |
|
"learning_rate": 0.0004373626567349885, |
|
"loss": 0.4143, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.4723096869790702, |
|
"grad_norm": 0.2525367504836072, |
|
"learning_rate": 0.00043687738551124913, |
|
"loss": 0.3757, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.4741618818299685, |
|
"grad_norm": 0.3925357847215651, |
|
"learning_rate": 0.0004363905132042154, |
|
"loss": 0.3826, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.4760140766808668, |
|
"grad_norm": 0.3263265495863413, |
|
"learning_rate": 0.00043590204398519526, |
|
"loss": 0.4263, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.47786627153176514, |
|
"grad_norm": 0.30208444736193557, |
|
"learning_rate": 0.0004354119820391784, |
|
"loss": 0.3817, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.47971846638266347, |
|
"grad_norm": 0.2561058320675499, |
|
"learning_rate": 0.00043492033156479997, |
|
"loss": 0.4278, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.4815706612335618, |
|
"grad_norm": 0.30589399146654594, |
|
"learning_rate": 0.0004344270967743052, |
|
"loss": 0.4058, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.48342285608446006, |
|
"grad_norm": 0.2978445001042373, |
|
"learning_rate": 0.00043393228189351297, |
|
"loss": 0.4212, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.4852750509353584, |
|
"grad_norm": 0.29323906443796505, |
|
"learning_rate": 0.0004334358911617797, |
|
"loss": 0.4304, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.4871272457862567, |
|
"grad_norm": 0.25775394604491453, |
|
"learning_rate": 0.000432937928831963, |
|
"loss": 0.4291, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.48897944063715504, |
|
"grad_norm": 0.2860673624388678, |
|
"learning_rate": 0.00043243839917038506, |
|
"loss": 0.4452, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.49083163548805336, |
|
"grad_norm": 0.2451402557512562, |
|
"learning_rate": 0.00043193730645679665, |
|
"loss": 0.349, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.49268383033895163, |
|
"grad_norm": 0.23951029660105672, |
|
"learning_rate": 0.0004314346549843398, |
|
"loss": 0.3986, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.49453602518984996, |
|
"grad_norm": 0.24086380299145352, |
|
"learning_rate": 0.0004309304490595113, |
|
"loss": 0.4069, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.4963882200407483, |
|
"grad_norm": 0.19690525958834837, |
|
"learning_rate": 0.00043042469300212595, |
|
"loss": 0.3658, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.4982404148916466, |
|
"grad_norm": 0.2873547855172915, |
|
"learning_rate": 0.0004299173911452794, |
|
"loss": 0.4045, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.5000926097425449, |
|
"grad_norm": 0.3445660214713212, |
|
"learning_rate": 0.0004294085478353109, |
|
"loss": 0.3342, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5019448045934433, |
|
"grad_norm": 0.26259627047719875, |
|
"learning_rate": 0.00042889816743176625, |
|
"loss": 0.4115, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.5037969994443415, |
|
"grad_norm": 0.27090069459316, |
|
"learning_rate": 0.0004283862543073604, |
|
"loss": 0.4178, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.5056491942952399, |
|
"grad_norm": 0.3203148075266908, |
|
"learning_rate": 0.00042787281284794, |
|
"loss": 0.4177, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.5075013891461382, |
|
"grad_norm": 0.2044466650316563, |
|
"learning_rate": 0.00042735784745244585, |
|
"loss": 0.415, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.5093535839970365, |
|
"grad_norm": 0.2673811085531597, |
|
"learning_rate": 0.000426841362532875, |
|
"loss": 0.3923, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.5112057788479348, |
|
"grad_norm": 0.23323940410282512, |
|
"learning_rate": 0.00042632336251424317, |
|
"loss": 0.3643, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.5130579736988331, |
|
"grad_norm": 0.19502502356966445, |
|
"learning_rate": 0.00042580385183454695, |
|
"loss": 0.4509, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.5149101685497315, |
|
"grad_norm": 0.3081825384344212, |
|
"learning_rate": 0.0004252828349447254, |
|
"loss": 0.3374, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.5167623634006298, |
|
"grad_norm": 0.19926889616728075, |
|
"learning_rate": 0.00042476031630862235, |
|
"loss": 0.3751, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.518614558251528, |
|
"grad_norm": 0.2980672545203656, |
|
"learning_rate": 0.00042423630040294756, |
|
"loss": 0.3737, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.5204667531024264, |
|
"grad_norm": 0.2805956385580894, |
|
"learning_rate": 0.0004237107917172391, |
|
"loss": 0.3498, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.5223189479533247, |
|
"grad_norm": 0.24883952133869866, |
|
"learning_rate": 0.00042318379475382454, |
|
"loss": 0.369, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.5241711428042231, |
|
"grad_norm": 0.26010129083226985, |
|
"learning_rate": 0.0004226553140277819, |
|
"loss": 0.3763, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.5260233376551213, |
|
"grad_norm": 0.3407509896784033, |
|
"learning_rate": 0.000422125354066902, |
|
"loss": 0.3339, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.5278755325060196, |
|
"grad_norm": 0.2022248872951544, |
|
"learning_rate": 0.0004215939194116487, |
|
"loss": 0.415, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.529727727356918, |
|
"grad_norm": 0.3427987857911665, |
|
"learning_rate": 0.0004210610146151206, |
|
"loss": 0.4224, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.5315799222078162, |
|
"grad_norm": 0.23594824415533, |
|
"learning_rate": 0.0004205266442430117, |
|
"loss": 0.4051, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.5334321170587146, |
|
"grad_norm": 0.29315061402915377, |
|
"learning_rate": 0.00041999081287357246, |
|
"loss": 0.3898, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.5352843119096129, |
|
"grad_norm": 0.25391786215048595, |
|
"learning_rate": 0.0004194535250975705, |
|
"loss": 0.4163, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.5371365067605112, |
|
"grad_norm": 0.30989709227816453, |
|
"learning_rate": 0.00041891478551825135, |
|
"loss": 0.4528, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.5389887016114095, |
|
"grad_norm": 0.30084068834422883, |
|
"learning_rate": 0.000418374598751299, |
|
"loss": 0.4187, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.5408408964623078, |
|
"grad_norm": 0.2707819885874306, |
|
"learning_rate": 0.000417832969424796, |
|
"loss": 0.4203, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.5426930913132062, |
|
"grad_norm": 0.27765562870418, |
|
"learning_rate": 0.00041728990217918454, |
|
"loss": 0.4354, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.5445452861641045, |
|
"grad_norm": 0.2957077208859336, |
|
"learning_rate": 0.00041674540166722595, |
|
"loss": 0.4214, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5463974810150027, |
|
"grad_norm": 0.3687676577456054, |
|
"learning_rate": 0.0004161994725539614, |
|
"loss": 0.3915, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.5482496758659011, |
|
"grad_norm": 0.26016346169725796, |
|
"learning_rate": 0.00041565211951667143, |
|
"loss": 0.4265, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.5501018707167994, |
|
"grad_norm": 0.29400682034550746, |
|
"learning_rate": 0.0004151033472448363, |
|
"loss": 0.3754, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.5519540655676978, |
|
"grad_norm": 0.24729614759661173, |
|
"learning_rate": 0.00041455316044009563, |
|
"loss": 0.3678, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.553806260418596, |
|
"grad_norm": 0.30448617928085525, |
|
"learning_rate": 0.0004140015638162081, |
|
"loss": 0.3521, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.5556584552694943, |
|
"grad_norm": 0.326331806127286, |
|
"learning_rate": 0.0004134485620990113, |
|
"loss": 0.3829, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5575106501203927, |
|
"grad_norm": 0.2831079722418925, |
|
"learning_rate": 0.0004128941600263805, |
|
"loss": 0.3499, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.559362844971291, |
|
"grad_norm": 0.2544434887846111, |
|
"learning_rate": 0.00041233836234818926, |
|
"loss": 0.4621, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.5612150398221893, |
|
"grad_norm": 0.272652788679403, |
|
"learning_rate": 0.0004117811738262677, |
|
"loss": 0.413, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.5630672346730876, |
|
"grad_norm": 0.25142412831266564, |
|
"learning_rate": 0.0004112225992343621, |
|
"loss": 0.4163, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.5649194295239859, |
|
"grad_norm": 0.2647884767561391, |
|
"learning_rate": 0.00041066264335809413, |
|
"loss": 0.3914, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.5667716243748843, |
|
"grad_norm": 0.23801633376774256, |
|
"learning_rate": 0.00041010131099491944, |
|
"loss": 0.3754, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.5686238192257825, |
|
"grad_norm": 0.2731341421028539, |
|
"learning_rate": 0.0004095386069540872, |
|
"loss": 0.4227, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.5704760140766809, |
|
"grad_norm": 0.2011024370599634, |
|
"learning_rate": 0.0004089745360565981, |
|
"loss": 0.3834, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.5723282089275792, |
|
"grad_norm": 0.23740640073183247, |
|
"learning_rate": 0.00040840910313516364, |
|
"loss": 0.4279, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.5741804037784775, |
|
"grad_norm": 0.2525764151086583, |
|
"learning_rate": 0.00040784231303416473, |
|
"loss": 0.3782, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.5760325986293758, |
|
"grad_norm": 0.29277924659862603, |
|
"learning_rate": 0.00040727417060960967, |
|
"loss": 0.3743, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.5778847934802741, |
|
"grad_norm": 0.24242972284715095, |
|
"learning_rate": 0.0004067046807290931, |
|
"loss": 0.3832, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.5797369883311725, |
|
"grad_norm": 0.25071856580407875, |
|
"learning_rate": 0.0004061338482717538, |
|
"loss": 0.3867, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.5815891831820708, |
|
"grad_norm": 0.2837990600721797, |
|
"learning_rate": 0.0004055616781282335, |
|
"loss": 0.4151, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.583441378032969, |
|
"grad_norm": 0.22534951219394125, |
|
"learning_rate": 0.0004049881752006346, |
|
"loss": 0.3788, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.5852935728838674, |
|
"grad_norm": 0.2817669494395476, |
|
"learning_rate": 0.0004044133444024779, |
|
"loss": 0.437, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.5871457677347657, |
|
"grad_norm": 0.20817420244233692, |
|
"learning_rate": 0.00040383719065866105, |
|
"loss": 0.3918, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.5889979625856641, |
|
"grad_norm": 0.2734267113676852, |
|
"learning_rate": 0.0004032597189054161, |
|
"loss": 0.4261, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.5908501574365623, |
|
"grad_norm": 0.27859862469331026, |
|
"learning_rate": 0.0004026809340902672, |
|
"loss": 0.4035, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.5927023522874606, |
|
"grad_norm": 0.2545952221508602, |
|
"learning_rate": 0.0004021008411719881, |
|
"loss": 0.3432, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.594554547138359, |
|
"grad_norm": 0.270005891201155, |
|
"learning_rate": 0.0004015194451205601, |
|
"loss": 0.354, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.5964067419892572, |
|
"grad_norm": 0.24352901007536132, |
|
"learning_rate": 0.000400936750917129, |
|
"loss": 0.3729, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.5982589368401556, |
|
"grad_norm": 0.2556498791861634, |
|
"learning_rate": 0.0004003527635539625, |
|
"loss": 0.4015, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.6001111316910539, |
|
"grad_norm": 0.2752351613083482, |
|
"learning_rate": 0.00039976748803440774, |
|
"loss": 0.3672, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.6019633265419522, |
|
"grad_norm": 0.2609226477539244, |
|
"learning_rate": 0.000399180929372848, |
|
"loss": 0.4015, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.6038155213928506, |
|
"grad_norm": 0.30960657643957806, |
|
"learning_rate": 0.00039859309259466017, |
|
"loss": 0.3641, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.6056677162437488, |
|
"grad_norm": 0.3035485490629689, |
|
"learning_rate": 0.0003980039827361712, |
|
"loss": 0.4543, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.6075199110946472, |
|
"grad_norm": 0.2184916474124068, |
|
"learning_rate": 0.0003974136048446155, |
|
"loss": 0.337, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.6093721059455455, |
|
"grad_norm": 0.2843568329769092, |
|
"learning_rate": 0.0003968219639780915, |
|
"loss": 0.4351, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.6112243007964437, |
|
"grad_norm": 0.269831900653445, |
|
"learning_rate": 0.00039622906520551786, |
|
"loss": 0.3777, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.6130764956473421, |
|
"grad_norm": 0.2834037960599415, |
|
"learning_rate": 0.0003956349136065908, |
|
"loss": 0.3924, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.6149286904982404, |
|
"grad_norm": 0.24761657160080242, |
|
"learning_rate": 0.00039503951427173985, |
|
"loss": 0.4168, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.6167808853491388, |
|
"grad_norm": 0.30901172205688504, |
|
"learning_rate": 0.00039444287230208495, |
|
"loss": 0.3873, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.618633080200037, |
|
"grad_norm": 0.29747872909981493, |
|
"learning_rate": 0.0003938449928093922, |
|
"loss": 0.4341, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.6204852750509353, |
|
"grad_norm": 0.2543886903531346, |
|
"learning_rate": 0.0003932458809160303, |
|
"loss": 0.3683, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.6223374699018337, |
|
"grad_norm": 0.33337732842586854, |
|
"learning_rate": 0.0003926455417549266, |
|
"loss": 0.3755, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.624189664752732, |
|
"grad_norm": 0.2464332085515913, |
|
"learning_rate": 0.00039204398046952313, |
|
"loss": 0.3602, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.6260418596036303, |
|
"grad_norm": 0.2946927475643436, |
|
"learning_rate": 0.00039144120221373254, |
|
"loss": 0.4474, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.6278940544545286, |
|
"grad_norm": 0.3017003197321625, |
|
"learning_rate": 0.0003908372121518939, |
|
"loss": 0.4334, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.6297462493054269, |
|
"grad_norm": 0.32871078632996376, |
|
"learning_rate": 0.0003902320154587288, |
|
"loss": 0.3826, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.6315984441563253, |
|
"grad_norm": 0.3041703577665594, |
|
"learning_rate": 0.0003896256173192963, |
|
"loss": 0.4301, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.6334506390072235, |
|
"grad_norm": 0.27657730284049636, |
|
"learning_rate": 0.0003890180229289492, |
|
"loss": 0.3637, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.6353028338581219, |
|
"grad_norm": 0.2894023841563432, |
|
"learning_rate": 0.0003884701694853233, |
|
"loss": 0.4083, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.6371550287090202, |
|
"grad_norm": 0.3313798136401644, |
|
"learning_rate": 0.00038786031656810573, |
|
"loss": 0.3613, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.6390072235599185, |
|
"grad_norm": 0.31419538828574267, |
|
"learning_rate": 0.0003872492825242943, |
|
"loss": 0.3517, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.6408594184108168, |
|
"grad_norm": 0.2856367570197956, |
|
"learning_rate": 0.0003866370725889602, |
|
"loss": 0.3311, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.6427116132617151, |
|
"grad_norm": 0.32378046135112004, |
|
"learning_rate": 0.00038602369200724907, |
|
"loss": 0.3808, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.6445638081126135, |
|
"grad_norm": 0.2809834575253639, |
|
"learning_rate": 0.00038540914603433596, |
|
"loss": 0.3874, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.6464160029635118, |
|
"grad_norm": 0.23009208535401943, |
|
"learning_rate": 0.00038479343993538085, |
|
"loss": 0.415, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.64826819781441, |
|
"grad_norm": 0.22641660111122883, |
|
"learning_rate": 0.00038417657898548284, |
|
"loss": 0.3278, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6501203926653084, |
|
"grad_norm": 0.2981220824138414, |
|
"learning_rate": 0.00038355856846963545, |
|
"loss": 0.4047, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.6519725875162067, |
|
"grad_norm": 0.2555163199749857, |
|
"learning_rate": 0.00038293941368268105, |
|
"loss": 0.4132, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.6538247823671051, |
|
"grad_norm": 0.2291679316803199, |
|
"learning_rate": 0.00038231911992926573, |
|
"loss": 0.4501, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.6556769772180033, |
|
"grad_norm": 0.22327007435525262, |
|
"learning_rate": 0.0003816976925237936, |
|
"loss": 0.4047, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.6575291720689016, |
|
"grad_norm": 0.26270477479908155, |
|
"learning_rate": 0.00038113744298654294, |
|
"loss": 0.3669, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.6593813669198, |
|
"grad_norm": 0.20304050646048286, |
|
"learning_rate": 0.00038051387631809585, |
|
"loss": 0.4247, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.6612335617706983, |
|
"grad_norm": 0.2626214779683425, |
|
"learning_rate": 0.0003798891914641258, |
|
"loss": 0.3397, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.6630857566215966, |
|
"grad_norm": 0.2927783575344774, |
|
"learning_rate": 0.00037926339377665805, |
|
"loss": 0.3352, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.6649379514724949, |
|
"grad_norm": 0.2868661472365901, |
|
"learning_rate": 0.0003786364886172521, |
|
"loss": 0.4321, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.6667901463233932, |
|
"grad_norm": 0.1980588697868199, |
|
"learning_rate": 0.00037800848135695564, |
|
"loss": 0.355, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.6686423411742916, |
|
"grad_norm": 0.27964064214829887, |
|
"learning_rate": 0.00037737937737625905, |
|
"loss": 0.3953, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.6704945360251898, |
|
"grad_norm": 0.30140561884162703, |
|
"learning_rate": 0.0003767491820650486, |
|
"loss": 0.3802, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.6723467308760882, |
|
"grad_norm": 0.26216353668713616, |
|
"learning_rate": 0.00037611790082256073, |
|
"loss": 0.3701, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.6741989257269865, |
|
"grad_norm": 0.2667607207767126, |
|
"learning_rate": 0.00037548553905733566, |
|
"loss": 0.4217, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.6760511205778847, |
|
"grad_norm": 0.2888052260287578, |
|
"learning_rate": 0.00037485210218717095, |
|
"loss": 0.3861, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.6779033154287831, |
|
"grad_norm": 0.322681691929484, |
|
"learning_rate": 0.0003742175956390754, |
|
"loss": 0.3769, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.6797555102796814, |
|
"grad_norm": 0.2809039196576165, |
|
"learning_rate": 0.0003735820248492221, |
|
"loss": 0.37, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.6816077051305798, |
|
"grad_norm": 0.3168194333373297, |
|
"learning_rate": 0.0003729453952629022, |
|
"loss": 0.3813, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.683459899981478, |
|
"grad_norm": 0.2743408298239755, |
|
"learning_rate": 0.00037230771233447813, |
|
"loss": 0.3762, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.6853120948323763, |
|
"grad_norm": 0.2997039201183461, |
|
"learning_rate": 0.000371668981527337, |
|
"loss": 0.4346, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.6871642896832747, |
|
"grad_norm": 0.18532771548719357, |
|
"learning_rate": 0.0003710292083138436, |
|
"loss": 0.344, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.689016484534173, |
|
"grad_norm": 0.3521954419398032, |
|
"learning_rate": 0.0003703883981752935, |
|
"loss": 0.378, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.6908686793850713, |
|
"grad_norm": 0.3037259752726694, |
|
"learning_rate": 0.00036974655660186644, |
|
"loss": 0.4339, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.6927208742359696, |
|
"grad_norm": 0.24733145996258551, |
|
"learning_rate": 0.0003691036890925788, |
|
"loss": 0.4195, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.6945730690868679, |
|
"grad_norm": 0.19584340465708208, |
|
"learning_rate": 0.0003684598011552368, |
|
"loss": 0.3404, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.6964252639377663, |
|
"grad_norm": 0.2530305551321265, |
|
"learning_rate": 0.00036781489830638923, |
|
"loss": 0.3163, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.6982774587886645, |
|
"grad_norm": 0.26939789666432756, |
|
"learning_rate": 0.0003671689860712804, |
|
"loss": 0.3419, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.7001296536395629, |
|
"grad_norm": 0.24191294552249204, |
|
"learning_rate": 0.0003665220699838022, |
|
"loss": 0.4176, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.7019818484904612, |
|
"grad_norm": 0.2777592117015156, |
|
"learning_rate": 0.00036587415558644756, |
|
"loss": 0.3215, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.7038340433413595, |
|
"grad_norm": 0.30078087923699953, |
|
"learning_rate": 0.00036522524843026193, |
|
"loss": 0.3564, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.7056862381922578, |
|
"grad_norm": 0.29338660781666925, |
|
"learning_rate": 0.00036457535407479673, |
|
"loss": 0.3725, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.7075384330431561, |
|
"grad_norm": 0.2296766539983086, |
|
"learning_rate": 0.00036392447808806117, |
|
"loss": 0.3688, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.7093906278940545, |
|
"grad_norm": 0.30321062833889273, |
|
"learning_rate": 0.0003632726260464746, |
|
"loss": 0.3948, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.7112428227449528, |
|
"grad_norm": 0.29399675372420425, |
|
"learning_rate": 0.0003626198035348187, |
|
"loss": 0.4013, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.713095017595851, |
|
"grad_norm": 0.2105362387910143, |
|
"learning_rate": 0.0003619660161461898, |
|
"loss": 0.366, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.7149472124467494, |
|
"grad_norm": 0.23037128345764354, |
|
"learning_rate": 0.00036131126948195103, |
|
"loss": 0.4221, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.7167994072976477, |
|
"grad_norm": 0.2768953340591145, |
|
"learning_rate": 0.00036065556915168377, |
|
"loss": 0.2986, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.7186516021485461, |
|
"grad_norm": 0.23581750422601885, |
|
"learning_rate": 0.0003599989207731404, |
|
"loss": 0.3691, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.7205037969994443, |
|
"grad_norm": 0.23261721710497926, |
|
"learning_rate": 0.0003593413299721955, |
|
"loss": 0.4161, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.7223559918503426, |
|
"grad_norm": 0.26947390848344027, |
|
"learning_rate": 0.00035868280238279804, |
|
"loss": 0.4034, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.724208186701241, |
|
"grad_norm": 0.2604323518406546, |
|
"learning_rate": 0.00035802334364692283, |
|
"loss": 0.3652, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.7260603815521393, |
|
"grad_norm": 0.19811786937816656, |
|
"learning_rate": 0.00035736295941452256, |
|
"loss": 0.3411, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.7279125764030376, |
|
"grad_norm": 0.2942447611839833, |
|
"learning_rate": 0.0003567016553434791, |
|
"loss": 0.3932, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.7297647712539359, |
|
"grad_norm": 0.20647945881304144, |
|
"learning_rate": 0.00035603943709955495, |
|
"loss": 0.3481, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.7316169661048342, |
|
"grad_norm": 0.29098401038664423, |
|
"learning_rate": 0.0003553763103563449, |
|
"loss": 0.3205, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.7334691609557326, |
|
"grad_norm": 0.24827960683081182, |
|
"learning_rate": 0.00035471228079522754, |
|
"loss": 0.3653, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.7353213558066308, |
|
"grad_norm": 0.21532456030161418, |
|
"learning_rate": 0.0003540473541053161, |
|
"loss": 0.3299, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.7371735506575292, |
|
"grad_norm": 0.28516797949078204, |
|
"learning_rate": 0.0003533815359834103, |
|
"loss": 0.3718, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.7390257455084275, |
|
"grad_norm": 0.2617620703053819, |
|
"learning_rate": 0.00035271483213394715, |
|
"loss": 0.3505, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.7408779403593257, |
|
"grad_norm": 0.27198805201563014, |
|
"learning_rate": 0.000352047248268952, |
|
"loss": 0.3968, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7427301352102241, |
|
"grad_norm": 0.1957730557770133, |
|
"learning_rate": 0.0003513787901079902, |
|
"loss": 0.3647, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.7445823300611224, |
|
"grad_norm": 0.2424016899157965, |
|
"learning_rate": 0.0003507094633781173, |
|
"loss": 0.4071, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.7464345249120208, |
|
"grad_norm": 0.2513574669580144, |
|
"learning_rate": 0.00035003927381383046, |
|
"loss": 0.3348, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.748286719762919, |
|
"grad_norm": 0.2524624117498673, |
|
"learning_rate": 0.00034936822715701945, |
|
"loss": 0.3805, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.7501389146138173, |
|
"grad_norm": 0.23903538948524897, |
|
"learning_rate": 0.00034869632915691685, |
|
"loss": 0.335, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.7519911094647157, |
|
"grad_norm": 0.18376558979991064, |
|
"learning_rate": 0.0003480235855700495, |
|
"loss": 0.3251, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.753843304315614, |
|
"grad_norm": 0.23255076073481523, |
|
"learning_rate": 0.0003473500021601888, |
|
"loss": 0.3706, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.7556954991665124, |
|
"grad_norm": 0.26504941120664904, |
|
"learning_rate": 0.0003466755846983012, |
|
"loss": 0.3388, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.7575476940174106, |
|
"grad_norm": 0.21513866870033804, |
|
"learning_rate": 0.00034600033896249903, |
|
"loss": 0.3493, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.7593998888683089, |
|
"grad_norm": 0.2588933457999632, |
|
"learning_rate": 0.00034532427073799115, |
|
"loss": 0.3335, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.7612520837192073, |
|
"grad_norm": 0.22932856457029652, |
|
"learning_rate": 0.0003446473858170328, |
|
"loss": 0.3573, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.7631042785701055, |
|
"grad_norm": 0.25882003589945557, |
|
"learning_rate": 0.00034396968999887635, |
|
"loss": 0.3448, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.7649564734210039, |
|
"grad_norm": 0.18186372017813182, |
|
"learning_rate": 0.00034329118908972187, |
|
"loss": 0.3451, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.7668086682719022, |
|
"grad_norm": 0.2905270964806583, |
|
"learning_rate": 0.00034261188890266674, |
|
"loss": 0.3388, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.7686608631228005, |
|
"grad_norm": 0.27875971252061826, |
|
"learning_rate": 0.00034193179525765646, |
|
"loss": 0.3131, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.7705130579736988, |
|
"grad_norm": 0.24842087853864708, |
|
"learning_rate": 0.00034125091398143445, |
|
"loss": 0.4291, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.7723652528245971, |
|
"grad_norm": 0.2684559843295528, |
|
"learning_rate": 0.00034056925090749214, |
|
"loss": 0.3715, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.7742174476754955, |
|
"grad_norm": 0.22463589836430295, |
|
"learning_rate": 0.00033988681187601907, |
|
"loss": 0.4228, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.7760696425263938, |
|
"grad_norm": 0.27828743228315045, |
|
"learning_rate": 0.00033920360273385295, |
|
"loss": 0.2931, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.777921837377292, |
|
"grad_norm": 0.24380996785281236, |
|
"learning_rate": 0.0003385196293344295, |
|
"loss": 0.4017, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.7797740322281904, |
|
"grad_norm": 0.2909979077113848, |
|
"learning_rate": 0.0003378348975377319, |
|
"loss": 0.3481, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.7816262270790887, |
|
"grad_norm": 0.23332383664304898, |
|
"learning_rate": 0.0003371494132102414, |
|
"loss": 0.3445, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.7834784219299871, |
|
"grad_norm": 0.21450077928300515, |
|
"learning_rate": 0.0003364631822248863, |
|
"loss": 0.3472, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.7853306167808853, |
|
"grad_norm": 0.21521239472704395, |
|
"learning_rate": 0.00033577621046099214, |
|
"loss": 0.3326, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.7871828116317837, |
|
"grad_norm": 0.21746868050833518, |
|
"learning_rate": 0.00033508850380423107, |
|
"loss": 0.317, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.789035006482682, |
|
"grad_norm": 0.25145609268154195, |
|
"learning_rate": 0.00033440006814657123, |
|
"loss": 0.3903, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.7908872013335803, |
|
"grad_norm": 0.2493850757271924, |
|
"learning_rate": 0.00033371090938622683, |
|
"loss": 0.376, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.7927393961844786, |
|
"grad_norm": 0.27042518686478084, |
|
"learning_rate": 0.00033302103342760717, |
|
"loss": 0.3324, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.7945915910353769, |
|
"grad_norm": 0.36372007737066575, |
|
"learning_rate": 0.0003323304461812663, |
|
"loss": 0.2962, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.7964437858862753, |
|
"grad_norm": 0.2789450982129661, |
|
"learning_rate": 0.0003316391535638521, |
|
"loss": 0.4018, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.7982959807371736, |
|
"grad_norm": 0.30183962763634775, |
|
"learning_rate": 0.00033094716149805587, |
|
"loss": 0.3866, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.8001481755880718, |
|
"grad_norm": 0.21612720841935062, |
|
"learning_rate": 0.0003302544759125615, |
|
"loss": 0.4077, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.8020003704389702, |
|
"grad_norm": 0.23394333144621351, |
|
"learning_rate": 0.00032956110274199457, |
|
"loss": 0.386, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.8038525652898685, |
|
"grad_norm": 0.23944805976592476, |
|
"learning_rate": 0.00032886704792687156, |
|
"loss": 0.2975, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.8057047601407669, |
|
"grad_norm": 0.30206829611790686, |
|
"learning_rate": 0.0003281723174135491, |
|
"loss": 0.3464, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.8075569549916651, |
|
"grad_norm": 0.25395526533782503, |
|
"learning_rate": 0.00032747691715417297, |
|
"loss": 0.3839, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.8094091498425634, |
|
"grad_norm": 0.2701846283890953, |
|
"learning_rate": 0.0003267808531066268, |
|
"loss": 0.3718, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.8112613446934618, |
|
"grad_norm": 0.3284423662284243, |
|
"learning_rate": 0.00032608413123448127, |
|
"loss": 0.3123, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.81311353954436, |
|
"grad_norm": 0.19093953526607452, |
|
"learning_rate": 0.00032538675750694323, |
|
"loss": 0.3178, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.8149657343952584, |
|
"grad_norm": 0.2588745305552011, |
|
"learning_rate": 0.0003246887378988044, |
|
"loss": 0.3364, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.8168179292461567, |
|
"grad_norm": 0.2944248033604882, |
|
"learning_rate": 0.00032399007839038974, |
|
"loss": 0.3851, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.818670124097055, |
|
"grad_norm": 0.35233338424624305, |
|
"learning_rate": 0.00032329078496750685, |
|
"loss": 0.3935, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.8205223189479534, |
|
"grad_norm": 0.2529989683445966, |
|
"learning_rate": 0.00032259086362139444, |
|
"loss": 0.3545, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.8223745137988516, |
|
"grad_norm": 0.21890769609197974, |
|
"learning_rate": 0.00032189032034867095, |
|
"loss": 0.3322, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.82422670864975, |
|
"grad_norm": 0.2966639221943858, |
|
"learning_rate": 0.00032118916115128317, |
|
"loss": 0.3413, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.8260789035006483, |
|
"grad_norm": 0.28138389738354624, |
|
"learning_rate": 0.00032048739203645484, |
|
"loss": 0.3594, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.8279310983515465, |
|
"grad_norm": 0.26012433275701663, |
|
"learning_rate": 0.00031978501901663544, |
|
"loss": 0.354, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.8297832932024449, |
|
"grad_norm": 0.22288136348571755, |
|
"learning_rate": 0.00031908204810944806, |
|
"loss": 0.3345, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.8316354880533432, |
|
"grad_norm": 0.2563012485418534, |
|
"learning_rate": 0.0003183784853376386, |
|
"loss": 0.377, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.8334876829042416, |
|
"grad_norm": 0.19175987210580075, |
|
"learning_rate": 0.00031767433672902357, |
|
"loss": 0.378, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8353398777551398, |
|
"grad_norm": 0.27929483171815755, |
|
"learning_rate": 0.0003169696083164387, |
|
"loss": 0.4083, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.8371920726060381, |
|
"grad_norm": 0.22806754292261686, |
|
"learning_rate": 0.00031626430613768727, |
|
"loss": 0.2805, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.8390442674569365, |
|
"grad_norm": 0.2098902858669142, |
|
"learning_rate": 0.0003155584362354883, |
|
"loss": 0.3046, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.8408964623078348, |
|
"grad_norm": 0.22326173310010555, |
|
"learning_rate": 0.0003148520046574248, |
|
"loss": 0.3618, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.8427486571587331, |
|
"grad_norm": 0.28432435874722173, |
|
"learning_rate": 0.00031414501745589214, |
|
"loss": 0.3047, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.8446008520096314, |
|
"grad_norm": 0.22658460752200546, |
|
"learning_rate": 0.0003134374806880458, |
|
"loss": 0.3075, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.8464530468605297, |
|
"grad_norm": 0.2326511532797664, |
|
"learning_rate": 0.00031272940041574985, |
|
"loss": 0.3253, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.8483052417114281, |
|
"grad_norm": 0.26196194032003345, |
|
"learning_rate": 0.00031202078270552483, |
|
"loss": 0.3672, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.8501574365623263, |
|
"grad_norm": 0.2216415083774707, |
|
"learning_rate": 0.00031131163362849563, |
|
"loss": 0.361, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.8520096314132247, |
|
"grad_norm": 0.31309200526058145, |
|
"learning_rate": 0.0003106019592603401, |
|
"loss": 0.4028, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.853861826264123, |
|
"grad_norm": 0.30199878040880657, |
|
"learning_rate": 0.000309891765681236, |
|
"loss": 0.3254, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.8557140211150213, |
|
"grad_norm": 0.2657478340310185, |
|
"learning_rate": 0.0003091810589758099, |
|
"loss": 0.3965, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.8575662159659196, |
|
"grad_norm": 0.26801220601237896, |
|
"learning_rate": 0.0003084698452330844, |
|
"loss": 0.2717, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.8594184108168179, |
|
"grad_norm": 0.2691236527559968, |
|
"learning_rate": 0.0003077581305464263, |
|
"loss": 0.3449, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.8612706056677163, |
|
"grad_norm": 0.250751208793887, |
|
"learning_rate": 0.0003070459210134941, |
|
"loss": 0.3398, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.8631228005186146, |
|
"grad_norm": 0.2598136376324884, |
|
"learning_rate": 0.0003063332227361861, |
|
"loss": 0.379, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.8649749953695128, |
|
"grad_norm": 0.2320138289175307, |
|
"learning_rate": 0.00030569138145676144, |
|
"loss": 0.4172, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.8668271902204112, |
|
"grad_norm": 0.2544457573722289, |
|
"learning_rate": 0.0003049777713908237, |
|
"loss": 0.3363, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.8686793850713095, |
|
"grad_norm": 0.21755454053442072, |
|
"learning_rate": 0.000304263690299507, |
|
"loss": 0.3903, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.8705315799222079, |
|
"grad_norm": 0.1876698563670142, |
|
"learning_rate": 0.0003035491443007442, |
|
"loss": 0.3813, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.8723837747731061, |
|
"grad_norm": 0.23125086361592628, |
|
"learning_rate": 0.0003028341395164513, |
|
"loss": 0.326, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.8742359696240044, |
|
"grad_norm": 0.24526039999109062, |
|
"learning_rate": 0.0003021186820724752, |
|
"loss": 0.3818, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.8760881644749028, |
|
"grad_norm": 0.23276472003991475, |
|
"learning_rate": 0.0003014027780985406, |
|
"loss": 0.3286, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.8779403593258011, |
|
"grad_norm": 0.2879683324317072, |
|
"learning_rate": 0.00030068643372819804, |
|
"loss": 0.3563, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.8797925541766994, |
|
"grad_norm": 0.19871362889489913, |
|
"learning_rate": 0.0002999696550987713, |
|
"loss": 0.3271, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.8816447490275977, |
|
"grad_norm": 0.2749990294223314, |
|
"learning_rate": 0.00029925244835130466, |
|
"loss": 0.36, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.883496943878496, |
|
"grad_norm": 0.19581874215709116, |
|
"learning_rate": 0.00029853481963051015, |
|
"loss": 0.3869, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.8853491387293944, |
|
"grad_norm": 0.25690630291268424, |
|
"learning_rate": 0.0002978167750847153, |
|
"loss": 0.3291, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.8872013335802926, |
|
"grad_norm": 0.23380636858065187, |
|
"learning_rate": 0.0002970983208658101, |
|
"loss": 0.3148, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.889053528431191, |
|
"grad_norm": 0.27392706669357925, |
|
"learning_rate": 0.00029637946312919443, |
|
"loss": 0.3471, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.8909057232820893, |
|
"grad_norm": 0.262683330886347, |
|
"learning_rate": 0.00029566020803372544, |
|
"loss": 0.3581, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.8927579181329875, |
|
"grad_norm": 0.1967433279025824, |
|
"learning_rate": 0.0002949405617416647, |
|
"loss": 0.3244, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.8946101129838859, |
|
"grad_norm": 0.21893101415992228, |
|
"learning_rate": 0.00029422053041862524, |
|
"loss": 0.2418, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.8964623078347842, |
|
"grad_norm": 0.3050479264269311, |
|
"learning_rate": 0.000293500120233519, |
|
"loss": 0.3154, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.8983145026856826, |
|
"grad_norm": 0.22098931345400527, |
|
"learning_rate": 0.00029277933735850366, |
|
"loss": 0.3875, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.9001666975365809, |
|
"grad_norm": 0.18665489074313069, |
|
"learning_rate": 0.0002920581879689302, |
|
"loss": 0.3203, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.9020188923874791, |
|
"grad_norm": 0.22546452927540434, |
|
"learning_rate": 0.00029133667824328944, |
|
"loss": 0.3174, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.9038710872383775, |
|
"grad_norm": 0.273911749633942, |
|
"learning_rate": 0.0002906148143631597, |
|
"loss": 0.4109, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.9057232820892758, |
|
"grad_norm": 0.2862382822755954, |
|
"learning_rate": 0.0002898926025131534, |
|
"loss": 0.3438, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.9075754769401742, |
|
"grad_norm": 0.2256784413424552, |
|
"learning_rate": 0.0002891700488808641, |
|
"loss": 0.4231, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.9094276717910724, |
|
"grad_norm": 0.25475613390595164, |
|
"learning_rate": 0.0002884471596568138, |
|
"loss": 0.311, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.9112798666419707, |
|
"grad_norm": 0.22040988223176197, |
|
"learning_rate": 0.0002877239410343995, |
|
"loss": 0.3609, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.9131320614928691, |
|
"grad_norm": 0.21405974357001087, |
|
"learning_rate": 0.0002870003992098406, |
|
"loss": 0.3199, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.9149842563437673, |
|
"grad_norm": 0.22165830710412393, |
|
"learning_rate": 0.00028627654038212535, |
|
"loss": 0.2932, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.9168364511946657, |
|
"grad_norm": 0.2539298146212295, |
|
"learning_rate": 0.000285552370752958, |
|
"loss": 0.3203, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.918688646045564, |
|
"grad_norm": 0.2519284526672049, |
|
"learning_rate": 0.0002848278965267057, |
|
"loss": 0.299, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.9205408408964623, |
|
"grad_norm": 0.21558726442907455, |
|
"learning_rate": 0.000284103123910345, |
|
"loss": 0.3227, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.9223930357473606, |
|
"grad_norm": 0.2314909389156984, |
|
"learning_rate": 0.00028337805911340914, |
|
"loss": 0.3018, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.9242452305982589, |
|
"grad_norm": 0.278811225532839, |
|
"learning_rate": 0.00028265270834793466, |
|
"loss": 0.3002, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.9260974254491573, |
|
"grad_norm": 0.21464467115282912, |
|
"learning_rate": 0.0002819270778284081, |
|
"loss": 0.2984, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9279496203000556, |
|
"grad_norm": 0.21949485740442687, |
|
"learning_rate": 0.0002812011737717127, |
|
"loss": 0.3034, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.9298018151509538, |
|
"grad_norm": 0.22922734336855702, |
|
"learning_rate": 0.0002804750023970753, |
|
"loss": 0.3648, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.9316540100018522, |
|
"grad_norm": 0.2807666058464406, |
|
"learning_rate": 0.00027974856992601314, |
|
"loss": 0.347, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.9335062048527505, |
|
"grad_norm": 0.21380147064458355, |
|
"learning_rate": 0.00027902188258228033, |
|
"loss": 0.2868, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.9353583997036489, |
|
"grad_norm": 0.23226632039182726, |
|
"learning_rate": 0.00027829494659181454, |
|
"loss": 0.3373, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.9372105945545471, |
|
"grad_norm": 0.16664382791007723, |
|
"learning_rate": 0.0002775677681826838, |
|
"loss": 0.3425, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.9390627894054454, |
|
"grad_norm": 0.2131603970341897, |
|
"learning_rate": 0.00027684035358503315, |
|
"loss": 0.356, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.9409149842563438, |
|
"grad_norm": 0.2943760673928641, |
|
"learning_rate": 0.00027611270903103095, |
|
"loss": 0.3573, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.9427671791072421, |
|
"grad_norm": 0.2862566121817152, |
|
"learning_rate": 0.00027538484075481613, |
|
"loss": 0.4255, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.9446193739581404, |
|
"grad_norm": 0.231901510250299, |
|
"learning_rate": 0.00027465675499244396, |
|
"loss": 0.3407, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.9464715688090387, |
|
"grad_norm": 0.2476530639942114, |
|
"learning_rate": 0.0002739284579818333, |
|
"loss": 0.2723, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.948323763659937, |
|
"grad_norm": 0.21350073532203115, |
|
"learning_rate": 0.0002731999559627127, |
|
"loss": 0.3461, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.9501759585108354, |
|
"grad_norm": 0.2002031483905575, |
|
"learning_rate": 0.0002724712551765673, |
|
"loss": 0.3514, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.9520281533617336, |
|
"grad_norm": 0.2370797517823577, |
|
"learning_rate": 0.00027174236186658515, |
|
"loss": 0.3378, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.953880348212632, |
|
"grad_norm": 0.21585863872901473, |
|
"learning_rate": 0.0002710132822776037, |
|
"loss": 0.3321, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.9557325430635303, |
|
"grad_norm": 0.26386608394124156, |
|
"learning_rate": 0.0002702840226560564, |
|
"loss": 0.3436, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.9575847379144286, |
|
"grad_norm": 0.2890408109766508, |
|
"learning_rate": 0.00026955458924991923, |
|
"loss": 0.401, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.9594369327653269, |
|
"grad_norm": 0.25751071532225056, |
|
"learning_rate": 0.00026882498830865673, |
|
"loss": 0.3359, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.9612891276162252, |
|
"grad_norm": 0.1908489549011557, |
|
"learning_rate": 0.00026809522608316926, |
|
"loss": 0.3446, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.9631413224671236, |
|
"grad_norm": 0.2654943827624779, |
|
"learning_rate": 0.0002673653088257388, |
|
"loss": 0.3226, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.9649935173180219, |
|
"grad_norm": 0.2090532023246876, |
|
"learning_rate": 0.00026663524278997534, |
|
"loss": 0.3627, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.9668457121689201, |
|
"grad_norm": 0.1928560578254249, |
|
"learning_rate": 0.00026590503423076404, |
|
"loss": 0.3829, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.9686979070198185, |
|
"grad_norm": 0.2669070196379663, |
|
"learning_rate": 0.0002651746894042108, |
|
"loss": 0.3034, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.9705501018707168, |
|
"grad_norm": 0.30560885950305455, |
|
"learning_rate": 0.00026444421456758887, |
|
"loss": 0.3662, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.9724022967216152, |
|
"grad_norm": 0.26179376779317864, |
|
"learning_rate": 0.00026371361597928586, |
|
"loss": 0.3277, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.9742544915725134, |
|
"grad_norm": 0.22773579499385666, |
|
"learning_rate": 0.0002629828998987491, |
|
"loss": 0.3227, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.9761066864234117, |
|
"grad_norm": 0.22913911318822955, |
|
"learning_rate": 0.0002622520725864328, |
|
"loss": 0.4155, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.9779588812743101, |
|
"grad_norm": 0.26745430474124415, |
|
"learning_rate": 0.0002615211403037441, |
|
"loss": 0.3134, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.9798110761252083, |
|
"grad_norm": 0.18747224024104983, |
|
"learning_rate": 0.00026079010931298965, |
|
"loss": 0.3352, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.9816632709761067, |
|
"grad_norm": 0.2507770069072283, |
|
"learning_rate": 0.0002600589858773216, |
|
"loss": 0.2841, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.983515465827005, |
|
"grad_norm": 0.2320843718590129, |
|
"learning_rate": 0.00025932777626068405, |
|
"loss": 0.2901, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.9853676606779033, |
|
"grad_norm": 0.25694442462488337, |
|
"learning_rate": 0.0002585964867277597, |
|
"loss": 0.3655, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.9872198555288016, |
|
"grad_norm": 0.1946752572256077, |
|
"learning_rate": 0.00025786512354391585, |
|
"loss": 0.3399, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.9890720503796999, |
|
"grad_norm": 0.1531862751587864, |
|
"learning_rate": 0.00025713369297515056, |
|
"loss": 0.3309, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.9909242452305983, |
|
"grad_norm": 0.23979500779092153, |
|
"learning_rate": 0.00025640220128803965, |
|
"loss": 0.3476, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.9927764400814966, |
|
"grad_norm": 0.22955793113305528, |
|
"learning_rate": 0.00025567065474968226, |
|
"loss": 0.34, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.9946286349323948, |
|
"grad_norm": 0.26774128565687644, |
|
"learning_rate": 0.00025501222114748204, |
|
"loss": 0.3265, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.9964808297832932, |
|
"grad_norm": 0.2331087333203837, |
|
"learning_rate": 0.00025428058765925466, |
|
"loss": 0.2761, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.9983330246341915, |
|
"grad_norm": 0.24526043917044132, |
|
"learning_rate": 0.00025354891749683386, |
|
"loss": 0.3495, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 1.0001852194850898, |
|
"grad_norm": 0.2031173709527516, |
|
"learning_rate": 0.0002528172169288478, |
|
"loss": 0.3272, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.0020374143359883, |
|
"grad_norm": 0.2229851857312578, |
|
"learning_rate": 0.0002520854922241855, |
|
"loss": 0.2226, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 1.0038896091868865, |
|
"grad_norm": 0.23237399050753563, |
|
"learning_rate": 0.0002513537496519425, |
|
"loss": 0.2502, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.0057418040377848, |
|
"grad_norm": 0.22482059046916258, |
|
"learning_rate": 0.00025062199548136767, |
|
"loss": 0.2567, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 1.007593998888683, |
|
"grad_norm": 0.19384034239788644, |
|
"learning_rate": 0.00024989023598180886, |
|
"loss": 0.231, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.0094461937395813, |
|
"grad_norm": 0.18371330112888887, |
|
"learning_rate": 0.0002491584774226599, |
|
"loss": 0.2927, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.0112983885904798, |
|
"grad_norm": 0.21546778676484551, |
|
"learning_rate": 0.0002484267260733065, |
|
"loss": 0.265, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.013150583441378, |
|
"grad_norm": 0.14298891444963896, |
|
"learning_rate": 0.0002476949882030726, |
|
"loss": 0.2211, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 1.0150027782922764, |
|
"grad_norm": 0.25187217178584165, |
|
"learning_rate": 0.0002469632700811665, |
|
"loss": 0.2581, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.0168549731431746, |
|
"grad_norm": 0.31946252092124755, |
|
"learning_rate": 0.00024623157797662757, |
|
"loss": 0.2171, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 1.018707167994073, |
|
"grad_norm": 0.20257626106772428, |
|
"learning_rate": 0.000245499918158272, |
|
"loss": 0.21, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.0205593628449714, |
|
"grad_norm": 0.30792020448282925, |
|
"learning_rate": 0.00024476829689463965, |
|
"loss": 0.2199, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 1.0224115576958697, |
|
"grad_norm": 0.2359106076314458, |
|
"learning_rate": 0.0002440367204539398, |
|
"loss": 0.2221, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.024263752546768, |
|
"grad_norm": 0.2642461112213505, |
|
"learning_rate": 0.00024330519510399774, |
|
"loss": 0.287, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 1.0261159473976662, |
|
"grad_norm": 0.25013845200803386, |
|
"learning_rate": 0.00024257372711220134, |
|
"loss": 0.2578, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.0279681422485645, |
|
"grad_norm": 0.26551429905341034, |
|
"learning_rate": 0.00024184232274544672, |
|
"loss": 0.2509, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.029820337099463, |
|
"grad_norm": 0.2070332092773878, |
|
"learning_rate": 0.00024111098827008494, |
|
"loss": 0.2202, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.0316725319503612, |
|
"grad_norm": 0.21040587853785286, |
|
"learning_rate": 0.00024037972995186838, |
|
"loss": 0.2858, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 1.0335247268012595, |
|
"grad_norm": 0.21864583485000008, |
|
"learning_rate": 0.00023964855405589689, |
|
"loss": 0.2114, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.0353769216521578, |
|
"grad_norm": 0.21646010024279735, |
|
"learning_rate": 0.00023891746684656412, |
|
"loss": 0.2519, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 1.037229116503056, |
|
"grad_norm": 0.31512168932825474, |
|
"learning_rate": 0.00023818647458750388, |
|
"loss": 0.2967, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.0390813113539545, |
|
"grad_norm": 0.20525167225456686, |
|
"learning_rate": 0.00023745558354153654, |
|
"loss": 0.2591, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 1.0409335062048528, |
|
"grad_norm": 0.23384175420672978, |
|
"learning_rate": 0.0002367247999706154, |
|
"loss": 0.2236, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.042785701055751, |
|
"grad_norm": 0.24586451573414675, |
|
"learning_rate": 0.00023599413013577277, |
|
"loss": 0.2807, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 1.0446378959066493, |
|
"grad_norm": 0.31412889304572406, |
|
"learning_rate": 0.00023526358029706665, |
|
"loss": 0.2676, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.0464900907575476, |
|
"grad_norm": 0.157853905207218, |
|
"learning_rate": 0.00023453315671352693, |
|
"loss": 0.2769, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.0483422856084461, |
|
"grad_norm": 0.2229105615382073, |
|
"learning_rate": 0.00023380286564310176, |
|
"loss": 0.2735, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.0501944804593444, |
|
"grad_norm": 0.26127473765870846, |
|
"learning_rate": 0.0002330727133426041, |
|
"loss": 0.3007, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 1.0520466753102427, |
|
"grad_norm": 0.3906751493250249, |
|
"learning_rate": 0.00023234270606765778, |
|
"loss": 0.2809, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.053898870161141, |
|
"grad_norm": 0.2398049248934978, |
|
"learning_rate": 0.00023161285007264446, |
|
"loss": 0.2144, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 1.0557510650120392, |
|
"grad_norm": 0.24411940105501112, |
|
"learning_rate": 0.0002308831516106494, |
|
"loss": 0.223, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.0576032598629377, |
|
"grad_norm": 0.2547297157594742, |
|
"learning_rate": 0.0002301536169334082, |
|
"loss": 0.2458, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 1.059455454713836, |
|
"grad_norm": 0.18393906015457895, |
|
"learning_rate": 0.00022942425229125328, |
|
"loss": 0.248, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.0613076495647342, |
|
"grad_norm": 0.24279551434371524, |
|
"learning_rate": 0.0002286950639330604, |
|
"loss": 0.2709, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 1.0631598444156325, |
|
"grad_norm": 0.23381376758753333, |
|
"learning_rate": 0.00022796605810619487, |
|
"loss": 0.2361, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.0650120392665308, |
|
"grad_norm": 0.24452694586413046, |
|
"learning_rate": 0.00022723724105645814, |
|
"loss": 0.2076, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.0668642341174293, |
|
"grad_norm": 0.30441717560616044, |
|
"learning_rate": 0.00022650861902803426, |
|
"loss": 0.2922, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.0687164289683275, |
|
"grad_norm": 0.2588550928583629, |
|
"learning_rate": 0.00022578019826343656, |
|
"loss": 0.2687, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 1.0705686238192258, |
|
"grad_norm": 0.17900093913620954, |
|
"learning_rate": 0.00022505198500345403, |
|
"loss": 0.2467, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.072420818670124, |
|
"grad_norm": 0.2492431472220246, |
|
"learning_rate": 0.00022432398548709767, |
|
"loss": 0.2938, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 1.0742730135210223, |
|
"grad_norm": 0.21358503411722063, |
|
"learning_rate": 0.00022359620595154743, |
|
"loss": 0.2038, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.0761252083719208, |
|
"grad_norm": 0.28309019763963955, |
|
"learning_rate": 0.00022286865263209833, |
|
"loss": 0.2905, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 1.077977403222819, |
|
"grad_norm": 0.21729388154855128, |
|
"learning_rate": 0.00022214133176210756, |
|
"loss": 0.226, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.0798295980737174, |
|
"grad_norm": 0.18775475682209616, |
|
"learning_rate": 0.0002214142495729405, |
|
"loss": 0.2762, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 1.0816817929246156, |
|
"grad_norm": 0.19069211253783463, |
|
"learning_rate": 0.00022068741229391777, |
|
"loss": 0.2256, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.083533987775514, |
|
"grad_norm": 0.25813186890444373, |
|
"learning_rate": 0.00021996082615226176, |
|
"loss": 0.2409, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.0853861826264124, |
|
"grad_norm": 0.19945938160620094, |
|
"learning_rate": 0.00021923449737304312, |
|
"loss": 0.2536, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.0872383774773107, |
|
"grad_norm": 0.25882839571818395, |
|
"learning_rate": 0.00021850843217912757, |
|
"loss": 0.277, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 1.089090572328209, |
|
"grad_norm": 0.3164832568487736, |
|
"learning_rate": 0.0002177826367911225, |
|
"loss": 0.2705, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.0909427671791072, |
|
"grad_norm": 0.26233993949922385, |
|
"learning_rate": 0.0002170571174273238, |
|
"loss": 0.2524, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 1.0927949620300055, |
|
"grad_norm": 0.21974259388964484, |
|
"learning_rate": 0.0002163318803036624, |
|
"loss": 0.2304, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.094647156880904, |
|
"grad_norm": 0.2423119808479642, |
|
"learning_rate": 0.00021560693163365127, |
|
"loss": 0.2864, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 1.0964993517318022, |
|
"grad_norm": 0.23788077135736266, |
|
"learning_rate": 0.00021488227762833187, |
|
"loss": 0.223, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.0983515465827005, |
|
"grad_norm": 0.2626939992945942, |
|
"learning_rate": 0.00021415792449622128, |
|
"loss": 0.2174, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 1.1002037414335988, |
|
"grad_norm": 0.15991056421689562, |
|
"learning_rate": 0.0002134338784432587, |
|
"loss": 0.2381, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.102055936284497, |
|
"grad_norm": 0.20700833727267778, |
|
"learning_rate": 0.00021271014567275239, |
|
"loss": 0.2646, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.1039081311353955, |
|
"grad_norm": 0.3351339504582773, |
|
"learning_rate": 0.00021198673238532665, |
|
"loss": 0.2484, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.1057603259862938, |
|
"grad_norm": 0.25621425870572345, |
|
"learning_rate": 0.00021126364477886848, |
|
"loss": 0.2078, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 1.107612520837192, |
|
"grad_norm": 0.23131050803651781, |
|
"learning_rate": 0.00021054088904847476, |
|
"loss": 0.2254, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.1094647156880904, |
|
"grad_norm": 0.18439721493846953, |
|
"learning_rate": 0.0002098184713863987, |
|
"loss": 0.2095, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 1.1113169105389886, |
|
"grad_norm": 0.2388500241914586, |
|
"learning_rate": 0.00020909639798199754, |
|
"loss": 0.2091, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1131691053898871, |
|
"grad_norm": 0.21529124736985356, |
|
"learning_rate": 0.00020837467502167868, |
|
"loss": 0.2167, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 1.1150213002407854, |
|
"grad_norm": 0.16618163554721885, |
|
"learning_rate": 0.0002076533086888472, |
|
"loss": 0.2104, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 1.1168734950916837, |
|
"grad_norm": 0.33925928207566014, |
|
"learning_rate": 0.00020693230516385266, |
|
"loss": 0.2119, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 1.118725689942582, |
|
"grad_norm": 0.1826830206402772, |
|
"learning_rate": 0.0002062116706239365, |
|
"loss": 0.2462, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 1.1205778847934802, |
|
"grad_norm": 0.19046785383617137, |
|
"learning_rate": 0.00020549141124317865, |
|
"loss": 0.2117, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.1224300796443787, |
|
"grad_norm": 0.24622926500228018, |
|
"learning_rate": 0.00020477153319244478, |
|
"loss": 0.227, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 1.124282274495277, |
|
"grad_norm": 0.2165508639382145, |
|
"learning_rate": 0.00020405204263933375, |
|
"loss": 0.2638, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 1.1261344693461752, |
|
"grad_norm": 0.23498687913366198, |
|
"learning_rate": 0.00020333294574812415, |
|
"loss": 0.2281, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 1.1279866641970735, |
|
"grad_norm": 0.19311160739289338, |
|
"learning_rate": 0.00020261424867972226, |
|
"loss": 0.2159, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 1.1298388590479718, |
|
"grad_norm": 0.20569897318234276, |
|
"learning_rate": 0.00020189595759160855, |
|
"loss": 0.2557, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.1316910538988703, |
|
"grad_norm": 0.1637570670386419, |
|
"learning_rate": 0.00020117807863778537, |
|
"loss": 0.2231, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 1.1335432487497685, |
|
"grad_norm": 0.26014467806402464, |
|
"learning_rate": 0.000200460617968724, |
|
"loss": 0.286, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 1.1353954436006668, |
|
"grad_norm": 0.2505673154655342, |
|
"learning_rate": 0.00019974358173131202, |
|
"loss": 0.2853, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 1.137247638451565, |
|
"grad_norm": 0.22347929448158552, |
|
"learning_rate": 0.00019902697606880089, |
|
"loss": 0.2677, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 1.1390998333024633, |
|
"grad_norm": 0.20920726669707854, |
|
"learning_rate": 0.00019831080712075268, |
|
"loss": 0.244, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.1409520281533618, |
|
"grad_norm": 0.20688915094296348, |
|
"learning_rate": 0.00019759508102298846, |
|
"loss": 0.2327, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.14280422300426, |
|
"grad_norm": 0.25157909739969075, |
|
"learning_rate": 0.00019687980390753465, |
|
"loss": 0.2485, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 1.1446564178551584, |
|
"grad_norm": 0.23866241222091628, |
|
"learning_rate": 0.00019616498190257121, |
|
"loss": 0.2492, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.1465086127060566, |
|
"grad_norm": 0.264337208089594, |
|
"learning_rate": 0.00019545062113237875, |
|
"loss": 0.2758, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 1.148360807556955, |
|
"grad_norm": 0.25587094035952673, |
|
"learning_rate": 0.00019473672771728648, |
|
"loss": 0.2129, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.1502130024078534, |
|
"grad_norm": 0.16128043145453166, |
|
"learning_rate": 0.00019402330777361934, |
|
"loss": 0.2231, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 1.1520651972587517, |
|
"grad_norm": 0.233999456400375, |
|
"learning_rate": 0.0001933103674136458, |
|
"loss": 0.2443, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.15391739210965, |
|
"grad_norm": 0.23923089697365066, |
|
"learning_rate": 0.00019259791274552548, |
|
"loss": 0.2532, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 1.1557695869605482, |
|
"grad_norm": 0.18310940478929233, |
|
"learning_rate": 0.00019188594987325675, |
|
"loss": 0.2084, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.1576217818114465, |
|
"grad_norm": 0.20715212646569164, |
|
"learning_rate": 0.00019117448489662468, |
|
"loss": 0.2315, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.159473976662345, |
|
"grad_norm": 0.16666508872746613, |
|
"learning_rate": 0.00019046352391114836, |
|
"loss": 0.2214, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.1613261715132432, |
|
"grad_norm": 0.19036221587749683, |
|
"learning_rate": 0.000189753073008029, |
|
"loss": 0.2011, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 1.1631783663641415, |
|
"grad_norm": 0.18630573209584733, |
|
"learning_rate": 0.00018904313827409764, |
|
"loss": 0.2081, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.1650305612150398, |
|
"grad_norm": 0.20378341723916718, |
|
"learning_rate": 0.0001883337257917631, |
|
"loss": 0.2573, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 1.166882756065938, |
|
"grad_norm": 0.24764507328618723, |
|
"learning_rate": 0.00018762484163895962, |
|
"loss": 0.2245, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.1687349509168365, |
|
"grad_norm": 0.2536985360849042, |
|
"learning_rate": 0.00018691649188909494, |
|
"loss": 0.2427, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 1.1705871457677348, |
|
"grad_norm": 0.22553827575055346, |
|
"learning_rate": 0.00018620868261099856, |
|
"loss": 0.2556, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.172439340618633, |
|
"grad_norm": 0.238267227934858, |
|
"learning_rate": 0.00018550141986886914, |
|
"loss": 0.2079, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 1.1742915354695314, |
|
"grad_norm": 0.24364164673526545, |
|
"learning_rate": 0.00018479470972222295, |
|
"loss": 0.2377, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.1761437303204296, |
|
"grad_norm": 0.23684110576656128, |
|
"learning_rate": 0.00018408855822584186, |
|
"loss": 0.2106, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.1779959251713281, |
|
"grad_norm": 0.24133180260347029, |
|
"learning_rate": 0.0001833829714297216, |
|
"loss": 0.2325, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.1798481200222264, |
|
"grad_norm": 0.27161152313481657, |
|
"learning_rate": 0.0001826779553790196, |
|
"loss": 0.2816, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 1.1817003148731247, |
|
"grad_norm": 0.2549979606684111, |
|
"learning_rate": 0.0001819735161140035, |
|
"loss": 0.2716, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.183552509724023, |
|
"grad_norm": 0.2171602609914945, |
|
"learning_rate": 0.0001812696596699992, |
|
"loss": 0.1919, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 1.1854047045749212, |
|
"grad_norm": 0.2426365201904578, |
|
"learning_rate": 0.00018056639207733943, |
|
"loss": 0.1937, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.1872568994258197, |
|
"grad_norm": 0.23103167647591963, |
|
"learning_rate": 0.0001798637193613118, |
|
"loss": 0.2212, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 1.189109094276718, |
|
"grad_norm": 0.18152043318271277, |
|
"learning_rate": 0.00017916164754210723, |
|
"loss": 0.2525, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.1909612891276162, |
|
"grad_norm": 0.2404169525253988, |
|
"learning_rate": 0.00017846018263476844, |
|
"loss": 0.2365, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 1.1928134839785145, |
|
"grad_norm": 0.2527427714001698, |
|
"learning_rate": 0.00017775933064913838, |
|
"loss": 0.2382, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.1946656788294128, |
|
"grad_norm": 0.2504119633783523, |
|
"learning_rate": 0.0001770590975898089, |
|
"loss": 0.2435, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.1965178736803113, |
|
"grad_norm": 0.21122876356534948, |
|
"learning_rate": 0.0001763594894560689, |
|
"loss": 0.2182, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.1983700685312095, |
|
"grad_norm": 0.17197814060082, |
|
"learning_rate": 0.00017566051224185357, |
|
"loss": 0.2316, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 1.2002222633821078, |
|
"grad_norm": 0.2261749683499797, |
|
"learning_rate": 0.0001749621719356923, |
|
"loss": 0.2834, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.202074458233006, |
|
"grad_norm": 0.18709901189179085, |
|
"learning_rate": 0.00017426447452065786, |
|
"loss": 0.2329, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 1.2039266530839043, |
|
"grad_norm": 0.22261464085835025, |
|
"learning_rate": 0.00017356742597431503, |
|
"loss": 0.2294, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2057788479348028, |
|
"grad_norm": 0.1562966068716981, |
|
"learning_rate": 0.0001728710322686694, |
|
"loss": 0.2676, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 1.207631042785701, |
|
"grad_norm": 0.20080366502853164, |
|
"learning_rate": 0.00017217529937011612, |
|
"loss": 0.2034, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.2094832376365994, |
|
"grad_norm": 0.2488017093046758, |
|
"learning_rate": 0.00017148023323938877, |
|
"loss": 0.2576, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 1.2113354324874976, |
|
"grad_norm": 0.3018899089016778, |
|
"learning_rate": 0.00017078583983150852, |
|
"loss": 0.2521, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.213187627338396, |
|
"grad_norm": 0.21650035591018305, |
|
"learning_rate": 0.00017009212509573273, |
|
"loss": 0.1992, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.2150398221892944, |
|
"grad_norm": 0.18604059543117943, |
|
"learning_rate": 0.00016939909497550455, |
|
"loss": 0.2145, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.2168920170401927, |
|
"grad_norm": 0.13425561299908903, |
|
"learning_rate": 0.0001687067554084012, |
|
"loss": 0.2121, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 1.218744211891091, |
|
"grad_norm": 0.15061326471247105, |
|
"learning_rate": 0.00016801511232608388, |
|
"loss": 0.2093, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.2205964067419892, |
|
"grad_norm": 0.18586921295904735, |
|
"learning_rate": 0.00016732417165424645, |
|
"loss": 0.2442, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 1.2224486015928875, |
|
"grad_norm": 0.1947265751683096, |
|
"learning_rate": 0.00016663393931256484, |
|
"loss": 0.1964, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.224300796443786, |
|
"grad_norm": 0.3014541141949089, |
|
"learning_rate": 0.00016594442121464648, |
|
"loss": 0.2539, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 1.2261529912946842, |
|
"grad_norm": 0.2665331923593494, |
|
"learning_rate": 0.00016525562326797911, |
|
"loss": 0.2052, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.2280051861455825, |
|
"grad_norm": 0.23248425733346062, |
|
"learning_rate": 0.00016456755137388105, |
|
"loss": 0.2206, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 1.2298573809964808, |
|
"grad_norm": 0.21597100541187533, |
|
"learning_rate": 0.0001638802114274497, |
|
"loss": 0.2399, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.231709575847379, |
|
"grad_norm": 0.22311107620019674, |
|
"learning_rate": 0.0001631936093175116, |
|
"loss": 0.2344, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.2335617706982775, |
|
"grad_norm": 0.23595231727324342, |
|
"learning_rate": 0.0001625077509265717, |
|
"loss": 0.2302, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.2354139655491758, |
|
"grad_norm": 0.18416586445656416, |
|
"learning_rate": 0.0001618226421307635, |
|
"loss": 0.2438, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 1.237266160400074, |
|
"grad_norm": 0.2397024652142972, |
|
"learning_rate": 0.00016113828879979776, |
|
"loss": 0.2174, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.2391183552509724, |
|
"grad_norm": 0.2458273041744814, |
|
"learning_rate": 0.00016045469679691306, |
|
"loss": 0.2649, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 1.2409705501018706, |
|
"grad_norm": 0.24261819790944433, |
|
"learning_rate": 0.00015977187197882529, |
|
"loss": 0.2353, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.2428227449527691, |
|
"grad_norm": 0.21058758451619233, |
|
"learning_rate": 0.0001590898201956772, |
|
"loss": 0.2517, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 1.2446749398036674, |
|
"grad_norm": 0.2260538599044833, |
|
"learning_rate": 0.0001584085472909888, |
|
"loss": 0.2425, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.2465271346545657, |
|
"grad_norm": 0.2973826520271178, |
|
"learning_rate": 0.0001577280591016068, |
|
"loss": 0.2344, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 1.248379329505464, |
|
"grad_norm": 0.17773144739281946, |
|
"learning_rate": 0.0001570483614576549, |
|
"loss": 0.237, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.2502315243563622, |
|
"grad_norm": 0.24361822775457953, |
|
"learning_rate": 0.0001563694601824837, |
|
"loss": 0.2208, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.2520837192072607, |
|
"grad_norm": 0.19831921681917936, |
|
"learning_rate": 0.000155691361092621, |
|
"loss": 0.2447, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.253935914058159, |
|
"grad_norm": 0.2429000368973823, |
|
"learning_rate": 0.00015501406999772154, |
|
"loss": 0.2525, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 1.2557881089090572, |
|
"grad_norm": 0.2833773062005256, |
|
"learning_rate": 0.000154337592700518, |
|
"loss": 0.2699, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.2576403037599555, |
|
"grad_norm": 0.28456822568540374, |
|
"learning_rate": 0.00015366193499677036, |
|
"loss": 0.2871, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 1.2594924986108538, |
|
"grad_norm": 0.22620507444223148, |
|
"learning_rate": 0.00015298710267521682, |
|
"loss": 0.2287, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.2613446934617523, |
|
"grad_norm": 0.28690671723743605, |
|
"learning_rate": 0.00015231310151752407, |
|
"loss": 0.2882, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 1.2631968883126505, |
|
"grad_norm": 0.3475884413325309, |
|
"learning_rate": 0.0001516399372982377, |
|
"loss": 0.2293, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.2650490831635488, |
|
"grad_norm": 0.2072556191346626, |
|
"learning_rate": 0.000150967615784733, |
|
"loss": 0.2185, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 1.266901278014447, |
|
"grad_norm": 0.21644887901267165, |
|
"learning_rate": 0.00015029614273716506, |
|
"loss": 0.2664, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.2687534728653453, |
|
"grad_norm": 0.17990296855165974, |
|
"learning_rate": 0.0001496255239084199, |
|
"loss": 0.2087, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.2706056677162438, |
|
"grad_norm": 0.27058636297908395, |
|
"learning_rate": 0.00014895576504406465, |
|
"loss": 0.1908, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.272457862567142, |
|
"grad_norm": 0.18569390040885966, |
|
"learning_rate": 0.00014828687188229905, |
|
"loss": 0.2416, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 1.2743100574180404, |
|
"grad_norm": 0.29190142926898804, |
|
"learning_rate": 0.00014761885015390568, |
|
"loss": 0.2463, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.2761622522689386, |
|
"grad_norm": 0.17606951118976896, |
|
"learning_rate": 0.000146951705582201, |
|
"loss": 0.2208, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 1.278014447119837, |
|
"grad_norm": 0.17608746275541837, |
|
"learning_rate": 0.00014628544388298642, |
|
"loss": 0.219, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.2798666419707354, |
|
"grad_norm": 0.16242847709515437, |
|
"learning_rate": 0.00014562007076449944, |
|
"loss": 0.2331, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 1.2817188368216337, |
|
"grad_norm": 0.2755204876160437, |
|
"learning_rate": 0.00014495559192736435, |
|
"loss": 0.2291, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.283571031672532, |
|
"grad_norm": 0.20200318254837507, |
|
"learning_rate": 0.00014429201306454364, |
|
"loss": 0.235, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 1.2854232265234302, |
|
"grad_norm": 0.17156079642065042, |
|
"learning_rate": 0.00014362933986128963, |
|
"loss": 0.2182, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.2872754213743285, |
|
"grad_norm": 0.21604115340537886, |
|
"learning_rate": 0.0001429675779950947, |
|
"loss": 0.2471, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.289127616225227, |
|
"grad_norm": 0.187996583890282, |
|
"learning_rate": 0.00014230673313564397, |
|
"loss": 0.2151, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.2909798110761252, |
|
"grad_norm": 0.19730532837034964, |
|
"learning_rate": 0.00014164681094476551, |
|
"loss": 0.2106, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 1.2928320059270235, |
|
"grad_norm": 0.18610760518567895, |
|
"learning_rate": 0.0001409878170763826, |
|
"loss": 0.1997, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.2946842007779218, |
|
"grad_norm": 0.26588737789650624, |
|
"learning_rate": 0.00014032975717646505, |
|
"loss": 0.2779, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 1.29653639562882, |
|
"grad_norm": 0.2023558780876639, |
|
"learning_rate": 0.0001396726368829808, |
|
"loss": 0.1862, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.2983885904797186, |
|
"grad_norm": 0.1911627012671031, |
|
"learning_rate": 0.0001390164618258477, |
|
"loss": 0.2309, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 1.3002407853306168, |
|
"grad_norm": 0.11786773578619021, |
|
"learning_rate": 0.0001383612376268852, |
|
"loss": 0.2342, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.302092980181515, |
|
"grad_norm": 0.28174803457783004, |
|
"learning_rate": 0.00013770696989976616, |
|
"loss": 0.2286, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 1.3039451750324134, |
|
"grad_norm": 0.17826542771264642, |
|
"learning_rate": 0.0001370536642499689, |
|
"loss": 0.1801, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.3057973698833116, |
|
"grad_norm": 0.2244828460772529, |
|
"learning_rate": 0.00013640132627472918, |
|
"loss": 0.2266, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.3076495647342101, |
|
"grad_norm": 0.17076031236762176, |
|
"learning_rate": 0.0001357499615629919, |
|
"loss": 0.2064, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.3095017595851084, |
|
"grad_norm": 0.21153152349490145, |
|
"learning_rate": 0.00013509957569536368, |
|
"loss": 0.2259, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 1.3113539544360067, |
|
"grad_norm": 0.21657797572838655, |
|
"learning_rate": 0.00013445017424406459, |
|
"loss": 0.2174, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.313206149286905, |
|
"grad_norm": 0.19916951980627734, |
|
"learning_rate": 0.00013380176277288098, |
|
"loss": 0.2524, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 1.3150583441378032, |
|
"grad_norm": 0.15608777576271463, |
|
"learning_rate": 0.00013315434683711731, |
|
"loss": 0.2252, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.3169105389887017, |
|
"grad_norm": 0.21137373945091645, |
|
"learning_rate": 0.0001325079319835486, |
|
"loss": 0.2512, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 1.3187627338396, |
|
"grad_norm": 0.28789005617840957, |
|
"learning_rate": 0.00013186252375037332, |
|
"loss": 0.2269, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.3206149286904982, |
|
"grad_norm": 0.20697477426134353, |
|
"learning_rate": 0.0001312181276671654, |
|
"loss": 0.1923, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 1.3224671235413965, |
|
"grad_norm": 0.20780168330103488, |
|
"learning_rate": 0.00013057474925482732, |
|
"loss": 0.2, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.3243193183922948, |
|
"grad_norm": 0.2619781587243672, |
|
"learning_rate": 0.00012993239402554237, |
|
"loss": 0.2418, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.3261715132431933, |
|
"grad_norm": 0.21912577308112016, |
|
"learning_rate": 0.00012929106748272792, |
|
"loss": 0.2187, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.3280237080940915, |
|
"grad_norm": 0.2268912171128973, |
|
"learning_rate": 0.00012865077512098789, |
|
"loss": 0.2028, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 1.3298759029449898, |
|
"grad_norm": 0.21743955397611459, |
|
"learning_rate": 0.0001280115224260658, |
|
"loss": 0.2427, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.331728097795888, |
|
"grad_norm": 0.2738954036709458, |
|
"learning_rate": 0.00012737331487479764, |
|
"loss": 0.2614, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 1.3335802926467863, |
|
"grad_norm": 0.19258917852110208, |
|
"learning_rate": 0.00012673615793506524, |
|
"loss": 0.2099, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.3354324874976848, |
|
"grad_norm": 0.2502839601700166, |
|
"learning_rate": 0.00012610005706574918, |
|
"loss": 0.212, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 1.337284682348583, |
|
"grad_norm": 0.2599916951105217, |
|
"learning_rate": 0.0001254650177166821, |
|
"loss": 0.2124, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.3391368771994814, |
|
"grad_norm": 0.177484083446667, |
|
"learning_rate": 0.00012483104532860204, |
|
"loss": 0.1797, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 1.3409890720503796, |
|
"grad_norm": 0.2826696479487746, |
|
"learning_rate": 0.00012419814533310558, |
|
"loss": 0.2466, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.342841266901278, |
|
"grad_norm": 0.25661668196827314, |
|
"learning_rate": 0.0001235663231526019, |
|
"loss": 0.2332, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.3446934617521764, |
|
"grad_norm": 0.2568941368041713, |
|
"learning_rate": 0.00012293558420026557, |
|
"loss": 0.2523, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.3465456566030747, |
|
"grad_norm": 0.20215212528107282, |
|
"learning_rate": 0.00012230593387999082, |
|
"loss": 0.2352, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 1.348397851453973, |
|
"grad_norm": 0.24815860875352733, |
|
"learning_rate": 0.00012167737758634473, |
|
"loss": 0.2188, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.3502500463048712, |
|
"grad_norm": 0.22038982892081588, |
|
"learning_rate": 0.00012104992070452137, |
|
"loss": 0.2685, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 1.3521022411557695, |
|
"grad_norm": 0.2083445910203971, |
|
"learning_rate": 0.00012042356861029547, |
|
"loss": 0.2328, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.353954436006668, |
|
"grad_norm": 0.20267314146087212, |
|
"learning_rate": 0.00011979832666997642, |
|
"loss": 0.2264, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 1.3558066308575663, |
|
"grad_norm": 0.29234235079551857, |
|
"learning_rate": 0.00011917420024036241, |
|
"loss": 0.24, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.3576588257084645, |
|
"grad_norm": 0.19217333964822353, |
|
"learning_rate": 0.00011855119466869426, |
|
"loss": 0.2551, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 1.3595110205593628, |
|
"grad_norm": 0.18622316897174804, |
|
"learning_rate": 0.00011792931529260992, |
|
"loss": 0.2383, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.361363215410261, |
|
"grad_norm": 0.2639171890597442, |
|
"learning_rate": 0.00011730856744009846, |
|
"loss": 0.2447, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.3632154102611596, |
|
"grad_norm": 0.24703406547971726, |
|
"learning_rate": 0.0001166889564294546, |
|
"loss": 0.1885, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.3650676051120578, |
|
"grad_norm": 0.2395087018493502, |
|
"learning_rate": 0.00011607048756923327, |
|
"loss": 0.2408, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 1.366919799962956, |
|
"grad_norm": 0.1715869085136323, |
|
"learning_rate": 0.00011551484651328101, |
|
"loss": 0.2231, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.3687719948138544, |
|
"grad_norm": 0.24875690978651382, |
|
"learning_rate": 0.0001148985623288476, |
|
"loss": 0.2107, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 1.3706241896647526, |
|
"grad_norm": 0.21621060634153644, |
|
"learning_rate": 0.00011428343563414629, |
|
"loss": 0.2827, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.3724763845156511, |
|
"grad_norm": 0.17411298598721778, |
|
"learning_rate": 0.00011366947169931222, |
|
"loss": 0.1956, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 1.3743285793665494, |
|
"grad_norm": 0.21075418595890044, |
|
"learning_rate": 0.00011305667578451847, |
|
"loss": 0.2384, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.3761807742174477, |
|
"grad_norm": 0.1762011368192225, |
|
"learning_rate": 0.00011244505313993115, |
|
"loss": 0.2248, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 1.378032969068346, |
|
"grad_norm": 0.2713344050149392, |
|
"learning_rate": 0.00011183460900566405, |
|
"loss": 0.2253, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.3798851639192442, |
|
"grad_norm": 0.13308645120441578, |
|
"learning_rate": 0.00011122534861173444, |
|
"loss": 0.2188, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.3817373587701427, |
|
"grad_norm": 0.26214160905875167, |
|
"learning_rate": 0.00011061727717801745, |
|
"loss": 0.2509, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.383589553621041, |
|
"grad_norm": 0.16725861800168582, |
|
"learning_rate": 0.00011001039991420181, |
|
"loss": 0.2395, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 1.3854417484719392, |
|
"grad_norm": 0.17751505759886393, |
|
"learning_rate": 0.00010940472201974508, |
|
"loss": 0.1914, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.3872939433228375, |
|
"grad_norm": 0.21463454020196815, |
|
"learning_rate": 0.00010880024868382943, |
|
"loss": 0.2086, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 1.3891461381737358, |
|
"grad_norm": 0.2026092509755857, |
|
"learning_rate": 0.00010819698508531659, |
|
"loss": 0.2149, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.3909983330246343, |
|
"grad_norm": 0.16323623074986704, |
|
"learning_rate": 0.00010759493639270387, |
|
"loss": 0.27, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 1.3928505278755325, |
|
"grad_norm": 0.22139846358468115, |
|
"learning_rate": 0.00010705413557727304, |
|
"loss": 0.2054, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.3947027227264308, |
|
"grad_norm": 0.25885865603646047, |
|
"learning_rate": 0.0001064544094077661, |
|
"loss": 0.2037, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 1.396554917577329, |
|
"grad_norm": 0.18312190666440223, |
|
"learning_rate": 0.00010585591307378175, |
|
"loss": 0.2177, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.3984071124282274, |
|
"grad_norm": 0.2452824521308415, |
|
"learning_rate": 0.00010525865170297353, |
|
"loss": 0.2443, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.4002593072791258, |
|
"grad_norm": 0.22491815184492542, |
|
"learning_rate": 0.00010466263041241426, |
|
"loss": 0.2028, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.4021115021300241, |
|
"grad_norm": 0.21626081653727397, |
|
"learning_rate": 0.00010406785430855237, |
|
"loss": 0.1719, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 1.4039636969809224, |
|
"grad_norm": 0.24105946067666537, |
|
"learning_rate": 0.00010347432848716812, |
|
"loss": 0.225, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.4058158918318207, |
|
"grad_norm": 0.23078802018114886, |
|
"learning_rate": 0.00010288205803332975, |
|
"loss": 0.2278, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 1.407668086682719, |
|
"grad_norm": 0.2574880724739788, |
|
"learning_rate": 0.00010229104802135034, |
|
"loss": 0.244, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.4095202815336174, |
|
"grad_norm": 0.24593167284827877, |
|
"learning_rate": 0.00010170130351474377, |
|
"loss": 0.2159, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 1.4113724763845157, |
|
"grad_norm": 0.261530928817991, |
|
"learning_rate": 0.00010111282956618181, |
|
"loss": 0.1827, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.413224671235414, |
|
"grad_norm": 0.19005464332149496, |
|
"learning_rate": 0.0001005256312174505, |
|
"loss": 0.1942, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 1.4150768660863122, |
|
"grad_norm": 0.22377467210489174, |
|
"learning_rate": 9.993971349940717e-05, |
|
"loss": 0.2553, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.4169290609372105, |
|
"grad_norm": 0.21440875435999618, |
|
"learning_rate": 9.935508143193739e-05, |
|
"loss": 0.2169, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.418781255788109, |
|
"grad_norm": 0.22734623733013004, |
|
"learning_rate": 9.877174002391165e-05, |
|
"loss": 0.1859, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.4206334506390073, |
|
"grad_norm": 0.20257954902342695, |
|
"learning_rate": 9.818969427314275e-05, |
|
"loss": 0.208, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 1.4224856454899055, |
|
"grad_norm": 0.23157903079657188, |
|
"learning_rate": 9.760894916634283e-05, |
|
"loss": 0.2136, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.4243378403408038, |
|
"grad_norm": 0.23047953760740483, |
|
"learning_rate": 9.702950967908067e-05, |
|
"loss": 0.2244, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 1.426190035191702, |
|
"grad_norm": 0.1893981494941497, |
|
"learning_rate": 9.645138077573904e-05, |
|
"loss": 0.202, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.4280422300426006, |
|
"grad_norm": 0.1944059258719957, |
|
"learning_rate": 9.587456740947236e-05, |
|
"loss": 0.2395, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 1.4298944248934988, |
|
"grad_norm": 0.19154551462212566, |
|
"learning_rate": 9.529907452216402e-05, |
|
"loss": 0.1877, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.431746619744397, |
|
"grad_norm": 0.25705195721078017, |
|
"learning_rate": 9.472490704438403e-05, |
|
"loss": 0.2439, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 1.4335988145952954, |
|
"grad_norm": 0.27237298997689074, |
|
"learning_rate": 9.4152069895347e-05, |
|
"loss": 0.2269, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.4354510094461936, |
|
"grad_norm": 0.22572015857646327, |
|
"learning_rate": 9.358056798286982e-05, |
|
"loss": 0.1761, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.4373032042970921, |
|
"grad_norm": 0.1681521243481353, |
|
"learning_rate": 9.301040620332962e-05, |
|
"loss": 0.2453, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.4391553991479904, |
|
"grad_norm": 0.20322718308914284, |
|
"learning_rate": 9.244158944162198e-05, |
|
"loss": 0.1995, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 1.4410075939988887, |
|
"grad_norm": 0.17221136952935692, |
|
"learning_rate": 9.187412257111882e-05, |
|
"loss": 0.1991, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.442859788849787, |
|
"grad_norm": 0.23211721231411886, |
|
"learning_rate": 9.130801045362678e-05, |
|
"loss": 0.225, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 1.4447119837006852, |
|
"grad_norm": 0.2557003480049842, |
|
"learning_rate": 9.074325793934582e-05, |
|
"loss": 0.2396, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.4465641785515837, |
|
"grad_norm": 0.2743087049471899, |
|
"learning_rate": 9.017986986682705e-05, |
|
"loss": 0.2622, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 1.448416373402482, |
|
"grad_norm": 0.22044857915056804, |
|
"learning_rate": 8.961785106293202e-05, |
|
"loss": 0.208, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.4502685682533802, |
|
"grad_norm": 0.295975717325647, |
|
"learning_rate": 8.905720634279068e-05, |
|
"loss": 0.2406, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 1.4521207631042785, |
|
"grad_norm": 0.2119255826308734, |
|
"learning_rate": 8.849794050976062e-05, |
|
"loss": 0.1863, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.4539729579551768, |
|
"grad_norm": 0.19120118368025074, |
|
"learning_rate": 8.794005835538558e-05, |
|
"loss": 0.1899, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.4558251528060753, |
|
"grad_norm": 0.20269011463788664, |
|
"learning_rate": 8.738356465935467e-05, |
|
"loss": 0.1887, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.4576773476569735, |
|
"grad_norm": 0.2933956506003441, |
|
"learning_rate": 8.68284641894613e-05, |
|
"loss": 0.1969, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 1.4595295425078718, |
|
"grad_norm": 0.17871898787286603, |
|
"learning_rate": 8.627476170156224e-05, |
|
"loss": 0.2315, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.46138173735877, |
|
"grad_norm": 0.2552476396822797, |
|
"learning_rate": 8.572246193953703e-05, |
|
"loss": 0.2485, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 1.4632339322096684, |
|
"grad_norm": 0.31173163044095015, |
|
"learning_rate": 8.517156963524719e-05, |
|
"loss": 0.1816, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.4650861270605668, |
|
"grad_norm": 0.2158798667093176, |
|
"learning_rate": 8.462208950849598e-05, |
|
"loss": 0.2469, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 1.4669383219114651, |
|
"grad_norm": 0.24218457777393995, |
|
"learning_rate": 8.407402626698751e-05, |
|
"loss": 0.2161, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 1.4687905167623634, |
|
"grad_norm": 0.1979730263341676, |
|
"learning_rate": 8.352738460628675e-05, |
|
"loss": 0.2037, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 1.4706427116132617, |
|
"grad_norm": 0.2696373926575332, |
|
"learning_rate": 8.298216920977914e-05, |
|
"loss": 0.1691, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 1.47249490646416, |
|
"grad_norm": 0.25798986555999925, |
|
"learning_rate": 8.243838474863047e-05, |
|
"loss": 0.2285, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.4743471013150584, |
|
"grad_norm": 0.20862952822180633, |
|
"learning_rate": 8.189603588174712e-05, |
|
"loss": 0.2118, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 1.4761992961659567, |
|
"grad_norm": 0.1750842888641512, |
|
"learning_rate": 8.135512725573574e-05, |
|
"loss": 0.2116, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 1.478051491016855, |
|
"grad_norm": 0.23773871116567313, |
|
"learning_rate": 8.081566350486363e-05, |
|
"loss": 0.1949, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 1.4799036858677532, |
|
"grad_norm": 0.164420670542161, |
|
"learning_rate": 8.027764925101911e-05, |
|
"loss": 0.209, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 1.4817558807186515, |
|
"grad_norm": 0.21216576721258398, |
|
"learning_rate": 7.974108910367178e-05, |
|
"loss": 0.1966, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.48360807556955, |
|
"grad_norm": 0.2790248976449928, |
|
"learning_rate": 7.920598765983308e-05, |
|
"loss": 0.2063, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 1.4854602704204483, |
|
"grad_norm": 0.29784954052004964, |
|
"learning_rate": 7.867234950401714e-05, |
|
"loss": 0.1589, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 1.4873124652713465, |
|
"grad_norm": 0.15966925896267653, |
|
"learning_rate": 7.8140179208201e-05, |
|
"loss": 0.2203, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 1.4891646601222448, |
|
"grad_norm": 0.21411813554248801, |
|
"learning_rate": 7.76094813317858e-05, |
|
"loss": 0.191, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 1.491016854973143, |
|
"grad_norm": 0.16778546998214966, |
|
"learning_rate": 7.708026042155775e-05, |
|
"loss": 0.1972, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.4928690498240416, |
|
"grad_norm": 0.23986270787568656, |
|
"learning_rate": 7.655252101164894e-05, |
|
"loss": 0.2115, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 1.4947212446749398, |
|
"grad_norm": 0.250339172193944, |
|
"learning_rate": 7.602626762349865e-05, |
|
"loss": 0.2112, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 1.496573439525838, |
|
"grad_norm": 0.18288675343831115, |
|
"learning_rate": 7.55015047658146e-05, |
|
"loss": 0.2316, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 1.4984256343767364, |
|
"grad_norm": 0.23542544018483225, |
|
"learning_rate": 7.497823693453429e-05, |
|
"loss": 0.2278, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 1.5002778292276346, |
|
"grad_norm": 0.21853735172760996, |
|
"learning_rate": 7.44564686127865e-05, |
|
"loss": 0.2435, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.5021300240785331, |
|
"grad_norm": 0.230876996211439, |
|
"learning_rate": 7.39362042708527e-05, |
|
"loss": 0.2132, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 1.5039822189294314, |
|
"grad_norm": 0.23449285027681627, |
|
"learning_rate": 7.341744836612929e-05, |
|
"loss": 0.2205, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 1.5058344137803297, |
|
"grad_norm": 0.1770364349318145, |
|
"learning_rate": 7.290020534308883e-05, |
|
"loss": 0.1771, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 1.5076866086312282, |
|
"grad_norm": 0.24440773842340074, |
|
"learning_rate": 7.23844796332421e-05, |
|
"loss": 0.2009, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 1.5095388034821262, |
|
"grad_norm": 0.19125723562538224, |
|
"learning_rate": 7.187027565510032e-05, |
|
"loss": 0.2214, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.5113909983330247, |
|
"grad_norm": 0.24413160941991816, |
|
"learning_rate": 7.135759781413714e-05, |
|
"loss": 0.2483, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.513243193183923, |
|
"grad_norm": 0.18714126123807273, |
|
"learning_rate": 7.084645050275093e-05, |
|
"loss": 0.1754, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 1.5150953880348212, |
|
"grad_norm": 0.24068172003031482, |
|
"learning_rate": 7.033683810022717e-05, |
|
"loss": 0.2208, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 1.5169475828857197, |
|
"grad_norm": 0.21118944152545294, |
|
"learning_rate": 6.982876497270093e-05, |
|
"loss": 0.2354, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 1.5187997777366178, |
|
"grad_norm": 0.16304142225648927, |
|
"learning_rate": 6.932223547311948e-05, |
|
"loss": 0.191, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.5206519725875163, |
|
"grad_norm": 0.22402630540204685, |
|
"learning_rate": 6.881725394120483e-05, |
|
"loss": 0.2235, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 1.5225041674384145, |
|
"grad_norm": 0.14686671761669617, |
|
"learning_rate": 6.831382470341674e-05, |
|
"loss": 0.2374, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 1.5243563622893128, |
|
"grad_norm": 0.1910492658359761, |
|
"learning_rate": 6.781195207291579e-05, |
|
"loss": 0.1912, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 1.5262085571402113, |
|
"grad_norm": 0.285797167185037, |
|
"learning_rate": 6.7311640349526e-05, |
|
"loss": 0.1946, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 1.5280607519911094, |
|
"grad_norm": 0.24899927517169534, |
|
"learning_rate": 6.681289381969827e-05, |
|
"loss": 0.2437, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.5299129468420078, |
|
"grad_norm": 0.27104957130230045, |
|
"learning_rate": 6.631571675647358e-05, |
|
"loss": 0.2007, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 1.5317651416929061, |
|
"grad_norm": 0.1836787768149552, |
|
"learning_rate": 6.582011341944661e-05, |
|
"loss": 0.1992, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 1.5336173365438044, |
|
"grad_norm": 0.16592192801262687, |
|
"learning_rate": 6.532608805472884e-05, |
|
"loss": 0.2243, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 1.5354695313947029, |
|
"grad_norm": 0.19477759718427087, |
|
"learning_rate": 6.483364489491242e-05, |
|
"loss": 0.1866, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 1.537321726245601, |
|
"grad_norm": 0.2612938997397552, |
|
"learning_rate": 6.434278815903392e-05, |
|
"loss": 0.1884, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.5391739210964994, |
|
"grad_norm": 0.22106523393294486, |
|
"learning_rate": 6.3853522052538e-05, |
|
"loss": 0.2464, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 1.5410261159473977, |
|
"grad_norm": 0.11918922044507506, |
|
"learning_rate": 6.336585076724169e-05, |
|
"loss": 0.2205, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.542878310798296, |
|
"grad_norm": 0.27735599029951385, |
|
"learning_rate": 6.287977848129811e-05, |
|
"loss": 0.2125, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 1.5447305056491945, |
|
"grad_norm": 0.11824966641617995, |
|
"learning_rate": 6.239530935916105e-05, |
|
"loss": 0.1886, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 1.5465827005000925, |
|
"grad_norm": 0.14239263856247222, |
|
"learning_rate": 6.191244755154896e-05, |
|
"loss": 0.2283, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.548434895350991, |
|
"grad_norm": 0.2614832702732058, |
|
"learning_rate": 6.143119719540951e-05, |
|
"loss": 0.2419, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 1.5502870902018893, |
|
"grad_norm": 0.1719421648495295, |
|
"learning_rate": 6.0951562413884276e-05, |
|
"loss": 0.1813, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 1.5521392850527875, |
|
"grad_norm": 0.1339861662540805, |
|
"learning_rate": 6.047354731627319e-05, |
|
"loss": 0.1732, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 1.553991479903686, |
|
"grad_norm": 0.2649420028984007, |
|
"learning_rate": 5.9997155997999486e-05, |
|
"loss": 0.2312, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 1.555843674754584, |
|
"grad_norm": 0.2986635713988608, |
|
"learning_rate": 5.952239254057462e-05, |
|
"loss": 0.2537, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.5576958696054826, |
|
"grad_norm": 0.20847627410802858, |
|
"learning_rate": 5.904926101156316e-05, |
|
"loss": 0.2198, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 1.5595480644563808, |
|
"grad_norm": 0.20583750133284387, |
|
"learning_rate": 5.8577765464548014e-05, |
|
"loss": 0.2194, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 1.561400259307279, |
|
"grad_norm": 0.24884919423637333, |
|
"learning_rate": 5.810790993909595e-05, |
|
"loss": 0.2201, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 1.5632524541581776, |
|
"grad_norm": 0.2292784136541862, |
|
"learning_rate": 5.7639698460722366e-05, |
|
"loss": 0.2139, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 1.5651046490090756, |
|
"grad_norm": 0.20773042455822294, |
|
"learning_rate": 5.717313504085761e-05, |
|
"loss": 0.1876, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.5669568438599741, |
|
"grad_norm": 0.218184017555461, |
|
"learning_rate": 5.670822367681189e-05, |
|
"loss": 0.1821, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 1.5688090387108724, |
|
"grad_norm": 0.17843712174744172, |
|
"learning_rate": 5.6244968351741396e-05, |
|
"loss": 0.2006, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 1.5706612335617707, |
|
"grad_norm": 0.21436245431091455, |
|
"learning_rate": 5.578337303461414e-05, |
|
"loss": 0.1928, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 1.5725134284126692, |
|
"grad_norm": 0.2084740928506598, |
|
"learning_rate": 5.532344168017589e-05, |
|
"loss": 0.2444, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 1.5743656232635672, |
|
"grad_norm": 0.20902509315653023, |
|
"learning_rate": 5.4865178228916317e-05, |
|
"loss": 0.2288, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.5762178181144657, |
|
"grad_norm": 0.191128809958979, |
|
"learning_rate": 5.4408586607035236e-05, |
|
"loss": 0.2307, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 1.578070012965364, |
|
"grad_norm": 0.2804233173323839, |
|
"learning_rate": 5.3953670726408973e-05, |
|
"loss": 0.2049, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 1.5799222078162622, |
|
"grad_norm": 0.2523996334467096, |
|
"learning_rate": 5.3500434484556744e-05, |
|
"loss": 0.2309, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 1.5817744026671607, |
|
"grad_norm": 0.22808681153892332, |
|
"learning_rate": 5.304888176460759e-05, |
|
"loss": 0.2224, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 1.5836265975180588, |
|
"grad_norm": 0.17496689187022768, |
|
"learning_rate": 5.2599016435266656e-05, |
|
"loss": 0.212, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.5854787923689573, |
|
"grad_norm": 0.16684956568038284, |
|
"learning_rate": 5.215084235078232e-05, |
|
"loss": 0.1599, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 1.5873309872198555, |
|
"grad_norm": 0.2524704034190916, |
|
"learning_rate": 5.170436335091319e-05, |
|
"loss": 0.2239, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 1.5891831820707538, |
|
"grad_norm": 0.20276889978373874, |
|
"learning_rate": 5.130398471023492e-05, |
|
"loss": 0.1991, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 1.5910353769216523, |
|
"grad_norm": 0.19401086487052652, |
|
"learning_rate": 5.086073689762982e-05, |
|
"loss": 0.2054, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 1.5928875717725504, |
|
"grad_norm": 0.24314231015564167, |
|
"learning_rate": 5.0419195222696305e-05, |
|
"loss": 0.216, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.5947397666234489, |
|
"grad_norm": 0.1962559761069099, |
|
"learning_rate": 4.9979363468369426e-05, |
|
"loss": 0.2028, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 1.5965919614743471, |
|
"grad_norm": 0.21450451616005048, |
|
"learning_rate": 4.95412454029342e-05, |
|
"loss": 0.1485, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 1.5984441563252454, |
|
"grad_norm": 0.2262800799406614, |
|
"learning_rate": 4.9104844779993744e-05, |
|
"loss": 0.2205, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 1.6002963511761439, |
|
"grad_norm": 0.15673015952559616, |
|
"learning_rate": 4.867016533843677e-05, |
|
"loss": 0.1878, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 1.602148546027042, |
|
"grad_norm": 0.22772029995019283, |
|
"learning_rate": 4.823721080240562e-05, |
|
"loss": 0.2144, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.6040007408779404, |
|
"grad_norm": 0.16737363953611054, |
|
"learning_rate": 4.7805984881264366e-05, |
|
"loss": 0.219, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 1.6058529357288387, |
|
"grad_norm": 0.15059728369872777, |
|
"learning_rate": 4.7376491269567305e-05, |
|
"loss": 0.1827, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 1.607705130579737, |
|
"grad_norm": 0.2174362092107457, |
|
"learning_rate": 4.694873364702687e-05, |
|
"loss": 0.2427, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 1.6095573254306355, |
|
"grad_norm": 0.2536534486510469, |
|
"learning_rate": 4.652271567848229e-05, |
|
"loss": 0.2458, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 1.6114095202815335, |
|
"grad_norm": 0.20306793867476478, |
|
"learning_rate": 4.6098441013868285e-05, |
|
"loss": 0.221, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.613261715132432, |
|
"grad_norm": 0.29865060955062883, |
|
"learning_rate": 4.567591328818371e-05, |
|
"loss": 0.2621, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 1.6151139099833303, |
|
"grad_norm": 0.20862574024207642, |
|
"learning_rate": 4.529713496011825e-05, |
|
"loss": 0.207, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 1.6169661048342285, |
|
"grad_norm": 0.21837675462224324, |
|
"learning_rate": 4.487793637919196e-05, |
|
"loss": 0.1828, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 1.618818299685127, |
|
"grad_norm": 0.23283771674120501, |
|
"learning_rate": 4.446049519394233e-05, |
|
"loss": 0.2166, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 1.620670494536025, |
|
"grad_norm": 0.1948474369408113, |
|
"learning_rate": 4.4044814980821856e-05, |
|
"loss": 0.2154, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.6225226893869236, |
|
"grad_norm": 0.2821939610991762, |
|
"learning_rate": 4.3630899301195904e-05, |
|
"loss": 0.2428, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 1.6243748842378218, |
|
"grad_norm": 0.18991376076496028, |
|
"learning_rate": 4.321875170131218e-05, |
|
"loss": 0.1933, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 1.62622707908872, |
|
"grad_norm": 0.17477269695823847, |
|
"learning_rate": 4.280837571227006e-05, |
|
"loss": 0.1945, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 1.6280792739396186, |
|
"grad_norm": 0.22671892134617525, |
|
"learning_rate": 4.239977484999063e-05, |
|
"loss": 0.1973, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 1.6299314687905166, |
|
"grad_norm": 0.2061718775432731, |
|
"learning_rate": 4.1992952615186516e-05, |
|
"loss": 0.2122, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.6317836636414151, |
|
"grad_norm": 0.25086071759237627, |
|
"learning_rate": 4.158791249333177e-05, |
|
"loss": 0.226, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 1.6336358584923134, |
|
"grad_norm": 0.242794082456384, |
|
"learning_rate": 4.118465795463214e-05, |
|
"loss": 0.2267, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.6354880533432117, |
|
"grad_norm": 0.1935934917483956, |
|
"learning_rate": 4.078319245399514e-05, |
|
"loss": 0.2011, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 1.6373402481941102, |
|
"grad_norm": 0.2628523170855809, |
|
"learning_rate": 4.038351943100088e-05, |
|
"loss": 0.1934, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 1.6391924430450082, |
|
"grad_norm": 0.19568463922046236, |
|
"learning_rate": 3.998564230987209e-05, |
|
"loss": 0.1997, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.6410446378959067, |
|
"grad_norm": 0.2481046435287445, |
|
"learning_rate": 3.958956449944501e-05, |
|
"loss": 0.2151, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 1.642896832746805, |
|
"grad_norm": 0.22476767911377235, |
|
"learning_rate": 3.9195289393140155e-05, |
|
"loss": 0.1621, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 1.6447490275977033, |
|
"grad_norm": 0.1945122394139851, |
|
"learning_rate": 3.880282036893348e-05, |
|
"loss": 0.1753, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 1.6466012224486017, |
|
"grad_norm": 0.27437177077690705, |
|
"learning_rate": 3.841216078932702e-05, |
|
"loss": 0.226, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 1.6484534172994998, |
|
"grad_norm": 0.18562250131807664, |
|
"learning_rate": 3.802331400132028e-05, |
|
"loss": 0.1717, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.6503056121503983, |
|
"grad_norm": 0.21622010412383683, |
|
"learning_rate": 3.7636283336381636e-05, |
|
"loss": 0.155, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 1.6521578070012966, |
|
"grad_norm": 0.22634728029439885, |
|
"learning_rate": 3.7251072110419727e-05, |
|
"loss": 0.2022, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 1.6540100018521948, |
|
"grad_norm": 0.2671242474144964, |
|
"learning_rate": 3.686768362375498e-05, |
|
"loss": 0.2234, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 1.6558621967030933, |
|
"grad_norm": 0.16832839316697204, |
|
"learning_rate": 3.648612116109146e-05, |
|
"loss": 0.1805, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 1.6577143915539914, |
|
"grad_norm": 0.2688098808357188, |
|
"learning_rate": 3.610638799148858e-05, |
|
"loss": 0.1909, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.6595665864048899, |
|
"grad_norm": 0.172871399134501, |
|
"learning_rate": 3.572848736833326e-05, |
|
"loss": 0.2112, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 1.6614187812557881, |
|
"grad_norm": 0.23426972546449246, |
|
"learning_rate": 3.5352422529311814e-05, |
|
"loss": 0.2276, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 1.6632709761066864, |
|
"grad_norm": 0.2682786605548356, |
|
"learning_rate": 3.497819669638266e-05, |
|
"loss": 0.2521, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 1.6651231709575849, |
|
"grad_norm": 0.2122644465486904, |
|
"learning_rate": 3.4605813075748085e-05, |
|
"loss": 0.2003, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 1.666975365808483, |
|
"grad_norm": 0.24717950759123916, |
|
"learning_rate": 3.42352748578274e-05, |
|
"loss": 0.1813, |
|
"step": 9000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 10798, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 3000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 196010447634432.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|