{
"best_metric": 0.14280347526073456,
"best_model_checkpoint": "d:\\whisper-medium-pt-cv16-fleurs2\\checkpoint-15000",
"epoch": 11.671335200746965,
"eval_steps": 5000,
"global_step": 25000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011671335200746966,
"grad_norm": 18.58954620361328,
"learning_rate": 4.6e-10,
"loss": 0.7382,
"step": 25
},
{
"epoch": 0.02334267040149393,
"grad_norm": 37.525917053222656,
"learning_rate": 9.399999999999999e-10,
"loss": 1.2845,
"step": 50
},
{
"epoch": 0.0350140056022409,
"grad_norm": 15.194890975952148,
"learning_rate": 1.44e-09,
"loss": 0.7588,
"step": 75
},
{
"epoch": 0.04668534080298786,
"grad_norm": 40.771392822265625,
"learning_rate": 1.94e-09,
"loss": 1.2737,
"step": 100
},
{
"epoch": 0.05835667600373483,
"grad_norm": 16.082420349121094,
"learning_rate": 2.44e-09,
"loss": 0.7373,
"step": 125
},
{
"epoch": 0.0700280112044818,
"grad_norm": 38.88285446166992,
"learning_rate": 2.9399999999999998e-09,
"loss": 1.3064,
"step": 150
},
{
"epoch": 0.08169934640522876,
"grad_norm": 17.771854400634766,
"learning_rate": 3.4399999999999997e-09,
"loss": 0.7341,
"step": 175
},
{
"epoch": 0.09337068160597572,
"grad_norm": 41.72404861450195,
"learning_rate": 3.94e-09,
"loss": 1.3691,
"step": 200
},
{
"epoch": 0.10504201680672269,
"grad_norm": 16.00535774230957,
"learning_rate": 4.44e-09,
"loss": 0.7228,
"step": 225
},
{
"epoch": 0.11671335200746966,
"grad_norm": 41.851478576660156,
"learning_rate": 4.94e-09,
"loss": 1.3768,
"step": 250
},
{
"epoch": 0.1283846872082166,
"grad_norm": 20.789945602416992,
"learning_rate": 5.44e-09,
"loss": 0.7399,
"step": 275
},
{
"epoch": 0.1400560224089636,
"grad_norm": 39.85365295410156,
"learning_rate": 5.94e-09,
"loss": 1.359,
"step": 300
},
{
"epoch": 0.15172735760971054,
"grad_norm": 16.680112838745117,
"learning_rate": 6.44e-09,
"loss": 0.7578,
"step": 325
},
{
"epoch": 0.16339869281045752,
"grad_norm": 41.59889221191406,
"learning_rate": 6.94e-09,
"loss": 1.3793,
"step": 350
},
{
"epoch": 0.17507002801120447,
"grad_norm": 16.55697250366211,
"learning_rate": 7.44e-09,
"loss": 0.7345,
"step": 375
},
{
"epoch": 0.18674136321195145,
"grad_norm": 37.21436309814453,
"learning_rate": 7.939999999999999e-09,
"loss": 1.3301,
"step": 400
},
{
"epoch": 0.1984126984126984,
"grad_norm": 18.573705673217773,
"learning_rate": 8.44e-09,
"loss": 0.7678,
"step": 425
},
{
"epoch": 0.21008403361344538,
"grad_norm": 42.38700866699219,
"learning_rate": 8.94e-09,
"loss": 1.3582,
"step": 450
},
{
"epoch": 0.22175536881419233,
"grad_norm": 16.054298400878906,
"learning_rate": 9.44e-09,
"loss": 0.7475,
"step": 475
},
{
"epoch": 0.2334267040149393,
"grad_norm": 40.42662811279297,
"learning_rate": 9.939999999999998e-09,
"loss": 1.3728,
"step": 500
},
{
"epoch": 0.24509803921568626,
"grad_norm": 17.739364624023438,
"learning_rate": 1.044e-08,
"loss": 0.7678,
"step": 525
},
{
"epoch": 0.2567693744164332,
"grad_norm": 38.32596206665039,
"learning_rate": 1.0939999999999999e-08,
"loss": 1.3873,
"step": 550
},
{
"epoch": 0.2684407096171802,
"grad_norm": 14.915902137756348,
"learning_rate": 1.144e-08,
"loss": 0.7033,
"step": 575
},
{
"epoch": 0.2801120448179272,
"grad_norm": 34.46598434448242,
"learning_rate": 1.1939999999999998e-08,
"loss": 1.2503,
"step": 600
},
{
"epoch": 0.29178338001867415,
"grad_norm": 14.2736177444458,
"learning_rate": 1.244e-08,
"loss": 0.7253,
"step": 625
},
{
"epoch": 0.3034547152194211,
"grad_norm": 40.36186981201172,
"learning_rate": 1.2939999999999999e-08,
"loss": 1.3159,
"step": 650
},
{
"epoch": 0.31512605042016806,
"grad_norm": 14.474146842956543,
"learning_rate": 1.344e-08,
"loss": 0.7097,
"step": 675
},
{
"epoch": 0.32679738562091504,
"grad_norm": 39.71982192993164,
"learning_rate": 1.394e-08,
"loss": 1.3331,
"step": 700
},
{
"epoch": 0.338468720821662,
"grad_norm": 13.428034782409668,
"learning_rate": 1.4439999999999999e-08,
"loss": 0.7001,
"step": 725
},
{
"epoch": 0.35014005602240894,
"grad_norm": 38.90840148925781,
"learning_rate": 1.494e-08,
"loss": 1.2718,
"step": 750
},
{
"epoch": 0.3618113912231559,
"grad_norm": 15.919449806213379,
"learning_rate": 1.544e-08,
"loss": 0.6877,
"step": 775
},
{
"epoch": 0.3734827264239029,
"grad_norm": 37.94025802612305,
"learning_rate": 1.594e-08,
"loss": 1.2798,
"step": 800
},
{
"epoch": 0.3851540616246499,
"grad_norm": 14.55276107788086,
"learning_rate": 1.644e-08,
"loss": 0.7123,
"step": 825
},
{
"epoch": 0.3968253968253968,
"grad_norm": 33.79072952270508,
"learning_rate": 1.6939999999999998e-08,
"loss": 1.2842,
"step": 850
},
{
"epoch": 0.4084967320261438,
"grad_norm": 14.676527976989746,
"learning_rate": 1.744e-08,
"loss": 0.7064,
"step": 875
},
{
"epoch": 0.42016806722689076,
"grad_norm": 36.242069244384766,
"learning_rate": 1.794e-08,
"loss": 1.2235,
"step": 900
},
{
"epoch": 0.43183940242763774,
"grad_norm": 12.617734909057617,
"learning_rate": 1.8440000000000002e-08,
"loss": 0.6694,
"step": 925
},
{
"epoch": 0.44351073762838467,
"grad_norm": 40.069305419921875,
"learning_rate": 1.8939999999999996e-08,
"loss": 1.2126,
"step": 950
},
{
"epoch": 0.45518207282913165,
"grad_norm": 15.211618423461914,
"learning_rate": 1.9439999999999997e-08,
"loss": 0.6739,
"step": 975
},
{
"epoch": 0.4668534080298786,
"grad_norm": 32.790863037109375,
"learning_rate": 1.994e-08,
"loss": 1.1737,
"step": 1000
},
{
"epoch": 0.4785247432306256,
"grad_norm": 12.688447952270508,
"learning_rate": 2.044e-08,
"loss": 0.6731,
"step": 1025
},
{
"epoch": 0.49019607843137253,
"grad_norm": 34.38262176513672,
"learning_rate": 2.094e-08,
"loss": 1.1325,
"step": 1050
},
{
"epoch": 0.5018674136321195,
"grad_norm": 12.72283935546875,
"learning_rate": 2.144e-08,
"loss": 0.6504,
"step": 1075
},
{
"epoch": 0.5135387488328664,
"grad_norm": 28.521909713745117,
"learning_rate": 2.194e-08,
"loss": 1.1427,
"step": 1100
},
{
"epoch": 0.5252100840336135,
"grad_norm": 14.081682205200195,
"learning_rate": 2.244e-08,
"loss": 0.6429,
"step": 1125
},
{
"epoch": 0.5368814192343604,
"grad_norm": 33.354591369628906,
"learning_rate": 2.294e-08,
"loss": 1.124,
"step": 1150
},
{
"epoch": 0.5485527544351074,
"grad_norm": 13.210142135620117,
"learning_rate": 2.3439999999999997e-08,
"loss": 0.6365,
"step": 1175
},
{
"epoch": 0.5602240896358543,
"grad_norm": 40.134281158447266,
"learning_rate": 2.3939999999999998e-08,
"loss": 1.1303,
"step": 1200
},
{
"epoch": 0.5718954248366013,
"grad_norm": 12.516732215881348,
"learning_rate": 2.444e-08,
"loss": 0.5871,
"step": 1225
},
{
"epoch": 0.5835667600373483,
"grad_norm": 30.771167755126953,
"learning_rate": 2.494e-08,
"loss": 1.0342,
"step": 1250
},
{
"epoch": 0.5952380952380952,
"grad_norm": 11.771331787109375,
"learning_rate": 2.5439999999999998e-08,
"loss": 0.5727,
"step": 1275
},
{
"epoch": 0.6069094304388422,
"grad_norm": 32.63950729370117,
"learning_rate": 2.594e-08,
"loss": 0.9901,
"step": 1300
},
{
"epoch": 0.6185807656395892,
"grad_norm": 15.06674575805664,
"learning_rate": 2.644e-08,
"loss": 0.5162,
"step": 1325
},
{
"epoch": 0.6302521008403361,
"grad_norm": 34.53097152709961,
"learning_rate": 2.694e-08,
"loss": 0.9596,
"step": 1350
},
{
"epoch": 0.6419234360410832,
"grad_norm": 13.923140525817871,
"learning_rate": 2.7439999999999996e-08,
"loss": 0.5145,
"step": 1375
},
{
"epoch": 0.6535947712418301,
"grad_norm": 34.99801254272461,
"learning_rate": 2.7939999999999997e-08,
"loss": 0.8,
"step": 1400
},
{
"epoch": 0.665266106442577,
"grad_norm": 13.633746147155762,
"learning_rate": 2.844e-08,
"loss": 0.4484,
"step": 1425
},
{
"epoch": 0.676937441643324,
"grad_norm": 27.909713745117188,
"learning_rate": 2.894e-08,
"loss": 0.7578,
"step": 1450
},
{
"epoch": 0.688608776844071,
"grad_norm": 10.687728881835938,
"learning_rate": 2.944e-08,
"loss": 0.4327,
"step": 1475
},
{
"epoch": 0.7002801120448179,
"grad_norm": 25.57269859313965,
"learning_rate": 2.994e-08,
"loss": 0.6606,
"step": 1500
},
{
"epoch": 0.7119514472455649,
"grad_norm": 11.038127899169922,
"learning_rate": 3.044e-08,
"loss": 0.3744,
"step": 1525
},
{
"epoch": 0.7236227824463118,
"grad_norm": 32.23295974731445,
"learning_rate": 3.094e-08,
"loss": 0.5246,
"step": 1550
},
{
"epoch": 0.7352941176470589,
"grad_norm": 10.442867279052734,
"learning_rate": 3.144e-08,
"loss": 0.3276,
"step": 1575
},
{
"epoch": 0.7469654528478058,
"grad_norm": 30.245128631591797,
"learning_rate": 3.194e-08,
"loss": 0.5341,
"step": 1600
},
{
"epoch": 0.7586367880485527,
"grad_norm": 7.633006572723389,
"learning_rate": 3.244e-08,
"loss": 0.3076,
"step": 1625
},
{
"epoch": 0.7703081232492998,
"grad_norm": 25.9896297454834,
"learning_rate": 3.2939999999999996e-08,
"loss": 0.5054,
"step": 1650
},
{
"epoch": 0.7819794584500467,
"grad_norm": 11.558256149291992,
"learning_rate": 3.3439999999999994e-08,
"loss": 0.2894,
"step": 1675
},
{
"epoch": 0.7936507936507936,
"grad_norm": 25.086002349853516,
"learning_rate": 3.394e-08,
"loss": 0.4637,
"step": 1700
},
{
"epoch": 0.8053221288515406,
"grad_norm": 9.39806079864502,
"learning_rate": 3.4439999999999996e-08,
"loss": 0.2774,
"step": 1725
},
{
"epoch": 0.8169934640522876,
"grad_norm": 25.4090518951416,
"learning_rate": 3.494e-08,
"loss": 0.4205,
"step": 1750
},
{
"epoch": 0.8286647992530346,
"grad_norm": 7.335741996765137,
"learning_rate": 3.544e-08,
"loss": 0.2438,
"step": 1775
},
{
"epoch": 0.8403361344537815,
"grad_norm": 24.587902069091797,
"learning_rate": 3.5939999999999996e-08,
"loss": 0.4571,
"step": 1800
},
{
"epoch": 0.8520074696545284,
"grad_norm": 7.93494176864624,
"learning_rate": 3.644e-08,
"loss": 0.2692,
"step": 1825
},
{
"epoch": 0.8636788048552755,
"grad_norm": 25.627216339111328,
"learning_rate": 3.694e-08,
"loss": 0.3872,
"step": 1850
},
{
"epoch": 0.8753501400560224,
"grad_norm": 6.054137229919434,
"learning_rate": 3.7439999999999996e-08,
"loss": 0.2613,
"step": 1875
},
{
"epoch": 0.8870214752567693,
"grad_norm": 22.876371383666992,
"learning_rate": 3.794e-08,
"loss": 0.3745,
"step": 1900
},
{
"epoch": 0.8986928104575164,
"grad_norm": 9.932693481445312,
"learning_rate": 3.844e-08,
"loss": 0.2459,
"step": 1925
},
{
"epoch": 0.9103641456582633,
"grad_norm": 24.33729362487793,
"learning_rate": 3.894e-08,
"loss": 0.3707,
"step": 1950
},
{
"epoch": 0.9220354808590103,
"grad_norm": 5.043721675872803,
"learning_rate": 3.944e-08,
"loss": 0.2594,
"step": 1975
},
{
"epoch": 0.9337068160597572,
"grad_norm": 23.499347686767578,
"learning_rate": 3.994e-08,
"loss": 0.3559,
"step": 2000
},
{
"epoch": 0.9453781512605042,
"grad_norm": 9.935140609741211,
"learning_rate": 4.044e-08,
"loss": 0.2537,
"step": 2025
},
{
"epoch": 0.9570494864612512,
"grad_norm": 21.89238929748535,
"learning_rate": 4.0939999999999995e-08,
"loss": 0.3577,
"step": 2050
},
{
"epoch": 0.9687208216619981,
"grad_norm": 7.369849681854248,
"learning_rate": 4.143999999999999e-08,
"loss": 0.2506,
"step": 2075
},
{
"epoch": 0.9803921568627451,
"grad_norm": 21.387100219726562,
"learning_rate": 4.194e-08,
"loss": 0.3548,
"step": 2100
},
{
"epoch": 0.9920634920634921,
"grad_norm": 9.189516067504883,
"learning_rate": 4.2439999999999995e-08,
"loss": 0.2296,
"step": 2125
},
{
"epoch": 1.003734827264239,
"grad_norm": 6.029189109802246,
"learning_rate": 4.294e-08,
"loss": 0.3258,
"step": 2150
},
{
"epoch": 1.015406162464986,
"grad_norm": 9.663504600524902,
"learning_rate": 4.344e-08,
"loss": 0.1926,
"step": 2175
},
{
"epoch": 1.0270774976657329,
"grad_norm": 5.585232734680176,
"learning_rate": 4.3939999999999995e-08,
"loss": 0.3378,
"step": 2200
},
{
"epoch": 1.03874883286648,
"grad_norm": 8.463289260864258,
"learning_rate": 4.444e-08,
"loss": 0.2082,
"step": 2225
},
{
"epoch": 1.050420168067227,
"grad_norm": 5.860575199127197,
"learning_rate": 4.494e-08,
"loss": 0.3448,
"step": 2250
},
{
"epoch": 1.0620915032679739,
"grad_norm": 6.821081161499023,
"learning_rate": 4.544e-08,
"loss": 0.2441,
"step": 2275
},
{
"epoch": 1.0737628384687208,
"grad_norm": 6.957500457763672,
"learning_rate": 4.594e-08,
"loss": 0.3243,
"step": 2300
},
{
"epoch": 1.0854341736694677,
"grad_norm": 9.871063232421875,
"learning_rate": 4.644e-08,
"loss": 0.2316,
"step": 2325
},
{
"epoch": 1.0971055088702149,
"grad_norm": 5.59705114364624,
"learning_rate": 4.694e-08,
"loss": 0.3128,
"step": 2350
},
{
"epoch": 1.1087768440709618,
"grad_norm": 11.109825134277344,
"learning_rate": 4.744e-08,
"loss": 0.2319,
"step": 2375
},
{
"epoch": 1.1204481792717087,
"grad_norm": 6.252768039703369,
"learning_rate": 4.7940000000000004e-08,
"loss": 0.2975,
"step": 2400
},
{
"epoch": 1.1321195144724556,
"grad_norm": 9.143139839172363,
"learning_rate": 4.8439999999999996e-08,
"loss": 0.2043,
"step": 2425
},
{
"epoch": 1.1437908496732025,
"grad_norm": 5.3806657791137695,
"learning_rate": 4.8939999999999994e-08,
"loss": 0.3358,
"step": 2450
},
{
"epoch": 1.1554621848739495,
"grad_norm": 5.736015796661377,
"learning_rate": 4.944e-08,
"loss": 0.1975,
"step": 2475
},
{
"epoch": 1.1671335200746966,
"grad_norm": 8.606856346130371,
"learning_rate": 4.9939999999999996e-08,
"loss": 0.3118,
"step": 2500
},
{
"epoch": 1.1788048552754435,
"grad_norm": 8.582596778869629,
"learning_rate": 5.0439999999999994e-08,
"loss": 0.2086,
"step": 2525
},
{
"epoch": 1.1904761904761905,
"grad_norm": 5.178341388702393,
"learning_rate": 5.094e-08,
"loss": 0.2946,
"step": 2550
},
{
"epoch": 1.2021475256769374,
"grad_norm": 11.727195739746094,
"learning_rate": 5.1439999999999996e-08,
"loss": 0.2178,
"step": 2575
},
{
"epoch": 1.2138188608776843,
"grad_norm": 5.104198932647705,
"learning_rate": 5.194e-08,
"loss": 0.3234,
"step": 2600
},
{
"epoch": 1.2254901960784315,
"grad_norm": 9.104410171508789,
"learning_rate": 5.244e-08,
"loss": 0.2093,
"step": 2625
},
{
"epoch": 1.2371615312791784,
"grad_norm": 6.0715765953063965,
"learning_rate": 5.2939999999999996e-08,
"loss": 0.3366,
"step": 2650
},
{
"epoch": 1.2488328664799253,
"grad_norm": 8.743270874023438,
"learning_rate": 5.344e-08,
"loss": 0.1902,
"step": 2675
},
{
"epoch": 1.2605042016806722,
"grad_norm": 5.818302154541016,
"learning_rate": 5.394e-08,
"loss": 0.3033,
"step": 2700
},
{
"epoch": 1.2721755368814192,
"grad_norm": 5.093564510345459,
"learning_rate": 5.444e-08,
"loss": 0.2089,
"step": 2725
},
{
"epoch": 1.283846872082166,
"grad_norm": 6.838255405426025,
"learning_rate": 5.494e-08,
"loss": 0.2888,
"step": 2750
},
{
"epoch": 1.2955182072829132,
"grad_norm": 10.466809272766113,
"learning_rate": 5.544e-08,
"loss": 0.2166,
"step": 2775
},
{
"epoch": 1.3071895424836601,
"grad_norm": 5.292140007019043,
"learning_rate": 5.5939999999999997e-08,
"loss": 0.292,
"step": 2800
},
{
"epoch": 1.318860877684407,
"grad_norm": 8.67912483215332,
"learning_rate": 5.6439999999999995e-08,
"loss": 0.2073,
"step": 2825
},
{
"epoch": 1.330532212885154,
"grad_norm": 4.972991943359375,
"learning_rate": 5.693999999999999e-08,
"loss": 0.2952,
"step": 2850
},
{
"epoch": 1.3422035480859011,
"grad_norm": 8.939681053161621,
"learning_rate": 5.744e-08,
"loss": 0.1821,
"step": 2875
},
{
"epoch": 1.353874883286648,
"grad_norm": 7.211392402648926,
"learning_rate": 5.7939999999999995e-08,
"loss": 0.2914,
"step": 2900
},
{
"epoch": 1.365546218487395,
"grad_norm": 8.267333984375,
"learning_rate": 5.844e-08,
"loss": 0.1839,
"step": 2925
},
{
"epoch": 1.377217553688142,
"grad_norm": 6.720695972442627,
"learning_rate": 5.894e-08,
"loss": 0.2678,
"step": 2950
},
{
"epoch": 1.3888888888888888,
"grad_norm": 8.372034072875977,
"learning_rate": 5.9439999999999995e-08,
"loss": 0.1999,
"step": 2975
},
{
"epoch": 1.4005602240896358,
"grad_norm": 6.330301284790039,
"learning_rate": 5.993999999999999e-08,
"loss": 0.3173,
"step": 3000
},
{
"epoch": 1.4122315592903827,
"grad_norm": 10.318882942199707,
"learning_rate": 6.044e-08,
"loss": 0.1953,
"step": 3025
},
{
"epoch": 1.4239028944911298,
"grad_norm": 7.442046165466309,
"learning_rate": 6.094e-08,
"loss": 0.3231,
"step": 3050
},
{
"epoch": 1.4355742296918768,
"grad_norm": 9.14301872253418,
"learning_rate": 6.144e-08,
"loss": 0.2168,
"step": 3075
},
{
"epoch": 1.4472455648926237,
"grad_norm": 6.955599784851074,
"learning_rate": 6.194e-08,
"loss": 0.265,
"step": 3100
},
{
"epoch": 1.4589169000933706,
"grad_norm": 7.568444728851318,
"learning_rate": 6.244e-08,
"loss": 0.182,
"step": 3125
},
{
"epoch": 1.4705882352941178,
"grad_norm": 4.784877300262451,
"learning_rate": 6.293999999999999e-08,
"loss": 0.2616,
"step": 3150
},
{
"epoch": 1.4822595704948647,
"grad_norm": 5.884426116943359,
"learning_rate": 6.343999999999999e-08,
"loss": 0.1782,
"step": 3175
},
{
"epoch": 1.4939309056956116,
"grad_norm": 8.85175609588623,
"learning_rate": 6.393999999999999e-08,
"loss": 0.2837,
"step": 3200
},
{
"epoch": 1.5056022408963585,
"grad_norm": 7.9142537117004395,
"learning_rate": 6.444e-08,
"loss": 0.1751,
"step": 3225
},
{
"epoch": 1.5172735760971054,
"grad_norm": 6.807056903839111,
"learning_rate": 6.494e-08,
"loss": 0.3072,
"step": 3250
},
{
"epoch": 1.5289449112978524,
"grad_norm": 8.901240348815918,
"learning_rate": 6.544e-08,
"loss": 0.1975,
"step": 3275
},
{
"epoch": 1.5406162464985993,
"grad_norm": 5.000201225280762,
"learning_rate": 6.594e-08,
"loss": 0.2892,
"step": 3300
},
{
"epoch": 1.5522875816993464,
"grad_norm": 11.009442329406738,
"learning_rate": 6.643999999999999e-08,
"loss": 0.205,
"step": 3325
},
{
"epoch": 1.5639589169000934,
"grad_norm": 4.820681095123291,
"learning_rate": 6.694e-08,
"loss": 0.2868,
"step": 3350
},
{
"epoch": 1.5756302521008403,
"grad_norm": 11.95584487915039,
"learning_rate": 6.744e-08,
"loss": 0.1905,
"step": 3375
},
{
"epoch": 1.5873015873015874,
"grad_norm": 5.530846118927002,
"learning_rate": 6.794e-08,
"loss": 0.2608,
"step": 3400
},
{
"epoch": 1.5989729225023344,
"grad_norm": 8.828543663024902,
"learning_rate": 6.844e-08,
"loss": 0.1916,
"step": 3425
},
{
"epoch": 1.6106442577030813,
"grad_norm": 5.600862503051758,
"learning_rate": 6.894e-08,
"loss": 0.2636,
"step": 3450
},
{
"epoch": 1.6223155929038282,
"grad_norm": 9.772380828857422,
"learning_rate": 6.944e-08,
"loss": 0.1835,
"step": 3475
},
{
"epoch": 1.6339869281045751,
"grad_norm": 4.258734703063965,
"learning_rate": 6.994e-08,
"loss": 0.2693,
"step": 3500
},
{
"epoch": 1.645658263305322,
"grad_norm": 6.106602668762207,
"learning_rate": 7.044e-08,
"loss": 0.1971,
"step": 3525
},
{
"epoch": 1.657329598506069,
"grad_norm": 3.7969162464141846,
"learning_rate": 7.094e-08,
"loss": 0.2919,
"step": 3550
},
{
"epoch": 1.669000933706816,
"grad_norm": 7.152183532714844,
"learning_rate": 7.144e-08,
"loss": 0.2027,
"step": 3575
},
{
"epoch": 1.680672268907563,
"grad_norm": 6.071133613586426,
"learning_rate": 7.194e-08,
"loss": 0.2699,
"step": 3600
},
{
"epoch": 1.69234360410831,
"grad_norm": 6.300527095794678,
"learning_rate": 7.244e-08,
"loss": 0.1766,
"step": 3625
},
{
"epoch": 1.7040149393090571,
"grad_norm": 5.592601776123047,
"learning_rate": 7.294e-08,
"loss": 0.2645,
"step": 3650
},
{
"epoch": 1.715686274509804,
"grad_norm": 14.278104782104492,
"learning_rate": 7.344e-08,
"loss": 0.1926,
"step": 3675
},
{
"epoch": 1.727357609710551,
"grad_norm": 6.237105369567871,
"learning_rate": 7.394e-08,
"loss": 0.2814,
"step": 3700
},
{
"epoch": 1.739028944911298,
"grad_norm": 10.357053756713867,
"learning_rate": 7.444e-08,
"loss": 0.2081,
"step": 3725
},
{
"epoch": 1.7507002801120448,
"grad_norm": 7.063169002532959,
"learning_rate": 7.494000000000001e-08,
"loss": 0.2718,
"step": 3750
},
{
"epoch": 1.7623716153127917,
"grad_norm": 7.2696638107299805,
"learning_rate": 7.543999999999999e-08,
"loss": 0.1849,
"step": 3775
},
{
"epoch": 1.7740429505135387,
"grad_norm": 3.825491428375244,
"learning_rate": 7.593999999999999e-08,
"loss": 0.271,
"step": 3800
},
{
"epoch": 1.7857142857142856,
"grad_norm": 7.341236114501953,
"learning_rate": 7.643999999999999e-08,
"loss": 0.166,
"step": 3825
},
{
"epoch": 1.7973856209150327,
"grad_norm": 5.081601619720459,
"learning_rate": 7.693999999999999e-08,
"loss": 0.2857,
"step": 3850
},
{
"epoch": 1.8090569561157797,
"grad_norm": 7.839240550994873,
"learning_rate": 7.744e-08,
"loss": 0.1839,
"step": 3875
},
{
"epoch": 1.8207282913165266,
"grad_norm": 4.5172014236450195,
"learning_rate": 7.794e-08,
"loss": 0.2515,
"step": 3900
},
{
"epoch": 1.8323996265172737,
"grad_norm": 9.478545188903809,
"learning_rate": 7.843999999999999e-08,
"loss": 0.1678,
"step": 3925
},
{
"epoch": 1.8440709617180207,
"grad_norm": 5.93352746963501,
"learning_rate": 7.893999999999999e-08,
"loss": 0.2674,
"step": 3950
},
{
"epoch": 1.8557422969187676,
"grad_norm": 9.502734184265137,
"learning_rate": 7.943999999999999e-08,
"loss": 0.1609,
"step": 3975
},
{
"epoch": 1.8674136321195145,
"grad_norm": 7.912998676300049,
"learning_rate": 7.994e-08,
"loss": 0.2345,
"step": 4000
},
{
"epoch": 1.8790849673202614,
"grad_norm": 5.549155235290527,
"learning_rate": 8.044e-08,
"loss": 0.1918,
"step": 4025
},
{
"epoch": 1.8907563025210083,
"grad_norm": 7.1379499435424805,
"learning_rate": 8.094e-08,
"loss": 0.2655,
"step": 4050
},
{
"epoch": 1.9024276377217553,
"grad_norm": 5.990372657775879,
"learning_rate": 8.144e-08,
"loss": 0.1538,
"step": 4075
},
{
"epoch": 1.9140989729225022,
"grad_norm": 5.755247592926025,
"learning_rate": 8.192000000000001e-08,
"loss": 0.2351,
"step": 4100
},
{
"epoch": 1.9257703081232493,
"grad_norm": 11.432059288024902,
"learning_rate": 8.241999999999999e-08,
"loss": 0.1734,
"step": 4125
},
{
"epoch": 1.9374416433239963,
"grad_norm": 4.935561656951904,
"learning_rate": 8.291999999999999e-08,
"loss": 0.2586,
"step": 4150
},
{
"epoch": 1.9491129785247432,
"grad_norm": 7.362981796264648,
"learning_rate": 8.341999999999999e-08,
"loss": 0.1681,
"step": 4175
},
{
"epoch": 1.9607843137254903,
"grad_norm": 5.120658874511719,
"learning_rate": 8.391999999999999e-08,
"loss": 0.2669,
"step": 4200
},
{
"epoch": 1.9724556489262373,
"grad_norm": 9.280594825744629,
"learning_rate": 8.442e-08,
"loss": 0.1717,
"step": 4225
},
{
"epoch": 1.9841269841269842,
"grad_norm": 7.310540199279785,
"learning_rate": 8.492e-08,
"loss": 0.2391,
"step": 4250
},
{
"epoch": 1.995798319327731,
"grad_norm": 7.3643927574157715,
"learning_rate": 8.541999999999999e-08,
"loss": 0.1764,
"step": 4275
},
{
"epoch": 2.007469654528478,
"grad_norm": 4.083337783813477,
"learning_rate": 8.59e-08,
"loss": 0.2192,
"step": 4300
},
{
"epoch": 2.019140989729225,
"grad_norm": 10.079933166503906,
"learning_rate": 8.64e-08,
"loss": 0.1708,
"step": 4325
},
{
"epoch": 2.030812324929972,
"grad_norm": 5.14344596862793,
"learning_rate": 8.69e-08,
"loss": 0.2486,
"step": 4350
},
{
"epoch": 2.042483660130719,
"grad_norm": 6.526447296142578,
"learning_rate": 8.74e-08,
"loss": 0.1678,
"step": 4375
},
{
"epoch": 2.0541549953314657,
"grad_norm": 5.671429634094238,
"learning_rate": 8.79e-08,
"loss": 0.2349,
"step": 4400
},
{
"epoch": 2.065826330532213,
"grad_norm": 9.383622169494629,
"learning_rate": 8.84e-08,
"loss": 0.1365,
"step": 4425
},
{
"epoch": 2.07749766573296,
"grad_norm": 6.865725040435791,
"learning_rate": 8.890000000000001e-08,
"loss": 0.2207,
"step": 4450
},
{
"epoch": 2.089169000933707,
"grad_norm": 10.070252418518066,
"learning_rate": 8.939999999999999e-08,
"loss": 0.1797,
"step": 4475
},
{
"epoch": 2.100840336134454,
"grad_norm": 4.737791061401367,
"learning_rate": 8.989999999999999e-08,
"loss": 0.2361,
"step": 4500
},
{
"epoch": 2.112511671335201,
"grad_norm": 9.253829002380371,
"learning_rate": 9.039999999999999e-08,
"loss": 0.184,
"step": 4525
},
{
"epoch": 2.1241830065359477,
"grad_norm": 5.701707363128662,
"learning_rate": 9.089999999999999e-08,
"loss": 0.233,
"step": 4550
},
{
"epoch": 2.1358543417366946,
"grad_norm": 7.527386665344238,
"learning_rate": 9.139999999999998e-08,
"loss": 0.1771,
"step": 4575
},
{
"epoch": 2.1475256769374416,
"grad_norm": 7.340992450714111,
"learning_rate": 9.19e-08,
"loss": 0.2505,
"step": 4600
},
{
"epoch": 2.1591970121381885,
"grad_norm": 11.267548561096191,
"learning_rate": 9.24e-08,
"loss": 0.1706,
"step": 4625
},
{
"epoch": 2.1708683473389354,
"grad_norm": 5.289811134338379,
"learning_rate": 9.289999999999999e-08,
"loss": 0.2326,
"step": 4650
},
{
"epoch": 2.1825396825396823,
"grad_norm": 6.074433326721191,
"learning_rate": 9.339999999999999e-08,
"loss": 0.1794,
"step": 4675
},
{
"epoch": 2.1942110177404297,
"grad_norm": 6.203845024108887,
"learning_rate": 9.389999999999999e-08,
"loss": 0.2259,
"step": 4700
},
{
"epoch": 2.2058823529411766,
"grad_norm": 9.799361228942871,
"learning_rate": 9.44e-08,
"loss": 0.1796,
"step": 4725
},
{
"epoch": 2.2175536881419236,
"grad_norm": 7.236292839050293,
"learning_rate": 9.49e-08,
"loss": 0.2338,
"step": 4750
},
{
"epoch": 2.2292250233426705,
"grad_norm": 10.37661075592041,
"learning_rate": 9.54e-08,
"loss": 0.1969,
"step": 4775
},
{
"epoch": 2.2408963585434174,
"grad_norm": 6.369841575622559,
"learning_rate": 9.589999999999999e-08,
"loss": 0.2103,
"step": 4800
},
{
"epoch": 2.2525676937441643,
"grad_norm": 9.137279510498047,
"learning_rate": 9.639999999999999e-08,
"loss": 0.1836,
"step": 4825
},
{
"epoch": 2.2642390289449112,
"grad_norm": 6.758956432342529,
"learning_rate": 9.69e-08,
"loss": 0.2462,
"step": 4850
},
{
"epoch": 2.275910364145658,
"grad_norm": 6.473018169403076,
"learning_rate": 9.74e-08,
"loss": 0.1802,
"step": 4875
},
{
"epoch": 2.287581699346405,
"grad_norm": 4.492936134338379,
"learning_rate": 9.79e-08,
"loss": 0.2323,
"step": 4900
},
{
"epoch": 2.299253034547152,
"grad_norm": 9.348398208618164,
"learning_rate": 9.84e-08,
"loss": 0.1794,
"step": 4925
},
{
"epoch": 2.310924369747899,
"grad_norm": 5.3305230140686035,
"learning_rate": 9.889999999999999e-08,
"loss": 0.2443,
"step": 4950
},
{
"epoch": 2.3225957049486463,
"grad_norm": 10.86744499206543,
"learning_rate": 9.94e-08,
"loss": 0.1844,
"step": 4975
},
{
"epoch": 2.3342670401493932,
"grad_norm": 6.479306697845459,
"learning_rate": 9.99e-08,
"loss": 0.2244,
"step": 5000
},
{
"epoch": 2.3342670401493932,
"eval_loss": 0.17277346551418304,
"eval_runtime": 6738.9666,
"eval_samples_per_second": 1.397,
"eval_steps_per_second": 0.175,
"eval_wer": 0.11098013886646213,
"step": 5000
},
{
"epoch": 2.34593837535014,
"grad_norm": 5.447085857391357,
"learning_rate": 1.004e-07,
"loss": 0.1718,
"step": 5025
},
{
"epoch": 2.357609710550887,
"grad_norm": 8.150873184204102,
"learning_rate": 1.009e-07,
"loss": 0.2243,
"step": 5050
},
{
"epoch": 2.369281045751634,
"grad_norm": 8.1106538772583,
"learning_rate": 1.014e-07,
"loss": 0.146,
"step": 5075
},
{
"epoch": 2.380952380952381,
"grad_norm": 4.127166748046875,
"learning_rate": 1.019e-07,
"loss": 0.2267,
"step": 5100
},
{
"epoch": 2.392623716153128,
"grad_norm": 11.673868179321289,
"learning_rate": 1.024e-07,
"loss": 0.1827,
"step": 5125
},
{
"epoch": 2.404295051353875,
"grad_norm": 5.34147834777832,
"learning_rate": 1.029e-07,
"loss": 0.2271,
"step": 5150
},
{
"epoch": 2.4159663865546217,
"grad_norm": 8.061164855957031,
"learning_rate": 1.034e-07,
"loss": 0.1765,
"step": 5175
},
{
"epoch": 2.4276377217553686,
"grad_norm": 6.568578243255615,
"learning_rate": 1.039e-07,
"loss": 0.2249,
"step": 5200
},
{
"epoch": 2.439309056956116,
"grad_norm": 8.7069730758667,
"learning_rate": 1.0440000000000001e-07,
"loss": 0.1717,
"step": 5225
},
{
"epoch": 2.450980392156863,
"grad_norm": 5.4418792724609375,
"learning_rate": 1.0489999999999999e-07,
"loss": 0.2247,
"step": 5250
},
{
"epoch": 2.46265172735761,
"grad_norm": 9.333065032958984,
"learning_rate": 1.0539999999999999e-07,
"loss": 0.1851,
"step": 5275
},
{
"epoch": 2.4743230625583568,
"grad_norm": 6.602376461029053,
"learning_rate": 1.0589999999999999e-07,
"loss": 0.2658,
"step": 5300
},
{
"epoch": 2.4859943977591037,
"grad_norm": 11.450864791870117,
"learning_rate": 1.0639999999999999e-07,
"loss": 0.1743,
"step": 5325
},
{
"epoch": 2.4976657329598506,
"grad_norm": 5.90830135345459,
"learning_rate": 1.0689999999999998e-07,
"loss": 0.2272,
"step": 5350
},
{
"epoch": 2.5093370681605975,
"grad_norm": 6.921583652496338,
"learning_rate": 1.074e-07,
"loss": 0.1585,
"step": 5375
},
{
"epoch": 2.5210084033613445,
"grad_norm": 5.965441703796387,
"learning_rate": 1.079e-07,
"loss": 0.2117,
"step": 5400
},
{
"epoch": 2.5326797385620914,
"grad_norm": 8.437889099121094,
"learning_rate": 1.0839999999999999e-07,
"loss": 0.1903,
"step": 5425
},
{
"epoch": 2.5443510737628383,
"grad_norm": 5.796535491943359,
"learning_rate": 1.0889999999999999e-07,
"loss": 0.2151,
"step": 5450
},
{
"epoch": 2.5560224089635852,
"grad_norm": 9.895671844482422,
"learning_rate": 1.0939999999999999e-07,
"loss": 0.1778,
"step": 5475
},
{
"epoch": 2.567693744164332,
"grad_norm": 10.211431503295898,
"learning_rate": 1.099e-07,
"loss": 0.2166,
"step": 5500
},
{
"epoch": 2.5793650793650795,
"grad_norm": 8.422016143798828,
"learning_rate": 1.104e-07,
"loss": 0.1599,
"step": 5525
},
{
"epoch": 2.5910364145658265,
"grad_norm": 6.82072639465332,
"learning_rate": 1.109e-07,
"loss": 0.2406,
"step": 5550
},
{
"epoch": 2.6027077497665734,
"grad_norm": 7.977824687957764,
"learning_rate": 1.1139999999999999e-07,
"loss": 0.174,
"step": 5575
},
{
"epoch": 2.6143790849673203,
"grad_norm": 4.871920108795166,
"learning_rate": 1.1189999999999999e-07,
"loss": 0.2077,
"step": 5600
},
{
"epoch": 2.6260504201680672,
"grad_norm": 14.31760025024414,
"learning_rate": 1.124e-07,
"loss": 0.1581,
"step": 5625
},
{
"epoch": 2.637721755368814,
"grad_norm": 4.476131916046143,
"learning_rate": 1.129e-07,
"loss": 0.2158,
"step": 5650
},
{
"epoch": 2.649393090569561,
"grad_norm": 6.954850673675537,
"learning_rate": 1.134e-07,
"loss": 0.1689,
"step": 5675
},
{
"epoch": 2.661064425770308,
"grad_norm": 5.502589702606201,
"learning_rate": 1.139e-07,
"loss": 0.2082,
"step": 5700
},
{
"epoch": 2.6727357609710554,
"grad_norm": 13.118797302246094,
"learning_rate": 1.1439999999999999e-07,
"loss": 0.1646,
"step": 5725
},
{
"epoch": 2.6844070961718023,
"grad_norm": 3.66182541847229,
"learning_rate": 1.149e-07,
"loss": 0.2201,
"step": 5750
},
{
"epoch": 2.696078431372549,
"grad_norm": 9.46583366394043,
"learning_rate": 1.154e-07,
"loss": 0.158,
"step": 5775
},
{
"epoch": 2.707749766573296,
"grad_norm": 6.853757381439209,
"learning_rate": 1.159e-07,
"loss": 0.2417,
"step": 5800
},
{
"epoch": 2.719421101774043,
"grad_norm": 8.791181564331055,
"learning_rate": 1.164e-07,
"loss": 0.163,
"step": 5825
},
{
"epoch": 2.73109243697479,
"grad_norm": 6.461370944976807,
"learning_rate": 1.169e-07,
"loss": 0.2103,
"step": 5850
},
{
"epoch": 2.742763772175537,
"grad_norm": 9.98912525177002,
"learning_rate": 1.1739999999999999e-07,
"loss": 0.1519,
"step": 5875
},
{
"epoch": 2.754435107376284,
"grad_norm": 4.975451946258545,
"learning_rate": 1.179e-07,
"loss": 0.2517,
"step": 5900
},
{
"epoch": 2.7661064425770308,
"grad_norm": 8.629615783691406,
"learning_rate": 1.184e-07,
"loss": 0.1452,
"step": 5925
},
{
"epoch": 2.7777777777777777,
"grad_norm": 6.218091011047363,
"learning_rate": 1.189e-07,
"loss": 0.2481,
"step": 5950
},
{
"epoch": 2.7894491129785246,
"grad_norm": 7.882603168487549,
"learning_rate": 1.194e-07,
"loss": 0.175,
"step": 5975
},
{
"epoch": 2.8011204481792715,
"grad_norm": 5.259121417999268,
"learning_rate": 1.199e-07,
"loss": 0.1935,
"step": 6000
},
{
"epoch": 2.8127917833800185,
"grad_norm": 9.59416389465332,
"learning_rate": 1.204e-07,
"loss": 0.1517,
"step": 6025
},
{
"epoch": 2.8244631185807654,
"grad_norm": 5.815752983093262,
"learning_rate": 1.2089999999999998e-07,
"loss": 0.2308,
"step": 6050
},
{
"epoch": 2.8361344537815127,
"grad_norm": 16.040206909179688,
"learning_rate": 1.214e-07,
"loss": 0.1681,
"step": 6075
},
{
"epoch": 2.8478057889822597,
"grad_norm": 6.292205333709717,
"learning_rate": 1.219e-07,
"loss": 0.2188,
"step": 6100
},
{
"epoch": 2.8594771241830066,
"grad_norm": 8.221199035644531,
"learning_rate": 1.2239999999999998e-07,
"loss": 0.1637,
"step": 6125
},
{
"epoch": 2.8711484593837535,
"grad_norm": 5.419161319732666,
"learning_rate": 1.229e-07,
"loss": 0.2308,
"step": 6150
},
{
"epoch": 2.8828197945845004,
"grad_norm": 7.9300665855407715,
"learning_rate": 1.2339999999999998e-07,
"loss": 0.1609,
"step": 6175
},
{
"epoch": 2.8944911297852474,
"grad_norm": 5.752758026123047,
"learning_rate": 1.239e-07,
"loss": 0.2159,
"step": 6200
},
{
"epoch": 2.9061624649859943,
"grad_norm": 8.784625053405762,
"learning_rate": 1.244e-07,
"loss": 0.1663,
"step": 6225
},
{
"epoch": 2.917833800186741,
"grad_norm": 6.791645526885986,
"learning_rate": 1.249e-07,
"loss": 0.2163,
"step": 6250
},
{
"epoch": 2.9295051353874886,
"grad_norm": 6.143098831176758,
"learning_rate": 1.254e-07,
"loss": 0.1623,
"step": 6275
},
{
"epoch": 2.9411764705882355,
"grad_norm": 4.767801284790039,
"learning_rate": 1.259e-07,
"loss": 0.2019,
"step": 6300
},
{
"epoch": 2.9528478057889824,
"grad_norm": 9.43720531463623,
"learning_rate": 1.264e-07,
"loss": 0.1709,
"step": 6325
},
{
"epoch": 2.9645191409897294,
"grad_norm": 5.25966215133667,
"learning_rate": 1.269e-07,
"loss": 0.2319,
"step": 6350
},
{
"epoch": 2.9761904761904763,
"grad_norm": 13.078607559204102,
"learning_rate": 1.2740000000000002e-07,
"loss": 0.1754,
"step": 6375
},
{
"epoch": 2.987861811391223,
"grad_norm": 5.5642991065979,
"learning_rate": 1.279e-07,
"loss": 0.2153,
"step": 6400
},
{
"epoch": 2.99953314659197,
"grad_norm": 11.523698806762695,
"learning_rate": 1.2839999999999999e-07,
"loss": 0.2086,
"step": 6425
},
{
"epoch": 3.011204481792717,
"grad_norm": 5.675624370574951,
"learning_rate": 1.2888e-07,
"loss": 0.1593,
"step": 6450
},
{
"epoch": 3.022875816993464,
"grad_norm": 17.480037689208984,
"learning_rate": 1.2937999999999998e-07,
"loss": 0.1673,
"step": 6475
},
{
"epoch": 3.034547152194211,
"grad_norm": 9.549832344055176,
"learning_rate": 1.2988e-07,
"loss": 0.1938,
"step": 6500
},
{
"epoch": 3.046218487394958,
"grad_norm": 12.89521598815918,
"learning_rate": 1.3037999999999998e-07,
"loss": 0.186,
"step": 6525
},
{
"epoch": 3.0578898225957047,
"grad_norm": 7.42260217666626,
"learning_rate": 1.3088e-07,
"loss": 0.2042,
"step": 6550
},
{
"epoch": 3.069561157796452,
"grad_norm": 13.60092544555664,
"learning_rate": 1.3138e-07,
"loss": 0.1988,
"step": 6575
},
{
"epoch": 3.081232492997199,
"grad_norm": 5.782377243041992,
"learning_rate": 1.3188e-07,
"loss": 0.1736,
"step": 6600
},
{
"epoch": 3.092903828197946,
"grad_norm": 11.844609260559082,
"learning_rate": 1.3238e-07,
"loss": 0.1904,
"step": 6625
},
{
"epoch": 3.104575163398693,
"grad_norm": 6.240257263183594,
"learning_rate": 1.3287999999999998e-07,
"loss": 0.1605,
"step": 6650
},
{
"epoch": 3.11624649859944,
"grad_norm": 12.566492080688477,
"learning_rate": 1.3338e-07,
"loss": 0.1957,
"step": 6675
},
{
"epoch": 3.1279178338001867,
"grad_norm": 8.285445213317871,
"learning_rate": 1.3388e-07,
"loss": 0.1801,
"step": 6700
},
{
"epoch": 3.1395891690009337,
"grad_norm": 12.288935661315918,
"learning_rate": 1.3438e-07,
"loss": 0.1982,
"step": 6725
},
{
"epoch": 3.1512605042016806,
"grad_norm": 7.052362442016602,
"learning_rate": 1.3488e-07,
"loss": 0.1619,
"step": 6750
},
{
"epoch": 3.1629318394024275,
"grad_norm": 18.458065032958984,
"learning_rate": 1.3537999999999999e-07,
"loss": 0.1855,
"step": 6775
},
{
"epoch": 3.1746031746031744,
"grad_norm": 15.821798324584961,
"learning_rate": 1.3588e-07,
"loss": 0.1476,
"step": 6800
},
{
"epoch": 3.186274509803922,
"grad_norm": 11.816914558410645,
"learning_rate": 1.3638e-07,
"loss": 0.1944,
"step": 6825
},
{
"epoch": 3.1979458450046687,
"grad_norm": 6.645755767822266,
"learning_rate": 1.3688e-07,
"loss": 0.1678,
"step": 6850
},
{
"epoch": 3.2096171802054156,
"grad_norm": 16.721040725708008,
"learning_rate": 1.3738e-07,
"loss": 0.1706,
"step": 6875
},
{
"epoch": 3.2212885154061626,
"grad_norm": 8.140375137329102,
"learning_rate": 1.3788e-07,
"loss": 0.1695,
"step": 6900
},
{
"epoch": 3.2329598506069095,
"grad_norm": 12.450023651123047,
"learning_rate": 1.3838e-07,
"loss": 0.1797,
"step": 6925
},
{
"epoch": 3.2446311858076564,
"grad_norm": 6.419872760772705,
"learning_rate": 1.3888e-07,
"loss": 0.1665,
"step": 6950
},
{
"epoch": 3.2563025210084033,
"grad_norm": 10.356698036193848,
"learning_rate": 1.3938e-07,
"loss": 0.1614,
"step": 6975
},
{
"epoch": 3.2679738562091503,
"grad_norm": 8.553840637207031,
"learning_rate": 1.3988e-07,
"loss": 0.1616,
"step": 7000
},
{
"epoch": 3.279645191409897,
"grad_norm": 11.927959442138672,
"learning_rate": 1.4038e-07,
"loss": 0.1686,
"step": 7025
},
{
"epoch": 3.291316526610644,
"grad_norm": 6.493635654449463,
"learning_rate": 1.4088e-07,
"loss": 0.1804,
"step": 7050
},
{
"epoch": 3.302987861811391,
"grad_norm": 14.237950325012207,
"learning_rate": 1.4137999999999999e-07,
"loss": 0.1607,
"step": 7075
},
{
"epoch": 3.314659197012138,
"grad_norm": 7.410088539123535,
"learning_rate": 1.4188e-07,
"loss": 0.1458,
"step": 7100
},
{
"epoch": 3.3263305322128853,
"grad_norm": 10.997467041015625,
"learning_rate": 1.4238e-07,
"loss": 0.1676,
"step": 7125
},
{
"epoch": 3.3380018674136323,
"grad_norm": 6.001441955566406,
"learning_rate": 1.4288e-07,
"loss": 0.1655,
"step": 7150
},
{
"epoch": 3.349673202614379,
"grad_norm": 14.08969497680664,
"learning_rate": 1.4338e-07,
"loss": 0.1959,
"step": 7175
},
{
"epoch": 3.361344537815126,
"grad_norm": 5.801328182220459,
"learning_rate": 1.4388e-07,
"loss": 0.1686,
"step": 7200
},
{
"epoch": 3.373015873015873,
"grad_norm": 13.626670837402344,
"learning_rate": 1.4438e-07,
"loss": 0.1986,
"step": 7225
},
{
"epoch": 3.38468720821662,
"grad_norm": 6.545166492462158,
"learning_rate": 1.4488e-07,
"loss": 0.1852,
"step": 7250
},
{
"epoch": 3.396358543417367,
"grad_norm": 13.894329071044922,
"learning_rate": 1.4538e-07,
"loss": 0.1642,
"step": 7275
},
{
"epoch": 3.408029878618114,
"grad_norm": 10.140618324279785,
"learning_rate": 1.4588e-07,
"loss": 0.177,
"step": 7300
},
{
"epoch": 3.4197012138188607,
"grad_norm": 18.14762306213379,
"learning_rate": 1.4638e-07,
"loss": 0.1856,
"step": 7325
},
{
"epoch": 3.431372549019608,
"grad_norm": 5.81195592880249,
"learning_rate": 1.4688e-07,
"loss": 0.1701,
"step": 7350
},
{
"epoch": 3.443043884220355,
"grad_norm": 12.958548545837402,
"learning_rate": 1.4738000000000001e-07,
"loss": 0.1755,
"step": 7375
},
{
"epoch": 3.454715219421102,
"grad_norm": 8.025079727172852,
"learning_rate": 1.4788e-07,
"loss": 0.1801,
"step": 7400
},
{
"epoch": 3.466386554621849,
"grad_norm": 19.22530746459961,
"learning_rate": 1.4838e-07,
"loss": 0.1776,
"step": 7425
},
{
"epoch": 3.478057889822596,
"grad_norm": 7.672618865966797,
"learning_rate": 1.4888e-07,
"loss": 0.1915,
"step": 7450
},
{
"epoch": 3.4897292250233427,
"grad_norm": 12.312602043151855,
"learning_rate": 1.4938e-07,
"loss": 0.1888,
"step": 7475
},
{
"epoch": 3.5014005602240896,
"grad_norm": 7.824102878570557,
"learning_rate": 1.4988000000000002e-07,
"loss": 0.1725,
"step": 7500
},
{
"epoch": 3.5130718954248366,
"grad_norm": 11.86865234375,
"learning_rate": 1.5038e-07,
"loss": 0.1996,
"step": 7525
},
{
"epoch": 3.5247432306255835,
"grad_norm": 6.472956657409668,
"learning_rate": 1.5087999999999999e-07,
"loss": 0.1807,
"step": 7550
},
{
"epoch": 3.5364145658263304,
"grad_norm": 14.855595588684082,
"learning_rate": 1.5137999999999997e-07,
"loss": 0.1665,
"step": 7575
},
{
"epoch": 3.5480859010270773,
"grad_norm": 5.422650337219238,
"learning_rate": 1.5187999999999998e-07,
"loss": 0.169,
"step": 7600
},
{
"epoch": 3.5597572362278243,
"grad_norm": 13.586644172668457,
"learning_rate": 1.5238e-07,
"loss": 0.1886,
"step": 7625
},
{
"epoch": 3.571428571428571,
"grad_norm": 7.154773712158203,
"learning_rate": 1.5287999999999998e-07,
"loss": 0.1772,
"step": 7650
},
{
"epoch": 3.5830999066293185,
"grad_norm": 15.92589282989502,
"learning_rate": 1.5338e-07,
"loss": 0.1603,
"step": 7675
},
{
"epoch": 3.5947712418300655,
"grad_norm": 4.725268363952637,
"learning_rate": 1.5387999999999997e-07,
"loss": 0.1579,
"step": 7700
},
{
"epoch": 3.6064425770308124,
"grad_norm": 10.37312126159668,
"learning_rate": 1.5437999999999998e-07,
"loss": 0.1556,
"step": 7725
},
{
"epoch": 3.6181139122315593,
"grad_norm": 3.6106224060058594,
"learning_rate": 1.5488e-07,
"loss": 0.1753,
"step": 7750
},
{
"epoch": 3.6297852474323062,
"grad_norm": 13.736579895019531,
"learning_rate": 1.5537999999999998e-07,
"loss": 0.1932,
"step": 7775
},
{
"epoch": 3.641456582633053,
"grad_norm": 7.333006381988525,
"learning_rate": 1.5588e-07,
"loss": 0.1688,
"step": 7800
},
{
"epoch": 3.6531279178338,
"grad_norm": 16.784841537475586,
"learning_rate": 1.5637999999999997e-07,
"loss": 0.1999,
"step": 7825
},
{
"epoch": 3.664799253034547,
"grad_norm": 9.547866821289062,
"learning_rate": 1.5687999999999999e-07,
"loss": 0.1852,
"step": 7850
},
{
"epoch": 3.6764705882352944,
"grad_norm": 14.133809089660645,
"learning_rate": 1.5738e-07,
"loss": 0.1554,
"step": 7875
},
{
"epoch": 3.6881419234360413,
"grad_norm": 7.968010425567627,
"learning_rate": 1.5787999999999998e-07,
"loss": 0.1649,
"step": 7900
},
{
"epoch": 3.6998132586367882,
"grad_norm": 12.247528076171875,
"learning_rate": 1.5838e-07,
"loss": 0.1975,
"step": 7925
},
{
"epoch": 3.711484593837535,
"grad_norm": 6.5526323318481445,
"learning_rate": 1.5887999999999998e-07,
"loss": 0.1808,
"step": 7950
},
{
"epoch": 3.723155929038282,
"grad_norm": 11.869317054748535,
"learning_rate": 1.5938e-07,
"loss": 0.1796,
"step": 7975
},
{
"epoch": 3.734827264239029,
"grad_norm": 7.336709499359131,
"learning_rate": 1.5988e-07,
"loss": 0.179,
"step": 8000
},
{
"epoch": 3.746498599439776,
"grad_norm": 21.456043243408203,
"learning_rate": 1.6037999999999998e-07,
"loss": 0.1854,
"step": 8025
},
{
"epoch": 3.758169934640523,
"grad_norm": 5.577650547027588,
"learning_rate": 1.6088e-07,
"loss": 0.1771,
"step": 8050
},
{
"epoch": 3.7698412698412698,
"grad_norm": 18.03679084777832,
"learning_rate": 1.6137999999999998e-07,
"loss": 0.1849,
"step": 8075
},
{
"epoch": 3.7815126050420167,
"grad_norm": 6.453721046447754,
"learning_rate": 1.6188e-07,
"loss": 0.1715,
"step": 8100
},
{
"epoch": 3.7931839402427636,
"grad_norm": 11.65691089630127,
"learning_rate": 1.6238e-07,
"loss": 0.1721,
"step": 8125
},
{
"epoch": 3.8048552754435105,
"grad_norm": 5.9379963874816895,
"learning_rate": 1.6288e-07,
"loss": 0.1691,
"step": 8150
},
{
"epoch": 3.8165266106442575,
"grad_norm": 16.275161743164062,
"learning_rate": 1.6338e-07,
"loss": 0.187,
"step": 8175
},
{
"epoch": 3.828197945845005,
"grad_norm": 4.220703125,
"learning_rate": 1.6387999999999998e-07,
"loss": 0.1973,
"step": 8200
},
{
"epoch": 3.8398692810457518,
"grad_norm": 13.512842178344727,
"learning_rate": 1.6438e-07,
"loss": 0.2035,
"step": 8225
},
{
"epoch": 3.8515406162464987,
"grad_norm": 4.128376007080078,
"learning_rate": 1.6487999999999998e-07,
"loss": 0.1761,
"step": 8250
},
{
"epoch": 3.8632119514472456,
"grad_norm": 15.425586700439453,
"learning_rate": 1.6538e-07,
"loss": 0.176,
"step": 8275
},
{
"epoch": 3.8748832866479925,
"grad_norm": 6.103633403778076,
"learning_rate": 1.6588e-07,
"loss": 0.1643,
"step": 8300
},
{
"epoch": 3.8865546218487395,
"grad_norm": 15.449716567993164,
"learning_rate": 1.6637999999999999e-07,
"loss": 0.1892,
"step": 8325
},
{
"epoch": 3.8982259570494864,
"grad_norm": 5.736420154571533,
"learning_rate": 1.6688e-07,
"loss": 0.1849,
"step": 8350
},
{
"epoch": 3.9098972922502333,
"grad_norm": 8.855688095092773,
"learning_rate": 1.6737999999999998e-07,
"loss": 0.178,
"step": 8375
},
{
"epoch": 3.9215686274509802,
"grad_norm": 6.678255558013916,
"learning_rate": 1.6788e-07,
"loss": 0.1574,
"step": 8400
},
{
"epoch": 3.9332399626517276,
"grad_norm": 15.251968383789062,
"learning_rate": 1.6838e-07,
"loss": 0.186,
"step": 8425
},
{
"epoch": 3.9449112978524745,
"grad_norm": 4.868924140930176,
"learning_rate": 1.6888e-07,
"loss": 0.1476,
"step": 8450
},
{
"epoch": 3.9565826330532214,
"grad_norm": 13.231505393981934,
"learning_rate": 1.6938e-07,
"loss": 0.1432,
"step": 8475
},
{
"epoch": 3.9682539682539684,
"grad_norm": 6.0398478507995605,
"learning_rate": 1.6987999999999998e-07,
"loss": 0.1835,
"step": 8500
},
{
"epoch": 3.9799253034547153,
"grad_norm": 20.359071731567383,
"learning_rate": 1.7038e-07,
"loss": 0.187,
"step": 8525
},
{
"epoch": 3.991596638655462,
"grad_norm": 5.61522912979126,
"learning_rate": 1.7088e-07,
"loss": 0.1767,
"step": 8550
},
{
"epoch": 4.003267973856209,
"grad_norm": 5.084539413452148,
"learning_rate": 1.7138e-07,
"loss": 0.1768,
"step": 8575
},
{
"epoch": 4.014939309056956,
"grad_norm": 8.967703819274902,
"learning_rate": 1.7188e-07,
"loss": 0.1259,
"step": 8600
},
{
"epoch": 4.026610644257703,
"grad_norm": 5.355931758880615,
"learning_rate": 1.7236000000000002e-07,
"loss": 0.1866,
"step": 8625
},
{
"epoch": 4.03828197945845,
"grad_norm": 8.791220664978027,
"learning_rate": 1.7286e-07,
"loss": 0.1286,
"step": 8650
},
{
"epoch": 4.049953314659197,
"grad_norm": 6.436952590942383,
"learning_rate": 1.7335999999999999e-07,
"loss": 0.1782,
"step": 8675
},
{
"epoch": 4.061624649859944,
"grad_norm": 7.118254661560059,
"learning_rate": 1.7385999999999997e-07,
"loss": 0.1487,
"step": 8700
},
{
"epoch": 4.073295985060691,
"grad_norm": 4.484027862548828,
"learning_rate": 1.7435999999999998e-07,
"loss": 0.1915,
"step": 8725
},
{
"epoch": 4.084967320261438,
"grad_norm": 6.743505477905273,
"learning_rate": 1.7486e-07,
"loss": 0.1209,
"step": 8750
},
{
"epoch": 4.0966386554621845,
"grad_norm": 4.635137557983398,
"learning_rate": 1.7535999999999998e-07,
"loss": 0.1897,
"step": 8775
},
{
"epoch": 4.1083099906629315,
"grad_norm": 4.966923713684082,
"learning_rate": 1.7586e-07,
"loss": 0.1394,
"step": 8800
},
{
"epoch": 4.119981325863678,
"grad_norm": 4.779516696929932,
"learning_rate": 1.7635999999999997e-07,
"loss": 0.1839,
"step": 8825
},
{
"epoch": 4.131652661064426,
"grad_norm": 8.51559066772461,
"learning_rate": 1.7685999999999998e-07,
"loss": 0.1169,
"step": 8850
},
{
"epoch": 4.143323996265173,
"grad_norm": 4.456249237060547,
"learning_rate": 1.7736e-07,
"loss": 0.1903,
"step": 8875
},
{
"epoch": 4.15499533146592,
"grad_norm": 5.5919671058654785,
"learning_rate": 1.7785999999999998e-07,
"loss": 0.1338,
"step": 8900
},
{
"epoch": 4.166666666666667,
"grad_norm": 5.427141189575195,
"learning_rate": 1.7836e-07,
"loss": 0.1815,
"step": 8925
},
{
"epoch": 4.178338001867414,
"grad_norm": 5.67875337600708,
"learning_rate": 1.7885999999999998e-07,
"loss": 0.1258,
"step": 8950
},
{
"epoch": 4.190009337068161,
"grad_norm": 5.330212116241455,
"learning_rate": 1.7935999999999999e-07,
"loss": 0.1972,
"step": 8975
},
{
"epoch": 4.201680672268908,
"grad_norm": 7.678745269775391,
"learning_rate": 1.7985999999999997e-07,
"loss": 0.1486,
"step": 9000
},
{
"epoch": 4.213352007469655,
"grad_norm": 6.018349647521973,
"learning_rate": 1.8035999999999998e-07,
"loss": 0.1717,
"step": 9025
},
{
"epoch": 4.225023342670402,
"grad_norm": 6.979328155517578,
"learning_rate": 1.8086e-07,
"loss": 0.1444,
"step": 9050
},
{
"epoch": 4.2366946778711485,
"grad_norm": 4.748338222503662,
"learning_rate": 1.8135999999999998e-07,
"loss": 0.1776,
"step": 9075
},
{
"epoch": 4.248366013071895,
"grad_norm": 8.463362693786621,
"learning_rate": 1.8186e-07,
"loss": 0.1377,
"step": 9100
},
{
"epoch": 4.260037348272642,
"grad_norm": 4.122219085693359,
"learning_rate": 1.8235999999999997e-07,
"loss": 0.1752,
"step": 9125
},
{
"epoch": 4.271708683473389,
"grad_norm": 5.625559329986572,
"learning_rate": 1.8285999999999998e-07,
"loss": 0.1301,
"step": 9150
},
{
"epoch": 4.283380018674136,
"grad_norm": 5.3939385414123535,
"learning_rate": 1.8336e-07,
"loss": 0.193,
"step": 9175
},
{
"epoch": 4.295051353874883,
"grad_norm": 6.4233551025390625,
"learning_rate": 1.8385999999999998e-07,
"loss": 0.1424,
"step": 9200
},
{
"epoch": 4.30672268907563,
"grad_norm": 6.088770866394043,
"learning_rate": 1.8436e-07,
"loss": 0.1863,
"step": 9225
},
{
"epoch": 4.318394024276377,
"grad_norm": 8.557315826416016,
"learning_rate": 1.8485999999999998e-07,
"loss": 0.1227,
"step": 9250
},
{
"epoch": 4.330065359477124,
"grad_norm": 5.410427570343018,
"learning_rate": 1.8536e-07,
"loss": 0.1942,
"step": 9275
},
{
"epoch": 4.341736694677871,
"grad_norm": 4.211329460144043,
"learning_rate": 1.8586e-07,
"loss": 0.1457,
"step": 9300
},
{
"epoch": 4.353408029878618,
"grad_norm": 4.537903308868408,
"learning_rate": 1.8635999999999998e-07,
"loss": 0.18,
"step": 9325
},
{
"epoch": 4.365079365079365,
"grad_norm": 7.43745231628418,
"learning_rate": 1.8686e-07,
"loss": 0.1405,
"step": 9350
},
{
"epoch": 4.3767507002801125,
"grad_norm": 4.6163763999938965,
"learning_rate": 1.8735999999999998e-07,
"loss": 0.184,
"step": 9375
},
{
"epoch": 4.388422035480859,
"grad_norm": 4.933877944946289,
"learning_rate": 1.8786e-07,
"loss": 0.1333,
"step": 9400
},
{
"epoch": 4.400093370681606,
"grad_norm": 6.013834476470947,
"learning_rate": 1.8836e-07,
"loss": 0.1765,
"step": 9425
},
{
"epoch": 4.411764705882353,
"grad_norm": 4.945307731628418,
"learning_rate": 1.8885999999999999e-07,
"loss": 0.1286,
"step": 9450
},
{
"epoch": 4.4234360410831,
"grad_norm": 3.952646017074585,
"learning_rate": 1.8936e-07,
"loss": 0.1942,
"step": 9475
},
{
"epoch": 4.435107376283847,
"grad_norm": 8.337225914001465,
"learning_rate": 1.8985999999999998e-07,
"loss": 0.1381,
"step": 9500
},
{
"epoch": 4.446778711484594,
"grad_norm": 6.671125888824463,
"learning_rate": 1.9036e-07,
"loss": 0.1985,
"step": 9525
},
{
"epoch": 4.458450046685341,
"grad_norm": 6.973220348358154,
"learning_rate": 1.9086e-07,
"loss": 0.1275,
"step": 9550
},
{
"epoch": 4.470121381886088,
"grad_norm": 5.624568939208984,
"learning_rate": 1.9136e-07,
"loss": 0.1704,
"step": 9575
},
{
"epoch": 4.481792717086835,
"grad_norm": 7.6258745193481445,
"learning_rate": 1.9186e-07,
"loss": 0.1518,
"step": 9600
},
{
"epoch": 4.493464052287582,
"grad_norm": 4.0021185874938965,
"learning_rate": 1.9235999999999998e-07,
"loss": 0.1953,
"step": 9625
},
{
"epoch": 4.505135387488329,
"grad_norm": 6.774437427520752,
"learning_rate": 1.9286e-07,
"loss": 0.1527,
"step": 9650
},
{
"epoch": 4.516806722689076,
"grad_norm": 5.060838222503662,
"learning_rate": 1.9336e-07,
"loss": 0.2042,
"step": 9675
},
{
"epoch": 4.5284780578898225,
"grad_norm": 5.490878582000732,
"learning_rate": 1.9386e-07,
"loss": 0.1299,
"step": 9700
},
{
"epoch": 4.540149393090569,
"grad_norm": 5.598012447357178,
"learning_rate": 1.9436e-07,
"loss": 0.1717,
"step": 9725
},
{
"epoch": 4.551820728291316,
"grad_norm": 5.59892463684082,
"learning_rate": 1.9485999999999999e-07,
"loss": 0.1244,
"step": 9750
},
{
"epoch": 4.563492063492063,
"grad_norm": 4.751144886016846,
"learning_rate": 1.9536e-07,
"loss": 0.161,
"step": 9775
},
{
"epoch": 4.57516339869281,
"grad_norm": 6.7092671394348145,
"learning_rate": 1.9586e-07,
"loss": 0.1416,
"step": 9800
},
{
"epoch": 4.586834733893557,
"grad_norm": 4.288263320922852,
"learning_rate": 1.9636e-07,
"loss": 0.171,
"step": 9825
},
{
"epoch": 4.598506069094304,
"grad_norm": 8.770625114440918,
"learning_rate": 1.9686e-07,
"loss": 0.1334,
"step": 9850
},
{
"epoch": 4.610177404295051,
"grad_norm": 5.096324443817139,
"learning_rate": 1.9736e-07,
"loss": 0.1988,
"step": 9875
},
{
"epoch": 4.621848739495798,
"grad_norm": 4.740445613861084,
"learning_rate": 1.9786e-07,
"loss": 0.1476,
"step": 9900
},
{
"epoch": 4.633520074696545,
"grad_norm": 4.8285956382751465,
"learning_rate": 1.9836e-07,
"loss": 0.1912,
"step": 9925
},
{
"epoch": 4.645191409897293,
"grad_norm": 4.7548346519470215,
"learning_rate": 1.9886e-07,
"loss": 0.1305,
"step": 9950
},
{
"epoch": 4.6568627450980395,
"grad_norm": 4.447470188140869,
"learning_rate": 1.9936e-07,
"loss": 0.2013,
"step": 9975
},
{
"epoch": 4.6685340802987865,
"grad_norm": 6.167608261108398,
"learning_rate": 1.9986e-07,
"loss": 0.1471,
"step": 10000
},
{
"epoch": 4.6685340802987865,
"eval_loss": 0.15147170424461365,
"eval_runtime": 6575.0605,
"eval_samples_per_second": 1.432,
"eval_steps_per_second": 0.179,
"eval_wer": 0.09961246568706604,
"step": 10000
},
{
"epoch": 4.680205415499533,
"grad_norm": 3.9752037525177,
"learning_rate": 2.0036e-07,
"loss": 0.1705,
"step": 10025
},
{
"epoch": 4.69187675070028,
"grad_norm": 9.894227981567383,
"learning_rate": 2.0086e-07,
"loss": 0.1178,
"step": 10050
},
{
"epoch": 4.703548085901027,
"grad_norm": 5.56553840637207,
"learning_rate": 2.0136e-07,
"loss": 0.1849,
"step": 10075
},
{
"epoch": 4.715219421101774,
"grad_norm": 8.528691291809082,
"learning_rate": 2.0186e-07,
"loss": 0.1402,
"step": 10100
},
{
"epoch": 4.726890756302521,
"grad_norm": 5.351251125335693,
"learning_rate": 2.0236e-07,
"loss": 0.1812,
"step": 10125
},
{
"epoch": 4.738562091503268,
"grad_norm": 6.408919334411621,
"learning_rate": 2.0286e-07,
"loss": 0.132,
"step": 10150
},
{
"epoch": 4.750233426704015,
"grad_norm": 4.955003261566162,
"learning_rate": 2.0336000000000002e-07,
"loss": 0.1827,
"step": 10175
},
{
"epoch": 4.761904761904762,
"grad_norm": 9.441489219665527,
"learning_rate": 2.0386e-07,
"loss": 0.1711,
"step": 10200
},
{
"epoch": 4.773576097105509,
"grad_norm": 4.768829822540283,
"learning_rate": 2.0436e-07,
"loss": 0.1839,
"step": 10225
},
{
"epoch": 4.785247432306256,
"grad_norm": 8.283427238464355,
"learning_rate": 2.0485999999999997e-07,
"loss": 0.1308,
"step": 10250
},
{
"epoch": 4.796918767507003,
"grad_norm": 4.502756118774414,
"learning_rate": 2.0535999999999998e-07,
"loss": 0.1939,
"step": 10275
},
{
"epoch": 4.80859010270775,
"grad_norm": 6.445580959320068,
"learning_rate": 2.0585999999999997e-07,
"loss": 0.1366,
"step": 10300
},
{
"epoch": 4.8202614379084965,
"grad_norm": 5.302786350250244,
"learning_rate": 2.0635999999999998e-07,
"loss": 0.1733,
"step": 10325
},
{
"epoch": 4.831932773109243,
"grad_norm": 7.272347927093506,
"learning_rate": 2.0686e-07,
"loss": 0.1203,
"step": 10350
},
{
"epoch": 4.84360410830999,
"grad_norm": 4.1720170974731445,
"learning_rate": 2.0735999999999997e-07,
"loss": 0.1725,
"step": 10375
},
{
"epoch": 4.855275443510737,
"grad_norm": 4.301048755645752,
"learning_rate": 2.0785999999999998e-07,
"loss": 0.1177,
"step": 10400
},
{
"epoch": 4.866946778711485,
"grad_norm": 4.472489356994629,
"learning_rate": 2.0835999999999997e-07,
"loss": 0.1855,
"step": 10425
},
{
"epoch": 4.878618113912232,
"grad_norm": 7.996962070465088,
"learning_rate": 2.0885999999999998e-07,
"loss": 0.1327,
"step": 10450
},
{
"epoch": 4.890289449112979,
"grad_norm": 6.440398693084717,
"learning_rate": 2.0936e-07,
"loss": 0.1955,
"step": 10475
},
{
"epoch": 4.901960784313726,
"grad_norm": 7.208395481109619,
"learning_rate": 2.0985999999999997e-07,
"loss": 0.1387,
"step": 10500
},
{
"epoch": 4.913632119514473,
"grad_norm": 5.004977703094482,
"learning_rate": 2.1035999999999999e-07,
"loss": 0.1633,
"step": 10525
},
{
"epoch": 4.92530345471522,
"grad_norm": 3.838132381439209,
"learning_rate": 2.1085999999999997e-07,
"loss": 0.1151,
"step": 10550
},
{
"epoch": 4.936974789915967,
"grad_norm": 6.472508430480957,
"learning_rate": 2.1135999999999998e-07,
"loss": 0.1954,
"step": 10575
},
{
"epoch": 4.9486461251167135,
"grad_norm": 5.543705940246582,
"learning_rate": 2.1186e-07,
"loss": 0.1317,
"step": 10600
},
{
"epoch": 4.9603174603174605,
"grad_norm": 6.308438301086426,
"learning_rate": 2.1235999999999998e-07,
"loss": 0.1586,
"step": 10625
},
{
"epoch": 4.971988795518207,
"grad_norm": 7.787223815917969,
"learning_rate": 2.1286e-07,
"loss": 0.1266,
"step": 10650
},
{
"epoch": 4.983660130718954,
"grad_norm": 4.786161422729492,
"learning_rate": 2.1335999999999997e-07,
"loss": 0.2183,
"step": 10675
},
{
"epoch": 4.995331465919701,
"grad_norm": 5.482990264892578,
"learning_rate": 2.1385999999999998e-07,
"loss": 0.13,
"step": 10700
},
{
"epoch": 5.007002801120448,
"grad_norm": 4.17052698135376,
"learning_rate": 2.1434e-07,
"loss": 0.168,
"step": 10725
},
{
"epoch": 5.018674136321195,
"grad_norm": 7.545019149780273,
"learning_rate": 2.1483999999999998e-07,
"loss": 0.1227,
"step": 10750
},
{
"epoch": 5.030345471521942,
"grad_norm": 6.398622512817383,
"learning_rate": 2.1534e-07,
"loss": 0.1802,
"step": 10775
},
{
"epoch": 5.042016806722689,
"grad_norm": 6.926197052001953,
"learning_rate": 2.1584e-07,
"loss": 0.1188,
"step": 10800
},
{
"epoch": 5.053688141923436,
"grad_norm": 5.543834686279297,
"learning_rate": 2.1634e-07,
"loss": 0.1689,
"step": 10825
},
{
"epoch": 5.065359477124183,
"grad_norm": 5.125446796417236,
"learning_rate": 2.1684e-07,
"loss": 0.1124,
"step": 10850
},
{
"epoch": 5.07703081232493,
"grad_norm": 4.485465049743652,
"learning_rate": 2.1733999999999999e-07,
"loss": 0.1604,
"step": 10875
},
{
"epoch": 5.088702147525677,
"grad_norm": 12.635501861572266,
"learning_rate": 2.1784e-07,
"loss": 0.1181,
"step": 10900
},
{
"epoch": 5.1003734827264235,
"grad_norm": 6.018717288970947,
"learning_rate": 2.1834e-07,
"loss": 0.1523,
"step": 10925
},
{
"epoch": 5.1120448179271705,
"grad_norm": 8.683155059814453,
"learning_rate": 2.1884e-07,
"loss": 0.1214,
"step": 10950
},
{
"epoch": 5.123716153127917,
"grad_norm": 4.261901378631592,
"learning_rate": 2.1934e-07,
"loss": 0.1792,
"step": 10975
},
{
"epoch": 5.135387488328665,
"grad_norm": 7.0739264488220215,
"learning_rate": 2.1984e-07,
"loss": 0.1161,
"step": 11000
},
{
"epoch": 5.147058823529412,
"grad_norm": 6.2149529457092285,
"learning_rate": 2.2034e-07,
"loss": 0.1545,
"step": 11025
},
{
"epoch": 5.158730158730159,
"grad_norm": 9.735761642456055,
"learning_rate": 2.2084e-07,
"loss": 0.123,
"step": 11050
},
{
"epoch": 5.170401493930906,
"grad_norm": 2.7549943923950195,
"learning_rate": 2.2134e-07,
"loss": 0.1651,
"step": 11075
},
{
"epoch": 5.182072829131653,
"grad_norm": 13.182941436767578,
"learning_rate": 2.2184e-07,
"loss": 0.1333,
"step": 11100
},
{
"epoch": 5.1937441643324,
"grad_norm": 5.390936851501465,
"learning_rate": 2.2234e-07,
"loss": 0.1825,
"step": 11125
},
{
"epoch": 5.205415499533147,
"grad_norm": 5.555058479309082,
"learning_rate": 2.2284e-07,
"loss": 0.109,
"step": 11150
},
{
"epoch": 5.217086834733894,
"grad_norm": 3.377044916152954,
"learning_rate": 2.2334000000000001e-07,
"loss": 0.1604,
"step": 11175
},
{
"epoch": 5.228758169934641,
"grad_norm": 5.754917621612549,
"learning_rate": 2.2384e-07,
"loss": 0.1167,
"step": 11200
},
{
"epoch": 5.2404295051353875,
"grad_norm": 5.694931507110596,
"learning_rate": 2.2434e-07,
"loss": 0.1642,
"step": 11225
},
{
"epoch": 5.2521008403361344,
"grad_norm": 8.598726272583008,
"learning_rate": 2.2484e-07,
"loss": 0.1272,
"step": 11250
},
{
"epoch": 5.263772175536881,
"grad_norm": 5.686309814453125,
"learning_rate": 2.2534e-07,
"loss": 0.182,
"step": 11275
},
{
"epoch": 5.275443510737628,
"grad_norm": 7.420335292816162,
"learning_rate": 2.2584000000000002e-07,
"loss": 0.1181,
"step": 11300
},
{
"epoch": 5.287114845938375,
"grad_norm": 6.151350498199463,
"learning_rate": 2.2634e-07,
"loss": 0.1614,
"step": 11325
},
{
"epoch": 5.298786181139122,
"grad_norm": 7.9199957847595215,
"learning_rate": 2.2684e-07,
"loss": 0.13,
"step": 11350
},
{
"epoch": 5.310457516339869,
"grad_norm": 5.582814693450928,
"learning_rate": 2.2733999999999997e-07,
"loss": 0.1994,
"step": 11375
},
{
"epoch": 5.322128851540616,
"grad_norm": 8.254546165466309,
"learning_rate": 2.2783999999999998e-07,
"loss": 0.1087,
"step": 11400
},
{
"epoch": 5.333800186741363,
"grad_norm": 3.0663414001464844,
"learning_rate": 2.2833999999999997e-07,
"loss": 0.1602,
"step": 11425
},
{
"epoch": 5.34547152194211,
"grad_norm": 4.976311683654785,
"learning_rate": 2.2883999999999998e-07,
"loss": 0.1125,
"step": 11450
},
{
"epoch": 5.357142857142857,
"grad_norm": 5.26088285446167,
"learning_rate": 2.2934e-07,
"loss": 0.1631,
"step": 11475
},
{
"epoch": 5.368814192343605,
"grad_norm": 8.958911895751953,
"learning_rate": 2.2983999999999997e-07,
"loss": 0.1129,
"step": 11500
},
{
"epoch": 5.3804855275443515,
"grad_norm": 3.8142902851104736,
"learning_rate": 2.3033999999999998e-07,
"loss": 0.2008,
"step": 11525
},
{
"epoch": 5.392156862745098,
"grad_norm": 7.608828067779541,
"learning_rate": 2.3083999999999997e-07,
"loss": 0.1166,
"step": 11550
},
{
"epoch": 5.403828197945845,
"grad_norm": 4.0368475914001465,
"learning_rate": 2.3133999999999998e-07,
"loss": 0.154,
"step": 11575
},
{
"epoch": 5.415499533146592,
"grad_norm": 10.697487831115723,
"learning_rate": 2.3184e-07,
"loss": 0.1306,
"step": 11600
},
{
"epoch": 5.427170868347339,
"grad_norm": 7.596348762512207,
"learning_rate": 2.3233999999999997e-07,
"loss": 0.1723,
"step": 11625
},
{
"epoch": 5.438842203548086,
"grad_norm": 9.744882583618164,
"learning_rate": 2.3283999999999999e-07,
"loss": 0.129,
"step": 11650
},
{
"epoch": 5.450513538748833,
"grad_norm": 6.707164287567139,
"learning_rate": 2.3333999999999997e-07,
"loss": 0.1933,
"step": 11675
},
{
"epoch": 5.46218487394958,
"grad_norm": 5.012074947357178,
"learning_rate": 2.3383999999999998e-07,
"loss": 0.1223,
"step": 11700
},
{
"epoch": 5.473856209150327,
"grad_norm": 4.564844608306885,
"learning_rate": 2.3434e-07,
"loss": 0.1619,
"step": 11725
},
{
"epoch": 5.485527544351074,
"grad_norm": 6.225306034088135,
"learning_rate": 2.3483999999999998e-07,
"loss": 0.1143,
"step": 11750
},
{
"epoch": 5.497198879551821,
"grad_norm": 5.616468906402588,
"learning_rate": 2.3534e-07,
"loss": 0.1641,
"step": 11775
},
{
"epoch": 5.508870214752568,
"grad_norm": 5.898648738861084,
"learning_rate": 2.3583999999999997e-07,
"loss": 0.128,
"step": 11800
},
{
"epoch": 5.520541549953315,
"grad_norm": 5.743541717529297,
"learning_rate": 2.3633999999999998e-07,
"loss": 0.1671,
"step": 11825
},
{
"epoch": 5.5322128851540615,
"grad_norm": 9.724596977233887,
"learning_rate": 2.3684e-07,
"loss": 0.1263,
"step": 11850
},
{
"epoch": 5.543884220354808,
"grad_norm": 5.801641464233398,
"learning_rate": 2.3733999999999998e-07,
"loss": 0.1555,
"step": 11875
},
{
"epoch": 5.555555555555555,
"grad_norm": 6.424407958984375,
"learning_rate": 2.3784e-07,
"loss": 0.1172,
"step": 11900
},
{
"epoch": 5.567226890756302,
"grad_norm": 4.034692764282227,
"learning_rate": 2.3833999999999998e-07,
"loss": 0.1539,
"step": 11925
},
{
"epoch": 5.578898225957049,
"grad_norm": 7.315247058868408,
"learning_rate": 2.3884e-07,
"loss": 0.1376,
"step": 11950
},
{
"epoch": 5.590569561157796,
"grad_norm": 4.622725963592529,
"learning_rate": 2.3933999999999997e-07,
"loss": 0.1555,
"step": 11975
},
{
"epoch": 5.602240896358543,
"grad_norm": 7.289337635040283,
"learning_rate": 2.3984e-07,
"loss": 0.1465,
"step": 12000
},
{
"epoch": 5.61391223155929,
"grad_norm": 5.700815677642822,
"learning_rate": 2.4034e-07,
"loss": 0.1753,
"step": 12025
},
{
"epoch": 5.625583566760037,
"grad_norm": 10.472694396972656,
"learning_rate": 2.4084e-07,
"loss": 0.1078,
"step": 12050
},
{
"epoch": 5.637254901960784,
"grad_norm": 6.967726707458496,
"learning_rate": 2.4133999999999996e-07,
"loss": 0.1628,
"step": 12075
},
{
"epoch": 5.648926237161532,
"grad_norm": 5.44551944732666,
"learning_rate": 2.4184e-07,
"loss": 0.1265,
"step": 12100
},
{
"epoch": 5.660597572362279,
"grad_norm": 3.403899669647217,
"learning_rate": 2.4234e-07,
"loss": 0.1646,
"step": 12125
},
{
"epoch": 5.6722689075630255,
"grad_norm": 6.885541915893555,
"learning_rate": 2.4283999999999997e-07,
"loss": 0.1376,
"step": 12150
},
{
"epoch": 5.683940242763772,
"grad_norm": 5.3647050857543945,
"learning_rate": 2.4334e-07,
"loss": 0.1683,
"step": 12175
},
{
"epoch": 5.695611577964519,
"grad_norm": 9.983818054199219,
"learning_rate": 2.4384e-07,
"loss": 0.1342,
"step": 12200
},
{
"epoch": 5.707282913165266,
"grad_norm": 4.701688766479492,
"learning_rate": 2.4434e-07,
"loss": 0.1687,
"step": 12225
},
{
"epoch": 5.718954248366013,
"grad_norm": 4.64987850189209,
"learning_rate": 2.4484e-07,
"loss": 0.1083,
"step": 12250
},
{
"epoch": 5.73062558356676,
"grad_norm": 6.0408935546875,
"learning_rate": 2.4534e-07,
"loss": 0.1661,
"step": 12275
},
{
"epoch": 5.742296918767507,
"grad_norm": 13.088526725769043,
"learning_rate": 2.4584e-07,
"loss": 0.1211,
"step": 12300
},
{
"epoch": 5.753968253968254,
"grad_norm": 4.763770580291748,
"learning_rate": 2.4633999999999997e-07,
"loss": 0.1531,
"step": 12325
},
{
"epoch": 5.765639589169001,
"grad_norm": 7.281481742858887,
"learning_rate": 2.4684e-07,
"loss": 0.1197,
"step": 12350
},
{
"epoch": 5.777310924369748,
"grad_norm": 3.6176838874816895,
"learning_rate": 2.4734e-07,
"loss": 0.1586,
"step": 12375
},
{
"epoch": 5.788982259570495,
"grad_norm": 9.852710723876953,
"learning_rate": 2.4784e-07,
"loss": 0.1155,
"step": 12400
},
{
"epoch": 5.800653594771242,
"grad_norm": 7.409560680389404,
"learning_rate": 2.4834e-07,
"loss": 0.155,
"step": 12425
},
{
"epoch": 5.812324929971989,
"grad_norm": 5.356072425842285,
"learning_rate": 2.4884e-07,
"loss": 0.1158,
"step": 12450
},
{
"epoch": 5.8239962651727355,
"grad_norm": 5.186484336853027,
"learning_rate": 2.4934e-07,
"loss": 0.1471,
"step": 12475
},
{
"epoch": 5.835667600373482,
"grad_norm": 7.531067848205566,
"learning_rate": 2.4984e-07,
"loss": 0.1174,
"step": 12500
},
{
"epoch": 5.847338935574229,
"grad_norm": 5.400341987609863,
"learning_rate": 2.5034e-07,
"loss": 0.1815,
"step": 12525
},
{
"epoch": 5.859010270774976,
"grad_norm": 7.280223369598389,
"learning_rate": 2.5084e-07,
"loss": 0.1251,
"step": 12550
},
{
"epoch": 5.870681605975724,
"grad_norm": 5.493415832519531,
"learning_rate": 2.5133999999999997e-07,
"loss": 0.1661,
"step": 12575
},
{
"epoch": 5.882352941176471,
"grad_norm": 10.021145820617676,
"learning_rate": 2.5184e-07,
"loss": 0.1275,
"step": 12600
},
{
"epoch": 5.894024276377218,
"grad_norm": 6.028408050537109,
"learning_rate": 2.5234e-07,
"loss": 0.1629,
"step": 12625
},
{
"epoch": 5.905695611577965,
"grad_norm": 4.85552453994751,
"learning_rate": 2.5284e-07,
"loss": 0.1204,
"step": 12650
},
{
"epoch": 5.917366946778712,
"grad_norm": 7.91325569152832,
"learning_rate": 2.5334e-07,
"loss": 0.174,
"step": 12675
},
{
"epoch": 5.929038281979459,
"grad_norm": 9.452722549438477,
"learning_rate": 2.5384e-07,
"loss": 0.1164,
"step": 12700
},
{
"epoch": 5.940709617180206,
"grad_norm": 5.12371826171875,
"learning_rate": 2.5434e-07,
"loss": 0.1568,
"step": 12725
},
{
"epoch": 5.9523809523809526,
"grad_norm": 4.421220779418945,
"learning_rate": 2.5484e-07,
"loss": 0.1141,
"step": 12750
},
{
"epoch": 5.9640522875816995,
"grad_norm": 2.8665106296539307,
"learning_rate": 2.5534e-07,
"loss": 0.1596,
"step": 12775
},
{
"epoch": 5.975723622782446,
"grad_norm": 7.798137187957764,
"learning_rate": 2.5584e-07,
"loss": 0.1087,
"step": 12800
},
{
"epoch": 5.987394957983193,
"grad_norm": 7.315576076507568,
"learning_rate": 2.5634e-07,
"loss": 0.1696,
"step": 12825
},
{
"epoch": 5.99906629318394,
"grad_norm": 7.312651634216309,
"learning_rate": 2.5684e-07,
"loss": 0.1194,
"step": 12850
},
{
"epoch": 6.010737628384687,
"grad_norm": 4.936952590942383,
"learning_rate": 2.5732e-07,
"loss": 0.1514,
"step": 12875
},
{
"epoch": 6.022408963585434,
"grad_norm": 10.799747467041016,
"learning_rate": 2.5781999999999996e-07,
"loss": 0.1231,
"step": 12900
},
{
"epoch": 6.034080298786181,
"grad_norm": 4.585947036743164,
"learning_rate": 2.5832e-07,
"loss": 0.1211,
"step": 12925
},
{
"epoch": 6.045751633986928,
"grad_norm": 13.68216609954834,
"learning_rate": 2.5882e-07,
"loss": 0.139,
"step": 12950
},
{
"epoch": 6.057422969187675,
"grad_norm": 5.997958660125732,
"learning_rate": 2.5931999999999997e-07,
"loss": 0.1375,
"step": 12975
},
{
"epoch": 6.069094304388422,
"grad_norm": 11.332950592041016,
"learning_rate": 2.5982e-07,
"loss": 0.1285,
"step": 13000
},
{
"epoch": 6.080765639589169,
"grad_norm": 3.158031702041626,
"learning_rate": 2.6032e-07,
"loss": 0.1383,
"step": 13025
},
{
"epoch": 6.092436974789916,
"grad_norm": 13.571795463562012,
"learning_rate": 2.6082e-07,
"loss": 0.1301,
"step": 13050
},
{
"epoch": 6.104108309990663,
"grad_norm": 6.358757972717285,
"learning_rate": 2.6131999999999996e-07,
"loss": 0.1355,
"step": 13075
},
{
"epoch": 6.1157796451914095,
"grad_norm": 12.662508964538574,
"learning_rate": 2.6182e-07,
"loss": 0.1245,
"step": 13100
},
{
"epoch": 6.127450980392156,
"grad_norm": 4.365048885345459,
"learning_rate": 2.6232e-07,
"loss": 0.1395,
"step": 13125
},
{
"epoch": 6.139122315592904,
"grad_norm": 8.536576271057129,
"learning_rate": 2.6281999999999997e-07,
"loss": 0.1303,
"step": 13150
},
{
"epoch": 6.150793650793651,
"grad_norm": 2.988816738128662,
"learning_rate": 2.6332e-07,
"loss": 0.1242,
"step": 13175
},
{
"epoch": 6.162464985994398,
"grad_norm": 8.541171073913574,
"learning_rate": 2.6382e-07,
"loss": 0.1184,
"step": 13200
},
{
"epoch": 6.174136321195145,
"grad_norm": 5.187004566192627,
"learning_rate": 2.6432e-07,
"loss": 0.1415,
"step": 13225
},
{
"epoch": 6.185807656395892,
"grad_norm": 9.733490943908691,
"learning_rate": 2.6482e-07,
"loss": 0.11,
"step": 13250
},
{
"epoch": 6.197478991596639,
"grad_norm": 3.2871172428131104,
"learning_rate": 2.6532e-07,
"loss": 0.1563,
"step": 13275
},
{
"epoch": 6.209150326797386,
"grad_norm": 20.811479568481445,
"learning_rate": 2.6582e-07,
"loss": 0.1341,
"step": 13300
},
{
"epoch": 6.220821661998133,
"grad_norm": 5.399178504943848,
"learning_rate": 2.6631999999999997e-07,
"loss": 0.1308,
"step": 13325
},
{
"epoch": 6.23249299719888,
"grad_norm": 10.317353248596191,
"learning_rate": 2.6682e-07,
"loss": 0.1268,
"step": 13350
},
{
"epoch": 6.2441643323996265,
"grad_norm": 7.681791305541992,
"learning_rate": 2.6732e-07,
"loss": 0.1449,
"step": 13375
},
{
"epoch": 6.2558356676003735,
"grad_norm": 12.44479751586914,
"learning_rate": 2.6781999999999997e-07,
"loss": 0.1228,
"step": 13400
},
{
"epoch": 6.26750700280112,
"grad_norm": 5.903497695922852,
"learning_rate": 2.6832e-07,
"loss": 0.1262,
"step": 13425
},
{
"epoch": 6.279178338001867,
"grad_norm": 17.685346603393555,
"learning_rate": 2.6882e-07,
"loss": 0.1288,
"step": 13450
},
{
"epoch": 6.290849673202614,
"grad_norm": 3.951446533203125,
"learning_rate": 2.6932e-07,
"loss": 0.1213,
"step": 13475
},
{
"epoch": 6.302521008403361,
"grad_norm": 8.137782096862793,
"learning_rate": 2.6982e-07,
"loss": 0.1228,
"step": 13500
},
{
"epoch": 6.314192343604108,
"grad_norm": 8.63837718963623,
"learning_rate": 2.7032e-07,
"loss": 0.1414,
"step": 13525
},
{
"epoch": 6.325863678804855,
"grad_norm": 9.500225067138672,
"learning_rate": 2.7082e-07,
"loss": 0.1141,
"step": 13550
},
{
"epoch": 6.337535014005602,
"grad_norm": 5.4421844482421875,
"learning_rate": 2.7131999999999997e-07,
"loss": 0.1213,
"step": 13575
},
{
"epoch": 6.349206349206349,
"grad_norm": 7.188438892364502,
"learning_rate": 2.7182e-07,
"loss": 0.1235,
"step": 13600
},
{
"epoch": 6.360877684407096,
"grad_norm": 5.949901103973389,
"learning_rate": 2.7232e-07,
"loss": 0.1372,
"step": 13625
},
{
"epoch": 6.372549019607844,
"grad_norm": 11.207901000976562,
"learning_rate": 2.7282e-07,
"loss": 0.1236,
"step": 13650
},
{
"epoch": 6.3842203548085905,
"grad_norm": 6.0445122718811035,
"learning_rate": 2.7332e-07,
"loss": 0.1323,
"step": 13675
},
{
"epoch": 6.395891690009337,
"grad_norm": 11.870309829711914,
"learning_rate": 2.7382e-07,
"loss": 0.1236,
"step": 13700
},
{
"epoch": 6.407563025210084,
"grad_norm": 7.774009704589844,
"learning_rate": 2.7432e-07,
"loss": 0.1373,
"step": 13725
},
{
"epoch": 6.419234360410831,
"grad_norm": 6.658696174621582,
"learning_rate": 2.7482e-07,
"loss": 0.1348,
"step": 13750
},
{
"epoch": 6.430905695611578,
"grad_norm": 5.360461711883545,
"learning_rate": 2.7532e-07,
"loss": 0.1523,
"step": 13775
},
{
"epoch": 6.442577030812325,
"grad_norm": 11.454927444458008,
"learning_rate": 2.7582e-07,
"loss": 0.1448,
"step": 13800
},
{
"epoch": 6.454248366013072,
"grad_norm": 3.2537364959716797,
"learning_rate": 2.7632e-07,
"loss": 0.1094,
"step": 13825
},
{
"epoch": 6.465919701213819,
"grad_norm": 8.776263236999512,
"learning_rate": 2.7682e-07,
"loss": 0.1164,
"step": 13850
},
{
"epoch": 6.477591036414566,
"grad_norm": 6.700248718261719,
"learning_rate": 2.7732e-07,
"loss": 0.1287,
"step": 13875
},
{
"epoch": 6.489262371615313,
"grad_norm": 9.243896484375,
"learning_rate": 2.7782e-07,
"loss": 0.1346,
"step": 13900
},
{
"epoch": 6.50093370681606,
"grad_norm": 8.710789680480957,
"learning_rate": 2.7832e-07,
"loss": 0.1463,
"step": 13925
},
{
"epoch": 6.512605042016807,
"grad_norm": 10.246273040771484,
"learning_rate": 2.7882e-07,
"loss": 0.1217,
"step": 13950
},
{
"epoch": 6.524276377217554,
"grad_norm": 4.089282512664795,
"learning_rate": 2.7932e-07,
"loss": 0.1368,
"step": 13975
},
{
"epoch": 6.5359477124183005,
"grad_norm": 14.169453620910645,
"learning_rate": 2.7982000000000003e-07,
"loss": 0.1105,
"step": 14000
},
{
"epoch": 6.5476190476190474,
"grad_norm": 6.04651403427124,
"learning_rate": 2.8032e-07,
"loss": 0.1318,
"step": 14025
},
{
"epoch": 6.559290382819794,
"grad_norm": 9.370837211608887,
"learning_rate": 2.8082e-07,
"loss": 0.1282,
"step": 14050
},
{
"epoch": 6.570961718020541,
"grad_norm": 4.370868682861328,
"learning_rate": 2.8132e-07,
"loss": 0.144,
"step": 14075
},
{
"epoch": 6.582633053221288,
"grad_norm": 9.317498207092285,
"learning_rate": 2.8182e-07,
"loss": 0.1233,
"step": 14100
},
{
"epoch": 6.594304388422035,
"grad_norm": 2.9422969818115234,
"learning_rate": 2.8232e-07,
"loss": 0.1428,
"step": 14125
},
{
"epoch": 6.605975723622782,
"grad_norm": 12.039034843444824,
"learning_rate": 2.8282e-07,
"loss": 0.1146,
"step": 14150
},
{
"epoch": 6.617647058823529,
"grad_norm": 4.379167556762695,
"learning_rate": 2.8332e-07,
"loss": 0.131,
"step": 14175
},
{
"epoch": 6.629318394024276,
"grad_norm": 9.709012031555176,
"learning_rate": 2.8382e-07,
"loss": 0.1159,
"step": 14200
},
{
"epoch": 6.640989729225024,
"grad_norm": 8.104528427124023,
"learning_rate": 2.8432e-07,
"loss": 0.137,
"step": 14225
},
{
"epoch": 6.652661064425771,
"grad_norm": 12.878413200378418,
"learning_rate": 2.8482e-07,
"loss": 0.1048,
"step": 14250
},
{
"epoch": 6.664332399626518,
"grad_norm": 3.268336057662964,
"learning_rate": 2.8532e-07,
"loss": 0.1471,
"step": 14275
},
{
"epoch": 6.6760037348272645,
"grad_norm": 11.308536529541016,
"learning_rate": 2.8582e-07,
"loss": 0.123,
"step": 14300
},
{
"epoch": 6.687675070028011,
"grad_norm": 5.743576526641846,
"learning_rate": 2.8632e-07,
"loss": 0.1277,
"step": 14325
},
{
"epoch": 6.699346405228758,
"grad_norm": 6.817793369293213,
"learning_rate": 2.8682e-07,
"loss": 0.1313,
"step": 14350
},
{
"epoch": 6.711017740429505,
"grad_norm": 3.572624921798706,
"learning_rate": 2.8732e-07,
"loss": 0.121,
"step": 14375
},
{
"epoch": 6.722689075630252,
"grad_norm": 8.181254386901855,
"learning_rate": 2.8782e-07,
"loss": 0.1158,
"step": 14400
},
{
"epoch": 6.734360410830999,
"grad_norm": 4.736342906951904,
"learning_rate": 2.8832000000000003e-07,
"loss": 0.15,
"step": 14425
},
{
"epoch": 6.746031746031746,
"grad_norm": 18.210702896118164,
"learning_rate": 2.8882e-07,
"loss": 0.1373,
"step": 14450
},
{
"epoch": 6.757703081232493,
"grad_norm": 5.613450050354004,
"learning_rate": 2.8932e-07,
"loss": 0.148,
"step": 14475
},
{
"epoch": 6.76937441643324,
"grad_norm": 11.278425216674805,
"learning_rate": 2.8982e-07,
"loss": 0.1255,
"step": 14500
},
{
"epoch": 6.781045751633987,
"grad_norm": 3.2928617000579834,
"learning_rate": 2.9032e-07,
"loss": 0.1288,
"step": 14525
},
{
"epoch": 6.792717086834734,
"grad_norm": 12.555643081665039,
"learning_rate": 2.9082e-07,
"loss": 0.107,
"step": 14550
},
{
"epoch": 6.804388422035481,
"grad_norm": 4.838390350341797,
"learning_rate": 2.9132e-07,
"loss": 0.1224,
"step": 14575
},
{
"epoch": 6.816059757236228,
"grad_norm": 11.363154411315918,
"learning_rate": 2.9182000000000003e-07,
"loss": 0.1344,
"step": 14600
},
{
"epoch": 6.8277310924369745,
"grad_norm": 2.745389223098755,
"learning_rate": 2.9232e-07,
"loss": 0.1456,
"step": 14625
},
{
"epoch": 6.839402427637721,
"grad_norm": 11.974946975708008,
"learning_rate": 2.9282e-07,
"loss": 0.1163,
"step": 14650
},
{
"epoch": 6.851073762838468,
"grad_norm": 3.53490948677063,
"learning_rate": 2.9332000000000004e-07,
"loss": 0.1255,
"step": 14675
},
{
"epoch": 6.862745098039216,
"grad_norm": 8.966546058654785,
"learning_rate": 2.9382e-07,
"loss": 0.1198,
"step": 14700
},
{
"epoch": 6.874416433239963,
"grad_norm": 5.7963480949401855,
"learning_rate": 2.9432e-07,
"loss": 0.1321,
"step": 14725
},
{
"epoch": 6.88608776844071,
"grad_norm": 13.834965705871582,
"learning_rate": 2.9482e-07,
"loss": 0.1356,
"step": 14750
},
{
"epoch": 6.897759103641457,
"grad_norm": 4.368019104003906,
"learning_rate": 2.9532000000000003e-07,
"loss": 0.1247,
"step": 14775
},
{
"epoch": 6.909430438842204,
"grad_norm": 6.511091232299805,
"learning_rate": 2.9582e-07,
"loss": 0.1207,
"step": 14800
},
{
"epoch": 6.921101774042951,
"grad_norm": 4.450834274291992,
"learning_rate": 2.9631999999999994e-07,
"loss": 0.1333,
"step": 14825
},
{
"epoch": 6.932773109243698,
"grad_norm": 7.795094013214111,
"learning_rate": 2.9682e-07,
"loss": 0.1144,
"step": 14850
},
{
"epoch": 6.944444444444445,
"grad_norm": 6.080096244812012,
"learning_rate": 2.9731999999999997e-07,
"loss": 0.1518,
"step": 14875
},
{
"epoch": 6.956115779645192,
"grad_norm": 7.597021102905273,
"learning_rate": 2.9781999999999995e-07,
"loss": 0.1385,
"step": 14900
},
{
"epoch": 6.9677871148459385,
"grad_norm": 4.243095397949219,
"learning_rate": 2.9831999999999993e-07,
"loss": 0.1303,
"step": 14925
},
{
"epoch": 6.979458450046685,
"grad_norm": 10.551504135131836,
"learning_rate": 2.9881999999999997e-07,
"loss": 0.1364,
"step": 14950
},
{
"epoch": 6.991129785247432,
"grad_norm": 4.460564136505127,
"learning_rate": 2.9931999999999996e-07,
"loss": 0.1185,
"step": 14975
},
{
"epoch": 7.002801120448179,
"grad_norm": 5.397023677825928,
"learning_rate": 2.9981999999999994e-07,
"loss": 0.149,
"step": 15000
},
{
"epoch": 7.002801120448179,
"eval_loss": 0.14280347526073456,
"eval_runtime": 6589.115,
"eval_samples_per_second": 1.429,
"eval_steps_per_second": 0.179,
"eval_wer": 0.09492975940578072,
"step": 15000
},
{
"epoch": 7.014472455648926,
"grad_norm": 5.963614463806152,
"learning_rate": 6.006e-07,
"loss": 0.1013,
"step": 15025
},
{
"epoch": 7.026143790849673,
"grad_norm": 3.1698148250579834,
"learning_rate": 6.015599999999999e-07,
"loss": 0.1194,
"step": 15050
},
{
"epoch": 7.03781512605042,
"grad_norm": 6.9241180419921875,
"learning_rate": 6.025599999999999e-07,
"loss": 0.0989,
"step": 15075
},
{
"epoch": 7.049486461251167,
"grad_norm": 8.083003044128418,
"learning_rate": 6.0356e-07,
"loss": 0.1198,
"step": 15100
},
{
"epoch": 7.061157796451914,
"grad_norm": 9.302962303161621,
"learning_rate": 6.0456e-07,
"loss": 0.0863,
"step": 15125
},
{
"epoch": 7.072829131652661,
"grad_norm": 3.6642816066741943,
"learning_rate": 6.055599999999999e-07,
"loss": 0.1192,
"step": 15150
},
{
"epoch": 7.084500466853408,
"grad_norm": 5.131696701049805,
"learning_rate": 6.0656e-07,
"loss": 0.0944,
"step": 15175
},
{
"epoch": 7.096171802054155,
"grad_norm": 5.429873466491699,
"learning_rate": 6.0756e-07,
"loss": 0.1432,
"step": 15200
},
{
"epoch": 7.107843137254902,
"grad_norm": 2.794274091720581,
"learning_rate": 6.085599999999999e-07,
"loss": 0.0996,
"step": 15225
},
{
"epoch": 7.1195144724556485,
"grad_norm": 4.206586837768555,
"learning_rate": 6.0956e-07,
"loss": 0.1557,
"step": 15250
},
{
"epoch": 7.131185807656396,
"grad_norm": 4.8860087394714355,
"learning_rate": 6.1056e-07,
"loss": 0.0954,
"step": 15275
},
{
"epoch": 7.142857142857143,
"grad_norm": 5.189944744110107,
"learning_rate": 6.1156e-07,
"loss": 0.1637,
"step": 15300
},
{
"epoch": 7.15452847805789,
"grad_norm": 6.401843070983887,
"learning_rate": 6.125599999999999e-07,
"loss": 0.1137,
"step": 15325
},
{
"epoch": 7.166199813258637,
"grad_norm": 3.2334303855895996,
"learning_rate": 6.1356e-07,
"loss": 0.1411,
"step": 15350
},
{
"epoch": 7.177871148459384,
"grad_norm": 5.686134338378906,
"learning_rate": 6.1456e-07,
"loss": 0.0959,
"step": 15375
},
{
"epoch": 7.189542483660131,
"grad_norm": 5.280776023864746,
"learning_rate": 6.155599999999999e-07,
"loss": 0.1254,
"step": 15400
},
{
"epoch": 7.201213818860878,
"grad_norm": 10.093783378601074,
"learning_rate": 6.1656e-07,
"loss": 0.1124,
"step": 15425
},
{
"epoch": 7.212885154061625,
"grad_norm": 4.502685546875,
"learning_rate": 6.1756e-07,
"loss": 0.1333,
"step": 15450
},
{
"epoch": 7.224556489262372,
"grad_norm": 6.842624664306641,
"learning_rate": 6.1856e-07,
"loss": 0.1003,
"step": 15475
},
{
"epoch": 7.236227824463119,
"grad_norm": 4.324547290802002,
"learning_rate": 6.1956e-07,
"loss": 0.1222,
"step": 15500
},
{
"epoch": 7.2478991596638656,
"grad_norm": 5.093228340148926,
"learning_rate": 6.2056e-07,
"loss": 0.1144,
"step": 15525
},
{
"epoch": 7.2595704948646125,
"grad_norm": 4.086531639099121,
"learning_rate": 6.2156e-07,
"loss": 0.1592,
"step": 15550
},
{
"epoch": 7.271241830065359,
"grad_norm": 5.517257213592529,
"learning_rate": 6.225599999999999e-07,
"loss": 0.1114,
"step": 15575
},
{
"epoch": 7.282913165266106,
"grad_norm": 3.0472617149353027,
"learning_rate": 6.2356e-07,
"loss": 0.1257,
"step": 15600
},
{
"epoch": 7.294584500466853,
"grad_norm": 5.590120315551758,
"learning_rate": 6.2456e-07,
"loss": 0.1051,
"step": 15625
},
{
"epoch": 7.3062558356676,
"grad_norm": 4.192562103271484,
"learning_rate": 6.255599999999999e-07,
"loss": 0.1385,
"step": 15650
},
{
"epoch": 7.317927170868347,
"grad_norm": 6.378529071807861,
"learning_rate": 6.2656e-07,
"loss": 0.098,
"step": 15675
},
{
"epoch": 7.329598506069094,
"grad_norm": 3.6979291439056396,
"learning_rate": 6.2756e-07,
"loss": 0.1496,
"step": 15700
},
{
"epoch": 7.341269841269841,
"grad_norm": 5.2295122146606445,
"learning_rate": 6.2856e-07,
"loss": 0.0779,
"step": 15725
},
{
"epoch": 7.352941176470588,
"grad_norm": 3.9444265365600586,
"learning_rate": 6.295599999999999e-07,
"loss": 0.1394,
"step": 15750
},
{
"epoch": 7.364612511671335,
"grad_norm": 5.075552463531494,
"learning_rate": 6.3056e-07,
"loss": 0.1025,
"step": 15775
},
{
"epoch": 7.376283846872083,
"grad_norm": 4.6857733726501465,
"learning_rate": 6.3156e-07,
"loss": 0.149,
"step": 15800
},
{
"epoch": 7.3879551820728295,
"grad_norm": 3.647244930267334,
"learning_rate": 6.325599999999999e-07,
"loss": 0.0909,
"step": 15825
},
{
"epoch": 7.3996265172735765,
"grad_norm": 5.009864330291748,
"learning_rate": 6.3356e-07,
"loss": 0.1427,
"step": 15850
},
{
"epoch": 7.411297852474323,
"grad_norm": 7.3696513175964355,
"learning_rate": 6.3456e-07,
"loss": 0.1056,
"step": 15875
},
{
"epoch": 7.42296918767507,
"grad_norm": 5.746430397033691,
"learning_rate": 6.3556e-07,
"loss": 0.1283,
"step": 15900
},
{
"epoch": 7.434640522875817,
"grad_norm": 6.898996353149414,
"learning_rate": 6.3656e-07,
"loss": 0.0922,
"step": 15925
},
{
"epoch": 7.446311858076564,
"grad_norm": 4.499526023864746,
"learning_rate": 6.3756e-07,
"loss": 0.1294,
"step": 15950
},
{
"epoch": 7.457983193277311,
"grad_norm": 10.639655113220215,
"learning_rate": 6.3856e-07,
"loss": 0.108,
"step": 15975
},
{
"epoch": 7.469654528478058,
"grad_norm": 4.458117485046387,
"learning_rate": 6.395599999999999e-07,
"loss": 0.1426,
"step": 16000
},
{
"epoch": 7.481325863678805,
"grad_norm": 6.026330947875977,
"learning_rate": 6.4056e-07,
"loss": 0.1027,
"step": 16025
},
{
"epoch": 7.492997198879552,
"grad_norm": 8.034396171569824,
"learning_rate": 6.4156e-07,
"loss": 0.1368,
"step": 16050
},
{
"epoch": 7.504668534080299,
"grad_norm": 6.156011581420898,
"learning_rate": 6.4256e-07,
"loss": 0.1084,
"step": 16075
},
{
"epoch": 7.516339869281046,
"grad_norm": 4.8642168045043945,
"learning_rate": 6.4356e-07,
"loss": 0.1383,
"step": 16100
},
{
"epoch": 7.528011204481793,
"grad_norm": 6.108960151672363,
"learning_rate": 6.4456e-07,
"loss": 0.101,
"step": 16125
},
{
"epoch": 7.5396825396825395,
"grad_norm": 4.260036945343018,
"learning_rate": 6.4556e-07,
"loss": 0.1384,
"step": 16150
},
{
"epoch": 7.5513538748832865,
"grad_norm": 7.994537353515625,
"learning_rate": 6.4656e-07,
"loss": 0.1009,
"step": 16175
},
{
"epoch": 7.563025210084033,
"grad_norm": 5.753657817840576,
"learning_rate": 6.4756e-07,
"loss": 0.1383,
"step": 16200
},
{
"epoch": 7.57469654528478,
"grad_norm": 7.363603591918945,
"learning_rate": 6.4856e-07,
"loss": 0.1059,
"step": 16225
},
{
"epoch": 7.586367880485527,
"grad_norm": 5.041802883148193,
"learning_rate": 6.4956e-07,
"loss": 0.1257,
"step": 16250
},
{
"epoch": 7.598039215686274,
"grad_norm": 6.2316575050354,
"learning_rate": 6.5056e-07,
"loss": 0.1008,
"step": 16275
},
{
"epoch": 7.609710550887021,
"grad_norm": 3.6404504776000977,
"learning_rate": 6.5156e-07,
"loss": 0.1255,
"step": 16300
},
{
"epoch": 7.621381886087768,
"grad_norm": 5.228316783905029,
"learning_rate": 6.5256e-07,
"loss": 0.0888,
"step": 16325
},
{
"epoch": 7.633053221288515,
"grad_norm": 4.744984149932861,
"learning_rate": 6.5356e-07,
"loss": 0.1435,
"step": 16350
},
{
"epoch": 7.644724556489263,
"grad_norm": 6.733789443969727,
"learning_rate": 6.5456e-07,
"loss": 0.1008,
"step": 16375
},
{
"epoch": 7.65639589169001,
"grad_norm": 4.16718864440918,
"learning_rate": 6.5556e-07,
"loss": 0.1194,
"step": 16400
},
{
"epoch": 7.668067226890757,
"grad_norm": 2.9225594997406006,
"learning_rate": 6.5656e-07,
"loss": 0.0953,
"step": 16425
},
{
"epoch": 7.6797385620915035,
"grad_norm": 5.475734233856201,
"learning_rate": 6.5756e-07,
"loss": 0.1403,
"step": 16450
},
{
"epoch": 7.69140989729225,
"grad_norm": 5.1282477378845215,
"learning_rate": 6.5856e-07,
"loss": 0.1052,
"step": 16475
},
{
"epoch": 7.703081232492997,
"grad_norm": 5.281869411468506,
"learning_rate": 6.5956e-07,
"loss": 0.1438,
"step": 16500
},
{
"epoch": 7.714752567693744,
"grad_norm": 5.311507225036621,
"learning_rate": 6.6056e-07,
"loss": 0.1021,
"step": 16525
},
{
"epoch": 7.726423902894491,
"grad_norm": 3.4866130352020264,
"learning_rate": 6.6156e-07,
"loss": 0.1216,
"step": 16550
},
{
"epoch": 7.738095238095238,
"grad_norm": 9.126893997192383,
"learning_rate": 6.6256e-07,
"loss": 0.0901,
"step": 16575
},
{
"epoch": 7.749766573295985,
"grad_norm": 4.056077003479004,
"learning_rate": 6.6356e-07,
"loss": 0.1385,
"step": 16600
},
{
"epoch": 7.761437908496732,
"grad_norm": 11.753646850585938,
"learning_rate": 6.6456e-07,
"loss": 0.0845,
"step": 16625
},
{
"epoch": 7.773109243697479,
"grad_norm": 5.014488697052002,
"learning_rate": 6.6556e-07,
"loss": 0.1224,
"step": 16650
},
{
"epoch": 7.784780578898226,
"grad_norm": 6.308728218078613,
"learning_rate": 6.665600000000001e-07,
"loss": 0.0885,
"step": 16675
},
{
"epoch": 7.796451914098973,
"grad_norm": 2.8483879566192627,
"learning_rate": 6.6756e-07,
"loss": 0.1288,
"step": 16700
},
{
"epoch": 7.80812324929972,
"grad_norm": 8.292418479919434,
"learning_rate": 6.6856e-07,
"loss": 0.095,
"step": 16725
},
{
"epoch": 7.819794584500467,
"grad_norm": 4.9927263259887695,
"learning_rate": 6.6956e-07,
"loss": 0.1085,
"step": 16750
},
{
"epoch": 7.8314659197012135,
"grad_norm": 14.002336502075195,
"learning_rate": 6.7056e-07,
"loss": 0.0862,
"step": 16775
},
{
"epoch": 7.8431372549019605,
"grad_norm": 5.258606433868408,
"learning_rate": 6.7156e-07,
"loss": 0.1349,
"step": 16800
},
{
"epoch": 7.854808590102707,
"grad_norm": 6.183353900909424,
"learning_rate": 6.7256e-07,
"loss": 0.0962,
"step": 16825
},
{
"epoch": 7.866479925303455,
"grad_norm": 4.468369960784912,
"learning_rate": 6.735600000000001e-07,
"loss": 0.1222,
"step": 16850
},
{
"epoch": 7.878151260504202,
"grad_norm": 5.605790615081787,
"learning_rate": 6.7456e-07,
"loss": 0.1022,
"step": 16875
},
{
"epoch": 7.889822595704949,
"grad_norm": 3.859760284423828,
"learning_rate": 6.7556e-07,
"loss": 0.114,
"step": 16900
},
{
"epoch": 7.901493930905696,
"grad_norm": 7.612791061401367,
"learning_rate": 6.765600000000001e-07,
"loss": 0.0984,
"step": 16925
},
{
"epoch": 7.913165266106443,
"grad_norm": 3.680158853530884,
"learning_rate": 6.7756e-07,
"loss": 0.1247,
"step": 16950
},
{
"epoch": 7.92483660130719,
"grad_norm": 4.414365768432617,
"learning_rate": 6.7856e-07,
"loss": 0.0998,
"step": 16975
},
{
"epoch": 7.936507936507937,
"grad_norm": 4.558730125427246,
"learning_rate": 6.7956e-07,
"loss": 0.1391,
"step": 17000
},
{
"epoch": 7.948179271708684,
"grad_norm": 5.9088053703308105,
"learning_rate": 6.805600000000001e-07,
"loss": 0.0958,
"step": 17025
},
{
"epoch": 7.959850606909431,
"grad_norm": 4.314589977264404,
"learning_rate": 6.8152e-07,
"loss": 0.1399,
"step": 17050
},
{
"epoch": 7.9715219421101775,
"grad_norm": 6.758405685424805,
"learning_rate": 6.825199999999999e-07,
"loss": 0.1045,
"step": 17075
},
{
"epoch": 7.983193277310924,
"grad_norm": 5.049168109893799,
"learning_rate": 6.8352e-07,
"loss": 0.1305,
"step": 17100
},
{
"epoch": 7.994864612511671,
"grad_norm": 5.647031784057617,
"learning_rate": 6.8452e-07,
"loss": 0.1029,
"step": 17125
},
{
"epoch": 8.006535947712418,
"grad_norm": 2.389552354812622,
"learning_rate": 6.8552e-07,
"loss": 0.1187,
"step": 17150
},
{
"epoch": 8.018207282913165,
"grad_norm": 7.662734508514404,
"learning_rate": 6.8652e-07,
"loss": 0.0784,
"step": 17175
},
{
"epoch": 8.029878618113912,
"grad_norm": 4.091869354248047,
"learning_rate": 6.8752e-07,
"loss": 0.1117,
"step": 17200
},
{
"epoch": 8.041549953314659,
"grad_norm": 4.1873459815979,
"learning_rate": 6.8852e-07,
"loss": 0.0841,
"step": 17225
},
{
"epoch": 8.053221288515406,
"grad_norm": 1.8552043437957764,
"learning_rate": 6.895199999999999e-07,
"loss": 0.1085,
"step": 17250
},
{
"epoch": 8.064892623716153,
"grad_norm": 6.734919548034668,
"learning_rate": 6.9052e-07,
"loss": 0.0705,
"step": 17275
},
{
"epoch": 8.0765639589169,
"grad_norm": 5.204643726348877,
"learning_rate": 6.9152e-07,
"loss": 0.1131,
"step": 17300
},
{
"epoch": 8.088235294117647,
"grad_norm": 8.857121467590332,
"learning_rate": 6.9252e-07,
"loss": 0.0759,
"step": 17325
},
{
"epoch": 8.099906629318394,
"grad_norm": 3.829099655151367,
"learning_rate": 6.9352e-07,
"loss": 0.1171,
"step": 17350
},
{
"epoch": 8.11157796451914,
"grad_norm": 5.418254852294922,
"learning_rate": 6.9452e-07,
"loss": 0.0744,
"step": 17375
},
{
"epoch": 8.123249299719888,
"grad_norm": 4.435729026794434,
"learning_rate": 6.9552e-07,
"loss": 0.1098,
"step": 17400
},
{
"epoch": 8.134920634920634,
"grad_norm": 8.937211036682129,
"learning_rate": 6.9652e-07,
"loss": 0.0684,
"step": 17425
},
{
"epoch": 8.146591970121381,
"grad_norm": 4.719937801361084,
"learning_rate": 6.9752e-07,
"loss": 0.1071,
"step": 17450
},
{
"epoch": 8.158263305322128,
"grad_norm": 5.891451835632324,
"learning_rate": 6.9852e-07,
"loss": 0.0652,
"step": 17475
},
{
"epoch": 8.169934640522875,
"grad_norm": 2.8404786586761475,
"learning_rate": 6.9952e-07,
"loss": 0.0993,
"step": 17500
},
{
"epoch": 8.181605975723622,
"grad_norm": 6.352357387542725,
"learning_rate": 7.0052e-07,
"loss": 0.0816,
"step": 17525
},
{
"epoch": 8.193277310924369,
"grad_norm": 4.22411584854126,
"learning_rate": 7.0152e-07,
"loss": 0.1167,
"step": 17550
},
{
"epoch": 8.204948646125116,
"grad_norm": 7.104822158813477,
"learning_rate": 7.0252e-07,
"loss": 0.0873,
"step": 17575
},
{
"epoch": 8.216619981325863,
"grad_norm": 2.5561118125915527,
"learning_rate": 7.0352e-07,
"loss": 0.1045,
"step": 17600
},
{
"epoch": 8.22829131652661,
"grad_norm": 8.497262001037598,
"learning_rate": 7.0452e-07,
"loss": 0.0772,
"step": 17625
},
{
"epoch": 8.239962651727357,
"grad_norm": 4.319903373718262,
"learning_rate": 7.0552e-07,
"loss": 0.1291,
"step": 17650
},
{
"epoch": 8.251633986928105,
"grad_norm": 5.6005754470825195,
"learning_rate": 7.065200000000001e-07,
"loss": 0.0748,
"step": 17675
},
{
"epoch": 8.263305322128852,
"grad_norm": 4.352419853210449,
"learning_rate": 7.0752e-07,
"loss": 0.1162,
"step": 17700
},
{
"epoch": 8.2749766573296,
"grad_norm": 3.519353151321411,
"learning_rate": 7.0852e-07,
"loss": 0.0731,
"step": 17725
},
{
"epoch": 8.286647992530346,
"grad_norm": 4.001583576202393,
"learning_rate": 7.0952e-07,
"loss": 0.1132,
"step": 17750
},
{
"epoch": 8.298319327731093,
"grad_norm": 7.335994720458984,
"learning_rate": 7.1052e-07,
"loss": 0.0744,
"step": 17775
},
{
"epoch": 8.30999066293184,
"grad_norm": 4.729821681976318,
"learning_rate": 7.1152e-07,
"loss": 0.1145,
"step": 17800
},
{
"epoch": 8.321661998132587,
"grad_norm": 4.817710876464844,
"learning_rate": 7.1252e-07,
"loss": 0.0684,
"step": 17825
},
{
"epoch": 8.333333333333334,
"grad_norm": 3.727107524871826,
"learning_rate": 7.1352e-07,
"loss": 0.1014,
"step": 17850
},
{
"epoch": 8.34500466853408,
"grad_norm": 6.569036960601807,
"learning_rate": 7.1452e-07,
"loss": 0.0809,
"step": 17875
},
{
"epoch": 8.356676003734828,
"grad_norm": 3.9731063842773438,
"learning_rate": 7.1552e-07,
"loss": 0.116,
"step": 17900
},
{
"epoch": 8.368347338935575,
"grad_norm": 5.585522174835205,
"learning_rate": 7.165200000000001e-07,
"loss": 0.0708,
"step": 17925
},
{
"epoch": 8.380018674136322,
"grad_norm": 4.29518461227417,
"learning_rate": 7.1752e-07,
"loss": 0.1187,
"step": 17950
},
{
"epoch": 8.391690009337069,
"grad_norm": 5.51121187210083,
"learning_rate": 7.1852e-07,
"loss": 0.0718,
"step": 17975
},
{
"epoch": 8.403361344537815,
"grad_norm": 3.5351364612579346,
"learning_rate": 7.1952e-07,
"loss": 0.1168,
"step": 18000
},
{
"epoch": 8.415032679738562,
"grad_norm": 4.3715128898620605,
"learning_rate": 7.2052e-07,
"loss": 0.079,
"step": 18025
},
{
"epoch": 8.42670401493931,
"grad_norm": 3.5346896648406982,
"learning_rate": 7.2152e-07,
"loss": 0.1288,
"step": 18050
},
{
"epoch": 8.438375350140056,
"grad_norm": 6.278663635253906,
"learning_rate": 7.2252e-07,
"loss": 0.0774,
"step": 18075
},
{
"epoch": 8.450046685340803,
"grad_norm": 4.804433345794678,
"learning_rate": 7.235200000000001e-07,
"loss": 0.1094,
"step": 18100
},
{
"epoch": 8.46171802054155,
"grad_norm": 3.196178436279297,
"learning_rate": 7.2452e-07,
"loss": 0.0775,
"step": 18125
},
{
"epoch": 8.473389355742297,
"grad_norm": 3.008148193359375,
"learning_rate": 7.2552e-07,
"loss": 0.1186,
"step": 18150
},
{
"epoch": 8.485060690943044,
"grad_norm": 6.987017631530762,
"learning_rate": 7.2652e-07,
"loss": 0.0705,
"step": 18175
},
{
"epoch": 8.49673202614379,
"grad_norm": 3.8024909496307373,
"learning_rate": 7.275199999999999e-07,
"loss": 0.116,
"step": 18200
},
{
"epoch": 8.508403361344538,
"grad_norm": 6.151003837585449,
"learning_rate": 7.285199999999999e-07,
"loss": 0.0701,
"step": 18225
},
{
"epoch": 8.520074696545285,
"grad_norm": 1.7352893352508545,
"learning_rate": 7.295199999999999e-07,
"loss": 0.114,
"step": 18250
},
{
"epoch": 8.531746031746032,
"grad_norm": 7.302786827087402,
"learning_rate": 7.3052e-07,
"loss": 0.0665,
"step": 18275
},
{
"epoch": 8.543417366946779,
"grad_norm": 3.3787872791290283,
"learning_rate": 7.315199999999999e-07,
"loss": 0.1052,
"step": 18300
},
{
"epoch": 8.555088702147525,
"grad_norm": 6.7813920974731445,
"learning_rate": 7.325199999999999e-07,
"loss": 0.0875,
"step": 18325
},
{
"epoch": 8.566760037348272,
"grad_norm": 6.053928375244141,
"learning_rate": 7.3352e-07,
"loss": 0.1131,
"step": 18350
},
{
"epoch": 8.57843137254902,
"grad_norm": 11.57036018371582,
"learning_rate": 7.345199999999999e-07,
"loss": 0.0769,
"step": 18375
},
{
"epoch": 8.590102707749766,
"grad_norm": 3.816927671432495,
"learning_rate": 7.355199999999999e-07,
"loss": 0.1181,
"step": 18400
},
{
"epoch": 8.601774042950513,
"grad_norm": 4.542242527008057,
"learning_rate": 7.365199999999999e-07,
"loss": 0.0829,
"step": 18425
},
{
"epoch": 8.61344537815126,
"grad_norm": 3.25649356842041,
"learning_rate": 7.3752e-07,
"loss": 0.1144,
"step": 18450
},
{
"epoch": 8.625116713352007,
"grad_norm": 7.228991508483887,
"learning_rate": 7.385199999999999e-07,
"loss": 0.0807,
"step": 18475
},
{
"epoch": 8.636788048552754,
"grad_norm": 2.458822727203369,
"learning_rate": 7.395199999999999e-07,
"loss": 0.1036,
"step": 18500
},
{
"epoch": 8.6484593837535,
"grad_norm": 6.6406474113464355,
"learning_rate": 7.4052e-07,
"loss": 0.0792,
"step": 18525
},
{
"epoch": 8.660130718954248,
"grad_norm": 6.3346099853515625,
"learning_rate": 7.415199999999999e-07,
"loss": 0.1265,
"step": 18550
},
{
"epoch": 8.671802054154995,
"grad_norm": 5.170047760009766,
"learning_rate": 7.425199999999999e-07,
"loss": 0.0687,
"step": 18575
},
{
"epoch": 8.683473389355742,
"grad_norm": 5.245858669281006,
"learning_rate": 7.4352e-07,
"loss": 0.1178,
"step": 18600
},
{
"epoch": 8.695144724556489,
"grad_norm": 5.291413307189941,
"learning_rate": 7.445199999999999e-07,
"loss": 0.0817,
"step": 18625
},
{
"epoch": 8.706816059757235,
"grad_norm": 4.81880521774292,
"learning_rate": 7.455199999999999e-07,
"loss": 0.1066,
"step": 18650
},
{
"epoch": 8.718487394957982,
"grad_norm": 2.500437021255493,
"learning_rate": 7.465199999999999e-07,
"loss": 0.0817,
"step": 18675
},
{
"epoch": 8.73015873015873,
"grad_norm": 3.1665596961975098,
"learning_rate": 7.4752e-07,
"loss": 0.0836,
"step": 18700
},
{
"epoch": 8.741830065359476,
"grad_norm": 4.715977668762207,
"learning_rate": 7.485199999999999e-07,
"loss": 0.0695,
"step": 18725
},
{
"epoch": 8.753501400560225,
"grad_norm": 3.079907178878784,
"learning_rate": 7.495199999999999e-07,
"loss": 0.1352,
"step": 18750
},
{
"epoch": 8.76517273576097,
"grad_norm": 7.7203288078308105,
"learning_rate": 7.5052e-07,
"loss": 0.0754,
"step": 18775
},
{
"epoch": 8.776844070961719,
"grad_norm": 5.75888204574585,
"learning_rate": 7.515199999999999e-07,
"loss": 0.1045,
"step": 18800
},
{
"epoch": 8.788515406162466,
"grad_norm": 9.753093719482422,
"learning_rate": 7.525199999999999e-07,
"loss": 0.0754,
"step": 18825
},
{
"epoch": 8.800186741363213,
"grad_norm": 4.152544975280762,
"learning_rate": 7.535199999999999e-07,
"loss": 0.115,
"step": 18850
},
{
"epoch": 8.81185807656396,
"grad_norm": 6.212908744812012,
"learning_rate": 7.5452e-07,
"loss": 0.0836,
"step": 18875
},
{
"epoch": 8.823529411764707,
"grad_norm": 4.721496105194092,
"learning_rate": 7.555199999999999e-07,
"loss": 0.117,
"step": 18900
},
{
"epoch": 8.835200746965453,
"grad_norm": 8.515325546264648,
"learning_rate": 7.565199999999999e-07,
"loss": 0.0852,
"step": 18925
},
{
"epoch": 8.8468720821662,
"grad_norm": 5.124463081359863,
"learning_rate": 7.5752e-07,
"loss": 0.1089,
"step": 18950
},
{
"epoch": 8.858543417366947,
"grad_norm": 10.328991889953613,
"learning_rate": 7.585199999999999e-07,
"loss": 0.0688,
"step": 18975
},
{
"epoch": 8.870214752567694,
"grad_norm": 8.239870071411133,
"learning_rate": 7.595199999999999e-07,
"loss": 0.1033,
"step": 19000
},
{
"epoch": 8.881886087768441,
"grad_norm": 9.038163185119629,
"learning_rate": 7.6052e-07,
"loss": 0.0853,
"step": 19025
},
{
"epoch": 8.893557422969188,
"grad_norm": 3.644261598587036,
"learning_rate": 7.6152e-07,
"loss": 0.0984,
"step": 19050
},
{
"epoch": 8.905228758169935,
"grad_norm": 4.2948832511901855,
"learning_rate": 7.625199999999999e-07,
"loss": 0.0708,
"step": 19075
},
{
"epoch": 8.916900093370682,
"grad_norm": 3.6126201152801514,
"learning_rate": 7.635199999999999e-07,
"loss": 0.1205,
"step": 19100
},
{
"epoch": 8.928571428571429,
"grad_norm": 5.034170150756836,
"learning_rate": 7.6452e-07,
"loss": 0.0727,
"step": 19125
},
{
"epoch": 8.940242763772176,
"grad_norm": 3.311403274536133,
"learning_rate": 7.6548e-07,
"loss": 0.1083,
"step": 19150
},
{
"epoch": 8.951914098972923,
"grad_norm": 6.666570663452148,
"learning_rate": 7.6648e-07,
"loss": 0.0808,
"step": 19175
},
{
"epoch": 8.96358543417367,
"grad_norm": 2.835561513900757,
"learning_rate": 7.6748e-07,
"loss": 0.098,
"step": 19200
},
{
"epoch": 8.975256769374417,
"grad_norm": 3.3188419342041016,
"learning_rate": 7.6848e-07,
"loss": 0.0801,
"step": 19225
},
{
"epoch": 8.986928104575163,
"grad_norm": 4.5640788078308105,
"learning_rate": 7.6948e-07,
"loss": 0.102,
"step": 19250
},
{
"epoch": 8.99859943977591,
"grad_norm": 9.698177337646484,
"learning_rate": 7.704800000000001e-07,
"loss": 0.1022,
"step": 19275
},
{
"epoch": 9.010270774976657,
"grad_norm": 3.567379951477051,
"learning_rate": 7.7148e-07,
"loss": 0.0744,
"step": 19300
},
{
"epoch": 9.021942110177404,
"grad_norm": 5.79501485824585,
"learning_rate": 7.7248e-07,
"loss": 0.0616,
"step": 19325
},
{
"epoch": 9.033613445378151,
"grad_norm": 3.6958703994750977,
"learning_rate": 7.7348e-07,
"loss": 0.0797,
"step": 19350
},
{
"epoch": 9.045284780578898,
"grad_norm": 8.376121520996094,
"learning_rate": 7.744799999999999e-07,
"loss": 0.0573,
"step": 19375
},
{
"epoch": 9.056956115779645,
"grad_norm": 4.162479877471924,
"learning_rate": 7.754799999999999e-07,
"loss": 0.0947,
"step": 19400
},
{
"epoch": 9.068627450980392,
"grad_norm": 6.144433498382568,
"learning_rate": 7.764799999999999e-07,
"loss": 0.0545,
"step": 19425
},
{
"epoch": 9.080298786181139,
"grad_norm": 4.648292541503906,
"learning_rate": 7.774799999999999e-07,
"loss": 0.0824,
"step": 19450
},
{
"epoch": 9.091970121381886,
"grad_norm": 5.517236709594727,
"learning_rate": 7.784799999999999e-07,
"loss": 0.0697,
"step": 19475
},
{
"epoch": 9.103641456582633,
"grad_norm": 6.232855796813965,
"learning_rate": 7.794799999999999e-07,
"loss": 0.0764,
"step": 19500
},
{
"epoch": 9.11531279178338,
"grad_norm": 6.580794811248779,
"learning_rate": 7.8048e-07,
"loss": 0.0756,
"step": 19525
},
{
"epoch": 9.126984126984127,
"grad_norm": 5.505826950073242,
"learning_rate": 7.814799999999999e-07,
"loss": 0.0936,
"step": 19550
},
{
"epoch": 9.138655462184873,
"grad_norm": 13.38469409942627,
"learning_rate": 7.824799999999999e-07,
"loss": 0.0711,
"step": 19575
},
{
"epoch": 9.15032679738562,
"grad_norm": 8.618947982788086,
"learning_rate": 7.834799999999999e-07,
"loss": 0.0752,
"step": 19600
},
{
"epoch": 9.161998132586367,
"grad_norm": 7.913994789123535,
"learning_rate": 7.844799999999999e-07,
"loss": 0.0594,
"step": 19625
},
{
"epoch": 9.173669467787114,
"grad_norm": 2.6098523139953613,
"learning_rate": 7.854799999999999e-07,
"loss": 0.0849,
"step": 19650
},
{
"epoch": 9.185340802987861,
"grad_norm": 6.636572360992432,
"learning_rate": 7.864799999999999e-07,
"loss": 0.0559,
"step": 19675
},
{
"epoch": 9.197012138188608,
"grad_norm": 2.971862554550171,
"learning_rate": 7.8748e-07,
"loss": 0.0908,
"step": 19700
},
{
"epoch": 9.208683473389355,
"grad_norm": 9.039565086364746,
"learning_rate": 7.884799999999999e-07,
"loss": 0.0642,
"step": 19725
},
{
"epoch": 9.220354808590102,
"grad_norm": 2.7158315181732178,
"learning_rate": 7.894799999999999e-07,
"loss": 0.0642,
"step": 19750
},
{
"epoch": 9.232026143790849,
"grad_norm": 5.9499921798706055,
"learning_rate": 7.9048e-07,
"loss": 0.0608,
"step": 19775
},
{
"epoch": 9.243697478991596,
"grad_norm": 8.955631256103516,
"learning_rate": 7.914799999999999e-07,
"loss": 0.0813,
"step": 19800
},
{
"epoch": 9.255368814192344,
"grad_norm": 8.660055160522461,
"learning_rate": 7.924799999999999e-07,
"loss": 0.0663,
"step": 19825
},
{
"epoch": 9.267040149393091,
"grad_norm": 4.199616432189941,
"learning_rate": 7.934799999999999e-07,
"loss": 0.0805,
"step": 19850
},
{
"epoch": 9.278711484593838,
"grad_norm": 4.530280590057373,
"learning_rate": 7.9448e-07,
"loss": 0.0539,
"step": 19875
},
{
"epoch": 9.290382819794585,
"grad_norm": 3.6157238483428955,
"learning_rate": 7.954799999999999e-07,
"loss": 0.0789,
"step": 19900
},
{
"epoch": 9.302054154995332,
"grad_norm": 11.870729446411133,
"learning_rate": 7.964799999999999e-07,
"loss": 0.0753,
"step": 19925
},
{
"epoch": 9.313725490196079,
"grad_norm": 3.857879400253296,
"learning_rate": 7.9748e-07,
"loss": 0.0804,
"step": 19950
},
{
"epoch": 9.325396825396826,
"grad_norm": 9.552889823913574,
"learning_rate": 7.984799999999999e-07,
"loss": 0.0696,
"step": 19975
},
{
"epoch": 9.337068160597573,
"grad_norm": 4.108628749847412,
"learning_rate": 7.994799999999999e-07,
"loss": 0.0697,
"step": 20000
},
{
"epoch": 9.337068160597573,
"eval_loss": 0.1435898393392563,
"eval_runtime": 6476.4775,
"eval_samples_per_second": 1.454,
"eval_steps_per_second": 0.182,
"eval_wer": 0.0940416599386404,
"step": 20000
},
{
"epoch": 9.34873949579832,
"grad_norm": 16.435834884643555,
"learning_rate": 8.0048e-07,
"loss": 0.0697,
"step": 20025
},
{
"epoch": 9.360410830999067,
"grad_norm": 2.7345032691955566,
"learning_rate": 8.0148e-07,
"loss": 0.0808,
"step": 20050
},
{
"epoch": 9.372082166199814,
"grad_norm": 7.900310039520264,
"learning_rate": 8.024799999999999e-07,
"loss": 0.0642,
"step": 20075
},
{
"epoch": 9.38375350140056,
"grad_norm": 5.272299289703369,
"learning_rate": 8.034799999999999e-07,
"loss": 0.0912,
"step": 20100
},
{
"epoch": 9.395424836601308,
"grad_norm": 7.384624004364014,
"learning_rate": 8.0448e-07,
"loss": 0.0689,
"step": 20125
},
{
"epoch": 9.407096171802054,
"grad_norm": 6.52332067489624,
"learning_rate": 8.054799999999999e-07,
"loss": 0.0758,
"step": 20150
},
{
"epoch": 9.418767507002801,
"grad_norm": 7.095821380615234,
"learning_rate": 8.064799999999999e-07,
"loss": 0.0928,
"step": 20175
},
{
"epoch": 9.430438842203548,
"grad_norm": 7.10612154006958,
"learning_rate": 8.0748e-07,
"loss": 0.0813,
"step": 20200
},
{
"epoch": 9.442110177404295,
"grad_norm": 2.9239766597747803,
"learning_rate": 8.084799999999999e-07,
"loss": 0.0512,
"step": 20225
},
{
"epoch": 9.453781512605042,
"grad_norm": 5.488339424133301,
"learning_rate": 8.094799999999999e-07,
"loss": 0.1023,
"step": 20250
},
{
"epoch": 9.465452847805789,
"grad_norm": 14.871928215026855,
"learning_rate": 8.1048e-07,
"loss": 0.0733,
"step": 20275
},
{
"epoch": 9.477124183006536,
"grad_norm": 4.915029525756836,
"learning_rate": 8.1148e-07,
"loss": 0.0781,
"step": 20300
},
{
"epoch": 9.488795518207283,
"grad_norm": 11.16457748413086,
"learning_rate": 8.124799999999999e-07,
"loss": 0.0824,
"step": 20325
},
{
"epoch": 9.50046685340803,
"grad_norm": 4.7055535316467285,
"learning_rate": 8.134799999999999e-07,
"loss": 0.0715,
"step": 20350
},
{
"epoch": 9.512138188608777,
"grad_norm": 9.46976375579834,
"learning_rate": 8.1448e-07,
"loss": 0.0622,
"step": 20375
},
{
"epoch": 9.523809523809524,
"grad_norm": 4.587946891784668,
"learning_rate": 8.154799999999999e-07,
"loss": 0.0973,
"step": 20400
},
{
"epoch": 9.53548085901027,
"grad_norm": 10.611247062683105,
"learning_rate": 8.164799999999999e-07,
"loss": 0.0701,
"step": 20425
},
{
"epoch": 9.547152194211018,
"grad_norm": 5.382807731628418,
"learning_rate": 8.1748e-07,
"loss": 0.0799,
"step": 20450
},
{
"epoch": 9.558823529411764,
"grad_norm": 12.089332580566406,
"learning_rate": 8.1848e-07,
"loss": 0.0705,
"step": 20475
},
{
"epoch": 9.570494864612511,
"grad_norm": 3.3015291690826416,
"learning_rate": 8.194799999999999e-07,
"loss": 0.0713,
"step": 20500
},
{
"epoch": 9.582166199813258,
"grad_norm": 7.883571147918701,
"learning_rate": 8.2048e-07,
"loss": 0.077,
"step": 20525
},
{
"epoch": 9.593837535014005,
"grad_norm": 5.369983673095703,
"learning_rate": 8.2148e-07,
"loss": 0.0812,
"step": 20550
},
{
"epoch": 9.605508870214752,
"grad_norm": 11.33123779296875,
"learning_rate": 8.224799999999999e-07,
"loss": 0.077,
"step": 20575
},
{
"epoch": 9.6171802054155,
"grad_norm": 6.490606307983398,
"learning_rate": 8.234799999999999e-07,
"loss": 0.082,
"step": 20600
},
{
"epoch": 9.628851540616246,
"grad_norm": 13.423641204833984,
"learning_rate": 8.2448e-07,
"loss": 0.0616,
"step": 20625
},
{
"epoch": 9.640522875816993,
"grad_norm": 5.520218372344971,
"learning_rate": 8.2548e-07,
"loss": 0.0772,
"step": 20650
},
{
"epoch": 9.65219421101774,
"grad_norm": 7.892084121704102,
"learning_rate": 8.264799999999999e-07,
"loss": 0.0821,
"step": 20675
},
{
"epoch": 9.663865546218487,
"grad_norm": 1.9956510066986084,
"learning_rate": 8.2748e-07,
"loss": 0.0863,
"step": 20700
},
{
"epoch": 9.675536881419234,
"grad_norm": 12.01615047454834,
"learning_rate": 8.2848e-07,
"loss": 0.0686,
"step": 20725
},
{
"epoch": 9.68720821661998,
"grad_norm": 4.383852481842041,
"learning_rate": 8.294799999999999e-07,
"loss": 0.0783,
"step": 20750
},
{
"epoch": 9.698879551820728,
"grad_norm": 9.427849769592285,
"learning_rate": 8.3048e-07,
"loss": 0.0696,
"step": 20775
},
{
"epoch": 9.710550887021475,
"grad_norm": 3.1968441009521484,
"learning_rate": 8.3148e-07,
"loss": 0.0824,
"step": 20800
},
{
"epoch": 9.722222222222221,
"grad_norm": 5.970606803894043,
"learning_rate": 8.3248e-07,
"loss": 0.0745,
"step": 20825
},
{
"epoch": 9.733893557422968,
"grad_norm": 3.773395538330078,
"learning_rate": 8.334799999999999e-07,
"loss": 0.087,
"step": 20850
},
{
"epoch": 9.745564892623715,
"grad_norm": 12.782556533813477,
"learning_rate": 8.3448e-07,
"loss": 0.0669,
"step": 20875
},
{
"epoch": 9.757236227824464,
"grad_norm": 3.228957176208496,
"learning_rate": 8.3548e-07,
"loss": 0.0844,
"step": 20900
},
{
"epoch": 9.768907563025211,
"grad_norm": 9.885223388671875,
"learning_rate": 8.364799999999999e-07,
"loss": 0.0724,
"step": 20925
},
{
"epoch": 9.780578898225958,
"grad_norm": 3.015071153640747,
"learning_rate": 8.3748e-07,
"loss": 0.0784,
"step": 20950
},
{
"epoch": 9.792250233426705,
"grad_norm": 7.431763172149658,
"learning_rate": 8.3848e-07,
"loss": 0.0782,
"step": 20975
},
{
"epoch": 9.803921568627452,
"grad_norm": 3.960148334503174,
"learning_rate": 8.394799999999999e-07,
"loss": 0.0765,
"step": 21000
},
{
"epoch": 9.815592903828199,
"grad_norm": 11.450105667114258,
"learning_rate": 8.4048e-07,
"loss": 0.0757,
"step": 21025
},
{
"epoch": 9.827264239028946,
"grad_norm": 2.3904082775115967,
"learning_rate": 8.4148e-07,
"loss": 0.0887,
"step": 21050
},
{
"epoch": 9.838935574229692,
"grad_norm": 5.4470086097717285,
"learning_rate": 8.4248e-07,
"loss": 0.0824,
"step": 21075
},
{
"epoch": 9.85060690943044,
"grad_norm": 2.513823986053467,
"learning_rate": 8.434799999999999e-07,
"loss": 0.0729,
"step": 21100
},
{
"epoch": 9.862278244631186,
"grad_norm": 14.719958305358887,
"learning_rate": 8.4448e-07,
"loss": 0.0705,
"step": 21125
},
{
"epoch": 9.873949579831933,
"grad_norm": 5.428534984588623,
"learning_rate": 8.4548e-07,
"loss": 0.0861,
"step": 21150
},
{
"epoch": 9.88562091503268,
"grad_norm": 5.955714702606201,
"learning_rate": 8.464799999999999e-07,
"loss": 0.0672,
"step": 21175
},
{
"epoch": 9.897292250233427,
"grad_norm": 7.154689788818359,
"learning_rate": 8.4744e-07,
"loss": 0.0954,
"step": 21200
},
{
"epoch": 9.908963585434174,
"grad_norm": 7.696076393127441,
"learning_rate": 8.484399999999999e-07,
"loss": 0.0617,
"step": 21225
},
{
"epoch": 9.920634920634921,
"grad_norm": 5.049380779266357,
"learning_rate": 8.494399999999999e-07,
"loss": 0.0775,
"step": 21250
},
{
"epoch": 9.932306255835668,
"grad_norm": 9.34526252746582,
"learning_rate": 8.5044e-07,
"loss": 0.0749,
"step": 21275
},
{
"epoch": 9.943977591036415,
"grad_norm": 7.990287780761719,
"learning_rate": 8.5144e-07,
"loss": 0.0865,
"step": 21300
},
{
"epoch": 9.955648926237162,
"grad_norm": 10.391454696655273,
"learning_rate": 8.524399999999999e-07,
"loss": 0.0658,
"step": 21325
},
{
"epoch": 9.967320261437909,
"grad_norm": 5.023215293884277,
"learning_rate": 8.534399999999999e-07,
"loss": 0.0713,
"step": 21350
},
{
"epoch": 9.978991596638656,
"grad_norm": 12.558219909667969,
"learning_rate": 8.5444e-07,
"loss": 0.0764,
"step": 21375
},
{
"epoch": 9.990662931839402,
"grad_norm": 4.4138336181640625,
"learning_rate": 8.554399999999999e-07,
"loss": 0.076,
"step": 21400
},
{
"epoch": 10.00233426704015,
"grad_norm": 2.9889421463012695,
"learning_rate": 8.564399999999999e-07,
"loss": 0.0805,
"step": 21425
},
{
"epoch": 10.014005602240896,
"grad_norm": 3.3753228187561035,
"learning_rate": 8.5744e-07,
"loss": 0.0487,
"step": 21450
},
{
"epoch": 10.025676937441643,
"grad_norm": 3.1645426750183105,
"learning_rate": 8.5844e-07,
"loss": 0.0719,
"step": 21475
},
{
"epoch": 10.03734827264239,
"grad_norm": 9.965222358703613,
"learning_rate": 8.594399999999999e-07,
"loss": 0.0453,
"step": 21500
},
{
"epoch": 10.049019607843137,
"grad_norm": 4.585392475128174,
"learning_rate": 8.6044e-07,
"loss": 0.0813,
"step": 21525
},
{
"epoch": 10.060690943043884,
"grad_norm": 2.651890277862549,
"learning_rate": 8.6144e-07,
"loss": 0.0441,
"step": 21550
},
{
"epoch": 10.072362278244631,
"grad_norm": 3.137420415878296,
"learning_rate": 8.624399999999999e-07,
"loss": 0.0776,
"step": 21575
},
{
"epoch": 10.084033613445378,
"grad_norm": 3.0008487701416016,
"learning_rate": 8.634399999999999e-07,
"loss": 0.0549,
"step": 21600
},
{
"epoch": 10.095704948646125,
"grad_norm": 5.419103145599365,
"learning_rate": 8.6444e-07,
"loss": 0.0807,
"step": 21625
},
{
"epoch": 10.107376283846872,
"grad_norm": 4.442772388458252,
"learning_rate": 8.654399999999999e-07,
"loss": 0.0473,
"step": 21650
},
{
"epoch": 10.119047619047619,
"grad_norm": 7.645913600921631,
"learning_rate": 8.664399999999999e-07,
"loss": 0.0832,
"step": 21675
},
{
"epoch": 10.130718954248366,
"grad_norm": 5.763233184814453,
"learning_rate": 8.6744e-07,
"loss": 0.0564,
"step": 21700
},
{
"epoch": 10.142390289449112,
"grad_norm": 2.8492894172668457,
"learning_rate": 8.6844e-07,
"loss": 0.0722,
"step": 21725
},
{
"epoch": 10.15406162464986,
"grad_norm": 5.006544589996338,
"learning_rate": 8.694399999999999e-07,
"loss": 0.0494,
"step": 21750
},
{
"epoch": 10.165732959850606,
"grad_norm": 3.509387731552124,
"learning_rate": 8.7044e-07,
"loss": 0.0764,
"step": 21775
},
{
"epoch": 10.177404295051353,
"grad_norm": 4.277322769165039,
"learning_rate": 8.7144e-07,
"loss": 0.0387,
"step": 21800
},
{
"epoch": 10.1890756302521,
"grad_norm": 3.3739705085754395,
"learning_rate": 8.724399999999999e-07,
"loss": 0.0558,
"step": 21825
},
{
"epoch": 10.200746965452847,
"grad_norm": 2.4098832607269287,
"learning_rate": 8.734399999999999e-07,
"loss": 0.041,
"step": 21850
},
{
"epoch": 10.212418300653594,
"grad_norm": 4.2443108558654785,
"learning_rate": 8.7444e-07,
"loss": 0.0614,
"step": 21875
},
{
"epoch": 10.224089635854341,
"grad_norm": 7.186864376068115,
"learning_rate": 8.7544e-07,
"loss": 0.0478,
"step": 21900
},
{
"epoch": 10.235760971055088,
"grad_norm": 3.2214882373809814,
"learning_rate": 8.764399999999999e-07,
"loss": 0.0596,
"step": 21925
},
{
"epoch": 10.247432306255835,
"grad_norm": 1.4043220281600952,
"learning_rate": 8.7744e-07,
"loss": 0.0558,
"step": 21950
},
{
"epoch": 10.259103641456583,
"grad_norm": 3.13395357131958,
"learning_rate": 8.7844e-07,
"loss": 0.0751,
"step": 21975
},
{
"epoch": 10.27077497665733,
"grad_norm": 3.777238368988037,
"learning_rate": 8.794399999999999e-07,
"loss": 0.0449,
"step": 22000
},
{
"epoch": 10.282446311858077,
"grad_norm": 3.8136322498321533,
"learning_rate": 8.804399999999999e-07,
"loss": 0.0741,
"step": 22025
},
{
"epoch": 10.294117647058824,
"grad_norm": 2.8861405849456787,
"learning_rate": 8.8144e-07,
"loss": 0.0514,
"step": 22050
},
{
"epoch": 10.305788982259571,
"grad_norm": 4.029909133911133,
"learning_rate": 8.8244e-07,
"loss": 0.0715,
"step": 22075
},
{
"epoch": 10.317460317460318,
"grad_norm": 9.571359634399414,
"learning_rate": 8.834399999999999e-07,
"loss": 0.0442,
"step": 22100
},
{
"epoch": 10.329131652661065,
"grad_norm": 4.114884376525879,
"learning_rate": 8.8444e-07,
"loss": 0.0752,
"step": 22125
},
{
"epoch": 10.340802987861812,
"grad_norm": 4.394742965698242,
"learning_rate": 8.8544e-07,
"loss": 0.046,
"step": 22150
},
{
"epoch": 10.352474323062559,
"grad_norm": 3.1479573249816895,
"learning_rate": 8.864399999999999e-07,
"loss": 0.067,
"step": 22175
},
{
"epoch": 10.364145658263306,
"grad_norm": 2.1959614753723145,
"learning_rate": 8.8744e-07,
"loss": 0.0495,
"step": 22200
},
{
"epoch": 10.375816993464053,
"grad_norm": 4.033957481384277,
"learning_rate": 8.8844e-07,
"loss": 0.0587,
"step": 22225
},
{
"epoch": 10.3874883286648,
"grad_norm": 6.8924736976623535,
"learning_rate": 8.8944e-07,
"loss": 0.0517,
"step": 22250
},
{
"epoch": 10.399159663865547,
"grad_norm": 3.8382160663604736,
"learning_rate": 8.904399999999999e-07,
"loss": 0.0678,
"step": 22275
},
{
"epoch": 10.410830999066294,
"grad_norm": 10.509393692016602,
"learning_rate": 8.9144e-07,
"loss": 0.048,
"step": 22300
},
{
"epoch": 10.42250233426704,
"grad_norm": 3.9929986000061035,
"learning_rate": 8.9244e-07,
"loss": 0.0707,
"step": 22325
},
{
"epoch": 10.434173669467787,
"grad_norm": 4.263001918792725,
"learning_rate": 8.934399999999999e-07,
"loss": 0.0534,
"step": 22350
},
{
"epoch": 10.445845004668534,
"grad_norm": 4.735872745513916,
"learning_rate": 8.9444e-07,
"loss": 0.0714,
"step": 22375
},
{
"epoch": 10.457516339869281,
"grad_norm": 2.775026321411133,
"learning_rate": 8.9544e-07,
"loss": 0.0599,
"step": 22400
},
{
"epoch": 10.469187675070028,
"grad_norm": 4.69821834564209,
"learning_rate": 8.964399999999999e-07,
"loss": 0.0679,
"step": 22425
},
{
"epoch": 10.480859010270775,
"grad_norm": 4.804164886474609,
"learning_rate": 8.9744e-07,
"loss": 0.0382,
"step": 22450
},
{
"epoch": 10.492530345471522,
"grad_norm": 4.525900840759277,
"learning_rate": 8.9844e-07,
"loss": 0.0723,
"step": 22475
},
{
"epoch": 10.504201680672269,
"grad_norm": 9.724448204040527,
"learning_rate": 8.9944e-07,
"loss": 0.0558,
"step": 22500
},
{
"epoch": 10.515873015873016,
"grad_norm": 3.797886848449707,
"learning_rate": 9.004399999999999e-07,
"loss": 0.0665,
"step": 22525
},
{
"epoch": 10.527544351073763,
"grad_norm": 4.131737232208252,
"learning_rate": 9.0144e-07,
"loss": 0.0463,
"step": 22550
},
{
"epoch": 10.53921568627451,
"grad_norm": 4.074097633361816,
"learning_rate": 9.0244e-07,
"loss": 0.0631,
"step": 22575
},
{
"epoch": 10.550887021475257,
"grad_norm": 6.83477258682251,
"learning_rate": 9.034399999999999e-07,
"loss": 0.0453,
"step": 22600
},
{
"epoch": 10.562558356676004,
"grad_norm": 4.893357276916504,
"learning_rate": 9.0444e-07,
"loss": 0.0579,
"step": 22625
},
{
"epoch": 10.57422969187675,
"grad_norm": 3.0031166076660156,
"learning_rate": 9.0544e-07,
"loss": 0.0468,
"step": 22650
},
{
"epoch": 10.585901027077497,
"grad_norm": 2.353924036026001,
"learning_rate": 9.0644e-07,
"loss": 0.0769,
"step": 22675
},
{
"epoch": 10.597572362278244,
"grad_norm": 4.528254985809326,
"learning_rate": 9.0744e-07,
"loss": 0.0457,
"step": 22700
},
{
"epoch": 10.609243697478991,
"grad_norm": 3.255937099456787,
"learning_rate": 9.0844e-07,
"loss": 0.07,
"step": 22725
},
{
"epoch": 10.620915032679738,
"grad_norm": 5.159088611602783,
"learning_rate": 9.0944e-07,
"loss": 0.0592,
"step": 22750
},
{
"epoch": 10.632586367880485,
"grad_norm": 5.292705535888672,
"learning_rate": 9.104399999999999e-07,
"loss": 0.075,
"step": 22775
},
{
"epoch": 10.644257703081232,
"grad_norm": 2.7910406589508057,
"learning_rate": 9.1144e-07,
"loss": 0.0512,
"step": 22800
},
{
"epoch": 10.655929038281979,
"grad_norm": 2.8850274085998535,
"learning_rate": 9.1244e-07,
"loss": 0.0531,
"step": 22825
},
{
"epoch": 10.667600373482726,
"grad_norm": 3.195202589035034,
"learning_rate": 9.1344e-07,
"loss": 0.0511,
"step": 22850
},
{
"epoch": 10.679271708683473,
"grad_norm": 3.8003671169281006,
"learning_rate": 9.1444e-07,
"loss": 0.0758,
"step": 22875
},
{
"epoch": 10.69094304388422,
"grad_norm": 3.130300998687744,
"learning_rate": 9.1544e-07,
"loss": 0.0522,
"step": 22900
},
{
"epoch": 10.702614379084967,
"grad_norm": 4.510003089904785,
"learning_rate": 9.1644e-07,
"loss": 0.0843,
"step": 22925
},
{
"epoch": 10.714285714285714,
"grad_norm": 6.213229656219482,
"learning_rate": 9.1744e-07,
"loss": 0.0481,
"step": 22950
},
{
"epoch": 10.72595704948646,
"grad_norm": 2.293363094329834,
"learning_rate": 9.1844e-07,
"loss": 0.0604,
"step": 22975
},
{
"epoch": 10.73762838468721,
"grad_norm": 1.0174866914749146,
"learning_rate": 9.1944e-07,
"loss": 0.0501,
"step": 23000
},
{
"epoch": 10.749299719887954,
"grad_norm": 5.294317722320557,
"learning_rate": 9.2044e-07,
"loss": 0.0734,
"step": 23025
},
{
"epoch": 10.760971055088703,
"grad_norm": 3.6734204292297363,
"learning_rate": 9.2144e-07,
"loss": 0.0492,
"step": 23050
},
{
"epoch": 10.77264239028945,
"grad_norm": 3.0096030235290527,
"learning_rate": 9.2244e-07,
"loss": 0.0517,
"step": 23075
},
{
"epoch": 10.784313725490197,
"grad_norm": 4.717250347137451,
"learning_rate": 9.2344e-07,
"loss": 0.0447,
"step": 23100
},
{
"epoch": 10.795985060690944,
"grad_norm": 3.78305983543396,
"learning_rate": 9.2444e-07,
"loss": 0.0804,
"step": 23125
},
{
"epoch": 10.80765639589169,
"grad_norm": 6.005961894989014,
"learning_rate": 9.2544e-07,
"loss": 0.0535,
"step": 23150
},
{
"epoch": 10.819327731092438,
"grad_norm": 5.146392345428467,
"learning_rate": 9.2644e-07,
"loss": 0.069,
"step": 23175
},
{
"epoch": 10.830999066293185,
"grad_norm": 6.436806678771973,
"learning_rate": 9.2744e-07,
"loss": 0.0532,
"step": 23200
},
{
"epoch": 10.842670401493931,
"grad_norm": 3.9533166885375977,
"learning_rate": 9.2844e-07,
"loss": 0.0761,
"step": 23225
},
{
"epoch": 10.854341736694678,
"grad_norm": 4.497999668121338,
"learning_rate": 9.2944e-07,
"loss": 0.0498,
"step": 23250
},
{
"epoch": 10.866013071895425,
"grad_norm": 3.916146755218506,
"learning_rate": 9.3044e-07,
"loss": 0.0707,
"step": 23275
},
{
"epoch": 10.877684407096172,
"grad_norm": 1.4552559852600098,
"learning_rate": 9.3144e-07,
"loss": 0.049,
"step": 23300
},
{
"epoch": 10.88935574229692,
"grad_norm": 4.581323146820068,
"learning_rate": 9.3244e-07,
"loss": 0.0679,
"step": 23325
},
{
"epoch": 10.901027077497666,
"grad_norm": 9.022045135498047,
"learning_rate": 9.3344e-07,
"loss": 0.0452,
"step": 23350
},
{
"epoch": 10.912698412698413,
"grad_norm": 2.782165288925171,
"learning_rate": 9.3444e-07,
"loss": 0.0697,
"step": 23375
},
{
"epoch": 10.92436974789916,
"grad_norm": 2.9013919830322266,
"learning_rate": 9.3544e-07,
"loss": 0.0539,
"step": 23400
},
{
"epoch": 10.936041083099907,
"grad_norm": 5.128820419311523,
"learning_rate": 9.3644e-07,
"loss": 0.0659,
"step": 23425
},
{
"epoch": 10.947712418300654,
"grad_norm": 3.9068808555603027,
"learning_rate": 9.3744e-07,
"loss": 0.0461,
"step": 23450
},
{
"epoch": 10.9593837535014,
"grad_norm": 3.185457229614258,
"learning_rate": 9.3844e-07,
"loss": 0.0737,
"step": 23475
},
{
"epoch": 10.971055088702148,
"grad_norm": 6.052894592285156,
"learning_rate": 9.3944e-07,
"loss": 0.0569,
"step": 23500
},
{
"epoch": 10.982726423902895,
"grad_norm": 3.6629199981689453,
"learning_rate": 9.4044e-07,
"loss": 0.0621,
"step": 23525
},
{
"epoch": 10.994397759103641,
"grad_norm": 1.8605912923812866,
"learning_rate": 9.4144e-07,
"loss": 0.0487,
"step": 23550
},
{
"epoch": 11.006069094304388,
"grad_norm": 3.8178212642669678,
"learning_rate": 9.424e-07,
"loss": 0.0691,
"step": 23575
},
{
"epoch": 11.017740429505135,
"grad_norm": 20.160478591918945,
"learning_rate": 9.433999999999999e-07,
"loss": 0.0342,
"step": 23600
},
{
"epoch": 11.029411764705882,
"grad_norm": 2.548696756362915,
"learning_rate": 9.444e-07,
"loss": 0.0535,
"step": 23625
},
{
"epoch": 11.04108309990663,
"grad_norm": 6.730154514312744,
"learning_rate": 9.454e-07,
"loss": 0.0303,
"step": 23650
},
{
"epoch": 11.052754435107376,
"grad_norm": 5.233523368835449,
"learning_rate": 9.464e-07,
"loss": 0.0631,
"step": 23675
},
{
"epoch": 11.064425770308123,
"grad_norm": 5.212557315826416,
"learning_rate": 9.474e-07,
"loss": 0.0376,
"step": 23700
},
{
"epoch": 11.07609710550887,
"grad_norm": 3.276489019393921,
"learning_rate": 9.484e-07,
"loss": 0.052,
"step": 23725
},
{
"epoch": 11.087768440709617,
"grad_norm": 1.3591026067733765,
"learning_rate": 9.494e-07,
"loss": 0.0376,
"step": 23750
},
{
"epoch": 11.099439775910364,
"grad_norm": 5.116319179534912,
"learning_rate": 9.503999999999999e-07,
"loss": 0.0549,
"step": 23775
},
{
"epoch": 11.11111111111111,
"grad_norm": 3.6078543663024902,
"learning_rate": 9.514e-07,
"loss": 0.0306,
"step": 23800
},
{
"epoch": 11.122782446311858,
"grad_norm": 1.3853808641433716,
"learning_rate": 9.524e-07,
"loss": 0.0554,
"step": 23825
},
{
"epoch": 11.134453781512605,
"grad_norm": 1.325642704963684,
"learning_rate": 9.534e-07,
"loss": 0.0316,
"step": 23850
},
{
"epoch": 11.146125116713351,
"grad_norm": 3.342453718185425,
"learning_rate": 9.544e-07,
"loss": 0.0565,
"step": 23875
},
{
"epoch": 11.157796451914098,
"grad_norm": 5.374787330627441,
"learning_rate": 9.554e-07,
"loss": 0.0371,
"step": 23900
},
{
"epoch": 11.169467787114845,
"grad_norm": 4.615719318389893,
"learning_rate": 9.564e-07,
"loss": 0.0464,
"step": 23925
},
{
"epoch": 11.181139122315592,
"grad_norm": 5.073611259460449,
"learning_rate": 9.574e-07,
"loss": 0.0374,
"step": 23950
},
{
"epoch": 11.19281045751634,
"grad_norm": 4.32796049118042,
"learning_rate": 9.584e-07,
"loss": 0.0623,
"step": 23975
},
{
"epoch": 11.204481792717086,
"grad_norm": 4.874443054199219,
"learning_rate": 9.594e-07,
"loss": 0.0333,
"step": 24000
},
{
"epoch": 11.216153127917833,
"grad_norm": 3.2107975482940674,
"learning_rate": 9.604e-07,
"loss": 0.0522,
"step": 24025
},
{
"epoch": 11.22782446311858,
"grad_norm": 2.836677312850952,
"learning_rate": 9.614e-07,
"loss": 0.0288,
"step": 24050
},
{
"epoch": 11.239495798319327,
"grad_norm": 1.4697824716567993,
"learning_rate": 9.624e-07,
"loss": 0.0602,
"step": 24075
},
{
"epoch": 11.251167133520074,
"grad_norm": 3.0650887489318848,
"learning_rate": 9.634e-07,
"loss": 0.0289,
"step": 24100
},
{
"epoch": 11.262838468720823,
"grad_norm": 3.5631065368652344,
"learning_rate": 9.644e-07,
"loss": 0.057,
"step": 24125
},
{
"epoch": 11.27450980392157,
"grad_norm": 5.980957508087158,
"learning_rate": 9.654e-07,
"loss": 0.0368,
"step": 24150
},
{
"epoch": 11.286181139122316,
"grad_norm": 2.9237494468688965,
"learning_rate": 9.664e-07,
"loss": 0.0572,
"step": 24175
},
{
"epoch": 11.297852474323063,
"grad_norm": 1.2119998931884766,
"learning_rate": 9.674e-07,
"loss": 0.03,
"step": 24200
},
{
"epoch": 11.30952380952381,
"grad_norm": 3.5559473037719727,
"learning_rate": 9.684e-07,
"loss": 0.0667,
"step": 24225
},
{
"epoch": 11.321195144724557,
"grad_norm": 3.1826765537261963,
"learning_rate": 9.694e-07,
"loss": 0.0397,
"step": 24250
},
{
"epoch": 11.332866479925304,
"grad_norm": 2.0330376625061035,
"learning_rate": 9.704e-07,
"loss": 0.0553,
"step": 24275
},
{
"epoch": 11.344537815126051,
"grad_norm": 4.895223617553711,
"learning_rate": 9.714e-07,
"loss": 0.0335,
"step": 24300
},
{
"epoch": 11.356209150326798,
"grad_norm": 3.050001621246338,
"learning_rate": 9.724e-07,
"loss": 0.0568,
"step": 24325
},
{
"epoch": 11.367880485527545,
"grad_norm": 6.4767560958862305,
"learning_rate": 9.734e-07,
"loss": 0.0404,
"step": 24350
},
{
"epoch": 11.379551820728292,
"grad_norm": 4.696669101715088,
"learning_rate": 9.744e-07,
"loss": 0.0641,
"step": 24375
},
{
"epoch": 11.391223155929039,
"grad_norm": 3.8657402992248535,
"learning_rate": 9.754e-07,
"loss": 0.0316,
"step": 24400
},
{
"epoch": 11.402894491129786,
"grad_norm": 4.243162155151367,
"learning_rate": 9.764e-07,
"loss": 0.0512,
"step": 24425
},
{
"epoch": 11.414565826330533,
"grad_norm": 6.798733234405518,
"learning_rate": 9.774e-07,
"loss": 0.0293,
"step": 24450
},
{
"epoch": 11.42623716153128,
"grad_norm": 2.7520804405212402,
"learning_rate": 9.784e-07,
"loss": 0.0513,
"step": 24475
},
{
"epoch": 11.437908496732026,
"grad_norm": 3.6984705924987793,
"learning_rate": 9.794e-07,
"loss": 0.0376,
"step": 24500
},
{
"epoch": 11.449579831932773,
"grad_norm": 10.192070960998535,
"learning_rate": 9.804e-07,
"loss": 0.0491,
"step": 24525
},
{
"epoch": 11.46125116713352,
"grad_norm": 3.717801332473755,
"learning_rate": 9.814e-07,
"loss": 0.0345,
"step": 24550
},
{
"epoch": 11.472922502334267,
"grad_norm": 5.155227184295654,
"learning_rate": 9.824e-07,
"loss": 0.0549,
"step": 24575
},
{
"epoch": 11.484593837535014,
"grad_norm": 5.626723766326904,
"learning_rate": 9.834e-07,
"loss": 0.0373,
"step": 24600
},
{
"epoch": 11.496265172735761,
"grad_norm": 3.582610607147217,
"learning_rate": 9.844e-07,
"loss": 0.0698,
"step": 24625
},
{
"epoch": 11.507936507936508,
"grad_norm": 4.073030948638916,
"learning_rate": 9.854e-07,
"loss": 0.0348,
"step": 24650
},
{
"epoch": 11.519607843137255,
"grad_norm": 5.026329517364502,
"learning_rate": 9.864e-07,
"loss": 0.0607,
"step": 24675
},
{
"epoch": 11.531279178338002,
"grad_norm": 3.416334390640259,
"learning_rate": 9.874e-07,
"loss": 0.0318,
"step": 24700
},
{
"epoch": 11.542950513538749,
"grad_norm": 2.8178179264068604,
"learning_rate": 9.884e-07,
"loss": 0.0524,
"step": 24725
},
{
"epoch": 11.554621848739496,
"grad_norm": 2.5382184982299805,
"learning_rate": 9.894e-07,
"loss": 0.0341,
"step": 24750
},
{
"epoch": 11.566293183940243,
"grad_norm": 4.652471542358398,
"learning_rate": 9.903999999999999e-07,
"loss": 0.065,
"step": 24775
},
{
"epoch": 11.57796451914099,
"grad_norm": 5.832390785217285,
"learning_rate": 9.914e-07,
"loss": 0.0317,
"step": 24800
},
{
"epoch": 11.589635854341736,
"grad_norm": 3.8395602703094482,
"learning_rate": 9.923999999999998e-07,
"loss": 0.0642,
"step": 24825
},
{
"epoch": 11.601307189542483,
"grad_norm": 5.006762981414795,
"learning_rate": 9.933999999999999e-07,
"loss": 0.0367,
"step": 24850
},
{
"epoch": 11.61297852474323,
"grad_norm": 3.3889858722686768,
"learning_rate": 9.944e-07,
"loss": 0.0567,
"step": 24875
},
{
"epoch": 11.624649859943977,
"grad_norm": 2.9701974391937256,
"learning_rate": 9.953999999999998e-07,
"loss": 0.0289,
"step": 24900
},
{
"epoch": 11.636321195144724,
"grad_norm": 3.798945903778076,
"learning_rate": 9.964e-07,
"loss": 0.0591,
"step": 24925
},
{
"epoch": 11.647992530345471,
"grad_norm": 3.1179754734039307,
"learning_rate": 9.974e-07,
"loss": 0.0286,
"step": 24950
},
{
"epoch": 11.659663865546218,
"grad_norm": 4.428998947143555,
"learning_rate": 9.983999999999998e-07,
"loss": 0.0668,
"step": 24975
},
{
"epoch": 11.671335200746965,
"grad_norm": 5.318285942077637,
"learning_rate": 9.994e-07,
"loss": 0.0374,
"step": 25000
},
{
"epoch": 11.671335200746965,
"eval_loss": 0.15606163442134857,
"eval_runtime": 6264.6896,
"eval_samples_per_second": 1.503,
"eval_steps_per_second": 0.188,
"eval_wer": 0.09723881802034555,
"step": 25000
},
{
"epoch": 11.671335200746965,
"step": 25000,
"total_flos": 4.081858297380864e+20,
"train_loss": 0.03221806969165802,
"train_runtime": 95538.6721,
"train_samples_per_second": 4.187,
"train_steps_per_second": 0.262
}
],
"logging_steps": 25,
"max_steps": 25000,
"num_input_tokens_seen": 0,
"num_train_epochs": 12,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.081858297380864e+20,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}