xlsr_mid1_ja-ko / checkpoint-6900 /trainer_state.json
yesj1234's picture
Upload folder using huggingface_hub
8b8b1c8
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 300.0,
"eval_steps": 150,
"global_step": 6900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.17,
"learning_rate": 0.0003,
"loss": 35.2887,
"step": 50
},
{
"epoch": 4.35,
"learning_rate": 0.00029781021897810217,
"loss": 5.9569,
"step": 100
},
{
"epoch": 6.52,
"learning_rate": 0.00029562043795620436,
"loss": 4.9138,
"step": 150
},
{
"epoch": 6.52,
"eval_cer": 1.0,
"eval_loss": 4.7965407371521,
"eval_runtime": 1.256,
"eval_samples_per_second": 35.828,
"eval_steps_per_second": 2.389,
"step": 150
},
{
"epoch": 8.7,
"learning_rate": 0.00029343065693430656,
"loss": 4.887,
"step": 200
},
{
"epoch": 10.87,
"learning_rate": 0.00029124087591240875,
"loss": 4.8447,
"step": 250
},
{
"epoch": 13.04,
"learning_rate": 0.00028905109489051094,
"loss": 4.7484,
"step": 300
},
{
"epoch": 13.04,
"eval_cer": 1.0,
"eval_loss": 4.608075141906738,
"eval_runtime": 1.2451,
"eval_samples_per_second": 36.142,
"eval_steps_per_second": 2.409,
"step": 300
},
{
"epoch": 15.22,
"learning_rate": 0.00028686131386861314,
"loss": 4.6529,
"step": 350
},
{
"epoch": 17.39,
"learning_rate": 0.0002846715328467153,
"loss": 4.6373,
"step": 400
},
{
"epoch": 19.57,
"learning_rate": 0.00028248175182481747,
"loss": 4.5894,
"step": 450
},
{
"epoch": 19.57,
"eval_cer": 0.9851301115241635,
"eval_loss": 4.469708442687988,
"eval_runtime": 1.2325,
"eval_samples_per_second": 36.51,
"eval_steps_per_second": 2.434,
"step": 450
},
{
"epoch": 21.74,
"learning_rate": 0.00028029197080291966,
"loss": 4.5045,
"step": 500
},
{
"epoch": 23.91,
"learning_rate": 0.00027810218978102186,
"loss": 4.4076,
"step": 550
},
{
"epoch": 26.09,
"learning_rate": 0.00027591240875912405,
"loss": 4.2024,
"step": 600
},
{
"epoch": 26.09,
"eval_cer": 0.9076827757125155,
"eval_loss": 4.037315845489502,
"eval_runtime": 1.2357,
"eval_samples_per_second": 36.417,
"eval_steps_per_second": 2.428,
"step": 600
},
{
"epoch": 28.26,
"learning_rate": 0.00027372262773722625,
"loss": 3.8743,
"step": 650
},
{
"epoch": 30.43,
"learning_rate": 0.00027153284671532844,
"loss": 3.3488,
"step": 700
},
{
"epoch": 32.61,
"learning_rate": 0.00026934306569343063,
"loss": 2.7314,
"step": 750
},
{
"epoch": 32.61,
"eval_cer": 0.5340768277571252,
"eval_loss": 2.5507473945617676,
"eval_runtime": 1.2278,
"eval_samples_per_second": 36.651,
"eval_steps_per_second": 2.443,
"step": 750
},
{
"epoch": 34.78,
"learning_rate": 0.00026715328467153283,
"loss": 2.1968,
"step": 800
},
{
"epoch": 36.96,
"learning_rate": 0.000264963503649635,
"loss": 1.6522,
"step": 850
},
{
"epoch": 39.13,
"learning_rate": 0.0002627737226277372,
"loss": 1.2293,
"step": 900
},
{
"epoch": 39.13,
"eval_cer": 0.4138785625774473,
"eval_loss": 2.01461124420166,
"eval_runtime": 1.2246,
"eval_samples_per_second": 36.746,
"eval_steps_per_second": 2.45,
"step": 900
},
{
"epoch": 41.3,
"learning_rate": 0.0002605839416058394,
"loss": 0.9292,
"step": 950
},
{
"epoch": 43.48,
"learning_rate": 0.00025839416058394155,
"loss": 0.7208,
"step": 1000
},
{
"epoch": 45.65,
"learning_rate": 0.00025620437956204374,
"loss": 0.5544,
"step": 1050
},
{
"epoch": 45.65,
"eval_cer": 0.355638166047088,
"eval_loss": 1.9821244478225708,
"eval_runtime": 1.2073,
"eval_samples_per_second": 37.275,
"eval_steps_per_second": 2.485,
"step": 1050
},
{
"epoch": 47.83,
"learning_rate": 0.00025401459854014594,
"loss": 0.4757,
"step": 1100
},
{
"epoch": 50.0,
"learning_rate": 0.00025182481751824813,
"loss": 0.3895,
"step": 1150
},
{
"epoch": 52.17,
"learning_rate": 0.0002496350364963503,
"loss": 0.3224,
"step": 1200
},
{
"epoch": 52.17,
"eval_cer": 0.3587360594795539,
"eval_loss": 2.0189881324768066,
"eval_runtime": 1.1983,
"eval_samples_per_second": 37.554,
"eval_steps_per_second": 2.504,
"step": 1200
},
{
"epoch": 54.35,
"learning_rate": 0.0002474452554744525,
"loss": 0.279,
"step": 1250
},
{
"epoch": 56.52,
"learning_rate": 0.0002452554744525547,
"loss": 0.2285,
"step": 1300
},
{
"epoch": 58.7,
"learning_rate": 0.0002430656934306569,
"loss": 0.1951,
"step": 1350
},
{
"epoch": 58.7,
"eval_cer": 0.36121437422552666,
"eval_loss": 2.1229116916656494,
"eval_runtime": 1.2603,
"eval_samples_per_second": 35.706,
"eval_steps_per_second": 2.38,
"step": 1350
},
{
"epoch": 60.87,
"learning_rate": 0.0002408759124087591,
"loss": 0.1964,
"step": 1400
},
{
"epoch": 63.04,
"learning_rate": 0.0002386861313868613,
"loss": 0.1622,
"step": 1450
},
{
"epoch": 65.22,
"learning_rate": 0.0002364963503649635,
"loss": 0.1539,
"step": 1500
},
{
"epoch": 65.22,
"eval_cer": 0.3469640644361834,
"eval_loss": 2.111368179321289,
"eval_runtime": 1.2194,
"eval_samples_per_second": 36.903,
"eval_steps_per_second": 2.46,
"step": 1500
},
{
"epoch": 67.39,
"learning_rate": 0.00023430656934306568,
"loss": 0.1492,
"step": 1550
},
{
"epoch": 69.57,
"learning_rate": 0.00023211678832116788,
"loss": 0.1404,
"step": 1600
},
{
"epoch": 71.74,
"learning_rate": 0.00022992700729927004,
"loss": 0.1165,
"step": 1650
},
{
"epoch": 71.74,
"eval_cer": 0.33147459727385375,
"eval_loss": 2.274796485900879,
"eval_runtime": 1.1874,
"eval_samples_per_second": 37.898,
"eval_steps_per_second": 2.527,
"step": 1650
},
{
"epoch": 73.91,
"learning_rate": 0.00022773722627737224,
"loss": 0.1268,
"step": 1700
},
{
"epoch": 76.09,
"learning_rate": 0.00022554744525547443,
"loss": 0.1186,
"step": 1750
},
{
"epoch": 78.26,
"learning_rate": 0.00022335766423357663,
"loss": 0.1119,
"step": 1800
},
{
"epoch": 78.26,
"eval_cer": 0.34882280049566294,
"eval_loss": 2.2390518188476562,
"eval_runtime": 1.3465,
"eval_samples_per_second": 33.42,
"eval_steps_per_second": 2.228,
"step": 1800
},
{
"epoch": 80.43,
"learning_rate": 0.00022116788321167882,
"loss": 0.0988,
"step": 1850
},
{
"epoch": 82.61,
"learning_rate": 0.00021897810218978101,
"loss": 0.112,
"step": 1900
},
{
"epoch": 84.78,
"learning_rate": 0.0002167883211678832,
"loss": 0.0989,
"step": 1950
},
{
"epoch": 84.78,
"eval_cer": 0.3382899628252788,
"eval_loss": 2.343754529953003,
"eval_runtime": 1.2055,
"eval_samples_per_second": 37.329,
"eval_steps_per_second": 2.489,
"step": 1950
},
{
"epoch": 86.96,
"learning_rate": 0.00021459854014598537,
"loss": 0.097,
"step": 2000
},
{
"epoch": 89.13,
"learning_rate": 0.00021240875912408757,
"loss": 0.0854,
"step": 2050
},
{
"epoch": 91.3,
"learning_rate": 0.00021021897810218976,
"loss": 0.0915,
"step": 2100
},
{
"epoch": 91.3,
"eval_cer": 0.3587360594795539,
"eval_loss": 2.121840000152588,
"eval_runtime": 1.2037,
"eval_samples_per_second": 37.386,
"eval_steps_per_second": 2.492,
"step": 2100
},
{
"epoch": 93.48,
"learning_rate": 0.00020802919708029196,
"loss": 0.078,
"step": 2150
},
{
"epoch": 95.65,
"learning_rate": 0.00020583941605839415,
"loss": 0.0857,
"step": 2200
},
{
"epoch": 97.83,
"learning_rate": 0.00020364963503649632,
"loss": 0.0721,
"step": 2250
},
{
"epoch": 97.83,
"eval_cer": 0.35192069392812886,
"eval_loss": 2.242812395095825,
"eval_runtime": 1.1964,
"eval_samples_per_second": 37.614,
"eval_steps_per_second": 2.508,
"step": 2250
},
{
"epoch": 100.0,
"learning_rate": 0.0002014598540145985,
"loss": 0.0799,
"step": 2300
},
{
"epoch": 102.17,
"learning_rate": 0.0001992700729927007,
"loss": 0.0798,
"step": 2350
},
{
"epoch": 104.35,
"learning_rate": 0.0001970802919708029,
"loss": 0.0742,
"step": 2400
},
{
"epoch": 104.35,
"eval_cer": 0.33643122676579923,
"eval_loss": 2.229339838027954,
"eval_runtime": 1.2156,
"eval_samples_per_second": 37.019,
"eval_steps_per_second": 2.468,
"step": 2400
},
{
"epoch": 106.52,
"learning_rate": 0.0001948905109489051,
"loss": 0.0692,
"step": 2450
},
{
"epoch": 108.7,
"learning_rate": 0.0001927007299270073,
"loss": 0.0664,
"step": 2500
},
{
"epoch": 110.87,
"learning_rate": 0.00019051094890510948,
"loss": 0.0629,
"step": 2550
},
{
"epoch": 110.87,
"eval_cer": 0.33705080545229243,
"eval_loss": 2.2878150939941406,
"eval_runtime": 1.2044,
"eval_samples_per_second": 37.364,
"eval_steps_per_second": 2.491,
"step": 2550
},
{
"epoch": 113.04,
"learning_rate": 0.00018832116788321167,
"loss": 0.0619,
"step": 2600
},
{
"epoch": 115.22,
"learning_rate": 0.00018613138686131387,
"loss": 0.0582,
"step": 2650
},
{
"epoch": 117.39,
"learning_rate": 0.00018394160583941606,
"loss": 0.0495,
"step": 2700
},
{
"epoch": 117.39,
"eval_cer": 0.34076827757125155,
"eval_loss": 2.2671637535095215,
"eval_runtime": 1.2039,
"eval_samples_per_second": 37.379,
"eval_steps_per_second": 2.492,
"step": 2700
},
{
"epoch": 119.57,
"learning_rate": 0.00018175182481751826,
"loss": 0.0614,
"step": 2750
},
{
"epoch": 121.74,
"learning_rate": 0.00017956204379562042,
"loss": 0.0565,
"step": 2800
},
{
"epoch": 123.91,
"learning_rate": 0.00017737226277372262,
"loss": 0.0466,
"step": 2850
},
{
"epoch": 123.91,
"eval_cer": 0.35254027261462206,
"eval_loss": 2.2532107830047607,
"eval_runtime": 1.3563,
"eval_samples_per_second": 33.179,
"eval_steps_per_second": 2.212,
"step": 2850
},
{
"epoch": 126.09,
"learning_rate": 0.00017518248175182478,
"loss": 0.0465,
"step": 2900
},
{
"epoch": 128.26,
"learning_rate": 0.00017299270072992698,
"loss": 0.0496,
"step": 2950
},
{
"epoch": 130.43,
"learning_rate": 0.00017080291970802917,
"loss": 0.0424,
"step": 3000
},
{
"epoch": 130.43,
"eval_cer": 0.32589838909541513,
"eval_loss": 2.2844393253326416,
"eval_runtime": 1.2006,
"eval_samples_per_second": 37.48,
"eval_steps_per_second": 2.499,
"step": 3000
},
{
"epoch": 132.61,
"learning_rate": 0.00016861313868613137,
"loss": 0.0483,
"step": 3050
},
{
"epoch": 134.78,
"learning_rate": 0.00016642335766423356,
"loss": 0.0488,
"step": 3100
},
{
"epoch": 136.96,
"learning_rate": 0.00016423357664233575,
"loss": 0.0446,
"step": 3150
},
{
"epoch": 136.96,
"eval_cer": 0.3252788104089219,
"eval_loss": 2.2763445377349854,
"eval_runtime": 1.2043,
"eval_samples_per_second": 37.368,
"eval_steps_per_second": 2.491,
"step": 3150
},
{
"epoch": 139.13,
"learning_rate": 0.00016204379562043795,
"loss": 0.0424,
"step": 3200
},
{
"epoch": 141.3,
"learning_rate": 0.00015985401459854014,
"loss": 0.0429,
"step": 3250
},
{
"epoch": 143.48,
"learning_rate": 0.00015766423357664234,
"loss": 0.0411,
"step": 3300
},
{
"epoch": 143.48,
"eval_cer": 0.3302354399008674,
"eval_loss": 2.301079034805298,
"eval_runtime": 1.345,
"eval_samples_per_second": 33.458,
"eval_steps_per_second": 2.231,
"step": 3300
},
{
"epoch": 145.65,
"learning_rate": 0.00015547445255474453,
"loss": 0.0392,
"step": 3350
},
{
"epoch": 147.83,
"learning_rate": 0.00015328467153284672,
"loss": 0.0426,
"step": 3400
},
{
"epoch": 150.0,
"learning_rate": 0.00015109489051094892,
"loss": 0.0419,
"step": 3450
},
{
"epoch": 150.0,
"eval_cer": 0.3420074349442379,
"eval_loss": 2.320059299468994,
"eval_runtime": 1.2411,
"eval_samples_per_second": 36.259,
"eval_steps_per_second": 2.417,
"step": 3450
},
{
"epoch": 152.17,
"learning_rate": 0.00014890510948905108,
"loss": 0.0386,
"step": 3500
},
{
"epoch": 154.35,
"learning_rate": 0.00014671532846715328,
"loss": 0.0402,
"step": 3550
},
{
"epoch": 156.52,
"learning_rate": 0.00014452554744525547,
"loss": 0.0333,
"step": 3600
},
{
"epoch": 156.52,
"eval_cer": 0.34386617100371747,
"eval_loss": 2.364445209503174,
"eval_runtime": 1.2337,
"eval_samples_per_second": 36.475,
"eval_steps_per_second": 2.432,
"step": 3600
},
{
"epoch": 158.7,
"learning_rate": 0.00014233576642335764,
"loss": 0.0434,
"step": 3650
},
{
"epoch": 160.87,
"learning_rate": 0.00014014598540145983,
"loss": 0.0393,
"step": 3700
},
{
"epoch": 163.04,
"learning_rate": 0.00013795620437956203,
"loss": 0.0384,
"step": 3750
},
{
"epoch": 163.04,
"eval_cer": 0.35315985130111527,
"eval_loss": 2.3685200214385986,
"eval_runtime": 1.2136,
"eval_samples_per_second": 37.081,
"eval_steps_per_second": 2.472,
"step": 3750
},
{
"epoch": 165.22,
"learning_rate": 0.00013576642335766422,
"loss": 0.0324,
"step": 3800
},
{
"epoch": 167.39,
"learning_rate": 0.00013357664233576641,
"loss": 0.0438,
"step": 3850
},
{
"epoch": 169.57,
"learning_rate": 0.0001313868613138686,
"loss": 0.0367,
"step": 3900
},
{
"epoch": 169.57,
"eval_cer": 0.3469640644361834,
"eval_loss": 2.397036552429199,
"eval_runtime": 1.2259,
"eval_samples_per_second": 36.708,
"eval_steps_per_second": 2.447,
"step": 3900
},
{
"epoch": 171.74,
"learning_rate": 0.00012919708029197077,
"loss": 0.0336,
"step": 3950
},
{
"epoch": 173.91,
"learning_rate": 0.00012700729927007297,
"loss": 0.037,
"step": 4000
},
{
"epoch": 176.09,
"learning_rate": 0.00012481751824817516,
"loss": 0.0307,
"step": 4050
},
{
"epoch": 176.09,
"eval_cer": 0.3308550185873606,
"eval_loss": 2.3530125617980957,
"eval_runtime": 1.2484,
"eval_samples_per_second": 36.047,
"eval_steps_per_second": 2.403,
"step": 4050
},
{
"epoch": 178.26,
"learning_rate": 0.00012262773722627736,
"loss": 0.0284,
"step": 4100
},
{
"epoch": 180.43,
"learning_rate": 0.00012043795620437955,
"loss": 0.0233,
"step": 4150
},
{
"epoch": 182.61,
"learning_rate": 0.00011824817518248174,
"loss": 0.0328,
"step": 4200
},
{
"epoch": 182.61,
"eval_cer": 0.33147459727385375,
"eval_loss": 2.3414556980133057,
"eval_runtime": 1.2281,
"eval_samples_per_second": 36.64,
"eval_steps_per_second": 2.443,
"step": 4200
},
{
"epoch": 184.78,
"learning_rate": 0.00011605839416058394,
"loss": 0.0285,
"step": 4250
},
{
"epoch": 186.96,
"learning_rate": 0.00011386861313868612,
"loss": 0.0222,
"step": 4300
},
{
"epoch": 189.13,
"learning_rate": 0.00011167883211678831,
"loss": 0.0271,
"step": 4350
},
{
"epoch": 189.13,
"eval_cer": 0.3308550185873606,
"eval_loss": 2.4165024757385254,
"eval_runtime": 1.1891,
"eval_samples_per_second": 37.844,
"eval_steps_per_second": 2.523,
"step": 4350
},
{
"epoch": 191.3,
"learning_rate": 0.00010948905109489051,
"loss": 0.0307,
"step": 4400
},
{
"epoch": 193.48,
"learning_rate": 0.00010729927007299269,
"loss": 0.023,
"step": 4450
},
{
"epoch": 195.65,
"learning_rate": 0.00010510948905109488,
"loss": 0.0213,
"step": 4500
},
{
"epoch": 195.65,
"eval_cer": 0.3451053283767038,
"eval_loss": 2.447828769683838,
"eval_runtime": 1.1406,
"eval_samples_per_second": 39.452,
"eval_steps_per_second": 2.63,
"step": 4500
},
{
"epoch": 197.83,
"learning_rate": 0.00010291970802919708,
"loss": 0.021,
"step": 4550
},
{
"epoch": 200.0,
"learning_rate": 0.00010072992700729926,
"loss": 0.0246,
"step": 4600
},
{
"epoch": 202.17,
"learning_rate": 9.854014598540145e-05,
"loss": 0.0193,
"step": 4650
},
{
"epoch": 202.17,
"eval_cer": 0.355638166047088,
"eval_loss": 2.524061918258667,
"eval_runtime": 1.203,
"eval_samples_per_second": 37.406,
"eval_steps_per_second": 2.494,
"step": 4650
},
{
"epoch": 204.35,
"learning_rate": 9.635036496350364e-05,
"loss": 0.0223,
"step": 4700
},
{
"epoch": 206.52,
"learning_rate": 9.416058394160584e-05,
"loss": 0.0223,
"step": 4750
},
{
"epoch": 208.7,
"learning_rate": 9.197080291970803e-05,
"loss": 0.0204,
"step": 4800
},
{
"epoch": 208.7,
"eval_cer": 0.34634448574969023,
"eval_loss": 2.570009708404541,
"eval_runtime": 1.2664,
"eval_samples_per_second": 35.533,
"eval_steps_per_second": 2.369,
"step": 4800
},
{
"epoch": 210.87,
"learning_rate": 8.978102189781021e-05,
"loss": 0.0202,
"step": 4850
},
{
"epoch": 213.04,
"learning_rate": 8.759124087591239e-05,
"loss": 0.0193,
"step": 4900
},
{
"epoch": 215.22,
"learning_rate": 8.540145985401459e-05,
"loss": 0.0185,
"step": 4950
},
{
"epoch": 215.22,
"eval_cer": 0.31784386617100374,
"eval_loss": 2.583724021911621,
"eval_runtime": 1.2549,
"eval_samples_per_second": 35.859,
"eval_steps_per_second": 2.391,
"step": 4950
},
{
"epoch": 217.39,
"learning_rate": 8.321167883211678e-05,
"loss": 0.0191,
"step": 5000
},
{
"epoch": 219.57,
"learning_rate": 8.102189781021897e-05,
"loss": 0.0169,
"step": 5050
},
{
"epoch": 221.74,
"learning_rate": 7.883211678832117e-05,
"loss": 0.0161,
"step": 5100
},
{
"epoch": 221.74,
"eval_cer": 0.33767038413878564,
"eval_loss": 2.513859987258911,
"eval_runtime": 1.2515,
"eval_samples_per_second": 35.958,
"eval_steps_per_second": 2.397,
"step": 5100
},
{
"epoch": 223.91,
"learning_rate": 7.664233576642336e-05,
"loss": 0.0183,
"step": 5150
},
{
"epoch": 226.09,
"learning_rate": 7.445255474452554e-05,
"loss": 0.0228,
"step": 5200
},
{
"epoch": 228.26,
"learning_rate": 7.226277372262774e-05,
"loss": 0.0167,
"step": 5250
},
{
"epoch": 228.26,
"eval_cer": 0.3351920693928129,
"eval_loss": 2.5287766456604004,
"eval_runtime": 1.2044,
"eval_samples_per_second": 37.363,
"eval_steps_per_second": 2.491,
"step": 5250
},
{
"epoch": 230.43,
"learning_rate": 7.007299270072992e-05,
"loss": 0.0181,
"step": 5300
},
{
"epoch": 232.61,
"learning_rate": 6.788321167883211e-05,
"loss": 0.0144,
"step": 5350
},
{
"epoch": 234.78,
"learning_rate": 6.56934306569343e-05,
"loss": 0.0148,
"step": 5400
},
{
"epoch": 234.78,
"eval_cer": 0.338909541511772,
"eval_loss": 2.574066400527954,
"eval_runtime": 1.2534,
"eval_samples_per_second": 35.904,
"eval_steps_per_second": 2.394,
"step": 5400
},
{
"epoch": 236.96,
"learning_rate": 6.350364963503648e-05,
"loss": 0.0143,
"step": 5450
},
{
"epoch": 239.13,
"learning_rate": 6.131386861313868e-05,
"loss": 0.0197,
"step": 5500
},
{
"epoch": 241.3,
"learning_rate": 5.912408759124087e-05,
"loss": 0.0141,
"step": 5550
},
{
"epoch": 241.3,
"eval_cer": 0.338909541511772,
"eval_loss": 2.5173895359039307,
"eval_runtime": 1.1989,
"eval_samples_per_second": 37.536,
"eval_steps_per_second": 2.502,
"step": 5550
},
{
"epoch": 243.48,
"learning_rate": 5.693430656934306e-05,
"loss": 0.0165,
"step": 5600
},
{
"epoch": 245.65,
"learning_rate": 5.4744525547445253e-05,
"loss": 0.0127,
"step": 5650
},
{
"epoch": 247.83,
"learning_rate": 5.255474452554744e-05,
"loss": 0.0122,
"step": 5700
},
{
"epoch": 247.83,
"eval_cer": 0.3351920693928129,
"eval_loss": 2.5573315620422363,
"eval_runtime": 1.2363,
"eval_samples_per_second": 36.4,
"eval_steps_per_second": 2.427,
"step": 5700
},
{
"epoch": 250.0,
"learning_rate": 5.036496350364963e-05,
"loss": 0.0135,
"step": 5750
},
{
"epoch": 252.17,
"learning_rate": 4.817518248175182e-05,
"loss": 0.0116,
"step": 5800
},
{
"epoch": 254.35,
"learning_rate": 4.5985401459854016e-05,
"loss": 0.0115,
"step": 5850
},
{
"epoch": 254.35,
"eval_cer": 0.32961586121437425,
"eval_loss": 2.579023838043213,
"eval_runtime": 1.2327,
"eval_samples_per_second": 36.506,
"eval_steps_per_second": 2.434,
"step": 5850
},
{
"epoch": 256.52,
"learning_rate": 4.3795620437956196e-05,
"loss": 0.0141,
"step": 5900
},
{
"epoch": 258.7,
"learning_rate": 4.160583941605839e-05,
"loss": 0.0143,
"step": 5950
},
{
"epoch": 260.87,
"learning_rate": 3.9416058394160584e-05,
"loss": 0.0141,
"step": 6000
},
{
"epoch": 260.87,
"eval_cer": 0.32032218091697645,
"eval_loss": 2.577375888824463,
"eval_runtime": 1.2321,
"eval_samples_per_second": 36.524,
"eval_steps_per_second": 2.435,
"step": 6000
},
{
"epoch": 263.04,
"learning_rate": 3.722627737226277e-05,
"loss": 0.0116,
"step": 6050
},
{
"epoch": 265.22,
"learning_rate": 3.503649635036496e-05,
"loss": 0.0101,
"step": 6100
},
{
"epoch": 267.39,
"learning_rate": 3.284671532846715e-05,
"loss": 0.0123,
"step": 6150
},
{
"epoch": 267.39,
"eval_cer": 0.3308550185873606,
"eval_loss": 2.614670753479004,
"eval_runtime": 1.1319,
"eval_samples_per_second": 39.755,
"eval_steps_per_second": 2.65,
"step": 6150
},
{
"epoch": 269.57,
"learning_rate": 3.065693430656934e-05,
"loss": 0.0151,
"step": 6200
},
{
"epoch": 271.74,
"learning_rate": 2.846715328467153e-05,
"loss": 0.0099,
"step": 6250
},
{
"epoch": 273.91,
"learning_rate": 2.627737226277372e-05,
"loss": 0.0214,
"step": 6300
},
{
"epoch": 273.91,
"eval_cer": 0.3302354399008674,
"eval_loss": 2.620166778564453,
"eval_runtime": 1.262,
"eval_samples_per_second": 35.657,
"eval_steps_per_second": 2.377,
"step": 6300
},
{
"epoch": 276.09,
"learning_rate": 2.408759124087591e-05,
"loss": 0.0085,
"step": 6350
},
{
"epoch": 278.26,
"learning_rate": 2.1897810218978098e-05,
"loss": 0.0119,
"step": 6400
},
{
"epoch": 280.43,
"learning_rate": 1.9708029197080292e-05,
"loss": 0.0107,
"step": 6450
},
{
"epoch": 280.43,
"eval_cer": 0.32342007434944237,
"eval_loss": 2.6263809204101562,
"eval_runtime": 1.2547,
"eval_samples_per_second": 35.867,
"eval_steps_per_second": 2.391,
"step": 6450
},
{
"epoch": 282.61,
"learning_rate": 1.751824817518248e-05,
"loss": 0.0107,
"step": 6500
},
{
"epoch": 284.78,
"learning_rate": 1.532846715328467e-05,
"loss": 0.0105,
"step": 6550
},
{
"epoch": 286.96,
"learning_rate": 1.313868613138686e-05,
"loss": 0.0086,
"step": 6600
},
{
"epoch": 286.96,
"eval_cer": 0.3215613382899628,
"eval_loss": 2.607461452484131,
"eval_runtime": 1.204,
"eval_samples_per_second": 37.374,
"eval_steps_per_second": 2.492,
"step": 6600
},
{
"epoch": 289.13,
"learning_rate": 1.0948905109489049e-05,
"loss": 0.0095,
"step": 6650
},
{
"epoch": 291.3,
"learning_rate": 8.75912408759124e-06,
"loss": 0.0108,
"step": 6700
},
{
"epoch": 293.48,
"learning_rate": 6.56934306569343e-06,
"loss": 0.0106,
"step": 6750
},
{
"epoch": 293.48,
"eval_cer": 0.3246592317224288,
"eval_loss": 2.595982789993286,
"eval_runtime": 1.1323,
"eval_samples_per_second": 39.741,
"eval_steps_per_second": 2.649,
"step": 6750
},
{
"epoch": 295.65,
"learning_rate": 4.37956204379562e-06,
"loss": 0.0143,
"step": 6800
},
{
"epoch": 297.83,
"learning_rate": 2.18978102189781e-06,
"loss": 0.0105,
"step": 6850
},
{
"epoch": 300.0,
"learning_rate": 0.0,
"loss": 0.0085,
"step": 6900
},
{
"epoch": 300.0,
"eval_cer": 0.32403965303593557,
"eval_loss": 2.5951595306396484,
"eval_runtime": 1.2068,
"eval_samples_per_second": 37.288,
"eval_steps_per_second": 2.486,
"step": 6900
}
],
"logging_steps": 50,
"max_steps": 6900,
"num_train_epochs": 300,
"save_steps": 150,
"total_flos": 2.3112928880616276e+19,
"trial_name": null,
"trial_params": null
}