{ "best_metric": null, "best_model_checkpoint": null, "epoch": 300.0, "eval_steps": 150, "global_step": 6900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.17, "learning_rate": 0.0003, "loss": 35.2887, "step": 50 }, { "epoch": 4.35, "learning_rate": 0.00029781021897810217, "loss": 5.9569, "step": 100 }, { "epoch": 6.52, "learning_rate": 0.00029562043795620436, "loss": 4.9138, "step": 150 }, { "epoch": 6.52, "eval_cer": 1.0, "eval_loss": 4.7965407371521, "eval_runtime": 1.256, "eval_samples_per_second": 35.828, "eval_steps_per_second": 2.389, "step": 150 }, { "epoch": 8.7, "learning_rate": 0.00029343065693430656, "loss": 4.887, "step": 200 }, { "epoch": 10.87, "learning_rate": 0.00029124087591240875, "loss": 4.8447, "step": 250 }, { "epoch": 13.04, "learning_rate": 0.00028905109489051094, "loss": 4.7484, "step": 300 }, { "epoch": 13.04, "eval_cer": 1.0, "eval_loss": 4.608075141906738, "eval_runtime": 1.2451, "eval_samples_per_second": 36.142, "eval_steps_per_second": 2.409, "step": 300 }, { "epoch": 15.22, "learning_rate": 0.00028686131386861314, "loss": 4.6529, "step": 350 }, { "epoch": 17.39, "learning_rate": 0.0002846715328467153, "loss": 4.6373, "step": 400 }, { "epoch": 19.57, "learning_rate": 0.00028248175182481747, "loss": 4.5894, "step": 450 }, { "epoch": 19.57, "eval_cer": 0.9851301115241635, "eval_loss": 4.469708442687988, "eval_runtime": 1.2325, "eval_samples_per_second": 36.51, "eval_steps_per_second": 2.434, "step": 450 }, { "epoch": 21.74, "learning_rate": 0.00028029197080291966, "loss": 4.5045, "step": 500 }, { "epoch": 23.91, "learning_rate": 0.00027810218978102186, "loss": 4.4076, "step": 550 }, { "epoch": 26.09, "learning_rate": 0.00027591240875912405, "loss": 4.2024, "step": 600 }, { "epoch": 26.09, "eval_cer": 0.9076827757125155, "eval_loss": 4.037315845489502, "eval_runtime": 1.2357, "eval_samples_per_second": 36.417, "eval_steps_per_second": 2.428, "step": 600 }, { "epoch": 28.26, "learning_rate": 0.00027372262773722625, "loss": 3.8743, "step": 650 }, { "epoch": 30.43, "learning_rate": 0.00027153284671532844, "loss": 3.3488, "step": 700 }, { "epoch": 32.61, "learning_rate": 0.00026934306569343063, "loss": 2.7314, "step": 750 }, { "epoch": 32.61, "eval_cer": 0.5340768277571252, "eval_loss": 2.5507473945617676, "eval_runtime": 1.2278, "eval_samples_per_second": 36.651, "eval_steps_per_second": 2.443, "step": 750 }, { "epoch": 34.78, "learning_rate": 0.00026715328467153283, "loss": 2.1968, "step": 800 }, { "epoch": 36.96, "learning_rate": 0.000264963503649635, "loss": 1.6522, "step": 850 }, { "epoch": 39.13, "learning_rate": 0.0002627737226277372, "loss": 1.2293, "step": 900 }, { "epoch": 39.13, "eval_cer": 0.4138785625774473, "eval_loss": 2.01461124420166, "eval_runtime": 1.2246, "eval_samples_per_second": 36.746, "eval_steps_per_second": 2.45, "step": 900 }, { "epoch": 41.3, "learning_rate": 0.0002605839416058394, "loss": 0.9292, "step": 950 }, { "epoch": 43.48, "learning_rate": 0.00025839416058394155, "loss": 0.7208, "step": 1000 }, { "epoch": 45.65, "learning_rate": 0.00025620437956204374, "loss": 0.5544, "step": 1050 }, { "epoch": 45.65, "eval_cer": 0.355638166047088, "eval_loss": 1.9821244478225708, "eval_runtime": 1.2073, "eval_samples_per_second": 37.275, "eval_steps_per_second": 2.485, "step": 1050 }, { "epoch": 47.83, "learning_rate": 0.00025401459854014594, "loss": 0.4757, "step": 1100 }, { "epoch": 50.0, "learning_rate": 0.00025182481751824813, "loss": 0.3895, "step": 1150 }, { "epoch": 52.17, "learning_rate": 0.0002496350364963503, "loss": 0.3224, "step": 1200 }, { "epoch": 52.17, "eval_cer": 0.3587360594795539, "eval_loss": 2.0189881324768066, "eval_runtime": 1.1983, "eval_samples_per_second": 37.554, "eval_steps_per_second": 2.504, "step": 1200 }, { "epoch": 54.35, "learning_rate": 0.0002474452554744525, "loss": 0.279, "step": 1250 }, { "epoch": 56.52, "learning_rate": 0.0002452554744525547, "loss": 0.2285, "step": 1300 }, { "epoch": 58.7, "learning_rate": 0.0002430656934306569, "loss": 0.1951, "step": 1350 }, { "epoch": 58.7, "eval_cer": 0.36121437422552666, "eval_loss": 2.1229116916656494, "eval_runtime": 1.2603, "eval_samples_per_second": 35.706, "eval_steps_per_second": 2.38, "step": 1350 }, { "epoch": 60.87, "learning_rate": 0.0002408759124087591, "loss": 0.1964, "step": 1400 }, { "epoch": 63.04, "learning_rate": 0.0002386861313868613, "loss": 0.1622, "step": 1450 }, { "epoch": 65.22, "learning_rate": 0.0002364963503649635, "loss": 0.1539, "step": 1500 }, { "epoch": 65.22, "eval_cer": 0.3469640644361834, "eval_loss": 2.111368179321289, "eval_runtime": 1.2194, "eval_samples_per_second": 36.903, "eval_steps_per_second": 2.46, "step": 1500 }, { "epoch": 67.39, "learning_rate": 0.00023430656934306568, "loss": 0.1492, "step": 1550 }, { "epoch": 69.57, "learning_rate": 0.00023211678832116788, "loss": 0.1404, "step": 1600 }, { "epoch": 71.74, "learning_rate": 0.00022992700729927004, "loss": 0.1165, "step": 1650 }, { "epoch": 71.74, "eval_cer": 0.33147459727385375, "eval_loss": 2.274796485900879, "eval_runtime": 1.1874, "eval_samples_per_second": 37.898, "eval_steps_per_second": 2.527, "step": 1650 }, { "epoch": 73.91, "learning_rate": 0.00022773722627737224, "loss": 0.1268, "step": 1700 }, { "epoch": 76.09, "learning_rate": 0.00022554744525547443, "loss": 0.1186, "step": 1750 }, { "epoch": 78.26, "learning_rate": 0.00022335766423357663, "loss": 0.1119, "step": 1800 }, { "epoch": 78.26, "eval_cer": 0.34882280049566294, "eval_loss": 2.2390518188476562, "eval_runtime": 1.3465, "eval_samples_per_second": 33.42, "eval_steps_per_second": 2.228, "step": 1800 }, { "epoch": 80.43, "learning_rate": 0.00022116788321167882, "loss": 0.0988, "step": 1850 }, { "epoch": 82.61, "learning_rate": 0.00021897810218978101, "loss": 0.112, "step": 1900 }, { "epoch": 84.78, "learning_rate": 0.0002167883211678832, "loss": 0.0989, "step": 1950 }, { "epoch": 84.78, "eval_cer": 0.3382899628252788, "eval_loss": 2.343754529953003, "eval_runtime": 1.2055, "eval_samples_per_second": 37.329, "eval_steps_per_second": 2.489, "step": 1950 }, { "epoch": 86.96, "learning_rate": 0.00021459854014598537, "loss": 0.097, "step": 2000 }, { "epoch": 89.13, "learning_rate": 0.00021240875912408757, "loss": 0.0854, "step": 2050 }, { "epoch": 91.3, "learning_rate": 0.00021021897810218976, "loss": 0.0915, "step": 2100 }, { "epoch": 91.3, "eval_cer": 0.3587360594795539, "eval_loss": 2.121840000152588, "eval_runtime": 1.2037, "eval_samples_per_second": 37.386, "eval_steps_per_second": 2.492, "step": 2100 }, { "epoch": 93.48, "learning_rate": 0.00020802919708029196, "loss": 0.078, "step": 2150 }, { "epoch": 95.65, "learning_rate": 0.00020583941605839415, "loss": 0.0857, "step": 2200 }, { "epoch": 97.83, "learning_rate": 0.00020364963503649632, "loss": 0.0721, "step": 2250 }, { "epoch": 97.83, "eval_cer": 0.35192069392812886, "eval_loss": 2.242812395095825, "eval_runtime": 1.1964, "eval_samples_per_second": 37.614, "eval_steps_per_second": 2.508, "step": 2250 }, { "epoch": 100.0, "learning_rate": 0.0002014598540145985, "loss": 0.0799, "step": 2300 }, { "epoch": 102.17, "learning_rate": 0.0001992700729927007, "loss": 0.0798, "step": 2350 }, { "epoch": 104.35, "learning_rate": 0.0001970802919708029, "loss": 0.0742, "step": 2400 }, { "epoch": 104.35, "eval_cer": 0.33643122676579923, "eval_loss": 2.229339838027954, "eval_runtime": 1.2156, "eval_samples_per_second": 37.019, "eval_steps_per_second": 2.468, "step": 2400 }, { "epoch": 106.52, "learning_rate": 0.0001948905109489051, "loss": 0.0692, "step": 2450 }, { "epoch": 108.7, "learning_rate": 0.0001927007299270073, "loss": 0.0664, "step": 2500 }, { "epoch": 110.87, "learning_rate": 0.00019051094890510948, "loss": 0.0629, "step": 2550 }, { "epoch": 110.87, "eval_cer": 0.33705080545229243, "eval_loss": 2.2878150939941406, "eval_runtime": 1.2044, "eval_samples_per_second": 37.364, "eval_steps_per_second": 2.491, "step": 2550 }, { "epoch": 113.04, "learning_rate": 0.00018832116788321167, "loss": 0.0619, "step": 2600 }, { "epoch": 115.22, "learning_rate": 0.00018613138686131387, "loss": 0.0582, "step": 2650 }, { "epoch": 117.39, "learning_rate": 0.00018394160583941606, "loss": 0.0495, "step": 2700 }, { "epoch": 117.39, "eval_cer": 0.34076827757125155, "eval_loss": 2.2671637535095215, "eval_runtime": 1.2039, "eval_samples_per_second": 37.379, "eval_steps_per_second": 2.492, "step": 2700 }, { "epoch": 119.57, "learning_rate": 0.00018175182481751826, "loss": 0.0614, "step": 2750 }, { "epoch": 121.74, "learning_rate": 0.00017956204379562042, "loss": 0.0565, "step": 2800 }, { "epoch": 123.91, "learning_rate": 0.00017737226277372262, "loss": 0.0466, "step": 2850 }, { "epoch": 123.91, "eval_cer": 0.35254027261462206, "eval_loss": 2.2532107830047607, "eval_runtime": 1.3563, "eval_samples_per_second": 33.179, "eval_steps_per_second": 2.212, "step": 2850 }, { "epoch": 126.09, "learning_rate": 0.00017518248175182478, "loss": 0.0465, "step": 2900 }, { "epoch": 128.26, "learning_rate": 0.00017299270072992698, "loss": 0.0496, "step": 2950 }, { "epoch": 130.43, "learning_rate": 0.00017080291970802917, "loss": 0.0424, "step": 3000 }, { "epoch": 130.43, "eval_cer": 0.32589838909541513, "eval_loss": 2.2844393253326416, "eval_runtime": 1.2006, "eval_samples_per_second": 37.48, "eval_steps_per_second": 2.499, "step": 3000 }, { "epoch": 132.61, "learning_rate": 0.00016861313868613137, "loss": 0.0483, "step": 3050 }, { "epoch": 134.78, "learning_rate": 0.00016642335766423356, "loss": 0.0488, "step": 3100 }, { "epoch": 136.96, "learning_rate": 0.00016423357664233575, "loss": 0.0446, "step": 3150 }, { "epoch": 136.96, "eval_cer": 0.3252788104089219, "eval_loss": 2.2763445377349854, "eval_runtime": 1.2043, "eval_samples_per_second": 37.368, "eval_steps_per_second": 2.491, "step": 3150 }, { "epoch": 139.13, "learning_rate": 0.00016204379562043795, "loss": 0.0424, "step": 3200 }, { "epoch": 141.3, "learning_rate": 0.00015985401459854014, "loss": 0.0429, "step": 3250 }, { "epoch": 143.48, "learning_rate": 0.00015766423357664234, "loss": 0.0411, "step": 3300 }, { "epoch": 143.48, "eval_cer": 0.3302354399008674, "eval_loss": 2.301079034805298, "eval_runtime": 1.345, "eval_samples_per_second": 33.458, "eval_steps_per_second": 2.231, "step": 3300 }, { "epoch": 145.65, "learning_rate": 0.00015547445255474453, "loss": 0.0392, "step": 3350 }, { "epoch": 147.83, "learning_rate": 0.00015328467153284672, "loss": 0.0426, "step": 3400 }, { "epoch": 150.0, "learning_rate": 0.00015109489051094892, "loss": 0.0419, "step": 3450 }, { "epoch": 150.0, "eval_cer": 0.3420074349442379, "eval_loss": 2.320059299468994, "eval_runtime": 1.2411, "eval_samples_per_second": 36.259, "eval_steps_per_second": 2.417, "step": 3450 }, { "epoch": 152.17, "learning_rate": 0.00014890510948905108, "loss": 0.0386, "step": 3500 }, { "epoch": 154.35, "learning_rate": 0.00014671532846715328, "loss": 0.0402, "step": 3550 }, { "epoch": 156.52, "learning_rate": 0.00014452554744525547, "loss": 0.0333, "step": 3600 }, { "epoch": 156.52, "eval_cer": 0.34386617100371747, "eval_loss": 2.364445209503174, "eval_runtime": 1.2337, "eval_samples_per_second": 36.475, "eval_steps_per_second": 2.432, "step": 3600 }, { "epoch": 158.7, "learning_rate": 0.00014233576642335764, "loss": 0.0434, "step": 3650 }, { "epoch": 160.87, "learning_rate": 0.00014014598540145983, "loss": 0.0393, "step": 3700 }, { "epoch": 163.04, "learning_rate": 0.00013795620437956203, "loss": 0.0384, "step": 3750 }, { "epoch": 163.04, "eval_cer": 0.35315985130111527, "eval_loss": 2.3685200214385986, "eval_runtime": 1.2136, "eval_samples_per_second": 37.081, "eval_steps_per_second": 2.472, "step": 3750 }, { "epoch": 165.22, "learning_rate": 0.00013576642335766422, "loss": 0.0324, "step": 3800 }, { "epoch": 167.39, "learning_rate": 0.00013357664233576641, "loss": 0.0438, "step": 3850 }, { "epoch": 169.57, "learning_rate": 0.0001313868613138686, "loss": 0.0367, "step": 3900 }, { "epoch": 169.57, "eval_cer": 0.3469640644361834, "eval_loss": 2.397036552429199, "eval_runtime": 1.2259, "eval_samples_per_second": 36.708, "eval_steps_per_second": 2.447, "step": 3900 }, { "epoch": 171.74, "learning_rate": 0.00012919708029197077, "loss": 0.0336, "step": 3950 }, { "epoch": 173.91, "learning_rate": 0.00012700729927007297, "loss": 0.037, "step": 4000 }, { "epoch": 176.09, "learning_rate": 0.00012481751824817516, "loss": 0.0307, "step": 4050 }, { "epoch": 176.09, "eval_cer": 0.3308550185873606, "eval_loss": 2.3530125617980957, "eval_runtime": 1.2484, "eval_samples_per_second": 36.047, "eval_steps_per_second": 2.403, "step": 4050 }, { "epoch": 178.26, "learning_rate": 0.00012262773722627736, "loss": 0.0284, "step": 4100 }, { "epoch": 180.43, "learning_rate": 0.00012043795620437955, "loss": 0.0233, "step": 4150 }, { "epoch": 182.61, "learning_rate": 0.00011824817518248174, "loss": 0.0328, "step": 4200 }, { "epoch": 182.61, "eval_cer": 0.33147459727385375, "eval_loss": 2.3414556980133057, "eval_runtime": 1.2281, "eval_samples_per_second": 36.64, "eval_steps_per_second": 2.443, "step": 4200 }, { "epoch": 184.78, "learning_rate": 0.00011605839416058394, "loss": 0.0285, "step": 4250 }, { "epoch": 186.96, "learning_rate": 0.00011386861313868612, "loss": 0.0222, "step": 4300 }, { "epoch": 189.13, "learning_rate": 0.00011167883211678831, "loss": 0.0271, "step": 4350 }, { "epoch": 189.13, "eval_cer": 0.3308550185873606, "eval_loss": 2.4165024757385254, "eval_runtime": 1.1891, "eval_samples_per_second": 37.844, "eval_steps_per_second": 2.523, "step": 4350 }, { "epoch": 191.3, "learning_rate": 0.00010948905109489051, "loss": 0.0307, "step": 4400 }, { "epoch": 193.48, "learning_rate": 0.00010729927007299269, "loss": 0.023, "step": 4450 }, { "epoch": 195.65, "learning_rate": 0.00010510948905109488, "loss": 0.0213, "step": 4500 }, { "epoch": 195.65, "eval_cer": 0.3451053283767038, "eval_loss": 2.447828769683838, "eval_runtime": 1.1406, "eval_samples_per_second": 39.452, "eval_steps_per_second": 2.63, "step": 4500 }, { "epoch": 197.83, "learning_rate": 0.00010291970802919708, "loss": 0.021, "step": 4550 }, { "epoch": 200.0, "learning_rate": 0.00010072992700729926, "loss": 0.0246, "step": 4600 }, { "epoch": 202.17, "learning_rate": 9.854014598540145e-05, "loss": 0.0193, "step": 4650 }, { "epoch": 202.17, "eval_cer": 0.355638166047088, "eval_loss": 2.524061918258667, "eval_runtime": 1.203, "eval_samples_per_second": 37.406, "eval_steps_per_second": 2.494, "step": 4650 }, { "epoch": 204.35, "learning_rate": 9.635036496350364e-05, "loss": 0.0223, "step": 4700 }, { "epoch": 206.52, "learning_rate": 9.416058394160584e-05, "loss": 0.0223, "step": 4750 }, { "epoch": 208.7, "learning_rate": 9.197080291970803e-05, "loss": 0.0204, "step": 4800 }, { "epoch": 208.7, "eval_cer": 0.34634448574969023, "eval_loss": 2.570009708404541, "eval_runtime": 1.2664, "eval_samples_per_second": 35.533, "eval_steps_per_second": 2.369, "step": 4800 }, { "epoch": 210.87, "learning_rate": 8.978102189781021e-05, "loss": 0.0202, "step": 4850 }, { "epoch": 213.04, "learning_rate": 8.759124087591239e-05, "loss": 0.0193, "step": 4900 }, { "epoch": 215.22, "learning_rate": 8.540145985401459e-05, "loss": 0.0185, "step": 4950 }, { "epoch": 215.22, "eval_cer": 0.31784386617100374, "eval_loss": 2.583724021911621, "eval_runtime": 1.2549, "eval_samples_per_second": 35.859, "eval_steps_per_second": 2.391, "step": 4950 }, { "epoch": 217.39, "learning_rate": 8.321167883211678e-05, "loss": 0.0191, "step": 5000 }, { "epoch": 219.57, "learning_rate": 8.102189781021897e-05, "loss": 0.0169, "step": 5050 }, { "epoch": 221.74, "learning_rate": 7.883211678832117e-05, "loss": 0.0161, "step": 5100 }, { "epoch": 221.74, "eval_cer": 0.33767038413878564, "eval_loss": 2.513859987258911, "eval_runtime": 1.2515, "eval_samples_per_second": 35.958, "eval_steps_per_second": 2.397, "step": 5100 }, { "epoch": 223.91, "learning_rate": 7.664233576642336e-05, "loss": 0.0183, "step": 5150 }, { "epoch": 226.09, "learning_rate": 7.445255474452554e-05, "loss": 0.0228, "step": 5200 }, { "epoch": 228.26, "learning_rate": 7.226277372262774e-05, "loss": 0.0167, "step": 5250 }, { "epoch": 228.26, "eval_cer": 0.3351920693928129, "eval_loss": 2.5287766456604004, "eval_runtime": 1.2044, "eval_samples_per_second": 37.363, "eval_steps_per_second": 2.491, "step": 5250 }, { "epoch": 230.43, "learning_rate": 7.007299270072992e-05, "loss": 0.0181, "step": 5300 }, { "epoch": 232.61, "learning_rate": 6.788321167883211e-05, "loss": 0.0144, "step": 5350 }, { "epoch": 234.78, "learning_rate": 6.56934306569343e-05, "loss": 0.0148, "step": 5400 }, { "epoch": 234.78, "eval_cer": 0.338909541511772, "eval_loss": 2.574066400527954, "eval_runtime": 1.2534, "eval_samples_per_second": 35.904, "eval_steps_per_second": 2.394, "step": 5400 }, { "epoch": 236.96, "learning_rate": 6.350364963503648e-05, "loss": 0.0143, "step": 5450 }, { "epoch": 239.13, "learning_rate": 6.131386861313868e-05, "loss": 0.0197, "step": 5500 }, { "epoch": 241.3, "learning_rate": 5.912408759124087e-05, "loss": 0.0141, "step": 5550 }, { "epoch": 241.3, "eval_cer": 0.338909541511772, "eval_loss": 2.5173895359039307, "eval_runtime": 1.1989, "eval_samples_per_second": 37.536, "eval_steps_per_second": 2.502, "step": 5550 }, { "epoch": 243.48, "learning_rate": 5.693430656934306e-05, "loss": 0.0165, "step": 5600 }, { "epoch": 245.65, "learning_rate": 5.4744525547445253e-05, "loss": 0.0127, "step": 5650 }, { "epoch": 247.83, "learning_rate": 5.255474452554744e-05, "loss": 0.0122, "step": 5700 }, { "epoch": 247.83, "eval_cer": 0.3351920693928129, "eval_loss": 2.5573315620422363, "eval_runtime": 1.2363, "eval_samples_per_second": 36.4, "eval_steps_per_second": 2.427, "step": 5700 }, { "epoch": 250.0, "learning_rate": 5.036496350364963e-05, "loss": 0.0135, "step": 5750 }, { "epoch": 252.17, "learning_rate": 4.817518248175182e-05, "loss": 0.0116, "step": 5800 }, { "epoch": 254.35, "learning_rate": 4.5985401459854016e-05, "loss": 0.0115, "step": 5850 }, { "epoch": 254.35, "eval_cer": 0.32961586121437425, "eval_loss": 2.579023838043213, "eval_runtime": 1.2327, "eval_samples_per_second": 36.506, "eval_steps_per_second": 2.434, "step": 5850 }, { "epoch": 256.52, "learning_rate": 4.3795620437956196e-05, "loss": 0.0141, "step": 5900 }, { "epoch": 258.7, "learning_rate": 4.160583941605839e-05, "loss": 0.0143, "step": 5950 }, { "epoch": 260.87, "learning_rate": 3.9416058394160584e-05, "loss": 0.0141, "step": 6000 }, { "epoch": 260.87, "eval_cer": 0.32032218091697645, "eval_loss": 2.577375888824463, "eval_runtime": 1.2321, "eval_samples_per_second": 36.524, "eval_steps_per_second": 2.435, "step": 6000 }, { "epoch": 263.04, "learning_rate": 3.722627737226277e-05, "loss": 0.0116, "step": 6050 }, { "epoch": 265.22, "learning_rate": 3.503649635036496e-05, "loss": 0.0101, "step": 6100 }, { "epoch": 267.39, "learning_rate": 3.284671532846715e-05, "loss": 0.0123, "step": 6150 }, { "epoch": 267.39, "eval_cer": 0.3308550185873606, "eval_loss": 2.614670753479004, "eval_runtime": 1.1319, "eval_samples_per_second": 39.755, "eval_steps_per_second": 2.65, "step": 6150 }, { "epoch": 269.57, "learning_rate": 3.065693430656934e-05, "loss": 0.0151, "step": 6200 }, { "epoch": 271.74, "learning_rate": 2.846715328467153e-05, "loss": 0.0099, "step": 6250 }, { "epoch": 273.91, "learning_rate": 2.627737226277372e-05, "loss": 0.0214, "step": 6300 }, { "epoch": 273.91, "eval_cer": 0.3302354399008674, "eval_loss": 2.620166778564453, "eval_runtime": 1.262, "eval_samples_per_second": 35.657, "eval_steps_per_second": 2.377, "step": 6300 }, { "epoch": 276.09, "learning_rate": 2.408759124087591e-05, "loss": 0.0085, "step": 6350 }, { "epoch": 278.26, "learning_rate": 2.1897810218978098e-05, "loss": 0.0119, "step": 6400 }, { "epoch": 280.43, "learning_rate": 1.9708029197080292e-05, "loss": 0.0107, "step": 6450 }, { "epoch": 280.43, "eval_cer": 0.32342007434944237, "eval_loss": 2.6263809204101562, "eval_runtime": 1.2547, "eval_samples_per_second": 35.867, "eval_steps_per_second": 2.391, "step": 6450 }, { "epoch": 282.61, "learning_rate": 1.751824817518248e-05, "loss": 0.0107, "step": 6500 }, { "epoch": 284.78, "learning_rate": 1.532846715328467e-05, "loss": 0.0105, "step": 6550 }, { "epoch": 286.96, "learning_rate": 1.313868613138686e-05, "loss": 0.0086, "step": 6600 }, { "epoch": 286.96, "eval_cer": 0.3215613382899628, "eval_loss": 2.607461452484131, "eval_runtime": 1.204, "eval_samples_per_second": 37.374, "eval_steps_per_second": 2.492, "step": 6600 }, { "epoch": 289.13, "learning_rate": 1.0948905109489049e-05, "loss": 0.0095, "step": 6650 }, { "epoch": 291.3, "learning_rate": 8.75912408759124e-06, "loss": 0.0108, "step": 6700 }, { "epoch": 293.48, "learning_rate": 6.56934306569343e-06, "loss": 0.0106, "step": 6750 }, { "epoch": 293.48, "eval_cer": 0.3246592317224288, "eval_loss": 2.595982789993286, "eval_runtime": 1.1323, "eval_samples_per_second": 39.741, "eval_steps_per_second": 2.649, "step": 6750 }, { "epoch": 295.65, "learning_rate": 4.37956204379562e-06, "loss": 0.0143, "step": 6800 }, { "epoch": 297.83, "learning_rate": 2.18978102189781e-06, "loss": 0.0105, "step": 6850 }, { "epoch": 300.0, "learning_rate": 0.0, "loss": 0.0085, "step": 6900 }, { "epoch": 300.0, "eval_cer": 0.32403965303593557, "eval_loss": 2.5951595306396484, "eval_runtime": 1.2068, "eval_samples_per_second": 37.288, "eval_steps_per_second": 2.486, "step": 6900 } ], "logging_steps": 50, "max_steps": 6900, "num_train_epochs": 300, "save_steps": 150, "total_flos": 2.3112928880616276e+19, "trial_name": null, "trial_params": null }