{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "global_step": 6480, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.23, "eval_loss": 3.359571933746338, "eval_runtime": 1.8892, "eval_samples_per_second": 52.934, "eval_steps_per_second": 6.881, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.46, "eval_loss": 2.9279825687408447, "eval_runtime": 1.8734, "eval_samples_per_second": 53.378, "eval_steps_per_second": 6.939, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.69, "eval_loss": 1.5090594291687012, "eval_runtime": 1.876, "eval_samples_per_second": 53.306, "eval_steps_per_second": 6.93, "eval_wer": 0.9649737302977233, "step": 300 }, { "epoch": 0.93, "eval_loss": 0.994255006313324, "eval_runtime": 1.8636, "eval_samples_per_second": 53.659, "eval_steps_per_second": 6.976, "eval_wer": 0.9176882661996497, "step": 400 }, { "epoch": 1.16, "learning_rate": 0.0002982, "loss": 3.1184, "step": 500 }, { "epoch": 1.16, "eval_loss": 0.7590276598930359, "eval_runtime": 1.9271, "eval_samples_per_second": 51.893, "eval_steps_per_second": 6.746, "eval_wer": 0.7793345008756567, "step": 500 }, { "epoch": 1.39, "eval_loss": 0.7336117625236511, "eval_runtime": 1.8782, "eval_samples_per_second": 53.242, "eval_steps_per_second": 6.921, "eval_wer": 0.7408056042031523, "step": 600 }, { "epoch": 1.62, "eval_loss": 0.7039847373962402, "eval_runtime": 1.8772, "eval_samples_per_second": 53.27, "eval_steps_per_second": 6.925, "eval_wer": 0.7618213660245184, "step": 700 }, { "epoch": 1.85, "eval_loss": 0.6815319061279297, "eval_runtime": 1.8953, "eval_samples_per_second": 52.762, "eval_steps_per_second": 6.859, "eval_wer": 0.723292469352014, "step": 800 }, { "epoch": 2.08, "eval_loss": 0.6456977128982544, "eval_runtime": 1.8847, "eval_samples_per_second": 53.058, "eval_steps_per_second": 6.898, "eval_wer": 0.6865148861646234, "step": 900 }, { "epoch": 2.31, "learning_rate": 0.00027511705685618725, "loss": 0.7917, "step": 1000 }, { "epoch": 2.31, "eval_loss": 0.5704939365386963, "eval_runtime": 1.8802, "eval_samples_per_second": 53.187, "eval_steps_per_second": 6.914, "eval_wer": 0.681260945709282, "step": 1000 }, { "epoch": 2.55, "eval_loss": 0.5708367824554443, "eval_runtime": 1.8769, "eval_samples_per_second": 53.28, "eval_steps_per_second": 6.926, "eval_wer": 0.6619964973730298, "step": 1100 }, { "epoch": 2.78, "eval_loss": 0.5888168811798096, "eval_runtime": 1.8801, "eval_samples_per_second": 53.188, "eval_steps_per_second": 6.914, "eval_wer": 0.6462346760070052, "step": 1200 }, { "epoch": 3.01, "eval_loss": 0.6509290337562561, "eval_runtime": 1.8971, "eval_samples_per_second": 52.711, "eval_steps_per_second": 6.852, "eval_wer": 0.6970227670753065, "step": 1300 }, { "epoch": 3.24, "eval_loss": 0.5871497392654419, "eval_runtime": 1.8883, "eval_samples_per_second": 52.957, "eval_steps_per_second": 6.884, "eval_wer": 0.6462346760070052, "step": 1400 }, { "epoch": 3.47, "learning_rate": 0.00025008361204013375, "loss": 0.5909, "step": 1500 }, { "epoch": 3.47, "eval_loss": 0.619912326335907, "eval_runtime": 1.8867, "eval_samples_per_second": 53.002, "eval_steps_per_second": 6.89, "eval_wer": 0.681260945709282, "step": 1500 }, { "epoch": 3.7, "eval_loss": 0.6230051517486572, "eval_runtime": 1.8866, "eval_samples_per_second": 53.006, "eval_steps_per_second": 6.891, "eval_wer": 0.5919439579684763, "step": 1600 }, { "epoch": 3.94, "eval_loss": 0.572126030921936, "eval_runtime": 1.896, "eval_samples_per_second": 52.741, "eval_steps_per_second": 6.856, "eval_wer": 0.6427320490367776, "step": 1700 }, { "epoch": 4.17, "eval_loss": 0.5330815315246582, "eval_runtime": 1.8961, "eval_samples_per_second": 52.741, "eval_steps_per_second": 6.856, "eval_wer": 0.5866900175131349, "step": 1800 }, { "epoch": 4.4, "eval_loss": 0.5560976266860962, "eval_runtime": 1.9173, "eval_samples_per_second": 52.157, "eval_steps_per_second": 6.78, "eval_wer": 0.6007005253940455, "step": 1900 }, { "epoch": 4.63, "learning_rate": 0.00022510033444816051, "loss": 0.4607, "step": 2000 }, { "epoch": 4.63, "eval_loss": 0.5414324402809143, "eval_runtime": 1.8832, "eval_samples_per_second": 53.102, "eval_steps_per_second": 6.903, "eval_wer": 0.5849387040280211, "step": 2000 }, { "epoch": 4.86, "eval_loss": 0.5389540791511536, "eval_runtime": 1.8801, "eval_samples_per_second": 53.19, "eval_steps_per_second": 6.915, "eval_wer": 0.5586690017513135, "step": 2100 }, { "epoch": 5.09, "eval_loss": 0.5313354730606079, "eval_runtime": 1.8925, "eval_samples_per_second": 52.842, "eval_steps_per_second": 6.869, "eval_wer": 0.5569176882661997, "step": 2200 }, { "epoch": 5.32, "eval_loss": 0.5893406271934509, "eval_runtime": 1.8839, "eval_samples_per_second": 53.083, "eval_steps_per_second": 6.901, "eval_wer": 0.5796847635726795, "step": 2300 }, { "epoch": 5.56, "eval_loss": 0.550654411315918, "eval_runtime": 1.8741, "eval_samples_per_second": 53.359, "eval_steps_per_second": 6.937, "eval_wer": 0.5954465849387041, "step": 2400 }, { "epoch": 5.79, "learning_rate": 0.00020006688963210701, "loss": 0.3933, "step": 2500 }, { "epoch": 5.79, "eval_loss": 0.552050769329071, "eval_runtime": 1.8818, "eval_samples_per_second": 53.14, "eval_steps_per_second": 6.908, "eval_wer": 0.6024518388791593, "step": 2500 }, { "epoch": 6.02, "eval_loss": 0.5662926435470581, "eval_runtime": 1.9296, "eval_samples_per_second": 51.824, "eval_steps_per_second": 6.737, "eval_wer": 0.5989492119089317, "step": 2600 }, { "epoch": 6.25, "eval_loss": 0.5636402368545532, "eval_runtime": 1.8763, "eval_samples_per_second": 53.297, "eval_steps_per_second": 6.929, "eval_wer": 0.5831873905429071, "step": 2700 }, { "epoch": 6.48, "eval_loss": 0.5463794469833374, "eval_runtime": 1.8753, "eval_samples_per_second": 53.325, "eval_steps_per_second": 6.932, "eval_wer": 0.5919439579684763, "step": 2800 }, { "epoch": 6.71, "eval_loss": 0.5623293519020081, "eval_runtime": 1.8826, "eval_samples_per_second": 53.118, "eval_steps_per_second": 6.905, "eval_wer": 0.5831873905429071, "step": 2900 }, { "epoch": 6.94, "learning_rate": 0.00017503344481605351, "loss": 0.3367, "step": 3000 }, { "epoch": 6.94, "eval_loss": 0.5324317812919617, "eval_runtime": 1.8849, "eval_samples_per_second": 53.053, "eval_steps_per_second": 6.897, "eval_wer": 0.5691768826619965, "step": 3000 }, { "epoch": 7.18, "eval_loss": 0.5907294750213623, "eval_runtime": 1.8884, "eval_samples_per_second": 52.954, "eval_steps_per_second": 6.884, "eval_wer": 0.5394045534150613, "step": 3100 }, { "epoch": 7.41, "eval_loss": 0.5653238892555237, "eval_runtime": 1.8757, "eval_samples_per_second": 53.314, "eval_steps_per_second": 6.931, "eval_wer": 0.5814360770577933, "step": 3200 }, { "epoch": 7.64, "eval_loss": 0.5707294940948486, "eval_runtime": 1.9074, "eval_samples_per_second": 52.427, "eval_steps_per_second": 6.816, "eval_wer": 0.5814360770577933, "step": 3300 }, { "epoch": 7.87, "eval_loss": 0.5753923654556274, "eval_runtime": 1.8899, "eval_samples_per_second": 52.912, "eval_steps_per_second": 6.879, "eval_wer": 0.542907180385289, "step": 3400 }, { "epoch": 8.1, "learning_rate": 0.00015, "loss": 0.2856, "step": 3500 }, { "epoch": 8.1, "eval_loss": 0.5953063368797302, "eval_runtime": 1.8933, "eval_samples_per_second": 52.818, "eval_steps_per_second": 6.866, "eval_wer": 0.5569176882661997, "step": 3500 }, { "epoch": 8.33, "eval_loss": 0.6274660229682922, "eval_runtime": 1.8873, "eval_samples_per_second": 52.985, "eval_steps_per_second": 6.888, "eval_wer": 0.5394045534150613, "step": 3600 }, { "epoch": 8.56, "eval_loss": 0.6253136992454529, "eval_runtime": 1.8922, "eval_samples_per_second": 52.847, "eval_steps_per_second": 6.87, "eval_wer": 0.5569176882661997, "step": 3700 }, { "epoch": 8.8, "eval_loss": 0.5930343866348267, "eval_runtime": 1.9165, "eval_samples_per_second": 52.177, "eval_steps_per_second": 6.783, "eval_wer": 0.542907180385289, "step": 3800 }, { "epoch": 9.03, "eval_loss": 0.6082107424736023, "eval_runtime": 1.9067, "eval_samples_per_second": 52.448, "eval_steps_per_second": 6.818, "eval_wer": 0.521891418563923, "step": 3900 }, { "epoch": 9.26, "learning_rate": 0.00012496655518394646, "loss": 0.2522, "step": 4000 }, { "epoch": 9.26, "eval_loss": 0.6026180982589722, "eval_runtime": 1.9243, "eval_samples_per_second": 51.967, "eval_steps_per_second": 6.756, "eval_wer": 0.5446584938704028, "step": 4000 }, { "epoch": 9.49, "eval_loss": 0.6052154302597046, "eval_runtime": 1.9113, "eval_samples_per_second": 52.32, "eval_steps_per_second": 6.802, "eval_wer": 0.5271453590192644, "step": 4100 }, { "epoch": 9.72, "eval_loss": 0.5870827436447144, "eval_runtime": 1.8877, "eval_samples_per_second": 52.973, "eval_steps_per_second": 6.887, "eval_wer": 0.521891418563923, "step": 4200 }, { "epoch": 9.95, "eval_loss": 0.5870257019996643, "eval_runtime": 1.8862, "eval_samples_per_second": 53.017, "eval_steps_per_second": 6.892, "eval_wer": 0.5236427320490368, "step": 4300 }, { "epoch": 10.19, "eval_loss": 0.5880929231643677, "eval_runtime": 1.9034, "eval_samples_per_second": 52.538, "eval_steps_per_second": 6.83, "eval_wer": 0.5131348511383538, "step": 4400 }, { "epoch": 10.42, "learning_rate": 9.993311036789297e-05, "loss": 0.2167, "step": 4500 }, { "epoch": 10.42, "eval_loss": 0.6122171878814697, "eval_runtime": 1.8922, "eval_samples_per_second": 52.847, "eval_steps_per_second": 6.87, "eval_wer": 0.5288966725043783, "step": 4500 }, { "epoch": 10.65, "eval_loss": 0.612826406955719, "eval_runtime": 1.891, "eval_samples_per_second": 52.882, "eval_steps_per_second": 6.875, "eval_wer": 0.5166374781085814, "step": 4600 }, { "epoch": 10.88, "eval_loss": 0.6134529113769531, "eval_runtime": 1.891, "eval_samples_per_second": 52.883, "eval_steps_per_second": 6.875, "eval_wer": 0.5376532399299475, "step": 4700 }, { "epoch": 11.11, "eval_loss": 0.6054602265357971, "eval_runtime": 1.8917, "eval_samples_per_second": 52.863, "eval_steps_per_second": 6.872, "eval_wer": 0.5183887915936952, "step": 4800 }, { "epoch": 11.34, "eval_loss": 0.6724901795387268, "eval_runtime": 1.8847, "eval_samples_per_second": 53.058, "eval_steps_per_second": 6.898, "eval_wer": 0.5569176882661997, "step": 4900 }, { "epoch": 11.57, "learning_rate": 7.489966555183946e-05, "loss": 0.1965, "step": 5000 }, { "epoch": 11.57, "eval_loss": 0.6481964588165283, "eval_runtime": 1.9092, "eval_samples_per_second": 52.378, "eval_steps_per_second": 6.809, "eval_wer": 0.542907180385289, "step": 5000 }, { "epoch": 11.81, "eval_loss": 0.6037153601646423, "eval_runtime": 1.8885, "eval_samples_per_second": 52.953, "eval_steps_per_second": 6.884, "eval_wer": 0.5096322241681261, "step": 5100 }, { "epoch": 12.04, "eval_loss": 0.5931165218353271, "eval_runtime": 1.9024, "eval_samples_per_second": 52.566, "eval_steps_per_second": 6.834, "eval_wer": 0.5131348511383538, "step": 5200 }, { "epoch": 12.27, "eval_loss": 0.5853209495544434, "eval_runtime": 1.9149, "eval_samples_per_second": 52.223, "eval_steps_per_second": 6.789, "eval_wer": 0.51138353765324, "step": 5300 }, { "epoch": 12.5, "eval_loss": 0.5798044800758362, "eval_runtime": 1.9229, "eval_samples_per_second": 52.006, "eval_steps_per_second": 6.761, "eval_wer": 0.521891418563923, "step": 5400 }, { "epoch": 12.73, "learning_rate": 4.991638795986622e-05, "loss": 0.172, "step": 5500 }, { "epoch": 12.73, "eval_loss": 0.5774720907211304, "eval_runtime": 1.8968, "eval_samples_per_second": 52.72, "eval_steps_per_second": 6.854, "eval_wer": 0.500875656742557, "step": 5500 }, { "epoch": 12.96, "eval_loss": 0.5782347321510315, "eval_runtime": 1.9216, "eval_samples_per_second": 52.041, "eval_steps_per_second": 6.765, "eval_wer": 0.5043782837127846, "step": 5600 }, { "epoch": 13.19, "eval_loss": 0.5803666114807129, "eval_runtime": 1.9303, "eval_samples_per_second": 51.806, "eval_steps_per_second": 6.735, "eval_wer": 0.5183887915936952, "step": 5700 }, { "epoch": 13.43, "eval_loss": 0.5976961255073547, "eval_runtime": 1.8886, "eval_samples_per_second": 52.948, "eval_steps_per_second": 6.883, "eval_wer": 0.521891418563923, "step": 5800 }, { "epoch": 13.66, "eval_loss": 0.6069247722625732, "eval_runtime": 1.9188, "eval_samples_per_second": 52.117, "eval_steps_per_second": 6.775, "eval_wer": 0.5236427320490368, "step": 5900 }, { "epoch": 13.89, "learning_rate": 2.488294314381271e-05, "loss": 0.1622, "step": 6000 }, { "epoch": 13.89, "eval_loss": 0.5849621295928955, "eval_runtime": 1.8831, "eval_samples_per_second": 53.103, "eval_steps_per_second": 6.903, "eval_wer": 0.5131348511383538, "step": 6000 }, { "epoch": 14.12, "eval_loss": 0.5757902264595032, "eval_runtime": 1.9024, "eval_samples_per_second": 52.566, "eval_steps_per_second": 6.834, "eval_wer": 0.5096322241681261, "step": 6100 }, { "epoch": 14.35, "eval_loss": 0.5752313137054443, "eval_runtime": 1.8854, "eval_samples_per_second": 53.038, "eval_steps_per_second": 6.895, "eval_wer": 0.500875656742557, "step": 6200 }, { "epoch": 14.58, "eval_loss": 0.5727171301841736, "eval_runtime": 1.9071, "eval_samples_per_second": 52.437, "eval_steps_per_second": 6.817, "eval_wer": 0.5183887915936952, "step": 6300 }, { "epoch": 14.81, "eval_loss": 0.5795398354530334, "eval_runtime": 1.8851, "eval_samples_per_second": 53.048, "eval_steps_per_second": 6.896, "eval_wer": 0.5043782837127846, "step": 6400 }, { "epoch": 15.0, "step": 6480, "total_flos": 1.5048474592820595e+19, "train_loss": 0.5518865008413055, "train_runtime": 5863.5283, "train_samples_per_second": 35.331, "train_steps_per_second": 1.105 } ], "max_steps": 6480, "num_train_epochs": 15, "total_flos": 1.5048474592820595e+19, "trial_name": null, "trial_params": null }