|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"global_step": 6480, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 3.359571933746338, |
|
"eval_runtime": 1.8892, |
|
"eval_samples_per_second": 52.934, |
|
"eval_steps_per_second": 6.881, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 2.9279825687408447, |
|
"eval_runtime": 1.8734, |
|
"eval_samples_per_second": 53.378, |
|
"eval_steps_per_second": 6.939, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.5090594291687012, |
|
"eval_runtime": 1.876, |
|
"eval_samples_per_second": 53.306, |
|
"eval_steps_per_second": 6.93, |
|
"eval_wer": 0.9649737302977233, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 0.994255006313324, |
|
"eval_runtime": 1.8636, |
|
"eval_samples_per_second": 53.659, |
|
"eval_steps_per_second": 6.976, |
|
"eval_wer": 0.9176882661996497, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0002982, |
|
"loss": 3.1184, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_loss": 0.7590276598930359, |
|
"eval_runtime": 1.9271, |
|
"eval_samples_per_second": 51.893, |
|
"eval_steps_per_second": 6.746, |
|
"eval_wer": 0.7793345008756567, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_loss": 0.7336117625236511, |
|
"eval_runtime": 1.8782, |
|
"eval_samples_per_second": 53.242, |
|
"eval_steps_per_second": 6.921, |
|
"eval_wer": 0.7408056042031523, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 0.7039847373962402, |
|
"eval_runtime": 1.8772, |
|
"eval_samples_per_second": 53.27, |
|
"eval_steps_per_second": 6.925, |
|
"eval_wer": 0.7618213660245184, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_loss": 0.6815319061279297, |
|
"eval_runtime": 1.8953, |
|
"eval_samples_per_second": 52.762, |
|
"eval_steps_per_second": 6.859, |
|
"eval_wer": 0.723292469352014, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_loss": 0.6456977128982544, |
|
"eval_runtime": 1.8847, |
|
"eval_samples_per_second": 53.058, |
|
"eval_steps_per_second": 6.898, |
|
"eval_wer": 0.6865148861646234, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.00027511705685618725, |
|
"loss": 0.7917, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_loss": 0.5704939365386963, |
|
"eval_runtime": 1.8802, |
|
"eval_samples_per_second": 53.187, |
|
"eval_steps_per_second": 6.914, |
|
"eval_wer": 0.681260945709282, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 0.5708367824554443, |
|
"eval_runtime": 1.8769, |
|
"eval_samples_per_second": 53.28, |
|
"eval_steps_per_second": 6.926, |
|
"eval_wer": 0.6619964973730298, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_loss": 0.5888168811798096, |
|
"eval_runtime": 1.8801, |
|
"eval_samples_per_second": 53.188, |
|
"eval_steps_per_second": 6.914, |
|
"eval_wer": 0.6462346760070052, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_loss": 0.6509290337562561, |
|
"eval_runtime": 1.8971, |
|
"eval_samples_per_second": 52.711, |
|
"eval_steps_per_second": 6.852, |
|
"eval_wer": 0.6970227670753065, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"eval_loss": 0.5871497392654419, |
|
"eval_runtime": 1.8883, |
|
"eval_samples_per_second": 52.957, |
|
"eval_steps_per_second": 6.884, |
|
"eval_wer": 0.6462346760070052, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 0.00025008361204013375, |
|
"loss": 0.5909, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"eval_loss": 0.619912326335907, |
|
"eval_runtime": 1.8867, |
|
"eval_samples_per_second": 53.002, |
|
"eval_steps_per_second": 6.89, |
|
"eval_wer": 0.681260945709282, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_loss": 0.6230051517486572, |
|
"eval_runtime": 1.8866, |
|
"eval_samples_per_second": 53.006, |
|
"eval_steps_per_second": 6.891, |
|
"eval_wer": 0.5919439579684763, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_loss": 0.572126030921936, |
|
"eval_runtime": 1.896, |
|
"eval_samples_per_second": 52.741, |
|
"eval_steps_per_second": 6.856, |
|
"eval_wer": 0.6427320490367776, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"eval_loss": 0.5330815315246582, |
|
"eval_runtime": 1.8961, |
|
"eval_samples_per_second": 52.741, |
|
"eval_steps_per_second": 6.856, |
|
"eval_wer": 0.5866900175131349, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_loss": 0.5560976266860962, |
|
"eval_runtime": 1.9173, |
|
"eval_samples_per_second": 52.157, |
|
"eval_steps_per_second": 6.78, |
|
"eval_wer": 0.6007005253940455, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 0.00022510033444816051, |
|
"loss": 0.4607, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"eval_loss": 0.5414324402809143, |
|
"eval_runtime": 1.8832, |
|
"eval_samples_per_second": 53.102, |
|
"eval_steps_per_second": 6.903, |
|
"eval_wer": 0.5849387040280211, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"eval_loss": 0.5389540791511536, |
|
"eval_runtime": 1.8801, |
|
"eval_samples_per_second": 53.19, |
|
"eval_steps_per_second": 6.915, |
|
"eval_wer": 0.5586690017513135, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"eval_loss": 0.5313354730606079, |
|
"eval_runtime": 1.8925, |
|
"eval_samples_per_second": 52.842, |
|
"eval_steps_per_second": 6.869, |
|
"eval_wer": 0.5569176882661997, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"eval_loss": 0.5893406271934509, |
|
"eval_runtime": 1.8839, |
|
"eval_samples_per_second": 53.083, |
|
"eval_steps_per_second": 6.901, |
|
"eval_wer": 0.5796847635726795, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"eval_loss": 0.550654411315918, |
|
"eval_runtime": 1.8741, |
|
"eval_samples_per_second": 53.359, |
|
"eval_steps_per_second": 6.937, |
|
"eval_wer": 0.5954465849387041, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 0.00020006688963210701, |
|
"loss": 0.3933, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"eval_loss": 0.552050769329071, |
|
"eval_runtime": 1.8818, |
|
"eval_samples_per_second": 53.14, |
|
"eval_steps_per_second": 6.908, |
|
"eval_wer": 0.6024518388791593, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"eval_loss": 0.5662926435470581, |
|
"eval_runtime": 1.9296, |
|
"eval_samples_per_second": 51.824, |
|
"eval_steps_per_second": 6.737, |
|
"eval_wer": 0.5989492119089317, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"eval_loss": 0.5636402368545532, |
|
"eval_runtime": 1.8763, |
|
"eval_samples_per_second": 53.297, |
|
"eval_steps_per_second": 6.929, |
|
"eval_wer": 0.5831873905429071, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"eval_loss": 0.5463794469833374, |
|
"eval_runtime": 1.8753, |
|
"eval_samples_per_second": 53.325, |
|
"eval_steps_per_second": 6.932, |
|
"eval_wer": 0.5919439579684763, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"eval_loss": 0.5623293519020081, |
|
"eval_runtime": 1.8826, |
|
"eval_samples_per_second": 53.118, |
|
"eval_steps_per_second": 6.905, |
|
"eval_wer": 0.5831873905429071, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 0.00017503344481605351, |
|
"loss": 0.3367, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"eval_loss": 0.5324317812919617, |
|
"eval_runtime": 1.8849, |
|
"eval_samples_per_second": 53.053, |
|
"eval_steps_per_second": 6.897, |
|
"eval_wer": 0.5691768826619965, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"eval_loss": 0.5907294750213623, |
|
"eval_runtime": 1.8884, |
|
"eval_samples_per_second": 52.954, |
|
"eval_steps_per_second": 6.884, |
|
"eval_wer": 0.5394045534150613, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"eval_loss": 0.5653238892555237, |
|
"eval_runtime": 1.8757, |
|
"eval_samples_per_second": 53.314, |
|
"eval_steps_per_second": 6.931, |
|
"eval_wer": 0.5814360770577933, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"eval_loss": 0.5707294940948486, |
|
"eval_runtime": 1.9074, |
|
"eval_samples_per_second": 52.427, |
|
"eval_steps_per_second": 6.816, |
|
"eval_wer": 0.5814360770577933, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"eval_loss": 0.5753923654556274, |
|
"eval_runtime": 1.8899, |
|
"eval_samples_per_second": 52.912, |
|
"eval_steps_per_second": 6.879, |
|
"eval_wer": 0.542907180385289, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 0.00015, |
|
"loss": 0.2856, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"eval_loss": 0.5953063368797302, |
|
"eval_runtime": 1.8933, |
|
"eval_samples_per_second": 52.818, |
|
"eval_steps_per_second": 6.866, |
|
"eval_wer": 0.5569176882661997, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"eval_loss": 0.6274660229682922, |
|
"eval_runtime": 1.8873, |
|
"eval_samples_per_second": 52.985, |
|
"eval_steps_per_second": 6.888, |
|
"eval_wer": 0.5394045534150613, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"eval_loss": 0.6253136992454529, |
|
"eval_runtime": 1.8922, |
|
"eval_samples_per_second": 52.847, |
|
"eval_steps_per_second": 6.87, |
|
"eval_wer": 0.5569176882661997, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"eval_loss": 0.5930343866348267, |
|
"eval_runtime": 1.9165, |
|
"eval_samples_per_second": 52.177, |
|
"eval_steps_per_second": 6.783, |
|
"eval_wer": 0.542907180385289, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"eval_loss": 0.6082107424736023, |
|
"eval_runtime": 1.9067, |
|
"eval_samples_per_second": 52.448, |
|
"eval_steps_per_second": 6.818, |
|
"eval_wer": 0.521891418563923, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 0.00012496655518394646, |
|
"loss": 0.2522, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"eval_loss": 0.6026180982589722, |
|
"eval_runtime": 1.9243, |
|
"eval_samples_per_second": 51.967, |
|
"eval_steps_per_second": 6.756, |
|
"eval_wer": 0.5446584938704028, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"eval_loss": 0.6052154302597046, |
|
"eval_runtime": 1.9113, |
|
"eval_samples_per_second": 52.32, |
|
"eval_steps_per_second": 6.802, |
|
"eval_wer": 0.5271453590192644, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"eval_loss": 0.5870827436447144, |
|
"eval_runtime": 1.8877, |
|
"eval_samples_per_second": 52.973, |
|
"eval_steps_per_second": 6.887, |
|
"eval_wer": 0.521891418563923, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"eval_loss": 0.5870257019996643, |
|
"eval_runtime": 1.8862, |
|
"eval_samples_per_second": 53.017, |
|
"eval_steps_per_second": 6.892, |
|
"eval_wer": 0.5236427320490368, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 10.19, |
|
"eval_loss": 0.5880929231643677, |
|
"eval_runtime": 1.9034, |
|
"eval_samples_per_second": 52.538, |
|
"eval_steps_per_second": 6.83, |
|
"eval_wer": 0.5131348511383538, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"learning_rate": 9.993311036789297e-05, |
|
"loss": 0.2167, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"eval_loss": 0.6122171878814697, |
|
"eval_runtime": 1.8922, |
|
"eval_samples_per_second": 52.847, |
|
"eval_steps_per_second": 6.87, |
|
"eval_wer": 0.5288966725043783, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"eval_loss": 0.612826406955719, |
|
"eval_runtime": 1.891, |
|
"eval_samples_per_second": 52.882, |
|
"eval_steps_per_second": 6.875, |
|
"eval_wer": 0.5166374781085814, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"eval_loss": 0.6134529113769531, |
|
"eval_runtime": 1.891, |
|
"eval_samples_per_second": 52.883, |
|
"eval_steps_per_second": 6.875, |
|
"eval_wer": 0.5376532399299475, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"eval_loss": 0.6054602265357971, |
|
"eval_runtime": 1.8917, |
|
"eval_samples_per_second": 52.863, |
|
"eval_steps_per_second": 6.872, |
|
"eval_wer": 0.5183887915936952, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 11.34, |
|
"eval_loss": 0.6724901795387268, |
|
"eval_runtime": 1.8847, |
|
"eval_samples_per_second": 53.058, |
|
"eval_steps_per_second": 6.898, |
|
"eval_wer": 0.5569176882661997, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 11.57, |
|
"learning_rate": 7.489966555183946e-05, |
|
"loss": 0.1965, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 11.57, |
|
"eval_loss": 0.6481964588165283, |
|
"eval_runtime": 1.9092, |
|
"eval_samples_per_second": 52.378, |
|
"eval_steps_per_second": 6.809, |
|
"eval_wer": 0.542907180385289, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"eval_loss": 0.6037153601646423, |
|
"eval_runtime": 1.8885, |
|
"eval_samples_per_second": 52.953, |
|
"eval_steps_per_second": 6.884, |
|
"eval_wer": 0.5096322241681261, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 12.04, |
|
"eval_loss": 0.5931165218353271, |
|
"eval_runtime": 1.9024, |
|
"eval_samples_per_second": 52.566, |
|
"eval_steps_per_second": 6.834, |
|
"eval_wer": 0.5131348511383538, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"eval_loss": 0.5853209495544434, |
|
"eval_runtime": 1.9149, |
|
"eval_samples_per_second": 52.223, |
|
"eval_steps_per_second": 6.789, |
|
"eval_wer": 0.51138353765324, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"eval_loss": 0.5798044800758362, |
|
"eval_runtime": 1.9229, |
|
"eval_samples_per_second": 52.006, |
|
"eval_steps_per_second": 6.761, |
|
"eval_wer": 0.521891418563923, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 12.73, |
|
"learning_rate": 4.991638795986622e-05, |
|
"loss": 0.172, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 12.73, |
|
"eval_loss": 0.5774720907211304, |
|
"eval_runtime": 1.8968, |
|
"eval_samples_per_second": 52.72, |
|
"eval_steps_per_second": 6.854, |
|
"eval_wer": 0.500875656742557, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"eval_loss": 0.5782347321510315, |
|
"eval_runtime": 1.9216, |
|
"eval_samples_per_second": 52.041, |
|
"eval_steps_per_second": 6.765, |
|
"eval_wer": 0.5043782837127846, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"eval_loss": 0.5803666114807129, |
|
"eval_runtime": 1.9303, |
|
"eval_samples_per_second": 51.806, |
|
"eval_steps_per_second": 6.735, |
|
"eval_wer": 0.5183887915936952, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 13.43, |
|
"eval_loss": 0.5976961255073547, |
|
"eval_runtime": 1.8886, |
|
"eval_samples_per_second": 52.948, |
|
"eval_steps_per_second": 6.883, |
|
"eval_wer": 0.521891418563923, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 13.66, |
|
"eval_loss": 0.6069247722625732, |
|
"eval_runtime": 1.9188, |
|
"eval_samples_per_second": 52.117, |
|
"eval_steps_per_second": 6.775, |
|
"eval_wer": 0.5236427320490368, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"learning_rate": 2.488294314381271e-05, |
|
"loss": 0.1622, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"eval_loss": 0.5849621295928955, |
|
"eval_runtime": 1.8831, |
|
"eval_samples_per_second": 53.103, |
|
"eval_steps_per_second": 6.903, |
|
"eval_wer": 0.5131348511383538, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 14.12, |
|
"eval_loss": 0.5757902264595032, |
|
"eval_runtime": 1.9024, |
|
"eval_samples_per_second": 52.566, |
|
"eval_steps_per_second": 6.834, |
|
"eval_wer": 0.5096322241681261, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"eval_loss": 0.5752313137054443, |
|
"eval_runtime": 1.8854, |
|
"eval_samples_per_second": 53.038, |
|
"eval_steps_per_second": 6.895, |
|
"eval_wer": 0.500875656742557, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 14.58, |
|
"eval_loss": 0.5727171301841736, |
|
"eval_runtime": 1.9071, |
|
"eval_samples_per_second": 52.437, |
|
"eval_steps_per_second": 6.817, |
|
"eval_wer": 0.5183887915936952, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"eval_loss": 0.5795398354530334, |
|
"eval_runtime": 1.8851, |
|
"eval_samples_per_second": 53.048, |
|
"eval_steps_per_second": 6.896, |
|
"eval_wer": 0.5043782837127846, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 6480, |
|
"total_flos": 1.5048474592820595e+19, |
|
"train_loss": 0.5518865008413055, |
|
"train_runtime": 5863.5283, |
|
"train_samples_per_second": 35.331, |
|
"train_steps_per_second": 1.105 |
|
} |
|
], |
|
"max_steps": 6480, |
|
"num_train_epochs": 15, |
|
"total_flos": 1.5048474592820595e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|