{ "best_metric": null, "best_model_checkpoint": null, "epoch": 35.80246913580247, "global_step": 2900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.23, "learning_rate": 0.0001, "loss": 0.0625, "step": 100 }, { "epoch": 1.23, "eval_loss": 0.28051668405532837, "eval_runtime": 198.6071, "eval_samples_per_second": 16.374, "eval_steps_per_second": 2.049, "eval_wer": 0.2381097005406062, "step": 100 }, { "epoch": 2.47, "learning_rate": 9.987639060568604e-05, "loss": 0.047, "step": 200 }, { "epoch": 2.47, "eval_loss": 0.34423157572746277, "eval_runtime": 147.7154, "eval_samples_per_second": 22.015, "eval_steps_per_second": 2.755, "eval_wer": 0.2334325457085586, "step": 200 }, { "epoch": 3.7, "learning_rate": 9.975278121137207e-05, "loss": 0.0409, "step": 300 }, { "epoch": 3.7, "eval_loss": 0.3597787916660309, "eval_runtime": 156.092, "eval_samples_per_second": 20.834, "eval_steps_per_second": 2.607, "eval_wer": 0.23124582396889995, "step": 300 }, { "epoch": 4.94, "learning_rate": 9.96291718170581e-05, "loss": 0.0413, "step": 400 }, { "epoch": 4.94, "eval_loss": 0.35860675573349, "eval_runtime": 151.5312, "eval_samples_per_second": 21.461, "eval_steps_per_second": 2.686, "eval_wer": 0.2421794326671931, "step": 400 }, { "epoch": 6.17, "learning_rate": 9.950556242274414e-05, "loss": 0.0388, "step": 500 }, { "epoch": 6.17, "eval_loss": 0.3748931586742401, "eval_runtime": 154.1563, "eval_samples_per_second": 21.095, "eval_steps_per_second": 2.64, "eval_wer": 0.23914231913988945, "step": 500 }, { "epoch": 7.41, "learning_rate": 9.938195302843017e-05, "loss": 0.0383, "step": 600 }, { "epoch": 7.41, "eval_loss": 0.36109668016433716, "eval_runtime": 163.2656, "eval_samples_per_second": 19.918, "eval_steps_per_second": 2.493, "eval_wer": 0.2352548138249408, "step": 600 }, { "epoch": 8.64, "learning_rate": 9.92583436341162e-05, "loss": 0.0381, "step": 700 }, { "epoch": 8.64, "eval_loss": 0.3883003294467926, "eval_runtime": 164.0468, "eval_samples_per_second": 19.824, "eval_steps_per_second": 2.481, "eval_wer": 0.23683411285913866, "step": 700 }, { "epoch": 9.88, "learning_rate": 9.913473423980223e-05, "loss": 0.0379, "step": 800 }, { "epoch": 9.88, "eval_loss": 0.3676028549671173, "eval_runtime": 174.6686, "eval_samples_per_second": 18.618, "eval_steps_per_second": 2.33, "eval_wer": 0.2324606693798214, "step": 800 }, { "epoch": 11.11, "learning_rate": 9.901112484548825e-05, "loss": 0.0364, "step": 900 }, { "epoch": 11.11, "eval_loss": 0.37265580892562866, "eval_runtime": 150.5621, "eval_samples_per_second": 21.599, "eval_steps_per_second": 2.703, "eval_wer": 0.23331106116746644, "step": 900 }, { "epoch": 12.35, "learning_rate": 9.88875154511743e-05, "loss": 0.0355, "step": 1000 }, { "epoch": 12.35, "eval_loss": 0.3740839958190918, "eval_runtime": 180.5633, "eval_samples_per_second": 18.01, "eval_steps_per_second": 2.254, "eval_wer": 0.23282512300309785, "step": 1000 }, { "epoch": 13.58, "learning_rate": 9.876390605686032e-05, "loss": 0.0365, "step": 1100 }, { "epoch": 13.58, "eval_loss": 0.37167179584503174, "eval_runtime": 177.8674, "eval_samples_per_second": 18.283, "eval_steps_per_second": 2.288, "eval_wer": 0.23580149425985544, "step": 1100 }, { "epoch": 14.81, "learning_rate": 9.864029666254637e-05, "loss": 0.0343, "step": 1200 }, { "epoch": 14.81, "eval_loss": 0.3842860460281372, "eval_runtime": 182.4399, "eval_samples_per_second": 17.825, "eval_steps_per_second": 2.231, "eval_wer": 0.23580149425985544, "step": 1200 }, { "epoch": 16.05, "learning_rate": 9.851668726823239e-05, "loss": 0.0358, "step": 1300 }, { "epoch": 16.05, "eval_loss": 0.3594246208667755, "eval_runtime": 180.3747, "eval_samples_per_second": 18.029, "eval_steps_per_second": 2.256, "eval_wer": 0.23616594788313186, "step": 1300 }, { "epoch": 17.28, "learning_rate": 9.839307787391843e-05, "loss": 0.0343, "step": 1400 }, { "epoch": 17.28, "eval_loss": 0.4051465690135956, "eval_runtime": 180.0644, "eval_samples_per_second": 18.06, "eval_steps_per_second": 2.26, "eval_wer": 0.2351940715543947, "step": 1400 }, { "epoch": 18.52, "learning_rate": 9.826946847960445e-05, "loss": 0.0363, "step": 1500 }, { "epoch": 18.52, "eval_loss": 0.3952256739139557, "eval_runtime": 185.3946, "eval_samples_per_second": 17.541, "eval_steps_per_second": 2.195, "eval_wer": 0.23962825730425805, "step": 1500 }, { "epoch": 19.75, "learning_rate": 9.814585908529048e-05, "loss": 0.0382, "step": 1600 }, { "epoch": 19.75, "eval_loss": 0.3582073450088501, "eval_runtime": 179.8373, "eval_samples_per_second": 18.083, "eval_steps_per_second": 2.263, "eval_wer": 0.23756302010569155, "step": 1600 }, { "epoch": 20.99, "learning_rate": 9.802224969097652e-05, "loss": 0.0337, "step": 1700 }, { "epoch": 20.99, "eval_loss": 0.3877179026603699, "eval_runtime": 183.3138, "eval_samples_per_second": 17.74, "eval_steps_per_second": 2.22, "eval_wer": 0.2347081333900261, "step": 1700 }, { "epoch": 22.22, "learning_rate": 9.789864029666255e-05, "loss": 0.0331, "step": 1800 }, { "epoch": 22.22, "eval_loss": 0.3826364576816559, "eval_runtime": 174.2395, "eval_samples_per_second": 18.664, "eval_steps_per_second": 2.336, "eval_wer": 0.2383526696227905, "step": 1800 }, { "epoch": 23.46, "learning_rate": 9.777503090234858e-05, "loss": 0.0321, "step": 1900 }, { "epoch": 23.46, "eval_loss": 0.3872096538543701, "eval_runtime": 181.6396, "eval_samples_per_second": 17.904, "eval_steps_per_second": 2.241, "eval_wer": 0.23847415416388265, "step": 1900 }, { "epoch": 24.69, "learning_rate": 9.765142150803462e-05, "loss": 0.0342, "step": 2000 }, { "epoch": 24.69, "eval_loss": 0.4173298180103302, "eval_runtime": 185.9384, "eval_samples_per_second": 17.49, "eval_steps_per_second": 2.189, "eval_wer": 0.24266537083156167, "step": 2000 }, { "epoch": 25.93, "learning_rate": 9.752781211372065e-05, "loss": 0.0348, "step": 2100 }, { "epoch": 25.93, "eval_loss": 0.36838769912719727, "eval_runtime": 184.1892, "eval_samples_per_second": 17.656, "eval_steps_per_second": 2.21, "eval_wer": 0.23671262831804654, "step": 2100 }, { "epoch": 27.16, "learning_rate": 9.740420271940668e-05, "loss": 0.0332, "step": 2200 }, { "epoch": 27.16, "eval_loss": 0.3941015899181366, "eval_runtime": 184.3624, "eval_samples_per_second": 17.639, "eval_steps_per_second": 2.208, "eval_wer": 0.23233918483872928, "step": 2200 }, { "epoch": 28.4, "learning_rate": 9.728059332509271e-05, "loss": 0.0339, "step": 2300 }, { "epoch": 28.4, "eval_loss": 0.3854130506515503, "eval_runtime": 186.7325, "eval_samples_per_second": 17.415, "eval_steps_per_second": 2.18, "eval_wer": 0.24254388629046955, "step": 2300 }, { "epoch": 29.63, "learning_rate": 9.715698393077875e-05, "loss": 0.0349, "step": 2400 }, { "epoch": 29.63, "eval_loss": 0.39330288767814636, "eval_runtime": 183.7314, "eval_samples_per_second": 17.7, "eval_steps_per_second": 2.215, "eval_wer": 0.23774524691732976, "step": 2400 }, { "epoch": 30.86, "learning_rate": 9.703337453646477e-05, "loss": 0.0327, "step": 2500 }, { "epoch": 30.86, "eval_loss": 0.3882978558540344, "eval_runtime": 186.0182, "eval_samples_per_second": 17.482, "eval_steps_per_second": 2.188, "eval_wer": 0.2347081333900261, "step": 2500 }, { "epoch": 32.1, "learning_rate": 9.690976514215081e-05, "loss": 0.0335, "step": 2600 }, { "epoch": 32.1, "eval_loss": 0.3765297532081604, "eval_runtime": 152.737, "eval_samples_per_second": 21.292, "eval_steps_per_second": 2.665, "eval_wer": 0.2403571645508109, "step": 2600 }, { "epoch": 33.33, "learning_rate": 9.678615574783683e-05, "loss": 0.0331, "step": 2700 }, { "epoch": 33.33, "eval_loss": 0.37883350253105164, "eval_runtime": 171.3633, "eval_samples_per_second": 18.977, "eval_steps_per_second": 2.375, "eval_wer": 0.23616594788313186, "step": 2700 }, { "epoch": 34.57, "learning_rate": 9.666254635352288e-05, "loss": 0.0312, "step": 2800 }, { "epoch": 34.57, "eval_loss": 0.3879595696926117, "eval_runtime": 186.5981, "eval_samples_per_second": 17.428, "eval_steps_per_second": 2.181, "eval_wer": 0.2289376176881492, "step": 2800 }, { "epoch": 35.8, "learning_rate": 9.65389369592089e-05, "loss": 0.0306, "step": 2900 }, { "epoch": 35.8, "eval_loss": 0.38653597235679626, "eval_runtime": 182.3902, "eval_samples_per_second": 17.83, "eval_steps_per_second": 2.231, "eval_wer": 0.23258215392091355, "step": 2900 } ], "max_steps": 81000, "num_train_epochs": 1000, "total_flos": 9.224758770642593e+19, "trial_name": null, "trial_params": null }