|
{ |
|
"best_metric": 56.651029055690074, |
|
"best_model_checkpoint": "./checkpoint-600", |
|
"epoch": 100.0, |
|
"global_step": 700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 1.9200000000000003e-06, |
|
"loss": 2.1183, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_loss": 1.3170489072799683, |
|
"eval_runtime": 435.5063, |
|
"eval_samples_per_second": 1.176, |
|
"eval_steps_per_second": 0.147, |
|
"eval_wer": 76.95217917675545, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 3.920000000000001e-06, |
|
"loss": 0.8565, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"eval_loss": 0.9366902709007263, |
|
"eval_runtime": 473.9252, |
|
"eval_samples_per_second": 1.08, |
|
"eval_steps_per_second": 0.135, |
|
"eval_wer": 61.99303874092009, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 4.12e-06, |
|
"loss": 0.5824, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 4.32e-06, |
|
"loss": 0.5195, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 4.520000000000001e-06, |
|
"loss": 0.4472, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 4.7200000000000005e-06, |
|
"loss": 0.4615, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 4.92e-06, |
|
"loss": 0.4068, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 5.12e-06, |
|
"loss": 0.3408, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 5.320000000000001e-06, |
|
"loss": 0.3718, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 10.37, |
|
"learning_rate": 5.5200000000000005e-06, |
|
"loss": 0.2749, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 10.74, |
|
"learning_rate": 5.72e-06, |
|
"loss": 0.2625, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 5.92e-06, |
|
"loss": 0.2246, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"eval_loss": 0.9642460942268372, |
|
"eval_runtime": 439.7231, |
|
"eval_samples_per_second": 1.164, |
|
"eval_steps_per_second": 0.146, |
|
"eval_wer": 58.830205811138015, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"learning_rate": 6.120000000000001e-06, |
|
"loss": 0.1991, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 11.85, |
|
"learning_rate": 6.3200000000000005e-06, |
|
"loss": 0.1798, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 12.22, |
|
"learning_rate": 6.520000000000001e-06, |
|
"loss": 0.1426, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 6.720000000000001e-06, |
|
"loss": 0.125, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"learning_rate": 6.92e-06, |
|
"loss": 0.1177, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 7.1200000000000004e-06, |
|
"loss": 0.0874, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 7.32e-06, |
|
"loss": 0.0852, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 14.07, |
|
"learning_rate": 7.520000000000001e-06, |
|
"loss": 0.0666, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 14.44, |
|
"learning_rate": 7.72e-06, |
|
"loss": 0.0531, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 7.92e-06, |
|
"loss": 0.054, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"eval_loss": 1.0876343250274658, |
|
"eval_runtime": 428.9567, |
|
"eval_samples_per_second": 1.194, |
|
"eval_steps_per_second": 0.149, |
|
"eval_wer": 57.99031476997578, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 15.19, |
|
"learning_rate": 8.120000000000002e-06, |
|
"loss": 0.0465, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 15.56, |
|
"learning_rate": 8.32e-06, |
|
"loss": 0.0392, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 15.93, |
|
"learning_rate": 8.52e-06, |
|
"loss": 0.0354, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 8.720000000000001e-06, |
|
"loss": 0.0296, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 8.920000000000001e-06, |
|
"loss": 0.0255, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 17.04, |
|
"learning_rate": 9.12e-06, |
|
"loss": 0.0274, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 9.32e-06, |
|
"loss": 0.0216, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 17.78, |
|
"learning_rate": 9.52e-06, |
|
"loss": 0.022, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 18.15, |
|
"learning_rate": 9.72e-06, |
|
"loss": 0.0219, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 18.52, |
|
"learning_rate": 9.920000000000002e-06, |
|
"loss": 0.0159, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 18.52, |
|
"eval_loss": 1.1797882318496704, |
|
"eval_runtime": 450.9533, |
|
"eval_samples_per_second": 1.135, |
|
"eval_steps_per_second": 0.142, |
|
"eval_wer": 57.87681598062954, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 18.89, |
|
"learning_rate": 9.4e-06, |
|
"loss": 0.019, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 19.26, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 0.021, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 19.63, |
|
"learning_rate": 7.4e-06, |
|
"loss": 0.0139, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 0.013, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 20.37, |
|
"learning_rate": 5.400000000000001e-06, |
|
"loss": 0.007, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 20.74, |
|
"learning_rate": 4.4e-06, |
|
"loss": 0.0082, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 21.11, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"loss": 0.009, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 21.48, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.0045, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 21.85, |
|
"learning_rate": 1.4000000000000001e-06, |
|
"loss": 0.0049, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 22.22, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 0.0045, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 22.22, |
|
"eval_loss": 1.2309296131134033, |
|
"eval_runtime": 449.2432, |
|
"eval_samples_per_second": 1.14, |
|
"eval_steps_per_second": 0.142, |
|
"eval_wer": 56.651029055690074, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 87.14, |
|
"learning_rate": 4.7e-06, |
|
"loss": 0.0038, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 88.57, |
|
"learning_rate": 4.2000000000000004e-06, |
|
"loss": 0.0033, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"learning_rate": 3.7e-06, |
|
"loss": 0.0033, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 91.43, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 0.0031, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 92.86, |
|
"learning_rate": 2.7000000000000004e-06, |
|
"loss": 0.0029, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 94.29, |
|
"learning_rate": 2.2e-06, |
|
"loss": 0.0028, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 95.71, |
|
"learning_rate": 1.7000000000000002e-06, |
|
"loss": 0.0027, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 97.14, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 0.0026, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 98.57, |
|
"learning_rate": 7.000000000000001e-07, |
|
"loss": 0.0026, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"loss": 0.0026, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 1.2580721378326416, |
|
"eval_runtime": 471.9052, |
|
"eval_samples_per_second": 1.085, |
|
"eval_steps_per_second": 0.068, |
|
"eval_wer": 56.847760290556906, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 700, |
|
"total_flos": 4.53742824923136e+18, |
|
"train_loss": 0.0004244298700775419, |
|
"train_runtime": 836.7871, |
|
"train_samples_per_second": 53.538, |
|
"train_steps_per_second": 0.837 |
|
} |
|
], |
|
"max_steps": 700, |
|
"num_train_epochs": 100, |
|
"total_flos": 4.53742824923136e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|