|
{ |
|
"best_metric": 0.6053687907676869, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-pos-ud-Korean-GSD/checkpoint-500", |
|
"epoch": 21.73913043478261, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.9e-05, |
|
"loss": 1.5142, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.967114093959731e-05, |
|
"loss": 1.0614, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 4.933557046979866e-05, |
|
"loss": 1.042, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 4.9e-05, |
|
"loss": 1.0138, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 4.8664429530201344e-05, |
|
"loss": 1.0024, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"eval_accuracy": 0.6053687907676869, |
|
"eval_loss": 1.007150650024414, |
|
"eval_runtime": 4.8522, |
|
"eval_samples_per_second": 195.787, |
|
"eval_steps_per_second": 24.525, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 4.8328859060402684e-05, |
|
"loss": 1.0071, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 4.799328859060403e-05, |
|
"loss": 0.9899, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 4.7657718120805376e-05, |
|
"loss": 0.9818, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 4.7322147651006715e-05, |
|
"loss": 0.9812, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 4.698657718120806e-05, |
|
"loss": 0.9736, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"eval_accuracy": 0.6013547415955846, |
|
"eval_loss": 1.0191282033920288, |
|
"eval_runtime": 4.8652, |
|
"eval_samples_per_second": 195.263, |
|
"eval_steps_per_second": 24.459, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 4.66510067114094e-05, |
|
"loss": 0.9626, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 4.631543624161074e-05, |
|
"loss": 0.9428, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 4.597986577181208e-05, |
|
"loss": 0.9431, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 4.5644295302013425e-05, |
|
"loss": 0.9438, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 4.5308724832214764e-05, |
|
"loss": 0.9281, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"eval_accuracy": 0.6036126442548921, |
|
"eval_loss": 1.0334581136703491, |
|
"eval_runtime": 4.8675, |
|
"eval_samples_per_second": 195.172, |
|
"eval_steps_per_second": 24.448, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"learning_rate": 4.497315436241611e-05, |
|
"loss": 0.9, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"learning_rate": 4.463758389261745e-05, |
|
"loss": 0.9157, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 4.4302013422818796e-05, |
|
"loss": 0.8991, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 4.3966442953020135e-05, |
|
"loss": 0.8891, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"learning_rate": 4.363087248322148e-05, |
|
"loss": 0.8694, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"eval_accuracy": 0.5921558789095166, |
|
"eval_loss": 1.0879769325256348, |
|
"eval_runtime": 4.8603, |
|
"eval_samples_per_second": 195.461, |
|
"eval_steps_per_second": 24.484, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 4.329530201342282e-05, |
|
"loss": 0.8726, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 15.94, |
|
"learning_rate": 4.2959731543624166e-05, |
|
"loss": 0.8605, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 4.2624161073825505e-05, |
|
"loss": 0.8449, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"learning_rate": 4.2288590604026845e-05, |
|
"loss": 0.8285, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 18.12, |
|
"learning_rate": 4.195302013422819e-05, |
|
"loss": 0.8418, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 18.12, |
|
"eval_accuracy": 0.5876400735909015, |
|
"eval_loss": 1.1507753133773804, |
|
"eval_runtime": 4.8677, |
|
"eval_samples_per_second": 195.164, |
|
"eval_steps_per_second": 24.447, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 18.84, |
|
"learning_rate": 4.161744966442953e-05, |
|
"loss": 0.8277, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 4.1281879194630876e-05, |
|
"loss": 0.8069, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 20.29, |
|
"learning_rate": 4.0946308724832215e-05, |
|
"loss": 0.7859, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 4.061073825503356e-05, |
|
"loss": 0.8126, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"learning_rate": 4.02751677852349e-05, |
|
"loss": 0.7752, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"eval_accuracy": 0.5837096504432179, |
|
"eval_loss": 1.1938718557357788, |
|
"eval_runtime": 4.8673, |
|
"eval_samples_per_second": 195.178, |
|
"eval_steps_per_second": 24.449, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"step": 3000, |
|
"total_flos": 1.2500041419988992e+16, |
|
"train_loss": 0.9339236297607422, |
|
"train_runtime": 790.8493, |
|
"train_samples_per_second": 606.942, |
|
"train_steps_per_second": 18.967 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 109, |
|
"total_flos": 1.2500041419988992e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|