|
{ |
|
"best_metric": 0.07183855026960373, |
|
"best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_dj_aragpt2-large/checkpoint-10716", |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 37506, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.1402301788330078, |
|
"learning_rate": 4.772267016688543e-05, |
|
"loss": 0.1323, |
|
"step": 5358 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.08300663646266425, |
|
"eval_loss": 0.07568918913602829, |
|
"eval_rouge1": 0.4072217751588082, |
|
"eval_rouge2": 0.18254715184887235, |
|
"eval_rougeL": 0.4034974548012786, |
|
"eval_runtime": 706.7922, |
|
"eval_samples_per_second": 7.612, |
|
"eval_steps_per_second": 1.903, |
|
"step": 5358 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.7255021929740906, |
|
"learning_rate": 4.521095068441778e-05, |
|
"loss": 0.0662, |
|
"step": 10716 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.10627535338849203, |
|
"eval_loss": 0.07183855026960373, |
|
"eval_rouge1": 0.45517089056426413, |
|
"eval_rouge2": 0.2232454846856679, |
|
"eval_rougeL": 0.4515800058148628, |
|
"eval_runtime": 715.8215, |
|
"eval_samples_per_second": 7.516, |
|
"eval_steps_per_second": 1.879, |
|
"step": 10716 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.5735991597175598, |
|
"learning_rate": 4.269923120195012e-05, |
|
"loss": 0.0526, |
|
"step": 16074 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.11970864457422921, |
|
"eval_loss": 0.07271187007427216, |
|
"eval_rouge1": 0.47525716850104355, |
|
"eval_rouge2": 0.25197591385088847, |
|
"eval_rougeL": 0.47186762638658064, |
|
"eval_runtime": 766.9396, |
|
"eval_samples_per_second": 7.015, |
|
"eval_steps_per_second": 1.754, |
|
"step": 16074 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.9193664789199829, |
|
"learning_rate": 4.018751171948247e-05, |
|
"loss": 0.0414, |
|
"step": 21432 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.12737868761714774, |
|
"eval_loss": 0.07567169517278671, |
|
"eval_rouge1": 0.4894464779386084, |
|
"eval_rouge2": 0.2644220876804199, |
|
"eval_rougeL": 0.48620236061532013, |
|
"eval_runtime": 767.056, |
|
"eval_samples_per_second": 7.014, |
|
"eval_steps_per_second": 1.753, |
|
"step": 21432 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.2567533254623413, |
|
"learning_rate": 3.7675792237014815e-05, |
|
"loss": 0.0325, |
|
"step": 26790 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.12904171218936847, |
|
"eval_loss": 0.08186400681734085, |
|
"eval_rouge1": 0.4910163857094879, |
|
"eval_rouge2": 0.26712580782902845, |
|
"eval_rougeL": 0.4874926457795191, |
|
"eval_runtime": 767.2369, |
|
"eval_samples_per_second": 7.012, |
|
"eval_steps_per_second": 1.753, |
|
"step": 26790 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.125952124595642, |
|
"learning_rate": 3.516407275454716e-05, |
|
"loss": 0.0262, |
|
"step": 32148 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.12974770289755774, |
|
"eval_loss": 0.08628461509943008, |
|
"eval_rouge1": 0.4922385680285879, |
|
"eval_rouge2": 0.26649486337370165, |
|
"eval_rougeL": 0.4887932180100094, |
|
"eval_runtime": 828.4635, |
|
"eval_samples_per_second": 6.494, |
|
"eval_steps_per_second": 1.623, |
|
"step": 32148 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.9687314629554749, |
|
"learning_rate": 3.26523532720795e-05, |
|
"loss": 0.0221, |
|
"step": 37506 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.13258665968806682, |
|
"eval_loss": 0.09302929788827896, |
|
"eval_rouge1": 0.4960344976284088, |
|
"eval_rouge2": 0.2712943985267227, |
|
"eval_rougeL": 0.4923109277459174, |
|
"eval_runtime": 828.274, |
|
"eval_samples_per_second": 6.495, |
|
"eval_steps_per_second": 1.624, |
|
"step": 37506 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 37506, |
|
"total_flos": 6.528656496918528e+17, |
|
"train_loss": 0.0533416826159377, |
|
"train_runtime": 67570.4522, |
|
"train_samples_per_second": 6.343, |
|
"train_steps_per_second": 1.586 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 107160, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.528656496918528e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|