{ "best_metric": 0.07183855026960373, "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_dj_aragpt2-large/checkpoint-10716", "epoch": 7.0, "eval_steps": 500, "global_step": 37506, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.1402301788330078, "learning_rate": 4.772267016688543e-05, "loss": 0.1323, "step": 5358 }, { "epoch": 1.0, "eval_bleu": 0.08300663646266425, "eval_loss": 0.07568918913602829, "eval_rouge1": 0.4072217751588082, "eval_rouge2": 0.18254715184887235, "eval_rougeL": 0.4034974548012786, "eval_runtime": 706.7922, "eval_samples_per_second": 7.612, "eval_steps_per_second": 1.903, "step": 5358 }, { "epoch": 2.0, "grad_norm": 0.7255021929740906, "learning_rate": 4.521095068441778e-05, "loss": 0.0662, "step": 10716 }, { "epoch": 2.0, "eval_bleu": 0.10627535338849203, "eval_loss": 0.07183855026960373, "eval_rouge1": 0.45517089056426413, "eval_rouge2": 0.2232454846856679, "eval_rougeL": 0.4515800058148628, "eval_runtime": 715.8215, "eval_samples_per_second": 7.516, "eval_steps_per_second": 1.879, "step": 10716 }, { "epoch": 3.0, "grad_norm": 0.5735991597175598, "learning_rate": 4.269923120195012e-05, "loss": 0.0526, "step": 16074 }, { "epoch": 3.0, "eval_bleu": 0.11970864457422921, "eval_loss": 0.07271187007427216, "eval_rouge1": 0.47525716850104355, "eval_rouge2": 0.25197591385088847, "eval_rougeL": 0.47186762638658064, "eval_runtime": 766.9396, "eval_samples_per_second": 7.015, "eval_steps_per_second": 1.754, "step": 16074 }, { "epoch": 4.0, "grad_norm": 0.9193664789199829, "learning_rate": 4.018751171948247e-05, "loss": 0.0414, "step": 21432 }, { "epoch": 4.0, "eval_bleu": 0.12737868761714774, "eval_loss": 0.07567169517278671, "eval_rouge1": 0.4894464779386084, "eval_rouge2": 0.2644220876804199, "eval_rougeL": 0.48620236061532013, "eval_runtime": 767.056, "eval_samples_per_second": 7.014, "eval_steps_per_second": 1.753, "step": 21432 }, { "epoch": 5.0, "grad_norm": 1.2567533254623413, "learning_rate": 3.7675792237014815e-05, "loss": 0.0325, "step": 26790 }, { "epoch": 5.0, "eval_bleu": 0.12904171218936847, "eval_loss": 0.08186400681734085, "eval_rouge1": 0.4910163857094879, "eval_rouge2": 0.26712580782902845, "eval_rougeL": 0.4874926457795191, "eval_runtime": 767.2369, "eval_samples_per_second": 7.012, "eval_steps_per_second": 1.753, "step": 26790 }, { "epoch": 6.0, "grad_norm": 1.125952124595642, "learning_rate": 3.516407275454716e-05, "loss": 0.0262, "step": 32148 }, { "epoch": 6.0, "eval_bleu": 0.12974770289755774, "eval_loss": 0.08628461509943008, "eval_rouge1": 0.4922385680285879, "eval_rouge2": 0.26649486337370165, "eval_rougeL": 0.4887932180100094, "eval_runtime": 828.4635, "eval_samples_per_second": 6.494, "eval_steps_per_second": 1.623, "step": 32148 }, { "epoch": 7.0, "grad_norm": 0.9687314629554749, "learning_rate": 3.26523532720795e-05, "loss": 0.0221, "step": 37506 }, { "epoch": 7.0, "eval_bleu": 0.13258665968806682, "eval_loss": 0.09302929788827896, "eval_rouge1": 0.4960344976284088, "eval_rouge2": 0.2712943985267227, "eval_rougeL": 0.4923109277459174, "eval_runtime": 828.274, "eval_samples_per_second": 6.495, "eval_steps_per_second": 1.624, "step": 37506 }, { "epoch": 7.0, "step": 37506, "total_flos": 6.528656496918528e+17, "train_loss": 0.0533416826159377, "train_runtime": 67570.4522, "train_samples_per_second": 6.343, "train_steps_per_second": 1.586 } ], "logging_steps": 500, "max_steps": 107160, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.528656496918528e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }