|
{ |
|
"best_metric": 27.4584, |
|
"best_model_checkpoint": "vilmedic/SciFiverouge-1/checkpoint-66726", |
|
"epoch": 35.99029126213592, |
|
"global_step": 66726, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.9538455297488754e-05, |
|
"loss": 1.8057, |
|
"step": 3707 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 83.8658, |
|
"eval_f1_radgraph": 0.2199, |
|
"eval_loss": 1.5417813062667847, |
|
"eval_rouge1": 28.1224, |
|
"eval_rouge2": 14.9915, |
|
"eval_rougeL": 25.1977, |
|
"eval_rougeLsum": 27.135, |
|
"eval_runtime": 525.382, |
|
"eval_samples_per_second": 14.11, |
|
"eval_steps_per_second": 0.442, |
|
"step": 3707 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2.8935795570433828e-05, |
|
"loss": 1.5951, |
|
"step": 7414 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 91.197, |
|
"eval_f1_radgraph": 0.227, |
|
"eval_loss": 1.4663870334625244, |
|
"eval_rouge1": 28.885, |
|
"eval_rouge2": 15.7599, |
|
"eval_rougeL": 25.9785, |
|
"eval_rougeLsum": 27.819, |
|
"eval_runtime": 515.8882, |
|
"eval_samples_per_second": 14.369, |
|
"eval_steps_per_second": 0.45, |
|
"step": 7414 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 2.8333135843378902e-05, |
|
"loss": 1.5199, |
|
"step": 11121 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 87.253, |
|
"eval_f1_radgraph": 0.2314, |
|
"eval_loss": 1.4249236583709717, |
|
"eval_rouge1": 29.0849, |
|
"eval_rouge2": 16.0903, |
|
"eval_rougeL": 26.1967, |
|
"eval_rougeLsum": 28.0078, |
|
"eval_runtime": 513.5301, |
|
"eval_samples_per_second": 14.435, |
|
"eval_steps_per_second": 0.452, |
|
"step": 11121 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 2.7730476116323972e-05, |
|
"loss": 1.4705, |
|
"step": 14828 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 85.3504, |
|
"eval_f1_radgraph": 0.2342, |
|
"eval_loss": 1.3992971181869507, |
|
"eval_rouge1": 29.3725, |
|
"eval_rouge2": 16.356, |
|
"eval_rougeL": 26.4644, |
|
"eval_rougeLsum": 28.2535, |
|
"eval_runtime": 522.0475, |
|
"eval_samples_per_second": 14.2, |
|
"eval_steps_per_second": 0.444, |
|
"step": 14828 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 2.7127816389269046e-05, |
|
"loss": 1.4326, |
|
"step": 18535 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 90.5282, |
|
"eval_f1_radgraph": 0.2341, |
|
"eval_loss": 1.38131844997406, |
|
"eval_rouge1": 29.5246, |
|
"eval_rouge2": 16.41, |
|
"eval_rougeL": 26.5568, |
|
"eval_rougeLsum": 28.3933, |
|
"eval_runtime": 515.287, |
|
"eval_samples_per_second": 14.386, |
|
"eval_steps_per_second": 0.45, |
|
"step": 18535 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2.652515666221412e-05, |
|
"loss": 1.4015, |
|
"step": 22242 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 91.7941, |
|
"eval_f1_radgraph": 0.2346, |
|
"eval_loss": 1.366599440574646, |
|
"eval_rouge1": 29.7344, |
|
"eval_rouge2": 16.6795, |
|
"eval_rougeL": 26.7433, |
|
"eval_rougeLsum": 28.603, |
|
"eval_runtime": 515.9852, |
|
"eval_samples_per_second": 14.367, |
|
"eval_steps_per_second": 0.45, |
|
"step": 22242 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 2.5922496935159194e-05, |
|
"loss": 1.3756, |
|
"step": 25949 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bleu": 92.9315, |
|
"eval_f1_radgraph": 0.2377, |
|
"eval_loss": 1.356188178062439, |
|
"eval_rouge1": 29.8231, |
|
"eval_rouge2": 16.7219, |
|
"eval_rougeL": 26.8298, |
|
"eval_rougeLsum": 28.6547, |
|
"eval_runtime": 526.1612, |
|
"eval_samples_per_second": 14.089, |
|
"eval_steps_per_second": 0.441, |
|
"step": 25949 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2.5319837208104264e-05, |
|
"loss": 1.3534, |
|
"step": 29656 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 90.1592, |
|
"eval_f1_radgraph": 0.2402, |
|
"eval_loss": 1.348175048828125, |
|
"eval_rouge1": 29.9244, |
|
"eval_rouge2": 16.8767, |
|
"eval_rougeL": 26.9699, |
|
"eval_rougeLsum": 28.7771, |
|
"eval_runtime": 517.9401, |
|
"eval_samples_per_second": 14.312, |
|
"eval_steps_per_second": 0.448, |
|
"step": 29656 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 2.4717177481049338e-05, |
|
"loss": 1.3335, |
|
"step": 33363 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bleu": 93.8733, |
|
"eval_f1_radgraph": 0.2412, |
|
"eval_loss": 1.3401567935943604, |
|
"eval_rouge1": 29.9866, |
|
"eval_rouge2": 16.8753, |
|
"eval_rougeL": 26.967, |
|
"eval_rougeLsum": 28.8217, |
|
"eval_runtime": 520.2162, |
|
"eval_samples_per_second": 14.25, |
|
"eval_steps_per_second": 0.446, |
|
"step": 33363 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"learning_rate": 2.4114517753994412e-05, |
|
"loss": 1.3157, |
|
"step": 37070 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"eval_bleu": 90.6225, |
|
"eval_f1_radgraph": 0.2409, |
|
"eval_loss": 1.3334068059921265, |
|
"eval_rouge1": 30.0245, |
|
"eval_rouge2": 17.0323, |
|
"eval_rougeL": 27.0771, |
|
"eval_rougeLsum": 28.8866, |
|
"eval_runtime": 523.6658, |
|
"eval_samples_per_second": 14.156, |
|
"eval_steps_per_second": 0.443, |
|
"step": 37070 |
|
}, |
|
{ |
|
"epoch": 21.99, |
|
"learning_rate": 2.3511858026939486e-05, |
|
"loss": 1.2994, |
|
"step": 40777 |
|
}, |
|
{ |
|
"epoch": 21.99, |
|
"eval_bleu": 93.6108, |
|
"eval_f1_radgraph": 0.2403, |
|
"eval_loss": 1.3307315111160278, |
|
"eval_rouge1": 29.9589, |
|
"eval_rouge2": 16.9066, |
|
"eval_rougeL": 26.9681, |
|
"eval_rougeLsum": 28.7908, |
|
"eval_runtime": 516.1874, |
|
"eval_samples_per_second": 14.361, |
|
"eval_steps_per_second": 0.449, |
|
"step": 40777 |
|
}, |
|
{ |
|
"epoch": 23.99, |
|
"learning_rate": 2.290919829988456e-05, |
|
"loss": 1.2843, |
|
"step": 44484 |
|
}, |
|
{ |
|
"epoch": 23.99, |
|
"eval_bleu": 95.5973, |
|
"eval_f1_radgraph": 0.241, |
|
"eval_loss": 1.3280918598175049, |
|
"eval_rouge1": 30.1835, |
|
"eval_rouge2": 17.1623, |
|
"eval_rougeL": 27.182, |
|
"eval_rougeLsum": 29.0235, |
|
"eval_runtime": 517.0183, |
|
"eval_samples_per_second": 14.338, |
|
"eval_steps_per_second": 0.449, |
|
"step": 44484 |
|
}, |
|
{ |
|
"epoch": 25.99, |
|
"learning_rate": 2.230653857282963e-05, |
|
"loss": 1.2693, |
|
"step": 48191 |
|
}, |
|
{ |
|
"epoch": 25.99, |
|
"eval_bleu": 94.2149, |
|
"eval_f1_radgraph": 0.2427, |
|
"eval_loss": 1.3227483034133911, |
|
"eval_rouge1": 30.2847, |
|
"eval_rouge2": 17.2726, |
|
"eval_rougeL": 27.2884, |
|
"eval_rougeLsum": 29.1106, |
|
"eval_runtime": 529.2786, |
|
"eval_samples_per_second": 14.006, |
|
"eval_steps_per_second": 0.438, |
|
"step": 48191 |
|
}, |
|
{ |
|
"epoch": 27.99, |
|
"learning_rate": 2.1703878845774707e-05, |
|
"loss": 1.2564, |
|
"step": 51898 |
|
}, |
|
{ |
|
"epoch": 27.99, |
|
"eval_bleu": 94.0952, |
|
"eval_f1_radgraph": 0.2446, |
|
"eval_loss": 1.3205522298812866, |
|
"eval_rouge1": 30.2975, |
|
"eval_rouge2": 17.3352, |
|
"eval_rougeL": 27.3306, |
|
"eval_rougeLsum": 29.1631, |
|
"eval_runtime": 522.5234, |
|
"eval_samples_per_second": 14.187, |
|
"eval_steps_per_second": 0.444, |
|
"step": 51898 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"learning_rate": 2.1101219118719778e-05, |
|
"loss": 1.2453, |
|
"step": 55605 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"eval_bleu": 93.4725, |
|
"eval_f1_radgraph": 0.2429, |
|
"eval_loss": 1.3210633993148804, |
|
"eval_rouge1": 30.2609, |
|
"eval_rouge2": 17.3045, |
|
"eval_rougeL": 27.2887, |
|
"eval_rougeLsum": 29.0797, |
|
"eval_runtime": 520.8063, |
|
"eval_samples_per_second": 14.234, |
|
"eval_steps_per_second": 0.445, |
|
"step": 55605 |
|
}, |
|
{ |
|
"epoch": 31.99, |
|
"learning_rate": 2.0498559391664852e-05, |
|
"loss": 1.2332, |
|
"step": 59312 |
|
}, |
|
{ |
|
"epoch": 31.99, |
|
"eval_bleu": 90.7444, |
|
"eval_f1_radgraph": 0.2423, |
|
"eval_loss": 1.3173481225967407, |
|
"eval_rouge1": 30.2939, |
|
"eval_rouge2": 17.3007, |
|
"eval_rougeL": 27.2658, |
|
"eval_rougeLsum": 29.1106, |
|
"eval_runtime": 520.4251, |
|
"eval_samples_per_second": 14.244, |
|
"eval_steps_per_second": 0.446, |
|
"step": 59312 |
|
}, |
|
{ |
|
"epoch": 33.99, |
|
"learning_rate": 1.9895899664609926e-05, |
|
"loss": 1.2229, |
|
"step": 63019 |
|
}, |
|
{ |
|
"epoch": 33.99, |
|
"eval_bleu": 90.9915, |
|
"eval_f1_radgraph": 0.2444, |
|
"eval_loss": 1.317107081413269, |
|
"eval_rouge1": 30.3679, |
|
"eval_rouge2": 17.3809, |
|
"eval_rougeL": 27.3829, |
|
"eval_rougeLsum": 29.1488, |
|
"eval_runtime": 520.2024, |
|
"eval_samples_per_second": 14.25, |
|
"eval_steps_per_second": 0.446, |
|
"step": 63019 |
|
}, |
|
{ |
|
"epoch": 35.99, |
|
"learning_rate": 1.9293239937555e-05, |
|
"loss": 1.2123, |
|
"step": 66726 |
|
}, |
|
{ |
|
"epoch": 35.99, |
|
"eval_bleu": 96.0056, |
|
"eval_f1_radgraph": 0.2452, |
|
"eval_loss": 1.3161959648132324, |
|
"eval_rouge1": 30.438, |
|
"eval_rouge2": 17.4316, |
|
"eval_rougeL": 27.4584, |
|
"eval_rougeLsum": 29.2665, |
|
"eval_runtime": 518.103, |
|
"eval_samples_per_second": 14.308, |
|
"eval_steps_per_second": 0.448, |
|
"step": 66726 |
|
} |
|
], |
|
"max_steps": 185400, |
|
"num_train_epochs": 100, |
|
"total_flos": 1.1641360206370867e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|