|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"eval_steps": 500, |
|
"global_step": 18484, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.945899156026834e-05, |
|
"loss": 2.5051, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8917983120536682e-05, |
|
"loss": 2.3264, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.837697468080502e-05, |
|
"loss": 2.2657, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.7835966241073363e-05, |
|
"loss": 2.2257, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.7294957801341702e-05, |
|
"loss": 2.1931, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6753949361610044e-05, |
|
"loss": 2.1735, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6212940921878382e-05, |
|
"loss": 2.1473, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.5671932482146724e-05, |
|
"loss": 2.1401, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5130924042415063e-05, |
|
"loss": 2.1171, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.8773, |
|
"eval_gen_len": 16.4704, |
|
"eval_loss": 2.013514280319214, |
|
"eval_precision": 0.8836, |
|
"eval_recall": 0.8717, |
|
"eval_rouge1": 0.3138, |
|
"eval_rouge2": 0.1556, |
|
"eval_rougeL": 0.2853, |
|
"eval_rougeLsum": 0.2853, |
|
"eval_runtime": 1599.8646, |
|
"eval_samples_per_second": 6.251, |
|
"eval_steps_per_second": 0.391, |
|
"step": 4621 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.4589915602683402e-05, |
|
"loss": 2.0523, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.4048907162951744e-05, |
|
"loss": 2.0306, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.3507898723220082e-05, |
|
"loss": 2.0262, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.2966890283488423e-05, |
|
"loss": 2.0169, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.2425881843756765e-05, |
|
"loss": 2.0101, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.1884873404025104e-05, |
|
"loss": 1.9955, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.1343864964293442e-05, |
|
"loss": 1.9943, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.0802856524561784e-05, |
|
"loss": 1.9863, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.0261848084830125e-05, |
|
"loss": 1.9804, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.8775, |
|
"eval_gen_len": 16.2207, |
|
"eval_loss": 1.9440101385116577, |
|
"eval_precision": 0.8831, |
|
"eval_recall": 0.8725, |
|
"eval_rouge1": 0.3147, |
|
"eval_rouge2": 0.1581, |
|
"eval_rougeL": 0.2864, |
|
"eval_rougeLsum": 0.2866, |
|
"eval_runtime": 1574.2726, |
|
"eval_samples_per_second": 6.352, |
|
"eval_steps_per_second": 0.397, |
|
"step": 9242 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.720839645098465e-06, |
|
"loss": 1.9455, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 9.179831205366805e-06, |
|
"loss": 1.9171, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 8.638822765635144e-06, |
|
"loss": 1.9211, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 8.097814325903484e-06, |
|
"loss": 1.9122, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 7.5568058861718255e-06, |
|
"loss": 1.9126, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.015797446440165e-06, |
|
"loss": 1.9065, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 6.474789006708505e-06, |
|
"loss": 1.8955, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 5.933780566976846e-06, |
|
"loss": 1.899, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 5.392772127245185e-06, |
|
"loss": 1.8971, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.8792, |
|
"eval_gen_len": 15.4676, |
|
"eval_loss": 1.9157460927963257, |
|
"eval_precision": 0.8857, |
|
"eval_recall": 0.8733, |
|
"eval_rouge1": 0.3209, |
|
"eval_rouge2": 0.1638, |
|
"eval_rougeL": 0.2925, |
|
"eval_rougeLsum": 0.2926, |
|
"eval_runtime": 1567.1343, |
|
"eval_samples_per_second": 6.381, |
|
"eval_steps_per_second": 0.399, |
|
"step": 13863 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 4.8517636875135255e-06, |
|
"loss": 1.8862, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 4.310755247781866e-06, |
|
"loss": 1.8611, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 3.769746808050206e-06, |
|
"loss": 1.8474, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.2287383683185457e-06, |
|
"loss": 1.8523, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 2.687729928586886e-06, |
|
"loss": 1.8462, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 2.1467214888552264e-06, |
|
"loss": 1.8455, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.6057130491235664e-06, |
|
"loss": 1.8456, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 1.0647046093919065e-06, |
|
"loss": 1.8425, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 5.236961696602468e-07, |
|
"loss": 1.8449, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.8795, |
|
"eval_gen_len": 15.7946, |
|
"eval_loss": 1.9021437168121338, |
|
"eval_precision": 0.8858, |
|
"eval_recall": 0.8739, |
|
"eval_rouge1": 0.3236, |
|
"eval_rouge2": 0.1651, |
|
"eval_rougeL": 0.2953, |
|
"eval_rougeLsum": 0.2953, |
|
"eval_runtime": 1613.4446, |
|
"eval_samples_per_second": 6.198, |
|
"eval_steps_per_second": 0.387, |
|
"step": 18484 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 18484, |
|
"total_flos": 3.845275732313899e+18, |
|
"train_loss": 1.997600625588558, |
|
"train_runtime": 109033.1022, |
|
"train_samples_per_second": 16.274, |
|
"train_steps_per_second": 0.17 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 18484, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 3.845275732313899e+18, |
|
"train_batch_size": 24, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|