{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 18484, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 1.945899156026834e-05, "loss": 2.5051, "step": 500 }, { "epoch": 0.22, "learning_rate": 1.8917983120536682e-05, "loss": 2.3264, "step": 1000 }, { "epoch": 0.32, "learning_rate": 1.837697468080502e-05, "loss": 2.2657, "step": 1500 }, { "epoch": 0.43, "learning_rate": 1.7835966241073363e-05, "loss": 2.2257, "step": 2000 }, { "epoch": 0.54, "learning_rate": 1.7294957801341702e-05, "loss": 2.1931, "step": 2500 }, { "epoch": 0.65, "learning_rate": 1.6753949361610044e-05, "loss": 2.1735, "step": 3000 }, { "epoch": 0.76, "learning_rate": 1.6212940921878382e-05, "loss": 2.1473, "step": 3500 }, { "epoch": 0.87, "learning_rate": 1.5671932482146724e-05, "loss": 2.1401, "step": 4000 }, { "epoch": 0.97, "learning_rate": 1.5130924042415063e-05, "loss": 2.1171, "step": 4500 }, { "epoch": 1.0, "eval_f1": 0.8773, "eval_gen_len": 16.4704, "eval_loss": 2.013514280319214, "eval_precision": 0.8836, "eval_recall": 0.8717, "eval_rouge1": 0.3138, "eval_rouge2": 0.1556, "eval_rougeL": 0.2853, "eval_rougeLsum": 0.2853, "eval_runtime": 1599.8646, "eval_samples_per_second": 6.251, "eval_steps_per_second": 0.391, "step": 4621 }, { "epoch": 1.08, "learning_rate": 1.4589915602683402e-05, "loss": 2.0523, "step": 5000 }, { "epoch": 1.19, "learning_rate": 1.4048907162951744e-05, "loss": 2.0306, "step": 5500 }, { "epoch": 1.3, "learning_rate": 1.3507898723220082e-05, "loss": 2.0262, "step": 6000 }, { "epoch": 1.41, "learning_rate": 1.2966890283488423e-05, "loss": 2.0169, "step": 6500 }, { "epoch": 1.51, "learning_rate": 1.2425881843756765e-05, "loss": 2.0101, "step": 7000 }, { "epoch": 1.62, "learning_rate": 1.1884873404025104e-05, "loss": 1.9955, "step": 7500 }, { "epoch": 1.73, "learning_rate": 1.1343864964293442e-05, "loss": 1.9943, "step": 8000 }, { "epoch": 1.84, "learning_rate": 1.0802856524561784e-05, "loss": 1.9863, "step": 8500 }, { "epoch": 1.95, "learning_rate": 1.0261848084830125e-05, "loss": 1.9804, "step": 9000 }, { "epoch": 2.0, "eval_f1": 0.8775, "eval_gen_len": 16.2207, "eval_loss": 1.9440101385116577, "eval_precision": 0.8831, "eval_recall": 0.8725, "eval_rouge1": 0.3147, "eval_rouge2": 0.1581, "eval_rougeL": 0.2864, "eval_rougeLsum": 0.2866, "eval_runtime": 1574.2726, "eval_samples_per_second": 6.352, "eval_steps_per_second": 0.397, "step": 9242 }, { "epoch": 2.06, "learning_rate": 9.720839645098465e-06, "loss": 1.9455, "step": 9500 }, { "epoch": 2.16, "learning_rate": 9.179831205366805e-06, "loss": 1.9171, "step": 10000 }, { "epoch": 2.27, "learning_rate": 8.638822765635144e-06, "loss": 1.9211, "step": 10500 }, { "epoch": 2.38, "learning_rate": 8.097814325903484e-06, "loss": 1.9122, "step": 11000 }, { "epoch": 2.49, "learning_rate": 7.5568058861718255e-06, "loss": 1.9126, "step": 11500 }, { "epoch": 2.6, "learning_rate": 7.015797446440165e-06, "loss": 1.9065, "step": 12000 }, { "epoch": 2.71, "learning_rate": 6.474789006708505e-06, "loss": 1.8955, "step": 12500 }, { "epoch": 2.81, "learning_rate": 5.933780566976846e-06, "loss": 1.899, "step": 13000 }, { "epoch": 2.92, "learning_rate": 5.392772127245185e-06, "loss": 1.8971, "step": 13500 }, { "epoch": 3.0, "eval_f1": 0.8792, "eval_gen_len": 15.4676, "eval_loss": 1.9157460927963257, "eval_precision": 0.8857, "eval_recall": 0.8733, "eval_rouge1": 0.3209, "eval_rouge2": 0.1638, "eval_rougeL": 0.2925, "eval_rougeLsum": 0.2926, "eval_runtime": 1567.1343, "eval_samples_per_second": 6.381, "eval_steps_per_second": 0.399, "step": 13863 }, { "epoch": 3.03, "learning_rate": 4.8517636875135255e-06, "loss": 1.8862, "step": 14000 }, { "epoch": 3.14, "learning_rate": 4.310755247781866e-06, "loss": 1.8611, "step": 14500 }, { "epoch": 3.25, "learning_rate": 3.769746808050206e-06, "loss": 1.8474, "step": 15000 }, { "epoch": 3.35, "learning_rate": 3.2287383683185457e-06, "loss": 1.8523, "step": 15500 }, { "epoch": 3.46, "learning_rate": 2.687729928586886e-06, "loss": 1.8462, "step": 16000 }, { "epoch": 3.57, "learning_rate": 2.1467214888552264e-06, "loss": 1.8455, "step": 16500 }, { "epoch": 3.68, "learning_rate": 1.6057130491235664e-06, "loss": 1.8456, "step": 17000 }, { "epoch": 3.79, "learning_rate": 1.0647046093919065e-06, "loss": 1.8425, "step": 17500 }, { "epoch": 3.9, "learning_rate": 5.236961696602468e-07, "loss": 1.8449, "step": 18000 }, { "epoch": 4.0, "eval_f1": 0.8795, "eval_gen_len": 15.7946, "eval_loss": 1.9021437168121338, "eval_precision": 0.8858, "eval_recall": 0.8739, "eval_rouge1": 0.3236, "eval_rouge2": 0.1651, "eval_rougeL": 0.2953, "eval_rougeLsum": 0.2953, "eval_runtime": 1613.4446, "eval_samples_per_second": 6.198, "eval_steps_per_second": 0.387, "step": 18484 }, { "epoch": 4.0, "step": 18484, "total_flos": 3.845275732313899e+18, "train_loss": 1.997600625588558, "train_runtime": 109033.1022, "train_samples_per_second": 16.274, "train_steps_per_second": 0.17 } ], "logging_steps": 500, "max_steps": 18484, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "total_flos": 3.845275732313899e+18, "train_batch_size": 24, "trial_name": null, "trial_params": null }