|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 20, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9626, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2e-05, |
|
"loss": 0.7551, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2e-05, |
|
"loss": 0.6737, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2e-05, |
|
"loss": 0.5923, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_f1": 0.2973817897616256, |
|
"eval_loss": 0.6075014472007751, |
|
"eval_runtime": 2.0329, |
|
"eval_samples_per_second": 464.848, |
|
"eval_steps_per_second": 14.757, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2e-05, |
|
"loss": 0.5935, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2e-05, |
|
"loss": 0.6499, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2e-05, |
|
"loss": 0.5639, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2e-05, |
|
"loss": 0.6694, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_f1": 0.2973817897616256, |
|
"eval_loss": 0.5937851071357727, |
|
"eval_runtime": 2.0028, |
|
"eval_samples_per_second": 471.842, |
|
"eval_steps_per_second": 14.979, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2e-05, |
|
"loss": 0.5923, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2e-05, |
|
"loss": 0.6133, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2e-05, |
|
"loss": 0.5702, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.5225, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.3024223872822884, |
|
"eval_loss": 0.534581184387207, |
|
"eval_runtime": 2.0063, |
|
"eval_samples_per_second": 471.005, |
|
"eval_steps_per_second": 14.953, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4961, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4879, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2e-05, |
|
"loss": 0.5475, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2e-05, |
|
"loss": 0.5458, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_f1": 0.4681950604946577, |
|
"eval_loss": 0.5171502828598022, |
|
"eval_runtime": 2.0079, |
|
"eval_samples_per_second": 470.651, |
|
"eval_steps_per_second": 14.941, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2e-05, |
|
"loss": 0.5224, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2e-05, |
|
"loss": 0.5344, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4791, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2e-05, |
|
"loss": 0.5052, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_f1": 0.43614802647951817, |
|
"eval_loss": 0.5550346970558167, |
|
"eval_runtime": 2.0143, |
|
"eval_samples_per_second": 469.148, |
|
"eval_steps_per_second": 14.894, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2e-05, |
|
"loss": 0.5821, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4842, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4613, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3689, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.4394838882203503, |
|
"eval_loss": 0.5187910795211792, |
|
"eval_runtime": 2.0147, |
|
"eval_samples_per_second": 469.048, |
|
"eval_steps_per_second": 14.89, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4172, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4645, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4628, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4097, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_f1": 0.4676343432076015, |
|
"eval_loss": 0.4918379485607147, |
|
"eval_runtime": 2.021, |
|
"eval_samples_per_second": 467.58, |
|
"eval_steps_per_second": 14.844, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4716, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4947, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4288, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4904, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_f1": 0.4640191084468284, |
|
"eval_loss": 0.4755867123603821, |
|
"eval_runtime": 2.0186, |
|
"eval_samples_per_second": 468.144, |
|
"eval_steps_per_second": 14.862, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4391, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2e-05, |
|
"loss": 0.408, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4724, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3696, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.46883968839688395, |
|
"eval_loss": 0.47884294390678406, |
|
"eval_runtime": 2.0223, |
|
"eval_samples_per_second": 467.285, |
|
"eval_steps_per_second": 14.834, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4094, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4162, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3975, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3631, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_f1": 0.4735017608176137, |
|
"eval_loss": 0.4751051366329193, |
|
"eval_runtime": 2.0238, |
|
"eval_samples_per_second": 466.941, |
|
"eval_steps_per_second": 14.824, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3953, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3781, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4447, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4345, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_f1": 0.4796518931606489, |
|
"eval_loss": 0.4609261453151703, |
|
"eval_runtime": 2.0228, |
|
"eval_samples_per_second": 467.18, |
|
"eval_steps_per_second": 14.831, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3724, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 2e-05, |
|
"loss": 0.366, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3763, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3398, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.5672850456534463, |
|
"eval_loss": 0.46373221278190613, |
|
"eval_runtime": 2.0191, |
|
"eval_samples_per_second": 468.022, |
|
"eval_steps_per_second": 14.858, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3861, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 2e-05, |
|
"loss": 0.315, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3501, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2985, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"eval_f1": 0.5091622097836045, |
|
"eval_loss": 0.4849531948566437, |
|
"eval_runtime": 2.0286, |
|
"eval_samples_per_second": 465.831, |
|
"eval_steps_per_second": 14.788, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3734, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3658, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3268, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3248, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"eval_f1": 0.6056820231448513, |
|
"eval_loss": 0.508940577507019, |
|
"eval_runtime": 2.0241, |
|
"eval_samples_per_second": 466.871, |
|
"eval_steps_per_second": 14.821, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3573, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3298, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2909, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3323, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.6159893659893659, |
|
"eval_loss": 0.46433570981025696, |
|
"eval_runtime": 2.0165, |
|
"eval_samples_per_second": 468.623, |
|
"eval_steps_per_second": 14.877, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 300, |
|
"total_flos": 9950949458472960.0, |
|
"train_loss": 0.4641021112600962, |
|
"train_runtime": 274.9964, |
|
"train_samples_per_second": 137.529, |
|
"train_steps_per_second": 1.091 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 300, |
|
"num_train_epochs": 5, |
|
"save_steps": 20, |
|
"total_flos": 9950949458472960.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|