{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.998080921157844, "eval_steps": 500, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 4.9018471133855755e-05, "loss": 1.9521, "step": 500 }, { "epoch": 0.16, "learning_rate": 4.801895090356629e-05, "loss": 0.6834, "step": 1000 }, { "epoch": 0.24, "learning_rate": 4.701943067327683e-05, "loss": 0.55, "step": 1500 }, { "epoch": 0.32, "learning_rate": 4.6019910442987366e-05, "loss": 0.4952, "step": 2000 }, { "epoch": 0.4, "learning_rate": 4.502039021269791e-05, "loss": 0.4475, "step": 2500 }, { "epoch": 0.48, "learning_rate": 4.402086998240845e-05, "loss": 0.4157, "step": 3000 }, { "epoch": 0.56, "learning_rate": 4.3021349752118984e-05, "loss": 0.3926, "step": 3500 }, { "epoch": 0.64, "learning_rate": 4.2021829521829525e-05, "loss": 0.3788, "step": 4000 }, { "epoch": 0.72, "learning_rate": 4.102230929154006e-05, "loss": 0.4176, "step": 4500 }, { "epoch": 0.8, "learning_rate": 4.00227890612506e-05, "loss": 0.3467, "step": 5000 }, { "epoch": 0.88, "learning_rate": 3.902326883096114e-05, "loss": 0.3491, "step": 5500 }, { "epoch": 0.96, "learning_rate": 3.802374860067168e-05, "loss": 0.321, "step": 6000 }, { "epoch": 1.04, "learning_rate": 3.702422837038222e-05, "loss": 0.2809, "step": 6500 }, { "epoch": 1.12, "learning_rate": 3.6024708140092754e-05, "loss": 0.2329, "step": 7000 }, { "epoch": 1.2, "learning_rate": 3.5025187909803295e-05, "loss": 0.2385, "step": 7500 }, { "epoch": 1.28, "learning_rate": 3.402566767951384e-05, "loss": 0.2367, "step": 8000 }, { "epoch": 1.36, "learning_rate": 3.302614744922438e-05, "loss": 0.232, "step": 8500 }, { "epoch": 1.44, "learning_rate": 3.202662721893491e-05, "loss": 0.2325, "step": 9000 }, { "epoch": 1.52, "learning_rate": 3.102710698864545e-05, "loss": 0.229, "step": 9500 }, { "epoch": 1.6, "learning_rate": 3.0027586758355993e-05, "loss": 0.2213, "step": 10000 }, { "epoch": 1.68, "learning_rate": 2.9028066528066527e-05, "loss": 0.2153, "step": 10500 }, { "epoch": 1.76, "learning_rate": 2.802854629777707e-05, "loss": 0.2163, "step": 11000 }, { "epoch": 1.84, "learning_rate": 2.7029026067487607e-05, "loss": 0.2152, "step": 11500 }, { "epoch": 1.92, "learning_rate": 2.602950583719815e-05, "loss": 0.2061, "step": 12000 }, { "epoch": 2.0, "learning_rate": 2.5029985606908683e-05, "loss": 0.205, "step": 12500 }, { "epoch": 2.08, "learning_rate": 2.4030465376619225e-05, "loss": 0.1256, "step": 13000 }, { "epoch": 2.16, "learning_rate": 2.3030945146329763e-05, "loss": 0.1247, "step": 13500 }, { "epoch": 2.24, "learning_rate": 2.20314249160403e-05, "loss": 0.1249, "step": 14000 }, { "epoch": 2.32, "learning_rate": 2.1033903726211422e-05, "loss": 0.1263, "step": 14500 }, { "epoch": 2.4, "learning_rate": 2.0034383495921957e-05, "loss": 0.1283, "step": 15000 }, { "epoch": 2.48, "learning_rate": 1.9034863265632495e-05, "loss": 0.1279, "step": 15500 }, { "epoch": 2.56, "learning_rate": 1.8035343035343037e-05, "loss": 0.1247, "step": 16000 }, { "epoch": 2.64, "learning_rate": 1.7035822805053575e-05, "loss": 0.1205, "step": 16500 }, { "epoch": 2.72, "learning_rate": 1.6036302574764113e-05, "loss": 0.1227, "step": 17000 }, { "epoch": 2.8, "learning_rate": 1.5036782344474653e-05, "loss": 0.1173, "step": 17500 }, { "epoch": 2.88, "learning_rate": 1.4037262114185193e-05, "loss": 0.1211, "step": 18000 }, { "epoch": 2.96, "learning_rate": 1.3037741883895729e-05, "loss": 0.1209, "step": 18500 }, { "epoch": 3.04, "learning_rate": 1.203822165360627e-05, "loss": 0.0903, "step": 19000 }, { "epoch": 3.12, "learning_rate": 1.1038701423316809e-05, "loss": 0.0639, "step": 19500 }, { "epoch": 3.2, "learning_rate": 1.0039181193027347e-05, "loss": 0.0616, "step": 20000 }, { "epoch": 3.28, "learning_rate": 9.041660003198465e-06, "loss": 0.0578, "step": 20500 }, { "epoch": 3.36, "learning_rate": 8.042139772909003e-06, "loss": 0.0595, "step": 21000 }, { "epoch": 3.44, "learning_rate": 7.042619542619543e-06, "loss": 0.0564, "step": 21500 }, { "epoch": 3.52, "learning_rate": 6.043099312330082e-06, "loss": 0.0594, "step": 22000 }, { "epoch": 3.6, "learning_rate": 5.043579082040621e-06, "loss": 0.0566, "step": 22500 }, { "epoch": 3.68, "learning_rate": 4.04405885175116e-06, "loss": 0.0582, "step": 23000 }, { "epoch": 3.76, "learning_rate": 3.0445386214616987e-06, "loss": 0.0572, "step": 23500 }, { "epoch": 3.84, "learning_rate": 2.0450183911722376e-06, "loss": 0.0561, "step": 24000 }, { "epoch": 3.92, "learning_rate": 1.0454981608827763e-06, "loss": 0.0538, "step": 24500 }, { "epoch": 4.0, "learning_rate": 4.797697105389413e-08, "loss": 0.0546, "step": 25000 }, { "epoch": 4.0, "step": 25000, "total_flos": 5.320960491493786e+16, "train_loss": 0.0, "train_runtime": 0.0376, "train_samples_per_second": 2660469.321, "train_steps_per_second": 166289.305 } ], "logging_steps": 500, "max_steps": 6253, "num_train_epochs": 1, "save_steps": 500, "total_flos": 5.320960491493786e+16, "trial_name": null, "trial_params": null }