{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.789678675754625, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.31, "grad_norm": 0.48931118845939636, "learning_rate": 1.98125e-05, "loss": 1.7417, "step": 20 }, { "epoch": 0.62, "grad_norm": 0.56327223777771, "learning_rate": 1.9604166666666668e-05, "loss": 1.7626, "step": 40 }, { "epoch": 0.93, "grad_norm": 0.6054126620292664, "learning_rate": 1.9395833333333335e-05, "loss": 1.5774, "step": 60 }, { "epoch": 1.25, "grad_norm": 0.8035449385643005, "learning_rate": 1.9187500000000002e-05, "loss": 1.4772, "step": 80 }, { "epoch": 1.56, "grad_norm": 0.9972027540206909, "learning_rate": 1.897916666666667e-05, "loss": 1.409, "step": 100 }, { "epoch": 1.87, "grad_norm": 0.7526600360870361, "learning_rate": 1.8781250000000003e-05, "loss": 1.3088, "step": 120 }, { "epoch": 2.18, "grad_norm": 0.8370587825775146, "learning_rate": 1.8572916666666666e-05, "loss": 1.2086, "step": 140 }, { "epoch": 2.49, "grad_norm": 0.8337924480438232, "learning_rate": 1.8364583333333334e-05, "loss": 1.1321, "step": 160 }, { "epoch": 2.8, "grad_norm": 1.0028570890426636, "learning_rate": 1.815625e-05, "loss": 1.0439, "step": 180 }, { "epoch": 3.12, "grad_norm": 0.7663702368736267, "learning_rate": 1.7947916666666668e-05, "loss": 1.026, "step": 200 }, { "epoch": 3.43, "grad_norm": 0.8835870623588562, "learning_rate": 1.7739583333333335e-05, "loss": 0.9994, "step": 220 }, { "epoch": 3.74, "grad_norm": 1.010624885559082, "learning_rate": 1.7531250000000003e-05, "loss": 0.9381, "step": 240 }, { "epoch": 4.05, "grad_norm": 0.9645354151725769, "learning_rate": 1.7322916666666666e-05, "loss": 0.8667, "step": 260 }, { "epoch": 4.36, "grad_norm": 0.9926008582115173, "learning_rate": 1.7114583333333334e-05, "loss": 0.8729, "step": 280 }, { "epoch": 4.67, "grad_norm": 1.0364482402801514, "learning_rate": 1.690625e-05, "loss": 0.836, "step": 300 }, { "epoch": 4.99, "grad_norm": 1.3519001007080078, "learning_rate": 1.6697916666666668e-05, "loss": 0.8126, "step": 320 }, { "epoch": 5.3, "grad_norm": 1.2280508279800415, "learning_rate": 1.6489583333333335e-05, "loss": 0.7953, "step": 340 }, { "epoch": 5.61, "grad_norm": 0.8931779265403748, "learning_rate": 1.6281250000000003e-05, "loss": 0.7813, "step": 360 }, { "epoch": 5.92, "grad_norm": 2.7053027153015137, "learning_rate": 1.6072916666666667e-05, "loss": 0.7256, "step": 380 }, { "epoch": 6.23, "grad_norm": 1.411024808883667, "learning_rate": 1.5864583333333334e-05, "loss": 0.7336, "step": 400 }, { "epoch": 6.54, "grad_norm": 1.0111807584762573, "learning_rate": 1.565625e-05, "loss": 0.7207, "step": 420 }, { "epoch": 6.85, "grad_norm": 2.213623523712158, "learning_rate": 1.544791666666667e-05, "loss": 0.664, "step": 440 }, { "epoch": 7.17, "grad_norm": 1.3642323017120361, "learning_rate": 1.5239583333333334e-05, "loss": 0.6848, "step": 460 }, { "epoch": 7.48, "grad_norm": 1.3692028522491455, "learning_rate": 1.5031250000000001e-05, "loss": 0.6663, "step": 480 }, { "epoch": 7.79, "grad_norm": 1.9850131273269653, "learning_rate": 1.4822916666666667e-05, "loss": 0.6199, "step": 500 } ], "logging_steps": 20, "max_steps": 1920, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "total_flos": 1.2995638935552e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }