{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9473684210526314, "eval_steps": 500, "global_step": 27, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10526315789473684, "grad_norm": 33.15888044761256, "learning_rate": 5e-06, "loss": 2.3845, "step": 1 }, { "epoch": 0.21052631578947367, "grad_norm": 32.85087228020456, "learning_rate": 4.957432749209755e-06, "loss": 2.4262, "step": 2 }, { "epoch": 0.3157894736842105, "grad_norm": 17.287178112682515, "learning_rate": 4.83118057351089e-06, "loss": 2.0605, "step": 3 }, { "epoch": 0.42105263157894735, "grad_norm": 22.156624644223225, "learning_rate": 4.625542839324036e-06, "loss": 2.0643, "step": 4 }, { "epoch": 0.5263157894736842, "grad_norm": 31.24716491045878, "learning_rate": 4.3475222930516484e-06, "loss": 2.1075, "step": 5 }, { "epoch": 0.631578947368421, "grad_norm": 19.777743012091744, "learning_rate": 4.006586590948141e-06, "loss": 2.0151, "step": 6 }, { "epoch": 0.7368421052631579, "grad_norm": 12.072245875701771, "learning_rate": 3.6143458894413463e-06, "loss": 1.9219, "step": 7 }, { "epoch": 0.8421052631578947, "grad_norm": 9.496121066394277, "learning_rate": 3.184157475180208e-06, "loss": 1.9058, "step": 8 }, { "epoch": 0.9473684210526315, "grad_norm": 10.211533731158024, "learning_rate": 2.730670898658255e-06, "loss": 1.9148, "step": 9 }, { "epoch": 1.0526315789473684, "grad_norm": 9.965283978172675, "learning_rate": 2.269329101341745e-06, "loss": 1.7407, "step": 10 }, { "epoch": 1.1578947368421053, "grad_norm": 8.734937073121175, "learning_rate": 1.8158425248197931e-06, "loss": 1.7286, "step": 11 }, { "epoch": 1.263157894736842, "grad_norm": 9.508662641095643, "learning_rate": 1.3856541105586545e-06, "loss": 1.6936, "step": 12 }, { "epoch": 1.368421052631579, "grad_norm": 8.355635389438104, "learning_rate": 9.934134090518593e-07, "loss": 1.6708, "step": 13 }, { "epoch": 1.4736842105263157, "grad_norm": 8.05041781460727, "learning_rate": 6.524777069483526e-07, "loss": 1.6466, "step": 14 }, { "epoch": 1.5789473684210527, "grad_norm": 7.5793931666820455, "learning_rate": 3.7445716067596506e-07, "loss": 1.6767, "step": 15 }, { "epoch": 1.6842105263157894, "grad_norm": 7.162816395853179, "learning_rate": 1.6881942648911077e-07, "loss": 1.6494, "step": 16 }, { "epoch": 1.7894736842105263, "grad_norm": 7.225686707910806, "learning_rate": 4.256725079024554e-08, "loss": 1.6394, "step": 17 }, { "epoch": 1.8947368421052633, "grad_norm": 7.320022756405671, "learning_rate": 0.0, "loss": 1.6727, "step": 18 }, { "epoch": 2.1052631578947367, "grad_norm": 7.157781045707602, "learning_rate": 4.594214621574912e-06, "loss": 1.5742, "step": 19 }, { "epoch": 2.2105263157894735, "grad_norm": 7.15791774456708, "learning_rate": 4.54355584639723e-06, "loss": 1.673, "step": 20 }, { "epoch": 2.3157894736842106, "grad_norm": 10.340618846680513, "learning_rate": 4.49023266426411e-06, "loss": 1.5806, "step": 21 }, { "epoch": 2.4210526315789473, "grad_norm": 11.734839865750393, "learning_rate": 4.434314598432091e-06, "loss": 1.5359, "step": 22 }, { "epoch": 2.526315789473684, "grad_norm": 12.307990008201672, "learning_rate": 4.3758745553910065e-06, "loss": 1.5259, "step": 23 }, { "epoch": 2.6315789473684212, "grad_norm": 10.433638720770736, "learning_rate": 4.3149887298078275e-06, "loss": 1.4633, "step": 24 }, { "epoch": 2.736842105263158, "grad_norm": 11.46032438607999, "learning_rate": 4.2517365051833564e-06, "loss": 1.5129, "step": 25 }, { "epoch": 2.8421052631578947, "grad_norm": 12.528340674980042, "learning_rate": 4.1862003503512845e-06, "loss": 1.509, "step": 26 }, { "epoch": 2.9473684210526314, "grad_norm": 10.849307524210143, "learning_rate": 4.11846571195457e-06, "loss": 1.4902, "step": 27 } ], "logging_steps": 1.0, "max_steps": 90, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 5000.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4718633000960.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }