{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.41143797572515944, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016457519029006377, "grad_norm": 1.7265625, "learning_rate": 4e-05, "loss": 3.4409, "step": 20 }, { "epoch": 0.032915038058012755, "grad_norm": 1.125, "learning_rate": 8e-05, "loss": 2.834, "step": 40 }, { "epoch": 0.04937255708701913, "grad_norm": 2.203125, "learning_rate": 0.00012, "loss": 2.0422, "step": 60 }, { "epoch": 0.06583007611602551, "grad_norm": 1.734375, "learning_rate": 0.00016, "loss": 1.6617, "step": 80 }, { "epoch": 0.08228759514503188, "grad_norm": 1.4765625, "learning_rate": 0.0002, "loss": 1.5035, "step": 100 }, { "epoch": 0.09874511417403826, "grad_norm": 1.6328125, "learning_rate": 0.0001988716502115656, "loss": 1.4449, "step": 120 }, { "epoch": 0.11520263320304464, "grad_norm": 2.015625, "learning_rate": 0.00019774330042313118, "loss": 1.3653, "step": 140 }, { "epoch": 0.13166015223205102, "grad_norm": 1.640625, "learning_rate": 0.00019661495063469676, "loss": 1.3634, "step": 160 }, { "epoch": 0.1481176712610574, "grad_norm": 1.2734375, "learning_rate": 0.00019548660084626237, "loss": 1.3116, "step": 180 }, { "epoch": 0.16457519029006376, "grad_norm": 1.1796875, "learning_rate": 0.00019435825105782795, "loss": 1.2961, "step": 200 }, { "epoch": 0.18103270931907015, "grad_norm": 1.359375, "learning_rate": 0.00019322990126939354, "loss": 1.2868, "step": 220 }, { "epoch": 0.19749022834807653, "grad_norm": 1.2421875, "learning_rate": 0.0001921015514809591, "loss": 1.2902, "step": 240 }, { "epoch": 0.21394774737708291, "grad_norm": 1.5859375, "learning_rate": 0.00019097320169252468, "loss": 1.209, "step": 260 }, { "epoch": 0.23040526640608927, "grad_norm": 1.5859375, "learning_rate": 0.00018984485190409026, "loss": 1.2912, "step": 280 }, { "epoch": 0.24686278543509566, "grad_norm": 1.4296875, "learning_rate": 0.00018871650211565587, "loss": 1.2733, "step": 300 }, { "epoch": 0.26332030446410204, "grad_norm": 1.4140625, "learning_rate": 0.00018758815232722145, "loss": 1.1895, "step": 320 }, { "epoch": 0.2797778234931084, "grad_norm": 1.25, "learning_rate": 0.00018645980253878704, "loss": 1.2259, "step": 340 }, { "epoch": 0.2962353425221148, "grad_norm": 1.4921875, "learning_rate": 0.00018533145275035262, "loss": 1.2636, "step": 360 }, { "epoch": 0.3126928615511212, "grad_norm": 1.796875, "learning_rate": 0.0001842031029619182, "loss": 1.2748, "step": 380 }, { "epoch": 0.3291503805801275, "grad_norm": 1.4296875, "learning_rate": 0.0001830747531734838, "loss": 1.1335, "step": 400 }, { "epoch": 0.3456078996091339, "grad_norm": 1.6640625, "learning_rate": 0.00018194640338504937, "loss": 1.1985, "step": 420 }, { "epoch": 0.3620654186381403, "grad_norm": 1.5078125, "learning_rate": 0.00018081805359661496, "loss": 1.2015, "step": 440 }, { "epoch": 0.3785229376671467, "grad_norm": 1.53125, "learning_rate": 0.00017968970380818057, "loss": 1.1548, "step": 460 }, { "epoch": 0.39498045669615306, "grad_norm": 1.359375, "learning_rate": 0.00017856135401974612, "loss": 1.1249, "step": 480 }, { "epoch": 0.41143797572515944, "grad_norm": 1.7109375, "learning_rate": 0.0001774330042313117, "loss": 1.1734, "step": 500 } ], "logging_steps": 20, "max_steps": 3645, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6534994597171200.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }