{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.806032970887408, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "grad_norm": 0.5909375548362732, "learning_rate": 1.9932584269662923e-05, "loss": 2.0237, "step": 20 }, { "epoch": 0.22, "grad_norm": 0.5826025009155273, "learning_rate": 1.9857677902621722e-05, "loss": 1.9306, "step": 40 }, { "epoch": 0.34, "grad_norm": 0.5491089820861816, "learning_rate": 1.9782771535580525e-05, "loss": 1.7959, "step": 60 }, { "epoch": 0.45, "grad_norm": 1.362810730934143, "learning_rate": 1.970786516853933e-05, "loss": 1.6599, "step": 80 }, { "epoch": 0.56, "grad_norm": 1.4427486658096313, "learning_rate": 1.963295880149813e-05, "loss": 1.5685, "step": 100 }, { "epoch": 0.67, "grad_norm": 0.9993659257888794, "learning_rate": 1.956179775280899e-05, "loss": 1.4621, "step": 120 }, { "epoch": 0.79, "grad_norm": 1.614562749862671, "learning_rate": 1.9486891385767793e-05, "loss": 1.31, "step": 140 }, { "epoch": 0.9, "grad_norm": 1.1975798606872559, "learning_rate": 1.9411985018726593e-05, "loss": 1.2322, "step": 160 }, { "epoch": 1.01, "grad_norm": 0.7684128880500793, "learning_rate": 1.9337078651685396e-05, "loss": 1.1361, "step": 180 }, { "epoch": 1.12, "grad_norm": 0.9336960911750793, "learning_rate": 1.9262172284644195e-05, "loss": 1.0797, "step": 200 }, { "epoch": 1.23, "grad_norm": 0.8471770882606506, "learning_rate": 1.9187265917603e-05, "loss": 1.0368, "step": 220 }, { "epoch": 1.35, "grad_norm": 1.111340045928955, "learning_rate": 1.9112359550561798e-05, "loss": 0.9738, "step": 240 }, { "epoch": 1.46, "grad_norm": 0.8093781471252441, "learning_rate": 1.90374531835206e-05, "loss": 0.9494, "step": 260 }, { "epoch": 1.57, "grad_norm": 0.8438062071800232, "learning_rate": 1.89625468164794e-05, "loss": 0.9276, "step": 280 }, { "epoch": 1.68, "grad_norm": 0.9896701574325562, "learning_rate": 1.8887640449438204e-05, "loss": 0.8656, "step": 300 }, { "epoch": 1.8, "grad_norm": 0.8278244137763977, "learning_rate": 1.8812734082397007e-05, "loss": 0.8431, "step": 320 }, { "epoch": 1.91, "grad_norm": 0.931291937828064, "learning_rate": 1.8737827715355807e-05, "loss": 0.7945, "step": 340 }, { "epoch": 2.02, "grad_norm": 1.21769380569458, "learning_rate": 1.866292134831461e-05, "loss": 0.7647, "step": 360 }, { "epoch": 2.13, "grad_norm": 3.5183286666870117, "learning_rate": 1.858801498127341e-05, "loss": 0.7497, "step": 380 }, { "epoch": 2.24, "grad_norm": 1.1153030395507812, "learning_rate": 1.8513108614232212e-05, "loss": 0.7507, "step": 400 }, { "epoch": 2.36, "grad_norm": 1.0140526294708252, "learning_rate": 1.8438202247191012e-05, "loss": 0.7415, "step": 420 }, { "epoch": 2.47, "grad_norm": 1.4395232200622559, "learning_rate": 1.8363295880149815e-05, "loss": 0.6947, "step": 440 }, { "epoch": 2.58, "grad_norm": 1.4253089427947998, "learning_rate": 1.8288389513108615e-05, "loss": 0.7429, "step": 460 }, { "epoch": 2.69, "grad_norm": 1.3152351379394531, "learning_rate": 1.8213483146067418e-05, "loss": 0.7363, "step": 480 }, { "epoch": 2.81, "grad_norm": 2.5935957431793213, "learning_rate": 1.8138576779026217e-05, "loss": 0.6486, "step": 500 } ], "logging_steps": 20, "max_steps": 5340, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "total_flos": 1.2995638935552e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }