{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.017770597738287562, "eval_steps": 1, "global_step": 11, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0016155088852988692, "grad_norm": 10.710270881652832, "learning_rate": 2.6666666666666664e-06, "loss": 1.8319, "step": 1 }, { "epoch": 0.0016155088852988692, "eval_loss": 1.8248138427734375, "eval_runtime": 220.146, "eval_samples_per_second": 0.454, "eval_steps_per_second": 0.059, "step": 1 }, { "epoch": 0.0032310177705977385, "grad_norm": 9.534977912902832, "learning_rate": 5.333333333333333e-06, "loss": 1.8296, "step": 2 }, { "epoch": 0.0032310177705977385, "eval_loss": 1.816626787185669, "eval_runtime": 220.6901, "eval_samples_per_second": 0.453, "eval_steps_per_second": 0.059, "step": 2 }, { "epoch": 0.004846526655896607, "grad_norm": 9.70173168182373, "learning_rate": 8e-06, "loss": 1.7468, "step": 3 }, { "epoch": 0.004846526655896607, "eval_loss": 1.8004165887832642, "eval_runtime": 220.5949, "eval_samples_per_second": 0.453, "eval_steps_per_second": 0.059, "step": 3 }, { "epoch": 0.006462035541195477, "grad_norm": 9.628801345825195, "learning_rate": 7.529411764705882e-06, "loss": 1.4906, "step": 4 }, { "epoch": 0.006462035541195477, "eval_loss": 1.7767361402511597, "eval_runtime": 220.4948, "eval_samples_per_second": 0.454, "eval_steps_per_second": 0.059, "step": 4 }, { "epoch": 0.008077544426494346, "grad_norm": 9.02491283416748, "learning_rate": 7.058823529411764e-06, "loss": 1.7261, "step": 5 }, { "epoch": 0.008077544426494346, "eval_loss": 1.7553186416625977, "eval_runtime": 220.7639, "eval_samples_per_second": 0.453, "eval_steps_per_second": 0.059, "step": 5 }, { "epoch": 0.009693053311793215, "grad_norm": 8.989361763000488, "learning_rate": 6.588235294117646e-06, "loss": 1.8311, "step": 6 }, { "epoch": 0.009693053311793215, "eval_loss": 1.7363536357879639, "eval_runtime": 220.3852, "eval_samples_per_second": 0.454, "eval_steps_per_second": 0.059, "step": 6 }, { "epoch": 0.011308562197092083, "grad_norm": 8.91972541809082, "learning_rate": 6.1176470588235285e-06, "loss": 1.8441, "step": 7 }, { "epoch": 0.011308562197092083, "eval_loss": 1.720198631286621, "eval_runtime": 220.8055, "eval_samples_per_second": 0.453, "eval_steps_per_second": 0.059, "step": 7 }, { "epoch": 0.012924071082390954, "grad_norm": 5.602985858917236, "learning_rate": 5.647058823529412e-06, "loss": 1.6051, "step": 8 }, { "epoch": 0.012924071082390954, "eval_loss": 1.7061126232147217, "eval_runtime": 220.3213, "eval_samples_per_second": 0.454, "eval_steps_per_second": 0.059, "step": 8 }, { "epoch": 0.014539579967689823, "grad_norm": 4.554469108581543, "learning_rate": 5.176470588235294e-06, "loss": 1.6065, "step": 9 }, { "epoch": 0.014539579967689823, "eval_loss": 1.69382643699646, "eval_runtime": 220.1928, "eval_samples_per_second": 0.454, "eval_steps_per_second": 0.059, "step": 9 }, { "epoch": 0.01615508885298869, "grad_norm": 4.634820461273193, "learning_rate": 4.705882352941176e-06, "loss": 1.6142, "step": 10 }, { "epoch": 0.01615508885298869, "eval_loss": 1.6832895278930664, "eval_runtime": 220.9396, "eval_samples_per_second": 0.453, "eval_steps_per_second": 0.059, "step": 10 }, { "epoch": 0.017770597738287562, "grad_norm": 3.6815476417541504, "learning_rate": 4.235294117647058e-06, "loss": 1.3256, "step": 11 }, { "epoch": 0.017770597738287562, "eval_loss": 1.6742274761199951, "eval_runtime": 220.8408, "eval_samples_per_second": 0.453, "eval_steps_per_second": 0.059, "step": 11 } ], "logging_steps": 1, "max_steps": 20, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1, "total_flos": 4063376418275328.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }