{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9924812030075187, "eval_steps": 17, "global_step": 66, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 2e-05, "loss": 1.8373, "step": 1 }, { "epoch": 0.02, "eval_loss": 1.8333783149719238, "eval_runtime": 16.5756, "eval_samples_per_second": 2.715, "eval_steps_per_second": 1.388, "step": 1 }, { "epoch": 0.03, "learning_rate": 4e-05, "loss": 1.8119, "step": 2 }, { "epoch": 0.05, "learning_rate": 6e-05, "loss": 1.8301, "step": 3 }, { "epoch": 0.06, "learning_rate": 8e-05, "loss": 1.7976, "step": 4 }, { "epoch": 0.08, "learning_rate": 0.0001, "loss": 1.853, "step": 5 }, { "epoch": 0.09, "learning_rate": 0.00012, "loss": 1.7586, "step": 6 }, { "epoch": 0.11, "learning_rate": 0.00014, "loss": 1.8416, "step": 7 }, { "epoch": 0.12, "learning_rate": 0.00016, "loss": 1.7755, "step": 8 }, { "epoch": 0.14, "learning_rate": 0.00018, "loss": 1.804, "step": 9 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 1.7858, "step": 10 }, { "epoch": 0.17, "learning_rate": 0.0001999923511388017, "loss": 1.7311, "step": 11 }, { "epoch": 0.18, "learning_rate": 0.0001999694057253083, "loss": 1.7012, "step": 12 }, { "epoch": 0.2, "learning_rate": 0.00019993116726964554, "loss": 1.742, "step": 13 }, { "epoch": 0.21, "learning_rate": 0.00019987764162142613, "loss": 1.7209, "step": 14 }, { "epoch": 0.23, "learning_rate": 0.0001998088369688552, "loss": 1.7578, "step": 15 }, { "epoch": 0.24, "learning_rate": 0.00019972476383747748, "loss": 1.6906, "step": 16 }, { "epoch": 0.26, "learning_rate": 0.0001996254350885672, "loss": 1.738, "step": 17 }, { "epoch": 0.26, "eval_loss": 1.754616141319275, "eval_runtime": 16.6372, "eval_samples_per_second": 2.705, "eval_steps_per_second": 1.382, "step": 17 }, { "epoch": 0.27, "learning_rate": 0.0001995108659171607, "loss": 1.7976, "step": 18 }, { "epoch": 0.29, "learning_rate": 0.00019938107384973166, "loss": 1.7781, "step": 19 }, { "epoch": 0.3, "learning_rate": 0.00019923607874151032, "loss": 1.7737, "step": 20 }, { "epoch": 0.32, "learning_rate": 0.00019907590277344582, "loss": 1.6662, "step": 21 }, { "epoch": 0.33, "learning_rate": 0.00019890057044881306, "loss": 1.7869, "step": 22 }, { "epoch": 0.35, "learning_rate": 0.0001987101085894644, "loss": 1.6646, "step": 23 }, { "epoch": 0.36, "learning_rate": 0.00019850454633172631, "loss": 1.7482, "step": 24 }, { "epoch": 0.38, "learning_rate": 0.0001982839151219424, "loss": 1.7112, "step": 25 }, { "epoch": 0.39, "learning_rate": 0.00019804824871166255, "loss": 1.7083, "step": 26 }, { "epoch": 0.41, "learning_rate": 0.00019779758315248004, "loss": 1.728, "step": 27 }, { "epoch": 0.42, "learning_rate": 0.00019753195679051628, "loss": 1.7292, "step": 28 }, { "epoch": 0.44, "learning_rate": 0.0001972514102605547, "loss": 1.6902, "step": 29 }, { "epoch": 0.45, "learning_rate": 0.00019695598647982468, "loss": 1.7432, "step": 30 }, { "epoch": 0.47, "learning_rate": 0.00019664573064143604, "loss": 1.7258, "step": 31 }, { "epoch": 0.48, "learning_rate": 0.00019632069020746572, "loss": 1.7363, "step": 32 }, { "epoch": 0.5, "learning_rate": 0.00019598091490169694, "loss": 1.7142, "step": 33 }, { "epoch": 0.51, "learning_rate": 0.00019562645670201276, "loss": 1.704, "step": 34 }, { "epoch": 0.51, "eval_loss": 1.7388739585876465, "eval_runtime": 16.5955, "eval_samples_per_second": 2.712, "eval_steps_per_second": 1.386, "step": 34 }, { "epoch": 0.53, "learning_rate": 0.0001952573698324446, "loss": 1.7393, "step": 35 }, { "epoch": 0.54, "learning_rate": 0.00019487371075487713, "loss": 1.7568, "step": 36 }, { "epoch": 0.56, "learning_rate": 0.000194475538160411, "loss": 1.7445, "step": 37 }, { "epoch": 0.57, "learning_rate": 0.0001940629129603844, "loss": 1.6595, "step": 38 }, { "epoch": 0.59, "learning_rate": 0.00019363589827705492, "loss": 1.7288, "step": 39 }, { "epoch": 0.6, "learning_rate": 0.00019319455943394347, "loss": 1.6342, "step": 40 }, { "epoch": 0.62, "learning_rate": 0.00019273896394584103, "loss": 1.7066, "step": 41 }, { "epoch": 0.63, "learning_rate": 0.00019226918150848068, "loss": 1.6558, "step": 42 }, { "epoch": 0.65, "learning_rate": 0.00019178528398787551, "loss": 1.6756, "step": 43 }, { "epoch": 0.66, "learning_rate": 0.00019128734540932495, "loss": 1.7146, "step": 44 }, { "epoch": 0.68, "learning_rate": 0.00019077544194609042, "loss": 1.7043, "step": 45 }, { "epoch": 0.69, "learning_rate": 0.00019024965190774263, "loss": 1.6396, "step": 46 }, { "epoch": 0.71, "learning_rate": 0.00018971005572818213, "loss": 1.648, "step": 47 }, { "epoch": 0.72, "learning_rate": 0.00018915673595333444, "loss": 1.5988, "step": 48 }, { "epoch": 0.74, "learning_rate": 0.00018858977722852275, "loss": 1.7394, "step": 49 }, { "epoch": 0.75, "learning_rate": 0.00018800926628551886, "loss": 1.6362, "step": 50 }, { "epoch": 0.77, "learning_rate": 0.00018741529192927526, "loss": 1.6762, "step": 51 }, { "epoch": 0.77, "eval_loss": 1.7409569025039673, "eval_runtime": 16.7147, "eval_samples_per_second": 2.692, "eval_steps_per_second": 1.376, "step": 51 }, { "epoch": 0.78, "learning_rate": 0.00018680794502434018, "loss": 1.6534, "step": 52 }, { "epoch": 0.8, "learning_rate": 0.00018618731848095706, "loss": 1.6551, "step": 53 }, { "epoch": 0.81, "learning_rate": 0.00018555350724085162, "loss": 1.6297, "step": 54 }, { "epoch": 0.83, "learning_rate": 0.0001849066082627079, "loss": 1.7152, "step": 55 }, { "epoch": 0.84, "learning_rate": 0.00018424672050733576, "loss": 1.7062, "step": 56 }, { "epoch": 0.86, "learning_rate": 0.00018357394492253215, "loss": 1.5742, "step": 57 }, { "epoch": 0.87, "learning_rate": 0.00018288838442763838, "loss": 1.6424, "step": 58 }, { "epoch": 0.89, "learning_rate": 0.00018219014389779585, "loss": 1.6544, "step": 59 }, { "epoch": 0.9, "learning_rate": 0.00018147933014790244, "loss": 1.6179, "step": 60 }, { "epoch": 0.92, "learning_rate": 0.0001807560519162724, "loss": 1.6823, "step": 61 }, { "epoch": 0.93, "learning_rate": 0.00018002041984800174, "loss": 1.5845, "step": 62 }, { "epoch": 0.95, "learning_rate": 0.00017927254647804209, "loss": 1.6177, "step": 63 }, { "epoch": 0.96, "learning_rate": 0.0001785125462139855, "loss": 1.6196, "step": 64 }, { "epoch": 0.98, "learning_rate": 0.00017774053531856258, "loss": 1.6526, "step": 65 }, { "epoch": 0.99, "learning_rate": 0.000176956631891857, "loss": 1.5792, "step": 66 } ], "logging_steps": 1, "max_steps": 264, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 66, "total_flos": 8.67752288012206e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }