{ "best_metric": 0.0006509709637612104, "best_model_checkpoint": "humza-sami/prompt-classifier/checkpoint-247", "epoch": 19.0, "eval_steps": 500, "global_step": 247, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.77, "learning_rate": 1.0000000000000002e-06, "loss": 1.1265, "step": 10 }, { "epoch": 1.0, "eval_loss": 1.111129641532898, "eval_runtime": 0.0855, "eval_samples_per_second": 292.39, "eval_steps_per_second": 46.782, "step": 13 }, { "epoch": 1.54, "learning_rate": 2.0000000000000003e-06, "loss": 1.1033, "step": 20 }, { "epoch": 2.0, "eval_loss": 1.108482837677002, "eval_runtime": 0.0857, "eval_samples_per_second": 291.796, "eval_steps_per_second": 46.687, "step": 26 }, { "epoch": 2.31, "learning_rate": 3e-06, "loss": 1.0507, "step": 30 }, { "epoch": 3.0, "eval_loss": 1.1056454181671143, "eval_runtime": 0.0827, "eval_samples_per_second": 302.186, "eval_steps_per_second": 48.35, "step": 39 }, { "epoch": 3.08, "learning_rate": 4.000000000000001e-06, "loss": 1.0898, "step": 40 }, { "epoch": 3.85, "learning_rate": 5e-06, "loss": 1.0706, "step": 50 }, { "epoch": 4.0, "eval_loss": 1.0947411060333252, "eval_runtime": 0.0864, "eval_samples_per_second": 289.336, "eval_steps_per_second": 46.294, "step": 52 }, { "epoch": 4.62, "learning_rate": 6e-06, "loss": 1.0614, "step": 60 }, { "epoch": 5.0, "eval_loss": 1.051918387413025, "eval_runtime": 0.0973, "eval_samples_per_second": 256.927, "eval_steps_per_second": 41.108, "step": 65 }, { "epoch": 5.38, "learning_rate": 7.000000000000001e-06, "loss": 1.0133, "step": 70 }, { "epoch": 6.0, "eval_loss": 0.8626992702484131, "eval_runtime": 0.0947, "eval_samples_per_second": 264.056, "eval_steps_per_second": 42.249, "step": 78 }, { "epoch": 6.15, "learning_rate": 8.000000000000001e-06, "loss": 0.892, "step": 80 }, { "epoch": 6.92, "learning_rate": 9e-06, "loss": 0.7292, "step": 90 }, { "epoch": 7.0, "eval_loss": 0.5572385787963867, "eval_runtime": 0.0833, "eval_samples_per_second": 299.954, "eval_steps_per_second": 47.993, "step": 91 }, { "epoch": 7.69, "learning_rate": 1e-05, "loss": 0.4992, "step": 100 }, { "epoch": 8.0, "eval_loss": 0.19228488206863403, "eval_runtime": 0.0864, "eval_samples_per_second": 289.389, "eval_steps_per_second": 46.302, "step": 104 }, { "epoch": 8.46, "learning_rate": 1.1000000000000001e-05, "loss": 0.3048, "step": 110 }, { "epoch": 9.0, "eval_loss": 0.05422622710466385, "eval_runtime": 0.0847, "eval_samples_per_second": 295.026, "eval_steps_per_second": 47.204, "step": 117 }, { "epoch": 9.23, "learning_rate": 1.2e-05, "loss": 0.1196, "step": 120 }, { "epoch": 10.0, "learning_rate": 1.3000000000000001e-05, "loss": 0.0357, "step": 130 }, { "epoch": 10.0, "eval_loss": 0.02210908569395542, "eval_runtime": 0.0776, "eval_samples_per_second": 321.978, "eval_steps_per_second": 51.516, "step": 130 }, { "epoch": 10.77, "learning_rate": 1.4000000000000001e-05, "loss": 0.0143, "step": 140 }, { "epoch": 11.0, "eval_loss": 0.008073830977082253, "eval_runtime": 0.0889, "eval_samples_per_second": 281.316, "eval_steps_per_second": 45.011, "step": 143 }, { "epoch": 11.54, "learning_rate": 1.5e-05, "loss": 0.0071, "step": 150 }, { "epoch": 12.0, "eval_loss": 0.01958407461643219, "eval_runtime": 0.0969, "eval_samples_per_second": 257.94, "eval_steps_per_second": 41.27, "step": 156 }, { "epoch": 12.31, "learning_rate": 1.6000000000000003e-05, "loss": 0.0041, "step": 160 }, { "epoch": 13.0, "eval_loss": 0.00249945605173707, "eval_runtime": 0.0789, "eval_samples_per_second": 316.996, "eval_steps_per_second": 50.719, "step": 169 }, { "epoch": 13.08, "learning_rate": 1.7000000000000003e-05, "loss": 0.003, "step": 170 }, { "epoch": 13.85, "learning_rate": 1.8e-05, "loss": 0.0024, "step": 180 }, { "epoch": 14.0, "eval_loss": 0.0013755728723481297, "eval_runtime": 0.0872, "eval_samples_per_second": 286.818, "eval_steps_per_second": 45.891, "step": 182 }, { "epoch": 14.62, "learning_rate": 1.9e-05, "loss": 0.002, "step": 190 }, { "epoch": 15.0, "eval_loss": 0.0011439197696745396, "eval_runtime": 0.0872, "eval_samples_per_second": 286.641, "eval_steps_per_second": 45.863, "step": 195 }, { "epoch": 15.38, "learning_rate": 2e-05, "loss": 0.0017, "step": 200 }, { "epoch": 16.0, "eval_loss": 0.0010419671889394522, "eval_runtime": 0.0861, "eval_samples_per_second": 290.515, "eval_steps_per_second": 46.482, "step": 208 }, { "epoch": 16.15, "learning_rate": 2.1e-05, "loss": 0.0016, "step": 210 }, { "epoch": 16.92, "learning_rate": 2.2000000000000003e-05, "loss": 0.0014, "step": 220 }, { "epoch": 17.0, "eval_loss": 0.0008559968555346131, "eval_runtime": 0.0864, "eval_samples_per_second": 289.468, "eval_steps_per_second": 46.315, "step": 221 }, { "epoch": 17.69, "learning_rate": 2.3000000000000003e-05, "loss": 0.0012, "step": 230 }, { "epoch": 18.0, "eval_loss": 0.000737538211978972, "eval_runtime": 0.0854, "eval_samples_per_second": 292.593, "eval_steps_per_second": 46.815, "step": 234 }, { "epoch": 18.46, "learning_rate": 2.4e-05, "loss": 0.0011, "step": 240 }, { "epoch": 19.0, "eval_loss": 0.0006509709637612104, "eval_runtime": 0.0863, "eval_samples_per_second": 289.626, "eval_steps_per_second": 46.34, "step": 247 } ], "logging_steps": 10, "max_steps": 260, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 181727093133432.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }