{ "best_metric": 1.3716470003128052, "best_model_checkpoint": "training/mcq/checkpoint-9344", "epoch": 0.8000684990153266, "eval_steps": 2336, "global_step": 9344, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.050004281188457914, "grad_norm": 33.87300109863281, "learning_rate": 2.5e-06, "loss": 1.9187, "step": 584 }, { "epoch": 0.10000856237691583, "grad_norm": 21.070466995239258, "learning_rate": 5e-06, "loss": 1.4832, "step": 1168 }, { "epoch": 0.15001284356537375, "grad_norm": 26.92688751220703, "learning_rate": 4.962012173713227e-06, "loss": 1.4456, "step": 1752 }, { "epoch": 0.20001712475383165, "grad_norm": 23.34733009338379, "learning_rate": 4.849203154809702e-06, "loss": 1.4308, "step": 2336 }, { "epoch": 0.20001712475383165, "eval_loss": 1.4146325588226318, "eval_runtime": 45.195, "eval_samples_per_second": 22.126, "eval_steps_per_second": 22.126, "step": 2336 }, { "epoch": 0.25002140594228955, "grad_norm": 24.534526824951172, "learning_rate": 4.665001238820376e-06, "loss": 1.438, "step": 2920 }, { "epoch": 0.3000256871307475, "grad_norm": 17.830188751220703, "learning_rate": 4.4150043700542834e-06, "loss": 1.4406, "step": 3504 }, { "epoch": 0.3500299683192054, "grad_norm": 23.592960357666016, "learning_rate": 4.106810018609764e-06, "loss": 1.3984, "step": 4088 }, { "epoch": 0.4000342495076633, "grad_norm": 26.053688049316406, "learning_rate": 3.7497842912750083e-06, "loss": 1.4088, "step": 4672 }, { "epoch": 0.4000342495076633, "eval_loss": 1.3911218643188477, "eval_runtime": 45.1415, "eval_samples_per_second": 22.153, "eval_steps_per_second": 22.153, "step": 4672 }, { "epoch": 0.45003853069612126, "grad_norm": 18.178329467773438, "learning_rate": 3.3547772930979383e-06, "loss": 1.4211, "step": 5256 }, { "epoch": 0.5000428118845791, "grad_norm": 24.71056365966797, "learning_rate": 2.9337933898616017e-06, "loss": 1.3645, "step": 5840 }, { "epoch": 0.5500470930730371, "grad_norm": 20.96603012084961, "learning_rate": 2.499626392274534e-06, "loss": 1.3794, "step": 6424 }, { "epoch": 0.600051374261495, "grad_norm": 21.262020111083984, "learning_rate": 2.0654707487237674e-06, "loss": 1.3988, "step": 7008 }, { "epoch": 0.600051374261495, "eval_loss": 1.3772270679473877, "eval_runtime": 44.5512, "eval_samples_per_second": 22.446, "eval_steps_per_second": 22.446, "step": 7008 }, { "epoch": 0.6500556554499529, "grad_norm": 33.738304138183594, "learning_rate": 1.6445205625442019e-06, "loss": 1.3821, "step": 7592 }, { "epoch": 0.7000599366384108, "grad_norm": 25.255008697509766, "learning_rate": 1.2495686197742195e-06, "loss": 1.3797, "step": 8176 }, { "epoch": 0.7500642178268687, "grad_norm": 25.995363235473633, "learning_rate": 8.926176130486741e-07, "loss": 1.3834, "step": 8760 }, { "epoch": 0.8000684990153266, "grad_norm": 23.964170455932617, "learning_rate": 5.845153766366707e-07, "loss": 1.378, "step": 9344 }, { "epoch": 0.8000684990153266, "eval_loss": 1.3716470003128052, "eval_runtime": 44.1994, "eval_samples_per_second": 22.625, "eval_steps_per_second": 22.625, "step": 9344 } ], "logging_steps": 584, "max_steps": 11679, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2336, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }