{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.666666666666666, "eval_steps": 500, "global_step": 33, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.44, "learning_rate": 0.00019805941782534764, "loss": 2.2515, "step": 1 }, { "epoch": 0.89, "learning_rate": 0.00019605881764529358, "loss": 2.2515, "step": 2 }, { "epoch": 1.33, "learning_rate": 0.00019405821746523957, "loss": 2.2204, "step": 3 }, { "epoch": 1.78, "learning_rate": 0.00019205761728518557, "loss": 2.1854, "step": 4 }, { "epoch": 2.22, "learning_rate": 0.00019005701710513156, "loss": 2.1461, "step": 5 }, { "epoch": 2.67, "learning_rate": 0.00018805641692507753, "loss": 2.1013, "step": 6 }, { "epoch": 3.11, "learning_rate": 0.00018605581674502352, "loss": 2.0539, "step": 7 }, { "epoch": 3.56, "learning_rate": 0.00018405521656496952, "loss": 2.0019, "step": 8 }, { "epoch": 4.0, "learning_rate": 0.00018205461638491548, "loss": 1.9467, "step": 9 }, { "epoch": 4.44, "learning_rate": 0.00018005401620486148, "loss": 1.887, "step": 10 }, { "epoch": 4.89, "learning_rate": 0.00017805341602480744, "loss": 1.8227, "step": 11 }, { "epoch": 5.33, "learning_rate": 0.00017605281584475344, "loss": 1.7543, "step": 12 }, { "epoch": 5.78, "learning_rate": 0.00017405221566469943, "loss": 1.682, "step": 13 }, { "epoch": 6.22, "learning_rate": 0.00017205161548464542, "loss": 1.6024, "step": 14 }, { "epoch": 6.67, "learning_rate": 0.00017005101530459136, "loss": 1.5171, "step": 15 }, { "epoch": 7.11, "learning_rate": 0.00016805041512453736, "loss": 1.4308, "step": 16 }, { "epoch": 7.56, "learning_rate": 0.00016604981494448335, "loss": 1.3407, "step": 17 }, { "epoch": 8.0, "learning_rate": 0.00016404921476442935, "loss": 1.2479, "step": 18 }, { "epoch": 8.44, "learning_rate": 0.0001620486145843753, "loss": 1.1525, "step": 19 }, { "epoch": 8.89, "learning_rate": 0.0001600480144043213, "loss": 1.0553, "step": 20 }, { "epoch": 9.33, "learning_rate": 0.0001580474142242673, "loss": 0.9591, "step": 21 }, { "epoch": 9.78, "learning_rate": 0.00015604681404421327, "loss": 0.8655, "step": 22 }, { "epoch": 10.22, "learning_rate": 0.00015404621386415926, "loss": 0.7722, "step": 23 }, { "epoch": 10.67, "learning_rate": 0.00015204561368410523, "loss": 0.6823, "step": 24 }, { "epoch": 11.11, "learning_rate": 0.00015004501350405122, "loss": 0.5956, "step": 25 }, { "epoch": 11.56, "learning_rate": 0.00014804441332399721, "loss": 0.5104, "step": 26 }, { "epoch": 12.0, "learning_rate": 0.0001460438131439432, "loss": 0.4314, "step": 27 }, { "epoch": 12.44, "learning_rate": 0.00014404321296388918, "loss": 0.359, "step": 28 }, { "epoch": 12.89, "learning_rate": 0.00014204261278383514, "loss": 0.2952, "step": 29 }, { "epoch": 13.33, "learning_rate": 0.00014004201260378114, "loss": 0.2375, "step": 30 }, { "epoch": 13.78, "learning_rate": 0.00013804141242372713, "loss": 0.1853, "step": 31 }, { "epoch": 14.22, "learning_rate": 0.00013604081224367312, "loss": 0.1398, "step": 32 }, { "epoch": 14.67, "learning_rate": 0.0001340402120636191, "loss": 0.1033, "step": 33 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 1.38542793652224e+16, "trial_name": null, "trial_params": null }