Mistral-7B-v0.1_case-briefs / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.99660441426146,
"eval_steps": 50,
"global_step": 294,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07,
"learning_rate": 5e-05,
"loss": 1.1701,
"step": 10
},
{
"epoch": 0.14,
"learning_rate": 5e-05,
"loss": 1.1349,
"step": 20
},
{
"epoch": 0.2,
"learning_rate": 5e-05,
"loss": 1.124,
"step": 30
},
{
"epoch": 0.27,
"learning_rate": 5e-05,
"loss": 1.0961,
"step": 40
},
{
"epoch": 0.34,
"learning_rate": 5e-05,
"loss": 1.0956,
"step": 50
},
{
"epoch": 0.34,
"eval_loss": 1.1443167924880981,
"eval_runtime": 15.8615,
"eval_samples_per_second": 16.518,
"eval_steps_per_second": 4.161,
"step": 50
},
{
"epoch": 0.41,
"learning_rate": 5e-05,
"loss": 1.0875,
"step": 60
},
{
"epoch": 0.48,
"learning_rate": 5e-05,
"loss": 1.0766,
"step": 70
},
{
"epoch": 0.54,
"learning_rate": 5e-05,
"loss": 1.1265,
"step": 80
},
{
"epoch": 0.61,
"learning_rate": 5e-05,
"loss": 1.1126,
"step": 90
},
{
"epoch": 0.68,
"learning_rate": 5e-05,
"loss": 1.0635,
"step": 100
},
{
"epoch": 0.68,
"eval_loss": 1.124241828918457,
"eval_runtime": 15.8746,
"eval_samples_per_second": 16.504,
"eval_steps_per_second": 4.158,
"step": 100
},
{
"epoch": 0.75,
"learning_rate": 5e-05,
"loss": 1.0434,
"step": 110
},
{
"epoch": 0.81,
"learning_rate": 5e-05,
"loss": 1.1071,
"step": 120
},
{
"epoch": 0.88,
"learning_rate": 5e-05,
"loss": 1.0926,
"step": 130
},
{
"epoch": 0.95,
"learning_rate": 5e-05,
"loss": 1.0677,
"step": 140
},
{
"epoch": 1.02,
"learning_rate": 5e-05,
"loss": 1.0208,
"step": 150
},
{
"epoch": 1.02,
"eval_loss": 1.1208317279815674,
"eval_runtime": 15.9371,
"eval_samples_per_second": 16.44,
"eval_steps_per_second": 4.141,
"step": 150
},
{
"epoch": 1.09,
"learning_rate": 5e-05,
"loss": 0.9435,
"step": 160
},
{
"epoch": 1.15,
"learning_rate": 5e-05,
"loss": 1.0034,
"step": 170
},
{
"epoch": 1.22,
"learning_rate": 5e-05,
"loss": 0.9165,
"step": 180
},
{
"epoch": 1.29,
"learning_rate": 5e-05,
"loss": 0.8848,
"step": 190
},
{
"epoch": 1.36,
"learning_rate": 5e-05,
"loss": 0.9505,
"step": 200
},
{
"epoch": 1.36,
"eval_loss": 1.1453500986099243,
"eval_runtime": 15.8776,
"eval_samples_per_second": 16.501,
"eval_steps_per_second": 4.157,
"step": 200
},
{
"epoch": 1.43,
"learning_rate": 5e-05,
"loss": 0.9259,
"step": 210
},
{
"epoch": 1.49,
"learning_rate": 5e-05,
"loss": 0.8383,
"step": 220
},
{
"epoch": 1.56,
"learning_rate": 5e-05,
"loss": 0.9317,
"step": 230
},
{
"epoch": 1.63,
"learning_rate": 5e-05,
"loss": 0.9336,
"step": 240
},
{
"epoch": 1.7,
"learning_rate": 5e-05,
"loss": 0.9604,
"step": 250
},
{
"epoch": 1.7,
"eval_loss": 1.1406688690185547,
"eval_runtime": 15.8651,
"eval_samples_per_second": 16.514,
"eval_steps_per_second": 4.16,
"step": 250
},
{
"epoch": 1.77,
"learning_rate": 5e-05,
"loss": 0.8884,
"step": 260
},
{
"epoch": 1.83,
"learning_rate": 5e-05,
"loss": 0.9757,
"step": 270
},
{
"epoch": 1.9,
"learning_rate": 5e-05,
"loss": 0.8977,
"step": 280
},
{
"epoch": 1.97,
"learning_rate": 5e-05,
"loss": 0.8645,
"step": 290
},
{
"epoch": 2.0,
"step": 294,
"total_flos": 7.119009482145792e+16,
"train_loss": 1.0086434495692351,
"train_runtime": 872.7075,
"train_samples_per_second": 5.399,
"train_steps_per_second": 0.337
}
],
"logging_steps": 10,
"max_steps": 294,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 250,
"total_flos": 7.119009482145792e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}