tony__assi-ig-prediction / trainer_state.json
tonyassi's picture
Upload folder using huggingface_hub
70e9a78 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 8.472058296203613,
"learning_rate": 9.5e-05,
"loss": 0.2548,
"step": 10
},
{
"epoch": 1.0,
"eval_loss": 0.20547232031822205,
"eval_mse": 0.20547235012054443,
"eval_runtime": 1.3582,
"eval_samples_per_second": 14.725,
"eval_steps_per_second": 2.209,
"step": 10
},
{
"epoch": 2.0,
"grad_norm": 16.11898422241211,
"learning_rate": 9e-05,
"loss": 0.4596,
"step": 20
},
{
"epoch": 2.0,
"eval_loss": 0.10995284467935562,
"eval_mse": 0.10995285212993622,
"eval_runtime": 0.9281,
"eval_samples_per_second": 21.551,
"eval_steps_per_second": 3.233,
"step": 20
},
{
"epoch": 3.0,
"grad_norm": 6.556423187255859,
"learning_rate": 8.5e-05,
"loss": 0.0866,
"step": 30
},
{
"epoch": 3.0,
"eval_loss": 0.12321324646472931,
"eval_mse": 0.12321324646472931,
"eval_runtime": 0.8771,
"eval_samples_per_second": 22.803,
"eval_steps_per_second": 3.42,
"step": 30
},
{
"epoch": 4.0,
"grad_norm": 10.101408958435059,
"learning_rate": 8e-05,
"loss": 0.0741,
"step": 40
},
{
"epoch": 4.0,
"eval_loss": 0.055902622640132904,
"eval_mse": 0.05590261146426201,
"eval_runtime": 0.8907,
"eval_samples_per_second": 22.455,
"eval_steps_per_second": 3.368,
"step": 40
},
{
"epoch": 5.0,
"grad_norm": 0.8052617311477661,
"learning_rate": 7.500000000000001e-05,
"loss": 0.0471,
"step": 50
},
{
"epoch": 5.0,
"eval_loss": 0.033162668347358704,
"eval_mse": 0.033162668347358704,
"eval_runtime": 0.8798,
"eval_samples_per_second": 22.732,
"eval_steps_per_second": 3.41,
"step": 50
},
{
"epoch": 6.0,
"grad_norm": 4.320048809051514,
"learning_rate": 7e-05,
"loss": 0.0206,
"step": 60
},
{
"epoch": 6.0,
"eval_loss": 0.0486394502222538,
"eval_mse": 0.0486394464969635,
"eval_runtime": 0.9146,
"eval_samples_per_second": 21.867,
"eval_steps_per_second": 3.28,
"step": 60
},
{
"epoch": 7.0,
"grad_norm": 4.353482246398926,
"learning_rate": 6.500000000000001e-05,
"loss": 0.0136,
"step": 70
},
{
"epoch": 7.0,
"eval_loss": 0.03369127959012985,
"eval_mse": 0.03369127959012985,
"eval_runtime": 0.9267,
"eval_samples_per_second": 21.582,
"eval_steps_per_second": 3.237,
"step": 70
},
{
"epoch": 8.0,
"grad_norm": 1.3020066022872925,
"learning_rate": 6e-05,
"loss": 0.0065,
"step": 80
},
{
"epoch": 8.0,
"eval_loss": 0.03295436501502991,
"eval_mse": 0.032954368740320206,
"eval_runtime": 0.8738,
"eval_samples_per_second": 22.889,
"eval_steps_per_second": 3.433,
"step": 80
},
{
"epoch": 9.0,
"grad_norm": 0.6798593997955322,
"learning_rate": 5.500000000000001e-05,
"loss": 0.0054,
"step": 90
},
{
"epoch": 9.0,
"eval_loss": 0.03235679119825363,
"eval_mse": 0.03235679119825363,
"eval_runtime": 0.911,
"eval_samples_per_second": 21.953,
"eval_steps_per_second": 3.293,
"step": 90
},
{
"epoch": 10.0,
"grad_norm": 0.5722326040267944,
"learning_rate": 5e-05,
"loss": 0.0023,
"step": 100
},
{
"epoch": 10.0,
"eval_loss": 0.03351482003927231,
"eval_mse": 0.03351482003927231,
"eval_runtime": 0.9079,
"eval_samples_per_second": 22.029,
"eval_steps_per_second": 3.304,
"step": 100
},
{
"epoch": 11.0,
"grad_norm": 1.5198137760162354,
"learning_rate": 4.5e-05,
"loss": 0.0035,
"step": 110
},
{
"epoch": 11.0,
"eval_loss": 0.03029645048081875,
"eval_mse": 0.030296454206109047,
"eval_runtime": 0.8889,
"eval_samples_per_second": 22.5,
"eval_steps_per_second": 3.375,
"step": 110
},
{
"epoch": 12.0,
"grad_norm": 2.942338705062866,
"learning_rate": 4e-05,
"loss": 0.0029,
"step": 120
},
{
"epoch": 12.0,
"eval_loss": 0.03209378570318222,
"eval_mse": 0.03209378197789192,
"eval_runtime": 0.9137,
"eval_samples_per_second": 21.889,
"eval_steps_per_second": 3.283,
"step": 120
},
{
"epoch": 13.0,
"grad_norm": 0.5375175476074219,
"learning_rate": 3.5e-05,
"loss": 0.0035,
"step": 130
},
{
"epoch": 13.0,
"eval_loss": 0.03165650740265846,
"eval_mse": 0.03165651112794876,
"eval_runtime": 0.9157,
"eval_samples_per_second": 21.84,
"eval_steps_per_second": 3.276,
"step": 130
},
{
"epoch": 14.0,
"grad_norm": 0.43184858560562134,
"learning_rate": 3e-05,
"loss": 0.0011,
"step": 140
},
{
"epoch": 14.0,
"eval_loss": 0.03108842670917511,
"eval_mse": 0.03108843043446541,
"eval_runtime": 0.889,
"eval_samples_per_second": 22.498,
"eval_steps_per_second": 3.375,
"step": 140
},
{
"epoch": 15.0,
"grad_norm": 0.3797425925731659,
"learning_rate": 2.5e-05,
"loss": 0.0007,
"step": 150
},
{
"epoch": 15.0,
"eval_loss": 0.029892250895500183,
"eval_mse": 0.029892250895500183,
"eval_runtime": 0.9199,
"eval_samples_per_second": 21.742,
"eval_steps_per_second": 3.261,
"step": 150
},
{
"epoch": 16.0,
"grad_norm": 0.3728632926940918,
"learning_rate": 2e-05,
"loss": 0.0004,
"step": 160
},
{
"epoch": 16.0,
"eval_loss": 0.029963841661810875,
"eval_mse": 0.029963845387101173,
"eval_runtime": 0.9532,
"eval_samples_per_second": 20.981,
"eval_steps_per_second": 3.147,
"step": 160
},
{
"epoch": 17.0,
"grad_norm": 0.7130188941955566,
"learning_rate": 1.5e-05,
"loss": 0.0003,
"step": 170
},
{
"epoch": 17.0,
"eval_loss": 0.03030126914381981,
"eval_mse": 0.03030126728117466,
"eval_runtime": 0.8824,
"eval_samples_per_second": 22.665,
"eval_steps_per_second": 3.4,
"step": 170
},
{
"epoch": 18.0,
"grad_norm": 0.05147755146026611,
"learning_rate": 1e-05,
"loss": 0.0001,
"step": 180
},
{
"epoch": 18.0,
"eval_loss": 0.03000779077410698,
"eval_mse": 0.03000778891146183,
"eval_runtime": 0.9309,
"eval_samples_per_second": 21.484,
"eval_steps_per_second": 3.223,
"step": 180
},
{
"epoch": 19.0,
"grad_norm": 0.17308902740478516,
"learning_rate": 5e-06,
"loss": 0.0,
"step": 190
},
{
"epoch": 19.0,
"eval_loss": 0.030044889077544212,
"eval_mse": 0.03004489280283451,
"eval_runtime": 0.8961,
"eval_samples_per_second": 22.32,
"eval_steps_per_second": 3.348,
"step": 190
},
{
"epoch": 20.0,
"grad_norm": 0.0706406906247139,
"learning_rate": 0.0,
"loss": 0.0,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}