longformer-spans / checkpoint-492 /trainer_state.json
Theoreticallyhugo's picture
Training in progress, epoch 12, checkpoint
17b18ab verified
raw
history blame
13.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 12.0,
"eval_steps": 500,
"global_step": 492,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_B": {
"f1-score": 0.6659475008989573,
"precision": 0.6905294556301268,
"recall": 0.6430555555555556,
"support": 1440.0
},
"eval_I": {
"f1-score": 0.9167863956473609,
"precision": 0.9325347388596071,
"recall": 0.9015611247510076,
"support": 21587.0
},
"eval_O": {
"f1-score": 0.8443157797996509,
"precision": 0.8138010452653025,
"recall": 0.8772080588179128,
"support": 10473.0
},
"eval_accuracy": 0.8828358208955224,
"eval_loss": 0.31183817982673645,
"eval_macro avg": {
"f1-score": 0.8090165587819897,
"precision": 0.8122884132516788,
"recall": 0.8072749130414919,
"support": 33500.0
},
"eval_runtime": 1.3725,
"eval_samples_per_second": 59.018,
"eval_steps_per_second": 8.015,
"eval_weighted avg": {
"f1-score": 0.8833478055515172,
"precision": 0.8850127812218876,
"recall": 0.8828358208955224,
"support": 33500.0
},
"step": 41
},
{
"epoch": 2.0,
"eval_B": {
"f1-score": 0.8377561303325496,
"precision": 0.8113207547169812,
"recall": 0.8659722222222223,
"support": 1440.0
},
"eval_I": {
"f1-score": 0.9473755107538951,
"precision": 0.9191461555216729,
"recall": 0.9773938018251725,
"support": 21587.0
},
"eval_O": {
"f1-score": 0.8803449514911966,
"precision": 0.9519316163410302,
"recall": 0.8187720805881791,
"support": 10473.0
},
"eval_accuracy": 0.9230149253731343,
"eval_loss": 0.22658132016658783,
"eval_macro avg": {
"f1-score": 0.8884921975258804,
"precision": 0.8941328421932281,
"recall": 0.8873793682118579,
"support": 33500.0
},
"eval_runtime": 1.3801,
"eval_samples_per_second": 58.69,
"eval_steps_per_second": 7.97,
"eval_weighted avg": {
"f1-score": 0.9217079598594182,
"precision": 0.9247608884769675,
"recall": 0.9230149253731343,
"support": 33500.0
},
"step": 82
},
{
"epoch": 3.0,
"eval_B": {
"f1-score": 0.8710106382978723,
"precision": 0.8354591836734694,
"recall": 0.9097222222222222,
"support": 1440.0
},
"eval_I": {
"f1-score": 0.9543258273315707,
"precision": 0.9392974112791063,
"recall": 0.9698429610413675,
"support": 21587.0
},
"eval_O": {
"f1-score": 0.8996818452972758,
"precision": 0.9384009125790729,
"recall": 0.8640313186288552,
"support": 10473.0
},
"eval_accuracy": 0.934179104477612,
"eval_loss": 0.20443089306354523,
"eval_macro avg": {
"f1-score": 0.908339436975573,
"precision": 0.9043858358438829,
"recall": 0.9145321672974815,
"support": 33500.0
},
"eval_runtime": 1.3812,
"eval_samples_per_second": 58.646,
"eval_steps_per_second": 7.964,
"eval_weighted avg": {
"f1-score": 0.9336613408822066,
"precision": 0.9345536477376863,
"recall": 0.934179104477612,
"support": 33500.0
},
"step": 123
},
{
"epoch": 4.0,
"eval_B": {
"f1-score": 0.8771342484097756,
"precision": 0.8468002585649644,
"recall": 0.9097222222222222,
"support": 1440.0
},
"eval_I": {
"f1-score": 0.959998160834981,
"precision": 0.952900369677331,
"recall": 0.9672024829758651,
"support": 21587.0
},
"eval_O": {
"f1-score": 0.9145503290275409,
"precision": 0.9341764588727345,
"recall": 0.8957318819822401,
"support": 10473.0
},
"eval_accuracy": 0.9423880597014925,
"eval_loss": 0.18545031547546387,
"eval_macro avg": {
"f1-score": 0.9172275794240993,
"precision": 0.9112923623716767,
"recall": 0.9242188623934425,
"support": 33500.0
},
"eval_runtime": 1.377,
"eval_samples_per_second": 58.824,
"eval_steps_per_second": 7.988,
"eval_weighted avg": {
"f1-score": 0.9422280361659775,
"precision": 0.9424860509352908,
"recall": 0.9423880597014925,
"support": 33500.0
},
"step": 164
},
{
"epoch": 5.0,
"eval_B": {
"f1-score": 0.8835341365461846,
"precision": 0.8527131782945736,
"recall": 0.9166666666666666,
"support": 1440.0
},
"eval_I": {
"f1-score": 0.9602170394181884,
"precision": 0.9453672113485365,
"recall": 0.9755408347616621,
"support": 21587.0
},
"eval_O": {
"f1-score": 0.9125018611345476,
"precision": 0.9500826787928897,
"recall": 0.877780960565263,
"support": 10473.0
},
"eval_accuracy": 0.9424477611940298,
"eval_loss": 0.19699566066265106,
"eval_macro avg": {
"f1-score": 0.9187510123663069,
"precision": 0.9160543561453333,
"recall": 0.9233294873311971,
"support": 33500.0
},
"eval_runtime": 1.3779,
"eval_samples_per_second": 58.784,
"eval_steps_per_second": 7.983,
"eval_weighted avg": {
"f1-score": 0.9420037724838524,
"precision": 0.9428586526305366,
"recall": 0.9424477611940298,
"support": 33500.0
},
"step": 205
},
{
"epoch": 6.0,
"eval_B": {
"f1-score": 0.882471457353929,
"precision": 0.8543563068920677,
"recall": 0.9125,
"support": 1440.0
},
"eval_I": {
"f1-score": 0.9563333022648896,
"precision": 0.9621173050775939,
"recall": 0.9506184277574466,
"support": 21587.0
},
"eval_O": {
"f1-score": 0.9094096465460059,
"precision": 0.9025674786043449,
"recall": 0.9163563448868519,
"support": 10473.0
},
"eval_accuracy": 0.9382686567164179,
"eval_loss": 0.22578871250152588,
"eval_macro avg": {
"f1-score": 0.9160714687216082,
"precision": 0.9063470301913354,
"recall": 0.9264915908814327,
"support": 33500.0
},
"eval_runtime": 1.3827,
"eval_samples_per_second": 58.582,
"eval_steps_per_second": 7.956,
"eval_weighted avg": {
"f1-score": 0.9384887499360641,
"precision": 0.9388683149271014,
"recall": 0.9382686567164179,
"support": 33500.0
},
"step": 246
},
{
"epoch": 7.0,
"eval_B": {
"f1-score": 0.8892651540805959,
"precision": 0.8678122934567085,
"recall": 0.9118055555555555,
"support": 1440.0
},
"eval_I": {
"f1-score": 0.9597268994787103,
"precision": 0.9557587173243901,
"recall": 0.963728169731783,
"support": 21587.0
},
"eval_O": {
"f1-score": 0.914125549702798,
"precision": 0.925440313111546,
"recall": 0.903084121073236,
"support": 10473.0
},
"eval_accuracy": 0.9425373134328359,
"eval_loss": 0.22205175459384918,
"eval_macro avg": {
"f1-score": 0.9210392010873681,
"precision": 0.9163371079642149,
"recall": 0.9262059487868582,
"support": 33500.0
},
"eval_runtime": 1.3808,
"eval_samples_per_second": 58.663,
"eval_steps_per_second": 7.967,
"eval_weighted avg": {
"f1-score": 0.9424418890435934,
"precision": 0.9424999860500445,
"recall": 0.9425373134328359,
"support": 33500.0
},
"step": 287
},
{
"epoch": 8.0,
"eval_B": {
"f1-score": 0.8742837883383889,
"precision": 0.8493778650949574,
"recall": 0.9006944444444445,
"support": 1440.0
},
"eval_I": {
"f1-score": 0.954344074989507,
"precision": 0.9607962815155641,
"recall": 0.9479779496919443,
"support": 21587.0
},
"eval_O": {
"f1-score": 0.9060386816096845,
"precision": 0.8975079632752483,
"recall": 0.9147331232693593,
"support": 10473.0
},
"eval_accuracy": 0.9355522388059702,
"eval_loss": 0.2697383165359497,
"eval_macro avg": {
"f1-score": 0.9115555149791934,
"precision": 0.9025607032952566,
"recall": 0.9211351724685827,
"support": 33500.0
},
"eval_runtime": 1.3839,
"eval_samples_per_second": 58.529,
"eval_steps_per_second": 7.948,
"eval_weighted avg": {
"f1-score": 0.9358011138657909,
"precision": 0.9362213240058178,
"recall": 0.9355522388059702,
"support": 33500.0
},
"step": 328
},
{
"epoch": 9.0,
"eval_B": {
"f1-score": 0.8867732063923836,
"precision": 0.8687541638907396,
"recall": 0.9055555555555556,
"support": 1440.0
},
"eval_I": {
"f1-score": 0.9593785402168635,
"precision": 0.9576294655220161,
"recall": 0.9611340158428684,
"support": 21587.0
},
"eval_O": {
"f1-score": 0.9133903681630298,
"precision": 0.9195780509048679,
"recall": 0.907285400553805,
"support": 10473.0
},
"eval_accuracy": 0.941910447761194,
"eval_loss": 0.236963152885437,
"eval_macro avg": {
"f1-score": 0.9198473715907589,
"precision": 0.9153205601058745,
"recall": 0.9246583239840763,
"support": 33500.0
},
"eval_runtime": 1.3821,
"eval_samples_per_second": 58.608,
"eval_steps_per_second": 7.959,
"eval_weighted avg": {
"f1-score": 0.9418804564369516,
"precision": 0.9419132595627793,
"recall": 0.941910447761194,
"support": 33500.0
},
"step": 369
},
{
"epoch": 10.0,
"eval_B": {
"f1-score": 0.8820989704417137,
"precision": 0.8453214513049013,
"recall": 0.9222222222222223,
"support": 1440.0
},
"eval_I": {
"f1-score": 0.9598799630655587,
"precision": 0.956655776929094,
"recall": 0.9631259554361421,
"support": 21587.0
},
"eval_O": {
"f1-score": 0.914896705210702,
"precision": 0.9273244409572381,
"recall": 0.9027976701995608,
"support": 10473.0
},
"eval_accuracy": 0.9425074626865672,
"eval_loss": 0.27435505390167236,
"eval_macro avg": {
"f1-score": 0.9189585462393248,
"precision": 0.9097672230637445,
"recall": 0.9293819492859751,
"support": 33500.0
},
"eval_runtime": 1.375,
"eval_samples_per_second": 58.909,
"eval_steps_per_second": 8.0,
"eval_weighted avg": {
"f1-score": 0.9424735663822079,
"precision": 0.9427002990027631,
"recall": 0.9425074626865672,
"support": 33500.0
},
"step": 410
},
{
"epoch": 11.0,
"eval_B": {
"f1-score": 0.8886590830748018,
"precision": 0.8822724161533196,
"recall": 0.8951388888888889,
"support": 1440.0
},
"eval_I": {
"f1-score": 0.9554279336882978,
"precision": 0.9591074596209505,
"recall": 0.9517765321721406,
"support": 21587.0
},
"eval_O": {
"f1-score": 0.9066856330014225,
"precision": 0.9005368748233964,
"recall": 0.91291893440275,
"support": 10473.0
},
"eval_accuracy": 0.9371940298507463,
"eval_loss": 0.29653802514076233,
"eval_macro avg": {
"f1-score": 0.916924216588174,
"precision": 0.9139722501992221,
"recall": 0.9199447851545933,
"support": 33500.0
},
"eval_runtime": 1.3782,
"eval_samples_per_second": 58.771,
"eval_steps_per_second": 7.981,
"eval_weighted avg": {
"f1-score": 0.9373197169725641,
"precision": 0.9374939611977214,
"recall": 0.9371940298507463,
"support": 33500.0
},
"step": 451
},
{
"epoch": 12.0,
"eval_B": {
"f1-score": 0.8851788756388416,
"precision": 0.8688963210702341,
"recall": 0.9020833333333333,
"support": 1440.0
},
"eval_I": {
"f1-score": 0.956586582154592,
"precision": 0.9624402138235019,
"recall": 0.9508037244637977,
"support": 21587.0
},
"eval_O": {
"f1-score": 0.9096066565809379,
"precision": 0.9008334113681056,
"recall": 0.9185524682516948,
"support": 10473.0
},
"eval_accuracy": 0.9386268656716418,
"eval_loss": 0.3317875564098358,
"eval_macro avg": {
"f1-score": 0.9171240381247904,
"precision": 0.9107233154206137,
"recall": 0.9238131753496086,
"support": 33500.0
},
"eval_runtime": 1.3866,
"eval_samples_per_second": 58.418,
"eval_steps_per_second": 7.933,
"eval_weighted avg": {
"f1-score": 0.9388299296795006,
"precision": 0.9391592810569325,
"recall": 0.9386268656716418,
"support": 33500.0
},
"step": 492
}
],
"logging_steps": 500,
"max_steps": 656,
"num_input_tokens_seen": 0,
"num_train_epochs": 16,
"save_steps": 500,
"total_flos": 1720081324029600.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}