longformer-spans / checkpoint-492 /trainer_state.json
Theoreticallyhugo's picture
Training in progress, epoch 12, checkpoint
11e7834 verified
raw
history blame
13.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 12.0,
"eval_steps": 500,
"global_step": 492,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_B": {
"f1-score": 0.6054590570719602,
"precision": 0.8472222222222222,
"recall": 0.47104247104247104,
"support": 1295.0
},
"eval_I": {
"f1-score": 0.9281628372580799,
"precision": 0.8894522863277146,
"recall": 0.9703962123099925,
"support": 20065.0
},
"eval_O": {
"f1-score": 0.826809241932404,
"precision": 0.8983402489626556,
"recall": 0.7658295012380616,
"support": 8481.0
},
"eval_accuracy": 0.8905867765825543,
"eval_loss": 0.3152744174003601,
"eval_macro avg": {
"f1-score": 0.786810378754148,
"precision": 0.8783382525041974,
"recall": 0.735756061530175,
"support": 29841.0
},
"eval_runtime": 1.3493,
"eval_samples_per_second": 59.291,
"eval_steps_per_second": 7.411,
"eval_weighted avg": {
"f1-score": 0.8853532384745914,
"precision": 0.8901456571293072,
"recall": 0.8905867765825543,
"support": 29841.0
},
"step": 41
},
{
"epoch": 2.0,
"eval_B": {
"f1-score": 0.8510791366906474,
"precision": 0.7966329966329966,
"recall": 0.9135135135135135,
"support": 1295.0
},
"eval_I": {
"f1-score": 0.9477035236938031,
"precision": 0.9247806497510078,
"recall": 0.9717916770495888,
"support": 20065.0
},
"eval_O": {
"f1-score": 0.8622397155916709,
"precision": 0.9339843212763032,
"recall": 0.8007310458672326,
"support": 8481.0
},
"eval_accuracy": 0.9206460909486948,
"eval_loss": 0.22530655562877655,
"eval_macro avg": {
"f1-score": 0.8870074586587071,
"precision": 0.8851326558867693,
"recall": 0.895345412143445,
"support": 29841.0
},
"eval_runtime": 1.356,
"eval_samples_per_second": 58.999,
"eval_steps_per_second": 7.375,
"eval_weighted avg": {
"f1-score": 0.9192209950358067,
"precision": 0.9218352098333846,
"recall": 0.9206460909486948,
"support": 29841.0
},
"step": 82
},
{
"epoch": 3.0,
"eval_B": {
"f1-score": 0.8718693284936478,
"precision": 0.8226027397260274,
"recall": 0.9274131274131274,
"support": 1295.0
},
"eval_I": {
"f1-score": 0.95727385377943,
"precision": 0.9520828198175992,
"recall": 0.9625218041365562,
"support": 20065.0
},
"eval_O": {
"f1-score": 0.8947336671291549,
"precision": 0.91600790513834,
"recall": 0.8744251857092324,
"support": 8481.0
},
"eval_accuracy": 0.9359605911330049,
"eval_loss": 0.18091395497322083,
"eval_macro avg": {
"f1-score": 0.9079589498007442,
"precision": 0.8968978215606556,
"recall": 0.9214533724196388,
"support": 29841.0
},
"eval_runtime": 1.3616,
"eval_samples_per_second": 58.756,
"eval_steps_per_second": 7.344,
"eval_weighted avg": {
"f1-score": 0.9357932672298482,
"precision": 0.9362110978540797,
"recall": 0.9359605911330049,
"support": 29841.0
},
"step": 123
},
{
"epoch": 4.0,
"eval_B": {
"f1-score": 0.8863383931877082,
"precision": 0.8513513513513513,
"recall": 0.9243243243243243,
"support": 1295.0
},
"eval_I": {
"f1-score": 0.9597514129823103,
"precision": 0.942660770931462,
"recall": 0.9774732120608024,
"support": 20065.0
},
"eval_O": {
"f1-score": 0.8954686530105526,
"precision": 0.9454712282081531,
"recall": 0.8504893290885509,
"support": 8481.0
},
"eval_accuracy": 0.9390771086759827,
"eval_loss": 0.19618020951747894,
"eval_macro avg": {
"f1-score": 0.913852819726857,
"precision": 0.9131611168303221,
"recall": 0.9174289551578925,
"support": 29841.0
},
"eval_runtime": 1.355,
"eval_samples_per_second": 59.041,
"eval_steps_per_second": 7.38,
"eval_weighted avg": {
"f1-score": 0.9382959675228925,
"precision": 0.9394969959174669,
"recall": 0.9390771086759827,
"support": 29841.0
},
"step": 164
},
{
"epoch": 5.0,
"eval_B": {
"f1-score": 0.8794862108046846,
"precision": 0.8609467455621301,
"recall": 0.8988416988416988,
"support": 1295.0
},
"eval_I": {
"f1-score": 0.955187788105494,
"precision": 0.9656717938270347,
"recall": 0.9449289808123599,
"support": 20065.0
},
"eval_O": {
"f1-score": 0.8953622519612366,
"precision": 0.8764539808018069,
"recall": 0.9151043509020163,
"support": 8481.0
},
"eval_accuracy": 0.9344525987734995,
"eval_loss": 0.1936398446559906,
"eval_macro avg": {
"f1-score": 0.9100120836238051,
"precision": 0.9010241733969906,
"recall": 0.9196250101853582,
"support": 29841.0
},
"eval_runtime": 1.3533,
"eval_samples_per_second": 59.115,
"eval_steps_per_second": 7.389,
"eval_weighted avg": {
"f1-score": 0.9348997979361299,
"precision": 0.9357708116290517,
"recall": 0.9344525987734995,
"support": 29841.0
},
"step": 205
},
{
"epoch": 6.0,
"eval_B": {
"f1-score": 0.8813928182807399,
"precision": 0.8310533515731874,
"recall": 0.9382239382239382,
"support": 1295.0
},
"eval_I": {
"f1-score": 0.9576389581878055,
"precision": 0.958739197762126,
"recall": 0.9565412409668577,
"support": 20065.0
},
"eval_O": {
"f1-score": 0.8994715278190131,
"precision": 0.9059808612440191,
"recall": 0.8930550642612899,
"support": 8481.0
},
"eval_accuracy": 0.9377031600817667,
"eval_loss": 0.19472843408584595,
"eval_macro avg": {
"f1-score": 0.9128344347625195,
"precision": 0.8985911368597775,
"recall": 0.9292734144840287,
"support": 29841.0
},
"eval_runtime": 1.351,
"eval_samples_per_second": 59.216,
"eval_steps_per_second": 7.402,
"eval_weighted avg": {
"f1-score": 0.9377985799116962,
"precision": 0.9382038060921168,
"recall": 0.9377031600817667,
"support": 29841.0
},
"step": 246
},
{
"epoch": 7.0,
"eval_B": {
"f1-score": 0.8952959028831563,
"precision": 0.8799403430275914,
"recall": 0.9111969111969112,
"support": 1295.0
},
"eval_I": {
"f1-score": 0.9592318906147891,
"precision": 0.9675979919882359,
"recall": 0.9510092200348866,
"support": 20065.0
},
"eval_O": {
"f1-score": 0.9041714947856316,
"precision": 0.8888256065611118,
"recall": 0.9200565970993987,
"support": 8481.0
},
"eval_accuracy": 0.940484568211521,
"eval_loss": 0.20142190158367157,
"eval_macro avg": {
"f1-score": 0.919566429427859,
"precision": 0.9121213138589797,
"recall": 0.9274209094437321,
"support": 29841.0
},
"eval_runtime": 1.3501,
"eval_samples_per_second": 59.256,
"eval_steps_per_second": 7.407,
"eval_weighted avg": {
"f1-score": 0.9408087707079646,
"precision": 0.9414063343289258,
"recall": 0.940484568211521,
"support": 29841.0
},
"step": 287
},
{
"epoch": 8.0,
"eval_B": {
"f1-score": 0.8921130952380952,
"precision": 0.8607322325915291,
"recall": 0.9258687258687258,
"support": 1295.0
},
"eval_I": {
"f1-score": 0.9613341204250295,
"precision": 0.9490554125588849,
"recall": 0.9739347121853975,
"support": 20065.0
},
"eval_O": {
"f1-score": 0.9013343126453666,
"precision": 0.9371261295659921,
"recall": 0.8681759226506308,
"support": 8481.0
},
"eval_accuracy": 0.9417914949230924,
"eval_loss": 0.21689558029174805,
"eval_macro avg": {
"f1-score": 0.9182605094361639,
"precision": 0.9156379249054686,
"recall": 0.9226597869015847,
"support": 29841.0
},
"eval_runtime": 1.3576,
"eval_samples_per_second": 58.928,
"eval_steps_per_second": 7.366,
"eval_weighted avg": {
"f1-score": 0.9412778355352336,
"precision": 0.9418321034499257,
"recall": 0.9417914949230924,
"support": 29841.0
},
"step": 328
},
{
"epoch": 9.0,
"eval_B": {
"f1-score": 0.8985947588302315,
"precision": 0.8841554559043349,
"recall": 0.9135135135135135,
"support": 1295.0
},
"eval_I": {
"f1-score": 0.9622013609496847,
"precision": 0.958962427602594,
"recall": 0.9654622476949912,
"support": 20065.0
},
"eval_O": {
"f1-score": 0.9079425609247452,
"precision": 0.9177306673090821,
"recall": 0.8983610423299139,
"support": 8481.0
},
"eval_accuracy": 0.9441372608156563,
"eval_loss": 0.23563022911548615,
"eval_macro avg": {
"f1-score": 0.9229128935682205,
"precision": 0.9202828502720036,
"recall": 0.9257789345128061,
"support": 29841.0
},
"eval_runtime": 1.3543,
"eval_samples_per_second": 59.071,
"eval_steps_per_second": 7.384,
"eval_weighted avg": {
"f1-score": 0.944020353853535,
"precision": 0.9439977284504704,
"recall": 0.9441372608156563,
"support": 29841.0
},
"step": 369
},
{
"epoch": 10.0,
"eval_B": {
"f1-score": 0.883806713389893,
"precision": 0.846045197740113,
"recall": 0.9250965250965251,
"support": 1295.0
},
"eval_I": {
"f1-score": 0.9612357047378584,
"precision": 0.9549009000147544,
"recall": 0.9676551208572141,
"support": 20065.0
},
"eval_O": {
"f1-score": 0.904241839135944,
"precision": 0.9259762728620861,
"recall": 0.8835043037377668,
"support": 8481.0
},
"eval_accuracy": 0.9418920277470594,
"eval_loss": 0.2491447478532791,
"eval_macro avg": {
"f1-score": 0.9164280857545651,
"precision": 0.9089741235389845,
"recall": 0.9254186498971686,
"support": 29841.0
},
"eval_runtime": 1.3513,
"eval_samples_per_second": 59.204,
"eval_steps_per_second": 7.401,
"eval_weighted avg": {
"f1-score": 0.9416775291416837,
"precision": 0.941956364063297,
"recall": 0.9418920277470594,
"support": 29841.0
},
"step": 410
},
{
"epoch": 11.0,
"eval_B": {
"f1-score": 0.8962935230250841,
"precision": 0.8699127906976745,
"recall": 0.9243243243243243,
"support": 1295.0
},
"eval_I": {
"f1-score": 0.9609275419158742,
"precision": 0.9454922579711543,
"recall": 0.9768751557438325,
"support": 20065.0
},
"eval_O": {
"f1-score": 0.8992907801418439,
"precision": 0.9427204551331781,
"recall": 0.8596863577408325,
"support": 8481.0
},
"eval_accuracy": 0.9412888308032573,
"eval_loss": 0.2823159396648407,
"eval_macro avg": {
"f1-score": 0.9188372816942675,
"precision": 0.9193751679340023,
"recall": 0.9202952792696631,
"support": 29841.0
},
"eval_runtime": 1.359,
"eval_samples_per_second": 58.865,
"eval_steps_per_second": 7.358,
"eval_weighted avg": {
"f1-score": 0.9406050851929385,
"precision": 0.9414245970352596,
"recall": 0.9412888308032573,
"support": 29841.0
},
"step": 451
},
{
"epoch": 12.0,
"eval_B": {
"f1-score": 0.896006028636021,
"precision": 0.8749080206033848,
"recall": 0.9181467181467181,
"support": 1295.0
},
"eval_I": {
"f1-score": 0.9606028094513335,
"precision": 0.9618267212950934,
"recall": 0.9593820084724645,
"support": 20065.0
},
"eval_O": {
"f1-score": 0.9053041477373296,
"precision": 0.9059990552668871,
"recall": 0.9046103053885155,
"support": 8481.0
},
"eval_accuracy": 0.9420260715123487,
"eval_loss": 0.2665592133998871,
"eval_macro avg": {
"f1-score": 0.9206376619415613,
"precision": 0.9142445990551217,
"recall": 0.9273796773358992,
"support": 29841.0
},
"eval_runtime": 1.3553,
"eval_samples_per_second": 59.028,
"eval_steps_per_second": 7.379,
"eval_weighted avg": {
"f1-score": 0.9420832966618057,
"precision": 0.9421881651816595,
"recall": 0.9420260715123487,
"support": 29841.0
},
"step": 492
}
],
"logging_steps": 500,
"max_steps": 656,
"num_input_tokens_seen": 0,
"num_train_epochs": 16,
"save_steps": 500,
"total_flos": 1725439832827200.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}