|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 14.0, |
|
"eval_steps": 500, |
|
"global_step": 1134, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_B": { |
|
"f1-score": 0.8266764382557715, |
|
"precision": 0.7866108786610879, |
|
"recall": 0.871042471042471, |
|
"support": 1295.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9410658617447671, |
|
"precision": 0.9344077040239768, |
|
"recall": 0.9478195863443808, |
|
"support": 20065.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.8562443302086484, |
|
"precision": 0.8789421405512788, |
|
"recall": 0.8346893055064262, |
|
"support": 8481.0 |
|
}, |
|
"eval_accuracy": 0.912335377500754, |
|
"eval_loss": 0.2450982630252838, |
|
"eval_macro avg": { |
|
"f1-score": 0.8746622100697291, |
|
"precision": 0.8666535744121145, |
|
"recall": 0.8845171209644261, |
|
"support": 29841.0 |
|
}, |
|
"eval_runtime": 4.6269, |
|
"eval_samples_per_second": 17.29, |
|
"eval_steps_per_second": 2.161, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9119949287205363, |
|
"precision": 0.9122301519092053, |
|
"recall": 0.912335377500754, |
|
"support": 29841.0 |
|
}, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_B": { |
|
"f1-score": 0.8833395452851286, |
|
"precision": 0.8537463976945245, |
|
"recall": 0.915057915057915, |
|
"support": 1295.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9549518227873249, |
|
"precision": 0.9516903430183636, |
|
"recall": 0.958235733864939, |
|
"support": 20065.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.88924750463212, |
|
"precision": 0.9016969696969697, |
|
"recall": 0.877137130055418, |
|
"support": 8481.0 |
|
}, |
|
"eval_accuracy": 0.9333132267685399, |
|
"eval_loss": 0.20076116919517517, |
|
"eval_macro avg": { |
|
"f1-score": 0.9091796242348579, |
|
"precision": 0.9023779034699526, |
|
"recall": 0.916810259659424, |
|
"support": 29841.0 |
|
}, |
|
"eval_runtime": 4.6524, |
|
"eval_samples_per_second": 17.195, |
|
"eval_steps_per_second": 2.149, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9331705077630417, |
|
"precision": 0.9332314707173981, |
|
"recall": 0.9333132267685399, |
|
"support": 29841.0 |
|
}, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_B": { |
|
"f1-score": 0.8927335640138409, |
|
"precision": 0.8889739663093414, |
|
"recall": 0.8965250965250965, |
|
"support": 1295.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9590235396687009, |
|
"precision": 0.9586653386454184, |
|
"recall": 0.9593820084724645, |
|
"support": 20065.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9006849315068494, |
|
"precision": 0.9020697811945594, |
|
"recall": 0.8993043273198915, |
|
"support": 8481.0 |
|
}, |
|
"eval_accuracy": 0.9395797727958178, |
|
"eval_loss": 0.19863653182983398, |
|
"eval_macro avg": { |
|
"f1-score": 0.9174806783964637, |
|
"precision": 0.9165696953831063, |
|
"recall": 0.9184038107724842, |
|
"support": 29841.0 |
|
}, |
|
"eval_runtime": 4.655, |
|
"eval_samples_per_second": 17.186, |
|
"eval_steps_per_second": 2.148, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9395665759512081, |
|
"precision": 0.9395561516236713, |
|
"recall": 0.9395797727958178, |
|
"support": 29841.0 |
|
}, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_B": { |
|
"f1-score": 0.8999616711383672, |
|
"precision": 0.893455098934551, |
|
"recall": 0.9065637065637066, |
|
"support": 1295.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9612657113512985, |
|
"precision": 0.9436362763454799, |
|
"recall": 0.9795664091701969, |
|
"support": 20065.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.8996847765622102, |
|
"precision": 0.9454403741231489, |
|
"recall": 0.8581535196321188, |
|
"support": 8481.0 |
|
}, |
|
"eval_accuracy": 0.9418920277470594, |
|
"eval_loss": 0.19743724167346954, |
|
"eval_macro avg": { |
|
"f1-score": 0.920304053017292, |
|
"precision": 0.9275105831343932, |
|
"recall": 0.9147612117886741, |
|
"support": 29841.0 |
|
}, |
|
"eval_runtime": 4.6615, |
|
"eval_samples_per_second": 17.162, |
|
"eval_steps_per_second": 2.145, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9411036309913238, |
|
"precision": 0.9419713163409645, |
|
"recall": 0.9418920277470594, |
|
"support": 29841.0 |
|
}, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_B": { |
|
"f1-score": 0.8953846153846153, |
|
"precision": 0.8919540229885058, |
|
"recall": 0.8988416988416988, |
|
"support": 1295.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9602426713742261, |
|
"precision": 0.9581224570804803, |
|
"recall": 0.9623722900573137, |
|
"support": 20065.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9022119433078337, |
|
"precision": 0.9075399665950847, |
|
"recall": 0.8969461148449476, |
|
"support": 8481.0 |
|
}, |
|
"eval_accuracy": 0.9410207432726785, |
|
"eval_loss": 0.2340461015701294, |
|
"eval_macro avg": { |
|
"f1-score": 0.9192797433555584, |
|
"precision": 0.9192054822213569, |
|
"recall": 0.9193867012479867, |
|
"support": 29841.0 |
|
}, |
|
"eval_runtime": 4.6719, |
|
"eval_samples_per_second": 17.124, |
|
"eval_steps_per_second": 2.14, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9409353496612266, |
|
"precision": 0.9408751053176122, |
|
"recall": 0.9410207432726785, |
|
"support": 29841.0 |
|
}, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_B": { |
|
"f1-score": 0.8929831438651511, |
|
"precision": 0.9068471337579618, |
|
"recall": 0.8795366795366796, |
|
"support": 1295.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9522293798155866, |
|
"precision": 0.9655225409836066, |
|
"recall": 0.9392972838275604, |
|
"support": 20065.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.8885216003647556, |
|
"precision": 0.8599007170435742, |
|
"recall": 0.919113312109421, |
|
"support": 8481.0 |
|
}, |
|
"eval_accuracy": 0.930967460875976, |
|
"eval_loss": 0.21774999797344208, |
|
"eval_macro avg": { |
|
"f1-score": 0.9112447080151643, |
|
"precision": 0.9107567972617142, |
|
"recall": 0.9126490918245537, |
|
"support": 29841.0 |
|
}, |
|
"eval_runtime": 4.647, |
|
"eval_samples_per_second": 17.215, |
|
"eval_steps_per_second": 2.152, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9315521386682287, |
|
"precision": 0.9329578366776978, |
|
"recall": 0.930967460875976, |
|
"support": 29841.0 |
|
}, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"grad_norm": 9.310327529907227, |
|
"learning_rate": 1.7530864197530865e-05, |
|
"loss": 0.1627, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_B": { |
|
"f1-score": 0.8936490041337843, |
|
"precision": 0.8704245973645681, |
|
"recall": 0.9181467181467181, |
|
"support": 1295.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9599271850630982, |
|
"precision": 0.9477800446905664, |
|
"recall": 0.9723897333665587, |
|
"support": 20065.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.8979841172877214, |
|
"precision": 0.9316770186335404, |
|
"recall": 0.8666430845419172, |
|
"support": 8481.0 |
|
}, |
|
"eval_accuracy": 0.9399819040916859, |
|
"eval_loss": 0.27376389503479004, |
|
"eval_macro avg": { |
|
"f1-score": 0.9171867688282013, |
|
"precision": 0.9166272202295582, |
|
"recall": 0.9190598453517312, |
|
"support": 29841.0 |
|
}, |
|
"eval_runtime": 4.6671, |
|
"eval_samples_per_second": 17.141, |
|
"eval_steps_per_second": 2.143, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9394463230910989, |
|
"precision": 0.9398464945991887, |
|
"recall": 0.9399819040916859, |
|
"support": 29841.0 |
|
}, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_B": { |
|
"f1-score": 0.8948545861297539, |
|
"precision": 0.8651766402307137, |
|
"recall": 0.9266409266409267, |
|
"support": 1295.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9590139880661254, |
|
"precision": 0.9414702069429106, |
|
"recall": 0.9772240219287316, |
|
"support": 20065.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.8927241122423641, |
|
"precision": 0.9427035531663825, |
|
"recall": 0.8477773847423653, |
|
"support": 8481.0 |
|
}, |
|
"eval_accuracy": 0.9382393351429241, |
|
"eval_loss": 0.3103167414665222, |
|
"eval_macro avg": { |
|
"f1-score": 0.9155308954794145, |
|
"precision": 0.916450133446669, |
|
"recall": 0.9172141111040079, |
|
"support": 29841.0 |
|
}, |
|
"eval_runtime": 4.6339, |
|
"eval_samples_per_second": 17.264, |
|
"eval_steps_per_second": 2.158, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9373896838414373, |
|
"precision": 0.9385098450391195, |
|
"recall": 0.9382393351429241, |
|
"support": 29841.0 |
|
}, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_B": { |
|
"f1-score": 0.9011235955056179, |
|
"precision": 0.8749090909090909, |
|
"recall": 0.9289575289575289, |
|
"support": 1295.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9615119937082186, |
|
"precision": 0.9485040973670175, |
|
"recall": 0.9748816346872664, |
|
"support": 20065.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9013722126929674, |
|
"precision": 0.9380339155935229, |
|
"recall": 0.8674684589081476, |
|
"support": 8481.0 |
|
}, |
|
"eval_accuracy": 0.9423611809255722, |
|
"eval_loss": 0.32883673906326294, |
|
"eval_macro avg": { |
|
"f1-score": 0.9213359339689347, |
|
"precision": 0.9204823679565438, |
|
"recall": 0.9237692075176476, |
|
"support": 29841.0 |
|
}, |
|
"eval_runtime": 4.6714, |
|
"eval_samples_per_second": 17.126, |
|
"eval_steps_per_second": 2.141, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9417992341337168, |
|
"precision": 0.9423346276781992, |
|
"recall": 0.9423611809255722, |
|
"support": 29841.0 |
|
}, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_B": { |
|
"f1-score": 0.8935361216730039, |
|
"precision": 0.8801498127340824, |
|
"recall": 0.9073359073359073, |
|
"support": 1295.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9632332861646931, |
|
"precision": 0.9596359319351009, |
|
"recall": 0.9668577124345876, |
|
"support": 20065.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9095462405342556, |
|
"precision": 0.9200241254523522, |
|
"recall": 0.8993043273198915, |
|
"support": 8481.0 |
|
}, |
|
"eval_accuracy": 0.9450755671726819, |
|
"eval_loss": 0.3041447699069977, |
|
"eval_macro avg": { |
|
"f1-score": 0.9221052161239841, |
|
"precision": 0.9199366233738452, |
|
"recall": 0.9244993156967954, |
|
"support": 29841.0 |
|
}, |
|
"eval_runtime": 4.6532, |
|
"eval_samples_per_second": 17.193, |
|
"eval_steps_per_second": 2.149, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9449504651463465, |
|
"precision": 0.9449285744355027, |
|
"recall": 0.9450755671726819, |
|
"support": 29841.0 |
|
}, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_B": { |
|
"f1-score": 0.8924122310305775, |
|
"precision": 0.8729689807976366, |
|
"recall": 0.9127413127413128, |
|
"support": 1295.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9588723786993913, |
|
"precision": 0.9483330083837005, |
|
"recall": 0.9696486419137802, |
|
"support": 20065.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.8956965718453683, |
|
"precision": 0.9243507715468574, |
|
"recall": 0.8687654757693668, |
|
"support": 8481.0 |
|
}, |
|
"eval_accuracy": 0.9385074226735028, |
|
"eval_loss": 0.3531416654586792, |
|
"eval_macro avg": { |
|
"f1-score": 0.9156603938584458, |
|
"precision": 0.9152175869093981, |
|
"recall": 0.9170518101414866, |
|
"support": 29841.0 |
|
}, |
|
"eval_runtime": 4.6508, |
|
"eval_samples_per_second": 17.201, |
|
"eval_steps_per_second": 2.15, |
|
"eval_weighted avg": { |
|
"f1-score": 0.938033267772811, |
|
"precision": 0.9382465579853486, |
|
"recall": 0.9385074226735028, |
|
"support": 29841.0 |
|
}, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_B": { |
|
"f1-score": 0.8970199924556771, |
|
"precision": 0.8768436578171092, |
|
"recall": 0.9181467181467181, |
|
"support": 1295.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9595004897159647, |
|
"precision": 0.9431046931407943, |
|
"recall": 0.9764764515325193, |
|
"support": 20065.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.8953122104873078, |
|
"precision": 0.9400778210116731, |
|
"recall": 0.8546162009197029, |
|
"support": 8481.0 |
|
}, |
|
"eval_accuracy": 0.9393116852652391, |
|
"eval_loss": 0.4237186312675476, |
|
"eval_macro avg": { |
|
"f1-score": 0.9172775642196499, |
|
"precision": 0.9200087239898588, |
|
"recall": 0.9164131235329801, |
|
"support": 29841.0 |
|
}, |
|
"eval_runtime": 4.6388, |
|
"eval_samples_per_second": 17.246, |
|
"eval_steps_per_second": 2.156, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9385463313402296, |
|
"precision": 0.9393689288141548, |
|
"recall": 0.9393116852652391, |
|
"support": 29841.0 |
|
}, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"grad_norm": 0.5223535895347595, |
|
"learning_rate": 1.506172839506173e-05, |
|
"loss": 0.0196, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_B": { |
|
"f1-score": 0.8891411275066211, |
|
"precision": 0.8716617210682492, |
|
"recall": 0.9073359073359073, |
|
"support": 1295.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.9610692322951795, |
|
"precision": 0.9563736860287223, |
|
"recall": 0.9658111138798904, |
|
"support": 20065.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9043145233678416, |
|
"precision": 0.918104495747266, |
|
"recall": 0.8909326730338404, |
|
"support": 8481.0 |
|
}, |
|
"eval_accuracy": 0.9419925605710264, |
|
"eval_loss": 0.4308999180793762, |
|
"eval_macro avg": { |
|
"f1-score": 0.9181749610565474, |
|
"precision": 0.9153799676147458, |
|
"recall": 0.9213598980832126, |
|
"support": 29841.0 |
|
}, |
|
"eval_runtime": 4.6555, |
|
"eval_samples_per_second": 17.184, |
|
"eval_steps_per_second": 2.148, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9418177466843106, |
|
"precision": 0.9418211242043584, |
|
"recall": 0.9419925605710264, |
|
"support": 29841.0 |
|
}, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_B": { |
|
"f1-score": 0.9025601834161253, |
|
"precision": 0.8933434190620272, |
|
"recall": 0.911969111969112, |
|
"support": 1295.0 |
|
}, |
|
"eval_I": { |
|
"f1-score": 0.960806215404473, |
|
"precision": 0.9570764513895758, |
|
"recall": 0.9645651632195366, |
|
"support": 20065.0 |
|
}, |
|
"eval_O": { |
|
"f1-score": 0.9030873763261413, |
|
"precision": 0.9131011208870676, |
|
"recall": 0.8932908855087843, |
|
"support": 8481.0 |
|
}, |
|
"eval_accuracy": 0.9420260715123487, |
|
"eval_loss": 0.4198700487613678, |
|
"eval_macro avg": { |
|
"f1-score": 0.9221512583822467, |
|
"precision": 0.9211736637795568, |
|
"recall": 0.9232750535658111, |
|
"support": 29841.0 |
|
}, |
|
"eval_runtime": 4.6834, |
|
"eval_samples_per_second": 17.082, |
|
"eval_steps_per_second": 2.135, |
|
"eval_weighted avg": { |
|
"f1-score": 0.9418744743217934, |
|
"precision": 0.9418125843993292, |
|
"recall": 0.9420260715123487, |
|
"support": 29841.0 |
|
}, |
|
"step": 1134 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4050, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 2013013138298400.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|