Rodrigo1771
committed on
Commit
•
8a12c6e
1
Parent(s):
7abe963
End of training
Browse files- README.md +12 -11
- all_results.json +23 -23
- eval_results.json +9 -9
- predict_results.json +8 -8
- predictions.txt +0 -0
- tb/events.out.tfevents.1725883955.0a1c9bec2a53.9893.1 +3 -0
- train.log +48 -0
- train_results.json +7 -7
- trainer_state.json +129 -136
README.md
CHANGED
@@ -3,9 +3,10 @@ library_name: transformers
|
|
3 |
license: apache-2.0
|
4 |
base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
|
5 |
tags:
|
|
|
6 |
- generated_from_trainer
|
7 |
datasets:
|
8 |
-
- symptemist-fasttext-8-ner
|
9 |
metrics:
|
10 |
- precision
|
11 |
- recall
|
@@ -18,21 +19,21 @@ model-index:
|
|
18 |
name: Token Classification
|
19 |
type: token-classification
|
20 |
dataset:
|
21 |
-
name: symptemist-fasttext-8-ner
|
22 |
-
type: symptemist-fasttext-8-ner
|
23 |
config: SympTEMIST NER
|
24 |
split: validation
|
25 |
args: SympTEMIST NER
|
26 |
metrics:
|
27 |
- name: Precision
|
28 |
type: precision
|
29 |
-
value: 0.
|
30 |
- name: Recall
|
31 |
type: recall
|
32 |
-
value: 0.
|
33 |
- name: F1
|
34 |
type: f1
|
35 |
-
value: 0.
|
36 |
- name: Accuracy
|
37 |
type: accuracy
|
38 |
value: 0.9500465205813469
|
@@ -43,12 +44,12 @@ should probably proofread and complete it, then remove this comment. -->
|
|
43 |
|
44 |
# output
|
45 |
|
46 |
-
This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the symptemist-fasttext-8-ner dataset.
|
47 |
It achieves the following results on the evaluation set:
|
48 |
-
- Loss: 0.
|
49 |
-
- Precision: 0.
|
50 |
-
- Recall: 0.
|
51 |
-
- F1: 0.
|
52 |
- Accuracy: 0.9500
|
53 |
|
54 |
## Model description
|
|
|
3 |
license: apache-2.0
|
4 |
base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
|
5 |
tags:
|
6 |
+
- token-classification
|
7 |
- generated_from_trainer
|
8 |
datasets:
|
9 |
+
- Rodrigo1771/symptemist-fasttext-8-ner
|
10 |
metrics:
|
11 |
- precision
|
12 |
- recall
|
|
|
19 |
name: Token Classification
|
20 |
type: token-classification
|
21 |
dataset:
|
22 |
+
name: Rodrigo1771/symptemist-fasttext-8-ner
|
23 |
+
type: Rodrigo1771/symptemist-fasttext-8-ner
|
24 |
config: SympTEMIST NER
|
25 |
split: validation
|
26 |
args: SympTEMIST NER
|
27 |
metrics:
|
28 |
- name: Precision
|
29 |
type: precision
|
30 |
+
value: 0.6764102564102564
|
31 |
- name: Recall
|
32 |
type: recall
|
33 |
+
value: 0.7219485495347564
|
34 |
- name: F1
|
35 |
type: f1
|
36 |
+
value: 0.6984379136881121
|
37 |
- name: Accuracy
|
38 |
type: accuracy
|
39 |
value: 0.9500465205813469
|
|
|
44 |
|
45 |
# output
|
46 |
|
47 |
+
This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/symptemist-fasttext-8-ner dataset.
|
48 |
It achieves the following results on the evaluation set:
|
49 |
+
- Loss: 0.3073
|
50 |
+
- Precision: 0.6764
|
51 |
+
- Recall: 0.7219
|
52 |
+
- F1: 0.6984
|
53 |
- Accuracy: 0.9500
|
54 |
|
55 |
## Model description
|
all_results.json
CHANGED
@@ -1,26 +1,26 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_f1": 0.
|
5 |
-
"eval_loss": 0.
|
6 |
-
"eval_precision": 0.
|
7 |
-
"eval_recall": 0.
|
8 |
-
"eval_runtime":
|
9 |
"eval_samples": 2519,
|
10 |
-
"eval_samples_per_second":
|
11 |
-
"eval_steps_per_second":
|
12 |
-
"predict_accuracy": 0.
|
13 |
-
"predict_f1": 0.
|
14 |
-
"predict_loss": 0.
|
15 |
-
"predict_precision": 0.
|
16 |
-
"predict_recall": 0.
|
17 |
-
"predict_runtime": 9.
|
18 |
-
"predict_samples_per_second":
|
19 |
-
"predict_steps_per_second": 51.
|
20 |
-
"total_flos":
|
21 |
-
"train_loss": 0.
|
22 |
-
"train_runtime":
|
23 |
-
"train_samples":
|
24 |
-
"train_samples_per_second":
|
25 |
-
"train_steps_per_second": 1.
|
26 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 9.975429975429975,
|
3 |
+
"eval_accuracy": 0.9500465205813469,
|
4 |
+
"eval_f1": 0.6984379136881121,
|
5 |
+
"eval_loss": 0.30729904770851135,
|
6 |
+
"eval_precision": 0.6764102564102564,
|
7 |
+
"eval_recall": 0.7219485495347564,
|
8 |
+
"eval_runtime": 6.0921,
|
9 |
"eval_samples": 2519,
|
10 |
+
"eval_samples_per_second": 413.484,
|
11 |
+
"eval_steps_per_second": 51.706,
|
12 |
+
"predict_accuracy": 0.9466933985906772,
|
13 |
+
"predict_f1": 0.6951548848292296,
|
14 |
+
"predict_loss": 0.3347860872745514,
|
15 |
+
"predict_precision": 0.6863237139272271,
|
16 |
+
"predict_recall": 0.704216285806244,
|
17 |
+
"predict_runtime": 9.749,
|
18 |
+
"predict_samples_per_second": 415.118,
|
19 |
+
"predict_steps_per_second": 51.903,
|
20 |
+
"total_flos": 6404835399317064.0,
|
21 |
+
"train_loss": 0.04138289297302368,
|
22 |
+
"train_runtime": 1065.756,
|
23 |
+
"train_samples": 13013,
|
24 |
+
"train_samples_per_second": 122.101,
|
25 |
+
"train_steps_per_second": 1.905
|
26 |
}
|
eval_results.json
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_f1": 0.
|
5 |
-
"eval_loss": 0.
|
6 |
-
"eval_precision": 0.
|
7 |
-
"eval_recall": 0.
|
8 |
-
"eval_runtime":
|
9 |
"eval_samples": 2519,
|
10 |
-
"eval_samples_per_second":
|
11 |
-
"eval_steps_per_second":
|
12 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 9.975429975429975,
|
3 |
+
"eval_accuracy": 0.9500465205813469,
|
4 |
+
"eval_f1": 0.6984379136881121,
|
5 |
+
"eval_loss": 0.30729904770851135,
|
6 |
+
"eval_precision": 0.6764102564102564,
|
7 |
+
"eval_recall": 0.7219485495347564,
|
8 |
+
"eval_runtime": 6.0921,
|
9 |
"eval_samples": 2519,
|
10 |
+
"eval_samples_per_second": 413.484,
|
11 |
+
"eval_steps_per_second": 51.706
|
12 |
}
|
predict_results.json
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
{
|
2 |
-
"predict_accuracy": 0.
|
3 |
-
"predict_f1": 0.
|
4 |
-
"predict_loss": 0.
|
5 |
-
"predict_precision": 0.
|
6 |
-
"predict_recall": 0.
|
7 |
-
"predict_runtime": 9.
|
8 |
-
"predict_samples_per_second":
|
9 |
-
"predict_steps_per_second": 51.
|
10 |
}
|
|
|
1 |
{
|
2 |
+
"predict_accuracy": 0.9466933985906772,
|
3 |
+
"predict_f1": 0.6951548848292296,
|
4 |
+
"predict_loss": 0.3347860872745514,
|
5 |
+
"predict_precision": 0.6863237139272271,
|
6 |
+
"predict_recall": 0.704216285806244,
|
7 |
+
"predict_runtime": 9.749,
|
8 |
+
"predict_samples_per_second": 415.118,
|
9 |
+
"predict_steps_per_second": 51.903
|
10 |
}
|
predictions.txt
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
tb/events.out.tfevents.1725883955.0a1c9bec2a53.9893.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b842d84c89f0d88706e31e98b113fae6b45879220115930147db648f848a8c24
|
3 |
+
size 560
|
train.log
CHANGED
@@ -888,3 +888,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
|
|
888 |
{'eval_loss': 0.3079104423522949, 'eval_precision': 0.6712820512820513, 'eval_recall': 0.7164750957854407, 'eval_f1': 0.6931427058512046, 'eval_accuracy': 0.9500465205813469, 'eval_runtime': 5.9033, 'eval_samples_per_second': 426.708, 'eval_steps_per_second': 53.36, 'epoch': 9.98}
|
889 |
{'train_runtime': 1065.756, 'train_samples_per_second': 122.101, 'train_steps_per_second': 1.905, 'train_loss': 0.04138289297302368, 'epoch': 9.98}
|
890 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
891 |
0%| | 0/315 [00:00<?, ?it/s]
|
892 |
3%|▎ | 8/315 [00:00<00:03, 79.87it/s]
|
893 |
5%|▌ | 16/315 [00:00<00:03, 76.37it/s]
|
894 |
8%|▊ | 24/315 [00:00<00:03, 76.98it/s]
|
895 |
10%|█ | 32/315 [00:00<00:03, 73.14it/s]
|
896 |
13%|█▎ | 41/315 [00:00<00:03, 77.06it/s]
|
897 |
16%|█▌ | 49/315 [00:00<00:03, 75.87it/s]
|
898 |
18%|█▊ | 57/315 [00:00<00:03, 75.22it/s]
|
899 |
21%|██ | 65/315 [00:00<00:03, 72.75it/s]
|
900 |
23%|██▎ | 73/315 [00:00<00:03, 74.58it/s]
|
901 |
26%|██▌ | 81/315 [00:01<00:03, 70.30it/s]
|
902 |
28%|██▊ | 89/315 [00:01<00:03, 67.40it/s]
|
903 |
31%|███ | 97/315 [00:01<00:03, 67.21it/s]
|
904 |
33%|███▎ | 105/315 [00:01<00:03, 68.92it/s]
|
905 |
36%|███▌ | 113/315 [00:01<00:02, 71.06it/s]
|
906 |
38%|███▊ | 121/315 [00:01<00:02, 69.27it/s]
|
907 |
41%|████ | 129/315 [00:01<00:02, 70.12it/s]
|
908 |
43%|████▎ | 137/315 [00:01<00:02, 69.43it/s]
|
909 |
46%|████▌ | 144/315 [00:02<00:02, 69.31it/s]
|
910 |
49%|████▊ | 153/315 [00:02<00:02, 73.00it/s]
|
911 |
51%|█████ | 161/315 [00:02<00:02, 71.79it/s]
|
912 |
54%|█████▎ | 169/315 [00:02<00:02, 71.32it/s]
|
913 |
56%|█████▌ | 177/315 [00:02<00:01, 70.34it/s]
|
914 |
59%|█████▊ | 185/315 [00:02<00:01, 68.51it/s]
|
915 |
61%|██████ | 192/315 [00:02<00:01, 68.60it/s]
|
916 |
63%|██████▎ | 199/315 [00:02<00:01, 65.97it/s]
|
917 |
65%|██████▌ | 206/315 [00:02<00:01, 64.76it/s]
|
918 |
68%|██████▊ | 214/315 [00:03<00:01, 68.25it/s]
|
919 |
70%|███████ | 222/315 [00:03<00:01, 70.05it/s]
|
920 |
73%|███████▎ | 230/315 [00:03<00:01, 72.77it/s]
|
921 |
76%|███████▌ | 239/315 [00:03<00:01, 74.50it/s]
|
922 |
78%|███████▊ | 247/315 [00:03<00:00, 70.67it/s]
|
923 |
81%|████████ | 255/315 [00:03<00:00, 69.38it/s]
|
924 |
83%|████████▎ | 263/315 [00:03<00:00, 70.77it/s]
|
925 |
86%|████████▌ | 271/315 [00:03<00:00, 72.89it/s]
|
926 |
89%|████████▉ | 280/315 [00:03<00:00, 75.53it/s]
|
927 |
91%|█████████▏| 288/315 [00:04<00:00, 72.20it/s]
|
928 |
94%|█████████▍| 296/315 [00:04<00:00, 70.67it/s]
|
929 |
97%|█████████▋| 304/315 [00:04<00:00, 72.18it/s]
|
930 |
99%|█████████▉| 312/315 [00:04<00:00, 72.30it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
931 |
0%| | 0/506 [00:00<?, ?it/s]
|
932 |
2%|▏ | 10/506 [00:00<00:05, 89.99it/s]
|
933 |
4%|▍ | 19/506 [00:00<00:06, 78.47it/s]
|
934 |
5%|▌ | 27/506 [00:00<00:06, 77.29it/s]
|
935 |
7%|▋ | 35/506 [00:00<00:06, 76.19it/s]
|
936 |
8%|▊ | 43/506 [00:00<00:06, 75.01it/s]
|
937 |
10%|█ | 51/506 [00:00<00:06, 74.72it/s]
|
938 |
12%|█▏ | 59/506 [00:00<00:06, 72.73it/s]
|
939 |
13%|█▎ | 67/506 [00:00<00:05, 74.41it/s]
|
940 |
15%|█▍ | 75/506 [00:00<00:05, 73.98it/s]
|
941 |
16%|█▋ | 83/506 [00:01<00:06, 64.09it/s]
|
942 |
18%|█▊ | 90/506 [00:01<00:06, 64.05it/s]
|
943 |
19%|█▉ | 98/506 [00:01<00:06, 67.57it/s]
|
944 |
21%|██ | 106/506 [00:01<00:05, 69.34it/s]
|
945 |
23%|██▎ | 114/506 [00:01<00:05, 72.09it/s]
|
946 |
24%|██▍ | 122/506 [00:01<00:05, 70.62it/s]
|
947 |
26%|██▌ | 130/506 [00:01<00:06, 60.47it/s]
|
948 |
27%|██▋ | 137/506 [00:02<00:06, 59.29it/s]
|
949 |
29%|██▊ | 145/506 [00:02<00:05, 63.13it/s]
|
950 |
30%|███ | 153/506 [00:02<00:05, 62.09it/s]
|
951 |
32%|███▏ | 160/506 [00:02<00:05, 60.95it/s]
|
952 |
33%|███▎ | 167/506 [00:02<00:05, 61.89it/s]
|
953 |
34%|███▍ | 174/506 [00:02<00:05, 63.30it/s]
|
954 |
36%|███▌ | 182/506 [00:02<00:04, 65.88it/s]
|
955 |
38%|███▊ | 190/506 [00:02<00:04, 68.04it/s]
|
956 |
39%|███▉ | 197/506 [00:02<00:04, 67.64it/s]
|
957 |
41%|████ | 205/506 [00:03<00:04, 69.76it/s]
|
958 |
42%|████▏ | 213/506 [00:03<00:04, 67.95it/s]
|
959 |
43%|████▎ | 220/506 [00:03<00:04, 66.58it/s]
|
960 |
45%|████▍ | 227/506 [00:03<00:04, 63.22it/s]
|
961 |
46%|████▌ | 234/506 [00:03<00:04, 61.33it/s]
|
962 |
48%|████▊ | 241/506 [00:03<00:04, 63.41it/s]
|
963 |
49%|████▉ | 249/506 [00:03<00:03, 66.94it/s]
|
964 |
51%|█████ | 256/506 [00:03<00:03, 67.40it/s]
|
965 |
52%|█████▏ | 264/506 [00:03<00:03, 70.60it/s]
|
966 |
54%|█████▍ | 272/506 [00:04<00:03, 72.82it/s]
|
967 |
55%|█████▌ | 280/506 [00:04<00:03, 72.01it/s]
|
968 |
57%|█████▋ | 288/506 [00:04<00:03, 70.69it/s]
|
969 |
58%|█████▊ | 296/506 [00:04<00:02, 70.79it/s]
|
970 |
60%|██████ | 304/506 [00:04<00:02, 71.70it/s]
|
971 |
62%|██████▏ | 312/506 [00:04<00:02, 71.70it/s]
|
972 |
63%|██████▎ | 320/506 [00:04<00:02, 73.71it/s]
|
973 |
65%|██████▌ | 329/506 [00:04<00:02, 76.51it/s]
|
974 |
67%|██████▋ | 337/506 [00:04<00:02, 76.82it/s]
|
975 |
68%|██████▊ | 345/506 [00:04<00:02, 77.52it/s]
|
976 |
70%|██████▉ | 353/506 [00:05<00:01, 77.18it/s]
|
977 |
71%|███████▏ | 361/506 [00:05<00:01, 76.54it/s]
|
978 |
73%|███████▎ | 369/506 [00:05<00:01, 71.00it/s]
|
979 |
75%|███████▍ | 377/506 [00:05<00:01, 66.99it/s]
|
980 |
76%|███████▌ | 384/506 [00:05<00:01, 64.26it/s]
|
981 |
77%|███████▋ | 391/506 [00:05<00:01, 60.26it/s]
|
982 |
79%|███████▊ | 398/506 [00:05<00:01, 58.75it/s]
|
983 |
80%|████████ | 405/506 [00:05<00:01, 59.98it/s]
|
984 |
81%|████████▏ | 412/506 [00:06<00:01, 61.53it/s]
|
985 |
83%|████████▎ | 419/506 [00:06<00:01, 63.24it/s]
|
986 |
84%|████████▍ | 426/506 [00:06<00:01, 63.10it/s]
|
987 |
86%|████████▌ | 434/506 [00:06<00:01, 66.36it/s]
|
988 |
87%|████████▋ | 441/506 [00:06<00:00, 66.96it/s]
|
989 |
89%|████████▊ | 448/506 [00:06<00:00, 66.98it/s]
|
990 |
90%|█████████ | 456/506 [00:06<00:00, 69.30it/s]
|
991 |
92%|█████████▏| 465/506 [00:06<00:00, 71.88it/s]
|
992 |
93%|█████████▎| 473/506 [00:06<00:00, 73.65it/s]
|
993 |
95%|█████████▌| 481/506 [00:07<00:00, 74.93it/s]
|
994 |
97%|█████████▋| 489/506 [00:07<00:00, 70.10it/s]
|
995 |
98%|█████████▊| 497/506 [00:07<00:00, 71.43it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
888 |
{'eval_loss': 0.3079104423522949, 'eval_precision': 0.6712820512820513, 'eval_recall': 0.7164750957854407, 'eval_f1': 0.6931427058512046, 'eval_accuracy': 0.9500465205813469, 'eval_runtime': 5.9033, 'eval_samples_per_second': 426.708, 'eval_steps_per_second': 53.36, 'epoch': 9.98}
|
889 |
{'train_runtime': 1065.756, 'train_samples_per_second': 122.101, 'train_steps_per_second': 1.905, 'train_loss': 0.04138289297302368, 'epoch': 9.98}
|
890 |
|
891 |
+
***** train metrics *****
|
892 |
+
epoch = 9.9754
|
893 |
+
total_flos = 5964967GF
|
894 |
+
train_loss = 0.0414
|
895 |
+
train_runtime = 0:17:45.75
|
896 |
+
train_samples = 13013
|
897 |
+
train_samples_per_second = 122.101
|
898 |
+
train_steps_per_second = 1.905
|
899 |
+
09/09/2024 12:12:29 - INFO - __main__ - *** Evaluate ***
|
900 |
+
[INFO|trainer.py:811] 2024-09-09 12:12:29,073 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
901 |
+
[INFO|trainer.py:3819] 2024-09-09 12:12:29,076 >>
|
902 |
+
***** Running Evaluation *****
|
903 |
+
[INFO|trainer.py:3821] 2024-09-09 12:12:29,076 >> Num examples = 2519
|
904 |
+
[INFO|trainer.py:3824] 2024-09-09 12:12:29,076 >> Batch size = 8
|
905 |
+
|
906 |
0%| | 0/315 [00:00<?, ?it/s]
|
907 |
3%|▎ | 8/315 [00:00<00:03, 79.87it/s]
|
908 |
5%|▌ | 16/315 [00:00<00:03, 76.37it/s]
|
909 |
8%|▊ | 24/315 [00:00<00:03, 76.98it/s]
|
910 |
10%|█ | 32/315 [00:00<00:03, 73.14it/s]
|
911 |
13%|█▎ | 41/315 [00:00<00:03, 77.06it/s]
|
912 |
16%|█▌ | 49/315 [00:00<00:03, 75.87it/s]
|
913 |
18%|█▊ | 57/315 [00:00<00:03, 75.22it/s]
|
914 |
21%|██ | 65/315 [00:00<00:03, 72.75it/s]
|
915 |
23%|██▎ | 73/315 [00:00<00:03, 74.58it/s]
|
916 |
26%|██▌ | 81/315 [00:01<00:03, 70.30it/s]
|
917 |
28%|██▊ | 89/315 [00:01<00:03, 67.40it/s]
|
918 |
31%|███ | 97/315 [00:01<00:03, 67.21it/s]
|
919 |
33%|███▎ | 105/315 [00:01<00:03, 68.92it/s]
|
920 |
36%|███▌ | 113/315 [00:01<00:02, 71.06it/s]
|
921 |
38%|███▊ | 121/315 [00:01<00:02, 69.27it/s]
|
922 |
41%|████ | 129/315 [00:01<00:02, 70.12it/s]
|
923 |
43%|████▎ | 137/315 [00:01<00:02, 69.43it/s]
|
924 |
46%|████▌ | 144/315 [00:02<00:02, 69.31it/s]
|
925 |
49%|████▊ | 153/315 [00:02<00:02, 73.00it/s]
|
926 |
51%|█████ | 161/315 [00:02<00:02, 71.79it/s]
|
927 |
54%|█████▎ | 169/315 [00:02<00:02, 71.32it/s]
|
928 |
56%|█████▌ | 177/315 [00:02<00:01, 70.34it/s]
|
929 |
59%|█████▊ | 185/315 [00:02<00:01, 68.51it/s]
|
930 |
61%|██████ | 192/315 [00:02<00:01, 68.60it/s]
|
931 |
63%|██████▎ | 199/315 [00:02<00:01, 65.97it/s]
|
932 |
65%|██████▌ | 206/315 [00:02<00:01, 64.76it/s]
|
933 |
68%|██████▊ | 214/315 [00:03<00:01, 68.25it/s]
|
934 |
70%|███████ | 222/315 [00:03<00:01, 70.05it/s]
|
935 |
73%|███████▎ | 230/315 [00:03<00:01, 72.77it/s]
|
936 |
76%|███████▌ | 239/315 [00:03<00:01, 74.50it/s]
|
937 |
78%|███████▊ | 247/315 [00:03<00:00, 70.67it/s]
|
938 |
81%|████████ | 255/315 [00:03<00:00, 69.38it/s]
|
939 |
83%|████████▎ | 263/315 [00:03<00:00, 70.77it/s]
|
940 |
86%|████████▌ | 271/315 [00:03<00:00, 72.89it/s]
|
941 |
89%|████████▉ | 280/315 [00:03<00:00, 75.53it/s]
|
942 |
91%|█████████▏| 288/315 [00:04<00:00, 72.20it/s]
|
943 |
94%|█████████▍| 296/315 [00:04<00:00, 70.67it/s]
|
944 |
97%|█████████▋| 304/315 [00:04<00:00, 72.18it/s]
|
945 |
99%|█████████▉| 312/315 [00:04<00:00, 72.30it/s]
|
946 |
+
***** eval metrics *****
|
947 |
+
epoch = 9.9754
|
948 |
+
eval_accuracy = 0.95
|
949 |
+
eval_f1 = 0.6984
|
950 |
+
eval_loss = 0.3073
|
951 |
+
eval_precision = 0.6764
|
952 |
+
eval_recall = 0.7219
|
953 |
+
eval_runtime = 0:00:06.09
|
954 |
+
eval_samples = 2519
|
955 |
+
eval_samples_per_second = 413.484
|
956 |
+
eval_steps_per_second = 51.706
|
957 |
+
09/09/2024 12:12:35 - INFO - __main__ - *** Predict ***
|
958 |
+
[INFO|trainer.py:811] 2024-09-09 12:12:35,170 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
959 |
+
[INFO|trainer.py:3819] 2024-09-09 12:12:35,172 >>
|
960 |
+
***** Running Prediction *****
|
961 |
+
[INFO|trainer.py:3821] 2024-09-09 12:12:35,172 >> Num examples = 4047
|
962 |
+
[INFO|trainer.py:3824] 2024-09-09 12:12:35,172 >> Batch size = 8
|
963 |
+
|
964 |
0%| | 0/506 [00:00<?, ?it/s]
|
965 |
2%|▏ | 10/506 [00:00<00:05, 89.99it/s]
|
966 |
4%|▍ | 19/506 [00:00<00:06, 78.47it/s]
|
967 |
5%|▌ | 27/506 [00:00<00:06, 77.29it/s]
|
968 |
7%|▋ | 35/506 [00:00<00:06, 76.19it/s]
|
969 |
8%|▊ | 43/506 [00:00<00:06, 75.01it/s]
|
970 |
10%|█ | 51/506 [00:00<00:06, 74.72it/s]
|
971 |
12%|█▏ | 59/506 [00:00<00:06, 72.73it/s]
|
972 |
13%|█▎ | 67/506 [00:00<00:05, 74.41it/s]
|
973 |
15%|█▍ | 75/506 [00:00<00:05, 73.98it/s]
|
974 |
16%|█▋ | 83/506 [00:01<00:06, 64.09it/s]
|
975 |
18%|█▊ | 90/506 [00:01<00:06, 64.05it/s]
|
976 |
19%|█▉ | 98/506 [00:01<00:06, 67.57it/s]
|
977 |
21%|██ | 106/506 [00:01<00:05, 69.34it/s]
|
978 |
23%|██▎ | 114/506 [00:01<00:05, 72.09it/s]
|
979 |
24%|██▍ | 122/506 [00:01<00:05, 70.62it/s]
|
980 |
26%|██▌ | 130/506 [00:01<00:06, 60.47it/s]
|
981 |
27%|██▋ | 137/506 [00:02<00:06, 59.29it/s]
|
982 |
29%|██▊ | 145/506 [00:02<00:05, 63.13it/s]
|
983 |
30%|███ | 153/506 [00:02<00:05, 62.09it/s]
|
984 |
32%|███▏ | 160/506 [00:02<00:05, 60.95it/s]
|
985 |
33%|███▎ | 167/506 [00:02<00:05, 61.89it/s]
|
986 |
34%|███▍ | 174/506 [00:02<00:05, 63.30it/s]
|
987 |
36%|███▌ | 182/506 [00:02<00:04, 65.88it/s]
|
988 |
38%|███▊ | 190/506 [00:02<00:04, 68.04it/s]
|
989 |
39%|███▉ | 197/506 [00:02<00:04, 67.64it/s]
|
990 |
41%|████ | 205/506 [00:03<00:04, 69.76it/s]
|
991 |
42%|████▏ | 213/506 [00:03<00:04, 67.95it/s]
|
992 |
43%|████▎ | 220/506 [00:03<00:04, 66.58it/s]
|
993 |
45%|████▍ | 227/506 [00:03<00:04, 63.22it/s]
|
994 |
46%|████▌ | 234/506 [00:03<00:04, 61.33it/s]
|
995 |
48%|████▊ | 241/506 [00:03<00:04, 63.41it/s]
|
996 |
49%|████▉ | 249/506 [00:03<00:03, 66.94it/s]
|
997 |
51%|█████ | 256/506 [00:03<00:03, 67.40it/s]
|
998 |
52%|█████▏ | 264/506 [00:03<00:03, 70.60it/s]
|
999 |
54%|█████▍ | 272/506 [00:04<00:03, 72.82it/s]
|
1000 |
55%|█████▌ | 280/506 [00:04<00:03, 72.01it/s]
|
1001 |
57%|█████▋ | 288/506 [00:04<00:03, 70.69it/s]
|
1002 |
58%|█████▊ | 296/506 [00:04<00:02, 70.79it/s]
|
1003 |
60%|██████ | 304/506 [00:04<00:02, 71.70it/s]
|
1004 |
62%|██████▏ | 312/506 [00:04<00:02, 71.70it/s]
|
1005 |
63%|██████▎ | 320/506 [00:04<00:02, 73.71it/s]
|
1006 |
65%|██████▌ | 329/506 [00:04<00:02, 76.51it/s]
|
1007 |
67%|██████▋ | 337/506 [00:04<00:02, 76.82it/s]
|
1008 |
68%|██████▊ | 345/506 [00:04<00:02, 77.52it/s]
|
1009 |
70%|██████▉ | 353/506 [00:05<00:01, 77.18it/s]
|
1010 |
71%|███████▏ | 361/506 [00:05<00:01, 76.54it/s]
|
1011 |
73%|███████▎ | 369/506 [00:05<00:01, 71.00it/s]
|
1012 |
75%|███████▍ | 377/506 [00:05<00:01, 66.99it/s]
|
1013 |
76%|███████▌ | 384/506 [00:05<00:01, 64.26it/s]
|
1014 |
77%|███████▋ | 391/506 [00:05<00:01, 60.26it/s]
|
1015 |
79%|███████▊ | 398/506 [00:05<00:01, 58.75it/s]
|
1016 |
80%|████████ | 405/506 [00:05<00:01, 59.98it/s]
|
1017 |
81%|████████▏ | 412/506 [00:06<00:01, 61.53it/s]
|
1018 |
83%|████████▎ | 419/506 [00:06<00:01, 63.24it/s]
|
1019 |
84%|████████▍ | 426/506 [00:06<00:01, 63.10it/s]
|
1020 |
86%|████████▌ | 434/506 [00:06<00:01, 66.36it/s]
|
1021 |
87%|████████▋ | 441/506 [00:06<00:00, 66.96it/s]
|
1022 |
89%|████████▊ | 448/506 [00:06<00:00, 66.98it/s]
|
1023 |
90%|█████████ | 456/506 [00:06<00:00, 69.30it/s]
|
1024 |
92%|█████████▏| 465/506 [00:06<00:00, 71.88it/s]
|
1025 |
93%|█████████▎| 473/506 [00:06<00:00, 73.65it/s]
|
1026 |
95%|█████████▌| 481/506 [00:07<00:00, 74.93it/s]
|
1027 |
97%|█████████▋| 489/506 [00:07<00:00, 70.10it/s]
|
1028 |
98%|█████████▊| 497/506 [00:07<00:00, 71.43it/s]
|
1029 |
+
[INFO|trainer.py:3503] 2024-09-09 12:12:45,082 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
|
1030 |
+
[INFO|configuration_utils.py:472] 2024-09-09 12:12:45,084 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
|
1031 |
+
[INFO|modeling_utils.py:2799] 2024-09-09 12:12:46,408 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
|
1032 |
+
[INFO|tokenization_utils_base.py:2684] 2024-09-09 12:12:46,409 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
|
1033 |
+
[INFO|tokenization_utils_base.py:2693] 2024-09-09 12:12:46,410 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
|
1034 |
+
***** predict metrics *****
|
1035 |
+
predict_accuracy = 0.9467
|
1036 |
+
predict_f1 = 0.6952
|
1037 |
+
predict_loss = 0.3348
|
1038 |
+
predict_precision = 0.6863
|
1039 |
+
predict_recall = 0.7042
|
1040 |
+
predict_runtime = 0:00:09.74
|
1041 |
+
predict_samples_per_second = 415.118
|
1042 |
+
predict_steps_per_second = 51.903
|
1043 |
+
|
train_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"total_flos":
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
-
"train_samples":
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second": 1.
|
9 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 9.975429975429975,
|
3 |
+
"total_flos": 6404835399317064.0,
|
4 |
+
"train_loss": 0.04138289297302368,
|
5 |
+
"train_runtime": 1065.756,
|
6 |
+
"train_samples": 13013,
|
7 |
+
"train_samples_per_second": 122.101,
|
8 |
+
"train_steps_per_second": 1.905
|
9 |
}
|
trainer_state.json
CHANGED
@@ -1,180 +1,173 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
-
"epoch":
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_f1": 0.
|
15 |
-
"eval_loss": 0.
|
16 |
-
"eval_precision": 0.
|
17 |
-
"eval_recall": 0.
|
18 |
-
"eval_runtime": 5.
|
19 |
-
"eval_samples_per_second":
|
20 |
-
"eval_steps_per_second":
|
21 |
-
"step":
|
22 |
-
},
|
23 |
-
{
|
24 |
-
"epoch": 1.937984496124031,
|
25 |
-
"grad_norm": 0.791233479976654,
|
26 |
-
"learning_rate": 4.0310077519379843e-05,
|
27 |
-
"loss": 0.1357,
|
28 |
-
"step": 500
|
29 |
},
|
30 |
{
|
31 |
"epoch": 2.0,
|
32 |
-
"eval_accuracy": 0.
|
33 |
-
"eval_f1": 0.
|
34 |
-
"eval_loss": 0.
|
35 |
-
"eval_precision": 0.
|
36 |
-
"eval_recall": 0.
|
37 |
-
"eval_runtime": 5.
|
38 |
-
"eval_samples_per_second":
|
39 |
-
"eval_steps_per_second": 53.
|
40 |
-
"step":
|
41 |
},
|
42 |
{
|
43 |
-
"epoch":
|
44 |
-
"
|
45 |
-
"
|
46 |
-
"
|
47 |
-
"
|
48 |
-
"eval_recall": 0.7038861521620142,
|
49 |
-
"eval_runtime": 5.8946,
|
50 |
-
"eval_samples_per_second": 427.339,
|
51 |
-
"eval_steps_per_second": 53.439,
|
52 |
-
"step": 774
|
53 |
},
|
54 |
{
|
55 |
-
"epoch":
|
56 |
-
"
|
57 |
-
"
|
58 |
-
"
|
59 |
-
"
|
|
|
|
|
|
|
|
|
|
|
60 |
},
|
61 |
{
|
62 |
"epoch": 4.0,
|
63 |
-
"eval_accuracy": 0.
|
64 |
-
"eval_f1": 0.
|
65 |
-
"eval_loss": 0.
|
66 |
-
"eval_precision": 0.
|
67 |
-
"eval_recall": 0.
|
68 |
-
"eval_runtime": 5.
|
69 |
-
"eval_samples_per_second":
|
70 |
-
"eval_steps_per_second": 53.
|
71 |
-
"step":
|
72 |
},
|
73 |
{
|
74 |
-
"epoch":
|
75 |
-
"
|
76 |
-
"
|
77 |
-
"
|
78 |
-
"
|
79 |
-
"eval_recall": 0.7038861521620142,
|
80 |
-
"eval_runtime": 5.9102,
|
81 |
-
"eval_samples_per_second": 426.215,
|
82 |
-
"eval_steps_per_second": 53.298,
|
83 |
-
"step": 1290
|
84 |
},
|
85 |
{
|
86 |
-
"epoch":
|
87 |
-
"
|
88 |
-
"
|
89 |
-
"
|
90 |
-
"
|
|
|
|
|
|
|
|
|
|
|
91 |
},
|
92 |
{
|
93 |
"epoch": 6.0,
|
94 |
-
"eval_accuracy": 0.
|
95 |
-
"eval_f1": 0.
|
96 |
-
"eval_loss": 0.
|
97 |
-
"eval_precision": 0.
|
98 |
-
"eval_recall": 0.
|
99 |
-
"eval_runtime": 5.
|
100 |
-
"eval_samples_per_second": 426.
|
101 |
-
"eval_steps_per_second": 53.
|
102 |
-
"step":
|
103 |
},
|
104 |
{
|
105 |
-
"epoch":
|
106 |
-
"eval_accuracy": 0.
|
107 |
-
"eval_f1": 0.
|
108 |
-
"eval_loss": 0.
|
109 |
-
"eval_precision": 0.
|
110 |
-
"eval_recall": 0.
|
111 |
-
"eval_runtime": 5.
|
112 |
-
"eval_samples_per_second":
|
113 |
-
"eval_steps_per_second": 53.
|
114 |
-
"step":
|
115 |
},
|
116 |
{
|
117 |
-
"epoch": 7.
|
118 |
-
"grad_norm":
|
119 |
-
"learning_rate": 1.
|
120 |
-
"loss": 0.
|
121 |
-
"step":
|
122 |
},
|
123 |
{
|
124 |
"epoch": 8.0,
|
125 |
-
"eval_accuracy": 0.
|
126 |
-
"eval_f1": 0.
|
127 |
-
"eval_loss": 0.
|
128 |
-
"eval_precision": 0.
|
129 |
-
"eval_recall": 0.
|
130 |
-
"eval_runtime": 5.
|
131 |
-
"eval_samples_per_second":
|
132 |
-
"eval_steps_per_second": 53.
|
133 |
-
"step":
|
134 |
},
|
135 |
{
|
136 |
-
"epoch":
|
137 |
-
"eval_accuracy": 0.
|
138 |
-
"eval_f1": 0.
|
139 |
-
"eval_loss": 0.
|
140 |
-
"eval_precision": 0.
|
141 |
-
"eval_recall": 0.
|
142 |
-
"eval_runtime": 5.
|
143 |
-
"eval_samples_per_second":
|
144 |
-
"eval_steps_per_second": 53.
|
145 |
-
"step":
|
146 |
},
|
147 |
{
|
148 |
-
"epoch": 9.
|
149 |
-
"grad_norm": 0.
|
150 |
-
"learning_rate":
|
151 |
-
"loss": 0.
|
152 |
-
"step":
|
153 |
},
|
154 |
{
|
155 |
-
"epoch":
|
156 |
-
"eval_accuracy": 0.
|
157 |
-
"eval_f1": 0.
|
158 |
-
"eval_loss": 0.
|
159 |
-
"eval_precision": 0.
|
160 |
-
"eval_recall": 0.
|
161 |
-
"eval_runtime":
|
162 |
-
"eval_samples_per_second":
|
163 |
-
"eval_steps_per_second":
|
164 |
-
"step":
|
165 |
},
|
166 |
{
|
167 |
-
"epoch":
|
168 |
-
"step":
|
169 |
-
"total_flos":
|
170 |
-
"train_loss": 0.
|
171 |
-
"train_runtime":
|
172 |
-
"train_samples_per_second":
|
173 |
-
"train_steps_per_second": 1.
|
174 |
}
|
175 |
],
|
176 |
"logging_steps": 500,
|
177 |
-
"max_steps":
|
178 |
"num_input_tokens_seen": 0,
|
179 |
"num_train_epochs": 10,
|
180 |
"save_steps": 500,
|
@@ -190,7 +183,7 @@
|
|
190 |
"attributes": {}
|
191 |
}
|
192 |
},
|
193 |
-
"total_flos":
|
194 |
"train_batch_size": 32,
|
195 |
"trial_name": null,
|
196 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.6984379136881121,
|
3 |
+
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1831",
|
4 |
+
"epoch": 9.975429975429975,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2030,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
+
"epoch": 0.9975429975429976,
|
13 |
+
"eval_accuracy": 0.9467740383072925,
|
14 |
+
"eval_f1": 0.6143236074270556,
|
15 |
+
"eval_loss": 0.15010379254817963,
|
16 |
+
"eval_precision": 0.5959855892949047,
|
17 |
+
"eval_recall": 0.6338259441707718,
|
18 |
+
"eval_runtime": 5.907,
|
19 |
+
"eval_samples_per_second": 426.445,
|
20 |
+
"eval_steps_per_second": 53.327,
|
21 |
+
"step": 203
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
},
|
23 |
{
|
24 |
"epoch": 2.0,
|
25 |
+
"eval_accuracy": 0.949244441592608,
|
26 |
+
"eval_f1": 0.6728575218890952,
|
27 |
+
"eval_loss": 0.17612887918949127,
|
28 |
+
"eval_precision": 0.6529351184346035,
|
29 |
+
"eval_recall": 0.6940339354132458,
|
30 |
+
"eval_runtime": 5.8933,
|
31 |
+
"eval_samples_per_second": 427.436,
|
32 |
+
"eval_steps_per_second": 53.451,
|
33 |
+
"step": 407
|
34 |
},
|
35 |
{
|
36 |
+
"epoch": 2.457002457002457,
|
37 |
+
"grad_norm": 0.6181371212005615,
|
38 |
+
"learning_rate": 3.768472906403941e-05,
|
39 |
+
"loss": 0.1312,
|
40 |
+
"step": 500
|
|
|
|
|
|
|
|
|
|
|
41 |
},
|
42 |
{
|
43 |
+
"epoch": 2.9975429975429977,
|
44 |
+
"eval_accuracy": 0.9469665372645898,
|
45 |
+
"eval_f1": 0.671967171069505,
|
46 |
+
"eval_loss": 0.1995203047990799,
|
47 |
+
"eval_precision": 0.6322393822393823,
|
48 |
+
"eval_recall": 0.7170224411603722,
|
49 |
+
"eval_runtime": 5.8448,
|
50 |
+
"eval_samples_per_second": 430.983,
|
51 |
+
"eval_steps_per_second": 53.894,
|
52 |
+
"step": 610
|
53 |
},
|
54 |
{
|
55 |
"epoch": 4.0,
|
56 |
+
"eval_accuracy": 0.9482979883858963,
|
57 |
+
"eval_f1": 0.6774025974025973,
|
58 |
+
"eval_loss": 0.21822449564933777,
|
59 |
+
"eval_precision": 0.6445872466633712,
|
60 |
+
"eval_recall": 0.7137383689107827,
|
61 |
+
"eval_runtime": 5.872,
|
62 |
+
"eval_samples_per_second": 428.988,
|
63 |
+
"eval_steps_per_second": 53.645,
|
64 |
+
"step": 814
|
65 |
},
|
66 |
{
|
67 |
+
"epoch": 4.914004914004914,
|
68 |
+
"grad_norm": 0.7616795301437378,
|
69 |
+
"learning_rate": 2.5369458128078822e-05,
|
70 |
+
"loss": 0.0248,
|
71 |
+
"step": 1000
|
|
|
|
|
|
|
|
|
|
|
72 |
},
|
73 |
{
|
74 |
+
"epoch": 4.997542997542998,
|
75 |
+
"eval_accuracy": 0.9448650903140942,
|
76 |
+
"eval_f1": 0.6700533401066802,
|
77 |
+
"eval_loss": 0.24612903594970703,
|
78 |
+
"eval_precision": 0.6251184834123222,
|
79 |
+
"eval_recall": 0.7219485495347564,
|
80 |
+
"eval_runtime": 5.8462,
|
81 |
+
"eval_samples_per_second": 430.877,
|
82 |
+
"eval_steps_per_second": 53.881,
|
83 |
+
"step": 1017
|
84 |
},
|
85 |
{
|
86 |
"epoch": 6.0,
|
87 |
+
"eval_accuracy": 0.9469023709454907,
|
88 |
+
"eval_f1": 0.6827021494370521,
|
89 |
+
"eval_loss": 0.26953065395355225,
|
90 |
+
"eval_precision": 0.6410379625180201,
|
91 |
+
"eval_recall": 0.7301587301587301,
|
92 |
+
"eval_runtime": 5.9067,
|
93 |
+
"eval_samples_per_second": 426.468,
|
94 |
+
"eval_steps_per_second": 53.33,
|
95 |
+
"step": 1221
|
96 |
},
|
97 |
{
|
98 |
+
"epoch": 6.997542997542998,
|
99 |
+
"eval_accuracy": 0.9469986204241394,
|
100 |
+
"eval_f1": 0.6910590054109765,
|
101 |
+
"eval_loss": 0.2829184830188751,
|
102 |
+
"eval_precision": 0.6528724440116845,
|
103 |
+
"eval_recall": 0.7339901477832512,
|
104 |
+
"eval_runtime": 5.8572,
|
105 |
+
"eval_samples_per_second": 430.069,
|
106 |
+
"eval_steps_per_second": 53.78,
|
107 |
+
"step": 1424
|
108 |
},
|
109 |
{
|
110 |
+
"epoch": 7.371007371007371,
|
111 |
+
"grad_norm": 0.2855200171470642,
|
112 |
+
"learning_rate": 1.3054187192118228e-05,
|
113 |
+
"loss": 0.0081,
|
114 |
+
"step": 1500
|
115 |
},
|
116 |
{
|
117 |
"epoch": 8.0,
|
118 |
+
"eval_accuracy": 0.9494048573903558,
|
119 |
+
"eval_f1": 0.6938127974616606,
|
120 |
+
"eval_loss": 0.29823970794677734,
|
121 |
+
"eval_precision": 0.6710997442455243,
|
122 |
+
"eval_recall": 0.7181171319102354,
|
123 |
+
"eval_runtime": 5.8929,
|
124 |
+
"eval_samples_per_second": 427.463,
|
125 |
+
"eval_steps_per_second": 53.454,
|
126 |
+
"step": 1628
|
127 |
},
|
128 |
{
|
129 |
+
"epoch": 8.997542997542997,
|
130 |
+
"eval_accuracy": 0.9500465205813469,
|
131 |
+
"eval_f1": 0.6984379136881121,
|
132 |
+
"eval_loss": 0.30729904770851135,
|
133 |
+
"eval_precision": 0.6764102564102564,
|
134 |
+
"eval_recall": 0.7219485495347564,
|
135 |
+
"eval_runtime": 5.8665,
|
136 |
+
"eval_samples_per_second": 429.386,
|
137 |
+
"eval_steps_per_second": 53.695,
|
138 |
+
"step": 1831
|
139 |
},
|
140 |
{
|
141 |
+
"epoch": 9.828009828009828,
|
142 |
+
"grad_norm": 0.6682894825935364,
|
143 |
+
"learning_rate": 7.389162561576355e-07,
|
144 |
+
"loss": 0.0038,
|
145 |
+
"step": 2000
|
146 |
},
|
147 |
{
|
148 |
+
"epoch": 9.975429975429975,
|
149 |
+
"eval_accuracy": 0.9500465205813469,
|
150 |
+
"eval_f1": 0.6931427058512046,
|
151 |
+
"eval_loss": 0.3079104423522949,
|
152 |
+
"eval_precision": 0.6712820512820513,
|
153 |
+
"eval_recall": 0.7164750957854407,
|
154 |
+
"eval_runtime": 5.9033,
|
155 |
+
"eval_samples_per_second": 426.708,
|
156 |
+
"eval_steps_per_second": 53.36,
|
157 |
+
"step": 2030
|
158 |
},
|
159 |
{
|
160 |
+
"epoch": 9.975429975429975,
|
161 |
+
"step": 2030,
|
162 |
+
"total_flos": 6404835399317064.0,
|
163 |
+
"train_loss": 0.04138289297302368,
|
164 |
+
"train_runtime": 1065.756,
|
165 |
+
"train_samples_per_second": 122.101,
|
166 |
+
"train_steps_per_second": 1.905
|
167 |
}
|
168 |
],
|
169 |
"logging_steps": 500,
|
170 |
+
"max_steps": 2030,
|
171 |
"num_input_tokens_seen": 0,
|
172 |
"num_train_epochs": 10,
|
173 |
"save_steps": 500,
|
|
|
183 |
"attributes": {}
|
184 |
}
|
185 |
},
|
186 |
+
"total_flos": 6404835399317064.0,
|
187 |
"train_batch_size": 32,
|
188 |
"trial_name": null,
|
189 |
"trial_params": null
|