Rodrigo1771 committed
Commit: 8a12c6e
Parent: 7abe963

End of training
README.md CHANGED

@@ -3,9 +3,10 @@ library_name: transformers
 license: apache-2.0
 base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
 tags:
+- token-classification
 - generated_from_trainer
 datasets:
-- symptemist-fasttext-8-ner
+- Rodrigo1771/symptemist-fasttext-8-ner
 metrics:
 - precision
 - recall
@@ -18,21 +19,21 @@ model-index:
       name: Token Classification
       type: token-classification
     dataset:
-      name: symptemist-fasttext-8-ner
-      type: symptemist-fasttext-8-ner
+      name: Rodrigo1771/symptemist-fasttext-8-ner
+      type: Rodrigo1771/symptemist-fasttext-8-ner
       config: SympTEMIST NER
       split: validation
       args: SympTEMIST NER
     metrics:
     - name: Precision
       type: precision
-      value: 0.6712820512820513
+      value: 0.6764102564102564
     - name: Recall
       type: recall
-      value: 0.7164750957854407
+      value: 0.7219485495347564
    - name: F1
       type: f1
-      value: 0.6931427058512046
+      value: 0.6984379136881121
     - name: Accuracy
       type: accuracy
       value: 0.9500465205813469
@@ -43,12 +44,12 @@ should probably proofread and complete it, then remove this comment. -->
 
 # output
 
-This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the symptemist-fasttext-8-ner dataset.
+This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/symptemist-fasttext-8-ner dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.3079
-- Precision: 0.6713
-- Recall: 0.7165
-- F1: 0.6931
+- Loss: 0.3073
+- Precision: 0.6764
+- Recall: 0.7219
+- F1: 0.6984
 - Accuracy: 0.9500
 
 ## Model description

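The updated card lists metrics only, so a brief usage sketch may help. It assumes the trained checkpoint is available on the Hub or locally (e.g. the `/content/dissertation/scripts/ner/output` directory from the training log); `MODEL_ID` below is a placeholder, not a repo id confirmed by this commit, and the Spanish sentence is purely illustrative.

```python
# Minimal usage sketch (assumptions: transformers is installed; MODEL_ID is a
# placeholder for the actual Hub repo id or a local path to the saved output dir).
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

MODEL_ID = "Rodrigo1771/<this-model>"  # hypothetical id; replace with the real one

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)

# Group sub-word predictions into entity spans.
ner = pipeline(
    "token-classification",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
print(ner("El paciente refiere cefalea y dolor abdominal."))
```
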
all_results.json CHANGED

@@ -1,26 +1,26 @@
 {
-    "epoch": 10.0,
-    "eval_accuracy": 0.9490359010555359,
-    "eval_f1": 0.696671105193076,
-    "eval_loss": 0.33743786811828613,
-    "eval_precision": 0.6784232365145229,
-    "eval_recall": 0.715927750410509,
-    "eval_runtime": 5.8603,
+    "epoch": 9.975429975429975,
+    "eval_accuracy": 0.9500465205813469,
+    "eval_f1": 0.6984379136881121,
+    "eval_loss": 0.30729904770851135,
+    "eval_precision": 0.6764102564102564,
+    "eval_recall": 0.7219485495347564,
+    "eval_runtime": 6.0921,
     "eval_samples": 2519,
-    "eval_samples_per_second": 429.84,
-    "eval_steps_per_second": 53.751,
-    "predict_accuracy": 0.9470472034672878,
-    "predict_f1": 0.701497292131252,
-    "predict_loss": 0.3655967116355896,
-    "predict_precision": 0.6944181646168401,
-    "predict_recall": 0.7087222401029932,
-    "predict_runtime": 9.7752,
-    "predict_samples_per_second": 414.006,
-    "predict_steps_per_second": 51.764,
-    "total_flos": 8092971627384348.0,
-    "train_loss": 0.033683168437591816,
-    "train_runtime": 1318.6544,
-    "train_samples": 16483,
-    "train_samples_per_second": 124.999,
-    "train_steps_per_second": 1.957
+    "eval_samples_per_second": 413.484,
+    "eval_steps_per_second": 51.706,
+    "predict_accuracy": 0.9466933985906772,
+    "predict_f1": 0.6951548848292296,
+    "predict_loss": 0.3347860872745514,
+    "predict_precision": 0.6863237139272271,
+    "predict_recall": 0.704216285806244,
+    "predict_runtime": 9.749,
+    "predict_samples_per_second": 415.118,
+    "predict_steps_per_second": 51.903,
+    "total_flos": 6404835399317064.0,
+    "train_loss": 0.04138289297302368,
+    "train_runtime": 1065.756,
+    "train_samples": 13013,
+    "train_samples_per_second": 122.101,
+    "train_steps_per_second": 1.905
 }

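As a quick consistency check (not part of the commit), the new eval F1 is the harmonic mean of the new precision and recall reported above:

```python
# Recompute eval_f1 from the precision/recall values in all_results.json.
p = 0.6764102564102564
r = 0.7219485495347564
f1 = 2 * p * r / (p + r)
print(f1)  # ~0.69844, matching "eval_f1": 0.6984379136881121
```
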
eval_results.json CHANGED

@@ -1,12 +1,12 @@
 {
-    "epoch": 10.0,
-    "eval_accuracy": 0.9490359010555359,
-    "eval_f1": 0.696671105193076,
-    "eval_loss": 0.33743786811828613,
-    "eval_precision": 0.6784232365145229,
-    "eval_recall": 0.715927750410509,
-    "eval_runtime": 5.8603,
+    "epoch": 9.975429975429975,
+    "eval_accuracy": 0.9500465205813469,
+    "eval_f1": 0.6984379136881121,
+    "eval_loss": 0.30729904770851135,
+    "eval_precision": 0.6764102564102564,
+    "eval_recall": 0.7219485495347564,
+    "eval_runtime": 6.0921,
     "eval_samples": 2519,
-    "eval_samples_per_second": 429.84,
-    "eval_steps_per_second": 53.751
+    "eval_samples_per_second": 413.484,
+    "eval_steps_per_second": 51.706
 }

predict_results.json CHANGED

@@ -1,10 +1,10 @@
 {
-    "predict_accuracy": 0.9470472034672878,
-    "predict_f1": 0.701497292131252,
-    "predict_loss": 0.3655967116355896,
-    "predict_precision": 0.6944181646168401,
-    "predict_recall": 0.7087222401029932,
-    "predict_runtime": 9.7752,
-    "predict_samples_per_second": 414.006,
-    "predict_steps_per_second": 51.764
+    "predict_accuracy": 0.9466933985906772,
+    "predict_f1": 0.6951548848292296,
+    "predict_loss": 0.3347860872745514,
+    "predict_precision": 0.6863237139272271,
+    "predict_recall": 0.704216285806244,
+    "predict_runtime": 9.749,
+    "predict_samples_per_second": 415.118,
+    "predict_steps_per_second": 51.903
 }

predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
tb/events.out.tfevents.1725883955.0a1c9bec2a53.9893.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b842d84c89f0d88706e31e98b113fae6b45879220115930147db648f848a8c24
+size 560
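The added TensorBoard file is stored as a Git LFS pointer (the three lines above); the 560-byte event blob itself lives in LFS. A minimal sketch for inspecting it after fetching, assuming the standard TensorBoard Python API:

```python
# Sketch (assumptions: `git lfs pull` has fetched the blob and tensorboard is installed).
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("tb")  # directory containing the events.out.tfevents.* file
acc.Reload()
print(acc.Tags()["scalars"])  # scalar tags logged during this run, e.g. eval/loss
```
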
train.log CHANGED
@@ -888,3 +888,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
 {'eval_loss': 0.3079104423522949, 'eval_precision': 0.6712820512820513, 'eval_recall': 0.7164750957854407, 'eval_f1': 0.6931427058512046, 'eval_accuracy': 0.9500465205813469, 'eval_runtime': 5.9033, 'eval_samples_per_second': 426.708, 'eval_steps_per_second': 53.36, 'epoch': 9.98}
 {'train_runtime': 1065.756, 'train_samples_per_second': 122.101, 'train_steps_per_second': 1.905, 'train_loss': 0.04138289297302368, 'epoch': 9.98}
 
+***** train metrics *****
+  epoch = 9.9754
+  total_flos = 5964967GF
+  train_loss = 0.0414
+  train_runtime = 0:17:45.75
+  train_samples = 13013
+  train_samples_per_second = 122.101
+  train_steps_per_second = 1.905
+09/09/2024 12:12:29 - INFO - __main__ - *** Evaluate ***
+[INFO|trainer.py:811] 2024-09-09 12:12:29,073 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
+[INFO|trainer.py:3819] 2024-09-09 12:12:29,076 >>
+***** Running Evaluation *****
+[INFO|trainer.py:3821] 2024-09-09 12:12:29,076 >> Num examples = 2519
+[INFO|trainer.py:3824] 2024-09-09 12:12:29,076 >> Batch size = 8
+
   0%| | 0/315 [00:00<?, ?it/s]
   3%|▎ | 8/315 [00:00<00:03, 79.87it/s]
   5%|▌ | 16/315 [00:00<00:03, 76.37it/s]
   8%|▊ | 24/315 [00:00<00:03, 76.98it/s]
  10%|█ | 32/315 [00:00<00:03, 73.14it/s]
  13%|█▎ | 41/315 [00:00<00:03, 77.06it/s]
  16%|█▌ | 49/315 [00:00<00:03, 75.87it/s]
  18%|█▊ | 57/315 [00:00<00:03, 75.22it/s]
  21%|██ | 65/315 [00:00<00:03, 72.75it/s]
  23%|██▎ | 73/315 [00:00<00:03, 74.58it/s]
  26%|██▌ | 81/315 [00:01<00:03, 70.30it/s]
  28%|██▊ | 89/315 [00:01<00:03, 67.40it/s]
  31%|███ | 97/315 [00:01<00:03, 67.21it/s]
  33%|███▎ | 105/315 [00:01<00:03, 68.92it/s]
  36%|███▌ | 113/315 [00:01<00:02, 71.06it/s]
  38%|███▊ | 121/315 [00:01<00:02, 69.27it/s]
  41%|████ | 129/315 [00:01<00:02, 70.12it/s]
  43%|████▎ | 137/315 [00:01<00:02, 69.43it/s]
  46%|████▌ | 144/315 [00:02<00:02, 69.31it/s]
  49%|████▊ | 153/315 [00:02<00:02, 73.00it/s]
  51%|█████ | 161/315 [00:02<00:02, 71.79it/s]
  54%|█████▎ | 169/315 [00:02<00:02, 71.32it/s]
  56%|█████▌ | 177/315 [00:02<00:01, 70.34it/s]
  59%|█████▊ | 185/315 [00:02<00:01, 68.51it/s]
  61%|██████ | 192/315 [00:02<00:01, 68.60it/s]
  63%|██████▎ | 199/315 [00:02<00:01, 65.97it/s]
  65%|██████▌ | 206/315 [00:02<00:01, 64.76it/s]
  68%|██████▊ | 214/315 [00:03<00:01, 68.25it/s]
  70%|███████ | 222/315 [00:03<00:01, 70.05it/s]
  73%|███████▎ | 230/315 [00:03<00:01, 72.77it/s]
  76%|███████▌ | 239/315 [00:03<00:01, 74.50it/s]
  78%|███████▊ | 247/315 [00:03<00:00, 70.67it/s]
  81%|████████ | 255/315 [00:03<00:00, 69.38it/s]
  83%|████████▎ | 263/315 [00:03<00:00, 70.77it/s]
  86%|████████▌ | 271/315 [00:03<00:00, 72.89it/s]
  89%|████████▉ | 280/315 [00:03<00:00, 75.53it/s]
  91%|█████████▏| 288/315 [00:04<00:00, 72.20it/s]
  94%|█████████▍| 296/315 [00:04<00:00, 70.67it/s]
  97%|█████████▋| 304/315 [00:04<00:00, 72.18it/s]
  99%|█████████▉| 312/315 [00:04<00:00, 72.30it/s]
+***** eval metrics *****
+  epoch = 9.9754
+  eval_accuracy = 0.95
+  eval_f1 = 0.6984
+  eval_loss = 0.3073
+  eval_precision = 0.6764
+  eval_recall = 0.7219
+  eval_runtime = 0:00:06.09
+  eval_samples = 2519
+  eval_samples_per_second = 413.484
+  eval_steps_per_second = 51.706
+09/09/2024 12:12:35 - INFO - __main__ - *** Predict ***
+[INFO|trainer.py:811] 2024-09-09 12:12:35,170 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
+[INFO|trainer.py:3819] 2024-09-09 12:12:35,172 >>
+***** Running Prediction *****
+[INFO|trainer.py:3821] 2024-09-09 12:12:35,172 >> Num examples = 4047
+[INFO|trainer.py:3824] 2024-09-09 12:12:35,172 >> Batch size = 8
+
   0%| | 0/506 [00:00<?, ?it/s]
   2%|▏ | 10/506 [00:00<00:05, 89.99it/s]
   4%|▍ | 19/506 [00:00<00:06, 78.47it/s]
   5%|▌ | 27/506 [00:00<00:06, 77.29it/s]
   7%|▋ | 35/506 [00:00<00:06, 76.19it/s]
   8%|▊ | 43/506 [00:00<00:06, 75.01it/s]
  10%|█ | 51/506 [00:00<00:06, 74.72it/s]
  12%|█▏ | 59/506 [00:00<00:06, 72.73it/s]
  13%|█▎ | 67/506 [00:00<00:05, 74.41it/s]
  15%|█▍ | 75/506 [00:00<00:05, 73.98it/s]
  16%|█▋ | 83/506 [00:01<00:06, 64.09it/s]
  18%|█▊ | 90/506 [00:01<00:06, 64.05it/s]
  19%|█▉ | 98/506 [00:01<00:06, 67.57it/s]
  21%|██ | 106/506 [00:01<00:05, 69.34it/s]
  23%|██▎ | 114/506 [00:01<00:05, 72.09it/s]
  24%|██▍ | 122/506 [00:01<00:05, 70.62it/s]
  26%|██▌ | 130/506 [00:01<00:06, 60.47it/s]
  27%|██▋ | 137/506 [00:02<00:06, 59.29it/s]
  29%|██▊ | 145/506 [00:02<00:05, 63.13it/s]
  30%|███ | 153/506 [00:02<00:05, 62.09it/s]
  32%|███▏ | 160/506 [00:02<00:05, 60.95it/s]
  33%|███▎ | 167/506 [00:02<00:05, 61.89it/s]
  34%|███▍ | 174/506 [00:02<00:05, 63.30it/s]
  36%|███▌ | 182/506 [00:02<00:04, 65.88it/s]
  38%|███▊ | 190/506 [00:02<00:04, 68.04it/s]
  39%|███▉ | 197/506 [00:02<00:04, 67.64it/s]
  41%|████ | 205/506 [00:03<00:04, 69.76it/s]
  42%|████▏ | 213/506 [00:03<00:04, 67.95it/s]
  43%|████▎ | 220/506 [00:03<00:04, 66.58it/s]
  45%|████▍ | 227/506 [00:03<00:04, 63.22it/s]
  46%|████▌ | 234/506 [00:03<00:04, 61.33it/s]
  48%|████▊ | 241/506 [00:03<00:04, 63.41it/s]
  49%|████▉ | 249/506 [00:03<00:03, 66.94it/s]
  51%|█████ | 256/506 [00:03<00:03, 67.40it/s]
  52%|█████▏ | 264/506 [00:03<00:03, 70.60it/s]
  54%|█████▍ | 272/506 [00:04<00:03, 72.82it/s]
  55%|█████▌ | 280/506 [00:04<00:03, 72.01it/s]
  57%|█████▋ | 288/506 [00:04<00:03, 70.69it/s]
  58%|█████▊ | 296/506 [00:04<00:02, 70.79it/s]
  60%|██████ | 304/506 [00:04<00:02, 71.70it/s]
  62%|██████▏ | 312/506 [00:04<00:02, 71.70it/s]
  63%|██████▎ | 320/506 [00:04<00:02, 73.71it/s]
  65%|██████▌ | 329/506 [00:04<00:02, 76.51it/s]
  67%|██████▋ | 337/506 [00:04<00:02, 76.82it/s]
  68%|██████▊ | 345/506 [00:04<00:02, 77.52it/s]
  70%|██████▉ | 353/506 [00:05<00:01, 77.18it/s]
  71%|███████▏ | 361/506 [00:05<00:01, 76.54it/s]
  73%|███████▎ | 369/506 [00:05<00:01, 71.00it/s]
  75%|███████▍ | 377/506 [00:05<00:01, 66.99it/s]
  76%|███████▌ | 384/506 [00:05<00:01, 64.26it/s]
  77%|███████▋ | 391/506 [00:05<00:01, 60.26it/s]
  79%|███████▊ | 398/506 [00:05<00:01, 58.75it/s]
  80%|████████ | 405/506 [00:05<00:01, 59.98it/s]
  81%|████████▏ | 412/506 [00:06<00:01, 61.53it/s]
  83%|████████▎ | 419/506 [00:06<00:01, 63.24it/s]
  84%|████████▍ | 426/506 [00:06<00:01, 63.10it/s]
  86%|████████▌ | 434/506 [00:06<00:01, 66.36it/s]
  87%|████████▋ | 441/506 [00:06<00:00, 66.96it/s]
  89%|████████▊ | 448/506 [00:06<00:00, 66.98it/s]
  90%|█████████ | 456/506 [00:06<00:00, 69.30it/s]
  92%|█████████▏| 465/506 [00:06<00:00, 71.88it/s]
  93%|█████████▎| 473/506 [00:06<00:00, 73.65it/s]
  95%|█████████▌| 481/506 [00:07<00:00, 74.93it/s]
  97%|█████████▋| 489/506 [00:07<00:00, 70.10it/s]
  98%|█████████▊| 497/506 [00:07<00:00, 71.43it/s]
+[INFO|trainer.py:3503] 2024-09-09 12:12:45,082 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
+[INFO|configuration_utils.py:472] 2024-09-09 12:12:45,084 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
+[INFO|modeling_utils.py:2799] 2024-09-09 12:12:46,408 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
+[INFO|tokenization_utils_base.py:2684] 2024-09-09 12:12:46,409 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
+[INFO|tokenization_utils_base.py:2693] 2024-09-09 12:12:46,410 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
+***** predict metrics *****
+  predict_accuracy = 0.9467
+  predict_f1 = 0.6952
+  predict_loss = 0.3348
+  predict_precision = 0.6863
+  predict_recall = 0.7042
+  predict_runtime = 0:00:09.74
+  predict_samples_per_second = 415.118
+  predict_steps_per_second = 51.903
+

train_results.json CHANGED

@@ -1,9 +1,9 @@
 {
-    "epoch": 10.0,
-    "total_flos": 8092971627384348.0,
-    "train_loss": 0.033683168437591816,
-    "train_runtime": 1318.6544,
-    "train_samples": 16483,
-    "train_samples_per_second": 124.999,
-    "train_steps_per_second": 1.957
+    "epoch": 9.975429975429975,
+    "total_flos": 6404835399317064.0,
+    "train_loss": 0.04138289297302368,
+    "train_runtime": 1065.756,
+    "train_samples": 13013,
+    "train_samples_per_second": 122.101,
+    "train_steps_per_second": 1.905
 }

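The raw total_flos above and the "total_flos = 5964967GF" line in train.log appear to be the same quantity in different units; the printed figure is consistent with flos scaled by 2^30. A quick check under that assumption:

```python
# Assumption: the Trainer's "GF" metric is total_flos divided by 2**30.
total_flos = 6404835399317064.0   # from train_results.json
print(int(total_flos) >> 30)      # 5964967, matching "total_flos = 5964967GF" in train.log
```
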
trainer_state.json CHANGED

@@ -1,180 +1,173 @@
 {
-    "best_metric": 0.696671105193076,
-    "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2322",
-    "epoch": 10.0,
+    "best_metric": 0.6984379136881121,
+    "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1831",
+    "epoch": 9.975429975429975,
     "eval_steps": 500,
-    "global_step": 2580,
+    "global_step": 2030,
     "is_hyper_param_search": false,
     "is_local_process_zero": true,
     "is_world_process_zero": true,
     "log_history": [
         {
-            "epoch": 1.0,
-            "eval_accuracy": 0.9488273605184638,
-            "eval_f1": 0.6400434664493344,
-            "eval_loss": 0.15167976915836334,
-            "eval_precision": 0.6353829557713053,
-            "eval_recall": 0.6447728516694033,
-            "eval_runtime": 5.9858,
-            "eval_samples_per_second": 420.828,
-            "eval_steps_per_second": 52.624,
-            "step": 258
-        },
-        {
-            "epoch": 1.937984496124031,
-            "grad_norm": 0.791233479976654,
-            "learning_rate": 4.0310077519379843e-05,
-            "loss": 0.1357,
-            "step": 500
+            "epoch": 0.9975429975429976,
+            "eval_accuracy": 0.9467740383072925,
+            "eval_f1": 0.6143236074270556,
+            "eval_loss": 0.15010379254817963,
+            "eval_precision": 0.5959855892949047,
+            "eval_recall": 0.6338259441707718,
+            "eval_runtime": 5.907,
+            "eval_samples_per_second": 426.445,
+            "eval_steps_per_second": 53.327,
+            "step": 203
         },
         {
             "epoch": 2.0,
-            "eval_accuracy": 0.945955917738779,
-            "eval_f1": 0.6695763799743261,
-            "eval_loss": 0.20252634584903717,
-            "eval_precision": 0.6305609284332688,
-            "eval_recall": 0.7137383689107827,
-            "eval_runtime": 5.9083,
-            "eval_samples_per_second": 426.351,
-            "eval_steps_per_second": 53.315,
-            "step": 516
+            "eval_accuracy": 0.949244441592608,
+            "eval_f1": 0.6728575218890952,
+            "eval_loss": 0.17612887918949127,
+            "eval_precision": 0.6529351184346035,
+            "eval_recall": 0.6940339354132458,
+            "eval_runtime": 5.8933,
+            "eval_samples_per_second": 427.436,
+            "eval_steps_per_second": 53.451,
+            "step": 407
         },
         {
-            "epoch": 3.0,
-            "eval_accuracy": 0.9495652731881036,
-            "eval_f1": 0.6838606753522999,
-            "eval_loss": 0.22940471768379211,
-            "eval_precision": 0.6649431230610134,
-            "eval_recall": 0.7038861521620142,
-            "eval_runtime": 5.8946,
-            "eval_samples_per_second": 427.339,
-            "eval_steps_per_second": 53.439,
-            "step": 774
+            "epoch": 2.457002457002457,
+            "grad_norm": 0.6181371212005615,
+            "learning_rate": 3.768472906403941e-05,
+            "loss": 0.1312,
+            "step": 500
         },
         {
-            "epoch": 3.875968992248062,
-            "grad_norm": 0.907940685749054,
-            "learning_rate": 3.062015503875969e-05,
-            "loss": 0.0238,
-            "step": 1000
+            "epoch": 2.9975429975429977,
+            "eval_accuracy": 0.9469665372645898,
+            "eval_f1": 0.671967171069505,
+            "eval_loss": 0.1995203047990799,
+            "eval_precision": 0.6322393822393823,
+            "eval_recall": 0.7170224411603722,
+            "eval_runtime": 5.8448,
+            "eval_samples_per_second": 430.983,
+            "eval_steps_per_second": 53.894,
+            "step": 610
         },
         {
             "epoch": 4.0,
-            "eval_accuracy": 0.9491963168532838,
-            "eval_f1": 0.6872504657971785,
-            "eval_loss": 0.28175726532936096,
-            "eval_precision": 0.6689119170984456,
-            "eval_recall": 0.7066228790366721,
-            "eval_runtime": 5.8968,
-            "eval_samples_per_second": 427.179,
-            "eval_steps_per_second": 53.419,
-            "step": 1032
+            "eval_accuracy": 0.9482979883858963,
+            "eval_f1": 0.6774025974025973,
+            "eval_loss": 0.21822449564933777,
+            "eval_precision": 0.6445872466633712,
+            "eval_recall": 0.7137383689107827,
+            "eval_runtime": 5.872,
+            "eval_samples_per_second": 428.988,
+            "eval_steps_per_second": 53.645,
+            "step": 814
         },
         {
-            "epoch": 5.0,
-            "eval_accuracy": 0.9487311110398152,
-            "eval_f1": 0.6773768764814327,
-            "eval_loss": 0.27620697021484375,
-            "eval_precision": 0.6527918781725889,
-            "eval_recall": 0.7038861521620142,
-            "eval_runtime": 5.9102,
-            "eval_samples_per_second": 426.215,
-            "eval_steps_per_second": 53.298,
-            "step": 1290
+            "epoch": 4.914004914004914,
+            "grad_norm": 0.7616795301437378,
+            "learning_rate": 2.5369458128078822e-05,
+            "loss": 0.0248,
+            "step": 1000
         },
         {
-            "epoch": 5.813953488372093,
-            "grad_norm": 0.2446424663066864,
-            "learning_rate": 2.0930232558139536e-05,
-            "loss": 0.0081,
-            "step": 1500
+            "epoch": 4.997542997542998,
+            "eval_accuracy": 0.9448650903140942,
+            "eval_f1": 0.6700533401066802,
+            "eval_loss": 0.24612903594970703,
+            "eval_precision": 0.6251184834123222,
+            "eval_recall": 0.7219485495347564,
+            "eval_runtime": 5.8462,
+            "eval_samples_per_second": 430.877,
+            "eval_steps_per_second": 53.881,
+            "step": 1017
         },
         {
             "epoch": 6.0,
-            "eval_accuracy": 0.9483942378645449,
-            "eval_f1": 0.6922672277748553,
-            "eval_loss": 0.2938424348831177,
-            "eval_precision": 0.6663291139240506,
-            "eval_recall": 0.7203065134099617,
-            "eval_runtime": 5.9062,
-            "eval_samples_per_second": 426.502,
-            "eval_steps_per_second": 53.334,
-            "step": 1548
+            "eval_accuracy": 0.9469023709454907,
+            "eval_f1": 0.6827021494370521,
+            "eval_loss": 0.26953065395355225,
+            "eval_precision": 0.6410379625180201,
+            "eval_recall": 0.7301587301587301,
+            "eval_runtime": 5.9067,
+            "eval_samples_per_second": 426.468,
+            "eval_steps_per_second": 53.33,
+            "step": 1221
         },
         {
-            "epoch": 7.0,
-            "eval_accuracy": 0.9499181879431486,
-            "eval_f1": 0.6893020749124225,
-            "eval_loss": 0.3144644498825073,
-            "eval_precision": 0.6788747346072187,
-            "eval_recall": 0.7000547345374931,
-            "eval_runtime": 5.9264,
-            "eval_samples_per_second": 425.045,
-            "eval_steps_per_second": 53.152,
-            "step": 1806
+            "epoch": 6.997542997542998,
+            "eval_accuracy": 0.9469986204241394,
+            "eval_f1": 0.6910590054109765,
+            "eval_loss": 0.2829184830188751,
+            "eval_precision": 0.6528724440116845,
+            "eval_recall": 0.7339901477832512,
+            "eval_runtime": 5.8572,
+            "eval_samples_per_second": 430.069,
+            "eval_steps_per_second": 53.78,
+            "step": 1424
         },
         {
-            "epoch": 7.751937984496124,
-            "grad_norm": 1.7770023345947266,
-            "learning_rate": 1.1240310077519382e-05,
-            "loss": 0.0039,
-            "step": 2000
+            "epoch": 7.371007371007371,
+            "grad_norm": 0.2855200171470642,
+            "learning_rate": 1.3054187192118228e-05,
+            "loss": 0.0081,
+            "step": 1500
         },
         {
             "epoch": 8.0,
-            "eval_accuracy": 0.949100067374635,
-            "eval_f1": 0.6865512649800267,
-            "eval_loss": 0.32666969299316406,
-            "eval_precision": 0.6685684647302904,
-            "eval_recall": 0.705528188286809,
-            "eval_runtime": 5.9058,
-            "eval_samples_per_second": 426.528,
-            "eval_steps_per_second": 53.337,
-            "step": 2064
+            "eval_accuracy": 0.9494048573903558,
+            "eval_f1": 0.6938127974616606,
+            "eval_loss": 0.29823970794677734,
+            "eval_precision": 0.6710997442455243,
+            "eval_recall": 0.7181171319102354,
+            "eval_runtime": 5.8929,
+            "eval_samples_per_second": 427.463,
+            "eval_steps_per_second": 53.454,
+            "step": 1628
         },
         {
-            "epoch": 9.0,
-            "eval_accuracy": 0.9490359010555359,
-            "eval_f1": 0.696671105193076,
-            "eval_loss": 0.33743786811828613,
-            "eval_precision": 0.6784232365145229,
-            "eval_recall": 0.715927750410509,
-            "eval_runtime": 5.9091,
-            "eval_samples_per_second": 426.292,
-            "eval_steps_per_second": 53.308,
-            "step": 2322
+            "epoch": 8.997542997542997,
+            "eval_accuracy": 0.9500465205813469,
+            "eval_f1": 0.6984379136881121,
+            "eval_loss": 0.30729904770851135,
+            "eval_precision": 0.6764102564102564,
+            "eval_recall": 0.7219485495347564,
+            "eval_runtime": 5.8665,
+            "eval_samples_per_second": 429.386,
+            "eval_steps_per_second": 53.695,
+            "step": 1831
         },
         {
-            "epoch": 9.689922480620154,
-            "grad_norm": 0.005967797711491585,
-            "learning_rate": 1.550387596899225e-06,
-            "loss": 0.0021,
-            "step": 2500
+            "epoch": 9.828009828009828,
+            "grad_norm": 0.6682894825935364,
+            "learning_rate": 7.389162561576355e-07,
+            "loss": 0.0038,
+            "step": 2000
         },
         {
-            "epoch": 10.0,
-            "eval_accuracy": 0.9495011068690045,
-            "eval_f1": 0.6949744692287019,
-            "eval_loss": 0.33997026085853577,
-            "eval_precision": 0.6826821541710665,
-            "eval_recall": 0.7077175697865353,
-            "eval_runtime": 6.2732,
-            "eval_samples_per_second": 401.551,
-            "eval_steps_per_second": 50.214,
-            "step": 2580
+            "epoch": 9.975429975429975,
+            "eval_accuracy": 0.9500465205813469,
+            "eval_f1": 0.6931427058512046,
+            "eval_loss": 0.3079104423522949,
+            "eval_precision": 0.6712820512820513,
+            "eval_recall": 0.7164750957854407,
+            "eval_runtime": 5.9033,
+            "eval_samples_per_second": 426.708,
+            "eval_steps_per_second": 53.36,
+            "step": 2030
         },
         {
-            "epoch": 10.0,
-            "step": 2580,
-            "total_flos": 8092971627384348.0,
-            "train_loss": 0.033683168437591816,
-            "train_runtime": 1318.6544,
-            "train_samples_per_second": 124.999,
-            "train_steps_per_second": 1.957
+            "epoch": 9.975429975429975,
+            "step": 2030,
+            "total_flos": 6404835399317064.0,
+            "train_loss": 0.04138289297302368,
+            "train_runtime": 1065.756,
+            "train_samples_per_second": 122.101,
+            "train_steps_per_second": 1.905
         }
     ],
     "logging_steps": 500,
-    "max_steps": 2580,
+    "max_steps": 2030,
     "num_input_tokens_seen": 0,
     "num_train_epochs": 10,
     "save_steps": 500,
@@ -190,7 +183,7 @@
             "attributes": {}
         }
     },
-    "total_flos": 8092971627384348.0,
+    "total_flos": 6404835399317064.0,
     "train_batch_size": 32,
     "trial_name": null,
     "trial_params": null