Rodrigo1771 committed on
Commit 09f50a0 (1 parent: 86da576)

Training in progress, epoch 0

README.md ADDED
@@ -0,0 +1,103 @@
---
library_name: transformers
license: apache-2.0
base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
tags:
- token-classification
- generated_from_trainer
datasets:
- Rodrigo1771/symptemist-fasttext-75-ner
metrics:
- precision
- recall
- f1
- accuracy
model-index:
- name: output
  results:
  - task:
      name: Token Classification
      type: token-classification
    dataset:
      name: Rodrigo1771/symptemist-fasttext-75-ner
      type: Rodrigo1771/symptemist-fasttext-75-ner
      config: SympTEMIST NER
      split: validation
      args: SympTEMIST NER
    metrics:
    - name: Precision
      type: precision
      value: 0.6784232365145229
    - name: Recall
      type: recall
      value: 0.715927750410509
    - name: F1
      type: f1
      value: 0.696671105193076
    - name: Accuracy
      type: accuracy
      value: 0.9490359010555359
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# output

This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/symptemist-fasttext-75-ner dataset.
It achieves the following results on the evaluation set (a usage sketch follows the list):
- Loss: 0.3374
- Precision: 0.6784
- Recall: 0.7159
- F1: 0.6967
- Accuracy: 0.9490
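
As a quick usage sketch (not part of the auto-generated card): the checkpoint can be loaded with the standard `token-classification` pipeline. The repo id `Rodrigo1771/output` is an assumption based on the model-index name; substitute the actual Hub path. The example sentence is only illustrative.

```python
from transformers import pipeline

# "Rodrigo1771/output" is a hypothetical repo id -- replace it with the
# actual Hub path of this checkpoint.
ner = pipeline(
    "token-classification",
    model="Rodrigo1771/output",
    aggregation_strategy="simple",  # merge B-SINTOMA / I-SINTOMA pieces into entity spans
)

print(ner("El paciente refiere cefalea intensa y náuseas desde hace dos días."))
```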

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training (sketched as `TrainingArguments` after the list):
- learning_rate: 5e-05
- train_batch_size: 32
- eval_batch_size: 8
- seed: 42
- gradient_accumulation_steps: 2
- total_train_batch_size: 64
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: linear
- num_epochs: 10.0
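
Expressed as `transformers.TrainingArguments`, the list above corresponds roughly to the sketch below. This is not the exact training invocation; the `output_dir`, epoch-level eval/save strategies, and best-model selection are taken from the `TrainingArguments` dump in `train.log` further down.

```python
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="/content/dissertation/scripts/ner/output",
    learning_rate=5e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=2,   # 32 * 2 = total train batch size of 64
    num_train_epochs=10,
    lr_scheduler_type="linear",      # the Adam betas/epsilon above are the optimizer defaults
    seed=42,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,     # restores the checkpoint with the best F1
    metric_for_best_model="f1",
    report_to=["tensorboard"],
)
```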

### Training results

| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1     | Accuracy |
|:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
| No log        | 1.0   | 258  | 0.1517          | 0.6354    | 0.6448 | 0.6400 | 0.9488   |
| 0.1357        | 2.0   | 516  | 0.2025          | 0.6306    | 0.7137 | 0.6696 | 0.9460   |
| 0.1357        | 3.0   | 774  | 0.2294          | 0.6649    | 0.7039 | 0.6839 | 0.9496   |
| 0.0238        | 4.0   | 1032 | 0.2818          | 0.6689    | 0.7066 | 0.6873 | 0.9492   |
| 0.0238        | 5.0   | 1290 | 0.2762          | 0.6528    | 0.7039 | 0.6774 | 0.9487   |
| 0.0081        | 6.0   | 1548 | 0.2938          | 0.6663    | 0.7203 | 0.6923 | 0.9484   |
| 0.0081        | 7.0   | 1806 | 0.3145          | 0.6789    | 0.7001 | 0.6893 | 0.9499   |
| 0.0039        | 8.0   | 2064 | 0.3267          | 0.6686    | 0.7055 | 0.6866 | 0.9491   |
| 0.0039        | 9.0   | 2322 | 0.3374          | 0.6784    | 0.7159 | 0.6967 | 0.9490   |
| 0.0021        | 10.0  | 2580 | 0.3400          | 0.6827    | 0.7077 | 0.6950 | 0.9495   |
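
Note that the headline metrics at the top of this card match the epoch 9 row (step 2322), not the final epoch: per the `TrainingArguments` dump in `train.log`, the run used `load_best_model_at_end=True` with `metric_for_best_model=f1`, and epoch 9 has the best validation F1 (0.6967 vs. 0.6950 at epoch 10).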

### Framework versions

- Transformers 4.44.2
- Pytorch 2.4.0+cu121
- Datasets 2.21.0
- Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,26 @@
{
    "epoch": 10.0,
    "eval_accuracy": 0.9490359010555359,
    "eval_f1": 0.696671105193076,
    "eval_loss": 0.33743786811828613,
    "eval_precision": 0.6784232365145229,
    "eval_recall": 0.715927750410509,
    "eval_runtime": 5.8603,
    "eval_samples": 2519,
    "eval_samples_per_second": 429.84,
    "eval_steps_per_second": 53.751,
    "predict_accuracy": 0.9470472034672878,
    "predict_f1": 0.701497292131252,
    "predict_loss": 0.3655967116355896,
    "predict_precision": 0.6944181646168401,
    "predict_recall": 0.7087222401029932,
    "predict_runtime": 9.7752,
    "predict_samples_per_second": 414.006,
    "predict_steps_per_second": 51.764,
    "total_flos": 8092971627384348.0,
    "train_loss": 0.033683168437591816,
    "train_runtime": 1318.6544,
    "train_samples": 16483,
    "train_samples_per_second": 124.999,
    "train_steps_per_second": 1.957
}
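
As a quick consistency check on the numbers above: `eval_samples_per_second` is `eval_samples / eval_runtime` (2519 / 5.8603 ≈ 429.84), and `train_samples_per_second` is `train_samples × num_epochs / train_runtime` (16483 × 10 / 1318.6544 ≈ 125.0).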
config.json ADDED
@@ -0,0 +1,39 @@
{
  "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
  "architectures": [
    "RobertaForTokenClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "finetuning_task": "ner",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "O",
    "1": "B-SINTOMA",
    "2": "I-SINTOMA"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "B-SINTOMA": 1,
    "I-SINTOMA": 2,
    "O": 0
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.44.2",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50262
}
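
The `id2label` / `label2id` maps above are what turn the classifier's three logits per token into BIO tags. A minimal decoding sketch (the repo id is again a hypothetical placeholder, and the input sentence is illustrative):

```python
import torch
from transformers import AutoModelForTokenClassification, AutoTokenizer

repo = "Rodrigo1771/output"  # hypothetical -- substitute the real Hub path
tok = AutoTokenizer.from_pretrained(repo)
model = AutoModelForTokenClassification.from_pretrained(repo)

enc = tok("Dolor torácico opresivo irradiado al brazo izquierdo", return_tensors="pt")
with torch.no_grad():
    logits = model(**enc).logits        # shape: (1, seq_len, 3)

pred_ids = logits.argmax(dim=-1)[0].tolist()
print([model.config.id2label[i] for i in pred_ids])
# one tag per subword token, e.g. ['O', 'B-SINTOMA', 'I-SINTOMA', ...]
```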
eval_results.json ADDED
@@ -0,0 +1,12 @@
{
    "epoch": 10.0,
    "eval_accuracy": 0.9490359010555359,
    "eval_f1": 0.696671105193076,
    "eval_loss": 0.33743786811828613,
    "eval_precision": 0.6784232365145229,
    "eval_recall": 0.715927750410509,
    "eval_runtime": 5.8603,
    "eval_samples": 2519,
    "eval_samples_per_second": 429.84,
    "eval_steps_per_second": 53.751
}
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bdf40d5f4f591a504434aa91dd3ebdc497806dee69c3e5919a3465e2334b9184
size 496244100
predict_results.json ADDED
@@ -0,0 +1,10 @@
{
    "predict_accuracy": 0.9470472034672878,
    "predict_f1": 0.701497292131252,
    "predict_loss": 0.3655967116355896,
    "predict_precision": 0.6944181646168401,
    "predict_recall": 0.7087222401029932,
    "predict_runtime": 9.7752,
    "predict_samples_per_second": 414.006,
    "predict_steps_per_second": 51.764
}
predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}
tb/events.out.tfevents.1725881335.0a1c9bec2a53.3232.0 ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:593e737686a00ae0f64a94f2ef02389ad7dff30c0ba6a6f2b1c65ac31e873867
size 11302
tb/events.out.tfevents.1725882696.0a1c9bec2a53.3232.1 ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:05ecdc6d00855fb66deb25a7b5be160aa0ebb2ebe07a43beb7d88fb0430fb141
size 560
tb/events.out.tfevents.1725882852.0a1c9bec2a53.9893.0 ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:96c1600632af625402c889456fb9a8dd268472a67db05041ef467ce081d18572
size 5645
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
{
  "add_prefix_space": true,
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "50261": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": true,
  "cls_token": "<s>",
  "eos_token": "</s>",
  "errors": "replace",
  "mask_token": "<mask>",
  "max_len": 512,
  "model_max_length": 512,
  "pad_token": "<pad>",
  "sep_token": "</s>",
  "tokenizer_class": "RobertaTokenizer",
  "trim_offsets": true,
  "unk_token": "<unk>"
}
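
One detail worth flagging for token classification: `add_prefix_space: true` is what lets this byte-level BPE tokenizer accept pre-split words (`is_split_into_words=True`), which NER training needs in order to align word-level tags with subwords. A small sketch, assuming the fast tokenizer and the same placeholder repo id:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Rodrigo1771/output", add_prefix_space=True)  # placeholder repo id

enc = tok(["Dolor", "torácico", "opresivo"], is_split_into_words=True)
print(enc.tokens())    # subword pieces, including <s> and </s>
print(enc.word_ids())  # maps each piece back to its source word, e.g. [None, 0, 1, 1, 2, None]
```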
train.log ADDED
@@ -0,0 +1,357 @@
2024-09-09 11:53:51.396276: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-09 11:53:51.414891: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-09 11:53:51.436268: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-09 11:53:51.442683: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-09 11:53:51.458047: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-09-09 11:53:52.683988: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
  warnings.warn(
09/09/2024 11:53:54 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, 16-bits training: False
09/09/2024 11:53:54 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
batch_eval_metrics=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=True,
do_predict=True,
do_train=True,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=None,
eval_strategy=epoch,
eval_use_gather_object=False,
evaluation_strategy=epoch,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=2,
gradient_checkpointing=False,
gradient_checkpointing_kwargs=None,
greater_is_better=True,
group_by_length=False,
half_precision_backend=auto,
hub_always_push=False,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_inputs_for_metrics=False,
include_num_input_tokens_seen=False,
include_tokens_per_second=False,
jit_mode_eval=False,
label_names=None,
label_smoothing_factor=0.0,
learning_rate=5e-05,
length_column_name=length,
load_best_model_at_end=True,
local_rank=0,
log_level=passive,
log_level_replica=warning,
log_on_each_node=True,
logging_dir=/content/dissertation/scripts/ner/output/tb,
logging_first_step=False,
logging_nan_inf_filter=True,
logging_steps=500,
logging_strategy=steps,
lr_scheduler_kwargs={},
lr_scheduler_type=linear,
max_grad_norm=1.0,
max_steps=-1,
metric_for_best_model=f1,
mp_parameters=,
neftune_noise_alpha=None,
no_cuda=False,
num_train_epochs=10.0,
optim=adamw_torch,
optim_args=None,
optim_target_modules=None,
output_dir=/content/dissertation/scripts/ner/output,
overwrite_output_dir=True,
past_index=-1,
per_device_eval_batch_size=8,
per_device_train_batch_size=32,
prediction_loss_only=False,
push_to_hub=True,
push_to_hub_model_id=None,
push_to_hub_organization=None,
push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
ray_scope=last,
remove_unused_columns=True,
report_to=['tensorboard'],
restore_callback_states_from_checkpoint=False,
resume_from_checkpoint=None,
run_name=/content/dissertation/scripts/ner/output,
save_on_each_node=False,
save_only_model=False,
save_safetensors=True,
save_steps=500,
save_strategy=epoch,
save_total_limit=None,
seed=42,
skip_memory_metrics=True,
split_batches=None,
tf32=None,
torch_compile=False,
torch_compile_backend=None,
torch_compile_mode=None,
torch_empty_cache_steps=None,
torchdynamo=None,
tpu_metrics_debug=False,
tpu_num_cores=None,
use_cpu=False,
use_ipex=False,
use_legacy_prediction_loop=False,
use_mps_device=False,
warmup_ratio=0.0,
warmup_steps=0,
weight_decay=0.0,
)

[INFO|configuration_utils.py:733] 2024-09-09 11:54:06,987 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
[INFO|configuration_utils.py:800] 2024-09-09 11:54:06,991 >> Model config RobertaConfig {
  "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "finetuning_task": "ner",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "O",
    "1": "B-SINTOMA",
    "2": "I-SINTOMA"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "B-SINTOMA": 1,
    "I-SINTOMA": 2,
    "O": 0
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.44.2",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50262
}

[INFO|configuration_utils.py:733] 2024-09-09 11:54:07,264 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
[INFO|configuration_utils.py:800] 2024-09-09 11:54:07,265 >> Model config RobertaConfig {
  "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.44.2",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50262
}

[INFO|tokenization_utils_base.py:2269] 2024-09-09 11:54:07,275 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/vocab.json
[INFO|tokenization_utils_base.py:2269] 2024-09-09 11:54:07,275 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/merges.txt
[INFO|tokenization_utils_base.py:2269] 2024-09-09 11:54:07,275 >> loading file tokenizer.json from cache at None
[INFO|tokenization_utils_base.py:2269] 2024-09-09 11:54:07,275 >> loading file added_tokens.json from cache at None
[INFO|tokenization_utils_base.py:2269] 2024-09-09 11:54:07,275 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/special_tokens_map.json
[INFO|tokenization_utils_base.py:2269] 2024-09-09 11:54:07,275 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/tokenizer_config.json
[INFO|configuration_utils.py:733] 2024-09-09 11:54:07,275 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
[INFO|configuration_utils.py:800] 2024-09-09 11:54:07,276 >> Model config RobertaConfig {
  "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.44.2",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50262
}

/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884
  warnings.warn(
[INFO|configuration_utils.py:733] 2024-09-09 11:54:07,353 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
[INFO|configuration_utils.py:800] 2024-09-09 11:54:07,354 >> Model config RobertaConfig {
  "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.44.2",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50262
}

[INFO|modeling_utils.py:3678] 2024-09-09 11:54:07,676 >> loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/pytorch_model.bin
[INFO|modeling_utils.py:4497] 2024-09-09 11:54:07,755 >> Some weights of the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es were not used when initializing RobertaForTokenClassification: ['lm_head.bias', 'lm_head.decoder.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[WARNING|modeling_utils.py:4509] 2024-09-09 11:54:07,755 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.

/content/dissertation/scripts/ner/run_ner_train.py:397: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate
  metric = load_metric("seqeval", trust_remote_code=True)
[INFO|trainer.py:811] 2024-09-09 11:54:12,226 >> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
[INFO|trainer.py:2134] 2024-09-09 11:54:12,775 >> ***** Running training *****
[INFO|trainer.py:2135] 2024-09-09 11:54:12,776 >> Num examples = 13,013
[INFO|trainer.py:2136] 2024-09-09 11:54:12,776 >> Num Epochs = 10
[INFO|trainer.py:2137] 2024-09-09 11:54:12,776 >> Instantaneous batch size per device = 32
[INFO|trainer.py:2140] 2024-09-09 11:54:12,776 >> Total train batch size (w. parallel, distributed & accumulation) = 64
[INFO|trainer.py:2141] 2024-09-09 11:54:12,776 >> Gradient Accumulation steps = 2
[INFO|trainer.py:2142] 2024-09-09 11:54:12,776 >> Total optimization steps = 2,030
[INFO|trainer.py:2143] 2024-09-09 11:54:12,776 >> Number of trainable parameters = 124,055,043
  [tqdm progress bars omitted: training steps 1/2030 through 203/2030, roughly 2 it/s]
  10%|█         | 203/2030 [01:35<14:58, 2.03it/s]
[INFO|trainer.py:811] 2024-09-09 11:55:48,641 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
[INFO|trainer.py:3819] 2024-09-09 11:55:48,644 >>
***** Running Evaluation *****
[INFO|trainer.py:3821] 2024-09-09 11:55:48,644 >> Num examples = 2519
[INFO|trainer.py:3824] 2024-09-09 11:55:48,644 >> Batch size = 8
  [tqdm progress bars omitted: evaluation 0/315 through 308/315, roughly 70 it/s]
  10%|█         | 203/2030 [01:41<14:58, 2.03it/s]
[INFO|trainer.py:3503] 2024-09-09 11:55:54,552 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-203
[INFO|configuration_utils.py:472] 2024-09-09 11:55:54,553 >> Configuration saved in /content/dissertation/scripts/ner/output/checkpoint-203/config.json
[INFO|modeling_utils.py:2799] 2024-09-09 11:55:55,568 >> Model weights saved in /content/dissertation/scripts/ner/output/checkpoint-203/model.safetensors
[INFO|tokenization_utils_base.py:2684] 2024-09-09 11:55:55,569 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/checkpoint-203/tokenizer_config.json
[INFO|tokenization_utils_base.py:2693] 2024-09-09 11:55:55,569 >> Special tokens file saved in /content/dissertation/scripts/ner/output/checkpoint-203/special_tokens_map.json
[INFO|tokenization_utils_base.py:2684] 2024-09-09 11:56:00,182 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
[INFO|tokenization_utils_base.py:2693] 2024-09-09 11:56:00,183 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
[tqdm progress: training 204/2030 → 311/2030, ~2 it/s]
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 10.0,
+   "total_flos": 8092971627384348.0,
+   "train_loss": 0.033683168437591816,
+   "train_runtime": 1318.6544,
+   "train_samples": 16483,
+   "train_samples_per_second": 124.999,
+   "train_steps_per_second": 1.957
+ }
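The throughput fields above can be re-derived from the other entries. A quick consistency check, assuming the file has been downloaded locally:

```python
import json

with open("train_results.json") as f:
    r = json.load(f)

# samples/sec = samples per epoch * epochs / wall-clock seconds
samples_per_sec = r["train_samples"] * r["epoch"] / r["train_runtime"]
print(round(samples_per_sec, 3))  # 124.999, matching train_samples_per_second

# steps/sec uses the 2580 optimizer steps recorded in trainer_state.json
print(round(2580 / r["train_runtime"], 3))  # 1.957
```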
trainer_state.json ADDED
@@ -0,0 +1,197 @@
+ {
+   "best_metric": 0.696671105193076,
+   "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2322",
+   "epoch": 10.0,
+   "eval_steps": 500,
+   "global_step": 2580,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.9488273605184638,
+       "eval_f1": 0.6400434664493344,
+       "eval_loss": 0.15167976915836334,
+       "eval_precision": 0.6353829557713053,
+       "eval_recall": 0.6447728516694033,
+       "eval_runtime": 5.9858,
+       "eval_samples_per_second": 420.828,
+       "eval_steps_per_second": 52.624,
+       "step": 258
+     },
+     {
+       "epoch": 1.937984496124031,
+       "grad_norm": 0.791233479976654,
+       "learning_rate": 4.0310077519379843e-05,
+       "loss": 0.1357,
+       "step": 500
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.945955917738779,
+       "eval_f1": 0.6695763799743261,
+       "eval_loss": 0.20252634584903717,
+       "eval_precision": 0.6305609284332688,
+       "eval_recall": 0.7137383689107827,
+       "eval_runtime": 5.9083,
+       "eval_samples_per_second": 426.351,
+       "eval_steps_per_second": 53.315,
+       "step": 516
+     },
+     {
+       "epoch": 3.0,
+       "eval_accuracy": 0.9495652731881036,
+       "eval_f1": 0.6838606753522999,
+       "eval_loss": 0.22940471768379211,
+       "eval_precision": 0.6649431230610134,
+       "eval_recall": 0.7038861521620142,
+       "eval_runtime": 5.8946,
+       "eval_samples_per_second": 427.339,
+       "eval_steps_per_second": 53.439,
+       "step": 774
+     },
+     {
+       "epoch": 3.875968992248062,
+       "grad_norm": 0.907940685749054,
+       "learning_rate": 3.062015503875969e-05,
+       "loss": 0.0238,
+       "step": 1000
+     },
+     {
+       "epoch": 4.0,
+       "eval_accuracy": 0.9491963168532838,
+       "eval_f1": 0.6872504657971785,
+       "eval_loss": 0.28175726532936096,
+       "eval_precision": 0.6689119170984456,
+       "eval_recall": 0.7066228790366721,
+       "eval_runtime": 5.8968,
+       "eval_samples_per_second": 427.179,
+       "eval_steps_per_second": 53.419,
+       "step": 1032
+     },
+     {
+       "epoch": 5.0,
+       "eval_accuracy": 0.9487311110398152,
+       "eval_f1": 0.6773768764814327,
+       "eval_loss": 0.27620697021484375,
+       "eval_precision": 0.6527918781725889,
+       "eval_recall": 0.7038861521620142,
+       "eval_runtime": 5.9102,
+       "eval_samples_per_second": 426.215,
+       "eval_steps_per_second": 53.298,
+       "step": 1290
+     },
+     {
+       "epoch": 5.813953488372093,
+       "grad_norm": 0.2446424663066864,
+       "learning_rate": 2.0930232558139536e-05,
+       "loss": 0.0081,
+       "step": 1500
+     },
+     {
+       "epoch": 6.0,
+       "eval_accuracy": 0.9483942378645449,
+       "eval_f1": 0.6922672277748553,
+       "eval_loss": 0.2938424348831177,
+       "eval_precision": 0.6663291139240506,
+       "eval_recall": 0.7203065134099617,
+       "eval_runtime": 5.9062,
+       "eval_samples_per_second": 426.502,
+       "eval_steps_per_second": 53.334,
+       "step": 1548
+     },
+     {
+       "epoch": 7.0,
+       "eval_accuracy": 0.9499181879431486,
+       "eval_f1": 0.6893020749124225,
+       "eval_loss": 0.3144644498825073,
+       "eval_precision": 0.6788747346072187,
+       "eval_recall": 0.7000547345374931,
+       "eval_runtime": 5.9264,
+       "eval_samples_per_second": 425.045,
+       "eval_steps_per_second": 53.152,
+       "step": 1806
+     },
+     {
+       "epoch": 7.751937984496124,
+       "grad_norm": 1.7770023345947266,
+       "learning_rate": 1.1240310077519382e-05,
+       "loss": 0.0039,
+       "step": 2000
+     },
+     {
+       "epoch": 8.0,
+       "eval_accuracy": 0.949100067374635,
+       "eval_f1": 0.6865512649800267,
+       "eval_loss": 0.32666969299316406,
+       "eval_precision": 0.6685684647302904,
+       "eval_recall": 0.705528188286809,
+       "eval_runtime": 5.9058,
+       "eval_samples_per_second": 426.528,
+       "eval_steps_per_second": 53.337,
+       "step": 2064
+     },
+     {
+       "epoch": 9.0,
+       "eval_accuracy": 0.9490359010555359,
+       "eval_f1": 0.696671105193076,
+       "eval_loss": 0.33743786811828613,
+       "eval_precision": 0.6784232365145229,
+       "eval_recall": 0.715927750410509,
+       "eval_runtime": 5.9091,
+       "eval_samples_per_second": 426.292,
+       "eval_steps_per_second": 53.308,
+       "step": 2322
+     },
+     {
+       "epoch": 9.689922480620154,
+       "grad_norm": 0.005967797711491585,
+       "learning_rate": 1.550387596899225e-06,
+       "loss": 0.0021,
+       "step": 2500
+     },
+     {
+       "epoch": 10.0,
+       "eval_accuracy": 0.9495011068690045,
+       "eval_f1": 0.6949744692287019,
+       "eval_loss": 0.33997026085853577,
+       "eval_precision": 0.6826821541710665,
+       "eval_recall": 0.7077175697865353,
+       "eval_runtime": 6.2732,
+       "eval_samples_per_second": 401.551,
+       "eval_steps_per_second": 50.214,
+       "step": 2580
+     },
+     {
+       "epoch": 10.0,
+       "step": 2580,
+       "total_flos": 8092971627384348.0,
+       "train_loss": 0.033683168437591816,
+       "train_runtime": 1318.6544,
+       "train_samples_per_second": 124.999,
+       "train_steps_per_second": 1.957
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 2580,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 10,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 8092971627384348.0,
+   "train_batch_size": 32,
+   "trial_name": null,
+   "trial_params": null
+ }
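Since trainer_state.json is plain JSON, the evaluation history can be queried directly, e.g. to confirm that best_model_checkpoint really corresponds to the highest eval_f1 (0.6967 at step 2322). A small sketch, assuming the file is local:

```python
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Only the per-epoch evaluation entries in log_history carry eval_f1.
evals = [e for e in state["log_history"] if "eval_f1" in e]
best = max(evals, key=lambda e: e["eval_f1"])

print(best["step"], round(best["eval_f1"], 4))  # 2322 0.6967
print(state["best_model_checkpoint"])           # .../checkpoint-2322
```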
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:13556e6c97b2f39e25d5830ab0bc61ce81f807bcf643d150d23dd97c2f606c57
+ size 5240
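What is committed here is only the Git LFS pointer; the 5240-byte object behind it is a pickled TrainingArguments. A hedged sketch of inspecting it once the real file has been fetched (e.g. via `git lfs pull`); note that `weights_only=False` unpickles arbitrary objects and should only be used on files you trust:

```python
import torch

# Loads the serialized TrainingArguments saved alongside the model.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs)  # 5e-05 10.0 for this run
```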
vocab.json ADDED
The diff for this file is too large to render. See raw diff