Rodrigo1771
committed on
Commit
•
09f50a0
1
Parent(s):
86da576
Training in progress, epoch 0
Browse files
- README.md +103 -0
- all_results.json +26 -0
- config.json +39 -0
- eval_results.json +12 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- predict_results.json +10 -0
- predictions.txt +0 -0
- special_tokens_map.json +51 -0
- tb/events.out.tfevents.1725881335.0a1c9bec2a53.3232.0 +3 -0
- tb/events.out.tfevents.1725882696.0a1c9bec2a53.3232.1 +3 -0
- tb/events.out.tfevents.1725882852.0a1c9bec2a53.9893.0 +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +58 -0
- train.log +357 -0
- train_results.json +9 -0
- trainer_state.json +197 -0
- training_args.bin +3 -0
- vocab.json +0 -0
README.md
ADDED
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: transformers
|
3 |
+
license: apache-2.0
|
4 |
+
base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
|
5 |
+
tags:
|
6 |
+
- token-classification
|
7 |
+
- generated_from_trainer
|
8 |
+
datasets:
|
9 |
+
- Rodrigo1771/symptemist-fasttext-75-ner
|
10 |
+
metrics:
|
11 |
+
- precision
|
12 |
+
- recall
|
13 |
+
- f1
|
14 |
+
- accuracy
|
15 |
+
model-index:
|
16 |
+
- name: output
|
17 |
+
results:
|
18 |
+
- task:
|
19 |
+
name: Token Classification
|
20 |
+
type: token-classification
|
21 |
+
dataset:
|
22 |
+
name: Rodrigo1771/symptemist-fasttext-75-ner
|
23 |
+
type: Rodrigo1771/symptemist-fasttext-75-ner
|
24 |
+
config: SympTEMIST NER
|
25 |
+
split: validation
|
26 |
+
args: SympTEMIST NER
|
27 |
+
metrics:
|
28 |
+
- name: Precision
|
29 |
+
type: precision
|
30 |
+
value: 0.6784232365145229
|
31 |
+
- name: Recall
|
32 |
+
type: recall
|
33 |
+
value: 0.715927750410509
|
34 |
+
- name: F1
|
35 |
+
type: f1
|
36 |
+
value: 0.696671105193076
|
37 |
+
- name: Accuracy
|
38 |
+
type: accuracy
|
39 |
+
value: 0.9490359010555359
|
40 |
+
---
|
41 |
+
|
42 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
43 |
+
should probably proofread and complete it, then remove this comment. -->
|
44 |
+
|
45 |
+
# output
|
46 |
+
|
47 |
+
This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/symptemist-fasttext-75-ner dataset.
|
48 |
+
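It achieves the following results on the evaluation set (the validation split; these figures correspond to the epoch 9 checkpoint, step 2322, in the training results table below):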
|
49 |
+
- Loss: 0.3374
|
50 |
+
- Precision: 0.6784
|
51 |
+
- Recall: 0.7159
|
52 |
+
- F1: 0.6967
|
53 |
+
- Accuracy: 0.9490
|
54 |
+
|
55 |
+
## Model description
|
56 |
+
|
57 |
+
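A `RobertaForTokenClassification` model fine-tuned from PlanTL-GOB-ES/bsc-bio-ehr-es for named-entity recognition; it labels each token as `O`, `B-SINTOMA` or `I-SINTOMA`.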
|
58 |
+
|
59 |
+
## Intended uses & limitations
|
60 |
+
|
61 |
+
More information needed
|
62 |
+
|
63 |
+
## Training and evaluation data
|
64 |
+
|
65 |
+
More information needed
|
66 |
+
|
67 |
+
## Training procedure
|
68 |
+
|
69 |
+
### Training hyperparameters
|
70 |
+
|
71 |
+
The following hyperparameters were used during training:
|
72 |
+
- learning_rate: 5e-05
|
73 |
+
- train_batch_size: 32
|
74 |
+
- eval_batch_size: 8
|
75 |
+
- seed: 42
|
76 |
+
- gradient_accumulation_steps: 2
|
77 |
+
- total_train_batch_size: 64
|
78 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
79 |
+
- lr_scheduler_type: linear
|
80 |
+
- num_epochs: 10.0
|
81 |
+
|
82 |
+
### Training results
|
83 |
+
|
84 |
+
| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
|
85 |
+
|:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
|
86 |
+
| No log | 1.0 | 258 | 0.1517 | 0.6354 | 0.6448 | 0.6400 | 0.9488 |
|
87 |
+
| 0.1357 | 2.0 | 516 | 0.2025 | 0.6306 | 0.7137 | 0.6696 | 0.9460 |
|
88 |
+
| 0.1357 | 3.0 | 774 | 0.2294 | 0.6649 | 0.7039 | 0.6839 | 0.9496 |
|
89 |
+
| 0.0238 | 4.0 | 1032 | 0.2818 | 0.6689 | 0.7066 | 0.6873 | 0.9492 |
|
90 |
+
| 0.0238 | 5.0 | 1290 | 0.2762 | 0.6528 | 0.7039 | 0.6774 | 0.9487 |
|
91 |
+
| 0.0081 | 6.0 | 1548 | 0.2938 | 0.6663 | 0.7203 | 0.6923 | 0.9484 |
|
92 |
+
| 0.0081 | 7.0 | 1806 | 0.3145 | 0.6789 | 0.7001 | 0.6893 | 0.9499 |
|
93 |
+
| 0.0039 | 8.0 | 2064 | 0.3267 | 0.6686 | 0.7055 | 0.6866 | 0.9491 |
|
94 |
+
| 0.0039 | 9.0 | 2322 | 0.3374 | 0.6784 | 0.7159 | 0.6967 | 0.9490 |
|
95 |
+
| 0.0021 | 10.0 | 2580 | 0.3400 | 0.6827 | 0.7077 | 0.6950 | 0.9495 |
|
96 |
+
|
97 |
+
|
98 |
+
### Framework versions
|
99 |
+
|
100 |
+
- Transformers 4.44.2
|
101 |
+
- PyTorch 2.4.0+cu121
|
102 |
+
- Datasets 2.21.0
|
103 |
+
- Tokenizers 0.19.1
|
all_results.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"eval_accuracy": 0.9490359010555359,
|
4 |
+
"eval_f1": 0.696671105193076,
|
5 |
+
"eval_loss": 0.33743786811828613,
|
6 |
+
"eval_precision": 0.6784232365145229,
|
7 |
+
"eval_recall": 0.715927750410509,
|
8 |
+
"eval_runtime": 5.8603,
|
9 |
+
"eval_samples": 2519,
|
10 |
+
"eval_samples_per_second": 429.84,
|
11 |
+
"eval_steps_per_second": 53.751,
|
12 |
+
"predict_accuracy": 0.9470472034672878,
|
13 |
+
"predict_f1": 0.701497292131252,
|
14 |
+
"predict_loss": 0.3655967116355896,
|
15 |
+
"predict_precision": 0.6944181646168401,
|
16 |
+
"predict_recall": 0.7087222401029932,
|
17 |
+
"predict_runtime": 9.7752,
|
18 |
+
"predict_samples_per_second": 414.006,
|
19 |
+
"predict_steps_per_second": 51.764,
|
20 |
+
"total_flos": 8092971627384348.0,
|
21 |
+
"train_loss": 0.033683168437591816,
|
22 |
+
"train_runtime": 1318.6544,
|
23 |
+
"train_samples": 16483,
|
24 |
+
"train_samples_per_second": 124.999,
|
25 |
+
"train_steps_per_second": 1.957
|
26 |
+
}
|
config.json
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
|
3 |
+
"architectures": [
|
4 |
+
"RobertaForTokenClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"finetuning_task": "ner",
|
11 |
+
"gradient_checkpointing": false,
|
12 |
+
"hidden_act": "gelu",
|
13 |
+
"hidden_dropout_prob": 0.1,
|
14 |
+
"hidden_size": 768,
|
15 |
+
"id2label": {
|
16 |
+
"0": "O",
|
17 |
+
"1": "B-SINTOMA",
|
18 |
+
"2": "I-SINTOMA"
|
19 |
+
},
|
20 |
+
"initializer_range": 0.02,
|
21 |
+
"intermediate_size": 3072,
|
22 |
+
"label2id": {
|
23 |
+
"B-SINTOMA": 1,
|
24 |
+
"I-SINTOMA": 2,
|
25 |
+
"O": 0
|
26 |
+
},
|
27 |
+
"layer_norm_eps": 1e-05,
|
28 |
+
"max_position_embeddings": 514,
|
29 |
+
"model_type": "roberta",
|
30 |
+
"num_attention_heads": 12,
|
31 |
+
"num_hidden_layers": 12,
|
32 |
+
"pad_token_id": 1,
|
33 |
+
"position_embedding_type": "absolute",
|
34 |
+
"torch_dtype": "float32",
|
35 |
+
"transformers_version": "4.44.2",
|
36 |
+
"type_vocab_size": 1,
|
37 |
+
"use_cache": true,
|
38 |
+
"vocab_size": 50262
|
39 |
+
}
|
eval_results.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"eval_accuracy": 0.9490359010555359,
|
4 |
+
"eval_f1": 0.696671105193076,
|
5 |
+
"eval_loss": 0.33743786811828613,
|
6 |
+
"eval_precision": 0.6784232365145229,
|
7 |
+
"eval_recall": 0.715927750410509,
|
8 |
+
"eval_runtime": 5.8603,
|
9 |
+
"eval_samples": 2519,
|
10 |
+
"eval_samples_per_second": 429.84,
|
11 |
+
"eval_steps_per_second": 53.751
|
12 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdf40d5f4f591a504434aa91dd3ebdc497806dee69c3e5919a3465e2334b9184
|
3 |
+
size 496244100
|
predict_results.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"predict_accuracy": 0.9470472034672878,
|
3 |
+
"predict_f1": 0.701497292131252,
|
4 |
+
"predict_loss": 0.3655967116355896,
|
5 |
+
"predict_precision": 0.6944181646168401,
|
6 |
+
"predict_recall": 0.7087222401029932,
|
7 |
+
"predict_runtime": 9.7752,
|
8 |
+
"predict_samples_per_second": 414.006,
|
9 |
+
"predict_steps_per_second": 51.764
|
10 |
+
}
|
predictions.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
special_tokens_map.json
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": true,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"cls_token": {
|
10 |
+
"content": "<s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": true,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"eos_token": {
|
17 |
+
"content": "</s>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": true,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"mask_token": {
|
24 |
+
"content": "<mask>",
|
25 |
+
"lstrip": true,
|
26 |
+
"normalized": true,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"pad_token": {
|
31 |
+
"content": "<pad>",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": true,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
},
|
37 |
+
"sep_token": {
|
38 |
+
"content": "</s>",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": true,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false
|
43 |
+
},
|
44 |
+
"unk_token": {
|
45 |
+
"content": "<unk>",
|
46 |
+
"lstrip": false,
|
47 |
+
"normalized": true,
|
48 |
+
"rstrip": false,
|
49 |
+
"single_word": false
|
50 |
+
}
|
51 |
+
}
|
tb/events.out.tfevents.1725881335.0a1c9bec2a53.3232.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:593e737686a00ae0f64a94f2ef02389ad7dff30c0ba6a6f2b1c65ac31e873867
|
3 |
+
size 11302
|
tb/events.out.tfevents.1725882696.0a1c9bec2a53.3232.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05ecdc6d00855fb66deb25a7b5be160aa0ebb2ebe07a43beb7d88fb0430fb141
|
3 |
+
size 560
|
tb/events.out.tfevents.1725882852.0a1c9bec2a53.9893.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96c1600632af625402c889456fb9a8dd268472a67db05041ef467ce081d18572
|
3 |
+
size 5645
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": true,
|
3 |
+
"added_tokens_decoder": {
|
4 |
+
"0": {
|
5 |
+
"content": "<s>",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": true,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false,
|
10 |
+
"special": true
|
11 |
+
},
|
12 |
+
"1": {
|
13 |
+
"content": "<pad>",
|
14 |
+
"lstrip": false,
|
15 |
+
"normalized": true,
|
16 |
+
"rstrip": false,
|
17 |
+
"single_word": false,
|
18 |
+
"special": true
|
19 |
+
},
|
20 |
+
"2": {
|
21 |
+
"content": "</s>",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": true,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false,
|
26 |
+
"special": true
|
27 |
+
},
|
28 |
+
"3": {
|
29 |
+
"content": "<unk>",
|
30 |
+
"lstrip": false,
|
31 |
+
"normalized": true,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false,
|
34 |
+
"special": true
|
35 |
+
},
|
36 |
+
"50261": {
|
37 |
+
"content": "<mask>",
|
38 |
+
"lstrip": true,
|
39 |
+
"normalized": true,
|
40 |
+
"rstrip": false,
|
41 |
+
"single_word": false,
|
42 |
+
"special": true
|
43 |
+
}
|
44 |
+
},
|
45 |
+
"bos_token": "<s>",
|
46 |
+
"clean_up_tokenization_spaces": true,
|
47 |
+
"cls_token": "<s>",
|
48 |
+
"eos_token": "</s>",
|
49 |
+
"errors": "replace",
|
50 |
+
"mask_token": "<mask>",
|
51 |
+
"max_len": 512,
|
52 |
+
"model_max_length": 512,
|
53 |
+
"pad_token": "<pad>",
|
54 |
+
"sep_token": "</s>",
|
55 |
+
"tokenizer_class": "RobertaTokenizer",
|
56 |
+
"trim_offsets": true,
|
57 |
+
"unk_token": "<unk>"
|
58 |
+
}
|
train.log
ADDED
@@ -0,0 +1,357 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
0 |
0%| | 0/2030 [00:00<?, ?it/s]
|
1 |
0%| | 1/2030 [00:01<43:23, 1.28s/it]
|
2 |
0%| | 2/2030 [00:01<27:30, 1.23it/s]
|
3 |
0%| | 3/2030 [00:02<22:20, 1.51it/s]
|
4 |
0%| | 4/2030 [00:02<20:58, 1.61it/s]
|
5 |
0%| | 5/2030 [00:03<18:15, 1.85it/s]
|
6 |
0%| | 6/2030 [00:03<16:47, 2.01it/s]
|
7 |
0%| | 7/2030 [00:04<16:12, 2.08it/s]
|
8 |
0%| | 8/2030 [00:04<16:25, 2.05it/s]
|
9 |
0%| | 9/2030 [00:05<15:51, 2.12it/s]
|
10 |
0%| | 10/2030 [00:05<16:55, 1.99it/s]
|
11 |
1%| | 11/2030 [00:05<15:19, 2.20it/s]
|
12 |
1%| | 12/2030 [00:06<17:18, 1.94it/s]
|
13 |
1%| | 13/2030 [00:07<16:43, 2.01it/s]
|
14 |
1%| | 14/2030 [00:07<18:38, 1.80it/s]
|
15 |
1%| | 15/2030 [00:08<16:34, 2.03it/s]
|
16 |
1%| | 16/2030 [00:08<17:35, 1.91it/s]
|
17 |
1%| | 17/2030 [00:09<16:44, 2.00it/s]
|
18 |
1%| | 18/2030 [00:09<18:13, 1.84it/s]
|
19 |
1%| | 19/2030 [00:10<23:35, 1.42it/s]
|
20 |
1%| | 20/2030 [00:11<22:07, 1.51it/s]
|
21 |
1%| | 21/2030 [00:11<19:46, 1.69it/s]
|
22 |
1%| | 22/2030 [00:12<17:22, 1.93it/s]
|
23 |
1%| | 23/2030 [00:12<19:14, 1.74it/s]
|
24 |
1%| | 24/2030 [00:13<18:36, 1.80it/s]
|
25 |
1%| | 25/2030 [00:13<17:01, 1.96it/s]
|
26 |
1%|▏ | 26/2030 [00:14<16:28, 2.03it/s]
|
27 |
1%|▏ | 27/2030 [00:14<16:55, 1.97it/s]
|
28 |
1%|▏ | 28/2030 [00:15<18:05, 1.84it/s]
|
29 |
1%|▏ | 29/2030 [00:15<16:38, 2.00it/s]
|
30 |
1%|▏ | 30/2030 [00:16<16:25, 2.03it/s]
|
31 |
2%|▏ | 31/2030 [00:16<16:14, 2.05it/s]
|
32 |
2%|▏ | 32/2030 [00:17<16:03, 2.07it/s]
|
33 |
2%|▏ | 33/2030 [00:17<17:38, 1.89it/s]
|
34 |
2%|▏ | 34/2030 [00:18<16:22, 2.03it/s]
|
35 |
2%|▏ | 35/2030 [00:18<15:07, 2.20it/s]
|
36 |
2%|▏ | 36/2030 [00:19<14:36, 2.28it/s]
|
37 |
2%|▏ | 37/2030 [00:19<15:03, 2.21it/s]
|
38 |
2%|▏ | 38/2030 [00:19<14:25, 2.30it/s]
|
39 |
2%|▏ | 39/2030 [00:20<15:44, 2.11it/s]
|
40 |
2%|▏ | 40/2030 [00:20<14:43, 2.25it/s]
|
41 |
2%|▏ | 41/2030 [00:21<14:17, 2.32it/s]
|
42 |
2%|▏ | 42/2030 [00:21<14:54, 2.22it/s]
|
43 |
2%|▏ | 43/2030 [00:22<14:50, 2.23it/s]
|
44 |
2%|▏ | 44/2030 [00:22<15:11, 2.18it/s]
|
45 |
2%|▏ | 45/2030 [00:23<15:19, 2.16it/s]
|
46 |
2%|▏ | 46/2030 [00:23<14:40, 2.25it/s]
|
47 |
2%|▏ | 47/2030 [00:24<15:29, 2.13it/s]
|
48 |
2%|▏ | 48/2030 [00:24<14:57, 2.21it/s]
|
49 |
2%|▏ | 49/2030 [00:24<15:07, 2.18it/s]
|
50 |
2%|▏ | 50/2030 [00:25<15:57, 2.07it/s]
|
51 |
3%|▎ | 51/2030 [00:25<15:31, 2.12it/s]
|
52 |
3%|▎ | 52/2030 [00:26<15:21, 2.15it/s]
|
53 |
3%|▎ | 53/2030 [00:26<14:55, 2.21it/s]
|
54 |
3%|▎ | 54/2030 [00:27<13:40, 2.41it/s]
|
55 |
3%|▎ | 55/2030 [00:27<13:39, 2.41it/s]
|
56 |
3%|▎ | 56/2030 [00:28<14:39, 2.25it/s]
|
57 |
3%|▎ | 57/2030 [00:28<14:34, 2.26it/s]
|
58 |
3%|▎ | 58/2030 [00:28<14:11, 2.32it/s]
|
59 |
3%|▎ | 59/2030 [00:29<14:11, 2.31it/s]
|
60 |
3%|▎ | 60/2030 [00:30<16:39, 1.97it/s]
|
61 |
3%|▎ | 61/2030 [00:30<14:58, 2.19it/s]
|
62 |
3%|▎ | 62/2030 [00:30<13:48, 2.38it/s]
|
63 |
3%|▎ | 63/2030 [00:31<13:58, 2.35it/s]
|
64 |
3%|▎ | 64/2030 [00:31<14:10, 2.31it/s]
|
65 |
3%|▎ | 65/2030 [00:32<14:04, 2.33it/s]
|
66 |
3%|▎ | 66/2030 [00:32<13:33, 2.41it/s]
|
67 |
3%|▎ | 67/2030 [00:32<13:57, 2.34it/s]
|
68 |
3%|▎ | 68/2030 [00:33<13:23, 2.44it/s]
|
69 |
3%|▎ | 69/2030 [00:33<12:56, 2.53it/s]
|
70 |
3%|▎ | 70/2030 [00:34<14:55, 2.19it/s]
|
71 |
3%|▎ | 71/2030 [00:34<14:42, 2.22it/s]
|
72 |
4%|▎ | 72/2030 [00:35<14:08, 2.31it/s]
|
73 |
4%|▎ | 73/2030 [00:35<13:54, 2.34it/s]
|
74 |
4%|▎ | 74/2030 [00:35<13:32, 2.41it/s]
|
75 |
4%|▎ | 75/2030 [00:36<13:34, 2.40it/s]
|
76 |
4%|▎ | 76/2030 [00:36<13:41, 2.38it/s]
|
77 |
4%|▍ | 77/2030 [00:37<14:07, 2.30it/s]
|
78 |
4%|▍ | 78/2030 [00:37<14:33, 2.24it/s]
|
79 |
4%|▍ | 79/2030 [00:38<14:16, 2.28it/s]
|
80 |
4%|▍ | 80/2030 [00:38<14:14, 2.28it/s]
|
81 |
4%|▍ | 81/2030 [00:38<13:49, 2.35it/s]
|
82 |
4%|▍ | 82/2030 [00:39<14:52, 2.18it/s]
|
83 |
4%|▍ | 83/2030 [00:39<13:50, 2.34it/s]
|
84 |
4%|▍ | 84/2030 [00:40<15:49, 2.05it/s]
|
85 |
4%|▍ | 85/2030 [00:40<15:30, 2.09it/s]
|
86 |
4%|▍ | 86/2030 [00:41<15:10, 2.14it/s]
|
87 |
4%|▍ | 87/2030 [00:41<15:02, 2.15it/s]
|
88 |
4%|▍ | 88/2030 [00:42<15:54, 2.04it/s]
|
89 |
4%|▍ | 89/2030 [00:42<15:01, 2.15it/s]
|
90 |
4%|▍ | 90/2030 [00:43<14:36, 2.21it/s]
|
91 |
4%|▍ | 91/2030 [00:43<13:44, 2.35it/s]
|
92 |
5%|▍ | 92/2030 [00:43<14:17, 2.26it/s]
|
93 |
5%|▍ | 93/2030 [00:44<17:39, 1.83it/s]
|
94 |
5%|▍ | 94/2030 [00:45<16:23, 1.97it/s]
|
95 |
5%|▍ | 95/2030 [00:45<15:11, 2.12it/s]
|
96 |
5%|▍ | 96/2030 [00:46<14:51, 2.17it/s]
|
97 |
5%|▍ | 97/2030 [00:46<14:32, 2.21it/s]
|
98 |
5%|▍ | 98/2030 [00:46<14:14, 2.26it/s]
|
99 |
5%|▍ | 99/2030 [00:47<14:50, 2.17it/s]
|
100 |
5%|▍ | 100/2030 [00:47<13:48, 2.33it/s]
|
101 |
5%|▍ | 101/2030 [00:48<13:31, 2.38it/s]
|
102 |
5%|▌ | 102/2030 [00:48<13:42, 2.34it/s]
|
103 |
5%|▌ | 103/2030 [00:48<12:47, 2.51it/s]
|
104 |
5%|▌ | 104/2030 [00:49<14:29, 2.22it/s]
|
105 |
5%|▌ | 105/2030 [00:49<14:31, 2.21it/s]
|
106 |
5%|▌ | 106/2030 [00:50<14:53, 2.15it/s]
|
107 |
5%|▌ | 107/2030 [00:50<14:26, 2.22it/s]
|
108 |
5%|▌ | 108/2030 [00:51<13:20, 2.40it/s]
|
109 |
5%|▌ | 109/2030 [00:51<14:11, 2.25it/s]
|
110 |
5%|▌ | 110/2030 [00:52<14:00, 2.28it/s]
|
111 |
5%|▌ | 111/2030 [00:52<13:40, 2.34it/s]
|
112 |
6%|▌ | 112/2030 [00:52<13:15, 2.41it/s]
|
113 |
6%|▌ | 113/2030 [00:53<14:36, 2.19it/s]
|
114 |
6%|▌ | 114/2030 [00:53<15:33, 2.05it/s]
|
115 |
6%|▌ | 115/2030 [00:54<15:08, 2.11it/s]
|
116 |
6%|▌ | 116/2030 [00:54<14:34, 2.19it/s]
|
117 |
6%|▌ | 117/2030 [00:55<14:45, 2.16it/s]
|
118 |
6%|▌ | 118/2030 [00:55<14:07, 2.26it/s]
|
119 |
6%|▌ | 119/2030 [00:56<14:05, 2.26it/s]
|
120 |
6%|▌ | 120/2030 [00:56<13:44, 2.32it/s]
|
121 |
6%|▌ | 121/2030 [00:57<14:00, 2.27it/s]
|
122 |
6%|▌ | 122/2030 [00:57<13:23, 2.37it/s]
|
123 |
6%|▌ | 123/2030 [00:57<12:58, 2.45it/s]
|
124 |
6%|▌ | 124/2030 [00:58<15:07, 2.10it/s]
|
125 |
6%|▌ | 125/2030 [00:58<15:02, 2.11it/s]
|
126 |
6%|▌ | 126/2030 [00:59<15:04, 2.11it/s]
|
127 |
6%|▋ | 127/2030 [00:59<14:34, 2.18it/s]
|
128 |
6%|▋ | 128/2030 [01:00<14:47, 2.14it/s]
|
129 |
6%|▋ | 129/2030 [01:00<13:53, 2.28it/s]
|
130 |
6%|▋ | 130/2030 [01:01<16:03, 1.97it/s]
|
131 |
6%|▋ | 131/2030 [01:01<16:28, 1.92it/s]
|
132 |
7%|▋ | 132/2030 [01:02<15:23, 2.06it/s]
|
133 |
7%|▋ | 133/2030 [01:03<18:04, 1.75it/s]
|
134 |
7%|▋ | 134/2030 [01:03<17:08, 1.84it/s]
|
135 |
7%|▋ | 135/2030 [01:03<16:13, 1.95it/s]
|
136 |
7%|▋ | 136/2030 [01:04<15:14, 2.07it/s]
|
137 |
7%|▋ | 137/2030 [01:04<13:58, 2.26it/s]
|
138 |
7%|▋ | 138/2030 [01:05<13:47, 2.29it/s]
|
139 |
7%|▋ | 139/2030 [01:05<13:46, 2.29it/s]
|
140 |
7%|▋ | 140/2030 [01:06<17:13, 1.83it/s]
|
141 |
7%|▋ | 141/2030 [01:06<15:44, 2.00it/s]
|
142 |
7%|▋ | 142/2030 [01:07<18:04, 1.74it/s]
|
143 |
7%|▋ | 143/2030 [01:07<16:32, 1.90it/s]
|
144 |
7%|▋ | 144/2030 [01:08<17:46, 1.77it/s]
|
145 |
7%|▋ | 145/2030 [01:08<15:39, 2.01it/s]
|
146 |
7%|▋ | 146/2030 [01:09<15:15, 2.06it/s]
|
147 |
7%|▋ | 147/2030 [01:09<14:20, 2.19it/s]
|
148 |
7%|▋ | 148/2030 [01:10<13:42, 2.29it/s]
|
149 |
7%|▋ | 149/2030 [01:10<13:13, 2.37it/s]
|
150 |
7%|▋ | 150/2030 [01:11<14:12, 2.21it/s]
|
151 |
7%|▋ | 151/2030 [01:11<15:01, 2.08it/s]
|
152 |
7%|▋ | 152/2030 [01:12<14:34, 2.15it/s]
|
153 |
8%|▊ | 153/2030 [01:12<13:22, 2.34it/s]
|
154 |
8%|▊ | 154/2030 [01:12<14:04, 2.22it/s]
|
155 |
8%|▊ | 155/2030 [01:13<13:31, 2.31it/s]
|
156 |
8%|▊ | 156/2030 [01:13<14:24, 2.17it/s]
|
157 |
8%|▊ | 157/2030 [01:14<14:18, 2.18it/s]
|
158 |
8%|▊ | 158/2030 [01:14<15:19, 2.04it/s]
|
159 |
8%|▊ | 159/2030 [01:15<14:23, 2.17it/s]
|
160 |
8%|▊ | 160/2030 [01:15<13:47, 2.26it/s]
|
161 |
8%|▊ | 161/2030 [01:16<13:50, 2.25it/s]
|
162 |
8%|▊ | 162/2030 [01:16<14:12, 2.19it/s]
|
163 |
8%|▊ | 163/2030 [01:17<13:53, 2.24it/s]
|
164 |
8%|▊ | 164/2030 [01:17<13:45, 2.26it/s]
|
165 |
8%|▊ | 165/2030 [01:17<13:39, 2.28it/s]
|
166 |
8%|▊ | 166/2030 [01:18<15:08, 2.05it/s]
|
167 |
8%|▊ | 167/2030 [01:18<14:35, 2.13it/s]
|
168 |
8%|▊ | 168/2030 [01:19<14:27, 2.15it/s]
|
169 |
8%|▊ | 169/2030 [01:19<13:34, 2.29it/s]
|
170 |
8%|▊ | 170/2030 [01:20<14:37, 2.12it/s]
|
171 |
8%|▊ | 171/2030 [01:20<13:29, 2.30it/s]
|
172 |
8%|▊ | 172/2030 [01:21<16:15, 1.90it/s]
|
173 |
9%|▊ | 173/2030 [01:21<15:03, 2.06it/s]
|
174 |
9%|▊ | 174/2030 [01:22<13:56, 2.22it/s]
|
175 |
9%|▊ | 175/2030 [01:22<13:27, 2.30it/s]
|
176 |
9%|▊ | 176/2030 [01:22<12:39, 2.44it/s]
|
177 |
9%|▊ | 177/2030 [01:23<14:01, 2.20it/s]
|
178 |
9%|▉ | 178/2030 [01:23<12:55, 2.39it/s]
|
179 |
9%|▉ | 179/2030 [01:24<12:19, 2.50it/s]
|
180 |
9%|▉ | 180/2030 [01:24<12:59, 2.37it/s]
|
181 |
9%|▉ | 181/2030 [01:24<12:19, 2.50it/s]
|
182 |
9%|▉ | 182/2030 [01:25<12:00, 2.56it/s]
|
183 |
9%|▉ | 183/2030 [01:25<12:02, 2.56it/s]
|
184 |
9%|▉ | 184/2030 [01:26<12:22, 2.49it/s]
|
185 |
9%|▉ | 185/2030 [01:26<12:37, 2.44it/s]
|
186 |
9%|▉ | 186/2030 [01:26<12:00, 2.56it/s]
|
187 |
9%|▉ | 187/2030 [01:27<12:49, 2.39it/s]
|
188 |
9%|▉ | 188/2030 [01:27<13:38, 2.25it/s]
|
189 |
9%|▉ | 189/2030 [01:28<13:21, 2.30it/s]
|
190 |
9%|▉ | 190/2030 [01:29<16:36, 1.85it/s]
|
191 |
9%|▉ | 191/2030 [01:29<16:06, 1.90it/s]
|
192 |
9%|▉ | 192/2030 [01:29<14:24, 2.13it/s]
|
193 |
10%|▉ | 193/2030 [01:30<16:50, 1.82it/s]
|
194 |
10%|▉ | 194/2030 [01:31<15:12, 2.01it/s]
|
195 |
10%|▉ | 195/2030 [01:31<15:09, 2.02it/s]
|
196 |
10%|▉ | 196/2030 [01:31<14:00, 2.18it/s]
|
197 |
10%|▉ | 197/2030 [01:32<16:28, 1.85it/s]
|
198 |
10%|▉ | 198/2030 [01:33<18:54, 1.62it/s]
|
199 |
10%|▉ | 199/2030 [01:33<17:25, 1.75it/s]
|
200 |
10%|▉ | 200/2030 [01:34<16:29, 1.85it/s]
|
201 |
10%|▉ | 201/2030 [01:34<15:25, 1.98it/s]
|
202 |
10%|▉ | 202/2030 [01:35<14:43, 2.07it/s]
|
203 |
10%|█ | 203/2030 [01:35<14:58, 2.03it/s][INFO|trainer.py:811] 2024-09-09 11:55:48,641 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
|
|
|
|
|
|
|
|
|
|
|
|
204 |
0%| | 0/315 [00:00<?, ?it/s][A
|
|
|
205 |
3%|▎ | 8/315 [00:00<00:04, 76.12it/s][A
|
|
|
206 |
5%|▌ | 16/315 [00:00<00:04, 74.05it/s][A
|
|
|
207 |
8%|▊ | 24/315 [00:00<00:03, 75.18it/s][A
|
|
|
208 |
10%|█ | 32/315 [00:00<00:03, 70.92it/s][A
|
|
|
209 |
13%|█▎ | 40/315 [00:00<00:03, 73.74it/s][A
|
|
|
210 |
15%|█▌ | 48/315 [00:00<00:03, 74.41it/s][A
|
|
|
211 |
18%|█▊ | 56/315 [00:00<00:03, 73.32it/s][A
|
|
|
212 |
20%|██ | 64/315 [00:00<00:03, 71.03it/s][A
|
|
|
213 |
23%|██▎ | 72/315 [00:00<00:03, 73.04it/s][A
|
|
|
214 |
25%|██▌ | 80/315 [00:01<00:03, 69.62it/s][A
|
|
|
215 |
28%|██▊ | 88/315 [00:01<00:03, 67.05it/s][A
|
|
|
216 |
30%|███ | 96/315 [00:01<00:03, 70.09it/s][A
|
|
|
217 |
33%|███▎ | 104/315 [00:01<00:03, 67.36it/s][A
|
|
|
218 |
36%|███▌ | 112/315 [00:01<00:02, 69.60it/s][A
|
|
|
219 |
38%|███▊ | 120/315 [00:01<00:02, 69.30it/s][A
|
|
|
220 |
40%|████ | 127/315 [00:01<00:02, 68.58it/s][A
|
|
|
221 |
43%|████▎ | 134/315 [00:01<00:02, 67.97it/s][A
|
|
|
222 |
45%|████▍ | 141/315 [00:02<00:02, 68.37it/s][A
|
|
|
223 |
47%|████▋ | 149/315 [00:02<00:02, 70.96it/s][A
|
|
|
224 |
50%|█████ | 158/315 [00:02<00:02, 74.16it/s][A
|
|
|
225 |
53%|█████▎ | 166/315 [00:02<00:02, 72.11it/s][A
|
|
|
226 |
55%|█████▌ | 174/315 [00:02<00:01, 71.61it/s][A
|
|
|
227 |
58%|█████▊ | 182/315 [00:02<00:01, 68.73it/s][A
|
|
|
228 |
60%|██████ | 189/315 [00:02<00:01, 68.38it/s][A
|
|
|
229 |
62%|██████▏ | 196/315 [00:02<00:01, 67.76it/s][A
|
|
|
230 |
64%|██████▍ | 203/315 [00:02<00:01, 64.44it/s][A
|
|
|
231 |
67%|██████▋ | 210/315 [00:03<00:01, 64.84it/s][A
|
|
|
232 |
69%|██████▉ | 218/315 [00:03<00:01, 68.36it/s][A
|
|
|
233 |
72%|███████▏ | 226/315 [00:03<00:01, 70.84it/s][A
|
|
|
234 |
75%|███████▍ | 235/315 [00:03<00:01, 73.77it/s][A
|
|
|
235 |
77%|███████▋ | 243/315 [00:03<00:01, 70.62it/s][A
|
|
|
236 |
80%|███████▉ | 251/315 [00:03<00:00, 70.66it/s][A
|
|
|
237 |
82%|████████▏ | 259/315 [00:03<00:00, 69.14it/s][A
|
|
|
238 |
85%|████████▍ | 267/315 [00:03<00:00, 70.28it/s][A
|
|
|
239 |
88%|████████▊ | 276/315 [00:03<00:00, 73.48it/s][A
|
|
|
240 |
90%|█████████ | 284/315 [00:04<00:00, 73.87it/s][A
|
|
|
241 |
93%|█████████▎| 292/315 [00:04<00:00, 71.52it/s][A
|
|
|
242 |
95%|█████████▌| 300/315 [00:04<00:00, 71.25it/s][A
|
|
|
243 |
98%|█████████▊| 308/315 [00:04<00:00, 71.82it/s][A
|
244 |
|
|
|
245 |
|
246 |
10%|█ | 203/2030 [01:41<14:58, 2.03it/s]
|
|
|
|
|
247 |
[A[INFO|trainer.py:3503] 2024-09-09 11:55:54,552 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-203
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
248 |
10%|█ | 204/2030 [01:47<1:59:40, 3.93s/it]
|
249 |
10%|█ | 205/2030 [01:48<1:28:45, 2.92s/it]
|
250 |
10%|█ | 206/2030 [01:48<1:06:21, 2.18s/it]
|
251 |
10%|█ | 207/2030 [01:49<50:53, 1.67s/it]
|
252 |
10%|█ | 208/2030 [01:49<39:22, 1.30s/it]
|
253 |
10%|█ | 209/2030 [01:49<30:44, 1.01s/it]
|
254 |
10%|█ | 210/2030 [01:50<25:37, 1.18it/s]
|
255 |
10%|█ | 211/2030 [01:51<24:14, 1.25it/s]
|
256 |
10%|█ | 212/2030 [01:51<21:03, 1.44it/s]
|
257 |
10%|█ | 213/2030 [01:51<18:36, 1.63it/s]
|
258 |
11%|█ | 214/2030 [01:52<17:59, 1.68it/s]
|
259 |
11%|█ | 215/2030 [01:52<16:37, 1.82it/s]
|
260 |
11%|█ | 216/2030 [01:53<15:39, 1.93it/s]
|
261 |
11%|█ | 217/2030 [01:54<19:55, 1.52it/s]
|
262 |
11%|█ | 218/2030 [01:54<17:23, 1.74it/s]
|
263 |
11%|█ | 219/2030 [01:55<16:28, 1.83it/s]
|
264 |
11%|█ | 220/2030 [01:55<17:39, 1.71it/s]
|
265 |
11%|█ | 221/2030 [01:56<15:31, 1.94it/s]
|
266 |
11%|█ | 222/2030 [01:56<14:36, 2.06it/s]
|
267 |
11%|█ | 223/2030 [01:57<14:18, 2.11it/s]
|
268 |
11%|█ | 224/2030 [01:57<16:05, 1.87it/s]
|
269 |
11%|█ | 225/2030 [01:58<15:11, 1.98it/s]
|
270 |
11%|█ | 226/2030 [01:58<14:18, 2.10it/s]
|
271 |
11%|█ | 227/2030 [01:59<14:05, 2.13it/s]
|
272 |
11%|█ | 228/2030 [01:59<13:34, 2.21it/s]
|
273 |
11%|█▏ | 229/2030 [01:59<12:44, 2.36it/s]
|
274 |
11%|█▏ | 230/2030 [02:00<12:42, 2.36it/s]
|
275 |
11%|█▏ | 231/2030 [02:00<12:22, 2.42it/s]
|
276 |
11%|█▏ | 232/2030 [02:01<12:16, 2.44it/s]
|
277 |
11%|█▏ | 233/2030 [02:01<12:45, 2.35it/s]
|
278 |
12%|█▏ | 234/2030 [02:02<14:11, 2.11it/s]
|
279 |
12%|█▏ | 235/2030 [02:02<13:37, 2.20it/s]
|
280 |
12%|█▏ | 236/2030 [02:02<13:13, 2.26it/s]
|
281 |
12%|█▏ | 237/2030 [02:03<13:48, 2.16it/s]
|
282 |
12%|█▏ | 238/2030 [02:03<13:05, 2.28it/s]
|
283 |
12%|█▏ | 239/2030 [02:04<12:34, 2.37it/s]
|
284 |
12%|█▏ | 240/2030 [02:04<12:38, 2.36it/s]
|
285 |
12%|█▏ | 241/2030 [02:05<12:12, 2.44it/s]
|
286 |
12%|█▏ | 242/2030 [02:05<12:03, 2.47it/s]
|
287 |
12%|█▏ | 243/2030 [02:05<13:09, 2.26it/s]
|
288 |
12%|█▏ | 244/2030 [02:06<13:57, 2.13it/s]
|
289 |
12%|█▏ | 245/2030 [02:06<12:28, 2.39it/s]
|
290 |
12%|█▏ | 246/2030 [02:07<11:47, 2.52it/s]
|
291 |
12%|█▏ | 247/2030 [02:07<12:04, 2.46it/s]
|
292 |
12%|█▏ | 248/2030 [02:07<11:32, 2.57it/s]
|
293 |
12%|█▏ | 249/2030 [02:08<12:22, 2.40it/s]
|
294 |
12%|█▏ | 250/2030 [02:08<12:07, 2.45it/s]
|
295 |
12%|█▏ | 251/2030 [02:09<11:40, 2.54it/s]
|
296 |
12%|█▏ | 252/2030 [02:09<14:13, 2.08it/s]
|
297 |
12%|█▏ | 253/2030 [02:10<15:51, 1.87it/s]
|
298 |
13%|█▎ | 254/2030 [02:10<14:48, 2.00it/s]
|
299 |
13%|█▎ | 255/2030 [02:11<16:12, 1.83it/s]
|
300 |
13%|█▎ | 256/2030 [02:12<15:55, 1.86it/s]
|
301 |
13%|█▎ | 257/2030 [02:12<15:12, 1.94it/s]
|
302 |
13%|█▎ | 258/2030 [02:13<16:09, 1.83it/s]
|
303 |
13%|█▎ | 259/2030 [02:13<14:20, 2.06it/s]
|
304 |
13%|█▎ | 260/2030 [02:13<13:31, 2.18it/s]
|
305 |
13%|█▎ | 261/2030 [02:14<16:16, 1.81it/s]
|
306 |
13%|█▎ | 262/2030 [02:15<14:54, 1.98it/s]
|
307 |
13%|█▎ | 263/2030 [02:15<14:32, 2.03it/s]
|
308 |
13%|█▎ | 264/2030 [02:16<15:26, 1.91it/s]
|
309 |
13%|█▎ | 265/2030 [02:16<14:04, 2.09it/s]
|
310 |
13%|█▎ | 266/2030 [02:16<13:00, 2.26it/s]
|
311 |
13%|█▎ | 267/2030 [02:17<12:11, 2.41it/s]
|
312 |
13%|█▎ | 268/2030 [02:17<12:46, 2.30it/s]
|
313 |
13%|█▎ | 269/2030 [02:18<11:57, 2.46it/s]
|
314 |
13%|█▎ | 270/2030 [02:18<12:20, 2.38it/s]
|
315 |
13%|█▎ | 271/2030 [02:19<12:47, 2.29it/s]
|
316 |
13%|█▎ | 272/2030 [02:19<13:52, 2.11it/s]
|
317 |
13%|█▎ | 273/2030 [02:20<13:54, 2.11it/s]
|
318 |
13%|█▎ | 274/2030 [02:20<12:44, 2.30it/s]
|
319 |
14%|█▎ | 275/2030 [02:20<12:38, 2.31it/s]
|
320 |
14%|█▎ | 276/2030 [02:21<12:42, 2.30it/s]
|
321 |
14%|█▎ | 277/2030 [02:21<12:34, 2.32it/s]
|
322 |
14%|█▎ | 278/2030 [02:22<12:30, 2.33it/s]
|
323 |
14%|█▎ | 279/2030 [02:22<12:28, 2.34it/s]
|
324 |
14%|█▍ | 280/2030 [02:22<11:52, 2.46it/s]
|
325 |
14%|█▍ | 281/2030 [02:23<12:37, 2.31it/s]
|
326 |
14%|█▍ | 282/2030 [02:23<12:16, 2.37it/s]
|
327 |
14%|█▍ | 283/2030 [02:24<12:33, 2.32it/s]
|
328 |
14%|█▍ | 284/2030 [02:24<13:13, 2.20it/s]
|
329 |
14%|█▍ | 285/2030 [02:25<13:08, 2.21it/s]
|
330 |
14%|█▍ | 286/2030 [02:25<13:09, 2.21it/s]
|
331 |
14%|█▍ | 287/2030 [02:26<12:27, 2.33it/s]
|
332 |
14%|█▍ | 288/2030 [02:26<13:57, 2.08it/s]
|
333 |
14%|█▍ | 289/2030 [02:27<13:51, 2.09it/s]
|
334 |
14%|█▍ | 290/2030 [02:27<15:16, 1.90it/s]
|
335 |
14%|█▍ | 291/2030 [02:28<14:18, 2.02it/s]
|
336 |
14%|█▍ | 292/2030 [02:28<15:12, 1.91it/s]
|
337 |
14%|█▍ | 293/2030 [02:29<14:05, 2.05it/s]
|
338 |
14%|█▍ | 294/2030 [02:29<15:37, 1.85it/s]
|
339 |
15%|█▍ | 295/2030 [02:30<13:51, 2.09it/s]
|
340 |
15%|█▍ | 296/2030 [02:30<13:08, 2.20it/s]
|
341 |
15%|█▍ | 297/2030 [02:31<13:30, 2.14it/s]
|
342 |
15%|█▍ | 298/2030 [02:31<13:08, 2.20it/s]
|
343 |
15%|█▍ | 299/2030 [02:31<13:26, 2.15it/s]
|
344 |
15%|█▍ | 300/2030 [02:32<14:33, 1.98it/s]
|
345 |
15%|█▍ | 301/2030 [02:32<14:07, 2.04it/s]
|
346 |
15%|█▍ | 302/2030 [02:33<13:49, 2.08it/s]
|
347 |
15%|█▍ | 303/2030 [02:33<12:42, 2.26it/s]
|
348 |
15%|█▍ | 304/2030 [02:34<11:59, 2.40it/s]
|
349 |
15%|█▌ | 305/2030 [02:34<14:03, 2.04it/s]
|
350 |
15%|█▌ | 306/2030 [02:35<13:33, 2.12it/s]
|
351 |
15%|█▌ | 307/2030 [02:35<13:04, 2.20it/s]
|
352 |
15%|█▌ | 308/2030 [02:36<13:12, 2.17it/s]
|
353 |
15%|█▌ | 309/2030 [02:36<12:55, 2.22it/s]
|
354 |
15%|█▌ | 310/2030 [02:37<13:23, 2.14it/s]
|
355 |
15%|█▌ | 311/2030 [02:37<12:53, 2.22it/s]
|
|
|
1 |
+
2024-09-09 11:53:51.396276: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
2 |
+
2024-09-09 11:53:51.414891: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
3 |
+
2024-09-09 11:53:51.436268: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
4 |
+
2024-09-09 11:53:51.442683: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
5 |
+
2024-09-09 11:53:51.458047: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
6 |
+
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
7 |
+
2024-09-09 11:53:52.683988: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
8 |
+
/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
|
9 |
+
warnings.warn(
|
10 |
+
09/09/2024 11:53:54 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: False
|
11 |
+
09/09/2024 11:53:54 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
|
12 |
+
_n_gpu=1,
|
13 |
+
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
|
14 |
+
adafactor=False,
|
15 |
+
adam_beta1=0.9,
|
16 |
+
adam_beta2=0.999,
|
17 |
+
adam_epsilon=1e-08,
|
18 |
+
auto_find_batch_size=False,
|
19 |
+
batch_eval_metrics=False,
|
20 |
+
bf16=False,
|
21 |
+
bf16_full_eval=False,
|
22 |
+
data_seed=None,
|
23 |
+
dataloader_drop_last=False,
|
24 |
+
dataloader_num_workers=0,
|
25 |
+
dataloader_persistent_workers=False,
|
26 |
+
dataloader_pin_memory=True,
|
27 |
+
dataloader_prefetch_factor=None,
|
28 |
+
ddp_backend=None,
|
29 |
+
ddp_broadcast_buffers=None,
|
30 |
+
ddp_bucket_cap_mb=None,
|
31 |
+
ddp_find_unused_parameters=None,
|
32 |
+
ddp_timeout=1800,
|
33 |
+
debug=[],
|
34 |
+
deepspeed=None,
|
35 |
+
disable_tqdm=False,
|
36 |
+
dispatch_batches=None,
|
37 |
+
do_eval=True,
|
38 |
+
do_predict=True,
|
39 |
+
do_train=True,
|
40 |
+
eval_accumulation_steps=None,
|
41 |
+
eval_delay=0,
|
42 |
+
eval_do_concat_batches=True,
|
43 |
+
eval_on_start=False,
|
44 |
+
eval_steps=None,
|
45 |
+
eval_strategy=epoch,
|
46 |
+
eval_use_gather_object=False,
|
47 |
+
evaluation_strategy=epoch,
|
48 |
+
fp16=False,
|
49 |
+
fp16_backend=auto,
|
50 |
+
fp16_full_eval=False,
|
51 |
+
fp16_opt_level=O1,
|
52 |
+
fsdp=[],
|
53 |
+
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
|
54 |
+
fsdp_min_num_params=0,
|
55 |
+
fsdp_transformer_layer_cls_to_wrap=None,
|
56 |
+
full_determinism=False,
|
57 |
+
gradient_accumulation_steps=2,
|
58 |
+
gradient_checkpointing=False,
|
59 |
+
gradient_checkpointing_kwargs=None,
|
60 |
+
greater_is_better=True,
|
61 |
+
group_by_length=False,
|
62 |
+
half_precision_backend=auto,
|
63 |
+
hub_always_push=False,
|
64 |
+
hub_model_id=None,
|
65 |
+
hub_private_repo=False,
|
66 |
+
hub_strategy=every_save,
|
67 |
+
hub_token=<HUB_TOKEN>,
|
68 |
+
ignore_data_skip=False,
|
69 |
+
include_inputs_for_metrics=False,
|
70 |
+
include_num_input_tokens_seen=False,
|
71 |
+
include_tokens_per_second=False,
|
72 |
+
jit_mode_eval=False,
|
73 |
+
label_names=None,
|
74 |
+
label_smoothing_factor=0.0,
|
75 |
+
learning_rate=5e-05,
|
76 |
+
length_column_name=length,
|
77 |
+
load_best_model_at_end=True,
|
78 |
+
local_rank=0,
|
79 |
+
log_level=passive,
|
80 |
+
log_level_replica=warning,
|
81 |
+
log_on_each_node=True,
|
82 |
+
logging_dir=/content/dissertation/scripts/ner/output/tb,
|
83 |
+
logging_first_step=False,
|
84 |
+
logging_nan_inf_filter=True,
|
85 |
+
logging_steps=500,
|
86 |
+
logging_strategy=steps,
|
87 |
+
lr_scheduler_kwargs={},
|
88 |
+
lr_scheduler_type=linear,
|
89 |
+
max_grad_norm=1.0,
|
90 |
+
max_steps=-1,
|
91 |
+
metric_for_best_model=f1,
|
92 |
+
mp_parameters=,
|
93 |
+
neftune_noise_alpha=None,
|
94 |
+
no_cuda=False,
|
95 |
+
num_train_epochs=10.0,
|
96 |
+
optim=adamw_torch,
|
97 |
+
optim_args=None,
|
98 |
+
optim_target_modules=None,
|
99 |
+
output_dir=/content/dissertation/scripts/ner/output,
|
100 |
+
overwrite_output_dir=True,
|
101 |
+
past_index=-1,
|
102 |
+
per_device_eval_batch_size=8,
|
103 |
+
per_device_train_batch_size=32,
|
104 |
+
prediction_loss_only=False,
|
105 |
+
push_to_hub=True,
|
106 |
+
push_to_hub_model_id=None,
|
107 |
+
push_to_hub_organization=None,
|
108 |
+
push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
|
109 |
+
ray_scope=last,
|
110 |
+
remove_unused_columns=True,
|
111 |
+
report_to=['tensorboard'],
|
112 |
+
restore_callback_states_from_checkpoint=False,
|
113 |
+
resume_from_checkpoint=None,
|
114 |
+
run_name=/content/dissertation/scripts/ner/output,
|
115 |
+
save_on_each_node=False,
|
116 |
+
save_only_model=False,
|
117 |
+
save_safetensors=True,
|
118 |
+
save_steps=500,
|
119 |
+
save_strategy=epoch,
|
120 |
+
save_total_limit=None,
|
121 |
+
seed=42,
|
122 |
+
skip_memory_metrics=True,
|
123 |
+
split_batches=None,
|
124 |
+
tf32=None,
|
125 |
+
torch_compile=False,
|
126 |
+
torch_compile_backend=None,
|
127 |
+
torch_compile_mode=None,
|
128 |
+
torch_empty_cache_steps=None,
|
129 |
+
torchdynamo=None,
|
130 |
+
tpu_metrics_debug=False,
|
131 |
+
tpu_num_cores=None,
|
132 |
+
use_cpu=False,
|
133 |
+
use_ipex=False,
|
134 |
+
use_legacy_prediction_loop=False,
|
135 |
+
use_mps_device=False,
|
136 |
+
warmup_ratio=0.0,
|
137 |
+
warmup_steps=0,
|
138 |
+
weight_decay=0.0,
|
139 |
+
)
|
140 |
+
|
141 |
+
|
142 |
+
|
143 |
+
|
144 |
+
|
145 |
+
|
146 |
+
|
147 |
+
[INFO|configuration_utils.py:733] 2024-09-09 11:54:06,987 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
|
148 |
+
[INFO|configuration_utils.py:800] 2024-09-09 11:54:06,991 >> Model config RobertaConfig {
|
149 |
+
"_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
|
150 |
+
"architectures": [
|
151 |
+
"RobertaForMaskedLM"
|
152 |
+
],
|
153 |
+
"attention_probs_dropout_prob": 0.1,
|
154 |
+
"bos_token_id": 0,
|
155 |
+
"classifier_dropout": null,
|
156 |
+
"eos_token_id": 2,
|
157 |
+
"finetuning_task": "ner",
|
158 |
+
"gradient_checkpointing": false,
|
159 |
+
"hidden_act": "gelu",
|
160 |
+
"hidden_dropout_prob": 0.1,
|
161 |
+
"hidden_size": 768,
|
162 |
+
"id2label": {
|
163 |
+
"0": "O",
|
164 |
+
"1": "B-SINTOMA",
|
165 |
+
"2": "I-SINTOMA"
|
166 |
+
},
|
167 |
+
"initializer_range": 0.02,
|
168 |
+
"intermediate_size": 3072,
|
169 |
+
"label2id": {
|
170 |
+
"B-SINTOMA": 1,
|
171 |
+
"I-SINTOMA": 2,
|
172 |
+
"O": 0
|
173 |
+
},
|
174 |
+
"layer_norm_eps": 1e-05,
|
175 |
+
"max_position_embeddings": 514,
|
176 |
+
"model_type": "roberta",
|
177 |
+
"num_attention_heads": 12,
|
178 |
+
"num_hidden_layers": 12,
|
179 |
+
"pad_token_id": 1,
|
180 |
+
"position_embedding_type": "absolute",
|
181 |
+
"transformers_version": "4.44.2",
|
182 |
+
"type_vocab_size": 1,
|
183 |
+
"use_cache": true,
|
184 |
+
"vocab_size": 50262
|
185 |
+
}
|
186 |
+
|
187 |
+
[INFO|configuration_utils.py:733] 2024-09-09 11:54:07,264 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
|
188 |
+
[INFO|configuration_utils.py:800] 2024-09-09 11:54:07,265 >> Model config RobertaConfig {
|
189 |
+
"_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
|
190 |
+
"architectures": [
|
191 |
+
"RobertaForMaskedLM"
|
192 |
+
],
|
193 |
+
"attention_probs_dropout_prob": 0.1,
|
194 |
+
"bos_token_id": 0,
|
195 |
+
"classifier_dropout": null,
|
196 |
+
"eos_token_id": 2,
|
197 |
+
"gradient_checkpointing": false,
|
198 |
+
"hidden_act": "gelu",
|
199 |
+
"hidden_dropout_prob": 0.1,
|
200 |
+
"hidden_size": 768,
|
201 |
+
"initializer_range": 0.02,
|
202 |
+
"intermediate_size": 3072,
|
203 |
+
"layer_norm_eps": 1e-05,
|
204 |
+
"max_position_embeddings": 514,
|
205 |
+
"model_type": "roberta",
|
206 |
+
"num_attention_heads": 12,
|
207 |
+
"num_hidden_layers": 12,
|
208 |
+
"pad_token_id": 1,
|
209 |
+
"position_embedding_type": "absolute",
|
210 |
+
"transformers_version": "4.44.2",
|
211 |
+
"type_vocab_size": 1,
|
212 |
+
"use_cache": true,
|
213 |
+
"vocab_size": 50262
|
214 |
+
}
|
215 |
+
|
216 |
+
[INFO|tokenization_utils_base.py:2269] 2024-09-09 11:54:07,275 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/vocab.json
|
217 |
+
[INFO|tokenization_utils_base.py:2269] 2024-09-09 11:54:07,275 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/merges.txt
|
218 |
+
[INFO|tokenization_utils_base.py:2269] 2024-09-09 11:54:07,275 >> loading file tokenizer.json from cache at None
|
219 |
+
[INFO|tokenization_utils_base.py:2269] 2024-09-09 11:54:07,275 >> loading file added_tokens.json from cache at None
|
220 |
+
[INFO|tokenization_utils_base.py:2269] 2024-09-09 11:54:07,275 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/special_tokens_map.json
|
221 |
+
[INFO|tokenization_utils_base.py:2269] 2024-09-09 11:54:07,275 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/tokenizer_config.json
|
222 |
+
[INFO|configuration_utils.py:733] 2024-09-09 11:54:07,275 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
|
223 |
+
[INFO|configuration_utils.py:800] 2024-09-09 11:54:07,276 >> Model config RobertaConfig {
|
224 |
+
"_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
|
225 |
+
"architectures": [
|
226 |
+
"RobertaForMaskedLM"
|
227 |
+
],
|
228 |
+
"attention_probs_dropout_prob": 0.1,
|
229 |
+
"bos_token_id": 0,
|
230 |
+
"classifier_dropout": null,
|
231 |
+
"eos_token_id": 2,
|
232 |
+
"gradient_checkpointing": false,
|
233 |
+
"hidden_act": "gelu",
|
234 |
+
"hidden_dropout_prob": 0.1,
|
235 |
+
"hidden_size": 768,
|
236 |
+
"initializer_range": 0.02,
|
237 |
+
"intermediate_size": 3072,
|
238 |
+
"layer_norm_eps": 1e-05,
|
239 |
+
"max_position_embeddings": 514,
|
240 |
+
"model_type": "roberta",
|
241 |
+
"num_attention_heads": 12,
|
242 |
+
"num_hidden_layers": 12,
|
243 |
+
"pad_token_id": 1,
|
244 |
+
"position_embedding_type": "absolute",
|
245 |
+
"transformers_version": "4.44.2",
|
246 |
+
"type_vocab_size": 1,
|
247 |
+
"use_cache": true,
|
248 |
+
"vocab_size": 50262
|
249 |
+
}
|
250 |
+
|
251 |
+
/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884
|
252 |
+
warnings.warn(
|
253 |
+
[INFO|configuration_utils.py:733] 2024-09-09 11:54:07,353 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
|
254 |
+
[INFO|configuration_utils.py:800] 2024-09-09 11:54:07,354 >> Model config RobertaConfig {
|
255 |
+
"_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
|
256 |
+
"architectures": [
|
257 |
+
"RobertaForMaskedLM"
|
258 |
+
],
|
259 |
+
"attention_probs_dropout_prob": 0.1,
|
260 |
+
"bos_token_id": 0,
|
261 |
+
"classifier_dropout": null,
|
262 |
+
"eos_token_id": 2,
|
263 |
+
"gradient_checkpointing": false,
|
264 |
+
"hidden_act": "gelu",
|
265 |
+
"hidden_dropout_prob": 0.1,
|
266 |
+
"hidden_size": 768,
|
267 |
+
"initializer_range": 0.02,
|
268 |
+
"intermediate_size": 3072,
|
269 |
+
"layer_norm_eps": 1e-05,
|
270 |
+
"max_position_embeddings": 514,
|
271 |
+
"model_type": "roberta",
|
272 |
+
"num_attention_heads": 12,
|
273 |
+
"num_hidden_layers": 12,
|
274 |
+
"pad_token_id": 1,
|
275 |
+
"position_embedding_type": "absolute",
|
276 |
+
"transformers_version": "4.44.2",
|
277 |
+
"type_vocab_size": 1,
|
278 |
+
"use_cache": true,
|
279 |
+
"vocab_size": 50262
|
280 |
+
}
|
281 |
+
|
282 |
+
[INFO|modeling_utils.py:3678] 2024-09-09 11:54:07,676 >> loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/pytorch_model.bin
|
283 |
+
[INFO|modeling_utils.py:4497] 2024-09-09 11:54:07,755 >> Some weights of the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es were not used when initializing RobertaForTokenClassification: ['lm_head.bias', 'lm_head.decoder.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
|
284 |
+
- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
|
285 |
+
- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
|
286 |
+
[WARNING|modeling_utils.py:4509] 2024-09-09 11:54:07,755 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es and are newly initialized: ['classifier.bias', 'classifier.weight']
|
287 |
+
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
|
288 |
+
|
289 |
+
|
290 |
+
|
291 |
+
/content/dissertation/scripts/ner/run_ner_train.py:397: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate
|
292 |
+
metric = load_metric("seqeval", trust_remote_code=True)
|
293 |
+
[INFO|trainer.py:811] 2024-09-09 11:54:12,226 >> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
294 |
+
[INFO|trainer.py:2134] 2024-09-09 11:54:12,775 >> ***** Running training *****
|
295 |
+
[INFO|trainer.py:2135] 2024-09-09 11:54:12,776 >> Num examples = 13,013
|
296 |
+
[INFO|trainer.py:2136] 2024-09-09 11:54:12,776 >> Num Epochs = 10
|
297 |
+
[INFO|trainer.py:2137] 2024-09-09 11:54:12,776 >> Instantaneous batch size per device = 32
|
298 |
+
[INFO|trainer.py:2140] 2024-09-09 11:54:12,776 >> Total train batch size (w. parallel, distributed & accumulation) = 64
|
299 |
+
[INFO|trainer.py:2141] 2024-09-09 11:54:12,776 >> Gradient Accumulation steps = 2
|
300 |
+
[INFO|trainer.py:2142] 2024-09-09 11:54:12,776 >> Total optimization steps = 2,030
|
301 |
+
[INFO|trainer.py:2143] 2024-09-09 11:54:12,776 >> Number of trainable parameters = 124,055,043
|
302 |
+
|
303 |
0%| | 0/2030 [00:00<?, ?it/s]
|
304 |
0%| | 1/2030 [00:01<43:23, 1.28s/it]
|
305 |
0%| | 2/2030 [00:01<27:30, 1.23it/s]
|
306 |
0%| | 3/2030 [00:02<22:20, 1.51it/s]
|
307 |
0%| | 4/2030 [00:02<20:58, 1.61it/s]
|
308 |
0%| | 5/2030 [00:03<18:15, 1.85it/s]
|
309 |
0%| | 6/2030 [00:03<16:47, 2.01it/s]
|
310 |
0%| | 7/2030 [00:04<16:12, 2.08it/s]
|
311 |
0%| | 8/2030 [00:04<16:25, 2.05it/s]
|
312 |
0%| | 9/2030 [00:05<15:51, 2.12it/s]
|
313 |
0%| | 10/2030 [00:05<16:55, 1.99it/s]
|
314 |
1%| | 11/2030 [00:05<15:19, 2.20it/s]
|
315 |
1%| | 12/2030 [00:06<17:18, 1.94it/s]
|
316 |
1%| | 13/2030 [00:07<16:43, 2.01it/s]
|
317 |
1%| | 14/2030 [00:07<18:38, 1.80it/s]
|
318 |
1%| | 15/2030 [00:08<16:34, 2.03it/s]
|
319 |
1%| | 16/2030 [00:08<17:35, 1.91it/s]
|
320 |
1%| | 17/2030 [00:09<16:44, 2.00it/s]
|
321 |
1%| | 18/2030 [00:09<18:13, 1.84it/s]
|
322 |
1%| | 19/2030 [00:10<23:35, 1.42it/s]
|
323 |
1%| | 20/2030 [00:11<22:07, 1.51it/s]
|
324 |
1%| | 21/2030 [00:11<19:46, 1.69it/s]
|
325 |
1%| | 22/2030 [00:12<17:22, 1.93it/s]
|
326 |
1%| | 23/2030 [00:12<19:14, 1.74it/s]
|
327 |
1%| | 24/2030 [00:13<18:36, 1.80it/s]
|
328 |
1%| | 25/2030 [00:13<17:01, 1.96it/s]
|
329 |
1%|▏ | 26/2030 [00:14<16:28, 2.03it/s]
|
330 |
1%|▏ | 27/2030 [00:14<16:55, 1.97it/s]
|
331 |
1%|▏ | 28/2030 [00:15<18:05, 1.84it/s]
|
332 |
1%|▏ | 29/2030 [00:15<16:38, 2.00it/s]
|
333 |
1%|▏ | 30/2030 [00:16<16:25, 2.03it/s]
|
334 |
2%|▏ | 31/2030 [00:16<16:14, 2.05it/s]
|
335 |
2%|▏ | 32/2030 [00:17<16:03, 2.07it/s]
|
336 |
2%|▏ | 33/2030 [00:17<17:38, 1.89it/s]
|
337 |
2%|▏ | 34/2030 [00:18<16:22, 2.03it/s]
|
338 |
2%|▏ | 35/2030 [00:18<15:07, 2.20it/s]
|
339 |
2%|▏ | 36/2030 [00:19<14:36, 2.28it/s]
|
340 |
2%|▏ | 37/2030 [00:19<15:03, 2.21it/s]
|
341 |
2%|▏ | 38/2030 [00:19<14:25, 2.30it/s]
|
342 |
2%|▏ | 39/2030 [00:20<15:44, 2.11it/s]
|
343 |
2%|▏ | 40/2030 [00:20<14:43, 2.25it/s]
|
344 |
2%|▏ | 41/2030 [00:21<14:17, 2.32it/s]
|
345 |
2%|▏ | 42/2030 [00:21<14:54, 2.22it/s]
|
346 |
2%|▏ | 43/2030 [00:22<14:50, 2.23it/s]
|
347 |
2%|▏ | 44/2030 [00:22<15:11, 2.18it/s]
|
348 |
2%|▏ | 45/2030 [00:23<15:19, 2.16it/s]
|
349 |
2%|▏ | 46/2030 [00:23<14:40, 2.25it/s]
|
350 |
2%|▏ | 47/2030 [00:24<15:29, 2.13it/s]
|
351 |
2%|▏ | 48/2030 [00:24<14:57, 2.21it/s]
|
352 |
2%|▏ | 49/2030 [00:24<15:07, 2.18it/s]
|
353 |
2%|▏ | 50/2030 [00:25<15:57, 2.07it/s]
|
354 |
3%|▎ | 51/2030 [00:25<15:31, 2.12it/s]
|
355 |
3%|▎ | 52/2030 [00:26<15:21, 2.15it/s]
|
356 |
3%|▎ | 53/2030 [00:26<14:55, 2.21it/s]
|
357 |
3%|▎ | 54/2030 [00:27<13:40, 2.41it/s]
|
358 |
3%|▎ | 55/2030 [00:27<13:39, 2.41it/s]
|
359 |
3%|▎ | 56/2030 [00:28<14:39, 2.25it/s]
|
360 |
3%|▎ | 57/2030 [00:28<14:34, 2.26it/s]
|
361 |
3%|▎ | 58/2030 [00:28<14:11, 2.32it/s]
|
362 |
3%|▎ | 59/2030 [00:29<14:11, 2.31it/s]
|
363 |
3%|▎ | 60/2030 [00:30<16:39, 1.97it/s]
|
364 |
3%|▎ | 61/2030 [00:30<14:58, 2.19it/s]
|
365 |
3%|▎ | 62/2030 [00:30<13:48, 2.38it/s]
|
366 |
3%|▎ | 63/2030 [00:31<13:58, 2.35it/s]
|
367 |
3%|▎ | 64/2030 [00:31<14:10, 2.31it/s]
|
368 |
3%|▎ | 65/2030 [00:32<14:04, 2.33it/s]
|
369 |
3%|▎ | 66/2030 [00:32<13:33, 2.41it/s]
|
370 |
3%|▎ | 67/2030 [00:32<13:57, 2.34it/s]
|
371 |
3%|▎ | 68/2030 [00:33<13:23, 2.44it/s]
|
372 |
3%|▎ | 69/2030 [00:33<12:56, 2.53it/s]
|
373 |
3%|▎ | 70/2030 [00:34<14:55, 2.19it/s]
|
374 |
3%|▎ | 71/2030 [00:34<14:42, 2.22it/s]
|
375 |
4%|▎ | 72/2030 [00:35<14:08, 2.31it/s]
|
376 |
4%|▎ | 73/2030 [00:35<13:54, 2.34it/s]
|
377 |
4%|▎ | 74/2030 [00:35<13:32, 2.41it/s]
|
378 |
4%|▎ | 75/2030 [00:36<13:34, 2.40it/s]
|
379 |
4%|▎ | 76/2030 [00:36<13:41, 2.38it/s]
|
380 |
4%|▍ | 77/2030 [00:37<14:07, 2.30it/s]
|
381 |
4%|▍ | 78/2030 [00:37<14:33, 2.24it/s]
|
382 |
4%|▍ | 79/2030 [00:38<14:16, 2.28it/s]
|
383 |
4%|▍ | 80/2030 [00:38<14:14, 2.28it/s]
|
384 |
4%|▍ | 81/2030 [00:38<13:49, 2.35it/s]
|
385 |
4%|▍ | 82/2030 [00:39<14:52, 2.18it/s]
|
386 |
4%|▍ | 83/2030 [00:39<13:50, 2.34it/s]
|
387 |
4%|▍ | 84/2030 [00:40<15:49, 2.05it/s]
|
388 |
4%|▍ | 85/2030 [00:40<15:30, 2.09it/s]
|
389 |
4%|▍ | 86/2030 [00:41<15:10, 2.14it/s]
|
390 |
4%|▍ | 87/2030 [00:41<15:02, 2.15it/s]
|
391 |
4%|▍ | 88/2030 [00:42<15:54, 2.04it/s]
|
392 |
4%|▍ | 89/2030 [00:42<15:01, 2.15it/s]
|
393 |
4%|▍ | 90/2030 [00:43<14:36, 2.21it/s]
|
394 |
4%|▍ | 91/2030 [00:43<13:44, 2.35it/s]
|
395 |
5%|▍ | 92/2030 [00:43<14:17, 2.26it/s]
|
396 |
5%|▍ | 93/2030 [00:44<17:39, 1.83it/s]
|
397 |
5%|▍ | 94/2030 [00:45<16:23, 1.97it/s]
|
398 |
5%|▍ | 95/2030 [00:45<15:11, 2.12it/s]
|
399 |
5%|▍ | 96/2030 [00:46<14:51, 2.17it/s]
|
400 |
5%|▍ | 97/2030 [00:46<14:32, 2.21it/s]
|
401 |
5%|▍ | 98/2030 [00:46<14:14, 2.26it/s]
|
402 |
5%|▍ | 99/2030 [00:47<14:50, 2.17it/s]
|
403 |
5%|▍ | 100/2030 [00:47<13:48, 2.33it/s]
|
404 |
5%|▍ | 101/2030 [00:48<13:31, 2.38it/s]
|
405 |
5%|▌ | 102/2030 [00:48<13:42, 2.34it/s]
|
406 |
5%|▌ | 103/2030 [00:48<12:47, 2.51it/s]
|
407 |
5%|▌ | 104/2030 [00:49<14:29, 2.22it/s]
|
408 |
5%|▌ | 105/2030 [00:49<14:31, 2.21it/s]
|
409 |
5%|▌ | 106/2030 [00:50<14:53, 2.15it/s]
|
410 |
5%|▌ | 107/2030 [00:50<14:26, 2.22it/s]
|
411 |
5%|▌ | 108/2030 [00:51<13:20, 2.40it/s]
|
412 |
5%|▌ | 109/2030 [00:51<14:11, 2.25it/s]
|
413 |
5%|▌ | 110/2030 [00:52<14:00, 2.28it/s]
|
414 |
5%|▌ | 111/2030 [00:52<13:40, 2.34it/s]
|
415 |
6%|▌ | 112/2030 [00:52<13:15, 2.41it/s]
|
416 |
6%|▌ | 113/2030 [00:53<14:36, 2.19it/s]
|
417 |
6%|▌ | 114/2030 [00:53<15:33, 2.05it/s]
|
418 |
6%|▌ | 115/2030 [00:54<15:08, 2.11it/s]
|
419 |
6%|▌ | 116/2030 [00:54<14:34, 2.19it/s]
|
420 |
6%|▌ | 117/2030 [00:55<14:45, 2.16it/s]
|
421 |
6%|▌ | 118/2030 [00:55<14:07, 2.26it/s]
|
422 |
6%|▌ | 119/2030 [00:56<14:05, 2.26it/s]
|
423 |
6%|▌ | 120/2030 [00:56<13:44, 2.32it/s]
|
424 |
6%|▌ | 121/2030 [00:57<14:00, 2.27it/s]
|
425 |
6%|▌ | 122/2030 [00:57<13:23, 2.37it/s]
|
426 |
6%|▌ | 123/2030 [00:57<12:58, 2.45it/s]
|
427 |
6%|▌ | 124/2030 [00:58<15:07, 2.10it/s]
|
428 |
6%|▌ | 125/2030 [00:58<15:02, 2.11it/s]
|
429 |
6%|▌ | 126/2030 [00:59<15:04, 2.11it/s]
|
430 |
6%|▋ | 127/2030 [00:59<14:34, 2.18it/s]
|
431 |
6%|▋ | 128/2030 [01:00<14:47, 2.14it/s]
|
432 |
6%|▋ | 129/2030 [01:00<13:53, 2.28it/s]
|
433 |
6%|▋ | 130/2030 [01:01<16:03, 1.97it/s]
|
434 |
6%|▋ | 131/2030 [01:01<16:28, 1.92it/s]
|
435 |
7%|▋ | 132/2030 [01:02<15:23, 2.06it/s]
|
436 |
7%|▋ | 133/2030 [01:03<18:04, 1.75it/s]
|
437 |
7%|▋ | 134/2030 [01:03<17:08, 1.84it/s]
|
438 |
7%|▋ | 135/2030 [01:03<16:13, 1.95it/s]
|
439 |
7%|▋ | 136/2030 [01:04<15:14, 2.07it/s]
|
440 |
7%|▋ | 137/2030 [01:04<13:58, 2.26it/s]
|
441 |
7%|▋ | 138/2030 [01:05<13:47, 2.29it/s]
|
442 |
7%|▋ | 139/2030 [01:05<13:46, 2.29it/s]
|
443 |
7%|▋ | 140/2030 [01:06<17:13, 1.83it/s]
|
444 |
7%|▋ | 141/2030 [01:06<15:44, 2.00it/s]
|
445 |
7%|▋ | 142/2030 [01:07<18:04, 1.74it/s]
|
446 |
7%|▋ | 143/2030 [01:07<16:32, 1.90it/s]
|
447 |
7%|▋ | 144/2030 [01:08<17:46, 1.77it/s]
|
448 |
7%|▋ | 145/2030 [01:08<15:39, 2.01it/s]
|
449 |
7%|▋ | 146/2030 [01:09<15:15, 2.06it/s]
|
450 |
7%|▋ | 147/2030 [01:09<14:20, 2.19it/s]
|
451 |
7%|▋ | 148/2030 [01:10<13:42, 2.29it/s]
|
452 |
7%|▋ | 149/2030 [01:10<13:13, 2.37it/s]
|
453 |
7%|▋ | 150/2030 [01:11<14:12, 2.21it/s]
|
454 |
7%|▋ | 151/2030 [01:11<15:01, 2.08it/s]
|
455 |
7%|▋ | 152/2030 [01:12<14:34, 2.15it/s]
|
456 |
8%|▊ | 153/2030 [01:12<13:22, 2.34it/s]
|
457 |
8%|▊ | 154/2030 [01:12<14:04, 2.22it/s]
|
458 |
8%|▊ | 155/2030 [01:13<13:31, 2.31it/s]
|
459 |
8%|▊ | 156/2030 [01:13<14:24, 2.17it/s]
|
460 |
8%|▊ | 157/2030 [01:14<14:18, 2.18it/s]
|
461 |
8%|▊ | 158/2030 [01:14<15:19, 2.04it/s]
|
462 |
8%|▊ | 159/2030 [01:15<14:23, 2.17it/s]
|
463 |
8%|▊ | 160/2030 [01:15<13:47, 2.26it/s]
|
464 |
8%|▊ | 161/2030 [01:16<13:50, 2.25it/s]
|
465 |
8%|▊ | 162/2030 [01:16<14:12, 2.19it/s]
|
466 |
8%|▊ | 163/2030 [01:17<13:53, 2.24it/s]
|
467 |
8%|▊ | 164/2030 [01:17<13:45, 2.26it/s]
|
468 |
8%|▊ | 165/2030 [01:17<13:39, 2.28it/s]
|
469 |
8%|▊ | 166/2030 [01:18<15:08, 2.05it/s]
|
470 |
8%|▊ | 167/2030 [01:18<14:35, 2.13it/s]
|
471 |
8%|▊ | 168/2030 [01:19<14:27, 2.15it/s]
|
472 |
8%|▊ | 169/2030 [01:19<13:34, 2.29it/s]
|
473 |
8%|▊ | 170/2030 [01:20<14:37, 2.12it/s]
|
474 |
8%|▊ | 171/2030 [01:20<13:29, 2.30it/s]
|
475 |
8%|▊ | 172/2030 [01:21<16:15, 1.90it/s]
|
476 |
9%|▊ | 173/2030 [01:21<15:03, 2.06it/s]
|
477 |
9%|▊ | 174/2030 [01:22<13:56, 2.22it/s]
|
478 |
9%|▊ | 175/2030 [01:22<13:27, 2.30it/s]
|
479 |
9%|▊ | 176/2030 [01:22<12:39, 2.44it/s]
|
480 |
9%|▊ | 177/2030 [01:23<14:01, 2.20it/s]
|
481 |
9%|▉ | 178/2030 [01:23<12:55, 2.39it/s]
|
482 |
9%|▉ | 179/2030 [01:24<12:19, 2.50it/s]
|
483 |
9%|▉ | 180/2030 [01:24<12:59, 2.37it/s]
|
484 |
9%|▉ | 181/2030 [01:24<12:19, 2.50it/s]
|
485 |
9%|▉ | 182/2030 [01:25<12:00, 2.56it/s]
|
486 |
9%|▉ | 183/2030 [01:25<12:02, 2.56it/s]
|
487 |
9%|▉ | 184/2030 [01:26<12:22, 2.49it/s]
|
488 |
9%|▉ | 185/2030 [01:26<12:37, 2.44it/s]
|
489 |
9%|▉ | 186/2030 [01:26<12:00, 2.56it/s]
|
490 |
9%|▉ | 187/2030 [01:27<12:49, 2.39it/s]
|
491 |
9%|▉ | 188/2030 [01:27<13:38, 2.25it/s]
|
492 |
9%|▉ | 189/2030 [01:28<13:21, 2.30it/s]
|
493 |
9%|▉ | 190/2030 [01:29<16:36, 1.85it/s]
|
494 |
9%|▉ | 191/2030 [01:29<16:06, 1.90it/s]
|
495 |
9%|▉ | 192/2030 [01:29<14:24, 2.13it/s]
|
496 |
10%|▉ | 193/2030 [01:30<16:50, 1.82it/s]
|
497 |
10%|▉ | 194/2030 [01:31<15:12, 2.01it/s]
|
498 |
10%|▉ | 195/2030 [01:31<15:09, 2.02it/s]
|
499 |
10%|▉ | 196/2030 [01:31<14:00, 2.18it/s]
|
500 |
10%|▉ | 197/2030 [01:32<16:28, 1.85it/s]
|
501 |
10%|▉ | 198/2030 [01:33<18:54, 1.62it/s]
|
502 |
10%|▉ | 199/2030 [01:33<17:25, 1.75it/s]
|
503 |
10%|▉ | 200/2030 [01:34<16:29, 1.85it/s]
|
504 |
10%|▉ | 201/2030 [01:34<15:25, 1.98it/s]
|
505 |
10%|▉ | 202/2030 [01:35<14:43, 2.07it/s]
|
506 |
10%|█ | 203/2030 [01:35<14:58, 2.03it/s][INFO|trainer.py:811] 2024-09-09 11:55:48,641 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
507 |
+
[INFO|trainer.py:3819] 2024-09-09 11:55:48,644 >>
|
508 |
+
***** Running Evaluation *****
|
509 |
+
[INFO|trainer.py:3821] 2024-09-09 11:55:48,644 >> Num examples = 2519
|
510 |
+
[INFO|trainer.py:3824] 2024-09-09 11:55:48,644 >> Batch size = 8
|
511 |
+
|
512 |
+
|
513 |
0%| | 0/315 [00:00<?, ?it/s][A
|
514 |
+
|
515 |
3%|▎ | 8/315 [00:00<00:04, 76.12it/s][A
|
516 |
+
|
517 |
5%|▌ | 16/315 [00:00<00:04, 74.05it/s][A
|
518 |
+
|
519 |
8%|▊ | 24/315 [00:00<00:03, 75.18it/s][A
|
520 |
+
|
521 |
10%|█ | 32/315 [00:00<00:03, 70.92it/s][A
|
522 |
+
|
523 |
13%|█▎ | 40/315 [00:00<00:03, 73.74it/s][A
|
524 |
+
|
525 |
15%|█▌ | 48/315 [00:00<00:03, 74.41it/s][A
|
526 |
+
|
527 |
18%|█▊ | 56/315 [00:00<00:03, 73.32it/s][A
|
528 |
+
|
529 |
20%|██ | 64/315 [00:00<00:03, 71.03it/s][A
|
530 |
+
|
531 |
23%|██▎ | 72/315 [00:00<00:03, 73.04it/s][A
|
532 |
+
|
533 |
25%|██▌ | 80/315 [00:01<00:03, 69.62it/s][A
|
534 |
+
|
535 |
28%|██▊ | 88/315 [00:01<00:03, 67.05it/s][A
|
536 |
+
|
537 |
30%|███ | 96/315 [00:01<00:03, 70.09it/s][A
|
538 |
+
|
539 |
33%|███▎ | 104/315 [00:01<00:03, 67.36it/s][A
|
540 |
+
|
541 |
36%|███▌ | 112/315 [00:01<00:02, 69.60it/s][A
|
542 |
+
|
543 |
38%|███▊ | 120/315 [00:01<00:02, 69.30it/s][A
|
544 |
+
|
545 |
40%|████ | 127/315 [00:01<00:02, 68.58it/s][A
|
546 |
+
|
547 |
43%|████▎ | 134/315 [00:01<00:02, 67.97it/s][A
|
548 |
+
|
549 |
45%|████▍ | 141/315 [00:02<00:02, 68.37it/s][A
|
550 |
+
|
551 |
47%|████▋ | 149/315 [00:02<00:02, 70.96it/s][A
|
552 |
+
|
553 |
50%|█████ | 158/315 [00:02<00:02, 74.16it/s][A
|
554 |
+
|
555 |
53%|█████▎ | 166/315 [00:02<00:02, 72.11it/s][A
|
556 |
+
|
557 |
55%|█████▌ | 174/315 [00:02<00:01, 71.61it/s][A
|
558 |
+
|
559 |
58%|█████▊ | 182/315 [00:02<00:01, 68.73it/s][A
|
560 |
+
|
561 |
60%|██████ | 189/315 [00:02<00:01, 68.38it/s][A
|
562 |
+
|
563 |
62%|██████▏ | 196/315 [00:02<00:01, 67.76it/s][A
|
564 |
+
|
565 |
64%|██████▍ | 203/315 [00:02<00:01, 64.44it/s][A
|
566 |
+
|
567 |
67%|██████▋ | 210/315 [00:03<00:01, 64.84it/s][A
|
568 |
+
|
569 |
69%|██████▉ | 218/315 [00:03<00:01, 68.36it/s][A
|
570 |
+
|
571 |
72%|███████▏ | 226/315 [00:03<00:01, 70.84it/s][A
|
572 |
+
|
573 |
75%|███████▍ | 235/315 [00:03<00:01, 73.77it/s][A
|
574 |
+
|
575 |
77%|███████▋ | 243/315 [00:03<00:01, 70.62it/s][A
|
576 |
+
|
577 |
80%|███████▉ | 251/315 [00:03<00:00, 70.66it/s][A
|
578 |
+
|
579 |
82%|████████▏ | 259/315 [00:03<00:00, 69.14it/s][A
|
580 |
+
|
581 |
85%|████████▍ | 267/315 [00:03<00:00, 70.28it/s][A
|
582 |
+
|
583 |
88%|████████▊ | 276/315 [00:03<00:00, 73.48it/s][A
|
584 |
+
|
585 |
90%|█████████ | 284/315 [00:04<00:00, 73.87it/s][A
|
586 |
+
|
587 |
93%|█████████▎| 292/315 [00:04<00:00, 71.52it/s][A
|
588 |
+
|
589 |
95%|█████████▌| 300/315 [00:04<00:00, 71.25it/s][A
|
590 |
+
|
591 |
98%|█████████▊| 308/315 [00:04<00:00, 71.82it/s][A
|
592 |
|
593 |
+
|
594 |
|
595 |
10%|█ | 203/2030 [01:41<14:58, 2.03it/s]
|
596 |
+
|
597 |
+
|
598 |
[A[INFO|trainer.py:3503] 2024-09-09 11:55:54,552 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-203
|
599 |
+
[INFO|configuration_utils.py:472] 2024-09-09 11:55:54,553 >> Configuration saved in /content/dissertation/scripts/ner/output/checkpoint-203/config.json
|
600 |
+
[INFO|modeling_utils.py:2799] 2024-09-09 11:55:55,568 >> Model weights saved in /content/dissertation/scripts/ner/output/checkpoint-203/model.safetensors
|
601 |
+
[INFO|tokenization_utils_base.py:2684] 2024-09-09 11:55:55,569 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/checkpoint-203/tokenizer_config.json
|
602 |
+
[INFO|tokenization_utils_base.py:2693] 2024-09-09 11:55:55,569 >> Special tokens file saved in /content/dissertation/scripts/ner/output/checkpoint-203/special_tokens_map.json
|
603 |
+
[INFO|tokenization_utils_base.py:2684] 2024-09-09 11:56:00,182 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
|
604 |
+
[INFO|tokenization_utils_base.py:2693] 2024-09-09 11:56:00,183 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
|
605 |
+
|
606 |
10%|█ | 204/2030 [01:47<1:59:40, 3.93s/it]
|
607 |
10%|█ | 205/2030 [01:48<1:28:45, 2.92s/it]
|
608 |
10%|█ | 206/2030 [01:48<1:06:21, 2.18s/it]
|
609 |
10%|█ | 207/2030 [01:49<50:53, 1.67s/it]
|
610 |
10%|█ | 208/2030 [01:49<39:22, 1.30s/it]
|
611 |
10%|█ | 209/2030 [01:49<30:44, 1.01s/it]
|
612 |
10%|█ | 210/2030 [01:50<25:37, 1.18it/s]
|
613 |
10%|█ | 211/2030 [01:51<24:14, 1.25it/s]
|
614 |
10%|█ | 212/2030 [01:51<21:03, 1.44it/s]
|
615 |
10%|█ | 213/2030 [01:51<18:36, 1.63it/s]
|
616 |
11%|█ | 214/2030 [01:52<17:59, 1.68it/s]
|
617 |
11%|█ | 215/2030 [01:52<16:37, 1.82it/s]
|
618 |
11%|█ | 216/2030 [01:53<15:39, 1.93it/s]
|
619 |
11%|█ | 217/2030 [01:54<19:55, 1.52it/s]
|
620 |
11%|█ | 218/2030 [01:54<17:23, 1.74it/s]
|
621 |
11%|█ | 219/2030 [01:55<16:28, 1.83it/s]
|
622 |
11%|█ | 220/2030 [01:55<17:39, 1.71it/s]
|
623 |
11%|█ | 221/2030 [01:56<15:31, 1.94it/s]
|
624 |
11%|█ | 222/2030 [01:56<14:36, 2.06it/s]
|
625 |
11%|█ | 223/2030 [01:57<14:18, 2.11it/s]
|
626 |
11%|█ | 224/2030 [01:57<16:05, 1.87it/s]
|
627 |
11%|█ | 225/2030 [01:58<15:11, 1.98it/s]
|
628 |
11%|█ | 226/2030 [01:58<14:18, 2.10it/s]
|
629 |
11%|█ | 227/2030 [01:59<14:05, 2.13it/s]
|
630 |
11%|█ | 228/2030 [01:59<13:34, 2.21it/s]
|
631 |
11%|█▏ | 229/2030 [01:59<12:44, 2.36it/s]
|
632 |
11%|█▏ | 230/2030 [02:00<12:42, 2.36it/s]
|
633 |
11%|█▏ | 231/2030 [02:00<12:22, 2.42it/s]
|
634 |
11%|█▏ | 232/2030 [02:01<12:16, 2.44it/s]
|
635 |
11%|█▏ | 233/2030 [02:01<12:45, 2.35it/s]
|
636 |
12%|█▏ | 234/2030 [02:02<14:11, 2.11it/s]
|
637 |
12%|█▏ | 235/2030 [02:02<13:37, 2.20it/s]
|
638 |
12%|█▏ | 236/2030 [02:02<13:13, 2.26it/s]
|
639 |
12%|█▏ | 237/2030 [02:03<13:48, 2.16it/s]
|
640 |
12%|█▏ | 238/2030 [02:03<13:05, 2.28it/s]
|
641 |
12%|█▏ | 239/2030 [02:04<12:34, 2.37it/s]
|
642 |
12%|█▏ | 240/2030 [02:04<12:38, 2.36it/s]
|
643 |
12%|█▏ | 241/2030 [02:05<12:12, 2.44it/s]
|
644 |
12%|█▏ | 242/2030 [02:05<12:03, 2.47it/s]
|
645 |
12%|█▏ | 243/2030 [02:05<13:09, 2.26it/s]
|
646 |
12%|█▏ | 244/2030 [02:06<13:57, 2.13it/s]
|
647 |
12%|█▏ | 245/2030 [02:06<12:28, 2.39it/s]
|
648 |
12%|█▏ | 246/2030 [02:07<11:47, 2.52it/s]
|
649 |
12%|█▏ | 247/2030 [02:07<12:04, 2.46it/s]
|
650 |
12%|█▏ | 248/2030 [02:07<11:32, 2.57it/s]
|
651 |
12%|█▏ | 249/2030 [02:08<12:22, 2.40it/s]
|
652 |
12%|█▏ | 250/2030 [02:08<12:07, 2.45it/s]
|
653 |
12%|█▏ | 251/2030 [02:09<11:40, 2.54it/s]
|
654 |
12%|█▏ | 252/2030 [02:09<14:13, 2.08it/s]
|
655 |
12%|█▏ | 253/2030 [02:10<15:51, 1.87it/s]
|
656 |
13%|█▎ | 254/2030 [02:10<14:48, 2.00it/s]
|
657 |
13%|█▎ | 255/2030 [02:11<16:12, 1.83it/s]
|
658 |
13%|█▎ | 256/2030 [02:12<15:55, 1.86it/s]
|
659 |
13%|█▎ | 257/2030 [02:12<15:12, 1.94it/s]
|
660 |
13%|█▎ | 258/2030 [02:13<16:09, 1.83it/s]
|
661 |
13%|█▎ | 259/2030 [02:13<14:20, 2.06it/s]
|
662 |
13%|█▎ | 260/2030 [02:13<13:31, 2.18it/s]
|
663 |
13%|█▎ | 261/2030 [02:14<16:16, 1.81it/s]
|
664 |
13%|█▎ | 262/2030 [02:15<14:54, 1.98it/s]
|
665 |
13%|█▎ | 263/2030 [02:15<14:32, 2.03it/s]
|
666 |
13%|█▎ | 264/2030 [02:16<15:26, 1.91it/s]
|
667 |
13%|█▎ | 265/2030 [02:16<14:04, 2.09it/s]
|
668 |
13%|█▎ | 266/2030 [02:16<13:00, 2.26it/s]
|
669 |
13%|█▎ | 267/2030 [02:17<12:11, 2.41it/s]
|
670 |
13%|█▎ | 268/2030 [02:17<12:46, 2.30it/s]
|
671 |
13%|█▎ | 269/2030 [02:18<11:57, 2.46it/s]
|
672 |
13%|█▎ | 270/2030 [02:18<12:20, 2.38it/s]
|
673 |
13%|█▎ | 271/2030 [02:19<12:47, 2.29it/s]
|
674 |
13%|█▎ | 272/2030 [02:19<13:52, 2.11it/s]
|
675 |
13%|█▎ | 273/2030 [02:20<13:54, 2.11it/s]
|
676 |
13%|█▎ | 274/2030 [02:20<12:44, 2.30it/s]
|
677 |
14%|█▎ | 275/2030 [02:20<12:38, 2.31it/s]
|
678 |
14%|█▎ | 276/2030 [02:21<12:42, 2.30it/s]
|
679 |
14%|█▎ | 277/2030 [02:21<12:34, 2.32it/s]
|
680 |
14%|█▎ | 278/2030 [02:22<12:30, 2.33it/s]
|
681 |
14%|█▎ | 279/2030 [02:22<12:28, 2.34it/s]
|
682 |
14%|█▍ | 280/2030 [02:22<11:52, 2.46it/s]
|
683 |
14%|█▍ | 281/2030 [02:23<12:37, 2.31it/s]
|
684 |
14%|█▍ | 282/2030 [02:23<12:16, 2.37it/s]
|
685 |
14%|█▍ | 283/2030 [02:24<12:33, 2.32it/s]
|
686 |
14%|█▍ | 284/2030 [02:24<13:13, 2.20it/s]
|
687 |
14%|█▍ | 285/2030 [02:25<13:08, 2.21it/s]
|
688 |
14%|█▍ | 286/2030 [02:25<13:09, 2.21it/s]
|
689 |
14%|█▍ | 287/2030 [02:26<12:27, 2.33it/s]
|
690 |
14%|█▍ | 288/2030 [02:26<13:57, 2.08it/s]
|
691 |
14%|█▍ | 289/2030 [02:27<13:51, 2.09it/s]
|
692 |
14%|█▍ | 290/2030 [02:27<15:16, 1.90it/s]
|
693 |
14%|█▍ | 291/2030 [02:28<14:18, 2.02it/s]
|
694 |
14%|█▍ | 292/2030 [02:28<15:12, 1.91it/s]
|
695 |
14%|█▍ | 293/2030 [02:29<14:05, 2.05it/s]
|
696 |
14%|█▍ | 294/2030 [02:29<15:37, 1.85it/s]
|
697 |
15%|█▍ | 295/2030 [02:30<13:51, 2.09it/s]
|
698 |
15%|█▍ | 296/2030 [02:30<13:08, 2.20it/s]
|
699 |
15%|█▍ | 297/2030 [02:31<13:30, 2.14it/s]
|
700 |
15%|█▍ | 298/2030 [02:31<13:08, 2.20it/s]
|
701 |
15%|█▍ | 299/2030 [02:31<13:26, 2.15it/s]
|
702 |
15%|█▍ | 300/2030 [02:32<14:33, 1.98it/s]
|
703 |
15%|█▍ | 301/2030 [02:32<14:07, 2.04it/s]
|
704 |
15%|█▍ | 302/2030 [02:33<13:49, 2.08it/s]
|
705 |
15%|█▍ | 303/2030 [02:33<12:42, 2.26it/s]
|
706 |
15%|█▍ | 304/2030 [02:34<11:59, 2.40it/s]
|
707 |
15%|█▌ | 305/2030 [02:34<14:03, 2.04it/s]
|
708 |
15%|█▌ | 306/2030 [02:35<13:33, 2.12it/s]
|
709 |
15%|█▌ | 307/2030 [02:35<13:04, 2.20it/s]
|
710 |
15%|█▌ | 308/2030 [02:36<13:12, 2.17it/s]
|
711 |
15%|█▌ | 309/2030 [02:36<12:55, 2.22it/s]
|
712 |
15%|█▌ | 310/2030 [02:37<13:23, 2.14it/s]
|
713 |
15%|█▌ | 311/2030 [02:37<12:53, 2.22it/s]
|
train_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"total_flos": 8092971627384348.0,
|
4 |
+
"train_loss": 0.033683168437591816,
|
5 |
+
"train_runtime": 1318.6544,
|
6 |
+
"train_samples": 16483,
|
7 |
+
"train_samples_per_second": 124.999,
|
8 |
+
"train_steps_per_second": 1.957
|
9 |
+
}
|
trainer_state.json
ADDED
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.696671105193076,
|
3 |
+
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2322",
|
4 |
+
"epoch": 10.0,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 2580,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 1.0,
|
13 |
+
"eval_accuracy": 0.9488273605184638,
|
14 |
+
"eval_f1": 0.6400434664493344,
|
15 |
+
"eval_loss": 0.15167976915836334,
|
16 |
+
"eval_precision": 0.6353829557713053,
|
17 |
+
"eval_recall": 0.6447728516694033,
|
18 |
+
"eval_runtime": 5.9858,
|
19 |
+
"eval_samples_per_second": 420.828,
|
20 |
+
"eval_steps_per_second": 52.624,
|
21 |
+
"step": 258
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 1.937984496124031,
|
25 |
+
"grad_norm": 0.791233479976654,
|
26 |
+
"learning_rate": 4.0310077519379843e-05,
|
27 |
+
"loss": 0.1357,
|
28 |
+
"step": 500
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"epoch": 2.0,
|
32 |
+
"eval_accuracy": 0.945955917738779,
|
33 |
+
"eval_f1": 0.6695763799743261,
|
34 |
+
"eval_loss": 0.20252634584903717,
|
35 |
+
"eval_precision": 0.6305609284332688,
|
36 |
+
"eval_recall": 0.7137383689107827,
|
37 |
+
"eval_runtime": 5.9083,
|
38 |
+
"eval_samples_per_second": 426.351,
|
39 |
+
"eval_steps_per_second": 53.315,
|
40 |
+
"step": 516
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"epoch": 3.0,
|
44 |
+
"eval_accuracy": 0.9495652731881036,
|
45 |
+
"eval_f1": 0.6838606753522999,
|
46 |
+
"eval_loss": 0.22940471768379211,
|
47 |
+
"eval_precision": 0.6649431230610134,
|
48 |
+
"eval_recall": 0.7038861521620142,
|
49 |
+
"eval_runtime": 5.8946,
|
50 |
+
"eval_samples_per_second": 427.339,
|
51 |
+
"eval_steps_per_second": 53.439,
|
52 |
+
"step": 774
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"epoch": 3.875968992248062,
|
56 |
+
"grad_norm": 0.907940685749054,
|
57 |
+
"learning_rate": 3.062015503875969e-05,
|
58 |
+
"loss": 0.0238,
|
59 |
+
"step": 1000
|
60 |
+
},
|
61 |
+
{
|
62 |
+
"epoch": 4.0,
|
63 |
+
"eval_accuracy": 0.9491963168532838,
|
64 |
+
"eval_f1": 0.6872504657971785,
|
65 |
+
"eval_loss": 0.28175726532936096,
|
66 |
+
"eval_precision": 0.6689119170984456,
|
67 |
+
"eval_recall": 0.7066228790366721,
|
68 |
+
"eval_runtime": 5.8968,
|
69 |
+
"eval_samples_per_second": 427.179,
|
70 |
+
"eval_steps_per_second": 53.419,
|
71 |
+
"step": 1032
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 5.0,
|
75 |
+
"eval_accuracy": 0.9487311110398152,
|
76 |
+
"eval_f1": 0.6773768764814327,
|
77 |
+
"eval_loss": 0.27620697021484375,
|
78 |
+
"eval_precision": 0.6527918781725889,
|
79 |
+
"eval_recall": 0.7038861521620142,
|
80 |
+
"eval_runtime": 5.9102,
|
81 |
+
"eval_samples_per_second": 426.215,
|
82 |
+
"eval_steps_per_second": 53.298,
|
83 |
+
"step": 1290
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"epoch": 5.813953488372093,
|
87 |
+
"grad_norm": 0.2446424663066864,
|
88 |
+
"learning_rate": 2.0930232558139536e-05,
|
89 |
+
"loss": 0.0081,
|
90 |
+
"step": 1500
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"epoch": 6.0,
|
94 |
+
"eval_accuracy": 0.9483942378645449,
|
95 |
+
"eval_f1": 0.6922672277748553,
|
96 |
+
"eval_loss": 0.2938424348831177,
|
97 |
+
"eval_precision": 0.6663291139240506,
|
98 |
+
"eval_recall": 0.7203065134099617,
|
99 |
+
"eval_runtime": 5.9062,
|
100 |
+
"eval_samples_per_second": 426.502,
|
101 |
+
"eval_steps_per_second": 53.334,
|
102 |
+
"step": 1548
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"epoch": 7.0,
|
106 |
+
"eval_accuracy": 0.9499181879431486,
|
107 |
+
"eval_f1": 0.6893020749124225,
|
108 |
+
"eval_loss": 0.3144644498825073,
|
109 |
+
"eval_precision": 0.6788747346072187,
|
110 |
+
"eval_recall": 0.7000547345374931,
|
111 |
+
"eval_runtime": 5.9264,
|
112 |
+
"eval_samples_per_second": 425.045,
|
113 |
+
"eval_steps_per_second": 53.152,
|
114 |
+
"step": 1806
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 7.751937984496124,
|
118 |
+
"grad_norm": 1.7770023345947266,
|
119 |
+
"learning_rate": 1.1240310077519382e-05,
|
120 |
+
"loss": 0.0039,
|
121 |
+
"step": 2000
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 8.0,
|
125 |
+
"eval_accuracy": 0.949100067374635,
|
126 |
+
"eval_f1": 0.6865512649800267,
|
127 |
+
"eval_loss": 0.32666969299316406,
|
128 |
+
"eval_precision": 0.6685684647302904,
|
129 |
+
"eval_recall": 0.705528188286809,
|
130 |
+
"eval_runtime": 5.9058,
|
131 |
+
"eval_samples_per_second": 426.528,
|
132 |
+
"eval_steps_per_second": 53.337,
|
133 |
+
"step": 2064
|
134 |
+
},
|
135 |
+
{
|
136 |
+
"epoch": 9.0,
|
137 |
+
"eval_accuracy": 0.9490359010555359,
|
138 |
+
"eval_f1": 0.696671105193076,
|
139 |
+
"eval_loss": 0.33743786811828613,
|
140 |
+
"eval_precision": 0.6784232365145229,
|
141 |
+
"eval_recall": 0.715927750410509,
|
142 |
+
"eval_runtime": 5.9091,
|
143 |
+
"eval_samples_per_second": 426.292,
|
144 |
+
"eval_steps_per_second": 53.308,
|
145 |
+
"step": 2322
|
146 |
+
},
|
147 |
+
{
|
148 |
+
"epoch": 9.689922480620154,
|
149 |
+
"grad_norm": 0.005967797711491585,
|
150 |
+
"learning_rate": 1.550387596899225e-06,
|
151 |
+
"loss": 0.0021,
|
152 |
+
"step": 2500
|
153 |
+
},
|
154 |
+
{
|
155 |
+
"epoch": 10.0,
|
156 |
+
"eval_accuracy": 0.9495011068690045,
|
157 |
+
"eval_f1": 0.6949744692287019,
|
158 |
+
"eval_loss": 0.33997026085853577,
|
159 |
+
"eval_precision": 0.6826821541710665,
|
160 |
+
"eval_recall": 0.7077175697865353,
|
161 |
+
"eval_runtime": 6.2732,
|
162 |
+
"eval_samples_per_second": 401.551,
|
163 |
+
"eval_steps_per_second": 50.214,
|
164 |
+
"step": 2580
|
165 |
+
},
|
166 |
+
{
|
167 |
+
"epoch": 10.0,
|
168 |
+
"step": 2580,
|
169 |
+
"total_flos": 8092971627384348.0,
|
170 |
+
"train_loss": 0.033683168437591816,
|
171 |
+
"train_runtime": 1318.6544,
|
172 |
+
"train_samples_per_second": 124.999,
|
173 |
+
"train_steps_per_second": 1.957
|
174 |
+
}
|
175 |
+
],
|
176 |
+
"logging_steps": 500,
|
177 |
+
"max_steps": 2580,
|
178 |
+
"num_input_tokens_seen": 0,
|
179 |
+
"num_train_epochs": 10,
|
180 |
+
"save_steps": 500,
|
181 |
+
"stateful_callbacks": {
|
182 |
+
"TrainerControl": {
|
183 |
+
"args": {
|
184 |
+
"should_epoch_stop": false,
|
185 |
+
"should_evaluate": false,
|
186 |
+
"should_log": false,
|
187 |
+
"should_save": true,
|
188 |
+
"should_training_stop": true
|
189 |
+
},
|
190 |
+
"attributes": {}
|
191 |
+
}
|
192 |
+
},
|
193 |
+
"total_flos": 8092971627384348.0,
|
194 |
+
"train_batch_size": 32,
|
195 |
+
"trial_name": null,
|
196 |
+
"trial_params": null
|
197 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13556e6c97b2f39e25d5830ab0bc61ce81f807bcf643d150d23dd97c2f606c57
|
3 |
+
size 5240
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|