Rodrigo1771
committed on
Commit
•
8a12c6e
1
Parent(s):
7abe963
End of training
Browse files- README.md +12 -11
- all_results.json +23 -23
- eval_results.json +9 -9
- predict_results.json +8 -8
- predictions.txt +0 -0
- tb/events.out.tfevents.1725883955.0a1c9bec2a53.9893.1 +3 -0
- train.log +48 -0
- train_results.json +7 -7
- trainer_state.json +129 -136
README.md
CHANGED
@@ -3,9 +3,10 @@ library_name: transformers
|
|
3 |
license: apache-2.0
|
4 |
base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
|
5 |
tags:
|
|
|
6 |
- generated_from_trainer
|
7 |
datasets:
|
8 |
-
- symptemist-fasttext-8-ner
|
9 |
metrics:
|
10 |
- precision
|
11 |
- recall
|
@@ -18,21 +19,21 @@ model-index:
|
|
18 |
name: Token Classification
|
19 |
type: token-classification
|
20 |
dataset:
|
21 |
-
name: symptemist-fasttext-8-ner
|
22 |
-
type: symptemist-fasttext-8-ner
|
23 |
config: SympTEMIST NER
|
24 |
split: validation
|
25 |
args: SympTEMIST NER
|
26 |
metrics:
|
27 |
- name: Precision
|
28 |
type: precision
|
29 |
-
value: 0.
|
30 |
- name: Recall
|
31 |
type: recall
|
32 |
-
value: 0.
|
33 |
- name: F1
|
34 |
type: f1
|
35 |
-
value: 0.
|
36 |
- name: Accuracy
|
37 |
type: accuracy
|
38 |
value: 0.9500465205813469
|
@@ -43,12 +44,12 @@ should probably proofread and complete it, then remove this comment. -->
|
|
43 |
|
44 |
# output
|
45 |
|
46 |
-
This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the symptemist-fasttext-8-ner dataset.
|
47 |
It achieves the following results on the evaluation set:
|
48 |
-
- Loss: 0.
|
49 |
-
- Precision: 0.
|
50 |
-
- Recall: 0.
|
51 |
-
- F1: 0.
|
52 |
- Accuracy: 0.9500
|
53 |
|
54 |
## Model description
|
|
|
3 |
license: apache-2.0
|
4 |
base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
|
5 |
tags:
|
6 |
+
- token-classification
|
7 |
- generated_from_trainer
|
8 |
datasets:
|
9 |
+
- Rodrigo1771/symptemist-fasttext-8-ner
|
10 |
metrics:
|
11 |
- precision
|
12 |
- recall
|
|
|
19 |
name: Token Classification
|
20 |
type: token-classification
|
21 |
dataset:
|
22 |
+
name: Rodrigo1771/symptemist-fasttext-8-ner
|
23 |
+
type: Rodrigo1771/symptemist-fasttext-8-ner
|
24 |
config: SympTEMIST NER
|
25 |
split: validation
|
26 |
args: SympTEMIST NER
|
27 |
metrics:
|
28 |
- name: Precision
|
29 |
type: precision
|
30 |
+
value: 0.6764102564102564
|
31 |
- name: Recall
|
32 |
type: recall
|
33 |
+
value: 0.7219485495347564
|
34 |
- name: F1
|
35 |
type: f1
|
36 |
+
value: 0.6984379136881121
|
37 |
- name: Accuracy
|
38 |
type: accuracy
|
39 |
value: 0.9500465205813469
|
|
|
44 |
|
45 |
# output
|
46 |
|
47 |
+
This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/symptemist-fasttext-8-ner dataset.
|
48 |
It achieves the following results on the evaluation set:
|
49 |
+
- Loss: 0.3073
|
50 |
+
- Precision: 0.6764
|
51 |
+
- Recall: 0.7219
|
52 |
+
- F1: 0.6984
|
53 |
- Accuracy: 0.9500
|
54 |
|
55 |
## Model description
|
all_results.json
CHANGED
@@ -1,26 +1,26 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_f1": 0.
|
5 |
-
"eval_loss": 0.
|
6 |
-
"eval_precision": 0.
|
7 |
-
"eval_recall": 0.
|
8 |
-
"eval_runtime":
|
9 |
"eval_samples": 2519,
|
10 |
-
"eval_samples_per_second":
|
11 |
-
"eval_steps_per_second":
|
12 |
-
"predict_accuracy": 0.
|
13 |
-
"predict_f1": 0.
|
14 |
-
"predict_loss": 0.
|
15 |
-
"predict_precision": 0.
|
16 |
-
"predict_recall": 0.
|
17 |
-
"predict_runtime": 9.
|
18 |
-
"predict_samples_per_second":
|
19 |
-
"predict_steps_per_second": 51.
|
20 |
-
"total_flos":
|
21 |
-
"train_loss": 0.
|
22 |
-
"train_runtime":
|
23 |
-
"train_samples":
|
24 |
-
"train_samples_per_second":
|
25 |
-
"train_steps_per_second": 1.
|
26 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 9.975429975429975,
|
3 |
+
"eval_accuracy": 0.9500465205813469,
|
4 |
+
"eval_f1": 0.6984379136881121,
|
5 |
+
"eval_loss": 0.30729904770851135,
|
6 |
+
"eval_precision": 0.6764102564102564,
|
7 |
+
"eval_recall": 0.7219485495347564,
|
8 |
+
"eval_runtime": 6.0921,
|
9 |
"eval_samples": 2519,
|
10 |
+
"eval_samples_per_second": 413.484,
|
11 |
+
"eval_steps_per_second": 51.706,
|
12 |
+
"predict_accuracy": 0.9466933985906772,
|
13 |
+
"predict_f1": 0.6951548848292296,
|
14 |
+
"predict_loss": 0.3347860872745514,
|
15 |
+
"predict_precision": 0.6863237139272271,
|
16 |
+
"predict_recall": 0.704216285806244,
|
17 |
+
"predict_runtime": 9.749,
|
18 |
+
"predict_samples_per_second": 415.118,
|
19 |
+
"predict_steps_per_second": 51.903,
|
20 |
+
"total_flos": 6404835399317064.0,
|
21 |
+
"train_loss": 0.04138289297302368,
|
22 |
+
"train_runtime": 1065.756,
|
23 |
+
"train_samples": 13013,
|
24 |
+
"train_samples_per_second": 122.101,
|
25 |
+
"train_steps_per_second": 1.905
|
26 |
}
|
eval_results.json
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_f1": 0.
|
5 |
-
"eval_loss": 0.
|
6 |
-
"eval_precision": 0.
|
7 |
-
"eval_recall": 0.
|
8 |
-
"eval_runtime":
|
9 |
"eval_samples": 2519,
|
10 |
-
"eval_samples_per_second":
|
11 |
-
"eval_steps_per_second":
|
12 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 9.975429975429975,
|
3 |
+
"eval_accuracy": 0.9500465205813469,
|
4 |
+
"eval_f1": 0.6984379136881121,
|
5 |
+
"eval_loss": 0.30729904770851135,
|
6 |
+
"eval_precision": 0.6764102564102564,
|
7 |
+
"eval_recall": 0.7219485495347564,
|
8 |
+
"eval_runtime": 6.0921,
|
9 |
"eval_samples": 2519,
|
10 |
+
"eval_samples_per_second": 413.484,
|
11 |
+
"eval_steps_per_second": 51.706
|
12 |
}
|
predict_results.json
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
{
|
2 |
-
"predict_accuracy": 0.
|
3 |
-
"predict_f1": 0.
|
4 |
-
"predict_loss": 0.
|
5 |
-
"predict_precision": 0.
|
6 |
-
"predict_recall": 0.
|
7 |
-
"predict_runtime": 9.
|
8 |
-
"predict_samples_per_second":
|
9 |
-
"predict_steps_per_second": 51.
|
10 |
}
|
|
|
1 |
{
|
2 |
+
"predict_accuracy": 0.9466933985906772,
|
3 |
+
"predict_f1": 0.6951548848292296,
|
4 |
+
"predict_loss": 0.3347860872745514,
|
5 |
+
"predict_precision": 0.6863237139272271,
|
6 |
+
"predict_recall": 0.704216285806244,
|
7 |
+
"predict_runtime": 9.749,
|
8 |
+
"predict_samples_per_second": 415.118,
|
9 |
+
"predict_steps_per_second": 51.903
|
10 |
}
|
predictions.txt
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
tb/events.out.tfevents.1725883955.0a1c9bec2a53.9893.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b842d84c89f0d88706e31e98b113fae6b45879220115930147db648f848a8c24
|
3 |
+
size 560
|
train.log
CHANGED
@@ -888,3 +888,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
|
|
888 |
{'eval_loss': 0.3079104423522949, 'eval_precision': 0.6712820512820513, 'eval_recall': 0.7164750957854407, 'eval_f1': 0.6931427058512046, 'eval_accuracy': 0.9500465205813469, 'eval_runtime': 5.9033, 'eval_samples_per_second': 426.708, 'eval_steps_per_second': 53.36, 'epoch': 9.98}
|
889 |
{'train_runtime': 1065.756, 'train_samples_per_second': 122.101, 'train_steps_per_second': 1.905, 'train_loss': 0.04138289297302368, 'epoch': 9.98}
|
890 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
891 |
0%| | 0/315 [00:00<?, ?it/s]
|
892 |
3%|▎ | 8/315 [00:00<00:03, 79.87it/s]
|
893 |
5%|▌ | 16/315 [00:00<00:03, 76.37it/s]
|
894 |
8%|▊ | 24/315 [00:00<00:03, 76.98it/s]
|
895 |
10%|█ | 32/315 [00:00<00:03, 73.14it/s]
|
896 |
13%|█▎ | 41/315 [00:00<00:03, 77.06it/s]
|
897 |
16%|█▌ | 49/315 [00:00<00:03, 75.87it/s]
|
898 |
18%|█▊ | 57/315 [00:00<00:03, 75.22it/s]
|
899 |
21%|██ | 65/315 [00:00<00:03, 72.75it/s]
|
900 |
23%|██▎ | 73/315 [00:00<00:03, 74.58it/s]
|
901 |
26%|██▌ | 81/315 [00:01<00:03, 70.30it/s]
|
902 |
28%|██▊ | 89/315 [00:01<00:03, 67.40it/s]
|
903 |
31%|███ | 97/315 [00:01<00:03, 67.21it/s]
|
904 |
33%|███▎ | 105/315 [00:01<00:03, 68.92it/s]
|
905 |
36%|███▌ | 113/315 [00:01<00:02, 71.06it/s]
|
906 |
38%|███▊ | 121/315 [00:01<00:02, 69.27it/s]
|
907 |
41%|████ | 129/315 [00:01<00:02, 70.12it/s]
|
908 |
43%|████▎ | 137/315 [00:01<00:02, 69.43it/s]
|
909 |
46%|████▌ | 144/315 [00:02<00:02, 69.31it/s]
|
910 |
49%|████▊ | 153/315 [00:02<00:02, 73.00it/s]
|
911 |
51%|█████ | 161/315 [00:02<00:02, 71.79it/s]
|
912 |
54%|█████▎ | 169/315 [00:02<00:02, 71.32it/s]
|
913 |
56%|█████▌ | 177/315 [00:02<00:01, 70.34it/s]
|
914 |
59%|█████▊ | 185/315 [00:02<00:01, 68.51it/s]
|
915 |
61%|██████ | 192/315 [00:02<00:01, 68.60it/s]
|
916 |
63%|██████▎ | 199/315 [00:02<00:01, 65.97it/s]
|
917 |
65%|██████▌ | 206/315 [00:02<00:01, 64.76it/s]
|
918 |
68%|██████▊ | 214/315 [00:03<00:01, 68.25it/s]
|
919 |
70%|███████ | 222/315 [00:03<00:01, 70.05it/s]
|
920 |
73%|███████▎ | 230/315 [00:03<00:01, 72.77it/s]
|
921 |
76%|███████▌ | 239/315 [00:03<00:01, 74.50it/s]
|
922 |
78%|███████▊ | 247/315 [00:03<00:00, 70.67it/s]
|
923 |
81%|████████ | 255/315 [00:03<00:00, 69.38it/s]
|
924 |
83%|████████▎ | 263/315 [00:03<00:00, 70.77it/s]
|
925 |
86%|████████▌ | 271/315 [00:03<00:00, 72.89it/s]
|
926 |
89%|████████▉ | 280/315 [00:03<00:00, 75.53it/s]
|
927 |
91%|█████████▏| 288/315 [00:04<00:00, 72.20it/s]
|
928 |
94%|█████████▍| 296/315 [00:04<00:00, 70.67it/s]
|
929 |
97%|█████████▋| 304/315 [00:04<00:00, 72.18it/s]
|
930 |
99%|█████████▉| 312/315 [00:04<00:00, 72.30it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
931 |
0%| | 0/506 [00:00<?, ?it/s]
|
932 |
2%|▏ | 10/506 [00:00<00:05, 89.99it/s]
|
933 |
4%|▍ | 19/506 [00:00<00:06, 78.47it/s]
|
934 |
5%|▌ | 27/506 [00:00<00:06, 77.29it/s]
|
935 |
7%|▋ | 35/506 [00:00<00:06, 76.19it/s]
|
936 |
8%|▊ | 43/506 [00:00<00:06, 75.01it/s]
|
937 |
10%|█ | 51/506 [00:00<00:06, 74.72it/s]
|
938 |
12%|█▏ | 59/506 [00:00<00:06, 72.73it/s]
|
939 |
13%|█▎ | 67/506 [00:00<00:05, 74.41it/s]
|
940 |
15%|█▍ | 75/506 [00:00<00:05, 73.98it/s]
|
941 |
16%|█▋ | 83/506 [00:01<00:06, 64.09it/s]
|
942 |
18%|█▊ | 90/506 [00:01<00:06, 64.05it/s]
|
943 |
19%|█▉ | 98/506 [00:01<00:06, 67.57it/s]
|
944 |
21%|██ | 106/506 [00:01<00:05, 69.34it/s]
|
945 |
23%|██▎ | 114/506 [00:01<00:05, 72.09it/s]
|
946 |
24%|██▍ | 122/506 [00:01<00:05, 70.62it/s]
|
947 |
26%|██▌ | 130/506 [00:01<00:06, 60.47it/s]
|
948 |
27%|██▋ | 137/506 [00:02<00:06, 59.29it/s]
|
949 |
29%|██▊ | 145/506 [00:02<00:05, 63.13it/s]
|
950 |
30%|███ | 153/506 [00:02<00:05, 62.09it/s]
|
951 |
32%|███▏ | 160/506 [00:02<00:05, 60.95it/s]
|
952 |
33%|███▎ | 167/506 [00:02<00:05, 61.89it/s]
|
953 |
34%|███▍ | 174/506 [00:02<00:05, 63.30it/s]
|
954 |
36%|███▌ | 182/506 [00:02<00:04, 65.88it/s]
|
955 |
38%|███▊ | 190/506 [00:02<00:04, 68.04it/s]
|
956 |
39%|███▉ | 197/506 [00:02<00:04, 67.64it/s]
|
957 |
41%|████ | 205/506 [00:03<00:04, 69.76it/s]
|
958 |
42%|████▏ | 213/506 [00:03<00:04, 67.95it/s]
|
959 |
43%|████▎ | 220/506 [00:03<00:04, 66.58it/s]
|
960 |
45%|████▍ | 227/506 [00:03<00:04, 63.22it/s]
|
961 |
46%|████▌ | 234/506 [00:03<00:04, 61.33it/s]
|
962 |
48%|████▊ | 241/506 [00:03<00:04, 63.41it/s]
|
963 |
49%|████▉ | 249/506 [00:03<00:03, 66.94it/s]
|
964 |
51%|█████ | 256/506 [00:03<00:03, 67.40it/s]
|
965 |
52%|█████▏ | 264/506 [00:03<00:03, 70.60it/s]
|
966 |
54%|█████▍ | 272/506 [00:04<00:03, 72.82it/s]
|
967 |
55%|█████▌ | 280/506 [00:04<00:03, 72.01it/s]
|
968 |
57%|█████▋ | 288/506 [00:04<00:03, 70.69it/s]
|
969 |
58%|█████▊ | 296/506 [00:04<00:02, 70.79it/s]
|
970 |
60%|██████ | 304/506 [00:04<00:02, 71.70it/s]
|
971 |
62%|██████▏ | 312/506 [00:04<00:02, 71.70it/s]
|
972 |
63%|██████▎ | 320/506 [00:04<00:02, 73.71it/s]
|
973 |
65%|██████▌ | 329/506 [00:04<00:02, 76.51it/s]
|
974 |
67%|██████▋ | 337/506 [00:04<00:02, 76.82it/s]
|
975 |
68%|██████▊ | 345/506 [00:04<00:02, 77.52it/s]
|
976 |
70%|██████▉ | 353/506 [00:05<00:01, 77.18it/s]
|
977 |
71%|███████▏ | 361/506 [00:05<00:01, 76.54it/s]
|
978 |
73%|███████▎ | 369/506 [00:05<00:01, 71.00it/s]
|
979 |
75%|███████▍ | 377/506 [00:05<00:01, 66.99it/s]
|
980 |
76%|███████▌ | 384/506 [00:05<00:01, 64.26it/s]
|
981 |
77%|███████▋ | 391/506 [00:05<00:01, 60.26it/s]
|
982 |
79%|███████▊ | 398/506 [00:05<00:01, 58.75it/s]
|
983 |
80%|████████ | 405/506 [00:05<00:01, 59.98it/s]
|
984 |
81%|████████▏ | 412/506 [00:06<00:01, 61.53it/s]
|
985 |
83%|████████▎ | 419/506 [00:06<00:01, 63.24it/s]
|
986 |
84%|████████▍ | 426/506 [00:06<00:01, 63.10it/s]
|
987 |
86%|████████▌ | 434/506 [00:06<00:01, 66.36it/s]
|
988 |
87%|████████▋ | 441/506 [00:06<00:00, 66.96it/s]
|
989 |
89%|████████▊ | 448/506 [00:06<00:00, 66.98it/s]
|
990 |
90%|█████████ | 456/506 [00:06<00:00, 69.30it/s]
|
991 |
92%|█████████▏| 465/506 [00:06<00:00, 71.88it/s]
|
992 |
93%|█████████▎| 473/506 [00:06<00:00, 73.65it/s]
|
993 |
95%|█████████▌| 481/506 [00:07<00:00, 74.93it/s]
|
994 |
97%|█████████▋| 489/506 [00:07<00:00, 70.10it/s]
|
995 |
98%|█████████▊| 497/506 [00:07<00:00, 71.43it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
888 |
{'eval_loss': 0.3079104423522949, 'eval_precision': 0.6712820512820513, 'eval_recall': 0.7164750957854407, 'eval_f1': 0.6931427058512046, 'eval_accuracy': 0.9500465205813469, 'eval_runtime': 5.9033, 'eval_samples_per_second': 426.708, 'eval_steps_per_second': 53.36, 'epoch': 9.98}
|
889 |
{'train_runtime': 1065.756, 'train_samples_per_second': 122.101, 'train_steps_per_second': 1.905, 'train_loss': 0.04138289297302368, 'epoch': 9.98}
|
890 |
|
891 |
+
***** train metrics *****
|
892 |
+
epoch = 9.9754
|
893 |
+
total_flos = 5964967GF
|
894 |
+
train_loss = 0.0414
|
895 |
+
train_runtime = 0:17:45.75
|
896 |
+
train_samples = 13013
|
897 |
+
train_samples_per_second = 122.101
|
898 |
+
train_steps_per_second = 1.905
|
899 |
+
09/09/2024 12:12:29 - INFO - __main__ - *** Evaluate ***
|
900 |
+
[INFO|trainer.py:811] 2024-09-09 12:12:29,073 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
901 |
+
[INFO|trainer.py:3819] 2024-09-09 12:12:29,076 >>
|
902 |
+
***** Running Evaluation *****
|
903 |
+
[INFO|trainer.py:3821] 2024-09-09 12:12:29,076 >> Num examples = 2519
|
904 |
+
[INFO|trainer.py:3824] 2024-09-09 12:12:29,076 >> Batch size = 8
|
905 |
+
|
906 |
0%| | 0/315 [00:00<?, ?it/s]
|
907 |
3%|▎ | 8/315 [00:00<00:03, 79.87it/s]
|
908 |
5%|▌ | 16/315 [00:00<00:03, 76.37it/s]
|
909 |
8%|▊ | 24/315 [00:00<00:03, 76.98it/s]
|
910 |
10%|█ | 32/315 [00:00<00:03, 73.14it/s]
|
911 |
13%|█▎ | 41/315 [00:00<00:03, 77.06it/s]
|
912 |
16%|█▌ | 49/315 [00:00<00:03, 75.87it/s]
|
913 |
18%|█▊ | 57/315 [00:00<00:03, 75.22it/s]
|
914 |
21%|██ | 65/315 [00:00<00:03, 72.75it/s]
|
915 |
23%|██▎ | 73/315 [00:00<00:03, 74.58it/s]
|
916 |
26%|██▌ | 81/315 [00:01<00:03, 70.30it/s]
|
917 |
28%|██▊ | 89/315 [00:01<00:03, 67.40it/s]
|
918 |
31%|███ | 97/315 [00:01<00:03, 67.21it/s]
|
919 |
33%|███▎ | 105/315 [00:01<00:03, 68.92it/s]
|
920 |
36%|███▌ | 113/315 [00:01<00:02, 71.06it/s]
|
921 |
38%|███▊ | 121/315 [00:01<00:02, 69.27it/s]
|
922 |
41%|████ | 129/315 [00:01<00:02, 70.12it/s]
|
923 |
43%|████▎ | 137/315 [00:01<00:02, 69.43it/s]
|
924 |
46%|████▌ | 144/315 [00:02<00:02, 69.31it/s]
|
925 |
49%|████▊ | 153/315 [00:02<00:02, 73.00it/s]
|
926 |
51%|█████ | 161/315 [00:02<00:02, 71.79it/s]
|
927 |
54%|█████▎ | 169/315 [00:02<00:02, 71.32it/s]
|
928 |
56%|█████▌ | 177/315 [00:02<00:01, 70.34it/s]
|
929 |
59%|█████▊ | 185/315 [00:02<00:01, 68.51it/s]
|
930 |
61%|██████ | 192/315 [00:02<00:01, 68.60it/s]
|
931 |
63%|██████▎ | 199/315 [00:02<00:01, 65.97it/s]
|
932 |
65%|██████▌ | 206/315 [00:02<00:01, 64.76it/s]
|
933 |
68%|██████▊ | 214/315 [00:03<00:01, 68.25it/s]
|
934 |
70%|███████ | 222/315 [00:03<00:01, 70.05it/s]
|
935 |
73%|███████▎ | 230/315 [00:03<00:01, 72.77it/s]
|
936 |
76%|███████▌ | 239/315 [00:03<00:01, 74.50it/s]
|
937 |
78%|███████▊ | 247/315 [00:03<00:00, 70.67it/s]
|
938 |
81%|████████ | 255/315 [00:03<00:00, 69.38it/s]
|
939 |
83%|████████▎ | 263/315 [00:03<00:00, 70.77it/s]
|
940 |
86%|████████▌ | 271/315 [00:03<00:00, 72.89it/s]
|
941 |
89%|████████▉ | 280/315 [00:03<00:00, 75.53it/s]
|
942 |
91%|█████████▏| 288/315 [00:04<00:00, 72.20it/s]
|
943 |
94%|█████████▍| 296/315 [00:04<00:00, 70.67it/s]
|
944 |
97%|█████████▋| 304/315 [00:04<00:00, 72.18it/s]
|
945 |
99%|█████████▉| 312/315 [00:04<00:00, 72.30it/s]
|
946 |
+
***** eval metrics *****
|
947 |
+
epoch = 9.9754
|
948 |
+
eval_accuracy = 0.95
|
949 |
+
eval_f1 = 0.6984
|
950 |
+
eval_loss = 0.3073
|
951 |
+
eval_precision = 0.6764
|
952 |
+
eval_recall = 0.7219
|
953 |
+
eval_runtime = 0:00:06.09
|
954 |
+
eval_samples = 2519
|
955 |
+
eval_samples_per_second = 413.484
|
956 |
+
eval_steps_per_second = 51.706
|
957 |
+
09/09/2024 12:12:35 - INFO - __main__ - *** Predict ***
|
958 |
+
[INFO|trainer.py:811] 2024-09-09 12:12:35,170 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: tokens, ner_tags, id. If tokens, ner_tags, id are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
959 |
+
[INFO|trainer.py:3819] 2024-09-09 12:12:35,172 >>
|
960 |
+
***** Running Prediction *****
|
961 |
+
[INFO|trainer.py:3821] 2024-09-09 12:12:35,172 >> Num examples = 4047
|
962 |
+
[INFO|trainer.py:3824] 2024-09-09 12:12:35,172 >> Batch size = 8
|
963 |
+
|
964 |
0%| | 0/506 [00:00<?, ?it/s]
|
965 |
2%|▏ | 10/506 [00:00<00:05, 89.99it/s]
|
966 |
4%|▍ | 19/506 [00:00<00:06, 78.47it/s]
|
967 |
5%|▌ | 27/506 [00:00<00:06, 77.29it/s]
|
968 |
7%|▋ | 35/506 [00:00<00:06, 76.19it/s]
|
969 |
8%|▊ | 43/506 [00:00<00:06, 75.01it/s]
|
970 |
10%|█ | 51/506 [00:00<00:06, 74.72it/s]
|
971 |
12%|█▏ | 59/506 [00:00<00:06, 72.73it/s]
|
972 |
13%|█▎ | 67/506 [00:00<00:05, 74.41it/s]
|
973 |
15%|█▍ | 75/506 [00:00<00:05, 73.98it/s]
|
974 |
16%|█▋ | 83/506 [00:01<00:06, 64.09it/s]
|
975 |
18%|█▊ | 90/506 [00:01<00:06, 64.05it/s]
|
976 |
19%|█▉ | 98/506 [00:01<00:06, 67.57it/s]
|
977 |
21%|██ | 106/506 [00:01<00:05, 69.34it/s]
|
978 |
23%|██▎ | 114/506 [00:01<00:05, 72.09it/s]
|
979 |
24%|██▍ | 122/506 [00:01<00:05, 70.62it/s]
|
980 |
26%|██▌ | 130/506 [00:01<00:06, 60.47it/s]
|
981 |
27%|██▋ | 137/506 [00:02<00:06, 59.29it/s]
|
982 |
29%|██▊ | 145/506 [00:02<00:05, 63.13it/s]
|
983 |
30%|███ | 153/506 [00:02<00:05, 62.09it/s]
|
984 |
32%|███▏ | 160/506 [00:02<00:05, 60.95it/s]
|
985 |
33%|███▎ | 167/506 [00:02<00:05, 61.89it/s]
|
986 |
34%|███▍ | 174/506 [00:02<00:05, 63.30it/s]
|
987 |
36%|███▌ | 182/506 [00:02<00:04, 65.88it/s]
|
988 |
38%|███▊ | 190/506 [00:02<00:04, 68.04it/s]
|
989 |
39%|███▉ | 197/506 [00:02<00:04, 67.64it/s]
|
990 |
41%|████ | 205/506 [00:03<00:04, 69.76it/s]
|
991 |
42%|████▏ | 213/506 [00:03<00:04, 67.95it/s]
|
992 |
43%|████▎ | 220/506 [00:03<00:04, 66.58it/s]
|
993 |
45%|████▍ | 227/506 [00:03<00:04, 63.22it/s]
|
994 |
46%|████▌ | 234/506 [00:03<00:04, 61.33it/s]
|
995 |
48%|████▊ | 241/506 [00:03<00:04, 63.41it/s]
|
996 |
49%|████▉ | 249/506 [00:03<00:03, 66.94it/s]
|
997 |
51%|█████ | 256/506 [00:03<00:03, 67.40it/s]
|
998 |
52%|█████▏ | 264/506 [00:03<00:03, 70.60it/s]
|
999 |
54%|█████▍ | 272/506 [00:04<00:03, 72.82it/s]
|
1000 |
55%|█████▌ | 280/506 [00:04<00:03, 72.01it/s]
|
1001 |
57%|█████▋ | 288/506 [00:04<00:03, 70.69it/s]
|
1002 |
58%|█████▊ | 296/506 [00:04<00:02, 70.79it/s]
|
1003 |
60%|██████ | 304/506 [00:04<00:02, 71.70it/s]
|
1004 |
62%|██████▏ | 312/506 [00:04<00:02, 71.70it/s]
|
1005 |
63%|██████▎ | 320/506 [00:04<00:02, 73.71it/s]
|
1006 |
65%|██████▌ | 329/506 [00:04<00:02, 76.51it/s]
|
1007 |
67%|██████▋ | 337/506 [00:04<00:02, 76.82it/s]
|
1008 |
68%|██████▊ | 345/506 [00:04<00:02, 77.52it/s]
|
1009 |
70%|██████▉ | 353/506 [00:05<00:01, 77.18it/s]
|
1010 |
71%|███████▏ | 361/506 [00:05<00:01, 76.54it/s]
|
1011 |
73%|███████▎ | 369/506 [00:05<00:01, 71.00it/s]
|
1012 |
75%|███████▍ | 377/506 [00:05<00:01, 66.99it/s]
|
1013 |
76%|███████▌ | 384/506 [00:05<00:01, 64.26it/s]
|
1014 |
77%|███████▋ | 391/506 [00:05<00:01, 60.26it/s]
|
1015 |
79%|███████▊ | 398/506 [00:05<00:01, 58.75it/s]
|
1016 |
80%|████████ | 405/506 [00:05<00:01, 59.98it/s]
|
1017 |
81%|████████▏ | 412/506 [00:06<00:01, 61.53it/s]
|
1018 |
83%|████████▎ | 419/506 [00:06<00:01, 63.24it/s]
|
1019 |
84%|████████▍ | 426/506 [00:06<00:01, 63.10it/s]
|
1020 |
86%|████████▌ | 434/506 [00:06<00:01, 66.36it/s]
|
1021 |
87%|████████▋ | 441/506 [00:06<00:00, 66.96it/s]
|
1022 |
89%|████████▊ | 448/506 [00:06<00:00, 66.98it/s]
|
1023 |
90%|█████████ | 456/506 [00:06<00:00, 69.30it/s]
|
1024 |
92%|█████████▏| 465/506 [00:06<00:00, 71.88it/s]
|
1025 |
93%|█████████▎| 473/506 [00:06<00:00, 73.65it/s]
|
1026 |
95%|█████████▌| 481/506 [00:07<00:00, 74.93it/s]
|
1027 |
97%|█████████▋| 489/506 [00:07<00:00, 70.10it/s]
|
1028 |
98%|█████████▊| 497/506 [00:07<00:00, 71.43it/s]
|
1029 |
+
[INFO|trainer.py:3503] 2024-09-09 12:12:45,082 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
|
1030 |
+
[INFO|configuration_utils.py:472] 2024-09-09 12:12:45,084 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
|
1031 |
+
[INFO|modeling_utils.py:2799] 2024-09-09 12:12:46,408 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
|
1032 |
+
[INFO|tokenization_utils_base.py:2684] 2024-09-09 12:12:46,409 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
|
1033 |
+
[INFO|tokenization_utils_base.py:2693] 2024-09-09 12:12:46,410 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
|
1034 |
+
***** predict metrics *****
|
1035 |
+
predict_accuracy = 0.9467
|
1036 |
+
predict_f1 = 0.6952
|
1037 |
+
predict_loss = 0.3348
|
1038 |
+
predict_precision = 0.6863
|
1039 |
+
predict_recall = 0.7042
|
1040 |
+
predict_runtime = 0:00:09.74
|
1041 |
+
predict_samples_per_second = 415.118
|
1042 |
+
predict_steps_per_second = 51.903
|
1043 |
+
|
train_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"total_flos":
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
-
"train_samples":
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second": 1.
|
9 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 9.975429975429975,
|
3 |
+
"total_flos": 6404835399317064.0,
|
4 |
+
"train_loss": 0.04138289297302368,
|
5 |
+
"train_runtime": 1065.756,
|
6 |
+
"train_samples": 13013,
|
7 |
+
"train_samples_per_second": 122.101,
|
8 |
+
"train_steps_per_second": 1.905
|
9 |
}
|
trainer_state.json
CHANGED
@@ -1,180 +1,173 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
-
"epoch":
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_f1": 0.
|
15 |
-
"eval_loss": 0.
|
16 |
-
"eval_precision": 0.
|
17 |
-
"eval_recall": 0.
|
18 |
-
"eval_runtime": 5.
|
19 |
-
"eval_samples_per_second":
|
20 |
-
"eval_steps_per_second":
|
21 |
-
"step":
|
22 |
-
},
|
23 |
-
{
|
24 |
-
"epoch": 1.937984496124031,
|
25 |
-
"grad_norm": 0.791233479976654,
|
26 |
-
"learning_rate": 4.0310077519379843e-05,
|
27 |
-
"loss": 0.1357,
|
28 |
-
"step": 500
|
29 |
},
|
30 |
{
|
31 |
"epoch": 2.0,
|
32 |
-
"eval_accuracy": 0.
|
33 |
-
"eval_f1": 0.
|
34 |
-
"eval_loss": 0.
|
35 |
-
"eval_precision": 0.
|
36 |
-
"eval_recall": 0.
|
37 |
-
"eval_runtime": 5.
|
38 |
-
"eval_samples_per_second":
|
39 |
-
"eval_steps_per_second": 53.
|
40 |
-
"step":
|
41 |
},
|
42 |
{
|
43 |
-
"epoch":
|
44 |
-
"
|
45 |
-
"
|
46 |
-
"
|
47 |
-
"
|
48 |
-
"eval_recall": 0.7038861521620142,
|
49 |
-
"eval_runtime": 5.8946,
|
50 |
-
"eval_samples_per_second": 427.339,
|
51 |
-
"eval_steps_per_second": 53.439,
|
52 |
-
"step": 774
|
53 |
},
|
54 |
{
|
55 |
-
"epoch":
|
56 |
-
"
|
57 |
-
"
|
58 |
-
"
|
59 |
-
"
|
|
|
|
|
|
|
|
|
|
|
60 |
},
|
61 |
{
|
62 |
"epoch": 4.0,
|
63 |
-
"eval_accuracy": 0.
|
64 |
-
"eval_f1": 0.
|
65 |
-
"eval_loss": 0.
|
66 |
-
"eval_precision": 0.
|
67 |
-
"eval_recall": 0.
|
68 |
-
"eval_runtime": 5.
|
69 |
-
"eval_samples_per_second":
|
70 |
-
"eval_steps_per_second": 53.
|
71 |
-
"step":
|
72 |
},
|
73 |
{
|
74 |
-
"epoch":
|
75 |
-
"
|
76 |
-
"
|
77 |
-
"
|
78 |
-
"
|
79 |
-
"eval_recall": 0.7038861521620142,
|
80 |
-
"eval_runtime": 5.9102,
|
81 |
-
"eval_samples_per_second": 426.215,
|
82 |
-
"eval_steps_per_second": 53.298,
|
83 |
-
"step": 1290
|
84 |
},
|
85 |
{
|
86 |
-
"epoch":
|
87 |
-
"
|
88 |
-
"
|
89 |
-
"
|
90 |
-
"
|
|
|
|
|
|
|
|
|
|
|
91 |
},
|
92 |
{
|
93 |
"epoch": 6.0,
|
94 |
-
"eval_accuracy": 0.
|
95 |
-
"eval_f1": 0.
|
96 |
-
"eval_loss": 0.
|
97 |
-
"eval_precision": 0.
|
98 |
-
"eval_recall": 0.
|
99 |
-
"eval_runtime": 5.
|
100 |
-
"eval_samples_per_second": 426.
|
101 |
-
"eval_steps_per_second": 53.
|
102 |
-
"step":
|
103 |
},
|
104 |
{
|
105 |
-
"epoch":
|
106 |
-
"eval_accuracy": 0.
|
107 |
-
"eval_f1": 0.
|
108 |
-
"eval_loss": 0.
|
109 |
-
"eval_precision": 0.
|
110 |
-
"eval_recall": 0.
|
111 |
-
"eval_runtime": 5.
|
112 |
-
"eval_samples_per_second":
|
113 |
-
"eval_steps_per_second": 53.
|
114 |
-
"step":
|
115 |
},
|
116 |
{
|
117 |
-
"epoch": 7.
|
118 |
-
"grad_norm":
|
119 |
-
"learning_rate": 1.
|
120 |
-
"loss": 0.
|
121 |
-
"step":
|
122 |
},
|
123 |
{
|
124 |
"epoch": 8.0,
|
125 |
-
"eval_accuracy": 0.
|
126 |
-
"eval_f1": 0.
|
127 |
-
"eval_loss": 0.
|
128 |
-
"eval_precision": 0.
|
129 |
-
"eval_recall": 0.
|
130 |
-
"eval_runtime": 5.
|
131 |
-
"eval_samples_per_second":
|
132 |
-
"eval_steps_per_second": 53.
|
133 |
-
"step":
|
134 |
},
|
135 |
{
|
136 |
-
"epoch":
|
137 |
-
"eval_accuracy": 0.
|
138 |
-
"eval_f1": 0.
|
139 |
-
"eval_loss": 0.
|
140 |
-
"eval_precision": 0.
|
141 |
-
"eval_recall": 0.
|
142 |
-
"eval_runtime": 5.
|
143 |
-
"eval_samples_per_second":
|
144 |
-
"eval_steps_per_second": 53.
|
145 |
-
"step":
|
146 |
},
|
147 |
{
|
148 |
-
"epoch": 9.
|
149 |
-
"grad_norm": 0.
|
150 |
-
"learning_rate":
|
151 |
-
"loss": 0.
|
152 |
-
"step":
|
153 |
},
|
154 |
{
|
155 |
-
"epoch":
|
156 |
-
"eval_accuracy": 0.
|
157 |
-
"eval_f1": 0.
|
158 |
-
"eval_loss": 0.
|
159 |
-
"eval_precision": 0.
|
160 |
-
"eval_recall": 0.
|
161 |
-
"eval_runtime":
|
162 |
-
"eval_samples_per_second":
|
163 |
-
"eval_steps_per_second":
|
164 |
-
"step":
|
165 |
},
|
166 |
{
|
167 |
-
"epoch":
|
168 |
-
"step":
|
169 |
-
"total_flos":
|
170 |
-
"train_loss": 0.
|
171 |
-
"train_runtime":
|
172 |
-
"train_samples_per_second":
|
173 |
-
"train_steps_per_second": 1.
|
174 |
}
|
175 |
],
|
176 |
"logging_steps": 500,
|
177 |
-
"max_steps":
|
178 |
"num_input_tokens_seen": 0,
|
179 |
"num_train_epochs": 10,
|
180 |
"save_steps": 500,
|
@@ -190,7 +183,7 @@
|
|
190 |
"attributes": {}
|
191 |
}
|
192 |
},
|
193 |
-
"total_flos":
|
194 |
"train_batch_size": 32,
|
195 |
"trial_name": null,
|
196 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.6984379136881121,
|
3 |
+
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1831",
|
4 |
+
"epoch": 9.975429975429975,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2030,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
+
"epoch": 0.9975429975429976,
|
13 |
+
"eval_accuracy": 0.9467740383072925,
|
14 |
+
"eval_f1": 0.6143236074270556,
|
15 |
+
"eval_loss": 0.15010379254817963,
|
16 |
+
"eval_precision": 0.5959855892949047,
|
17 |
+
"eval_recall": 0.6338259441707718,
|
18 |
+
"eval_runtime": 5.907,
|
19 |
+
"eval_samples_per_second": 426.445,
|
20 |
+
"eval_steps_per_second": 53.327,
|
21 |
+
"step": 203
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
},
|
23 |
{
|
24 |
"epoch": 2.0,
|
25 |
+
"eval_accuracy": 0.949244441592608,
|
26 |
+
"eval_f1": 0.6728575218890952,
|
27 |
+
"eval_loss": 0.17612887918949127,
|
28 |
+
"eval_precision": 0.6529351184346035,
|
29 |
+
"eval_recall": 0.6940339354132458,
|
30 |
+
"eval_runtime": 5.8933,
|
31 |
+
"eval_samples_per_second": 427.436,
|
32 |
+
"eval_steps_per_second": 53.451,
|
33 |
+
"step": 407
|
34 |
},
|
35 |
{
|
36 |
+
"epoch": 2.457002457002457,
|
37 |
+
"grad_norm": 0.6181371212005615,
|
38 |
+
"learning_rate": 3.768472906403941e-05,
|
39 |
+
"loss": 0.1312,
|
40 |
+
"step": 500
|
|
|
|
|
|
|
|
|
|
|
41 |
},
|
42 |
{
|
43 |
+
"epoch": 2.9975429975429977,
|
44 |
+
"eval_accuracy": 0.9469665372645898,
|
45 |
+
"eval_f1": 0.671967171069505,
|
46 |
+
"eval_loss": 0.1995203047990799,
|
47 |
+
"eval_precision": 0.6322393822393823,
|
48 |
+
"eval_recall": 0.7170224411603722,
|
49 |
+
"eval_runtime": 5.8448,
|
50 |
+
"eval_samples_per_second": 430.983,
|
51 |
+
"eval_steps_per_second": 53.894,
|
52 |
+
"step": 610
|
53 |
},
|
54 |
{
|
55 |
"epoch": 4.0,
|
56 |
+
"eval_accuracy": 0.9482979883858963,
|
57 |
+
"eval_f1": 0.6774025974025973,
|
58 |
+
"eval_loss": 0.21822449564933777,
|
59 |
+
"eval_precision": 0.6445872466633712,
|
60 |
+
"eval_recall": 0.7137383689107827,
|
61 |
+
"eval_runtime": 5.872,
|
62 |
+
"eval_samples_per_second": 428.988,
|
63 |
+
"eval_steps_per_second": 53.645,
|
64 |
+
"step": 814
|
65 |
},
|
66 |
{
|
67 |
+
"epoch": 4.914004914004914,
|
68 |
+
"grad_norm": 0.7616795301437378,
|
69 |
+
"learning_rate": 2.5369458128078822e-05,
|
70 |
+
"loss": 0.0248,
|
71 |
+
"step": 1000
|
|
|
|
|
|
|
|
|
|
|
72 |
},
|
73 |
{
|
74 |
+
"epoch": 4.997542997542998,
|
75 |
+
"eval_accuracy": 0.9448650903140942,
|
76 |
+
"eval_f1": 0.6700533401066802,
|
77 |
+
"eval_loss": 0.24612903594970703,
|
78 |
+
"eval_precision": 0.6251184834123222,
|
79 |
+
"eval_recall": 0.7219485495347564,
|
80 |
+
"eval_runtime": 5.8462,
|
81 |
+
"eval_samples_per_second": 430.877,
|
82 |
+
"eval_steps_per_second": 53.881,
|
83 |
+
"step": 1017
|
84 |
},
|
85 |
{
|
86 |
"epoch": 6.0,
|
87 |
+
"eval_accuracy": 0.9469023709454907,
|
88 |
+
"eval_f1": 0.6827021494370521,
|
89 |
+
"eval_loss": 0.26953065395355225,
|
90 |
+
"eval_precision": 0.6410379625180201,
|
91 |
+
"eval_recall": 0.7301587301587301,
|
92 |
+
"eval_runtime": 5.9067,
|
93 |
+
"eval_samples_per_second": 426.468,
|
94 |
+
"eval_steps_per_second": 53.33,
|
95 |
+
"step": 1221
|
96 |
},
|
97 |
{
|
98 |
+
"epoch": 6.997542997542998,
|
99 |
+
"eval_accuracy": 0.9469986204241394,
|
100 |
+
"eval_f1": 0.6910590054109765,
|
101 |
+
"eval_loss": 0.2829184830188751,
|
102 |
+
"eval_precision": 0.6528724440116845,
|
103 |
+
"eval_recall": 0.7339901477832512,
|
104 |
+
"eval_runtime": 5.8572,
|
105 |
+
"eval_samples_per_second": 430.069,
|
106 |
+
"eval_steps_per_second": 53.78,
|
107 |
+
"step": 1424
|
108 |
},
|
109 |
{
|
110 |
+
"epoch": 7.371007371007371,
|
111 |
+
"grad_norm": 0.2855200171470642,
|
112 |
+
"learning_rate": 1.3054187192118228e-05,
|
113 |
+
"loss": 0.0081,
|
114 |
+
"step": 1500
|
115 |
},
|
116 |
{
|
117 |
"epoch": 8.0,
|
118 |
+
"eval_accuracy": 0.9494048573903558,
|
119 |
+
"eval_f1": 0.6938127974616606,
|
120 |
+
"eval_loss": 0.29823970794677734,
|
121 |
+
"eval_precision": 0.6710997442455243,
|
122 |
+
"eval_recall": 0.7181171319102354,
|
123 |
+
"eval_runtime": 5.8929,
|
124 |
+
"eval_samples_per_second": 427.463,
|
125 |
+
"eval_steps_per_second": 53.454,
|
126 |
+
"step": 1628
|
127 |
},
|
128 |
{
|
129 |
+
"epoch": 8.997542997542997,
|
130 |
+
"eval_accuracy": 0.9500465205813469,
|
131 |
+
"eval_f1": 0.6984379136881121,
|
132 |
+
"eval_loss": 0.30729904770851135,
|
133 |
+
"eval_precision": 0.6764102564102564,
|
134 |
+
"eval_recall": 0.7219485495347564,
|
135 |
+
"eval_runtime": 5.8665,
|
136 |
+
"eval_samples_per_second": 429.386,
|
137 |
+
"eval_steps_per_second": 53.695,
|
138 |
+
"step": 1831
|
139 |
},
|
140 |
{
|
141 |
+
"epoch": 9.828009828009828,
|
142 |
+
"grad_norm": 0.6682894825935364,
|
143 |
+
"learning_rate": 7.389162561576355e-07,
|
144 |
+
"loss": 0.0038,
|
145 |
+
"step": 2000
|
146 |
},
|
147 |
{
|
148 |
+
"epoch": 9.975429975429975,
|
149 |
+
"eval_accuracy": 0.9500465205813469,
|
150 |
+
"eval_f1": 0.6931427058512046,
|
151 |
+
"eval_loss": 0.3079104423522949,
|
152 |
+
"eval_precision": 0.6712820512820513,
|
153 |
+
"eval_recall": 0.7164750957854407,
|
154 |
+
"eval_runtime": 5.9033,
|
155 |
+
"eval_samples_per_second": 426.708,
|
156 |
+
"eval_steps_per_second": 53.36,
|
157 |
+
"step": 2030
|
158 |
},
|
159 |
{
|
160 |
+
"epoch": 9.975429975429975,
|
161 |
+
"step": 2030,
|
162 |
+
"total_flos": 6404835399317064.0,
|
163 |
+
"train_loss": 0.04138289297302368,
|
164 |
+
"train_runtime": 1065.756,
|
165 |
+
"train_samples_per_second": 122.101,
|
166 |
+
"train_steps_per_second": 1.905
|
167 |
}
|
168 |
],
|
169 |
"logging_steps": 500,
|
170 |
+
"max_steps": 2030,
|
171 |
"num_input_tokens_seen": 0,
|
172 |
"num_train_epochs": 10,
|
173 |
"save_steps": 500,
|
|
|
183 |
"attributes": {}
|
184 |
}
|
185 |
},
|
186 |
+
"total_flos": 6404835399317064.0,
|
187 |
"train_batch_size": 32,
|
188 |
"trial_name": null,
|
189 |
"trial_params": null
|