Marcos12886
committed on
Commit
•
a0c9418
1
Parent(s):
08fe348
Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- README.md +15 -15
- checkpoint-121/model.safetensors +1 -1
- checkpoint-121/optimizer.pt +1 -1
- checkpoint-121/scheduler.pt +1 -1
- checkpoint-121/trainer_state.json +43 -43
- checkpoint-121/training_args.bin +1 -1
- checkpoint-145/model.safetensors +1 -1
- checkpoint-145/optimizer.pt +1 -1
- checkpoint-145/scheduler.pt +1 -1
- checkpoint-145/trainer_state.json +51 -51
- checkpoint-145/training_args.bin +1 -1
- checkpoint-169/model.safetensors +1 -1
- checkpoint-169/optimizer.pt +1 -1
- checkpoint-169/rng_state.pth +1 -1
- checkpoint-169/scheduler.pt +1 -1
- checkpoint-169/trainer_state.json +73 -97
- checkpoint-169/training_args.bin +1 -1
- checkpoint-194/model.safetensors +1 -1
- checkpoint-194/optimizer.pt +1 -1
- checkpoint-194/scheduler.pt +1 -1
- checkpoint-194/trainer_state.json +68 -68
- checkpoint-194/training_args.bin +1 -1
- checkpoint-218/model.safetensors +1 -1
- checkpoint-218/optimizer.pt +1 -1
- checkpoint-218/scheduler.pt +1 -1
- checkpoint-218/trainer_state.json +76 -76
- checkpoint-218/training_args.bin +1 -1
- checkpoint-24/model.safetensors +1 -1
- checkpoint-24/optimizer.pt +1 -1
- checkpoint-24/scheduler.pt +1 -1
- checkpoint-24/trainer_state.json +11 -11
- checkpoint-24/training_args.bin +1 -1
- checkpoint-240/config.json +85 -0
- checkpoint-240/model.safetensors +3 -0
- checkpoint-240/optimizer.pt +3 -0
- checkpoint-240/rng_state.pth +3 -0
- checkpoint-240/scheduler.pt +3 -0
- checkpoint-240/trainer_state.json +162 -0
- checkpoint-240/training_args.bin +3 -0
- checkpoint-48/model.safetensors +1 -1
- checkpoint-48/optimizer.pt +1 -1
- checkpoint-48/scheduler.pt +1 -1
- checkpoint-48/trainer_state.json +19 -19
- checkpoint-48/training_args.bin +1 -1
- checkpoint-72/model.safetensors +1 -1
- checkpoint-72/optimizer.pt +1 -1
- checkpoint-72/scheduler.pt +1 -1
- checkpoint-72/trainer_state.json +27 -27
- checkpoint-72/training_args.bin +1 -1
- checkpoint-97/model.safetensors +1 -1
README.md
CHANGED
@@ -21,11 +21,11 @@ should probably proofread and complete it, then remove this comment. -->
|
|
21 |
|
22 |
This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on an unknown dataset.
|
23 |
It achieves the following results on the evaluation set:
|
24 |
-
- Loss: 0.
|
25 |
-
- Accuracy: 0.
|
26 |
-
- Precision: 0.
|
27 |
-
- Recall: 0.
|
28 |
-
- F1: 0.
|
29 |
|
30 |
## Model description
|
31 |
|
@@ -59,16 +59,16 @@ The following hyperparameters were used during training:
|
|
59 |
|
60 |
| Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1 |
|
61 |
|:-------------:|:------:|:----:|:---------------:|:--------:|:---------:|:------:|:------:|
|
62 |
-
| No log | 0.
|
63 |
-
| No log | 1.
|
64 |
-
| No log | 2.
|
65 |
-
| No log |
|
66 |
-
| No log | 4.
|
67 |
-
| No log | 5.
|
68 |
-
| No log | 6.
|
69 |
-
| No log | 8.0 |
|
70 |
-
| No log | 8.
|
71 |
-
| No log | 9.
|
72 |
|
73 |
|
74 |
### Framework versions
|
|
|
21 |
|
22 |
This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on an unknown dataset.
|
23 |
It achieves the following results on the evaluation set:
|
24 |
+
- Loss: 0.4341
|
25 |
+
- Accuracy: 0.8656
|
26 |
+
- Precision: 0.8628
|
27 |
+
- Recall: 0.8656
|
28 |
+
- F1: 0.8629
|
29 |
|
30 |
## Model description
|
31 |
|
|
|
59 |
|
60 |
| Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1 |
|
61 |
|:-------------:|:------:|:----:|:---------------:|:--------:|:---------:|:------:|:------:|
|
62 |
+
| No log | 0.9897 | 24 | 1.1718 | 0.4832 | 0.2335 | 0.4832 | 0.3148 |
|
63 |
+
| No log | 1.9794 | 48 | 0.7551 | 0.7235 | 0.7164 | 0.7235 | 0.7026 |
|
64 |
+
| No log | 2.9691 | 72 | 0.6834 | 0.7313 | 0.7621 | 0.7313 | 0.7074 |
|
65 |
+
| No log | 4.0 | 97 | 0.9938 | 0.6202 | 0.6844 | 0.6202 | 0.6161 |
|
66 |
+
| No log | 4.9897 | 121 | 0.4973 | 0.7881 | 0.7961 | 0.7881 | 0.7854 |
|
67 |
+
| No log | 5.9794 | 145 | 0.5254 | 0.7804 | 0.7843 | 0.7804 | 0.7817 |
|
68 |
+
| No log | 6.9691 | 169 | 0.4264 | 0.8475 | 0.8558 | 0.8475 | 0.8504 |
|
69 |
+
| No log | 8.0 | 194 | 0.4415 | 0.8656 | 0.8670 | 0.8656 | 0.8628 |
|
70 |
+
| No log | 8.9897 | 218 | 0.4115 | 0.8656 | 0.8651 | 0.8656 | 0.8649 |
|
71 |
+
| No log | 9.8969 | 240 | 0.4341 | 0.8656 | 0.8628 | 0.8656 | 0.8629 |
|
72 |
|
73 |
|
74 |
### Framework versions
|
checkpoint-121/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 94765560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70018b912cc31f04407d1e82b6579b497cb0a4c9d1fe58cce991fc2459540c94
|
3 |
size 94765560
|
checkpoint-121/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 189556666
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:997c0052dc18c15b0c00ff6b7f5f979b1f10ee85d5dd478ac31aec3c59104615
|
3 |
size 189556666
|
checkpoint-121/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f9c5f2a04362b085af0242d72c5af05a769e7bae4cee46ee7d346c9df7f1eba
|
3 |
size 1064
|
checkpoint-121/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121",
|
4 |
"epoch": 4.989690721649485,
|
5 |
"eval_steps": 500,
|
@@ -10,69 +10,69 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.9896907216494846,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_f1": 0.
|
15 |
-
"eval_loss":
|
16 |
-
"eval_precision": 0.
|
17 |
-
"eval_recall": 0.
|
18 |
-
"eval_runtime": 1.
|
19 |
-
"eval_samples_per_second":
|
20 |
-
"eval_steps_per_second": 29.
|
21 |
"step": 24
|
22 |
},
|
23 |
{
|
24 |
"epoch": 1.9793814432989691,
|
25 |
-
"eval_accuracy": 0.
|
26 |
-
"eval_f1": 0.
|
27 |
-
"eval_loss": 0.
|
28 |
-
"eval_precision": 0.
|
29 |
-
"eval_recall": 0.
|
30 |
-
"eval_runtime": 1.
|
31 |
-
"eval_samples_per_second":
|
32 |
-
"eval_steps_per_second":
|
33 |
"step": 48
|
34 |
},
|
35 |
{
|
36 |
"epoch": 2.9690721649484537,
|
37 |
-
"eval_accuracy": 0.
|
38 |
-
"eval_f1": 0.
|
39 |
-
"eval_loss": 0.
|
40 |
-
"eval_precision": 0.
|
41 |
-
"eval_recall": 0.
|
42 |
-
"eval_runtime": 1.
|
43 |
-
"eval_samples_per_second":
|
44 |
-
"eval_steps_per_second":
|
45 |
"step": 72
|
46 |
},
|
47 |
{
|
48 |
"epoch": 4.0,
|
49 |
-
"eval_accuracy": 0.
|
50 |
-
"eval_f1": 0.
|
51 |
-
"eval_loss": 0.
|
52 |
-
"eval_precision": 0.
|
53 |
-
"eval_recall": 0.
|
54 |
-
"eval_runtime": 1.
|
55 |
-
"eval_samples_per_second":
|
56 |
-
"eval_steps_per_second":
|
57 |
"step": 97
|
58 |
},
|
59 |
{
|
60 |
"epoch": 4.989690721649485,
|
61 |
-
"eval_accuracy": 0.
|
62 |
-
"eval_f1": 0.
|
63 |
-
"eval_loss": 0.
|
64 |
-
"eval_precision": 0.
|
65 |
-
"eval_recall": 0.
|
66 |
-
"eval_runtime": 1.
|
67 |
-
"eval_samples_per_second":
|
68 |
-
"eval_steps_per_second":
|
69 |
"step": 121
|
70 |
}
|
71 |
],
|
72 |
"logging_steps": 500,
|
73 |
-
"max_steps":
|
74 |
"num_input_tokens_seen": 0,
|
75 |
-
"num_train_epochs":
|
76 |
"save_steps": 500,
|
77 |
"stateful_callbacks": {
|
78 |
"EarlyStoppingCallback": {
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.7881136950904393,
|
3 |
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121",
|
4 |
"epoch": 4.989690721649485,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.9896907216494846,
|
13 |
+
"eval_accuracy": 0.48320413436692505,
|
14 |
+
"eval_f1": 0.31484032448297905,
|
15 |
+
"eval_loss": 1.1717983484268188,
|
16 |
+
"eval_precision": 0.23348623546928937,
|
17 |
+
"eval_recall": 0.48320413436692505,
|
18 |
+
"eval_runtime": 1.6682,
|
19 |
+
"eval_samples_per_second": 231.984,
|
20 |
+
"eval_steps_per_second": 29.373,
|
21 |
"step": 24
|
22 |
},
|
23 |
{
|
24 |
"epoch": 1.9793814432989691,
|
25 |
+
"eval_accuracy": 0.7235142118863049,
|
26 |
+
"eval_f1": 0.7025965349533153,
|
27 |
+
"eval_loss": 0.7550917863845825,
|
28 |
+
"eval_precision": 0.7163905403042491,
|
29 |
+
"eval_recall": 0.7235142118863049,
|
30 |
+
"eval_runtime": 1.6003,
|
31 |
+
"eval_samples_per_second": 241.836,
|
32 |
+
"eval_steps_per_second": 30.62,
|
33 |
"step": 48
|
34 |
},
|
35 |
{
|
36 |
"epoch": 2.9690721649484537,
|
37 |
+
"eval_accuracy": 0.7312661498708011,
|
38 |
+
"eval_f1": 0.7074435618655927,
|
39 |
+
"eval_loss": 0.6834394335746765,
|
40 |
+
"eval_precision": 0.7620582207432803,
|
41 |
+
"eval_recall": 0.7312661498708011,
|
42 |
+
"eval_runtime": 1.604,
|
43 |
+
"eval_samples_per_second": 241.278,
|
44 |
+
"eval_steps_per_second": 30.549,
|
45 |
"step": 72
|
46 |
},
|
47 |
{
|
48 |
"epoch": 4.0,
|
49 |
+
"eval_accuracy": 0.6201550387596899,
|
50 |
+
"eval_f1": 0.616059564047211,
|
51 |
+
"eval_loss": 0.9937827587127686,
|
52 |
+
"eval_precision": 0.6843785822798868,
|
53 |
+
"eval_recall": 0.6201550387596899,
|
54 |
+
"eval_runtime": 1.5998,
|
55 |
+
"eval_samples_per_second": 241.901,
|
56 |
+
"eval_steps_per_second": 30.628,
|
57 |
"step": 97
|
58 |
},
|
59 |
{
|
60 |
"epoch": 4.989690721649485,
|
61 |
+
"eval_accuracy": 0.7881136950904393,
|
62 |
+
"eval_f1": 0.7854190251131377,
|
63 |
+
"eval_loss": 0.4972754120826721,
|
64 |
+
"eval_precision": 0.7961139879912458,
|
65 |
+
"eval_recall": 0.7881136950904393,
|
66 |
+
"eval_runtime": 1.5913,
|
67 |
+
"eval_samples_per_second": 243.196,
|
68 |
+
"eval_steps_per_second": 30.792,
|
69 |
"step": 121
|
70 |
}
|
71 |
],
|
72 |
"logging_steps": 500,
|
73 |
+
"max_steps": 240,
|
74 |
"num_input_tokens_seen": 0,
|
75 |
+
"num_train_epochs": 10,
|
76 |
"save_steps": 500,
|
77 |
"stateful_callbacks": {
|
78 |
"EarlyStoppingCallback": {
|
checkpoint-121/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
|
3 |
size 5240
|
checkpoint-145/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 94765560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9b4c786da0e0851be69580368fd5d534a86467be3071d9ea96324ca5e0176a7
|
3 |
size 94765560
|
checkpoint-145/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 189556666
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a673f8f7ab1809cc9b549b3982b0a64589cbdccccf65a873c995d3b5ba6af28d
|
3 |
size 189556666
|
checkpoint-145/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ce77a15ff28da0b94b70192478b0d3dfb304ac1b9a3e90e47f0a9e682aca696
|
3 |
size 1064
|
checkpoint-145/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121",
|
4 |
"epoch": 5.979381443298969,
|
5 |
"eval_steps": 500,
|
@@ -10,81 +10,81 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.9896907216494846,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_f1": 0.
|
15 |
-
"eval_loss":
|
16 |
-
"eval_precision": 0.
|
17 |
-
"eval_recall": 0.
|
18 |
-
"eval_runtime": 1.
|
19 |
-
"eval_samples_per_second":
|
20 |
-
"eval_steps_per_second": 29.
|
21 |
"step": 24
|
22 |
},
|
23 |
{
|
24 |
"epoch": 1.9793814432989691,
|
25 |
-
"eval_accuracy": 0.
|
26 |
-
"eval_f1": 0.
|
27 |
-
"eval_loss": 0.
|
28 |
-
"eval_precision": 0.
|
29 |
-
"eval_recall": 0.
|
30 |
-
"eval_runtime": 1.
|
31 |
-
"eval_samples_per_second":
|
32 |
-
"eval_steps_per_second":
|
33 |
"step": 48
|
34 |
},
|
35 |
{
|
36 |
"epoch": 2.9690721649484537,
|
37 |
-
"eval_accuracy": 0.
|
38 |
-
"eval_f1": 0.
|
39 |
-
"eval_loss": 0.
|
40 |
-
"eval_precision": 0.
|
41 |
-
"eval_recall": 0.
|
42 |
-
"eval_runtime": 1.
|
43 |
-
"eval_samples_per_second":
|
44 |
-
"eval_steps_per_second":
|
45 |
"step": 72
|
46 |
},
|
47 |
{
|
48 |
"epoch": 4.0,
|
49 |
-
"eval_accuracy": 0.
|
50 |
-
"eval_f1": 0.
|
51 |
-
"eval_loss": 0.
|
52 |
-
"eval_precision": 0.
|
53 |
-
"eval_recall": 0.
|
54 |
-
"eval_runtime": 1.
|
55 |
-
"eval_samples_per_second":
|
56 |
-
"eval_steps_per_second":
|
57 |
"step": 97
|
58 |
},
|
59 |
{
|
60 |
"epoch": 4.989690721649485,
|
61 |
-
"eval_accuracy": 0.
|
62 |
-
"eval_f1": 0.
|
63 |
-
"eval_loss": 0.
|
64 |
-
"eval_precision": 0.
|
65 |
-
"eval_recall": 0.
|
66 |
-
"eval_runtime": 1.
|
67 |
-
"eval_samples_per_second":
|
68 |
-
"eval_steps_per_second":
|
69 |
"step": 121
|
70 |
},
|
71 |
{
|
72 |
"epoch": 5.979381443298969,
|
73 |
-
"eval_accuracy": 0.
|
74 |
-
"eval_f1": 0.
|
75 |
-
"eval_loss": 0.
|
76 |
-
"eval_precision": 0.
|
77 |
-
"eval_recall": 0.
|
78 |
-
"eval_runtime": 1.
|
79 |
-
"eval_samples_per_second":
|
80 |
-
"eval_steps_per_second":
|
81 |
"step": 145
|
82 |
}
|
83 |
],
|
84 |
"logging_steps": 500,
|
85 |
-
"max_steps":
|
86 |
"num_input_tokens_seen": 0,
|
87 |
-
"num_train_epochs":
|
88 |
"save_steps": 500,
|
89 |
"stateful_callbacks": {
|
90 |
"EarlyStoppingCallback": {
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.7881136950904393,
|
3 |
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121",
|
4 |
"epoch": 5.979381443298969,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.9896907216494846,
|
13 |
+
"eval_accuracy": 0.48320413436692505,
|
14 |
+
"eval_f1": 0.31484032448297905,
|
15 |
+
"eval_loss": 1.1717983484268188,
|
16 |
+
"eval_precision": 0.23348623546928937,
|
17 |
+
"eval_recall": 0.48320413436692505,
|
18 |
+
"eval_runtime": 1.6682,
|
19 |
+
"eval_samples_per_second": 231.984,
|
20 |
+
"eval_steps_per_second": 29.373,
|
21 |
"step": 24
|
22 |
},
|
23 |
{
|
24 |
"epoch": 1.9793814432989691,
|
25 |
+
"eval_accuracy": 0.7235142118863049,
|
26 |
+
"eval_f1": 0.7025965349533153,
|
27 |
+
"eval_loss": 0.7550917863845825,
|
28 |
+
"eval_precision": 0.7163905403042491,
|
29 |
+
"eval_recall": 0.7235142118863049,
|
30 |
+
"eval_runtime": 1.6003,
|
31 |
+
"eval_samples_per_second": 241.836,
|
32 |
+
"eval_steps_per_second": 30.62,
|
33 |
"step": 48
|
34 |
},
|
35 |
{
|
36 |
"epoch": 2.9690721649484537,
|
37 |
+
"eval_accuracy": 0.7312661498708011,
|
38 |
+
"eval_f1": 0.7074435618655927,
|
39 |
+
"eval_loss": 0.6834394335746765,
|
40 |
+
"eval_precision": 0.7620582207432803,
|
41 |
+
"eval_recall": 0.7312661498708011,
|
42 |
+
"eval_runtime": 1.604,
|
43 |
+
"eval_samples_per_second": 241.278,
|
44 |
+
"eval_steps_per_second": 30.549,
|
45 |
"step": 72
|
46 |
},
|
47 |
{
|
48 |
"epoch": 4.0,
|
49 |
+
"eval_accuracy": 0.6201550387596899,
|
50 |
+
"eval_f1": 0.616059564047211,
|
51 |
+
"eval_loss": 0.9937827587127686,
|
52 |
+
"eval_precision": 0.6843785822798868,
|
53 |
+
"eval_recall": 0.6201550387596899,
|
54 |
+
"eval_runtime": 1.5998,
|
55 |
+
"eval_samples_per_second": 241.901,
|
56 |
+
"eval_steps_per_second": 30.628,
|
57 |
"step": 97
|
58 |
},
|
59 |
{
|
60 |
"epoch": 4.989690721649485,
|
61 |
+
"eval_accuracy": 0.7881136950904393,
|
62 |
+
"eval_f1": 0.7854190251131377,
|
63 |
+
"eval_loss": 0.4972754120826721,
|
64 |
+
"eval_precision": 0.7961139879912458,
|
65 |
+
"eval_recall": 0.7881136950904393,
|
66 |
+
"eval_runtime": 1.5913,
|
67 |
+
"eval_samples_per_second": 243.196,
|
68 |
+
"eval_steps_per_second": 30.792,
|
69 |
"step": 121
|
70 |
},
|
71 |
{
|
72 |
"epoch": 5.979381443298969,
|
73 |
+
"eval_accuracy": 0.7803617571059431,
|
74 |
+
"eval_f1": 0.781727235222138,
|
75 |
+
"eval_loss": 0.5254013538360596,
|
76 |
+
"eval_precision": 0.7843017570642586,
|
77 |
+
"eval_recall": 0.7803617571059431,
|
78 |
+
"eval_runtime": 1.6096,
|
79 |
+
"eval_samples_per_second": 240.428,
|
80 |
+
"eval_steps_per_second": 30.442,
|
81 |
"step": 145
|
82 |
}
|
83 |
],
|
84 |
"logging_steps": 500,
|
85 |
+
"max_steps": 240,
|
86 |
"num_input_tokens_seen": 0,
|
87 |
+
"num_train_epochs": 10,
|
88 |
"save_steps": 500,
|
89 |
"stateful_callbacks": {
|
90 |
"EarlyStoppingCallback": {
|
checkpoint-145/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
|
3 |
size 5240
|
checkpoint-169/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 94765560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ddbb394005b9ac70d5bc17bcb01362284d0d2278a1e44b6ce7c3513e050d418
|
3 |
size 94765560
|
checkpoint-169/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 189556666
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e66e987ee75130be9aa72386f5d1880fcb1b2c247117c7798a2a85074ab7eba
|
3 |
size 189556666
|
checkpoint-169/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14308
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3d85d4b435d4ac4552bb46f32d5f63a55dbc65baaa5af6a14b6b39e968f4b8e
|
3 |
size 14308
|
checkpoint-169/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df821f017f2db25687bec6d111c0161751da33a1a44b650c4ff88a56a7c16d8f
|
3 |
size 1064
|
checkpoint-169/trainer_state.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-169",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 169,
|
7 |
"is_hyper_param_search": false,
|
@@ -9,116 +9,92 @@
|
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
-
"epoch": 0.
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_f1": 0.
|
15 |
-
"eval_loss": 1.
|
16 |
-
"eval_precision": 0.
|
17 |
-
"eval_recall": 0.
|
18 |
-
"eval_runtime": 1.
|
19 |
-
"eval_samples_per_second":
|
20 |
-
"eval_steps_per_second":
|
21 |
-
"step":
|
22 |
},
|
23 |
{
|
24 |
-
"epoch": 1.
|
25 |
-
"eval_accuracy": 0.
|
26 |
-
"eval_f1": 0.
|
27 |
-
"eval_loss": 0.
|
28 |
-
"eval_precision": 0.
|
29 |
-
"eval_recall": 0.
|
30 |
-
"eval_runtime": 1.
|
31 |
-
"eval_samples_per_second":
|
32 |
-
"eval_steps_per_second":
|
33 |
-
"step":
|
34 |
},
|
35 |
{
|
36 |
-
"epoch": 2.
|
37 |
-
"eval_accuracy": 0.
|
38 |
-
"eval_f1": 0.
|
39 |
-
"eval_loss": 0.
|
40 |
-
"eval_precision": 0.
|
41 |
-
"eval_recall": 0.
|
42 |
-
"eval_runtime": 1.
|
43 |
-
"eval_samples_per_second":
|
44 |
-
"eval_steps_per_second": 30.
|
45 |
-
"step":
|
46 |
},
|
47 |
{
|
48 |
-
"epoch":
|
49 |
-
"eval_accuracy": 0.
|
50 |
-
"eval_f1": 0.
|
51 |
-
"eval_loss": 0.
|
52 |
-
"eval_precision": 0.
|
53 |
-
"eval_recall": 0.
|
54 |
-
"eval_runtime": 1.
|
55 |
-
"eval_samples_per_second":
|
56 |
-
"eval_steps_per_second": 30.
|
57 |
-
"step":
|
58 |
},
|
59 |
{
|
60 |
-
"epoch": 4.
|
61 |
-
"eval_accuracy": 0.
|
62 |
-
"eval_f1": 0.
|
63 |
-
"eval_loss": 0.
|
64 |
-
"eval_precision": 0.
|
65 |
-
"eval_recall": 0.
|
66 |
-
"eval_runtime": 1.
|
67 |
-
"eval_samples_per_second":
|
68 |
-
"eval_steps_per_second":
|
69 |
-
"step":
|
70 |
},
|
71 |
{
|
72 |
-
"epoch": 5.
|
73 |
-
"eval_accuracy": 0.
|
74 |
-
"eval_f1": 0.
|
75 |
-
"eval_loss": 0.
|
76 |
-
"eval_precision": 0.
|
77 |
-
"eval_recall": 0.
|
78 |
-
"eval_runtime": 1.
|
79 |
-
"eval_samples_per_second":
|
80 |
-
"eval_steps_per_second":
|
81 |
-
"step":
|
82 |
},
|
83 |
{
|
84 |
-
"epoch": 6.
|
85 |
-
"eval_accuracy": 0.
|
86 |
-
"eval_f1": 0.
|
87 |
-
"eval_loss": 0.
|
88 |
-
"eval_precision": 0.
|
89 |
-
"eval_recall": 0.
|
90 |
-
"eval_runtime": 1.
|
91 |
-
"eval_samples_per_second":
|
92 |
-
"eval_steps_per_second": 29.
|
93 |
-
"step": 132
|
94 |
-
},
|
95 |
-
{
|
96 |
-
"epoch": 8.0,
|
97 |
-
"eval_accuracy": 0.8112582781456954,
|
98 |
-
"eval_f1": 0.8021247299665692,
|
99 |
-
"eval_loss": 0.4742371141910553,
|
100 |
-
"eval_precision": 0.8054865043662888,
|
101 |
-
"eval_recall": 0.8112582781456954,
|
102 |
-
"eval_runtime": 1.381,
|
103 |
-
"eval_samples_per_second": 218.682,
|
104 |
-
"eval_steps_per_second": 27.516,
|
105 |
-
"step": 151
|
106 |
-
},
|
107 |
-
{
|
108 |
-
"epoch": 8.95364238410596,
|
109 |
-
"eval_accuracy": 0.8145695364238411,
|
110 |
-
"eval_f1": 0.805819805920304,
|
111 |
-
"eval_loss": 0.4742475152015686,
|
112 |
-
"eval_precision": 0.8065208989148904,
|
113 |
-
"eval_recall": 0.8145695364238411,
|
114 |
-
"eval_runtime": 1.2663,
|
115 |
-
"eval_samples_per_second": 238.482,
|
116 |
-
"eval_steps_per_second": 30.008,
|
117 |
"step": 169
|
118 |
}
|
119 |
],
|
120 |
"logging_steps": 500,
|
121 |
-
"max_steps":
|
122 |
"num_input_tokens_seen": 0,
|
123 |
"num_train_epochs": 10,
|
124 |
"save_steps": 500,
|
@@ -143,7 +119,7 @@
|
|
143 |
"attributes": {}
|
144 |
}
|
145 |
},
|
146 |
-
"total_flos": 2.
|
147 |
"train_batch_size": 8,
|
148 |
"trial_name": null,
|
149 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.8475452196382429,
|
3 |
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-169",
|
4 |
+
"epoch": 6.969072164948454,
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 169,
|
7 |
"is_hyper_param_search": false,
|
|
|
9 |
"is_world_process_zero": true,
|
10 |
"log_history": [
|
11 |
{
|
12 |
+
"epoch": 0.9896907216494846,
|
13 |
+
"eval_accuracy": 0.48320413436692505,
|
14 |
+
"eval_f1": 0.31484032448297905,
|
15 |
+
"eval_loss": 1.1717983484268188,
|
16 |
+
"eval_precision": 0.23348623546928937,
|
17 |
+
"eval_recall": 0.48320413436692505,
|
18 |
+
"eval_runtime": 1.6682,
|
19 |
+
"eval_samples_per_second": 231.984,
|
20 |
+
"eval_steps_per_second": 29.373,
|
21 |
+
"step": 24
|
22 |
},
|
23 |
{
|
24 |
+
"epoch": 1.9793814432989691,
|
25 |
+
"eval_accuracy": 0.7235142118863049,
|
26 |
+
"eval_f1": 0.7025965349533153,
|
27 |
+
"eval_loss": 0.7550917863845825,
|
28 |
+
"eval_precision": 0.7163905403042491,
|
29 |
+
"eval_recall": 0.7235142118863049,
|
30 |
+
"eval_runtime": 1.6003,
|
31 |
+
"eval_samples_per_second": 241.836,
|
32 |
+
"eval_steps_per_second": 30.62,
|
33 |
+
"step": 48
|
34 |
},
|
35 |
{
|
36 |
+
"epoch": 2.9690721649484537,
|
37 |
+
"eval_accuracy": 0.7312661498708011,
|
38 |
+
"eval_f1": 0.7074435618655927,
|
39 |
+
"eval_loss": 0.6834394335746765,
|
40 |
+
"eval_precision": 0.7620582207432803,
|
41 |
+
"eval_recall": 0.7312661498708011,
|
42 |
+
"eval_runtime": 1.604,
|
43 |
+
"eval_samples_per_second": 241.278,
|
44 |
+
"eval_steps_per_second": 30.549,
|
45 |
+
"step": 72
|
46 |
},
|
47 |
{
|
48 |
+
"epoch": 4.0,
|
49 |
+
"eval_accuracy": 0.6201550387596899,
|
50 |
+
"eval_f1": 0.616059564047211,
|
51 |
+
"eval_loss": 0.9937827587127686,
|
52 |
+
"eval_precision": 0.6843785822798868,
|
53 |
+
"eval_recall": 0.6201550387596899,
|
54 |
+
"eval_runtime": 1.5998,
|
55 |
+
"eval_samples_per_second": 241.901,
|
56 |
+
"eval_steps_per_second": 30.628,
|
57 |
+
"step": 97
|
58 |
},
|
59 |
{
|
60 |
+
"epoch": 4.989690721649485,
|
61 |
+
"eval_accuracy": 0.7881136950904393,
|
62 |
+
"eval_f1": 0.7854190251131377,
|
63 |
+
"eval_loss": 0.4972754120826721,
|
64 |
+
"eval_precision": 0.7961139879912458,
|
65 |
+
"eval_recall": 0.7881136950904393,
|
66 |
+
"eval_runtime": 1.5913,
|
67 |
+
"eval_samples_per_second": 243.196,
|
68 |
+
"eval_steps_per_second": 30.792,
|
69 |
+
"step": 121
|
70 |
},
|
71 |
{
|
72 |
+
"epoch": 5.979381443298969,
|
73 |
+
"eval_accuracy": 0.7803617571059431,
|
74 |
+
"eval_f1": 0.781727235222138,
|
75 |
+
"eval_loss": 0.5254013538360596,
|
76 |
+
"eval_precision": 0.7843017570642586,
|
77 |
+
"eval_recall": 0.7803617571059431,
|
78 |
+
"eval_runtime": 1.6096,
|
79 |
+
"eval_samples_per_second": 240.428,
|
80 |
+
"eval_steps_per_second": 30.442,
|
81 |
+
"step": 145
|
82 |
},
|
83 |
{
|
84 |
+
"epoch": 6.969072164948454,
|
85 |
+
"eval_accuracy": 0.8475452196382429,
|
86 |
+
"eval_f1": 0.8504309219603966,
|
87 |
+
"eval_loss": 0.42636802792549133,
|
88 |
+
"eval_precision": 0.8557626671638175,
|
89 |
+
"eval_recall": 0.8475452196382429,
|
90 |
+
"eval_runtime": 1.6741,
|
91 |
+
"eval_samples_per_second": 231.171,
|
92 |
+
"eval_steps_per_second": 29.27,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
"step": 169
|
94 |
}
|
95 |
],
|
96 |
"logging_steps": 500,
|
97 |
+
"max_steps": 240,
|
98 |
"num_input_tokens_seen": 0,
|
99 |
"num_train_epochs": 10,
|
100 |
"save_steps": 500,
|
|
|
119 |
"attributes": {}
|
120 |
}
|
121 |
},
|
122 |
+
"total_flos": 2.45957602464e+16,
|
123 |
"train_batch_size": 8,
|
124 |
"trial_name": null,
|
125 |
"trial_params": null
|
checkpoint-169/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
|
3 |
size 5240
|
checkpoint-194/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 94765560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3a627702cd14b16dbfa2578e7673bd3814fac0eb9375d373c2f06a0a1d5a738
|
3 |
size 94765560
|
checkpoint-194/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 189556666
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96506f68c35bc380113ed68f683cfdb89562b8e18cd99f04207e7f2cf6c07543
|
3 |
size 189556666
|
checkpoint-194/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9baadb1a4e65e06e7e3878b0e06173eb575209134ab9848c9fd367b8c2a762e0
|
3 |
size 1064
|
checkpoint-194/trainer_state.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-
|
4 |
"epoch": 8.0,
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 194,
|
@@ -10,105 +10,105 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.9896907216494846,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_f1": 0.
|
15 |
-
"eval_loss":
|
16 |
-
"eval_precision": 0.
|
17 |
-
"eval_recall": 0.
|
18 |
-
"eval_runtime": 1.
|
19 |
-
"eval_samples_per_second":
|
20 |
-
"eval_steps_per_second": 29.
|
21 |
"step": 24
|
22 |
},
|
23 |
{
|
24 |
"epoch": 1.9793814432989691,
|
25 |
-
"eval_accuracy": 0.
|
26 |
-
"eval_f1": 0.
|
27 |
-
"eval_loss": 0.
|
28 |
-
"eval_precision": 0.
|
29 |
-
"eval_recall": 0.
|
30 |
-
"eval_runtime": 1.
|
31 |
-
"eval_samples_per_second":
|
32 |
-
"eval_steps_per_second":
|
33 |
"step": 48
|
34 |
},
|
35 |
{
|
36 |
"epoch": 2.9690721649484537,
|
37 |
-
"eval_accuracy": 0.
|
38 |
-
"eval_f1": 0.
|
39 |
-
"eval_loss": 0.
|
40 |
-
"eval_precision": 0.
|
41 |
-
"eval_recall": 0.
|
42 |
-
"eval_runtime": 1.
|
43 |
-
"eval_samples_per_second":
|
44 |
-
"eval_steps_per_second":
|
45 |
"step": 72
|
46 |
},
|
47 |
{
|
48 |
"epoch": 4.0,
|
49 |
-
"eval_accuracy": 0.
|
50 |
-
"eval_f1": 0.
|
51 |
-
"eval_loss": 0.
|
52 |
-
"eval_precision": 0.
|
53 |
-
"eval_recall": 0.
|
54 |
-
"eval_runtime": 1.
|
55 |
-
"eval_samples_per_second":
|
56 |
-
"eval_steps_per_second":
|
57 |
"step": 97
|
58 |
},
|
59 |
{
|
60 |
"epoch": 4.989690721649485,
|
61 |
-
"eval_accuracy": 0.
|
62 |
-
"eval_f1": 0.
|
63 |
-
"eval_loss": 0.
|
64 |
-
"eval_precision": 0.
|
65 |
-
"eval_recall": 0.
|
66 |
-
"eval_runtime": 1.
|
67 |
-
"eval_samples_per_second":
|
68 |
-
"eval_steps_per_second":
|
69 |
"step": 121
|
70 |
},
|
71 |
{
|
72 |
"epoch": 5.979381443298969,
|
73 |
-
"eval_accuracy": 0.
|
74 |
-
"eval_f1": 0.
|
75 |
-
"eval_loss": 0.
|
76 |
-
"eval_precision": 0.
|
77 |
-
"eval_recall": 0.
|
78 |
-
"eval_runtime": 1.
|
79 |
-
"eval_samples_per_second":
|
80 |
-
"eval_steps_per_second":
|
81 |
"step": 145
|
82 |
},
|
83 |
{
|
84 |
"epoch": 6.969072164948454,
|
85 |
-
"eval_accuracy": 0.
|
86 |
-
"eval_f1": 0.
|
87 |
-
"eval_loss": 0.
|
88 |
-
"eval_precision": 0.
|
89 |
-
"eval_recall": 0.
|
90 |
-
"eval_runtime": 1.
|
91 |
-
"eval_samples_per_second":
|
92 |
-
"eval_steps_per_second": 29.
|
93 |
"step": 169
|
94 |
},
|
95 |
{
|
96 |
"epoch": 8.0,
|
97 |
-
"eval_accuracy": 0.
|
98 |
-
"eval_f1": 0.
|
99 |
-
"eval_loss": 0.
|
100 |
-
"eval_precision": 0.
|
101 |
-
"eval_recall": 0.
|
102 |
-
"eval_runtime": 1.
|
103 |
-
"eval_samples_per_second":
|
104 |
-
"eval_steps_per_second": 29.
|
105 |
"step": 194
|
106 |
}
|
107 |
],
|
108 |
"logging_steps": 500,
|
109 |
-
"max_steps":
|
110 |
"num_input_tokens_seen": 0,
|
111 |
-
"num_train_epochs":
|
112 |
"save_steps": 500,
|
113 |
"stateful_callbacks": {
|
114 |
"EarlyStoppingCallback": {
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.8656330749354005,
|
3 |
+
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-194",
|
4 |
"epoch": 8.0,
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 194,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.9896907216494846,
|
13 |
+
"eval_accuracy": 0.48320413436692505,
|
14 |
+
"eval_f1": 0.31484032448297905,
|
15 |
+
"eval_loss": 1.1717983484268188,
|
16 |
+
"eval_precision": 0.23348623546928937,
|
17 |
+
"eval_recall": 0.48320413436692505,
|
18 |
+
"eval_runtime": 1.6682,
|
19 |
+
"eval_samples_per_second": 231.984,
|
20 |
+
"eval_steps_per_second": 29.373,
|
21 |
"step": 24
|
22 |
},
|
23 |
{
|
24 |
"epoch": 1.9793814432989691,
|
25 |
+
"eval_accuracy": 0.7235142118863049,
|
26 |
+
"eval_f1": 0.7025965349533153,
|
27 |
+
"eval_loss": 0.7550917863845825,
|
28 |
+
"eval_precision": 0.7163905403042491,
|
29 |
+
"eval_recall": 0.7235142118863049,
|
30 |
+
"eval_runtime": 1.6003,
|
31 |
+
"eval_samples_per_second": 241.836,
|
32 |
+
"eval_steps_per_second": 30.62,
|
33 |
"step": 48
|
34 |
},
|
35 |
{
|
36 |
"epoch": 2.9690721649484537,
|
37 |
+
"eval_accuracy": 0.7312661498708011,
|
38 |
+
"eval_f1": 0.7074435618655927,
|
39 |
+
"eval_loss": 0.6834394335746765,
|
40 |
+
"eval_precision": 0.7620582207432803,
|
41 |
+
"eval_recall": 0.7312661498708011,
|
42 |
+
"eval_runtime": 1.604,
|
43 |
+
"eval_samples_per_second": 241.278,
|
44 |
+
"eval_steps_per_second": 30.549,
|
45 |
"step": 72
|
46 |
},
|
47 |
{
|
48 |
"epoch": 4.0,
|
49 |
+
"eval_accuracy": 0.6201550387596899,
|
50 |
+
"eval_f1": 0.616059564047211,
|
51 |
+
"eval_loss": 0.9937827587127686,
|
52 |
+
"eval_precision": 0.6843785822798868,
|
53 |
+
"eval_recall": 0.6201550387596899,
|
54 |
+
"eval_runtime": 1.5998,
|
55 |
+
"eval_samples_per_second": 241.901,
|
56 |
+
"eval_steps_per_second": 30.628,
|
57 |
"step": 97
|
58 |
},
|
59 |
{
|
60 |
"epoch": 4.989690721649485,
|
61 |
+
"eval_accuracy": 0.7881136950904393,
|
62 |
+
"eval_f1": 0.7854190251131377,
|
63 |
+
"eval_loss": 0.4972754120826721,
|
64 |
+
"eval_precision": 0.7961139879912458,
|
65 |
+
"eval_recall": 0.7881136950904393,
|
66 |
+
"eval_runtime": 1.5913,
|
67 |
+
"eval_samples_per_second": 243.196,
|
68 |
+
"eval_steps_per_second": 30.792,
|
69 |
"step": 121
|
70 |
},
|
71 |
{
|
72 |
"epoch": 5.979381443298969,
|
73 |
+
"eval_accuracy": 0.7803617571059431,
|
74 |
+
"eval_f1": 0.781727235222138,
|
75 |
+
"eval_loss": 0.5254013538360596,
|
76 |
+
"eval_precision": 0.7843017570642586,
|
77 |
+
"eval_recall": 0.7803617571059431,
|
78 |
+
"eval_runtime": 1.6096,
|
79 |
+
"eval_samples_per_second": 240.428,
|
80 |
+
"eval_steps_per_second": 30.442,
|
81 |
"step": 145
|
82 |
},
|
83 |
{
|
84 |
"epoch": 6.969072164948454,
|
85 |
+
"eval_accuracy": 0.8475452196382429,
|
86 |
+
"eval_f1": 0.8504309219603966,
|
87 |
+
"eval_loss": 0.42636802792549133,
|
88 |
+
"eval_precision": 0.8557626671638175,
|
89 |
+
"eval_recall": 0.8475452196382429,
|
90 |
+
"eval_runtime": 1.6741,
|
91 |
+
"eval_samples_per_second": 231.171,
|
92 |
+
"eval_steps_per_second": 29.27,
|
93 |
"step": 169
|
94 |
},
|
95 |
{
|
96 |
"epoch": 8.0,
|
97 |
+
"eval_accuracy": 0.8656330749354005,
|
98 |
+
"eval_f1": 0.8627731371728347,
|
99 |
+
"eval_loss": 0.4414582848548889,
|
100 |
+
"eval_precision": 0.8670254176803228,
|
101 |
+
"eval_recall": 0.8656330749354005,
|
102 |
+
"eval_runtime": 1.6573,
|
103 |
+
"eval_samples_per_second": 233.512,
|
104 |
+
"eval_steps_per_second": 29.566,
|
105 |
"step": 194
|
106 |
}
|
107 |
],
|
108 |
"logging_steps": 500,
|
109 |
+
"max_steps": 240,
|
110 |
"num_input_tokens_seen": 0,
|
111 |
+
"num_train_epochs": 10,
|
112 |
"save_steps": 500,
|
113 |
"stateful_callbacks": {
|
114 |
"EarlyStoppingCallback": {
|
checkpoint-194/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
|
3 |
size 5240
|
checkpoint-218/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 94765560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28660ecbedd8800f912a5673efca37a66998ebce7acd99c4bc922036560b2975
|
3 |
size 94765560
|
checkpoint-218/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 189556666
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22cdbaee61cabc6fda6bb731be8bc654cb40396c4c4e2be92d86d4a184ef0728
|
3 |
size 189556666
|
checkpoint-218/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05468f3e42af9ca556e835b8f8bb03232840d74963a2a7b94013d776ea21eb80
|
3 |
size 1064
|
checkpoint-218/trainer_state.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-
|
4 |
"epoch": 8.989690721649485,
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 218,
|
@@ -10,117 +10,117 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.9896907216494846,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_f1": 0.
|
15 |
-
"eval_loss":
|
16 |
-
"eval_precision": 0.
|
17 |
-
"eval_recall": 0.
|
18 |
-
"eval_runtime": 1.
|
19 |
-
"eval_samples_per_second":
|
20 |
-
"eval_steps_per_second": 29.
|
21 |
"step": 24
|
22 |
},
|
23 |
{
|
24 |
"epoch": 1.9793814432989691,
|
25 |
-
"eval_accuracy": 0.
|
26 |
-
"eval_f1": 0.
|
27 |
-
"eval_loss": 0.
|
28 |
-
"eval_precision": 0.
|
29 |
-
"eval_recall": 0.
|
30 |
-
"eval_runtime": 1.
|
31 |
-
"eval_samples_per_second":
|
32 |
-
"eval_steps_per_second":
|
33 |
"step": 48
|
34 |
},
|
35 |
{
|
36 |
"epoch": 2.9690721649484537,
|
37 |
-
"eval_accuracy": 0.
|
38 |
-
"eval_f1": 0.
|
39 |
-
"eval_loss": 0.
|
40 |
-
"eval_precision": 0.
|
41 |
-
"eval_recall": 0.
|
42 |
-
"eval_runtime": 1.
|
43 |
-
"eval_samples_per_second":
|
44 |
-
"eval_steps_per_second":
|
45 |
"step": 72
|
46 |
},
|
47 |
{
|
48 |
"epoch": 4.0,
|
49 |
-
"eval_accuracy": 0.
|
50 |
-
"eval_f1": 0.
|
51 |
-
"eval_loss": 0.
|
52 |
-
"eval_precision": 0.
|
53 |
-
"eval_recall": 0.
|
54 |
-
"eval_runtime": 1.
|
55 |
-
"eval_samples_per_second":
|
56 |
-
"eval_steps_per_second":
|
57 |
"step": 97
|
58 |
},
|
59 |
{
|
60 |
"epoch": 4.989690721649485,
|
61 |
-
"eval_accuracy": 0.
|
62 |
-
"eval_f1": 0.
|
63 |
-
"eval_loss": 0.
|
64 |
-
"eval_precision": 0.
|
65 |
-
"eval_recall": 0.
|
66 |
-
"eval_runtime": 1.
|
67 |
-
"eval_samples_per_second":
|
68 |
-
"eval_steps_per_second":
|
69 |
"step": 121
|
70 |
},
|
71 |
{
|
72 |
"epoch": 5.979381443298969,
|
73 |
-
"eval_accuracy": 0.
|
74 |
-
"eval_f1": 0.
|
75 |
-
"eval_loss": 0.
|
76 |
-
"eval_precision": 0.
|
77 |
-
"eval_recall": 0.
|
78 |
-
"eval_runtime": 1.
|
79 |
-
"eval_samples_per_second":
|
80 |
-
"eval_steps_per_second":
|
81 |
"step": 145
|
82 |
},
|
83 |
{
|
84 |
"epoch": 6.969072164948454,
|
85 |
-
"eval_accuracy": 0.
|
86 |
-
"eval_f1": 0.
|
87 |
-
"eval_loss": 0.
|
88 |
-
"eval_precision": 0.
|
89 |
-
"eval_recall": 0.
|
90 |
-
"eval_runtime": 1.
|
91 |
-
"eval_samples_per_second":
|
92 |
-
"eval_steps_per_second": 29.
|
93 |
"step": 169
|
94 |
},
|
95 |
{
|
96 |
"epoch": 8.0,
|
97 |
-
"eval_accuracy": 0.
|
98 |
-
"eval_f1": 0.
|
99 |
-
"eval_loss": 0.
|
100 |
-
"eval_precision": 0.
|
101 |
-
"eval_recall": 0.
|
102 |
-
"eval_runtime": 1.
|
103 |
-
"eval_samples_per_second":
|
104 |
-
"eval_steps_per_second": 29.
|
105 |
"step": 194
|
106 |
},
|
107 |
{
|
108 |
"epoch": 8.989690721649485,
|
109 |
-
"eval_accuracy": 0.
|
110 |
-
"eval_f1": 0.
|
111 |
-
"eval_loss": 0.
|
112 |
-
"eval_precision": 0.
|
113 |
-
"eval_recall": 0.
|
114 |
-
"eval_runtime": 1.
|
115 |
-
"eval_samples_per_second":
|
116 |
-
"eval_steps_per_second": 29.
|
117 |
"step": 218
|
118 |
}
|
119 |
],
|
120 |
"logging_steps": 500,
|
121 |
-
"max_steps":
|
122 |
"num_input_tokens_seen": 0,
|
123 |
-
"num_train_epochs":
|
124 |
"save_steps": 500,
|
125 |
"stateful_callbacks": {
|
126 |
"EarlyStoppingCallback": {
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.8656330749354005,
|
3 |
+
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-194",
|
4 |
"epoch": 8.989690721649485,
|
5 |
"eval_steps": 500,
|
6 |
"global_step": 218,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.9896907216494846,
|
13 |
+
"eval_accuracy": 0.48320413436692505,
|
14 |
+
"eval_f1": 0.31484032448297905,
|
15 |
+
"eval_loss": 1.1717983484268188,
|
16 |
+
"eval_precision": 0.23348623546928937,
|
17 |
+
"eval_recall": 0.48320413436692505,
|
18 |
+
"eval_runtime": 1.6682,
|
19 |
+
"eval_samples_per_second": 231.984,
|
20 |
+
"eval_steps_per_second": 29.373,
|
21 |
"step": 24
|
22 |
},
|
23 |
{
|
24 |
"epoch": 1.9793814432989691,
|
25 |
+
"eval_accuracy": 0.7235142118863049,
|
26 |
+
"eval_f1": 0.7025965349533153,
|
27 |
+
"eval_loss": 0.7550917863845825,
|
28 |
+
"eval_precision": 0.7163905403042491,
|
29 |
+
"eval_recall": 0.7235142118863049,
|
30 |
+
"eval_runtime": 1.6003,
|
31 |
+
"eval_samples_per_second": 241.836,
|
32 |
+
"eval_steps_per_second": 30.62,
|
33 |
"step": 48
|
34 |
},
|
35 |
{
|
36 |
"epoch": 2.9690721649484537,
|
37 |
+
"eval_accuracy": 0.7312661498708011,
|
38 |
+
"eval_f1": 0.7074435618655927,
|
39 |
+
"eval_loss": 0.6834394335746765,
|
40 |
+
"eval_precision": 0.7620582207432803,
|
41 |
+
"eval_recall": 0.7312661498708011,
|
42 |
+
"eval_runtime": 1.604,
|
43 |
+
"eval_samples_per_second": 241.278,
|
44 |
+
"eval_steps_per_second": 30.549,
|
45 |
"step": 72
|
46 |
},
|
47 |
{
|
48 |
"epoch": 4.0,
|
49 |
+
"eval_accuracy": 0.6201550387596899,
|
50 |
+
"eval_f1": 0.616059564047211,
|
51 |
+
"eval_loss": 0.9937827587127686,
|
52 |
+
"eval_precision": 0.6843785822798868,
|
53 |
+
"eval_recall": 0.6201550387596899,
|
54 |
+
"eval_runtime": 1.5998,
|
55 |
+
"eval_samples_per_second": 241.901,
|
56 |
+
"eval_steps_per_second": 30.628,
|
57 |
"step": 97
|
58 |
},
|
59 |
{
|
60 |
"epoch": 4.989690721649485,
|
61 |
+
"eval_accuracy": 0.7881136950904393,
|
62 |
+
"eval_f1": 0.7854190251131377,
|
63 |
+
"eval_loss": 0.4972754120826721,
|
64 |
+
"eval_precision": 0.7961139879912458,
|
65 |
+
"eval_recall": 0.7881136950904393,
|
66 |
+
"eval_runtime": 1.5913,
|
67 |
+
"eval_samples_per_second": 243.196,
|
68 |
+
"eval_steps_per_second": 30.792,
|
69 |
"step": 121
|
70 |
},
|
71 |
{
|
72 |
"epoch": 5.979381443298969,
|
73 |
+
"eval_accuracy": 0.7803617571059431,
|
74 |
+
"eval_f1": 0.781727235222138,
|
75 |
+
"eval_loss": 0.5254013538360596,
|
76 |
+
"eval_precision": 0.7843017570642586,
|
77 |
+
"eval_recall": 0.7803617571059431,
|
78 |
+
"eval_runtime": 1.6096,
|
79 |
+
"eval_samples_per_second": 240.428,
|
80 |
+
"eval_steps_per_second": 30.442,
|
81 |
"step": 145
|
82 |
},
|
83 |
{
|
84 |
"epoch": 6.969072164948454,
|
85 |
+
"eval_accuracy": 0.8475452196382429,
|
86 |
+
"eval_f1": 0.8504309219603966,
|
87 |
+
"eval_loss": 0.42636802792549133,
|
88 |
+
"eval_precision": 0.8557626671638175,
|
89 |
+
"eval_recall": 0.8475452196382429,
|
90 |
+
"eval_runtime": 1.6741,
|
91 |
+
"eval_samples_per_second": 231.171,
|
92 |
+
"eval_steps_per_second": 29.27,
|
93 |
"step": 169
|
94 |
},
|
95 |
{
|
96 |
"epoch": 8.0,
|
97 |
+
"eval_accuracy": 0.8656330749354005,
|
98 |
+
"eval_f1": 0.8627731371728347,
|
99 |
+
"eval_loss": 0.4414582848548889,
|
100 |
+
"eval_precision": 0.8670254176803228,
|
101 |
+
"eval_recall": 0.8656330749354005,
|
102 |
+
"eval_runtime": 1.6573,
|
103 |
+
"eval_samples_per_second": 233.512,
|
104 |
+
"eval_steps_per_second": 29.566,
|
105 |
"step": 194
|
106 |
},
|
107 |
{
|
108 |
"epoch": 8.989690721649485,
|
109 |
+
"eval_accuracy": 0.8656330749354005,
|
110 |
+
"eval_f1": 0.8648808138390941,
|
111 |
+
"eval_loss": 0.4115408658981323,
|
112 |
+
"eval_precision": 0.8650901526054525,
|
113 |
+
"eval_recall": 0.8656330749354005,
|
114 |
+
"eval_runtime": 1.6461,
|
115 |
+
"eval_samples_per_second": 235.106,
|
116 |
+
"eval_steps_per_second": 29.768,
|
117 |
"step": 218
|
118 |
}
|
119 |
],
|
120 |
"logging_steps": 500,
|
121 |
+
"max_steps": 240,
|
122 |
"num_input_tokens_seen": 0,
|
123 |
+
"num_train_epochs": 10,
|
124 |
"save_steps": 500,
|
125 |
"stateful_callbacks": {
|
126 |
"EarlyStoppingCallback": {
|
checkpoint-218/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
|
3 |
size 5240
|
checkpoint-24/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 94765560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d95db9670c0e00359e684b13ba791d256154dc75e057deabe4d20fd9d0554235
|
3 |
size 94765560
|
checkpoint-24/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 189556666
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a82ca177e922f7eb2017da7cfd0f85cb2f6f94396cd239a01bca3e3798f310b8
|
3 |
size 189556666
|
checkpoint-24/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:199b7ea3cf1a15c2128c334c968f9f4a32fee4fdb9c39d77658a7126eada7cf8
|
3 |
size 1064
|
checkpoint-24/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-24",
|
4 |
"epoch": 0.9896907216494846,
|
5 |
"eval_steps": 500,
|
@@ -10,21 +10,21 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.9896907216494846,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_f1": 0.
|
15 |
-
"eval_loss":
|
16 |
-
"eval_precision": 0.
|
17 |
-
"eval_recall": 0.
|
18 |
-
"eval_runtime": 1.
|
19 |
-
"eval_samples_per_second":
|
20 |
-
"eval_steps_per_second": 29.
|
21 |
"step": 24
|
22 |
}
|
23 |
],
|
24 |
"logging_steps": 500,
|
25 |
-
"max_steps":
|
26 |
"num_input_tokens_seen": 0,
|
27 |
-
"num_train_epochs":
|
28 |
"save_steps": 500,
|
29 |
"stateful_callbacks": {
|
30 |
"EarlyStoppingCallback": {
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.48320413436692505,
|
3 |
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-24",
|
4 |
"epoch": 0.9896907216494846,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.9896907216494846,
|
13 |
+
"eval_accuracy": 0.48320413436692505,
|
14 |
+
"eval_f1": 0.31484032448297905,
|
15 |
+
"eval_loss": 1.1717983484268188,
|
16 |
+
"eval_precision": 0.23348623546928937,
|
17 |
+
"eval_recall": 0.48320413436692505,
|
18 |
+
"eval_runtime": 1.6682,
|
19 |
+
"eval_samples_per_second": 231.984,
|
20 |
+
"eval_steps_per_second": 29.373,
|
21 |
"step": 24
|
22 |
}
|
23 |
],
|
24 |
"logging_steps": 500,
|
25 |
+
"max_steps": 240,
|
26 |
"num_input_tokens_seen": 0,
|
27 |
+
"num_train_epochs": 10,
|
28 |
"save_steps": 500,
|
29 |
"stateful_callbacks": {
|
30 |
"EarlyStoppingCallback": {
|
checkpoint-24/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
|
3 |
size 5240
|
checkpoint-240/config.json
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "ntu-spml/distilhubert",
|
3 |
+
"activation_dropout": 0.1,
|
4 |
+
"apply_spec_augment": false,
|
5 |
+
"architectures": [
|
6 |
+
"HubertForSequenceClassification"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"bos_token_id": 1,
|
10 |
+
"classifier_proj_size": 256,
|
11 |
+
"conv_bias": false,
|
12 |
+
"conv_dim": [
|
13 |
+
512,
|
14 |
+
512,
|
15 |
+
512,
|
16 |
+
512,
|
17 |
+
512,
|
18 |
+
512,
|
19 |
+
512
|
20 |
+
],
|
21 |
+
"conv_kernel": [
|
22 |
+
10,
|
23 |
+
3,
|
24 |
+
3,
|
25 |
+
3,
|
26 |
+
3,
|
27 |
+
2,
|
28 |
+
2
|
29 |
+
],
|
30 |
+
"conv_stride": [
|
31 |
+
5,
|
32 |
+
2,
|
33 |
+
2,
|
34 |
+
2,
|
35 |
+
2,
|
36 |
+
2,
|
37 |
+
2
|
38 |
+
],
|
39 |
+
"ctc_loss_reduction": "sum",
|
40 |
+
"ctc_zero_infinity": false,
|
41 |
+
"do_stable_layer_norm": false,
|
42 |
+
"eos_token_id": 2,
|
43 |
+
"feat_extract_activation": "gelu",
|
44 |
+
"feat_extract_norm": "group",
|
45 |
+
"feat_proj_dropout": 0.0,
|
46 |
+
"feat_proj_layer_norm": false,
|
47 |
+
"final_dropout": 0.0,
|
48 |
+
"finetuning_task": "audio-classification",
|
49 |
+
"hidden_act": "gelu",
|
50 |
+
"hidden_dropout": 0.1,
|
51 |
+
"hidden_size": 768,
|
52 |
+
"id2label": {
|
53 |
+
"0": "1s_normal",
|
54 |
+
"1": "1s_pain",
|
55 |
+
"2": "1s_hunger",
|
56 |
+
"3": "1s_asphyxia"
|
57 |
+
},
|
58 |
+
"initializer_range": 0.02,
|
59 |
+
"intermediate_size": 3072,
|
60 |
+
"label2id": {
|
61 |
+
"1s_asphyxia": 3,
|
62 |
+
"1s_hunger": 2,
|
63 |
+
"1s_normal": 0,
|
64 |
+
"1s_pain": 1
|
65 |
+
},
|
66 |
+
"layer_norm_eps": 1e-05,
|
67 |
+
"layerdrop": 0.0,
|
68 |
+
"mask_feature_length": 10,
|
69 |
+
"mask_feature_min_masks": 0,
|
70 |
+
"mask_feature_prob": 0.0,
|
71 |
+
"mask_time_length": 10,
|
72 |
+
"mask_time_min_masks": 2,
|
73 |
+
"mask_time_prob": 0.05,
|
74 |
+
"model_type": "hubert",
|
75 |
+
"num_attention_heads": 12,
|
76 |
+
"num_conv_pos_embedding_groups": 16,
|
77 |
+
"num_conv_pos_embeddings": 128,
|
78 |
+
"num_feat_extract_layers": 7,
|
79 |
+
"num_hidden_layers": 2,
|
80 |
+
"pad_token_id": 0,
|
81 |
+
"torch_dtype": "float32",
|
82 |
+
"transformers_version": "4.44.2",
|
83 |
+
"use_weighted_layer_sum": false,
|
84 |
+
"vocab_size": 32
|
85 |
+
}
|
checkpoint-240/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92d699cc9f111730f27e926e204d785c6edcdaa6db0e4e31a03b31178120be85
|
3 |
+
size 94765560
|
checkpoint-240/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02265945a9b9df7108c893934c9f1e83b666674d8cf2044e05bfb54774f0ec5e
|
3 |
+
size 189556666
|
checkpoint-240/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa08735989c33a0829468f970a7eea2caca2871ec3d3c1f5b6fd56289f75077f
|
3 |
+
size 14308
|
checkpoint-240/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa3fb3e2f8633048c1c6df5f62d8f415873cae8a8bc433ff826bcb996ed1b35b
|
3 |
+
size 1064
|
checkpoint-240/trainer_state.json
ADDED
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.8656330749354005,
|
3 |
+
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-194",
|
4 |
+
"epoch": 9.896907216494846,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 240,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.9896907216494846,
|
13 |
+
"eval_accuracy": 0.48320413436692505,
|
14 |
+
"eval_f1": 0.31484032448297905,
|
15 |
+
"eval_loss": 1.1717983484268188,
|
16 |
+
"eval_precision": 0.23348623546928937,
|
17 |
+
"eval_recall": 0.48320413436692505,
|
18 |
+
"eval_runtime": 1.6682,
|
19 |
+
"eval_samples_per_second": 231.984,
|
20 |
+
"eval_steps_per_second": 29.373,
|
21 |
+
"step": 24
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 1.9793814432989691,
|
25 |
+
"eval_accuracy": 0.7235142118863049,
|
26 |
+
"eval_f1": 0.7025965349533153,
|
27 |
+
"eval_loss": 0.7550917863845825,
|
28 |
+
"eval_precision": 0.7163905403042491,
|
29 |
+
"eval_recall": 0.7235142118863049,
|
30 |
+
"eval_runtime": 1.6003,
|
31 |
+
"eval_samples_per_second": 241.836,
|
32 |
+
"eval_steps_per_second": 30.62,
|
33 |
+
"step": 48
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 2.9690721649484537,
|
37 |
+
"eval_accuracy": 0.7312661498708011,
|
38 |
+
"eval_f1": 0.7074435618655927,
|
39 |
+
"eval_loss": 0.6834394335746765,
|
40 |
+
"eval_precision": 0.7620582207432803,
|
41 |
+
"eval_recall": 0.7312661498708011,
|
42 |
+
"eval_runtime": 1.604,
|
43 |
+
"eval_samples_per_second": 241.278,
|
44 |
+
"eval_steps_per_second": 30.549,
|
45 |
+
"step": 72
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 4.0,
|
49 |
+
"eval_accuracy": 0.6201550387596899,
|
50 |
+
"eval_f1": 0.616059564047211,
|
51 |
+
"eval_loss": 0.9937827587127686,
|
52 |
+
"eval_precision": 0.6843785822798868,
|
53 |
+
"eval_recall": 0.6201550387596899,
|
54 |
+
"eval_runtime": 1.5998,
|
55 |
+
"eval_samples_per_second": 241.901,
|
56 |
+
"eval_steps_per_second": 30.628,
|
57 |
+
"step": 97
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"epoch": 4.989690721649485,
|
61 |
+
"eval_accuracy": 0.7881136950904393,
|
62 |
+
"eval_f1": 0.7854190251131377,
|
63 |
+
"eval_loss": 0.4972754120826721,
|
64 |
+
"eval_precision": 0.7961139879912458,
|
65 |
+
"eval_recall": 0.7881136950904393,
|
66 |
+
"eval_runtime": 1.5913,
|
67 |
+
"eval_samples_per_second": 243.196,
|
68 |
+
"eval_steps_per_second": 30.792,
|
69 |
+
"step": 121
|
70 |
+
},
|
71 |
+
{
|
72 |
+
"epoch": 5.979381443298969,
|
73 |
+
"eval_accuracy": 0.7803617571059431,
|
74 |
+
"eval_f1": 0.781727235222138,
|
75 |
+
"eval_loss": 0.5254013538360596,
|
76 |
+
"eval_precision": 0.7843017570642586,
|
77 |
+
"eval_recall": 0.7803617571059431,
|
78 |
+
"eval_runtime": 1.6096,
|
79 |
+
"eval_samples_per_second": 240.428,
|
80 |
+
"eval_steps_per_second": 30.442,
|
81 |
+
"step": 145
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"epoch": 6.969072164948454,
|
85 |
+
"eval_accuracy": 0.8475452196382429,
|
86 |
+
"eval_f1": 0.8504309219603966,
|
87 |
+
"eval_loss": 0.42636802792549133,
|
88 |
+
"eval_precision": 0.8557626671638175,
|
89 |
+
"eval_recall": 0.8475452196382429,
|
90 |
+
"eval_runtime": 1.6741,
|
91 |
+
"eval_samples_per_second": 231.171,
|
92 |
+
"eval_steps_per_second": 29.27,
|
93 |
+
"step": 169
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 8.0,
|
97 |
+
"eval_accuracy": 0.8656330749354005,
|
98 |
+
"eval_f1": 0.8627731371728347,
|
99 |
+
"eval_loss": 0.4414582848548889,
|
100 |
+
"eval_precision": 0.8670254176803228,
|
101 |
+
"eval_recall": 0.8656330749354005,
|
102 |
+
"eval_runtime": 1.6573,
|
103 |
+
"eval_samples_per_second": 233.512,
|
104 |
+
"eval_steps_per_second": 29.566,
|
105 |
+
"step": 194
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"epoch": 8.989690721649485,
|
109 |
+
"eval_accuracy": 0.8656330749354005,
|
110 |
+
"eval_f1": 0.8648808138390941,
|
111 |
+
"eval_loss": 0.4115408658981323,
|
112 |
+
"eval_precision": 0.8650901526054525,
|
113 |
+
"eval_recall": 0.8656330749354005,
|
114 |
+
"eval_runtime": 1.6461,
|
115 |
+
"eval_samples_per_second": 235.106,
|
116 |
+
"eval_steps_per_second": 29.768,
|
117 |
+
"step": 218
|
118 |
+
},
|
119 |
+
{
|
120 |
+
"epoch": 9.896907216494846,
|
121 |
+
"eval_accuracy": 0.8656330749354005,
|
122 |
+
"eval_f1": 0.8628955278832856,
|
123 |
+
"eval_loss": 0.43414339423179626,
|
124 |
+
"eval_precision": 0.8627959308862513,
|
125 |
+
"eval_recall": 0.8656330749354005,
|
126 |
+
"eval_runtime": 1.6443,
|
127 |
+
"eval_samples_per_second": 235.354,
|
128 |
+
"eval_steps_per_second": 29.799,
|
129 |
+
"step": 240
|
130 |
+
}
|
131 |
+
],
|
132 |
+
"logging_steps": 500,
|
133 |
+
"max_steps": 240,
|
134 |
+
"num_input_tokens_seen": 0,
|
135 |
+
"num_train_epochs": 10,
|
136 |
+
"save_steps": 500,
|
137 |
+
"stateful_callbacks": {
|
138 |
+
"EarlyStoppingCallback": {
|
139 |
+
"args": {
|
140 |
+
"early_stopping_patience": 3,
|
141 |
+
"early_stopping_threshold": 0.0
|
142 |
+
},
|
143 |
+
"attributes": {
|
144 |
+
"early_stopping_patience_counter": 0
|
145 |
+
}
|
146 |
+
},
|
147 |
+
"TrainerControl": {
|
148 |
+
"args": {
|
149 |
+
"should_epoch_stop": false,
|
150 |
+
"should_evaluate": false,
|
151 |
+
"should_log": false,
|
152 |
+
"should_save": true,
|
153 |
+
"should_training_stop": true
|
154 |
+
},
|
155 |
+
"attributes": {}
|
156 |
+
}
|
157 |
+
},
|
158 |
+
"total_flos": 3.478884368832e+16,
|
159 |
+
"train_batch_size": 8,
|
160 |
+
"trial_name": null,
|
161 |
+
"trial_params": null
|
162 |
+
}
|
checkpoint-240/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
|
3 |
+
size 5240
|
checkpoint-48/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 94765560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b9973cea0a92ccc3ba76ebb1572eeb391cb070cd494b1e82ef996c7e69204f3
|
3 |
size 94765560
|
checkpoint-48/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 189556666
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33f1b47628687588437ef703f63052450c7ccfdc0cef0369b13f2275efdac0b9
|
3 |
size 189556666
|
checkpoint-48/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6157ea2d7163d9ac50ea4f077ca9c7fa0d3ce8899911031b31211d905abbd26
|
3 |
size 1064
|
checkpoint-48/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-48",
|
4 |
"epoch": 1.9793814432989691,
|
5 |
"eval_steps": 500,
|
@@ -10,33 +10,33 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.9896907216494846,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_f1": 0.
|
15 |
-
"eval_loss":
|
16 |
-
"eval_precision": 0.
|
17 |
-
"eval_recall": 0.
|
18 |
-
"eval_runtime": 1.
|
19 |
-
"eval_samples_per_second":
|
20 |
-
"eval_steps_per_second": 29.
|
21 |
"step": 24
|
22 |
},
|
23 |
{
|
24 |
"epoch": 1.9793814432989691,
|
25 |
-
"eval_accuracy": 0.
|
26 |
-
"eval_f1": 0.
|
27 |
-
"eval_loss": 0.
|
28 |
-
"eval_precision": 0.
|
29 |
-
"eval_recall": 0.
|
30 |
-
"eval_runtime": 1.
|
31 |
-
"eval_samples_per_second":
|
32 |
-
"eval_steps_per_second":
|
33 |
"step": 48
|
34 |
}
|
35 |
],
|
36 |
"logging_steps": 500,
|
37 |
-
"max_steps":
|
38 |
"num_input_tokens_seen": 0,
|
39 |
-
"num_train_epochs":
|
40 |
"save_steps": 500,
|
41 |
"stateful_callbacks": {
|
42 |
"EarlyStoppingCallback": {
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.7235142118863049,
|
3 |
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-48",
|
4 |
"epoch": 1.9793814432989691,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.9896907216494846,
|
13 |
+
"eval_accuracy": 0.48320413436692505,
|
14 |
+
"eval_f1": 0.31484032448297905,
|
15 |
+
"eval_loss": 1.1717983484268188,
|
16 |
+
"eval_precision": 0.23348623546928937,
|
17 |
+
"eval_recall": 0.48320413436692505,
|
18 |
+
"eval_runtime": 1.6682,
|
19 |
+
"eval_samples_per_second": 231.984,
|
20 |
+
"eval_steps_per_second": 29.373,
|
21 |
"step": 24
|
22 |
},
|
23 |
{
|
24 |
"epoch": 1.9793814432989691,
|
25 |
+
"eval_accuracy": 0.7235142118863049,
|
26 |
+
"eval_f1": 0.7025965349533153,
|
27 |
+
"eval_loss": 0.7550917863845825,
|
28 |
+
"eval_precision": 0.7163905403042491,
|
29 |
+
"eval_recall": 0.7235142118863049,
|
30 |
+
"eval_runtime": 1.6003,
|
31 |
+
"eval_samples_per_second": 241.836,
|
32 |
+
"eval_steps_per_second": 30.62,
|
33 |
"step": 48
|
34 |
}
|
35 |
],
|
36 |
"logging_steps": 500,
|
37 |
+
"max_steps": 240,
|
38 |
"num_input_tokens_seen": 0,
|
39 |
+
"num_train_epochs": 10,
|
40 |
"save_steps": 500,
|
41 |
"stateful_callbacks": {
|
42 |
"EarlyStoppingCallback": {
|
checkpoint-48/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
|
3 |
size 5240
|
checkpoint-72/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 94765560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6fafe5e8d3a221adf5fdc2f30cb4c69093527425a911ded5ed478ac7f0fbbab8
|
3 |
size 94765560
|
checkpoint-72/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 189556666
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5253a07ab7d3da8498ae929967529fe03141787454a800994517fb089f02a794
|
3 |
size 189556666
|
checkpoint-72/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfa659e6db81ea93ae5bc5128ce82974afbbc53d7f1ac1e100d0fe2f646dac9c
|
3 |
size 1064
|
checkpoint-72/trainer_state.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-72",
|
4 |
"epoch": 2.9690721649484537,
|
5 |
"eval_steps": 500,
|
@@ -10,45 +10,45 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.9896907216494846,
|
13 |
-
"eval_accuracy": 0.
|
14 |
-
"eval_f1": 0.
|
15 |
-
"eval_loss":
|
16 |
-
"eval_precision": 0.
|
17 |
-
"eval_recall": 0.
|
18 |
-
"eval_runtime": 1.
|
19 |
-
"eval_samples_per_second":
|
20 |
-
"eval_steps_per_second": 29.
|
21 |
"step": 24
|
22 |
},
|
23 |
{
|
24 |
"epoch": 1.9793814432989691,
|
25 |
-
"eval_accuracy": 0.
|
26 |
-
"eval_f1": 0.
|
27 |
-
"eval_loss": 0.
|
28 |
-
"eval_precision": 0.
|
29 |
-
"eval_recall": 0.
|
30 |
-
"eval_runtime": 1.
|
31 |
-
"eval_samples_per_second":
|
32 |
-
"eval_steps_per_second":
|
33 |
"step": 48
|
34 |
},
|
35 |
{
|
36 |
"epoch": 2.9690721649484537,
|
37 |
-
"eval_accuracy": 0.
|
38 |
-
"eval_f1": 0.
|
39 |
-
"eval_loss": 0.
|
40 |
-
"eval_precision": 0.
|
41 |
-
"eval_recall": 0.
|
42 |
-
"eval_runtime": 1.
|
43 |
-
"eval_samples_per_second":
|
44 |
-
"eval_steps_per_second":
|
45 |
"step": 72
|
46 |
}
|
47 |
],
|
48 |
"logging_steps": 500,
|
49 |
-
"max_steps":
|
50 |
"num_input_tokens_seen": 0,
|
51 |
-
"num_train_epochs":
|
52 |
"save_steps": 500,
|
53 |
"stateful_callbacks": {
|
54 |
"EarlyStoppingCallback": {
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.7312661498708011,
|
3 |
"best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-72",
|
4 |
"epoch": 2.9690721649484537,
|
5 |
"eval_steps": 500,
|
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 0.9896907216494846,
|
13 |
+
"eval_accuracy": 0.48320413436692505,
|
14 |
+
"eval_f1": 0.31484032448297905,
|
15 |
+
"eval_loss": 1.1717983484268188,
|
16 |
+
"eval_precision": 0.23348623546928937,
|
17 |
+
"eval_recall": 0.48320413436692505,
|
18 |
+
"eval_runtime": 1.6682,
|
19 |
+
"eval_samples_per_second": 231.984,
|
20 |
+
"eval_steps_per_second": 29.373,
|
21 |
"step": 24
|
22 |
},
|
23 |
{
|
24 |
"epoch": 1.9793814432989691,
|
25 |
+
"eval_accuracy": 0.7235142118863049,
|
26 |
+
"eval_f1": 0.7025965349533153,
|
27 |
+
"eval_loss": 0.7550917863845825,
|
28 |
+
"eval_precision": 0.7163905403042491,
|
29 |
+
"eval_recall": 0.7235142118863049,
|
30 |
+
"eval_runtime": 1.6003,
|
31 |
+
"eval_samples_per_second": 241.836,
|
32 |
+
"eval_steps_per_second": 30.62,
|
33 |
"step": 48
|
34 |
},
|
35 |
{
|
36 |
"epoch": 2.9690721649484537,
|
37 |
+
"eval_accuracy": 0.7312661498708011,
|
38 |
+
"eval_f1": 0.7074435618655927,
|
39 |
+
"eval_loss": 0.6834394335746765,
|
40 |
+
"eval_precision": 0.7620582207432803,
|
41 |
+
"eval_recall": 0.7312661498708011,
|
42 |
+
"eval_runtime": 1.604,
|
43 |
+
"eval_samples_per_second": 241.278,
|
44 |
+
"eval_steps_per_second": 30.549,
|
45 |
"step": 72
|
46 |
}
|
47 |
],
|
48 |
"logging_steps": 500,
|
49 |
+
"max_steps": 240,
|
50 |
"num_input_tokens_seen": 0,
|
51 |
+
"num_train_epochs": 10,
|
52 |
"save_steps": 500,
|
53 |
"stateful_callbacks": {
|
54 |
"EarlyStoppingCallback": {
|
checkpoint-72/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2ff390684074a66989eebeff6ed959257b86635b10b23f58026b7546138ab89
|
3 |
size 5240
|
checkpoint-97/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 94765560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c696fbcea3f1eabb848deb6435364111d0671928df2bef22696b10348e54c32
|
3 |
size 94765560
|