Step... (29000/50000 | Loss: 1.6177186965942383, Acc: 0.67269366979599): 58%|████████████████▊ | 29067/50000 [11:37:39<7:29:53, 1.29s/it]
Browse files- flax_model.msgpack +1 -1
- outputs/checkpoints/checkpoint-22000/training_state.json +0 -1
- outputs/checkpoints/checkpoint-23000/training_state.json +0 -1
- outputs/checkpoints/checkpoint-24000/training_state.json +0 -1
- outputs/checkpoints/{checkpoint-22000 → checkpoint-27000}/config.json +0 -0
- outputs/checkpoints/{checkpoint-22000 → checkpoint-27000}/data_collator.joblib +0 -0
- outputs/checkpoints/{checkpoint-22000 → checkpoint-27000}/flax_model.msgpack +1 -1
- outputs/checkpoints/{checkpoint-24000 → checkpoint-27000}/optimizer_state.msgpack +1 -1
- outputs/checkpoints/{checkpoint-22000 → checkpoint-27000}/training_args.joblib +0 -0
- outputs/checkpoints/checkpoint-27000/training_state.json +1 -0
- outputs/checkpoints/{checkpoint-23000 → checkpoint-28000}/config.json +0 -0
- outputs/checkpoints/{checkpoint-23000 → checkpoint-28000}/data_collator.joblib +0 -0
- outputs/checkpoints/{checkpoint-24000 → checkpoint-28000}/flax_model.msgpack +1 -1
- outputs/checkpoints/{checkpoint-22000 → checkpoint-28000}/optimizer_state.msgpack +1 -1
- outputs/checkpoints/{checkpoint-23000 → checkpoint-28000}/training_args.joblib +0 -0
- outputs/checkpoints/checkpoint-28000/training_state.json +1 -0
- outputs/checkpoints/{checkpoint-24000 → checkpoint-29000}/config.json +0 -0
- outputs/checkpoints/{checkpoint-24000 → checkpoint-29000}/data_collator.joblib +0 -0
- outputs/checkpoints/{checkpoint-23000 → checkpoint-29000}/flax_model.msgpack +1 -1
- outputs/checkpoints/{checkpoint-23000 → checkpoint-29000}/optimizer_state.msgpack +1 -1
- outputs/checkpoints/{checkpoint-24000 → checkpoint-29000}/training_args.joblib +0 -0
- outputs/checkpoints/checkpoint-29000/training_state.json +1 -0
- outputs/events.out.tfevents.1627258355.tablespoon.3000110.3.v2 +2 -2
- outputs/flax_model.msgpack +1 -1
- outputs/optimizer_state.msgpack +1 -1
- outputs/training_state.json +1 -1
- pytorch_model.bin +1 -1
- run_stream.512.log +0 -0
- wandb/run-20210726_001233-17u6inbn/files/output.log +1689 -0
- wandb/run-20210726_001233-17u6inbn/files/wandb-summary.json +1 -1
- wandb/run-20210726_001233-17u6inbn/logs/debug-internal.log +2 -2
- wandb/run-20210726_001233-17u6inbn/run-17u6inbn.wandb +2 -2
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 249750019
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab9e1ec9fe5381c6cd01fb75c25867879a3e432dfaf4960054ef157b5e52d406
|
3 |
size 249750019
|
outputs/checkpoints/checkpoint-22000/training_state.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"step": 22001}
|
|
|
|
outputs/checkpoints/checkpoint-23000/training_state.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"step": 23001}
|
|
|
|
outputs/checkpoints/checkpoint-24000/training_state.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"step": 24001}
|
|
|
|
outputs/checkpoints/{checkpoint-22000 → checkpoint-27000}/config.json
RENAMED
File without changes
|
outputs/checkpoints/{checkpoint-22000 → checkpoint-27000}/data_collator.joblib
RENAMED
File without changes
|
outputs/checkpoints/{checkpoint-22000 → checkpoint-27000}/flax_model.msgpack
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 249750019
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3ab5739e776f02370c13f8fea2309cb8ca0ed3a123eae940e7afdb05a7bef05
|
3 |
size 249750019
|
outputs/checkpoints/{checkpoint-24000 → checkpoint-27000}/optimizer_state.msgpack
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 499500278
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61a747cb65ecae47682216a9acae813b1c986f07969e392fec94655708af0c9d
|
3 |
size 499500278
|
outputs/checkpoints/{checkpoint-22000 → checkpoint-27000}/training_args.joblib
RENAMED
File without changes
|
outputs/checkpoints/checkpoint-27000/training_state.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"step": 27001}
|
outputs/checkpoints/{checkpoint-23000 → checkpoint-28000}/config.json
RENAMED
File without changes
|
outputs/checkpoints/{checkpoint-23000 → checkpoint-28000}/data_collator.joblib
RENAMED
File without changes
|
outputs/checkpoints/{checkpoint-24000 → checkpoint-28000}/flax_model.msgpack
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 249750019
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d58a98de8c35c88d685164db5fe4129857b5bcb473789b009556e5c1739a2c4c
|
3 |
size 249750019
|
outputs/checkpoints/{checkpoint-22000 → checkpoint-28000}/optimizer_state.msgpack
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 499500278
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be66fa7e148ce2b9d6c3645265528720ff474cf68ea20817c379b14fca3fc646
|
3 |
size 499500278
|
outputs/checkpoints/{checkpoint-23000 → checkpoint-28000}/training_args.joblib
RENAMED
File without changes
|
outputs/checkpoints/checkpoint-28000/training_state.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"step": 28001}
|
outputs/checkpoints/{checkpoint-24000 → checkpoint-29000}/config.json
RENAMED
File without changes
|
outputs/checkpoints/{checkpoint-24000 → checkpoint-29000}/data_collator.joblib
RENAMED
File without changes
|
outputs/checkpoints/{checkpoint-23000 → checkpoint-29000}/flax_model.msgpack
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 249750019
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab9e1ec9fe5381c6cd01fb75c25867879a3e432dfaf4960054ef157b5e52d406
|
3 |
size 249750019
|
outputs/checkpoints/{checkpoint-23000 → checkpoint-29000}/optimizer_state.msgpack
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 499500278
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54e623bbd0f1873163b786ac2b8889bdc078c9a03f02878216a3a20fb1d27569
|
3 |
size 499500278
|
outputs/checkpoints/{checkpoint-24000 → checkpoint-29000}/training_args.joblib
RENAMED
File without changes
|
outputs/checkpoints/checkpoint-29000/training_state.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"step": 29001}
|
outputs/events.out.tfevents.1627258355.tablespoon.3000110.3.v2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4371c0c54c365271287a80d90ab3836df6db4026abad278a225bc389a870e17
|
3 |
+
size 4296275
|
outputs/flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 249750019
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab9e1ec9fe5381c6cd01fb75c25867879a3e432dfaf4960054ef157b5e52d406
|
3 |
size 249750019
|
outputs/optimizer_state.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 499500278
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54e623bbd0f1873163b786ac2b8889bdc078c9a03f02878216a3a20fb1d27569
|
3 |
size 499500278
|
outputs/training_state.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"step":
|
|
|
1 |
+
{"step": 29001}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 498858859
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d7187b7156584b95e4c067f8b7d04c6f66b165d9d934533bd83dc404dc48c81
|
3 |
size 498858859
|
run_stream.512.log
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
wandb/run-20210726_001233-17u6inbn/files/output.log
CHANGED
@@ -18053,6 +18053,1695 @@ You should probably TRAIN this model on a down-stream task to be able to use it
|
|
18053 |
|
18054 |
|
18055 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18056 |
|
18057 |
|
18058 |
|
|
|
18053 |
|
18054 |
|
18055 |
|
18056 |
+
|
18057 |
+
|
18058 |
+
|
18059 |
+
|
18060 |
+
|
18061 |
+
|
18062 |
+
|
18063 |
+
|
18064 |
+
|
18065 |
+
|
18066 |
+
|
18067 |
+
|
18068 |
+
|
18069 |
+
|
18070 |
+
|
18071 |
+
|
18072 |
+
|
18073 |
+
|
18074 |
+
|
18075 |
+
|
18076 |
+
|
18077 |
+
|
18078 |
+
|
18079 |
+
|
18080 |
+
|
18081 |
+
|
18082 |
+
|
18083 |
+
|
18084 |
+
|
18085 |
+
|
18086 |
+
|
18087 |
+
|
18088 |
+
|
18089 |
+
|
18090 |
+
|
18091 |
+
|
18092 |
+
|
18093 |
+
|
18094 |
+
|
18095 |
+
|
18096 |
+
|
18097 |
+
|
18098 |
+
|
18099 |
+
|
18100 |
+
|
18101 |
+
|
18102 |
+
|
18103 |
+
|
18104 |
+
|
18105 |
+
|
18106 |
+
|
18107 |
+
|
18108 |
+
|
18109 |
+
|
18110 |
+
|
18111 |
+
|
18112 |
+
|
18113 |
+
|
18114 |
+
|
18115 |
+
|
18116 |
+
|
18117 |
+
|
18118 |
+
|
18119 |
+
|
18120 |
+
|
18121 |
+
|
18122 |
+
|
18123 |
+
|
18124 |
+
|
18125 |
+
|
18126 |
+
|
18127 |
+
|
18128 |
+
|
18129 |
+
|
18130 |
+
|
18131 |
+
|
18132 |
+
|
18133 |
+
|
18134 |
+
|
18135 |
+
|
18136 |
+
|
18137 |
+
|
18138 |
+
|
18139 |
+
|
18140 |
+
|
18141 |
+
|
18142 |
+
|
18143 |
+
|
18144 |
+
|
18145 |
+
|
18146 |
+
|
18147 |
+
|
18148 |
+
|
18149 |
+
|
18150 |
+
|
18151 |
+
|
18152 |
+
|
18153 |
+
|
18154 |
+
|
18155 |
+
|
18156 |
+
|
18157 |
+
|
18158 |
+
|
18159 |
+
|
18160 |
+
|
18161 |
+
|
18162 |
+
|
18163 |
+
|
18164 |
+
|
18165 |
+
|
18166 |
+
|
18167 |
+
|
18168 |
+
|
18169 |
+
|
18170 |
+
|
18171 |
+
|
18172 |
+
|
18173 |
+
|
18174 |
+
|
18175 |
+
|
18176 |
+
|
18177 |
+
|
18178 |
+
|
18179 |
+
|
18180 |
+
|
18181 |
+
|
18182 |
+
|
18183 |
+
|
18184 |
+
|
18185 |
+
|
18186 |
+
|
18187 |
+
|
18188 |
+
|
18189 |
+
|
18190 |
+
|
18191 |
+
|
18192 |
+
|
18193 |
+
|
18194 |
+
|
18195 |
+
|
18196 |
+
|
18197 |
+
|
18198 |
+
|
18199 |
+
|
18200 |
+
|
18201 |
+
|
18202 |
+
|
18203 |
+
|
18204 |
+
|
18205 |
+
|
18206 |
+
|
18207 |
+
|
18208 |
+
|
18209 |
+
|
18210 |
+
|
18211 |
+
|
18212 |
+
|
18213 |
+
|
18214 |
+
|
18215 |
+
|
18216 |
+
|
18217 |
+
|
18218 |
+
|
18219 |
+
|
18220 |
+
|
18221 |
+
|
18222 |
+
|
18223 |
+
|
18224 |
+
|
18225 |
+
|
18226 |
+
|
18227 |
+
|
18228 |
+
|
18229 |
+
|
18230 |
+
|
18231 |
+
|
18232 |
+
|
18233 |
+
|
18234 |
+
|
18235 |
+
|
18236 |
+
|
18237 |
+
|
18238 |
+
|
18239 |
+
|
18240 |
+
|
18241 |
+
|
18242 |
+
|
18243 |
+
|
18244 |
+
|
18245 |
+
|
18246 |
+
|
18247 |
+
|
18248 |
+
|
18249 |
+
|
18250 |
+
|
18251 |
+
|
18252 |
+
|
18253 |
+
|
18254 |
+
|
18255 |
+
|
18256 |
+
|
18257 |
+
|
18258 |
+
|
18259 |
+
|
18260 |
+
|
18261 |
+
|
18262 |
+
|
18263 |
+
|
18264 |
+
|
18265 |
+
|
18266 |
+
|
18267 |
+
|
18268 |
+
|
18269 |
+
|
18270 |
+
|
18271 |
+
|
18272 |
+
|
18273 |
+
|
18274 |
+
|
18275 |
+
|
18276 |
+
|
18277 |
+
|
18278 |
+
|
18279 |
+
|
18280 |
+
|
18281 |
+
|
18282 |
+
|
18283 |
+
|
18284 |
+
|
18285 |
+
|
18286 |
+
|
18287 |
+
|
18288 |
+
|
18289 |
+
|
18290 |
+
|
18291 |
+
|
18292 |
+
|
18293 |
+
|
18294 |
+
|
18295 |
+
|
18296 |
+
|
18297 |
+
|
18298 |
+
|
18299 |
+
|
18300 |
+
|
18301 |
+
|
18302 |
+
|
18303 |
+
|
18304 |
+
|
18305 |
+
|
18306 |
+
|
18307 |
+
|
18308 |
+
|
18309 |
+
|
18310 |
+
|
18311 |
+
|
18312 |
+
|
18313 |
+
|
18314 |
+
Step... (26000/50000 | Loss: 1.6362030506134033, Acc: 0.6691190600395203): 54%|██████████████ | 27000/50000 [10:45:19<10:31:50, 1.65s/it]
|
18315 |
+
Step... (26500 | Loss: 1.707963228225708, Learning Rate: 0.0002848485019057989)
|
18316 |
+
Step... (27000 | Loss: 1.7799105644226074, Learning Rate: 0.00027878789114765823)
|
18317 |
+
|
18318 |
+
|
18319 |
+
|
18320 |
+
|
18321 |
+
|
18322 |
+
|
18323 |
+
|
18324 |
+
|
18325 |
+
|
18326 |
+
|
18327 |
+
|
18328 |
+
|
18329 |
+
[12:53:43] - INFO - __main__ - Saving checkpoint at 27000 steps█████████████████████████████████████████████████████| 130/130 [00:21<00:00, 4.60it/s]
|
18330 |
+
All Flax model weights were used when initializing RobertaForMaskedLM.
|
18331 |
+
Some weights of RobertaForMaskedLM were not initialized from the Flax model and are newly initialized: ['lm_head.decoder.weight', 'roberta.embeddings.position_ids', 'lm_head.decoder.bias']
|
18332 |
+
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
|
18333 |
+
|
18334 |
+
|
18335 |
+
|
18336 |
+
|
18337 |
+
|
18338 |
+
|
18339 |
+
|
18340 |
+
|
18341 |
+
|
18342 |
+
|
18343 |
+
|
18344 |
+
|
18345 |
+
|
18346 |
+
|
18347 |
+
|
18348 |
+
|
18349 |
+
|
18350 |
+
|
18351 |
+
|
18352 |
+
|
18353 |
+
|
18354 |
+
|
18355 |
+
|
18356 |
+
|
18357 |
+
|
18358 |
+
|
18359 |
+
|
18360 |
+
|
18361 |
+
|
18362 |
+
|
18363 |
+
|
18364 |
+
|
18365 |
+
|
18366 |
+
|
18367 |
+
|
18368 |
+
|
18369 |
+
|
18370 |
+
|
18371 |
+
|
18372 |
+
|
18373 |
+
|
18374 |
+
|
18375 |
+
|
18376 |
+
|
18377 |
+
|
18378 |
+
|
18379 |
+
|
18380 |
+
|
18381 |
+
|
18382 |
+
|
18383 |
+
|
18384 |
+
|
18385 |
+
|
18386 |
+
|
18387 |
+
|
18388 |
+
|
18389 |
+
|
18390 |
+
|
18391 |
+
|
18392 |
+
|
18393 |
+
|
18394 |
+
|
18395 |
+
|
18396 |
+
|
18397 |
+
|
18398 |
+
|
18399 |
+
|
18400 |
+
|
18401 |
+
|
18402 |
+
|
18403 |
+
|
18404 |
+
|
18405 |
+
|
18406 |
+
|
18407 |
+
|
18408 |
+
|
18409 |
+
|
18410 |
+
|
18411 |
+
|
18412 |
+
|
18413 |
+
|
18414 |
+
|
18415 |
+
|
18416 |
+
|
18417 |
+
|
18418 |
+
|
18419 |
+
|
18420 |
+
|
18421 |
+
|
18422 |
+
|
18423 |
+
|
18424 |
+
|
18425 |
+
|
18426 |
+
|
18427 |
+
|
18428 |
+
|
18429 |
+
|
18430 |
+
|
18431 |
+
|
18432 |
+
|
18433 |
+
|
18434 |
+
|
18435 |
+
|
18436 |
+
|
18437 |
+
|
18438 |
+
|
18439 |
+
|
18440 |
+
|
18441 |
+
|
18442 |
+
|
18443 |
+
|
18444 |
+
|
18445 |
+
|
18446 |
+
|
18447 |
+
|
18448 |
+
|
18449 |
+
|
18450 |
+
|
18451 |
+
|
18452 |
+
|
18453 |
+
|
18454 |
+
|
18455 |
+
|
18456 |
+
|
18457 |
+
|
18458 |
+
|
18459 |
+
|
18460 |
+
|
18461 |
+
|
18462 |
+
|
18463 |
+
|
18464 |
+
|
18465 |
+
|
18466 |
+
|
18467 |
+
|
18468 |
+
|
18469 |
+
|
18470 |
+
|
18471 |
+
|
18472 |
+
|
18473 |
+
|
18474 |
+
|
18475 |
+
|
18476 |
+
|
18477 |
+
|
18478 |
+
|
18479 |
+
|
18480 |
+
|
18481 |
+
|
18482 |
+
|
18483 |
+
|
18484 |
+
|
18485 |
+
|
18486 |
+
|
18487 |
+
|
18488 |
+
|
18489 |
+
|
18490 |
+
|
18491 |
+
|
18492 |
+
|
18493 |
+
|
18494 |
+
|
18495 |
+
|
18496 |
+
|
18497 |
+
|
18498 |
+
|
18499 |
+
|
18500 |
+
|
18501 |
+
|
18502 |
+
|
18503 |
+
|
18504 |
+
|
18505 |
+
|
18506 |
+
|
18507 |
+
|
18508 |
+
|
18509 |
+
|
18510 |
+
|
18511 |
+
|
18512 |
+
|
18513 |
+
|
18514 |
+
|
18515 |
+
|
18516 |
+
|
18517 |
+
|
18518 |
+
|
18519 |
+
|
18520 |
+
|
18521 |
+
|
18522 |
+
|
18523 |
+
|
18524 |
+
|
18525 |
+
|
18526 |
+
|
18527 |
+
|
18528 |
+
|
18529 |
+
|
18530 |
+
|
18531 |
+
|
18532 |
+
|
18533 |
+
|
18534 |
+
|
18535 |
+
|
18536 |
+
|
18537 |
+
|
18538 |
+
|
18539 |
+
|
18540 |
+
|
18541 |
+
|
18542 |
+
|
18543 |
+
|
18544 |
+
|
18545 |
+
|
18546 |
+
|
18547 |
+
|
18548 |
+
|
18549 |
+
|
18550 |
+
|
18551 |
+
|
18552 |
+
|
18553 |
+
|
18554 |
+
|
18555 |
+
|
18556 |
+
|
18557 |
+
|
18558 |
+
|
18559 |
+
|
18560 |
+
|
18561 |
+
|
18562 |
+
|
18563 |
+
|
18564 |
+
|
18565 |
+
|
18566 |
+
|
18567 |
+
|
18568 |
+
|
18569 |
+
|
18570 |
+
|
18571 |
+
|
18572 |
+
|
18573 |
+
|
18574 |
+
|
18575 |
+
|
18576 |
+
|
18577 |
+
|
18578 |
+
|
18579 |
+
|
18580 |
+
|
18581 |
+
|
18582 |
+
|
18583 |
+
|
18584 |
+
|
18585 |
+
|
18586 |
+
|
18587 |
+
|
18588 |
+
|
18589 |
+
|
18590 |
+
|
18591 |
+
|
18592 |
+
|
18593 |
+
|
18594 |
+
|
18595 |
+
|
18596 |
+
|
18597 |
+
|
18598 |
+
|
18599 |
+
|
18600 |
+
|
18601 |
+
|
18602 |
+
|
18603 |
+
|
18604 |
+
|
18605 |
+
|
18606 |
+
|
18607 |
+
|
18608 |
+
|
18609 |
+
|
18610 |
+
|
18611 |
+
|
18612 |
+
|
18613 |
+
|
18614 |
+
|
18615 |
+
|
18616 |
+
|
18617 |
+
|
18618 |
+
|
18619 |
+
|
18620 |
+
|
18621 |
+
|
18622 |
+
|
18623 |
+
|
18624 |
+
|
18625 |
+
|
18626 |
+
|
18627 |
+
|
18628 |
+
|
18629 |
+
|
18630 |
+
|
18631 |
+
|
18632 |
+
|
18633 |
+
|
18634 |
+
|
18635 |
+
|
18636 |
+
|
18637 |
+
|
18638 |
+
|
18639 |
+
|
18640 |
+
|
18641 |
+
|
18642 |
+
|
18643 |
+
|
18644 |
+
|
18645 |
+
|
18646 |
+
|
18647 |
+
|
18648 |
+
|
18649 |
+
|
18650 |
+
|
18651 |
+
|
18652 |
+
|
18653 |
+
|
18654 |
+
|
18655 |
+
|
18656 |
+
|
18657 |
+
|
18658 |
+
|
18659 |
+
|
18660 |
+
|
18661 |
+
|
18662 |
+
|
18663 |
+
|
18664 |
+
|
18665 |
+
|
18666 |
+
|
18667 |
+
|
18668 |
+
|
18669 |
+
|
18670 |
+
|
18671 |
+
|
18672 |
+
|
18673 |
+
|
18674 |
+
|
18675 |
+
|
18676 |
+
|
18677 |
+
|
18678 |
+
|
18679 |
+
|
18680 |
+
|
18681 |
+
|
18682 |
+
|
18683 |
+
|
18684 |
+
|
18685 |
+
|
18686 |
+
|
18687 |
+
|
18688 |
+
|
18689 |
+
|
18690 |
+
|
18691 |
+
|
18692 |
+
|
18693 |
+
|
18694 |
+
|
18695 |
+
|
18696 |
+
|
18697 |
+
|
18698 |
+
|
18699 |
+
|
18700 |
+
|
18701 |
+
|
18702 |
+
|
18703 |
+
|
18704 |
+
|
18705 |
+
|
18706 |
+
|
18707 |
+
|
18708 |
+
|
18709 |
+
|
18710 |
+
|
18711 |
+
|
18712 |
+
|
18713 |
+
|
18714 |
+
|
18715 |
+
|
18716 |
+
|
18717 |
+
|
18718 |
+
|
18719 |
+
|
18720 |
+
|
18721 |
+
|
18722 |
+
|
18723 |
+
|
18724 |
+
|
18725 |
+
|
18726 |
+
|
18727 |
+
|
18728 |
+
|
18729 |
+
|
18730 |
+
|
18731 |
+
|
18732 |
+
|
18733 |
+
|
18734 |
+
|
18735 |
+
|
18736 |
+
|
18737 |
+
|
18738 |
+
|
18739 |
+
|
18740 |
+
|
18741 |
+
|
18742 |
+
|
18743 |
+
|
18744 |
+
|
18745 |
+
|
18746 |
+
|
18747 |
+
|
18748 |
+
|
18749 |
+
|
18750 |
+
|
18751 |
+
|
18752 |
+
|
18753 |
+
|
18754 |
+
|
18755 |
+
|
18756 |
+
|
18757 |
+
|
18758 |
+
|
18759 |
+
|
18760 |
+
|
18761 |
+
|
18762 |
+
|
18763 |
+
|
18764 |
+
|
18765 |
+
|
18766 |
+
|
18767 |
+
|
18768 |
+
|
18769 |
+
|
18770 |
+
|
18771 |
+
|
18772 |
+
|
18773 |
+
|
18774 |
+
|
18775 |
+
|
18776 |
+
|
18777 |
+
|
18778 |
+
|
18779 |
+
|
18780 |
+
|
18781 |
+
|
18782 |
+
|
18783 |
+
|
18784 |
+
|
18785 |
+
|
18786 |
+
|
18787 |
+
|
18788 |
+
|
18789 |
+
|
18790 |
+
|
18791 |
+
|
18792 |
+
|
18793 |
+
|
18794 |
+
|
18795 |
+
|
18796 |
+
|
18797 |
+
|
18798 |
+
|
18799 |
+
|
18800 |
+
|
18801 |
+
|
18802 |
+
|
18803 |
+
|
18804 |
+
|
18805 |
+
|
18806 |
+
|
18807 |
+
|
18808 |
+
|
18809 |
+
|
18810 |
+
|
18811 |
+
|
18812 |
+
|
18813 |
+
|
18814 |
+
|
18815 |
+
|
18816 |
+
|
18817 |
+
|
18818 |
+
|
18819 |
+
|
18820 |
+
|
18821 |
+
|
18822 |
+
|
18823 |
+
|
18824 |
+
|
18825 |
+
|
18826 |
+
|
18827 |
+
|
18828 |
+
|
18829 |
+
|
18830 |
+
|
18831 |
+
|
18832 |
+
|
18833 |
+
|
18834 |
+
|
18835 |
+
|
18836 |
+
|
18837 |
+
|
18838 |
+
|
18839 |
+
|
18840 |
+
|
18841 |
+
|
18842 |
+
|
18843 |
+
|
18844 |
+
|
18845 |
+
|
18846 |
+
|
18847 |
+
|
18848 |
+
|
18849 |
+
|
18850 |
+
|
18851 |
+
|
18852 |
+
|
18853 |
+
|
18854 |
+
|
18855 |
+
|
18856 |
+
|
18857 |
+
|
18858 |
+
|
18859 |
+
|
18860 |
+
|
18861 |
+
|
18862 |
+
|
18863 |
+
|
18864 |
+
|
18865 |
+
|
18866 |
+
|
18867 |
+
|
18868 |
+
|
18869 |
+
|
18870 |
+
|
18871 |
+
|
18872 |
+
|
18873 |
+
|
18874 |
+
|
18875 |
+
|
18876 |
+
|
18877 |
+
|
18878 |
+
|
18879 |
+
|
18880 |
+
|
18881 |
+
|
18882 |
+
|
18883 |
+
|
18884 |
+
|
18885 |
+
|
18886 |
+
|
18887 |
+
|
18888 |
+
|
18889 |
+
|
18890 |
+
|
18891 |
+
|
18892 |
+
|
18893 |
+
|
18894 |
+
|
18895 |
+
|
18896 |
+
|
18897 |
+
|
18898 |
+
|
18899 |
+
|
18900 |
+
|
18901 |
+
|
18902 |
+
|
18903 |
+
|
18904 |
+
|
18905 |
+
|
18906 |
+
|
18907 |
+
|
18908 |
+
|
18909 |
+
|
18910 |
+
|
18911 |
+
|
18912 |
+
|
18913 |
+
|
18914 |
+
|
18915 |
+
|
18916 |
+
|
18917 |
+
|
18918 |
+
|
18919 |
+
|
18920 |
+
|
18921 |
+
|
18922 |
+
|
18923 |
+
|
18924 |
+
|
18925 |
+
|
18926 |
+
|
18927 |
+
|
18928 |
+
|
18929 |
+
|
18930 |
+
|
18931 |
+
|
18932 |
+
|
18933 |
+
|
18934 |
+
|
18935 |
+
|
18936 |
+
|
18937 |
+
|
18938 |
+
|
18939 |
+
|
18940 |
+
|
18941 |
+
|
18942 |
+
|
18943 |
+
|
18944 |
+
|
18945 |
+
|
18946 |
+
|
18947 |
+
|
18948 |
+
|
18949 |
+
|
18950 |
+
|
18951 |
+
|
18952 |
+
|
18953 |
+
|
18954 |
+
|
18955 |
+
|
18956 |
+
|
18957 |
+
|
18958 |
+
|
18959 |
+
|
18960 |
+
|
18961 |
+
|
18962 |
+
|
18963 |
+
|
18964 |
+
|
18965 |
+
|
18966 |
+
|
18967 |
+
|
18968 |
+
|
18969 |
+
|
18970 |
+
|
18971 |
+
|
18972 |
+
|
18973 |
+
|
18974 |
+
|
18975 |
+
|
18976 |
+
|
18977 |
+
|
18978 |
+
|
18979 |
+
|
18980 |
+
|
18981 |
+
|
18982 |
+
|
18983 |
+
|
18984 |
+
|
18985 |
+
|
18986 |
+
|
18987 |
+
|
18988 |
+
|
18989 |
+
|
18990 |
+
|
18991 |
+
|
18992 |
+
|
18993 |
+
|
18994 |
+
|
18995 |
+
|
18996 |
+
|
18997 |
+
|
18998 |
+
|
18999 |
+
|
19000 |
+
|
19001 |
+
|
19002 |
+
|
19003 |
+
Step... (27000/50000 | Loss: 1.6304749250411987, Acc: 0.670651376247406): 56%|███████████████▋ | 28000/50000 [11:10:01<8:36:13, 1.41s/it]
|
19004 |
+
Step... (27500 | Loss: 1.8015278577804565, Learning Rate: 0.00027272728038951755)
|
19005 |
+
Step... (27000/50000 | Loss: 1.6304749250411987, Acc: 0.670651376247406): 56%|███████████████▋ | 28000/50000 [11:10:04<8:36:13, 1.41s/it]
|
19006 |
+
|
19007 |
+
|
19008 |
+
|
19009 |
+
|
19010 |
+
|
19011 |
+
|
19012 |
+
|
19013 |
+
|
19014 |
+
|
19015 |
+
|
19016 |
+
|
19017 |
+
|
19018 |
+
[13:18:28] - INFO - __main__ - Saving checkpoint at 28000 steps█████████████████████████████████████████████████████| 130/130 [00:21<00:00, 4.60it/s]
|
19019 |
+
All Flax model weights were used when initializing RobertaForMaskedLM.
|
19020 |
+
Some weights of RobertaForMaskedLM were not initialized from the Flax model and are newly initialized: ['lm_head.decoder.weight', 'roberta.embeddings.position_ids', 'lm_head.decoder.bias']
|
19021 |
+
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
|
19022 |
+
|
19023 |
+
|
19024 |
+
|
19025 |
+
|
19026 |
+
|
19027 |
+
|
19028 |
+
|
19029 |
+
|
19030 |
+
|
19031 |
+
|
19032 |
+
|
19033 |
+
|
19034 |
+
|
19035 |
+
|
19036 |
+
|
19037 |
+
|
19038 |
+
|
19039 |
+
|
19040 |
+
|
19041 |
+
|
19042 |
+
|
19043 |
+
|
19044 |
+
|
19045 |
+
|
19046 |
+
|
19047 |
+
|
19048 |
+
|
19049 |
+
|
19050 |
+
|
19051 |
+
|
19052 |
+
|
19053 |
+
|
19054 |
+
|
19055 |
+
|
19056 |
+
|
19057 |
+
|
19058 |
+
|
19059 |
+
|
19060 |
+
|
19061 |
+
|
19062 |
+
|
19063 |
+
|
19064 |
+
|
19065 |
+
|
19066 |
+
|
19067 |
+
|
19068 |
+
|
19069 |
+
|
19070 |
+
|
19071 |
+
|
19072 |
+
|
19073 |
+
|
19074 |
+
|
19075 |
+
|
19076 |
+
|
19077 |
+
|
19078 |
+
|
19079 |
+
|
19080 |
+
|
19081 |
+
|
19082 |
+
|
19083 |
+
|
19084 |
+
|
19085 |
+
|
19086 |
+
|
19087 |
+
|
19088 |
+
|
19089 |
+
|
19090 |
+
|
19091 |
+
|
19092 |
+
|
19093 |
+
|
19094 |
+
|
19095 |
+
|
19096 |
+
|
19097 |
+
|
19098 |
+
|
19099 |
+
|
19100 |
+
|
19101 |
+
|
19102 |
+
|
19103 |
+
|
19104 |
+
|
19105 |
+
|
19106 |
+
|
19107 |
+
|
19108 |
+
|
19109 |
+
|
19110 |
+
|
19111 |
+
|
19112 |
+
|
19113 |
+
|
19114 |
+
|
19115 |
+
|
19116 |
+
|
19117 |
+
|
19118 |
+
|
19119 |
+
|
19120 |
+
|
19121 |
+
|
19122 |
+
|
19123 |
+
|
19124 |
+
|
19125 |
+
|
19126 |
+
|
19127 |
+
|
19128 |
+
|
19129 |
+
|
19130 |
+
|
19131 |
+
|
19132 |
+
|
19133 |
+
|
19134 |
+
|
19135 |
+
|
19136 |
+
|
19137 |
+
|
19138 |
+
|
19139 |
+
|
19140 |
+
|
19141 |
+
|
19142 |
+
|
19143 |
+
|
19144 |
+
|
19145 |
+
|
19146 |
+
|
19147 |
+
|
19148 |
+
|
19149 |
+
|
19150 |
+
|
19151 |
+
|
19152 |
+
|
19153 |
+
|
19154 |
+
|
19155 |
+
|
19156 |
+
|
19157 |
+
|
19158 |
+
|
19159 |
+
|
19160 |
+
|
19161 |
+
|
19162 |
+
|
19163 |
+
|
19164 |
+
|
19165 |
+
|
19166 |
+
|
19167 |
+
|
19168 |
+
|
19169 |
+
|
19170 |
+
|
19171 |
+
|
19172 |
+
|
19173 |
+
|
19174 |
+
|
19175 |
+
|
19176 |
+
|
19177 |
+
|
19178 |
+
|
19179 |
+
|
19180 |
+
|
19181 |
+
|
19182 |
+
|
19183 |
+
|
19184 |
+
|
19185 |
+
|
19186 |
+
|
19187 |
+
|
19188 |
+
|
19189 |
+
|
19190 |
+
|
19191 |
+
|
19192 |
+
|
19193 |
+
|
19194 |
+
|
19195 |
+
|
19196 |
+
|
19197 |
+
|
19198 |
+
|
19199 |
+
|
19200 |
+
|
19201 |
+
|
19202 |
+
|
19203 |
+
|
19204 |
+
|
19205 |
+
|
19206 |
+
|
19207 |
+
|
19208 |
+
|
19209 |
+
|
19210 |
+
|
19211 |
+
|
19212 |
+
|
19213 |
+
|
19214 |
+
|
19215 |
+
|
19216 |
+
|
19217 |
+
|
19218 |
+
|
19219 |
+
|
19220 |
+
|
19221 |
+
|
19222 |
+
|
19223 |
+
|
19224 |
+
|
19225 |
+
|
19226 |
+
|
19227 |
+
|
19228 |
+
|
19229 |
+
|
19230 |
+
|
19231 |
+
|
19232 |
+
|
19233 |
+
|
19234 |
+
|
19235 |
+
|
19236 |
+
|
19237 |
+
|
19238 |
+
|
19239 |
+
|
19240 |
+
|
19241 |
+
|
19242 |
+
|
19243 |
+
|
19244 |
+
|
19245 |
+
|
19246 |
+
|
19247 |
+
|
19248 |
+
|
19249 |
+
|
19250 |
+
|
19251 |
+
|
19252 |
+
|
19253 |
+
|
19254 |
+
|
19255 |
+
|
19256 |
+
|
19257 |
+
|
19258 |
+
|
19259 |
+
|
19260 |
+
|
19261 |
+
|
19262 |
+
|
19263 |
+
|
19264 |
+
|
19265 |
+
|
19266 |
+
|
19267 |
+
|
19268 |
+
|
19269 |
+
|
19270 |
+
|
19271 |
+
|
19272 |
+
|
19273 |
+
|
19274 |
+
|
19275 |
+
|
19276 |
+
|
19277 |
+
|
19278 |
+
|
19279 |
+
|
19280 |
+
|
19281 |
+
|
19282 |
+
|
19283 |
+
|
19284 |
+
|
19285 |
+
|
19286 |
+
|
19287 |
+
|
19288 |
+
|
19289 |
+
|
19290 |
+
|
19291 |
+
|
19292 |
+
|
19293 |
+
|
19294 |
+
|
19295 |
+
|
19296 |
+
|
19297 |
+
|
19298 |
+
|
19299 |
+
|
19300 |
+
|
19301 |
+
|
19302 |
+
|
19303 |
+
|
19304 |
+
|
19305 |
+
|
19306 |
+
|
19307 |
+
|
19308 |
+
|
19309 |
+
|
19310 |
+
|
19311 |
+
|
19312 |
+
|
19313 |
+
|
19314 |
+
|
19315 |
+
|
19316 |
+
|
19317 |
+
|
19318 |
+
|
19319 |
+
|
19320 |
+
|
19321 |
+
|
19322 |
+
|
19323 |
+
|
19324 |
+
|
19325 |
+
|
19326 |
+
|
19327 |
+
|
19328 |
+
|
19329 |
+
|
19330 |
+
|
19331 |
+
|
19332 |
+
|
19333 |
+
|
19334 |
+
|
19335 |
+
|
19336 |
+
|
19337 |
+
|
19338 |
+
|
19339 |
+
|
19340 |
+
|
19341 |
+
|
19342 |
+
|
19343 |
+
|
19344 |
+
|
19345 |
+
|
19346 |
+
|
19347 |
+
|
19348 |
+
|
19349 |
+
|
19350 |
+
|
19351 |
+
|
19352 |
+
|
19353 |
+
|
19354 |
+
|
19355 |
+
|
19356 |
+
|
19357 |
+
|
19358 |
+
|
19359 |
+
|
19360 |
+
|
19361 |
+
|
19362 |
+
|
19363 |
+
|
19364 |
+
|
19365 |
+
|
19366 |
+
|
19367 |
+
|
19368 |
+
|
19369 |
+
|
19370 |
+
|
19371 |
+
|
19372 |
+
|
19373 |
+
|
19374 |
+
|
19375 |
+
|
19376 |
+
|
19377 |
+
|
19378 |
+
|
19379 |
+
|
19380 |
+
|
19381 |
+
|
19382 |
+
|
19383 |
+
|
19384 |
+
|
19385 |
+
|
19386 |
+
|
19387 |
+
|
19388 |
+
|
19389 |
+
|
19390 |
+
|
19391 |
+
|
19392 |
+
|
19393 |
+
|
19394 |
+
|
19395 |
+
|
19396 |
+
|
19397 |
+
|
19398 |
+
|
19399 |
+
|
19400 |
+
|
19401 |
+
|
19402 |
+
|
19403 |
+
|
19404 |
+
|
19405 |
+
|
19406 |
+
|
19407 |
+
|
19408 |
+
|
19409 |
+
|
19410 |
+
|
19411 |
+
|
19412 |
+
|
19413 |
+
|
19414 |
+
|
19415 |
+
|
19416 |
+
|
19417 |
+
|
19418 |
+
|
19419 |
+
|
19420 |
+
|
19421 |
+
|
19422 |
+
|
19423 |
+
|
19424 |
+
|
19425 |
+
|
19426 |
+
|
19427 |
+
|
19428 |
+
|
19429 |
+
|
19430 |
+
|
19431 |
+
|
19432 |
+
|
19433 |
+
|
19434 |
+
|
19435 |
+
|
19436 |
+
|
19437 |
+
|
19438 |
+
|
19439 |
+
|
19440 |
+
|
19441 |
+
|
19442 |
+
|
19443 |
+
|
19444 |
+
|
19445 |
+
|
19446 |
+
|
19447 |
+
|
19448 |
+
|
19449 |
+
|
19450 |
+
|
19451 |
+
|
19452 |
+
|
19453 |
+
|
19454 |
+
|
19455 |
+
|
19456 |
+
|
19457 |
+
|
19458 |
+
|
19459 |
+
|
19460 |
+
|
19461 |
+
|
19462 |
+
|
19463 |
+
|
19464 |
+
|
19465 |
+
|
19466 |
+
|
19467 |
+
|
19468 |
+
|
19469 |
+
|
19470 |
+
|
19471 |
+
|
19472 |
+
|
19473 |
+
|
19474 |
+
|
19475 |
+
|
19476 |
+
|
19477 |
+
|
19478 |
+
|
19479 |
+
|
19480 |
+
|
19481 |
+
|
19482 |
+
|
19483 |
+
|
19484 |
+
|
19485 |
+
|
19486 |
+
|
19487 |
+
|
19488 |
+
|
19489 |
+
|
19490 |
+
|
19491 |
+
|
19492 |
+
|
19493 |
+
|
19494 |
+
|
19495 |
+
|
19496 |
+
|
19497 |
+
|
19498 |
+
|
19499 |
+
|
19500 |
+
|
19501 |
+
|
19502 |
+
|
19503 |
+
|
19504 |
+
|
19505 |
+
|
19506 |
+
|
19507 |
+
|
19508 |
+
|
19509 |
+
|
19510 |
+
|
19511 |
+
|
19512 |
+
|
19513 |
+
|
19514 |
+
|
19515 |
+
|
19516 |
+
|
19517 |
+
|
19518 |
+
|
19519 |
+
|
19520 |
+
|
19521 |
+
|
19522 |
+
|
19523 |
+
|
19524 |
+
|
19525 |
+
|
19526 |
+
|
19527 |
+
|
19528 |
+
|
19529 |
+
|
19530 |
+
|
19531 |
+
|
19532 |
+
|
19533 |
+
|
19534 |
+
|
19535 |
+
|
19536 |
+
|
19537 |
+
|
19538 |
+
|
19539 |
+
|
19540 |
+
|
19541 |
+
|
19542 |
+
|
19543 |
+
|
19544 |
+
|
19545 |
+
|
19546 |
+
|
19547 |
+
|
19548 |
+
|
19549 |
+
|
19550 |
+
|
19551 |
+
|
19552 |
+
|
19553 |
+
|
19554 |
+
|
19555 |
+
|
19556 |
+
|
19557 |
+
|
19558 |
+
|
19559 |
+
|
19560 |
+
|
19561 |
+
|
19562 |
+
|
19563 |
+
|
19564 |
+
|
19565 |
+
|
19566 |
+
|
19567 |
+
|
19568 |
+
|
19569 |
+
|
19570 |
+
|
19571 |
+
|
19572 |
+
|
19573 |
+
|
19574 |
+
|
19575 |
+
|
19576 |
+
|
19577 |
+
|
19578 |
+
|
19579 |
+
|
19580 |
+
|
19581 |
+
|
19582 |
+
|
19583 |
+
|
19584 |
+
|
19585 |
+
|
19586 |
+
|
19587 |
+
|
19588 |
+
|
19589 |
+
|
19590 |
+
|
19591 |
+
|
19592 |
+
|
19593 |
+
|
19594 |
+
|
19595 |
+
|
19596 |
+
|
19597 |
+
|
19598 |
+
|
19599 |
+
|
19600 |
+
|
19601 |
+
|
19602 |
+
|
19603 |
+
|
19604 |
+
|
19605 |
+
|
19606 |
+
|
19607 |
+
|
19608 |
+
|
19609 |
+
|
19610 |
+
|
19611 |
+
|
19612 |
+
|
19613 |
+
|
19614 |
+
|
19615 |
+
|
19616 |
+
|
19617 |
+
|
19618 |
+
|
19619 |
+
|
19620 |
+
|
19621 |
+
|
19622 |
+
|
19623 |
+
|
19624 |
+
|
19625 |
+
|
19626 |
+
|
19627 |
+
|
19628 |
+
|
19629 |
+
|
19630 |
+
|
19631 |
+
|
19632 |
+
|
19633 |
+
|
19634 |
+
|
19635 |
+
|
19636 |
+
|
19637 |
+
|
19638 |
+
|
19639 |
+
|
19640 |
+
|
19641 |
+
|
19642 |
+
|
19643 |
+
|
19644 |
+
|
19645 |
+
|
19646 |
+
|
19647 |
+
|
19648 |
+
|
19649 |
+
|
19650 |
+
|
19651 |
+
|
19652 |
+
|
19653 |
+
|
19654 |
+
|
19655 |
+
|
19656 |
+
|
19657 |
+
|
19658 |
+
|
19659 |
+
|
19660 |
+
|
19661 |
+
|
19662 |
+
|
19663 |
+
|
19664 |
+
|
19665 |
+
|
19666 |
+
|
19667 |
+
|
19668 |
+
|
19669 |
+
|
19670 |
+
|
19671 |
+
|
19672 |
+
|
19673 |
+
|
19674 |
+
|
19675 |
+
|
19676 |
+
|
19677 |
+
|
19678 |
+
|
19679 |
+
|
19680 |
+
|
19681 |
+
|
19682 |
+
|
19683 |
+
|
19684 |
+
|
19685 |
+
|
19686 |
+
|
19687 |
+
|
19688 |
+
|
19689 |
+
|
19690 |
+
|
19691 |
+
|
19692 |
+
|
19693 |
+
|
19694 |
+
|
19695 |
+
|
19696 |
+
|
19697 |
+
|
19698 |
+
|
19699 |
+
|
19700 |
+
|
19701 |
+
|
19702 |
+
|
19703 |
+
|
19704 |
+
|
19705 |
+
|
19706 |
+
Step... (28000/50000 | Loss: 1.627186894416809, Acc: 0.671392560005188): 58%|████████████████▊ | 29000/50000 [11:35:03<8:36:33, 1.48s/it]
|
19707 |
+
Step... (28500 | Loss: 1.738811731338501, Learning Rate: 0.00026060608797706664)
|
19708 |
+
Step... (29000 | Loss: 1.5798612833023071, Learning Rate: 0.00025454547721892595)
|
19709 |
+
|
19710 |
+
|
19711 |
+
|
19712 |
+
|
19713 |
+
|
19714 |
+
|
19715 |
+
|
19716 |
+
|
19717 |
+
|
19718 |
+
|
19719 |
+
|
19720 |
+
|
19721 |
+
[13:43:27] - INFO - __main__ - Saving checkpoint at 29000 steps█████████████████████████████████████████████████████| 130/130 [00:21<00:00, 4.60it/s]
|
19722 |
+
All Flax model weights were used when initializing RobertaForMaskedLM.
|
19723 |
+
Some weights of RobertaForMaskedLM were not initialized from the Flax model and are newly initialized: ['lm_head.decoder.weight', 'roberta.embeddings.position_ids', 'lm_head.decoder.bias']
|
19724 |
+
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
|
19725 |
+
|
19726 |
+
|
19727 |
+
|
19728 |
+
|
19729 |
+
|
19730 |
+
|
19731 |
+
|
19732 |
+
|
19733 |
+
|
19734 |
+
|
19735 |
+
|
19736 |
+
|
19737 |
+
|
19738 |
+
|
19739 |
+
|
19740 |
+
|
19741 |
+
|
19742 |
+
|
19743 |
+
|
19744 |
+
|
19745 |
|
19746 |
|
19747 |
|
wandb/run-20210726_001233-17u6inbn/files/wandb-summary.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"global_step":
|
|
|
1 |
+
{"global_step": 29000, "_timestamp": 1627306978.644497, "train_time": 1608587.125, "train_learning_rate": 0.00025454547721892595, "_step": 57826, "train_loss": 1.61568021774292, "eval_accuracy": 0.671392560005188, "eval_loss": 1.627186894416809}
|
wandb/run-20210726_001233-17u6inbn/logs/debug-internal.log
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94c5b08086a46b44790fad9ed823f1a906f51889fca276b7086bf41d20a3a8ce
|
3 |
+
size 22872190
|
wandb/run-20210726_001233-17u6inbn/run-17u6inbn.wandb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f83135fb6bd2b5e62d5771f3ca67b4afa426518c31fb39f682e6d326f6393e00
|
3 |
+
size 11452146
|