Saving weights and logs of epoch 1
Browse files- events.out.tfevents.1625581688.t1v-n-b95d739e-w-0.336670.3.v2 +3 -0
- events.out.tfevents.1625582209.t1v-n-b95d739e-w-0.356659.3.v2 +3 -0
- events.out.tfevents.1625583521.t1v-n-b95d739e-w-0.359108.3.v2 +3 -0
- events.out.tfevents.1625584262.t1v-n-b95d739e-w-0.361208.3.v2 +3 -0
- flax_model.msgpack +3 -0
- nohup.out +0 -0
- run.sh +4 -4
events.out.tfevents.1625581688.t1v-n-b95d739e-w-0.336670.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45533194c2bd2c9db5983f60df0835da8f9ba7d6028f0d549ae7004fd67d352b
|
3 |
+
size 40
|
events.out.tfevents.1625582209.t1v-n-b95d739e-w-0.356659.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d22fdeee3f2124d1ffe54f8925e1ad3b5a17f0bb1142ffe64ad8fbdf097f0ff
|
3 |
+
size 40
|
events.out.tfevents.1625583521.t1v-n-b95d739e-w-0.359108.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b4d8923c28351dcbe1a8151b42d5faeffbf737edbef774cb1d816a5576a7d31
|
3 |
+
size 40
|
events.out.tfevents.1625584262.t1v-n-b95d739e-w-0.361208.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fca1557618740152934c8eeb2b9c21185ad9ab9d6a34b2e82e8d93edf2ab60f1
|
3 |
+
size 3581817
|
flax_model.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6bb01d5359dbefb657b35284b44e1522eebbbbc9ef3ef15f2f2e567624773db
|
3 |
+
size 498796983
|
nohup.out
ADDED
The diff for this file is too large to render.
See raw diff
|
|
run.sh
CHANGED
@@ -4,12 +4,12 @@ python3 run_mlm_flax.py \
|
|
4 |
--model_type="roberta" \
|
5 |
--config_name="./" \
|
6 |
--tokenizer_name="./" \
|
7 |
-
--dataset_name="
|
8 |
-
--dataset_config_name="
|
9 |
--max_seq_length="128" \
|
10 |
--weight_decay="0.01" \
|
11 |
-
--per_device_train_batch_size="
|
12 |
-
--per_device_eval_batch_size="
|
13 |
--learning_rate="3e-4" \
|
14 |
--warmup_steps="1000" \
|
15 |
--overwrite_output_dir \
|
|
|
4 |
--model_type="roberta" \
|
5 |
--config_name="./" \
|
6 |
--tokenizer_name="./" \
|
7 |
+
--dataset_name="oscar" \
|
8 |
+
--dataset_config_name="unshuffled_deduplicated_id" \
|
9 |
--max_seq_length="128" \
|
10 |
--weight_decay="0.01" \
|
11 |
+
--per_device_train_batch_size="128" \
|
12 |
+
--per_device_eval_batch_size="128" \
|
13 |
--learning_rate="3e-4" \
|
14 |
--warmup_steps="1000" \
|
15 |
--overwrite_output_dir \
|