w11wo committed on
Commit
6b694b3
1 Parent(s): cd9ad5b

Saving weights and logs of epoch 1

Browse files
events.out.tfevents.1625581688.t1v-n-b95d739e-w-0.336670.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45533194c2bd2c9db5983f60df0835da8f9ba7d6028f0d549ae7004fd67d352b
3
+ size 40
events.out.tfevents.1625582209.t1v-n-b95d739e-w-0.356659.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d22fdeee3f2124d1ffe54f8925e1ad3b5a17f0bb1142ffe64ad8fbdf097f0ff
3
+ size 40
events.out.tfevents.1625583521.t1v-n-b95d739e-w-0.359108.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b4d8923c28351dcbe1a8151b42d5faeffbf737edbef774cb1d816a5576a7d31
3
+ size 40
events.out.tfevents.1625584262.t1v-n-b95d739e-w-0.361208.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fca1557618740152934c8eeb2b9c21185ad9ab9d6a34b2e82e8d93edf2ab60f1
3
+ size 3581817
flax_model.msgpack ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6bb01d5359dbefb657b35284b44e1522eebbbbc9ef3ef15f2f2e567624773db
3
+ size 498796983
nohup.out ADDED
The diff for this file is too large to render. See raw diff
 
run.sh CHANGED
@@ -4,12 +4,12 @@ python3 run_mlm_flax.py \
4
  --model_type="roberta" \
5
  --config_name="./" \
6
  --tokenizer_name="./" \
7
- --dataset_name="mc4" \
8
- --dataset_config_name="id" \
9
  --max_seq_length="128" \
10
  --weight_decay="0.01" \
11
- --per_device_train_batch_size="256" \
12
- --per_device_eval_batch_size="256" \
13
  --learning_rate="3e-4" \
14
  --warmup_steps="1000" \
15
  --overwrite_output_dir \
 
4
  --model_type="roberta" \
5
  --config_name="./" \
6
  --tokenizer_name="./" \
7
+ --dataset_name="oscar" \
8
+ --dataset_config_name="unshuffled_deduplicated_id" \
9
  --max_seq_length="128" \
10
  --weight_decay="0.01" \
11
+ --per_device_train_batch_size="128" \
12
+ --per_device_eval_batch_size="128" \
13
  --learning_rate="3e-4" \
14
  --warmup_steps="1000" \
15
  --overwrite_output_dir \