readme
Browse files- README.md +2 -30
- events.out.tfevents.1639128562.t1v-n-358ff5d1-w-0.686981.3.v2 +3 -0
- events.out.tfevents.1639128677.t1v-n-358ff5d1-w-0.688351.3.v2 +3 -0
- events.out.tfevents.1639128772.t1v-n-358ff5d1-w-0.689734.3.v2 +3 -0
- events.out.tfevents.1639128961.t1v-n-358ff5d1-w-0.691429.3.v2 +3 -0
- events.out.tfevents.1639129025.t1v-n-358ff5d1-w-0.692738.3.v2 +3 -0
- events.out.tfevents.1639129541.t1v-n-358ff5d1-w-0.694454.3.v2 +3 -0
- run.sh +3 -3
- run_t5_mlm_flax.py +1 -1
- run_t5_mlm_flax_streaming.py +1 -5
README.md
CHANGED
@@ -8,35 +8,7 @@ datasets:
|
|
8 |
---
|
9 |
# 🇳🇴 Norwegian T5 Base model Trained on the NCC🇳🇴
|
10 |
|
11 |
-
This is a Norwegian T5-base model trained on the Norwegian Colossal Corpus (NCC) on a TPU v3-8.
|
12 |
|
13 |
-
|
14 |
-
|
15 |
-
The following settings were used in training:
|
16 |
-
```bash
|
17 |
-
./run_t5_mlm_flax_streaming.py \
|
18 |
-
--output_dir="./" \
|
19 |
-
--model_type="t5" \
|
20 |
-
--config_name="./" \
|
21 |
-
--tokenizer_name="./" \
|
22 |
-
--dataset_name="pere/norwegian_colossal_corpus_v2_short100k" \
|
23 |
-
--max_seq_length="512" \
|
24 |
-
--weight_decay="0.01" \
|
25 |
-
--per_device_train_batch_size="32" \
|
26 |
-
--per_device_eval_batch_size="32" \
|
27 |
-
--learning_rate="8e-3" \
|
28 |
-
--warmup_steps="5000" \
|
29 |
-
--overwrite_output_dir \
|
30 |
-
--cache_dir /mnt/disks/flaxdisk/cache/ \
|
31 |
-
--num_train_epochs="5" \
|
32 |
-
--adam_beta1="0.9" \
|
33 |
-
--adam_beta2="0.98" \
|
34 |
-
--logging_steps="500" \
|
35 |
-
--num_train_steps="1000000" \
|
36 |
-
--num_eval_samples="5000" \
|
37 |
-
--save_steps="5000" \
|
38 |
-
--eval_steps="5000" \
|
39 |
-
--preprocessing_num_workers 96 \
|
40 |
-
--adafactor \
|
41 |
-
--push_to_hub
|
42 |
```
|
|
|
8 |
---
|
9 |
# 🇳🇴 Norwegian T5 Base model Trained on the NCC🇳🇴
|
10 |
|
11 |
+
This is a Norwegian T5-base model trained on the Norwegian Colossal Corpus (NCC) on a TPU v3-8.
|
12 |
|
13 |
+
This model is currently training. It will finish in January 2022. Please do not use it yet.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
```
|
events.out.tfevents.1639128562.t1v-n-358ff5d1-w-0.686981.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5dfce941b39945a6d6d07c566fd3bc9b6fac8b6004009113d225a917c03538e7
|
3 |
+
size 40
|
events.out.tfevents.1639128677.t1v-n-358ff5d1-w-0.688351.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8932ca5995a80bb77d76b41daf7a8a4d63d987473564afc7e00a9d10158be496
|
3 |
+
size 40
|
events.out.tfevents.1639128772.t1v-n-358ff5d1-w-0.689734.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92f6bb44edb94beb130d22c68fed5473bfea890e3f0770e698d0786b3ae1bb84
|
3 |
+
size 40
|
events.out.tfevents.1639128961.t1v-n-358ff5d1-w-0.691429.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce6c72f09124acb4634a23ea7eb63aec00eba13595fe0887a7ef8116c5361376
|
3 |
+
size 40
|
events.out.tfevents.1639129025.t1v-n-358ff5d1-w-0.692738.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46c8eace8366230b32c94f9c7986a751c6faeb2da07e46805c33cf50072349ba
|
3 |
+
size 40
|
events.out.tfevents.1639129541.t1v-n-358ff5d1-w-0.694454.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bbabad7dcae1ed7eeaf7462fd304baeae84d3fe85bf4eb6371738109d9654c7a
|
3 |
+
size 40
|
run.sh
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
./run_t5_mlm_flax_streaming.py \
|
2 |
-
--output_dir="
|
3 |
--model_type="t5" \
|
4 |
-
--config_name="./" \
|
5 |
-
--tokenizer_name="
|
6 |
--dataset_name="NbAiLab/nbailab_extended" \
|
7 |
--max_seq_length="512" \
|
8 |
--weight_decay="0.01" \
|
|
|
1 |
./run_t5_mlm_flax_streaming.py \
|
2 |
+
--output_dir="." \
|
3 |
--model_type="t5" \
|
4 |
+
--config_name="./config.json" \
|
5 |
+
--tokenizer_name="." \
|
6 |
--dataset_name="NbAiLab/nbailab_extended" \
|
7 |
--max_seq_length="512" \
|
8 |
--weight_decay="0.01" \
|
run_t5_mlm_flax.py
CHANGED
@@ -599,7 +599,7 @@ if __name__ == "__main__":
|
|
599 |
dropout_rngs = jax.random.split(rng, jax.local_device_count())
|
600 |
|
601 |
if model_args.model_name_or_path:
|
602 |
-
model = FlaxT5ForConditionalGeneration.
|
603 |
model_args.model_name_or_path, config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
|
604 |
)
|
605 |
else:
|
|
|
599 |
dropout_rngs = jax.random.split(rng, jax.local_device_count())
|
600 |
|
601 |
if model_args.model_name_or_path:
|
602 |
+
model = FlaxT5ForConditionalGeneration.from_pretrainedu
|
603 |
model_args.model_name_or_path, config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
|
604 |
)
|
605 |
else:
|
run_t5_mlm_flax_streaming.py
CHANGED
@@ -554,17 +554,13 @@ if __name__ == "__main__":
|
|
554 |
rng = jax.random.PRNGKey(training_args.seed)
|
555 |
dropout_rngs = jax.random.split(rng, jax.local_device_count())
|
556 |
|
557 |
-
#Pere changed 13 august
|
558 |
-
#model = FlaxT5ForConditionalGeneration(config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype))
|
559 |
|
560 |
if model_args.model_name_or_path:
|
561 |
model = FlaxT5ForConditionalGeneration.from_pretrained(
|
562 |
model_args.model_name_or_path, config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
|
563 |
)
|
564 |
else:
|
565 |
-
model = FlaxT5ForConditionalGeneration.
|
566 |
-
config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
|
567 |
-
)
|
568 |
|
569 |
|
570 |
# Data collator
|
|
|
554 |
rng = jax.random.PRNGKey(training_args.seed)
|
555 |
dropout_rngs = jax.random.split(rng, jax.local_device_count())
|
556 |
|
|
|
|
|
557 |
|
558 |
if model_args.model_name_or_path:
|
559 |
model = FlaxT5ForConditionalGeneration.from_pretrained(
|
560 |
model_args.model_name_or_path, config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
|
561 |
)
|
562 |
else:
|
563 |
+
model = FlaxT5ForConditionalGeneration(config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype))
|
|
|
|
|
564 |
|
565 |
|
566 |
# Data collator
|