pere committed on
Commit
0efb42b
1 Parent(s): f1564ec
README.md CHANGED
@@ -8,35 +8,7 @@ datasets:
8
  ---
9
  # 🇳🇴 Norwegian T5 Base model Trained on the NCC🇳🇴
10
 
11
- This is a Norwegian T5-base model trained on the Norwegian Colossal Corpus (NCC) on a TPU v3-8. It needs to be finetuned on a specific task before being used for anything.
12
 
13
- Currently the model is training. It is expected that it should be finished by the end of August 2021.
14
-
15
- The following settings were used in training:
16
- ```bash
17
- ./run_t5_mlm_flax_streaming.py \
18
- --output_dir="./" \
19
- --model_type="t5" \
20
- --config_name="./" \
21
- --tokenizer_name="./" \
22
- --dataset_name="pere/norwegian_colossal_corpus_v2_short100k" \
23
- --max_seq_length="512" \
24
- --weight_decay="0.01" \
25
- --per_device_train_batch_size="32" \
26
- --per_device_eval_batch_size="32" \
27
- --learning_rate="8e-3" \
28
- --warmup_steps="5000" \
29
- --overwrite_output_dir \
30
- --cache_dir /mnt/disks/flaxdisk/cache/ \
31
- --num_train_epochs="5" \
32
- --adam_beta1="0.9" \
33
- --adam_beta2="0.98" \
34
- --logging_steps="500" \
35
- --num_train_steps="1000000" \
36
- --num_eval_samples="5000" \
37
- --save_steps="5000" \
38
- --eval_steps="5000" \
39
- --preprocessing_num_workers 96 \
40
- --adafactor \
41
- --push_to_hub
42
  ```
 
8
  ---
9
  # 🇳🇴 Norwegian T5 Base model Trained on the NCC🇳🇴
10
 
11
+ This is a Norwegian T5-base model trained on the Norwegian Colossal Corpus (NCC) on a TPU v3-8.
12
 
13
+ This model is currently training. It is expected to finish in January 2022. Please do not use it yet.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  ```
events.out.tfevents.1639128562.t1v-n-358ff5d1-w-0.686981.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dfce941b39945a6d6d07c566fd3bc9b6fac8b6004009113d225a917c03538e7
3
+ size 40
events.out.tfevents.1639128677.t1v-n-358ff5d1-w-0.688351.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8932ca5995a80bb77d76b41daf7a8a4d63d987473564afc7e00a9d10158be496
3
+ size 40
events.out.tfevents.1639128772.t1v-n-358ff5d1-w-0.689734.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92f6bb44edb94beb130d22c68fed5473bfea890e3f0770e698d0786b3ae1bb84
3
+ size 40
events.out.tfevents.1639128961.t1v-n-358ff5d1-w-0.691429.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce6c72f09124acb4634a23ea7eb63aec00eba13595fe0887a7ef8116c5361376
3
+ size 40
events.out.tfevents.1639129025.t1v-n-358ff5d1-w-0.692738.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46c8eace8366230b32c94f9c7986a751c6faeb2da07e46805c33cf50072349ba
3
+ size 40
events.out.tfevents.1639129541.t1v-n-358ff5d1-w-0.694454.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbabad7dcae1ed7eeaf7462fd304baeae84d3fe85bf4eb6371738109d9654c7a
3
+ size 40
run.sh CHANGED
@@ -1,8 +1,8 @@
1
  ./run_t5_mlm_flax_streaming.py \
2
- --output_dir="./" \
3
  --model_type="t5" \
4
- --config_name="./" \
5
- --tokenizer_name="./" \
6
  --dataset_name="NbAiLab/nbailab_extended" \
7
  --max_seq_length="512" \
8
  --weight_decay="0.01" \
 
1
  ./run_t5_mlm_flax_streaming.py \
2
+ --output_dir="." \
3
  --model_type="t5" \
4
+ --config_name="./config.json" \
5
+ --tokenizer_name="." \
6
  --dataset_name="NbAiLab/nbailab_extended" \
7
  --max_seq_length="512" \
8
  --weight_decay="0.01" \
run_t5_mlm_flax.py CHANGED
@@ -599,7 +599,7 @@ if __name__ == "__main__":
599
  dropout_rngs = jax.random.split(rng, jax.local_device_count())
600
 
601
  if model_args.model_name_or_path:
602
- model = FlaxT5ForConditionalGeneration.from_pretrained(
603
  model_args.model_name_or_path, config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
604
  )
605
  else:
 
599
  dropout_rngs = jax.random.split(rng, jax.local_device_count())
600
 
601
  if model_args.model_name_or_path:
602
+ model = FlaxT5ForConditionalGeneration.from_pretrained(
603
  model_args.model_name_or_path, config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
604
  )
605
  else:
run_t5_mlm_flax_streaming.py CHANGED
@@ -554,17 +554,13 @@ if __name__ == "__main__":
554
  rng = jax.random.PRNGKey(training_args.seed)
555
  dropout_rngs = jax.random.split(rng, jax.local_device_count())
556
 
557
- #Pere changed 13 august
558
- #model = FlaxT5ForConditionalGeneration(config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype))
559
 
560
  if model_args.model_name_or_path:
561
  model = FlaxT5ForConditionalGeneration.from_pretrained(
562
  model_args.model_name_or_path, config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
563
  )
564
  else:
565
- model = FlaxT5ForConditionalGeneration.from_pretrained(
566
- config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
567
- )
568
 
569
 
570
  # Data collator
 
554
  rng = jax.random.PRNGKey(training_args.seed)
555
  dropout_rngs = jax.random.split(rng, jax.local_device_count())
556
 
 
 
557
 
558
  if model_args.model_name_or_path:
559
  model = FlaxT5ForConditionalGeneration.from_pretrained(
560
  model_args.model_name_or_path, config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
561
  )
562
  else:
563
+ model = FlaxT5ForConditionalGeneration(config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype))
 
 
564
 
565
 
566
  # Data collator