pretrain model
scripts/pretrain-model.yaml CHANGED
@@ -11,7 +11,7 @@ model_config:
   vocab_size: 32768
   block_size: 8192
   n_layer: 32
-  n_head:
+  n_head: 16
   head_size: 64
   n_embd: 768
   n_query_groups: 4
@@ -69,7 +69,7 @@ train:
   global_batch_size: 512

   # Number of samples per data-parallel rank (type: int, default: 4)
-  micro_batch_size:
+  micro_batch_size: 32

   # Number of iterations with learning rate warmup active (type: int, default: 2000)
   lr_warmup_steps: 0
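
For context on the filled-in n_head value: together with n_query_groups, these fields describe grouped-query attention. A minimal sketch of the implied geometry, assuming the litgpt-style convention that n_query_groups KV heads are shared across n_head query heads and that head_size can be set independently of n_embd:

```python
# Attention geometry implied by the model_config values above.
# Assumes the litgpt-style convention: n_query_groups KV heads
# shared across n_head query heads (grouped-query attention).
n_head = 16          # value filled in by this commit
head_size = 64
n_embd = 768
n_query_groups = 4

q_per_kv = n_head // n_query_groups   # 4 query heads per KV head
attn_dim = n_head * head_size         # 1024-dim attention space
print(f"{q_per_kv} query heads share each KV head; "
      f"attention width {attn_dim} vs. embedding width {n_embd}")
```

Note that n_head * head_size (1024) differs from n_embd (768) here; in litgpt-style configs, head_size defaults to n_embd // n_head but appears to be deliberately overridden in this one.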
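
The new micro_batch_size interacts with the global_batch_size a few lines above it. A minimal sketch of the usual gradient-accumulation relation in litgpt-style trainers (the world_size values below are hypothetical; the config itself does not fix a GPU count):

```python
# How the two batch-size settings interact, assuming the common relation:
#   global_batch_size = micro_batch_size * world_size * accumulation_steps
global_batch_size = 512   # from the config
micro_batch_size = 32     # value filled in by this commit

for world_size in (1, 2, 4, 8):   # hypothetical data-parallel rank counts
    assert global_batch_size % (micro_batch_size * world_size) == 0
    steps = global_batch_size // (micro_batch_size * world_size)
    print(f"{world_size} ranks -> {steps} gradient accumulation steps")
```

Leaving micro_batch_size at its default of 4 would have required 128 accumulation steps on a single rank; raising it to 32 cuts that to 16, at the cost of more activation memory per step.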