mtasic85 committed on
Commit
60e17da
1 Parent(s): 8fa271a

pretrain model

Browse files
Files changed (1) hide show
  1. scripts/pretrain-model.yaml +2 -2
scripts/pretrain-model.yaml CHANGED
@@ -11,7 +11,7 @@ model_config:
11
  vocab_size: 32768
12
  block_size: 8192
13
  n_layer: 32
14
- n_head: 32
15
  head_size: 64
16
  n_embd: 768
17
  n_query_groups: 4
@@ -69,7 +69,7 @@ train:
69
  global_batch_size: 512
70
 
71
  # Number of samples per data-parallel rank (type: int, default: 4)
72
- micro_batch_size: 29
73
 
74
  # Number of iterations with learning rate warmup active (type: int, default: 2000)
75
  lr_warmup_steps: 0
 
11
  vocab_size: 32768
12
  block_size: 8192
13
  n_layer: 32
14
+ n_head: 16
15
  head_size: 64
16
  n_embd: 768
17
  n_query_groups: 4
 
69
  global_batch_size: 512
70
 
71
  # Number of samples per data-parallel rank (type: int, default: 4)
72
+ micro_batch_size: 32
73
 
74
  # Number of iterations with learning rate warmup active (type: int, default: 2000)
75
  lr_warmup_steps: 0