mtasic85 committed on
Commit
24ca5e3
1 Parent(s): 049ca3f
Files changed (1) hide show
  1. scripts/model.yaml +3 -3
scripts/model.yaml CHANGED
@@ -73,7 +73,7 @@ train:
73
 
74
  # Total number of tokens to train on (type: Optional[int], default: 3000000000000)
75
  # max_tokens: 3000000000000
76
- max_tokens: 12757004469 # 129767 * 32769 * 3
77
 
78
  # Limits the number of optimizer steps to run. (type: Optional[int], default: null)
79
  max_steps:
@@ -110,9 +110,9 @@ eval:
110
  # Optimizer-related arguments
111
  optimizer:
112
  # class_path: torch.optim.AdamW
113
- # class_path: grokadamw.GrokAdamW
114
  # class_path: bitsandbytes.optim.AdamW8bit
115
- class_path: bitsandbytes.optim.PagedAdamW8bit
116
 
117
  init_args:
118
  # (type: float, default: 0.001)
 
73
 
74
  # Total number of tokens to train on (type: Optional[int], default: 3000000000000)
75
  # max_tokens: 3000000000000
76
+ max_tokens: 4252334823 # 129767 * 32769 * 1
77
 
78
  # Limits the number of optimizer steps to run. (type: Optional[int], default: null)
79
  max_steps:
 
110
  # Optimizer-related arguments
111
  optimizer:
112
  # class_path: torch.optim.AdamW
113
+ class_path: grokadamw.GrokAdamW
114
  # class_path: bitsandbytes.optim.AdamW8bit
115
+ # class_path: bitsandbytes.optim.PagedAdamW8bit
116
 
117
  init_args:
118
  # (type: float, default: 0.001)