tangledgroup/tangled-llama-v-128k-base-v0.1

Text Generation

text-generation-inference

Inference Endpoints

Model card | Files and versions | Community

mtasic85 committed 11 days ago

Commit

5121df2

•

1 Parent(s): c721106

pretrain mode

Files changed (1) hide show

scripts/pretrain-model.yaml +2 -2

scripts/pretrain-model.yaml CHANGED Viewed

@@ -19,7 +19,7 @@ model_config:
   norm_class_name: "RMSNorm"
   norm_eps: 1e-05
   mlp_class_name: "LLaMAMLP"
-  intermediate_size: 3584
   rope_base: 500000
   rope_adjustments:
     factor: 32.0
@@ -76,7 +76,7 @@ train:
   # Total number of tokens to train on (type: Optional[int], default: 3000000000000)
   # max_tokens: 3000000000000
-  max_tokens: ??? # ? * 2049 * 5
   # Limits the number of optimizer steps to run. (type: Optional[int], default: null)
   max_steps:

   norm_class_name: "RMSNorm"
   norm_eps: 1e-05
   mlp_class_name: "LLaMAMLP"
+  intermediate_size: 4096
   rope_base: 500000
   rope_adjustments:
     factor: 32.0
   # Total number of tokens to train on (type: Optional[int], default: 3000000000000)
   # max_tokens: 3000000000000
+  max_tokens: 8159107755 # 796399 * 2049 * 5
   # Limits the number of optimizer steps to run. (type: Optional[int], default: null)
   max_steps: