jan-hq committed
Commit d1840ba
1 Parent(s): b7b5f3b

Update training_config.yaml

Files changed (1):
  training_config.yaml (+18 -17)
training_config.yaml CHANGED

@@ -20,16 +20,14 @@
 tokenizer:
   _component_: torchtune.models.llama3.llama3_s_tokenizer
   path: ../model_zoo/tokenizer.model
-  max_seq_len: 1024
-
+  max_seq_len: 512
 # Dataset
 dataset:
-  _component_: torchtune.datasets.chat_dataset
-  source: homebrewltd/instruction-speech-whispervq-v2
-  conversation_style: openai
-  max_seq_len: 1024
+  _component_: torchtune.datasets.sound_completion_dataset
+  source: jan-hq/raw_audio_with_audio_tokens_for_pretraining_using_Whisper_VQ
+  max_seq_len: 512
   split: train
-  train_on_input: True
+  column: text

 seed: 42
 shuffle: True
@@ -39,31 +37,34 @@ model:
 #  path: model_zoo/Llama3.1_s_8b_init
 checkpointer:
   _component_: torchtune.utils.FullModelHFCheckpointerSaveSteps
-  checkpoint_dir: ../model_zoo/llama3.1-s-base-2024-08-17
+  checkpoint_dir: ../model_zoo/Llama3.1_s_8b_init
   checkpoint_files: [
-    pytorch_model.bin,
+    model-00001-of-00004.safetensors,
+    model-00002-of-00004.safetensors,
+    model-00003-of-00004.safetensors,
+    model-00004-of-00004.safetensors,
   ]
   recipe_checkpoint: null
-  output_dir: ../model_zoo/llama3-s-instruct2
+  output_dir: ../model_zoo/llama3-s
   model_type: LLAMA3
 resume_from_checkpoint: False
 save_every_n_steps: 1000
 max_checkpoints: 3
 # Fine-tuning arguments
-batch_size: 8
-epochs: 5
+batch_size: 12
+epochs: 1
 max_steps_per_epoch: null
-gradient_accumulation_steps: 2
+gradient_accumulation_steps: 4
 compile: False
 # Optimizer and Scheduler
 optimizer:
   _component_: torch.optim.AdamW # change this to use adam_mini: torchtune.modules.optimizer.Adam_mini
-  weight_decay: 0.005
-  lr: 1e-4
+  weight_decay: 0.01
+  lr: 2e-4
   fused: True
 lr_scheduler:
   _component_: torchtune.modules.get_cosine_schedule_with_warmup
-  num_warmup_steps: 80
+  num_warmup_steps: 50

 loss:
   _component_: torch.nn.CrossEntropyLoss
@@ -85,6 +86,6 @@ ac_mode: 'selective'
 metric_logger:
   _component_: torchtune.utils.metric_logging.DiskLogger
   log_dir: ${output_dir}
-output_dir: ../model_zoo/Llama3-instruct2-log/
+output_dir: ../model_zoo/Llama3-sound2-log/
 log_every_n_steps: 1
 log_peak_memory_stats: False
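
For context on the dataset switch: the config moves from chat-style instruction tuning (torchtune.datasets.chat_dataset over homebrewltd/instruction-speech-whispervq-v2) to completion-style pretraining over a raw-audio-token corpus, reading the text column named in the config. A minimal sketch for peeking at that corpus with the Hugging Face datasets library; the column name comes straight from the config, and streaming is used only to avoid downloading the full corpus:

# Peek at the pretraining corpus the new config points to.
# Assumes the dataset is accessible and exposes the `text` column from the config.
from datasets import load_dataset

ds = load_dataset(
    "jan-hq/raw_audio_with_audio_tokens_for_pretraining_using_Whisper_VQ",
    split="train",
    streaming=True,  # inspect a few rows without a full download
)
row = next(iter(ds))
print(row["text"][:200])  # completion-style text carrying WhisperVQ sound tokens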
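The checkpointer change points at the sharded safetensors export of Llama3.1_s_8b_init rather than a single pytorch_model.bin. A small sanity-check sketch, assuming the shard names and the relative checkpoint_dir from the config resolve from wherever training is launched:

# Check that every safetensors shard named in the config exists on disk.
from pathlib import Path

checkpoint_dir = Path("../model_zoo/Llama3.1_s_8b_init")  # from the config
shards = [f"model-{i:05d}-of-00004.safetensors" for i in range(1, 5)]
missing = [s for s in shards if not (checkpoint_dir / s).exists()]
print("missing shards:", missing or "none")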
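On the fine-tuning arguments: batch_size 12 with gradient_accumulation_steps 4 means 48 sequences per optimizer step per device, and at max_seq_len 512 that is 24,576 tokens per step per device. A sketch of the arithmetic; world_size is an assumption, since the config does not pin a device count:

# Effective work per optimizer step implied by the new config values.
batch_size = 12
gradient_accumulation_steps = 4
max_seq_len = 512
world_size = 1  # hypothetical; set to the actual GPU count

sequences_per_step = batch_size * gradient_accumulation_steps * world_size
tokens_per_step = sequences_per_step * max_seq_len
print(sequences_per_step, tokens_per_step)  # 48 sequences, 24576 tokens at world_size=1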
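At runtime the optimizer and lr_scheduler blocks resolve roughly as below. This is a sketch, not the recipe's actual instantiation path: the stand-in model, the device handling, and the num_training_steps value are assumptions (the recipe derives the total step count from dataset length, batch size, gradient accumulation, and epochs: 1):

# Sketch of what the optimizer/lr_scheduler config blocks wire up.
import torch
from torchtune.modules import get_cosine_schedule_with_warmup

device = "cuda" if torch.cuda.is_available() else "cpu"
model = torch.nn.Linear(8, 8).to(device)  # stand-in for the Llama3.1 8B model
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=2e-4,
    weight_decay=0.01,
    fused=(device == "cuda"),  # config sets fused: True; fused AdamW needs CUDA
)
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=50,
    num_training_steps=10_000,  # hypothetical total step count
)

With the config updated, a launch would typically look like tune run <recipe> --config training_config.yaml; the specific recipe name is repo-specific and not shown in this diff.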