jan-hq committed
Commit d1840ba
1 Parent(s): b7b5f3b

Update training_config.yaml

Files changed (1):
  training_config.yaml (+18 -17)
training_config.yaml CHANGED

@@ -20,16 +20,14 @@
 tokenizer:
   _component_: torchtune.models.llama3.llama3_s_tokenizer
   path: ../model_zoo/tokenizer.model
-  max_seq_len: 1024
-
+  max_seq_len: 512
 # Dataset
 dataset:
-  _component_: torchtune.datasets.chat_dataset
-  source: homebrewltd/instruction-speech-whispervq-v2
-  conversation_style: openai
-  max_seq_len: 1024
+  _component_: torchtune.datasets.sound_completion_dataset
+  source: jan-hq/raw_audio_with_audio_tokens_for_pretraining_using_Whisper_VQ
+  max_seq_len: 512
   split: train
-  train_on_input: True
+  column: text

 seed: 42
 shuffle: True
@@ -39,31 +37,34 @@ model:
 #  path: model_zoo/Llama3.1_s_8b_init
 checkpointer:
   _component_: torchtune.utils.FullModelHFCheckpointerSaveSteps
-  checkpoint_dir: ../model_zoo/llama3.1-s-base-2024-08-17
+  checkpoint_dir: ../model_zoo/Llama3.1_s_8b_init
   checkpoint_files: [
-    pytorch_model.bin,
+    model-00001-of-00004.safetensors,
+    model-00002-of-00004.safetensors,
+    model-00003-of-00004.safetensors,
+    model-00004-of-00004.safetensors,
   ]
   recipe_checkpoint: null
-  output_dir: ../model_zoo/llama3-s-instruct2
+  output_dir: ../model_zoo/llama3-s
   model_type: LLAMA3
 resume_from_checkpoint: False
 save_every_n_steps: 1000
 max_checkpoints: 3
 # Fine-tuning arguments
-batch_size: 8
-epochs: 5
+batch_size: 12
+epochs: 1
 max_steps_per_epoch: null
-gradient_accumulation_steps: 2
+gradient_accumulation_steps: 4
 compile: False
 # Optimizer and Scheduler
 optimizer:
   _component_: torch.optim.AdamW # change this to use adam_mini: torchtune.modules.optimizer.Adam_mini
-  weight_decay: 0.005
-  lr: 1e-4
+  weight_decay: 0.01
+  lr: 2e-4
   fused: True
 lr_scheduler:
   _component_: torchtune.modules.get_cosine_schedule_with_warmup
-  num_warmup_steps: 80
+  num_warmup_steps: 50

 loss:
   _component_: torch.nn.CrossEntropyLoss
@@ -85,6 +86,6 @@ ac_mode: 'selective'
 metric_logger:
   _component_: torchtune.utils.metric_logging.DiskLogger
   log_dir: ${output_dir}
-output_dir: ../model_zoo/Llama3-instruct2-log/
+output_dir: ../model_zoo/Llama3-sound2-log/
 log_every_n_steps: 1
 log_peak_memory_stats: False
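
For context on the dataset switch: the config moves from chat-style instruction tuning (torchtune.datasets.chat_dataset over homebrewltd/instruction-speech-whispervq-v2) to completion-style pretraining over a raw-audio-token corpus, reading the text column named in the config. A minimal sketch for peeking at that corpus with the Hugging Face datasets library; the column name comes straight from the config, and streaming is used only to avoid downloading the full corpus:

# Peek at the pretraining corpus the new config points to.
# Assumes the dataset is accessible and exposes the `text` column from the config.
from datasets import load_dataset

ds = load_dataset(
    "jan-hq/raw_audio_with_audio_tokens_for_pretraining_using_Whisper_VQ",
    split="train",
    streaming=True,  # inspect a few rows without a full download
)
row = next(iter(ds))
print(row["text"][:200])  # completion-style text carrying WhisperVQ sound tokens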
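The checkpointer change points at the sharded safetensors export of Llama3.1_s_8b_init rather than a single pytorch_model.bin. A small sanity-check sketch, assuming the shard names and the relative checkpoint_dir from the config resolve from wherever training is launched:

# Check that every safetensors shard named in the config exists on disk.
from pathlib import Path

checkpoint_dir = Path("../model_zoo/Llama3.1_s_8b_init")  # from the config
shards = [f"model-{i:05d}-of-00004.safetensors" for i in range(1, 5)]
missing = [s for s in shards if not (checkpoint_dir / s).exists()]
print("missing shards:", missing or "none")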
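On the fine-tuning arguments: batch_size 12 with gradient_accumulation_steps 4 means 48 sequences per optimizer step per device, and at max_seq_len 512 that is 24,576 tokens per step per device. A sketch of the arithmetic; world_size is an assumption, since the config does not pin a device count:

# Effective work per optimizer step implied by the new config values.
batch_size = 12
gradient_accumulation_steps = 4
max_seq_len = 512
world_size = 1  # hypothetical; set to the actual GPU count

sequences_per_step = batch_size * gradient_accumulation_steps * world_size
tokens_per_step = sequences_per_step * max_seq_len
print(sequences_per_step, tokens_per_step)  # 48 sequences, 24576 tokens at world_size=1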
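At runtime the optimizer and lr_scheduler blocks resolve roughly as below. This is a sketch, not the recipe's actual instantiation path: the stand-in model, the device handling, and the num_training_steps value are assumptions (the recipe derives the total step count from dataset length, batch size, gradient accumulation, and epochs: 1):

# Sketch of what the optimizer/lr_scheduler config blocks wire up.
import torch
from torchtune.modules import get_cosine_schedule_with_warmup

device = "cuda" if torch.cuda.is_available() else "cpu"
model = torch.nn.Linear(8, 8).to(device)  # stand-in for the Llama3.1 8B model
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=2e-4,
    weight_decay=0.01,
    fused=(device == "cuda"),  # config sets fused: True; fused AdamW needs CUDA
)
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=50,
    num_training_steps=10_000,  # hypothetical total step count
)

With the config updated, a launch would typically look like tune run <recipe> --config training_config.yaml; the specific recipe name is repo-specific and not shown in this diff.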