Update training_config.yaml
training_config.yaml  (+18 -17)  CHANGED
@@ -20,16 +20,14 @@
 tokenizer:
   _component_: torchtune.models.llama3.llama3_s_tokenizer
   path: ../model_zoo/tokenizer.model
-  max_seq_len:
-
+  max_seq_len: 512
 # Dataset
 dataset:
-  _component_: torchtune.datasets.
-  source:
-
-  max_seq_len: 1024
+  _component_: torchtune.datasets.sound_completion_dataset
+  source: jan-hq/raw_audio_with_audio_tokens_for_pretraining_using_Whisper_VQ
+  max_seq_len: 512
   split: train
-
+  column: text
 
 seed: 42
 shuffle: True
@@ -39,31 +37,34 @@ model:
 # path: model_zoo/Llama3.1_s_8b_init
 checkpointer:
   _component_: torchtune.utils.FullModelHFCheckpointerSaveSteps
-  checkpoint_dir: ../model_zoo/
+  checkpoint_dir: ../model_zoo/Llama3.1_s_8b_init
   checkpoint_files: [
-
+    model-00001-of-00004.safetensors,
+    model-00002-of-00004.safetensors,
+    model-00003-of-00004.safetensors,
+    model-00004-of-00004.safetensors,
   ]
   recipe_checkpoint: null
-  output_dir: ../model_zoo/llama3-s
+  output_dir: ../model_zoo/llama3-s
   model_type: LLAMA3
 resume_from_checkpoint: False
 save_every_n_steps: 1000
 max_checkpoints: 3
 # Fine-tuning arguments
-batch_size:
-epochs:
+batch_size: 12
+epochs: 1
 max_steps_per_epoch: null
-gradient_accumulation_steps:
+gradient_accumulation_steps: 4
 compile: False
 # Optimizer and Scheduler
 optimizer:
   _component_: torch.optim.AdamW #change this to use adam_mini: torchtune.modules.optimizer.Adam_mini
-  weight_decay: 0.
-  lr:
+  weight_decay: 0.01
+  lr: 2e-4
   fused: True
 lr_scheduler:
   _component_: torchtune.modules.get_cosine_schedule_with_warmup
-  num_warmup_steps:
+  num_warmup_steps: 50
 
 loss:
   _component_: torch.nn.CrossEntropyLoss
@@ -85,6 +86,6 @@ ac_mode: 'selective'
 metric_logger:
   _component_: torchtune.utils.metric_logging.DiskLogger
   log_dir: ${output_dir}
-output_dir: ../model_zoo/Llama3-
+output_dir: ../model_zoo/Llama3-sound2-log/
 log_every_n_steps: 1
 log_peak_memory_stats: False
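For context only (not part of this commit): torchtune resolves each _component_ entry in a config like this into a Python object at runtime. The sketch below assumes the config is loaded with OmegaConf and instantiated with torchtune's config.instantiate, and that the custom llama3_s_tokenizer and sound_completion_dataset builders named in the config are importable from this repo's torchtune fork; the wiring shown is illustrative, not the repo's actual recipe.

# Illustrative sketch, not the repo's recipe: how torchtune-style configs turn
# _component_ entries into objects. Assumes the custom components referenced in
# the config are available in the installed torchtune fork.
from omegaconf import OmegaConf
from torchtune import config

cfg = OmegaConf.load("training_config.yaml")

tokenizer = config.instantiate(cfg.tokenizer)                   # llama3_s_tokenizer
dataset = config.instantiate(cfg.dataset, tokenizer=tokenizer)  # sound_completion_dataset, max_seq_len=512
loss_fn = config.instantiate(cfg.loss)                          # torch.nn.CrossEntropyLoss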