Safetensors
English
llama
sound language model
jan-hq committed
Commit 55b3a34
1 Parent(s): d7fba62

Delete 8B_full.yaml

Files changed (1)
  1. 8B_full.yaml +0 -91
8B_full.yaml DELETED
@@ -1,91 +0,0 @@
-# Config for multi-device full finetuning in full_finetune_distributed.py
-# using a Llama3 8B Instruct model
-#
-# This config assumes that you've run the following command before launching
-# this run:
-# tune download meta-llama/Meta-Llama-3-8B-Instruct --output-dir /tmp/Meta-Llama-3-8B-Instruct --hf-token <HF_TOKEN>
-#
-# To launch on 4 devices, run the following command from root:
-# tune run --nproc_per_node 4 full_finetune_distributed --config llama3/8B_full
-#
-# You can add specific overrides through the command line. For example
-# to override the checkpointer directory while launching training
-# you can run:
-# tune run --nproc_per_node 4 full_finetune_distributed --config llama3/8B_full checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR>
-#
-# This config works best when the model is being fine-tuned on 2+ GPUs.
-# Single device full finetuning requires more memory optimizations. It's
-# best to use 8B_full_single_device.yaml for those cases
-# Tokenizer
-tokenizer:
-  _component_: torchtune.models.llama3.llama3_s_tokenizer
-  path: ../model_zoo/tokenizer.model
-  max_seq_len: 512
-# Dataset
-dataset:
-  _component_: torchtune.datasets.sound_completion_dataset
-  source: jan-hq/raw_audio_with_audio_tokens_for_pretraining_using_Whisper_VQ
-  max_seq_len: 512
-  split: train
-  column: text
-
-seed: 42
-shuffle: True
-# Model Arguments
-model:
-  _component_: torchtune.models.llama3_1.llama3_1_s_8b
-  # path: model_zoo/Llama3.1_s_8b_init
-checkpointer:
-  _component_: torchtune.utils.FullModelHFCheckpointerSaveSteps
-  checkpoint_dir: ../model_zoo/Llama3.1_s_8b_init
-  checkpoint_files: [
-    model-00001-of-00004.safetensors,
-    model-00002-of-00004.safetensors,
-    model-00003-of-00004.safetensors,
-    model-00004-of-00004.safetensors,
-  ]
-  recipe_checkpoint: null
-  output_dir: ../model_zoo/llama3-s
-  model_type: LLAMA3
-resume_from_checkpoint: False
-save_every_n_steps: 1000
-max_checkpoints: 3
-# Fine-tuning arguments
-batch_size: 12
-epochs: 1
-max_steps_per_epoch: null
-gradient_accumulation_steps: 4
-compile: False
-# Optimizer and Scheduler
-optimizer:
-  _component_: torch.optim.AdamW # change this to use adam_mini: torchtune.modules.optimizer.Adam_mini
-  weight_decay: 0.01
-  lr: 2e-4
-  fused: True
-lr_scheduler:
-  _component_: torchtune.modules.get_cosine_schedule_with_warmup
-  num_warmup_steps: 50
-
-loss:
-  _component_: torch.nn.CrossEntropyLoss
-
-fsdp:
-  cpu_offload: False
-
-# Training env
-device: cuda
-dtype: bf16
-
-# Memory management
-enable_activation_checkpointing: True
-memory_efficient_fsdp_wrap: True
-ac_mode: 'selective'
-
-
-# Logging
-metric_logger:
-  _component_: torchtune.utils.metric_logging.DiskLogger
-  log_dir: ${output_dir}
-output_dir: ../model_zoo/Llama3-sound2-log/
-log_every_n_steps: 1
-log_peak_memory_stats: False
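For reference, the deleted file is a torchtune-style YAML config, which torchtune resolves with OmegaConf: the ${output_dir} interpolation in metric_logger.log_dir points at the top-level output_dir key, and the key=value overrides mentioned in the header comments are dot-list overrides merged on top of the base file. A minimal sketch of that behaviour (assuming a local copy of 8B_full.yaml; the override path below is illustrative, not from this repo):

# Minimal sketch (not part of this repo): inspect and override the deleted
# config with OmegaConf, which torchtune uses to parse its YAML configs.
from omegaconf import OmegaConf

cfg = OmegaConf.load("8B_full.yaml")  # assumes a local copy of the deleted file

# ${output_dir} in metric_logger.log_dir resolves against the top-level key,
# i.e. ../model_zoo/Llama3-sound2-log/
resolved = OmegaConf.to_container(cfg, resolve=True)
print(resolved["metric_logger"]["log_dir"])

# Overrides like checkpointer.checkpoint_dir=<YOUR_CHECKPOINT_DIR> are dot-list
# overrides merged on top of the base config (the path here is made up):
overrides = OmegaConf.from_dotlist(["checkpointer.checkpoint_dir=/path/to/ckpts"])
cfg = OmegaConf.merge(cfg, overrides)
print(cfg.checkpointer.checkpoint_dir)  # -> /path/to/ckpts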