---
license: mit
datasets:
- iamplus/Orca
---

**Base model:** meta-llama/Llama-2-70b-hf

**Data:** 100k samples from the 1M-example GPT-4 Orca dataset (Dolphin)
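
For reference, a minimal sketch of pulling the base model and an illustrative 100k-example subset of the Orca data from the Hub; the `train` split name and the shuffle-then-select subsampling (and its seed) are assumptions, not the exact procedure used for this run.

```python
# Minimal sketch (not the training pipeline): load the base model and take an
# illustrative 100k-example subset of the Orca data.
# Assumptions: "iamplus/Orca" exposes a "train" split; the shuffle/seed used to
# pick the 100k examples for this run is not documented here.
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-70b-hf")
base_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-70b-hf",
    torch_dtype=torch.bfloat16,  # the run below trains in pure bf16
    device_map="auto",
)

orca = load_dataset("iamplus/Orca", split="train")
orca_100k = orca.shuffle(seed=42).select(range(100_000))
```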
**Training Params:**

```
batch_size_training: '16'
checkpoint_type: StateDictType.FULL_STATE_DICT
dataset: orca_dolphin_100k_gpt4
dist_checkpoint_folder: fine-tuned
dist_checkpoint_root_folder: model_checkpoints
enable_fsdp: 'True'
freeze_layers: 'False'
fsdp_activation_checkpointing: 'True'
gamma: '0.85'
low_cpu_fsdp: 'True'
lr: 1e-05
micro_batch_size: '16'
mixed_precision: 'True'
model_name: meta-llama/Llama-2-70b-hf
num_epochs: '1'
num_freeze_layers: '1'
num_workers_dataloader: '1'
one_gpu: 'False'
optimizer: anyprecision
output_dir: ~/llama-recipes-70b/output
peft_method: lora
pure_bf16: 'True'
quantization: 'False'
run_validation: 'True'
save_model: 'True'
save_optimizer: 'True'
seed: '42'
sharding_strategy: ShardingStrategy.FULL_SHARD
use_fast_kernels: 'False'
use_fp16: 'False'
use_peft: 'False'
val_batch_size: '16'
weight_decay: '0.0'
```
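
A minimal usage sketch for loading the fine-tuned weights for inference with `transformers`, assuming this repo hosts the consolidated (FULL_STATE_DICT) checkpoint in standard Hugging Face format; the repo id placeholder and the prompt are illustrative.

```python
# Minimal inference sketch, assuming the fine-tuned weights are available in
# Hugging Face format under this model's repo id (placeholder below).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "<this-repo-id>"  # replace with this model's Hub repo id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # the run used pure_bf16
    device_map="auto",
)

prompt = "Explain the difference between nuclear fission and fusion in two sentences."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=256, do_sample=False)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```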