---
# LLaMA-Factory LoRA SFT training configuration.
# Reformatted from a single mangled line into valid block-style YAML;
# every key/value pair from the original is preserved unchanged.

### model
model_name_or_path: /home/marl/.cache/huggingface/hub/models--meta-llama--Meta-Llama-3.1-8B-Instruct/snapshots/07eb05b21d191a58c577b4a45982fe0c049d0693
template: llama3
flash_attn: auto
rope_scaling: linear  # NOTE(review): Llama-3.1 ships its own rope-scaling config — confirm this override is intended
quantization_method: bitsandbytes  # NOTE(review): no quantization_bit key present — verify quantization is actually applied

### method
stage: sft
do_train: true
finetuning_type: lora
lora_rank: 16
lora_alpha: 16
lora_dropout: 0
lora_target: all
# NOTE(review): DoRA, rsLoRA and PiSSA are distinct adapter variants that most
# tooling treats as mutually exclusive, and Unsloth does not accelerate DoRA —
# confirm this combination is accepted by the trainer before a long run.
use_dora: true
use_rslora: true
pissa_init: true
pissa_convert: true
use_unsloth: true

### dataset
dataset: gpt4_judge,Judge
dataset_dir: data
cutoff_len: 4096
max_samples: 100000
preprocessing_num_workers: 16
packing: false

### output
output_dir: saves/LLaMA3-8B-Chat/lora/JudgePierce
logging_steps: 5
save_steps: 100
plot_loss: true
report_to: none

### train
bf16: true
per_device_train_batch_size: 2
gradient_accumulation_steps: 8
learning_rate: 5.0e-05
num_train_epochs: 10.0
lr_scheduler_type: cosine
warmup_steps: 0
max_grad_norm: 1.0
optim: adamw_torch
ddp_timeout: 180000000
include_num_input_tokens_seen: true