# Training-run configuration: model/dataset identifiers, precision flags,
# LoRA adapter settings, and batching/schedule hyperparameters.
# NOTE(review): the meaning of each key is defined by the consuming script,
# which is not visible here; hedged comments below should be confirmed.

# Model and data identifiers (these look like Hugging Face Hub repo ids;
# TODO confirm against the loader).
model_name: mnoukhov/pythia410m-tldr-sft
dataset_name: mnoukhov/openai_summarize_comparisons_tldrprompt_relabel1b
gold_model_name: mnoukhov/pythia1b-sft-rm-tldrprompt

# Training hyperparameters and step schedule.
beta: 0.5
max_steps: 10000
eval_steps: 1000

# Precision / quantization flags: only fp16 is enabled.
load_in_8bit: false
bf16: false
fp16: true

# Written with an explicit decimal point in the mantissa: YAML 1.1 loaders
# such as PyYAML resolve a bare `1e-5` as a string rather than a float.
learning_rate: 1.0e-5

# PEFT / LoRA adapter settings.
use_peft: true
lora_all_linear: true
lora_r: 8
lora_alpha: 32
lora_dropout: 0.05

# Batching and warmup.
gradient_accumulation_steps: 4
per_device_train_batch_size: 4
warmup_steps: 150