---
# Training run configuration: a flat mapping of option name -> value.
# NOTE(review): the consuming script is not visible from this file; the
# grouping comments below describe the keys only as far as their names and
# values show. Confirm semantics against the script's argument definitions.

# Model and dataset identifiers (written as `owner/name` paths).
model_name: mnoukhov/pythia410m-tldr-sft
dataset_name: mnoukhov/openai_summarize_comparisons_tldrprompt_relabel1b
gold_model_name: mnoukhov/pythia1b-sft-rm-tldrprompt
pseudo_dataset_name: mnoukhov/openai_summarize_generated_10k

# Objective / schedule.
beta: 0.5
num_train_epochs: 5
eval_steps: 750

# Precision / quantization flags (canonical lowercase YAML booleans).
load_in_8bit: false
bf16: false
fp16: true

# Written with an explicit decimal point: YAML 1.1 resolvers (e.g. PyYAML)
# load a bare `1e-5` as the string "1e-5" rather than a float.
learning_rate: 1.0e-5

# PEFT / LoRA adapter settings.
use_peft: true
lora_all_linear: true
lora_r: 8
lora_alpha: 32
lora_dropout: 0.05

# Batching and warmup.
# presumably 4 x 4 = 16 samples per optimizer step per device — TODO confirm
gradient_accumulation_steps: 4
per_device_train_batch_size: 4
warmup_steps: 150