---
# Training configuration: a flat mapping of options read by the consuming
# training script. One key per line (block style) so that every entry is
# parsed as its own mapping key.
# NOTE(review): the grouping comments below are inferred from key names only;
# confirm the exact meaning of each option against the script that loads
# this file.

# Model and dataset identifiers (look like Hugging Face Hub repo ids -- confirm).
model_name: mnoukhov/pythia410m-tldr-sft
dataset_name: mnoukhov/openai_summarize_comparisons_tldrprompt_relabel1b
gold_model_name: mnoukhov/pythia1b-sft-rm-tldrprompt
pseudo_dataset_name: mnoukhov/openai_summarize_generated_10k

# Objective and schedule.
beta: 0.5
num_train_epochs: 5
eval_steps: 750

# Precision / quantisation switches (canonical lowercase booleans).
load_in_8bit: false
bf16: false
fp16: true

# Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
# resolve a bare `1e-5` as the string "1e-5" rather than a float.
learning_rate: 1.0e-5

# LoRA / PEFT options.
use_peft: true
lora_all_linear: true
lora_r: 8
lora_alpha: 32
lora_dropout: 0.05

# Batching and warmup.
gradient_accumulation_steps: 4
per_device_train_batch_size: 4
warmup_steps: 150