# Training run configuration: one flat mapping of option name -> value.
# NOTE(review): the meaning of each option is defined by the consuming
# training script, which is not visible here; the group headings below are
# inferred from key names only — confirm against that script.

# Model / dataset identifiers (presumably hub repository ids — verify).
model_name: mnoukhov/pythia410m-tldr-sft
dataset_name: mnoukhov/openai_summarize_vllm_generated_20k_label410m
gold_model_name: mnoukhov/pythia1b-sft-rm-tldrprompt

# Objective coefficient (semantics depend on the trainer — TODO confirm).
beta: 0.5

# Step counts.
max_steps: 10000
eval_steps: 1000
warmup_steps: 150

# Quantization / numeric precision switches.
load_in_8bit: false
bf16: false
fp16: true

# Written with an explicit decimal point: a bare `1e-5` is resolved as a
# *string* by YAML 1.1 loaders such as PyYAML, whereas `1.0e-5` is a float
# under both YAML 1.1 and YAML 1.2.
learning_rate: 1.0e-5

# PEFT / LoRA options.
use_peft: true
lora_all_linear: true
lora_r: 8
lora_alpha: 32
lora_dropout: 0.05

# Batching.
gradient_accumulation_steps: 4
per_device_train_batch_size: 4