# Flat run configuration: one top-level mapping, one setting per line.
# NOTE(review): the consumer of this file is not visible from here; the
# grouping comments below are inferred from key names only. Confirm against
# the script that loads this config.

# Models and datasets (values look like Hugging Face Hub "owner/name" ids).
model_name: mnoukhov/pythia410m-tldr-sft
dataset_name: mnoukhov/openai_summarize_comparisons_tldrprompt_relabel1b
# Quoted because the value contains '[' and ':'; it must stay a plain string.
# NOTE(review): '[:1]' looks like a slice selecting a single example from the
# split. Confirm this is intentional given max_steps below.
train_split: 'train[:1]'
gold_model_name: mnoukhov/pythia1b-sft-rm-tldrprompt
pseudo_dataset_name: mnoukhov/openai_summarize_generated_20k_relabel_410m_dpo1

# Objective and schedule.
beta: 0.5
max_steps: 10000
eval_steps: 1000

# Precision and quantization flags (canonical lowercase booleans).
load_in_8bit: false
bf16: true
fp16: false

# Written with an explicit mantissa ("1.0e-5", not "1e-5") so that YAML 1.1
# parsers such as PyYAML resolve it as a float rather than a string.
learning_rate: 1.0e-5

# PEFT / LoRA settings.
use_peft: true
lora_all_linear: true
lora_r: 8
lora_alpha: 32
lora_dropout: 0.05

# Batching and warmup.
gradient_accumulation_steps: 4
per_device_train_batch_size: 16
warmup_steps: 150