# Flat training/evaluation configuration: one top-level key per line.
# Scalars are written canonically (true/false, floats with an explicit
# mantissa dot) so YAML 1.1 and YAML 1.2 loaders agree on their types.

## costa stuff
# NOTE(review): the `org/name` values are presumably model/dataset hub ids
# -- confirm against the consuming script.
model_name: sophiex/pythia-2.8b-sft_hh_rlhf
# model_revision: null
dataset_name: sophiex/hh-rlhf
tokenizer_name: EleutherAI/pythia-2.8b-deduped
prompt_field: prompt
eval_split: test
# max_length equals max_prompt_length + max_target_length (256 + 256).
max_prompt_length: 256
max_target_length: 256
max_length: 512
lr_scheduler_type: cosine

## hub stuff
push_to_hub: true
push_to_hub_organization: mnoukhov

## training stuff
save_strategy: steps
gold_eval: ppl
gold_dataset_name: sophiex/hh-rlhf
gold_target_field: chosen
gold_eval_split: test
# NOTE(review): fractional values here are presumably read as a ratio of
# total training steps -- confirm against the consuming trainer.
eval_steps: 0.2
save_steps: 0.2
beta: 0.1
# NOTE(review): -1 presumably means "no step cap", leaving
# num_train_epochs to bound training -- confirm.
max_steps: -1
num_train_epochs: 1
load_in_8bit: false
bf16: false
fp16: true
# Explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML) resolve a bare
# `1e-5` as a string rather than a float.
learning_rate: 1.0e-5
use_peft: true
lora_r: 16
lora_alpha: 32
lora_dropout: 0.0
gradient_accumulation_steps: 4
per_device_train_batch_size: 4
per_device_eval_batch_size: 4