pythia410m-sft-tldr / code /configs /dpo_relabel_summarize_generated_1b_dpo.yml
mnoukhov's picture
Training in progress, step 500
1904ee8 verified
raw
history blame
661 Bytes
# Run configuration for `relabel` mode using a Pythia-1B DPO model together
# with its SFT reference model on a generated TL;DR summarization dataset.
# NOTE(review): the script that consumes this file is not visible here; the
# comments below describe the values and flag assumptions to be confirmed.

# Directory name for this run's outputs. The suffix (`temp0.7_length128`)
# mirrors the suffix of `dataset_name` below.
output_dir: summarize_from_feedback_tldr3_generated_20k_relabel_pythia1b_dpo_temp0.7_length128
# Selects the script's operating mode.
# NOTE(review): presumably "relabel" re-scores the dataset's completions with
# the model / reference-model pair — confirm against the consuming script.
mode: relabel

# Model under evaluation and the revision to load.
# Presumably a Hugging Face Hub repo id and a branch/tag name — TODO confirm.
model_name: vwxyzjn/EleutherAI_pythia-1b-deduped__dpo__tldr
model_revision: dpo__55513__1707379566
# Reference model and revision (the SFT counterpart of the model above,
# judging by the `__sft__` / `__dpo__` naming).
ref_model_name: vwxyzjn/EleutherAI_pythia-1b-deduped__sft__tldr
ref_model_revision: sft__55513__1706646024
# Tokenizer is taken from the base model name rather than from either
# fine-tuned model.
tokenizer_name: EleutherAI/pythia-1b-deduped

# Dataset to process.
dataset_name: mnoukhov/summarize_from_feedback_tldr3_generated_20k_vllm_pythia1b_dpo_temp0.7_length128
# Length limits (presumably in tokens — TODO confirm).
# max_length equals max_prompt_length + max_target_length (512 + 128 = 640);
# keep the three values in sync when changing any of them.
max_prompt_length: 512
max_target_length: 128
max_length: 640
# The split used as the "eval" split is the dataset's `train` split.
# NOTE(review): looks intentional for relabeling training data — confirm.
eval_split: train

# PEFT adapters are disabled.
use_peft: False
# NOTE(review): presumably the DPO beta coefficient — confirm how the
# consuming script uses it in relabel mode.
beta: 0.5
# Precision / quantization: 8-bit loading off, bf16 on, fp16 off.
load_in_8bit: False
bf16: True
fp16: False
per_device_eval_batch_size: 8
# NOTE(review): unclear from this file whether warmup_steps has any effect in
# relabel mode — verify, or remove if unused.
warmup_steps: 150