pythia410m-sft-tldr / code /configs /create_rlhf_410m.yml
mnoukhov's picture
Training in progress, step 500
1904ee8 verified
raw
history blame contribute delete
310 Bytes
# Settings for the "create_rlhf" step, 410M variant (per the file name).
# NOTE(review): the script that consumes this file is not part of this config;
# the per-key notes below are inferred from key names and values only --
# confirm against the loader before relying on them.

# Output location. Absolute path on the authoring machine
# (/home/toolkit/...); presumably needs changing when run elsewhere.
output_dir: /home/toolkit/huggingface/openai_summarize_tldr_rbaseline
# Dataset split selectors. `valid[:2000]` looks like Hugging Face `datasets`
# slice syntax (first 2000 rows of the `valid` split) -- TODO confirm the
# loader passes this string through to `load_dataset(..., split=...)`.
train_split: train
eval_split: valid[:2000]
###
# Model identifier in `namespace/name` form (presumably a Hugging Face Hub
# repo id; the name suggests a reward-model adapter -- verify).
model_name: mnoukhov/pythia410m-tldr-sft-rm-adapter
# Presumably the name of the column the step adds to the dataset; it matches
# the `_rbaseline` suffix of `output_dir` -- verify against the consumer.
new_column_name: reward_baseline
# Dataset identifier in `namespace/name` form (presumably a Hub dataset id).
dataset_name: CarperAI/openai_summarize_tldr
# Precision / quantization toggles: 8-bit loading is off, fp16 is on.
# NOTE(review): `True`/`False` are capitalized; YAML 1.1 loaders such as PyYAML
# read these as booleans -- confirm the loader in use does the same.
load_in_8bit: False
fp16: True
# Batch size; whether it is per-device or global is not visible here.
batch_size: 32
# Length limit; units are presumably tokens -- TODO confirm.
max_length: 560