# File: pythia410m-sft-tldr/code/configs/dpo_eval_costa_1b_fp16.yml
#
# Last commit message: "Training in progress, step 500"
#
# Commit: 1904ee8 (verified), 6 months ago
#
# Size: 781 bytes

	mode: eval
	push_to_hub: False
	gold_eval: none
	## costa stuff
	model_name: vwxyzjn/EleutherAI_pythia-1b-deduped__dpo__tldr
	model_revision: dpo__55513__1707379566
	dataset_name: vwxyzjn/summarize_from_feedback_oai_preprocessing_1706381144
	tokenizer_name: EleutherAI/pythia-1b-deduped
	prompt_field: query
	eval_split: validation
	max_prompt_length: 512
	max_target_length: 169
	max_length: 638
	## hub stuff
	push_to_hub_organization: mnoukhov
	## training stuff
	eval_steps: 0.2
	save_steps: 0.2
	beta: 0.5
	max_steps: -1
	num_train_epochs: 2
	load_in_8bit: False
	bf16: False
	fp16: True
	learning_rate: 1e-5
	use_peft: True
	lora_all_linear: True
	lora_r: 8
	lora_alpha: 32
	lora_dropout: 0.05
	gradient_accumulation_steps: 4
	per_device_train_batch_size: 4
	per_device_eval_batch_size: 4
	warmup_steps: 150