{
  "bf16": true,
  "dataset_eval_split": "validation",
  "dataset_name": "vwxyzjn/summarize_from_feedback_tldr_3_filtered_oai_preprocessing_1706381144",
  "ddp_find_unused_parameters": false,
  "eval_steps": 0.2,
  "evaluation_strategy": "steps",
  "git": "7906781",
  "gradient_accumulation_steps": 4,
  "gradient_checkpointing": false,
  "hub_model_id": "mnoukhov/pythia410m-sft-tldr",
  "learning_rate": 3e-6,
  "logging_steps": 100,
  "lr_scheduler_type": "cosine",
  "max_seq_length": 580,
  "model_name": "EleutherAI/pythia-410m-deduped",
  "name": "newsft_pythia410m_tldr.yml",
  "num_train_epochs": 1,
  "per_device_eval_batch_size": 8,
  "per_device_train_batch_size": 32,
  "push_to_hub": true,
  "report_to": "wandb",
  "task_type": "tldr"
}