tim-lawson's picture
Push model using huggingface_hub.
693df6b verified
{
"accumulate_grad_batches": 64,
"auxk": 256,
"auxk_coef": 0.03125,
"batch_size": 1,
"dead_steps_threshold": null,
"dead_threshold": 0.001,
"dead_tokens_threshold": 10000000,
"expansion_factor": 64,
"k": 128,
"layers": null,
"lr": 0.0001,
"max_length": 2048,
"model_name": "EleutherAI/pythia-70m-deduped",
"skip_special_tokens": true,
"standardize": true,
"tuned_lens": true
}