{
  "dataset_reader": {
    "type": "snli",
    "token_indexers": {
      "tokens": {
        "type": "pretrained_transformer",
        "max_length": 512,
        "model_name": "distilroberta-base"
      }
    },
    "tokenizer": {
      "type": "pretrained_transformer",
      "add_special_tokens": false,
      "model_name": "distilroberta-base"
    }
  },
  "model": {
    "type": "basic_classifier",
    "dropout": 0.1,
    "feedforward": {
      "activations": "tanh",
      "hidden_dims": 768,
      "input_dim": 768,
      "num_layers": 1
    },
    "namespace": "tags",
    "seq2vec_encoder": {
      "type": "cls_pooler",
      "embedding_dim": 768
    },
    "text_field_embedder": {
      "token_embedders": {
        "tokens": {
          "type": "pretrained_transformer",
          "max_length": 512,
          "model_name": "distilroberta-base"
        }
      }
    }
  },
  "train_data_path": "data/mnli/multinli_1.0/multinli_1.0_train_10_perc.jsonl",
  "validation_data_path": "data/mnli/multinli_1.0/multinli_1.0_dev_matched_10_perc.jsonl",
  "test_data_path": "data/mnli/multinli_1.0/multinli_1.0_dev_mismatched_10_perc.jsonl",
  "trainer": {
    "learning_rate_scheduler": {
      "type": "slanted_triangular",
      "cut_frac": 0.06
    },
    "num_epochs": 10,
    "optimizer": {
      "type": "huggingface_adamw",
      "lr": 2e-06,
      "weight_decay": 0.1
    },
    "validation_metric": "+accuracy"
  },
  "data_loader": {
    "batch_sampler": {
      "type": "bucket",
      "batch_size": 16
    }
  }
}