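# config.yaml -- Joey NMT v2.3.0 configuration for the IWSLT14 de/en/fr
# multilingual prompt model. A minimal usage sketch, assuming the standard
# Joey NMT command-line interface:
#   python -m joeynmt train config.yaml    # train the model described below
#   python -m joeynmt test config.yaml     # evaluate on the test split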
name: "iwslt14_deenfr_prompt"
joeynmt_version: "2.3.0"
model_dir: "iwslt14_prompt"
use_cuda: True
fp16: True
random_seed: 42
data:
    #train: "iwslt14_prompt/train"    # cf. https://wit3.fbk.eu/2014-01
    #dev: "iwslt14_prompt/dev"
    test: "iwslt14_prompt/test.ref.de-en"    # ['TED.dev2010', 'TEDX.dev2012', 'TED.tst2010', 'TED.tst2011', 'TED.tst2012']
    dataset_type: "tsv"
    sample_dev_subset: 500
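    # The tsv files are expected to hold one example per row, with columns
    # named after the `lang` fields of the src/trg sections below ("src" and
    # "trg"). A purely hypothetical row for an en->de example might look like
    #   <en> Hello world.<TAB><de> Hallo Welt.
    # (illustrative only -- see the Joey NMT data docs for the exact prompt format)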
    src:
        lang: "src"
        max_length: 512
        lowercase: False
        normalize: False
        level: "bpe"
        voc_limit: 32000
        voc_min_freq: 1
        voc_file: "iwslt14_prompt/src_vocab.txt"
        tokenizer_type: "sentencepiece"
        tokenizer_cfg:
            model_file: "iwslt14_prompt/sp.model"
            model_type: "unigram"
            character_coverage: 1.0
    trg:
        lang: "trg"
        max_length: 512
        lowercase: False
        normalize: False
        level: "bpe"
        voc_limit: 32000
        voc_min_freq: 1
        voc_file: "iwslt14_prompt/trg_vocab.txt"
        tokenizer_type: "sentencepiece"
        tokenizer_cfg:
            model_file: "iwslt14_prompt/sp.model"
            model_type: "unigram"
            character_coverage: 1.0
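    # Both sides share one sentencepiece model (sp.model), i.e. a joint
    # subword vocabulary; the two vocab files should then be identical in
    # content, which the tied_embeddings option under `model` below requires.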
    special_symbols:
        unk_token: "<unk>"
        unk_id: 0
        pad_token: "<pad>"
        pad_id: 1
        bos_token: "<s>"
        bos_id: 2
        eos_token: "</s>"
        eos_id: 3
        sep_token: "<sep>"
        sep_id: 4
        lang_tags: ["<de>", "<en>", "<fr>"]
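    # The special symbols above occupy the fixed ids 0-4 in both vocabularies;
    # the language tags are presumably used as prompts telling the model which
    # of de/en/fr to produce, with <sep> separating the prompt from the sentence.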
testing:
    load_model: "iwslt14_prompt/avg5.ckpt"
    n_best: 1
    beam_size: 5
    beam_alpha: 1.0
    batch_size: 32
    batch_type: "sentence"
    max_output_length: 512
    eval_metrics: ["bleu"]
    sacrebleu_cfg:
        tokenize: "13a"
        lowercase: True
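# A decoding sketch with these settings, assuming the standard Joey NMT CLI
# ("avg5.ckpt" is presumably an average of the 5 best checkpoints kept via
# keep_best_ckpts in the training section):
#   python -m joeynmt translate config.yaml < input.txt > hypotheses.txt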
training:
    #load_model: "iwslt14_prompt/latest.ckpt"
    #reset_best_ckpt: True
    #reset_scheduler: True
    #reset_optimizer: True
    #reset_iter_state: True
    optimizer: "adamw"
    normalization: "tokens"
    adam_betas: [0.9, 0.98]
    scheduling: "warmupinversesquareroot"
    learning_rate_warmup: 10000
    learning_rate: 0.0002
    learning_rate_min: 0.0000001
    weight_decay: 0.001
    label_smoothing: 0.1
    loss: "crossentropy"
    batch_size: 32
    batch_type: "sentence"
    batch_multiplier: 4
    early_stopping_metric: "bleu"
    epochs: 50
    validation_freq: 1000
    logging_freq: 100
    overwrite: False
    shuffle: True
    print_valid_sents: [0, 1, 2, 3]
    keep_best_ckpts: 5
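# With batch_multiplier, gradients are accumulated over 4 mini-batches, so the
# effective batch size per update is batch_size * batch_multiplier
# = 32 * 4 = 128 sentences.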
model:
    initializer: "xavier_uniform"
    bias_initializer: "zeros"
    init_gain: 1.0
    embed_initializer: "xavier_uniform"
    embed_init_gain: 1.0
    tied_embeddings: True
    tied_softmax: True
    encoder:
        type: "transformer"
        num_layers: 6
        num_heads: 8
        embeddings:
            embedding_dim: 1024
            scale: True
            dropout: 0.1
        # typically ff_size = 4 x hidden_size
        hidden_size: 1024
        ff_size: 4096
        dropout: 0.1
        layer_norm: "pre"
        activation: "relu"
    decoder:
        type: "transformer"
        num_layers: 6
        num_heads: 8
        embeddings:
            embedding_dim: 1024
            scale: True
            dropout: 0.1
        # typically ff_size = 4 x hidden_size
        hidden_size: 1024
        ff_size: 4096
        dropout: 0.1
        layer_norm: "pre"
        activation: "relu"
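    # Encoder and decoder both use the "Transformer big" width (hidden_size
    # 1024, ff_size 4096 = 4 x 1024) with pre-layer-norm; tied_embeddings /
    # tied_softmax above share one weight matrix between the source embeddings,
    # target embeddings and the output (softmax) layer.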