name: "iwslt14_deenfr_prompt"
joeynmt_version: "2.3.0"
model_dir: "iwslt14_prompt"
use_cuda: True
fp16: True
random_seed: 42
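
# Data: IWSLT14 de/en/fr read from TSV files whose columns are named generically
# ("src"/"trg") rather than by language. Source and target share one SentencePiece
# unigram model and 32k vocabularies; the <de>/<en>/<fr> lang_tags declared under
# special_symbols presumably act as prompt tokens selecting the output language,
# with <sep> separating the prompt from the sentence itself.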
data:

    test: "iwslt14_prompt/test.ref.de-en"
    dataset_type: "tsv"
    sample_dev_subset: 500
    src:
        lang: "src"
        max_length: 512
        lowercase: False
        normalize: False
        level: "bpe"
        voc_limit: 32000
        voc_min_freq: 1
        voc_file: "iwslt14_prompt/src_vocab.txt"
        tokenizer_type: "sentencepiece"
        tokenizer_cfg:
            model_file: "iwslt14_prompt/sp.model"
            model_type: "unigram"
            character_coverage: 1.0
    trg:
        lang: "trg"
        max_length: 512
        lowercase: False
        normalize: False
        level: "bpe"
        voc_limit: 32000
        voc_min_freq: 1
        voc_file: "iwslt14_prompt/trg_vocab.txt"
        tokenizer_type: "sentencepiece"
        tokenizer_cfg:
            model_file: "iwslt14_prompt/sp.model"
            model_type: "unigram"
            character_coverage: 1.0
    special_symbols:
        unk_token: "<unk>"
        unk_id: 0
        pad_token: "<pad>"
        pad_id: 1
        bos_token: "<s>"
        bos_id: 2
        eos_token: "</s>"
        eos_id: 3
        sep_token: "<sep>"
        sep_id: 4
        lang_tags: ["<de>", "<en>", "<fr>"]
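
# Decoding: beam search (size 5, length penalty alpha 1.0), evaluated with
# lowercased, 13a-tokenized sacreBLEU. "avg5.ckpt" is presumably an average of
# the best checkpoints kept during training (keep_best_ckpts: 5).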
testing:
    load_model: "iwslt14_prompt/avg5.ckpt"
    n_best: 1
    beam_size: 5
    beam_alpha: 1.0
    batch_size: 32
    batch_type: "sentence"
    max_output_length: 512
    eval_metrics: ["bleu"]
    sacrebleu_cfg:
        tokenize: "13a"
        lowercase: True
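
# Training: AdamW with an inverse-square-root schedule warmed up over 10k steps,
# token-level loss normalization and label smoothing of 0.1. With
# batch_multiplier: 4, gradients are accumulated over 4 batches, giving an
# effective batch of 4 x 32 = 128 sentences per update.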
training:

    optimizer: "adamw"
    normalization: "tokens"
    adam_betas: [0.9, 0.98]
    scheduling: "warmupinversesquareroot"
    learning_rate_warmup: 10000
    learning_rate: 0.0002
    learning_rate_min: 0.0000001
    weight_decay: 0.001
    label_smoothing: 0.1
    loss: "crossentropy"
    batch_size: 32
    batch_type: "sentence"
    batch_multiplier: 4
    early_stopping_metric: "bleu"
    epochs: 50
    validation_freq: 1000
    logging_freq: 100
    overwrite: False
    shuffle: True
    print_valid_sents: [0, 1, 2, 3]
    keep_best_ckpts: 5
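
# Model: a pre-layer-norm Transformer with 6 encoder and 6 decoder layers,
# 8 heads, hidden size 1024 and feed-forward size 4096 (the usual 4x ratio).
# tied_embeddings shares source and target embeddings, which relies on the
# shared SentencePiece model and matching vocabularies configured above;
# tied_softmax reuses the target embedding matrix as the output projection.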
model:
    initializer: "xavier_uniform"
    bias_initializer: "zeros"
    init_gain: 1.0
    embed_initializer: "xavier_uniform"
    embed_init_gain: 1.0
    tied_embeddings: True
    tied_softmax: True
    encoder:
        type: "transformer"
        num_layers: 6
        num_heads: 8
        embeddings:
            embedding_dim: 1024
            scale: True
            dropout: 0.1

        hidden_size: 1024
        ff_size: 4096
        dropout: 0.1
        layer_norm: "pre"
        activation: "relu"
    decoder:
        type: "transformer"
        num_layers: 6
        num_heads: 8
        embeddings:
            embedding_dim: 1024
            scale: True
            dropout: 0.1

        hidden_size: 1024
        ff_size: 4096
        dropout: 0.1
        layer_norm: "pre"
        activation: "relu"
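
# Typical usage with the JoeyNMT command-line interface (the config filename
# below is illustrative; point it at wherever this file is saved):
#   python -m joeynmt train iwslt14_prompt.yaml   # train into model_dir
#   python -m joeynmt test  iwslt14_prompt.yaml   # decode the test set,
#                                                 # loading testing.load_model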