name: "iwslt14_deenfr_prompt" joeynmt_version: "2.3.0" model_dir: "iwslt14_prompt" use_cuda: True fp16: True random_seed: 42 data: #train: "iwslt14_prompt/train" # cf. https://wit3.fbk.eu/2014-01 #dev: "iwslt14_prompt/dev" test: "iwslt14_prompt/test.ref.de-en" # ['TED.dev2010', 'TEDX.dev2012', 'TED.tst2010', 'TED.tst2011', 'TED.tst2012'] dataset_type: "tsv" sample_dev_subset: 500 src: lang: "src" max_length: 512 lowercase: False normalize: False level: "bpe" voc_limit: 32000 voc_min_freq: 1 voc_file: "iwslt14_prompt/src_vocab.txt" tokenizer_type: "sentencepiece" tokenizer_cfg: model_file: "iwslt14_prompt/sp.model" model_type: "unigram" character_coverage: 1.0 trg: lang: "trg" max_length: 512 lowercase: False normalize: False level: "bpe" voc_limit: 32000 voc_min_freq: 1 voc_file: "iwslt14_prompt/trg_vocab.txt" tokenizer_type: "sentencepiece" tokenizer_cfg: model_file: "iwslt14_prompt/sp.model" model_type: "unigram" character_coverage: 1.0 special_symbols: unk_token: "" unk_id: 0 pad_token: "" pad_id: 1 bos_token: "" bos_id: 2 eos_token: "" eos_id: 3 sep_token: "" sep_id: 4 lang_tags: ["", "", ""] testing: load_model: "iwslt14_prompt/avg5.ckpt" n_best: 1 beam_size: 5 beam_alpha: 1.0 batch_size: 32 batch_type: "sentence" max_output_length: 512 eval_metrics: ["bleu"] sacrebleu_cfg: tokenize: "13a" lowercase: True training: #load_model: "iwslt14_prompt/latest.ckpt" #reset_best_ckpt: True #reset_scheduler: True #reset_optimizer: True #reset_iter_state: True optimizer: "adamw" normalization: "tokens" adam_betas: [0.9, 0.98] scheduling: "warmupinversesquareroot" learning_rate_warmup: 10000 learning_rate: 0.0002 learning_rate_min: 0.0000001 weight_decay: 0.001 label_smoothing: 0.1 loss: "crossentropy" batch_size: 32 batch_type: "sentence" batch_multiplier: 4 early_stopping_metric: "bleu" epochs: 50 validation_freq: 1000 logging_freq: 100 overwrite: False shuffle: True print_valid_sents: [0, 1, 2, 3] keep_best_ckpts: 5 model: initializer: "xavier_uniform" bias_initializer: "zeros" init_gain: 1.0 embed_initializer: "xavier_uniform" embed_init_gain: 1.0 tied_embeddings: True tied_softmax: True encoder: type: "transformer" num_layers: 6 num_heads: 8 embeddings: embedding_dim: 1024 scale: True dropout: 0.1 # typically ff_size = 4 x hidden_size hidden_size: 1024 ff_size: 4096 dropout: 0.1 layer_norm: "pre" activation: "relu" decoder: type: "transformer" num_layers: 6 num_heads: 8 embeddings: embedding_dim: 1024 scale: True dropout: 0.1 # typically ff_size = 4 x hidden_size hidden_size: 1024 ff_size: 4096 dropout: 0.1 layer_norm: "pre" activation: "relu"