{ "activation_function": "gelu", "architectures": [ "DeepShallowModel" ], "attention_heads": 8, "decoder_layers": 6, "dropout": 0.1, "emb_size": 512, "encoder_layers": 12, "ffn_hid_dim": 2048, "max_position_embeddings": 64, "model_type": "transformer", "src_vocab_size": 10000, "tgt_vocab_size": 10000, "torch_dtype": "float32", "transformers_version": "4.17.0" }