MEDIA_NLU-flaubert_oral_mixed / tokenizer_config.json
{
  "additional_special_tokens": [
    "<special0>",
    "<special1>",
    "<special2>",
    "<special3>",
    "<special4>",
    "<special5>",
    "<special6>",
    "<special7>",
    "<special8>",
    "<special9>"
  ],
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": true,
  "cls_token": "</s>",
  "do_lower_case": true,
  "id2lang": null,
  "keep_accents": true,
  "lang2id": null,
  "mask_token": "<special1>",
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "<pad>",
  "sep_token": "</s>",
  "tokenizer_class": "FlaubertTokenizer",
  "tokenizer_file": null,
  "unk_token": "<unk>"
}
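A config like this is consumed by transformers when the tokenizer is loaded from the hub. Below is a minimal sketch, assuming the repository id is "vpelloin/MEDIA_NLU-flaubert_oral_mixed" (inferred from the page path; adjust if the repo lives elsewhere):

# A minimal loading sketch; the repo id is an assumption inferred from the page path.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("vpelloin/MEDIA_NLU-flaubert_oral_mixed")

# do_lower_case=true and keep_accents=true from this config are applied automatically,
# so accented French input is lowercased but keeps its diacritics.
encoded = tokenizer("Bonjour, je voudrais réserver une chambre.")
print(encoded["input_ids"])

Note that "model_max_length" holds transformers' "no limit configured" sentinel (roughly 1e30), so inputs are not truncated by default; pass truncation=True with an explicit max_length if the downstream model has a fixed position limit.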