webhook-space / chunk_config.json
plaggy's picture
init
97fdba5
raw
history blame
282 Bytes
{
"input_dataset": "sergeipetrov/transformers-diffusers-docs-raw",
"input_splits": ["train"],
"input_text_col": "text",
"output_dataset": "sergeipetrov/transformers-diffusers-docs-chunked",
"strategy": "spacy",
"split_seq": "\n\n",
"chunk_len": 512,
"private": "false"
}