Upload tokenizer

Files changed (5) hide show

added_tokens.json ADDED Viewed

+{
+  "<|endoftext|>": 151643,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644
+}

merges.txt CHANGED Viewed

@@ -1,3 +1,4 @@
 Ġ Ġ
 ĠĠ ĠĠ
 i n

+#version: 0.2
 Ġ Ġ
 ĠĠ ĠĠ
 i n

special_tokens_map.json ADDED Viewed

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|endoftext|>"
+}

tokenizer.json CHANGED Viewed

@@ -1,7 +1,21 @@
 {
   "version": "1.0",
-  "truncation": null,
-  "padding": null,
   "added_tokens": [
     {
       "id": 151643,
@@ -73,6 +87,7 @@
     "end_of_word_suffix": "",
     "fuse_unk": false,
     "byte_fallback": false,
     "vocab": {
       "!": 0,
       "\"": 1,

 {
   "version": "1.0",
+  "truncation": {
+    "direction": "Right",
+    "max_length": 1536,
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
+  "padding": {
+    "strategy": {
+      "Fixed": 1536
+    },
+    "direction": "Right",
+    "pad_to_multiple_of": null,
+    "pad_id": 151643,
+    "pad_type_id": 0,
+    "pad_token": "<|endoftext|>"
+  },
   "added_tokens": [
     {
       "id": 151643,
     "end_of_word_suffix": "",
     "fuse_unk": false,
     "byte_fallback": false,
+    "ignore_merges": false,
     "vocab": {
       "!": 0,
       "\"": 1,

tokenizer_config.json CHANGED Viewed

@@ -26,7 +26,10 @@
       "special": true
     }
   },
-  "additional_special_tokens": ["<|im_start|>", "<|im_end|>"],
   "bos_token": null,
   "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,

       "special": true
     }
   },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>"
+  ],
   "bos_token": null,
   "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,