Add chat_template
Browse files- tokenizer_config.json +261 -1
tokenizer_config.json
CHANGED
@@ -2049,13 +2049,273 @@
|
|
2049 |
"special": true
|
2050 |
}
|
2051 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2052 |
"bos_token": "<|begin_of_text|>",
|
|
|
2053 |
"clean_up_tokenization_spaces": true,
|
2054 |
-
"eos_token": "<|end_of_text|>",
|
2055 |
"model_input_names": [
|
2056 |
"input_ids",
|
2057 |
"attention_mask"
|
2058 |
],
|
2059 |
"model_max_length": 1000000000000000019884624838656,
|
|
|
2060 |
"tokenizer_class": "PreTrainedTokenizerFast"
|
2061 |
}
|
|
|
2049 |
"special": true
|
2050 |
}
|
2051 |
},
|
2052 |
+
"additional_special_tokens": [
|
2053 |
+
"<|begin_of_text|>",
|
2054 |
+
"<|end_of_text|>",
|
2055 |
+
"<|reserved_special_token_0|>",
|
2056 |
+
"<|reserved_special_token_1|>",
|
2057 |
+
"<|reserved_special_token_2|>",
|
2058 |
+
"<|reserved_special_token_3|>",
|
2059 |
+
"<|start_header_id|>",
|
2060 |
+
"<|end_header_id|>",
|
2061 |
+
"<|reserved_special_token_4|>",
|
2062 |
+
"<|eot_id|>",
|
2063 |
+
"<|reserved_special_token_5|>",
|
2064 |
+
"<|reserved_special_token_6|>",
|
2065 |
+
"<|reserved_special_token_7|>",
|
2066 |
+
"<|reserved_special_token_8|>",
|
2067 |
+
"<|reserved_special_token_9|>",
|
2068 |
+
"<|reserved_special_token_10|>",
|
2069 |
+
"<|reserved_special_token_11|>",
|
2070 |
+
"<|reserved_special_token_12|>",
|
2071 |
+
"<|reserved_special_token_13|>",
|
2072 |
+
"<|reserved_special_token_14|>",
|
2073 |
+
"<|reserved_special_token_15|>",
|
2074 |
+
"<|reserved_special_token_16|>",
|
2075 |
+
"<|reserved_special_token_17|>",
|
2076 |
+
"<|reserved_special_token_18|>",
|
2077 |
+
"<|reserved_special_token_19|>",
|
2078 |
+
"<|reserved_special_token_20|>",
|
2079 |
+
"<|reserved_special_token_21|>",
|
2080 |
+
"<|reserved_special_token_22|>",
|
2081 |
+
"<|reserved_special_token_23|>",
|
2082 |
+
"<|reserved_special_token_24|>",
|
2083 |
+
"<|reserved_special_token_25|>",
|
2084 |
+
"<|reserved_special_token_26|>",
|
2085 |
+
"<|reserved_special_token_27|>",
|
2086 |
+
"<|reserved_special_token_28|>",
|
2087 |
+
"<|reserved_special_token_29|>",
|
2088 |
+
"<|reserved_special_token_30|>",
|
2089 |
+
"<|reserved_special_token_31|>",
|
2090 |
+
"<|reserved_special_token_32|>",
|
2091 |
+
"<|reserved_special_token_33|>",
|
2092 |
+
"<|reserved_special_token_34|>",
|
2093 |
+
"<|reserved_special_token_35|>",
|
2094 |
+
"<|reserved_special_token_36|>",
|
2095 |
+
"<|reserved_special_token_37|>",
|
2096 |
+
"<|reserved_special_token_38|>",
|
2097 |
+
"<|reserved_special_token_39|>",
|
2098 |
+
"<|reserved_special_token_40|>",
|
2099 |
+
"<|reserved_special_token_41|>",
|
2100 |
+
"<|reserved_special_token_42|>",
|
2101 |
+
"<|reserved_special_token_43|>",
|
2102 |
+
"<|reserved_special_token_44|>",
|
2103 |
+
"<|reserved_special_token_45|>",
|
2104 |
+
"<|reserved_special_token_46|>",
|
2105 |
+
"<|reserved_special_token_47|>",
|
2106 |
+
"<|reserved_special_token_48|>",
|
2107 |
+
"<|reserved_special_token_49|>",
|
2108 |
+
"<|reserved_special_token_50|>",
|
2109 |
+
"<|reserved_special_token_51|>",
|
2110 |
+
"<|reserved_special_token_52|>",
|
2111 |
+
"<|reserved_special_token_53|>",
|
2112 |
+
"<|reserved_special_token_54|>",
|
2113 |
+
"<|reserved_special_token_55|>",
|
2114 |
+
"<|reserved_special_token_56|>",
|
2115 |
+
"<|reserved_special_token_57|>",
|
2116 |
+
"<|reserved_special_token_58|>",
|
2117 |
+
"<|reserved_special_token_59|>",
|
2118 |
+
"<|reserved_special_token_60|>",
|
2119 |
+
"<|reserved_special_token_61|>",
|
2120 |
+
"<|reserved_special_token_62|>",
|
2121 |
+
"<|reserved_special_token_63|>",
|
2122 |
+
"<|reserved_special_token_64|>",
|
2123 |
+
"<|reserved_special_token_65|>",
|
2124 |
+
"<|reserved_special_token_66|>",
|
2125 |
+
"<|reserved_special_token_67|>",
|
2126 |
+
"<|reserved_special_token_68|>",
|
2127 |
+
"<|reserved_special_token_69|>",
|
2128 |
+
"<|reserved_special_token_70|>",
|
2129 |
+
"<|reserved_special_token_71|>",
|
2130 |
+
"<|reserved_special_token_72|>",
|
2131 |
+
"<|reserved_special_token_73|>",
|
2132 |
+
"<|reserved_special_token_74|>",
|
2133 |
+
"<|reserved_special_token_75|>",
|
2134 |
+
"<|reserved_special_token_76|>",
|
2135 |
+
"<|reserved_special_token_77|>",
|
2136 |
+
"<|reserved_special_token_78|>",
|
2137 |
+
"<|reserved_special_token_79|>",
|
2138 |
+
"<|reserved_special_token_80|>",
|
2139 |
+
"<|reserved_special_token_81|>",
|
2140 |
+
"<|reserved_special_token_82|>",
|
2141 |
+
"<|reserved_special_token_83|>",
|
2142 |
+
"<|reserved_special_token_84|>",
|
2143 |
+
"<|reserved_special_token_85|>",
|
2144 |
+
"<|reserved_special_token_86|>",
|
2145 |
+
"<|reserved_special_token_87|>",
|
2146 |
+
"<|reserved_special_token_88|>",
|
2147 |
+
"<|reserved_special_token_89|>",
|
2148 |
+
"<|reserved_special_token_90|>",
|
2149 |
+
"<|reserved_special_token_91|>",
|
2150 |
+
"<|reserved_special_token_92|>",
|
2151 |
+
"<|reserved_special_token_93|>",
|
2152 |
+
"<|reserved_special_token_94|>",
|
2153 |
+
"<|reserved_special_token_95|>",
|
2154 |
+
"<|reserved_special_token_96|>",
|
2155 |
+
"<|reserved_special_token_97|>",
|
2156 |
+
"<|reserved_special_token_98|>",
|
2157 |
+
"<|reserved_special_token_99|>",
|
2158 |
+
"<|reserved_special_token_100|>",
|
2159 |
+
"<|reserved_special_token_101|>",
|
2160 |
+
"<|reserved_special_token_102|>",
|
2161 |
+
"<|reserved_special_token_103|>",
|
2162 |
+
"<|reserved_special_token_104|>",
|
2163 |
+
"<|reserved_special_token_105|>",
|
2164 |
+
"<|reserved_special_token_106|>",
|
2165 |
+
"<|reserved_special_token_107|>",
|
2166 |
+
"<|reserved_special_token_108|>",
|
2167 |
+
"<|reserved_special_token_109|>",
|
2168 |
+
"<|reserved_special_token_110|>",
|
2169 |
+
"<|reserved_special_token_111|>",
|
2170 |
+
"<|reserved_special_token_112|>",
|
2171 |
+
"<|reserved_special_token_113|>",
|
2172 |
+
"<|reserved_special_token_114|>",
|
2173 |
+
"<|reserved_special_token_115|>",
|
2174 |
+
"<|reserved_special_token_116|>",
|
2175 |
+
"<|reserved_special_token_117|>",
|
2176 |
+
"<|reserved_special_token_118|>",
|
2177 |
+
"<|reserved_special_token_119|>",
|
2178 |
+
"<|reserved_special_token_120|>",
|
2179 |
+
"<|reserved_special_token_121|>",
|
2180 |
+
"<|reserved_special_token_122|>",
|
2181 |
+
"<|reserved_special_token_123|>",
|
2182 |
+
"<|reserved_special_token_124|>",
|
2183 |
+
"<|reserved_special_token_125|>",
|
2184 |
+
"<|reserved_special_token_126|>",
|
2185 |
+
"<|reserved_special_token_127|>",
|
2186 |
+
"<|reserved_special_token_128|>",
|
2187 |
+
"<|reserved_special_token_129|>",
|
2188 |
+
"<|reserved_special_token_130|>",
|
2189 |
+
"<|reserved_special_token_131|>",
|
2190 |
+
"<|reserved_special_token_132|>",
|
2191 |
+
"<|reserved_special_token_133|>",
|
2192 |
+
"<|reserved_special_token_134|>",
|
2193 |
+
"<|reserved_special_token_135|>",
|
2194 |
+
"<|reserved_special_token_136|>",
|
2195 |
+
"<|reserved_special_token_137|>",
|
2196 |
+
"<|reserved_special_token_138|>",
|
2197 |
+
"<|reserved_special_token_139|>",
|
2198 |
+
"<|reserved_special_token_140|>",
|
2199 |
+
"<|reserved_special_token_141|>",
|
2200 |
+
"<|reserved_special_token_142|>",
|
2201 |
+
"<|reserved_special_token_143|>",
|
2202 |
+
"<|reserved_special_token_144|>",
|
2203 |
+
"<|reserved_special_token_145|>",
|
2204 |
+
"<|reserved_special_token_146|>",
|
2205 |
+
"<|reserved_special_token_147|>",
|
2206 |
+
"<|reserved_special_token_148|>",
|
2207 |
+
"<|reserved_special_token_149|>",
|
2208 |
+
"<|reserved_special_token_150|>",
|
2209 |
+
"<|reserved_special_token_151|>",
|
2210 |
+
"<|reserved_special_token_152|>",
|
2211 |
+
"<|reserved_special_token_153|>",
|
2212 |
+
"<|reserved_special_token_154|>",
|
2213 |
+
"<|reserved_special_token_155|>",
|
2214 |
+
"<|reserved_special_token_156|>",
|
2215 |
+
"<|reserved_special_token_157|>",
|
2216 |
+
"<|reserved_special_token_158|>",
|
2217 |
+
"<|reserved_special_token_159|>",
|
2218 |
+
"<|reserved_special_token_160|>",
|
2219 |
+
"<|reserved_special_token_161|>",
|
2220 |
+
"<|reserved_special_token_162|>",
|
2221 |
+
"<|reserved_special_token_163|>",
|
2222 |
+
"<|reserved_special_token_164|>",
|
2223 |
+
"<|reserved_special_token_165|>",
|
2224 |
+
"<|reserved_special_token_166|>",
|
2225 |
+
"<|reserved_special_token_167|>",
|
2226 |
+
"<|reserved_special_token_168|>",
|
2227 |
+
"<|reserved_special_token_169|>",
|
2228 |
+
"<|reserved_special_token_170|>",
|
2229 |
+
"<|reserved_special_token_171|>",
|
2230 |
+
"<|reserved_special_token_172|>",
|
2231 |
+
"<|reserved_special_token_173|>",
|
2232 |
+
"<|reserved_special_token_174|>",
|
2233 |
+
"<|reserved_special_token_175|>",
|
2234 |
+
"<|reserved_special_token_176|>",
|
2235 |
+
"<|reserved_special_token_177|>",
|
2236 |
+
"<|reserved_special_token_178|>",
|
2237 |
+
"<|reserved_special_token_179|>",
|
2238 |
+
"<|reserved_special_token_180|>",
|
2239 |
+
"<|reserved_special_token_181|>",
|
2240 |
+
"<|reserved_special_token_182|>",
|
2241 |
+
"<|reserved_special_token_183|>",
|
2242 |
+
"<|reserved_special_token_184|>",
|
2243 |
+
"<|reserved_special_token_185|>",
|
2244 |
+
"<|reserved_special_token_186|>",
|
2245 |
+
"<|reserved_special_token_187|>",
|
2246 |
+
"<|reserved_special_token_188|>",
|
2247 |
+
"<|reserved_special_token_189|>",
|
2248 |
+
"<|reserved_special_token_190|>",
|
2249 |
+
"<|reserved_special_token_191|>",
|
2250 |
+
"<|reserved_special_token_192|>",
|
2251 |
+
"<|reserved_special_token_193|>",
|
2252 |
+
"<|reserved_special_token_194|>",
|
2253 |
+
"<|reserved_special_token_195|>",
|
2254 |
+
"<|reserved_special_token_196|>",
|
2255 |
+
"<|reserved_special_token_197|>",
|
2256 |
+
"<|reserved_special_token_198|>",
|
2257 |
+
"<|reserved_special_token_199|>",
|
2258 |
+
"<|reserved_special_token_200|>",
|
2259 |
+
"<|reserved_special_token_201|>",
|
2260 |
+
"<|reserved_special_token_202|>",
|
2261 |
+
"<|reserved_special_token_203|>",
|
2262 |
+
"<|reserved_special_token_204|>",
|
2263 |
+
"<|reserved_special_token_205|>",
|
2264 |
+
"<|reserved_special_token_206|>",
|
2265 |
+
"<|reserved_special_token_207|>",
|
2266 |
+
"<|reserved_special_token_208|>",
|
2267 |
+
"<|reserved_special_token_209|>",
|
2268 |
+
"<|reserved_special_token_210|>",
|
2269 |
+
"<|reserved_special_token_211|>",
|
2270 |
+
"<|reserved_special_token_212|>",
|
2271 |
+
"<|reserved_special_token_213|>",
|
2272 |
+
"<|reserved_special_token_214|>",
|
2273 |
+
"<|reserved_special_token_215|>",
|
2274 |
+
"<|reserved_special_token_216|>",
|
2275 |
+
"<|reserved_special_token_217|>",
|
2276 |
+
"<|reserved_special_token_218|>",
|
2277 |
+
"<|reserved_special_token_219|>",
|
2278 |
+
"<|reserved_special_token_220|>",
|
2279 |
+
"<|reserved_special_token_221|>",
|
2280 |
+
"<|reserved_special_token_222|>",
|
2281 |
+
"<|reserved_special_token_223|>",
|
2282 |
+
"<|reserved_special_token_224|>",
|
2283 |
+
"<|reserved_special_token_225|>",
|
2284 |
+
"<|reserved_special_token_226|>",
|
2285 |
+
"<|reserved_special_token_227|>",
|
2286 |
+
"<|reserved_special_token_228|>",
|
2287 |
+
"<|reserved_special_token_229|>",
|
2288 |
+
"<|reserved_special_token_230|>",
|
2289 |
+
"<|reserved_special_token_231|>",
|
2290 |
+
"<|reserved_special_token_232|>",
|
2291 |
+
"<|reserved_special_token_233|>",
|
2292 |
+
"<|reserved_special_token_234|>",
|
2293 |
+
"<|reserved_special_token_235|>",
|
2294 |
+
"<|reserved_special_token_236|>",
|
2295 |
+
"<|reserved_special_token_237|>",
|
2296 |
+
"<|reserved_special_token_238|>",
|
2297 |
+
"<|reserved_special_token_239|>",
|
2298 |
+
"<|reserved_special_token_240|>",
|
2299 |
+
"<|reserved_special_token_241|>",
|
2300 |
+
"<|reserved_special_token_242|>",
|
2301 |
+
"<|reserved_special_token_243|>",
|
2302 |
+
"<|reserved_special_token_244|>",
|
2303 |
+
"<|reserved_special_token_245|>",
|
2304 |
+
"<|reserved_special_token_246|>",
|
2305 |
+
"<|reserved_special_token_247|>",
|
2306 |
+
"<|reserved_special_token_248|>",
|
2307 |
+
"<|reserved_special_token_249|>",
|
2308 |
+
"<|reserved_special_token_250|>"
|
2309 |
+
],
|
2310 |
"bos_token": "<|begin_of_text|>",
|
2311 |
+
"chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
|
2312 |
"clean_up_tokenization_spaces": true,
|
2313 |
+
"eos_token": "<|eot_id|>",
|
2314 |
"model_input_names": [
|
2315 |
"input_ids",
|
2316 |
"attention_mask"
|
2317 |
],
|
2318 |
"model_max_length": 1000000000000000019884624838656,
|
2319 |
+
"pad_token": "<|end_of_text|>",
|
2320 |
"tokenizer_class": "PreTrainedTokenizerFast"
|
2321 |
}
|