mtasic85 committed on
Commit
54c27fe
1 Parent(s): 80f3ec1

new tokenizer 38400

Browse files
merges.txt CHANGED
@@ -36637,53 +36637,3 @@ aki w
36637
  ĠÛģÙĪ تا
36638
  ari hi
36639
  pa i
36640
- zi y
36641
- Ġvolt ages
36642
- d achadh
36643
- Ġt xim
36644
- Ġб аÑĤ
36645
- ĠáĭŃáĪħ áĪĿ
36646
- ù n
36647
- ê¯ģ ê¯ħê¯ģ
36648
- 模 åŀĭ
36649
- omb iso
36650
- Ġà¤ķ à¤Ł
36651
- Ħì Ľ
36652
- er at
36653
- Ġelect rod
36654
- Ġin sect
36655
- Ġcomp ares
36656
- ĠÑĩ ел
36657
- Ġeng els
36658
- Ġgi hatag
36659
- ĠG ẹẹsi
36660
- ×ķ׳ ×Ļ
36661
- ul sion
36662
- ش ر
36663
- аÑĨ а
36664
- Ġh ire
36665
- us oro
36666
- ĠBo oks
36667
- رÙĪ Ùģ
36668
- en ç
36669
- Ġter ra
36670
- Ġa za
36671
- Ġd ance
36672
- Ġin zawm
36673
- Ġverb ess
36674
- Ġuk w
36675
- ĠB io
36676
- Ġng ak
36677
- r ise
36678
- ĠìĹ Ĩ
36679
- Ġding we
36680
- áŀ ¯
36681
- Ġf inger
36682
- ĠP ul
36683
- Ġv ann
36684
- Ġsap ertos
36685
- Pro bability
36686
- Ġlic ense
36687
- Ġclos ing
36688
- Ġs ow
36689
- Ġs amp
 
36637
  ĠÛģÙĪ تا
36638
  ari hi
36639
  pa i
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/prepare_pretrain_dataset.py CHANGED
@@ -20,7 +20,13 @@ def batch_iterator(name=None):
20
  for d in dataset:
21
  for row in d:
22
  for n in row:
23
- yield row['instruction'] + ' ' + row['input'] + ' ' + row['output']
 
 
 
 
 
 
24
 
25
  del dataset
26
  gc.collect()
@@ -57,19 +63,40 @@ def batch_iterator(name=None):
57
  if name in (None, 'ontocord/fineweb-permissive-multilingual-2m'):
58
  dataset = load_dataset(name, split='train')
59
 
60
- for row in dataset:
61
- yield row['text']
62
 
63
  del dataset
64
  gc.collect()
65
 
66
  # text
67
- if name in (None, 'nampdn-ai/tiny-textbooks'):
 
 
 
 
 
 
 
 
 
 
68
  for split in ['train', 'test']:
69
  dataset = load_dataset(name, split=split)
70
 
71
  for row in dataset:
72
- yield row['textbook']
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  del dataset
75
  gc.collect()
@@ -79,7 +106,11 @@ def batch_iterator(name=None):
79
  dataset = load_dataset(name, split='train')
80
 
81
  for row in dataset:
82
- yield row['prompt'] + ' ' + row['response']
 
 
 
 
83
 
84
  del dataset
85
  gc.collect()
@@ -109,8 +140,8 @@ def batch_iterator(name=None):
109
  )
110
 
111
  for d in dataset:
112
- for row in d:
113
- yield row['content']
114
 
115
  del dataset
116
  gc.collect()
@@ -120,7 +151,11 @@ def batch_iterator(name=None):
120
  dataset = load_dataset(name, split='train')
121
 
122
  for row in dataset:
123
- yield row['query'] + ' ' + row['answer']
 
 
 
 
124
 
125
  del dataset
126
  gc.collect()
@@ -130,7 +165,11 @@ def batch_iterator(name=None):
130
  dataset = load_dataset(name, split='train')
131
 
132
  for row in dataset:
133
- yield row['instruction'] + ' ' + row['output']
 
 
 
 
134
 
135
  del dataset
136
  gc.collect()
@@ -140,27 +179,38 @@ def batch_iterator(name=None):
140
  dataset = load_dataset(name, split='train')
141
 
142
  for row in dataset:
143
- yield row['instruction'] + ' ' + row['input'] + ' ' + row['output']
 
 
 
 
 
 
144
 
145
  del dataset
146
  gc.collect()
147
 
148
  # code
149
  if name in (None, 'HuggingFaceH4/CodeAlpaca_20K'):
150
- dataset = load_dataset(name, split='train')
 
151
 
152
- for row in dataset:
153
- yield row['prompt'] + ' ' + row['completion']
 
 
 
 
154
 
155
- del dataset
156
- gc.collect()
157
 
158
  # math
159
  if name in (None, 'gair-prox/open-web-math-pro'):
160
  dataset = load_dataset(name, split='train')
161
 
162
- for row in dataset:
163
- yield row['text']
164
 
165
  del dataset
166
  gc.collect()
@@ -171,7 +221,11 @@ def batch_iterator(name=None):
171
  dataset = load_dataset(name, split=split)
172
 
173
  for row in dataset:
174
- yield row['question'] + ' ' + row['answer']
 
 
 
 
175
 
176
  del dataset
177
  gc.collect()
@@ -181,7 +235,11 @@ def batch_iterator(name=None):
181
  dataset = load_dataset(name, split='train')
182
 
183
  for row in dataset:
184
- yield row['instruction'] + ' ' + row['output']
 
 
 
 
185
 
186
  del dataset
187
  gc.collect()
@@ -191,17 +249,42 @@ def batch_iterator(name=None):
191
  dataset = load_dataset(name, split='train')
192
 
193
  for row in dataset:
194
- yield row['question'] + ' ' + row['answer']
 
 
 
 
195
 
196
  del dataset
197
  gc.collect()
198
 
199
- # math serbian
200
- if name in (None, 'datatab/orca_math_world_problem_200k_serbian'):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  dataset = load_dataset(name, split='train')
202
 
203
  for row in dataset:
204
- yield row['question_translated_srb'] + ' ' + row['answer_translated_srb']
 
 
 
 
 
 
205
 
206
  del dataset
207
  gc.collect()
@@ -212,10 +295,14 @@ def batch_iterator(name=None):
212
 
213
  for row in dataset:
214
  yield (
215
- row['character'] + ' ' +
216
- row['unicode'] + ' ' +
217
- row['short description'] + ' ' +
218
- row['tags'] + ' ' +
 
 
 
 
219
  row['LLM description']
220
  )
221
 
@@ -233,17 +320,21 @@ datasets_names = [
233
  'saillab/taco-datasets',
234
  # 'xu-song/cc100-samples',
235
  # 'ontocord/fineweb-permissive-multilingual-2m',
 
 
236
  'nampdn-ai/tiny-textbooks',
237
- 'nampdn-ai/tiny-codes',
238
  'bigcode/the-stack-smol-xs',
239
  'm-a-p/CodeFeedback-Filtered-Instruction',
240
- 'jtatman/python-code-dataset-500k',
241
  'iamtarun/python_code_instructions_18k_alpaca',
242
  'HuggingFaceH4/CodeAlpaca_20K',
243
  # 'gair-prox/open-web-math-pro',
244
  'rvv-karma/Math-QA',
245
- 'ajibawa-2023/Maths-College',
246
  'microsoft/orca-math-word-problems-200k',
 
 
247
  'badrex/llm-emoji-dataset',
248
  ]
249
 
@@ -252,6 +343,6 @@ outputs = optimize(
252
  inputs=datasets_names,
253
  output_dir='../pretrain-data/',
254
  # Number of tokens to store by chunks. This is roughly 64MB of tokens per chunk.
255
- chunk_size=(4097 * 4006),
256
  num_workers=16,
257
  )
 
20
  for d in dataset:
21
  for row in d:
22
  for n in row:
23
+ yield (
24
+ row['instruction'] +
25
+ ' ' +
26
+ row['input'] +
27
+ ' ' +
28
+ row['output']
29
+ )
30
 
31
  del dataset
32
  gc.collect()
 
63
  if name in (None, 'ontocord/fineweb-permissive-multilingual-2m'):
64
  dataset = load_dataset(name, split='train')
65
 
66
+ for row in dataset['text']:
67
+ yield row
68
 
69
  del dataset
70
  gc.collect()
71
 
72
  # text
73
+ if name in (None, 'MuskumPillerum/General-Knowledge'):
74
+ dataset = load_dataset(name, split='train')
75
+
76
+ for row in dataset:
77
+ yield row['Question'] + ' ' + row['Answer']
78
+
79
+ del dataset
80
+ gc.collect()
81
+
82
+ # text
83
+ if name in (None, 'yirenc/general_knowledge_boolean'):
84
  for split in ['train', 'test']:
85
  dataset = load_dataset(name, split=split)
86
 
87
  for row in dataset:
88
+ yield row['question'] + '? ' + row['answer'] + '. ' + row['passage']
89
+
90
+ del dataset
91
+ gc.collect()
92
+
93
+ # text
94
+ if name in (None, 'nampdn-ai/tiny-textbooks'):
95
+ for split in ['train', 'test']:
96
+ dataset = load_dataset(name, split=split)
97
+
98
+ for row in dataset['textbook']:
99
+ yield row
100
 
101
  del dataset
102
  gc.collect()
 
106
  dataset = load_dataset(name, split='train')
107
 
108
  for row in dataset:
109
+ yield (
110
+ row['prompt'] +
111
+ ' ' +
112
+ row['response']
113
+ )
114
 
115
  del dataset
116
  gc.collect()
 
140
  )
141
 
142
  for d in dataset:
143
+ for row in d['content']:
144
+ yield row
145
 
146
  del dataset
147
  gc.collect()
 
151
  dataset = load_dataset(name, split='train')
152
 
153
  for row in dataset:
154
+ yield (
155
+ row['query'] +
156
+ ' ' +
157
+ row['answer']
158
+ )
159
 
160
  del dataset
161
  gc.collect()
 
165
  dataset = load_dataset(name, split='train')
166
 
167
  for row in dataset:
168
+ yield (
169
+ row['instruction'] +
170
+ ' ' +
171
+ row['output']
172
+ )
173
 
174
  del dataset
175
  gc.collect()
 
179
  dataset = load_dataset(name, split='train')
180
 
181
  for row in dataset:
182
+ yield (
183
+ row['instruction'] +
184
+ ' ' +
185
+ row['input'] +
186
+ ' ' +
187
+ row['output']
188
+ )
189
 
190
  del dataset
191
  gc.collect()
192
 
193
  # code
194
  if name in (None, 'HuggingFaceH4/CodeAlpaca_20K'):
195
+ for split in ['train', 'test']:
196
+ dataset = load_dataset(name, split=split)
197
 
198
+ for row in dataset:
199
+ yield (
200
+ row['prompt'] +
201
+ ' ' +
202
+ row['completion']
203
+ )
204
 
205
+ del dataset
206
+ gc.collect()
207
 
208
  # math
209
  if name in (None, 'gair-prox/open-web-math-pro'):
210
  dataset = load_dataset(name, split='train')
211
 
212
+ for row in dataset['text']:
213
+ yield row
214
 
215
  del dataset
216
  gc.collect()
 
221
  dataset = load_dataset(name, split=split)
222
 
223
  for row in dataset:
224
+ yield (
225
+ row['question'] +
226
+ ' ' +
227
+ row['answer']
228
+ )
229
 
230
  del dataset
231
  gc.collect()
 
235
  dataset = load_dataset(name, split='train')
236
 
237
  for row in dataset:
238
+ yield (
239
+ row['instruction'] +
240
+ ' ' +
241
+ row['output']
242
+ )
243
 
244
  del dataset
245
  gc.collect()
 
249
  dataset = load_dataset(name, split='train')
250
 
251
  for row in dataset:
252
+ yield (
253
+ row['question'] +
254
+ ' ' +
255
+ row['answer']
256
+ )
257
 
258
  del dataset
259
  gc.collect()
260
 
261
+ # math
262
+ if name in (None, 'fblgit/simple-math'):
263
+ for split in ['train', 'test']:
264
+ dataset = load_dataset(name, split=split)
265
+
266
+ for row in dataset:
267
+ yield (
268
+ row['instruction'] +
269
+ ' = ' +
270
+ row['output']
271
+ )
272
+
273
+ del dataset
274
+ gc.collect()
275
+
276
+ # reasoning
277
+ if name in (None, 'SkunkworksAI/reasoning-0.01'):
278
  dataset = load_dataset(name, split='train')
279
 
280
  for row in dataset:
281
+ yield (
282
+ row['instruction'] +
283
+ ' ' +
284
+ row['reasoning'] +
285
+ ' ' +
286
+ row['output']
287
+ )
288
 
289
  del dataset
290
  gc.collect()
 
295
 
296
  for row in dataset:
297
  yield (
298
+ row['character'] +
299
+ ' ' +
300
+ row['unicode'] +
301
+ ' ' +
302
+ row['short description'] +
303
+ ' ' +
304
+ row['tags'] +
305
+ ' ' +
306
  row['LLM description']
307
  )
308
 
 
320
  'saillab/taco-datasets',
321
  # 'xu-song/cc100-samples',
322
  # 'ontocord/fineweb-permissive-multilingual-2m',
323
+ 'MuskumPillerum/General-Knowledge',
324
+ 'yirenc/general_knowledge_boolean',
325
  'nampdn-ai/tiny-textbooks',
326
+ # 'nampdn-ai/tiny-codes',
327
  'bigcode/the-stack-smol-xs',
328
  'm-a-p/CodeFeedback-Filtered-Instruction',
329
+ # 'jtatman/python-code-dataset-500k',
330
  'iamtarun/python_code_instructions_18k_alpaca',
331
  'HuggingFaceH4/CodeAlpaca_20K',
332
  # 'gair-prox/open-web-math-pro',
333
  'rvv-karma/Math-QA',
334
+ # 'ajibawa-2023/Maths-College',
335
  'microsoft/orca-math-word-problems-200k',
336
+ 'fblgit/simple-math',
337
+ # 'SkunkworksAI/reasoning-0.01',
338
  'badrex/llm-emoji-dataset',
339
  ]
340
 
 
343
  inputs=datasets_names,
344
  output_dir='../pretrain-data/',
345
  # Number of tokens to store by chunks. This is roughly 64MB of tokens per chunk.
346
+ chunk_size=(2049 * 8012),
347
  num_workers=16,
348
  )
scripts/pretrain-model.yaml CHANGED
@@ -61,7 +61,7 @@ train:
61
  global_batch_size: 512
62
 
63
  # Number of samples per data-parallel rank (type: int, default: 4)
64
- micro_batch_size: 8
65
 
66
  # Number of iterations with learning rate warmup active (type: int, default: 2000)
67
  lr_warmup_steps: 2000
@@ -71,7 +71,7 @@ train:
71
 
72
  # Total number of tokens to train on (type: Optional[int], default: 3000000000000)
73
  # max_tokens: 3000000000000
74
- max_tokens: ??? # ? * 8193 * 3
75
 
76
  # Limits the number of optimizer steps to run. (type: Optional[int], default: null)
77
  max_steps:
 
61
  global_batch_size: 512
62
 
63
  # Number of samples per data-parallel rank (type: int, default: 4)
64
+ micro_batch_size: 16
65
 
66
  # Number of iterations with learning rate warmup active (type: int, default: 2000)
67
  lr_warmup_steps: 2000
 
71
 
72
  # Total number of tokens to train on (type: Optional[int], default: 3000000000000)
73
  # max_tokens: 3000000000000
74
+ max_tokens: ??? # ? * 2049 * 3
75
 
76
  # Limits the number of optimizer steps to run. (type: Optional[int], default: null)
77
  max_steps:
scripts/train_tokenizer.py CHANGED
@@ -158,8 +158,11 @@ special_tokens = [
158
  'system',
159
  'user',
160
  'assistant',
 
161
  'tool',
 
162
 
 
163
  '<tools>',
164
  '</tools>',
165
  '<tool_call>',
@@ -210,12 +213,70 @@ special_tokens = [
210
  '</function-type>',
211
  '<function-value>',
212
  '</function-value>',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  ]
214
 
215
  for i in range(2, 25):
216
  special_tokens.append(' ' * i)
217
 
218
- for i in range(64 - len(special_tokens)):
219
  special_tokens.append(f'<|reserved_{i}|>')
220
 
221
  # emoji
 
158
  'system',
159
  'user',
160
  'assistant',
161
+ 'resource',
162
  'tool',
163
+ 'agent',
164
 
165
+ # tool/function calling
166
  '<tools>',
167
  '</tools>',
168
  '<tool_call>',
 
213
  '</function-type>',
214
  '<function-value>',
215
  '</function-value>',
216
+
217
+ # qa
218
+ '<qa>',
219
+ '</qa>',
220
+ '<question>',
221
+ '</question>',
222
+ '<answer>',
223
+ '</answer>',
224
+
225
+ # cot, tot
226
+ '<cot>',
227
+ '</cot>',
228
+ '<tot>',
229
+ '</tot>',
230
+ '<input>',
231
+ '</input>',
232
+ '<output>',
233
+ '</output>',
234
+ '<thoughts>',
235
+ '</thoughts>',
236
+ '<thought>',
237
+ '</thought>',
238
+ '<plans>',
239
+ '</plans>',
240
+ '<plan>',
241
+ '</plan>',
242
+ '<votes>',
243
+ '</votes>',
244
+ '<vote>',
245
+ '</vote>',
246
+ '<passages>',
247
+ '</passages>',
248
+ '<passage>',
249
+ '</passage>',
250
+
251
+ # react
252
+ '<react>',
253
+ '</react>',
254
+ '<reasoning>',
255
+ '</reasoning>',
256
+ '<acting>',
257
+ '</acting>',
258
+ '<action>',
259
+ '</action>',
260
+ '<observation>',
261
+ '</observation>',
262
+ '<claim>',
263
+ '</claim>',
264
+
265
+ # reflection
266
+ '<thinking>',
267
+ '</thinking>',
268
+ '<step>',
269
+ '</step>',
270
+ '<reflection>',
271
+ '</reflection>',
272
+ '<output>',
273
+ '</output>',
274
  ]
275
 
276
  for i in range(2, 25):
277
  special_tokens.append(' ' * i)
278
 
279
+ for i in range(128 - len(special_tokens)):
280
  special_tokens.append(f'<|reserved_{i}|>')
281
 
282
  # emoji
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -65,7 +65,7 @@
65
  "special": true
66
  },
67
  "8": {
68
- "content": "tool",
69
  "lstrip": false,
70
  "normalized": false,
71
  "rstrip": false,
@@ -73,7 +73,7 @@
73
  "special": true
74
  },
75
  "9": {
76
- "content": "<tools>",
77
  "lstrip": false,
78
  "normalized": false,
79
  "rstrip": false,
@@ -81,7 +81,7 @@
81
  "special": true
82
  },
83
  "10": {
84
- "content": "</tools>",
85
  "lstrip": false,
86
  "normalized": false,
87
  "rstrip": false,
@@ -89,7 +89,7 @@
89
  "special": true
90
  },
91
  "11": {
92
- "content": "<tool_call>",
93
  "lstrip": false,
94
  "normalized": false,
95
  "rstrip": false,
@@ -97,7 +97,7 @@
97
  "special": true
98
  },
99
  "12": {
100
- "content": "</tool_call>",
101
  "lstrip": false,
102
  "normalized": false,
103
  "rstrip": false,
@@ -105,7 +105,7 @@
105
  "special": true
106
  },
107
  "13": {
108
- "content": "<tool_response>",
109
  "lstrip": false,
110
  "normalized": false,
111
  "rstrip": false,
@@ -113,7 +113,7 @@
113
  "special": true
114
  },
115
  "14": {
116
- "content": "</tool_response>",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
@@ -121,7 +121,7 @@
121
  "special": true
122
  },
123
  "15": {
124
- "content": "\"arguments\"",
125
  "lstrip": false,
126
  "normalized": false,
127
  "rstrip": false,
@@ -129,7 +129,7 @@
129
  "special": true
130
  },
131
  "16": {
132
- "content": "\"name\"",
133
  "lstrip": false,
134
  "normalized": false,
135
  "rstrip": false,
@@ -137,7 +137,7 @@
137
  "special": true
138
  },
139
  "17": {
140
- "content": "<arguments>",
141
  "lstrip": false,
142
  "normalized": false,
143
  "rstrip": false,
@@ -145,7 +145,7 @@
145
  "special": true
146
  },
147
  "18": {
148
- "content": "</arguments>",
149
  "lstrip": false,
150
  "normalized": false,
151
  "rstrip": false,
@@ -153,7 +153,7 @@
153
  "special": true
154
  },
155
  "19": {
156
- "content": "<argument>",
157
  "lstrip": false,
158
  "normalized": false,
159
  "rstrip": false,
@@ -161,7 +161,7 @@
161
  "special": true
162
  },
163
  "20": {
164
- "content": "</argument>",
165
  "lstrip": false,
166
  "normalized": false,
167
  "rstrip": false,
@@ -169,7 +169,7 @@
169
  "special": true
170
  },
171
  "21": {
172
- "content": "<argument-name>",
173
  "lstrip": false,
174
  "normalized": false,
175
  "rstrip": false,
@@ -177,7 +177,7 @@
177
  "special": true
178
  },
179
  "22": {
180
- "content": "</argument-name>",
181
  "lstrip": false,
182
  "normalized": false,
183
  "rstrip": false,
@@ -185,7 +185,7 @@
185
  "special": true
186
  },
187
  "23": {
188
- "content": "<argument-type>",
189
  "lstrip": false,
190
  "normalized": false,
191
  "rstrip": false,
@@ -193,7 +193,7 @@
193
  "special": true
194
  },
195
  "24": {
196
- "content": "</argument-type>",
197
  "lstrip": false,
198
  "normalized": false,
199
  "rstrip": false,
@@ -201,7 +201,7 @@
201
  "special": true
202
  },
203
  "25": {
204
- "content": "<argument-value>",
205
  "lstrip": false,
206
  "normalized": false,
207
  "rstrip": false,
@@ -209,7 +209,7 @@
209
  "special": true
210
  },
211
  "26": {
212
- "content": "</argument-value>",
213
  "lstrip": false,
214
  "normalized": false,
215
  "rstrip": false,
@@ -217,7 +217,7 @@
217
  "special": true
218
  },
219
  "27": {
220
- "content": "<parameter>",
221
  "lstrip": false,
222
  "normalized": false,
223
  "rstrip": false,
@@ -225,7 +225,7 @@
225
  "special": true
226
  },
227
  "28": {
228
- "content": "</parameter>",
229
  "lstrip": false,
230
  "normalized": false,
231
  "rstrip": false,
@@ -233,7 +233,7 @@
233
  "special": true
234
  },
235
  "29": {
236
- "content": "<parameter-name>",
237
  "lstrip": false,
238
  "normalized": false,
239
  "rstrip": false,
@@ -241,7 +241,7 @@
241
  "special": true
242
  },
243
  "30": {
244
- "content": "</parameter-name>",
245
  "lstrip": false,
246
  "normalized": false,
247
  "rstrip": false,
@@ -249,7 +249,7 @@
249
  "special": true
250
  },
251
  "31": {
252
- "content": "<parameter-type>",
253
  "lstrip": false,
254
  "normalized": false,
255
  "rstrip": false,
@@ -257,7 +257,7 @@
257
  "special": true
258
  },
259
  "32": {
260
- "content": "</parameter-type>",
261
  "lstrip": false,
262
  "normalized": false,
263
  "rstrip": false,
@@ -265,7 +265,7 @@
265
  "special": true
266
  },
267
  "33": {
268
- "content": "<parameter-value>",
269
  "lstrip": false,
270
  "normalized": false,
271
  "rstrip": false,
@@ -273,7 +273,7 @@
273
  "special": true
274
  },
275
  "34": {
276
- "content": "</parameter-value>",
277
  "lstrip": false,
278
  "normalized": false,
279
  "rstrip": false,
@@ -281,7 +281,7 @@
281
  "special": true
282
  },
283
  "35": {
284
- "content": "<field>",
285
  "lstrip": false,
286
  "normalized": false,
287
  "rstrip": false,
@@ -289,7 +289,7 @@
289
  "special": true
290
  },
291
  "36": {
292
- "content": "</field>",
293
  "lstrip": false,
294
  "normalized": false,
295
  "rstrip": false,
@@ -297,7 +297,7 @@
297
  "special": true
298
  },
299
  "37": {
300
- "content": "<field-name>",
301
  "lstrip": false,
302
  "normalized": false,
303
  "rstrip": false,
@@ -305,7 +305,7 @@
305
  "special": true
306
  },
307
  "38": {
308
- "content": "</field-name>",
309
  "lstrip": false,
310
  "normalized": false,
311
  "rstrip": false,
@@ -313,7 +313,7 @@
313
  "special": true
314
  },
315
  "39": {
316
- "content": "<field-type>",
317
  "lstrip": false,
318
  "normalized": false,
319
  "rstrip": false,
@@ -321,7 +321,7 @@
321
  "special": true
322
  },
323
  "40": {
324
- "content": "</field-type>",
325
  "lstrip": false,
326
  "normalized": false,
327
  "rstrip": false,
@@ -329,7 +329,7 @@
329
  "special": true
330
  },
331
  "41": {
332
- "content": "<field-value>",
333
  "lstrip": false,
334
  "normalized": false,
335
  "rstrip": false,
@@ -337,7 +337,7 @@
337
  "special": true
338
  },
339
  "42": {
340
- "content": "</field-value>",
341
  "lstrip": false,
342
  "normalized": false,
343
  "rstrip": false,
@@ -345,7 +345,7 @@
345
  "special": true
346
  },
347
  "43": {
348
- "content": "<name>",
349
  "lstrip": false,
350
  "normalized": false,
351
  "rstrip": false,
@@ -353,7 +353,7 @@
353
  "special": true
354
  },
355
  "44": {
356
- "content": "</name>",
357
  "lstrip": false,
358
  "normalized": false,
359
  "rstrip": false,
@@ -361,7 +361,7 @@
361
  "special": true
362
  },
363
  "45": {
364
- "content": "<type>",
365
  "lstrip": false,
366
  "normalized": false,
367
  "rstrip": false,
@@ -369,7 +369,7 @@
369
  "special": true
370
  },
371
  "46": {
372
- "content": "</type>",
373
  "lstrip": false,
374
  "normalized": false,
375
  "rstrip": false,
@@ -377,7 +377,7 @@
377
  "special": true
378
  },
379
  "47": {
380
- "content": "<value>",
381
  "lstrip": false,
382
  "normalized": false,
383
  "rstrip": false,
@@ -385,7 +385,7 @@
385
  "special": true
386
  },
387
  "48": {
388
- "content": "</value>",
389
  "lstrip": false,
390
  "normalized": false,
391
  "rstrip": false,
@@ -393,7 +393,7 @@
393
  "special": true
394
  },
395
  "49": {
396
- "content": "<function>",
397
  "lstrip": false,
398
  "normalized": false,
399
  "rstrip": false,
@@ -401,7 +401,7 @@
401
  "special": true
402
  },
403
  "50": {
404
- "content": "</function>",
405
  "lstrip": false,
406
  "normalized": false,
407
  "rstrip": false,
@@ -409,7 +409,7 @@
409
  "special": true
410
  },
411
  "51": {
412
- "content": "<function-name>",
413
  "lstrip": false,
414
  "normalized": false,
415
  "rstrip": false,
@@ -417,7 +417,7 @@
417
  "special": true
418
  },
419
  "52": {
420
- "content": "</function-name>",
421
  "lstrip": false,
422
  "normalized": false,
423
  "rstrip": false,
@@ -425,7 +425,7 @@
425
  "special": true
426
  },
427
  "53": {
428
- "content": "<function-type>",
429
  "lstrip": false,
430
  "normalized": false,
431
  "rstrip": false,
@@ -433,7 +433,7 @@
433
  "special": true
434
  },
435
  "54": {
436
- "content": "</function-type>",
437
  "lstrip": false,
438
  "normalized": false,
439
  "rstrip": false,
@@ -441,7 +441,7 @@
441
  "special": true
442
  },
443
  "55": {
444
- "content": "<function-value>",
445
  "lstrip": false,
446
  "normalized": false,
447
  "rstrip": false,
@@ -449,7 +449,7 @@
449
  "special": true
450
  },
451
  "56": {
452
- "content": "</function-value>",
453
  "lstrip": false,
454
  "normalized": false,
455
  "rstrip": false,
@@ -457,7 +457,7 @@
457
  "special": true
458
  },
459
  "57": {
460
- "content": " ",
461
  "lstrip": false,
462
  "normalized": false,
463
  "rstrip": false,
@@ -465,7 +465,7 @@
465
  "special": true
466
  },
467
  "58": {
468
- "content": " ",
469
  "lstrip": false,
470
  "normalized": false,
471
  "rstrip": false,
@@ -473,7 +473,7 @@
473
  "special": true
474
  },
475
  "59": {
476
- "content": " ",
477
  "lstrip": false,
478
  "normalized": false,
479
  "rstrip": false,
@@ -481,7 +481,7 @@
481
  "special": true
482
  },
483
  "60": {
484
- "content": " ",
485
  "lstrip": false,
486
  "normalized": false,
487
  "rstrip": false,
@@ -489,7 +489,7 @@
489
  "special": true
490
  },
491
  "61": {
492
- "content": " ",
493
  "lstrip": false,
494
  "normalized": false,
495
  "rstrip": false,
@@ -497,7 +497,7 @@
497
  "special": true
498
  },
499
  "62": {
500
- "content": " ",
501
  "lstrip": false,
502
  "normalized": false,
503
  "rstrip": false,
@@ -505,7 +505,7 @@
505
  "special": true
506
  },
507
  "63": {
508
- "content": " ",
509
  "lstrip": false,
510
  "normalized": false,
511
  "rstrip": false,
@@ -513,7 +513,7 @@
513
  "special": true
514
  },
515
  "64": {
516
- "content": " ",
517
  "lstrip": false,
518
  "normalized": false,
519
  "rstrip": false,
@@ -521,7 +521,7 @@
521
  "special": true
522
  },
523
  "65": {
524
- "content": " ",
525
  "lstrip": false,
526
  "normalized": false,
527
  "rstrip": false,
@@ -529,7 +529,7 @@
529
  "special": true
530
  },
531
  "66": {
532
- "content": " ",
533
  "lstrip": false,
534
  "normalized": false,
535
  "rstrip": false,
@@ -537,7 +537,7 @@
537
  "special": true
538
  },
539
  "67": {
540
- "content": " ",
541
  "lstrip": false,
542
  "normalized": false,
543
  "rstrip": false,
@@ -545,7 +545,7 @@
545
  "special": true
546
  },
547
  "68": {
548
- "content": " ",
549
  "lstrip": false,
550
  "normalized": false,
551
  "rstrip": false,
@@ -553,7 +553,7 @@
553
  "special": true
554
  },
555
  "69": {
556
- "content": " ",
557
  "lstrip": false,
558
  "normalized": false,
559
  "rstrip": false,
@@ -561,7 +561,7 @@
561
  "special": true
562
  },
563
  "70": {
564
- "content": " ",
565
  "lstrip": false,
566
  "normalized": false,
567
  "rstrip": false,
@@ -569,7 +569,7 @@
569
  "special": true
570
  },
571
  "71": {
572
- "content": " ",
573
  "lstrip": false,
574
  "normalized": false,
575
  "rstrip": false,
@@ -577,7 +577,7 @@
577
  "special": true
578
  },
579
  "72": {
580
- "content": " ",
581
  "lstrip": false,
582
  "normalized": false,
583
  "rstrip": false,
@@ -585,7 +585,7 @@
585
  "special": true
586
  },
587
  "73": {
588
- "content": " ",
589
  "lstrip": false,
590
  "normalized": false,
591
  "rstrip": false,
@@ -593,7 +593,7 @@
593
  "special": true
594
  },
595
  "74": {
596
- "content": " ",
597
  "lstrip": false,
598
  "normalized": false,
599
  "rstrip": false,
@@ -601,7 +601,7 @@
601
  "special": true
602
  },
603
  "75": {
604
- "content": " ",
605
  "lstrip": false,
606
  "normalized": false,
607
  "rstrip": false,
@@ -609,7 +609,7 @@
609
  "special": true
610
  },
611
  "76": {
612
- "content": " ",
613
  "lstrip": false,
614
  "normalized": false,
615
  "rstrip": false,
@@ -617,7 +617,7 @@
617
  "special": true
618
  },
619
  "77": {
620
- "content": " ",
621
  "lstrip": false,
622
  "normalized": false,
623
  "rstrip": false,
@@ -625,7 +625,7 @@
625
  "special": true
626
  },
627
  "78": {
628
- "content": " ",
629
  "lstrip": false,
630
  "normalized": false,
631
  "rstrip": false,
@@ -633,6 +633,406 @@
633
  "special": true
634
  },
635
  "79": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
636
  "content": " ",
637
  "lstrip": false,
638
  "normalized": false,
 
65
  "special": true
66
  },
67
  "8": {
68
+ "content": "resource",
69
  "lstrip": false,
70
  "normalized": false,
71
  "rstrip": false,
 
73
  "special": true
74
  },
75
  "9": {
76
+ "content": "tool",
77
  "lstrip": false,
78
  "normalized": false,
79
  "rstrip": false,
 
81
  "special": true
82
  },
83
  "10": {
84
+ "content": "agent",
85
  "lstrip": false,
86
  "normalized": false,
87
  "rstrip": false,
 
89
  "special": true
90
  },
91
  "11": {
92
+ "content": "<tools>",
93
  "lstrip": false,
94
  "normalized": false,
95
  "rstrip": false,
 
97
  "special": true
98
  },
99
  "12": {
100
+ "content": "</tools>",
101
  "lstrip": false,
102
  "normalized": false,
103
  "rstrip": false,
 
105
  "special": true
106
  },
107
  "13": {
108
+ "content": "<tool_call>",
109
  "lstrip": false,
110
  "normalized": false,
111
  "rstrip": false,
 
113
  "special": true
114
  },
115
  "14": {
116
+ "content": "</tool_call>",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
 
121
  "special": true
122
  },
123
  "15": {
124
+ "content": "<tool_response>",
125
  "lstrip": false,
126
  "normalized": false,
127
  "rstrip": false,
 
129
  "special": true
130
  },
131
  "16": {
132
+ "content": "</tool_response>",
133
  "lstrip": false,
134
  "normalized": false,
135
  "rstrip": false,
 
137
  "special": true
138
  },
139
  "17": {
140
+ "content": "\"arguments\"",
141
  "lstrip": false,
142
  "normalized": false,
143
  "rstrip": false,
 
145
  "special": true
146
  },
147
  "18": {
148
+ "content": "\"name\"",
149
  "lstrip": false,
150
  "normalized": false,
151
  "rstrip": false,
 
153
  "special": true
154
  },
155
  "19": {
156
+ "content": "<arguments>",
157
  "lstrip": false,
158
  "normalized": false,
159
  "rstrip": false,
 
161
  "special": true
162
  },
163
  "20": {
164
+ "content": "</arguments>",
165
  "lstrip": false,
166
  "normalized": false,
167
  "rstrip": false,
 
169
  "special": true
170
  },
171
  "21": {
172
+ "content": "<argument>",
173
  "lstrip": false,
174
  "normalized": false,
175
  "rstrip": false,
 
177
  "special": true
178
  },
179
  "22": {
180
+ "content": "</argument>",
181
  "lstrip": false,
182
  "normalized": false,
183
  "rstrip": false,
 
185
  "special": true
186
  },
187
  "23": {
188
+ "content": "<argument-name>",
189
  "lstrip": false,
190
  "normalized": false,
191
  "rstrip": false,
 
193
  "special": true
194
  },
195
  "24": {
196
+ "content": "</argument-name>",
197
  "lstrip": false,
198
  "normalized": false,
199
  "rstrip": false,
 
201
  "special": true
202
  },
203
  "25": {
204
+ "content": "<argument-type>",
205
  "lstrip": false,
206
  "normalized": false,
207
  "rstrip": false,
 
209
  "special": true
210
  },
211
  "26": {
212
+ "content": "</argument-type>",
213
  "lstrip": false,
214
  "normalized": false,
215
  "rstrip": false,
 
217
  "special": true
218
  },
219
  "27": {
220
+ "content": "<argument-value>",
221
  "lstrip": false,
222
  "normalized": false,
223
  "rstrip": false,
 
225
  "special": true
226
  },
227
  "28": {
228
+ "content": "</argument-value>",
229
  "lstrip": false,
230
  "normalized": false,
231
  "rstrip": false,
 
233
  "special": true
234
  },
235
  "29": {
236
+ "content": "<parameter>",
237
  "lstrip": false,
238
  "normalized": false,
239
  "rstrip": false,
 
241
  "special": true
242
  },
243
  "30": {
244
+ "content": "</parameter>",
245
  "lstrip": false,
246
  "normalized": false,
247
  "rstrip": false,
 
249
  "special": true
250
  },
251
  "31": {
252
+ "content": "<parameter-name>",
253
  "lstrip": false,
254
  "normalized": false,
255
  "rstrip": false,
 
257
  "special": true
258
  },
259
  "32": {
260
+ "content": "</parameter-name>",
261
  "lstrip": false,
262
  "normalized": false,
263
  "rstrip": false,
 
265
  "special": true
266
  },
267
  "33": {
268
+ "content": "<parameter-type>",
269
  "lstrip": false,
270
  "normalized": false,
271
  "rstrip": false,
 
273
  "special": true
274
  },
275
  "34": {
276
+ "content": "</parameter-type>",
277
  "lstrip": false,
278
  "normalized": false,
279
  "rstrip": false,
 
281
  "special": true
282
  },
283
  "35": {
284
+ "content": "<parameter-value>",
285
  "lstrip": false,
286
  "normalized": false,
287
  "rstrip": false,
 
289
  "special": true
290
  },
291
  "36": {
292
+ "content": "</parameter-value>",
293
  "lstrip": false,
294
  "normalized": false,
295
  "rstrip": false,
 
297
  "special": true
298
  },
299
  "37": {
300
+ "content": "<field>",
301
  "lstrip": false,
302
  "normalized": false,
303
  "rstrip": false,
 
305
  "special": true
306
  },
307
  "38": {
308
+ "content": "</field>",
309
  "lstrip": false,
310
  "normalized": false,
311
  "rstrip": false,
 
313
  "special": true
314
  },
315
  "39": {
316
+ "content": "<field-name>",
317
  "lstrip": false,
318
  "normalized": false,
319
  "rstrip": false,
 
321
  "special": true
322
  },
323
  "40": {
324
+ "content": "</field-name>",
325
  "lstrip": false,
326
  "normalized": false,
327
  "rstrip": false,
 
329
  "special": true
330
  },
331
  "41": {
332
+ "content": "<field-type>",
333
  "lstrip": false,
334
  "normalized": false,
335
  "rstrip": false,
 
337
  "special": true
338
  },
339
  "42": {
340
+ "content": "</field-type>",
341
  "lstrip": false,
342
  "normalized": false,
343
  "rstrip": false,
 
345
  "special": true
346
  },
347
  "43": {
348
+ "content": "<field-value>",
349
  "lstrip": false,
350
  "normalized": false,
351
  "rstrip": false,
 
353
  "special": true
354
  },
355
  "44": {
356
+ "content": "</field-value>",
357
  "lstrip": false,
358
  "normalized": false,
359
  "rstrip": false,
 
361
  "special": true
362
  },
363
  "45": {
364
+ "content": "<name>",
365
  "lstrip": false,
366
  "normalized": false,
367
  "rstrip": false,
 
369
  "special": true
370
  },
371
  "46": {
372
+ "content": "</name>",
373
  "lstrip": false,
374
  "normalized": false,
375
  "rstrip": false,
 
377
  "special": true
378
  },
379
  "47": {
380
+ "content": "<type>",
381
  "lstrip": false,
382
  "normalized": false,
383
  "rstrip": false,
 
385
  "special": true
386
  },
387
  "48": {
388
+ "content": "</type>",
389
  "lstrip": false,
390
  "normalized": false,
391
  "rstrip": false,
 
393
  "special": true
394
  },
395
  "49": {
396
+ "content": "<value>",
397
  "lstrip": false,
398
  "normalized": false,
399
  "rstrip": false,
 
401
  "special": true
402
  },
403
  "50": {
404
+ "content": "</value>",
405
  "lstrip": false,
406
  "normalized": false,
407
  "rstrip": false,
 
409
  "special": true
410
  },
411
  "51": {
412
+ "content": "<function>",
413
  "lstrip": false,
414
  "normalized": false,
415
  "rstrip": false,
 
417
  "special": true
418
  },
419
  "52": {
420
+ "content": "</function>",
421
  "lstrip": false,
422
  "normalized": false,
423
  "rstrip": false,
 
425
  "special": true
426
  },
427
  "53": {
428
+ "content": "<function-name>",
429
  "lstrip": false,
430
  "normalized": false,
431
  "rstrip": false,
 
433
  "special": true
434
  },
435
  "54": {
436
+ "content": "</function-name>",
437
  "lstrip": false,
438
  "normalized": false,
439
  "rstrip": false,
 
441
  "special": true
442
  },
443
  "55": {
444
+ "content": "<function-type>",
445
  "lstrip": false,
446
  "normalized": false,
447
  "rstrip": false,
 
449
  "special": true
450
  },
451
  "56": {
452
+ "content": "</function-type>",
453
  "lstrip": false,
454
  "normalized": false,
455
  "rstrip": false,
 
457
  "special": true
458
  },
459
  "57": {
460
+ "content": "<function-value>",
461
  "lstrip": false,
462
  "normalized": false,
463
  "rstrip": false,
 
465
  "special": true
466
  },
467
  "58": {
468
+ "content": "</function-value>",
469
  "lstrip": false,
470
  "normalized": false,
471
  "rstrip": false,
 
473
  "special": true
474
  },
475
  "59": {
476
+ "content": "<qa>",
477
  "lstrip": false,
478
  "normalized": false,
479
  "rstrip": false,
 
481
  "special": true
482
  },
483
  "60": {
484
+ "content": "</qa>",
485
  "lstrip": false,
486
  "normalized": false,
487
  "rstrip": false,
 
489
  "special": true
490
  },
491
  "61": {
492
+ "content": "<question>",
493
  "lstrip": false,
494
  "normalized": false,
495
  "rstrip": false,
 
497
  "special": true
498
  },
499
  "62": {
500
+ "content": "</question>",
501
  "lstrip": false,
502
  "normalized": false,
503
  "rstrip": false,
 
505
  "special": true
506
  },
507
  "63": {
508
+ "content": "<answer>",
509
  "lstrip": false,
510
  "normalized": false,
511
  "rstrip": false,
 
513
  "special": true
514
  },
515
  "64": {
516
+ "content": "</answer>",
517
  "lstrip": false,
518
  "normalized": false,
519
  "rstrip": false,
 
521
  "special": true
522
  },
523
  "65": {
524
+ "content": "<cot>",
525
  "lstrip": false,
526
  "normalized": false,
527
  "rstrip": false,
 
529
  "special": true
530
  },
531
  "66": {
532
+ "content": "</cot>",
533
  "lstrip": false,
534
  "normalized": false,
535
  "rstrip": false,
 
537
  "special": true
538
  },
539
  "67": {
540
+ "content": "<tot>",
541
  "lstrip": false,
542
  "normalized": false,
543
  "rstrip": false,
 
545
  "special": true
546
  },
547
  "68": {
548
+ "content": "</tot>",
549
  "lstrip": false,
550
  "normalized": false,
551
  "rstrip": false,
 
553
  "special": true
554
  },
555
  "69": {
556
+ "content": "<input>",
557
  "lstrip": false,
558
  "normalized": false,
559
  "rstrip": false,
 
561
  "special": true
562
  },
563
  "70": {
564
+ "content": "</input>",
565
  "lstrip": false,
566
  "normalized": false,
567
  "rstrip": false,
 
569
  "special": true
570
  },
571
  "71": {
572
+ "content": "<output>",
573
  "lstrip": false,
574
  "normalized": false,
575
  "rstrip": false,
 
577
  "special": true
578
  },
579
  "72": {
580
+ "content": "</output>",
581
  "lstrip": false,
582
  "normalized": false,
583
  "rstrip": false,
 
585
  "special": true
586
  },
587
  "73": {
588
+ "content": "<thoughts>",
589
  "lstrip": false,
590
  "normalized": false,
591
  "rstrip": false,
 
593
  "special": true
594
  },
595
  "74": {
596
+ "content": "</thoughts>",
597
  "lstrip": false,
598
  "normalized": false,
599
  "rstrip": false,
 
601
  "special": true
602
  },
603
  "75": {
604
+ "content": "<thought>",
605
  "lstrip": false,
606
  "normalized": false,
607
  "rstrip": false,
 
609
  "special": true
610
  },
611
  "76": {
612
+ "content": "</thought>",
613
  "lstrip": false,
614
  "normalized": false,
615
  "rstrip": false,
 
617
  "special": true
618
  },
619
  "77": {
620
+ "content": "<plans>",
621
  "lstrip": false,
622
  "normalized": false,
623
  "rstrip": false,
 
625
  "special": true
626
  },
627
  "78": {
628
+ "content": "</plans>",
629
  "lstrip": false,
630
  "normalized": false,
631
  "rstrip": false,
 
633
  "special": true
634
  },
635
  "79": {
636
+ "content": "<plan>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "80": {
644
+ "content": "</plan>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "81": {
652
+ "content": "<votes>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "82": {
660
+ "content": "</votes>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "83": {
668
+ "content": "<vote>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "84": {
676
+ "content": "</vote>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "85": {
684
+ "content": "<passages>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "86": {
692
+ "content": "</passages>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "87": {
700
+ "content": "<passage>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "88": {
708
+ "content": "</passage>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "89": {
716
+ "content": "<react>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "90": {
724
+ "content": "</react>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "91": {
732
+ "content": "<reasoning>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "92": {
740
+ "content": "</reasoning>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "93": {
748
+ "content": "<acting>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "94": {
756
+ "content": "</acting>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "95": {
764
+ "content": "<action>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "96": {
772
+ "content": "</action>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "97": {
780
+ "content": "<observation>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "98": {
788
+ "content": "</observation>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "99": {
796
+ "content": "<claim>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "100": {
804
+ "content": "</claim>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "101": {
812
+ "content": "<thinking>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "102": {
820
+ "content": "</thinking>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "103": {
828
+ "content": "<step>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "104": {
836
+ "content": "</step>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "105": {
844
+ "content": "<reflection>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "106": {
852
+ "content": "</reflection>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "107": {
860
+ "content": " ",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "108": {
868
+ "content": " ",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "109": {
876
+ "content": " ",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "110": {
884
+ "content": " ",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "111": {
892
+ "content": " ",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "112": {
900
+ "content": " ",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "113": {
908
+ "content": " ",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "114": {
916
+ "content": " ",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "115": {
924
+ "content": " ",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "116": {
932
+ "content": " ",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "117": {
940
+ "content": " ",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "118": {
948
+ "content": " ",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "119": {
956
+ "content": " ",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "120": {
964
+ "content": " ",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "121": {
972
+ "content": " ",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "122": {
980
+ "content": " ",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "123": {
988
+ "content": " ",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "124": {
996
+ "content": " ",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "125": {
1004
+ "content": " ",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "126": {
1012
+ "content": " ",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "127": {
1020
+ "content": " ",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128": {
1028
+ "content": " ",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "129": {
1036
  "content": " ",
1037
  "lstrip": false,
1038
  "normalized": false,
vocab.json CHANGED
The diff for this file is too large to render. See raw diff