Spaces:
Sleeping
Sleeping
“[shujaatalishariati]”
commited on
Commit
•
847e3e1
1
Parent(s):
9367038
Initial commit for Gradio app with GECToR
Browse files- app.py +37 -36
- gector/bert_token_embedder.py +269 -0
- gector/datareader.py +151 -0
- gector/gec_model.py +298 -0
- gector/seq2labels_model.py +194 -0
- gector/tokenization.py +181 -0
- gector/tokenizer_indexer.py +161 -0
- gector/trainer.py +845 -0
- output_vocabulary/d_tags.txt +4 -0
- output_vocabulary/labels.txt +5002 -0
- output_vocabulary/non_padded_namespaces.txt +2 -0
- requirements.txt +8 -4
- utils/filter_brackets.py +35 -0
- utils/helpers.py +233 -0
- utils/prepare_clc_fce_data.py +123 -0
- utils/preprocess_data.py +488 -0
app.py
CHANGED
@@ -7,6 +7,8 @@ import nltk
|
|
7 |
from nltk.corpus import wordnet
|
8 |
from textblob import TextBlob
|
9 |
from pattern.en import conjugate, lemma, pluralize, singularize
|
|
|
|
|
10 |
|
11 |
# Initialize the English text classification pipeline for AI detection
|
12 |
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
|
@@ -84,29 +86,41 @@ def correct_singular_plural_errors(text):
|
|
84 |
|
85 |
return ' '.join(corrected_text)
|
86 |
|
87 |
-
# Function to check and correct article errors
|
88 |
-
def correct_article_errors(text):
|
89 |
-
doc = nlp(text)
|
90 |
-
corrected_text = []
|
91 |
-
for token in doc:
|
92 |
-
if token.text in ['a', 'an']:
|
93 |
-
next_token = token.nbor(1)
|
94 |
-
if token.text == "a" and next_token.text[0].lower() in "aeiou":
|
95 |
-
corrected_text.append("an")
|
96 |
-
elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
|
97 |
-
corrected_text.append("a")
|
98 |
-
else:
|
99 |
-
corrected_text.append(token.text)
|
100 |
-
else:
|
101 |
-
corrected_text.append(token.text)
|
102 |
-
return ' '.join(corrected_text)
|
103 |
-
|
104 |
# Function to correct overall grammar using TextBlob
|
105 |
-
def
|
106 |
blob = TextBlob(text)
|
107 |
corrected_text = str(blob.correct()) # TextBlob's built-in grammar correction
|
108 |
return corrected_text
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
# Paraphrasing function using SpaCy and NLTK (Humanifier)
|
111 |
def paraphrase_with_spacy_nltk(text):
|
112 |
doc = nlp(text)
|
@@ -132,28 +146,17 @@ def paraphrase_with_spacy_nltk(text):
|
|
132 |
else:
|
133 |
paraphrased_words.append(token.text)
|
134 |
|
135 |
-
|
136 |
-
paraphrased_sentence = ' '.join(paraphrased_words)
|
137 |
-
|
138 |
-
return paraphrased_sentence
|
139 |
|
140 |
# Combined function: Paraphrase -> Grammar Correction -> Capitalization (Humanifier)
|
141 |
def paraphrase_and_correct(text):
|
142 |
# Step 1: Paraphrase the text
|
143 |
paraphrased_text = paraphrase_with_spacy_nltk(text)
|
144 |
|
145 |
-
# Step 2: Apply grammatical corrections
|
146 |
-
corrected_text =
|
147 |
-
corrected_text = capitalize_sentences_and_nouns(corrected_text)
|
148 |
-
corrected_text = correct_singular_plural_errors(corrected_text)
|
149 |
|
150 |
-
|
151 |
-
corrected_text = correct_tense_errors(corrected_text)
|
152 |
-
|
153 |
-
# Step 4: Correct overall grammar using TextBlob
|
154 |
-
final_text = correct_grammar(corrected_text)
|
155 |
-
|
156 |
-
return final_text
|
157 |
|
158 |
# Gradio app setup with two tabs
|
159 |
with gr.Blocks() as demo:
|
@@ -163,15 +166,13 @@ with gr.Blocks() as demo:
|
|
163 |
label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
|
164 |
score1 = gr.Textbox(lines=1, label='Prob')
|
165 |
|
166 |
-
# Connect the prediction function to the button
|
167 |
button1.click(predict_en, inputs=[t1], outputs=[label1, score1], api_name='predict_en')
|
168 |
|
169 |
with gr.Tab("Humanifier"):
|
170 |
text_input = gr.Textbox(lines=5, label="Input Text")
|
171 |
paraphrase_button = gr.Button("Paraphrase & Correct")
|
172 |
-
output_text = gr.Textbox(label="Paraphrased Text")
|
173 |
|
174 |
-
# Connect the paraphrasing function to the button
|
175 |
paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
|
176 |
|
177 |
# Launch the app
|
|
|
7 |
from nltk.corpus import wordnet
|
8 |
from textblob import TextBlob
|
9 |
from pattern.en import conjugate, lemma, pluralize, singularize
|
10 |
+
from gector.gec_model import GecBERTModel # Import GECToR Model
|
11 |
+
from utils.helpers import read_lines, normalize # GECToR utilities
|
12 |
|
13 |
# Initialize the English text classification pipeline for AI detection
|
14 |
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
|
|
|
86 |
|
87 |
return ' '.join(corrected_text)
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
# Function to correct overall grammar using TextBlob
|
90 |
+
def correct_grammar_textblob(text):
|
91 |
blob = TextBlob(text)
|
92 |
corrected_text = str(blob.correct()) # TextBlob's built-in grammar correction
|
93 |
return corrected_text
|
94 |
|
95 |
+
# Initialize GECToR Model for Grammar Correction
|
96 |
+
def load_gector_model():
|
97 |
+
model_path = ["gector/roberta_1_gector.th"] # Ensure model file is placed correctly
|
98 |
+
vocab_path = "output_vocabulary"
|
99 |
+
model = GecBERTModel(vocab_path=vocab_path,
|
100 |
+
model_paths=model_path,
|
101 |
+
max_len=50,
|
102 |
+
min_len=3,
|
103 |
+
iterations=5,
|
104 |
+
min_error_probability=0.0,
|
105 |
+
lowercase_tokens=0,
|
106 |
+
model_name="roberta",
|
107 |
+
special_tokens_fix=1,
|
108 |
+
log=False,
|
109 |
+
confidence=0,
|
110 |
+
del_confidence=0,
|
111 |
+
is_ensemble=False,
|
112 |
+
weigths=None)
|
113 |
+
return model
|
114 |
+
|
115 |
+
# Load the GECToR model
|
116 |
+
gector_model = load_gector_model()
|
117 |
+
|
118 |
+
# Function to correct grammar using GECToR
|
119 |
+
def correct_grammar_gector(text):
|
120 |
+
sentences = [text.split()]
|
121 |
+
corrected_sentences, _ = gector_model.handle_batch(sentences)
|
122 |
+
return " ".join(corrected_sentences[0])
|
123 |
+
|
124 |
# Paraphrasing function using SpaCy and NLTK (Humanifier)
|
125 |
def paraphrase_with_spacy_nltk(text):
|
126 |
doc = nlp(text)
|
|
|
146 |
else:
|
147 |
paraphrased_words.append(token.text)
|
148 |
|
149 |
+
return ' '.join(paraphrased_words)
|
|
|
|
|
|
|
150 |
|
151 |
# Combined function: Paraphrase -> Grammar Correction -> Capitalization (Humanifier)
|
152 |
def paraphrase_and_correct(text):
|
153 |
# Step 1: Paraphrase the text
|
154 |
paraphrased_text = paraphrase_with_spacy_nltk(text)
|
155 |
|
156 |
+
# Step 2: Apply grammatical corrections using GECToR
|
157 |
+
corrected_text = correct_grammar_gector(paraphrased_text)
|
|
|
|
|
158 |
|
159 |
+
return corrected_text
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
|
161 |
# Gradio app setup with two tabs
|
162 |
with gr.Blocks() as demo:
|
|
|
166 |
label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
|
167 |
score1 = gr.Textbox(lines=1, label='Prob')
|
168 |
|
|
|
169 |
button1.click(predict_en, inputs=[t1], outputs=[label1, score1], api_name='predict_en')
|
170 |
|
171 |
with gr.Tab("Humanifier"):
|
172 |
text_input = gr.Textbox(lines=5, label="Input Text")
|
173 |
paraphrase_button = gr.Button("Paraphrase & Correct")
|
174 |
+
output_text = gr.Textbox(label="Paraphrased and Corrected Text")
|
175 |
|
|
|
176 |
paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
|
177 |
|
178 |
# Launch the app
|
gector/bert_token_embedder.py
ADDED
@@ -0,0 +1,269 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Tweaked version of corresponding AllenNLP file"""
|
2 |
+
import logging
|
3 |
+
from copy import deepcopy
|
4 |
+
from typing import Dict
|
5 |
+
|
6 |
+
import torch
|
7 |
+
import torch.nn.functional as F
|
8 |
+
from allennlp.modules.token_embedders.token_embedder import TokenEmbedder
|
9 |
+
from allennlp.nn import util
|
10 |
+
from transformers import AutoModel, PreTrainedModel
|
11 |
+
|
12 |
+
logger = logging.getLogger(__name__)
|
13 |
+
|
14 |
+
|
15 |
+
class PretrainedBertModel:
|
16 |
+
"""
|
17 |
+
In some instances you may want to load the same BERT model twice
|
18 |
+
(e.g. to use as a token embedder and also as a pooling layer).
|
19 |
+
This factory provides a cache so that you don't actually have to load the model twice.
|
20 |
+
"""
|
21 |
+
|
22 |
+
_cache: Dict[str, PreTrainedModel] = {}
|
23 |
+
|
24 |
+
@classmethod
|
25 |
+
def load(cls, model_name: str, cache_model: bool = True) -> PreTrainedModel:
|
26 |
+
if model_name in cls._cache:
|
27 |
+
return PretrainedBertModel._cache[model_name]
|
28 |
+
|
29 |
+
model = AutoModel.from_pretrained(model_name)
|
30 |
+
if cache_model:
|
31 |
+
cls._cache[model_name] = model
|
32 |
+
|
33 |
+
return model
|
34 |
+
|
35 |
+
|
36 |
+
class BertEmbedder(TokenEmbedder):
|
37 |
+
"""
|
38 |
+
A ``TokenEmbedder`` that produces BERT embeddings for your tokens.
|
39 |
+
Should be paired with a ``BertIndexer``, which produces wordpiece ids.
|
40 |
+
Most likely you probably want to use ``PretrainedBertEmbedder``
|
41 |
+
for one of the named pretrained models, not this base class.
|
42 |
+
Parameters
|
43 |
+
----------
|
44 |
+
bert_model: ``BertModel``
|
45 |
+
The BERT model being wrapped.
|
46 |
+
top_layer_only: ``bool``, optional (default = ``False``)
|
47 |
+
If ``True``, then only return the top layer instead of apply the scalar mix.
|
48 |
+
max_pieces : int, optional (default: 512)
|
49 |
+
The BERT embedder uses positional embeddings and so has a corresponding
|
50 |
+
maximum length for its input ids. Assuming the inputs are windowed
|
51 |
+
and padded appropriately by this length, the embedder will split them into a
|
52 |
+
large batch, feed them into BERT, and recombine the output as if it was a
|
53 |
+
longer sequence.
|
54 |
+
num_start_tokens : int, optional (default: 1)
|
55 |
+
The number of starting special tokens input to BERT (usually 1, i.e., [CLS])
|
56 |
+
num_end_tokens : int, optional (default: 1)
|
57 |
+
The number of ending tokens input to BERT (usually 1, i.e., [SEP])
|
58 |
+
scalar_mix_parameters: ``List[float]``, optional, (default = None)
|
59 |
+
If not ``None``, use these scalar mix parameters to weight the representations
|
60 |
+
produced by different layers. These mixing weights are not updated during
|
61 |
+
training.
|
62 |
+
"""
|
63 |
+
|
64 |
+
def __init__(
|
65 |
+
self,
|
66 |
+
bert_model: PreTrainedModel,
|
67 |
+
top_layer_only: bool = False,
|
68 |
+
max_pieces: int = 512,
|
69 |
+
num_start_tokens: int = 1,
|
70 |
+
num_end_tokens: int = 1
|
71 |
+
) -> None:
|
72 |
+
super().__init__()
|
73 |
+
self.bert_model = deepcopy(bert_model)
|
74 |
+
self.output_dim = bert_model.config.hidden_size
|
75 |
+
self.max_pieces = max_pieces
|
76 |
+
self.num_start_tokens = num_start_tokens
|
77 |
+
self.num_end_tokens = num_end_tokens
|
78 |
+
self._scalar_mix = None
|
79 |
+
|
80 |
+
def set_weights(self, freeze):
|
81 |
+
for param in self.bert_model.parameters():
|
82 |
+
param.requires_grad = not freeze
|
83 |
+
return
|
84 |
+
|
85 |
+
def get_output_dim(self) -> int:
|
86 |
+
return self.output_dim
|
87 |
+
|
88 |
+
def forward(
|
89 |
+
self,
|
90 |
+
input_ids: torch.LongTensor,
|
91 |
+
offsets: torch.LongTensor = None
|
92 |
+
) -> torch.Tensor:
|
93 |
+
"""
|
94 |
+
Parameters
|
95 |
+
----------
|
96 |
+
input_ids : ``torch.LongTensor``
|
97 |
+
The (batch_size, ..., max_sequence_length) tensor of wordpiece ids.
|
98 |
+
offsets : ``torch.LongTensor``, optional
|
99 |
+
The BERT embeddings are one per wordpiece. However it's possible/likely
|
100 |
+
you might want one per original token. In that case, ``offsets``
|
101 |
+
represents the indices of the desired wordpiece for each original token.
|
102 |
+
Depending on how your token indexer is configured, this could be the
|
103 |
+
position of the last wordpiece for each token, or it could be the position
|
104 |
+
of the first wordpiece for each token.
|
105 |
+
For example, if you had the sentence "Definitely not", and if the corresponding
|
106 |
+
wordpieces were ["Def", "##in", "##ite", "##ly", "not"], then the input_ids
|
107 |
+
would be 5 wordpiece ids, and the "last wordpiece" offsets would be [3, 4].
|
108 |
+
If offsets are provided, the returned tensor will contain only the wordpiece
|
109 |
+
embeddings at those positions, and (in particular) will contain one embedding
|
110 |
+
per token. If offsets are not provided, the entire tensor of wordpiece embeddings
|
111 |
+
will be returned.
|
112 |
+
"""
|
113 |
+
|
114 |
+
batch_size, full_seq_len = input_ids.size(0), input_ids.size(-1)
|
115 |
+
initial_dims = list(input_ids.shape[:-1])
|
116 |
+
|
117 |
+
# The embedder may receive an input tensor that has a sequence length longer than can
|
118 |
+
# be fit. In that case, we should expect the wordpiece indexer to create padded windows
|
119 |
+
# of length `self.max_pieces` for us, and have them concatenated into one long sequence.
|
120 |
+
# E.g., "[CLS] I went to the [SEP] [CLS] to the store to [SEP] ..."
|
121 |
+
# We can then split the sequence into sub-sequences of that length, and concatenate them
|
122 |
+
# along the batch dimension so we effectively have one huge batch of partial sentences.
|
123 |
+
# This can then be fed into BERT without any sentence length issues. Keep in mind
|
124 |
+
# that the memory consumption can dramatically increase for large batches with extremely
|
125 |
+
# long sentences.
|
126 |
+
needs_split = full_seq_len > self.max_pieces
|
127 |
+
last_window_size = 0
|
128 |
+
if needs_split:
|
129 |
+
# Split the flattened list by the window size, `max_pieces`
|
130 |
+
split_input_ids = list(input_ids.split(self.max_pieces, dim=-1))
|
131 |
+
|
132 |
+
# We want all sequences to be the same length, so pad the last sequence
|
133 |
+
last_window_size = split_input_ids[-1].size(-1)
|
134 |
+
padding_amount = self.max_pieces - last_window_size
|
135 |
+
split_input_ids[-1] = F.pad(split_input_ids[-1], pad=[0, padding_amount], value=0)
|
136 |
+
|
137 |
+
# Now combine the sequences along the batch dimension
|
138 |
+
input_ids = torch.cat(split_input_ids, dim=0)
|
139 |
+
|
140 |
+
input_mask = (input_ids != 0).long()
|
141 |
+
# input_ids may have extra dimensions, so we reshape down to 2-d
|
142 |
+
# before calling the BERT model and then reshape back at the end.
|
143 |
+
all_encoder_layers = self.bert_model(
|
144 |
+
input_ids=util.combine_initial_dims(input_ids),
|
145 |
+
attention_mask=util.combine_initial_dims(input_mask),
|
146 |
+
)[0]
|
147 |
+
if len(all_encoder_layers[0].shape) == 3:
|
148 |
+
all_encoder_layers = torch.stack(all_encoder_layers)
|
149 |
+
elif len(all_encoder_layers[0].shape) == 2:
|
150 |
+
all_encoder_layers = torch.unsqueeze(all_encoder_layers, dim=0)
|
151 |
+
|
152 |
+
if needs_split:
|
153 |
+
# First, unpack the output embeddings into one long sequence again
|
154 |
+
unpacked_embeddings = torch.split(all_encoder_layers, batch_size, dim=1)
|
155 |
+
unpacked_embeddings = torch.cat(unpacked_embeddings, dim=2)
|
156 |
+
|
157 |
+
# Next, select indices of the sequence such that it will result in embeddings representing the original
|
158 |
+
# sentence. To capture maximal context, the indices will be the middle part of each embedded window
|
159 |
+
# sub-sequence (plus any leftover start and final edge windows), e.g.,
|
160 |
+
# 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
|
161 |
+
# "[CLS] I went to the very fine [SEP] [CLS] the very fine store to eat [SEP]"
|
162 |
+
# with max_pieces = 8 should produce max context indices [2, 3, 4, 10, 11, 12] with additional start
|
163 |
+
# and final windows with indices [0, 1] and [14, 15] respectively.
|
164 |
+
|
165 |
+
# Find the stride as half the max pieces, ignoring the special start and end tokens
|
166 |
+
# Calculate an offset to extract the centermost embeddings of each window
|
167 |
+
stride = (self.max_pieces - self.num_start_tokens - self.num_end_tokens) // 2
|
168 |
+
stride_offset = stride // 2 + self.num_start_tokens
|
169 |
+
|
170 |
+
first_window = list(range(stride_offset))
|
171 |
+
|
172 |
+
max_context_windows = [
|
173 |
+
i
|
174 |
+
for i in range(full_seq_len)
|
175 |
+
if stride_offset - 1 < i % self.max_pieces < stride_offset + stride
|
176 |
+
]
|
177 |
+
|
178 |
+
# Lookback what's left, unless it's the whole self.max_pieces window
|
179 |
+
if full_seq_len % self.max_pieces == 0:
|
180 |
+
lookback = self.max_pieces
|
181 |
+
else:
|
182 |
+
lookback = full_seq_len % self.max_pieces
|
183 |
+
|
184 |
+
final_window_start = full_seq_len - lookback + stride_offset + stride
|
185 |
+
final_window = list(range(final_window_start, full_seq_len))
|
186 |
+
|
187 |
+
select_indices = first_window + max_context_windows + final_window
|
188 |
+
|
189 |
+
initial_dims.append(len(select_indices))
|
190 |
+
|
191 |
+
recombined_embeddings = unpacked_embeddings[:, :, select_indices]
|
192 |
+
else:
|
193 |
+
recombined_embeddings = all_encoder_layers
|
194 |
+
|
195 |
+
# Recombine the outputs of all layers
|
196 |
+
# (layers, batch_size * d1 * ... * dn, sequence_length, embedding_dim)
|
197 |
+
# recombined = torch.cat(combined, dim=2)
|
198 |
+
input_mask = (recombined_embeddings != 0).long()
|
199 |
+
|
200 |
+
if self._scalar_mix is not None:
|
201 |
+
mix = self._scalar_mix(recombined_embeddings, input_mask)
|
202 |
+
else:
|
203 |
+
mix = recombined_embeddings[-1]
|
204 |
+
|
205 |
+
# At this point, mix is (batch_size * d1 * ... * dn, sequence_length, embedding_dim)
|
206 |
+
|
207 |
+
if offsets is None:
|
208 |
+
# Resize to (batch_size, d1, ..., dn, sequence_length, embedding_dim)
|
209 |
+
dims = initial_dims if needs_split else input_ids.size()
|
210 |
+
return util.uncombine_initial_dims(mix, dims)
|
211 |
+
else:
|
212 |
+
# offsets is (batch_size, d1, ..., dn, orig_sequence_length)
|
213 |
+
offsets2d = util.combine_initial_dims(offsets)
|
214 |
+
# now offsets is (batch_size * d1 * ... * dn, orig_sequence_length)
|
215 |
+
range_vector = util.get_range_vector(
|
216 |
+
offsets2d.size(0), device=util.get_device_of(mix)
|
217 |
+
).unsqueeze(1)
|
218 |
+
# selected embeddings is also (batch_size * d1 * ... * dn, orig_sequence_length)
|
219 |
+
selected_embeddings = mix[range_vector, offsets2d]
|
220 |
+
|
221 |
+
return util.uncombine_initial_dims(selected_embeddings, offsets.size())
|
222 |
+
|
223 |
+
|
224 |
+
# @TokenEmbedder.register("bert-pretrained")
|
225 |
+
class PretrainedBertEmbedder(BertEmbedder):
|
226 |
+
|
227 |
+
"""
|
228 |
+
Parameters
|
229 |
+
----------
|
230 |
+
pretrained_model: ``str``
|
231 |
+
Either the name of the pretrained model to use (e.g. 'bert-base-uncased'),
|
232 |
+
or the path to the .tar.gz file with the model weights.
|
233 |
+
If the name is a key in the list of pretrained models at
|
234 |
+
https://github.com/huggingface/pytorch-pretrained-BERT/blob/master/pytorch_pretrained_bert/modeling.py#L41
|
235 |
+
the corresponding path will be used; otherwise it will be interpreted as a path or URL.
|
236 |
+
requires_grad : ``bool``, optional (default = False)
|
237 |
+
If True, compute gradient of BERT parameters for fine tuning.
|
238 |
+
top_layer_only: ``bool``, optional (default = ``False``)
|
239 |
+
If ``True``, then only return the top layer instead of apply the scalar mix.
|
240 |
+
scalar_mix_parameters: ``List[float]``, optional, (default = None)
|
241 |
+
If not ``None``, use these scalar mix parameters to weight the representations
|
242 |
+
produced by different layers. These mixing weights are not updated during
|
243 |
+
training.
|
244 |
+
"""
|
245 |
+
|
246 |
+
def __init__(
|
247 |
+
self,
|
248 |
+
pretrained_model: str,
|
249 |
+
requires_grad: bool = False,
|
250 |
+
top_layer_only: bool = False,
|
251 |
+
special_tokens_fix: int = 0,
|
252 |
+
) -> None:
|
253 |
+
model = PretrainedBertModel.load(pretrained_model)
|
254 |
+
|
255 |
+
for param in model.parameters():
|
256 |
+
param.requires_grad = requires_grad
|
257 |
+
|
258 |
+
super().__init__(
|
259 |
+
bert_model=model,
|
260 |
+
top_layer_only=top_layer_only
|
261 |
+
)
|
262 |
+
|
263 |
+
if special_tokens_fix:
|
264 |
+
try:
|
265 |
+
vocab_size = self.bert_model.embeddings.word_embeddings.num_embeddings
|
266 |
+
except AttributeError:
|
267 |
+
# reserve more space
|
268 |
+
vocab_size = self.bert_model.word_embedding.num_embeddings + 5
|
269 |
+
self.bert_model.resize_token_embeddings(vocab_size + 1)
|
gector/datareader.py
ADDED
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Tweaked AllenNLP dataset reader."""
|
2 |
+
import logging
|
3 |
+
import re
|
4 |
+
from random import random
|
5 |
+
from typing import Dict, List
|
6 |
+
|
7 |
+
from allennlp.common.file_utils import cached_path
|
8 |
+
from allennlp.data.dataset_readers.dataset_reader import DatasetReader
|
9 |
+
from allennlp.data.fields import TextField, SequenceLabelField, MetadataField, Field
|
10 |
+
from allennlp.data.instance import Instance
|
11 |
+
from allennlp.data.token_indexers import TokenIndexer, SingleIdTokenIndexer
|
12 |
+
from allennlp.data.tokenizers import Token
|
13 |
+
from overrides import overrides
|
14 |
+
|
15 |
+
from utils.helpers import SEQ_DELIMETERS, START_TOKEN
|
16 |
+
|
17 |
+
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
|
18 |
+
|
19 |
+
|
20 |
+
@DatasetReader.register("seq2labels_datareader")
|
21 |
+
class Seq2LabelsDatasetReader(DatasetReader):
|
22 |
+
"""
|
23 |
+
Reads instances from a pretokenised file where each line is in the following format:
|
24 |
+
|
25 |
+
WORD###TAG [TAB] WORD###TAG [TAB] ..... \n
|
26 |
+
|
27 |
+
and converts it into a ``Dataset`` suitable for sequence tagging. You can also specify
|
28 |
+
alternative delimiters in the constructor.
|
29 |
+
|
30 |
+
Parameters
|
31 |
+
----------
|
32 |
+
delimiters: ``dict``
|
33 |
+
The dcitionary with all delimeters.
|
34 |
+
token_indexers : ``Dict[str, TokenIndexer]``, optional (default=``{"tokens": SingleIdTokenIndexer()}``)
|
35 |
+
We use this to define the input representation for the text. See :class:`TokenIndexer`.
|
36 |
+
Note that the `output` tags will always correspond to single token IDs based on how they
|
37 |
+
are pre-tokenised in the data file.
|
38 |
+
max_len: if set than will truncate long sentences
|
39 |
+
"""
|
40 |
+
# fix broken sentences mostly in Lang8
|
41 |
+
BROKEN_SENTENCES_REGEXP = re.compile(r'\.[a-zA-RT-Z]')
|
42 |
+
|
43 |
+
def __init__(self,
|
44 |
+
token_indexers: Dict[str, TokenIndexer] = None,
|
45 |
+
delimeters: dict = SEQ_DELIMETERS,
|
46 |
+
skip_correct: bool = False,
|
47 |
+
skip_complex: int = 0,
|
48 |
+
lazy: bool = False,
|
49 |
+
max_len: int = None,
|
50 |
+
test_mode: bool = False,
|
51 |
+
tag_strategy: str = "keep_one",
|
52 |
+
tn_prob: float = 0,
|
53 |
+
tp_prob: float = 0,
|
54 |
+
broken_dot_strategy: str = "keep") -> None:
|
55 |
+
super().__init__(lazy)
|
56 |
+
self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
|
57 |
+
self._delimeters = delimeters
|
58 |
+
self._max_len = max_len
|
59 |
+
self._skip_correct = skip_correct
|
60 |
+
self._skip_complex = skip_complex
|
61 |
+
self._tag_strategy = tag_strategy
|
62 |
+
self._broken_dot_strategy = broken_dot_strategy
|
63 |
+
self._test_mode = test_mode
|
64 |
+
self._tn_prob = tn_prob
|
65 |
+
self._tp_prob = tp_prob
|
66 |
+
|
67 |
+
@overrides
|
68 |
+
def _read(self, file_path):
|
69 |
+
# if `file_path` is a URL, redirect to the cache
|
70 |
+
file_path = cached_path(file_path)
|
71 |
+
with open(file_path, "r") as data_file:
|
72 |
+
logger.info("Reading instances from lines in file at: %s", file_path)
|
73 |
+
for line in data_file:
|
74 |
+
line = line.strip("\n")
|
75 |
+
# skip blank and broken lines
|
76 |
+
if not line or (not self._test_mode and self._broken_dot_strategy == 'skip'
|
77 |
+
and self.BROKEN_SENTENCES_REGEXP.search(line) is not None):
|
78 |
+
continue
|
79 |
+
|
80 |
+
tokens_and_tags = [pair.rsplit(self._delimeters['labels'], 1)
|
81 |
+
for pair in line.split(self._delimeters['tokens'])]
|
82 |
+
try:
|
83 |
+
tokens = [Token(token) for token, tag in tokens_and_tags]
|
84 |
+
tags = [tag for token, tag in tokens_and_tags]
|
85 |
+
except ValueError:
|
86 |
+
tokens = [Token(token[0]) for token in tokens_and_tags]
|
87 |
+
tags = None
|
88 |
+
|
89 |
+
if tokens and tokens[0] != Token(START_TOKEN):
|
90 |
+
tokens = [Token(START_TOKEN)] + tokens
|
91 |
+
|
92 |
+
words = [x.text for x in tokens]
|
93 |
+
if self._max_len is not None:
|
94 |
+
tokens = tokens[:self._max_len]
|
95 |
+
tags = None if tags is None else tags[:self._max_len]
|
96 |
+
instance = self.text_to_instance(tokens, tags, words)
|
97 |
+
if instance:
|
98 |
+
yield instance
|
99 |
+
|
100 |
+
def extract_tags(self, tags: List[str]):
|
101 |
+
op_del = self._delimeters['operations']
|
102 |
+
|
103 |
+
labels = [x.split(op_del) for x in tags]
|
104 |
+
|
105 |
+
comlex_flag_dict = {}
|
106 |
+
# get flags
|
107 |
+
for i in range(5):
|
108 |
+
idx = i + 1
|
109 |
+
comlex_flag_dict[idx] = sum([len(x) > idx for x in labels])
|
110 |
+
|
111 |
+
if self._tag_strategy == "keep_one":
|
112 |
+
# get only first candidates for r_tags in right and the last for left
|
113 |
+
labels = [x[0] for x in labels]
|
114 |
+
elif self._tag_strategy == "merge_all":
|
115 |
+
# consider phrases as a words
|
116 |
+
pass
|
117 |
+
else:
|
118 |
+
raise Exception("Incorrect tag strategy")
|
119 |
+
|
120 |
+
detect_tags = ["CORRECT" if label == "$KEEP" else "INCORRECT" for label in labels]
|
121 |
+
return labels, detect_tags, comlex_flag_dict
|
122 |
+
|
123 |
+
def text_to_instance(self, tokens: List[Token], tags: List[str] = None,
|
124 |
+
words: List[str] = None) -> Instance: # type: ignore
|
125 |
+
"""
|
126 |
+
We take `pre-tokenized` input here, because we don't have a tokenizer in this class.
|
127 |
+
"""
|
128 |
+
# pylint: disable=arguments-differ
|
129 |
+
fields: Dict[str, Field] = {}
|
130 |
+
sequence = TextField(tokens, self._token_indexers)
|
131 |
+
fields["tokens"] = sequence
|
132 |
+
fields["metadata"] = MetadataField({"words": words})
|
133 |
+
if tags is not None:
|
134 |
+
labels, detect_tags, complex_flag_dict = self.extract_tags(tags)
|
135 |
+
if self._skip_complex and complex_flag_dict[self._skip_complex] > 0:
|
136 |
+
return None
|
137 |
+
rnd = random()
|
138 |
+
# skip TN
|
139 |
+
if self._skip_correct and all(x == "CORRECT" for x in detect_tags):
|
140 |
+
if rnd > self._tn_prob:
|
141 |
+
return None
|
142 |
+
# skip TP
|
143 |
+
else:
|
144 |
+
if rnd > self._tp_prob:
|
145 |
+
return None
|
146 |
+
|
147 |
+
fields["labels"] = SequenceLabelField(labels, sequence,
|
148 |
+
label_namespace="labels")
|
149 |
+
fields["d_tags"] = SequenceLabelField(detect_tags, sequence,
|
150 |
+
label_namespace="d_tags")
|
151 |
+
return Instance(fields)
|
gector/gec_model.py
ADDED
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Wrapper of AllenNLP model. Fixes errors based on model predictions"""
|
2 |
+
import logging
|
3 |
+
import os
|
4 |
+
import sys
|
5 |
+
from time import time
|
6 |
+
|
7 |
+
import torch
|
8 |
+
from allennlp.data.dataset import Batch
|
9 |
+
from allennlp.data.fields import TextField
|
10 |
+
from allennlp.data.instance import Instance
|
11 |
+
from allennlp.data.tokenizers import Token
|
12 |
+
from allennlp.data.vocabulary import Vocabulary
|
13 |
+
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
|
14 |
+
from allennlp.nn import util
|
15 |
+
|
16 |
+
from gector.bert_token_embedder import PretrainedBertEmbedder
|
17 |
+
from gector.seq2labels_model import Seq2Labels
|
18 |
+
from gector.tokenizer_indexer import PretrainedBertIndexer
|
19 |
+
from utils.helpers import PAD, UNK, get_target_sent_by_edits, START_TOKEN
|
20 |
+
from utils.helpers import get_weights_name
|
21 |
+
|
22 |
+
logging.getLogger("werkzeug").setLevel(logging.ERROR)
|
23 |
+
logger = logging.getLogger(__file__)
|
24 |
+
|
25 |
+
|
26 |
+
class GecBERTModel(object):
|
27 |
+
def __init__(self, vocab_path=None, model_paths=None,
|
28 |
+
weigths=None,
|
29 |
+
max_len=50,
|
30 |
+
min_len=3,
|
31 |
+
lowercase_tokens=False,
|
32 |
+
log=False,
|
33 |
+
iterations=3,
|
34 |
+
model_name='roberta',
|
35 |
+
special_tokens_fix=1,
|
36 |
+
is_ensemble=True,
|
37 |
+
min_error_probability=0.0,
|
38 |
+
confidence=0,
|
39 |
+
del_confidence=0,
|
40 |
+
resolve_cycles=False,
|
41 |
+
):
|
42 |
+
self.model_weights = list(map(float, weigths)) if weigths else [1] * len(model_paths)
|
43 |
+
self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
44 |
+
self.max_len = max_len
|
45 |
+
self.min_len = min_len
|
46 |
+
self.lowercase_tokens = lowercase_tokens
|
47 |
+
self.min_error_probability = min_error_probability
|
48 |
+
self.vocab = Vocabulary.from_files(vocab_path)
|
49 |
+
self.log = log
|
50 |
+
self.iterations = iterations
|
51 |
+
self.confidence = confidence
|
52 |
+
self.del_conf = del_confidence
|
53 |
+
self.resolve_cycles = resolve_cycles
|
54 |
+
# set training parameters and operations
|
55 |
+
|
56 |
+
self.indexers = []
|
57 |
+
self.models = []
|
58 |
+
for model_path in model_paths:
|
59 |
+
if is_ensemble:
|
60 |
+
model_name, special_tokens_fix = self._get_model_data(model_path)
|
61 |
+
weights_name = get_weights_name(model_name, lowercase_tokens)
|
62 |
+
self.indexers.append(self._get_indexer(weights_name, special_tokens_fix))
|
63 |
+
model = Seq2Labels(vocab=self.vocab,
|
64 |
+
text_field_embedder=self._get_embbeder(weights_name, special_tokens_fix),
|
65 |
+
confidence=self.confidence,
|
66 |
+
del_confidence=self.del_conf,
|
67 |
+
).to(self.device)
|
68 |
+
if torch.cuda.is_available():
|
69 |
+
model.load_state_dict(torch.load(model_path), strict=False)
|
70 |
+
else:
|
71 |
+
model.load_state_dict(torch.load(model_path,
|
72 |
+
map_location=torch.device('cpu')),
|
73 |
+
strict=False)
|
74 |
+
model.eval()
|
75 |
+
self.models.append(model)
|
76 |
+
|
77 |
+
@staticmethod
|
78 |
+
def _get_model_data(model_path):
|
79 |
+
model_name = model_path.split('/')[-1]
|
80 |
+
tr_model, stf = model_name.split('_')[:2]
|
81 |
+
return tr_model, int(stf)
|
82 |
+
|
83 |
+
def _restore_model(self, input_path):
|
84 |
+
if os.path.isdir(input_path):
|
85 |
+
print("Model could not be restored from directory", file=sys.stderr)
|
86 |
+
filenames = []
|
87 |
+
else:
|
88 |
+
filenames = [input_path]
|
89 |
+
for model_path in filenames:
|
90 |
+
try:
|
91 |
+
if torch.cuda.is_available():
|
92 |
+
loaded_model = torch.load(model_path)
|
93 |
+
else:
|
94 |
+
loaded_model = torch.load(model_path,
|
95 |
+
map_location=lambda storage,
|
96 |
+
loc: storage)
|
97 |
+
except:
|
98 |
+
print(f"{model_path} is not valid model", file=sys.stderr)
|
99 |
+
own_state = self.model.state_dict()
|
100 |
+
for name, weights in loaded_model.items():
|
101 |
+
if name not in own_state:
|
102 |
+
continue
|
103 |
+
try:
|
104 |
+
if len(filenames) == 1:
|
105 |
+
own_state[name].copy_(weights)
|
106 |
+
else:
|
107 |
+
own_state[name] += weights
|
108 |
+
except RuntimeError:
|
109 |
+
continue
|
110 |
+
print("Model is restored", file=sys.stderr)
|
111 |
+
|
112 |
+
def predict(self, batches):
|
113 |
+
t11 = time()
|
114 |
+
predictions = []
|
115 |
+
for batch, model in zip(batches, self.models):
|
116 |
+
batch = util.move_to_device(batch.as_tensor_dict(), 0 if torch.cuda.is_available() else -1)
|
117 |
+
with torch.no_grad():
|
118 |
+
prediction = model.forward(**batch)
|
119 |
+
predictions.append(prediction)
|
120 |
+
|
121 |
+
preds, idx, error_probs = self._convert(predictions)
|
122 |
+
t55 = time()
|
123 |
+
if self.log:
|
124 |
+
print(f"Inference time {t55 - t11}")
|
125 |
+
return preds, idx, error_probs
|
126 |
+
|
127 |
+
def get_token_action(self, token, index, prob, sugg_token):
|
128 |
+
"""Get lost of suggested actions for token."""
|
129 |
+
# cases when we don't need to do anything
|
130 |
+
if prob < self.min_error_probability or sugg_token in [UNK, PAD, '$KEEP']:
|
131 |
+
return None
|
132 |
+
|
133 |
+
if sugg_token.startswith('$REPLACE_') or sugg_token.startswith('$TRANSFORM_') or sugg_token == '$DELETE':
|
134 |
+
start_pos = index
|
135 |
+
end_pos = index + 1
|
136 |
+
elif sugg_token.startswith("$APPEND_") or sugg_token.startswith("$MERGE_"):
|
137 |
+
start_pos = index + 1
|
138 |
+
end_pos = index + 1
|
139 |
+
|
140 |
+
if sugg_token == "$DELETE":
|
141 |
+
sugg_token_clear = ""
|
142 |
+
elif sugg_token.startswith('$TRANSFORM_') or sugg_token.startswith("$MERGE_"):
|
143 |
+
sugg_token_clear = sugg_token[:]
|
144 |
+
else:
|
145 |
+
sugg_token_clear = sugg_token[sugg_token.index('_') + 1:]
|
146 |
+
|
147 |
+
return start_pos - 1, end_pos - 1, sugg_token_clear, prob
|
148 |
+
|
149 |
+
def _get_embbeder(self, weigths_name, special_tokens_fix):
|
150 |
+
embedders = {'bert': PretrainedBertEmbedder(
|
151 |
+
pretrained_model=weigths_name,
|
152 |
+
requires_grad=False,
|
153 |
+
top_layer_only=True,
|
154 |
+
special_tokens_fix=special_tokens_fix)
|
155 |
+
}
|
156 |
+
text_field_embedder = BasicTextFieldEmbedder(
|
157 |
+
token_embedders=embedders,
|
158 |
+
embedder_to_indexer_map={"bert": ["bert", "bert-offsets"]},
|
159 |
+
allow_unmatched_keys=True)
|
160 |
+
return text_field_embedder
|
161 |
+
|
162 |
+
def _get_indexer(self, weights_name, special_tokens_fix):
|
163 |
+
bert_token_indexer = PretrainedBertIndexer(
|
164 |
+
pretrained_model=weights_name,
|
165 |
+
do_lowercase=self.lowercase_tokens,
|
166 |
+
max_pieces_per_token=5,
|
167 |
+
special_tokens_fix=special_tokens_fix
|
168 |
+
)
|
169 |
+
return {'bert': bert_token_indexer}
|
170 |
+
|
171 |
+
def preprocess(self, token_batch):
|
172 |
+
seq_lens = [len(sequence) for sequence in token_batch if sequence]
|
173 |
+
if not seq_lens:
|
174 |
+
return []
|
175 |
+
max_len = min(max(seq_lens), self.max_len)
|
176 |
+
batches = []
|
177 |
+
for indexer in self.indexers:
|
178 |
+
batch = []
|
179 |
+
for sequence in token_batch:
|
180 |
+
tokens = sequence[:max_len]
|
181 |
+
tokens = [Token(token) for token in ['$START'] + tokens]
|
182 |
+
batch.append(Instance({'tokens': TextField(tokens, indexer)}))
|
183 |
+
batch = Batch(batch)
|
184 |
+
batch.index_instances(self.vocab)
|
185 |
+
batches.append(batch)
|
186 |
+
|
187 |
+
return batches
|
188 |
+
|
189 |
+
def _convert(self, data):
|
190 |
+
all_class_probs = torch.zeros_like(data[0]['class_probabilities_labels'])
|
191 |
+
error_probs = torch.zeros_like(data[0]['max_error_probability'])
|
192 |
+
for output, weight in zip(data, self.model_weights):
|
193 |
+
all_class_probs += weight * output['class_probabilities_labels'] / sum(self.model_weights)
|
194 |
+
error_probs += weight * output['max_error_probability'] / sum(self.model_weights)
|
195 |
+
|
196 |
+
max_vals = torch.max(all_class_probs, dim=-1)
|
197 |
+
probs = max_vals[0].tolist()
|
198 |
+
idx = max_vals[1].tolist()
|
199 |
+
return probs, idx, error_probs.tolist()
|
200 |
+
|
201 |
+
def update_final_batch(self, final_batch, pred_ids, pred_batch,
|
202 |
+
prev_preds_dict):
|
203 |
+
new_pred_ids = []
|
204 |
+
total_updated = 0
|
205 |
+
for i, orig_id in enumerate(pred_ids):
|
206 |
+
orig = final_batch[orig_id]
|
207 |
+
pred = pred_batch[i]
|
208 |
+
prev_preds = prev_preds_dict[orig_id]
|
209 |
+
if orig != pred and pred not in prev_preds:
|
210 |
+
final_batch[orig_id] = pred
|
211 |
+
new_pred_ids.append(orig_id)
|
212 |
+
prev_preds_dict[orig_id].append(pred)
|
213 |
+
total_updated += 1
|
214 |
+
elif orig != pred and pred in prev_preds:
|
215 |
+
# update final batch, but stop iterations
|
216 |
+
final_batch[orig_id] = pred
|
217 |
+
total_updated += 1
|
218 |
+
else:
|
219 |
+
continue
|
220 |
+
return final_batch, new_pred_ids, total_updated
|
221 |
+
|
222 |
+
def postprocess_batch(self, batch, all_probabilities, all_idxs,
|
223 |
+
error_probs):
|
224 |
+
all_results = []
|
225 |
+
noop_index = self.vocab.get_token_index("$KEEP", "labels")
|
226 |
+
for tokens, probabilities, idxs, error_prob in zip(batch,
|
227 |
+
all_probabilities,
|
228 |
+
all_idxs,
|
229 |
+
error_probs):
|
230 |
+
length = min(len(tokens), self.max_len)
|
231 |
+
edits = []
|
232 |
+
|
233 |
+
# skip whole sentences if there no errors
|
234 |
+
if max(idxs) == 0:
|
235 |
+
all_results.append(tokens)
|
236 |
+
continue
|
237 |
+
|
238 |
+
# skip whole sentence if probability of correctness is not high
|
239 |
+
if error_prob < self.min_error_probability:
|
240 |
+
all_results.append(tokens)
|
241 |
+
continue
|
242 |
+
|
243 |
+
for i in range(length + 1):
|
244 |
+
# because of START token
|
245 |
+
if i == 0:
|
246 |
+
token = START_TOKEN
|
247 |
+
else:
|
248 |
+
token = tokens[i - 1]
|
249 |
+
# skip if there is no error
|
250 |
+
if idxs[i] == noop_index:
|
251 |
+
continue
|
252 |
+
|
253 |
+
sugg_token = self.vocab.get_token_from_index(idxs[i],
|
254 |
+
namespace='labels')
|
255 |
+
action = self.get_token_action(token, i, probabilities[i],
|
256 |
+
sugg_token)
|
257 |
+
if not action:
|
258 |
+
continue
|
259 |
+
|
260 |
+
edits.append(action)
|
261 |
+
all_results.append(get_target_sent_by_edits(tokens, edits))
|
262 |
+
return all_results
|
263 |
+
|
264 |
+
def handle_batch(self, full_batch):
|
265 |
+
"""
|
266 |
+
Handle batch of requests.
|
267 |
+
"""
|
268 |
+
final_batch = full_batch[:]
|
269 |
+
batch_size = len(full_batch)
|
270 |
+
prev_preds_dict = {i: [final_batch[i]] for i in range(len(final_batch))}
|
271 |
+
short_ids = [i for i in range(len(full_batch))
|
272 |
+
if len(full_batch[i]) < self.min_len]
|
273 |
+
pred_ids = [i for i in range(len(full_batch)) if i not in short_ids]
|
274 |
+
total_updates = 0
|
275 |
+
|
276 |
+
for n_iter in range(self.iterations):
|
277 |
+
orig_batch = [final_batch[i] for i in pred_ids]
|
278 |
+
|
279 |
+
sequences = self.preprocess(orig_batch)
|
280 |
+
|
281 |
+
if not sequences:
|
282 |
+
break
|
283 |
+
probabilities, idxs, error_probs = self.predict(sequences)
|
284 |
+
|
285 |
+
pred_batch = self.postprocess_batch(orig_batch, probabilities,
|
286 |
+
idxs, error_probs)
|
287 |
+
if self.log:
|
288 |
+
print(f"Iteration {n_iter + 1}. Predicted {round(100*len(pred_ids)/batch_size, 1)}% of sentences.")
|
289 |
+
|
290 |
+
final_batch, pred_ids, cnt = \
|
291 |
+
self.update_final_batch(final_batch, pred_ids, pred_batch,
|
292 |
+
prev_preds_dict)
|
293 |
+
total_updates += cnt
|
294 |
+
|
295 |
+
if not pred_ids:
|
296 |
+
break
|
297 |
+
|
298 |
+
return final_batch, total_updates
|
gector/seq2labels_model.py
ADDED
@@ -0,0 +1,194 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Basic model. Predicts tags for every token"""
|
2 |
+
from typing import Dict, Optional, List, Any
|
3 |
+
|
4 |
+
import numpy
|
5 |
+
import torch
|
6 |
+
import torch.nn.functional as F
|
7 |
+
from allennlp.data import Vocabulary
|
8 |
+
from allennlp.models.model import Model
|
9 |
+
from allennlp.modules import TimeDistributed, TextFieldEmbedder
|
10 |
+
from allennlp.nn import InitializerApplicator, RegularizerApplicator
|
11 |
+
from allennlp.nn.util import get_text_field_mask, sequence_cross_entropy_with_logits
|
12 |
+
from allennlp.training.metrics import CategoricalAccuracy
|
13 |
+
from overrides import overrides
|
14 |
+
from torch.nn.modules.linear import Linear
|
15 |
+
|
16 |
+
|
17 |
+
@Model.register("seq2labels")
|
18 |
+
class Seq2Labels(Model):
|
19 |
+
"""
|
20 |
+
This ``Seq2Labels`` simply encodes a sequence of text with a stacked ``Seq2SeqEncoder``, then
|
21 |
+
predicts a tag (or couple tags) for each token in the sequence.
|
22 |
+
|
23 |
+
Parameters
|
24 |
+
----------
|
25 |
+
vocab : ``Vocabulary``, required
|
26 |
+
A Vocabulary, required in order to compute sizes for input/output projections.
|
27 |
+
text_field_embedder : ``TextFieldEmbedder``, required
|
28 |
+
Used to embed the ``tokens`` ``TextField`` we get as input to the model.
|
29 |
+
encoder : ``Seq2SeqEncoder``
|
30 |
+
The encoder (with its own internal stacking) that we will use in between embedding tokens
|
31 |
+
and predicting output tags.
|
32 |
+
calculate_span_f1 : ``bool``, optional (default=``None``)
|
33 |
+
Calculate span-level F1 metrics during training. If this is ``True``, then
|
34 |
+
``label_encoding`` is required. If ``None`` and
|
35 |
+
label_encoding is specified, this is set to ``True``.
|
36 |
+
If ``None`` and label_encoding is not specified, it defaults
|
37 |
+
to ``False``.
|
38 |
+
label_encoding : ``str``, optional (default=``None``)
|
39 |
+
Label encoding to use when calculating span f1.
|
40 |
+
Valid options are "BIO", "BIOUL", "IOB1", "BMES".
|
41 |
+
Required if ``calculate_span_f1`` is true.
|
42 |
+
labels_namespace : ``str``, optional (default=``labels``)
|
43 |
+
This is needed to compute the SpanBasedF1Measure metric, if desired.
|
44 |
+
Unless you did something unusual, the default value should be what you want.
|
45 |
+
verbose_metrics : ``bool``, optional (default = False)
|
46 |
+
If true, metrics will be returned per label class in addition
|
47 |
+
to the overall statistics.
|
48 |
+
initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
|
49 |
+
Used to initialize the model parameters.
|
50 |
+
regularizer : ``RegularizerApplicator``, optional (default=``None``)
|
51 |
+
If provided, will be used to calculate the regularization penalty during training.
|
52 |
+
"""
|
53 |
+
|
54 |
+
def __init__(self, vocab: Vocabulary,
|
55 |
+
text_field_embedder: TextFieldEmbedder,
|
56 |
+
predictor_dropout=0.0,
|
57 |
+
labels_namespace: str = "labels",
|
58 |
+
detect_namespace: str = "d_tags",
|
59 |
+
verbose_metrics: bool = False,
|
60 |
+
label_smoothing: float = 0.0,
|
61 |
+
confidence: float = 0.0,
|
62 |
+
del_confidence: float = 0.0,
|
63 |
+
initializer: InitializerApplicator = InitializerApplicator(),
|
64 |
+
regularizer: Optional[RegularizerApplicator] = None) -> None:
|
65 |
+
super(Seq2Labels, self).__init__(vocab, regularizer)
|
66 |
+
|
67 |
+
self.label_namespaces = [labels_namespace,
|
68 |
+
detect_namespace]
|
69 |
+
self.text_field_embedder = text_field_embedder
|
70 |
+
self.num_labels_classes = self.vocab.get_vocab_size(labels_namespace)
|
71 |
+
self.num_detect_classes = self.vocab.get_vocab_size(detect_namespace)
|
72 |
+
self.label_smoothing = label_smoothing
|
73 |
+
self.confidence = confidence
|
74 |
+
self.del_conf = del_confidence
|
75 |
+
self.incorr_index = self.vocab.get_token_index("INCORRECT",
|
76 |
+
namespace=detect_namespace)
|
77 |
+
|
78 |
+
self._verbose_metrics = verbose_metrics
|
79 |
+
self.predictor_dropout = TimeDistributed(torch.nn.Dropout(predictor_dropout))
|
80 |
+
|
81 |
+
self.tag_labels_projection_layer = TimeDistributed(
|
82 |
+
Linear(text_field_embedder._token_embedders['bert'].get_output_dim(), self.num_labels_classes))
|
83 |
+
|
84 |
+
self.tag_detect_projection_layer = TimeDistributed(
|
85 |
+
Linear(text_field_embedder._token_embedders['bert'].get_output_dim(), self.num_detect_classes))
|
86 |
+
|
87 |
+
self.metrics = {"accuracy": CategoricalAccuracy()}
|
88 |
+
|
89 |
+
initializer(self)
|
90 |
+
|
91 |
+
@overrides
|
92 |
+
def forward(self, # type: ignore
|
93 |
+
tokens: Dict[str, torch.LongTensor],
|
94 |
+
labels: torch.LongTensor = None,
|
95 |
+
d_tags: torch.LongTensor = None,
|
96 |
+
metadata: List[Dict[str, Any]] = None) -> Dict[str, torch.Tensor]:
|
97 |
+
# pylint: disable=arguments-differ
|
98 |
+
"""
|
99 |
+
Parameters
|
100 |
+
----------
|
101 |
+
tokens : Dict[str, torch.LongTensor], required
|
102 |
+
The output of ``TextField.as_array()``, which should typically be passed directly to a
|
103 |
+
``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
|
104 |
+
tensors. At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
|
105 |
+
Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
|
106 |
+
for the ``TokenIndexers`` when you created the ``TextField`` representing your
|
107 |
+
sequence. The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
|
108 |
+
which knows how to combine different word representations into a single vector per
|
109 |
+
token in your input.
|
110 |
+
labels : torch.LongTensor, optional (default = None)
|
111 |
+
A torch tensor representing the sequence of integer gold class labels of shape
|
112 |
+
``(batch_size, num_tokens)``.
|
113 |
+
d_tags : torch.LongTensor, optional (default = None)
|
114 |
+
A torch tensor representing the sequence of integer gold class labels of shape
|
115 |
+
``(batch_size, num_tokens)``.
|
116 |
+
metadata : ``List[Dict[str, Any]]``, optional, (default = None)
|
117 |
+
metadata containing the original words in the sentence to be tagged under a 'words' key.
|
118 |
+
|
119 |
+
Returns
|
120 |
+
-------
|
121 |
+
An output dictionary consisting of:
|
122 |
+
logits : torch.FloatTensor
|
123 |
+
A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
|
124 |
+
unnormalised log probabilities of the tag classes.
|
125 |
+
class_probabilities : torch.FloatTensor
|
126 |
+
A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
|
127 |
+
a distribution of the tag classes per word.
|
128 |
+
loss : torch.FloatTensor, optional
|
129 |
+
A scalar loss to be optimised.
|
130 |
+
|
131 |
+
"""
|
132 |
+
encoded_text = self.text_field_embedder(tokens)
|
133 |
+
batch_size, sequence_length, _ = encoded_text.size()
|
134 |
+
mask = get_text_field_mask(tokens)
|
135 |
+
logits_labels = self.tag_labels_projection_layer(self.predictor_dropout(encoded_text))
|
136 |
+
logits_d = self.tag_detect_projection_layer(encoded_text)
|
137 |
+
|
138 |
+
class_probabilities_labels = F.softmax(logits_labels, dim=-1).view(
|
139 |
+
[batch_size, sequence_length, self.num_labels_classes])
|
140 |
+
class_probabilities_d = F.softmax(logits_d, dim=-1).view(
|
141 |
+
[batch_size, sequence_length, self.num_detect_classes])
|
142 |
+
error_probs = class_probabilities_d[:, :, self.incorr_index] * mask
|
143 |
+
incorr_prob = torch.max(error_probs, dim=-1)[0]
|
144 |
+
|
145 |
+
probability_change = [self.confidence, self.del_conf] + [0] * (self.num_labels_classes - 2)
|
146 |
+
class_probabilities_labels += torch.FloatTensor(probability_change).repeat(
|
147 |
+
(batch_size, sequence_length, 1)).to(class_probabilities_labels.device)
|
148 |
+
|
149 |
+
output_dict = {"logits_labels": logits_labels,
|
150 |
+
"logits_d_tags": logits_d,
|
151 |
+
"class_probabilities_labels": class_probabilities_labels,
|
152 |
+
"class_probabilities_d_tags": class_probabilities_d,
|
153 |
+
"max_error_probability": incorr_prob}
|
154 |
+
if labels is not None and d_tags is not None:
|
155 |
+
loss_labels = sequence_cross_entropy_with_logits(logits_labels, labels, mask,
|
156 |
+
label_smoothing=self.label_smoothing)
|
157 |
+
loss_d = sequence_cross_entropy_with_logits(logits_d, d_tags, mask)
|
158 |
+
for metric in self.metrics.values():
|
159 |
+
metric(logits_labels, labels, mask.float())
|
160 |
+
metric(logits_d, d_tags, mask.float())
|
161 |
+
output_dict["loss"] = loss_labels + loss_d
|
162 |
+
|
163 |
+
if metadata is not None:
|
164 |
+
output_dict["words"] = [x["words"] for x in metadata]
|
165 |
+
return output_dict
|
166 |
+
|
167 |
+
@overrides
|
168 |
+
def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
|
169 |
+
"""
|
170 |
+
Does a simple position-wise argmax over each token, converts indices to string labels, and
|
171 |
+
adds a ``"tags"`` key to the dictionary with the result.
|
172 |
+
"""
|
173 |
+
for label_namespace in self.label_namespaces:
|
174 |
+
all_predictions = output_dict[f'class_probabilities_{label_namespace}']
|
175 |
+
all_predictions = all_predictions.cpu().data.numpy()
|
176 |
+
if all_predictions.ndim == 3:
|
177 |
+
predictions_list = [all_predictions[i] for i in range(all_predictions.shape[0])]
|
178 |
+
else:
|
179 |
+
predictions_list = [all_predictions]
|
180 |
+
all_tags = []
|
181 |
+
|
182 |
+
for predictions in predictions_list:
|
183 |
+
argmax_indices = numpy.argmax(predictions, axis=-1)
|
184 |
+
tags = [self.vocab.get_token_from_index(x, namespace=label_namespace)
|
185 |
+
for x in argmax_indices]
|
186 |
+
all_tags.append(tags)
|
187 |
+
output_dict[f'{label_namespace}'] = all_tags
|
188 |
+
return output_dict
|
189 |
+
|
190 |
+
@overrides
|
191 |
+
def get_metrics(self, reset: bool = False) -> Dict[str, float]:
|
192 |
+
metrics_to_return = {metric_name: metric.get_metric(reset) for
|
193 |
+
metric_name, metric in self.metrics.items()}
|
194 |
+
return metrics_to_return
|
gector/tokenization.py
ADDED
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from time import time
|
3 |
+
|
4 |
+
|
5 |
+
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
|
6 |
+
|
7 |
+
|
8 |
+
def get_bpe_groups(token_offsets, bpe_offsets, input_ids, max_bpe_pieces=5):
|
9 |
+
bpe_groups = []
|
10 |
+
last_used_bpe = 0
|
11 |
+
# find the size of offsets
|
12 |
+
if (0, 0) in bpe_offsets:
|
13 |
+
        bpe_size = bpe_offsets.index((0, 0))
    else:
        bpe_size = len(bpe_offsets)

    saved_ids = [i for i in range(len(input_ids))]
    redundant_ids = []
    for token_offset in token_offsets:
        start_token, end_token = token_offset
        bpe_group = []
        mapping_is_found = False
        for i in range(last_used_bpe, bpe_size):
            start_bpe, end_bpe = bpe_offsets[i]
            if start_bpe >= start_token and end_bpe <= end_token:
                # check if bpe_group satisfies the max_bpe_pieces constraint
                if len(bpe_group) < max_bpe_pieces:
                    bpe_group.append(i)
                else:
                    redundant_ids.append(i)
                last_used_bpe = i + 1
                mapping_is_found = True
            elif mapping_is_found:
                # stop doing useless iterations
                break
            else:
                continue
        bpe_groups.append(bpe_group)
    saved_ids = [i for i in saved_ids if i not in redundant_ids]
    return bpe_groups, saved_ids


def reduce_input_ids(input_ids, bpe_groups, saved_ids,
                     max_bpe_length=80, max_bpe_pieces=5):
    # check if the sequence satisfies the max_bpe_length constraint
    while len(saved_ids) > max_bpe_length:
        max_bpe_pieces -= 1
        for token_id in range(len(bpe_groups)):
            if len(bpe_groups[token_id]) > max_bpe_pieces:
                redundant_ids = bpe_groups[token_id][max_bpe_pieces:]
                bpe_groups[token_id] = bpe_groups[token_id][:max_bpe_pieces]
                saved_ids = [i for i in saved_ids if i not in redundant_ids]

    # get offsets
    reduced_ids = [input_ids[i] for i in saved_ids]
    correct_offsets = []
    idx = 0
    for i, bpe_group in enumerate(bpe_groups):
        norm_idx = min(idx, len(reduced_ids) - 1)
        correct_offsets.append(norm_idx)
        idx += len(bpe_group)

    return reduced_ids, correct_offsets


def get_offsets_and_reduce_input_ids(tokenizer_output, token_offset_list,
                                     index_name="bert", max_bpe_length=80,
                                     max_bpe_pieces=5):
    timings = {"bpe": 0, "reduce": 0, "mask": 0}
    output_ids, output_offsets, output_masks = [], [], []
    for i, token_offsets in enumerate(token_offset_list):
        input_ids = tokenizer_output['input_ids'][i]

        t0 = time()
        # get bpe level offsets
        bpe_offsets = tokenizer_output['offset_mapping'][i]
        bpe_groups, saved_ids = get_bpe_groups(token_offsets, bpe_offsets,
                                               input_ids,
                                               max_bpe_pieces=max_bpe_pieces)
        t1 = time()
        timings["bpe"] += t1 - t0

        # reduce sequence length
        reduced_ids, correct_offsets = reduce_input_ids(input_ids, bpe_groups,
                                                        saved_ids,
                                                        max_bpe_length=max_bpe_length,
                                                        max_bpe_pieces=max_bpe_pieces)

        t2 = time()
        timings["reduce"] += t2 - t1

        # get mask
        bpe_mask = [1 for _ in correct_offsets]
        output_ids.append(reduced_ids)
        output_offsets.append(correct_offsets)
        output_masks.append(bpe_mask)

        t3 = time()
        timings["mask"] += t3 - t2

    # tt = sum(timings.values())
    # timings = {k: f"{round(v * 100 / tt, 2)}%" for k, v in timings.items()}
    # print(timings)

    output = {index_name: output_ids,
              f"{index_name}-offsets": output_offsets,
              "mask": output_masks}
    return output


def get_offset_for_tokens(tokens):
    sentence = " ".join(tokens)
    token_offsets = []
    end_idx = 0
    for token in tokens:
        idx = sentence[end_idx:].index(token) + end_idx
        end_idx = idx + len(token)
        offset = (idx, end_idx)
        token_offsets.append(offset)
    return token_offsets


def get_token_offsets(batch):
    token_offset_list = []
    for tokens in batch:
        token_offsets = get_offset_for_tokens(tokens)
        token_offset_list.append(token_offsets)
    return token_offset_list


def pad_output(output, pad_idx=0):
    padded_output = {}
    for input_key in output.keys():
        indexes = output[input_key]
        max_len = max([len(x) for x in indexes])
        padded_indexes = []
        for index_list in indexes:
            cur_len = len(index_list)
            pad_len = max_len - cur_len
            padded_indexes.append(index_list + [pad_idx] * pad_len)
        padded_output[input_key] = padded_indexes
    return padded_output


def tokenize_batch(tokenizer, batch_tokens, index_name="bert",
                   max_bpe_length=80, max_bpe_pieces=5):
    timings = {}
    t0 = time()
    # get batch with sentences
    batch_sentences = [" ".join(x) for x in batch_tokens]
    # get token level offsets
    token_offset_list = get_token_offsets(batch_tokens)
    # token_offset_list = get_token_offsets_multi(batch_tokens)
    t1 = time()
    timings["offset_time"] = t1 - t0
    # tokenize batch
    tokenizer_output = tokenizer.batch_encode_plus(batch_sentences,
                                                   pad_to_max_length=False,
                                                   return_offsets_mapping=True,
                                                   add_special_tokens=False)

    t2 = time()
    timings["tokenize_time"] = t2 - t1
    # postprocess batch
    output = get_offsets_and_reduce_input_ids(tokenizer_output,
                                              token_offset_list,
                                              index_name=index_name,
                                              max_bpe_length=max_bpe_length,
                                              max_bpe_pieces=max_bpe_pieces)

    t3 = time()
    timings["reduce_time"] = t3 - t2
    # pad output
    output = pad_output(output)
    t4 = time()
    timings["pading_time"] = t4 - t3
    # tt = sum(timings.values())
    # timings = {k:f"{round(v*100/tt, 2)}%" for k,v in timings.items()}
    # print(timings)

    return output
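Taken together, tokenize_batch turns a batch of already-whitespace-tokenized sentences into reduced, padded wordpiece ids plus per-token offsets and a mask. The snippet below is a minimal usage sketch and is not part of the commit: the model name "bert-base-uncased" and the example sentences are placeholders, and it assumes a fast Hugging Face tokenizer from the transformers version pinned in requirements.txt.

# Minimal usage sketch (illustrative, not from the commit).
from transformers import AutoTokenizer

from gector.tokenization import tokenize_batch

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased",
                                          do_basic_tokenize=False,
                                          use_fast=True)

batch_tokens = [["He", "go", "to", "school", "yesterday"],
                ["An", "apple", "a", "day"]]
output = tokenize_batch(tokenizer, batch_tokens, index_name="bert",
                        max_bpe_length=80, max_bpe_pieces=5)

print(output["bert"])          # padded wordpiece ids, one row per sentence
print(output["bert-offsets"])  # index of the first wordpiece of each token
print(output["mask"])          # 1 for every real token position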
gector/tokenizer_indexer.py
ADDED
@@ -0,0 +1,161 @@
"""Tweaked version of corresponding AllenNLP file"""
import logging
from collections import defaultdict
from typing import Dict, List, Callable

from allennlp.common.util import pad_sequence_to_length
from allennlp.data.token_indexers.token_indexer import TokenIndexer
from allennlp.data.tokenizers.token import Token
from allennlp.data.vocabulary import Vocabulary
from overrides import overrides
from transformers import AutoTokenizer

from utils.helpers import START_TOKEN

from gector.tokenization import tokenize_batch
import copy

logger = logging.getLogger(__name__)


# TODO(joelgrus): Figure out how to generate token_type_ids out of this token indexer.


class TokenizerIndexer(TokenIndexer[int]):
    """
    A token indexer that does the wordpiece-tokenization (e.g. for BERT embeddings).
    If you are using one of the pretrained BERT models, you'll want to use the ``PretrainedBertIndexer``
    subclass rather than this base class.

    Parameters
    ----------
    tokenizer : ``Callable[[str], List[str]]``
        A function that does the actual tokenization.
    max_pieces : int, optional (default: 512)
        The BERT embedder uses positional embeddings and so has a corresponding
        maximum length for its input ids. Any inputs longer than this will
        either be truncated (default), or be split apart and batched using a
        sliding window.
    token_min_padding_length : ``int``, optional (default=``0``)
        See :class:`TokenIndexer`.
    """

    def __init__(self,
                 tokenizer: Callable[[str], List[str]],
                 max_pieces: int = 512,
                 max_pieces_per_token: int = 3,
                 token_min_padding_length: int = 0) -> None:
        super().__init__(token_min_padding_length)

        # The BERT code itself does a two-step tokenization:
        #     sentence -> [words], and then word -> [wordpieces]
        # In AllenNLP, the first step is implemented as the ``BertBasicWordSplitter``,
        # and this token indexer handles the second.

        self.tokenizer = tokenizer
        self.max_pieces_per_token = max_pieces_per_token
        self.max_pieces = max_pieces
        self.max_pieces_per_sentence = 80

    @overrides
    def tokens_to_indices(self, tokens: List[Token],
                          vocabulary: Vocabulary,
                          index_name: str) -> Dict[str, List[int]]:
        text = [token.text for token in tokens]
        batch_tokens = [text]

        output_fast = tokenize_batch(self.tokenizer,
                                     batch_tokens,
                                     max_bpe_length=self.max_pieces,
                                     max_bpe_pieces=self.max_pieces_per_token)
        output_fast = {k: v[0] for k, v in output_fast.items()}
        return output_fast

    @overrides
    def count_vocab_items(self, token: Token, counter: Dict[str, Dict[str, int]]):
        # If we only use pretrained models, we don't need to do anything here.
        pass

    @overrides
    def get_padding_token(self) -> int:
        return 0

    @overrides
    def get_padding_lengths(self, token: int) -> Dict[str, int]:  # pylint: disable=unused-argument
        return {}

    @overrides
    def pad_token_sequence(self,
                           tokens: Dict[str, List[int]],
                           desired_num_tokens: Dict[str, int],
                           padding_lengths: Dict[str, int]) -> Dict[str, List[int]]:  # pylint: disable=unused-argument
        return {key: pad_sequence_to_length(val, desired_num_tokens[key])
                for key, val in tokens.items()}

    @overrides
    def get_keys(self, index_name: str) -> List[str]:
        """
        We need to override this because the indexer generates multiple keys.
        """
        # pylint: disable=no-self-use
        return [index_name, f"{index_name}-offsets", f"{index_name}-type-ids", "mask"]


class PretrainedBertIndexer(TokenizerIndexer):
    # pylint: disable=line-too-long
    """
    A ``TokenIndexer`` corresponding to a pretrained BERT model.

    Parameters
    ----------
    pretrained_model: ``str``
        Either the name of the pretrained model to use (e.g. 'bert-base-uncased'),
        or the path to the .txt file with its vocabulary.
        If the name is a key in the list of pretrained models at
        https://github.com/huggingface/pytorch-pretrained-BERT/blob/master/pytorch_pretrained_bert/tokenization.py#L33
        the corresponding path will be used; otherwise it will be interpreted as a path or URL.
    do_lowercase: ``bool``, optional (default = True)
        Whether to lowercase the tokens before converting to wordpiece ids.
    max_pieces: int, optional (default: 512)
        The BERT embedder uses positional embeddings and so has a corresponding
        maximum length for its input ids. Any inputs longer than this will
        either be truncated (default), or be split apart and batched using a
        sliding window.
    """

    def __init__(self,
                 pretrained_model: str,
                 do_lowercase: bool = True,
                 max_pieces: int = 512,
                 max_pieces_per_token: int = 5,
                 special_tokens_fix: int = 0) -> None:

        if pretrained_model.endswith("-cased") and do_lowercase:
            logger.warning("Your BERT model appears to be cased, "
                           "but your indexer is lowercasing tokens.")
        elif pretrained_model.endswith("-uncased") and not do_lowercase:
            logger.warning("Your BERT model appears to be uncased, "
                           "but your indexer is not lowercasing tokens.")

        model_name = copy.deepcopy(pretrained_model)

        model_tokenizer = AutoTokenizer.from_pretrained(
            model_name, do_lower_case=do_lowercase, do_basic_tokenize=False, use_fast=True)

        # to adjust all tokenizers
        if hasattr(model_tokenizer, 'encoder'):
            model_tokenizer.vocab = model_tokenizer.encoder
        if hasattr(model_tokenizer, 'sp_model'):
            model_tokenizer.vocab = defaultdict(lambda: 1)
            for i in range(model_tokenizer.sp_model.get_piece_size()):
                model_tokenizer.vocab[model_tokenizer.sp_model.id_to_piece(i)] = i

        if special_tokens_fix:
            model_tokenizer.add_tokens([START_TOKEN])
            model_tokenizer.vocab[START_TOKEN] = len(model_tokenizer) - 1

        super().__init__(tokenizer=model_tokenizer,
                         max_pieces=max_pieces,
                         max_pieces_per_token=max_pieces_per_token
                         )
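A usage sketch of the indexer (illustrative, not from the commit): it indexes one pre-tokenized sentence the way the GECToR data reader would. The model name and tokens are placeholders, and it assumes the AllenNLP 0.x-style API that the class above targets.

# Illustrative usage sketch; model name and tokens are placeholders.
from allennlp.data.tokenizers.token import Token
from allennlp.data.vocabulary import Vocabulary

from gector.tokenizer_indexer import PretrainedBertIndexer

indexer = PretrainedBertIndexer(pretrained_model="bert-base-uncased",
                                do_lowercase=True,
                                max_pieces_per_token=5,
                                special_tokens_fix=0)
tokens = [Token("He"), Token("go"), Token("to"), Token("school")]
indexed = indexer.tokens_to_indices(tokens, Vocabulary(), "bert")
# indexed["bert"], indexed["bert-offsets"] and indexed["mask"] are what the
# BERT token embedder consumes downstream.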
gector/trainer.py
ADDED
@@ -0,0 +1,845 @@
"""Tweaked version of corresponding AllenNLP file"""
import datetime
import logging
import math
import os
import time
import traceback
from typing import Dict, Optional, List, Tuple, Union, Iterable, Any

import torch
import torch.optim.lr_scheduler
from allennlp.common import Params
from allennlp.common.checks import ConfigurationError, parse_cuda_device
from allennlp.common.tqdm import Tqdm
from allennlp.common.util import dump_metrics, gpu_memory_mb, peak_memory_mb, lazy_groups_of
from allennlp.data.instance import Instance
from allennlp.data.iterators.data_iterator import DataIterator, TensorDict
from allennlp.models.model import Model
from allennlp.nn import util as nn_util
from allennlp.training import util as training_util
from allennlp.training.checkpointer import Checkpointer
from allennlp.training.learning_rate_schedulers import LearningRateScheduler
from allennlp.training.metric_tracker import MetricTracker
from allennlp.training.momentum_schedulers import MomentumScheduler
from allennlp.training.moving_average import MovingAverage
from allennlp.training.optimizers import Optimizer
from allennlp.training.tensorboard_writer import TensorboardWriter
from allennlp.training.trainer_base import TrainerBase

logger = logging.getLogger(__name__)


class Trainer(TrainerBase):
    def __init__(
        self,
        model: Model,
        optimizer: torch.optim.Optimizer,
        scheduler: torch.optim.lr_scheduler,
        iterator: DataIterator,
        train_dataset: Iterable[Instance],
        validation_dataset: Optional[Iterable[Instance]] = None,
        patience: Optional[int] = None,
        validation_metric: str = "-loss",
        validation_iterator: DataIterator = None,
        shuffle: bool = True,
        num_epochs: int = 20,
        accumulated_batch_count: int = 1,
        serialization_dir: Optional[str] = None,
        num_serialized_models_to_keep: int = 20,
        keep_serialized_model_every_num_seconds: int = None,
        checkpointer: Checkpointer = None,
        model_save_interval: float = None,
        cuda_device: Union[int, List] = -1,
        grad_norm: Optional[float] = None,
        grad_clipping: Optional[float] = None,
        learning_rate_scheduler: Optional[LearningRateScheduler] = None,
        momentum_scheduler: Optional[MomentumScheduler] = None,
        summary_interval: int = 100,
        histogram_interval: int = None,
        should_log_parameter_statistics: bool = True,
        should_log_learning_rate: bool = False,
        log_batch_size_period: Optional[int] = None,
        moving_average: Optional[MovingAverage] = None,
        cold_step_count: int = 0,
        cold_lr: float = 1e-3,
        cuda_verbose_step=None,
    ) -> None:
        """
        A trainer for doing supervised learning. It just takes a labeled dataset
        and a ``DataIterator``, and uses the supplied ``Optimizer`` to learn the weights
        for your model over some fixed number of epochs. You can also pass in a validation
        dataset and enable early stopping. There are many other bells and whistles as well.

        Parameters
        ----------
        model : ``Model``, required.
            An AllenNLP model to be optimized. Pytorch Modules can also be optimized if
            their ``forward`` method returns a dictionary with a "loss" key, containing a
            scalar tensor representing the loss function to be optimized.

            If you are training your model using GPUs, your model should already be
            on the correct device. (If you use `Trainer.from_params` this will be
            handled for you.)
        optimizer : ``torch.nn.Optimizer``, required.
            An instance of a Pytorch Optimizer, instantiated with the parameters of the
            model to be optimized.
        iterator : ``DataIterator``, required.
            A method for iterating over a ``Dataset``, yielding padded indexed batches.
        train_dataset : ``Dataset``, required.
            A ``Dataset`` to train on. The dataset should have already been indexed.
        validation_dataset : ``Dataset``, optional, (default = None).
            A ``Dataset`` to evaluate on. The dataset should have already been indexed.
        patience : Optional[int] > 0, optional (default=None)
            Number of epochs to be patient before early stopping: the training is stopped
            after ``patience`` epochs with no improvement. If given, it must be ``> 0``.
            If None, early stopping is disabled.
        validation_metric : str, optional (default="loss")
            Validation metric to measure for whether to stop training using patience
            and whether to serialize an ``is_best`` model each epoch. The metric name
            must be prepended with either "+" or "-", which specifies whether the metric
            is an increasing or decreasing function.
        validation_iterator : ``DataIterator``, optional (default=None)
            An iterator to use for the validation set. If ``None``, then
            use the training `iterator`.
        shuffle: ``bool``, optional (default=True)
            Whether to shuffle the instances in the iterator or not.
        num_epochs : int, optional (default = 20)
            Number of training epochs.
        serialization_dir : str, optional (default=None)
            Path to directory for saving and loading model files. Models will not be saved if
            this parameter is not passed.
        num_serialized_models_to_keep : ``int``, optional (default=20)
            Number of previous model checkpoints to retain. Default is to keep 20 checkpoints.
            A value of None or -1 means all checkpoints will be kept.
        keep_serialized_model_every_num_seconds : ``int``, optional (default=None)
            If num_serialized_models_to_keep is not None, then occasionally it's useful to
            save models at a given interval in addition to the last num_serialized_models_to_keep.
            To do so, specify keep_serialized_model_every_num_seconds as the number of seconds
            between permanently saved checkpoints. Note that this option is only used if
            num_serialized_models_to_keep is not None, otherwise all checkpoints are kept.
        checkpointer : ``Checkpointer``, optional (default=None)
            An instance of class Checkpointer to use instead of the default. If a checkpointer is specified,
            the arguments num_serialized_models_to_keep and keep_serialized_model_every_num_seconds should
            not be specified. The caller is responsible for initializing the checkpointer so that it is
            consistent with serialization_dir.
        model_save_interval : ``float``, optional (default=None)
            If provided, then serialize models every ``model_save_interval``
            seconds within single epochs. In all cases, models are also saved
            at the end of every epoch if ``serialization_dir`` is provided.
        cuda_device : ``Union[int, List[int]]``, optional (default = -1)
            An integer or list of integers specifying the CUDA device(s) to use. If -1, the CPU is used.
        grad_norm : ``float``, optional, (default = None).
            If provided, gradient norms will be rescaled to have a maximum of this value.
        grad_clipping : ``float``, optional (default = ``None``).
            If provided, gradients will be clipped `during the backward pass` to have an (absolute)
            maximum of this value. If you are getting ``NaNs`` in your gradients during training
            that are not solved by using ``grad_norm``, you may need this.
        learning_rate_scheduler : ``LearningRateScheduler``, optional (default = None)
            If specified, the learning rate will be decayed with respect to
            this schedule at the end of each epoch (or batch, if the scheduler implements
            the ``step_batch`` method). If you use :class:`torch.optim.lr_scheduler.ReduceLROnPlateau`,
            this will use the ``validation_metric`` provided to determine if learning has plateaued.
            To support updating the learning rate on every batch, this can optionally implement
            ``step_batch(batch_num_total)`` which updates the learning rate given the batch number.
        momentum_scheduler : ``MomentumScheduler``, optional (default = None)
            If specified, the momentum will be updated at the end of each batch or epoch
            according to the schedule.
        summary_interval: ``int``, optional, (default = 100)
            Number of batches between logging scalars to tensorboard
        histogram_interval : ``int``, optional, (default = ``None``)
            If not None, then log histograms to tensorboard every ``histogram_interval`` batches.
            When this parameter is specified, the following additional logging is enabled:
                * Histograms of model parameters
                * The ratio of parameter update norm to parameter norm
                * Histogram of layer activations
            We log histograms of the parameters returned by
            ``model.get_parameters_for_histogram_tensorboard_logging``.
            The layer activations are logged for any modules in the ``Model`` that have
            the attribute ``should_log_activations`` set to ``True``. Logging
            histograms requires a number of GPU-CPU copies during training and is typically
            slow, so we recommend logging histograms relatively infrequently.
            Note: only Modules that return tensors, tuples of tensors or dicts
            with tensors as values currently support activation logging.
        should_log_parameter_statistics : ``bool``, optional, (default = True)
            Whether to send parameter statistics (mean and standard deviation
            of parameters and gradients) to tensorboard.
        should_log_learning_rate : ``bool``, optional, (default = False)
            Whether to send parameter specific learning rate to tensorboard.
        log_batch_size_period : ``int``, optional, (default = ``None``)
            If defined, how often to log the average batch size.
        moving_average: ``MovingAverage``, optional, (default = None)
            If provided, we will maintain moving averages for all parameters. During training, we
            employ a shadow variable for each parameter, which maintains the moving average. During
            evaluation, we backup the original parameters and assign the moving averages to corresponding
            parameters. Be careful that when saving the checkpoint, we will save the moving averages of
            parameters. This is necessary because we want the saved model to perform as well as the validated
            model if we load it later. But this may cause problems if you restart the training from checkpoint.
        """
        super().__init__(serialization_dir, cuda_device)

        # I am not calling move_to_gpu here, because if the model is
        # not already on the GPU then the optimizer is going to be wrong.
        self.model = model

        self.iterator = iterator
        self._validation_iterator = validation_iterator
        self.shuffle = shuffle
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.train_data = train_dataset
        self._validation_data = validation_dataset
        self.accumulated_batch_count = accumulated_batch_count
        self.cold_step_count = cold_step_count
        self.cold_lr = cold_lr
        self.cuda_verbose_step = cuda_verbose_step

        if patience is None:  # no early stopping
            if validation_dataset:
                logger.warning(
                    "You provided a validation dataset but patience was set to None, "
                    "meaning that early stopping is disabled"
                )
        elif (not isinstance(patience, int)) or patience <= 0:
            raise ConfigurationError(
                '{} is an invalid value for "patience": it must be a positive integer '
                "or None (if you want to disable early stopping)".format(patience)
            )

        # For tracking is_best_so_far and should_stop_early
        self._metric_tracker = MetricTracker(patience, validation_metric)
        # Get rid of + or -
        self._validation_metric = validation_metric[1:]

        self._num_epochs = num_epochs

        if checkpointer is not None:
            # We can't easily check if these parameters were passed in, so check against their default values.
            # We don't check against serialization_dir since it is also used by the parent class.
            if num_serialized_models_to_keep != 20 \
                    or keep_serialized_model_every_num_seconds is not None:
                raise ConfigurationError(
                    "When passing a custom Checkpointer, you may not also pass in separate checkpointer "
                    "args 'num_serialized_models_to_keep' or 'keep_serialized_model_every_num_seconds'."
                )
            self._checkpointer = checkpointer
        else:
            self._checkpointer = Checkpointer(
                serialization_dir,
                keep_serialized_model_every_num_seconds,
                num_serialized_models_to_keep,
            )

        self._model_save_interval = model_save_interval

        self._grad_norm = grad_norm
        self._grad_clipping = grad_clipping

        self._learning_rate_scheduler = learning_rate_scheduler
        self._momentum_scheduler = momentum_scheduler
        self._moving_average = moving_average

        # We keep the total batch number as an instance variable because it
        # is used inside a closure for the hook which logs activations in
        # ``_enable_activation_logging``.
        self._batch_num_total = 0

        self._tensorboard = TensorboardWriter(
            get_batch_num_total=lambda: self._batch_num_total,
            serialization_dir=serialization_dir,
            summary_interval=summary_interval,
            histogram_interval=histogram_interval,
            should_log_parameter_statistics=should_log_parameter_statistics,
            should_log_learning_rate=should_log_learning_rate,
        )

        self._log_batch_size_period = log_batch_size_period

        self._last_log = 0.0  # time of last logging

        # Enable activation logging.
        if histogram_interval is not None:
            self._tensorboard.enable_activation_logging(self.model)

    def rescale_gradients(self) -> Optional[float]:
        return training_util.rescale_gradients(self.model, self._grad_norm)

    def batch_loss(self, batch_group: List[TensorDict], for_training: bool) -> torch.Tensor:
        """
        Does a forward pass on the given batches and returns the ``loss`` value in the result.
        If ``for_training`` is `True` also applies regularization penalty.
        """
        if self._multiple_gpu:
            output_dict = training_util.data_parallel(batch_group, self.model, self._cuda_devices)
        else:
            assert len(batch_group) == 1
            batch = batch_group[0]
            batch = nn_util.move_to_device(batch, self._cuda_devices[0])
            output_dict = self.model(**batch)

        try:
            loss = output_dict["loss"]
            if for_training:
                loss += self.model.get_regularization_penalty()
        except KeyError:
            if for_training:
                raise RuntimeError(
                    "The model you are trying to optimize does not contain a"
                    " 'loss' key in the output of model.forward(inputs)."
                )
            loss = None

        return loss

    def _train_epoch(self, epoch: int) -> Dict[str, float]:
        """
        Trains one epoch and returns metrics.
        """
        logger.info("Epoch %d/%d", epoch, self._num_epochs - 1)
        peak_cpu_usage = peak_memory_mb()
        logger.info(f"Peak CPU memory usage MB: {peak_cpu_usage}")
        gpu_usage = []
        for gpu, memory in gpu_memory_mb().items():
            gpu_usage.append((gpu, memory))
            logger.info(f"GPU {gpu} memory usage MB: {memory}")

        train_loss = 0.0
        # Set the model to "train" mode.
        self.model.train()

        num_gpus = len(self._cuda_devices)

        # Get tqdm for the training batches
        raw_train_generator = self.iterator(self.train_data, num_epochs=1, shuffle=self.shuffle)
        train_generator = lazy_groups_of(raw_train_generator, num_gpus)
        num_training_batches = math.ceil(self.iterator.get_num_batches(self.train_data) / num_gpus)
        residue = num_training_batches % self.accumulated_batch_count
        self._last_log = time.time()
        last_save_time = time.time()

        batches_this_epoch = 0
        if self._batch_num_total is None:
            self._batch_num_total = 0

        histogram_parameters = set(self.model.get_parameters_for_histogram_tensorboard_logging())

        logger.info("Training")
        train_generator_tqdm = Tqdm.tqdm(train_generator, total=num_training_batches)
        cumulative_batch_size = 0
        self.optimizer.zero_grad()
        for batch_group in train_generator_tqdm:
            batches_this_epoch += 1
            self._batch_num_total += 1
            batch_num_total = self._batch_num_total

            iter_len = self.accumulated_batch_count \
                if batches_this_epoch <= (num_training_batches - residue) else residue

            if self.cuda_verbose_step is not None and batch_num_total % self.cuda_verbose_step == 0:
                print(f'Before forward pass - Cuda memory allocated: {torch.cuda.memory_allocated() / 1e9}')
                print(f'Before forward pass - Cuda memory cached: {torch.cuda.memory_cached() / 1e9}')
            try:
                loss = self.batch_loss(batch_group, for_training=True) / iter_len
            except RuntimeError as e:
                print(e)
                for x in batch_group:
                    all_words = [len(y['words']) for y in x['metadata']]
                    print(f"Total sents: {len(all_words)}. "
                          f"Min {min(all_words)}. Max {max(all_words)}")
                    for elem in ['labels', 'd_tags']:
                        tt = x[elem]
                        print(
                            f"{elem} shape {list(tt.shape)} and min {tt.min().item()} and {tt.max().item()}")
                    for elem in ["bert", "mask", "bert-offsets"]:
                        tt = x['tokens'][elem]
                        print(
                            f"{elem} shape {list(tt.shape)} and min {tt.min().item()} and {tt.max().item()}")
                raise e

            if self.cuda_verbose_step is not None and batch_num_total % self.cuda_verbose_step == 0:
                print(f'After forward pass - Cuda memory allocated: {torch.cuda.memory_allocated() / 1e9}')
                print(f'After forward pass - Cuda memory cached: {torch.cuda.memory_cached() / 1e9}')

            if torch.isnan(loss):
                raise ValueError("nan loss encountered")

            loss.backward()

            if self.cuda_verbose_step is not None and batch_num_total % self.cuda_verbose_step == 0:
                print(f'After backprop - Cuda memory allocated: {torch.cuda.memory_allocated() / 1e9}')
                print(f'After backprop - Cuda memory cached: {torch.cuda.memory_cached() / 1e9}')

            train_loss += loss.item() * iter_len

            del batch_group, loss
            torch.cuda.empty_cache()

            if self.cuda_verbose_step is not None and batch_num_total % self.cuda_verbose_step == 0:
                print(f'After collecting garbage - Cuda memory allocated: {torch.cuda.memory_allocated() / 1e9}')
                print(f'After collecting garbage - Cuda memory cached: {torch.cuda.memory_cached() / 1e9}')

            batch_grad_norm = self.rescale_gradients()

            # This does nothing if batch_num_total is None or you are using a
            # scheduler which doesn't update per batch.
            if self._learning_rate_scheduler:
                self._learning_rate_scheduler.step_batch(batch_num_total)
            if self._momentum_scheduler:
                self._momentum_scheduler.step_batch(batch_num_total)

            if self._tensorboard.should_log_histograms_this_batch():
                # get the magnitude of parameter updates for logging
                # We need a copy of current parameters to compute magnitude of updates,
                # and copy them to CPU so large models won't go OOM on the GPU.
                param_updates = {
                    name: param.detach().cpu().clone()
                    for name, param in self.model.named_parameters()
                }
                if batches_this_epoch % self.accumulated_batch_count == 0 or \
                        batches_this_epoch == num_training_batches:
                    self.optimizer.step()
                    self.optimizer.zero_grad()
                for name, param in self.model.named_parameters():
                    param_updates[name].sub_(param.detach().cpu())
                    update_norm = torch.norm(param_updates[name].view(-1))
                    param_norm = torch.norm(param.view(-1)).cpu()
                    self._tensorboard.add_train_scalar(
                        "gradient_update/" + name, update_norm / (param_norm + 1e-7)
                    )
            else:
                if batches_this_epoch % self.accumulated_batch_count == 0 or \
                        batches_this_epoch == num_training_batches:
                    self.optimizer.step()
                    self.optimizer.zero_grad()

            # Update moving averages
            if self._moving_average is not None:
                self._moving_average.apply(batch_num_total)

            # Update the description with the latest metrics
            metrics = training_util.get_metrics(self.model, train_loss, batches_this_epoch)
            description = training_util.description_from_metrics(metrics)

            train_generator_tqdm.set_description(description, refresh=False)

            # Log parameter values to Tensorboard
            if self._tensorboard.should_log_this_batch():
                self._tensorboard.log_parameter_and_gradient_statistics(self.model, batch_grad_norm)
                self._tensorboard.log_learning_rates(self.model, self.optimizer)

                self._tensorboard.add_train_scalar("loss/loss_train", metrics["loss"])
                self._tensorboard.log_metrics({"epoch_metrics/" + k: v for k, v in metrics.items()})

            if self._tensorboard.should_log_histograms_this_batch():
                self._tensorboard.log_histograms(self.model, histogram_parameters)

            if self._log_batch_size_period:
                cur_batch = sum([training_util.get_batch_size(batch) for batch in batch_group])
                cumulative_batch_size += cur_batch
                if (batches_this_epoch - 1) % self._log_batch_size_period == 0:
                    average = cumulative_batch_size / batches_this_epoch
                    logger.info(f"current batch size: {cur_batch} mean batch size: {average}")
                    self._tensorboard.add_train_scalar("current_batch_size", cur_batch)
                    self._tensorboard.add_train_scalar("mean_batch_size", average)

            # Save model if needed.
            if self._model_save_interval is not None and (
                time.time() - last_save_time > self._model_save_interval
            ):
                last_save_time = time.time()
                self._save_checkpoint(
                    "{0}.{1}".format(epoch, training_util.time_to_str(int(last_save_time)))
                )

        metrics = training_util.get_metrics(self.model, train_loss, batches_this_epoch, reset=True)
        metrics["cpu_memory_MB"] = peak_cpu_usage
        for (gpu_num, memory) in gpu_usage:
            metrics["gpu_" + str(gpu_num) + "_memory_MB"] = memory
        return metrics

    def _validation_loss(self) -> Tuple[float, int]:
        """
        Computes the validation loss. Returns it and the number of batches.
        """
        logger.info("Validating")

        self.model.eval()

        # Replace parameter values with the shadow values from the moving averages.
        if self._moving_average is not None:
            self._moving_average.assign_average_value()

        if self._validation_iterator is not None:
            val_iterator = self._validation_iterator
        else:
            val_iterator = self.iterator

        num_gpus = len(self._cuda_devices)

        raw_val_generator = val_iterator(self._validation_data, num_epochs=1, shuffle=False)
        val_generator = lazy_groups_of(raw_val_generator, num_gpus)
        num_validation_batches = math.ceil(
            val_iterator.get_num_batches(self._validation_data) / num_gpus
        )
        val_generator_tqdm = Tqdm.tqdm(val_generator, total=num_validation_batches)
        batches_this_epoch = 0
        val_loss = 0
        for batch_group in val_generator_tqdm:

            loss = self.batch_loss(batch_group, for_training=False)
            if loss is not None:
                # You shouldn't necessarily have to compute a loss for validation, so we allow for
                # `loss` to be None. We need to be careful, though - `batches_this_epoch` is
                # currently only used as the divisor for the loss function, so we can safely only
                # count those batches for which we actually have a loss. If this variable ever
                # gets used for something else, we might need to change things around a bit.
                batches_this_epoch += 1
                val_loss += loss.detach().cpu().numpy()

            # Update the description with the latest metrics
            val_metrics = training_util.get_metrics(self.model, val_loss, batches_this_epoch)
            description = training_util.description_from_metrics(val_metrics)
            val_generator_tqdm.set_description(description, refresh=False)

        # Now restore the original parameter values.
        if self._moving_average is not None:
            self._moving_average.restore()

        return val_loss, batches_this_epoch

    def train(self) -> Dict[str, Any]:
        """
        Trains the supplied model with the supplied parameters.
        """
        try:
            epoch_counter = self._restore_checkpoint()
        except RuntimeError:
            traceback.print_exc()
            raise ConfigurationError(
                "Could not recover training from the checkpoint. Did you mean to output to "
                "a different serialization directory or delete the existing serialization "
                "directory?"
            )

        training_util.enable_gradient_clipping(self.model, self._grad_clipping)

        logger.info("Beginning training.")

        train_metrics: Dict[str, float] = {}
        val_metrics: Dict[str, float] = {}
        this_epoch_val_metric: float = None
        metrics: Dict[str, Any] = {}
        epochs_trained = 0
        training_start_time = time.time()

        if self.cold_step_count > 0:
            base_lr = self.optimizer.param_groups[0]['lr']
            for param_group in self.optimizer.param_groups:
                param_group['lr'] = self.cold_lr
            self.model.text_field_embedder._token_embedders['bert'].set_weights(freeze=True)

        metrics["best_epoch"] = self._metric_tracker.best_epoch
        for key, value in self._metric_tracker.best_epoch_metrics.items():
            metrics["best_validation_" + key] = value

        for epoch in range(epoch_counter, self._num_epochs):
            if epoch == self.cold_step_count and epoch != 0:
                for param_group in self.optimizer.param_groups:
                    param_group['lr'] = base_lr
                self.model.text_field_embedder._token_embedders['bert'].set_weights(freeze=False)

            epoch_start_time = time.time()
            train_metrics = self._train_epoch(epoch)

            # get peak of memory usage
            if "cpu_memory_MB" in train_metrics:
                metrics["peak_cpu_memory_MB"] = max(
                    metrics.get("peak_cpu_memory_MB", 0), train_metrics["cpu_memory_MB"]
                )
            for key, value in train_metrics.items():
                if key.startswith("gpu_"):
                    metrics["peak_" + key] = max(metrics.get("peak_" + key, 0), value)

            # clear cache before validation
            torch.cuda.empty_cache()
            if self._validation_data is not None:
                with torch.no_grad():
                    # We have a validation set, so compute all the metrics on it.
                    val_loss, num_batches = self._validation_loss()
                    val_metrics = training_util.get_metrics(
                        self.model, val_loss, num_batches, reset=True
                    )

                    # Check validation metric for early stopping
                    this_epoch_val_metric = val_metrics[self._validation_metric]
                    self._metric_tracker.add_metric(this_epoch_val_metric)

                    if self._metric_tracker.should_stop_early():
                        logger.info("Ran out of patience. Stopping training.")
                        break

            self._tensorboard.log_metrics(
                train_metrics, val_metrics=val_metrics, log_to_console=True, epoch=epoch + 1
            )  # +1 because tensorboard doesn't like 0

            # Create overall metrics dict
            training_elapsed_time = time.time() - training_start_time
            metrics["training_duration"] = str(datetime.timedelta(seconds=training_elapsed_time))
            metrics["training_start_epoch"] = epoch_counter
            metrics["training_epochs"] = epochs_trained
            metrics["epoch"] = epoch

            for key, value in train_metrics.items():
                metrics["training_" + key] = value
            for key, value in val_metrics.items():
                metrics["validation_" + key] = value

            # if self.cold_step_count <= epoch:
            self.scheduler.step(metrics['validation_loss'])

            if self._metric_tracker.is_best_so_far():
                # Update all the best_ metrics.
                # (Otherwise they just stay the same as they were.)
                metrics["best_epoch"] = epoch
                for key, value in val_metrics.items():
                    metrics["best_validation_" + key] = value

                self._metric_tracker.best_epoch_metrics = val_metrics

            if self._serialization_dir:
                dump_metrics(
                    os.path.join(self._serialization_dir, f"metrics_epoch_{epoch}.json"), metrics
                )

            # The Scheduler API is agnostic to whether your schedule requires a validation metric -
            # if it doesn't, the validation metric passed here is ignored.
            if self._learning_rate_scheduler:
                self._learning_rate_scheduler.step(this_epoch_val_metric, epoch)
            if self._momentum_scheduler:
                self._momentum_scheduler.step(this_epoch_val_metric, epoch)

            self._save_checkpoint(epoch)

            epoch_elapsed_time = time.time() - epoch_start_time
            logger.info("Epoch duration: %s", datetime.timedelta(seconds=epoch_elapsed_time))

            if epoch < self._num_epochs - 1:
                training_elapsed_time = time.time() - training_start_time
                estimated_time_remaining = training_elapsed_time * (
                    (self._num_epochs - epoch_counter) / float(epoch - epoch_counter + 1) - 1
                )
                formatted_time = str(datetime.timedelta(seconds=int(estimated_time_remaining)))
                logger.info("Estimated training time remaining: %s", formatted_time)

            epochs_trained += 1

        # make sure pending events are flushed to disk and files are closed properly
        # self._tensorboard.close()

        # Load the best model state before returning
        best_model_state = self._checkpointer.best_model_state()
        if best_model_state:
            self.model.load_state_dict(best_model_state)

        return metrics

    def _save_checkpoint(self, epoch: Union[int, str]) -> None:
        """
        Saves a checkpoint of the model to self._serialization_dir.
        Is a no-op if self._serialization_dir is None.

        Parameters
        ----------
        epoch : Union[int, str], required.
            The epoch of training. If the checkpoint is saved in the middle
            of an epoch, the parameter is a string with the epoch and timestamp.
        """
        # If moving averages are used for parameters, we save
        # the moving average values into checkpoint, instead of the current values.
        if self._moving_average is not None:
            self._moving_average.assign_average_value()

        # These are the training states we need to persist.
        training_states = {
            "metric_tracker": self._metric_tracker.state_dict(),
            "optimizer": self.optimizer.state_dict(),
            "batch_num_total": self._batch_num_total,
        }

        # If we have a learning rate or momentum scheduler, we should persist them too.
        if self._learning_rate_scheduler is not None:
            training_states["learning_rate_scheduler"] = self._learning_rate_scheduler.state_dict()
        if self._momentum_scheduler is not None:
            training_states["momentum_scheduler"] = self._momentum_scheduler.state_dict()

        self._checkpointer.save_checkpoint(
            model_state=self.model.state_dict(),
            epoch=epoch,
            training_states=training_states,
            is_best_so_far=self._metric_tracker.is_best_so_far(),
        )

        # Restore the original values for parameters so that training will not be affected.
        if self._moving_average is not None:
            self._moving_average.restore()

    def _restore_checkpoint(self) -> int:
        """
        Restores the model and training state from the last saved checkpoint.
        This includes an epoch count and optimizer state, which is serialized separately
        from model parameters. This function should only be used to continue training -
        if you wish to load a model for inference/load parts of a model into a new
        computation graph, you should use the native Pytorch functions:
        `` model.load_state_dict(torch.load("/path/to/model/weights.th"))``

        If ``self._serialization_dir`` does not exist or does not contain any checkpointed weights,
        this function will do nothing and return 0.

        Returns
        -------
        epoch: int
            The epoch at which to resume training, which should be one after the epoch
            in the saved training state.
        """
        model_state, training_state = self._checkpointer.restore_checkpoint()

        if not training_state:
            # No checkpoint to restore, start at 0
            return 0

        self.model.load_state_dict(model_state)
        self.optimizer.load_state_dict(training_state["optimizer"])
        if self._learning_rate_scheduler is not None \
                and "learning_rate_scheduler" in training_state:
            self._learning_rate_scheduler.load_state_dict(training_state["learning_rate_scheduler"])
        if self._momentum_scheduler is not None and "momentum_scheduler" in training_state:
            self._momentum_scheduler.load_state_dict(training_state["momentum_scheduler"])
        training_util.move_optimizer_to_cuda(self.optimizer)

        # Currently the ``training_state`` contains a serialized ``MetricTracker``.
        if "metric_tracker" in training_state:
            self._metric_tracker.load_state_dict(training_state["metric_tracker"])
        # It used to be the case that we tracked ``val_metric_per_epoch``.
        elif "val_metric_per_epoch" in training_state:
            self._metric_tracker.clear()
            self._metric_tracker.add_metrics(training_state["val_metric_per_epoch"])
        # And before that we didn't track anything.
        else:
            self._metric_tracker.clear()

        if isinstance(training_state["epoch"], int):
            epoch_to_return = training_state["epoch"] + 1
        else:
            epoch_to_return = int(training_state["epoch"].split(".")[0]) + 1

        # For older checkpoints with batch_num_total missing, default to old behavior where
        # it is unchanged.
        batch_num_total = training_state.get("batch_num_total")
        if batch_num_total is not None:
            self._batch_num_total = batch_num_total

        return epoch_to_return

    # Requires custom from_params.
    @classmethod
    def from_params(  # type: ignore
        cls,
        model: Model,
        serialization_dir: str,
        iterator: DataIterator,
        train_data: Iterable[Instance],
        validation_data: Optional[Iterable[Instance]],
        params: Params,
        validation_iterator: DataIterator = None,
    ) -> "Trainer":

        patience = params.pop_int("patience", None)
        validation_metric = params.pop("validation_metric", "-loss")
        shuffle = params.pop_bool("shuffle", True)
        num_epochs = params.pop_int("num_epochs", 20)
        cuda_device = parse_cuda_device(params.pop("cuda_device", -1))
        grad_norm = params.pop_float("grad_norm", None)
        grad_clipping = params.pop_float("grad_clipping", None)
        lr_scheduler_params = params.pop("learning_rate_scheduler", None)
        momentum_scheduler_params = params.pop("momentum_scheduler", None)

        if isinstance(cuda_device, list):
            model_device = cuda_device[0]
        else:
            model_device = cuda_device
        if model_device >= 0:
            # Moving model to GPU here so that the optimizer state gets constructed on
            # the right device.
            model = model.cuda(model_device)

        parameters = [[n, p] for n, p in model.named_parameters() if p.requires_grad]
        optimizer = Optimizer.from_params(parameters, params.pop("optimizer"))
        if "moving_average" in params:
            moving_average = MovingAverage.from_params(
                params.pop("moving_average"), parameters=parameters
            )
        else:
            moving_average = None

        if lr_scheduler_params:
            lr_scheduler = LearningRateScheduler.from_params(optimizer, lr_scheduler_params)
        else:
            lr_scheduler = None
        if momentum_scheduler_params:
            momentum_scheduler = MomentumScheduler.from_params(optimizer, momentum_scheduler_params)
        else:
            momentum_scheduler = None

        if "checkpointer" in params:
            if "keep_serialized_model_every_num_seconds" in params \
                    or "num_serialized_models_to_keep" in params:
                raise ConfigurationError(
                    "Checkpointer may be initialized either from the 'checkpointer' key or from the "
                    "keys 'num_serialized_models_to_keep' and 'keep_serialized_model_every_num_seconds'"
                    " but the passed config uses both methods."
                )
            checkpointer = Checkpointer.from_params(params.pop("checkpointer"))
        else:
            num_serialized_models_to_keep = params.pop_int("num_serialized_models_to_keep", 20)
            keep_serialized_model_every_num_seconds = params.pop_int(
                "keep_serialized_model_every_num_seconds", None
            )
            checkpointer = Checkpointer(
                serialization_dir=serialization_dir,
                num_serialized_models_to_keep=num_serialized_models_to_keep,
                keep_serialized_model_every_num_seconds=keep_serialized_model_every_num_seconds,
            )
        model_save_interval = params.pop_float("model_save_interval", None)
        summary_interval = params.pop_int("summary_interval", 100)
        histogram_interval = params.pop_int("histogram_interval", None)
        should_log_parameter_statistics = params.pop_bool("should_log_parameter_statistics", True)
        should_log_learning_rate = params.pop_bool("should_log_learning_rate", False)
        log_batch_size_period = params.pop_int("log_batch_size_period", None)

        params.assert_empty(cls.__name__)
        return cls(
            model,
            optimizer,
            iterator,
            train_data,
            validation_data,
            patience=patience,
            validation_metric=validation_metric,
            validation_iterator=validation_iterator,
            shuffle=shuffle,
            num_epochs=num_epochs,
            serialization_dir=serialization_dir,
            cuda_device=cuda_device,
            grad_norm=grad_norm,
            grad_clipping=grad_clipping,
            learning_rate_scheduler=lr_scheduler,
            momentum_scheduler=momentum_scheduler,
            checkpointer=checkpointer,
            model_save_interval=model_save_interval,
            summary_interval=summary_interval,
            histogram_interval=histogram_interval,
            should_log_parameter_statistics=should_log_parameter_statistics,
            should_log_learning_rate=should_log_learning_rate,
            log_batch_size_period=log_batch_size_period,
            moving_average=moving_average,
        )
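For orientation, a rough wiring sketch of how a Trainer like the one above is typically put together. This is an assumption-laden illustration, not the repo's train script: the model, vocabulary and indexed datasets are taken as given (for example a Seq2Labels model and instances produced by the dataset reader in this commit), and every hyperparameter value shown is a placeholder.

# Rough wiring sketch (assumptions labelled in comments).
import torch
from allennlp.data.iterators import BucketIterator

from gector.trainer import Trainer


def build_trainer(model, vocab, train_data, dev_data,
                  serialization_dir="model_dir"):
    # model, vocab, train_data and dev_data are assumed to exist already;
    # optimizer/scheduler choices and all numbers below are placeholders.
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           factor=0.1,
                                                           patience=10)
    iterator = BucketIterator(batch_size=32,
                              sorting_keys=[("tokens", "num_tokens")])
    iterator.index_with(vocab)
    return Trainer(model=model,
                   optimizer=optimizer,
                   scheduler=scheduler,
                   iterator=iterator,
                   train_dataset=train_data,
                   validation_dataset=dev_data,
                   serialization_dir=serialization_dir,
                   patience=3,
                   num_epochs=20,
                   accumulated_batch_count=4,
                   cuda_device=0 if torch.cuda.is_available() else -1)


# trainer = build_trainer(model, vocab, train_data, dev_data)
# metrics = trainer.train()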
output_vocabulary/d_tags.txt
ADDED
@@ -0,0 +1,4 @@
CORRECT
INCORRECT
@@UNKNOWN@@
@@PADDING@@
output_vocabulary/labels.txt
ADDED
@@ -0,0 +1,5002 @@
1 |
+
$KEEP
|
2 |
+
$DELETE
|
3 |
+
$TRANSFORM_CASE_CAPITAL
|
4 |
+
$APPEND_the
|
5 |
+
$APPEND_,
|
6 |
+
$APPEND_a
|
7 |
+
$TRANSFORM_VERB_VB_VBZ
|
8 |
+
$TRANSFORM_AGREEMENT_PLURAL
|
9 |
+
$TRANSFORM_CASE_LOWER
|
10 |
+
$TRANSFORM_VERB_VB_VBN
|
11 |
+
$REPLACE_the
|
12 |
+
$REPLACE_a
|
13 |
+
$REPLACE_to
|
14 |
+
$TRANSFORM_VERB_VB_VBG
|
15 |
+
$REPLACE_.
|
16 |
+
$APPEND_to
|
17 |
+
$REPLACE_,
|
18 |
+
$REPLACE_in
|
19 |
+
$REPLACE_was
|
20 |
+
$TRANSFORM_VERB_VBZ_VB
|
21 |
+
$TRANSFORM_AGREEMENT_SINGULAR
|
22 |
+
$APPEND_I
|
23 |
+
$APPEND_.
|
24 |
+
$REPLACE_for
|
25 |
+
$REPLACE_I
|
26 |
+
$APPEND_(
|
27 |
+
$TRANSFORM_VERB_VBG_VB
|
28 |
+
$REPLACE_is
|
29 |
+
$REPLACE_have
|
30 |
+
$REPLACE_on
|
31 |
+
$REPLACE_are
|
32 |
+
$REPLACE_of
|
33 |
+
$REPLACE_it
|
34 |
+
$TRANSFORM_VERB_VBN_VB
|
35 |
+
$REPLACE_that
|
36 |
+
$APPEND_in
|
37 |
+
$REPLACE_and
|
38 |
+
$APPEND_it
|
39 |
+
$APPEND_that
|
40 |
+
$REPLACE_at
|
41 |
+
$APPEND_for
|
42 |
+
$APPEND_of
|
43 |
+
$APPEND_and
|
44 |
+
$REPLACE_an
|
45 |
+
$REPLACE_my
|
46 |
+
$REPLACE_had
|
47 |
+
$APPEND_have
|
48 |
+
$APPEND_an
|
49 |
+
$REPLACE_has
|
50 |
+
$APPEND_my
|
51 |
+
$APPEND_is
|
52 |
+
$APPEND_The
|
53 |
+
$APPEND_will
|
54 |
+
$REPLACE_with
|
55 |
+
$REPLACE_were
|
56 |
+
$REPLACE_be
|
57 |
+
$TRANSFORM_VERB_VBN_VBG
|
58 |
+
$APPEND_``
|
59 |
+
$REPLACE_do
|
60 |
+
$TRANSFORM_VERB_VBG_VBN
|
61 |
+
$REPLACE_this
|
62 |
+
$REPLACE_will
|
63 |
+
$TRANSFORM_VERB_VB_VBD
|
64 |
+
$APPEND_was
|
65 |
+
$REPLACE_n't
|
66 |
+
$APPEND_about
|
67 |
+
$REPLACE_from
|
68 |
+
$REPLACE_about
|
69 |
+
$REPLACE_It
|
70 |
+
$APPEND_on
|
71 |
+
$REPLACE_would
|
72 |
+
$MERGE_SPACE
|
73 |
+
$APPEND_at
|
74 |
+
$APPEND_'s
|
75 |
+
$REPLACE_as
|
76 |
+
$REPLACE_'s
|
77 |
+
$REPLACE_could
|
78 |
+
$APPEND_with
|
79 |
+
$REPLACE_did
|
80 |
+
$REPLACE_them
|
81 |
+
$REPLACE_The
|
82 |
+
$REPLACE_by
|
83 |
+
$REPLACE_so
|
84 |
+
$REPLACE_not
|
85 |
+
$REPLACE_can
|
86 |
+
$APPEND_am
|
87 |
+
$APPEND_be
|
88 |
+
$REPLACE_because
|
89 |
+
$APPEND_/
|
90 |
+
$REPLACE_they
|
91 |
+
$REPLACE_am
|
92 |
+
$APPEND_are
|
93 |
+
$TRANSFORM_VERB_VBZ_VBN
|
94 |
+
$REPLACE_'m
|
95 |
+
$REPLACE_their
|
96 |
+
$TRANSFORM_VERB_VBN_VBZ
|
97 |
+
$APPEND_had
|
98 |
+
$APPEND_would
|
99 |
+
$APPEND_-
|
100 |
+
$REPLACE_(
|
101 |
+
$TRANSFORM_VERB_VBN_VBD
|
102 |
+
$REPLACE_very
|
103 |
+
$REPLACE_people
|
104 |
+
$REPLACE_get
|
105 |
+
$REPLACE_there
|
106 |
+
$REPLACE_?
|
107 |
+
$APPEND_do
|
108 |
+
$REPLACE_;
|
109 |
+
$REPLACE_me
|
110 |
+
$REPLACE_one
|
111 |
+
$REPLACE_been
|
112 |
+
$APPEND_so
|
113 |
+
$APPEND_)
|
114 |
+
$APPEND_'m
|
115 |
+
$REPLACE_or
|
116 |
+
$REPLACE_some
|
117 |
+
$REPLACE_you
|
118 |
+
$TRANSFORM_VERB_VBD_VBN
|
119 |
+
$APPEND_as
|
120 |
+
$REPLACE_like
|
121 |
+
$TRANSFORM_VERB_VBD_VB
|
122 |
+
$REPLACE_which
|
123 |
+
$APPEND_has
|
124 |
+
$REPLACE_these
|
125 |
+
$REPLACE_This
|
126 |
+
$APPEND_from
|
127 |
+
$REPLACE_when
|
128 |
+
$APPEND_'ve
|
129 |
+
$REPLACE_``
|
130 |
+
$APPEND_there
|
131 |
+
$REPLACE_does
|
132 |
+
$APPEND_also
|
133 |
+
$APPEND_It
|
134 |
+
$APPEND_can
|
135 |
+
$REPLACE_:
|
136 |
+
$REPLACE_other
|
137 |
+
$APPEND_more
|
138 |
+
$REPLACE_want
|
139 |
+
$REPLACE_we
|
140 |
+
$REPLACE_'ve
|
141 |
+
$REPLACE_what
|
142 |
+
$REPLACE_more
|
143 |
+
$REPLACE_many
|
144 |
+
$REPLACE_into
|
145 |
+
$APPEND_been
|
146 |
+
$APPEND_by
|
147 |
+
$APPEND_this
|
148 |
+
$REPLACE_went
|
149 |
+
$REPLACE_time
|
150 |
+
$APPEND_only
|
151 |
+
$TRANSFORM_VERB_VBG_VBZ
|
152 |
+
$REPLACE_go
|
153 |
+
$REPLACE_while
|
154 |
+
$REPLACE_but
|
155 |
+
$APPEND_all
|
156 |
+
$APPEND_if
|
157 |
+
$REPLACE_should
|
158 |
+
$REPLACE_out
|
159 |
+
$APPEND_'
|
160 |
+
$REPLACE_during
|
161 |
+
$REPLACE_much
|
162 |
+
$APPEND_like
|
163 |
+
$REPLACE_!
|
164 |
+
$APPEND_but
|
165 |
+
$REPLACE_if
|
166 |
+
$REPLACE_since
|
167 |
+
$APPEND_people
|
168 |
+
$APPEND_because
|
169 |
+
$REPLACE_any
|
170 |
+
$APPEND_A
|
171 |
+
$REPLACE_another
|
172 |
+
$REPLACE_They
|
173 |
+
$APPEND_you
|
174 |
+
$REPLACE_ca
|
175 |
+
$REPLACE_our
|
176 |
+
$REPLACE_who
|
177 |
+
$APPEND_now
|
178 |
+
$REPLACE_really
|
179 |
+
$REPLACE_make
|
180 |
+
$APPEND_me
|
181 |
+
$APPEND_who
|
182 |
+
$REPLACE_In
|
183 |
+
$REPLACE_her
|
184 |
+
$REPLACE_English
|
185 |
+
$APPEND_some
|
186 |
+
$APPEND_when
|
187 |
+
$APPEND_still
|
188 |
+
$APPEND_them
|
189 |
+
$REPLACE_use
|
190 |
+
$APPEND_just
|
191 |
+
$REPLACE_things
|
192 |
+
$REPLACE_/
|
193 |
+
$REPLACE_got
|
194 |
+
$REPLACE_My
|
195 |
+
$APPEND_were
|
196 |
+
$REPLACE_he
|
197 |
+
$REPLACE_countries
|
198 |
+
$APPEND_their
|
199 |
+
$REPLACE_using
|
200 |
+
$TRANSFORM_VERB_VBZ_VBG
|
201 |
+
$APPEND_'ll
|
202 |
+
$REPLACE_being
|
203 |
+
$REPLACE_too
|
204 |
+
$APPEND_we
|
205 |
+
$APPEND_they
|
206 |
+
$REPLACE_lot
|
207 |
+
$REPLACE_-
|
208 |
+
$REPLACE_all
|
209 |
+
$REPLACE_good
|
210 |
+
$APPEND_[
|
211 |
+
$REPLACE_every
|
212 |
+
$REPLACE_)
|
213 |
+
$REPLACE_your
|
214 |
+
$APPEND_My
|
215 |
+
$APPEND_even
|
216 |
+
$APPEND_out
|
217 |
+
$REPLACE_his
|
218 |
+
$REPLACE_made
|
219 |
+
$APPEND_any
|
220 |
+
$REPLACE_where
|
221 |
+
$APPEND_which
|
222 |
+
$REPLACE_work
|
223 |
+
$REPLACE_used
|
224 |
+
$APPEND_one
|
225 |
+
$REPLACE_take
|
226 |
+
$APPEND_In
|
227 |
+
$REPLACE_There
|
228 |
+
$REPLACE_up
|
229 |
+
$REPLACE_how
|
230 |
+
$REPLACE_myself
|
231 |
+
$APPEND_what
|
232 |
+
$APPEND_very
|
233 |
+
$APPEND_?
|
234 |
+
$REPLACE_become
|
235 |
+
$REPLACE_think
|
236 |
+
$REPLACE_going
|
237 |
+
$REPLACE_Japanese
|
238 |
+
$REPLACE_well
|
239 |
+
$APPEND_being
|
240 |
+
$APPEND_or
|
241 |
+
$REPLACE_just
|
242 |
+
$REPLACE_write
|
243 |
+
$REPLACE_those
|
244 |
+
$REPLACE_feel
|
245 |
+
$REPLACE_until
|
246 |
+
$APPEND_However
|
247 |
+
$APPEND_our
|
248 |
+
$REPLACE_something
|
249 |
+
$APPEND_get
|
250 |
+
$REPLACE_diary
|
251 |
+
$REPLACE_no
|
252 |
+
$REPLACE_over
|
253 |
+
$APPEND_time
|
254 |
+
$APPEND_then
|
255 |
+
$REPLACE_see
|
256 |
+
$REPLACE_writing
|
257 |
+
$REPLACE_wo
|
258 |
+
$REPLACE_only
|
259 |
+
$REPLACE_'ll
|
260 |
+
$REPLACE_after
|
261 |
+
$REPLACE_know
|
262 |
+
$REPLACE_anything
|
263 |
+
$REPLACE_now
|
264 |
+
$REPLACE_That
|
265 |
+
$REPLACE_first
|
266 |
+
$REPLACE_than
|
267 |
+
$APPEND_up
|
268 |
+
$REPLACE_better
|
269 |
+
$REPLACE_hope
|
270 |
+
$REPLACE_through
|
271 |
+
$REPLACE_doing
|
272 |
+
$APPEND_go
|
273 |
+
$REPLACE_then
|
274 |
+
$APPEND_too
|
275 |
+
$REPLACE_studying
|
276 |
+
$REPLACE_its
|
277 |
+
$REPLACE_learn
|
278 |
+
$REPLACE_lives
|
279 |
+
$REPLACE_having
|
280 |
+
$REPLACE_told
|
281 |
+
$REPLACE_What
|
282 |
+
$REPLACE_she
|
283 |
+
$REPLACE_thought
|
284 |
+
$APPEND_not
|
285 |
+
$REPLACE_around
|
286 |
+
$REPLACE_him
|
287 |
+
$REPLACE_different
|
288 |
+
$APPEND_could
|
289 |
+
$APPEND_such
|
290 |
+
$REPLACE_able
|
291 |
+
$REPLACE_On
|
292 |
+
$REPLACE_before
|
293 |
+
$REPLACE_though
|
294 |
+
$REPLACE_also
|
295 |
+
$APPEND_entry
|
296 |
+
$REPLACE_learned
|
297 |
+
$TRANSFORM_CASE_UPPER
|
298 |
+
$APPEND_again
|
299 |
+
$REPLACE_friends
|
300 |
+
$APPEND_This
|
301 |
+
$REPLACE_might
|
302 |
+
$REPLACE_A
|
303 |
+
$REPLACE_However
|
304 |
+
$APPEND_really
|
305 |
+
$REPLACE_started
|
306 |
+
$REPLACE_improve
|
307 |
+
$APPEND_English
|
308 |
+
$REPLACE_years
|
309 |
+
$REPLACE_'
|
310 |
+
$REPLACE_most
|
311 |
+
$APPEND_how
|
312 |
+
$REPLACE_day
|
313 |
+
$APPEND_:
|
314 |
+
$APPEND_today
|
315 |
+
$REPLACE_find
|
316 |
+
$REPLACE_help
|
317 |
+
$APPEND_should
|
318 |
+
$REPLACE_We
|
319 |
+
$REPLACE_even
|
320 |
+
$REPLACE_may
|
321 |
+
$REPLACE_left
|
322 |
+
$REPLACE_called
|
323 |
+
$APPEND_did
|
324 |
+
$REPLACE_course
|
325 |
+
$REPLACE_These
|
326 |
+
$REPLACE_understand
|
327 |
+
$REPLACE_So
|
328 |
+
$REPLACE_said
|
329 |
+
$REPLACE_took
|
330 |
+
$REPLACE_person
|
331 |
+
$REPLACE_school
|
332 |
+
$REPLACE_such
|
333 |
+
$APPEND_called
|
334 |
+
$REPLACE_At
|
335 |
+
$APPEND_before
|
336 |
+
$REPLACE_way
|
337 |
+
$APPEND_he
|
338 |
+
$REPLACE_everyone
|
339 |
+
$REPLACE_here
|
340 |
+
$REPLACE_When
|
341 |
+
$REPLACE_everything
|
342 |
+
$REPLACE_need
|
343 |
+
$APPEND_her
|
344 |
+
$REPLACE_Because
|
345 |
+
$TRANSFORM_VERB_VBD_VBG
|
346 |
+
$REPLACE_say
|
347 |
+
$REPLACE_study
|
348 |
+
$APPEND_much
|
349 |
+
$REPLACE_still
|
350 |
+
$REPLACE_found
|
351 |
+
$APPEND_always
|
352 |
+
$REPLACE_last
|
353 |
+
$APPEND_other
|
354 |
+
$TRANSFORM_VERB_VBG_VBD
|
355 |
+
$REPLACE_learning
|
356 |
+
$REPLACE_correct
|
357 |
+
$REPLACE_two
|
358 |
+
$REPLACE_days
|
359 |
+
$REPLACE_difficult
|
360 |
+
$REPLACE_never
|
361 |
+
$APPEND__
|
362 |
+
$REPLACE_'d
|
363 |
+
$APPEND_your
|
364 |
+
$REPLACE_us
|
365 |
+
$REPLACE_foreign
|
366 |
+
$REPLACE_entry
|
367 |
+
$APPEND_!
|
368 |
+
$REPLACE_Japan
|
369 |
+
$APPEND_;
|
370 |
+
$REPLACE_tell
|
371 |
+
$REPLACE_give
|
372 |
+
$REPLACE_decided
|
373 |
+
$APPEND_during
|
374 |
+
$REPLACE_Also
|
375 |
+
$APPEND_his
|
376 |
+
$REPLACE_speak
|
377 |
+
$REPLACE_came
|
378 |
+
$REPLACE_little
|
379 |
+
$APPEND_while
|
380 |
+
$TRANSFORM_VERB_VBZ_VBD
|
381 |
+
$APPEND_things
|
382 |
+
$REPLACE_especially
|
383 |
+
$REPLACE_Recently
|
384 |
+
$REPLACE_come
|
385 |
+
$APPEND_especially
|
386 |
+
$REPLACE_needed
|
387 |
+
$APPEND_make
|
388 |
+
$REPLACE_whether
|
389 |
+
$REPLACE_felt
|
390 |
+
$REPLACE_Although
|
391 |
+
$REPLACE_someone
|
392 |
+
$REPLACE_As
|
393 |
+
$REPLACE_great
|
394 |
+
$REPLACE_today
|
395 |
+
$APPEND_since
|
396 |
+
$REPLACE_hard
|
397 |
+
$REPLACE_For
|
398 |
+
$REPLACE_became
|
399 |
+
$REPLACE_between
|
400 |
+
$REPLACE_beautiful
|
401 |
+
$REPLACE_life
|
402 |
+
$REPLACE_why
|
403 |
+
$APPEND_though
|
404 |
+
$APPEND_There
|
405 |
+
$APPEND_going
|
406 |
+
$REPLACE_long
|
407 |
+
$APPEND_where
|
408 |
+
$REPLACE_believe
|
409 |
+
$REPLACE_website
|
410 |
+
$REPLACE_heard
|
411 |
+
$REPLACE_job
|
412 |
+
$REPLACE_home
|
413 |
+
$REPLACE_'re
|
414 |
+
$REPLACE_But
|
415 |
+
$REPLACE_anyone
|
416 |
+
$REPLACE_again
|
417 |
+
$REPLACE_bad
|
418 |
+
$REPLACE_recently
|
419 |
+
$APPEND_here
|
420 |
+
$REPLACE_practice
|
421 |
+
$REPLACE_often
|
422 |
+
$APPEND_got
|
423 |
+
$APPEND_feel
|
424 |
+
$REPLACE_saw
|
425 |
+
$REPLACE_quickly
|
426 |
+
$REPLACE_language
|
427 |
+
$REPLACE_wanted
|
428 |
+
$APPEND_each
|
429 |
+
$REPLACE_put
|
430 |
+
$REPLACE_done
|
431 |
+
$REPLACE_minutes
|
432 |
+
$REPLACE_each
|
433 |
+
$APPEND_she
|
434 |
+
$REPLACE_grammar
|
435 |
+
$REPLACE_watch
|
436 |
+
$REPLACE_happy
|
437 |
+
$REPLACE_back
|
438 |
+
$REPLACE_friend
|
439 |
+
$REPLACE_off
|
440 |
+
$REPLACE_He
|
441 |
+
$REPLACE_Since
|
442 |
+
$APPEND_something
|
443 |
+
$APPEND_using
|
444 |
+
$APPEND_At
|
445 |
+
$REPLACE_university
|
446 |
+
$REPLACE_country
|
447 |
+
$REPLACE_watching
|
448 |
+
$REPLACE_received
|
449 |
+
$REPLACE_enough
|
450 |
+
$REPLACE_weather
|
451 |
+
$REPLACE_usually
|
452 |
+
$APPEND_back
|
453 |
+
$REPLACE_happened
|
454 |
+
$APPEND_having
|
455 |
+
$REPLACE_always
|
456 |
+
$APPEND_does
|
457 |
+
$REPLACE_After
|
458 |
+
$REPLACE_try
|
459 |
+
$REPLACE_start
|
460 |
+
$APPEND_already
|
461 |
+
$REPLACE_talk
|
462 |
+
$REPLACE_thing
|
463 |
+
$APPEND_But
|
464 |
+
$APPEND_For
|
465 |
+
$REPLACE_Then
|
466 |
+
$REPLACE_fun
|
467 |
+
$REPLACE_soon
|
468 |
+
$REPLACE_starting
|
469 |
+
$REPLACE_away
|
470 |
+
$APPEND_want
|
471 |
+
$REPLACE_asked
|
472 |
+
$APPEND_went
|
473 |
+
$REPLACE_trip
|
474 |
+
$REPLACE_new
|
475 |
+
$REPLACE_right
|
476 |
+
$APPEND_after
|
477 |
+
$REPLACE_keep
|
478 |
+
$REPLACE_interesting
|
479 |
+
$REPLACE_together
|
480 |
+
$REPLACE_Do
|
481 |
+
$APPEND_So
|
482 |
+
$REPLACE_beginning
|
483 |
+
$APPEND_myself
|
484 |
+
$REPLACE_getting
|
485 |
+
$APPEND_On
|
486 |
+
$REPLACE_restaurant
|
487 |
+
$REPLACE_looking
|
488 |
+
$REPLACE_children
|
489 |
+
$APPEND_last
|
490 |
+
$REPLACE_college
|
491 |
+
$APPEND_right
|
492 |
+
$REPLACE_stay
|
493 |
+
$REPLACE_year
|
494 |
+
$REPLACE_live
|
495 |
+
$REPLACE_travel
|
496 |
+
$REPLACE_favorite
|
497 |
+
$REPLACE_read
|
498 |
+
$APPEND_well
|
499 |
+
$REPLACE_written
|
500 |
+
$REPLACE_months
|
501 |
+
$APPEND_yet
|
502 |
+
$APPEND_first
|
503 |
+
$APPEND_most
|
504 |
+
$REPLACE_look
|
505 |
+
$REPLACE_tried
|
506 |
+
$REPLACE_clothes
|
507 |
+
$REPLACE_[
|
508 |
+
$REPLACE_kind
|
509 |
+
$APPEND_its
|
510 |
+
$REPLACE_&
|
511 |
+
$REPLACE_remember
|
512 |
+
$APPEND_him
|
513 |
+
$REPLACE_problem
|
514 |
+
$APPEND_*
|
515 |
+
$REPLACE_meet
|
516 |
+
$REPLACE_gave
|
517 |
+
$REPLACE_either
|
518 |
+
$REPLACE_makes
|
519 |
+
$REPLACE_elderly
|
520 |
+
$REPLACE_hobbies
|
521 |
+
$REPLACE_easily
|
522 |
+
$REPLACE_important
|
523 |
+
$APPEND_take
|
524 |
+
$APPEND_thing
|
525 |
+
$REPLACE_vocabulary
|
526 |
+
$REPLACE_listening
|
527 |
+
$REPLACE_must
|
528 |
+
$REPLACE_hours
|
529 |
+
$REPLACE_place
|
530 |
+
$REPLACE_While
|
531 |
+
$REPLACE_without
|
532 |
+
$REPLACE_end
|
533 |
+
$REPLACE_Korean
|
534 |
+
$REPLACE_Therefore
|
535 |
+
$REPLACE_working
|
536 |
+
$REPLACE_high
|
537 |
+
$REPLACE_house
|
538 |
+
$REPLACE_already
|
539 |
+
$APPEND_good
|
540 |
+
$REPLACE_opportunity
|
541 |
+
$APPEND_many
|
542 |
+
$REPLACE_family
|
543 |
+
$REPLACE_During
|
544 |
+
$REPLACE_First
|
545 |
+
$APPEND_both
|
546 |
+
$REPLACE_once
|
547 |
+
$REPLACE_experience
|
548 |
+
$REPLACE_tomorrow
|
549 |
+
$APPEND_these
|
550 |
+
$REPLACE_true
|
551 |
+
$APPEND_day
|
552 |
+
$REPLACE_leave
|
553 |
+
$APPEND_When
|
554 |
+
$REPLACE_watched
|
555 |
+
$APPEND_person
|
556 |
+
$REPLACE_best
|
557 |
+
$REPLACE_harder
|
558 |
+
$REPLACE_Today
|
559 |
+
$REPLACE_morning
|
560 |
+
$REPLACE_If
|
561 |
+
$REPLACE_woke
|
562 |
+
$APPEND_into
|
563 |
+
$APPEND_made
|
564 |
+
$REPLACE_foreigners
|
565 |
+
$REPLACE_part
|
566 |
+
$APPEND_ever
|
567 |
+
$APPEND_probably
|
568 |
+
$APPEND_way
|
569 |
+
$APPEND_over
|
570 |
+
$APPEND_n't
|
571 |
+
$REPLACE_towards
|
572 |
+
$REPLACE_three
|
573 |
+
$REPLACE_One
|
574 |
+
$REPLACE_studied
|
575 |
+
$REPLACE_nervous
|
576 |
+
$REPLACE_forward
|
577 |
+
$REPLACE_seen
|
578 |
+
$REPLACE_Chinese
|
579 |
+
$REPLACE_night
|
580 |
+
$APPEND_own
|
581 |
+
$REPLACE_taught
|
582 |
+
$APPEND_usually
|
583 |
+
$REPLACE_To
|
584 |
+
$REPLACE_communicate
|
585 |
+
$APPEND_Japanese
|
586 |
+
$REPLACE_entries
|
587 |
+
$REPLACE_traveling
|
588 |
+
$REPLACE_site
|
589 |
+
$REPLACE_difference
|
590 |
+
$APPEND_those
|
591 |
+
$TRANSFORM_VERB_VBD_VBZ
|
592 |
+
$REPLACE_rainy
|
593 |
+
$REPLACE_play
|
594 |
+
$REPLACE_comfortable
|
595 |
+
$REPLACE_recommend
|
596 |
+
$REPLACE_coming
|
597 |
+
$REPLACE_Is
|
598 |
+
$REPLACE_asleep
|
599 |
+
$REPLACE_realized
|
600 |
+
$APPEND_recently
|
601 |
+
$APPEND_around
|
602 |
+
$REPLACE_men
|
603 |
+
$REPLACE_Finally
|
604 |
+
$REPLACE_excited
|
605 |
+
$REPLACE_near
|
606 |
+
$APPEND_often
|
607 |
+
$REPLACE_t
|
608 |
+
$REPLACE_next
|
609 |
+
$REPLACE_ever
|
610 |
+
$APPEND_Today
|
611 |
+
$REPLACE_taking
|
612 |
+
$APPEND_started
|
613 |
+
$REPLACE_please
|
614 |
+
$APPEND_than
|
615 |
+
$REPLACE_sentences
|
616 |
+
$APPEND_What
|
617 |
+
$REPLACE_She
|
618 |
+
$APPEND_work
|
619 |
+
$REPLACE_visit
|
620 |
+
$REPLACE_surprised
|
621 |
+
$REPLACE_show
|
622 |
+
$REPLACE_You
|
623 |
+
$APPEND_used
|
624 |
+
$REPLACE_ago
|
625 |
+
$APPEND_Even
|
626 |
+
$APPEND_That
|
627 |
+
$REPLACE_similar
|
628 |
+
$APPEND_soon
|
629 |
+
$REPLACE_less
|
630 |
+
$REPLACE_enjoy
|
631 |
+
$REPLACE_diaries
|
632 |
+
$REPLACE_speaking
|
633 |
+
$REPLACE_past
|
634 |
+
$APPEND_through
|
635 |
+
$REPLACE_women
|
636 |
+
$REPLACE_planned
|
637 |
+
$REPLACE_later
|
638 |
+
$REPLACE_looked
|
639 |
+
$REPLACE_yet
|
640 |
+
$APPEND_us
|
641 |
+
$REPLACE_And
|
642 |
+
$APPEND_'d
|
643 |
+
$APPEND_As
|
644 |
+
$REPLACE_healthy
|
645 |
+
$APPEND_might
|
646 |
+
$REPLACE_class
|
647 |
+
$REPLACE_Now
|
648 |
+
$REPLACE_outside
|
649 |
+
$REPLACE_tired
|
650 |
+
$APPEND_else
|
651 |
+
$REPLACE_Please
|
652 |
+
$REPLACE_problems
|
653 |
+
$APPEND_They
|
654 |
+
$REPLACE_food
|
655 |
+
$REPLACE_reading
|
656 |
+
$APPEND_&
|
657 |
+
$APPEND_think
|
658 |
+
$REPLACE_finished
|
659 |
+
$REPLACE_popular
|
660 |
+
$REPLACE_Are
|
661 |
+
$APPEND_2
|
662 |
+
$APPEND_may
|
663 |
+
$APPEND_found
|
664 |
+
$APPEND_whether
|
665 |
+
$APPEND_We
|
666 |
+
$REPLACE_How
|
667 |
+
$REPLACE_continue
|
668 |
+
$REPLACE_everyday
|
669 |
+
$REPLACE_daily
|
670 |
+
$REPLACE_talked
|
671 |
+
$APPEND_new
|
672 |
+
$REPLACE_reason
|
673 |
+
$REPLACE_means
|
674 |
+
$REPLACE_opportunities
|
675 |
+
$APPEND_different
|
676 |
+
$REPLACE_business
|
677 |
+
$REPLACE_making
|
678 |
+
$APPEND_ago
|
679 |
+
$REPLACE_favourite
|
680 |
+
$REPLACE_bit
|
681 |
+
$REPLACE_delicious
|
682 |
+
$APPEND_every
|
683 |
+
$REPLACE_spend
|
684 |
+
$APPEND_finally
|
685 |
+
$APPEND_part
|
686 |
+
$REPLACE_yesterday
|
687 |
+
$REPLACE_down
|
688 |
+
$REPLACE_times
|
689 |
+
$REPLACE_holiday
|
690 |
+
$REPLACE_nice
|
691 |
+
$REPLACE_although
|
692 |
+
$REPLACE_earlier
|
693 |
+
$REPLACE_Can
|
694 |
+
$REPLACE_due
|
695 |
+
$APPEND_help
|
696 |
+
$REPLACE_caught
|
697 |
+
$REPLACE_quite
|
698 |
+
$APPEND_kind
|
699 |
+
$REPLACE_words
|
700 |
+
$REPLACE_movie
|
701 |
+
$REPLACE_else
|
702 |
+
$APPEND_together
|
703 |
+
$REPLACE_advertisement
|
704 |
+
$APPEND_Is
|
705 |
+
$APPEND_between
|
706 |
+
$APPEND_enough
|
707 |
+
$REPLACE_let
|
708 |
+
$REPLACE_instead
|
709 |
+
$REPLACE_disappointed
|
710 |
+
$REPLACE_Have
|
711 |
+
$APPEND_After
|
712 |
+
$APPEND_no
|
713 |
+
$APPEND_doing
|
714 |
+
$REPLACE_skills
|
715 |
+
$APPEND_instead
|
716 |
+
$REPLACE_Some
|
717 |
+
$REPLACE_Actually
|
718 |
+
$APPEND_3
|
719 |
+
$REPLACE_choose
|
720 |
+
$REPLACE_An
|
721 |
+
$APPEND_away
|
722 |
+
$REPLACE_Does
|
723 |
+
$REPLACE_played
|
724 |
+
$APPEND_Because
|
725 |
+
$REPLACE_both
|
726 |
+
$REPLACE_easier
|
727 |
+
$REPLACE_others
|
728 |
+
$REPLACE_eat
|
729 |
+
$REPLACE_onto
|
730 |
+
$REPLACE_sometimes
|
731 |
+
$REPLACE_began
|
732 |
+
$REPLACE_usual
|
733 |
+
$REPLACE_expensive
|
734 |
+
$APPEND_To
|
735 |
+
$APPEND_actually
|
736 |
+
$REPLACE_old
|
737 |
+
$APPEND_see
|
738 |
+
$APPEND_know
|
739 |
+
$REPLACE_few
|
740 |
+
$APPEND_why
|
741 |
+
$APPEND_sometimes
|
742 |
+
$REPLACE_Unfortunately
|
743 |
+
$APPEND_use
|
744 |
+
$REPLACE_older
|
745 |
+
$REPLACE_joined
|
746 |
+
$REPLACE_own
|
747 |
+
$REPLACE_raining
|
748 |
+
$REPLACE_themselves
|
749 |
+
$REPLACE_example
|
750 |
+
$APPEND_able
|
751 |
+
$REPLACE_arrived
|
752 |
+
$REPLACE_whom
|
753 |
+
$REPLACE_nothing
|
754 |
+
$REPLACE_fluently
|
755 |
+
$APPEND_getting
|
756 |
+
$REPLACE_convenient
|
757 |
+
$REPLACE_met
|
758 |
+
$REPLACE_becoming
|
759 |
+
$APPEND_better
|
760 |
+
$APPEND_become
|
761 |
+
$REPLACE_lots
|
762 |
+
$REPLACE_fast
|
763 |
+
$REPLACE_memories
|
764 |
+
$REPLACE_worse
|
765 |
+
$REPLACE_interested
|
766 |
+
$REPLACE_hear
|
767 |
+
$REPLACE_Secondly
|
768 |
+
$REPLACE_thoughts
|
769 |
+
$REPLACE_journal
|
770 |
+
$REPLACE_bought
|
771 |
+
$REPLACE_useful
|
772 |
+
$REPLACE_teach
|
773 |
+
$APPEND_learn
|
774 |
+
$REPLACE_throughout
|
775 |
+
$REPLACE_money
|
776 |
+
$REPLACE_change
|
777 |
+
$REPLACE_imagine
|
778 |
+
$REPLACE_late
|
779 |
+
$REPLACE_mine
|
780 |
+
$REPLACE_same
|
781 |
+
$REPLACE_future
|
782 |
+
$REPLACE_sure
|
783 |
+
$REPLACE_students
|
784 |
+
$REPLACE_along
|
785 |
+
$REPLACE_exercise
|
786 |
+
$REPLACE_opinion
|
787 |
+
$REPLACE_return
|
788 |
+
$REPLACE_cause
|
789 |
+
$REPLACE_month
|
790 |
+
$REPLACE_stop
|
791 |
+
$REPLACE_worried
|
792 |
+
$REPLACE_trying
|
793 |
+
$REPLACE_health
|
794 |
+
$REPLACE_American
|
795 |
+
$APPEND_writing
|
796 |
+
$REPLACE_enjoyed
|
797 |
+
$REPLACE_second
|
798 |
+
$APPEND_look
|
799 |
+
$APPEND_old
|
800 |
+
$REPLACE_finally
|
801 |
+
$REPLACE_wish
|
802 |
+
$REPLACE_famous
|
803 |
+
$REPLACE_talking
|
804 |
+
$REPLACE_abroad
|
805 |
+
$REPLACE_information
|
806 |
+
$APPEND_And
|
807 |
+
$REPLACE_stopped
|
808 |
+
$REPLACE_lose
|
809 |
+
$REPLACE_sentence
|
810 |
+
$REPLACE_pronunciation
|
811 |
+
$REPLACE_feeling
|
812 |
+
$REPLACE_younger
|
813 |
+
$REPLACE_passed
|
814 |
+
$REPLACE_among
|
815 |
+
$REPLACE_paid
|
816 |
+
$REPLACE_playing
|
817 |
+
$REPLACE_attend
|
818 |
+
$REPLACE_early
|
819 |
+
$REPLACE_All
|
820 |
+
$REPLACE_Maybe
|
821 |
+
$APPEND_high
|
822 |
+
$REPLACE_child
|
823 |
+
$APPEND_anything
|
824 |
+
$REPLACE_order
|
825 |
+
$REPLACE_saying
|
826 |
+
$REPLACE_families
|
827 |
+
$REPLACE_special
|
828 |
+
$REPLACE_spent
|
829 |
+
$REPLACE_appreciate
|
830 |
+
$REPLACE_successful
|
831 |
+
$APPEND_If
|
832 |
+
$REPLACE_turned
|
833 |
+
$REPLACE_cities
|
834 |
+
$REPLACE_definitely
|
835 |
+
$REPLACE_fell
|
836 |
+
$APPEND_try
|
837 |
+
$APPEND_skills
|
838 |
+
$REPLACE_world
|
839 |
+
$REPLACE_technology
|
840 |
+
$REPLACE_small
|
841 |
+
$REPLACE_wrote
|
842 |
+
$REPLACE_takes
|
843 |
+
$REPLACE_seems
|
844 |
+
$REPLACE_various
|
845 |
+
$REPLACE_actually
|
846 |
+
$REPLACE_exam
|
847 |
+
$REPLACE_free
|
848 |
+
$REPLACE_gone
|
849 |
+
$REPLACE_strong
|
850 |
+
$REPLACE_receive
|
851 |
+
$REPLACE_Yesterday
|
852 |
+
$REPLACE_probably
|
853 |
+
$REPLACE_Every
|
854 |
+
$REPLACE_normal
|
855 |
+
$REPLACE_ask
|
856 |
+
$REPLACE_company
|
857 |
+
$REPLACE_environment
|
858 |
+
$REPLACE_buy
|
859 |
+
$REPLACE_shows
|
860 |
+
$REPLACE_easy
|
861 |
+
$REPLACE_sincerely
|
862 |
+
$REPLACE_vacation
|
863 |
+
$REPLACE_far
|
864 |
+
$REPLACE_sauce
|
865 |
+
$REPLACE_teacher
|
866 |
+
$REPLACE_living
|
867 |
+
$REPLACE_parties
|
868 |
+
$REPLACE_goes
|
869 |
+
$REPLACE_Christmas
|
870 |
+
$REPLACE_Hello
|
871 |
+
$APPEND_start
|
872 |
+
$REPLACE_hurt
|
873 |
+
$REPLACE_wonder
|
874 |
+
$REPLACE_mind
|
875 |
+
$REPLACE_possible
|
876 |
+
$REPLACE_thinking
|
877 |
+
$APPEND_Now
|
878 |
+
$REPLACE_relationship
|
879 |
+
$REPLACE_plan
|
880 |
+
$REPLACE_man
|
881 |
+
$REPLACE_woman
|
882 |
+
$REPLACE_activities
|
883 |
+
$APPEND_down
|
884 |
+
$REPLACE_returned
|
885 |
+
$REPLACE_pay
|
886 |
+
$REPLACE_ability
|
887 |
+
$REPLACE_exciting
|
888 |
+
$REPLACE_safe
|
889 |
+
$APPEND_off
|
890 |
+
$APPEND_until
|
891 |
+
$REPLACE_goal
|
892 |
+
$APPEND_either
|
893 |
+
$REPLACE_mistakes
|
894 |
+
$REPLACE_within
|
895 |
+
$REPLACE_etc
|
896 |
+
$REPLACE_cost
|
897 |
+
$REPLACE_particular
|
898 |
+
$REPLACE_sense
|
899 |
+
$REPLACE_longer
|
900 |
+
$REPLACE_advice
|
901 |
+
$REPLACE_several
|
902 |
+
$APPEND_Do
|
903 |
+
$APPEND_currently
|
904 |
+
$REPLACE_answer
|
905 |
+
$REPLACE_Even
|
906 |
+
$REPLACE_held
|
907 |
+
$REPLACE_online
|
908 |
+
$APPEND_life
|
909 |
+
$REPLACE_Firstly
|
910 |
+
$REPLACE_close
|
911 |
+
$APPEND_study
|
912 |
+
$REPLACE_wear
|
913 |
+
$APPEND_language
|
914 |
+
$REPLACE_number
|
915 |
+
$REPLACE_young
|
916 |
+
$APPEND_whole
|
917 |
+
$APPEND_two
|
918 |
+
$APPEND_Then
|
919 |
+
$REPLACE_large
|
920 |
+
$REPLACE_eating
|
921 |
+
$REPLACE_given
|
922 |
+
$REPLACE_video
|
923 |
+
$REPLACE_spoke
|
924 |
+
$REPLACE_Another
|
925 |
+
$APPEND_rather
|
926 |
+
$APPEND_Of
|
927 |
+
$APPEND_once
|
928 |
+
$REPLACE_wonderful
|
929 |
+
$APPEND_must
|
930 |
+
$REPLACE_tonight
|
931 |
+
$REPLACE_Their
|
932 |
+
$REPLACE_languages
|
933 |
+
$REPLACE_big
|
934 |
+
$REPLACE_break
|
935 |
+
$REPLACE_government
|
936 |
+
$REPLACE_staff
|
937 |
+
$REPLACE_prepare
|
938 |
+
$REPLACE_quit
|
939 |
+
$REPLACE_completely
|
940 |
+
$REPLACE_ourselves
|
941 |
+
$APPEND_He
|
942 |
+
$REPLACE_nor
|
943 |
+
$APPEND_someone
|
944 |
+
$REPLACE_sad
|
945 |
+
$REPLACE_against
|
946 |
+
$REPLACE_anymore
|
947 |
+
$APPEND_give
|
948 |
+
$REPLACE_stayed
|
949 |
+
$REPLACE_achieve
|
950 |
+
$APPEND_An
|
951 |
+
$APPEND_Right
|
952 |
+
$REPLACE_maybe
|
953 |
+
$REPLACE_lost
|
954 |
+
$APPEND_came
|
955 |
+
$REPLACE_accommodation
|
956 |
+
$APPEND_One
|
957 |
+
$APPEND_"
|
958 |
+
$REPLACE_daughter
|
959 |
+
$APPEND_next
|
960 |
+
$REPLACE_love
|
961 |
+
$REPLACE_cheap
|
962 |
+
$REPLACE_poor
|
963 |
+
$REPLACE_By
|
964 |
+
$REPLACE_whole
|
965 |
+
$REPLACE_bring
|
966 |
+
$REPLACE_real
|
967 |
+
$REPLACE_worked
|
968 |
+
$REPLACE_almost
|
969 |
+
$REPLACE_prefer
|
970 |
+
$APPEND_find
|
971 |
+
$REPLACE_everybody
|
972 |
+
$APPEND_another
|
973 |
+
$REPLACE_create
|
974 |
+
$REPLACE_addition
|
975 |
+
$REPLACE_turn
|
976 |
+
$REPLACE_situation
|
977 |
+
$APPEND_due
|
978 |
+
$REPLACE_boyfriend
|
979 |
+
$APPEND_home
|
980 |
+
$REPLACE_finish
|
981 |
+
$REPLACE_rather
|
982 |
+
$APPEND_said
|
983 |
+
$APPEND_'re
|
984 |
+
$REPLACE_careful
|
985 |
+
$APPEND_long
|
986 |
+
$REPLACE_recommended
|
987 |
+
$REPLACE_customers
|
988 |
+
$REPLACE_level
|
989 |
+
$REPLACE_died
|
990 |
+
$REPLACE_comes
|
991 |
+
$APPEND_You
|
992 |
+
$REPLACE_glad
|
993 |
+
$APPEND_come
|
994 |
+
$REPLACE_short
|
995 |
+
$REPLACE_knowledge
|
996 |
+
$REPLACE_set
|
997 |
+
$REPLACE_Lang
|
998 |
+
$REPLACE_planning
|
999 |
+
$REPLACE_confidence
|
1000 |
+
$REPLACE_gain
|
1001 |
+
$REPLACE_check
|
1002 |
+
$REPLACE_immediately
|
1003 |
+
$REPLACE_scared
|
1004 |
+
$REPLACE_conversation
|
1005 |
+
$REPLACE_native
|
1006 |
+
$REPLACE_His
|
1007 |
+
$REPLACE_full
|
1008 |
+
$REPLACE_express
|
1009 |
+
$REPLACE_married
|
1010 |
+
$REPLACE_shopping
|
1011 |
+
$APPEND_working
|
1012 |
+
$APPEND_food
|
1013 |
+
$REPLACE_research
|
1014 |
+
$REPLACE_whenever
|
1015 |
+
$REPLACE_corrections
|
1016 |
+
$REPLACE_weird
|
1017 |
+
$APPEND_quite
|
1018 |
+
$REPLACE_necessary
|
1019 |
+
$REPLACE_Korea
|
1020 |
+
$REPLACE_whose
|
1021 |
+
$REPLACE_higher
|
1022 |
+
$APPEND_entries
|
1023 |
+
$REPLACE_Starting
|
1024 |
+
$REPLACE_attended
|
1025 |
+
$APPEND_5
|
1026 |
+
$APPEND_past
|
1027 |
+
$REPLACE_realize
|
1028 |
+
$REPLACE_cold
|
1029 |
+
$APPEND_playing
|
1030 |
+
$REPLACE_ended
|
1031 |
+
$REPLACE_inside
|
1032 |
+
$APPEND_please
|
1033 |
+
$REPLACE_coffee
|
1034 |
+
$REPLACE_enjoyable
|
1035 |
+
$APPEND_took
|
1036 |
+
$REPLACE_economic
|
1037 |
+
$APPEND_member
|
1038 |
+
$REPLACE_natural
|
1039 |
+
$REPLACE_registered
|
1040 |
+
$REPLACE_idea
|
1041 |
+
$APPEND_Recently
|
1042 |
+
$APPEND_<
|
1043 |
+
$REPLACE_caused
|
1044 |
+
$REPLACE_student
|
1045 |
+
$REPLACE_questions
|
1046 |
+
$REPLACE_music
|
1047 |
+
$REPLACE_story
|
1048 |
+
$REPLACE_happiness
|
1049 |
+
$REPLACE_gives
|
1050 |
+
$APPEND_She
|
1051 |
+
$REPLACE_Especially
|
1052 |
+
$REPLACE_energy
|
1053 |
+
$REPLACE_available
|
1054 |
+
$REPLACE_anywhere
|
1055 |
+
$REPLACE_taken
|
1056 |
+
$REPLACE_four
|
1057 |
+
$REPLACE_sleep
|
1058 |
+
$REPLACE_afraid
|
1059 |
+
$REPLACE_Everyone
|
1060 |
+
$APPEND_learning
|
1061 |
+
$REPLACE_ate
|
1062 |
+
$APPEND_few
|
1063 |
+
$APPEND_Since
|
1064 |
+
$REPLACE_helps
|
1065 |
+
$REPLACE_vegetables
|
1066 |
+
$REPLACE_kept
|
1067 |
+
$REPLACE_gets
|
1068 |
+
$REPLACE_explain
|
1069 |
+
$REPLACE_girlfriend
|
1070 |
+
$REPLACE_choice
|
1071 |
+
$REPLACE_waiting
|
1072 |
+
$APPEND_put
|
1073 |
+
$APPEND_yesterday
|
1074 |
+
$APPEND_During
|
1075 |
+
$REPLACE_From
|
1076 |
+
$APPEND_starting
|
1077 |
+
$REPLACE_scary
|
1078 |
+
$REPLACE_program
|
1079 |
+
$REPLACE_fish
|
1080 |
+
$REPLACE_hand
|
1081 |
+
$REPLACE_enter
|
1082 |
+
$APPEND_friends
|
1083 |
+
$REPLACE_decide
|
1084 |
+
$REPLACE_score
|
1085 |
+
$REPLACE_lonely
|
1086 |
+
$APPEND_easily
|
1087 |
+
$REPLACE_discovered
|
1088 |
+
$REPLACE_seeing
|
1089 |
+
$REPLACE_message
|
1090 |
+
$REPLACE_week
|
1091 |
+
$APPEND_studying
|
1092 |
+
$REPLACE_universities
|
1093 |
+
$REPLACE_introduce
|
1094 |
+
$REPLACE_common
|
1095 |
+
$REPLACE_heavily
|
1096 |
+
$REPLACE_People
|
1097 |
+
$REPLACE_care
|
1098 |
+
$APPEND_hard
|
1099 |
+
$REPLACE_hit
|
1100 |
+
$REPLACE_America
|
1101 |
+
$REPLACE_point
|
1102 |
+
$APPEND_need
|
1103 |
+
$REPLACE_funny
|
1104 |
+
$APPEND_almost
|
1105 |
+
$REPLACE_pass
|
1106 |
+
$REPLACE_temperature
|
1107 |
+
$REPLACE_performance
|
1108 |
+
$REPLACE_call
|
1109 |
+
$REPLACE_extremely
|
1110 |
+
$REPLACE_chance
|
1111 |
+
$REPLACE_main
|
1112 |
+
$REPLACE_season
|
1113 |
+
$REPLACE_series
|
1114 |
+
$REPLACE_nearby
|
1115 |
+
$REPLACE_license
|
1116 |
+
$REPLACE_expected
|
1117 |
+
$REPLACE_Last
|
1118 |
+
$REPLACE_picture
|
1119 |
+
$REPLACE_movies
|
1120 |
+
$APPEND_Also
|
1121 |
+
$REPLACE_seriously
|
1122 |
+
$REPLACE_via
|
1123 |
+
$REPLACE_running
|
1124 |
+
$REPLACE_run
|
1125 |
+
$REPLACE_regarding
|
1126 |
+
$REPLACE_chose
|
1127 |
+
$REPLACE_moment
|
1128 |
+
$APPEND_feeling
|
1129 |
+
$APPEND_bit
|
1130 |
+
$REPLACE_occurred
|
1131 |
+
$REPLACE_travelling
|
1132 |
+
$REPLACE_brought
|
1133 |
+
$APPEND_makes
|
1134 |
+
$REPLACE_amount
|
1135 |
+
$REPLACE_speakers
|
1136 |
+
$REPLACE_scenery
|
1137 |
+
$APPEND_year
|
1138 |
+
$APPEND_quickly
|
1139 |
+
$REPLACE_grateful
|
1140 |
+
$REPLACE_character
|
1141 |
+
$REPLACE_sleepy
|
1142 |
+
$REPLACE_bed
|
1143 |
+
$REPLACE_increase
|
1144 |
+
$APPEND_Good
|
1145 |
+
$REPLACE_area
|
1146 |
+
$REPLACE_certain
|
1147 |
+
$REPLACE_ways
|
1148 |
+
$REPLACE_looks
|
1149 |
+
$REPLACE_Nowadays
|
1150 |
+
$REPLACE_lucky
|
1151 |
+
$REPLACE_current
|
1152 |
+
$REPLACE_traditional
|
1153 |
+
$APPEND_write
|
1154 |
+
$APPEND_anymore
|
1155 |
+
$REPLACE_noticed
|
1156 |
+
$REPLACE_Did
|
1157 |
+
$REPLACE_matter
|
1158 |
+
$REPLACE_worry
|
1159 |
+
$REPLACE_angry
|
1160 |
+
$REPLACE_With
|
1161 |
+
$REPLACE_biggest
|
1162 |
+
$REPLACE_alcohol
|
1163 |
+
$APPEND_left
|
1164 |
+
$REPLACE_move
|
1165 |
+
$REPLACE_succeed
|
1166 |
+
$REPLACE_post
|
1167 |
+
$REPLACE_]
|
1168 |
+
$REPLACE_abilities
|
1169 |
+
$REPLACE_earthquake
|
1170 |
+
$REPLACE_visited
|
1171 |
+
$APPEND_]
|
1172 |
+
$REPLACE_speech
|
1173 |
+
$REPLACE_Thank
|
1174 |
+
$REPLACE_fewer
|
1175 |
+
$REPLACE_happen
|
1176 |
+
$APPEND_tomorrow
|
1177 |
+
$REPLACE_dinner
|
1178 |
+
$REPLACE_quiet
|
1179 |
+
$APPEND_type
|
1180 |
+
$REPLACE_previous
|
1181 |
+
$REPLACE_Furthermore
|
1182 |
+
$REPLACE_colleagues
|
1183 |
+
$REPLACE_present
|
1184 |
+
$REPLACE_No
|
1185 |
+
$REPLACE_chicken
|
1186 |
+
$REPLACE_city
|
1187 |
+
$REPLACE_weeks
|
1188 |
+
$REPLACE_develop
|
1189 |
+
$REPLACE_join
|
1190 |
+
$APPEND_Last
|
1191 |
+
$REPLACE_except
|
1192 |
+
$REPLACE_economy
|
1193 |
+
$REPLACE_sang
|
1194 |
+
$REPLACE_phrase
|
1195 |
+
$REPLACE_provide
|
1196 |
+
$REPLACE_lately
|
1197 |
+
$REPLACE_experienced
|
1198 |
+
$REPLACE_won
|
1199 |
+
$REPLACE_Though
|
1200 |
+
$APPEND_Therefore
|
1201 |
+
$APPEND_piece
|
1202 |
+
$REPLACE_including
|
1203 |
+
$REPLACE_husband
|
1204 |
+
$REPLACE_changed
|
1205 |
+
$REPLACE_view
|
1206 |
+
$REPLACE_becomes
|
1207 |
+
$REPLACE_share
|
1208 |
+
$APPEND_place
|
1209 |
+
$REPLACE_test
|
1210 |
+
$APPEND_4
|
1211 |
+
$APPEND_years
|
1212 |
+
$REPLACE_Our
|
1213 |
+
$REPLACE_wrong
|
1214 |
+
$REPLACE_seemed
|
1215 |
+
$REPLACE_wondering
|
1216 |
+
$REPLACE_computer
|
1217 |
+
$REPLACE_known
|
1218 |
+
$REPLACE_culture
|
1219 |
+
$REPLACE_Hong
|
1220 |
+
$REPLACE_clear
|
1221 |
+
$REPLACE_birthday
|
1222 |
+
$REPLACE_despite
|
1223 |
+
$REPLACE_front
|
1224 |
+
$REPLACE_sound
|
1225 |
+
$REPLACE_thankful
|
1226 |
+
$REPLACE_practise
|
1227 |
+
$REPLACE_Will
|
1228 |
+
$REPLACE_atmosphere
|
1229 |
+
$REPLACE_activity
|
1230 |
+
$APPEND_movie
|
1231 |
+
$REPLACE_China
|
1232 |
+
$REPLACE_reasons
|
1233 |
+
$REPLACE_name
|
1234 |
+
$REPLACE_serious
|
1235 |
+
$REPLACE_2
|
1236 |
+
$REPLACE_warm
|
1237 |
+
$REPLACE_depressed
|
1238 |
+
$REPLACE_simple
|
1239 |
+
$APPEND_trying
|
1240 |
+
$REPLACE_alone
|
1241 |
+
$APPEND_`
|
1242 |
+
$REPLACE_listen
|
1243 |
+
$REPLACE__
|
1244 |
+
$REPLACE_faithfully
|
1245 |
+
$REPLACE_Which
|
1246 |
+
$REPLACE_relieved
|
1247 |
+
$APPEND_1
|
1248 |
+
$REPLACE_price
|
1249 |
+
$REPLACE_store
|
1250 |
+
$REPLACE_lower
|
1251 |
+
$REPLACE_strange
|
1252 |
+
$REPLACE_game
|
1253 |
+
$REPLACE_sick
|
1254 |
+
$REPLACE_focus
|
1255 |
+
$REPLACE_suddenly
|
1256 |
+
$APPEND_Please
|
1257 |
+
$REPLACE_Would
|
1258 |
+
$REPLACE_traveled
|
1259 |
+
$REPLACE_event
|
1260 |
+
$REPLACE_ones
|
1261 |
+
$APPEND_Yesterday
|
1262 |
+
$APPEND_making
|
1263 |
+
$REPLACE_remembered
|
1264 |
+
$REPLACE_s
|
1265 |
+
$REPLACE_Lately
|
1266 |
+
$APPEND_S
|
1267 |
+
$REPLACE_member
|
1268 |
+
$APPEND_decided
|
1269 |
+
$REPLACE_across
|
1270 |
+
$REPLACE_entered
|
1271 |
+
$APPEND_maybe
|
1272 |
+
$REPLACE_University
|
1273 |
+
$REPLACE_difficulties
|
1274 |
+
$REPLACE_terrible
|
1275 |
+
$REPLACE_places
|
1276 |
+
$REPLACE_pretty
|
1277 |
+
$REPLACE_weekend
|
1278 |
+
$REPLACE_decision
|
1279 |
+
$APPEND_later
|
1280 |
+
$REPLACE_anybody
|
1281 |
+
$REPLACE_result
|
1282 |
+
$REPLACE_buses
|
1283 |
+
$REPLACE_Fortunately
|
1284 |
+
$APPEND_suddenly
|
1285 |
+
$REPLACE_slept
|
1286 |
+
$APPEND_school
|
1287 |
+
$REPLACE_group
|
1288 |
+
$REPLACE_electricity
|
1289 |
+
$REPLACE_fan
|
1290 |
+
$REPLACE_supposed
|
1291 |
+
$REPLACE_recent
|
1292 |
+
$REPLACE_wants
|
1293 |
+
$APPEND_10
|
1294 |
+
$REPLACE_low
|
1295 |
+
$APPEND_continue
|
1296 |
+
$APPEND_keep
|
1297 |
+
$APPEND_words
|
1298 |
+
$APPEND_Sometimes
|
1299 |
+
$REPLACE_type
|
1300 |
+
$REPLACE_Tomorrow
|
1301 |
+
$REPLACE_okay
|
1302 |
+
$APPEND_class
|
1303 |
+
$REPLACE_Her
|
1304 |
+
$APPEND_everything
|
1305 |
+
$APPEND_university
|
1306 |
+
$REPLACE_behind
|
1307 |
+
$REPLACE_clean
|
1308 |
+
$REPLACE_anxious
|
1309 |
+
$REPLACE_follow
|
1310 |
+
$APPEND_amount
|
1311 |
+
$REPLACE_parents
|
1312 |
+
$APPEND_While
|
1313 |
+
$REPLACE_email
|
1314 |
+
$REPLACE_mean
|
1315 |
+
$REPLACE_Most
|
1316 |
+
$APPEND_watching
|
1317 |
+
$REPLACE_taste
|
1318 |
+
$APPEND_taking
|
1319 |
+
$REPLACE_Sometimes
|
1320 |
+
$REPLACE_French
|
1321 |
+
$REPLACE_wearing
|
1322 |
+
$APPEND_weather
|
1323 |
+
$REPLACE_law
|
1324 |
+
$REPLACE_difficulty
|
1325 |
+
$APPEND_job
|
1326 |
+
$REPLACE_training
|
1327 |
+
$REPLACE_crowded
|
1328 |
+
$APPEND_All
|
1329 |
+
$REPLACE_gotten
|
1330 |
+
$REPLACE_catch
|
1331 |
+
$REPLACE_method
|
1332 |
+
$REPLACE_public
|
1333 |
+
$REPLACE_classes
|
1334 |
+
$REPLACE_seem
|
1335 |
+
$APPEND_show
|
1336 |
+
$REPLACE_question
|
1337 |
+
$REPLACE_development
|
1338 |
+
$REPLACE_says
|
1339 |
+
$REPLACE_faster
|
1340 |
+
$REPLACE_mother
|
1341 |
+
$REPLACE_guitar
|
1342 |
+
$REPLACE_teeth
|
1343 |
+
$REPLACE_song
|
1344 |
+
$REPLACE_lesson
|
1345 |
+
$REPLACE_knew
|
1346 |
+
$REPLACE_sent
|
1347 |
+
$REPLACE_unable
|
1348 |
+
$REPLACE_alot
|
1349 |
+
$REPLACE_Those
|
1350 |
+
$REPLACE_concert
|
1351 |
+
$APPEND_speak
|
1352 |
+
$REPLACE_software
|
1353 |
+
$REPLACE_German
|
1354 |
+
$REPLACE_Currently
|
1355 |
+
$REPLACE_yourself
|
1356 |
+
$REPLACE_fact
|
1357 |
+
$REPLACE_major
|
1358 |
+
$REPLACE_snowboarding
|
1359 |
+
$REPLACE_apartment
|
1360 |
+
$REPLACE_none
|
1361 |
+
$REPLACE_Here
|
1362 |
+
$REPLACE_reply
|
1363 |
+
$REPLACE_lived
|
1364 |
+
$APPEND_site
|
1365 |
+
$REPLACE_introduction
|
1366 |
+
$REPLACE_exchange
output_vocabulary/labels.txt (diff lines 1367–3705, all additions): one GECToR edit tag per line, beginning with $APPEND_level, $REPLACE_iPhone, $REPLACE_consider, $REPLACE_leaves, $APPEND_early and ending with $REPLACE_actual, $REPLACE_shining. The tags in this range are almost entirely of the forms $APPEND_<token> and $REPLACE_<token>, with occasional transform tags such as $TRANSFORM_CASE_CAPITAL_1 and $MERGE_HYPHEN.
|
3706 |
+
$APPEND_feels
|
3707 |
+
$REPLACE_lesser
|
3708 |
+
$REPLACE_pages
|
3709 |
+
$REPLACE_cartoons
|
3710 |
+
$REPLACE_arise
|
3711 |
+
$REPLACE_f
|
3712 |
+
$REPLACE_luckily
|
3713 |
+
$REPLACE_airport
|
3714 |
+
$REPLACE_windy
|
3715 |
+
$REPLACE_instructor
|
3716 |
+
$APPEND_Why
|
3717 |
+
$REPLACE_weighed
|
3718 |
+
$REPLACE_river
|
3719 |
+
$APPEND_frequently
|
3720 |
+
$APPEND_method
|
3721 |
+
$REPLACE_shrine
|
3722 |
+
$APPEND_short
|
3723 |
+
$REPLACE_suffer
|
3724 |
+
$REPLACE_6th
|
3725 |
+
$REPLACE_fight
|
3726 |
+
$APPEND_worth
|
3727 |
+
$REPLACE_absent
|
3728 |
+
$REPLACE_United
|
3729 |
+
$REPLACE_chef
|
3730 |
+
$REPLACE_anytime
|
3731 |
+
$REPLACE_Three
|
3732 |
+
$REPLACE_noisy
|
3733 |
+
$APPEND_therefore
|
3734 |
+
$REPLACE_iPod
|
3735 |
+
$APPEND_French
|
3736 |
+
$REPLACE_wishes
|
3737 |
+
$REPLACE_Yours
|
3738 |
+
$APPEND_Being
|
3739 |
+
$APPEND_Its
|
3740 |
+
$APPEND_field
|
3741 |
+
$APPEND_photos
|
3742 |
+
$REPLACE_definition
|
3743 |
+
$APPEND_gives
|
3744 |
+
$REPLACE_scores
|
3745 |
+
$APPEND_Having
|
3746 |
+
$REPLACE_statement
|
3747 |
+
$APPEND_spoken
|
3748 |
+
$APPEND_price
|
3749 |
+
$REPLACE_cleaned
|
3750 |
+
$REPLACE_varied
|
3751 |
+
$APPEND_Oh
|
3752 |
+
$REPLACE_wash
|
3753 |
+
$REPLACE_satisfactory
|
3754 |
+
$REPLACE_ceiling
|
3755 |
+
$APPEND_including
|
3756 |
+
$APPEND_special
|
3757 |
+
$APPEND_popular
|
3758 |
+
$REPLACE_invention
|
3759 |
+
$REPLACE_materials
|
3760 |
+
$REPLACE_media
|
3761 |
+
$REPLACE_=
|
3762 |
+
$REPLACE_dialogue
|
3763 |
+
$REPLACE_designed
|
3764 |
+
$REPLACE_popularity
|
3765 |
+
$REPLACE_York
|
3766 |
+
$REPLACE_Getting
|
3767 |
+
$APPEND_shown
|
3768 |
+
$REPLACE_carrying
|
3769 |
+
$REPLACE_00pm
|
3770 |
+
$REPLACE_stations
|
3771 |
+
$REPLACE_puts
|
3772 |
+
$REPLACE_screen
|
3773 |
+
$REPLACE_appreciative
|
3774 |
+
$REPLACE_cruel
|
3775 |
+
$APPEND_main
|
3776 |
+
$REPLACE_action
|
3777 |
+
$REPLACE_unlucky
|
3778 |
+
$REPLACE_God
|
3779 |
+
$APPEND_basically
|
3780 |
+
$REPLACE_d
|
3781 |
+
$REPLACE_climbed
|
3782 |
+
$REPLACE_thoroughly
|
3783 |
+
$REPLACE_Canada
|
3784 |
+
$REPLACE_hesitate
|
3785 |
+
$APPEND_developed
|
3786 |
+
$APPEND_post
|
3787 |
+
$REPLACE_represent
|
3788 |
+
$REPLACE_comment
|
3789 |
+
$REPLACE_controlled
|
3790 |
+
$REPLACE_source
|
3791 |
+
$REPLACE_customs
|
3792 |
+
$REPLACE_drawn
|
3793 |
+
$REPLACE_mature
|
3794 |
+
$REPLACE_commute
|
3795 |
+
$APPEND_Once
|
3796 |
+
$APPEND_letter
|
3797 |
+
$REPLACE_attached
|
3798 |
+
$REPLACE_gift
|
3799 |
+
$REPLACE_nap
|
3800 |
+
$APPEND_asked
|
3801 |
+
$REPLACE_inspired
|
3802 |
+
$APPEND_event
|
3803 |
+
$REPLACE_seafood
|
3804 |
+
$APPEND_watched
|
3805 |
+
$REPLACE_errors
|
3806 |
+
$APPEND_passed
|
3807 |
+
$APPEND_english
|
3808 |
+
$REPLACE_complaining
|
3809 |
+
$REPLACE_roommate
|
3810 |
+
$REPLACE_Life
|
3811 |
+
$REPLACE_mental
|
3812 |
+
$REPLACE_grades
|
3813 |
+
$APPEND_parts
|
3814 |
+
$REPLACE_pronounciation
|
3815 |
+
$REPLACE_strengthen
|
3816 |
+
$REPLACE_priority
|
3817 |
+
$APPEND_abroad
|
3818 |
+
$APPEND_ticket
|
3819 |
+
$REPLACE_insurance
|
3820 |
+
$REPLACE_hesitation
|
3821 |
+
$REPLACE_researched
|
3822 |
+
$REPLACE_unlike
|
3823 |
+
$REPLACE_exercising
|
3824 |
+
$REPLACE_exchanged
|
3825 |
+
$REPLACE_knows
|
3826 |
+
$REPLACE_founded
|
3827 |
+
$REPLACE_messy
|
3828 |
+
$REPLACE_dying
|
3829 |
+
$APPEND_plans
|
3830 |
+
$APPEND_match
|
3831 |
+
$REPLACE_Fourth
|
3832 |
+
$REPLACE_answers
|
3833 |
+
$REPLACE_assignments
|
3834 |
+
$REPLACE_Whether
|
3835 |
+
$REPLACE_elder
|
3836 |
+
$REPLACE_gas
|
3837 |
+
$REPLACE_heading
|
3838 |
+
$REPLACE_laws
|
3839 |
+
$REPLACE_kindly
|
3840 |
+
$REPLACE_wine
|
3841 |
+
$REPLACE_household
|
3842 |
+
$REPLACE_dining
|
3843 |
+
$REPLACE_sensitive
|
3844 |
+
$REPLACE_wet
|
3845 |
+
$REPLACE_Personally
|
3846 |
+
$APPEND_middle
|
3847 |
+
$REPLACE_busier
|
3848 |
+
$REPLACE_dirty
|
3849 |
+
$REPLACE_religion
|
3850 |
+
$REPLACE_facing
|
3851 |
+
$APPEND_totally
|
3852 |
+
$REPLACE_repeatedly
|
3853 |
+
$REPLACE_tries
|
3854 |
+
$REPLACE_organising
|
3855 |
+
$REPLACE_operating
|
3856 |
+
$REPLACE_ex
|
3857 |
+
$APPEND_languages
|
3858 |
+
$REPLACE_services
|
3859 |
+
$REPLACE_remaining
|
3860 |
+
$REPLACE_killed
|
3861 |
+
$REPLACE_fair
|
3862 |
+
$REPLACE_bike
|
3863 |
+
$REPLACE_'t
|
3864 |
+
$APPEND_titled
|
3865 |
+
$REPLACE_exception
|
3866 |
+
$APPEND_carefully
|
3867 |
+
$REPLACE_salon
|
3868 |
+
$REPLACE_translated
|
3869 |
+
$REPLACE_welcome
|
3870 |
+
$REPLACE_gratitude
|
3871 |
+
$REPLACE_Watching
|
3872 |
+
$REPLACE_adults
|
3873 |
+
$APPEND_large
|
3874 |
+
$REPLACE_untill
|
3875 |
+
$REPLACE_coach
|
3876 |
+
$REPLACE_mountains
|
3877 |
+
$REPLACE_sandwich
|
3878 |
+
$REPLACE_examples
|
3879 |
+
$APPEND_gone
|
3880 |
+
$REPLACE_multiple
|
3881 |
+
$APPEND_meant
|
3882 |
+
$REPLACE_delivered
|
3883 |
+
$REPLACE_entering
|
3884 |
+
$APPEND_Hello
|
3885 |
+
$REPLACE_option
|
3886 |
+
$REPLACE_cigarette
|
3887 |
+
$REPLACE_acted
|
3888 |
+
$REPLACE_bathroom
|
3889 |
+
$REPLACE_accustomed
|
3890 |
+
$REPLACE_literature
|
3891 |
+
$REPLACE_bottom
|
3892 |
+
$APPEND_course
|
3893 |
+
$APPEND_choose
|
3894 |
+
$REPLACE_resume
|
3895 |
+
$APPEND_web
|
3896 |
+
$REPLACE_aloud
|
3897 |
+
$REPLACE_material
|
3898 |
+
$REPLACE_struggle
|
3899 |
+
$REPLACE_trains
|
3900 |
+
$REPLACE_dog
|
3901 |
+
$APPEND_Both
|
3902 |
+
$REPLACE_leisure
|
3903 |
+
$REPLACE_climate
|
3904 |
+
$REPLACE_japanese
|
3905 |
+
$REPLACE_reduced
|
3906 |
+
$APPEND_break
|
3907 |
+
$APPEND_grow
|
3908 |
+
$REPLACE_Thinking
|
3909 |
+
$REPLACE_dessert
|
3910 |
+
$REPLACE_Yeah
|
3911 |
+
$REPLACE_salt
|
3912 |
+
$REPLACE_rare
|
3913 |
+
$REPLACE_fairly
|
3914 |
+
$REPLACE_knowing
|
3915 |
+
$REPLACE_varieties
|
3916 |
+
$APPEND_festival
|
3917 |
+
$REPLACE_kitten
|
3918 |
+
$APPEND_changes
|
3919 |
+
$REPLACE_Introduction
|
3920 |
+
$REPLACE_viruses
|
3921 |
+
$APPEND_gotten
|
3922 |
+
$REPLACE_h
|
3923 |
+
$REPLACE_experiencing
|
3924 |
+
$APPEND_rain
|
3925 |
+
$APPEND_weight
|
3926 |
+
$REPLACE_brown
|
3927 |
+
$REPLACE_Everyday
|
3928 |
+
$APPEND_Tokyo
|
3929 |
+
$REPLACE_split
|
3930 |
+
$REPLACE_section
|
3931 |
+
$APPEND_dinner
|
3932 |
+
$REPLACE_Making
|
3933 |
+
$REPLACE_courses
|
3934 |
+
$REPLACE_remains
|
3935 |
+
$REPLACE_Dragon
|
3936 |
+
$REPLACE_soft
|
3937 |
+
$REPLACE_independent
|
3938 |
+
$REPLACE_conducted
|
3939 |
+
$APPEND_mode
|
3940 |
+
$APPEND_tickets
|
3941 |
+
$APPEND_leave
|
3942 |
+
$APPEND_culture
|
3943 |
+
$REPLACE_Iam
|
3944 |
+
$REPLACE_joy
|
3945 |
+
$REPLACE_violent
|
3946 |
+
$REPLACE_leaf
|
3947 |
+
$REPLACE_fortune
|
3948 |
+
$APPEND_reasons
|
3949 |
+
$REPLACE_Fukushima
|
3950 |
+
$APPEND_thus
|
3951 |
+
$REPLACE_boss
|
3952 |
+
$REPLACE_player
|
3953 |
+
$REPLACE_closest
|
3954 |
+
$REPLACE_lies
|
3955 |
+
$APPEND_consists
|
3956 |
+
$REPLACE_impolite
|
3957 |
+
$REPLACE_unpredictable
|
3958 |
+
$REPLACE_shared
|
3959 |
+
$REPLACE_7th
|
3960 |
+
$APPEND_Up
|
3961 |
+
$REPLACE_step
|
3962 |
+
$APPEND_football
|
3963 |
+
$REPLACE_central
|
3964 |
+
$REPLACE_symptoms
|
3965 |
+
$REPLACE_funds
|
3966 |
+
$REPLACE_resolve
|
3967 |
+
$REPLACE_Technology
|
3968 |
+
$REPLACE_solutions
|
3969 |
+
$REPLACE_adult
|
3970 |
+
$REPLACE_military
|
3971 |
+
$REPLACE_supermarkets
|
3972 |
+
$APPEND_sites
|
3973 |
+
$REPLACE_levels
|
3974 |
+
$REPLACE_broad
|
3975 |
+
$REPLACE_smiling
|
3976 |
+
$REPLACE_expecting
|
3977 |
+
$REPLACE_shorter
|
3978 |
+
$APPEND_Like
|
3979 |
+
$REPLACE_gloomy
|
3980 |
+
$REPLACE_weekdays
|
3981 |
+
$REPLACE_blew
|
3982 |
+
$REPLACE_determine
|
3983 |
+
$REPLACE_discount
|
3984 |
+
$APPEND_attend
|
3985 |
+
$REPLACE_treated
|
3986 |
+
$REPLACE_length
|
3987 |
+
$REPLACE_raw
|
3988 |
+
$REPLACE_promote
|
3989 |
+
$REPLACE_court
|
3990 |
+
$REPLACE_commercial
|
3991 |
+
$REPLACE_expectations
|
3992 |
+
$APPEND_exercise
|
3993 |
+
$REPLACE_tickets
|
3994 |
+
$REPLACE_status
|
3995 |
+
$REPLACE_retirement
|
3996 |
+
$REPLACE_crowd
|
3997 |
+
$REPLACE_requested
|
3998 |
+
$REPLACE_South
|
3999 |
+
$APPEND_corrected
|
4000 |
+
$REPLACE_aunt
|
4001 |
+
$REPLACE_Traveling
|
4002 |
+
$REPLACE_region
|
4003 |
+
$REPLACE_pulled
|
4004 |
+
$APPEND_14
|
4005 |
+
$REPLACE_impatient
|
4006 |
+
$REPLACE_roads
|
4007 |
+
$REPLACE_value
|
4008 |
+
$REPLACE_existence
|
4009 |
+
$REPLACE_applications
|
4010 |
+
$REPLACE_boiled
|
4011 |
+
$REPLACE_warming
|
4012 |
+
$REPLACE_15
|
4013 |
+
$REPLACE_Iwas
|
4014 |
+
$REPLACE_accomplish
|
4015 |
+
$APPEND_Sounds
|
4016 |
+
$APPEND_send
|
4017 |
+
$APPEND_programs
|
4018 |
+
$REPLACE_costume
|
4019 |
+
$APPEND_1st
|
4020 |
+
$REPLACE_ancient
|
4021 |
+
$REPLACE_physics
|
4022 |
+
$REPLACE_record
|
4023 |
+
$REPLACE_published
|
4024 |
+
$REPLACE_cross
|
4025 |
+
$REPLACE_harmful
|
4026 |
+
$REPLACE_description
|
4027 |
+
$APPEND_wrote
|
4028 |
+
$APPEND_pay
|
4029 |
+
$REPLACE_fond
|
4030 |
+
$APPEND_color
|
4031 |
+
$REPLACE_asks
|
4032 |
+
$APPEND_stuff
|
4033 |
+
$REPLACE_specially
|
4034 |
+
$REPLACE_uneasy
|
4035 |
+
$APPEND_riding
|
4036 |
+
$REPLACE_inthe
|
4037 |
+
$REPLACE_nose
|
4038 |
+
$REPLACE_scientific
|
4039 |
+
$REPLACE_Among
|
4040 |
+
$REPLACE_danger
|
4041 |
+
$REPLACE_commit
|
4042 |
+
$REPLACE_Particularly
|
4043 |
+
$REPLACE_troubles
|
4044 |
+
$REPLACE_button
|
4045 |
+
$REPLACE_delayed
|
4046 |
+
$REPLACE_Diego
|
4047 |
+
$REPLACE_daytime
|
4048 |
+
$REPLACE_phenomenon
|
4049 |
+
$APPEND_following
|
4050 |
+
$REPLACE_Consequently
|
4051 |
+
$REPLACE_saving
|
4052 |
+
$REPLACE_souvenir
|
4053 |
+
$REPLACE_missing
|
4054 |
+
$REPLACE_unless
|
4055 |
+
$APPEND_office
|
4056 |
+
$REPLACE_anniversary
|
4057 |
+
$REPLACE_anger
|
4058 |
+
$APPEND_himself
|
4059 |
+
$APPEND_happening
|
4060 |
+
$REPLACE_cheer
|
4061 |
+
$REPLACE_animal
|
4062 |
+
$APPEND_subject
|
4063 |
+
$REPLACE_nicer
|
4064 |
+
$REPLACE_sells
|
4065 |
+
$REPLACE_lenses
|
4066 |
+
$REPLACE_OF
|
4067 |
+
$REPLACE_possibilities
|
4068 |
+
$REPLACE_efforts
|
4069 |
+
$REPLACE_Years
|
4070 |
+
$REPLACE_merchandise
|
4071 |
+
$REPLACE_subsidies
|
4072 |
+
$REPLACE_forms
|
4073 |
+
$REPLACE_hotel
|
4074 |
+
$APPEND_non
|
4075 |
+
$REPLACE_appetite
|
4076 |
+
$REPLACE_sport
|
4077 |
+
$REPLACE_expand
|
4078 |
+
$REPLACE_rhythm
|
4079 |
+
$APPEND_Another
|
4080 |
+
$REPLACE_Language
|
4081 |
+
$APPEND_Each
|
4082 |
+
$REPLACE_window
|
4083 |
+
$REPLACE_increases
|
4084 |
+
$REPLACE_states
|
4085 |
+
$REPLACE_excitement
|
4086 |
+
$REPLACE_promise
|
4087 |
+
$APPEND_seen
|
4088 |
+
$REPLACE_luggage
|
4089 |
+
$APPEND_generally
|
4090 |
+
$REPLACE_frustrating
|
4091 |
+
$REPLACE_colors
|
4092 |
+
$REPLACE_mosquitoes
|
4093 |
+
$REPLACE_seats
|
4094 |
+
$REPLACE_woken
|
4095 |
+
$REPLACE_switched
|
4096 |
+
$REPLACE_grammatically
|
4097 |
+
$REPLACE_ON
|
4098 |
+
$REPLACE_kindness
|
4099 |
+
$REPLACE_thieves
|
4100 |
+
$REPLACE_spoiled
|
4101 |
+
$REPLACE_States
|
4102 |
+
$REPLACE_hamburgers
|
4103 |
+
$APPEND_nearly
|
4104 |
+
$REPLACE_situated
|
4105 |
+
$REPLACE_foods
|
4106 |
+
$REPLACE_collecting
|
4107 |
+
$REPLACE_unfortunate
|
4108 |
+
$REPLACE_camera
|
4109 |
+
$REPLACE_dramatic
|
4110 |
+
$REPLACE_noodle
|
4111 |
+
$APPEND_human
|
4112 |
+
$REPLACE_re
|
4113 |
+
$REPLACE_humidity
|
4114 |
+
$APPEND_strongly
|
4115 |
+
$REPLACE_kimchi
|
4116 |
+
$APPEND_difference
|
4117 |
+
$REPLACE_artists
|
4118 |
+
$REPLACE_medical
|
4119 |
+
$REPLACE_incredible
|
4120 |
+
$APPEND_helping
|
4121 |
+
$APPEND_ahead
|
4122 |
+
$REPLACE_lines
|
4123 |
+
$REPLACE_thinks
|
4124 |
+
$REPLACE_thousand
|
4125 |
+
$REPLACE_sixth
|
4126 |
+
$REPLACE_exposed
|
4127 |
+
$REPLACE_colours
|
4128 |
+
$REPLACE_widely
|
4129 |
+
$APPEND_nuclear
|
4130 |
+
$REPLACE_worldwide
|
4131 |
+
$REPLACE_comprehension
|
4132 |
+
$APPEND_hair
|
4133 |
+
$REPLACE_halfway
|
4134 |
+
$APPEND_cause
|
4135 |
+
$REPLACE_cast
|
4136 |
+
$APPEND_coffee
|
4137 |
+
$REPLACE_attractions
|
4138 |
+
$REPLACE_beautifully
|
4139 |
+
$REPLACE_handwritten
|
4140 |
+
$APPEND_band
|
4141 |
+
$APPEND_improving
|
4142 |
+
$APPEND_40
|
4143 |
+
$REPLACE_shops
|
4144 |
+
$REPLACE_basically
|
4145 |
+
$APPEND_studied
|
4146 |
+
$REPLACE_manufacturer
|
4147 |
+
$REPLACE_Western
|
4148 |
+
$APPEND_throughout
|
4149 |
+
$REPLACE_identify
|
4150 |
+
$APPEND_Would
|
4151 |
+
$REPLACE_Switzerland
|
4152 |
+
$APPEND_everybody
|
4153 |
+
$APPEND_grade
|
4154 |
+
$REPLACE_farewell
|
4155 |
+
$REPLACE_romantic
|
4156 |
+
$REPLACE_Celsius
|
4157 |
+
$REPLACE_bread
|
4158 |
+
$APPEND_favorite
|
4159 |
+
$APPEND_Despite
|
4160 |
+
$REPLACE_downloaded
|
4161 |
+
$REPLACE_balance
|
4162 |
+
$APPEND_carry
|
4163 |
+
$REPLACE_cure
|
4164 |
+
$REPLACE_programmer
|
4165 |
+
$APPEND_considered
|
4166 |
+
$APPEND_slowly
|
4167 |
+
$REPLACE_discovery
|
4168 |
+
$APPEND_stopped
|
4169 |
+
$REPLACE_standing
|
4170 |
+
$REPLACE_earned
|
4171 |
+
$REPLACE_skating
|
4172 |
+
$REPLACE_detail
|
4173 |
+
$REPLACE_apology
|
4174 |
+
$REPLACE_writer
|
4175 |
+
$REPLACE_highway
|
4176 |
+
$REPLACE_Goodbye
|
4177 |
+
$REPLACE_quote
|
4178 |
+
$REPLACE_maintenance
|
4179 |
+
$APPEND_taste
|
4180 |
+
$REPLACE_package
|
4181 |
+
$REPLACE_responded
|
4182 |
+
$REPLACE_criticize
|
4183 |
+
$APPEND_deeply
|
4184 |
+
$REPLACE_jogging
|
4185 |
+
$APPEND_waiting
|
4186 |
+
$REPLACE_fatter
|
4187 |
+
$REPLACE_cycle
|
4188 |
+
$APPEND_Only
|
4189 |
+
$REPLACE_afterward
|
4190 |
+
$REPLACE_specialty
|
4191 |
+
$REPLACE_goodness
|
4192 |
+
$REPLACE_groceries
|
4193 |
+
$APPEND_staff
|
4194 |
+
$REPLACE_somehow
|
4195 |
+
$APPEND_Moreover
|
4196 |
+
$APPEND_training
|
4197 |
+
$REPLACE_clever
|
4198 |
+
$REPLACE_camp
|
4199 |
+
$APPEND_traveling
|
4200 |
+
$APPEND_minutes
|
4201 |
+
$REPLACE_sandwiches
|
4202 |
+
$APPEND_run
|
4203 |
+
$REPLACE_options
|
4204 |
+
$REPLACE_calories
|
4205 |
+
$REPLACE_branch
|
4206 |
+
$REPLACE_barbecue
|
4207 |
+
$APPEND_entrance
|
4208 |
+
$REPLACE_noodles
|
4209 |
+
$APPEND_products
|
4210 |
+
$APPEND_helped
|
4211 |
+
$REPLACE_newly
|
4212 |
+
$APPEND_drank
|
4213 |
+
$REPLACE_precise
|
4214 |
+
$REPLACE_increasingly
|
4215 |
+
$APPEND_Dear
|
4216 |
+
$REPLACE_novels
|
4217 |
+
$REPLACE_mix
|
4218 |
+
$REPLACE_budget
|
4219 |
+
$REPLACE_petrol
|
4220 |
+
$REPLACE_trial
|
4221 |
+
$APPEND_Perhaps
|
4222 |
+
$REPLACE_occasions
|
4223 |
+
$APPEND_Actually
|
4224 |
+
$REPLACE_eastern
|
4225 |
+
$REPLACE_sights
|
4226 |
+
$REPLACE_industrial
|
4227 |
+
$APPEND_result
|
4228 |
+
$REPLACE_generally
|
4229 |
+
$REPLACE_Canadian
|
4230 |
+
$REPLACE_Surprisingly
|
4231 |
+
$APPEND_strong
|
4232 |
+
$REPLACE_memorizing
|
4233 |
+
$REPLACE_irritated
|
4234 |
+
$REPLACE_implemented
|
4235 |
+
$REPLACE_Welcome
|
4236 |
+
$REPLACE_coast
|
4237 |
+
$REPLACE_signs
|
4238 |
+
$REPLACE_leading
|
4239 |
+
$APPEND_PM
|
4240 |
+
$APPEND_access
|
4241 |
+
$REPLACE_fat
|
4242 |
+
$REPLACE_breeze
|
4243 |
+
$REPLACE_India
|
4244 |
+
$APPEND_slept
|
4245 |
+
$REPLACE_weigh
|
4246 |
+
$REPLACE_commonly
|
4247 |
+
$REPLACE_supervisor
|
4248 |
+
$REPLACE_tomato
|
4249 |
+
$REPLACE_agency
|
4250 |
+
$APPEND_till
|
4251 |
+
$REPLACE_couldn
|
4252 |
+
$REPLACE_strangely
|
4253 |
+
$APPEND_stayed
|
4254 |
+
$REPLACE_ni
|
4255 |
+
$APPEND_exams
|
4256 |
+
$REPLACE_School
|
4257 |
+
$REPLACE_blue
|
4258 |
+
$APPEND_allowed
|
4259 |
+
$REPLACE_30th
|
4260 |
+
$REPLACE_kittens
|
4261 |
+
$REPLACE_typing
|
4262 |
+
$REPLACE_headed
|
4263 |
+
$APPEND_present
|
4264 |
+
$REPLACE_Reading
|
4265 |
+
$REPLACE_injury
|
4266 |
+
$REPLACE_Dear
|
4267 |
+
$REPLACE_PM
|
4268 |
+
$REPLACE_minor
|
4269 |
+
$REPLACE_drinks
|
4270 |
+
$REPLACE_enthusiasm
|
4271 |
+
$REPLACE_dilemma
|
4272 |
+
$REPLACE_income
|
4273 |
+
$REPLACE_sadness
|
4274 |
+
$REPLACE_weaker
|
4275 |
+
$REPLACE_Thanksgiving
|
4276 |
+
$REPLACE_documents
|
4277 |
+
$REPLACE_fake
|
4278 |
+
$REPLACE_boy
|
4279 |
+
$REPLACE_regards
|
4280 |
+
$APPEND_Finally
|
4281 |
+
$REPLACE_obstacle
|
4282 |
+
$REPLACE_batteries
|
4283 |
+
$APPEND_talked
|
4284 |
+
$APPEND_becomes
|
4285 |
+
$REPLACE_numerous
|
4286 |
+
$REPLACE_cheese
|
4287 |
+
$REPLACE_judge
|
4288 |
+
$APPEND_busy
|
4289 |
+
$APPEND_reach
|
4290 |
+
$APPEND_Fuji
|
4291 |
+
$REPLACE_intelligent
|
4292 |
+
$REPLACE_reception
|
4293 |
+
$REPLACE_Chinatown
|
4294 |
+
$REPLACE_repeat
|
4295 |
+
$APPEND_June
|
4296 |
+
$REPLACE_reported
|
4297 |
+
$APPEND_required
|
4298 |
+
$REPLACE_cases
|
4299 |
+
$REPLACE_matters
|
4300 |
+
$REPLACE_prepositions
|
4301 |
+
$REPLACE_accidents
|
4302 |
+
$REPLACE_fields
|
4303 |
+
$APPEND_ask
|
4304 |
+
$APPEND_sad
|
4305 |
+
$REPLACE_selected
|
4306 |
+
$REPLACE_skipped
|
4307 |
+
$REPLACE_freshmen
|
4308 |
+
$REPLACE_mode
|
4309 |
+
$REPLACE_calendar
|
4310 |
+
$REPLACE_luxury
|
4311 |
+
$REPLACE_summertime
|
4312 |
+
$REPLACE_device
|
4313 |
+
$APPEND_lesson
|
4314 |
+
$APPEND_surely
|
4315 |
+
$REPLACE_loved
|
4316 |
+
$REPLACE_reflect
|
4317 |
+
$REPLACE_shoulder
|
4318 |
+
$REPLACE_muscular
|
4319 |
+
$APPEND_plenty
|
4320 |
+
$REPLACE_Indian
|
4321 |
+
$REPLACE_pork
|
4322 |
+
$REPLACE_double
|
4323 |
+
$REPLACE_loneliness
|
4324 |
+
$REPLACE_economies
|
4325 |
+
$REPLACE_meaningful
|
4326 |
+
$REPLACE_cooperate
|
4327 |
+
$REPLACE_land
|
4328 |
+
$APPEND_report
|
4329 |
+
$REPLACE_block
|
4330 |
+
$REPLACE_cheapest
|
4331 |
+
$REPLACE_mirror
|
4332 |
+
$REPLACE_wealthy
|
4333 |
+
$APPEND_application
|
4334 |
+
$REPLACE_quarter
|
4335 |
+
$REPLACE_babies
|
4336 |
+
$REPLACE_risk
|
4337 |
+
$REPLACE_discussions
|
4338 |
+
$REPLACE_lightning
|
4339 |
+
$REPLACE_briefly
|
4340 |
+
$REPLACE_congratulated
|
4341 |
+
$REPLACE_breathing
|
4342 |
+
$REPLACE_eagerly
|
4343 |
+
$REPLACE_resolved
|
4344 |
+
$APPEND_staying
|
4345 |
+
$APPEND_history
|
4346 |
+
$APPEND_phones
|
4347 |
+
$REPLACE_involving
|
4348 |
+
$REPLACE_enthusiastic
|
4349 |
+
$REPLACE_cookies
|
4350 |
+
$REPLACE_frightened
|
4351 |
+
$REPLACE_entirely
|
4352 |
+
$REPLACE_enormous
|
4353 |
+
$APPEND_aspects
|
4354 |
+
$REPLACE_stable
|
4355 |
+
$APPEND_section
|
4356 |
+
$APPEND_Thanks
|
4357 |
+
$APPEND_women
|
4358 |
+
$REPLACE_phase
|
4359 |
+
$REPLACE_16th
|
4360 |
+
$REPLACE_spicy
|
4361 |
+
$APPEND_produced
|
4362 |
+
$REPLACE_street
|
4363 |
+
$REPLACE_ignore
|
4364 |
+
$REPLACE_designer
|
4365 |
+
$APPEND_club
|
4366 |
+
$REPLACE_mum
|
4367 |
+
$REPLACE_sincere
|
4368 |
+
$REPLACE_offensive
|
4369 |
+
$REPLACE_memorized
|
4370 |
+
$APPEND_question
|
4371 |
+
$REPLACE_wa
|
4372 |
+
$REPLACE_garbage
|
4373 |
+
$REPLACE_Playing
|
4374 |
+
$REPLACE_castle
|
4375 |
+
$REPLACE_swam
|
4376 |
+
$REPLACE_leader
|
4377 |
+
$REPLACE_earthquakes
|
4378 |
+
$REPLACE_displayed
|
4379 |
+
$REPLACE_marathon
|
4380 |
+
$APPEND_songs
|
4381 |
+
$REPLACE_See
|
4382 |
+
$REPLACE_burn
|
4383 |
+
$APPEND_happily
|
4384 |
+
$REPLACE_salesman
|
4385 |
+
$REPLACE_unhealthy
|
4386 |
+
$REPLACE_base
|
4387 |
+
$REPLACE_crossing
|
4388 |
+
$REPLACE_Honestly
|
4389 |
+
$REPLACE_machines
|
4390 |
+
$REPLACE_freshman
|
4391 |
+
$REPLACE_dry
|
4392 |
+
$APPEND_exact
|
4393 |
+
$APPEND_January
|
4394 |
+
$APPEND_terms
|
4395 |
+
$REPLACE_happiest
|
4396 |
+
$APPEND_tastes
|
4397 |
+
$APPEND_design
|
4398 |
+
$REPLACE_champion
|
4399 |
+
$REPLACE_Diary
|
4400 |
+
$REPLACE_expressing
|
4401 |
+
$REPLACE_hardest
|
4402 |
+
$REPLACE_installed
|
4403 |
+
$REPLACE_Go
|
4404 |
+
$REPLACE_dollar
|
4405 |
+
$REPLACE_wooden
|
4406 |
+
$REPLACE_contrary
|
4407 |
+
$REPLACE_refers
|
4408 |
+
$REPLACE_employment
|
4409 |
+
$REPLACE_removed
|
4410 |
+
$REPLACE_opposing
|
4411 |
+
$REPLACE_actress
|
4412 |
+
$REPLACE_Ever
|
4413 |
+
$APPEND_beginning
|
4414 |
+
$REPLACE_approach
|
4415 |
+
$REPLACE_guide
|
4416 |
+
$REPLACE_blooming
|
4417 |
+
$REPLACE_necessarily
|
4418 |
+
$REPLACE_fed
|
4419 |
+
$REPLACE_stands
|
4420 |
+
$REPLACE_principal
|
4421 |
+
$REPLACE_faced
|
4422 |
+
$APPEND_local
|
4423 |
+
$APPEND_highly
|
4424 |
+
$REPLACE_fiction
|
4425 |
+
$APPEND_finding
|
4426 |
+
$REPLACE_attracts
|
4427 |
+
$REPLACE_2011
|
4428 |
+
$REPLACE_businessmen
|
4429 |
+
$REPLACE_Friends
|
4430 |
+
$REPLACE_repaired
|
4431 |
+
$REPLACE_bet
|
4432 |
+
$REPLACE_hunger
|
4433 |
+
$REPLACE_dealing
|
4434 |
+
$REPLACE_Except
|
4435 |
+
$APPEND_role
|
4436 |
+
$REPLACE_admitted
|
4437 |
+
$REPLACE_island
|
4438 |
+
$REPLACE_quietly
|
4439 |
+
$REPLACE_lets
|
4440 |
+
$REPLACE_fee
|
4441 |
+
$REPLACE_performances
|
4442 |
+
$REPLACE_bar
|
4443 |
+
$REPLACE_maximum
|
4444 |
+
$REPLACE_escaped
|
4445 |
+
$REPLACE_ours
|
4446 |
+
$APPEND_originally
|
4447 |
+
$REPLACE_surroundings
|
4448 |
+
$REPLACE_golden
|
4449 |
+
$APPEND_technology
|
4450 |
+
$APPEND_research
|
4451 |
+
$REPLACE_borrow
|
4452 |
+
$REPLACE_remind
|
4453 |
+
$REPLACE_Beginning
|
4454 |
+
$REPLACE_passage
|
4455 |
+
$APPEND_drive
|
4456 |
+
$APPEND_teaching
|
4457 |
+
$REPLACE_typhoons
|
4458 |
+
$REPLACE_grabbed
|
4459 |
+
$REPLACE_incidents
|
4460 |
+
$REPLACE_hid
|
4461 |
+
$REPLACE_operate
|
4462 |
+
$REPLACE_19th
|
4463 |
+
$APPEND_sure
|
4464 |
+
$REPLACE_permission
|
4465 |
+
$APPEND_previous
|
4466 |
+
$REPLACE_rental
|
4467 |
+
$REPLACE_tothe
|
4468 |
+
$APPEND_round
|
4469 |
+
$REPLACE_Oops
|
4470 |
+
$REPLACE_survival
|
4471 |
+
$REPLACE_shaped
|
4472 |
+
$APPEND_costs
|
4473 |
+
$REPLACE_conference
|
4474 |
+
$APPEND_move
|
4475 |
+
$REPLACE_dressed
|
4476 |
+
$REPLACE_smells
|
4477 |
+
$REPLACE_artistic
|
4478 |
+
$REPLACE_holds
|
4479 |
+
$REPLACE_introducing
|
4480 |
+
$REPLACE_nursery
|
4481 |
+
$APPEND_May
|
4482 |
+
$REPLACE_troubled
|
4483 |
+
$REPLACE_optimistic
|
4484 |
+
$REPLACE_guarantee
|
4485 |
+
$REPLACE_toothache
|
4486 |
+
$REPLACE_bother
|
4487 |
+
$REPLACE_Congratulations
|
4488 |
+
$REPLACE_purchased
|
4489 |
+
$APPEND_21
|
4490 |
+
$REPLACE_accurately
|
4491 |
+
$REPLACE_belief
|
4492 |
+
$REPLACE_numbers
|
4493 |
+
$REPLACE_switch
|
4494 |
+
$REPLACE_personally
|
4495 |
+
$REPLACE_negatively
|
4496 |
+
$REPLACE_fireflies
|
4497 |
+
$APPEND_receive
|
4498 |
+
$APPEND_shop
|
4499 |
+
$REPLACE_haircut
|
4500 |
+
$REPLACE_productive
|
4501 |
+
$REPLACE_crisis
|
4502 |
+
$REPLACE_relatively
|
4503 |
+
$REPLACE_celebration
|
4504 |
+
$REPLACE_controversial
|
4505 |
+
$REPLACE_AM
|
4506 |
+
$REPLACE_factors
|
4507 |
+
$REPLACE_snowing
|
4508 |
+
$REPLACE_amusing
|
4509 |
+
$REPLACE_sharing
|
4510 |
+
$REPLACE_Companies
|
4511 |
+
$REPLACE_NYC
|
4512 |
+
$REPLACE_moves
|
4513 |
+
$REPLACE_hanging
|
4514 |
+
$REPLACE_simpler
|
4515 |
+
$APPEND_apart
|
4516 |
+
$REPLACE_race
|
4517 |
+
$REPLACE_hip
|
4518 |
+
$REPLACE_underwear
|
4519 |
+
$REPLACE_official
|
4520 |
+
$REPLACE_shift
|
4521 |
+
$APPEND_week
|
4522 |
+
$REPLACE_analyse
|
4523 |
+
$REPLACE_25th
|
4524 |
+
$REPLACE_teenage
|
4525 |
+
$APPEND_recent
|
4526 |
+
$REPLACE_skin
|
4527 |
+
$REPLACE_enroll
|
4528 |
+
$REPLACE_nickname
|
4529 |
+
$APPEND_accidentally
|
4530 |
+
$REPLACE_inventions
|
4531 |
+
$REPLACE_boys
|
4532 |
+
$APPEND_Afterwards
|
4533 |
+
$REPLACE_gentle
|
4534 |
+
$REPLACE_overnight
|
4535 |
+
$APPEND_explain
|
4536 |
+
$REPLACE_wanting
|
4537 |
+
$REPLACE_encouraging
|
4538 |
+
$REPLACE_contribute
|
4539 |
+
$REPLACE_necessities
|
4540 |
+
$REPLACE_enrolled
|
4541 |
+
$REPLACE_Normally
|
4542 |
+
$REPLACE_balloon
|
4543 |
+
$REPLACE_applying
|
4544 |
+
$APPEND_uses
|
4545 |
+
$REPLACE_recall
|
4546 |
+
$REPLACE_nearest
|
4547 |
+
$REPLACE_cashier
|
4548 |
+
$REPLACE_corner
|
4549 |
+
$APPEND_space
|
4550 |
+
$REPLACE_thatI
|
4551 |
+
$REPLACE_treasure
|
4552 |
+
$REPLACE_International
|
4553 |
+
$REPLACE_forth
|
4554 |
+
$REPLACE_assigned
|
4555 |
+
$APPEND_education
|
4556 |
+
$APPEND_except
|
4557 |
+
$REPLACE_jewellery
|
4558 |
+
$REPLACE_manga
|
4559 |
+
$APPEND_participate
|
4560 |
+
$APPEND_increase
|
4561 |
+
$REPLACE_slippery
|
4562 |
+
$REPLACE_snowboard
|
4563 |
+
$REPLACE_novel
|
4564 |
+
$REPLACE_predict
|
4565 |
+
$REPLACE_remained
|
4566 |
+
$REPLACE_outcome
|
4567 |
+
$APPEND_whose
|
4568 |
+
$APPEND_slightly
|
4569 |
+
$APPEND_serious
|
4570 |
+
$REPLACE_Research
|
4571 |
+
$REPLACE_marvelous
|
4572 |
+
$APPEND_excited
|
4573 |
+
$REPLACE_organization
|
4574 |
+
$REPLACE_list
|
4575 |
+
$REPLACE_automatically
|
4576 |
+
$REPLACE_differ
|
4577 |
+
$REPLACE_Mount
|
4578 |
+
$REPLACE_arrangement
|
4579 |
+
$APPEND_spending
|
4580 |
+
$REPLACE_adopt
|
4581 |
+
$APPEND_Soon
|
4582 |
+
$APPEND_Mr
|
4583 |
+
$REPLACE_irritable
|
4584 |
+
$REPLACE_Wish
|
4585 |
+
$REPLACE_writting
|
4586 |
+
$REPLACE_Sincerely
|
4587 |
+
$APPEND_winter
|
4588 |
+
$REPLACE_rose
|
4589 |
+
$REPLACE_businessman
|
4590 |
+
$REPLACE_flavors
|
4591 |
+
$REPLACE_smell
|
4592 |
+
$REPLACE_fortunate
|
4593 |
+
$APPEND_TOEIC
|
4594 |
+
$APPEND_mentioned
|
4595 |
+
$APPEND_process
|
4596 |
+
$APPEND_amp
|
4597 |
+
$APPEND_neither
|
4598 |
+
$REPLACE_enemies
|
4599 |
+
$REPLACE_acceptance
|
4600 |
+
$REPLACE_drivers
|
4601 |
+
$REPLACE_murderer
|
4602 |
+
$REPLACE_Melbourne
|
4603 |
+
$REPLACE_Specifically
|
4604 |
+
$APPEND_complete
|
4605 |
+
$APPEND_focus
|
4606 |
+
$REPLACE_illegal
|
4607 |
+
$APPEND_hurts
|
4608 |
+
$REPLACE_groom
|
4609 |
+
$APPEND_preposition
|
4610 |
+
$APPEND_com
|
4611 |
+
$APPEND_beautiful
|
4612 |
+
$REPLACE_sightseeing
|
4613 |
+
$REPLACE_bringing
|
4614 |
+
$REPLACE_sources
|
4615 |
+
$APPEND_videos
|
4616 |
+
$APPEND_lunch
|
4617 |
+
$APPEND_11
|
4618 |
+
$REPLACE_suggestion
|
4619 |
+
$REPLACE_programmes
|
4620 |
+
$APPEND_jobs
|
4621 |
+
$REPLACE_scent
|
4622 |
+
$REPLACE_crime
|
4623 |
+
$REPLACE_desperate
|
4624 |
+
$REPLACE_deliver
|
4625 |
+
$APPEND_performed
|
4626 |
+
$REPLACE_cars
|
4627 |
+
$REPLACE_pet
|
4628 |
+
$REPLACE_dangers
|
4629 |
+
$APPEND_perform
|
4630 |
+
$REPLACE_vehicles
|
4631 |
+
$APPEND_figure
|
4632 |
+
$APPEND_Later
|
4633 |
+
$REPLACE_matches
|
4634 |
+
$REPLACE_spaghetti
|
4635 |
+
$APPEND_light
|
4636 |
+
$REPLACE_corrects
|
4637 |
+
$REPLACE_Unlike
|
4638 |
+
$APPEND_occasionally
|
4639 |
+
$APPEND_truly
|
4640 |
+
$REPLACE_silence
|
4641 |
+
$REPLACE_intense
|
4642 |
+
$REPLACE_substitute
|
4643 |
+
$APPEND_freely
|
4644 |
+
$APPEND_party
|
4645 |
+
$APPEND_His
|
4646 |
+
$REPLACE_bothersome
|
4647 |
+
$REPLACE_pursuing
|
4648 |
+
$REPLACE_Out
|
4649 |
+
$REPLACE_direction
|
4650 |
+
$APPEND_check
|
4651 |
+
$REPLACE_authorities
|
4652 |
+
$APPEND_sort
|
4653 |
+
$REPLACE_challenges
|
4654 |
+
$REPLACE_plural
|
4655 |
+
$REPLACE_refused
|
4656 |
+
$REPLACE_informed
|
4657 |
+
$REPLACE_demand
|
4658 |
+
$REPLACE_mess
|
4659 |
+
$REPLACE_force
|
4660 |
+
$REPLACE_paintings
|
4661 |
+
$APPEND_remember
|
4662 |
+
$REPLACE_sky
|
4663 |
+
$APPEND_practicing
|
4664 |
+
$REPLACE_understandable
|
4665 |
+
$REPLACE_crashed
|
4666 |
+
$APPEND_communicate
|
4667 |
+
$REPLACE_manner
|
4668 |
+
$REPLACE_payment
|
4669 |
+
$REPLACE_artist
|
4670 |
+
$APPEND_tend
|
4671 |
+
$REPLACE_recession
|
4672 |
+
$REPLACE_til
|
4673 |
+
$REPLACE_mixed
|
4674 |
+
$APPEND_bar
|
4675 |
+
$REPLACE_England
|
4676 |
+
$REPLACE_gathered
|
4677 |
+
$REPLACE_combined
|
4678 |
+
$REPLACE_Rome
|
4679 |
+
$APPEND_wet
|
4680 |
+
$REPLACE_network
|
4681 |
+
$REPLACE_steak
|
4682 |
+
$REPLACE_California
|
4683 |
+
$REPLACE_birth
|
4684 |
+
$APPEND_state
|
4685 |
+
$REPLACE_expressed
|
4686 |
+
$REPLACE_haven
|
4687 |
+
$REPLACE_seldom
|
4688 |
+
$APPEND_health
|
4689 |
+
$REPLACE_partners
|
4690 |
+
$REPLACE_finishing
|
4691 |
+
$REPLACE_Monday
|
4692 |
+
$REPLACE_liters
|
4693 |
+
$REPLACE_Hi
|
4694 |
+
$APPEND_August
|
4695 |
+
$REPLACE_gorgeous
|
4696 |
+
$APPEND_seven
|
4697 |
+
$APPEND_remaining
|
4698 |
+
$REPLACE_chances
|
4699 |
+
$APPEND_older
|
4700 |
+
$REPLACE_Eating
|
4701 |
+
$APPEND_Christmas
|
4702 |
+
$REPLACE_dentist
|
4703 |
+
$REPLACE_league
|
4704 |
+
$REPLACE_korean
|
4705 |
+
$APPEND_greatly
|
4706 |
+
$APPEND_return
|
4707 |
+
$REPLACE_genres
|
4708 |
+
$REPLACE_authors
|
4709 |
+
$APPEND_Thank
|
4710 |
+
$REPLACE_diseases
|
4711 |
+
$REPLACE_travels
|
4712 |
+
$REPLACE_sheet
|
4713 |
+
$REPLACE_fastest
|
4714 |
+
$APPEND_surprised
|
4715 |
+
$REPLACE_rushed
|
4716 |
+
$APPEND_attending
|
4717 |
+
$APPEND_Furthermore
|
4718 |
+
$REPLACE_Laden
|
4719 |
+
$REPLACE_creative
|
4720 |
+
$REPLACE_meantime
|
4721 |
+
$REPLACE_Turkey
|
4722 |
+
$REPLACE_presenting
|
4723 |
+
$REPLACE_Christian
|
4724 |
+
$REPLACE_nervousness
|
4725 |
+
$REPLACE_meaningless
|
4726 |
+
$APPEND_player
|
4727 |
+
$REPLACE_motivate
|
4728 |
+
$REPLACE_advertisements
|
4729 |
+
$REPLACE_artwork
|
4730 |
+
$REPLACE_encouragement
|
4731 |
+
$REPLACE_regard
|
4732 |
+
$REPLACE_slower
|
4733 |
+
$REPLACE_dolls
|
4734 |
+
$REPLACE_200
|
4735 |
+
$REPLACE_unconsciously
|
4736 |
+
$APPEND_happens
|
4737 |
+
$REPLACE_facility
|
4738 |
+
$APPEND_advice
|
4739 |
+
$REPLACE_North
|
4740 |
+
$REPLACE_awareness
|
4741 |
+
$APPEND_planned
|
4742 |
+
$REPLACE_genetic
|
4743 |
+
$REPLACE_management
|
4744 |
+
$REPLACE_refund
|
4745 |
+
$REPLACE_brighter
|
4746 |
+
$REPLACE_confirm
|
4747 |
+
$REPLACE_burning
|
4748 |
+
$REPLACE_composition
|
4749 |
+
$APPEND_answer
|
4750 |
+
$REPLACE_conserve
|
4751 |
+
$REPLACE_destruction
|
4752 |
+
$REPLACE_duties
|
4753 |
+
$REPLACE_creativity
|
4754 |
+
$APPEND_expressions
|
4755 |
+
$APPEND_commit
|
4756 |
+
$REPLACE_East
|
4757 |
+
$REPLACE_milk
|
4758 |
+
$REPLACE_30pm
|
4759 |
+
$REPLACE_belong
|
4760 |
+
$REPLACE_autograph
|
4761 |
+
$REPLACE_caring
|
4762 |
+
$REPLACE_download
|
4763 |
+
$APPEND_development
|
4764 |
+
$REPLACE_compete
|
4765 |
+
$REPLACE_qualities
|
4766 |
+
$APPEND_avoid
|
4767 |
+
$REPLACE_recieved
|
4768 |
+
$APPEND_Perfect
|
4769 |
+
$REPLACE_yours
|
4770 |
+
$REPLACE_breaks
|
4771 |
+
$REPLACE_amusement
|
4772 |
+
$REPLACE_models
|
4773 |
+
$REPLACE_persevere
|
4774 |
+
$REPLACE_emergency
|
4775 |
+
$REPLACE_empty
|
4776 |
+
$REPLACE_rescue
|
4777 |
+
$APPEND_term
|
4778 |
+
$REPLACE_requirements
|
4779 |
+
$REPLACE_sufficient
|
4780 |
+
$APPEND_cooking
|
4781 |
+
$REPLACE_fascinated
|
4782 |
+
$REPLACE_14th
|
4783 |
+
$REPLACE_relevant
|
4784 |
+
$REPLACE_listed
|
4785 |
+
$REPLACE_vision
|
4786 |
+
$REPLACE_g
|
4787 |
+
$REPLACE_leadership
|
4788 |
+
$REPLACE_butI
|
4789 |
+
$APPEND_provide
|
4790 |
+
$REPLACE_organize
|
4791 |
+
$APPEND_created
|
4792 |
+
$REPLACE_12th
|
4793 |
+
$REPLACE_collection
|
4794 |
+
$REPLACE_supply
|
4795 |
+
$APPEND_Besides
|
4796 |
+
$REPLACE_stranger
|
4797 |
+
$REPLACE_combination
|
4798 |
+
$REPLACE_farther
|
4799 |
+
$REPLACE_awaiting
|
4800 |
+
$APPEND_hand
|
4801 |
+
$REPLACE_unsure
|
4802 |
+
$REPLACE_profile
|
4803 |
+
$APPEND_moving
|
4804 |
+
$APPEND_street
|
4805 |
+
$REPLACE_delighted
|
4806 |
+
$REPLACE_pretended
|
4807 |
+
$REPLACE_driven
|
4808 |
+
$REPLACE_maintaining
|
4809 |
+
$REPLACE_liar
|
4810 |
+
$TRANSFORM_SPLIT_HYPHEN
|
4811 |
+
$REPLACE_glass
|
4812 |
+
$REPLACE_stick
|
4813 |
+
$REPLACE_itchy
|
4814 |
+
$REPLACE_ought
|
4815 |
+
$REPLACE_consumption
|
4816 |
+
$REPLACE_quicker
|
4817 |
+
$REPLACE_spare
|
4818 |
+
$REPLACE_governments
|
4819 |
+
$APPEND_view
|
4820 |
+
$REPLACE_P
|
4821 |
+
$REPLACE_colorful
|
4822 |
+
$REPLACE_guitarist
|
4823 |
+
$APPEND_wants
|
4824 |
+
$REPLACE_million
|
4825 |
+
$REPLACE_behalf
|
4826 |
+
$REPLACE_kilometres
|
4827 |
+
$REPLACE_bank
|
4828 |
+
$APPEND_morning
|
4829 |
+
$REPLACE_weekends
|
4830 |
+
$REPLACE_occasion
|
4831 |
+
$APPEND_tour
|
4832 |
+
$REPLACE_object
|
4833 |
+
$REPLACE_Others
|
4834 |
+
$REPLACE_Considering
|
4835 |
+
$REPLACE_species
|
4836 |
+
$REPLACE_session
|
4837 |
+
$APPEND_removed
|
4838 |
+
$REPLACE_hiking
|
4839 |
+
$REPLACE_resolutions
|
4840 |
+
$REPLACE_peak
|
4841 |
+
$REPLACE_consequences
|
4842 |
+
$REPLACE_soaked
|
4843 |
+
$REPLACE_presents
|
4844 |
+
$APPEND_25
|
4845 |
+
$REPLACE_salad
|
4846 |
+
$REPLACE_filling
|
4847 |
+
$REPLACE_attack
|
4848 |
+
$APPEND_foods
|
4849 |
+
$REPLACE_tendency
|
4850 |
+
$REPLACE_discoveries
|
4851 |
+
$REPLACE_immediate
|
4852 |
+
$REPLACE_submitted
|
4853 |
+
$REPLACE_THAT
|
4854 |
+
$APPEND_develop
|
4855 |
+
$REPLACE_battery
|
4856 |
+
$REPLACE_dont
|
4857 |
+
$REPLACE_feature
|
4858 |
+
$APPEND_opportunity
|
4859 |
+
$REPLACE_bodies
|
4860 |
+
$REPLACE_goldfish
|
4861 |
+
$REPLACE_adapt
|
4862 |
+
$REPLACE_views
|
4863 |
+
$REPLACE_forgetting
|
4864 |
+
$REPLACE_saved
|
4865 |
+
$REPLACE_doesn
|
4866 |
+
$REPLACE_thirst
|
4867 |
+
$APPEND_Me
|
4868 |
+
$REPLACE_distant
|
4869 |
+
$REPLACE_opposition
|
4870 |
+
$REPLACE_breed
|
4871 |
+
$REPLACE_practised
|
4872 |
+
$REPLACE_miserable
|
4873 |
+
$APPEND_sore
|
4874 |
+
$REPLACE_brain
|
4875 |
+
$REPLACE_sessions
|
4876 |
+
$REPLACE_policeman
|
4877 |
+
$REPLACE_favor
|
4878 |
+
$REPLACE_managing
|
4879 |
+
$REPLACE_rains
|
4880 |
+
$REPLACE_baths
|
4881 |
+
$REPLACE_surrounding
|
4882 |
+
$REPLACE_Seoul
|
4883 |
+
$APPEND_regardless
|
4884 |
+
$APPEND_Something
|
4885 |
+
$REPLACE_architectural
|
4886 |
+
$REPLACE_ok
|
4887 |
+
$REPLACE_welfare
|
4888 |
+
$APPEND_share
|
4889 |
+
$REPLACE_daughters
|
4890 |
+
$REPLACE_phones
|
4891 |
+
$REPLACE_downstairs
|
4892 |
+
$REPLACE_arriving
|
4893 |
+
$REPLACE_stepped
|
4894 |
+
$REPLACE_competing
|
4895 |
+
$REPLACE_catching
|
4896 |
+
$REPLACE_conversing
|
4897 |
+
$REPLACE_encourages
|
4898 |
+
$REPLACE_depressing
|
4899 |
+
$REPLACE_begining
|
4900 |
+
$REPLACE_admission
|
4901 |
+
$APPEND_voice
|
4902 |
+
$REPLACE_boredom
|
4903 |
+
$APPEND_alot
|
4904 |
+
$APPEND_familiar
|
4905 |
+
$REPLACE_breaking
|
4906 |
+
$REPLACE_fortunately
|
4907 |
+
$REPLACE_Over
|
4908 |
+
$APPEND_lost
|
4909 |
+
$REPLACE_intended
|
4910 |
+
$REPLACE_neighbourhood
|
4911 |
+
$REPLACE_mysteries
|
4912 |
+
$REPLACE_certificate
|
4913 |
+
$REPLACE_data
|
4914 |
+
$APPEND_personal
|
4915 |
+
$REPLACE_joyful
|
4916 |
+
$REPLACE_immigrants
|
4917 |
+
$REPLACE_emotions
|
4918 |
+
$REPLACE_checkup
|
4919 |
+
$REPLACE_licence
|
4920 |
+
$REPLACE_juice
|
4921 |
+
$APPEND_whenever
|
4922 |
+
$REPLACE_dogs
|
4923 |
+
$REPLACE_thereby
|
4924 |
+
$APPEND_department
|
4925 |
+
$APPEND_assignment
|
4926 |
+
$REPLACE_defend
|
4927 |
+
$REPLACE_approached
|
4928 |
+
$REPLACE_Fireworks
|
4929 |
+
$APPEND_activity
|
4930 |
+
$APPEND_quality
|
4931 |
+
$REPLACE_basics
|
4932 |
+
$REPLACE_costumes
|
4933 |
+
$REPLACE_key
|
4934 |
+
$REPLACE_outdoors
|
4935 |
+
$REPLACE_hay
|
4936 |
+
$APPEND_prepare
|
4937 |
+
$REPLACE_hiding
|
4938 |
+
$REPLACE_curiosity
|
4939 |
+
$APPEND_dealing
|
4940 |
+
$REPLACE_passion
|
4941 |
+
$REPLACE_costed
|
4942 |
+
$REPLACE_fries
|
4943 |
+
$REPLACE_HAVE
|
4944 |
+
$REPLACE_divorced
|
4945 |
+
$APPEND_display
|
4946 |
+
$REPLACE_baby
|
4947 |
+
$APPEND_cherry
|
4948 |
+
$REPLACE_Returning
|
4949 |
+
$APPEND_lack
|
4950 |
+
$APPEND_learnt
|
4951 |
+
$REPLACE_Im
|
4952 |
+
$APPEND_naturally
|
4953 |
+
$REPLACE_router
|
4954 |
+
$APPEND_goals
|
4955 |
+
$REPLACE_seaside
|
4956 |
+
$REPLACE_summarize
|
4957 |
+
$APPEND_appeared
|
4958 |
+
$REPLACE_claim
|
4959 |
+
$APPEND_ate
|
4960 |
+
$REPLACE_exchanging
|
4961 |
+
$APPEND_arrive
|
4962 |
+
$APPEND_art
|
4963 |
+
$REPLACE_participating
|
4964 |
+
$REPLACE_seek
|
4965 |
+
$REPLACE_innocent
|
4966 |
+
$APPEND_express
|
4967 |
+
$REPLACE_lunchtime
|
4968 |
+
$REPLACE_reaction
|
4969 |
+
$REPLACE_consisted
|
4970 |
+
$REPLACE_Eastern
|
4971 |
+
$APPEND_track
|
4972 |
+
$APPEND_baby
|
4973 |
+
$REPLACE_touching
|
4974 |
+
$REPLACE_lively
|
4975 |
+
$REPLACE_bridge
|
4976 |
+
$REPLACE_murderers
|
4977 |
+
$REPLACE_Brazil
|
4978 |
+
$REPLACE_feeding
|
4979 |
+
$REPLACE_honestly
|
4980 |
+
$REPLACE_Piece
|
4981 |
+
$REPLACE_springs
|
4982 |
+
$REPLACE_purchase
|
4983 |
+
$REPLACE_pray
|
4984 |
+
$REPLACE_washed
|
4985 |
+
$APPEND_sentence
|
4986 |
+
$REPLACE_Olympics
|
4987 |
+
$REPLACE_strongest
|
4988 |
+
$REPLACE_leads
|
4989 |
+
$REPLACE_stomachache
|
4990 |
+
$REPLACE_John
|
4991 |
+
$REPLACE_opponent
|
4992 |
+
$REPLACE_contents
|
4993 |
+
$REPLACE_plot
|
4994 |
+
$APPEND_Many
|
4995 |
+
$REPLACE_experiment
|
4996 |
+
$REPLACE_beings
|
4997 |
+
$REPLACE_owns
|
4998 |
+
$REPLACE_airline
|
4999 |
+
$REPLACE_severely
|
5000 |
+
$REPLACE_ages
|
5001 |
+
@@UNKNOWN@@
|
5002 |
+
@@PADDING@@
|
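The entries above are the tail of output_vocabulary/labels.txt: word-level $REPLACE_*/$APPEND_* edits, transform labels such as $TRANSFORM_SPLIT_HYPHEN, and the special @@UNKNOWN@@ / @@PADDING@@ entries. As a rough, illustrative sketch (not part of this commit), a label string can be bucketed into its edit family like this:

# Illustrative sketch only: bucket a label from labels.txt into its edit family.
def label_family(label: str) -> str:
    if label in ("@@UNKNOWN@@", "@@PADDING@@"):
        return "special"        # vocabulary bookkeeping entries
    if label == "$KEEP":
        return "keep"           # leave the source token unchanged
    if label.startswith("$APPEND_"):
        return "append"         # insert the suffix word after the source token
    if label.startswith("$REPLACE_"):
        return "replace"        # substitute the suffix word for the source token
    if label.startswith("$TRANSFORM_"):
        return "transform"      # case / verb form / split / agreement change
    if label.startswith("$MERGE_"):
        return "merge"          # join the source token with its neighbour
    return "other"

print(label_family("$REPLACE_jewelry"))         # replace
print(label_family("$TRANSFORM_SPLIT_HYPHEN"))  # transform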
output_vocabulary/non_padded_namespaces.txt
ADDED
@@ -0,0 +1,2 @@
1 |
+
*tags
|
2 |
+
*labels
|
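non_padded_namespaces.txt marks namespaces matching *tags and *labels as non-padded, so AllenNLP does not inject its own padding/OOV tokens into them (labels.txt already lists @@UNKNOWN@@ and @@PADDING@@ explicitly). A hedged example of how the exported output_vocabulary directory is typically loaded, assuming the allennlp 0.8.x API pinned below in requirements.txt:

# Hedged example, not part of the commit: load the exported vocabulary.
from allennlp.data.vocabulary import Vocabulary

vocab = Vocabulary.from_files("output_vocabulary")
print(vocab.get_vocab_size("labels"))   # expected 5002, matching labels.txt above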
requirements.txt
CHANGED
@@ -1,10 +1,14 @@
1 |
gradio
|
2 |
-
transformers
|
3 |
-
torch
|
4 |
spacy
|
5 |
nltk
|
6 |
gensim
|
7 |
pattern
|
8 |
textblob
|
9 |
-
|
10 |
-
|
|
|
1 |
+
torch==1.10.0
|
2 |
+
allennlp==0.8.4
|
3 |
+
python-Levenshtein==0.12.1
|
4 |
+
transformers==4.11.3
|
5 |
+
scikit-learn==0.20.0
|
6 |
+
sentencepiece==0.1.95
|
7 |
+
overrides==4.1.2
|
8 |
+
numpy==1.19.5
|
9 |
gradio
|
|
|
|
|
10 |
spacy
|
11 |
nltk
|
12 |
gensim
|
13 |
pattern
|
14 |
textblob
|
|
|
|
utils/filter_brackets.py
ADDED
@@ -0,0 +1,35 @@
1 |
+
import argparse
|
2 |
+
import re
|
3 |
+
|
4 |
+
from helpers import write_lines
|
5 |
+
|
6 |
+
|
7 |
+
def filter_line(line):
|
8 |
+
if "-LRB-" in line and "-RRB-" in line:
|
9 |
+
rep = re.sub(r'\-.*?LRB.*?\-.*?\-.*?RRB.*?\-', '', line)
|
10 |
+
line_cleaned = rep
|
11 |
+
elif ("-LRB-" in line and "-RRB-" not in line) or (
|
12 |
+
"-LRB-" not in line and "-RRB-" in line):
|
13 |
+
line_cleaned = line.replace("-LRB-", '"').replace("-RRB-", '"')
|
14 |
+
else:
|
15 |
+
line_cleaned = line
|
16 |
+
return line_cleaned
|
17 |
+
|
18 |
+
|
19 |
+
def main(args):
|
20 |
+
with open(args.source) as f:
|
21 |
+
data = [row.rstrip() for row in f]
|
22 |
+
|
23 |
+
write_lines(args.output, [filter_line(row) for row in data])
|
24 |
+
|
25 |
+
|
26 |
+
if __name__ == '__main__':
|
27 |
+
parser = argparse.ArgumentParser()
|
28 |
+
parser.add_argument('-s', '--source',
|
29 |
+
help='Path to the source file',
|
30 |
+
required=True)
|
31 |
+
parser.add_argument('-o', '--output',
|
32 |
+
help='Path to the output file',
|
33 |
+
required=True)
|
34 |
+
args = parser.parse_args()
|
35 |
+
main(args)
|
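filter_brackets.py strips Penn Treebank style -LRB- ... -RRB- spans when both markers occur in a line, and falls back to a plain double quote when only one side is present. A quick illustrative check (made-up sentences; run from utils/ so `from helpers import write_lines` resolves, and note that importing helpers also requires data/verb-form-vocab.txt, which helpers.py reads at import time):

# Illustrative check of filter_line, not part of the commit.
from filter_brackets import filter_line

print(filter_line("the -LRB- old -RRB- house"))  # bracketed span is stripped
print(filter_line("he said -LRB- loudly"))       # lone marker becomes a double quote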
utils/helpers.py
ADDED
@@ -0,0 +1,233 @@
1 |
+
import os
|
2 |
+
from pathlib import Path
|
3 |
+
|
4 |
+
|
5 |
+
VOCAB_DIR = Path(__file__).resolve().parent.parent / "data"
|
6 |
+
PAD = "@@PADDING@@"
|
7 |
+
UNK = "@@UNKNOWN@@"
|
8 |
+
START_TOKEN = "$START"
|
9 |
+
SEQ_DELIMETERS = {"tokens": " ",
|
10 |
+
"labels": "SEPL|||SEPR",
|
11 |
+
"operations": "SEPL__SEPR"}
|
12 |
+
REPLACEMENTS = {
|
13 |
+
"''": '"',
|
14 |
+
'--': '—',
|
15 |
+
'`': "'",
|
16 |
+
"'ve": "' ve",
|
17 |
+
}
|
18 |
+
|
19 |
+
|
20 |
+
def get_verb_form_dicts():
|
21 |
+
path_to_dict = os.path.join(VOCAB_DIR, "verb-form-vocab.txt")
|
22 |
+
encode, decode = {}, {}
|
23 |
+
with open(path_to_dict, encoding="utf-8") as f:
|
24 |
+
for line in f:
|
25 |
+
words, tags = line.split(":")
|
26 |
+
word1, word2 = words.split("_")
|
27 |
+
tag1, tag2 = tags.split("_")
|
28 |
+
decode_key = f"{word1}_{tag1}_{tag2.strip()}"
|
29 |
+
if decode_key not in decode:
|
30 |
+
encode[words] = tags
|
31 |
+
decode[decode_key] = word2
|
32 |
+
return encode, decode
|
33 |
+
|
34 |
+
|
35 |
+
ENCODE_VERB_DICT, DECODE_VERB_DICT = get_verb_form_dicts()
|
36 |
+
|
37 |
+
|
38 |
+
def get_target_sent_by_edits(source_tokens, edits):
|
39 |
+
target_tokens = source_tokens[:]
|
40 |
+
shift_idx = 0
|
41 |
+
for edit in edits:
|
42 |
+
start, end, label, _ = edit
|
43 |
+
target_pos = start + shift_idx
|
44 |
+
source_token = target_tokens[target_pos] \
|
45 |
+
if len(target_tokens) > target_pos >= 0 else ''
|
46 |
+
if label == "":
|
47 |
+
del target_tokens[target_pos]
|
48 |
+
shift_idx -= 1
|
49 |
+
elif start == end:
|
50 |
+
word = label.replace("$APPEND_", "")
|
51 |
+
target_tokens[target_pos: target_pos] = [word]
|
52 |
+
shift_idx += 1
|
53 |
+
elif label.startswith("$TRANSFORM_"):
|
54 |
+
word = apply_reverse_transformation(source_token, label)
|
55 |
+
if word is None:
|
56 |
+
word = source_token
|
57 |
+
target_tokens[target_pos] = word
|
58 |
+
elif start == end - 1:
|
59 |
+
word = label.replace("$REPLACE_", "")
|
60 |
+
target_tokens[target_pos] = word
|
61 |
+
elif label.startswith("$MERGE_"):
|
62 |
+
target_tokens[target_pos + 1: target_pos + 1] = [label]
|
63 |
+
shift_idx += 1
|
64 |
+
|
65 |
+
return replace_merge_transforms(target_tokens)
|
66 |
+
|
67 |
+
|
68 |
+
def replace_merge_transforms(tokens):
|
69 |
+
if all(not x.startswith("$MERGE_") for x in tokens):
|
70 |
+
return tokens
|
71 |
+
|
72 |
+
target_line = " ".join(tokens)
|
73 |
+
target_line = target_line.replace(" $MERGE_HYPHEN ", "-")
|
74 |
+
target_line = target_line.replace(" $MERGE_SPACE ", "")
|
75 |
+
return target_line.split()
|
76 |
+
|
77 |
+
|
78 |
+
def convert_using_case(token, smart_action):
|
79 |
+
if not smart_action.startswith("$TRANSFORM_CASE_"):
|
80 |
+
return token
|
81 |
+
if smart_action.endswith("LOWER"):
|
82 |
+
return token.lower()
|
83 |
+
elif smart_action.endswith("UPPER"):
|
84 |
+
return token.upper()
|
85 |
+
elif smart_action.endswith("CAPITAL"):
|
86 |
+
return token.capitalize()
|
87 |
+
elif smart_action.endswith("CAPITAL_1"):
|
88 |
+
return token[0] + token[1:].capitalize()
|
89 |
+
elif smart_action.endswith("UPPER_-1"):
|
90 |
+
return token[:-1].upper() + token[-1]
|
91 |
+
else:
|
92 |
+
return token
|
93 |
+
|
94 |
+
|
95 |
+
def convert_using_verb(token, smart_action):
|
96 |
+
key_word = "$TRANSFORM_VERB_"
|
97 |
+
if not smart_action.startswith(key_word):
|
98 |
+
raise Exception(f"Unknown action type {smart_action}")
|
99 |
+
encoding_part = f"{token}_{smart_action[len(key_word):]}"
|
100 |
+
decoded_target_word = decode_verb_form(encoding_part)
|
101 |
+
return decoded_target_word
|
102 |
+
|
103 |
+
|
104 |
+
def convert_using_split(token, smart_action):
|
105 |
+
key_word = "$TRANSFORM_SPLIT"
|
106 |
+
if not smart_action.startswith(key_word):
|
107 |
+
raise Exception(f"Unknown action type {smart_action}")
|
108 |
+
target_words = token.split("-")
|
109 |
+
return " ".join(target_words)
|
110 |
+
|
111 |
+
|
112 |
+
def convert_using_plural(token, smart_action):
|
113 |
+
if smart_action.endswith("PLURAL"):
|
114 |
+
return token + "s"
|
115 |
+
elif smart_action.endswith("SINGULAR"):
|
116 |
+
return token[:-1]
|
117 |
+
else:
|
118 |
+
raise Exception(f"Unknown action type {smart_action}")
|
119 |
+
|
120 |
+
|
121 |
+
def apply_reverse_transformation(source_token, transform):
|
122 |
+
if transform.startswith("$TRANSFORM"):
|
123 |
+
# deal with equal
|
124 |
+
if transform == "$KEEP":
|
125 |
+
return source_token
|
126 |
+
# deal with case
|
127 |
+
if transform.startswith("$TRANSFORM_CASE"):
|
128 |
+
return convert_using_case(source_token, transform)
|
129 |
+
# deal with verb
|
130 |
+
if transform.startswith("$TRANSFORM_VERB"):
|
131 |
+
return convert_using_verb(source_token, transform)
|
132 |
+
# deal with split
|
133 |
+
if transform.startswith("$TRANSFORM_SPLIT"):
|
134 |
+
return convert_using_split(source_token, transform)
|
135 |
+
# deal with single/plural
|
136 |
+
if transform.startswith("$TRANSFORM_AGREEMENT"):
|
137 |
+
return convert_using_plural(source_token, transform)
|
138 |
+
# raise exception if not find correct type
|
139 |
+
raise Exception(f"Unknown action type {transform}")
|
140 |
+
else:
|
141 |
+
return source_token
|
142 |
+
|
143 |
+
|
144 |
+
def read_parallel_lines(fn1, fn2):
|
145 |
+
lines1 = read_lines(fn1, skip_strip=True)
|
146 |
+
lines2 = read_lines(fn2, skip_strip=True)
|
147 |
+
assert len(lines1) == len(lines2)
|
148 |
+
out_lines1, out_lines2 = [], []
|
149 |
+
for line1, line2 in zip(lines1, lines2):
|
150 |
+
if not line1.strip() or not line2.strip():
|
151 |
+
continue
|
152 |
+
else:
|
153 |
+
out_lines1.append(line1)
|
154 |
+
out_lines2.append(line2)
|
155 |
+
return out_lines1, out_lines2
|
156 |
+
|
157 |
+
|
158 |
+
def read_lines(fn, skip_strip=False):
|
159 |
+
if not os.path.exists(fn):
|
160 |
+
return []
|
161 |
+
with open(fn, 'r', encoding='utf-8') as f:
|
162 |
+
lines = f.readlines()
|
163 |
+
return [s.strip() for s in lines if s.strip() or skip_strip]
|
164 |
+
|
165 |
+
|
166 |
+
def write_lines(fn, lines, mode='w'):
|
167 |
+
if mode == 'w' and os.path.exists(fn):
|
168 |
+
os.remove(fn)
|
169 |
+
with open(fn, encoding='utf-8', mode=mode) as f:
|
170 |
+
f.writelines(['%s\n' % s for s in lines])
|
171 |
+
|
172 |
+
|
173 |
+
def decode_verb_form(original):
|
174 |
+
return DECODE_VERB_DICT.get(original)
|
175 |
+
|
176 |
+
|
177 |
+
def encode_verb_form(original_word, corrected_word):
|
178 |
+
decoding_request = original_word + "_" + corrected_word
|
179 |
+
decoding_response = ENCODE_VERB_DICT.get(decoding_request, "").strip()
|
180 |
+
if original_word and decoding_response:
|
181 |
+
answer = decoding_response
|
182 |
+
else:
|
183 |
+
answer = None
|
184 |
+
return answer
|
185 |
+
|
186 |
+
|
187 |
+
def get_weights_name(transformer_name, lowercase):
|
188 |
+
if transformer_name == 'bert' and lowercase:
|
189 |
+
return 'bert-base-uncased'
|
190 |
+
if transformer_name == 'bert' and not lowercase:
|
191 |
+
return 'bert-base-cased'
|
192 |
+
if transformer_name == 'bert-large' and not lowercase:
|
193 |
+
return 'bert-large-cased'
|
194 |
+
if transformer_name == 'distilbert':
|
195 |
+
if not lowercase:
|
196 |
+
print('Warning! This model was trained only on uncased sentences.')
|
197 |
+
return 'distilbert-base-uncased'
|
198 |
+
if transformer_name == 'albert':
|
199 |
+
if not lowercase:
|
200 |
+
print('Warning! This model was trained only on uncased sentences.')
|
201 |
+
return 'albert-base-v1'
|
202 |
+
if lowercase:
|
203 |
+
print('Warning! This model was trained only on cased sentences.')
|
204 |
+
if transformer_name == 'roberta':
|
205 |
+
return 'roberta-base'
|
206 |
+
if transformer_name == 'roberta-large':
|
207 |
+
return 'roberta-large'
|
208 |
+
if transformer_name == 'gpt2':
|
209 |
+
return 'gpt2'
|
210 |
+
if transformer_name == 'transformerxl':
|
211 |
+
return 'transfo-xl-wt103'
|
212 |
+
if transformer_name == 'xlnet':
|
213 |
+
return 'xlnet-base-cased'
|
214 |
+
if transformer_name == 'xlnet-large':
|
215 |
+
return 'xlnet-large-cased'
|
216 |
+
|
217 |
+
|
218 |
+
def remove_double_tokens(sent):
|
219 |
+
tokens = sent.split(' ')
|
220 |
+
deleted_idx = []
|
221 |
+
for i in range(len(tokens) -1):
|
222 |
+
if tokens[i] == tokens[i + 1]:
|
223 |
+
deleted_idx.append(i + 1)
|
224 |
+
if deleted_idx:
|
225 |
+
tokens = [tokens[i] for i in range(len(tokens)) if i not in deleted_idx]
|
226 |
+
return ' '.join(tokens)
|
227 |
+
|
228 |
+
|
229 |
+
def normalize(sent):
|
230 |
+
sent = remove_double_tokens(sent)
|
231 |
+
for fr, to in REPLACEMENTS.items():
|
232 |
+
sent = sent.replace(fr, to)
|
233 |
+
return sent.lower()
|
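The part of helpers.py that matters most at inference time is get_target_sent_by_edits, which applies (start, end, label, _) span edits to a token list, covering appends, replacements, $TRANSFORM_* labels and $MERGE_* joins. A minimal sketch of its behaviour (not from the commit; assumes the repo root is on sys.path and data/verb-form-vocab.txt exists, since helpers loads that file at import time):

# Minimal sketch: apply two edits to a toy sentence.
from utils.helpers import get_target_sent_by_edits

tokens = ["He", "go", "to", "school"]
edits = [
    (1, 2, "$REPLACE_goes", None),     # replace token 1 ("go") with "goes"
    (4, 4, "$APPEND_everyday", None),  # insert "everyday" after the last token
]
print(get_target_sent_by_edits(tokens, edits))
# ['He', 'goes', 'to', 'school', 'everyday']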
utils/prepare_clc_fce_data.py
ADDED
@@ -0,0 +1,123 @@
1 |
+
#!/usr/bin/env python
|
2 |
+
"""
|
3 |
+
Convert CLC-FCE dataset (The Cambridge Learner Corpus) to the parallel sentences format.
|
4 |
+
"""
|
5 |
+
|
6 |
+
import argparse
|
7 |
+
import glob
|
8 |
+
import os
|
9 |
+
import re
|
10 |
+
from xml.etree import cElementTree
|
11 |
+
|
12 |
+
from nltk.tokenize import sent_tokenize, word_tokenize
|
13 |
+
from tqdm import tqdm
|
14 |
+
|
15 |
+
|
16 |
+
def annotate_fce_doc(xml):
|
17 |
+
"""Takes a FCE xml document and yields sentences with annotated errors."""
|
18 |
+
result = []
|
19 |
+
doc = cElementTree.fromstring(xml)
|
20 |
+
paragraphs = doc.findall('head/text/*/coded_answer/p')
|
21 |
+
for p in paragraphs:
|
22 |
+
text = _get_formatted_text(p)
|
23 |
+
result.append(text)
|
24 |
+
|
25 |
+
return '\n'.join(result)
|
26 |
+
|
27 |
+
|
28 |
+
def _get_formatted_text(elem, ignore_tags=None):
|
29 |
+
text = elem.text or ''
|
30 |
+
ignore_tags = [tag.upper() for tag in (ignore_tags or [])]
|
31 |
+
correct = None
|
32 |
+
mistake = None
|
33 |
+
|
34 |
+
for child in elem.getchildren():
|
35 |
+
tag = child.tag.upper()
|
36 |
+
if tag == 'NS':
|
37 |
+
text += _get_formatted_text(child)
|
38 |
+
|
39 |
+
elif tag == 'UNKNOWN':
|
40 |
+
text += ' UNKNOWN '
|
41 |
+
|
42 |
+
elif tag == 'C':
|
43 |
+
assert correct is None
|
44 |
+
correct = _get_formatted_text(child)
|
45 |
+
|
46 |
+
elif tag == 'I':
|
47 |
+
assert mistake is None
|
48 |
+
mistake = _get_formatted_text(child)
|
49 |
+
|
50 |
+
elif tag in ignore_tags:
|
51 |
+
pass
|
52 |
+
|
53 |
+
else:
|
54 |
+
raise ValueError(f"Unknown tag `{child.tag}`", text)
|
55 |
+
|
56 |
+
if correct or mistake:
|
57 |
+
correct = correct or ''
|
58 |
+
mistake = mistake or ''
|
59 |
+
if '=>' not in mistake:
|
60 |
+
text += f'{{{mistake}=>{correct}}}'
|
61 |
+
else:
|
62 |
+
text += mistake
|
63 |
+
|
64 |
+
text += elem.tail or ''
|
65 |
+
return text
|
66 |
+
|
67 |
+
|
68 |
+
def convert_fce(fce_dir):
|
69 |
+
"""Processes the whole FCE directory. Yields annotated documents (strings)."""
|
70 |
+
|
71 |
+
# Ensure we got the valid dataset path
|
72 |
+
if not os.path.isdir(fce_dir):
|
73 |
+
raise UserWarning(
|
74 |
+
f"{fce_dir} is not a valid path")
|
75 |
+
|
76 |
+
dataset_dir = os.path.join(fce_dir, 'dataset')
|
77 |
+
if not os.path.exists(dataset_dir):
|
78 |
+
raise UserWarning(
|
79 |
+
f"{fce_dir} doesn't point to a dataset's root dir")
|
80 |
+
|
81 |
+
# Convert XML docs to the corpora format
|
82 |
+
filenames = sorted(glob.glob(os.path.join(dataset_dir, '*/*.xml')))
|
83 |
+
|
84 |
+
docs = []
|
85 |
+
for filename in filenames:
|
86 |
+
with open(filename, encoding='utf-8') as f:
|
87 |
+
doc = annotate_fce_doc(f.read())
|
88 |
+
docs.append(doc)
|
89 |
+
return docs
|
90 |
+
|
91 |
+
|
92 |
+
def main():
|
93 |
+
fce = convert_fce(args.fce_dataset_path)
|
94 |
+
with open(args.output + "/fce-original.txt", 'w', encoding='utf-8') as out_original, \
|
95 |
+
open(args.output + "/fce-applied.txt", 'w', encoding='utf-8') as out_applied:
|
96 |
+
for doc in tqdm(fce, unit='doc'):
|
97 |
+
sents = re.split(r"\n +\n", doc)
|
98 |
+
for sent in sents:
|
99 |
+
tokenized_sents = sent_tokenize(sent)
|
100 |
+
for i in range(len(tokenized_sents)):
|
101 |
+
if re.search(r"[{>][.?!]$", tokenized_sents[i]):
|
102 |
+
tokenized_sents[i + 1] = tokenized_sents[i] + " " + tokenized_sents[i + 1]
|
103 |
+
tokenized_sents[i] = ""
|
104 |
+
regexp = r'{([^{}]*?)=>([^{}]*?)}'
|
105 |
+
original = re.sub(regexp, r"\1", tokenized_sents[i])
|
106 |
+
applied = re.sub(regexp, r"\2", tokenized_sents[i])
|
107 |
+
# filter out nested alerts
|
108 |
+
if original != "" and applied != "" and not re.search(r"[{}=]", original) \
|
109 |
+
and not re.search(r"[{}=]", applied):
|
110 |
+
out_original.write(" ".join(word_tokenize(original)) + "\n")
|
111 |
+
out_applied.write(" ".join(word_tokenize(applied)) + "\n")
|
112 |
+
|
113 |
+
|
114 |
+
if __name__ == '__main__':
|
115 |
+
parser = argparse.ArgumentParser(description=(
|
116 |
+
"Convert CLC-FCE dataset to the parallel sentences format."))
|
117 |
+
parser.add_argument('fce_dataset_path',
|
118 |
+
help='Path to the folder with the FCE dataset')
|
119 |
+
parser.add_argument('--output',
|
120 |
+
help='Path to the output folder')
|
121 |
+
args = parser.parse_args()
|
122 |
+
|
123 |
+
main()
|
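prepare_clc_fce_data.py serialises each FCE correction as {mistake=>correct} inside the sentence, then main() splits that markup into the two parallel files (fce-original.txt / fce-applied.txt) with a pair of regex substitutions. Illustrative only, on a made-up sentence:

# How the {mistake=>correct} markup is split into parallel sentences.
import re

regexp = r'{([^{}]*?)=>([^{}]*?)}'
sent = "Then I {goed=>went} to the {see=>sea} ."
print(re.sub(regexp, r"\1", sent))  # original side:  Then I goed to the see .
print(re.sub(regexp, r"\2", sent))  # corrected side: Then I went to the sea .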
utils/preprocess_data.py
ADDED
@@ -0,0 +1,488 @@
1 |
+
import argparse
|
2 |
+
import os
|
3 |
+
from difflib import SequenceMatcher
|
4 |
+
|
5 |
+
import Levenshtein
|
6 |
+
import numpy as np
|
7 |
+
from tqdm import tqdm
|
8 |
+
|
9 |
+
from helpers import write_lines, read_parallel_lines, encode_verb_form, \
|
10 |
+
apply_reverse_transformation, SEQ_DELIMETERS, START_TOKEN
|
11 |
+
|
12 |
+
|
13 |
+
def perfect_align(t, T, insertions_allowed=0,
|
14 |
+
cost_function=Levenshtein.distance):
|
15 |
+
# dp[i, j, k] is a minimal cost of matching first `i` tokens of `t` with
|
16 |
+
# first `j` tokens of `T`, after making `k` insertions after last match of
|
17 |
+
# token from `t`. In other words t[:i] aligned with T[:j].
|
18 |
+
|
19 |
+
# Initialize with INFINITY (unknown)
|
20 |
+
shape = (len(t) + 1, len(T) + 1, insertions_allowed + 1)
|
21 |
+
dp = np.ones(shape, dtype=int) * int(1e9)
|
22 |
+
come_from = np.ones(shape, dtype=int) * int(1e9)
|
23 |
+
come_from_ins = np.ones(shape, dtype=int) * int(1e9)
|
24 |
+
|
25 |
+
dp[0, 0, 0] = 0 # The only known starting point. Nothing matched to nothing.
|
26 |
+
for i in range(len(t) + 1): # Go inclusive
|
27 |
+
for j in range(len(T) + 1): # Go inclusive
|
28 |
+
for q in range(insertions_allowed + 1): # Go inclusive
|
29 |
+
if i < len(t):
|
30 |
+
# Given matched sequence of t[:i] and T[:j], match token
|
31 |
+
# t[i] with following tokens T[j:k].
|
32 |
+
for k in range(j, len(T) + 1):
|
33 |
+
transform = \
|
34 |
+
apply_transformation(t[i], ' '.join(T[j:k]))
|
35 |
+
if transform:
|
36 |
+
cost = 0
|
37 |
+
else:
|
38 |
+
cost = cost_function(t[i], ' '.join(T[j:k]))
|
39 |
+
current = dp[i, j, q] + cost
|
40 |
+
if dp[i + 1, k, 0] > current:
|
41 |
+
dp[i + 1, k, 0] = current
|
42 |
+
come_from[i + 1, k, 0] = j
|
43 |
+
come_from_ins[i + 1, k, 0] = q
|
44 |
+
if q < insertions_allowed:
|
45 |
+
# Given matched sequence of t[:i] and T[:j], create
|
46 |
+
# insertion with following tokens T[j:k].
|
47 |
+
for k in range(j, len(T) + 1):
|
48 |
+
cost = len(' '.join(T[j:k]))
|
49 |
+
current = dp[i, j, q] + cost
|
50 |
+
if dp[i, k, q + 1] > current:
|
51 |
+
dp[i, k, q + 1] = current
|
52 |
+
come_from[i, k, q + 1] = j
|
53 |
+
come_from_ins[i, k, q + 1] = q
|
54 |
+
|
55 |
+
# Solution is in the dp[len(t), len(T), *]. Backtracking from there.
|
56 |
+
alignment = []
|
57 |
+
i = len(t)
|
58 |
+
j = len(T)
|
59 |
+
q = dp[i, j, :].argmin()
|
60 |
+
while i > 0 or q > 0:
|
61 |
+
is_insert = (come_from_ins[i, j, q] != q) and (q != 0)
|
62 |
+
j, k, q = come_from[i, j, q], j, come_from_ins[i, j, q]
|
63 |
+
if not is_insert:
|
64 |
+
i -= 1
|
65 |
+
|
66 |
+
if is_insert:
|
67 |
+
alignment.append(['INSERT', T[j:k], (i, i)])
|
68 |
+
else:
|
69 |
+
alignment.append([f'REPLACE_{t[i]}', T[j:k], (i, i + 1)])
|
70 |
+
|
71 |
+
assert j == 0
|
72 |
+
|
73 |
+
return dp[len(t), len(T)].min(), list(reversed(alignment))
|
74 |
+
|
75 |
+
|
76 |
+
def _split(token):
|
77 |
+
if not token:
|
78 |
+
return []
|
79 |
+
parts = token.split()
|
80 |
+
return parts or [token]
|
81 |
+
|
82 |
+
|
83 |
+
def apply_merge_transformation(source_tokens, target_words, shift_idx):
|
84 |
+
edits = []
|
85 |
+
if len(source_tokens) > 1 and len(target_words) == 1:
|
86 |
+
# check merge
|
87 |
+
transform = check_merge(source_tokens, target_words)
|
88 |
+
if transform:
|
89 |
+
for i in range(len(source_tokens) - 1):
|
90 |
+
edits.append([(shift_idx + i, shift_idx + i + 1), transform])
|
91 |
+
return edits
|
92 |
+
|
93 |
+
if len(source_tokens) == len(target_words) == 2:
|
94 |
+
# check swap
|
95 |
+
transform = check_swap(source_tokens, target_words)
|
96 |
+
if transform:
|
97 |
+
edits.append([(shift_idx, shift_idx + 1), transform])
|
98 |
+
return edits
|
99 |
+
|
100 |
+
|
101 |
+
def is_sent_ok(sent, delimeters=SEQ_DELIMETERS):
|
102 |
+
for del_val in delimeters.values():
|
103 |
+
if del_val in sent and del_val != delimeters["tokens"]:
|
104 |
+
return False
|
105 |
+
return True
|
106 |
+
|
107 |
+
|
108 |
+
def check_casetype(source_token, target_token):
|
109 |
+
if source_token.lower() != target_token.lower():
|
110 |
+
return None
|
111 |
+
if source_token.lower() == target_token:
|
112 |
+
return "$TRANSFORM_CASE_LOWER"
|
113 |
+
elif source_token.capitalize() == target_token:
|
114 |
+
return "$TRANSFORM_CASE_CAPITAL"
|
115 |
+
elif source_token.upper() == target_token:
|
116 |
+
return "$TRANSFORM_CASE_UPPER"
|
117 |
+
elif source_token[1:].capitalize() == target_token[1:] and source_token[0] == target_token[0]:
|
118 |
+
return "$TRANSFORM_CASE_CAPITAL_1"
|
119 |
+
elif source_token[:-1].upper() == target_token[:-1] and source_token[-1] == target_token[-1]:
|
120 |
+
return "$TRANSFORM_CASE_UPPER_-1"
|
121 |
+
else:
|
122 |
+
return None
|
123 |
+
|
124 |
+
|
125 |
+
def check_equal(source_token, target_token):
|
126 |
+
if source_token == target_token:
|
127 |
+
return "$KEEP"
|
128 |
+
else:
|
129 |
+
return None
|
130 |
+
|
131 |
+
|
132 |
+
def check_split(source_token, target_tokens):
|
133 |
+
if source_token.split("-") == target_tokens:
|
134 |
+
return "$TRANSFORM_SPLIT_HYPHEN"
|
135 |
+
else:
|
136 |
+
return None
|
137 |
+
|
138 |
+
|
139 |
+
def check_merge(source_tokens, target_tokens):
|
140 |
+
if "".join(source_tokens) == "".join(target_tokens):
|
141 |
+
return "$MERGE_SPACE"
|
142 |
+
elif "-".join(source_tokens) == "-".join(target_tokens):
|
143 |
+
return "$MERGE_HYPHEN"
|
144 |
+
else:
|
145 |
+
return None
|
146 |
+
|
147 |
+
|
148 |
+
def check_swap(source_tokens, target_tokens):
|
149 |
+
if source_tokens == [x for x in reversed(target_tokens)]:
|
150 |
+
return "$MERGE_SWAP"
|
151 |
+
else:
|
152 |
+
return None
|
153 |
+
|
154 |
+
|
155 |
+
def check_plural(source_token, target_token):
|
156 |
+
if source_token.endswith("s") and source_token[:-1] == target_token:
|
157 |
+
return "$TRANSFORM_AGREEMENT_SINGULAR"
|
158 |
+
elif target_token.endswith("s") and source_token == target_token[:-1]:
|
159 |
+
return "$TRANSFORM_AGREEMENT_PLURAL"
|
160 |
+
else:
|
161 |
+
return None
|
162 |
+
|
163 |
+
|
164 |
+
def check_verb(source_token, target_token):
|
165 |
+
encoding = encode_verb_form(source_token, target_token)
|
166 |
+
if encoding:
|
167 |
+
return f"$TRANSFORM_VERB_{encoding}"
|
168 |
+
else:
|
169 |
+
return None
|
170 |
+
|
171 |
+
|
172 |
+
def apply_transformation(source_token, target_token):
|
173 |
+
target_tokens = target_token.split()
|
174 |
+
if len(target_tokens) > 1:
|
175 |
+
# check split
|
176 |
+
transform = check_split(source_token, target_tokens)
|
177 |
+
if transform:
|
178 |
+
return transform
|
179 |
+
checks = [check_equal, check_casetype, check_verb, check_plural]
|
180 |
+
for check in checks:
|
181 |
+
transform = check(source_token, target_token)
|
182 |
+
if transform:
|
183 |
+
return transform
|
184 |
+
return None
|
185 |
+
|
186 |
+
|
187 |
+
def align_sequences(source_sent, target_sent):
|
188 |
+
# check if sent is OK
|
189 |
+
if not is_sent_ok(source_sent) or not is_sent_ok(target_sent):
|
190 |
+
return None
|
191 |
+
source_tokens = source_sent.split()
|
192 |
+
target_tokens = target_sent.split()
|
193 |
+
matcher = SequenceMatcher(None, source_tokens, target_tokens)
|
194 |
+
diffs = list(matcher.get_opcodes())
|
195 |
+
all_edits = []
|
196 |
+
for diff in diffs:
|
197 |
+
tag, i1, i2, j1, j2 = diff
|
198 |
+
source_part = _split(" ".join(source_tokens[i1:i2]))
|
199 |
+
target_part = _split(" ".join(target_tokens[j1:j2]))
|
200 |
+
if tag == 'equal':
|
201 |
+
continue
|
202 |
+
elif tag == 'delete':
|
203 |
+
# delete all words separately
|
204 |
+
for j in range(i2 - i1):
|
205 |
+
edit = [(i1 + j, i1 + j + 1), '$DELETE']
|
206 |
+
all_edits.append(edit)
|
207 |
+
elif tag == 'insert':
|
208 |
+
# append to the previous word
|
209 |
+
for target_token in target_part:
|
210 |
+
edit = ((i1 - 1, i1), f"$APPEND_{target_token}")
|
211 |
+
all_edits.append(edit)
|
212 |
+
else:
|
213 |
+
# check merge first of all
|
214 |
+
edits = apply_merge_transformation(source_part, target_part,
|
215 |
+
shift_idx=i1)
|
216 |
+
if edits:
|
217 |
+
all_edits.extend(edits)
|
218 |
+
continue
|
219 |
+
|
220 |
+
# normalize alignments if needed (make them singleton)
|
221 |
+
_, alignments = perfect_align(source_part, target_part,
|
222 |
+
insertions_allowed=0)
|
223 |
+
for alignment in alignments:
|
224 |
+
new_shift = alignment[2][0]
|
225 |
+
edits = convert_alignments_into_edits(alignment,
|
226 |
+
shift_idx=i1 + new_shift)
|
227 |
+
all_edits.extend(edits)
|
228 |
+
|
229 |
+
# get labels
|
230 |
+
labels = convert_edits_into_labels(source_tokens, all_edits)
|
231 |
+
# match tags to source tokens
|
232 |
+
sent_with_tags = add_labels_to_the_tokens(source_tokens, labels)
|
233 |
+
return sent_with_tags
|
234 |
+
|
235 |
+
|
236 |
+
def convert_edits_into_labels(source_tokens, all_edits):
|
237 |
+
# make sure that edits are flat
|
238 |
+
flat_edits = []
|
239 |
+
for edit in all_edits:
|
240 |
+
(start, end), edit_operations = edit
|
241 |
+
if isinstance(edit_operations, list):
|
242 |
+
for operation in edit_operations:
|
243 |
+
new_edit = [(start, end), operation]
|
244 |
+
flat_edits.append(new_edit)
|
245 |
+
elif isinstance(edit_operations, str):
|
246 |
+
flat_edits.append(edit)
|
247 |
+
else:
|
248 |
+
raise Exception("Unknown operation type")
|
249 |
+
all_edits = flat_edits[:]
|
250 |
+
labels = []
|
251 |
+
total_labels = len(source_tokens) + 1
|
252 |
+
if not all_edits:
|
253 |
+
labels = [["$KEEP"] for x in range(total_labels)]
|
254 |
+
else:
|
255 |
+
for i in range(total_labels):
|
256 |
+
edit_operations = [x[1] for x in all_edits if x[0][0] == i - 1
|
257 |
+
and x[0][1] == i]
|
258 |
+
if not edit_operations:
|
259 |
+
labels.append(["$KEEP"])
|
260 |
+
else:
|
261 |
+
labels.append(edit_operations)
|
262 |
+
return labels
|
263 |
+
|
264 |
+
|
265 |
+
def convert_alignments_into_edits(alignment, shift_idx):
|
266 |
+
edits = []
|
267 |
+
action, target_tokens, new_idx = alignment
|
268 |
+
source_token = action.replace("REPLACE_", "")
|
269 |
+
|
270 |
+
# check if delete
|
271 |
+
if not target_tokens:
|
272 |
+
edit = [(shift_idx, 1 + shift_idx), "$DELETE"]
|
273 |
+
return [edit]
|
274 |
+
|
275 |
+
# check splits
|
276 |
+
for i in range(1, len(target_tokens)):
|
277 |
+
target_token = " ".join(target_tokens[:i + 1])
|
278 |
+
transform = apply_transformation(source_token, target_token)
|
279 |
+
if transform:
|
280 |
+
edit = [(shift_idx, shift_idx + 1), transform]
|
281 |
+
edits.append(edit)
|
282 |
+
target_tokens = target_tokens[i + 1:]
|
283 |
+
for target in target_tokens:
|
284 |
+
edits.append([(shift_idx, shift_idx + 1), f"$APPEND_{target}"])
|
285 |
+
return edits
|
286 |
+
|
287 |
+
transform_costs = []
|
288 |
+
transforms = []
|
289 |
+
for target_token in target_tokens:
|
290 |
+
transform = apply_transformation(source_token, target_token)
|
291 |
+
if transform:
|
292 |
+
cost = 0
|
293 |
+
transforms.append(transform)
|
294 |
+
else:
|
295 |
+
cost = Levenshtein.distance(source_token, target_token)
|
296 |
+
transforms.append(None)
|
297 |
+
transform_costs.append(cost)
|
298 |
+
min_cost_idx = transform_costs.index(min(transform_costs))
|
299 |
+
# append to the previous word
|
300 |
+
for i in range(0, min_cost_idx):
|
301 |
+
target = target_tokens[i]
|
302 |
+
edit = [(shift_idx - 1, shift_idx), f"$APPEND_{target}"]
|
303 |
+
edits.append(edit)
|
304 |
+
# replace/transform target word
|
305 |
+
transform = transforms[min_cost_idx]
|
306 |
+
target = transform if transform is not None \
|
307 |
+
else f"$REPLACE_{target_tokens[min_cost_idx]}"
|
308 |
+
edit = [(shift_idx, 1 + shift_idx), target]
|
309 |
+
edits.append(edit)
|
310 |
+
# append to this word
|
311 |
+
for i in range(min_cost_idx + 1, len(target_tokens)):
|
312 |
+
target = target_tokens[i]
|
313 |
+
edit = [(shift_idx, 1 + shift_idx), f"$APPEND_{target}"]
|
314 |
+
edits.append(edit)
|
315 |
+
return edits
|
316 |
+
|
317 |
+
|
318 |
+
def add_labels_to_the_tokens(source_tokens, labels, delimeters=SEQ_DELIMETERS):
|
319 |
+
tokens_with_all_tags = []
|
320 |
+
source_tokens_with_start = [START_TOKEN] + source_tokens
|
321 |
+
for token, label_list in zip(source_tokens_with_start, labels):
|
322 |
+
all_tags = delimeters['operations'].join(label_list)
|
323 |
+
comb_record = token + delimeters['labels'] + all_tags
|
324 |
+
tokens_with_all_tags.append(comb_record)
|
325 |
+
return delimeters['tokens'].join(tokens_with_all_tags)
|
326 |
+
|
327 |
+
|
328 |
+
def convert_data_from_raw_files(source_file, target_file, output_file, chunk_size):
|
329 |
+
tagged = []
|
330 |
+
source_data, target_data = read_parallel_lines(source_file, target_file)
|
331 |
+
print(f"The size of raw dataset is {len(source_data)}")
|
332 |
+
cnt_total, cnt_all, cnt_tp = 0, 0, 0
|
333 |
+
for source_sent, target_sent in tqdm(zip(source_data, target_data)):
|
334 |
+
try:
|
335 |
+
aligned_sent = align_sequences(source_sent, target_sent)
|
336 |
+
except Exception:
|
337 |
+
aligned_sent = align_sequences(source_sent, target_sent)
|
338 |
+
if source_sent != target_sent:
|
339 |
+
cnt_tp += 1
|
340 |
+
alignments = [aligned_sent]
|
341 |
+
cnt_all += len(alignments)
|
342 |
+
try:
|
343 |
+
check_sent = convert_tagged_line(aligned_sent)
|
344 |
+
except Exception:
|
345 |
+
# debug mode
|
346 |
+
aligned_sent = align_sequences(source_sent, target_sent)
|
347 |
+
check_sent = convert_tagged_line(aligned_sent)
|
348 |
+
|
349 |
+
if "".join(check_sent.split()) != "".join(
|
350 |
+
target_sent.split()):
|
351 |
+
# do it again for debugging
|
352 |
+
aligned_sent = align_sequences(source_sent, target_sent)
|
353 |
+
check_sent = convert_tagged_line(aligned_sent)
|
354 |
+
print(f"Incorrect pair: \n{target_sent}\n{check_sent}")
|
355 |
+
continue
|
356 |
+
if alignments:
|
357 |
+
cnt_total += len(alignments)
|
358 |
+
tagged.extend(alignments)
|
359 |
+
if len(tagged) > chunk_size:
|
360 |
+
write_lines(output_file, tagged, mode='a')
|
361 |
+
tagged = []
|
362 |
+
|
363 |
+
print(f"Overall extracted {cnt_total}. "
|
364 |
+
f"Original TP {cnt_tp}."
|
365 |
+
f" Original TN {cnt_all - cnt_tp}")
|
366 |
+
if tagged:
|
367 |
+
write_lines(output_file, tagged, 'a')
|
368 |
+
|
369 |
+
|
370 |
+
def convert_labels_into_edits(labels):
|
371 |
+
all_edits = []
|
372 |
+
for i, label_list in enumerate(labels):
|
373 |
+
if label_list == ["$KEEP"]:
|
374 |
+
continue
|
375 |
+
else:
|
376 |
+
edit = [(i - 1, i), label_list]
|
377 |
+
all_edits.append(edit)
|
378 |
+
return all_edits
|
379 |
+
|
380 |
+
|
381 |
+
def get_target_sent_by_levels(source_tokens, labels):
|
382 |
+
relevant_edits = convert_labels_into_edits(labels)
|
383 |
+
target_tokens = source_tokens[:]
|
384 |
+
leveled_target_tokens = {}
|
385 |
+
if not relevant_edits:
|
386 |
+
target_sentence = " ".join(target_tokens)
|
387 |
+
return leveled_target_tokens, target_sentence
|
388 |
+
max_level = max([len(x[1]) for x in relevant_edits])
|
389 |
+
for level in range(max_level):
|
390 |
+
rest_edits = []
|
391 |
+
shift_idx = 0
|
392 |
+
for edits in relevant_edits:
|
393 |
+
(start, end), label_list = edits
|
394 |
+
label = label_list[0]
|
395 |
+
target_pos = start + shift_idx
|
396 |
+
source_token = target_tokens[target_pos] if target_pos >= 0 else START_TOKEN
|
397 |
+
if label == "$DELETE":
|
398 |
+
del target_tokens[target_pos]
|
399 |
+
shift_idx -= 1
|
400 |
+
elif label.startswith("$APPEND_"):
|
401 |
+
word = label.replace("$APPEND_", "")
|
402 |
+
target_tokens[target_pos + 1: target_pos + 1] = [word]
|
403 |
+
shift_idx += 1
|
404 |
+
elif label.startswith("$REPLACE_"):
|
405 |
+
word = label.replace("$REPLACE_", "")
|
406 |
+
target_tokens[target_pos] = word
|
407 |
+
elif label.startswith("$TRANSFORM"):
|
408 |
+
word = apply_reverse_transformation(source_token, label)
|
409 |
+
if word is None:
|
410 |
+
word = source_token
|
411 |
+
target_tokens[target_pos] = word
|
412 |
+
elif label.startswith("$MERGE_"):
|
413 |
+
# apply merge only on last stage
|
414 |
+
if level == (max_level - 1):
|
415 |
+
target_tokens[target_pos + 1: target_pos + 1] = [label]
|
416 |
+
shift_idx += 1
|
417 |
+
else:
|
418 |
+
rest_edit = [(start + shift_idx, end + shift_idx), [label]]
|
419 |
+
rest_edits.append(rest_edit)
|
420 |
+
rest_labels = label_list[1:]
|
421 |
+
if rest_labels:
|
422 |
+
rest_edit = [(start + shift_idx, end + shift_idx), rest_labels]
|
423 |
+
rest_edits.append(rest_edit)
|
424 |
+
|
425 |
+
leveled_tokens = target_tokens[:]
|
426 |
+
# update next step
|
427 |
+
relevant_edits = rest_edits[:]
|
428 |
+
if level == (max_level - 1):
|
429 |
+
leveled_tokens = replace_merge_transforms(leveled_tokens)
|
430 |
+
leveled_labels = convert_edits_into_labels(leveled_tokens,
|
431 |
+
relevant_edits)
|
432 |
+
leveled_target_tokens[level + 1] = {"tokens": leveled_tokens,
|
433 |
+
"labels": leveled_labels}
|
434 |
+
|
435 |
+
target_sentence = " ".join(leveled_target_tokens[max_level]["tokens"])
|
436 |
+
return leveled_target_tokens, target_sentence
|
437 |
+
|
438 |
+
|
439 |
+
def replace_merge_transforms(tokens):
|
440 |
+
if all(not x.startswith("$MERGE_") for x in tokens):
|
441 |
+
return tokens
|
442 |
+
target_tokens = tokens[:]
|
443 |
+
allowed_range = (1, len(tokens) - 1)
|
444 |
+
for i in range(len(tokens)):
|
445 |
+
target_token = tokens[i]
|
446 |
+
if target_token.startswith("$MERGE"):
|
447 |
+
if target_token.startswith("$MERGE_SWAP") and i in allowed_range:
|
448 |
+
target_tokens[i - 1] = tokens[i + 1]
|
449 |
+
target_tokens[i + 1] = tokens[i - 1]
|
450 |
+
target_tokens[i: i + 1] = []
|
451 |
+
target_line = " ".join(target_tokens)
|
452 |
+
target_line = target_line.replace(" $MERGE_HYPHEN ", "-")
|
453 |
+
target_line = target_line.replace(" $MERGE_SPACE ", "")
|
454 |
+
return target_line.split()
|
455 |
+
|
456 |
+
|
457 |
+
def convert_tagged_line(line, delimeters=SEQ_DELIMETERS):
|
458 |
+
label_del = delimeters['labels']
|
459 |
+
source_tokens = [x.split(label_del)[0]
|
460 |
+
for x in line.split(delimeters['tokens'])][1:]
|
461 |
+
labels = [x.split(label_del)[1].split(delimeters['operations'])
|
462 |
+
for x in line.split(delimeters['tokens'])]
|
463 |
+
assert len(source_tokens) + 1 == len(labels)
|
464 |
+
levels_dict, target_line = get_target_sent_by_levels(source_tokens, labels)
|
465 |
+
return target_line
|
466 |
+
|
467 |
+
|
468 |
+
def main(args):
|
469 |
+
convert_data_from_raw_files(args.source, args.target, args.output_file, args.chunk_size)
|
470 |
+
|
471 |
+
|
472 |
+
if __name__ == '__main__':
|
473 |
+
parser = argparse.ArgumentParser()
|
474 |
+
parser.add_argument('-s', '--source',
|
475 |
+
help='Path to the source file',
|
476 |
+
required=True)
|
477 |
+
parser.add_argument('-t', '--target',
|
478 |
+
help='Path to the target file',
|
479 |
+
required=True)
|
480 |
+
parser.add_argument('-o', '--output_file',
|
481 |
+
help='Path to the output file',
|
482 |
+
required=True)
|
483 |
+
parser.add_argument('--chunk_size',
|
484 |
+
type=int,
|
485 |
+
help='Write tagged sentences to the output file in chunks of this many lines.',
|
486 |
+
default=1000000)
|
487 |
+
args = parser.parse_args()
|
488 |
+
main(args)
|
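As a sanity check on the labeling scheme, here is a minimal sketch (not part of the commit) of the per-sentence round trip that convert_data_from_raw_files performs: align a source/target pair into tagged tokens, then re-apply the labels and compare with the target. It assumes it is run from inside utils/ (the module imports helpers directly) and that the verb-form vocabulary used by encode_verb_form is available:

# hypothetical round-trip sketch; the sentence pair is a made-up example
from preprocess_data import align_sequences, convert_tagged_line

source = "She go to school every days ."
target = "She goes to school every day ."

tagged = align_sequences(source, target)   # tokens joined with $KEEP/$REPLACE_/$TRANSFORM_ labels
restored = convert_tagged_line(tagged)     # re-apply the labels onto the source tokens
print(tagged)
print(restored)                            # should match the target up to whitespace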