Spaces:
Sleeping
Sleeping
Try to fix pickle error
Browse files
app.py
CHANGED
@@ -137,7 +137,7 @@ def calculate_embeddings(docs):
|
|
137 |
|
138 |
|
139 |
@spaces.GPU
|
140 |
-
def fit_model(
|
141 |
new_model = BERTopic(
|
142 |
"english",
|
143 |
# Sub-models
|
@@ -154,15 +154,7 @@ def fit_model(base_model, docs, embeddings):
|
|
154 |
logging.debug("Fitting new model")
|
155 |
new_model.fit(docs, embeddings)
|
156 |
logging.debug("End fitting new model")
|
157 |
-
|
158 |
-
if base_model is None:
|
159 |
-
return new_model, new_model
|
160 |
-
|
161 |
-
updated_model = BERTopic.merge_models([base_model, new_model])
|
162 |
-
nr_new_topics = len(set(updated_model.topics_)) - len(set(base_model.topics_))
|
163 |
-
new_topics = list(updated_model.topic_labels_.values())[-nr_new_topics:]
|
164 |
-
logging.info(f"The following topics are newly found: {new_topics}")
|
165 |
-
return updated_model, new_model
|
166 |
|
167 |
|
168 |
def generate_topics(dataset, config, split, column, nested_column):
|
@@ -188,7 +180,18 @@ def generate_topics(dataset, config, split, column, nested_column):
|
|
188 |
)
|
189 |
|
190 |
embeddings = calculate_embeddings(docs)
|
191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
|
193 |
repr_model_topics = {
|
194 |
key: label[0][0].split("\n")[0]
|
|
|
137 |
|
138 |
|
139 |
@spaces.GPU
|
140 |
+
def fit_model(docs, embeddings):
|
141 |
new_model = BERTopic(
|
142 |
"english",
|
143 |
# Sub-models
|
|
|
154 |
logging.debug("Fitting new model")
|
155 |
new_model.fit(docs, embeddings)
|
156 |
logging.debug("End fitting new model")
|
157 |
+
return new_model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
|
159 |
|
160 |
def generate_topics(dataset, config, split, column, nested_column):
|
|
|
180 |
)
|
181 |
|
182 |
embeddings = calculate_embeddings(docs)
|
183 |
+
new_model = fit_model(docs, embeddings)
|
184 |
+
|
185 |
+
if base_model is None:
|
186 |
+
base_model = new_model
|
187 |
+
else:
|
188 |
+
updated_model = BERTopic.merge_models([base_model, new_model])
|
189 |
+
nr_new_topics = len(set(updated_model.topics_)) - len(
|
190 |
+
set(base_model.topics_)
|
191 |
+
)
|
192 |
+
new_topics = list(updated_model.topic_labels_.values())[-nr_new_topics:]
|
193 |
+
logging.info(f"The following topics are newly found: {new_topics}")
|
194 |
+
base_model = updated_model
|
195 |
|
196 |
repr_model_topics = {
|
197 |
key: label[0][0].split("\n")[0]
|