asoria HF staff committed on
Commit
c79877a
1 Parent(s): 4d7b893

Try to fix pickle error

Browse files
Files changed (1) hide show
  1. app.py +14 -11
app.py CHANGED
@@ -137,7 +137,7 @@ def calculate_embeddings(docs):
137
 
138
 
139
  @spaces.GPU
140
- def fit_model(base_model, docs, embeddings):
141
  new_model = BERTopic(
142
  "english",
143
  # Sub-models
@@ -154,15 +154,7 @@ def fit_model(base_model, docs, embeddings):
154
  logging.debug("Fitting new model")
155
  new_model.fit(docs, embeddings)
156
  logging.debug("End fitting new model")
157
-
158
- if base_model is None:
159
- return new_model, new_model
160
-
161
- updated_model = BERTopic.merge_models([base_model, new_model])
162
- nr_new_topics = len(set(updated_model.topics_)) - len(set(base_model.topics_))
163
- new_topics = list(updated_model.topic_labels_.values())[-nr_new_topics:]
164
- logging.info(f"The following topics are newly found: {new_topics}")
165
- return updated_model, new_model
166
 
167
 
168
  def generate_topics(dataset, config, split, column, nested_column):
@@ -188,7 +180,18 @@ def generate_topics(dataset, config, split, column, nested_column):
188
  )
189
 
190
  embeddings = calculate_embeddings(docs)
191
- base_model, _ = fit_model(base_model, docs, embeddings)
 
 
 
 
 
 
 
 
 
 
 
192
 
193
  repr_model_topics = {
194
  key: label[0][0].split("\n")[0]
 
137
 
138
 
139
  @spaces.GPU
140
+ def fit_model(docs, embeddings):
141
  new_model = BERTopic(
142
  "english",
143
  # Sub-models
 
154
  logging.debug("Fitting new model")
155
  new_model.fit(docs, embeddings)
156
  logging.debug("End fitting new model")
157
+ return new_model
 
 
 
 
 
 
 
 
158
 
159
 
160
  def generate_topics(dataset, config, split, column, nested_column):
 
180
  )
181
 
182
  embeddings = calculate_embeddings(docs)
183
+ new_model = fit_model(docs, embeddings)
184
+
185
+ if base_model is None:
186
+ base_model = new_model
187
+ else:
188
+ updated_model = BERTopic.merge_models([base_model, new_model])
189
+ nr_new_topics = len(set(updated_model.topics_)) - len(
190
+ set(base_model.topics_)
191
+ )
192
+ new_topics = list(updated_model.topic_labels_.values())[-nr_new_topics:]
193
+ logging.info(f"The following topics are newly found: {new_topics}")
194
+ base_model = updated_model
195
 
196
  repr_model_topics = {
197
  key: label[0][0].split("\n")[0]