asoria HF staff committed on
Commit
4996a19
1 Parent(s): 2441f3f

Removing stop words, but just for English

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -22,6 +22,7 @@ from transformers import (
22
  from prompts import system_prompt, example_prompt, main_prompt
23
  from umap import UMAP
24
  from hdbscan import HDBSCAN
 
25
 
26
  # from cuml.cluster import HDBSCAN
27
  # from cuml.manifold import UMAP
@@ -36,7 +37,7 @@ session = requests.Session()
36
  sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
37
  keybert = KeyBERTInspired()
38
  mmr = MaximalMarginalRelevance(diversity=0.3)
39
-
40
 
41
  model_id = "meta-llama/Llama-2-7b-chat-hf"
42
  device = f"cuda:{cuda.current_device()}" if cuda.is_available() else "cpu"
@@ -125,6 +126,7 @@ def fit_model(base_model, docs, embeddings):
125
  umap_model=umap_model,
126
  hdbscan_model=hdbscan_model,
127
  representation_model=representation_model,
 
128
  # Hyperparameters
129
  top_n_words=10,
130
  verbose=True,
 
22
  from prompts import system_prompt, example_prompt, main_prompt
23
  from umap import UMAP
24
  from hdbscan import HDBSCAN
25
+ from sklearn.feature_extraction.text import CountVectorizer
26
 
27
  # from cuml.cluster import HDBSCAN
28
  # from cuml.manifold import UMAP
 
37
  sentence_model = SentenceTransformer("all-MiniLM-L6-v2")
38
  keybert = KeyBERTInspired()
39
  mmr = MaximalMarginalRelevance(diversity=0.3)
40
+ vectorizer_model = CountVectorizer(stop_words="english")
41
 
42
  model_id = "meta-llama/Llama-2-7b-chat-hf"
43
  device = f"cuda:{cuda.current_device()}" if cuda.is_available() else "cpu"
 
126
  umap_model=umap_model,
127
  hdbscan_model=hdbscan_model,
128
  representation_model=representation_model,
129
+ vectorizer_model=vectorizer_model,
130
  # Hyperparameters
131
  top_n_words=10,
132
  verbose=True,