khaerens committed on
Commit
b40f434
1 Parent(s): b9584cf

updated to singletons

Browse files
Files changed (4) hide show
  1. .vscode/launch.json +0 -16
  2. .vscode/settings.json +0 -7
  3. rebel.py +17 -2
  4. utils.py +3 -1
.vscode/launch.json DELETED
@@ -1,16 +0,0 @@
1
- {
2
- // Use IntelliSense to learn about possible attributes.
3
- // Hover to view descriptions of existing attributes.
4
- // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5
- "version": "0.2.0",
6
- "configurations": [
7
- {
8
- "name": "Python: Current File",
9
- "type": "python",
10
- "request": "launch",
11
- "program": "${file}",
12
- "console": "integratedTerminal",
13
- "justMyCode": false
14
- }
15
- ]
16
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.vscode/settings.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "workbench.colorCustomizations": {
3
- "activityBar.background": "#09323E",
4
- "titleBar.activeBackground": "#0C4656",
5
- "titleBar.activeForeground": "#F6FCFE"
6
- }
7
- }
 
 
 
 
 
 
 
 
rebel.py CHANGED
@@ -27,8 +27,23 @@ DEFAULT_LABEL_COLORS = {
27
  "PERCENT": "#e4e7d2",
28
  }
29
 
30
- def generate_knowledge_graph(texts: List[str], filename: str):
 
 
 
 
 
 
 
 
 
31
  nlp = spacy.load("en_core_web_sm")
 
 
 
 
 
 
32
  doc = nlp("\n".join(texts).lower())
33
  NERs = [ent.text for ent in doc.ents]
34
  NER_types = [ent.label_ for ent in doc.ents]
@@ -78,7 +93,7 @@ def generate_knowledge_graph(texts: List[str], filename: str):
78
 
79
  @lru_cache(maxsize=16)
80
  def generate_partial_graph(text: str):
81
- triplet_extractor = pipeline('text2text-generation', model='Babelscape/rebel-large', tokenizer='Babelscape/rebel-large')
82
  a = triplet_extractor(text, return_tensors=True, return_text=False)[0]["generated_token_ids"]["output_ids"]
83
  extracted_text = triplet_extractor.tokenizer.batch_decode(a)
84
  extracted_triplets = extract_triplets(extracted_text[0])
 
27
  "PERCENT": "#e4e7d2",
28
  }
29
 
30
+
31
+ @st.experimental_singleton(max_entries=1)
32
+ def get_pipeline():
33
+ triplet_extractor = pipeline('text2text-generation', model='Babelscape/rebel-large', tokenizer='Babelscape/rebel-large')
34
+ return triplet_extractor
35
+
36
+
37
+
38
+ @st.experimental_singleton(max_entries=1)
39
+ def load_spacy():
40
  nlp = spacy.load("en_core_web_sm")
41
+ return nlp
42
+
43
+
44
+ def generate_knowledge_graph(texts: List[str], filename: str):
45
+ nlp = load_spacy()
46
+
47
  doc = nlp("\n".join(texts).lower())
48
  NERs = [ent.text for ent in doc.ents]
49
  NER_types = [ent.label_ for ent in doc.ents]
 
93
 
94
  @lru_cache(maxsize=16)
95
  def generate_partial_graph(text: str):
96
+ triplet_extractor = get_pipeline()
97
  a = triplet_extractor(text, return_tensors=True, return_text=False)[0]["generated_token_ids"]["output_ids"]
98
  extracted_text = triplet_extractor.tokenizer.batch_decode(a)
99
  extracted_triplets = extract_triplets(extracted_text[0])
utils.py CHANGED
@@ -3,4 +3,6 @@ def clip_text(t, lenght = 4):
3
  t_sub = t.replace("...", "dotdotdot")
4
  t_clipped = ".".join(t_sub.split(".")[:lenght]) + "."
5
  t_reverted = t_clipped.replace("dotdotdot", "...")
6
- return t_reverted
 
 
 
3
  t_sub = t.replace("...", "dotdotdot")
4
  t_clipped = ".".join(t_sub.split(".")[:lenght]) + "."
5
  t_reverted = t_clipped.replace("dotdotdot", "...")
6
+ return t_reverted
7
+
8
+