Spaces:
Build error
Build error
updated to singletons
Browse files
- .vscode/launch.json +0 -16
- .vscode/settings.json +0 -7
- rebel.py +17 -2
- utils.py +3 -1
.vscode/launch.json
DELETED
@@ -1,16 +0,0 @@
|
|
1 |
-
{
|
2 |
-
// Use IntelliSense to learn about possible attributes.
|
3 |
-
// Hover to view descriptions of existing attributes.
|
4 |
-
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
5 |
-
"version": "0.2.0",
|
6 |
-
"configurations": [
|
7 |
-
{
|
8 |
-
"name": "Python: Current File",
|
9 |
-
"type": "python",
|
10 |
-
"request": "launch",
|
11 |
-
"program": "${file}",
|
12 |
-
"console": "integratedTerminal",
|
13 |
-
"justMyCode": false
|
14 |
-
}
|
15 |
-
]
|
16 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.vscode/settings.json
DELETED
@@ -1,7 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"workbench.colorCustomizations": {
|
3 |
-
"activityBar.background": "#09323E",
|
4 |
-
"titleBar.activeBackground": "#0C4656",
|
5 |
-
"titleBar.activeForeground": "#F6FCFE"
|
6 |
-
}
|
7 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
rebel.py
CHANGED
@@ -27,8 +27,23 @@ DEFAULT_LABEL_COLORS = {
|
|
27 |
"PERCENT": "#e4e7d2",
|
28 |
}
|
29 |
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
nlp = spacy.load("en_core_web_sm")
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
doc = nlp("\n".join(texts).lower())
|
33 |
NERs = [ent.text for ent in doc.ents]
|
34 |
NER_types = [ent.label_ for ent in doc.ents]
|
@@ -78,7 +93,7 @@ def generate_knowledge_graph(texts: List[str], filename: str):
|
|
78 |
|
79 |
@lru_cache(maxsize=16)
|
80 |
def generate_partial_graph(text: str):
|
81 |
-
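triplet_extractor = pipeline('text2text-generation', model='Babelscape/rebel-large', tokenizer='Babelscape/rebel-large')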
|
82 |
a = triplet_extractor(text, return_tensors=True, return_text=False)[0]["generated_token_ids"]["output_ids"]
|
83 |
extracted_text = triplet_extractor.tokenizer.batch_decode(a)
|
84 |
extracted_triplets = extract_triplets(extracted_text[0])
|
|
|
27 |
"PERCENT": "#e4e7d2",
|
28 |
}
|
29 |
|
30 |
+
|
31 |
+
@st.experimental_singleton(max_entries=1)
|
32 |
+
def get_pipeline():
|
33 |
+
triplet_extractor = pipeline('text2text-generation', model='Babelscape/rebel-large', tokenizer='Babelscape/rebel-large')
|
34 |
+
return triplet_extractor
|
35 |
+
|
36 |
+
|
37 |
+
|
38 |
+
@st.experimental_singleton(max_entries=1)
|
39 |
+
def load_spacy():
|
40 |
nlp = spacy.load("en_core_web_sm")
|
41 |
+
return nlp
|
42 |
+
|
43 |
+
|
44 |
+
def generate_knowledge_graph(texts: List[str], filename: str):
|
45 |
+
nlp = load_spacy()
|
46 |
+
|
47 |
doc = nlp("\n".join(texts).lower())
|
48 |
NERs = [ent.text for ent in doc.ents]
|
49 |
NER_types = [ent.label_ for ent in doc.ents]
|
|
|
93 |
|
94 |
@lru_cache(maxsize=16)
|
95 |
def generate_partial_graph(text: str):
|
96 |
+
triplet_extractor = get_pipeline()
|
97 |
a = triplet_extractor(text, return_tensors=True, return_text=False)[0]["generated_token_ids"]["output_ids"]
|
98 |
extracted_text = triplet_extractor.tokenizer.batch_decode(a)
|
99 |
extracted_triplets = extract_triplets(extracted_text[0])
|
utils.py
CHANGED
@@ -3,4 +3,6 @@ def clip_text(t, lenght = 4):
|
|
3 |
t_sub = t.replace("...", "dotdotdot")
|
4 |
t_clipped = ".".join(t_sub.split(".")[:lenght]) + "."
|
5 |
t_reverted = t_clipped.replace("dotdotdot", "...")
|
6 |
-
return t_reverted
|
|
|
|
|
|
3 |
t_sub = t.replace("...", "dotdotdot")
|
4 |
t_clipped = ".".join(t_sub.split(".")[:lenght]) + "."
|
5 |
t_reverted = t_clipped.replace("dotdotdot", "...")
|
6 |
+
return t_reverted
|
7 |
+
|
8 |
+
|