Serega6678
committed on
Commit
•
518cd2f
1
Parent(s):
b2d06a5
Update README.md
Browse files
README.md
CHANGED
@@ -36,22 +36,46 @@ The key differences between NuNerZero Token Long in comparison to GLiNER are:
|
|
36 |
```python
|
37 |
from gliner import GLiNER
|
38 |
|
39 |
-
model = GLiNER.from_pretrained("numind/NuNerZero_long_contex")
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
-
|
|
|
|
|
46 |
|
47 |
-
""
|
48 |
|
49 |
entities = model.predict_entities(text, labels)
|
50 |
|
|
|
|
|
51 |
for entity in entities:
|
52 |
print(entity["text"], "=>", entity["label"])
|
53 |
```
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
## Fine-tuning
|
56 |
|
57 |
A fine-tuning script can be found [here](https://colab.research.google.com/drive/19WDnuD2U-B0h-FzX7I5FySNP6sHt4Cru?usp=sharing).
|
|
|
36 |
```python
|
37 |
from gliner import GLiNER
|
38 |
|
|
|
39 |
|
40 |
+
def merge_entities(entities, text=None):
    """Merge neighbouring entity spans that share a label into one entity.

    Two consecutive entities are merged when they carry the same ``label``
    and the second one starts either exactly where the first ends or one
    character after it.

    Args:
        entities: Sequence of dicts with at least the keys ``"start"``,
            ``"end"``, ``"text"`` and ``"label"``, in the order in which
            they should be considered for merging.
        text: Optional source string the ``start``/``end`` offsets index
            into. When given, the merged entity text is sliced from it.
            When omitted, a module-level ``text`` variable is used if one
            exists (the behaviour of the original snippet); otherwise the
            merged text is rebuilt from the entities' own ``"text"``
            values, joined with a single space when the spans are one
            character apart.

    Returns:
        A new list of entity dicts. The input dicts are not modified.
    """
    if not entities:
        return []

    if text is None:
        # Backward compatibility: the original snippet read a global ``text``.
        text = globals().get("text")

    merged = []
    # Work on copies so the caller's entity dicts are left untouched.
    current = dict(entities[0])
    for next_entity in entities[1:]:
        gap = next_entity["start"] - current["end"]
        if next_entity["label"] == current["label"] and gap in (0, 1):
            if text is not None:
                current["text"] = text[current["start"]:next_entity["end"]].strip()
            else:
                # No source text available: approximate the one-character
                # gap between the spans with a single space.
                separator = " " if gap == 1 else ""
                current["text"] = (
                    current["text"] + separator + next_entity["text"]
                ).strip()
            current["end"] = next_entity["end"]
        else:
            merged.append(current)
            current = dict(next_entity)
    # Append the last entity
    merged.append(current)
    return merged
|
55 |
+
|
56 |
+
|
57 |
+
# Load the long-context NuNerZero checkpoint through the GLiNER wrapper.
model = GLiNER.from_pretrained("numind/NuNerZero_long_context")

# NuNerZero requires labels to be lower-cased!
labels = list(map(str.lower, ["organization", "initiative", "project"]))

text = "At the annual technology summit, the keynote address was delivered by a senior member of the Association for Computing Machinery Special Interest Group on Algorithms and Computation Theory, which recently launched an expansive initiative titled 'Quantum Computing and Algorithmic Innovations: Shaping the Future of Technology'. This initiative explores the implications of quantum mechanics on next-generation computing and algorithm design and is part of a broader effort that includes the 'Global Computational Science Advancement Project'. The latter focuses on enhancing computational methodologies across scientific disciplines, aiming to set new benchmarks in computational efficiency and accuracy."

# Predict the raw spans, then fuse neighbouring spans that share a label.
raw_entities = model.predict_entities(text, labels)
entities = merge_entities(raw_entities)

for entity in entities:
    print(entity["text"], "=>", entity["label"])
|
71 |
```
|
72 |
|
73 |
+
```
|
74 |
+
Association for Computing Machinery Special Interest Group on Algorithms and Computation Theory => organization
|
75 |
+
Quantum Computing and Algorithmic Innovations: Shaping the Future of Technology => initiative
|
76 |
+
Global Computational Science Advancement Project => project
|
77 |
+
```
|
78 |
+
|
79 |
## Fine-tuning
|
80 |
|
81 |
A fine-tuning script can be found [here](https://colab.research.google.com/drive/19WDnuD2U-B0h-FzX7I5FySNP6sHt4Cru?usp=sharing).
|