crabz committed on
Commit
44ae05b
1 Parent(s): 97fee74

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +59 -8
README.md CHANGED
@@ -10,7 +10,7 @@ metrics:
10
  - f1
11
  - accuracy
12
  model-index:
13
- - name: output_dir
14
  results:
15
  - task:
16
  name: Token Classification
@@ -34,7 +34,7 @@ model-index:
34
  value: 0.9785228256835333
35
  ---
36
 
37
- # output_dir
38
 
39
  This model is a fine-tuned version of [gerulata/slovakbert](https://huggingface.co/gerulata/slovakbert) on the wikiann_sk dataset.
40
  It achieves the following results on the evaluation set:
@@ -44,17 +44,68 @@ It achieves the following results on the evaluation set:
44
  - F1: 0.9398
45
  - Accuracy: 0.9785
46
 
47
- ## Model description
48
 
49
- More information needed
 
50
 
51
- ## Intended uses & limitations
52
 
53
- More information needed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
- ## Training and evaluation data
 
 
 
 
56
 
57
- More information needed
 
 
 
 
 
 
 
 
 
58
 
59
  ## Training procedure
60
 
 
10
  - f1
11
  - accuracy
12
  model-index:
13
+ - name: slovakbert-ner
14
  results:
15
  - task:
16
  name: Token Classification
 
34
  value: 0.9785228256835333
35
  ---
36
 
37
+ # Named Entity Recognition based on SlovakBERT
38
 
39
  This model is a fine-tuned version of [gerulata/slovakbert](https://huggingface.co/gerulata/slovakbert) on the wikiann_sk dataset.
40
  It achieves the following results on the evaluation set:
 
44
  - F1: 0.9398
45
  - Accuracy: 0.9785
46
 
47
+ ## Intended uses & limitations
48
 
49
+ ```python
50
+ from transformers import pipeline
51
 
 
52
 
53
+ ner_pipeline = pipeline(task='ner', model='crabz/slovakbert-ner')
54
+ input_sentence = "Minister financií a líder mandátovo najsilnejšieho hnutia OĽaNO Igor Matovič upozorňuje, že následky tretej vlny budú na Slovensku veľmi veľké."
55
+ classifications = ner_pipeline(input_sentence)
56
+ ```
57
+
58
+ The predicted entities can be visualized with `displaCy`:
59
+
60
+ ```python
61
+ import spacy
62
+ from spacy import displacy
63
+
64
+
65
+ ner_map = {0: '0', 1: 'B-OSOBA', 2: 'I-OSOBA', 3: 'B-ORGANIZÁCIA', 4: 'I-ORGANIZÁCIA', 5: 'B-LOKALITA', 6: 'I-LOKALITA'}
66
+
67
+ entities = []
68
+ for i in range(len(classifications)):
69
+ if classifications[i]['entity'] != 0:
70
+ if ner_map[classifications[i]['entity']][0] == 'B':
71
+ j = i + 1
72
+ while j < len(classifications) and ner_map[classifications[j]['entity']][0] == 'I':
73
+ j += 1
74
+ entities.append((ner_map[classifications[i]['entity']].split('-')[1], classifications[i]['start'],
75
+ classifications[j - 1]['end']))
76
+
77
+ nlp = spacy.blank("en") # it should work with any language
78
+
79
+ doc = nlp(input_sentence)
80
+
81
+ ents = []
82
+ for ee in entities:
83
+ ents.append(doc.char_span(ee[1], ee[2], ee[0]))
84
+
85
+ doc.ents = ents
86
+
87
+ options = {"ents": ["OSOBA", "ORGANIZÁCIA", "LOKALITA"],
88
+ "colors": {"OSOBA": "lightblue", "ORGANIZÁCIA": "lightcoral", "LOKALITA": "lightgreen"}}
89
+ displacy_html = displacy.render(doc, style="ent", options=options)
90
+
91
+ ```
92
 
93
+ <div class="entities" style="line-height: 2.5; direction: ltr">Minister financií a líder mandátovo najsilnejšieho hnutia
94
+ <mark class="entity" style="background: lightcoral; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
95
+ OĽaNO
96
+ <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">ORGANIZÁCIA</span>
97
+ </mark>
98
 
99
+ <mark class="entity" style="background: lightblue; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
100
+ Igor Matovič
101
+ <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">OSOBA</span>
102
+ </mark>
103
+ upozorňuje, že následky tretej vlny budú na
104
+ <mark class="entity" style="background: lightgreen; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
105
+ Slovensku
106
+ <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">LOKALITA</span>
107
+ </mark>
108
+ veľmi veľké.</div>
109
 
110
  ## Training procedure
111