dicta-il
/

dictabert-joint

Feature Extraction

text-embeddings-inference

Model card | Files and versions | Community

Shaltiel committed on Apr 4

Commit

64f9a08

•

1 Parent(s): deb5cae

Fixed ud break

Files changed (1) hide show

BertForJointParsing.py +3 -3

BertForJointParsing.py CHANGED Viewed

@@ -239,7 +239,7 @@ class BertForJointParsing(BertPreTrainedModel):
                 final_output[sent_idx]['ner_entities'] = aggregate_ner_tokens(final_output[sent_idx], parsed)
         if output_style in ['ud', 'iahlt_ud']:
-            final_output = convert_output_to_ud(final_output, style='htb' if output_style == 'ud' else 'iahlt')
         if is_single_sentence:
             final_output = final_output[0]
@@ -369,7 +369,7 @@ ud_suffix_to_htb_str = {
 	'Gender=Fem|Number=Sing|Person=2': '_את',
 	'Gender=Masc|Number=Plur|Person=3': '_הם'
 }
-def convert_output_to_ud(output_sentences, style: Literal['htb', 'iahlt']):
     if style not in ['htb', 'iahlt']:
         raise ValueError('style must be htb/iahlt')
@@ -393,7 +393,7 @@ def convert_output_to_ud(output_sentences, style: Literal['htb', 'iahlt']):
             start = len(intermediate_output)
             # Add in all the prefixes
             if len(word['seg']) > 1:
-                for pre in get_prefixes_from_str(word['seg'][0], greedy=True):
                     # pos - just take the first valid pos that appears in the predicted prefixes list.
                     pos = next((pos for pos in ud_prefixes_to_pos[pre] if pos in word['morph']['prefixes']), ud_prefixes_to_pos[pre][0])
                     dep, func = ud_get_prefix_dep(pre, word, word_idx)

                 final_output[sent_idx]['ner_entities'] = aggregate_ner_tokens(final_output[sent_idx], parsed)
         if output_style in ['ud', 'iahlt_ud']:
+            final_output = convert_output_to_ud(final_output, self.config, style='htb' if output_style == 'ud' else 'iahlt')
         if is_single_sentence:
             final_output = final_output[0]
 	'Gender=Fem|Number=Sing|Person=2': '_את',
 	'Gender=Masc|Number=Plur|Person=3': '_הם'
 }
+def convert_output_to_ud(output_sentences, model_cfg, style: Literal['htb', 'iahlt']):
     if style not in ['htb', 'iahlt']:
         raise ValueError('style must be htb/iahlt')
             start = len(intermediate_output)
             # Add in all the prefixes
             if len(word['seg']) > 1:
+                for pre in get_prefixes_from_str(word['seg'][0], model_cfg.prefix_cfg, greedy=True):
                     # pos - just take the first valid pos that appears in the predicted prefixes list.
                     pos = next((pos for pos in ud_prefixes_to_pos[pre] if pos in word['morph']['prefixes']), ud_prefixes_to_pos[pre][0])
                     dep, func = ud_get_prefix_dep(pre, word, word_idx)