ANLPRL committed on
Commit
5d58462
1 Parent(s): 9112cbc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -69
app.py CHANGED
@@ -1,55 +1,14 @@
1
- import streamlit as st
 
 
 
2
  import pickle
3
  import numpy as np
4
- import pandas as pd
5
- from transformers import AutoTokenizer,AutoModel
6
- import torch
7
- import tensorflow as tf
8
  from keras.models import load_model
9
- import re
10
  import io
11
  import PyPDF2
12
-
13
-
14
- def predict(new_data):
15
-
16
- tokens = tokenizer(new_data.split(), padding=True, truncation=True, max_length=128, return_tensors='pt')
17
- with torch.no_grad():
18
- embeddings = model(tokens['input_ids'], attention_mask=tokens['attention_mask'])[0][:, 0, :].numpy()
19
- y_pred = rf.predict(embeddings)
20
- prev_label=" "
21
- text=new_data.split()
22
- data=[]
23
- labels=[]
24
- for i,(word,label) in enumerate(zip(text,y_pred)):
25
- if label!="Other":
26
- label=label.split('-')[1]
27
- if prev_label==label:
28
- data[-1]=data[-1]+" "+word
29
- else:
30
- data.append(word)
31
- labels.append(label)
32
- prev_label=label
33
- return(data,labels)
34
-
35
-
36
- def highlight(sentence):
37
- highlighted_text = ""
38
- entity_colors = {"Symptom":"#87cefa","Medical Condition":"#ffb6c1"}
39
- words, labels = predict(sentence)
40
- for words, label in zip(words, labels):
41
- prev_label=""
42
- if label!="Other" and words!="a":
43
- if label in ["Medical Condition","Symptom"]:
44
- word_color = entity_colors.get(label, "yellow")
45
- label_color = entity_colors.get(label + '-label', "<b>black</b>")
46
- highlighted_text += f'<mark style="background-color: {word_color}; color: {label_color}; padding: 0 0.25rem; border-radius: 0.25rem; border: 2px solid {word_color}; border-bottom-width: 1px">{words}<sup style="background-color: white; color: black; border: 1px solid black; border-radius: 2px; padding: 0 0.15rem; font-size: 70%; margin-left: 0.15rem; font-weight: bold;">{label}</sup></mark> '
47
- else:
48
- highlighted_text += f'{words} '
49
- else:
50
- highlighted_text += f'{words} '
51
- st.markdown(highlighted_text, unsafe_allow_html=True)
52
-
53
 
54
 
55
  def read_uploaded_file(uploaded_file):
@@ -88,16 +47,60 @@ def preprocess(text):
88
  return text
89
 
90
 
91
- #Load the trained model
92
- with open("biobert_rf.pkl", 'rb') as f:
93
- rf = pickle.load(f)
94
- # Load the BioBERT model and tokenizer
95
- model_name = "dmis-lab/biobert-base-cased-v1.1"
96
- tokenizer = AutoTokenizer.from_pretrained(model_name)
97
- model = AutoModel.from_pretrained(model_name)
98
- st.title('Oral Medicine Meets NLP')
99
- st.subheader('Named Entity Recoginition System For Oral Medicine ')
100
- sentence = st.text_area('Enter a sentence:')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  st.write("OR")
103
  uploaded_file = st.file_uploader("Upload a file")
@@ -105,7 +108,7 @@ uploaded_file = st.file_uploader("Upload a file")
105
  if uploaded_file is not None:
106
  # Do something with the file
107
  st.write("File uploaded!")
108
-
109
  st.write("OR")
110
  selected_options = st.selectbox(
111
  'Choose a text from dropdown: ',
@@ -114,6 +117,7 @@ selected_options = st.selectbox(
114
  'Hemophilia is a genetic illness that mainly affects the blood ability to clot properly. Individuals with significant hemophilia are at an elevated possibility of experiencing unforeseen bleeding episodes, which can occur in various parts of the body, including the mouth. Oral bleeding can be a sign of hemophilia and can present as gum bleeding or mouth sores.',
115
  "Von Willebrand disease VWD is a genetic condition that impairs the blood's ability to clot properly. One of the symptoms of VWD is spontaneous gingival bleeding , which can occur without any apparent cause or trauma")) # set default to None
116
 
 
117
  # Define the colors for each label
118
 
119
  if st.button('Analyze'):
@@ -123,17 +127,8 @@ if st.button('Analyze'):
123
  text=read_uploaded_file(uploaded_file)
124
  text=preprocess(text)
125
  highlight(text)
126
- elif selected_options:
127
  highlight(selected_options)
128
  else:
129
- st.write("Please enter a text or select an example to analyze")
130
-
131
-
132
-
133
-
134
-
135
-
136
-
137
 
138
-
139
-
 
1
+ from transformers import AutoTokenizer, TFAutoModel
2
+ import tensorflow as tf
3
+ #from keras.preprocessing.sequence import pad_sequences
4
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
5
  import pickle
6
  import numpy as np
 
 
 
 
7
  from keras.models import load_model
8
+ import streamlit as st
9
  import io
10
  import PyPDF2
11
+ import re
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
 
14
  def read_uploaded_file(uploaded_file):
 
47
  return text
48
 
49
 
50
+
51
def predict(new_data):
    """Tag every whitespace-separated word of *new_data* and merge neighbours.

    Each word is encoded on its own with the module-level ``tokenizer``,
    padded/truncated to 22 token ids, passed through ``biobert_model`` and
    then classified by ``model``; the winning class index is mapped back to
    a tag name with the label encoder ``le``.

    Consecutive words that end up with the same tag are joined with a single
    space into one chunk (this includes runs of ``"Other"`` words).

    Args:
        new_data: Raw text to analyse.

    Returns:
        A ``(data, labels)`` tuple of two equally long lists: the merged text
        chunks and the tag of each chunk. Both lists are empty when
        *new_data* contains no words.
    """
    words = new_data.split()
    if not words:
        # Nothing to tag: skip the tokenizer/model calls, which are not
        # meaningful for an empty batch.
        return ([], [])

    token_ids = [tokenizer.encode(word, add_special_tokens=True) for word in words]
    padded_ids = pad_sequences(token_ids, maxlen=22, dtype='long', truncating='post', padding='post')
    # First output of the BioBERT model -- presumably the per-token hidden
    # states; TODO confirm against the shape the classifier was trained on.
    embeddings = biobert_model(tf.convert_to_tensor(padded_ids))[0]
    scores = model.predict(embeddings)
    predicted_labels = list(le.inverse_transform(np.argmax(scores, axis=1)))

    data = []
    labels = []
    prev_label = " "
    for word, label in zip(words, predicted_labels):
        if label != "Other":
            # Tags look like "<prefix>-<entity type>" (assumed from the split
            # below -- verify against the training labels). Split only on the
            # first hyphen so hyphenated entity types stay intact and a tag
            # without any hyphen does not raise IndexError.
            label = label.split('-', 1)[-1]
        if label == prev_label:
            # Same tag as the previous word: extend the current chunk.
            data[-1] = data[-1] + " " + word
        else:
            data.append(word)
            labels.append(label)
        prev_label = label
    return (data, labels)
74
+
75
def highlight(sentence):
    """Render *sentence* in Streamlit with recognised entities highlighted.

    The text is tagged by ``predict``; chunks tagged ``"Symptom"`` or
    ``"Medical Condition"`` are wrapped in a coloured ``<mark>`` element with
    the tag shown as a superscript. Every other chunk (and a chunk that is
    exactly ``"a"``) is emitted as plain text. The result is written to the
    page with ``st.markdown(..., unsafe_allow_html=True)``.

    Args:
        sentence: Text to analyse; may come from user input or an uploaded
            file, so it is HTML-escaped before being embedded in markup.

    Returns:
        None. The output is the rendered Streamlit element.
    """
    import html  # local import: only this function needs it

    entity_colors = {"Symptom": "#87cefa", "Medical Condition": "#ffb6c1"}
    # The original looked up a "<label>-label" key that never exists and fell
    # back to "<b>black</b>", which is not a valid CSS colour; use the
    # intended plain value instead.
    label_color = "black"

    chunks, labels = predict(sentence)
    pieces = []
    for chunk, label in zip(chunks, labels):
        # Escape because the markup is rendered with unsafe_allow_html=True
        # and the text is not trusted.
        safe_chunk = html.escape(chunk, quote=False)
        word_color = entity_colors.get(label)
        if word_color is None or chunk == "a":
            pieces.append(f'{safe_chunk} ')
            continue
        safe_label = html.escape(label, quote=False)
        pieces.append(
            f'<mark style="background-color: {word_color}; color: {label_color}; padding: 0 0.25rem; border-radius: 0.25rem; border: 2px solid {word_color}; border-bottom-width: 1px">{safe_chunk}<sup style="background-color: white; color: black; border: 1px solid black; border-radius: 2px; padding: 0 0.15rem; font-size: 70%; margin-left: 0.15rem; font-weight: bold;">{safe_label}</sup></mark> '
        )
    st.markdown("".join(pieces), unsafe_allow_html=True)
90
+
91
+
92
+
93
+
94
+
95
+ # Load the previously fitted LabelEncoder (maps predicted class indices back to tag names)
96
+ with open("label_encoder.pkl", 'rb') as f:
97
+ le = pickle.load(f)
98
+ model= tf.keras.models.load_model("biobert_rnn_weightless.h5")
99
+ tokenizer = AutoTokenizer.from_pretrained("dmis-lab/biobert-base-cased-v1.1")
100
+ biobert_model = TFAutoModel.from_pretrained("dmis-lab/biobert-base-cased-v1.1", from_pt=True)
101
+
102
+ st.title('Named Entity Recognition')
103
+ sentence = st.text_input('Enter a sentence:')
104
 
105
  st.write("OR")
106
  uploaded_file = st.file_uploader("Upload a file")
 
108
  if uploaded_file is not None:
109
  # Do something with the file
110
  st.write("File uploaded!")
111
+
112
  st.write("OR")
113
  selected_options = st.selectbox(
114
  'Choose a text from dropdown: ',
 
117
  'Hemophilia is a genetic illness that mainly affects the blood ability to clot properly. Individuals with significant hemophilia are at an elevated possibility of experiencing unforeseen bleeding episodes, which can occur in various parts of the body, including the mouth. Oral bleeding can be a sign of hemophilia and can present as gum bleeding or mouth sores.',
118
  "Von Willebrand disease VWD is a genetic condition that impairs the blood's ability to clot properly. One of the symptoms of VWD is spontaneous gingival bleeding , which can occur without any apparent cause or trauma")) # set default to None
119
 
120
+
121
  # Define the colors for each label
122
 
123
  if st.button('Analyze'):
 
127
  text=read_uploaded_file(uploaded_file)
128
  text=preprocess(text)
129
  highlight(text)
130
+ elif selected_options:
131
  highlight(selected_options)
132
  else:
133
+ st.write('Please enter a sentence or select an option from the dropdown.')
 
 
 
 
 
 
 
134