|
import subprocess
import sys
|
|
|
# Install the third-party packages this script imports further down.
# pip is invoked as ``<running interpreter> -m pip`` so the packages land in
# the environment that executes this script, rather than in whatever
# environment owns the first ``pip`` executable found on PATH.
_pip_install = subprocess.run(
    [
        sys.executable, "-m", "pip", "install", "-q",
        "transformers", "datasets", "streamlit",
    ]
)
if _pip_install.returncode != 0:
    # Best effort: keep going (the packages may already be installed), but
    # do not hide the failure from whoever is reading the output.
    print(
        f"warning: pip install exited with status {_pip_install.returncode}",
        file=sys.stderr,
    )
|
|
|
|
|
from transformers import AutoModelForSequenceClassification |
|
from transformers import TFAutoModelForSequenceClassification |
|
from transformers import AutoTokenizer, AutoConfig |
|
import numpy as np |
|
from scipy.special import softmax |
|
|
|
|
|
# Hugging Face Hub id of the checkpoint used for classification.
model_path = "avichr/heBERT_sentiment_analysis"

# The tokenizer must come from the same checkpoint as the model: the ids it
# emits are indices into that checkpoint's vocabulary. Loading the
# 'bert-base-cased' tokenizer here would hand the model ids that refer to
# different tokens than the ones it was trained on.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Checkpoint configuration (carries, among other things, ``id2label``).
config = AutoConfig.from_pretrained(model_path)

# Sequence-classification model whose outputs are turned into scores below.
model = AutoModelForSequenceClassification.from_pretrained(model_path)
|
|
|
|
|
def preprocess(text):
    """Mask user mentions and links in *text*.

    The text is split on single spaces. Every piece that starts with ``@``
    and is longer than one character becomes ``'@user'``; every piece that
    starts with ``http`` becomes ``'http'``. All other pieces are kept
    unchanged, and the pieces are re-joined with single spaces.
    """
    def _mask(token):
        # A lone '@' is not treated as a mention.
        if token.startswith('@') and len(token) > 1:
            return '@user'
        if token.startswith('http'):
            return 'http'
        return token

    return " ".join(_mask(token) for token in text.split(" "))
|
|
|
|
|
# Example sentence, with mentions and links masked by preprocess().
text = preprocess("Covid cases are increasing fast!")

# Tokenize into PyTorch tensors ('pt') and run the classifier on them.
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)

# Take the first entry of the first output element (presumably the logits of
# the single input sentence), detach it from the autograd graph, convert it
# to NumPy, and normalise it with softmax.
raw_scores = output[0][0].detach().numpy()
scores = softmax(raw_scores)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Report the classes from highest to lowest score.
#
# Label names are read from ``config.id2label`` exactly as loaded with the
# checkpoint. Overwriting that mapping with a hand-written
# NEGATIVE/NEUTRAL/POSITIVE order is unsafe: nothing ties that order to the
# output indices of this particular model, so scores could be printed under
# the wrong name.
# NOTE(review): confirm the label names against the checkpoint's model card.
ranking = np.argsort(scores)[::-1]  # class indices, best score first

print(f"Classified text: {text}")
for position, class_id in enumerate(ranking, start=1):
    # int(): use a plain Python int as the dictionary key.
    label = config.id2label[int(class_id)]
    score = scores[class_id]
    print(f"{position}) {label} {np.round(float(score), 4)}")
|
|
|
from transformers import AutoModelForSequenceClassification |
|
from transformers import TFAutoModelForSequenceClassification |
|
from transformers import AutoTokenizer, AutoConfig |
|
from scipy.special import softmax |
|
import streamlit as st |
|
|
|
|
|
def preprocess(text):
    """Mask user mentions and links in *text*.

    The text is split on single spaces. Every piece that starts with ``@``
    and is longer than one character becomes ``'@user'``; every piece that
    starts with ``http`` becomes ``'http'``. All other pieces are kept
    unchanged, and the pieces are re-joined with single spaces.
    """
    def _mask(token):
        # A lone '@' is not treated as a mention.
        if token.startswith('@') and len(token) > 1:
            return '@user'
        if token.startswith('http'):
            return 'http'
        return token

    return " ".join(_mask(token) for token in text.split(" "))
|
|
|
|
|
@st.cache_resource
def _load_sentiment_model(model_path="avichr/heBERT_sentiment_analysis"):
    """Load the tokenizer, config and model for *model_path* once.

    ``st.cache_resource`` keeps the returned objects alive across Streamlit
    reruns, so the checkpoint is not reloaded for every submitted text.
    The tokenizer is loaded from the same checkpoint as the model so that
    the token ids match the model's vocabulary.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    config = AutoConfig.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    return tokenizer, config, model


def sentiment_analysis(text):
    """Return the sentiment scores of *text*.

    Args:
        text: The raw input string. Mentions and links are masked with
            ``preprocess`` before tokenization.

    Returns:
        A dict mapping each label name to its softmax score as a ``float``.
        Label names come from the checkpoint's ``config.id2label`` (with the
        first letter capitalised), so each score is reported under the name
        the model itself assigns to that output index.
        NOTE(review): confirm the resulting label names against the
        checkpoint's model card.
    """
    text = preprocess(text)
    tokenizer, config, model = _load_sentiment_model()

    # Truncate to the model's position limit so that very long input does
    # not overflow the position embeddings.
    encoded_input = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=config.max_position_embeddings,
    )
    output = model(**encoded_input)

    # First entry of the first output element -> NumPy -> softmax.
    probabilities = softmax(output[0][0].detach().numpy())

    return {
        str(config.id2label[class_id]).capitalize(): float(probability)
        for class_id, probability in enumerate(probabilities)
    }
|
|
|
|
|
import streamlit as st |
|
|
|
# Page title.
st.title("Sentiment Analysis for Covid Feelings")

# Text box for the sentence to classify.
text = st.text_input(label="Enter your text:")

# Only run the model once there is some (truthy) input.
if text:
    scores = sentiment_analysis(text)

    # One line per label, score shown with two decimals.
    st.subheader("Sentiment Scores")
    for label, score in scores.items():
        st.write(f"{label}: {score:.2f}")
|
|
|
|