import subprocess

# Install dependencies quietly (intended for a notebook / fresh environment)
subprocess.run(["pip", "install", "-q", "transformers", "datasets", "streamlit"])

from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig
import numpy as np
from scipy.special import softmax

model_path = "avichr/heBERT_sentiment_analysis"
# Load the tokenizer that matches the checkpoint so token IDs line up with the model's vocabulary
tokenizer = AutoTokenizer.from_pretrained(model_path)
config = AutoConfig.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Preprocess text (username and link placeholders)
def preprocess(text):
    new_text = []
    for t in text.split(" "):
        t = '@user' if t.startswith('@') and len(t) > 1 else t
        t = 'http' if t.startswith('http') else t
        new_text.append(t)
    return " ".join(new_text)

# Input preprocessing
text = "Covid cases are increasing fast!"
text = preprocess(text)

# PyTorch-based models
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
scores = output[0][0].detach().numpy()
scores = softmax(scores)

# TensorFlow-based models
# model = TFAutoModelForSequenceClassification.from_pretrained(model_path)
# model.save_pretrained(model_path)
# text = "Covid cases are increasing fast!"
# encoded_input = tokenizer(text, return_tensors='tf')
# output = model(encoded_input)
# scores = output[0][0].numpy()
# scores = softmax(scores)

# Map class indices to human-readable labels
config.id2label = {0: 'NEGATIVE', 1: 'NEUTRAL', 2: 'POSITIVE'}

# Print labels and scores, highest probability first
ranking = np.argsort(scores)
ranking = ranking[::-1]
print(f"Classified text: {text}")
for i in range(scores.shape[0]):
    label = config.id2label[ranking[i]]
    score = scores[ranking[i]]
    print(f"{i+1}) {label} {np.round(float(score), 4)}")


# --- Streamlit app ---
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig
from scipy.special import softmax
import streamlit as st

def preprocess(text):
    new_text = []
    for t in text.split(" "):
        t = '@user' if t.startswith('@') and len(t) > 1 else t
        t = 'http' if t.startswith('http') else t
        new_text.append(t)
    return " ".join(new_text)

def sentiment_analysis(text):
    text = preprocess(text)

    # Load the model and its matching tokenizer
    model_path = "avichr/heBERT_sentiment_analysis"
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    config = AutoConfig.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)

    # Encode text input
    encoded_input = tokenizer(text, return_tensors='pt')
    output = model(**encoded_input)
    scores_ = output[0][0].detach().numpy()

    # Calculate softmax probabilities
    scores_ = softmax(scores_)

    # Format output dict of scores
    labels = ['Negative', 'Neutral', 'Positive']
    scores = {l: float(s) for (l, s) in zip(labels, scores_)}
    return scores

st.title("Sentiment Analysis for Covid Feelings")

# User input field
text = st.text_input(label="Enter your text:")

# Perform sentiment analysis
if text:
    scores = sentiment_analysis(text)

    # Display sentiment scores
    st.subheader("Sentiment Scores")
    for label in scores:
        score = scores[label]
        st.write(f"{label}: {score:.2f}")
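# To try the UI locally, the Streamlit portion above would be saved as its own script
# and launched with Streamlit's CLI. The filename below is an assumption for
# illustration, not something specified by the original code:
#
#   streamlit run sentiment_app.py
#
# Note on the design: loading the model inside sentiment_analysis() means it is
# re-initialized on every rerun of the app; Streamlit's caching decorators
# (e.g. st.cache_resource) are a common way to avoid that, though this code
# does not use them.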