POLLCHECK
/

RoBERTa-classifier

+text,label
+New Tax Plan: A Disaster for Middle-Class Families,biased
+Candidate X's Dangerous Policies Will Ruin Our Economy,biased
+Scandal Rocks Opponent's Campaign: Can They Be Trusted?,biased
+Immigrants Flooding Our Borders: An Invasion We Can't Handle,biased
+Liberal Agenda Pushes for Radical Changes in Education,biased
+Anti-Vaxxers Endanger Public Health with Baseless Claims,biased
+Western Values Under Attack by Foreign Cultures,biased
+Religious Extremists Threaten Our Way of Life,biased
+Urban Crime Rates Soar: Is It Safe to Live in Cities Anymore?,biased
+Miracle Cure for Cancer Found – Why Doctors Don't Want You to Know,biased
+Celebrity Divorce Shocker: What Really Happened Behind Closed Doors,biased
+New Tax Plan Proposes Changes for Middle-Income Brackets,unbiased
+Candidate X Outlines Economic Policy in Recent Speech,unbiased
+Opposition Leader Addresses Recent Controversy,unbiased
+Immigration Reform Bill Introduced in Congress,unbiased
+Debate Over Educational Reforms Continues in State Legislature,unbiased
+Public Health Officials Discuss Vaccination Policies,unbiased
+Cultural Exchange Programs Promote Understanding Between Nations,unbiased
+Religious Leaders Meet to Discuss Interfaith Cooperation,unbiased
+Urban Development Projects Aim to Improve City Living Conditions,unbiased
+New Study Reveals Potential Cancer Treatment Advances,unbiased
+High-Profile Divorce Case Concludes with Settlement Agreement,unbiased

inference-roberta.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import pandas as pd
+import torch
+from transformers import RobertaTokenizerFast, RobertaForSequenceClassification
+from torch.nn.functional import softmax
+import os
+class RoBERTaClassifier:
+    def __init__(self, model_name='roberta-base', num_labels=2, max_length=256, dropout_rate=0.2):
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.tokenizer = RobertaTokenizerFast.from_pretrained(model_name)
+        self.model = RobertaForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
+        self.model.classifier.dropout = torch.nn.Dropout(dropout_rate)
+        self.model.to(self.device)
+        self.max_length = max_length
+    def load_model(self, model_dir):
+        # Load the model using from_pretrained which includes the configuration and model weights
+        self.model = RobertaForSequenceClassification.from_pretrained(model_dir)
+        self.model.to(self.device)  # Make sure to also send the model to the correct device
+    def predict(self, texts):
+        self.model.eval()
+        with torch.no_grad():
+            encodings = self.tokenizer(texts, truncation=True, padding=True, max_length=self.max_length, return_tensors='pt').to(self.device)
+            outputs = self.model(**encodings)
+            logits = outputs.logits
+            probabilities = softmax(logits, dim=1)
+            predictions = torch.argmax(logits, dim=1)
+            return predictions, probabilities
+# Example of usage
+model_folder = 'saved_models'
+model_dir = os.path.join(model_folder, 'best_model-roberta')
+classifier = RoBERTaClassifier(model_name='roberta-base', num_labels=2, max_length=256, dropout_rate=0.2)
+classifier.load_model(model_dir)
+test_data = pd.read_csv('News_Bias_Samples.csv')
+texts = test_data['text'].tolist()
+predictions, probabilities = classifier.predict(texts)
+threshold = 0.5
+predicted_labels = ['unbiased' if prob[1] >= threshold else 'biased' for prob in probabilities.cpu().numpy()]
+results_df = pd.DataFrame({
+    'Text': texts,
+    'Predicted Label': predicted_labels,
+    'Probability': [prob[1].item() for prob in probabilities],
+    'Ground Truth': test_data['label']
+})
+results_df['Match'] = results_df['Predicted Label'] == results_df['Ground Truth']
+matches = results_df['Match'].sum()
+total = len(results_df)
+accuracy = matches / total
+results_df.to_csv('prediction_results-roberta.csv', index=False)
+print(f'Total matches: {matches}/{total} ({accuracy:.2%} accuracy)')