Upload 2 files
Browse files
- News_Bias_Samples.csv +23 -0
- inference-roberta.py +56 -0
News_Bias_Samples.csv
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
text,label
|
2 |
+
New Tax Plan: A Disaster for Middle-Class Families,biased
|
3 |
+
Candidate X's Dangerous Policies Will Ruin Our Economy,biased
|
4 |
+
Scandal Rocks Opponent's Campaign: Can They Be Trusted?,biased
|
5 |
+
Immigrants Flooding Our Borders: An Invasion We Can't Handle,biased
|
6 |
+
Liberal Agenda Pushes for Radical Changes in Education,biased
|
7 |
+
Anti-Vaxxers Endanger Public Health with Baseless Claims,biased
|
8 |
+
Western Values Under Attack by Foreign Cultures,biased
|
9 |
+
Religious Extremists Threaten Our Way of Life,biased
|
10 |
+
Urban Crime Rates Soar: Is It Safe to Live in Cities Anymore?,biased
|
11 |
+
Miracle Cure for Cancer Found – Why Doctors Don't Want You to Know,biased
|
12 |
+
Celebrity Divorce Shocker: What Really Happened Behind Closed Doors,biased
|
13 |
+
New Tax Plan Proposes Changes for Middle-Income Brackets,unbiased
|
14 |
+
Candidate X Outlines Economic Policy in Recent Speech,unbiased
|
15 |
+
Opposition Leader Addresses Recent Controversy,unbiased
|
16 |
+
Immigration Reform Bill Introduced in Congress,unbiased
|
17 |
+
Debate Over Educational Reforms Continues in State Legislature,unbiased
|
18 |
+
Public Health Officials Discuss Vaccination Policies,unbiased
|
19 |
+
Cultural Exchange Programs Promote Understanding Between Nations,unbiased
|
20 |
+
Religious Leaders Meet to Discuss Interfaith Cooperation,unbiased
|
21 |
+
Urban Development Projects Aim to Improve City Living Conditions,unbiased
|
22 |
+
New Study Reveals Potential Cancer Treatment Advances,unbiased
|
23 |
+
High-Profile Divorce Case Concludes with Settlement Agreement,unbiased
|
inference-roberta.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import torch
|
3 |
+
from transformers import RobertaTokenizerFast, RobertaForSequenceClassification
|
4 |
+
from torch.nn.functional import softmax
|
5 |
+
import os
|
6 |
+
|
7 |
+
class RoBERTaClassifier:
    """Thin inference wrapper around a RoBERTa sequence-classification model.

    The instance owns a tokenizer, a model and the torch device the model
    lives on. ``predict`` turns raw strings into class indices and softmax
    probabilities.
    """

    def __init__(self, model_name='roberta-base', num_labels=2, max_length=256, dropout_rate=0.2):
        """Load the tokenizer and a classification model from ``model_name``.

        Args:
            model_name: Name or path handed to ``from_pretrained`` for both
                the tokenizer and the model.
            num_labels: Number of output classes of the classification head.
            max_length: Maximum token length used when tokenizing in
                ``predict`` (longer inputs are truncated).
            dropout_rate: Dropout probability installed on the classifier
                head of the model loaded here.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = RobertaTokenizerFast.from_pretrained(model_name)
        self.model = RobertaForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
        # NOTE: this override only affects the model loaded above; load_model()
        # replaces self.model, so the loaded checkpoint keeps its own dropout.
        self.model.classifier.dropout = torch.nn.Dropout(dropout_rate)
        self.model.to(self.device)
        self.max_length = max_length

    def load_model(self, model_dir):
        """Replace the current model with the one stored in ``model_dir``.

        ``from_pretrained`` restores both the configuration and the weights.
        The tokenizer is left untouched.
        """
        self.model = RobertaForSequenceClassification.from_pretrained(model_dir)
        # The freshly loaded model starts on CPU; move it next to the inputs.
        self.model.to(self.device)

    def predict(self, texts, batch_size=None):
        """Classify ``texts`` and return ``(predictions, probabilities)``.

        Args:
            texts: A single string or an iterable of strings.
            batch_size: Optional number of texts per forward pass. ``None``
                (the default) processes everything in one pass, exactly as
                before; a positive integer bounds peak memory on large
                inputs. Padding is applied per chunk.

        Returns:
            A tuple ``(predictions, probabilities)`` of tensors on
            ``self.device``: ``predictions`` holds the argmax class index per
            text, ``probabilities`` the softmax over the logits per text.
            For empty input both tensors have zero rows.

        Raises:
            ValueError: If ``batch_size`` is given and is smaller than 1.
        """
        # Normalise the input so it can be sliced into chunks.
        if isinstance(texts, str):
            texts = [texts]
        else:
            texts = list(texts)

        if batch_size is not None and batch_size < 1:
            raise ValueError("batch_size must be a positive integer or None")

        if not texts:
            # Nothing to tokenize: return correctly shaped empty results
            # instead of pushing an empty batch through the model.
            num_labels = self.model.config.num_labels
            return (
                torch.empty(0, dtype=torch.long, device=self.device),
                torch.empty((0, num_labels), device=self.device),
            )

        step = len(texts) if batch_size is None else batch_size

        self.model.eval()
        logit_chunks = []
        with torch.no_grad():
            for start in range(0, len(texts), step):
                encodings = self.tokenizer(
                    texts[start:start + step],
                    truncation=True,
                    padding=True,
                    max_length=self.max_length,
                    return_tensors='pt',
                ).to(self.device)
                logit_chunks.append(self.model(**encodings).logits)

        logits = torch.cat(logit_chunks, dim=0)
        probabilities = softmax(logits, dim=1)
        predictions = torch.argmax(logits, dim=1)
        return predictions, probabilities
|
30 |
+
|
31 |
+
# Example of usage
def build_results(texts, positive_probs, ground_truth, threshold=0.5):
    """Assemble the per-text results table.

    Args:
        texts: The input strings, one per row.
        positive_probs: Probability of class index 1 for each text.
        ground_truth: Expected label string for each text.
        threshold: A text is labelled 'unbiased' when its class-1
            probability is at least this value, otherwise 'biased'.

    Returns:
        A DataFrame with the columns 'Text', 'Predicted Label',
        'Probability', 'Ground Truth' and 'Match'.
    """
    positive_probs = [float(p) for p in positive_probs]
    # NOTE(review): assumes class index 1 means 'unbiased' -- confirm against
    # the label encoding used when the model was trained.
    predicted_labels = ['unbiased' if p >= threshold else 'biased' for p in positive_probs]

    results_df = pd.DataFrame({
        'Text': list(texts),
        'Predicted Label': predicted_labels,
        'Probability': positive_probs,
        'Ground Truth': list(ground_truth),
    })
    results_df['Match'] = results_df['Predicted Label'] == results_df['Ground Truth']
    return results_df


def main(model_dir=os.path.join('saved_models', 'best_model-roberta'),
         data_path='News_Bias_Samples.csv',
         output_path='prediction_results-roberta.csv',
         threshold=0.5):
    """Run the saved model over the sample CSV and report accuracy.

    Loads the classifier from ``model_dir``, predicts every row of the
    'text' column in ``data_path``, writes the results table to
    ``output_path`` and prints the number of predictions that match the
    'label' column. Returns the results DataFrame.
    """
    classifier = RoBERTaClassifier(model_name='roberta-base', num_labels=2, max_length=256, dropout_rate=0.2)
    classifier.load_model(model_dir)

    test_data = pd.read_csv(data_path)
    texts = test_data['text'].tolist()

    # Only the probabilities are needed; labels are derived via the threshold.
    _, probabilities = classifier.predict(texts)
    # Pull the class-1 column off the device once instead of per element.
    positive_probs = probabilities[:, 1].cpu().tolist()

    results_df = build_results(texts, positive_probs, test_data['label'], threshold=threshold)

    matches = int(results_df['Match'].sum())
    total = len(results_df)
    # Guard against an empty CSV so the summary line can still be printed.
    accuracy = matches / total if total else 0.0

    results_df.to_csv(output_path, index=False)
    print(f'Total matches: {matches}/{total} ({accuracy:.2%} accuracy)')
    return results_df


if __name__ == "__main__":
    main()
|