shainar committed on
Commit
25d9bf7
1 Parent(s): 5f34015

Upload 2 files

Browse files
Files changed (2) hide show
  1. News_Bias_Samples.csv +23 -0
  2. inference-roberta.py +56 -0
News_Bias_Samples.csv ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ text,label
2
+ New Tax Plan: A Disaster for Middle-Class Families,biased
3
+ Candidate X's Dangerous Policies Will Ruin Our Economy,biased
4
+ Scandal Rocks Opponent's Campaign: Can They Be Trusted?,biased
5
+ Immigrants Flooding Our Borders: An Invasion We Can't Handle,biased
6
+ Liberal Agenda Pushes for Radical Changes in Education,biased
7
+ Anti-Vaxxers Endanger Public Health with Baseless Claims,biased
8
+ Western Values Under Attack by Foreign Cultures,biased
9
+ Religious Extremists Threaten Our Way of Life,biased
10
+ Urban Crime Rates Soar: Is It Safe to Live in Cities Anymore?,biased
11
+ Miracle Cure for Cancer Found – Why Doctors Don't Want You to Know,biased
12
+ Celebrity Divorce Shocker: What Really Happened Behind Closed Doors,biased
13
+ New Tax Plan Proposes Changes for Middle-Income Brackets,unbiased
14
+ Candidate X Outlines Economic Policy in Recent Speech,unbiased
15
+ Opposition Leader Addresses Recent Controversy,unbiased
16
+ Immigration Reform Bill Introduced in Congress,unbiased
17
+ Debate Over Educational Reforms Continues in State Legislature,unbiased
18
+ Public Health Officials Discuss Vaccination Policies,unbiased
19
+ Cultural Exchange Programs Promote Understanding Between Nations,unbiased
20
+ Religious Leaders Meet to Discuss Interfaith Cooperation,unbiased
21
+ Urban Development Projects Aim to Improve City Living Conditions,unbiased
22
+ New Study Reveals Potential Cancer Treatment Advances,unbiased
23
+ High-Profile Divorce Case Concludes with Settlement Agreement,unbiased
inference-roberta.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import torch
3
+ from transformers import RobertaTokenizerFast, RobertaForSequenceClassification
4
+ from torch.nn.functional import softmax
5
+ import os
6
+
7
class RoBERTaClassifier:
    """Inference wrapper around a RoBERTa sequence-classification model.

    The constructor loads a tokenizer and a model from ``model_name`` and
    moves the model to CUDA when available, otherwise to the CPU.  A
    fine-tuned checkpoint can then be swapped in with :meth:`load_model`,
    and :meth:`predict` scores raw strings.
    """

    def __init__(self, model_name='roberta-base', num_labels=2, max_length=256, dropout_rate=0.2):
        """Load the tokenizer and model and place the model on the device.

        Args:
            model_name: Name or path passed to ``from_pretrained`` for both
                the tokenizer and the model.
            num_labels: Number of output classes for the classification head.
            max_length: Maximum token length used when truncating inputs in
                :meth:`predict`.
            dropout_rate: Dropout probability installed on the classification
                head of the model created here.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = RobertaTokenizerFast.from_pretrained(model_name)
        self.model = RobertaForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
        # Replace the head's dropout layer with one using the requested rate.
        self.model.classifier.dropout = torch.nn.Dropout(dropout_rate)
        self.model.to(self.device)
        self.max_length = max_length

    def load_model(self, model_dir):
        """Replace the current model with the checkpoint stored in ``model_dir``.

        ``from_pretrained`` restores both the configuration and the weights.

        NOTE: the replacement model is used as loaded, so the dropout
        override applied in ``__init__`` is not carried over.  The tokenizer
        is left untouched and is still the one loaded from ``model_name``.

        Args:
            model_dir: Directory containing the saved model.
        """
        self.model = RobertaForSequenceClassification.from_pretrained(model_dir)
        # The freshly loaded model must be moved to the active device too.
        self.model.to(self.device)

    def predict(self, texts, batch_size=32):
        """Classify ``texts`` and return class indices and probabilities.

        Inputs are processed in chunks of ``batch_size`` so that memory use
        is bounded by the batch size and not by the total number of texts.

        Args:
            texts: A single string or an iterable of strings.
            batch_size: Maximum number of texts per forward pass (>= 1).

        Returns:
            A ``(predictions, probabilities)`` tuple of tensors:
            ``predictions`` holds the argmax class index per text and
            ``probabilities`` the softmax over the logits per text.  For
            empty input both tensors have zero rows.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        # A bare string is one sample; without this, slicing it into
        # batches below would split it into characters.
        if isinstance(texts, str):
            texts = [texts]
        else:
            texts = list(texts)

        self.model.eval()

        if not texts:
            # Nothing to tokenize: return correctly shaped empty results
            # instead of pushing an empty batch through the model.
            num_labels = self.model.config.num_labels
            return (
                torch.empty(0, dtype=torch.long, device=self.device),
                torch.empty((0, num_labels), device=self.device),
            )

        logits_per_batch = []
        with torch.no_grad():
            for start in range(0, len(texts), batch_size):
                batch = texts[start:start + batch_size]
                encodings = self.tokenizer(
                    batch,
                    truncation=True,
                    padding=True,
                    max_length=self.max_length,
                    return_tensors='pt',
                ).to(self.device)
                logits_per_batch.append(self.model(**encodings).logits)

        logits = torch.cat(logits_per_batch, dim=0)
        probabilities = softmax(logits, dim=1)
        predictions = torch.argmax(logits, dim=1)
        return predictions, probabilities
30
+
31
# Example usage: score the sample headlines with a fine-tuned checkpoint
# and report how often the predicted label matches the CSV label.
model_folder = 'saved_models'
model_dir = os.path.join(model_folder, 'best_model-roberta')

classifier = RoBERTaClassifier(
    model_name='roberta-base',
    num_labels=2,
    max_length=256,
    dropout_rate=0.2,
)
classifier.load_model(model_dir)

# The CSV provides a 'text' column to classify and a 'label' column with
# the expected 'biased' / 'unbiased' value.
test_data = pd.read_csv('News_Bias_Samples.csv')
texts = test_data['text'].tolist()
predictions, probabilities = classifier.predict(texts)

# Class index 1 is reported as 'unbiased' here.
# NOTE(review): this presumes the checkpoint was trained with that label
# encoding -- confirm against the training script.
threshold = 0.5
class_one_scores = probabilities[:, 1].tolist()
predicted_labels = [
    'unbiased' if score >= threshold else 'biased'
    for score in class_one_scores
]

results_df = pd.DataFrame({
    'Text': texts,
    'Predicted Label': predicted_labels,
    'Probability': class_one_scores,
    'Ground Truth': test_data['label'],
})

# Row-wise agreement between prediction and ground truth.
results_df['Match'] = results_df['Predicted Label'].eq(results_df['Ground Truth'])
matches = results_df['Match'].sum()
total = results_df.shape[0]
accuracy = matches / total

results_df.to_csv('prediction_results-roberta.csv', index=False)
print(f'Total matches: {matches}/{total} ({accuracy:.2%} accuracy)')