BDA594-fake-news-classification_App

Sleeping

File size: 3,668 Bytes

fa5a555
 
 
 
 
 
 
 
 
c804c6f
fa5a555
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c804c6f
fa5a555
 
 
 
 
 
 
c804c6f
fa5a555
c804c6f
11f2ab6
bbd2303
fa5a555
1d5b05f
fa5a555

# Import the required Libraries
import gradio as gr
import numpy as np
import pandas as pd
import pickle
import transformers 
from transformers import AutoTokenizer, AutoConfig,AutoModelForSequenceClassification,TFAutoModelForSequenceClassification
from scipy.special import softmax
# Checkpoint identifier shared by every from_pretrained() call below.
model_path = "Kaludi/BDA594-fake-news-classification"

# Load the classifier, its configuration and its tokenizer from the same
# checkpoint. The three loads are independent of one another.
model = AutoModelForSequenceClassification.from_pretrained(model_path)
config = AutoConfig.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Preprocess text (username and link placeholders)
def preprocess(text):
    """Replace user mentions and links in *text* with fixed placeholders.

    The text is split on single spaces. A token that starts with "@" and is
    longer than one character becomes "@user"; a token that starts with
    "http" becomes "http". All other tokens are kept as they are, and the
    tokens are joined back together with single spaces.
    """

    def _mask(token):
        # A lone "@" is left alone; only "@something" counts as a mention.
        if len(token) > 1 and token.startswith("@"):
            return "@user"
        if token.startswith("http"):
            return "http"
        return token

    return " ".join(_mask(token) for token in text.split(" "))

# ---- Function to process the input and return prediction
def sentiment_analysis(text):
    """Score a piece of text as fake or real news.

    The text is normalized with ``preprocess``, tokenized with the
    module-level ``tokenizer`` and run through the module-level ``model``.
    The logits of the first sequence in the batch are converted to
    probabilities with ``softmax``.

    Args:
        text: Raw text or news article to classify.

    Returns:
        A dict mapping the labels "Fake" and "Real" to float scores, which is
        the value handed to the Gradio "label" output of this app.
    """
    # Function-scope import: the file does not import torch at module level,
    # but the PyTorch model loaded above already requires it to be installed.
    import torch

    text = preprocess(text)

    # truncation=True keeps long articles within the tokenizer's maximum
    # length; without it an over-long input can fail inside the model.
    # NOTE(review): this relies on the tokenizer declaring model_max_length -
    # confirm for this checkpoint.
    encoded_input = tokenizer(text, return_tensors="pt", truncation=True)

    # Inference only: no gradients are needed, so skip building the graph.
    with torch.no_grad():
        output = model(**encoded_input)

    # output[0] holds the logits; [0] selects the single input sequence.
    scores_ = softmax(output[0][0].detach().numpy())

    # NOTE(review): index 0 -> "Fake", index 1 -> "Real" is hard-coded here;
    # verify it against config.id2label for the checkpoint.
    labels = ["Fake", "Real"]
    return {label: float(score) for label, score in zip(labels, scores_)}


# ---- Gradio app interface
# Build the web UI: one text input, a label output fed by sentiment_analysis,
# and two example articles the user can click to try the classifier.
app = gr.Interface(fn = sentiment_analysis,
                   # The hint is passed as `placeholder`; as the first positional
                   # argument it would be the Textbox's initial *value*, i.e.
                   # real input text that gets classified if submitted as-is.
                   inputs = gr.Textbox(placeholder = "Write your text or news article here..."),
                   outputs = "label",
                   title = "BDA594 Fake News Classification",
                   description  = "This is a Fake News Classifier model that has been trained by [Kaludi](https://huggingface.co/Kaludi) to determine the authenticity of news articles. It classifies articles into two categories: **Real** and **Fake**. By analyzing the content and context of a given article, this model can accurately determine whether the news is genuine or fabricated.",
                   article = "<p style='text-align: center'><a href='https://github.com/Kaludii'>Github</a> | <a href='https://huggingface.co/Kaludi'>HuggingFace</a></p>",
                   # NOTE(review): `interpretation` is believed to be a Gradio 3.x
                   # option that later major versions no longer accept - confirm
                   # against the Gradio version this app is pinned to.
                   interpretation = "default",
                   examples = [
                       ["WASHINGTON—Noting that there was no excuse for the first dog’s most recent instance of bad behavior, the White House confirmed Thursday that Commander Biden had gnawed the Washington Monument down to a slobber-covered stub. “We turn our backs for two minutes, and boom, we find Commander sitting there on the National Mall with the entire Washington Monument in his mouth,” said White House aide Paul Stephens, adding that no matter how much they attempted to distract the dog with treats, tried to pry his mouth open with their hands, or yelled “Drop it,” the 2-year-old German shepherd continued to growl and bite enormous chunks of cement off the 555-foot tall obelisk. “While we love Commander, we cannot continue to allow him to misbehave, especially after he took what remained of the monument and tried to hide it under a rug in the West Wing. This is almost as bad as when he tore apart the Lincoln Memorial in search of a squeaker. We will not let this happen again.” At press time, sources confirmed Commander Biden had a bad case of the runs and was defecating chunks of cement all over the Reflecting Pool."],
                       ["WASHINGTON — Rep. Jim Jordan said Thursday that he has spoken about his bid to be the next speaker of the House with Donald Trump and that he would not support ousting Rep. Matt Gaetz from the Republican caucus even amid anger from some lawmakers after he led Kevin McCarthy's removal."],
                   ]
                   )

# Start the Gradio server for the interface defined above.
app.launch()