File size: 4,747 Bytes
6c3cf43
 
 
 
 
b420fee
38c2418
6c3cf43
5dcc927
70b86a2
 
 
 
 
 
 
 
38c2418
6c3cf43
38c2418
 
 
 
 
 
6c3cf43
38c2418
 
 
 
 
 
 
 
 
 
 
6c3cf43
 
38c2418
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c3cf43
38c2418
 
 
a285409
70b86a2
b25e082
70b86a2
b25e082
 
 
 
70b86a2
 
b25e082
 
70b86a2
b25e082
70b86a2
 
 
 
b25e082
38c2418
 
6c3cf43
38c2418
b25e082
38c2418
 
 
b420fee
 
 
 
 
 
 
 
 
38c2418
70b86a2
 
 
 
 
b25e082
70b86a2
b25e082
6c3cf43
 
b420fee
 
 
 
 
 
 
 
38c2418
b420fee
 
 
bda744e
b420fee
 
 
 
70b86a2
a285409
38c2418
c135afa
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import gradio as gr
import requests
from bs4 import BeautifulSoup
import pandas as pd
from transformers import pipeline
import plotly.express as px
from datetime import datetime, timedelta

# Absolute path of the spreadsheet that maps company names to ticker symbols.
# NOTE(review): hard-coded to /home/user/app — confirm this matches the
# directory the app is actually deployed in.
file_path = '/home/user/app/Top 2000 Valued Companies with Ticker Symbols.xlsx'
# Loaded once at import time; get_stock_symbol() reads its 'Name' and
# 'Symbol' columns.
companies_df = pd.read_excel(file_path)

def get_stock_symbol(company_name):
    """Return the ticker symbol of the first company matching *company_name*.

    The lookup is a case-insensitive, literal substring search over the
    ``Name`` column of the module-level ``companies_df``.

    Args:
        company_name: Free-text company name (or part of one) typed by the
            user.

    Returns:
        The ``Symbol`` value of the first matching row, or ``None`` when the
        query is blank, is not a string, or matches no company.
    """
    # A blank query is a substring of every name; without this guard it
    # would silently return the first company in the sheet.
    if not isinstance(company_name, str) or not company_name.strip():
        return None

    needle = company_name.strip()
    # regex=False: user input is plain text. With the default regex matching,
    # "Inc." would treat "." as a wildcard and an unbalanced "(" would raise
    # re.error.
    is_match = companies_df['Name'].str.contains(
        needle, case=False, na=False, regex=False
    )
    match = companies_df[is_match]
    if not match.empty:
        return match.iloc[0]['Symbol']
    return None

# Hugging Face `transformers` pipeline for the model named below, built once
# at import time. analyze_sentiment() calls it and reads the 'label' and
# 'score' keys of the first prediction.
sentiment_model = pipeline(model="finiteautomata/bertweet-base-sentiment-analysis")

def encode_special_characters(text):
    """Lower-case *text* and percent-encode it for use as a URL query value.

    Every character outside the RFC 3986 unreserved set (letters, digits,
    ``-``, ``.``, ``_``, ``~``) is percent-encoded, so characters such as
    ``#``, ``%``, ``?`` and ``/`` can no longer truncate or corrupt the URL.
    Non-ASCII characters are encoded as UTF-8. The characters handled by the
    previous hand-written table (``&``, ``=``, ``+`` and space) encode to the
    same values as before.

    Args:
        text: The raw query string.

    Returns:
        The lower-cased, percent-encoded string.
    """
    # Imported locally so this function does not depend on the module's
    # import block.
    from urllib.parse import quote

    # safe='' so that "/" is encoded too (quote() leaves it alone by default).
    return quote(text.lower(), safe='')

def fetch_news(query, num_articles=10, timeout=10):
    """Scrape Google News search results for *query* into a DataFrame.

    Args:
        query: Search text; it is encoded with encode_special_characters().
        num_articles: Maximum number of ``<article>`` elements to read.
        timeout: Seconds to wait for the HTTP response before giving up, so
            a stalled connection cannot hang the caller forever.

    Returns:
        A DataFrame with the columns ``Title``, ``Source``, ``Time``,
        ``Author`` and ``Link`` (one row per article), or an empty DataFrame
        when the request fails or no usable article is found.
    """
    # Imported locally so this function does not depend on the module's
    # import block.
    from urllib.parse import urljoin

    encoded_query = encode_special_characters(query)
    url = f"https://news.google.com/search?q={encoded_query}&hl=en-US&gl=in&ceid=US%3Aen&num={num_articles}"

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching news: {e}")
        return pd.DataFrame()

    soup = BeautifulSoup(response.text, 'html.parser')

    news_data = []
    for article in soup.find_all('article')[:num_articles]:
        # Skip articles without a usable link instead of crashing on
        # None['href'] / a missing href attribute.
        anchor = article.find('a', href=True)
        if anchor is None:
            continue
        # Resolve relative hrefs ("./articles/...", "./read/...") against the
        # site root; absolute URLs pass through unchanged.
        link = urljoin("https://news.google.com/", anchor['href'])

        # The fields are picked by position in the article's text lines; this
        # depends on the page layout, so absent positions become 'Missing'.
        text_parts = article.get_text(separator='\n').split('\n')

        def part(index, parts=text_parts):
            return parts[index] if len(parts) > index else 'Missing'

        author = part(4)
        if len(text_parts) > 4:
            # Drop a leading "By " byline prefix when present.
            author = author.split('By ')[-1]

        news_data.append({
            'Title': part(2),
            'Source': part(0),
            'Time': part(3),
            'Author': author,
            'Link': link,
        })

    return pd.DataFrame(news_data)

def analyze_sentiment(text):
    """Run the module-level sentiment pipeline on *text*.

    Args:
        text: The string to classify.

    Returns:
        A ``(label, score)`` tuple taken from the first prediction the
        pipeline returns.
    """
    predictions = sentiment_model(text)
    top_prediction = predictions[0]
    label = top_prediction['label']
    score = top_prediction['score']
    return label, score

def fetch_stock_data(symbol):
    """Fetch daily price data for *symbol* from Alpha Vantage via RapidAPI.

    Args:
        symbol: Ticker symbol to query.

    Returns:
        A DataFrame indexed by date (oldest first) with numeric ``Open``,
        ``High``, ``Low``, ``Close`` and ``Volume`` columns, or an empty
        DataFrame when the request fails or the response has no
        ``"Time Series (Daily)"`` section.
    """
    # Imported locally so this function does not depend on the module's
    # import block.
    import os

    url = "https://alpha-vantage.p.rapidapi.com/query"
    querystring = {"function":"TIME_SERIES_DAILY", "symbol":symbol, "outputsize":"compact", "datatype":"json"}
    headers = {
        # SECURITY: prefer supplying the key through the RAPIDAPI_KEY
        # environment variable. The literal fallback only keeps existing
        # deployments working; it is committed to source and should be
        # rotated and removed.
        "x-rapidapi-key": os.environ.get(
            "RAPIDAPI_KEY",
            "e078dae417mshb13ddc2d8149768p1608e9jsn888ce49e8554",
        ),
        "x-rapidapi-host": "alpha-vantage.p.rapidapi.com"
    }

    try:
        # A timeout stops a stalled connection from blocking the UI callback.
        response = requests.get(url, headers=headers, params=querystring, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on older requests versions.
        print(f"Error fetching stock data: {e}")
        return pd.DataFrame()

    series = data.get("Time Series (Daily)") if isinstance(data, dict) else None
    if not series:
        return pd.DataFrame()

    # The payload maps date -> field dict; transpose so each row is one date.
    stock_data = pd.DataFrame(series).T
    stock_data.index = pd.to_datetime(stock_data.index)
    stock_data.columns = ["Open", "High", "Low", "Close", "Volume"]
    # The values arrive as strings; convert them so the plot gets a numeric
    # axis, and order the rows chronologically.
    stock_data = stock_data.apply(pd.to_numeric, errors="coerce").sort_index()
    return stock_data

def news_and_analysis(query):
    """Build the outputs for the "Analyze" button.

    Fetches news for *query*, scores each title with analyze_sentiment(),
    draws a bar chart of the scores and, when the query matches a known
    company, a line chart of its closing price.

    Args:
        query: Topic or company name entered by the user.

    Returns:
        A ``(table, sentiment_figure, stock_figure)`` tuple. ``table`` is
        always a DataFrame: the scored news, or a one-row ``Message`` frame
        when there is nothing to show. The figures are ``None`` when they
        cannot be produced.
    """
    # The first output feeds a gr.DataFrame, so the "nothing found" message
    # is wrapped in a DataFrame rather than returned as a bare string, which
    # is not tabular data.
    no_news = pd.DataFrame({'Message': ["No news articles found."]})

    if not query or not query.strip():
        return no_news, None, None

    news_df = fetch_news(query)

    if news_df.empty:
        return no_news, None, None

    scored = news_df['Title'].apply(analyze_sentiment)
    news_df['Sentiment'], news_df['Sentiment_Score'] = zip(*scored)

    sentiment_fig = px.bar(
        news_df,
        x='Time',
        y='Sentiment_Score',
        color='Sentiment',
        # The model card for the configured sentiment model lists the labels
        # POS / NEU / NEG; the long names are kept so a model that emits them
        # is coloured the same way.
        color_discrete_map={
            'POS': 'green', 'NEU': 'gray', 'NEG': 'red',
            'positive': 'green', 'neutral': 'gray', 'negative': 'red',
        },
        title='News Sentiment Over Time',
        labels={'Time': 'Publication Time', 'Sentiment_Score': 'Sentiment Score'}
    )

    stock_symbol = get_stock_symbol(query)
    if stock_symbol:
        stock_data = fetch_stock_data(stock_symbol)
        if not stock_data.empty:
            stock_fig = px.line(stock_data, x=stock_data.index, y='Close', title=f'{stock_symbol} Stock Price')
            return news_df, sentiment_fig, stock_fig

    # No matching company or no price data: leave the stock plot empty.
    return news_df, sentiment_fig, None

# Gradio UI: a text box and button on the left, the news table and the two
# plots on the right.
with gr.Blocks() as demo:
    # Page heading and short usage instructions.
    gr.Markdown(
        """
        # Financial News Sentiment Analysis
        
        Analyze the sentiment of news articles related to financial topics or companies. 
        Enter a topic or company name to get started.
        """
    )
    
    with gr.Row():
        # Left column: user input.
        with gr.Column():
            topic = gr.Textbox(label="Enter a financial topic or company name", placeholder="e.g., Apple Inc.")
            analyze_btn = gr.Button(value="Analyze")
        
        # Right column: results, in the same order as the tuple returned by
        # news_and_analysis().
        with gr.Column():
            news_output = gr.DataFrame(label="News and Sentiment Analysis")
            sentiment_plot = gr.Plot(label="Sentiment Analysis")
            stock_plot = gr.Plot(label="Stock Price Movement")
    
    # Clicking the button passes the text box value to news_and_analysis()
    # and routes its three return values to the three output components.
    analyze_btn.click(
        news_and_analysis,
        inputs=[topic],
        outputs=[news_output, sentiment_plot, stock_plot]
    )

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()