Spaces:

rafaldembski
/

ScamDetector

Sleeping

App Files Files Community

ScamDetector / pages /Statistics.py

rafaldembski

Update pages/Statistics.py

faeea2e verified about 2 months ago

raw

history blame

7.3 kB

	import streamlit as st
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	import json
	import re
	from datetime import datetime

	# UI texts for the "Statistics" page, keyed first by language name and then
	# by text identifier. All three languages define the same set of identifiers.
	page_translations = {
	    "Polish": {
	        "page_title": "📊 Statystyki",
	        "page_icon": "📈",
	        "header": "📊 Statystyki Aplikacji",
	        "description": "Poniżej znajdują się statystyki analizy wiadomości w aplikacji.",
	        "total_analyses": "Liczba przeanalizowanych wiadomości",
	        "total_frauds_detected": "Wykryte oszustwa",
	        "fraud_percentage": "Procent oszustw",
	        "history_title": "Historia analizowanych wiadomości",
	        "frauds_over_time": "Liczba wykrytych oszustw w czasie",
	        "risk_distribution": "Rozkład ocen ryzyka oszustwa",
	        "fraud_country_distribution": "Rozkład oszustw według krajów",
	        "heatmap_title": "Mapa ciepła oszustw w czasie",
	        "fraud_vs_nonfraud": "Procentowy podział: Oszustwa vs Bezpieczne",
	        "no_data": "Brak dostępnych danych do wyświetlenia.",
	    },
	    "German": {
	        "page_title": "📊 Statistiken",
	        "page_icon": "📈",
	        "header": "📊 Anwendungsstatistiken",
	        "description": "Nachfolgend finden Sie die Statistiken zur Nachrichtenanalyse in der Anwendung.",
	        "total_analyses": "Anzahl der analysierten Nachrichten",
	        "total_frauds_detected": "Erkannte Betrügereien",
	        "fraud_percentage": "Betrugsprozentsatz",
	        "history_title": "Analyseverlauf der Nachrichten",
	        "frauds_over_time": "Anzahl der erkannten Betrügereien im Laufe der Zeit",
	        "risk_distribution": "Verteilung der Betrugsrisikobewertungen",
	        "fraud_country_distribution": "Betrug nach Ländern",
	        "heatmap_title": "Heatmap der Betrügereien im Laufe der Zeit",
	        "fraud_vs_nonfraud": "Prozentanteil: Betrug vs Sichere Nachrichten",
	        "no_data": "Keine Daten zur Anzeige verfügbar.",
	    },
	    "English": {
	        "page_title": "📊 Statistics",
	        "page_icon": "📈",
	        "header": "📊 Application Statistics",
	        "description": "Below are the statistics of message analysis in the app.",
	        "total_analyses": "Total Messages Analyzed",
	        "total_frauds_detected": "Frauds Detected",
	        "fraud_percentage": "Fraud Percentage",
	        "history_title": "History of Analyzed Messages",
	        "frauds_over_time": "Number of Detected Frauds Over Time",
	        "risk_distribution": "Distribution of Fraud Risk Scores",
	        "fraud_country_distribution": "Fraud Distribution by Countries",
	        "heatmap_title": "Fraud Heatmap Over Time",
	        "fraud_vs_nonfraud": "Fraud vs Safe Messages Percentage",
	        "no_data": "No data available to display.",
	    },
	}

	# Loads the aggregate counters shown on the statistics page.
	def get_stats():
	    """Return the analysis counters stored in ``stats.json``.

	    The file is opened by its relative name, i.e. resolved against the
	    current working directory, and read as UTF-8.

	    Returns:
	        A dict that always contains the keys ``"total_analyses"`` and
	        ``"total_frauds_detected"``. Values found in the file override the
	        zero defaults; any additional keys in the file are passed through.
	        When the file is missing, cannot be decoded, or does not hold a
	        JSON object, the zeroed defaults are returned.
	    """
	    defaults = {"total_analyses": 0, "total_frauds_detected": 0}
	    try:
	        # Explicit UTF-8 so the result does not depend on the platform's
	        # locale encoding.
	        with open('stats.json', 'r', encoding='utf-8') as f:
	            loaded = json.load(f)
	    except (FileNotFoundError, json.JSONDecodeError, UnicodeDecodeError):
	        return defaults

	    # Valid JSON that is not an object (e.g. a list or number) carries no
	    # counters; treat it like a corrupt file.
	    if not isinstance(loaded, dict):
	        return defaults

	    # Fill in counters the file does not define so callers can index both
	    # keys without a KeyError.
	    return {**defaults, **loaded}

	# Loads the per-message analysis history shown on the statistics page.
	def get_history():
	    """Return the analysis history stored in ``history.json``.

	    The file is opened by its relative name, i.e. resolved against the
	    current working directory, and read as UTF-8.

	    Returns:
	        The decoded JSON list. An empty list is returned when the file is
	        missing, cannot be decoded, or does not hold a JSON array.
	    """
	    try:
	        # Explicit UTF-8 so the result does not depend on the platform's
	        # locale encoding.
	        with open('history.json', 'r', encoding='utf-8') as f:
	            loaded = json.load(f)
	    except (FileNotFoundError, json.JSONDecodeError, UnicodeDecodeError):
	        return []

	    # Anything other than a JSON array is not a usable history; report
	    # "no history" rather than handing an unexpected type to the caller.
	    if not isinstance(loaded, list):
	        return []
	    return loaded

	# Message shown when the history has no 'country' column, per UI language.
	# The Polish text is the one the page originally showed for every language.
	_NO_COUNTRY_DATA_MESSAGES = {
	    'Polish': "Brak danych o krajach numerów telefonów.",
	    'German': "Keine Länderdaten zu den Telefonnummern verfügbar.",
	    'English': "No country data available for phone numbers.",
	}

	# Finds a rating written as "<digits>/10" anywhere in the assessment text.
	_RISK_SCORE_PATTERN = re.compile(r'(\d+)/10')


	def _extract_risk_score(risk_assessment):
	    """Return the integer N from the first "N/10" in *risk_assessment*.

	    Returns 0 when the value is not a string (for example a missing cell
	    in the history table) or contains no "N/10" rating.
	    """
	    if not isinstance(risk_assessment, str):
	        return 0
	    match = _RISK_SCORE_PATTERN.search(risk_assessment)
	    return int(match.group(1)) if match else 0


	# Main entry point of the "Statistics" page.
	def main(language):
	    """Render the statistics page in the given language.

	    Shows three headline metrics and a fraud / non-fraud pie chart built
	    from ``get_stats()``. When ``get_history()`` returns entries, it also
	    shows the history table, a pie chart of risk scores, a heatmap of
	    entries per date and, if the history has a ``country`` column, a
	    choropleth map. Otherwise a "no data" notice is shown.

	    Args:
	        language: Key into ``page_translations`` selecting the UI texts.

	    Raises:
	        KeyError: If *language* is not a key of ``page_translations``.
	    """
	    translations = page_translations[language]

	    # Load data from the JSON files.
	    stats = get_stats()
	    history = get_history()

	    # Key metrics; a missing or null counter is treated as 0 so the
	    # comparisons and arithmetic below cannot fail.
	    total_analyses = stats.get("total_analyses", 0) or 0
	    total_frauds_detected = stats.get("total_frauds_detected", 0) or 0

	    # Header and metrics.
	    st.title(translations['header'])
	    st.markdown(translations['description'])

	    col1, col2, col3 = st.columns(3)
	    col1.metric(label=translations['total_analyses'], value=total_analyses)
	    col2.metric(label=translations['total_frauds_detected'], value=total_frauds_detected)

	    # Guard against division by zero: with no analyses the share is 0%.
	    if total_analyses > 0:
	        fraud_percentage = (total_frauds_detected / total_analyses) * 100
	    else:
	        fraud_percentage = 0

	    col3.metric(label=translations['fraud_percentage'], value=f"{fraud_percentage:.2f}%")

	    # Pie chart of fraud vs. non-fraud messages.
	    fraud_data = [total_frauds_detected, total_analyses - total_frauds_detected]
	    fraud_labels = ['Fraud', 'Non-Fraud']
	    fig_fraud_pie = go.Figure(data=[go.Pie(labels=fraud_labels, values=fraud_data, hole=.3, marker_colors=['#FF6347', '#4682B4'])])
	    fig_fraud_pie.update_layout(title_text=translations['fraud_vs_nonfraud'])
	    st.plotly_chart(fig_fraud_pie)

	    # History table and the charts derived from it.
	    if history:
	        st.markdown(f"### {translations['history_title']}")
	        df_history = pd.DataFrame(history)

	        # Parse timestamps and derive a calendar-date column for grouping.
	        df_history['timestamp'] = pd.to_datetime(df_history['timestamp'])
	        df_history['date'] = df_history['timestamp'].dt.date

	        # History table.
	        st.dataframe(df_history[['timestamp', 'phone_number', 'risk_assessment']], height=300)

	        # Pie chart of risk scores parsed from the assessment text.
	        st.markdown(f"### {translations['risk_distribution']}")

	        df_history['risk_score'] = df_history['risk_assessment'].apply(_extract_risk_score)
	        risk_data = df_history['risk_score'].value_counts().sort_index()
	        risk_labels = [f'Risk {i}/10' for i in risk_data.index]
	        fig_risk_pie = go.Figure(data=[go.Pie(labels=risk_labels, values=risk_data, hole=.3, marker_colors=px.colors.sequential.RdBu)])
	        fig_risk_pie.update_layout(title_text=translations['risk_distribution'])
	        st.plotly_chart(fig_risk_pie)

	        # Heatmap of the number of history entries per date.
	        st.markdown(f"### {translations['heatmap_title']}")
	        heatmap_data = df_history.groupby('date').size().reset_index(name='count')
	        fig_heatmap = px.density_heatmap(heatmap_data, x='date', y='count', nbinsx=20, nbinsy=20, title=translations['heatmap_title'], color_continuous_scale='Blues')
	        st.plotly_chart(fig_heatmap)

	        # Country map; needs a 'country' column in the history records.
	        st.markdown(f"### {translations['fraud_country_distribution']}")
	        if 'country' in df_history.columns:
	            country_data = df_history.groupby('country').size().reset_index(name='counts')
	            fig_map = px.choropleth(country_data, locations='country', locationmode='country names', color='counts',
	                                    title=translations['fraud_country_distribution'], color_continuous_scale=px.colors.sequential.Plasma)
	            st.plotly_chart(fig_map)
	        else:
	            # Localized notice; falls back to the original Polish text for
	            # languages without an entry.
	            st.info(_NO_COUNTRY_DATA_MESSAGES.get(language, _NO_COUNTRY_DATA_MESSAGES['Polish']))
	    else:
	        st.info(translations['no_data'])

	# Do not add "if __name__ == '__main__':" in sub-pages