|
|
|
import glob, os, sys; |
|
sys.path.append('../utils') |
|
|
|
|
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
import numpy as np |
|
import pandas as pd |
|
import streamlit as st |
|
from utils.target_classifier import load_targetClassifier, target_classification |
|
import logging |
|
logger = logging.getLogger(__name__) |
|
from utils.config import get_classifier_params |
|
from utils.preprocessing import paraLengthCheck |
|
from io import BytesIO |
|
import xlsxwriter |
|
import plotly.express as px |
|
from utils.target_classifier import label_dict |
|
from appStore.rag import run_query |
|
|
|
|
|
# Name under which this module's classifier is configured; it is also used
# to build the session-state key that holds the loaded model.
classifier_identifier = "target"

# Settings for that classifier; 'model_name' and 'threshold' are read from
# this mapping further down in the module.
params = get_classifier_params(classifier_identifier)
|
|
|
@st.cache_data
def to_excel(df, sectorlist):
    """Serialise ``df`` to an in-memory ``.xlsx`` file with drop-down cells.

    The frame is written to a sheet named ``Sheet1`` (header in row 1, no
    index column). List-type data validation is then attached to the data
    rows of:

    * column S -- choices ``'No'``, ``'Yes'``, ``'Discard'``
    * columns T, U, V, W and X -- the entries of ``sectorlist`` plus
      ``'Blank'``

    Args:
        df: DataFrame to export.
        sectorlist: list of strings offered in the T-X drop-downs.

    Returns:
        bytes: the content of the generated workbook.
    """
    row_count = len(df)
    # Row 1 holds the header, so the data occupies rows 2 .. row_count + 1.
    last_row = row_count + 1
    sector_choices = sectorlist + ['Blank']

    output = BytesIO()
    # The context manager finalises the workbook on exit. ExcelWriter.save()
    # was deprecated in pandas 1.5 and removed in 2.0, so it is not used.
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        df.to_excel(writer, index=False, sheet_name='Sheet1')
        worksheet = writer.sheets['Sheet1']

        # With no data rows there is nothing to validate, and a range such
        # as 'S2:S1' would be inverted.
        if row_count:
            worksheet.data_validation(
                'S2:S{}'.format(last_row),
                {'validate': 'list',
                 'source': ['No', 'Yes', 'Discard']})

            # Each sector column gets its own single-column range.
            for column in ('T', 'U', 'V', 'W', 'X'):
                worksheet.data_validation(
                    '{0}2:{0}{1}'.format(column, last_row),
                    {'validate': 'list',
                     'source': sector_choices})

    return output.getvalue()
|
|
|
def app():
    """Classify the paragraphs stored in session state and store the result.

    Does nothing unless ``key1`` is present in ``st.session_state``.
    Otherwise it:

    1. keeps only rows whose 'Vulnerability Label' value is non-empty and
       does not contain 'Other',
    2. loads the target classifier named in ``params['model_name']`` and
       stores it in session state under ``'<classifier_identifier>_classifier'``,
    3. runs ``target_classification`` with ``params['threshold']``,
    4. renames the 'Target Label' column to
       'Specific action/target/measure mentioned',
    5. saves the resulting frame in session state as ``key2``.
    """

    def _has_relevant_label(labels):
        # Cells are presumably list-like collections of label strings --
        # TODO confirm against the code that populates key1.
        return len(labels) > 0 and 'Other' not in labels

    with st.container():
        if 'key1' not in st.session_state:
            return

        paragraphs = st.session_state['key1']
        keep_mask = paragraphs['Vulnerability Label'].apply(_has_relevant_label)
        paragraphs = paragraphs[keep_mask]

        # Load the model and keep it in session state.
        model = load_targetClassifier(classifier_name=params['model_name'])
        st.session_state[f'{classifier_identifier}_classifier'] = model

        classified = target_classification(
            haystack_doc=paragraphs,
            threshold=params['threshold'],
        )

        column_renames = {
            'Target Label': 'Specific action/target/measure mentioned',
        }
        classified.rename(columns=column_renames, inplace=True)

        st.session_state['key2'] = classified
|
|
|
|
|
def target_display():
    """Render the classified paragraphs and a summary per vulnerability label.

    Reads the frame stored in ``st.session_state['key2']`` and writes it to
    the page. Rows whose 'Specific action/target/measure mentioned' value is
    'YES' are then exploded on 'Vulnerability Label', and their 'text'
    values are joined with '; ' per label. For each label the label itself
    is written to the page and ``run_query`` is called with the joined text
    as ``context`` and the label as ``label``.
    """
    results = st.session_state['key2']
    st.write(results)

    # Keep only the rows flagged 'YES', one row per label.
    flagged = results.query("`Specific action/target/measure mentioned` == 'YES'")
    per_label_rows = flagged.explode('Vulnerability Label')

    # One row per label, with all of its paragraph texts joined together.
    text_by_label = per_label_rows.groupby('Vulnerability Label')['text']
    summary = text_by_label.agg('; '.join).reset_index()

    st.markdown("----")
    st.markdown('**DOCUMENT FINDINGS SUMMARY BY VULNERABILITY LABEL:**')

    label_text_pairs = zip(summary['Vulnerability Label'], summary['text'])
    for vulnerability_label, joined_text in label_text_pairs:
        st.write(vulnerability_label)
        run_query(context=joined_text, label=vulnerability_label)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|