|
|
|
"""GradioInterface_v2.ipynb |
|
|
|
Automatically generated by Colaboratory. |
|
""" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch |
|
from transformers import pipeline |
|
from parsezeeklogs import ParseZeekLogs |
|
from transformers import BertTokenizer |
|
import gradio as gr |
|
import pandas as pd |
|
|
|
|
|
pipe = pipeline(model="19kmunz/IoT-23-BERT-Network-Logs-Classification", tokenizer=BertTokenizer.from_pretrained("bert-base-cased")) |
|
|
|
|
|
LOG = "conn.log Output" |
|
HEADER_TABLE = "Headers Table" |
|
SENTENCES = "Sentences" |
|
OUT = "out" |
|
INPUT_TYPES = [LOG, HEADER_TABLE, SENTENCES] |
|
STEPS = [HEADER_TABLE, SENTENCES] |
|
HEADERS=['id.resp_p', 'proto', 'conn_state', 'orig_pkts', 'orig_ip_bytes', 'resp_ip_bytes'] |
|
|
|
|
|
|
|
# Human-readable description of each Zeek field; make_sentence uses these to
# turn a log row into natural-language input for the BERT classifier.
feature_names = {'id.resp_p':'response port',
                 'proto':'transport protocol',
                 'orig_pkts':'number of packets sent by the origin',
                 'conn_state':'connection state',
                 'orig_ip_bytes':'number of IP level bytes sent by the originator',
                 'resp_ip_bytes':'number of IP level bytes sent by the responder'}
|
|
|
|
|
def make_sentence(row):
    """Convert one log row into natural-language sentences, one per feature.

    row: mapping of feature name -> raw value (a pandas Series when invoked
    via DataFrame.apply). The 'label' and '#' entries pass through unchanged;
    every other feature becomes "<description> is <value>." using
    feature_names for the description.
    """
    def _render(feature, value):
        # Bookkeeping columns are copied verbatim, not turned into prose.
        if feature in ('label', '#'):
            return value
        return f"{feature_names[feature]} is {value}."

    return {feature: _render(feature, value) for feature, value in row.items()}
|
|
|
|
|
def make_paragraphs(ser):
    """Join each observation's six feature sentences into a single paragraph.

    ser: pandas Series whose values are dicts of feature name -> sentence
    (as produced by make_sentence). Returns a one-column DataFrame named
    "Sentences" that preserves ser's index.
    """
    field_order = ('id.resp_p', 'proto', 'conn_state',
                   'orig_pkts', 'orig_ip_bytes', 'resp_ip_bytes')
    paragraphs = [" ".join(obs[field] for field in field_order) for obs in ser]
    return pd.Series(paragraphs, name="Sentences", index=ser.index).to_frame()
|
|
|
|
|
def predictFromSentences(sentenceTable):
    """Classify each sentence with the BERT pipeline.

    sentenceTable: DataFrame with a SENTENCES column of paragraph strings.
    Returns {OUT: DataFrame} where the "Output" column maps the model's
    LABEL_0 to "Malicious" and anything else to "Benign".
    """
    predictions = pipe(sentenceTable[SENTENCES].tolist())
    verdicts = []
    for prediction in predictions:
        verdicts.append("Malicious" if prediction['label'] == "LABEL_0" else "Benign")
    return {OUT: pd.DataFrame({"Output": verdicts})}
|
|
|
def predictFromHeaderTable(headerTable):
    """Classify rows given as a table of Zeek header fields.

    headerTable: DataFrame whose columns are the HEADERS fields. Each row is
    converted to sentences, joined into a paragraph, and run through the
    model. Returns {SENTENCES: paragraph DataFrame, OUT: prediction DataFrame}.
    """
    sentence_dicts = headerTable.apply(make_sentence, axis=1)
    sentence_frame = make_paragraphs(sentence_dicts)
    prediction = predictFromSentences(sentence_frame)
    return {SENTENCES: sentence_frame, OUT: prediction[OUT]}
|
|
|
def predictFromFileUpload(fileUpload):
    """Parse an uploaded .csv/.log/.log.labeled file and classify every record.

    fileUpload: filesystem path of the uploaded file (from the gr.File
    component). Returns a dict with HEADER_TABLE (parsed fields), SENTENCES
    (intermediate paragraphs) and OUT (predictions).

    Raises gr.Error when no file was uploaded or the extension is unsupported.
    """
    if fileUpload is None:
        raise gr.Error("No file uploaded")
    fileType = fileUpload.split('.')[-1]
    if fileType == 'csv':
        dataFrame = pd.read_csv(fileUpload, usecols=HEADERS)
    elif fileType in ('log', 'labeled'):
        # Convert the Zeek log to CSV first: ParseZeekLogs yields one CSV
        # line per record (or None for records it skips).
        with open('out.csv', "w") as outfile:
            for log_record in ParseZeekLogs(fileUpload, output_format="csv", safe_headers=False, fields=HEADERS):
                if log_record is not None:
                    outfile.write(log_record + "\n")
        dataFrame = pd.read_csv('out.csv', names=HEADERS)
    else:
        # BUG FIX: previously fell through with dataFrame unbound and crashed
        # with a NameError; surface a user-facing error instead.
        raise gr.Error(f"Unsupported file type: .{fileType}")
    result = predictFromHeaderTable(dataFrame)
    return {
        HEADER_TABLE: dataFrame,
        SENTENCES: result[SENTENCES],
        OUT: result[OUT],
    }
|
|
|
def makeIndexColumn(allInputs):
    """Prepend a '#' column (copied from the index) to each table present.

    allInputs: dict possibly containing SENTENCES, HEADER_TABLE and OUT
    DataFrames. Mutates and returns the same dict with each present table
    reordered so '#' is the first column.
    """
    def _prepend_index(key):
        table = allInputs[key]
        reordered = ['#', *table.columns]
        table['#'] = table.index
        allInputs[key] = table[reordered]

    for key in (SENTENCES, HEADER_TABLE, OUT):
        if key in allInputs:
            _prepend_index(key)
    return allInputs
|
|
|
def predict(inputType, fileUpload, headerTable, sentenceTable, out):
    """Run predictions for whichever input representation is selected.

    inputType: one of INPUT_TYPES. The matching input component's value is
    used; the others are echoed back unchanged. Returns the three values for
    [headerTable, sentenceTable, out] in component order.
    """
    if inputType == LOG:
        results = makeIndexColumn(predictFromFileUpload(fileUpload))
        return [results[HEADER_TABLE], results[SENTENCES], results[OUT]]
    if inputType == HEADER_TABLE:
        results = makeIndexColumn(predictFromHeaderTable(headerTable))
        return [headerTable, results[SENTENCES], results[OUT]]
    if inputType == SENTENCES:
        results = makeIndexColumn(predictFromSentences(sentenceTable))
        return [headerTable, sentenceTable, results[OUT]]
|
|
|
|
|
def updateInputOutputBlocks(inputType, steps):
    """Toggle visibility/editability of the input and intermediate components.

    inputType: the selected entry of INPUT_TYPES; steps: the checked entries
    of STEPS. Only the selected input is editable; intermediate tables are
    shown read-only when requested via steps. Returns updated gr.File,
    gr.Dataframe (headers) and gr.Dataframe (sentences) components.
    """
    # IDIOM FIX: replaced `True if cond else False` and 1/0 ints with plain
    # boolean expressions; gradio treats these flags as booleans.
    fileUpload = gr.File(
        visible=(inputType == LOG),
        interactive=(inputType == LOG)
    )
    headerTable = gr.Dataframe(
        visible=(inputType == HEADER_TABLE or HEADER_TABLE in steps),
        interactive=(inputType == HEADER_TABLE)
    )
    sentenceTable = gr.Dataframe(
        visible=(inputType == SENTENCES or SENTENCES in steps),
        interactive=(inputType == SENTENCES)
    )
    return fileUpload, headerTable, sentenceTable
|
|
|
|
|
# ---- UI layout and event wiring -----------------------------------------
with gr.Blocks() as app:
    gr.Markdown("""
    # Network Log Predictions
    Input log information below and click 'Run' to get predictions from our model!
    Access the settings at the bottom for different types of input and to see inbetween steps.
    """)

    # Input/intermediate components; visibility and interactivity are
    # toggled by updateInputOutputBlocks based on the settings below.
    fileUpload = gr.File(file_types=[".log", ".log.labeled", ".csv"], label="Zeek Log File", visible=False, file_count='single')
    headerTable = gr.Dataframe(row_count = (2, "dynamic"), col_count=(7,"fixed"), headers=['#', *HEADERS], label="Header Inputs", interactive=1)
    sentenceTable = gr.Dataframe(row_count = (2, "dynamic"), col_count=(2, "fixed"), headers=["#", "Sentence"], label="Sentences", interactive=0, visible=False)
    out = gr.Dataframe(row_count = (2, "dynamic"), col_count=(2, "fixed"), headers=['#', "Output"], label="Predictions", column_widths=["60px", "100%"])
    btn = gr.Button("Run")

    # Settings: choose the input representation and which intermediate
    # tables to display.
    with gr.Accordion("Settings", open=False):
        # NOTE(review): the literal "Headers Table" duplicates the
        # HEADER_TABLE constant defined above.
        inputType = gr.Radio(INPUT_TYPES, value="Headers Table", label="Input")
        steps = gr.CheckboxGroup(STEPS, label="Display Intermediary Steps")
        # Re-render the three components whenever either setting changes.
        inputType.change(
            fn=updateInputOutputBlocks,
            inputs=[inputType, steps],
            outputs=[fileUpload, headerTable, sentenceTable]
        )
        steps.change(
            fn=updateInputOutputBlocks,
            inputs=[inputType, steps],
            outputs=[fileUpload, headerTable, sentenceTable]
        )

    # Run the prediction pipeline for the currently selected input type.
    btn.click(
        fn=predict,
        inputs=[inputType, fileUpload, headerTable, sentenceTable, out],
        outputs=[headerTable, sentenceTable, out]
    )

app.launch()