|
|
|
"""GradioInterface_v2.ipynb |
|
|
|
Automatically generated by Colaboratory. |
|
""" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch |
|
from transformers import pipeline |
|
from parsezeeklogs import ParseZeekLogs |
|
from transformers import BertTokenizer |
|
import gradio as gr |
|
import pandas as pd |
|
|
|
|
|
pipe = pipeline(model="19kmunz/IoT-23-BERT-Network-Logs-Classification", tokenizer=BertTokenizer.from_pretrained("bert-base-cased")) |
|
|
|
|
|
LOG = "conn.log Output" |
|
HEADER_TABLE = "Headers Table" |
|
SENTENCES = "Sentences" |
|
OUT = "out" |
|
INPUT_TYPES = [LOG, HEADER_TABLE, SENTENCES] |
|
STEPS = [HEADER_TABLE, SENTENCES] |
|
HEADERS=['id.resp_p', 'proto', 'conn_state', 'orig_pkts', 'orig_ip_bytes', 'resp_ip_bytes'] |
|
|
|
|
|
|
|
# Human-readable description of each Zeek field; make_sentence uses these to
# turn a log row into natural-language input for the BERT classifier.
feature_names = {'id.resp_p':'response port',
                 'proto':'transport protocol',
                 'orig_pkts':'number of packets sent by the origin',
                 'conn_state':'connection state',
                 'orig_ip_bytes':'number of IP level bytes sent by the originator',
                 'resp_ip_bytes':'number of IP level bytes sent by the responder'}
|
|
|
|
|
def make_sentence(row):
    """Convert one log row into natural-language sentences, one per feature.

    row: mapping of feature name -> raw value (a pandas Series when invoked
    via DataFrame.apply). The 'label' and '#' entries pass through unchanged;
    every other feature becomes "<description> is <value>." using
    feature_names for the description.
    """
    def _render(feature, value):
        # Bookkeeping columns are copied verbatim, not turned into prose.
        if feature in ('label', '#'):
            return value
        return f"{feature_names[feature]} is {value}."

    return {feature: _render(feature, value) for feature, value in row.items()}
|
|
|
|
|
def make_paragraphs(ser):
    """Join each observation's six feature sentences into a single paragraph.

    ser: pandas Series whose values are dicts of feature name -> sentence
    (as produced by make_sentence). Returns a one-column DataFrame named
    "Sentences" that preserves ser's index.
    """
    field_order = ('id.resp_p', 'proto', 'conn_state',
                   'orig_pkts', 'orig_ip_bytes', 'resp_ip_bytes')
    paragraphs = [" ".join(obs[field] for field in field_order) for obs in ser]
    return pd.Series(paragraphs, name="Sentences", index=ser.index).to_frame()
|
|
|
|
|
def predictFromSentences(sentenceTable):
    """Classify each sentence with the BERT pipeline.

    sentenceTable: DataFrame with a SENTENCES column of paragraph strings.
    Returns {OUT: DataFrame} where the "Output" column maps the model's
    LABEL_0 to "Malicious" and anything else to "Benign".
    """
    predictions = pipe(sentenceTable[SENTENCES].tolist())
    verdicts = []
    for prediction in predictions:
        verdicts.append("Malicious" if prediction['label'] == "LABEL_0" else "Benign")
    return {OUT: pd.DataFrame({"Output": verdicts})}
|
|
|
def predictFromHeaderTable(headerTable):
    """Classify rows given as a table of Zeek header fields.

    headerTable: DataFrame whose columns are the HEADERS fields. Each row is
    converted to sentences, joined into a paragraph, and run through the
    model. Returns {SENTENCES: paragraph DataFrame, OUT: prediction DataFrame}.
    """
    sentence_dicts = headerTable.apply(make_sentence, axis=1)
    sentence_frame = make_paragraphs(sentence_dicts)
    prediction = predictFromSentences(sentence_frame)
    return {SENTENCES: sentence_frame, OUT: prediction[OUT]}
|
|
|
def predictFromFileUpload(fileUpload):
    """Parse an uploaded .csv/.log/.log.labeled file and classify every record.

    fileUpload: filesystem path of the uploaded file (from the gr.File
    component). Returns a dict with HEADER_TABLE (parsed fields), SENTENCES
    (intermediate paragraphs) and OUT (predictions).

    Raises gr.Error when no file was uploaded or the extension is unsupported.
    """
    if fileUpload is None:
        raise gr.Error("No file uploaded")
    fileType = fileUpload.split('.')[-1]
    if fileType == 'csv':
        dataFrame = pd.read_csv(fileUpload, usecols=HEADERS)
    elif fileType in ('log', 'labeled'):
        # Convert the Zeek log to CSV first: ParseZeekLogs yields one CSV
        # line per record (or None for records it skips).
        with open('out.csv', "w") as outfile:
            for log_record in ParseZeekLogs(fileUpload, output_format="csv", safe_headers=False, fields=HEADERS):
                if log_record is not None:
                    outfile.write(log_record + "\n")
        dataFrame = pd.read_csv('out.csv', names=HEADERS)
    else:
        # BUG FIX: previously fell through with dataFrame unbound and crashed
        # with a NameError; surface a user-facing error instead.
        raise gr.Error(f"Unsupported file type: .{fileType}")
    result = predictFromHeaderTable(dataFrame)
    return {
        HEADER_TABLE: dataFrame,
        SENTENCES: result[SENTENCES],
        OUT: result[OUT],
    }
|
|
|
def makeIndexColumn(allInputs):
    """Prepend a '#' column (copied from the index) to each table present.

    allInputs: dict possibly containing SENTENCES, HEADER_TABLE and OUT
    DataFrames. Mutates and returns the same dict with each present table
    reordered so '#' is the first column.
    """
    def _prepend_index(key):
        table = allInputs[key]
        reordered = ['#', *table.columns]
        table['#'] = table.index
        allInputs[key] = table[reordered]

    for key in (SENTENCES, HEADER_TABLE, OUT):
        if key in allInputs:
            _prepend_index(key)
    return allInputs
|
|
|
def predict(inputType, fileUpload, headerTable, sentenceTable, out):
    """Run predictions for whichever input representation is selected.

    inputType: one of INPUT_TYPES. The matching input component's value is
    used; the others are echoed back unchanged. Returns the three values for
    [headerTable, sentenceTable, out] in component order.
    """
    if inputType == LOG:
        results = makeIndexColumn(predictFromFileUpload(fileUpload))
        return [results[HEADER_TABLE], results[SENTENCES], results[OUT]]
    if inputType == HEADER_TABLE:
        results = makeIndexColumn(predictFromHeaderTable(headerTable))
        return [headerTable, results[SENTENCES], results[OUT]]
    if inputType == SENTENCES:
        results = makeIndexColumn(predictFromSentences(sentenceTable))
        return [headerTable, sentenceTable, results[OUT]]
|
|
|
|
|
def updateInputOutputBlocks(inputType, steps):
    """Toggle visibility/editability of the input and intermediate components.

    inputType: the selected entry of INPUT_TYPES; steps: the checked entries
    of STEPS. Only the selected input is editable; intermediate tables are
    shown read-only when requested via steps. Returns updated gr.File,
    gr.Dataframe (headers) and gr.Dataframe (sentences) components.
    """
    # IDIOM FIX: replaced `True if cond else False` and 1/0 ints with plain
    # boolean expressions; gradio treats these flags as booleans.
    fileUpload = gr.File(
        visible=(inputType == LOG),
        interactive=(inputType == LOG)
    )
    headerTable = gr.Dataframe(
        visible=(inputType == HEADER_TABLE or HEADER_TABLE in steps),
        interactive=(inputType == HEADER_TABLE)
    )
    sentenceTable = gr.Dataframe(
        visible=(inputType == SENTENCES or SENTENCES in steps),
        interactive=(inputType == SENTENCES)
    )
    return fileUpload, headerTable, sentenceTable
|
|
|
|
|
# ---- UI layout and event wiring -----------------------------------------
with gr.Blocks() as app:
    gr.Markdown("""
    # Network Log Predictions
    Input log information below and click 'Run' to get predictions from our model!
    Access the settings at the bottom for different types of input and to see inbetween steps.
    """)

    # Input/intermediate components; visibility and interactivity are
    # toggled by updateInputOutputBlocks based on the settings below.
    fileUpload = gr.File(file_types=[".log", ".log.labeled", ".csv"], label="Zeek Log File", visible=False, file_count='single')
    headerTable = gr.Dataframe(row_count = (2, "dynamic"), col_count=(7,"fixed"), headers=['#', *HEADERS], label="Header Inputs", interactive=1)
    sentenceTable = gr.Dataframe(row_count = (2, "dynamic"), col_count=(2, "fixed"), headers=["#", "Sentence"], label="Sentences", interactive=0, visible=False)
    out = gr.Dataframe(row_count = (2, "dynamic"), col_count=(2, "fixed"), headers=['#', "Output"], label="Predictions", column_widths=["60px", "100%"])
    btn = gr.Button("Run")

    # Settings: choose the input representation and which intermediate
    # tables to display.
    with gr.Accordion("Settings", open=False):
        # NOTE(review): the literal "Headers Table" duplicates the
        # HEADER_TABLE constant defined above.
        inputType = gr.Radio(INPUT_TYPES, value="Headers Table", label="Input")
        steps = gr.CheckboxGroup(STEPS, label="Display Intermediary Steps")
        # Re-render the three components whenever either setting changes.
        inputType.change(
            fn=updateInputOutputBlocks,
            inputs=[inputType, steps],
            outputs=[fileUpload, headerTable, sentenceTable]
        )
        steps.change(
            fn=updateInputOutputBlocks,
            inputs=[inputType, steps],
            outputs=[fileUpload, headerTable, sentenceTable]
        )

    # Run the prediction pipeline for the currently selected input type.
    btn.click(
        fn=predict,
        inputs=[inputType, fileUpload, headerTable, sentenceTable, out],
        outputs=[headerTable, sentenceTable, out]
    )

app.launch()