Spaces:

rameshmoorthy
/

NAC-Product-Clustering-analysis

Sleeping

App Files Files Community

NAC-Product-Clustering-analysis / app.py

rameshmoorthy

Update app.py

a39c7da verified 9 months ago

raw

history blame contribute delete

10.1 kB

	import gradio as gr
	import pandas as pd
	from functions import process_file_bm25 , process_file_bert , generate_plot , generate
	from tempfile import NamedTemporaryFile
	#------------------------------------------------------

	# Create the state object
	# state = gr.State()
	# state.df_bm25 = pd.DataFrame({"Products": [1, 2, 3], "column2": ["A", "B", "C"]})
	# state.df_bert = pd.DataFrame({"Products": [1, 2, 3], "column2": ["A", "B", "C"]})

	# state.df_topics_bert = pd.DataFrame({"Topic": [1, 2, 3], "column2": ["A", "B", "C"]})
	# state.df_topics_bm25 = pd.DataFrame({"Topic": [1, 2, 3], "column2": ["A", "B", "C"]})
	# gr.State holders seeded with small placeholder frames; the topics holder is
	# read further down (via .value) to populate the analysis dropdowns.
	_placeholder_products = pd.DataFrame(
	    {"Products": [1, 2, 3], "column2": ["A", "B", "C"]}
	)
	_placeholder_topics = pd.DataFrame(
	    {"Topic": [1, 2, 3], "column2": ["A", "B", "C"]}
	)
	df_bm25 = gr.State(value=_placeholder_products)
	df_topics_bm25 = gr.State(value=_placeholder_topics)


	with gr.Blocks() as bm25:
	    # TF-IDF/BM25 clustering tab: upload a file, choose parameters, run the
	    # clustering and show the resulting tables and plots.
	    # NOTE(review): the nesting of the layout containers is reconstructed from
	    # statement order - confirm it against the intended page layout.
	    with gr.Row():
	        with gr.Column():
	            # gr.HTML takes the markup as its value; no extra flag is needed
	            # to mark the content as HTML, and no try/except is wrapped
	            # around it so that a construction error is not hidden.
	            gr.HTML(
	                """
	<h1 style="text-align: center; font-size: 24px; font-weight: bold; color: blue;">NAC Product Clustering Analysis</h1>
	<p style="text-align: center; font-size: 18px; color: green;">This module helps to quickly cluster the products in any excel/csv file for product wise analysis for any NAC(National Assessment centre) of CBIC Indian Customs.</p>
	"""
	            )

	            gr.Markdown(
	                """
	# Select a CSV/Excel file with column as 'products'
	""")
	            inputfile = gr.File(file_types=['.csv', '.xlsx'], label="Upload CSV/Excel file")

	            def confirmation(file):
	                """Validate an uploaded file and stage it as an .xlsx copy.

	                Args:
	                    file: The uploaded file, given either as a path string
	                        or as an object whose ``name`` attribute is the path.

	                Returns:
	                    A ``(path, message)`` tuple. ``path`` is the staged
	                    .xlsx file on success and ``None`` when the upload is
	                    rejected; ``message`` is the status text to display.
	                """
	                if file is None:
	                    return None, "No file received. Please upload a CSV or Excel file."

	                # Read through the path so both plain path strings and file
	                # wrappers exposing ``.name`` are handled the same way.
	                path = getattr(file, "name", file)
	                lowered = str(path).lower()  # extension match ignores case

	                if lowered.endswith('.csv'):
	                    df = pd.read_csv(path)
	                elif lowered.endswith(('.xls', '.xlsx')):
	                    df = pd.read_excel(path)
	                else:
	                    return None, "Unsupported file format. Please provide a CSV or Excel file."

	                # Case-insensitive check; str() keeps non-string column
	                # labels (e.g. integers) from raising.
	                if not any(str(col).lower() == 'products' for col in df.columns):
	                    return None, "The input file must have a column named 'products'."

	                # Reserve a unique path, then close the handle before pandas
	                # writes to it so no open descriptor is left behind.
	                with NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
	                    staged_path = temp_file.name
	                df.to_excel(staged_path, index=False)
	                return staged_path, 'File uploaded! Press Cluster button'

	            def download_df():
	                """Return the DataFrame currently held by ``df_bm25``.

	                NOTE(review): nothing visible in this file updates
	                ``df_bm25`` after it is created, so this presumably still
	                yields the placeholder frame - confirm where the clustered
	                result is meant to be stored.
	                """
	                # Hand back the stored DataFrame, not the gr.State wrapper.
	                df1 = df_bm25.value
	                print(df1)
	                return df1

	            out = gr.Textbox()
	            mode = gr.Radio(
	                ["Automated clustering", "Manually choose parameters"],
	                label="Type of algorithm",
	                value="Automated clustering",
	                info="Choose any mode u want",
	            )
	            inputfile.upload(
	                confirmation,
	                inputs=[inputfile],
	                outputs=[gr.File(label="Uploaded File"), out],
	            )
	            with gr.Row():
	                min_cluster_size = gr.Slider(
	                    2, 500, value=5, step=1, label="min_cluster_size",
	                    info="Choose minimum No. of docs in a cluster. Lower the value ,higher the clusters created",
	                )
	                top_n_words = gr.Slider(
	                    1, 25, value=10, step=1, label="top_n_words",
	                    info="Choose no of key words for a cluster",
	                )
	                ngram = gr.Slider(
	                    1, 3, value=2, step=1, label="ngram",
	                    info="Choose no of n-grams words to be taken for clustering",
	                )

	            cluster_btn = gr.Button(value="Cluster")
	            # One output component per value returned by process_file_bm25,
	            # in this order.
	            tup = cluster_btn.click(
	                process_file_bm25,
	                inputs=[inputfile, mode, min_cluster_size, top_n_words, ngram],
	                outputs=[
	                    gr.Dataframe(),
	                    gr.File(label="Download CSV"),
	                    gr.Dataframe(),
	                    gr.Plot(label="Barchart"),
	                    gr.Plot(label="Topics Plot"),
	                    gr.Plot(label="Heatmap"),
	                    gr.Plot(label="Hierarchy"),
	                ],
	            )
	            llm_btn = gr.Button(value="Download Excel with Topics ")
	            llm_btn.click(download_df, inputs=[], outputs=gr.Dataframe(label="Output"))


	with gr.Blocks() as bert:
	    # keyBERT clustering tab: same upload / parameters / results flow as the
	    # BM25 tab, driven by process_file_bert.
	    # NOTE(review): the nesting of the layout containers is reconstructed from
	    # statement order - confirm it against the intended page layout.
	    with gr.Row():
	        with gr.Column():
	            gr.Markdown(
	                """
	# Select a CSV/Excel file with column as 'products'
	""")
	            inputfile = gr.File(
	                label="Upload CSV/Excel file",
	                file_types=['.csv', '.xlsx'],
	            )

	            def confirmation():
	                """Return the status text shown once an upload completes."""
	                return 'File uploaded! Press Cluster button'

	            out = gr.Textbox()
	            mode = gr.Radio(
	                ["Automated clustering", "Manually choose parameters"],
	                value="Automated clustering",
	                label="Type of algorithm",
	                info="Choose any mode u want",
	            )
	            inputfile.upload(confirmation, inputs=[], outputs=out)

	            # Three sliders side by side, one per column.
	            with gr.Row():
	                with gr.Column():
	                    min_cluster_size = gr.Slider(
	                        1, 100,
	                        value=5,
	                        step=1,
	                        label="min_cluster_size",
	                        info="Choose minimum No. of docs in a cluster. Lower the value ,higher the clusters created",
	                    )
	                with gr.Column():
	                    top_n_words = gr.Slider(
	                        1, 25,
	                        value=10,
	                        step=1,
	                        label="top_n_words",
	                        info="Choose no of key words for a cluster",
	                    )
	                with gr.Column():
	                    ngram = gr.Slider(
	                        1, 3,
	                        value=2,
	                        step=1,
	                        label="ngram",
	                        info="Choose no of n-grams words to be taken for clustering",
	                    )

	            cluster_btn = gr.Button(value="Cluster")

	            # Result components, one per value returned by process_file_bert.
	            bert_outputs = [
	                gr.Dataframe(),
	                gr.Dataframe(),
	                gr.Plot(label="Barchart"),
	                gr.Plot(label="Topics Plot"),
	                gr.Plot(label="Heatmap"),
	                gr.Plot(label="Hierarchy"),
	            ]
	            # NOTE(review): top_n_words and ngram are rendered above but are
	            # not part of the inputs below - confirm this is intended.
	            tup = cluster_btn.click(
	                process_file_bert,
	                inputs=[inputfile, mode, min_cluster_size],
	                outputs=bert_outputs,
	            )

	#___________________________________________
	# Extra controls handed to the chat interface alongside the user message,
	# in the order they are listed in additional_inputs.
	system_prompt_box = gr.Textbox(
	    label="System Prompt",
	    max_lines=1,
	    interactive=True,
	)
	temperature_slider = gr.Slider(
	    minimum=0.0,
	    maximum=1.0,
	    step=0.05,
	    value=0.9,
	    label="Temperature",
	    info="Higher values produce more diverse outputs",
	    interactive=True,
	)
	max_new_tokens_slider = gr.Slider(
	    minimum=0,
	    maximum=4096,
	    step=64,
	    value=256,
	    label="Max new tokens",
	    info="The maximum numbers of new tokens",
	    interactive=True,
	)
	top_p_slider = gr.Slider(
	    minimum=0.0,
	    maximum=1,
	    step=0.05,
	    value=0.90,
	    label="Top-p (nucleus sampling)",
	    info="Higher values sample more low-probability tokens",
	    interactive=True,
	)
	repetition_penalty_slider = gr.Slider(
	    minimum=1.0,
	    maximum=2.0,
	    step=0.05,
	    value=1.2,
	    label="Repetition penalty",
	    info="Penalize repeated tokens",
	    interactive=True,
	)
	additional_inputs = [
	    system_prompt_box,
	    temperature_slider,
	    max_new_tokens_slider,
	    top_p_slider,
	    repetition_penalty_slider,
	]

	# Sample rows for the chat tab: each row is a user message followed by five
	# None entries.
	_example_prompts = (
	    "I'm planning a vacation to Japan. Can you suggest a one-week itinerary including must-visit places and local cuisines to try?",
	    "Can you write a short story about a time-traveling detective who solves historical mysteries?",
	    "I'm trying to learn French. Can you provide some common phrases that would be useful for a beginner, along with their pronunciations?",
	    "I have chicken, rice, and bell peppers in my kitchen. Can you suggest an easy recipe I can make with these ingredients?",
	    "Can you explain how the QuickSort algorithm works and provide a Python implementation?",
	    "What are some unique features of Rust that make it stand out compared to other systems programming languages like C++?",
	)
	examples = [
	    [prompt, None, None, None, None, None]
	    for prompt in _example_prompts
	]

	# Chat tab: generate (imported from functions) answers each message, with the
	# extra controls and sample rows defined above.
	chat_window = gr.Chatbot(
	    layout="panel",
	    likeable=True,
	    show_copy_button=True,
	    show_share_button=False,
	    show_label=False,
	)
	chat_interface = gr.ChatInterface(
	    fn=generate,
	    chatbot=chat_window,
	    title="Mixtral 46.7B",
	    additional_inputs=additional_inputs,
	    examples=examples,
	    concurrency_limit=20,
	)

	#______________________________________________________
	# Create a Gradio interface
	#df=pd.DataFrame(columns=['Topic'])
	# The dropdown choices are computed once, here, from the frame currently
	# stored in df_topics_bm25.
	df = df_topics_bm25.value
	print(df)

	topic_choices = df['Topic'].unique().tolist()
	# Every column except 'Topic' can be picked as an axis.
	axis_choices = [column for column in df.columns if column != 'Topic']

	topic_dropdown = gr.Dropdown(topic_choices, label="Select Topic Number", type="index")
	x_axis_dropdown = gr.Dropdown(list(axis_choices), label="Select X Axis", type="index")
	y_axis_dropdown = gr.Dropdown(list(axis_choices), label="Select Y Axis", type="index")
	chart_type_radio = gr.Radio(
	    ["scatter", "bar", "line", "box", "wordcloud", "pie"],
	    label="Select Chart Type",
	)
	aggregation_dropdown = gr.Dropdown(
	    ["count", "count_distinct", "sum", "average"],
	    label="Select Aggregation Function",
	)

	# generate_plot is imported from functions and receives the five inputs in
	# the order listed.
	excel_analysis_bm25 = gr.Interface(
	    fn=generate_plot,
	    inputs=[
	        topic_dropdown,
	        x_axis_dropdown,
	        y_axis_dropdown,
	        chart_type_radio,
	        aggregation_dropdown,
	    ],
	    outputs=gr.Plot(label="Visualization"),
	)
	# Assemble the four pages into one tabbed app; titles are matched to pages by
	# position.
	tab_pages = [bm25, chat_interface, excel_analysis_bm25, bert]
	tab_titles = [
	    "TFIDF-BM25 Clustering",
	    "TFIDF-BM25-Topics AI",
	    "TFIDF-BM25-Topic analysis",
	    "keyBERT",
	]
	demo = gr.TabbedInterface(tab_pages, tab_titles)

	# Start the server as soon as the module is executed.
	demo.launch(share=True, debug=True)