|
import gradio as gr |
|
import pandas as pd |
|
from functions import process_file_bm25 , process_file_bert , generate_plot , generate |
|
from tempfile import NamedTemporaryFile |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Gradio state objects seeded with three-row placeholder tables.
# `df_bm25` is the object `download_df` reads; `df_topics_bm25.value` feeds the
# dropdown choices of the topic-analysis interface defined further down.
df_bm25 = gr.State(
    value=pd.DataFrame(
        {
            "Products": [1, 2, 3],
            "column2": ["A", "B", "C"],
        }
    )
)
df_topics_bm25 = gr.State(
    value=pd.DataFrame(
        {
            "Topic": [1, 2, 3],
            "column2": ["A", "B", "C"],
        }
    )
)
|
|
|
|
|
with gr.Blocks() as bm25:
    with gr.Row():
        with gr.Column():
            # Page banner. `markup` is not a documented gr.HTML parameter; where
            # the component rejects unknown keywords the old call raised a
            # TypeError that a bare `except:` swallowed, so the banner was
            # silently dropped. The call is now made with supported arguments
            # only and is no longer wrapped in a catch-all handler.
            gr.HTML(
                """
                <h1 style="text-align: center; font-size: 24px; font-weight: bold; color: blue;">NAC Product Clustering Analysis</h1>
                <p style="text-align: center; font-size: 18px; color: green;">This module helps to quickly cluster the products in any excel/csv file for product wise analysis for any NAC(National Assessment centre) of CBIC Indian Customs.</p>
                """
            )

            gr.Markdown(
                """
                # Select a CSV/Excel file with column as 'products'
                """)
            inputfile = gr.File(file_types=['.csv', '.xlsx'], label="Upload CSV/Excel file")

            def confirmation(file):
                """Validate an uploaded spreadsheet and stage it as an .xlsx copy.

                Args:
                    file: Upload object supplied by the ``gr.File`` component. Its
                        ``name`` attribute is used to detect the file type and the
                        object itself is handed to pandas for reading.

                Returns:
                    A ``(path, message)`` tuple. ``path`` is the name of a
                    temporary ``.xlsx`` file holding the loaded data, or ``None``
                    when the extension is unsupported or no ``products`` column
                    (compared case-insensitively) is present. ``message`` is the
                    status text for the UI.
                """
                # Compare extensions case-insensitively so "DATA.CSV" is accepted.
                filename = file.name.lower()
                if filename.endswith('.csv'):
                    df = pd.read_csv(file)
                elif filename.endswith(('.xls', '.xlsx')):
                    df = pd.read_excel(file)
                else:
                    doc = "Unsupported file format. Please provide a CSV or Excel file."
                    return None, doc

                # str() keeps the check working when headers are not strings
                # (e.g. purely numeric column labels), where the `.str`
                # accessor would raise instead of reporting a missing column.
                column_names = {str(col).lower() for col in df.columns}
                if 'products' not in column_names:
                    doc = "The input file must have a column named 'products'."
                    return None, doc

                doc = 'File uploaded! Press Cluster button'
                # Only the unique name is needed: close the handle before pandas
                # writes to that path so no open descriptor is leaked and the
                # write also works on platforms that lock open files.
                with NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
                    temp_path = temp_file.name
                df.to_excel(temp_path, index=False)
                return temp_path, doc

            def download_df():
                """Return the DataFrame currently stored in the ``df_bm25`` state."""
                # gr.State is only a holder; the table itself is its `.value`.
                # Returning the holder would hand the Dataframe output a
                # component object rather than tabular data.
                # NOTE(review): nothing visible in this file ever updates
                # df_bm25, so this appears to return the placeholder table.
                df1 = df_bm25.value
                print(df1)
                return df1

            out = gr.Textbox()
            mode = gr.Radio(
                ["Automated clustering", "Manually choose parameters"],
                label="Type of algorithm",
                value="Automated clustering",
                info="Choose any mode u want",
            )
            inputfile.upload(
                confirmation,
                inputs=[inputfile],
                outputs=[gr.File(label="Uploaded File"), out],
            )

            # Clustering parameters, laid out side by side.
            with gr.Row():
                min_cluster_size = gr.Slider(2, 500, value=5, step=1, label="min_cluster_size", info="Choose minimum No. of docs in a cluster. Lower the value ,higher the clusters created")
                top_n_words = gr.Slider(1, 25, value=10, step=1, label="top_n_words", info="Choose no of key words for a cluster")
                ngram = gr.Slider(1, 3, value=2, step=1, label="ngram", info="Choose no of n-grams words to be taken for clustering")

            cluster_btn = gr.Button(value="Cluster")

            # The output components are created inline, so they render below
            # the button in the order listed here.
            tup = cluster_btn.click(
                process_file_bm25,
                inputs=[inputfile, mode, min_cluster_size, top_n_words, ngram],
                outputs=[
                    gr.Dataframe(),
                    gr.File(label="Download CSV"),
                    gr.Dataframe(),
                    gr.Plot(label="Barchart"),
                    gr.Plot(label="Topics Plot"),
                    gr.Plot(label="Heatmap"),
                    gr.Plot(label="Hierarchy"),
                ],
            )

            llm_btn = gr.Button(value="Download Excel with Topics ")
            llm_btn.click(download_df, inputs=[], outputs=gr.Dataframe(label="Output"))
|
|
|
|
|
with gr.Blocks() as bert:
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                """
                # Select a CSV/Excel file with column as 'products'
                """
            )
            inputfile = gr.File(
                label="Upload CSV/Excel file",
                file_types=['.csv', '.xlsx'],
            )

            def confirmation():
                """Return the fixed status message shown once a file is uploaded."""
                return 'File uploaded! Press Cluster button'

            out = gr.Textbox()
            mode = gr.Radio(
                choices=["Automated clustering", "Manually choose parameters"],
                value="Automated clustering",
                label="Type of algorithm",
                info="Choose any mode u want",
            )
            # The handler takes no inputs; it only writes the message to `out`.
            inputfile.upload(confirmation, inputs=[], outputs=out)

            # One column per tuning slider.
            with gr.Row():
                with gr.Column():
                    min_cluster_size = gr.Slider(
                        minimum=1,
                        maximum=100,
                        value=5,
                        step=1,
                        label="min_cluster_size",
                        info="Choose minimum No. of docs in a cluster. Lower the value ,higher the clusters created",
                    )
                with gr.Column():
                    top_n_words = gr.Slider(
                        minimum=1,
                        maximum=25,
                        value=10,
                        step=1,
                        label="top_n_words",
                        info="Choose no of key words for a cluster",
                    )
                with gr.Column():
                    ngram = gr.Slider(
                        minimum=1,
                        maximum=3,
                        value=2,
                        step=1,
                        label="ngram",
                        info="Choose no of n-grams words to be taken for clustering",
                    )

            cluster_btn = gr.Button(value="Cluster")

            # NOTE(review): only the file, the mode and min_cluster_size are
            # passed to the handler; the top_n_words and ngram sliders above
            # are not wired in. Confirm against process_file_bert's signature.
            tup = cluster_btn.click(
                process_file_bert,
                inputs=[inputfile, mode, min_cluster_size],
                outputs=[
                    gr.Dataframe(),
                    gr.Dataframe(),
                    gr.Plot(label="Barchart"),
                    gr.Plot(label="Topics Plot"),
                    gr.Plot(label="Heatmap"),
                    gr.Plot(label="Hierarchy"),
                ],
            )
|
|
|
|
|
# Extra controls attached to the chat tab via `additional_inputs`.
# NOTE(review): presumably forwarded to `generate` in this order after the
# message and history - confirm against the handler's signature.
additional_inputs = [
    gr.Textbox(
        label="System Prompt",
        interactive=True,
        max_lines=1,
    ),
    gr.Slider(
        label="Temperature",
        info="Higher values produce more diverse outputs",
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        value=0.9,
        interactive=True,
    ),
    gr.Slider(
        label="Max new tokens",
        info="The maximum numbers of new tokens",
        minimum=0,
        maximum=4096,
        step=64,
        value=256,
        interactive=True,
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        info="Higher values sample more low-probability tokens",
        minimum=0.0,
        maximum=1,
        step=0.05,
        value=0.90,
        interactive=True,
    ),
    gr.Slider(
        label="Repetition penalty",
        info="Penalize repeated tokens",
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        value=1.2,
        interactive=True,
    ),
]
|
|
|
# Sample prompts for the chat tab. Each row is the prompt followed by five
# None placeholders, one per entry in `additional_inputs`.
examples = [
    [prompt] + [None] * 5
    for prompt in (
        "I'm planning a vacation to Japan. Can you suggest a one-week itinerary including must-visit places and local cuisines to try?",
        "Can you write a short story about a time-traveling detective who solves historical mysteries?",
        "I'm trying to learn French. Can you provide some common phrases that would be useful for a beginner, along with their pronunciations?",
        "I have chicken, rice, and bell peppers in my kitchen. Can you suggest an easy recipe I can make with these ingredients?",
        "Can you explain how the QuickSort algorithm works and provide a Python implementation?",
        "What are some unique features of Rust that make it stand out compared to other systems programming languages like C++?",
    )
]
|
|
|
# Chat tab: `generate` answers each message; the sliders and sample prompts
# defined above are attached as additional inputs and examples.
chat_interface = gr.ChatInterface(
    fn=generate,
    title="Mixtral 46.7B",
    chatbot=gr.Chatbot(
        layout="panel",
        likeable=True,
        show_copy_button=True,
        show_label=False,
        show_share_button=False,
    ),
    additional_inputs=additional_inputs,
    examples=examples,
    concurrency_limit=20,
)
|
|
|
|
|
|
|
|
|
# Snapshot of the topics table taken once, when the module is loaded; the
# dropdown choices below are derived from this snapshot.
df = df_topics_bm25.value
print(df)

# Topic-analysis tab: choose a topic, two axis columns (any column other than
# "Topic"), a chart type and an aggregation, then let `generate_plot` draw it.
excel_analysis_bm25 = gr.Interface(
    fn=generate_plot,
    inputs=[
        gr.Dropdown(
            df['Topic'].unique().tolist(),
            label="Select Topic Number",
            type="index",
        ),
        gr.Dropdown(
            [column for column in df.columns if column != 'Topic'],
            label="Select X Axis",
            type="index",
        ),
        gr.Dropdown(
            [column for column in df.columns if column != 'Topic'],
            label="Select Y Axis",
            type="index",
        ),
        gr.Radio(
            ["scatter", "bar", "line", "box", "wordcloud", "pie"],
            label="Select Chart Type",
        ),
        gr.Dropdown(
            ["count", "count_distinct", "sum", "average"],
            label="Select Aggregation Function",
        ),
    ],
    outputs=gr.Plot(label="Visualization"),
)
|
# Assemble the four sub-apps into one tabbed UI; each tab name lines up with
# the interface at the same position.
demo = gr.TabbedInterface(
    interface_list=[
        bm25,
        chat_interface,
        excel_analysis_bm25,
        bert,
    ],
    tab_names=[
        "TFIDF-BM25 Clustering",
        "TFIDF-BM25-Topics AI",
        "TFIDF-BM25-Topic analysis",
        "keyBERT",
    ],
)

# Start the server with a public share link and debug mode enabled.
demo.launch(debug=True, share=True)