Spaces:

rameshmoorthy
/

NAC-Product-Clustering-analysis

Sleeping

File size: 7,976 Bytes

import gradio as gr
import pandas as pd
from functions import process_file_bm25 , process_file_bert , generate_plot , generate

#------------------------------------------------------

def _placeholder_frame(key_column):
    """Return a three-row dummy frame keyed by *key_column*.

    The frame has two columns: *key_column* holding 1, 2, 3 and "column2"
    holding "A", "B", "C".
    """
    return pd.DataFrame({key_column: [1, 2, 3], "column2": ["A", "B", "C"]})


# Module-level holder: the placeholder frames are attached as plain
# attributes on a gr.State instance.
state = gr.State()

# Product-level placeholders.
state.df_bm25 = _placeholder_frame("Products")
state.df_bert = _placeholder_frame("Products")

# Topic-level placeholders.
state.df_topics_bert = _placeholder_frame("Topic")
state.df_topics_bm25 = _placeholder_frame("Topic")



# TFIDF-BM25 clustering tab: upload a file, pick clustering parameters, run
# process_file_bm25 and show its outputs.
with gr.Blocks() as bm25:
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                """
                # Select a CSV/Excel file with column as 'products'
                """)
            inputfile = gr.File(file_types=['.csv', '.xlsx'], label="Upload CSV/Excel file")

            def confirmation():
                """Return the status message shown once a file has been uploaded."""
                return 'File uploaded! Press Cluster button'

            def download_doc(doc):
                """Return *doc* unchanged (not wired to any event in this tab)."""
                return doc

            def download_df():
                """Return the frame displayed by the "AI generation " button.

                Uses ``state.df`` when such an attribute exists and falls back
                to ``state.df_bm25`` otherwise.
                """
                # `state.df` is never assigned in this file, so reading it
                # directly raised AttributeError on every click.
                # NOTE(review): confirm df_bm25 is the frame meant to be shown.
                return getattr(state, "df", state.df_bm25)

            out = gr.Textbox()
            mode = gr.Radio(
                ["Automated clustering", "Manually choose parameters"],
                label="Type of algorithm",
                value="Automated clustering",
                info="Choose any mode u want",
            )
            inputfile.upload(confirmation, inputs=[], outputs=out)

            # One column per slider so the three controls sit side by side in
            # the row (they were previously nested inside each other).
            with gr.Row():
                with gr.Column():
                    min_cluster_size = gr.Slider(
                        1, 100, value=5, step=1,
                        label="min_cluster_size",
                        info="Choose minimum No. of docs in a cluster. Lower the value ,higher the clusters created",
                    )
                with gr.Column():
                    top_n_words = gr.Slider(
                        1, 25, value=10, step=1,
                        label="top_n_words",
                        info="Choose no of key words for a cluster",
                    )
                with gr.Column():
                    ngram = gr.Slider(
                        1, 3, value=2, step=1,
                        label="ngram",
                        info="Choose no of n-grams words to be taken for clustering",
                    )

            cluster_btn = gr.Button(value="Cluster")
            # Author's note on the handler's return order:
            # [df, topics_info, barchart, topics_plot, heatmap, hierarchy].
            # NOTE(review): seven outputs are wired below (a File is included),
            # so confirm against process_file_bm25.
            cluster_btn.click(
                process_file_bm25,
                inputs=[inputfile, mode, min_cluster_size, top_n_words, ngram],
                outputs=[
                    gr.Dataframe(),
                    gr.File(label="Download CSV"),
                    gr.Dataframe(),
                    gr.Plot(label="Barchart"),
                    gr.Plot(label="Topics Plot"),
                    gr.Plot(label="Heatmap"),
                    gr.Plot(label="Hierarchy"),
                ],
            )

            llm_btn = gr.Button(value="AI generation ")
            llm_btn.click(download_df, inputs=[], outputs=gr.Dataframe(label="Output"))


# keyBERT clustering tab: upload a file, pick clustering parameters, run
# process_file_bert and show its outputs.
with gr.Blocks() as bert:
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                """
                # Select a CSV/Excel file with column as 'products'
                """)
            inputfile = gr.File(file_types=['.csv', '.xlsx'], label="Upload CSV/Excel file")

            def confirmation():
                """Return the status message shown once a file has been uploaded."""
                return 'File uploaded! Press Cluster button'

            out = gr.Textbox()
            mode = gr.Radio(
                ["Automated clustering", "Manually choose parameters"],
                label="Type of algorithm",
                value="Automated clustering",
                info="Choose any mode u want",
            )
            inputfile.upload(confirmation, inputs=[], outputs=out)

            # One column per slider so the three controls sit side by side in
            # the row (they were previously nested inside each other).
            with gr.Row():
                with gr.Column():
                    min_cluster_size = gr.Slider(
                        1, 100, value=5, step=1,
                        label="min_cluster_size",
                        info="Choose minimum No. of docs in a cluster. Lower the value ,higher the clusters created",
                    )
                with gr.Column():
                    top_n_words = gr.Slider(
                        1, 25, value=10, step=1,
                        label="top_n_words",
                        info="Choose no of key words for a cluster",
                    )
                with gr.Column():
                    ngram = gr.Slider(
                        1, 3, value=2, step=1,
                        label="ngram",
                        info="Choose no of n-grams words to be taken for clustering",
                    )

            cluster_btn = gr.Button(value="Cluster")

            # Author's note on the handler's return order:
            # [df, topics_info, barchart, topics_plot, heatmap, hierarchy].
            # NOTE(review): top_n_words and ngram are rendered above but are
            # not passed to process_file_bert - confirm whether that is intended.
            cluster_btn.click(
                process_file_bert,
                inputs=[inputfile, mode, min_cluster_size],
                outputs=[
                    gr.Dataframe(),
                    gr.Dataframe(),
                    gr.Plot(label="Barchart"),
                    gr.Plot(label="Topics Plot"),
                    gr.Plot(label="Heatmap"),
                    gr.Plot(label="Hierarchy"),
                ],
            )
         
#___________________________________________
# Extra controls for the chat tab. Each component is built under its own name
# and then collected, in creation order, into `additional_inputs`.
_system_prompt_box = gr.Textbox(
    label="System Prompt",
    max_lines=1,
    interactive=True,
)

_temperature_slider = gr.Slider(
    label="Temperature",
    value=0.9,
    minimum=0.0,
    maximum=1.0,
    step=0.05,
    interactive=True,
    info="Higher values produce more diverse outputs",
)

_max_new_tokens_slider = gr.Slider(
    label="Max new tokens",
    value=256,
    minimum=0,
    maximum=4096,
    step=64,
    interactive=True,
    info="The maximum numbers of new tokens",
)

_top_p_slider = gr.Slider(
    label="Top-p (nucleus sampling)",
    value=0.90,
    minimum=0.0,
    maximum=1,
    step=0.05,
    interactive=True,
    info="Higher values sample more low-probability tokens",
)

_repetition_penalty_slider = gr.Slider(
    label="Repetition penalty",
    value=1.2,
    minimum=1.0,
    maximum=2.0,
    step=0.05,
    interactive=True,
    info="Penalize repeated tokens",
)

additional_inputs = [
    _system_prompt_box,
    _temperature_slider,
    _max_new_tokens_slider,
    _top_p_slider,
    _repetition_penalty_slider,
]

# Prompts offered as ready-made examples, in display order.
_EXAMPLE_PROMPTS = (
    "I'm planning a vacation to Japan. Can you suggest a one-week itinerary including must-visit places and local cuisines to try?",
    "Can you write a short story about a time-traveling detective who solves historical mysteries?",
    "I'm trying to learn French. Can you provide some common phrases that would be useful for a beginner, along with their pronunciations?",
    "I have chicken, rice, and bell peppers in my kitchen. Can you suggest an easy recipe I can make with these ingredients?",
    "Can you explain how the QuickSort algorithm works and provide a Python implementation?",
    "What are some unique features of Rust that make it stand out compared to other systems programming languages like C++?",
)

# Each example row is the prompt followed by five None placeholders.
examples = [[prompt] + [None] * 5 for prompt in _EXAMPLE_PROMPTS]

# Chat tab: the chatbot widget is built first, then handed to a ChatInterface
# that calls `generate` with the extra controls and the example rows.
_chat_panel = gr.Chatbot(
    show_label=False,
    show_share_button=False,
    show_copy_button=True,
    likeable=True,
    layout="panel",
)

chat_interface = gr.ChatInterface(
    fn=generate,
    chatbot=_chat_panel,
    additional_inputs=additional_inputs,
    title="Mixtral 46.7B",
    examples=examples,
    concurrency_limit=20,
)

#______________________________________________________
# Create a Gradio interface
# Topic frame used to populate the dropdown choices. The choices are computed
# once, when this module runs, from whatever state.df_topics_bm25 holds then.
# NOTE(review): nothing in this file refreshes the choices afterwards -
# confirm whether they should follow an uploaded file.
df = state.df_topics_bm25

# Every column except 'Topic' is offered for both axes.
_axis_columns = [column for column in df.columns if column != 'Topic']

excel_analysis_bm25 = gr.Interface(
    fn=generate_plot,  # imported from the local `functions` module
    inputs=[
        gr.Dropdown(df['Topic'].unique().tolist(), label="Select Topic Number", type="index"),
        # Each dropdown gets its own copy of the column list.
        gr.Dropdown(list(_axis_columns), label="Select X Axis", type="index"),
        gr.Dropdown(list(_axis_columns), label="Select Y Axis", type="index"),
        gr.Radio(["scatter", "bar", "line", "box", "wordcloud", "pie"], label="Select Chart Type"),
        gr.Dropdown(["count", "count_distinct", "sum", "average"], label="Select Aggregation Function"),
    ],
    outputs=gr.Plot(label="Visualization"),
)
# Assemble the four tabs; titles are matched to the interfaces by position.
_tab_interfaces = [bm25, chat_interface, excel_analysis_bm25, bert]
_tab_titles = [
    "TFIDF-BM25 Clustering",
    "TFIDF-BM25-Topics AI",
    "TFIDF-BM25-Topic analysis",
    "keyBERT",
]
demo = gr.TabbedInterface(_tab_interfaces, _tab_titles)


# Start the server only when this file is executed as a script, so that
# importing the module does not launch (and block on) the app.
if __name__ == "__main__":
    demo.launch(share=True, debug=True)