rameshmoorthy
commited on
Commit
•
7efeab0
1
Parent(s):
ba6d530
Upload 3 files
Browse files- app.py +197 -0
- functions.py +206 -0
- requirements.txt.txt +7 -0
app.py
ADDED
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
from functions import process_file_bm25 , process_file_bert , generate_plot , generate
|
4 |
+
|
5 |
+
|
6 |
+
|
7 |
+
#------------------------------------------------------
|
8 |
+
|
9 |
+
# Create the state object
|
10 |
+
# Process-wide holder for the most recent clustering results.
# NOTE(review): attributes set on a module-level gr.State are shared by every
# visitor of the app; this is not per-session state -- confirm that is intended.
state = gr.State()
state.df_bm25 = pd.DataFrame({"column1": [1, 2, 3], "column2": ["A", "B", "C"]})
state.df_bert = pd.DataFrame({"column1": [1, 2, 3], "column2": ["A", "B", "C"]})
# The holder is deliberately NOT re-created here: a second gr.State() at this
# point threw away the df_bm25 / df_bert placeholders assigned above.
state.df_topics_bert = pd.DataFrame({"column1": [1, 2, 3], "column2": ["A", "B", "C"]})
state.df_topics_bm25 = pd.DataFrame({"column1": [1, 2, 3], "column2": ["A", "B", "C"]})

# functions.py assigns to a global named `state` that it never defines, so
# expose this object on that module to make those assignments resolve.
import functions

functions.state = state
|
16 |
+
|
17 |
+
|
18 |
+
|
19 |
+
with gr.Blocks() as bm25:
    # NOTE(review): the nesting below was reconstructed from a listing that
    # lost its indentation -- confirm the layout against the running app.
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                """
                # Select a CSV/Excel file with column as 'products'
                """)
            inputfile = gr.File(file_types=['.csv', '.xlsx'], label="Upload CSV/Excel file")

            def confirmation():
                """Return the status text shown after a file upload completes."""
                return 'File uploaded! Press Cluster button'

            def download_doc(doc):
                """Return ``doc`` unchanged."""
                return doc

            def download_df():
                """Return the BM25 clustering frame currently stored on ``state``.

                The frame is read from ``state.df_bm25``; an empty DataFrame is
                returned when that attribute has not been set.
                """
                latest = getattr(state, "df_bm25", None)
                return latest if latest is not None else pd.DataFrame()

            out = gr.Textbox()
            mode = gr.Radio(
                ["Automated clustering", "Manually choose parameters"],
                label="Type of algorithm",
                value="Automated clustering",
                info="Choose any mode u want",
            )
            inputfile.upload(confirmation, inputs=[], outputs=out)

            with gr.Row():
                with gr.Column():
                    min_cluster_size = gr.Slider(
                        1, 100, value=5, step=1, label="min_cluster_size",
                        info="Choose minimum No. of docs in a cluster. Lower the value ,higher the clusters created")
                with gr.Column():
                    top_n_words = gr.Slider(
                        1, 25, value=10, step=1, label="top_n_words",
                        info="Choose no of key words for a cluster")
                with gr.Column():
                    ngram = gr.Slider(
                        1, 3, value=2, step=1, label="ngram",
                        info="Choose no of n-grams words to be taken for clustering")

            cluster_btn = gr.Button(value="Cluster")
            # Output components are listed in the order process_file_bm25
            # returns its values: frame, xlsx path, topic info, then four plots.
            tup = cluster_btn.click(
                process_file_bm25,
                inputs=[inputfile, mode, min_cluster_size, top_n_words, ngram],
                outputs=[
                    gr.Dataframe(),
                    gr.File(label="Download CSV"),
                    gr.Dataframe(),
                    gr.Plot(label="Barchart"),
                    gr.Plot(label="Topics Plot"),
                    gr.Plot(label="Heatmap"),
                    gr.Plot(label="Hierarchy"),
                ],
            )

            llm_btn = gr.Button(value="AI generation ")
            llm_btn.click(download_df, inputs=[], outputs=gr.Dataframe(label="Output"))
|
73 |
+
|
74 |
+
|
75 |
+
with gr.Blocks() as bert:
    # NOTE(review): the nesting below was reconstructed from a listing that
    # lost its indentation -- confirm the layout against the running app.
    with gr.Row():
        with gr.Column():
            gr.Markdown(
                """
                # Select a CSV/Excel file with column as 'products'
                """)
            inputfile = gr.File(file_types=['.csv', '.xlsx'], label="Upload CSV/Excel file")

            def confirmation():
                """Return the status text shown after a file upload completes."""
                return 'File uploaded! Press Cluster button'

            out = gr.Textbox()
            mode = gr.Radio(
                ["Automated clustering", "Manually choose parameters"],
                label="Type of algorithm",
                value="Automated clustering",
                info="Choose any mode u want",
            )
            inputfile.upload(confirmation, inputs=[], outputs=out)

            with gr.Row():
                with gr.Column():
                    min_cluster_size = gr.Slider(
                        1, 100, value=5, step=1, label="min_cluster_size",
                        info="Choose minimum No. of docs in a cluster. Lower the value ,higher the clusters created")
                with gr.Column():
                    top_n_words = gr.Slider(
                        1, 25, value=10, step=1, label="top_n_words",
                        info="Choose no of key words for a cluster")
                with gr.Column():
                    ngram = gr.Slider(
                        1, 3, value=2, step=1, label="ngram",
                        info="Choose no of n-grams words to be taken for clustering")

            cluster_btn = gr.Button(value="Cluster")
            # process_file_bert takes five arguments; the top_n_words and ngram
            # sliders were previously left out of `inputs`, so every click
            # failed with a missing-argument TypeError.
            tup = cluster_btn.click(
                process_file_bert,
                inputs=[inputfile, mode, min_cluster_size, top_n_words, ngram],
                outputs=[
                    gr.Dataframe(),
                    gr.Dataframe(),
                    gr.Plot(label="Barchart"),
                    gr.Plot(label="Topics Plot"),
                    gr.Plot(label="Heatmap"),
                    gr.Plot(label="Hierarchy"),
                ],
            )
|
115 |
+
|
116 |
+
#___________________________________________
|
117 |
+
# Extra controls shown under the chat box. Their order matches the parameters
# that follow (prompt, history) in `generate`: system prompt, temperature,
# max new tokens, top-p, repetition penalty.
# NOTE(review): "Max new tokens" allows 0 and tops out at 1048, which is not a
# multiple of its step of 64 -- confirm these bounds are intended.
additional_inputs = [
    gr.Textbox(label="System Prompt", max_lines=1, interactive=True),
    gr.Slider(
        minimum=0.0, maximum=1.0, step=0.05, value=0.9,
        label="Temperature",
        info="Higher values produce more diverse outputs",
        interactive=True,
    ),
    gr.Slider(
        minimum=0, maximum=1048, step=64, value=256,
        label="Max new tokens",
        info="The maximum numbers of new tokens",
        interactive=True,
    ),
    gr.Slider(
        minimum=0.0, maximum=1, step=0.05, value=0.90,
        label="Top-p (nucleus sampling)",
        info="Higher values sample more low-probability tokens",
        interactive=True,
    ),
    gr.Slider(
        minimum=1.0, maximum=2.0, step=0.05, value=1.2,
        label="Repetition penalty",
        info="Penalize repeated tokens",
        interactive=True,
    ),
]

# Sample prompts; each row is the prompt followed by one None per additional
# input above.
_example_prompts = (
    "I'm planning a vacation to Japan. Can you suggest a one-week itinerary including must-visit places and local cuisines to try?",
    "Can you write a short story about a time-traveling detective who solves historical mysteries?",
    "I'm trying to learn French. Can you provide some common phrases that would be useful for a beginner, along with their pronunciations?",
    "I have chicken, rice, and bell peppers in my kitchen. Can you suggest an easy recipe I can make with these ingredients?",
    "Can you explain how the QuickSort algorithm works and provide a Python implementation?",
    "What are some unique features of Rust that make it stand out compared to other systems programming languages like C++?",
)
examples = [[text, None, None, None, None, None] for text in _example_prompts]

# Chat tab: streams replies produced by `generate`.
_chatbot = gr.Chatbot(
    show_label=False,
    show_share_button=False,
    show_copy_button=True,
    likeable=True,
    layout="panel",
)
chat_interface = gr.ChatInterface(
    fn=generate,
    chatbot=_chatbot,
    additional_inputs=additional_inputs,
    title="Mixtral 46.7B",
    examples=examples,
    concurrency_limit=20,
)
|
177 |
+
|
178 |
+
#______________________________________________________
|
179 |
+
# Create a Gradio interface
|
180 |
+
# Frame whose columns populate the topic-analysis dropdowns.
# NOTE(review): this is read once at start-up, so the dropdown choices do not
# follow later clustering runs -- confirm whether they should be refreshed.
df = state.df_topics_bm25

# generate_plot (functions.py) reads a global named `df` that functions.py
# never defines; expose the frame there so that lookup resolves.
import functions

functions.df = df

# The start-up placeholder frame has no 'Topic' column, so indexing it
# unconditionally raised KeyError while the module was still loading.
_topic_choices = df['Topic'].unique().tolist() if 'Topic' in df.columns else []
_axis_choices = [column for column in df.columns if column != 'Topic']

excel_analysis_bm25 = gr.Interface(
    fn=generate_plot,
    inputs=[
        # Passed by value: generate_plot compares this against the values of
        # the topic column, so a positional index would select the wrong rows.
        gr.Dropdown(_topic_choices, label="Select Topic Number"),
        # Axes are passed as positions; generate_plot indexes df.columns[1:].
        gr.Dropdown(_axis_choices, label="Select X Axis", type="index"),
        gr.Dropdown(_axis_choices, label="Select Y Axis", type="index"),
        gr.Radio(["scatter", "bar", "line", "box", "wordcloud", "pie"], label="Select Chart Type"),
        gr.Dropdown(["count", "count_distinct", "sum", "average"], label="Select Aggregation Function"),
    ],
    outputs=gr.Plot(label="Visualization"),
)

# One tab per tool, in the same order as the titles.
demo = gr.TabbedInterface(
    [bm25, chat_interface, excel_analysis_bm25, bert],
    ["TFIDF-BM25 Clustering", "TFIDF-BM25-Topics AI", "TFIDF-BM25-Topic analysis", "keyBERT"],
)

demo.launch(share=True, debug=True)
|
functions.py
ADDED
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from bertopic import BERTopic
|
3 |
+
from huggingface_hub import InferenceClient
|
4 |
+
from bertopic.vectorizers import ClassTfidfTransformer
|
5 |
+
from sentence_transformers import SentenceTransformer
|
6 |
+
from sklearn import preprocessing
|
7 |
+
from sklearn.preprocessing import LabelEncoder
|
8 |
+
from tempfile import NamedTemporaryFile
|
9 |
+
import matplotlib.pyplot as plt
|
10 |
+
import plotly.express as px
|
11 |
+
|
12 |
+
|
13 |
+
from wordcloud import WordCloud
|
14 |
+
|
15 |
+
|
16 |
+
def process_file_bm25(file, mode, min_cluster_size, top_n_words, ngram):
    """Cluster the 'products' column of an uploaded file with BERTopic.

    Topic words are weighted by a ``ClassTfidfTransformer`` configured with
    ``bm25_weighting=True`` and ``reduce_frequent_words=True``.

    Args:
        file: Uploaded file; either an object with a ``name`` attribute holding
            the path, or the path itself. Must end in .csv, .xls or .xlsx.
        mode: ``"Automated clustering"`` leaves ``min_topic_size`` at the
            BERTopic default; any other value applies ``min_cluster_size``.
        min_cluster_size: Minimum topic size used outside automated mode.
        top_n_words: Number of words kept per topic.
        ngram: Upper bound of the n-gram range ``(1, ngram)``.

    Returns:
        Tuple ``(df, xlsx_path, topics_info, barchart, topics_plot, heatmap,
        hierarchy)``. ``df`` is the input frame with ``topic_number`` and
        ``topic_number_encoded`` columns added, and ``xlsx_path`` is a
        temporary Excel export of it. A figure that could not be built is
        ``None``.

    Raises:
        ValueError: If the extension is unsupported or no 'products' column
            (matched case-insensitively) is present.
    """
    path = getattr(file, "name", file)
    lowered = str(path).lower()
    if lowered.endswith('.csv'):
        df = pd.read_csv(path)
    elif lowered.endswith(('.xls', '.xlsx')):
        df = pd.read_excel(path)
    else:
        raise ValueError("Unsupported file format. Please provide a CSV or Excel file.")

    # Resolve the real column label: the presence check is case-insensitive,
    # so the lookup has to be as well or 'Products' passes the check and then
    # fails with KeyError.
    product_columns = [col for col in df.columns if str(col).lower() == 'products']
    if not product_columns:
        raise ValueError("The input file must have a column named 'products'.")
    sentences_list = df[product_columns[0]].tolist()

    ctfidf_model = ClassTfidfTransformer(bm25_weighting=True, reduce_frequent_words=True)
    model_kwargs = {
        "ctfidf_model": ctfidf_model,
        "n_gram_range": (1, int(ngram)),
        "top_n_words": int(top_n_words),
    }
    if mode != "Automated clustering":
        model_kwargs["min_topic_size"] = int(min_cluster_size)
    topic_model = BERTopic(**model_kwargs)

    topics, _probabilities = topic_model.fit_transform(sentences_list)
    topics_info = topic_model.get_topic_info()

    # `state` is not defined in this module; it is only available when the
    # application has attached one, so look it up instead of assuming it.
    shared_state = globals().get("state")
    if shared_state is not None:
        shared_state.df_topics_bm25 = topics_info

    # Build each figure independently. A failed figure becomes None rather
    # than an error string, which a plot output cannot display.
    builders = (
        ("barchart", lambda: topic_model.visualize_barchart(top_n_topics=10)),
        ("topics_plot", topic_model.visualize_topics),
        ("heatmap", topic_model.visualize_heatmap),
        ("hierarchy", topic_model.visualize_hierarchy),
    )
    figures = {}
    for label, build in builders:
        try:
            figures[label] = build()
        except Exception as exc:  # report the failure and keep the other outputs
            print(f"Could not build {label}: {exc}")
            figures[label] = None

    df['topic_number'] = topics
    # Encode the topic numbers to make them categorical.
    df['topic_number_encoded'] = LabelEncoder().fit_transform(df['topic_number'])

    # Reserve a path and close the handle before pandas writes to it, so the
    # descriptor is not leaked and the file is not locked while being written.
    with NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
        xlsx_path = temp_file.name
    df.to_excel(xlsx_path, index=False)

    if shared_state is not None:
        shared_state.df_bm25 = df

    return (
        df,
        xlsx_path,
        topics_info,
        figures["barchart"],
        figures["topics_plot"],
        figures["heatmap"],
        figures["hierarchy"],
    )
|
73 |
+
|
74 |
+
|
75 |
+
def process_file_bert(file, mode, min_cluster_size, top_n_words=10, ngram=2):
    """Cluster the 'products' column of an uploaded file with BERTopic.

    Topic representations are produced by a ``KeyBERTInspired`` representation
    model.

    Args:
        file: Uploaded file; either an object with a ``name`` attribute holding
            the path, or the path itself. Must end in .csv, .xls or .xlsx.
        mode: ``"Automated clustering"`` leaves ``min_topic_size`` at the
            BERTopic default; any other value applies ``min_cluster_size``.
        min_cluster_size: Minimum topic size used outside automated mode.
        top_n_words: Number of words kept per topic. Defaults to 10 so callers
            that supply only the first three arguments keep working.
        ngram: Upper bound of the n-gram range ``(1, ngram)``. Defaults to 2.

    Returns:
        Tuple ``(df, topics_info, barchart, topics_plot, heatmap, hierarchy)``.
        ``df`` is the input frame with ``topic_number`` and
        ``topic_number_encoded`` columns added. A figure that could not be
        built is ``None``.

    Raises:
        ValueError: If the extension is unsupported or no 'products' column
            (matched case-insensitively) is present.
    """
    path = getattr(file, "name", file)
    lowered = str(path).lower()
    if lowered.endswith('.csv'):
        df = pd.read_csv(path)
    elif lowered.endswith(('.xls', '.xlsx')):
        df = pd.read_excel(path)
    else:
        raise ValueError("Unsupported file format. Please provide a CSV or Excel file.")

    # Resolve the real column label: the presence check is case-insensitive,
    # so the lookup has to be as well.
    product_columns = [col for col in df.columns if str(col).lower() == 'products']
    if not product_columns:
        raise ValueError("The input file must have a column named 'products'.")
    sentences_list = df[product_columns[0]].tolist()

    # KeyBERTInspired is not imported at module level, so import it here.
    from bertopic.representation import KeyBERTInspired

    model_kwargs = {
        "representation_model": KeyBERTInspired(),
        "n_gram_range": (1, int(ngram)),
        "top_n_words": int(top_n_words),
    }
    if mode != "Automated clustering":
        model_kwargs["min_topic_size"] = int(min_cluster_size)
    topic_model = BERTopic(**model_kwargs)

    topics, _probabilities = topic_model.fit_transform(sentences_list)
    topics_info = topic_model.get_topic_info()

    # `state` is not defined in this module; it is only available when the
    # application has attached one, so look it up instead of assuming it.
    shared_state = globals().get("state")
    if shared_state is not None:
        shared_state.df_topics_bert = topics_info

    # Build each figure independently. A failed figure becomes None rather
    # than an error string, which a plot output cannot display.
    builders = (
        ("barchart", lambda: topic_model.visualize_barchart(top_n_topics=10)),
        ("topics_plot", topic_model.visualize_topics),
        ("heatmap", topic_model.visualize_heatmap),
        ("hierarchy", topic_model.visualize_hierarchy),
    )
    figures = {}
    for label, build in builders:
        try:
            figures[label] = build()
        except Exception as exc:  # report the failure and keep the other outputs
            print(f"Could not build {label}: {exc}")
            figures[label] = None

    df['topic_number'] = topics
    # Encode the topic numbers to make them categorical.
    df['topic_number_encoded'] = LabelEncoder().fit_transform(df['topic_number'])

    # No Excel export is written here: this function never returned the path,
    # so each call only left an unreachable temporary file behind.
    if shared_state is not None:
        shared_state.df_bert = df

    return (
        df,
        topics_info,
        figures["barchart"],
        figures["topics_plot"],
        figures["heatmap"],
        figures["hierarchy"],
    )
|
128 |
+
|
129 |
+
|
130 |
+
# Module-level inference client used by `generate`; created once at import
# time for the Mixtral instruct model named below.
_MODEL_ID = "mistralai/Mixtral-8x7B-Instruct-v0.1"
client = InferenceClient(_MODEL_ID)
|
133 |
+
|
134 |
+
def format_prompt(message, history):
    """Render a chat history and a new message as one instruction prompt.

    Args:
        message: The new user message.
        history: Iterable of ``(user_prompt, bot_response)`` pairs.

    Returns:
        ``"<s>"`` followed by ``"[INST] user [/INST] reply</s> "`` for every
        past turn, ending with ``"[INST] message [/INST]"``.
    """
    past_turns = [
        f"[INST] {asked} [/INST] {answered}</s> "
        for asked, answered in history
    ]
    return "".join(["<s>", *past_turns, f"[INST] {message} [/INST]"])
|
141 |
+
|
142 |
+
def generate(
    prompt, history, system_prompt, temperature=0.9, max_new_tokens=4096, top_p=0.95, repetition_penalty=1.0,
):
    """Stream a model reply to ``prompt``, yielding the text accumulated so far.

    Args:
        prompt: The new user message.
        history: Iterable of ``(user_prompt, bot_response)`` pairs.
        system_prompt: Optional text placed before the message, separated by
            ``", "``. ``None`` or blank text is ignored.
        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
        max_new_tokens: Upper bound on generated tokens.
        top_p: Nucleus-sampling threshold.
        repetition_penalty: Penalty applied to repeated tokens.

        Any sampling argument given as ``None`` falls back to its default.

    Yields:
        The reply text received so far, growing by one token per step.
    """
    # The app's example rows pass None for every extra input, and float(None)
    # raises TypeError, so fall back to the signature defaults.
    temperature = 0.9 if temperature is None else float(temperature)
    temperature = max(temperature, 1e-2)
    top_p = 0.95 if top_p is None else float(top_p)
    max_new_tokens = 4096 if max_new_tokens is None else int(max_new_tokens)
    repetition_penalty = 1.0 if repetition_penalty is None else float(repetition_penalty)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    # Only prepend the system prompt when there is one; formatting it
    # unconditionally sent "None, <prompt>" or ", <prompt>" to the model.
    system_text = str(system_prompt).strip() if system_prompt is not None else ""
    message = f"{system_text}, {prompt}" if system_text else prompt

    formatted_prompt = format_prompt(message, history)
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""

    for response in stream:
        output += response.token.text
        yield output
    return output
|
167 |
+
|
168 |
+
|
169 |
+
# Define the function to generate the plot based on user inputs
|
170 |
+
def generate_plot(topic, x_axis_index, y_axis_index, chart_type, agg_func, df=None):
    """Build a chart for the rows of one topic.

    Args:
        topic: Topic value used to filter the rows.
        x_axis_index: Position of the x column within ``df.columns[1:]``.
        y_axis_index: Position of the y column within ``df.columns[1:]``.
        chart_type: One of "scatter", "bar", "line", "box", "wordcloud", "pie".
        agg_func: Only "count_distinct" has an effect: it switches bar charts
            to grouped bars.
        df: Frame to plot. When omitted, a module-level ``df`` is used if one
            has been set.

    Returns:
        A plotly figure, or a matplotlib figure for "wordcloud".

    Raises:
        ValueError: If no frame is available, the chart type is unsupported,
            an axis is not selected, or the frame has no topic column.
    """
    if df is None:
        # No `df` is defined in this module; use one only if it was attached.
        df = globals().get("df")
    if df is None:
        raise ValueError("No data available to plot.")

    supported = ("scatter", "bar", "line", "box", "wordcloud", "pie")
    if chart_type not in supported:
        # Previously fell through to `return fig` with `fig` never assigned.
        raise ValueError(f"Unsupported chart type: {chart_type!r}")
    if x_axis_index is None or y_axis_index is None:
        raise ValueError("Both an X axis and a Y axis must be selected.")

    selectable = df.columns[1:]
    x_axis = selectable[x_axis_index]
    y_axis = selectable[y_axis_index]

    # Use whichever topic column the frame has: 'Topic' is what app.py reads
    # from the topic-info frame, 'topic_number' is what the process_file_*
    # functions add.
    topic_column = next(
        (name for name in ("Topic Number", "Topic", "topic_number") if name in df.columns),
        None,
    )
    if topic_column is None:
        raise ValueError("The data has no topic column to filter on.")
    filtered_df = df[df[topic_column] == topic]

    if chart_type == "wordcloud":
        text = ' '.join(filtered_df[y_axis].astype(str))
        wordcloud = WordCloud(width=800, height=400, random_state=21, max_font_size=110).generate(text)
        # Return the figure: plt.show() followed by `return None` handed the
        # caller nothing to render.
        fig, axes = plt.subplots(figsize=(10, 7))
        axes.imshow(wordcloud, interpolation="bilinear")
        axes.axis('off')
        return fig

    if chart_type == "scatter":
        return px.scatter(filtered_df, x=x_axis, y=y_axis)
    if chart_type == "bar":
        if agg_func == "count_distinct":
            return px.bar(filtered_df, x=x_axis, y=y_axis, color=y_axis, barmode='group')
        return px.bar(filtered_df, x=x_axis, y=y_axis, color=y_axis)
    if chart_type == "line":
        return px.line(filtered_df, x=x_axis, y=y_axis)
    if chart_type == "box":
        return px.box(filtered_df, x=x_axis, y=y_axis)
    return px.pie(filtered_df, names=x_axis, values=y_axis)
|
201 |
+
|
202 |
+
|
203 |
+
|
204 |
+
|
205 |
+
|
206 |
+
|
requirements.txt.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pandas
|
2 |
+
bertopic
|
3 |
+
huggingface_hub
|
4 |
+
sentence-transformers
|
5 |
+
scikit-learn
|
6 |
+
matplotlib
|
7 |
+
plotly
|