singhtech committed on
Commit
715848f
1 Parent(s): 74387ba

Upload 3 files

Browse files

Code Modularization

Files changed (3) hide show
  1. app.py +283 -0
  2. autotabml_agents.py +90 -0
  3. autotabml_tasks.py +66 -0
app.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ from crewai import Crew
5
+ from langchain_groq import ChatGroq
6
+ import streamlit_ace as st_ace
7
+ import traceback
8
+ import contextlib
9
+ import io
10
+ from crewai_tools import FileReadTool
11
+ import matplotlib.pyplot as plt
12
+ import glob
13
+ from dotenv import load_dotenv
14
+ from autotabml_agents import initialize_agents
15
+ from autotabml_tasks import create_tasks
16
+
17
+
18
# Working directories, relative to the process's current directory.
# TEMP_DIR receives the files written by save_uploaded_file(); OUTPUT_DIR is
# the folder the sidebar's download section is labelled after.
TEMP_DIR = "temp_dir"
OUTPUT_DIR = "Output_dir"

# Ensure both directories exist. exist_ok=True makes the call idempotent and
# avoids the check-then-create race of os.path.exists() + os.makedirs().
os.makedirs(TEMP_DIR, exist_ok=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)
27
+
28
+ # Function to save uploaded file
29
def save_uploaded_file(uploaded_file):
    """Write an uploaded file into TEMP_DIR and return the saved path.

    Args:
        uploaded_file: Upload object exposing a ``name`` attribute and a
            ``getbuffer()`` method (main() passes the value returned by
            ``st.file_uploader``).

    Returns:
        Path of the copy written inside TEMP_DIR.
    """
    # The name is supplied by the client: keep only its final path component
    # so a value such as "../../x.csv" cannot be written outside TEMP_DIR.
    safe_name = os.path.basename(uploaded_file.name)
    file_path = os.path.join(TEMP_DIR, safe_name)
    with open(file_path, 'wb') as f:
        f.write(uploaded_file.getbuffer())
    return file_path
34
+
35
# Read key/value pairs from a local .env file into the process environment.
load_dotenv()

# Groq API key taken from the environment; None when GROQ_API_KEY is unset.
groq_api_key = os.getenv("GROQ_API_KEY")
39
+
40
+
41
def _extract_code(result):
    """Return the Python source contained in a crew result.

    If the text contains a Markdown code fence, only the contents of the
    first fenced block are returned and an optional language tag on the
    opening fence line ("python", "py", "python3") is dropped. Text without
    any fence is returned unchanged.

    The previous ``result.strip("```")`` approach left the language tag as
    the first line of the code, and returned only the prose in front of the
    fence whenever the model prefixed its answer with an explanation.
    """
    # str() is a no-op for plain strings; it is a guard in case kickoff()
    # returns a non-str result object (assumption; depends on crewai version).
    text = str(result)
    fence = "```"

    start = text.find(fence)
    if start == -1:
        return text

    body = text[start + len(fence):]
    end = body.find(fence)
    if end != -1:
        body = body[:end]

    # Drop the rest of the opening fence line when it is empty or a language
    # tag; a first line holding real code (inline fence) is kept.
    first_line, newline, remainder = body.partition("\n")
    if newline and first_line.strip().lower() in ("", "python", "python3", "py"):
        body = remainder
    return body


def _run_crew(agents, tasks):
    """Run ``tasks`` with all agents; return the extracted code or None.

    None is returned when the crew produced a falsy result.
    """
    crew = Crew(
        agents=list(agents.values()),
        tasks=tasks,
        verbose=2
    )
    result = crew.kickoff()
    if not result:
        return None
    return _extract_code(result)


def _code_editor(code):
    """Render the ACE editor pre-filled with ``code`` and return its content."""
    return st_ace.st_ace(
        value=code,
        language='python',
        theme='monokai',
        keybinding='vscode',
        min_lines=20,
        max_lines=50
    )


def main():
    """Render the AutoTabML Streamlit page and handle its buttons.

    Flow: collect the problem description and an optional CSV sample, let the
    crew generate starter code ("Process"), then offer "Modify", "Debug" and
    "Run" on the code held in ``st.session_state['edited_code']``.

    NOTE(review): the block nesting below was reconstructed from the logic,
    because the reviewed source had lost its indentation; confirm the
    placement of the editor widgets and of the sidebar section.
    """
    # Set custom CSS for UI
    set_custom_css()

    # Session state survives Streamlit reruns: the code being edited, and
    # whether an initial version has been generated yet.
    if 'edited_code' not in st.session_state:
        st.session_state['edited_code'] = ""
    if 'code_generated' not in st.session_state:
        st.session_state['code_generated'] = False

    # Header
    st.markdown("""
        <div class="header">
            <h1>AutoTabML</h1>
            <p>Automated Machine Learning Code Generation for Tabular Data</p>
        </div>
    """, unsafe_allow_html=True)

    # Sidebar for customization options
    st.sidebar.title('LLM Model')
    model = st.sidebar.selectbox(
        'Model',
        ["llama3-70b-8192"]
    )

    # Initialize LLM
    llm = initialize_llm(model)

    # User inputs
    user_question = st.text_area("Describe your ML problem:", key="user_question")
    uploaded_file = st.file_uploader("Upload a sample .csv of your data", key="uploaded_file")
    # Fall back to a placeholder name while nothing has been uploaded.
    file_name = uploaded_file.name if uploaded_file is not None else "dataset.csv"

    # Initialize agents
    agents = initialize_agents(llm, file_name)

    # Process uploaded file
    if uploaded_file:
        try:
            save_uploaded_file(uploaded_file)
            df = pd.read_csv(uploaded_file)
            st.write("Data successfully uploaded:")
            st.dataframe(df.head())
            data_upload = True
        except Exception as e:
            st.error(f"Error reading the file: {e}")
            data_upload = False
    else:
        df = None
        data_upload = False

    # Process button: generate the initial code.
    if st.button('Process'):
        tasks = create_tasks("Process", user_question, file_name, data_upload, df, None, st.session_state['edited_code'], None, agents)
        with st.spinner('Processing...'):
            code = _run_crew(agents, tasks)
        if code is not None:
            st.session_state['edited_code'] = code
            st.session_state['code_generated'] = True

    st.session_state['edited_code'] = _code_editor(st.session_state['edited_code'])

    if st.session_state['code_generated']:
        # Show options for modification, debugging, and running the code
        suggestion = st.text_area("Suggest modifications to the generated code (optional):", key="suggestion")
        if st.button('Modify'):
            if st.session_state['edited_code'] and suggestion:
                tasks = create_tasks("Modify", user_question, file_name, data_upload, df, suggestion, st.session_state['edited_code'], None, agents)
                with st.spinner('Modifying code...'):
                    code = _run_crew(agents, tasks)
                if code is not None:
                    st.session_state['edited_code'] = code

                st.write("Modified code:")
                st.session_state['edited_code'] = _code_editor(st.session_state['edited_code'])

        debugger = st.text_area("Paste error message here for debugging (optional):", key="debugger")
        if st.button('Debug'):
            if st.session_state['edited_code'] and debugger:
                tasks = create_tasks("Debug", user_question, file_name, data_upload, df, None, st.session_state['edited_code'], debugger, agents)
                with st.spinner('Debugging code...'):
                    code = _run_crew(agents, tasks)
                if code is not None:
                    st.session_state['edited_code'] = code

                st.write("Debugged code:")
                st.session_state['edited_code'] = _code_editor(st.session_state['edited_code'])

        if st.button('Run'):
            final_code = st.session_state["edited_code"]
            with st.expander("Final Code"):
                st.code(final_code, language='python')

            # NOTE(security): this executes LLM-generated / user-edited code
            # inside the app process with this module's globals. Kept as-is
            # because generated code may rely on those names; it should be
            # sandboxed before exposing the app to untrusted users.
            globals().update({'dataset': df})
            output = io.StringIO()
            try:
                with contextlib.redirect_stdout(output):
                    exec(final_code, globals())
                success = True
                result = output.getvalue()
            except Exception:
                success = False
                # Keep whatever was printed before the failure and show the
                # full traceback, so it can be pasted into the Debug box.
                result = output.getvalue() + traceback.format_exc()

            st.subheader('Output:')
            st.text(result)

            figs = [plt.figure(num) for num in plt.get_fignums()]
            if figs:
                st.subheader('Generated Plots:')
                for fig in figs:
                    st.pyplot(fig)
                    # Close after rendering; open figures would otherwise
                    # accumulate and be shown again on the next run.
                    plt.close(fig)

            if success:
                st.success("Code executed successfully!")
            else:
                st.error("Code execution failed! Waiting for debugging input...")

    # Generated files section in the sidebar
    with st.sidebar:
        st.header('Output_dir :')
        # A "/" component in os.path.join() would discard OUTPUT_DIR and make
        # the pattern "/*"; join only the directory and the wildcard.
        files = sorted(glob.glob(os.path.join(OUTPUT_DIR, '*')))
        for file in files:
            if os.path.isfile(file):
                with open(file, 'rb') as f:
                    st.download_button(label=f'Download {os.path.basename(file)}', data=f, file_name=os.path.basename(file))
234
+
235
+
236
+
237
+ # Function to set custom CSS for futuristic UI
238
def set_custom_css():
    """Inject the page's custom stylesheet through ``st.markdown``.

    The rules style the page body, the ``.header`` banner, Streamlit buttons
    (``.stButton button``, including hover) and a ``.spinner`` flex container.
    """
    stylesheet = """
        <style>
        body {
            background: #0e0e0e;
            color: #e0e0e0;
            font-family: 'Roboto', sans-serif;
        }
        .header {
            background: linear-gradient(135deg, #6e3aff, #b839ff);
            padding: 10px;
            border-radius: 10px;
        }
        .header h1, .header p {
            color: white;
            text-align: center;
        }
        .stButton button {
            background-color: #b839ff;
            color: white;
            border-radius: 10px;
            font-size: 16px;
            padding: 10px 20px;
        }
        .stButton button:hover {
            background-color: #6e3aff;
            color: #e0e0e0;
        }
        .spinner {
            display: flex;
            justify-content: center;
            align-items: center;
        }
        </style>
    """
    # unsafe_allow_html is required for the raw <style> element to be emitted.
    st.markdown(stylesheet, unsafe_allow_html=True)
273
+
274
+ # Function to initialize LLM
275
def initialize_llm(model):
    """Create the Groq chat model for the given model name.

    Args:
        model: Model identifier forwarded as ``model_name``.

    Returns:
        A ``ChatGroq`` instance with temperature 0, authenticated with the
        module-level ``groq_api_key``.
    """
    llm_settings = {
        "temperature": 0,
        "groq_api_key": groq_api_key,
        "model_name": model,
    }
    return ChatGroq(**llm_settings)
281
+
282
+ if __name__ == "__main__":
283
+ main()
autotabml_agents.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from crewai import Agent
2
+ from crewai_tools import FileReadTool
3
+
4
+
5
+ # Function to initialize agents
6
def initialize_agents(llm, file_name):
    """Build the crew's agents, keyed by agent name.

    Args:
        llm: Language model instance shared by every agent.
        file_name: Name of the uploaded dataset; embedded in the starter-code
            agent's goal so generated code refers to the saved upload.

    Returns:
        dict mapping agent name to ``Agent``. Insertion order is part of the
        contract: app.py passes ``list(agents.values())`` to the Crew.
    """
    def make_agent(role, goal, backstory, **extra):
        # Every agent shares the same llm, is verbose and never delegates;
        # ``extra`` carries per-agent options such as ``tools``.
        return Agent(
            role=role,
            goal=goal,
            backstory=backstory,
            verbose=True,
            allow_delegation=False,
            llm=llm,
            **extra
        )

    return {
        "Data_Reader_Agent": make_agent(
            'Data_Reader_Agent',
            "Read the uploaded dataset and provide it to other agents.",
            "Responsible for reading the uploaded dataset.",
            tools=[FileReadTool()],
        ),
        "Problem_Definition_Agent": make_agent(
            'Problem_Definition_Agent',
            "Clarify the machine learning problem the user wants to solve.",
            "Expert in defining machine learning problems.",
        ),
        "EDA_Agent": make_agent(
            'EDA_Agent',
            "Perform all possible Exploratory Data Analysis (EDA) on the data provided by the user.",
            "Specializes in conducting comprehensive EDA to understand the data characteristics, distributions, and relationships.",
        ),
        "Feature_Engineering_Agent": make_agent(
            'Feature_Engineering_Agent',
            "Perform feature engineering on the data based on the EDA results provided by the EDA agent.",
            "Expert in deriving new features, transforming existing features, and preprocessing data to prepare it for modeling.",
        ),
        "Model_Recommendation_Agent": make_agent(
            'Model_Recommendation_Agent',
            "Suggest the most suitable machine learning models.",
            "Expert in recommending machine learning algorithms.",
        ),
        "Starter_Code_Generator_Agent": make_agent(
            'Starter_Code_Generator_Agent',
            # The directory must match TEMP_DIR ("temp_dir") in app.py, where
            # uploads are saved; the old text named a non-existent
            # "temp_files/" folder and left the quote unclosed.
            f"Generate starter Python code for the project. Always give dataset name as 'temp_dir/{file_name}'",
            "Code wizard for generating starter code templates.",
        ),
        "Code_Modification_Agent": make_agent(
            'Code_Modification_Agent',
            "Modify the generated Python code based on user suggestions.",
            "Expert in adapting code according to user feedback.",
        ),
        "Code_Debugger_Agent": make_agent(
            'Code_Debugger_Agent',
            "Debug the generated Python code.",
            "Seasoned code debugger.",
        ),
        # Note: this agent's role string differs from its dictionary key.
        "Compiler_Agent": make_agent(
            "Code_compiler",
            "Extract only the python code.",
            "You are the compiler which extract only the python code.",
        ),
    }
autotabml_tasks.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from crewai import Task
2
+ # Function to create tasks based on user inputs
3
def create_tasks(func_call, user_question, file_name, data_upload, df, suggestion, edited_code, debugger, agents):
    """Build the list of crew tasks for one user action.

    Args:
        func_call: Action name: "Process", "Modify" or "Debug".
        user_question: The user's description of the ML problem.
        file_name: Name of the uploaded dataset file.
        data_upload: True when a dataset was uploaded and parsed.
        df: The uploaded data as a DataFrame, or None when nothing was uploaded.
        suggestion: Requested modification (used by "Modify").
        edited_code: Current code from the editor (used by "Modify"/"Debug").
        debugger: Error message pasted by the user (used by "Debug").
        agents: Mapping of agent name to agent, as built by initialize_agents.

    Returns:
        list of ``Task``; the last one is always the code-extraction task.
    """
    import io  # local import: this module's header only imports crewai.Task

    # df is None until a file is uploaded. DataFrame.info() also writes to a
    # buffer and returns None, so capture its text instead of using the
    # return value (which previously put "None" into the prompt).
    if df is not None:
        info_buffer = io.StringIO()
        df.info(buf=info_buffer)
        info = info_buffer.getvalue()
        columns = df.columns.tolist()
    else:
        info = ""
        columns = []

    tasks = []
    if func_call == "Process":
        tasks.append(
            Task(
                description=f"Clarify the ML problem: {user_question}",
                agent=agents["Problem_Definition_Agent"],
                expected_output="A clear and concise definition of the ML problem."
            )
        )

        if data_upload and df is not None:
            tasks.extend([
                Task(
                    description=f"Evaluate the data provided by the file name {file_name}. This is the data: {df}",
                    agent=agents["EDA_Agent"],
                    expected_output="An assessment of the EDA and preprocessing like dataset info, missing value, duplication, outliers etc. on the data provided"
                ),
                Task(
                    description=f"Feature Engineering on data {df} based on EDA output: {info}",
                    agent=agents["Feature_Engineering_Agent"],
                    expected_output="An assessment of the Featuring Engineering and preprocessing like handling missing values, handling duplication, handling outliers, feature encoding, feature scaling etc. on the data provided"
                )
            ])

        if df is not None:
            starter_description = f"Generate starter Python code based on feature engineering, where column names are {columns}. Generate only the code without any extra text"
        else:
            # No dataset available: do not advertise column names.
            starter_description = "Generate starter Python code for the problem described. Generate only the code without any extra text"

        tasks.extend([
            Task(
                description="Suggest suitable ML models.",
                agent=agents["Model_Recommendation_Agent"],
                expected_output="A list of suitable ML models."
            ),
            Task(
                description=starter_description,
                agent=agents["Starter_Code_Generator_Agent"],
                expected_output="Starter Python code."
            ),
        ])

    if func_call == "Modify":
        if suggestion:
            tasks.append(
                Task(
                    description=f"Modify the already generated code {edited_code} according to the suggestion: {suggestion} \n\n Do not generate entire new code.",
                    agent=agents["Code_Modification_Agent"],
                    expected_output="Modified code."
                )
            )

    if func_call == "Debug":
        if debugger:
            # Only mention the data when a dataset is actually available.
            data_context = f"for data with column names {columns} with data as {df} " if df is not None else ""
            tasks.append(
                Task(
                    description=f"Debug and fix any errors {data_context}in the generated code: {edited_code} \n\n According to the debugging: {debugger}. \n\n Do not generate entire new code. Just remove the error in the code by modifying only necessary parts of the code.",
                    agent=agents["Code_Debugger_Agent"],
                    expected_output="Debugged and successfully executed code."
                )
            )

    # Final step for every action: reduce the previous output to code only.
    tasks.append(
        Task(
            description="Your job is to only extract python code from string",
            agent=agents["Compiler_Agent"],
            expected_output="Running python code."
        )
    )

    return tasks