File size: 9,940 Bytes
a45aa21 c114fc1 91e2918 c114fc1 67e558e c114fc1 f4f59e4 c114fc1 213d97d c114fc1 07bfeb2 c114fc1 07bfeb2 c114fc1 07bfeb2 c114fc1 07bfeb2 c114fc1 07bfeb2 c114fc1 07bfeb2 c114fc1 07bfeb2 c114fc1 07bfeb2 c114fc1 07bfeb2 c114fc1 07bfeb2 8b54131 07bfeb2 c114fc1 07bfeb2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 |
import superimport
import streamlit as st
import os
import pandas as pd
import random
from os.path import join
from src import preprocess_and_load_df, load_agent, ask_agent, decorate_with_code, show_response, get_from_user, load_smart_df, ask_question
from dotenv import load_dotenv
from langchain_groq.chat_models import ChatGroq
# Pull variables from the local "Groq.txt" env file into the process
# environment, then read the Groq key. Indexing os.environ (rather than
# os.getenv) makes a missing key fail immediately with KeyError.
load_dotenv("Groq.txt")
Groq_Token = os.environ["GROQ_API_KEY"]

# Short alias shown in the UI -> model identifier handed to ChatGroq.
models = {
    "llama3": "llama3-70b-8192",
    "mixtral": "mixtral-8x7b-32768",
    "llama2": "llama2-70b-4096",
    "gemma": "gemma-7b-it",
}

# Absolute path of the directory that contains this script.
self_path = os.path.dirname(os.path.abspath(__file__))
# Using HTML and CSS to center the title.
# The stylesheet is injected as raw HTML; the <style> element must be closed
# explicitly, otherwise the markup is malformed and the renderer has to guess
# where the CSS ends.
st.write(
    """
<style>
.title {
text-align: center;
color: #17becf;
}
</style>
""",
    unsafe_allow_html=True,
)

# Displaying the centered title (styled by the `.title` rule above)
st.markdown("<h2 class='title'>VayuBuddy</h2>", unsafe_allow_html=True)
# os.environ["PANDASAI_API_KEY"] = "$2a$10$gbmqKotzJOnqa7iYOun8eO50TxMD/6Zw1pLI2JEoqncwsNx4XeBS2"
# with open(join(self_path, "context1.txt")) as f:
# context = f.read().strip()
# agent = load_agent(join(self_path, "app_trial_1.csv"), context)
# df = preprocess_and_load_df(join(self_path, "Data.csv"))
# inference_server = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
# inference_server = "https://api-inference.huggingface.co/models/codellama/CodeLlama-13b-hf"
# inference_server = "https://api-inference.huggingface.co/models/pandasai/bamboo-llm"
# Sidebar picker for the LLM alias; the chosen value is later used as a key
# into `models`. Note that "llama2" exists in `models` but is not offered here.
model_name = st.sidebar.selectbox(
    "Select LLM:",
    ["llama3", "mixtral", "gemma"],
)
# Options for the "Select a Prompt" sidebar box. The first entry is the
# sentinel that switches the UI to free-text chat input; every other entry is
# sent to the model verbatim. Entries must be unique so the select box does
# not show the same prompt twice.
questions = (
    'Custom Prompt',
    'Plot the monthly average PM2.5 for the year 2023.',
    'Which month has the highest average PM2.5 overall?',
    'Which month has the highest PM2.5 overall?',
    'Which month has the highest average PM2.5 in 2023 for Mumbai?',
    'Plot and compare monthly timeseries of pollution for Mumbai and Bengaluru.',
    'Plot the yearly average PM2.5.',
    'Plot the monthly average PM2.5 of Delhi, Mumbai and Bengaluru for the year 2022.',
    'Which month has the highest pollution?',
    'Plot the monthly average PM2.5 of Delhi for the year 2022.',
    'Which city has the highest PM2.5 level in July 2022?',
    'Plot and compare monthly timeseries of PM2.5 for Mumbai and Bengaluru.',
    'Plot and compare the monthly average PM2.5 of Delhi, Mumbai and Bengaluru for the year 2022.',
    'Plot the monthly average PM2.5.',
    'Plot the monthly average PM10 for the year 2023.',
    'Which month has the highest PM2.5?',
    'Plot the monthly average PM2.5 of Bengaluru for the year 2022.',
    'Plot the monthly average PM2.5 of Mumbai for the year 2022.',
    'Which state has the highest average PM2.5?',
    'Plot monthly PM2.5 in Gujarat for 2023.',
    'What is the name of the month with the highest average PM2.5 overall?',
)

# Spinner captions; one is chosen at random while the model is working.
waiting_lines = (
    "Thinking...",
    "Just a moment...",
    "Let me think...",
    "Working on it...",
    "Processing...",
    "Hold on...",
    "One moment...",
    "On it...",
)
# agent = load_agent(df, context="", inference_server=inference_server, name=model_name)
# Initialize chat history: create the per-session list of chat entries the
# first time the script runs for this session.
if "responses" not in st.session_state:
    st.session_state["responses"] = []

# Display chat responses from history on app rerun. Entries flagged with
# "no_response" are kept in the history but are not rendered.
for response in st.session_state["responses"]:
    if response["no_response"]:
        continue
    show_response(st, response)
# Decide which prompt (if any) should be answered on this run.
# `show` stays True for a canned question; for 'Custom Prompt' it is True only
# once the user has actually submitted text through the chat box.
show = True
prompt = st.sidebar.selectbox("Select a Prompt:", questions)
if prompt:
    # add a note "select custom prompt to ask your own question"
    st.sidebar.info("Select 'Custom Prompt' to ask your own question.")

    if prompt == 'Custom Prompt':
        # React to user input; chat_input yields nothing until text is sent
        prompt = st.chat_input("Ask me anything about air quality!", key=10)
        show = bool(prompt)
# ---------------------------------------------------------------------------
# Answer the current prompt.
#
# Flow: record and display the user's message, ask the selected Groq model to
# complete a Python code template, execute the completed code (up to five
# attempts), then record and display the outcome. The generated code is
# expected to leave its result in a global variable named `answer`.
# ---------------------------------------------------------------------------
if show:
    # Add user input to chat history
    response = get_from_user(prompt)
    response["no_response"] = False
    st.session_state.responses.append(response)

    # Display user input
    show_response(st, response)

    no_response = False

    # select random waiting line
    with st.spinner(random.choice(waiting_lines)):
        ran = False

        # The client, the dtype preview and both prompt strings are the same
        # for every attempt, so they are built once rather than once per retry.
        #
        # GROQ_API is honoured when it is set; otherwise fall back to the key
        # already loaded from GROQ_API_KEY at start-up, so the client is never
        # handed api_key=None.
        llm = ChatGroq(
            model=models[model_name],
            api_key=os.getenv("GROQ_API") or Groq_Token,
            temperature=0.1,
        )

        # Only the column dtypes are embedded in the prompt, so a 5-row
        # preview of the data is kept.
        df_check = pd.read_csv("Data.csv")
        df_check["Timestamp"] = pd.to_datetime(df_check["Timestamp"])
        df_check = df_check.head(5)

        # Named so that "\n" can be used inside f-string expressions below.
        new_line = "\n"
        parameters = {"font.size": 18}

        # Code template the model has to complete. It is both sent to the
        # model and executed verbatim ahead of the model's completion, so its
        # body must be valid, correctly indented Python.
        #
        # `calculator` maps a concentration onto the AQI band that contains
        # it. The lower breakpoint/index (BPLI/ILI) are taken from the SAME
        # band index as the upper ones (BPHI/IHI): the two breakpoint tables
        # are parallel, and searching the lower table separately with `>=`
        # always stopped at index 0, which produced wrong AQI values for any
        # concentration above the second band.
        template = f"""```python
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams.update({parameters})
df = pd.read_csv("Data.csv")
df["Timestamp"] = pd.to_datetime(df["Timestamp"])
def calculator(Pollutant, concentration):
    Calculator_index = Pollutant
    breakpoints_low = {{
        "O3": [0, 50, 100, 168, 208, 748],
        "PM2.5": [0, 30, 60, 90, 120, 250],
        "PM10": [0, 50, 100, 250, 350, 430],
        "CO": [0, 1000, 2000, 10000, 17000, 34000],
        "SO2": [0, 40, 80, 380, 800, 1600],
        "NO2": [0, 40, 80, 180, 280, 400]
    }}
    breakpoints_high = {{
        "O3": [50, 100, 168, 208, 748,1000],
        "PM2.5": [30, 60, 90, 120, 250,1000],
        "PM10": [50, 100, 250, 350, 430,1000],
        "CO": [1000, 2000, 10000, 17000, 34000,50000],
        "SO2": [40, 80, 380, 800, 1600,2000],
        "NO2": [ 40, 80, 180, 280, 400,1000]
    }}
    # Define corresponding AQI categories
    categories_low= [0, 50, 100, 200, 300, 400]
    categories_high = [50, 100, 200, 300, 400,500]
    # Find the appropriate AQI category based on concentration
    for i in range(len(breakpoints_high[Calculator_index])):
        if concentration <= breakpoints_high[Calculator_index][i]:
            BPHI = breakpoints_high[Calculator_index][i]
            IHI = categories_high[i]
            BPLI = breakpoints_low[Calculator_index][i]
            ILI = categories_low[i]
            # Calculate AQI using India formula
            #AQI = ((categories[i] - categories[i-1]) / (breakpoints[Calculator_index][i] - breakpoints[Calculator_index][i-1])) * (concentration - breakpoints[Calculator_index][i-1]) + categories[i-1]
            #st.sidebar.write(f"The Air Quality Index (AQI) for {{Calculator_index}} is: {{AQI}}")
            break
    AQI = ((IHI - ILI) / (BPHI - BPLI)) * (round(concentration) - BPLI) + ILI
    return AQI
# df.dtypes
{new_line.join(map(lambda x: '# '+x, str(df_check.dtypes).split(new_line)))}
# {prompt.strip()}
# <your code here>
```
"""

        # Instructions wrapped around the template.
        query = f"""I have a pandas dataframe data of PM2.5 and PM10.
* Frequency of data is daily.
* `pollution` generally means `PM2.5`.
* You already have df, so don't read the csv file
* Don't print, but save result in a variable `answer` and make it global.
* Unless explicitly mentioned, don't consider the result as a plot.
* PM2.5 guidelines: India: 60, WHO: 15.
* PM10 guidelines: India: 100, WHO: 50.
* If result is a plot, show the India and WHO guidelines in the plot.
* If result is a plot make it in tight layout, save it and save path in `answer`. Example: `answer='plot.png'`
* If result is not a plot, save it as a string in `answer`. Example: `answer='The city is Mumbai'`
* Whenever you do any sort of aggregation, report the corresponding standard deviation, standard error and the number of data points for that aggregation.
* Whenever you're reporting a floating point number, round it to 2 decimal places.
* Always report the unit of the data. Example: `The average PM2.5 is 45.67 µg/m³`
Complete the following code.
{template}
"""

        # Up to five attempts: each one asks for a fresh completion and stops
        # as soon as a completion executes without raising.
        for _ in range(5):
            answer = llm.invoke(query)

            # Defined up front so the response record below can always be
            # built, even when the reply cannot be parsed.
            code = ""

            try:
                # Parsing lives inside the try on purpose: a reply without a
                # ```python fence raises IndexError, which must count as a
                # failed attempt (and be retried) instead of crashing the app.
                code = f"""
{template.split("```python")[1].split("```")[0]}
{answer.content.split("```python")[1].split("```")[0]}
"""
                # SECURITY NOTE(review): this executes model-generated code
                # with the full privileges and namespace of the app process.
                # It is how the generated code publishes `answer`, so it is
                # kept as-is, but it should be sandboxed before this app is
                # exposed to untrusted users.
                # update variable `answer` when code is executed
                exec(code)
                ran = True
                no_response = False
            except Exception as e:
                no_response = True
                # `e` is unbound once the except block ends; keep a reference
                # for the error message shown after the loop.
                exception = e

            response = {"role": "assistant", "content": answer, "gen_code": code, "ex_code": code, "last_prompt": prompt, "no_response": no_response}

            # Get response from agent
            # response = ask_question(model_name=model_name, question=prompt)
            # response = ask_agent(agent, prompt)

            if ran:
                break

    if no_response:
        st.error(f"Failed to generate right output due to the following error:\n\n{exception}")

    # Add agent response to chat history (failed attempts are stored too, but
    # are flagged with "no_response" so the history replay skips them)
    st.session_state.responses.append(response)

    # Display agent response
    if not no_response:
        show_response(st, response)

    # Kept from the original script: unbind the prompt once it is handled.
    del prompt