File size: 10,605 Bytes
61047f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ad22d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61047f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb6e23f
61047f3
 
 
 
 
 
cb6e23f
a91c47b
 
 
 
 
 
 
dfcaf10
 
a91c47b
 
 
61047f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
import redis 
import os 
import google.generativeai as genai
from typing import List
import numpy as np 
from redis.commands.search.query import Query
from haystack import Pipeline, component
from haystack.utils import Secret
from haystack_integrations.components.generators.google_ai import GoogleAIGeminiChatGenerator, GoogleAIGeminiGenerator
from haystack.components.builders import PromptBuilder
import streamlit as st
from data_processor import fetch_data,ingest_data

# Configure the Gemini SDK up front; raises KeyError at import time if
# GEMINI_API_KEY is unset, failing fast with an obvious cause.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

# Sampling/response settings for the code-description model below.
generation_config = {
  "temperature": 1,
  "top_p": 0.95,
  "top_k": 64,
  "max_output_tokens": 8192,
  "response_mime_type": "text/plain",
}

# Flash model primed with a system instruction for describing Python code.
# NOTE(review): `model` is never referenced later in this file — confirm it
# is used elsewhere (e.g. by the ingestion pipeline) before removing.
model = genai.GenerativeModel(
  model_name="gemini-1.5-flash",
  generation_config=generation_config,
  system_instruction="You are optimized to generate accurate descriptions for given Python codes. When the user inputs the code, you must return the description according to its goal and functionality.  You are not allowed to generate additional details. The user expects at least 5 sentence-long descriptions.",
)

# Haystack generator wrapper around the Flash model.
# NOTE(review): `gemini` also appears unused in this file (the pipeline uses
# `llm`, a Pro-model generator defined further down) — verify before deleting.
gemini = GoogleAIGeminiGenerator(api_key=Secret.from_env_var("GEMINI_API_KEY"), model='gemini-1.5-flash')

def get_embeddings(content: List):
    """Embed *content* with Gemini's text-embedding-004 model.

    Args:
        content (List): Text item(s) to embed.

    Returns:
        The embedding vector(s) extracted from the API response.
    """
    response = genai.embed_content(
        model='models/text-embedding-004',
        content=content,
    )
    return response['embedding']


def draft_prompt(query: str, chat_history: str) -> str:
    """
    Perform a vector similarity search and retrieve related functions.

    Args:
        query (str): The input query to encode.
        chat_history (str): The conversation so far, interpolated verbatim
            into the returned prompt.

    Returns:
        str: A formatted string containing the user question, the chat
        history, and the retrieved code snippets plus their dependencies.
    """
    INDEX_NAME = "idx:codes_vss"
    client = st.session_state.client

    # KNN query: two nearest neighbours on the @vector field, ranked by
    # distance (vector_score). Dialect 2 is required for parameterized KNN.
    vector_search_query = (
        Query('(*)=>[KNN 2 @vector $query_vector AS vector_score]')
        .sort_by('vector_score')
        .return_fields('vector_score', 'id', 'name', 'definition', 'file_name', 'type', 'uses')
        .dialect(2)
    )

    # Redis expects the query vector as a packed little-endian float32 blob.
    encoded_query = get_embeddings(query)
    vector_params = {
        "query_vector": np.array(encoded_query, dtype=np.float32).tobytes()
    }

    result_docs = client.ft(INDEX_NAME).search(vector_search_query, vector_params).docs

    # Collect the matched item names plus the names each one depends on
    # ("uses" is stored as a comma-separated string on the document).
    related_items: List[str] = []
    dependencies: List[str] = []
    for doc in result_docs:
        related_items.append(doc.name)
        if doc.uses:
            dependencies.extend(use for use in doc.uses.split(", ") if use)

    # Drop dependencies already present in the primary result set.
    dependencies = list(set(dependencies) - set(related_items))

    def search_by_name(item_list):
        # Fetch full documents for a list of names. Guard against an empty
        # list: "@name:()" is a RediSearch syntax error, so return no docs
        # instead of raising.
        if not item_list:
            return []
        name_query = Query(f"@name:({' | '.join(item_list)})").return_fields(
            'id', 'name', 'definition', 'file_name', 'type'
        )
        return client.ft(INDEX_NAME).search(name_query).docs

    related_docs = search_by_name(related_items)
    dependency_docs = search_by_name(dependencies)

    def format_doc(doc):
        # Render one retrieved document as a fenced code-snippet section.
        return (
            f"{'*' * 28} CODE SNIPPET {doc.id} {'*' * 28}\n"
            f"* Name: {doc.name}\n"
            f"* File: {doc.file_name}\n"
            f"* {doc.type.capitalize()} definition:\n"
            f"```python\n{doc.definition}\n```\n"
        )

    formatted_results_main = [format_doc(doc) for doc in related_docs]
    formatted_results_support = [format_doc(doc) for doc in dependency_docs]

    return (
        f"User Question: {query}\n\n"
        f"Current Chat History: \n{chat_history}\n\n"
        f"USE BELOW CODES TO ANSWER USER QUESTIONS.\n"
        f"{chr(10).join(formatted_results_main)}\n\n"
        f"SOME SUPPORTING FUNCTIONS AND CLASS YOU MAY WANT.\n"
        f"{chr(10).join(formatted_results_support)}"
    )

@component
class RedisRetreiver:
  """Haystack component that builds the RAG context string via Redis
  vector search (delegates to draft_prompt).

  NOTE(review): the class name is misspelled ("Retreiver") — left as-is
  because the pipeline wiring below instantiates it by this name.
  """

  @component.output_types(context=str)
  def run(self, query:str, chat_history:str):
    context = draft_prompt(query, chat_history)
    return {"context": context}

# Generator used as the pipeline's LLM stage — the Pro model, unlike the
# Flash-based `gemini` generator defined earlier.
llm = GoogleAIGeminiGenerator(api_key=Secret.from_env_var("GEMINI_API_KEY"), model='gemini-1.5-pro')
# llm = OpenAIGenerator()

# Prompt template for the pipeline. `{{context}}` is filled by PromptBuilder
# with the retriever's output (user question, chat history, code snippets).
# Fixes spelling/grammar errors in the original prompt text ("snipits",
# "libraies", "explnations", "sugestions", doubled "raised").
template = """
You are a helpful agent optimized to resolve GitHub issues for your organization's libraries. Users will ask questions when they encounter problems with the code repository.
You have access to all the necessary code for addressing these issues. 
First, you should understand the user's question and identify the relevant code blocks. 
Then, craft a precise and targeted response that allows the user to find an exact solution to their problem. 
You must provide code snippets rather than just opinions.
You should always assume the user has installed this python package in their system and raised the question while they are using the library.

In addition to the above tasks, you are free to:
 * Greet the user.
 * [ONLY IF THE QUESTION IS INSUFFICIENT] Request additional clarity.
 * Politely decline irrelevant queries.
 * Inform the user if their query cannot be processed or accomplished.

By any chance you should NOT,
 * Ask or recommend user to use different library. Or code snippets related to other similar libraries.
 * Provide inaccurate explanations.
 * Provide suggestions without code examples.

{{context}}
"""

# Renders the static template, substituting the retrieved context.
prompt_builder = PromptBuilder(template=template)

# Wire the RAG pipeline: retriever -> prompt_builder -> llm.
pipeline = Pipeline()
pipeline.add_component(name="retriever", instance=RedisRetreiver())
pipeline.add_component("prompt_builder", prompt_builder)
pipeline.add_component("llm", llm)
# The retriever's "context" output feeds the {{context}} template variable.
pipeline.connect("retriever.context", "prompt_builder")
pipeline.connect("prompt_builder", "llm")

# Initialize Streamlit app
st.title("Code Assistant Chat")
st.subheader("Frequently Asked Questions")

# Inline CSS giving the FAQ expanders a light, card-like appearance.
st.markdown("""
    <style>
    .streamlit-expanderHeader {
        background-color: #f0f2f6;
        border: 1px solid #ddd;
        border-radius: 5px;
        padding: 10px;
    }
    .streamlit-expanderContent {
        background-color: #ffffff;
        border: 1px solid #ddd;
        border-radius: 5px;
        padding: 10px;
    }
    </style>
""", unsafe_allow_html=True)

# FAQ: setup walkthrough for first-time users.
with st.expander("How can I use this space?"):
    st.markdown("""
    This space is created based on steps described in [this Medium article](https://medium.com/towards-data-science/using-llama-3-for-building-ai-agents-7e74f79d1ccc). To use this space:
    
    1. Create a Redis Cloud account and set up a database
    2. Add your Redis credentials to this space
    3. Enter your preferred GitHub repository clone URL for data fetching and indexing
    
    """)

# FAQ: API-key requirements for testers.
with st.expander("Do I need a Gemini API key?"):
    st.markdown("""
    No, you don't need to provide a Gemini API key for testing this repository. 
    
    - This repo includes a Gemini free tier API key itself. 
    - However, if you encounter any resource exhaustion error:
        - Consider cloning this space
        - Add your own key as the `GEMINI_API_KEY` secret
    
    """)

# FAQ: borrowing preconfigured database credentials instead of self-hosting.
with st.expander("I don't want to create a Redis database. Can I still check the output?"):
    st.markdown("""
    Absolutely! Here's what you can do:
    
    1. Send me a message on LinkedIn mentioning your requirement
    2. I'll provide you with preconfigured database credentials
    3. Enter these credentials in the appropriate fields
    4. You'll then be able to use the assistant as you wish
    
    > **Important**: Please use the provided credentials responsibly and for testing purposes only.
    """)

# Two-tab UI: connect/ingest data first, then chat with the assistant.
tabs = ["Data Fetching","Assistant"]
selected_tab = st.sidebar.radio("Select a Tab", tabs)
if selected_tab == 'Data Fetching':
    # Connection state must survive Streamlit's per-interaction reruns.
    if 'redis_connected' not in st.session_state:
        st.session_state.redis_connected = False

    if not st.session_state.redis_connected:
        st.header("Redis Connection Settings")
        
        redis_host = st.text_input("Redis Host")
        redis_port = st.number_input("Redis Port", min_value=1, max_value=65535, value=6379)
        redis_password = st.text_input("Redis Password", type="password")
        
        if st.button("Connect to Redis"):
            try:
                client = redis.Redis(
                    host=redis_host,
                    port=redis_port,
                    password=redis_password
                )
                
                # ping() verifies the credentials before caching the client
                # in session state for the retriever to use.
                if client.ping():
                    st.success("Successfully connected to Redis!")
                    st.session_state.redis_connected = True
                    st.session_state.client = client
                    st.session_state.host = redis_host
                else:
                    st.error("Failed to connect to Redis. Please check your settings.")
            except redis.ConnectionError:
                st.error("Failed to connect to Redis. Please check your settings and try again.")
    
    if st.session_state.redis_connected:
        # The maintainer-provided instance is assumed pre-indexed, so skip
        # ingestion for it.
        # NOTE(review): os.environ['REDIS_HOST'] raises KeyError when that
        # secret is unset — confirm it is always configured for this space.
        if st.session_state.host == os.environ['REDIS_HOST']:
            st.success("You are all set!")
        else:
            url = st.text_input("Enter git clone URL")
            if url:
                with st.spinner("Fetching data..."):
                    data = fetch_data(url)
                
                with st.spinner("Ingesting data..."):
                    response_string = ingest_data(st.session_state.client, data)
                    
                    st.write(response_string)

if selected_tab == 'Assistant':
    # Chat transcript persisted across reruns.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Display chat messages
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # NOTE(review): reset on every rerun, so the "See Chat History" expander
    # below shows only the current run's raw pipeline response (None
    # otherwise) — confirm that is the intended debugging behavior.
    st.session_state.response = None

    if prompt := st.chat_input("What's your question?"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            # Placeholder lets us swap "Thinking..." for the real reply in place.
            response_placeholder = st.empty()
            response_placeholder.markdown("Thinking...")
            
            try:
                # NOTE(review): chat_history receives the list of message
                # dicts although draft_prompt annotates it as str; it is only
                # interpolated into an f-string so this works, but the
                # annotation should be reconciled.
                response = pipeline.run({"retriever": {"query": prompt, "chat_history": st.session_state.messages}}, include_outputs_from=['prompt_builder'])
                st.session_state.response = response
                llm_response = response["llm"]["replies"][0]
                
                response_placeholder.markdown(llm_response)
                st.session_state.messages.append({"role": "assistant", "content": llm_response})
            except Exception as e:
                # Broad catch keeps the UI alive; the error text is shown in
                # place of the assistant reply.
                response_placeholder.markdown(f"An error occurred: {str(e)}")

    if st.button("Clear Chat History"):
        st.session_state.messages = []
        # NOTE(review): st.experimental_rerun() is deprecated in newer
        # Streamlit releases (replaced by st.rerun) — verify the pinned version.
        st.experimental_rerun()

    with st.expander("See Chat History"):
        st.markdown(st.session_state.response)