awacke1 committed on
Commit
3fdb869
β€’
1 Parent(s): c71121f

Create backupapp2.py

Browse files
Files changed (1) hide show
  1. backupapp2.py +740 -0
backupapp2.py ADDED
@@ -0,0 +1,740 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import streamlit.components.v1 as components
3
+ import os
4
+ import base64
5
+ import glob
6
+ import io
7
+ import json
8
+ import mistune
9
+ import pytz
10
+ import math
11
+ import requests
12
+ import sys
13
+ import time
14
+ import re
15
+ import textract
16
+ import zipfile
17
+ import random
18
+ import httpx # add 11/13/23
19
+ import asyncio
20
+ from openai import OpenAI
21
+ #from openai import AsyncOpenAI
22
+ from datetime import datetime
23
+ from xml.etree import ElementTree as ET
24
+ from bs4 import BeautifulSoup
25
+ from collections import deque
26
+ from audio_recorder_streamlit import audio_recorder
27
+ from dotenv import load_dotenv
28
+ from PyPDF2 import PdfReader
29
+ from langchain.text_splitter import CharacterTextSplitter
30
+ from langchain.embeddings import OpenAIEmbeddings
31
+ from langchain.vectorstores import FAISS
32
+ from langchain.chat_models import ChatOpenAI
33
+ from langchain.memory import ConversationBufferMemory
34
+ from langchain.chains import ConversationalRetrievalChain
35
+ from templates import css, bot_template, user_template
36
+ from io import BytesIO
37
+ from contextlib import redirect_stdout
38
+ # code import tests
39
+ import seaborn
40
+ import plotly
41
+ import vega_datasets
42
+ import bokeh
43
+ import holoviews
44
+ import plotnine
45
+ import graphviz
46
+ import tensorflow
47
+ import torch
48
+
49
+ # set page config once
50
st.set_page_config(page_title="Python AI Pair Programmer", layout="wide")

# Sidebar controls shared by the rest of the script.
should_save = st.sidebar.checkbox("πŸ’Ύ Save", value=True)
col1, col2, col3, col4 = st.columns(4)
with col1, st.expander("Settings πŸ§ πŸ’Ύ", expanded=True):
    # Output file type and model selection (both widgets render in the sidebar).
    menu = ["txt", "htm", "xlsx", "csv", "md", "py"]
    choice = st.sidebar.selectbox("Output File Type:", menu)
    model_choice = st.sidebar.radio(
        "Select Model:",
        ('gpt-3.5-turbo', 'gpt-3.5-turbo-0301'),
    )

# Namespace handed to exec() so state persists between executed snippets.
context = {}
64
+
65
def create_file(filename, prompt, response, should_save=True):
    """Save a prompt/response pair as a Markdown report and show a download link.

    Fenced code blocks found in ``response`` are processed one by one: a block
    whose text contains "python" is executed and its captured stdout (or the
    error message) is appended under a "Code Results" heading; any other block
    is appended verbatim under a "Resource" heading.

    Args:
        filename: Requested file name. Only its base name is used on disk, where
            the report is always written as ``<base>.md``; the full name is the
            suggested name in the download link.
        prompt: Text placed under the "Prompt" heading.
        response: Model reply placed under the "Response" heading.
        should_save: When false the function returns without doing anything.

    Returns:
        None.
    """
    if not should_save:
        return

    base_filename, _ext = os.path.splitext(filename)

    sections = [
        "# Prompt πŸ“\n" + prompt + "\n\n",
        "# Response πŸ’¬\n" + response + "\n\n",
    ]

    for resource in re.findall(r"```([\s\S]*?)```", response):
        if "python" not in resource.lower():
            sections.append("# Resource πŸ› οΈ\n" + "```" + resource + "```\n\n")
            continue

        # Drop the 'python' language tag that follows the opening fence.
        cleaned_code = re.sub(r'^\s*python', '', resource, flags=re.IGNORECASE | re.MULTILINE)
        sections.append("# Code Results πŸš€\n")

        captured = io.StringIO()
        try:
            # SECURITY NOTE(review): this executes model-generated code inside the
            # app process with full privileges; sandbox it before exposing the app
            # to untrusted users.
            # redirect_stdout restores sys.stdout on every exit path, including
            # SystemExit raised by the executed snippet.
            with redirect_stdout(captured):
                exec(cleaned_code, context)
        except Exception as e:
            sections.append(f"```python\nError executing Python code: {e}\n```\n\n")
        else:
            code_output = captured.getvalue()
            sections.append(f"```\n{code_output}\n```\n\n")
            st.code("# Code Results πŸš€\n" + "```" + code_output + "```\n\n")

    combined_content = "".join(sections)

    # Explicit UTF-8: the headings contain emoji, which a non-UTF-8 default
    # locale encoding cannot write.
    with open(f"{base_filename}.md", 'w', encoding='utf-8') as file:
        file.write(combined_content)
    st.code(combined_content)

    # Inline the saved content as a base64 data link for download.
    encoded_file = base64.b64encode(combined_content.encode('utf-8')).decode()
    href = f'<a href="data:file/markdown;base64,{encoded_file}" download="{filename}">Download File πŸ“„</a>'
    st.markdown(href, unsafe_allow_html=True)
124
+
125
+
126
+ # Read it aloud
127
def readitaloud(result):
    """Render an HTML component that reads ``result`` aloud via the browser.

    The text is placed in a ``<textarea>`` and a button calls the Web Speech
    API (``SpeechSynthesisUtterance``) on the textarea's value.

    Args:
        result: Text to offer for speech synthesis.

    Returns:
        None.
    """
    import html  # local import: used only to escape the embedded text

    # Escape the text so markup inside it (e.g. "</textarea>" or "<script>")
    # cannot terminate the textarea or inject HTML; the browser decodes the
    # entities back, so the spoken text is unchanged.
    safe_text = html.escape(result)

    documentHTML5 = '''
<!DOCTYPE html>
<html>
<head>
<title>Read It Aloud</title>
<script type="text/javascript">
function readAloud() {
const text = document.getElementById("textArea").value;
const speech = new SpeechSynthesisUtterance(text);
window.speechSynthesis.speak(speech);
}
</script>
</head>
<body>
<h1>πŸ”Š Read It Aloud</h1>
<textarea id="textArea" rows="10" cols="80">
''' + safe_text + '''
</textarea>
<br>
<button onclick="readAloud()">πŸ”Š Read Aloud</button>
</body>
</html>
'''

    components.html(documentHTML5, width=800, height=300)
156
+
157
def generate_filename(prompt, file_type):
    """Build a file name of the form ``MMDD_HHMM_<prompt slug>.<file_type>``.

    The timestamp is the current US/Central time. The slug is the prompt with
    spaces and newlines turned into underscores, every character that is not
    alphanumeric or an underscore dropped, and the result cut to 90 characters.
    """
    stamp = datetime.now(pytz.timezone('US/Central')).strftime("%m%d_%H%M")
    underscored = prompt.replace(" ", "_").replace("\n", "_")
    kept_chars = [ch for ch in underscored if ch == "_" or ch.isalnum()]
    slug = "".join(kept_chars)[:90]
    return f"{stamp}_{slug}.{file_type}"
163
+
164
+ # Chat and Chat with files
165
def chat_with_model(prompt, document_section, model_choice='gpt-3.5-turbo'):
    """Stream a chat completion for ``prompt`` and display, save and voice it.

    Args:
        prompt: User message sent to the model.
        document_section: Optional extra context; when non-empty it is appended
            to the conversation as an ``assistant`` message.
        model_choice: Name of the chat model to call.

    Returns:
        The complete reply text assembled from the streamed chunks.
    """
    conversation = [
        {'role': 'system', 'content': 'You are a python script writer.'},
        {'role': 'user', 'content': prompt},
    ]
    if document_section:
        conversation.append({'role': 'assistant', 'content': document_section})

    start_time = time.time()
    res_box = st.empty()

    client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
    stream = client.chat.completions.create(
        model=model_choice,  # honour the caller's selection instead of a fixed model
        messages=conversation,
        stream=True,
    )

    all_content = ""
    for part in stream:
        if not part.choices:
            continue
        # The delta content is None on role-only and final chunks; skip those
        # explicitly instead of relying on an exception handler.
        content = part.choices[0].delta.content
        if not content:
            continue
        all_content += content
        res_box.markdown(f'*{all_content.strip()}*')

    full_reply_content = all_content
    st.write("Elapsed time:")
    st.write(time.time() - start_time)
    filename = generate_filename(full_reply_content, choice)
    create_file(filename, prompt, full_reply_content, should_save)
    readitaloud(full_reply_content)
    return full_reply_content
206
+
207
def chat_with_file_contents(prompt, file_content, model_choice='gpt-3.5-turbo'):
    """Run a single non-streaming chat completion and return the reply text.

    Args:
        prompt: User message sent to the model.
        file_content: Optional context; when non-empty it is appended to the
            conversation as an ``assistant`` message.
        model_choice: Name of the chat model to call.

    Returns:
        The content of the first choice's message.
    """
    conversation = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': prompt},
    ]
    if file_content:
        conversation.append({'role': 'assistant', 'content': file_content})

    client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
    response = client.chat.completions.create(model=model_choice, messages=conversation)
    # The v1 client returns a typed object, not a dict, so use attribute access.
    return response.choices[0].message.content
217
+
218
def link_button_with_emoji(url, title, emoji_summary):
    """Write a Markdown link to ``url`` labelled with a random emoji, the summary and the title."""
    emoji_pool = ["πŸ’‰", "πŸ₯", "🌑️", "🩺", "πŸ”¬", "πŸ’Š", "πŸ§ͺ", "πŸ‘¨β€βš•οΈ", "πŸ‘©β€βš•οΈ"]
    picked = random.choice(emoji_pool)
    label = f"{picked} {emoji_summary} - {title}"
    st.markdown(f"[{label}]({url})")
222
+
223
+ python_parts = {
224
+ "Syntax": {"emoji": "✏️", "details": "Variables, Comments, Printing"},
225
+ "Data Types": {"emoji": "πŸ“Š", "details": "Numbers, Strings, Lists, Tuples, Sets, Dictionaries"},
226
+ "Control Structures": {"emoji": "πŸ”", "details": "If, Elif, Else, Loops, Break, Continue"},
227
+ "Functions": {"emoji": "πŸ”§", "details": "Defining, Calling, Parameters, Return Values"},
228
+ "Classes": {"emoji": "πŸ—οΈ", "details": "Creating, Inheritance, Methods, Properties"},
229
+ "API Interaction": {"emoji": "🌐", "details": "Requests, JSON Parsing, HTTP Methods"},
230
+ "Data Visualization Libraries1": {"emoji": "πŸ“ˆ", "details": "matplotlib"},
231
+ "Data Visualization Libraries2": {"emoji": "πŸ“ˆ", "details": "seaborn"},
232
+ "Data Visualization Libraries3": {"emoji": "πŸ“ˆ", "details": "plotly"},
233
+ "Data Visualization Libraries4": {"emoji": "πŸ“ˆ", "details": "altair"},
234
+ "Data Visualization Libraries5": {"emoji": "πŸ“ˆ", "details": "bokeh"},
235
+ "Data Visualization Libraries6": {"emoji": "πŸ“ˆ", "details": "pydeck"},
236
+ "Data Visualization Libraries7": {"emoji": "πŸ“ˆ", "details": "holoviews"},
237
+ "Data Visualization Libraries8": {"emoji": "πŸ“ˆ", "details": "plotnine"},
238
+ "Data Visualization Libraries9": {"emoji": "πŸ“ˆ", "details": "graphviz"},
239
+ "Error Handling": {"emoji": "⚠️", "details": "Try, Except, Finally, Raising"},
240
+ "Scientific & Data Analysis Libraries": {"emoji": "πŸ§ͺ", "details": "Numpy, Pandas, Scikit-Learn, TensorFlow, SciPy, Pillow"},
241
+ "Advanced Concepts": {"emoji": "🧠", "details": "Decorators, Generators, Context Managers, Metaclasses, Asynchronous Programming"},
242
+ "Web & Network Libraries": {"emoji": "πŸ•ΈοΈ", "details": "Flask, Django, Requests, BeautifulSoup, HTTPX, Asyncio"},
243
+ "Streamlit & Extensions1": {"emoji": "πŸ’‘", "details": "Streamlit"},
244
+ "Streamlit & Extensions2": {"emoji": "πŸ’‘", "details": "Streamlit-AgGrid"},
245
+ "Streamlit & Extensions3": {"emoji": "πŸ’‘", "details": "Streamlit-Folium"},
246
+ "Streamlit & Extensions4": {"emoji": "πŸ’‘", "details": "Streamlit-Pandas-Profiling"},
247
+ "Streamlit & Extensions5": {"emoji": "πŸ’‘", "details": "Streamlit-Vega-Lite, Gradio"},
248
+ "Gradio": {"emoji": "πŸ’‘", "details": "gradio"},
249
+ "File Handling & Serialization": {"emoji": "πŸ“", "details": "PyPDF2, Pytz, Json, Base64, Zipfile, Random, Glob, IO"},
250
+ "Machine Learning & AI": {"emoji": "πŸ€–", "details": "OpenAI, LangChain, HuggingFace"},
251
+ "Text & Data Extraction": {"emoji": "πŸ”", "details": "TikToken, Textract, SQLAlchemy, Pillow"},
252
+ "XML & Collections Libraries": {"emoji": "πŸ“š", "details": "XML, Collections"},
253
+ "Top PyPI Libraries1": {"emoji": "πŸ†", "details": "Requests, Pillow, SQLAlchemy, Flask, Django, SciPy, Beautiful Soup, PyTest, PyGame, Twisted"},
254
+ "Top PyPI Libraries2": {"emoji": "πŸ†", "details": "numpy, pandas, matplotlib, requests, beautifulsoup4"},
255
+ "Top PyPI Libraries3": {"emoji": "πŸ†", "details": "langchain, openai, PyPDF2, pytz"},
256
+ "Top PyPI Libraries4": {"emoji": "πŸ†", "details": "streamlit, audio_recorder_streamlit, gradio"},
257
+ "Top PyPI Libraries5": {"emoji": "πŸ†", "details": "tiktoken, textract, glob, io"},
258
+ "Top PyPI Libraries6": {"emoji": "πŸ†", "details": "matplotlib, seaborn, plotly, altair, bokeh, pydeck"},
259
+ "Top PyPI Libraries7": {"emoji": "πŸ†", "details": "streamlit, streamlit-aggrid, streamlit-folium, streamlit-pandas-profiling, streamlit-vega-lite"},
260
+ "Top PyPI Libraries8": {"emoji": "πŸ†", "details": "holoviews, plotnine, graphviz"},
261
+ "Top PyPI Libraries9": {"emoji": "πŸ†", "details": "json, base64, zipfile, random"},
262
+ "Top PyPI Libraries10": {"emoji": "πŸ†", "details": "httpx, asyncio, xml, collections, huggingface "}
263
+ }
264
+
265
+
266
+ response_placeholders = {}
267
+ example_placeholders = {}
268
+
269
def display_python_parts_old2():
    """Older topic browser: Example / Quiz / Explore buttons that echo the reply as code.

    Each button sends a prompt for the topic to ``chat_with_model``, stores the
    reply in ``response_placeholders`` and shows it under a heading.
    """
    st.title("Python Interactive Learning Platform")

    def _store_and_show(topic, icon, label, reply):
        # Remember the reply for the topic, then render heading + code.
        response_placeholders[topic] = reply
        st.write(f"#### {icon} {topic} {label}")
        st.code(response_placeholders[topic], language="python")

    for part, content in python_parts.items():
        icon = content['emoji']
        details = content['details']
        with st.expander(f"{icon} {part} - {details}", expanded=False):
            if st.button(f"Show Example for {part}", key=f"example_{part}"):
                example_placeholders[part] = (
                    "Write short python script examples with mock data in python list dictionary for inputs for " + part
                )
                st.code(example_placeholders[part], language="python")
                reply = chat_with_model(f'Write python script with short code examples for: {details}', part)
                _store_and_show(part, icon, "Example", reply)

            if st.button(f"Take Quiz on {part}", key=f"quiz_{part}"):
                quiz = "Write Python script quiz examples with mock static data inputs for " + part
                reply = chat_with_model(f'Write python code blocks for quiz program: {quiz}', part)
                _store_and_show(part, icon, "Quiz", reply)

            if st.button(f"Explore {part}", key=part):
                explore_prompt = (
                    f"Write python script with a few advanced coding examples using mock data input for {details}"
                )
                reply = chat_with_model(explore_prompt, part)
                _store_and_show(part, icon, "Details", reply)
296
+
297
+
298
def display_python_parts():
    """Render one expander per ``python_parts`` topic with Example / Quiz / Explore buttons.

    Only the Explore reply is stored in ``response_placeholders``; a stored
    reply for the topic is shown at the bottom of its expander.
    """
    st.title("Python Interactive Learning Platform")
    for part, content in python_parts.items():
        heading = f"{content['emoji']} {part} - {content['details']}"
        with st.expander(heading, expanded=False):
            wants_example = st.button(f"Show Example for {part}", key=f"example_{part}")
            if wants_example:
                example_placeholders[part] = "Python script example with mock example inputs for " + part
                st.code(example_placeholders[part], language="python")
                chat_with_model(
                    'Create detailed advanced python script code examples for:' + example_placeholders[part],
                    part,
                )

            wants_quiz = st.button(f"Take Quiz on {part}", key=f"quiz_{part}")
            if wants_quiz:
                chat_with_model("Python script quiz example with mock example inputs for " + part, part)

            wants_explore = st.button(f"Explore {part}", key=part)
            if wants_explore:
                explore_prompt = (
                    f"Learn about advanced coding examples using mock example inputs for {content['details']}"
                )
                response_placeholders[part] = chat_with_model(explore_prompt, part)

            if part in response_placeholders:
                st.markdown(f"**Response:** {response_placeholders[part]}")
316
+
317
def add_paper_buttons_and_links():
    """Show the page selector, the topic browser and four paper cards.

    Each card sits in its own column and contains a link to the paper and a
    button that asks the model about the paper's listed features.
    """
    page = st.sidebar.radio("Choose a page:", ["Python Pair Programmer"])
    if page == "Python Pair Programmer":
        display_python_parts()

    # (expander label, url, title, link summary, feature outline, button label, prompt prefix)
    papers = (
        (
            "MemGPT πŸ§ πŸ’Ύ",
            "https://arxiv.org/abs/2310.08560",
            "MemGPT",
            "πŸ§ πŸ’Ύ Memory OS",
            "Memory Hierarchy, Context Paging, Self-directed Memory Updates, Memory Editing, Memory Retrieval, Preprompt Instructions, Semantic Memory, Episodic Memory, Emotional Contextual Understanding",
            "Discuss MemGPT Features",
            "Discuss the key features of MemGPT: ",
        ),
        (
            "AutoGen πŸ€–πŸ”—",
            "https://arxiv.org/abs/2308.08155",
            "AutoGen",
            "πŸ€–πŸ”— Multi-Agent LLM",
            "Cooperative Conversations, Combining Capabilities, Complex Task Solving, Divergent Thinking, Factuality, Highly Capable Agents, Generic Abstraction, Effective Implementation",
            "Explore AutoGen Multi-Agent LLM",
            "Explore the key features of AutoGen: ",
        ),
        (
            "Whisper πŸ”ŠπŸ§‘β€πŸš€",
            "https://arxiv.org/abs/2212.04356",
            "Whisper",
            "πŸ”ŠπŸ§‘β€πŸš€ Robust STT",
            "Scaling, Deep Learning Approaches, Weak Supervision, Zero-shot Transfer Learning, Accuracy & Robustness, Pre-training Techniques, Broad Range of Environments, Combining Multiple Datasets",
            "Learn About Whisper STT",
            "Learn about the key features of Whisper: ",
        ),
        (
            "ChatDev πŸ’¬πŸ’»",
            "https://arxiv.org/pdf/2307.07924.pdf",
            "ChatDev",
            "πŸ’¬πŸ’» Comm. Agents",
            "Effective Communication, Comprehensive Software Solutions, Diverse Social Identities, Tailored Codes, Environment Dependencies, User Manuals",
            "Deep Dive into ChatDev",
            "Deep dive into the features of ChatDev: ",
        ),
    )

    for column, paper in zip(st.columns(4), papers):
        expander_label, url, title, summary, outline, button_label, prompt_prefix = paper
        with column:
            with st.expander(expander_label, expanded=False):
                link_button_with_emoji(url, title, summary)
                if st.button(button_label):
                    chat_with_model(prompt_prefix + outline, title)
351
+
352
+ add_paper_buttons_and_links()
353
+
354
+
355
+ # Process user input is a post processor algorithm which runs after document embedding vector DB play of GPT on context of documents..
356
def process_user_input(user_question):
    """Ask the document conversation chain a question and render the chat history.

    The chain is expected in ``st.session_state.conversation``. For every
    message of the returned history the message is rendered with the user/bot
    template, saved via ``create_file`` and expanded into "Explore" buttons.

    Args:
        user_question: Question to send to the conversation chain.

    Returns:
        None.
    """
    # Without an indexed document there is no chain to call. The previous
    # fallback stored an empty dict and then called it, which raised TypeError.
    if 'conversation' not in st.session_state or not callable(st.session_state.conversation):
        st.warning("Please upload and process documents before asking a question.")
        return

    response = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    # NOTE(review): the scraped source lost its indentation; the save/expander
    # steps are assumed to run per message (inside the loop) — confirm.
    for i, message in enumerate(st.session_state.chat_history):
        # Even positions are treated as user turns, odd positions as bot turns.
        template = user_template if i % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)

        # Save file output from PDF query results
        filename = generate_filename(user_question, 'txt')
        create_file(filename, user_question, message.content, should_save)

        # Create expanders and buttons from the message paragraphs
        create_expanders_and_buttons(message.content)
374
+
375
def create_expanders_and_buttons(content):
    """Turn each "header: detail" paragraph of ``content`` into an expander with an Explore button.

    Paragraphs are separated by blank lines; paragraphs without both a
    non-empty header and a non-empty detail are skipped.
    """
    for paragraph in content.split("\n\n"):
        header, detail = extract_feature_and_detail(paragraph)
        if not (header and detail):
            continue
        with st.expander(header, expanded=False):
            if st.button(f"Explore {header}"):
                chat_with_model("Expand on the feature: " + detail, header)
386
+
387
def extract_feature_and_detail(paragraph):
    """Split the first line of ``paragraph`` at its first colon.

    Returns:
        ``(header, detail)`` with surrounding whitespace stripped, or
        ``(None, None)`` when the first line contains no colon.
    """
    # Only the first line is examined; later lines never contribute.
    first_line = paragraph.partition("\n")[0]
    header, colon, detail = first_line.partition(":")
    if not colon:
        return None, None
    return header.strip(), detail.strip()
395
+
396
def transcribe_audio(file_path, model):
    """Transcribe an audio file via the OpenAI transcription endpoint and chat about it.

    On success the transcript is sent to ``chat_with_model`` and the
    transcript/reply pair is saved with ``create_file``.

    Args:
        file_path: Path of the audio file to upload.
        model: Transcription model name sent in the request form data.

    Returns:
        The transcript text, or ``None`` when the API does not answer with
        HTTP 200.
    """
    key = os.getenv('OPENAI_API_KEY')
    headers = {"Authorization": f"Bearer {key}"}
    OPENAI_API_URL = "https://api.openai.com/v1/audio/transcriptions"

    with open(file_path, 'rb') as f:
        # f-string so the actual path is shown rather than the literal "{file_path}".
        st.write(f"Read file {file_path}")
        response = requests.post(OPENAI_API_URL, headers=headers, files={'file': f}, data={'model': model})

    # Decode the body once; error responses are not guaranteed to be JSON.
    try:
        payload = response.json()
    except ValueError:
        payload = response.text
    st.write(payload)

    if response.status_code != 200:
        st.error("Error in API call.")
        return None

    transcript = payload.get('text')
    chatResponse = chat_with_model(transcript, '')
    filename = generate_filename(transcript, 'txt')
    create_file(filename, transcript, chatResponse, should_save)
    return transcript
422
+
423
def save_and_play_audio(audio_recorder):
    """Record audio, write it to a generated ``.wav`` file and play it back.

    Args:
        audio_recorder: Callable returning the recorded bytes (falsy when
            nothing was recorded).

    Returns:
        The name of the written file, or ``None`` when nothing was recorded.
    """
    recording = audio_recorder()
    if not recording:
        return None

    wav_name = generate_filename("Recording", "wav")
    with open(wav_name, 'wb') as out:
        out.write(recording)
    st.audio(recording, format="audio/wav")
    return wav_name
432
+
433
+
434
+
435
def truncate_document(document, length):
    """Return the leading ``length`` items of ``document`` (plain slice semantics)."""
    leading = slice(None, length)
    return document[leading]
437
+
438
def divide_document(document, max_length):
    """Cut ``document`` into consecutive slices of at most ``max_length`` items."""
    sections = []
    for start in range(0, len(document), max_length):
        stop = start + max_length
        sections.append(document[start:stop])
    return sections
440
+
441
def get_table_download_link(file_path):
    """Build an HTML anchor that downloads ``file_path`` as a base64 data URI.

    Args:
        file_path: Path of the file to embed.

    Returns:
        An ``<a>`` element whose ``href`` carries the file content and whose
        ``download`` attribute and link text are the file's base name.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Read raw bytes: text mode failed to decode binary files (e.g. recordings),
    # and the swallowed error made the function return the bare path, not a link.
    with open(file_path, 'rb') as file:
        data = file.read()
    b64 = base64.b64encode(data).decode()

    file_name = os.path.basename(file_path)
    ext = os.path.splitext(file_name)[1]
    mime_by_ext = {
        '.txt': 'text/plain',
        '.py': 'text/plain',
        '.xlsx': 'text/plain',
        '.csv': 'text/plain',
        '.htm': 'text/html',
        '.md': 'text/markdown',
    }
    # Anything else is offered as generic binary data.
    mime_type = mime_by_ext.get(ext, 'application/octet-stream')
    return f'<a href="data:{mime_type};base64,{b64}" target="_blank" download="{file_name}">{file_name}</a>'
467
+
468
def CompressXML(xml_text):
    """Return ``xml_text`` re-serialised without elements whose tag contains "Comment".

    Args:
        xml_text: XML document as a string.

    Returns:
        The document serialised as a unicode string, with every element whose
        tag name contains "Comment" (and its subtree) removed. The root element
        is never removed.
    """
    root = ET.fromstring(xml_text)
    # ElementTree elements have no parent pointer, so build a child -> parent map.
    parent_of = {child: parent for parent in root.iter() for child in parent}
    for elem in list(root.iter()):
        if isinstance(elem.tag, str) and 'Comment' in elem.tag:
            parent = parent_of.get(elem)
            if parent is not None:
                parent.remove(elem)
    return ET.tostring(root, encoding='unicode', method="xml")
474
+
475
def read_file_content(file, max_length):
    """Return the text of an uploaded file, chosen by its MIME ``type``.

    JSON is loaded and stringified, HTML is reduced to its text, XML is passed
    through ``CompressXML``, Markdown is rendered with mistune and plain text
    is decoded. Any other type yields an empty string. ``max_length`` is
    accepted but not used.
    """
    mime = file.type

    if mime == "application/json":
        return str(json.load(file))

    if mime in ("text/html", "text/htm"):
        return BeautifulSoup(file, "html.parser").text

    if mime in ("application/xml", "text/xml"):
        root = ET.parse(file).getroot()
        return CompressXML(ET.tostring(root, encoding='unicode'))

    if mime in ("text/markdown", "text/md"):
        render = mistune.create_markdown()
        return render(file.read().decode())

    if mime == "text/plain":
        return file.getvalue().decode()

    return ""
495
+
496
def extract_mime_type(file):
    """Return the MIME type of an upload given the object or its string form.

    Args:
        file: Either a string containing ``type='<mime>'`` or an object that
            exposes the MIME type as a ``type`` attribute (such as a Streamlit
            uploaded file).

    Returns:
        The MIME type string.

    Raises:
        ValueError: If a string input contains no ``type='...'`` fragment.
        TypeError: If the input is neither a string nor has a ``type`` attribute.
    """
    if isinstance(file, str):
        match = re.search(r"type='(.*?)'", file)
        if match is None:
            raise ValueError(f"Unable to extract MIME type from {file}")
        return match.group(1)

    # Duck-type the upload object: the previous isinstance check referenced
    # the undefined name `streamlit` and raised NameError for every non-string.
    if hasattr(file, "type"):
        return file.type

    raise TypeError("Input should be a string or a streamlit.UploadedFile object")
510
+
511
+
512
+
513
def extract_file_extension(file):
    """Return the extension (text after the last dot) of an uploaded file's name.

    Args:
        file: Object with a ``name`` attribute holding the file name.

    Returns:
        The extension without the dot, e.g. ``"pdf"`` for ``"my.report.pdf"``.

    Raises:
        ValueError: If the name contains no dot.
    """
    file_name = file.name
    # Split at the LAST dot; the previous lazy regex split at the first one and
    # returned "report.pdf" for "my.report.pdf".
    _stem, dot, extension = file_name.rpartition(".")
    if not dot:
        raise ValueError(f"Unable to extract file extension from {file_name}")
    return extension
522
+
523
def pdf2txt(docs):
    """Concatenate the text of uploaded documents.

    Files with extension py/txt/html/htm/xml/json are decoded as UTF-8; PDF
    files have the text of every page extracted. Other extensions contribute
    nothing. A file that cannot be processed is reported and skipped.

    Args:
        docs: Iterable of uploaded files (``name`` attribute and ``getvalue()``);
            ``None`` is treated as empty.

    Returns:
        The combined text of all processed files.
    """
    plain_text_extensions = ('py', 'txt', 'html', 'htm', 'xml', 'json')
    text_parts = []
    for file in docs or []:
        try:
            file_extension = extract_file_extension(file)
        except ValueError as e:
            # A name without an extension must not abort the whole batch.
            st.write(f"Error processing file {file.name}: {e}")
            continue
        st.write(f"File type extension: {file_extension}")

        # read the file according to its extension
        try:
            if file_extension.lower() in plain_text_extensions:
                text_parts.append(file.getvalue().decode('utf-8'))
            elif file_extension.lower() == 'pdf':
                pdf = PdfReader(BytesIO(file.getvalue()))
                for page in pdf.pages:
                    # extract_text() can yield nothing for image-only pages.
                    text_parts.append(page.extract_text() or "")
        except Exception as e:
            st.write(f"Error processing file {file.name}: {e}")
    return "".join(text_parts)
542
+
543
def txt2chunks(text):
    """Split ``text`` on newlines into chunks of size 1000 with an overlap of 200."""
    splitter_options = dict(separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len)
    return CharacterTextSplitter(**splitter_options).split_text(text)
546
+
547
def vector_store(text_chunks):
    """Embed ``text_chunks`` with OpenAI embeddings and return a FAISS store built from them."""
    embedder = OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY'))
    return FAISS.from_texts(texts=text_chunks, embedding=embedder)
551
+
552
def get_chain(vectorstore):
    """Build a conversational retrieval chain over ``vectorstore`` with buffered chat history."""
    return ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(),
        memory=ConversationBufferMemory(memory_key='chat_history', return_messages=True),
        retriever=vectorstore.as_retriever(),
    )
556
+
557
def divide_prompt(prompt, max_length):
    """Split ``prompt`` into space-joined word chunks of at most ``max_length`` characters.

    Words are never split: a single word longer than ``max_length`` becomes a
    chunk of its own. An empty or whitespace-only prompt yields ``['']`` so
    callers that iterate the result still perform one pass.

    Args:
        prompt: Text to split on whitespace.
        max_length: Maximum length of each joined chunk.

    Returns:
        List of chunk strings in original word order.
    """
    chunks = []
    current_chunk = []
    current_length = 0  # length of ' '.join(current_chunk)
    for word in prompt.split():
        # Joined length if this word is added (one separating space when not first).
        candidate_length = len(word) if not current_chunk else current_length + 1 + len(word)
        if current_chunk and candidate_length > max_length:
            chunks.append(' '.join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
        else:
            current_chunk.append(word)
            current_length = candidate_length
    chunks.append(' '.join(current_chunk))  # Append the final chunk
    return chunks
572
+
573
def create_zip_of_files(files):
    """
    Write every path in ``files`` into ``all_files.zip`` (current directory)
    and return the archive's name.
    """
    zip_name = "all_files.zip"
    archive = zipfile.ZipFile(zip_name, 'w')
    with archive:
        for path in files:
            archive.write(path)
    return zip_name
582
+
583
+
584
def get_zip_download_link(zip_file):
    """
    Return an HTML anchor that embeds ``zip_file`` as a base64 data URI
    and suggests ``zip_file`` as the download name.
    """
    with open(zip_file, 'rb') as archive:
        encoded = base64.b64encode(archive.read()).decode()
    return f'<a href="data:application/zip;base64,{encoded}" download="{zip_file}">Download All</a>'
593
+
594
+
595
def main():
    """Render the main page: audio capture, prompt box, file context and saved-file sidebar.

    In order, the page offers: an audio recorder whose recording is
    transcribed and chatted about; a prompt text area; an optional uploaded
    file split into sections that can each be sent to the model; a Chat button
    that sends the prompt (in word chunks) together with the uploaded
    sections; and a sidebar listing saved files with view/open/search/delete
    actions plus "Download All" / "Delete All".
    """

    def _link_saved_file(requested_name):
        """Add a sidebar download link for output that create_file was asked to save."""
        # create_file stores its output as "<base>.md" whatever extension was
        # requested, and writes nothing when saving is disabled; linking the
        # requested name unconditionally raised FileNotFoundError.
        markdown_name = os.path.splitext(requested_name)[0] + ".md"
        for candidate in (requested_name, markdown_name):
            if os.path.exists(candidate):
                st.sidebar.markdown(get_table_download_link(candidate), unsafe_allow_html=True)
                return

    # Audio, transcribe, GPT:
    filename = save_and_play_audio(audio_recorder)

    if filename is not None:
        try:
            transcribe_audio(filename, "whisper-1")
        except Exception as e:
            # Report the failure instead of silently printing a blank line.
            st.error(f"Transcription failed: {e}")
        st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
        filename = None

    # prompt interfaces
    user_prompt = st.text_area("Enter prompts, instructions & questions:", '', height=100)

    # file section interface for prompts against large documents as context
    collength, colupload = st.columns([2, 3])  # adjust the ratio as needed
    with collength:
        max_length = st.slider("File section length for large files", min_value=1000, max_value=128000, value=12000, step=1000)
    with colupload:
        uploaded_file = st.file_uploader("Add a file for context:", type=["pdf", "xml", "json", "xlsx", "csv", "html", "htm", "md", "txt"])

    # Document section chat
    document_sections = deque()
    document_responses = {}
    if uploaded_file is not None:
        file_content = read_file_content(uploaded_file, max_length)
        document_sections.extend(divide_document(file_content, max_length))
    if len(document_sections) > 0:
        if st.button("πŸ‘οΈ View Upload"):
            st.markdown("**Sections of the uploaded file:**")
            for i, section in enumerate(document_sections):
                st.markdown(f"**Section {i+1}**\n{section}")
        st.markdown("**Chat with the model:**")
        for i, section in enumerate(document_sections):
            if i in document_responses:
                st.markdown(f"**Section {i+1}**\n{document_responses[i]}")
            else:
                if st.button(f"Chat about Section {i+1}"):
                    st.write('Reasoning with your inputs...')
                    response = chat_with_model(user_prompt, section, model_choice)
                    document_responses[i] = response
                    filename = generate_filename(f"{user_prompt}_section_{i+1}", choice)
                    create_file(filename, user_prompt, response, should_save)
                    _link_saved_file(filename)

    if st.button('πŸ’¬ Chat'):
        st.write('Reasoning with your inputs...')

        # Divide the user_prompt into smaller sections
        user_prompt_sections = divide_prompt(user_prompt, max_length)
        full_response = ''
        for prompt_section in user_prompt_sections:
            # Process each section with the model
            response = chat_with_model(prompt_section, ''.join(document_sections), model_choice)
            full_response += response + '\n'  # Combine the responses
        response = full_response
        filename = generate_filename(user_prompt, choice)
        create_file(filename, user_prompt, response, should_save)
        _link_saved_file(filename)

    all_files = glob.glob("*.*")
    all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 20]  # exclude files with short names
    all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)  # sort by file type and file name in descending order

    # Sidebar buttons Download All and Delete All
    colDownloadAll, colDeleteAll = st.sidebar.columns([3, 3])
    with colDownloadAll:
        if st.button("⬇️ Download All"):
            zip_file = create_zip_of_files(all_files)
            st.markdown(get_zip_download_link(zip_file), unsafe_allow_html=True)
    with colDeleteAll:
        if st.button("πŸ—‘ Delete All"):
            for file in all_files:
                os.remove(file)
            st.experimental_rerun()

    # Sidebar of Files Saving History and surfacing files as context of prompts and responses
    file_contents = ''
    next_action = ''
    for file in all_files:
        col1, col2, col3, col4, col5 = st.sidebar.columns([1, 6, 1, 1, 1])  # adjust the ratio as needed
        with col1:
            if st.button("🌐", key="md_" + file):  # md emoji button
                with open(file, 'r') as f:
                    file_contents = f.read()
                next_action = 'md'
        with col2:
            st.markdown(get_table_download_link(file), unsafe_allow_html=True)
        with col3:
            if st.button("πŸ“‚", key="open_" + file):  # open emoji button
                with open(file, 'r') as f:
                    file_contents = f.read()
                next_action = 'open'
        with col4:
            if st.button("πŸ”", key="read_" + file):  # search emoji button
                with open(file, 'r') as f:
                    file_contents = f.read()
                next_action = 'search'
        with col5:
            if st.button("πŸ—‘", key="delete_" + file):
                os.remove(file)
                st.experimental_rerun()

    if len(file_contents) > 0:
        if next_action == 'open':
            st.text_area("File Contents:", file_contents, height=500)
        if next_action == 'md':
            st.markdown(file_contents)
        if next_action == 'search':
            st.text_area("File Contents:", file_contents, height=500)
            st.write('Reasoning with your inputs...')
            response = chat_with_model(user_prompt, file_contents, model_choice)
            filename = generate_filename(file_contents, choice)
            create_file(filename, user_prompt, response, should_save)

            # NOTE(review): indentation was lost in the scraped source; the rerun is
            # assumed to belong to the search branch (refreshing the file list).
            st.experimental_rerun()
716
+
717
# Load .env before main() runs: main() can trigger OpenAI calls that read
# OPENAI_API_KEY from the environment at call time.
load_dotenv()

if __name__ == "__main__":
    main()

st.write(css, unsafe_allow_html=True)

st.header("Chat with documents :books:")
user_question = st.text_input("Ask a question about your documents:")
if user_question:
    process_user_input(user_question)

with st.sidebar:
    st.subheader("Your documents")
    docs = st.file_uploader("import documents", accept_multiple_files=True)
    with st.spinner("Processing"):
        raw = pdf2txt(docs)
        if len(raw) > 0:
            # Index the uploaded text and keep the chain in session state so
            # later questions can use it.
            length = str(len(raw))
            text_chunks = txt2chunks(raw)
            vectorstore = vector_store(text_chunks)
            st.session_state.conversation = get_chain(vectorstore)
            st.markdown('# AI Search Index of Length:' + length + ' Created.')  # add timing
            filename = generate_filename(raw, 'txt')
            create_file(filename, raw, '', should_save)