jeremierostan
committed on
Commit
•
5a8ed59
1
Parent(s):
cea34fb
Update app.py
Browse files
app.py
CHANGED
@@ -2,51 +2,80 @@ import os
|
|
2 |
import gradio as gr
|
3 |
from anthropic import Anthropic
|
4 |
from pypdf import PdfReader
|
|
|
|
|
5 |
|
6 |
-
# Set up
|
|
|
|
|
|
|
|
|
7 |
username = os.getenv('username')
|
8 |
password = os.getenv('password')
|
9 |
|
10 |
# Add the path to your desired knowledge base
|
11 |
reference_document = "Rosenshine+Principles+red.pdf"
|
12 |
reader = PdfReader(reference_document)
|
13 |
-
|
|
|
14 |
|
15 |
-
#
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
def chat_with_assistant(message, history):
|
20 |
-
|
|
|
|
|
21 |
|
|
|
22 |
ai_message = f"""You are an AI assistant answering questions based on a reference document.
|
23 |
-
You provide short, clear answers
|
24 |
-
Use the following
|
25 |
-
|
26 |
-
{text}
|
27 |
-
|
28 |
-
Previous conversation history:
|
29 |
-
{history_str}
|
30 |
"""
|
31 |
|
32 |
-
#
|
33 |
-
|
34 |
instructions = """
|
35 |
|
36 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
-
|
39 |
-
|
|
|
|
|
40 |
client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
|
41 |
|
|
|
42 |
response = client.messages.create(
|
43 |
-
model="claude-3-
|
44 |
-
# model="claude-3-haiku-20240307",
|
45 |
max_tokens=500,
|
46 |
-
|
47 |
-
messages=[
|
48 |
-
{"role": "user", "content": message}
|
49 |
-
]
|
50 |
)
|
51 |
|
52 |
return response.content[0].text.strip()
|
|
|
2 |
import gradio as gr
|
3 |
from anthropic import Anthropic
|
4 |
from pypdf import PdfReader
|
5 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
6 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
7 |
|
8 |
+
# Set up your Anthropic API key in HF secrets
|
9 |
+
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')
|
10 |
+
os.environ["ANTHROPIC_API_KEY"] = ANTHROPIC_API_KEY
|
11 |
+
|
12 |
+
# Set up username and password in HF secrets
|
13 |
username = os.getenv('username')
|
14 |
password = os.getenv('password')
|
15 |
|
16 |
# Add the path to your desired knowledge base
|
17 |
reference_document = "Rosenshine+Principles+red.pdf"
|
18 |
reader = PdfReader(reference_document)
|
19 |
+
# Function to chunk the document.
# Defined BEFORE it is first called: the previous ordering computed
# text_chunks at module level before this `def`, raising NameError at import.
def chunk_text(text, chunk_size=1000, overlap=100):
    """Split *text* into overlapping fixed-size character chunks.

    Consecutive chunks share ``overlap`` characters so that a sentence
    straddling a boundary still appears intact in at least one chunk.

    Args:
        text: The full document text.
        chunk_size: Maximum characters per chunk (must be > 0).
        overlap: Characters shared between adjacent chunks
            (must satisfy 0 <= overlap < chunk_size).

    Returns:
        A list of chunk strings; empty list for empty input.

    Raises:
        ValueError: if the chunk parameters would cause an infinite loop.
    """
    if chunk_size <= 0 or overlap < 0 or overlap >= chunk_size:
        raise ValueError("require chunk_size > 0 and 0 <= overlap < chunk_size")
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        # Step back by `overlap` so adjacent chunks share context.
        start = end - overlap
    return chunks

# Extract the document text and chunk it for retrieval.
# `or ''` guards against extract_text() returning None for a page.
full_text = ''.join((page.extract_text() or '') for page in reader.pages)
text_chunks = chunk_text(full_text)
|
32 |
+
|
33 |
+
# Rank document chunks by TF-IDF cosine similarity to the query and
# return the best matches, most similar first.
def get_relevant_chunks(query, chunks, top_n=3):
    """Return up to ``top_n`` chunks most similar to ``query``.

    The query is vectorized together with the chunks so that both share
    a single TF-IDF vocabulary; ranking uses cosine similarity.
    """
    corpus = chunks + [query]
    matrix = TfidfVectorizer().fit_transform(corpus)
    # Last row of the matrix is the query; score it against every chunk row.
    scores = cosine_similarity(matrix[-1], matrix[:-1]).flatten()
    best_first = scores.argsort()[-top_n:][::-1]
    return [chunks[i] for i in best_first]
|
40 |
|
41 |
def chat_with_assistant(message, history):
    """Answer *message* with Claude, grounded in the reference document.

    Args:
        message: The current user question.
        history: Gradio-style list of (user, assistant) message pairs.

    Returns:
        The assistant's reply text, stripped of surrounding whitespace.
    """
    # Find relevant chunks based on the user message
    relevant_chunks = get_relevant_chunks(message, text_chunks)
    context = "\n".join(relevant_chunks)

    # Prepare the system message
    ai_message = f"""You are an AI assistant answering questions based on a reference document.
    You provide short, clear answers in simple language.
    Use the following as context for all of your answers:
    {context}
    """

    # Customize instructions as needed
    instructions = """
    """

    # BUGFIX: was `system message = ...`, which is a SyntaxError.
    system_message = f"{ai_message} {instructions}"

    # BUGFIX: the Anthropic Messages API does not accept a
    # {"role": "system"} entry in `messages`; the system prompt goes in
    # the top-level `system=` parameter of messages.create instead.
    messages = []

    # Add conversation history
    for human_msg, ai_msg in history:
        messages.append({"role": "user", "content": human_msg})
        messages.append({"role": "assistant", "content": ai_msg})

    # Add the current user message
    messages.append({"role": "user", "content": message})

    # Create Anthropic client
    client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

    # Make the API call
    response = client.messages.create(
        # BUGFIX: "claude-3-sonnet-20240307" is not a valid model id;
        # the Claude 3 Sonnet snapshot date is 20240229.
        model="claude-3-sonnet-20240229",
        # model="claude-3-haiku-20240307",
        max_tokens=500,
        system=system_message,
        messages=messages,
    )

    return response.content[0].text.strip()
|