Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
•
7b432a6
1
Parent(s):
72faa49
update app.py
Browse files
app.py
CHANGED
@@ -11,7 +11,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
11 |
|
12 |
#Persistent storage is mounted to /data
|
13 |
DB_URL = "./data/lancedb"
|
14 |
-
|
15 |
|
16 |
# define schema for table with embedding api
|
17 |
|
@@ -28,7 +28,7 @@ def lanceDBConnection(df):
|
|
28 |
"""
|
29 |
db = lancedb.connect(DB_URL)
|
30 |
table = db.create_table(
|
31 |
-
|
32 |
schema=TextModel,
|
33 |
mode="overwrite",
|
34 |
)
|
@@ -48,14 +48,14 @@ def get_pdf(file):
|
|
48 |
|
49 |
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1500,chunk_overlap=50)
|
50 |
docs_sp=text_splitter.split_documents(documents)
|
51 |
-
texts = [chunk.page_content for chunk in docs_sp]
|
52 |
df = pd.DataFrame({"text": texts})
|
53 |
table = lanceDBConnection(df)
|
54 |
return f"PDF uploaded successfully. Total number of documents: {len(df)}"
|
55 |
|
56 |
def get_nearest_neighbours(query):
|
57 |
db = lancedb.connect(DB_URL)
|
58 |
-
table = db.open_table(
|
59 |
result = table.search(query).limit(3).to_list()
|
60 |
context = [r["text"] for r in result]
|
61 |
return context
|
@@ -69,11 +69,11 @@ pdf_interface=gr.Interface(
|
|
69 |
question_interface=gr.Interface(
|
70 |
fn=get_nearest_neighbours,
|
71 |
inputs=[gr.Textbox(label="Enter your question")],
|
72 |
-
outputs=["
|
73 |
)
|
74 |
|
75 |
demo=gr.TabbedInterface(
|
76 |
interface_list=[pdf_interface, question_interface],
|
77 |
tab_names=["Upload Pdfs","Get relevant chunks"],
|
78 |
-
title="
|
79 |
demo.launch(share=True)
|
|
|
11 |
|
12 |
#Persistent storage is mounted to /data
|
13 |
DB_URL = "./data/lancedb"
|
14 |
+
TABLE_NAME = "pdf_table"
|
15 |
|
16 |
# define schema for table with embedding api
|
17 |
|
|
|
28 |
"""
|
29 |
db = lancedb.connect(DB_URL)
|
30 |
table = db.create_table(
|
31 |
+
TABLE_NAME,
|
32 |
schema=TextModel,
|
33 |
mode="overwrite",
|
34 |
)
|
|
|
48 |
|
49 |
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1500,chunk_overlap=50)
|
50 |
docs_sp=text_splitter.split_documents(documents)
|
51 |
+
texts = [chunk.page_content for chunk in docs_sp]
|
52 |
df = pd.DataFrame({"text": texts})
|
53 |
table = lanceDBConnection(df)
|
54 |
return f"PDF uploaded successfully. Total number of documents: {len(df)}"
|
55 |
|
56 |
def get_nearest_neighbours(query):
|
57 |
db = lancedb.connect(DB_URL)
|
58 |
+
table = db.open_table(TABLE_NAME)
|
59 |
result = table.search(query).limit(3).to_list()
|
60 |
context = [r["text"] for r in result]
|
61 |
return context
|
|
|
69 |
question_interface=gr.Interface(
|
70 |
fn=get_nearest_neighbours,
|
71 |
inputs=[gr.Textbox(label="Enter your question")],
|
72 |
+
outputs=[gr.Textbox(label="Relevant chunk")]*3
|
73 |
)
|
74 |
|
75 |
demo=gr.TabbedInterface(
|
76 |
interface_list=[pdf_interface, question_interface],
|
77 |
tab_names=["Upload Pdfs","Get relevant chunks"],
|
78 |
+
title="Save PDF chunks into LanceDB on persistent storage")
|
79 |
demo.launch(share=True)
|