florentgbelidji HF staff committed on
Commit
7b432a6
1 Parent(s): 72faa49

update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -11,7 +11,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
11
 
12
  #Persistent storage is mounted to /data
13
  DB_URL = "./data/lancedb"
14
-
15
 
16
  # define schema for table with embedding api
17
 
@@ -28,7 +28,7 @@ def lanceDBConnection(df):
28
  """
29
  db = lancedb.connect(DB_URL)
30
  table = db.create_table(
31
- "scratch",
32
  schema=TextModel,
33
  mode="overwrite",
34
  )
@@ -48,14 +48,14 @@ def get_pdf(file):
48
 
49
  text_splitter=RecursiveCharacterTextSplitter(chunk_size=1500,chunk_overlap=50)
50
  docs_sp=text_splitter.split_documents(documents)
51
- texts = [chunk.page_content for chunk in docs_sp]
52
  df = pd.DataFrame({"text": texts})
53
  table = lanceDBConnection(df)
54
  return f"PDF uploaded successfully. Total number of documents: {len(df)}"
55
 
56
  def get_nearest_neighbours(query):
57
  db = lancedb.connect(DB_URL)
58
- table = db.open_table("scratch")
59
  result = table.search(query).limit(3).to_list()
60
  context = [r["text"] for r in result]
61
  return context
@@ -69,11 +69,11 @@ pdf_interface=gr.Interface(
69
  question_interface=gr.Interface(
70
  fn=get_nearest_neighbours,
71
  inputs=[gr.Textbox(label="Enter your question")],
72
- outputs=["text"]*3
73
  )
74
 
75
  demo=gr.TabbedInterface(
76
  interface_list=[pdf_interface, question_interface],
77
  tab_names=["Upload Pdfs","Get relevant chunks"],
78
- title="Your study Partner")
79
  demo.launch(share=True)
 
11
 
12
  #Persistent storage is mounted to /data
13
  DB_URL = "./data/lancedb"
14
+ TABLE_NAME = "pdf_table"
15
 
16
  # define schema for table with embedding api
17
 
 
28
  """
29
  db = lancedb.connect(DB_URL)
30
  table = db.create_table(
31
+ TABLE_NAME,
32
  schema=TextModel,
33
  mode="overwrite",
34
  )
 
48
 
49
  text_splitter=RecursiveCharacterTextSplitter(chunk_size=1500,chunk_overlap=50)
50
  docs_sp=text_splitter.split_documents(documents)
51
+ texts = [chunk.page_content for chunk in docs_sp]
52
  df = pd.DataFrame({"text": texts})
53
  table = lanceDBConnection(df)
54
  return f"PDF uploaded successfully. Total number of documents: {len(df)}"
55
 
56
  def get_nearest_neighbours(query):
57
  db = lancedb.connect(DB_URL)
58
+ table = db.open_table(TABLE_NAME)
59
  result = table.search(query).limit(3).to_list()
60
  context = [r["text"] for r in result]
61
  return context
 
69
  question_interface=gr.Interface(
70
  fn=get_nearest_neighbours,
71
  inputs=[gr.Textbox(label="Enter your question")],
72
+ outputs=[gr.Textbox(label="Relevant chunk")]*3
73
  )
74
 
75
  demo=gr.TabbedInterface(
76
  interface_list=[pdf_interface, question_interface],
77
  tab_names=["Upload Pdfs","Get relevant chunks"],
78
+ title="Save PDF chunks into LanceDB on persitent storage")
79
  demo.launch(share=True)