whitead committed on
Commit
6c0ef19
1 Parent(s): 5371f78

Fixed a bunch of stuff

Browse files
Files changed (2) hide show
  1. app.py +38 -27
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  import paperqa
3
  import pickle
 
4
  from pathlib import Path
5
  import requests
6
  import zipfile
@@ -9,27 +10,35 @@ import tempfile
9
  import os
10
 
11
 
12
- docs = None
13
 
 
 
 
 
14
 
15
- def request_pathname(files):
 
16
  if files is None:
17
  return [[]]
18
- return [[file.name, file.name.split('/')[-1], None] for file in files], [[len(files), 0]]
 
 
 
 
 
19
 
20
 
21
  def validate_dataset(dataset, openapi):
22
- global docs
23
- docs = None # clear it out if dataset is modified
24
  docs_ready = dataset.iloc[-1, 0] != ""
25
  if docs_ready and type(openapi) is str and len(openapi) > 0:
26
  return "✨Ready✨"
27
  elif docs_ready:
28
- return "⚠️Waiting for key..."
29
  elif type(openapi) is str and len(openapi) > 0:
30
- return "⚠️Waiting for documents..."
31
  else:
32
- return "⚠️Waiting for documents and key..."
33
 
34
 
35
  def make_stats(docs):
@@ -41,18 +50,17 @@ def do_ask(question, button, openapi, dataset, length, do_marg, k, max_sources,
41
  passages = ""
42
  docs_ready = dataset.iloc[-1, 0] != ""
43
  if button == "✨Ready✨" and type(openapi) is str and len(openapi) > 0 and docs_ready:
44
- if docs is None: # don't want to rebuild index if it's already built
45
- import os
46
- os.environ['OPENAI_API_KEY'] = openapi.strip()
47
  docs = paperqa.Docs()
48
- # dataset is pandas dataframe
49
- for _, row in dataset.iterrows():
50
- try:
51
- docs.add(row['filepath'], row['citation string'],
52
- key=row['key'], disable_check=True)
53
- yield "", "", "", docs, make_stats(docs)
54
- except Exception as e:
55
- pass
56
  else:
57
  yield "", "", "", docs, [[0, 0]]
58
  #progress(0, "Building Index...")
@@ -71,10 +79,9 @@ def do_ask(question, button, openapi, dataset, length, do_marg, k, max_sources,
71
  yield result.formatted_answer, result.context, passages, docs, make_stats(docs)
72
 
73
 
74
- def download_repo(gh_repo, pbar=gr.Progress()):
75
  # download zipped version of repo
76
  r = requests.get(f'https://api.github.com/repos/{gh_repo}/zipball')
77
- files = []
78
  if r.status_code == 200:
79
  pbar(1, 'Downloaded')
80
 
@@ -101,18 +108,22 @@ def download_repo(gh_repo, pbar=gr.Progress()):
101
  rel_path = '/'.join(f.split('/')[1:])
102
  key = os.path.basename(f)
103
  citation = f'[{rel_path}](https://github.com/{gh_repo}/tree/main/{rel_path})'
104
- files.append([path, citation, key])
105
- yield files, [[len(files), 0]]
 
 
106
  pbar(int((i+1)/len(z.namelist()) * 99),
107
  f'Added {f}')
108
  pbar(100, 'Done')
109
  else:
110
  raise ValueError('Unknown Github Repo')
 
111
 
112
 
113
- with gr.Blocks() as demo:
114
 
115
  docs = gr.State(None)
 
116
  openai_api_key = gr.State('')
117
 
118
  gr.Markdown(f"""
@@ -147,7 +158,7 @@ with gr.Blocks() as demo:
147
  headers=["filepath", "citation string", "key"],
148
  datatype=["str", "str", "str"],
149
  col_count=(3, "fixed"),
150
- interactive=True,
151
  label="Documents and Citations",
152
  overflow_row_behaviour='paginate',
153
  max_rows=5
@@ -165,9 +176,9 @@ with gr.Blocks() as demo:
165
  dataset.change(validate_dataset, inputs=[
166
  dataset, openai_api_key], outputs=[buildb])
167
  uploaded_files.change(request_pathname, inputs=[
168
- uploaded_files], outputs=[dataset, stats])
169
  download.click(fn=download_repo, inputs=[
170
- gh_repo], outputs=[dataset, stats])
171
  query = gr.Textbox(
172
  placeholder="Enter your question here...", label="Question")
173
  with gr.Row():
 
1
  import gradio as gr
2
  import paperqa
3
  import pickle
4
+ import pandas as pd
5
  from pathlib import Path
6
  import requests
7
  import zipfile
 
10
  import os
11
 
12
 
13
+ css_style = """
14
 
15
+ .gradio-container {
16
+ font-family: "IBM Plex Mono";
17
+ }
18
+ """
19
 
20
+
21
+ def request_pathname(files, data, openai_api_key):
22
  if files is None:
23
  return [[]]
24
+ for file in files:
25
+ # make sure we're not duplicating things in the dataset
26
+ if file.name in [x[0] for x in data]:
27
+ continue
28
+ data.append([file.name, None, None])
29
+ return [[len(data), 0]], data, data, validate_dataset(pd.DataFrame(data), openai_api_key)
30
 
31
 
32
  def validate_dataset(dataset, openapi):
 
 
33
  docs_ready = dataset.iloc[-1, 0] != ""
34
  if docs_ready and type(openapi) is str and len(openapi) > 0:
35
  return "✨Ready✨"
36
  elif docs_ready:
37
+ return "⚠️Waiting for key⚠️"
38
  elif type(openapi) is str and len(openapi) > 0:
39
+ return "⚠️Waiting for documents⚠️"
40
  else:
41
+ return "⚠️Waiting for documents and key⚠️"
42
 
43
 
44
  def make_stats(docs):
 
50
  passages = ""
51
  docs_ready = dataset.iloc[-1, 0] != ""
52
  if button == "✨Ready✨" and type(openapi) is str and len(openapi) > 0 and docs_ready:
53
+ os.environ['OPENAI_API_KEY'] = openapi.strip()
54
+ if docs is None:
 
55
  docs = paperqa.Docs()
56
+ # dataset is pandas dataframe
57
+ for _, row in dataset.iterrows():
58
+ try:
59
+ docs.add(row['filepath'], row['citation string'],
60
+ key=row['key'], disable_check=True)
61
+ yield "", "", "", docs, make_stats(docs)
62
+ except Exception as e:
63
+ pass
64
  else:
65
  yield "", "", "", docs, [[0, 0]]
66
  #progress(0, "Building Index...")
 
79
  yield result.formatted_answer, result.context, passages, docs, make_stats(docs)
80
 
81
 
82
+ def download_repo(gh_repo, data, openai_api_key, pbar=gr.Progress()):
83
  # download zipped version of repo
84
  r = requests.get(f'https://api.github.com/repos/{gh_repo}/zipball')
 
85
  if r.status_code == 200:
86
  pbar(1, 'Downloaded')
87
 
 
108
  rel_path = '/'.join(f.split('/')[1:])
109
  key = os.path.basename(f)
110
  citation = f'[{rel_path}](https://github.com/{gh_repo}/tree/main/{rel_path})'
111
+ if path in [x[0] for x in data]:
112
+ continue
113
+ data.append([path, citation, key])
114
+ yield [[len(data), 0]], data, data, validate_dataset(pd.DataFrame(data), openai_api_key)
115
  pbar(int((i+1)/len(z.namelist()) * 99),
116
  f'Added {f}')
117
  pbar(100, 'Done')
118
  else:
119
  raise ValueError('Unknown Github Repo')
120
+ return data
121
 
122
 
123
+ with gr.Blocks(css=css_style) as demo:
124
 
125
  docs = gr.State(None)
126
+ data = gr.State([])
127
  openai_api_key = gr.State('')
128
 
129
  gr.Markdown(f"""
 
158
  headers=["filepath", "citation string", "key"],
159
  datatype=["str", "str", "str"],
160
  col_count=(3, "fixed"),
161
+ interactive=False,
162
  label="Documents and Citations",
163
  overflow_row_behaviour='paginate',
164
  max_rows=5
 
176
  dataset.change(validate_dataset, inputs=[
177
  dataset, openai_api_key], outputs=[buildb])
178
  uploaded_files.change(request_pathname, inputs=[
179
+ uploaded_files, data, openai_api_key], outputs=[stats, data, dataset, buildb])
180
  download.click(fn=download_repo, inputs=[
181
+ gh_repo, data, openai_api_key], outputs=[stats, data, dataset, buildb])
182
  query = gr.Textbox(
183
  placeholder="Enter your question here...", label="Question")
184
  with gr.Row():
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- paper-qa>=0.0.18
2
  gradio
3
  requests
4
  transformers
 
1
+ paper-qa>=0.0.20
2
  gradio
3
  requests
4
  transformers