abnerguzman committed on
Commit
84b9cf4
1 Parent(s): 669631f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -10
app.py CHANGED
@@ -12,7 +12,7 @@ from tqdm.autonotebook import tqdm
12
  from pinecone import Pinecone, ServerlessSpec
13
  pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY'))
14
 
15
- index_name = "prorata-postman-ds-128"
16
  index = pc.Index(index_name)
17
 
18
 
@@ -75,30 +75,101 @@ def get_matches_reranked(q, k=20):
75
 
76
  return matches_colbertv2
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  import gradio as gr
80
  from io import StringIO
 
81
 
82
  def output_chunks_reranked(msg):
83
  matches_colbertv2 = get_matches_reranked(msg, k=20)
84
-
85
  _out = StringIO()
 
 
 
 
 
 
 
 
86
  for idx, match in enumerate(matches_colbertv2):
87
- print(f"{idx+1}: {match['colbertv2_score']:.2f}", end='', file=_out)
88
- print(textwrap.fill(match['metadata']['text'], initial_indent=' ', subsequent_indent=' ', width=100), file=_out)
89
- print(file=_out)
 
 
 
 
 
90
 
91
  return _out.getvalue()
92
 
93
-
94
  with gr.Blocks() as demo:
95
  msg = gr.Textbox(label='Target')
96
- results_box = gr.Textbox(label='Matches', lines=30, autoscroll=False)
 
97
 
98
  msg.submit(output_chunks_reranked, msg, results_box, queue=False)
99
-
100
- demo.queue()
101
- demo.launch(share=True)
102
 
103
 
104
  if __name__ == "__main__":
 
12
  from pinecone import Pinecone, ServerlessSpec
13
  pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY'))
14
 
15
+ index_name = "prorata-postman-ds-128-v2"
16
  index = pc.Index(index_name)
17
 
18
 
 
75
 
76
  return matches_colbertv2
77
 
78
def filter_matches(matches_colbertv2, score_thr=0.0):
    """Keep only the matches whose ColBERTv2 score exceeds a threshold.

    Args:
        matches_colbertv2: Iterable of match mappings; each must provide a
            numeric ``'colbertv2_score'`` entry.
        score_thr: Exclusive lower bound. A match is kept only when its score
            is strictly greater than this value.

    Returns:
        A new list with the surviving matches, in their original order. The
        input is not modified.
    """
    # Only the score is inspected, so matches need no other fields to pass
    # through this filter.
    return [
        match
        for match in matches_colbertv2
        if match['colbertv2_score'] > score_thr
    ]
92
+
93
+ style_str = """
94
+ <style>
95
+ .doc-title {
96
+ /* font-family: cursive, sans-serif; */
97
+ font-family: Optima, sans-serif;
98
+ width: 100%;
99
+ display: inline-block;
100
+ font-size: 2em;
101
+ font-weight: bolder;
102
+ padding-top: 20px;
103
+ /* font-style: italic; */
104
+ }
105
+ .doc-url {
106
+ /* font-family: cursive, sans-serif; */
107
+ font-size: 1em;
108
+ padding-left: 40px;
109
+ padding-bottom: 10px;
110
+ /* font-weight: bolder; */
111
+ /* font-style: italic; */
112
+ }
113
+ .doc-text {
114
+ /* font-family: cursive, sans-serif; */
115
+ font-family: Optima, sans-serif;
116
+ font-size: 1.5em;
117
+ padding-left: 40px;
118
+ padding-bottom: 20px;
119
+ /* font-weight: bolder; */
120
+ /* font-style: italic; */
121
+ }
122
+ .doc-title > img {
123
+ width: 22px;
124
+ height: 22px;
125
+ border-radius: 50%;
126
+ overflow: hidden;
127
+ background-color: transparent;
128
+ display: inline-block;
129
+ vertical-align: middle;
130
+ }
131
+ .doc-title > score {
132
+ font-family: Optima, sans-serif;
133
+ font-weight: normal;
134
+ float: right;
135
+ }
136
+ </style>
137
+ """
138
 
139
  import gradio as gr
140
  from io import StringIO
141
+ from urllib.parse import urlparse
142
 
143
  def output_chunks_reranked(msg):
144
  matches_colbertv2 = get_matches_reranked(msg, k=20)
145
+ matches_colbertv2 = filter_matches(matches_colbertv2, score_thr=0.55)
146
  _out = StringIO()
147
+
148
+ if not matches_colbertv2:
149
+ print(style_str, file=_out)
150
+ print(f"<div>", file=_out)
151
+ print(f"<div class=\"doc-title\">No sources relevant to this target were found.</div>", file=_out)
152
+ print(f"</div>", file=_out)
153
+ return _out.getvalue()
154
+
155
  for idx, match in enumerate(matches_colbertv2):
156
+ print(style_str, file=_out)
157
+ print(f"<div>", file=_out)
158
+ favicon = f"<img src=\"https://www.google.com/s2/favicons?sz=128&amp;domain={urlparse(match['metadata']['url']).netloc}\"/>"
159
+ print(f"<div class=\"doc-title\">{favicon}&nbsp&nbsp;{match['metadata']['title']}<score>{match['colbertv2_score']:.2f}</score></div>", file=_out)
160
+ print(f"<div class=\"doc-url\"><a href=\"{match['metadata']['url']}\" target=\"_blank\">{match['metadata']['url']}</a></div>", file=_out)
161
+ # print(f" (Score: {match['colbertv2_score']:.2f})", file=_out)
162
+ print(f"<div class=\"doc-text\">{match['metadata']['text']}</div>", file=_out)
163
+ print(f"</div>", file=_out)
164
 
165
  return _out.getvalue()
166
 
 
167
  with gr.Blocks() as demo:
168
  msg = gr.Textbox(label='Target')
169
+ # results_box = gr.Textbox(label='Matches', lines=30, autoscroll=False)
170
+ results_box = gr.HTML(label='Matches')
171
 
172
  msg.submit(output_chunks_reranked, msg, results_box, queue=False)
 
 
 
173
 
174
 
175
  if __name__ == "__main__":