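"""Streamlit app for searching the awesome-ChatGPT-repositories collection."""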
import difflib
import json
import numpy as np
import streamlit as st
from pyserini.search.lucene import LuceneSearcher
def read_json(file_name):
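    """Load a JSON file and return its parsed contents."""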
with open(file_name, "r") as f:
json_data = json.load(f)
return json_data
class SearchApplication:
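    """Streamlit UI that searches a Pyserini (Lucene) index of the
    awesome-ChatGPT-repositories list."""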
def __init__(self):
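        # Build the page top to bottom: header, search box, popular-word
        # shortcuts, and (when a query is present) the search results.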
self.title = "Awesome ChatGPT repositories search"
self.set_page_config()
self.searcher = self.set_searcher()
st.header(self.title)
col1, col2 = st.columns(2)
with col1:
self.query = st.text_input("Search English words", value="")
with col2:
st.write("#")
self.search_button = st.button("πŸ”Ž")
st.caption(
"You can search for open-source software from [1250+ "
" repositories](https://github.com/taishi-i/awesome-ChatGPT-repositories)."
)
st.write("#")
candidate_words_file = "candidate_words.json"
candidate_words_json = read_json(candidate_words_file)
self.candidate_words = candidate_words_json["candidate_words"]
self.show_popular_words()
self.show_search_results()
def set_page_config(self):
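        """Configure the Streamlit page title, icon, and layout."""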
st.set_page_config(
page_title=self.title,
page_icon="😎",
layout="centered",
)
def set_searcher(self):
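        """Open the prebuilt Lucene index stored under indexes/docs."""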
searcher = LuceneSearcher("indexes/docs")
return searcher
def show_popular_words(self):
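        """Render shortcut buttons that fill the query with common keywords."""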
st.caption("Popular words")
word1, word2, word3, word4, word5, word6 = st.columns(6)
with word1:
button1 = st.button("Prompt")
if button1:
self.query = "prompt"
with word2:
button2 = st.button("Chatbot")
if button2:
self.query = "chatbot"
with word3:
button3 = st.button("Langchain")
if button3:
self.query = "langchain"
with word4:
button4 = st.button("Extension")
if button4:
self.query = "extension"
with word5:
button5 = st.button("LLMs")
if button5:
self.query = "llms"
with word6:
button6 = st.button("API")
if button6:
self.query = "api"
def show_search_results(self):
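        """Search the index and render results, or suggest similar words
        when nothing matches."""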
if self.query or self.search_button:
st.write("#")
search_results = self.searcher.search(self.query, k=500)
num_search_results = len(search_results)
st.write(f"A total of {num_search_results} repositories found.")
if num_search_results > 0:
json_search_results = []
                for result in search_results:
                    # The "contents" field stores each repository's metadata as
                    # a JSON string, so parse it before accessing its fields.
                    doc = self.searcher.doc(result.docid)
                    json_data = json.loads(doc.get("contents"))
                    json_search_results.append(json_data)
for json_data in sorted(
json_search_results, key=lambda x: x["freq"], reverse=True
):
description = json_data["description"]
url = json_data["url"]
project_name = json_data["project_name"]
st.write("---")
st.subheader(f"[{project_name}]({url})")
st.write(description)
info = []
language = json_data["language"]
                    if language:
                        info.append(language)
                    else:
                        info.append("Language: Unknown")
                    license_name = json_data["license"]
                    if license_name is None:
                        info.append("License: Unknown")
                    else:
                        info.append(license_name)
st.caption(" / ".join(info))
else:
if len(self.query) > 0:
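                    # No hits: rank candidate words by string similarity to the
                    # query (difflib ratio) and show the closest ones as suggestions.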
scores = []
for candidate_word in self.candidate_words:
score = difflib.SequenceMatcher(
None, self.query, candidate_word
).ratio()
scores.append(score)
num_candidate_words = 6
indexes = np.argsort(scores)[::-1][:num_candidate_words]
suggestions = [self.candidate_words[i] for i in indexes]
suggestions = sorted(
set(suggestions), key=suggestions.index
)
st.caption("Suggestions")
for i, word in enumerate(suggestions, start=1):
st.write(f"{i}: {word}")
def main():
SearchApplication()
if __name__ == "__main__":
main()