import difflib
import json
import numpy as np
import streamlit as st
from pyserini.search.lucene import LuceneSearcher
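# Streamlit app that searches the awesome-ChatGPT-repositories list through a Pyserini LuceneSearcher index.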
def read_json(file_name):
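    """Load a JSON file and return its parsed contents."""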
with open(file_name, "r") as f:
json_data = json.load(f)
return json_data
class SearchApplication:
def __init__(self):
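        # Build the page: header, search box, popular-word shortcuts, and results.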
self.title = "Awesome ChatGPT repositories search"
self.set_page_config()
self.searcher = self.set_searcher()
st.header(self.title)
col1, col2 = st.columns(2)
with col1:
self.query = st.text_input("Search English words", value="")
with col2:
st.write("#")
            self.search_button = st.button("🔎")
        st.caption(
            "You can search for open-source software from [900+"
            " repositories](https://github.com/taishi-i/awesome-ChatGPT-repositories)."
        )
st.write("#")
candidate_words_file = "candidate_words.json"
candidate_words_json = read_json(candidate_words_file)
self.candidate_words = candidate_words_json["candidate_words"]
self.show_popular_words()
self.show_search_results()
def set_page_config(self):
st.set_page_config(
page_title=self.title,
page_icon="😎",
layout="centered",
)
def set_searcher(self):
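        # Open the prebuilt Lucene index of repository data stored under indexes/docs.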
searcher = LuceneSearcher("indexes/docs")
return searcher
def show_popular_words(self):
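        # One-click buttons that fill the query with a popular search term.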
st.caption("Popular words")
word1, word2, word3, word4, word5, word6 = st.columns(6)
with word1:
button1 = st.button("Prompt")
if button1:
self.query = "prompt"
with word2:
button2 = st.button("Chatbot")
if button2:
self.query = "chatbot"
with word3:
button3 = st.button("Langchain")
if button3:
self.query = "langchain"
with word4:
button4 = st.button("Extension")
if button4:
self.query = "extension"
with word5:
button5 = st.button("LLMs")
if button5:
self.query = "llms"
with word6:
button6 = st.button("API")
if button6:
self.query = "api"
def show_search_results(self):
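        # Query the index and render matches sorted by "freq"; fall back to fuzzy suggestions when nothing matches.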
if self.query or self.search_button:
st.write("#")
search_results = self.searcher.search(self.query, k=500)
num_search_results = len(search_results)
st.write(f"A total of {num_search_results} repositories found.")
if num_search_results > 0:
json_search_results = []
for result in search_results:
json_data = json.loads(result.raw)
json_search_results.append(json_data)
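                # Render hits ordered by their "freq" field, highest first.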
for json_data in sorted(
json_search_results, key=lambda x: x["freq"], reverse=True
):
description = json_data["description"]
url = json_data["url"]
project_name = json_data["project_name"]
st.write("---")
st.subheader(f"[{project_name}]({url})")
st.write(description)
info = []
language = json_data["language"]
if language is not None and len(language) > 0:
info.append(language)
else:
info.append("Laugage: Unkwown")
license = json_data["license"]
if license is not None:
info.append(license["name"])
else:
info.append("License: Unkwown")
st.caption(" / ".join(info))
else:
if len(self.query) > 0:
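                    # No hits: suggest the candidate words most similar to the query (difflib similarity ratio).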
scores = []
for candidate_word in self.candidate_words:
score = difflib.SequenceMatcher(
None, self.query, candidate_word
).ratio()
scores.append(score)
num_candidate_words = 6
indexes = np.argsort(scores)[::-1][:num_candidate_words]
suggestions = [self.candidate_words[i] for i in indexes]
suggestions = sorted(
set(suggestions), key=suggestions.index
)
st.caption("Suggestions")
for i, word in enumerate(suggestions, start=1):
st.write(f"{i}: {word}")
def main():
SearchApplication()
if __name__ == "__main__":
main()