File size: 4,778 Bytes
18c08a2 b7d0764 18c08a2 b7d0764 18c08a2 b7d0764 24d6a96 b7d0764 eedb0ad b7d0764 18c08a2 b7d0764 1cdbe99 b7d0764 18c08a2 b7d0764 24d6a96 b7d0764 18c08a2 b7d0764 18c08a2 b7d0764 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
import difflib
import json
import numpy as np
import streamlit as st
from pyserini.search.lucene import LuceneSearcher
def read_json(file_name):
    """Load and return the parsed contents of a JSON file.

    Args:
        file_name: Path of the JSON file to read.

    Returns:
        The deserialized JSON document (whatever ``json.load`` produces).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8 by specification; being explicit avoids depending on
    # the platform's locale encoding (e.g. cp1252 on Windows).
    with open(file_name, "r", encoding="utf-8") as f:
        return json.load(f)
class SearchApplication:
    """Streamlit page for searching an index of ChatGPT-related repositories.

    Instantiating the class renders the complete page: the page config, the
    header, the query box with its search button, a row of "popular word"
    shortcut buttons and, when a query is present, the search results (or
    spelling suggestions when nothing matched).

    Attributes:
        title: Page title, also shown as the page header.
        searcher: ``LuceneSearcher`` opened on ``indexes/docs``.
        query: Current query string (from the text box or a shortcut button).
        search_button: ``True`` when the search button was pressed.
        candidate_words: Words loaded from ``candidate_words.json`` that are
            used to build suggestions for queries without results.
    """

    # Labels of the shortcut buttons; pressing one searches for the
    # lower-cased label.
    POPULAR_WORDS = ("Prompt", "Chatbot", "Langchain", "Extension", "LLMs", "API")
    # Maximum number of hits requested from the searcher.
    MAX_SEARCH_RESULTS = 500
    # Maximum number of suggestions shown when a query has no results.
    NUM_SUGGESTIONS = 6

    def __init__(self):
        """Build the page top to bottom and render results for the query."""
        self.title = "Awesome ChatGPT repositories search"
        # The page config is set before any other Streamlit element is drawn.
        self.set_page_config()
        self.searcher = self.set_searcher()

        st.header(self.title)
        col1, col2 = st.columns(2)
        with col1:
            self.query = st.text_input("Search English words", value="")
        with col2:
            st.write("#")
            self.search_button = st.button("🔎")

        st.caption(
            "You can search for open-source software from [900+ "
            " repositories](https://github.com/taishi-i/awesome-ChatGPT-repositories)."
        )
        st.write("#")

        candidate_words_json = read_json("candidate_words.json")
        self.candidate_words = candidate_words_json["candidate_words"]

        # Shortcut buttons may overwrite ``self.query``, so they are drawn
        # before the results are computed.
        self.show_popular_words()
        self.show_search_results()

    def set_page_config(self):
        """Configure the browser tab title, icon and page layout."""
        st.set_page_config(
            page_title=self.title,
            page_icon="😎",
            layout="centered",
        )

    def set_searcher(self):
        """Return a ``LuceneSearcher`` opened on the ``indexes/docs`` index."""
        return LuceneSearcher("indexes/docs")

    def show_popular_words(self):
        """Draw one shortcut button per popular word.

        Pressing a button replaces ``self.query`` with the lower-cased label.
        """
        st.caption("Popular words")
        columns = st.columns(len(self.POPULAR_WORDS))
        for column, label in zip(columns, self.POPULAR_WORDS):
            with column:
                if st.button(label):
                    self.query = label.lower()

    def show_search_results(self):
        """Run the search and render the hits, or suggestions if none matched.

        Nothing is rendered unless there is a query or the search button was
        pressed. Hits are shown in descending order of their ``freq`` field.
        """
        if not (self.query or self.search_button):
            return

        st.write("#")
        hits = self.searcher.search(self.query, k=self.MAX_SEARCH_RESULTS)
        num_hits = len(hits)
        st.write(f"A total of {num_hits} repositories found.")

        if num_hits > 0:
            # Each hit carries its repository record as a JSON string.
            repositories = [json.loads(hit.raw) for hit in hits]
            repositories.sort(key=lambda repo: repo["freq"], reverse=True)
            for repository in repositories:
                self._render_repository(repository)
        elif self.query:
            self._render_suggestions()

    def _render_repository(self, json_data):
        """Render one repository record: linked name, description, metadata."""
        description = json_data["description"]
        url = json_data["url"]
        project_name = json_data["project_name"]

        st.write("---")
        st.subheader(f"[{project_name}]({url})")
        st.write(description)
        st.caption(" / ".join(self._repository_info(json_data)))

    @staticmethod
    def _repository_info(json_data):
        """Return ``[language, license name]`` labels for a repository record.

        A missing, ``None`` or empty language, and a missing license or a
        license without a usable name, are replaced by an "Unknown" label so
        the result always contains two non-empty strings.
        """
        language = json_data.get("language")
        license_info = json_data.get("license")
        license_name = (
            license_info.get("name") if isinstance(license_info, dict) else None
        )
        return [
            language or "Language: Unknown",
            license_name or "License: Unknown",
        ]

    def _suggest_words(self, query, limit=None):
        """Return the candidate words most similar to ``query``, best first.

        Similarity is ``difflib.SequenceMatcher(None, query, word).ratio()``.
        Duplicate candidate words are dropped *before* ranking so they cannot
        use up suggestion slots; ties keep the candidates' original order.

        Args:
            query: The text to find similar words for.
            limit: Maximum number of words to return; defaults to
                ``NUM_SUGGESTIONS``.

        Returns:
            A list of at most ``limit`` distinct words.
        """
        if limit is None:
            limit = self.NUM_SUGGESTIONS

        # dict.fromkeys de-duplicates while preserving first-seen order.
        candidates = list(dict.fromkeys(self.candidate_words))
        scores = [
            difflib.SequenceMatcher(None, query, word).ratio()
            for word in candidates
        ]
        # Negating the scores and sorting stably gives a descending ranking
        # whose tie order is deterministic.
        ranking = np.argsort(-np.asarray(scores, dtype=float), kind="stable")
        return [candidates[i] for i in ranking[:limit]]

    def _render_suggestions(self):
        """Render a numbered list of suggested words for the current query."""
        st.caption("Suggestions")
        for i, word in enumerate(self._suggest_words(self.query), start=1):
            st.write(f"{i}: {word}")
def main():
    """Render the search page.

    Constructing ``SearchApplication`` draws the whole page, so the instance
    itself does not need to be kept.
    """
    SearchApplication()


# Only build the page when this file is executed as the entry-point script.
if __name__ == "__main__":
    main()
|