import difflib
import json

import numpy as np
import streamlit as st
from pyserini.search.lucene import LuceneSearcher


def read_json(file_name):
    """Load a JSON file and return its parsed contents."""
    with open(file_name, "r") as f:
        json_data = json.load(f)
    return json_data


class SearchApplication:
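    """Streamlit front end for searching the awesome-ChatGPT-repositories index."""
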
    def __init__(self):
        self.title = "Awesome ChatGPT repositories search"

        self.set_page_config()
        self.searcher = self.set_searcher()

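        # Page header plus a two-column row: a query text box and a search button.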
        st.header(self.title)
        col1, col2 = st.columns(2)
        with col1:
            self.query = st.text_input("Search English words", value="")

        with col2:
            st.write("#")
            self.search_button = st.button("🔍")

        st.caption(
            "You can search for open-source software from [1250+"
            " repositories](https://github.com/taishi-i/awesome-ChatGPT-repositories)."
        )
        st.write("#")

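        # Candidate words are used to suggest similar queries when a search returns no hits.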
        candidate_words_file = "candidate_words.json"
        candidate_words_json = read_json(candidate_words_file)
        self.candidate_words = candidate_words_json["candidate_words"]

        self.show_popular_words()
        self.show_search_results()

    def set_page_config(self):
        st.set_page_config(
            page_title=self.title,
            page_icon="🔍",
            layout="centered",
        )

    def set_searcher(self):
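        """Open the Lucene index stored under indexes/docs."""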
        searcher = LuceneSearcher("indexes/docs")
        return searcher

    def show_popular_words(self):
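        """Show quick-select buttons for popular search words."""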
        st.caption("Popular words")

        word1, word2, word3, word4, word5, word6 = st.columns(6)
        with word1:
            button1 = st.button("Prompt")
            if button1:
                self.query = "prompt"

        with word2:
            button2 = st.button("Chatbot")
            if button2:
                self.query = "chatbot"

        with word3:
            button3 = st.button("Langchain")
            if button3:
                self.query = "langchain"

        with word4:
            button4 = st.button("Extension")
            if button4:
                self.query = "extension"

        with word5:
            button5 = st.button("LLMs")
            if button5:
                self.query = "llms"

        with word6:
            button6 = st.button("API")
            if button6:
                self.query = "api"

    def show_search_results(self):
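        """Search the index for the current query and render results or suggestions."""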
        if self.query or self.search_button:
            st.write("#")

            search_results = self.searcher.search(self.query, k=500)
            num_search_results = len(search_results)
            st.write(f"A total of {num_search_results} repositories found.")

            if num_search_results > 0:
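                # Collect the stored record for each hit before sorting and rendering.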
                json_search_results = []
                for result in search_results:
                    docid = result.docid
                    doc = self.searcher.doc(docid)
                    print(doc)
                    print(doc.get("contents"))

                    # The "contents" field is assumed to hold the repository record
                    # as a JSON string, so parse it before the field lookups below.
                    json_data = json.loads(doc.get("contents"))
                    json_search_results.append(json_data)

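                # Render the results in descending order of their "freq" value.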
                for json_data in sorted(
                    json_search_results, key=lambda x: x["freq"], reverse=True
                ):
                    description = json_data["description"]
                    url = json_data["url"]
                    project_name = json_data["project_name"]

                    st.write("---")
                    st.subheader(f"[{project_name}]({url})")
                    st.write(description)

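                    # Show the language and license as a caption under each repository.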
                    info = []
                    language = json_data["language"]
                    if language is not None and len(language) > 0:
                        info.append(language)
                    else:
                        info.append("Language: Unknown")

                    license = json_data["license"]
                    if license is None:
                        info.append("License: Unknown")
                    else:
                        info.append(license)

                    st.caption(" / ".join(info))
            else:
                if len(self.query) > 0:
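                    # No hits: rank the candidate words by character-level similarity
                    # to the query and show the closest ones as suggestions.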
                    scores = []
                    for candidate_word in self.candidate_words:
                        score = difflib.SequenceMatcher(
                            None, self.query, candidate_word
                        ).ratio()
                        scores.append(score)

                    num_candidate_words = 6

                    indexes = np.argsort(scores)[::-1][:num_candidate_words]
                    suggestions = [self.candidate_words[i] for i in indexes]
                    suggestions = sorted(set(suggestions), key=suggestions.index)
                    st.caption("Suggestions")
                    for i, word in enumerate(suggestions, start=1):
                        st.write(f"{i}: {word}")


def main():
    SearchApplication()


if __name__ == "__main__":
    main()