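"""Streamlit app for searching the awesome-ChatGPT-repositories collection."""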
import difflib
import json
import numpy as np
import streamlit as st
from pyserini.search.lucene import LuceneSearcher
def read_json(file_name):
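    """Load a JSON file and return its parsed contents."""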
with open(file_name, "r") as f:
json_data = json.load(f)
return json_data
class SearchApplication:
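    """Streamlit UI that searches a Pyserini (Lucene) index of the
    awesome-ChatGPT-repositories list."""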
def __init__(self):
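        # Build the page top to bottom: header, search box, popular-word
        # shortcuts, and (when a query is present) the search results.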
self.title = "Awesome ChatGPT repositories search"
self.set_page_config()
self.searcher = self.set_searcher()
st.header(self.title)
col1, col2 = st.columns(2)
with col1:
self.query = st.text_input("Search English words", value="")
with col2:
st.write("#")
self.search_button = st.button("πŸ”Ž")
st.caption(
"You can search for open-source software from [1250+ "
" repositories](https://github.com/taishi-i/awesome-ChatGPT-repositories)."
)
st.write("#")
candidate_words_file = "candidate_words.json"
candidate_words_json = read_json(candidate_words_file)
self.candidate_words = candidate_words_json["candidate_words"]
self.show_popular_words()
self.show_search_results()
def set_page_config(self):
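        """Configure the Streamlit page title, icon, and layout."""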
st.set_page_config(
page_title=self.title,
page_icon="😎",
layout="centered",
)
def set_searcher(self):
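        """Open the prebuilt Lucene index stored under indexes/docs."""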
searcher = LuceneSearcher("indexes/docs")
return searcher
def show_popular_words(self):
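        """Render shortcut buttons that fill the query with common keywords."""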
st.caption("Popular words")
word1, word2, word3, word4, word5, word6 = st.columns(6)
with word1:
button1 = st.button("Prompt")
if button1:
self.query = "prompt"
with word2:
button2 = st.button("Chatbot")
if button2:
self.query = "chatbot"
with word3:
button3 = st.button("Langchain")
if button3:
self.query = "langchain"
with word4:
button4 = st.button("Extension")
if button4:
self.query = "extension"
with word5:
button5 = st.button("LLMs")
if button5:
self.query = "llms"
with word6:
button6 = st.button("API")
if button6:
self.query = "api"
def show_search_results(self):
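        """Search the index and render results, or suggest similar words
        when nothing matches."""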
if self.query or self.search_button:
st.write("#")
search_results = self.searcher.search(self.query, k=500)
num_search_results = len(search_results)
st.write(f"A total of {num_search_results} repositories found.")
if num_search_results > 0:
json_search_results = []
                for result in search_results:
                    # The "contents" field stores each repository's metadata as
                    # a JSON string, so parse it before accessing its fields.
                    doc = self.searcher.doc(result.docid)
                    json_data = json.loads(doc.get("contents"))
                    json_search_results.append(json_data)
for json_data in sorted(
json_search_results, key=lambda x: x["freq"], reverse=True
):
description = json_data["description"]
url = json_data["url"]
project_name = json_data["project_name"]
st.write("---")
st.subheader(f"[{project_name}]({url})")
st.write(description)
info = []
language = json_data["language"]
                    if language:
                        info.append(language)
                    else:
                        info.append("Language: Unknown")
                    license_name = json_data["license"]
                    if license_name is None:
                        info.append("License: Unknown")
                    else:
                        info.append(license_name)
st.caption(" / ".join(info))
else:
if len(self.query) > 0:
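                    # No hits: rank candidate words by string similarity to the
                    # query (difflib ratio) and show the closest ones as suggestions.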
scores = []
for candidate_word in self.candidate_words:
score = difflib.SequenceMatcher(
None, self.query, candidate_word
).ratio()
scores.append(score)
num_candidate_words = 6
indexes = np.argsort(scores)[::-1][:num_candidate_words]
suggestions = [self.candidate_words[i] for i in indexes]
suggestions = sorted(
set(suggestions), key=suggestions.index
)
st.caption("Suggestions")
for i, word in enumerate(suggestions, start=1):
st.write(f"{i}: {word}")
def main():
SearchApplication()
if __name__ == "__main__":
main()