Update app.py
Browse files
app.py
CHANGED
@@ -1,81 +1,96 @@
|
|
|
|
|
|
1 |
import matplotlib.pyplot as plt
|
2 |
from wordcloud import WordCloud
|
3 |
import nltk
|
4 |
from collections import Counter
|
5 |
-
from
|
6 |
-
from nltk.
|
7 |
import gradio as gr
|
|
|
8 |
|
9 |
# Download necessary NLTK data
|
10 |
nltk.download('punkt')
|
11 |
nltk.download('averaged_perceptron_tagger')
|
12 |
nltk.download('stopwords')
|
|
|
13 |
|
14 |
-
# Set up the translator
|
15 |
-
translator = Translator(to_lang="ko")
|
16 |
stop_words = set(stopwords.words('english'))
|
|
|
17 |
|
18 |
-
# Define example sentences and
|
19 |
word_data_examples = {
|
20 |
-
"village": ("The village was quiet at night.", "hamlet, community"),
|
21 |
-
"adventure": ("They went on an exciting adventure in the forest.", "expedition, quest"),
|
22 |
-
"map": ("We used a map to find the hidden treasure.", "chart, atlas"),
|
23 |
-
"cave": ("They explored a dark cave in the mountains.", "cavern, grotto"),
|
24 |
-
"among": ("She found her book among the pile of papers.", "amidst, between"),
|
25 |
-
"mountains": ("The mountains were covered with snow in winter.", "peaks, ranges"),
|
26 |
-
"children": ("The children played games in the park.", "kids, youngsters"),
|
27 |
-
"known": ("He was known for his kindness and bravery.", "recognized, famous"),
|
28 |
-
"hidden": ("They found a hidden door behind the bookshelf.", "concealed, secret"),
|
29 |
-
"local": ("The local market was full of fresh produce.", "regional, native"),
|
30 |
-
"discovery": ("The discovery of the old map excited everyone.", "finding, revelation"),
|
31 |
-
"eagle": ("An eagle soared high above the valley.", "raptor, bird of prey"),
|
32 |
-
"villagers": ("The villagers gathered in the square for the festival.", "residents, townsfolk"),
|
33 |
-
"legend": ("The legend of the lost city intrigued the adventurers.", "myth, lore"),
|
34 |
-
"tales": ("Grandma told us tales of her childhood.", "stories, narratives"),
|
35 |
-
"daring": ("His daring escape from the cave was legendary.", "bold, audacious"),
|
36 |
-
"spirit": ("The spirit of adventure was alive in their hearts.", "soul, essence"),
|
37 |
-
"exploring": ("They spent the summer exploring the forest.", "investigating, discovering"),
|
38 |
-
"old": ("The old castle was full of secrets.", "ancient, aged"),
|
39 |
-
"lost": ("He felt lost without his best friend.", "missing, misplaced"),
|
40 |
-
"ancient": ("They discovered ancient artifacts in the desert.", "archaic, antique"),
|
41 |
-
"inside": ("Inside the box was a beautiful necklace.", "within, interior"),
|
42 |
-
"treasure": ("They dreamed of finding hidden treasure.", "riches, valuables"),
|
43 |
-
"whispering": ("The trees were whispering secrets in the wind.", "murmuring, softly speaking"),
|
44 |
-
"hollow": ("They found a hollow tree to hide in during the storm.", "cavity, void"),
|
45 |
-
"decided": ("She decided to take the long way home.", "determined, resolved"),
|
46 |
-
"journey": ("Their journey took them across the country.", "trip, voyage"),
|
47 |
-
"together": ("They worked together to solve the mystery.", "jointly, collectively"),
|
48 |
-
"way": ("She found a new way to solve the puzzle.", "method, manner"),
|
49 |
-
"reached": ("They finally reached the top of the hill.", "arrived, attained"),
|
50 |
-
"chest": ("The chest was filled with gold coins.", "trunk, box"),
|
51 |
-
"boulder": ("A large boulder blocked the path.", "rock, stone"),
|
52 |
-
"artifacts": ("The museum displayed artifacts from ancient Egypt.", "relics, antiquities"),
|
53 |
-
"legends": ("The legends spoke of a hidden kingdom.", "myths, sagas"),
|
54 |
-
"explore": ("They wanted to explore the old mansion.", "investigate, examine"),
|
55 |
-
"secret": ("She kept the secret hidden from everyone.", "confidential, hidden"),
|
56 |
-
"small": ("The small kitten was very playful.", "tiny, little"),
|
57 |
-
"mountain": ("The mountain was covered in thick forests.", "peak, hill"),
|
58 |
-
"part": ("Each part of the puzzle was important.", "piece, segment"),
|
59 |
-
"everyday": ("He wore his everyday clothes to the party.", "daily, routine"),
|
60 |
-
"life": ("Life in the village was peaceful.", "existence, being"),
|
61 |
-
"nestled": ("The cabin was nestled in the woods.", "tucked, situated"),
|
62 |
-
"towering": ("The towering trees made the forest dark and cool.", "lofty, soaring"),
|
63 |
-
"peaks": ("The mountain peaks were covered in snow.", "summits, crests"),
|
64 |
-
"said": ("He said he would be back soon.", "stated, remarked"),
|
65 |
-
"protected": ("The ancient ruins were protected by law.", "guarded, sheltered"),
|
66 |
-
"massive": ("The massive ship docked at the port.", "enormous, huge"),
|
67 |
-
"supposedly": ("The treasure was supposedly buried under the tree.", "allegedly, reportedly"),
|
68 |
-
"watched": ("They watched the movie together.", "observed, viewed"),
|
69 |
-
"perch": ("The bird found a perch on the windowsill.", "roost, rest")
|
70 |
}
|
71 |
|
72 |
# Words to be excluded from both the word cloud and the word list
|
73 |
exclude_words = set([
|
74 |
-
'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her', 'us', 'them',
|
75 |
'my', 'your', 'his', 'its', 'our', 'their', 'mine', 'yours', 'hers', 'ours', 'theirs',
|
76 |
'alex', 'mia', 'sam', 'echo', 'ridge', 'guardian', 'of', 'the', 'glen'
|
77 |
])
|
78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
def process_text(text):
|
80 |
words = nltk.word_tokenize(text)
|
81 |
words = [word.lower() for word in words if word.isalnum() and word.lower() not in stop_words and word.lower() not in exclude_words]
|
@@ -93,30 +108,59 @@ def generate_wordcloud(word_freq):
|
|
93 |
|
94 |
def translate_and_get_pos(word_freq, pos_tags):
|
95 |
pos_map = {
|
96 |
-
'NN': 'n.', 'NNS': 'n.', '
|
97 |
-
'
|
98 |
-
'RBR': 'adv.', 'RBS': 'adv.'
|
|
|
99 |
}
|
100 |
|
|
|
101 |
word_data = []
|
102 |
for word, freq in word_freq.items():
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
|
113 |
def main(text):
|
114 |
word_freq, pos_tags = process_text(text)
|
115 |
wordcloud_image = generate_wordcloud(word_freq)
|
116 |
word_data = translate_and_get_pos(word_freq, pos_tags)
|
117 |
-
|
118 |
-
|
119 |
-
|
|
|
|
|
|
|
120 |
|
121 |
# Custom CSS for the Gradio interface
|
122 |
css = """
|
@@ -128,6 +172,18 @@ body {
|
|
128 |
background-color: blue !important;
|
129 |
border-color: blue !important;
|
130 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
</style>
|
132 |
"""
|
133 |
|
@@ -135,7 +191,7 @@ body {
|
|
135 |
interface = gr.Interface(
|
136 |
fn=main,
|
137 |
inputs="text",
|
138 |
-
outputs=["image", "
|
139 |
title="Wordcloud Vocabulary Learning App",
|
140 |
description="Input text to generate a word cloud and a frequency list with Korean meanings, parts of speech, and example sentences."
|
141 |
"<br><br><b>The full text:</b><br>"
|
|
|
1 |
+
|
2 |
+
|
3 |
import matplotlib.pyplot as plt
|
4 |
from wordcloud import WordCloud
|
5 |
import nltk
|
6 |
from collections import Counter
|
7 |
+
from nltk.corpus import stopwords, wordnet
|
8 |
+
from nltk.stem import WordNetLemmatizer
|
9 |
import gradio as gr
|
10 |
+
import pandas as pd
|
11 |
|
12 |
# Download necessary NLTK data
|
13 |
# Fetch the tokenizer, POS-tagger, stop-word and WordNet data used below.
for _resource in ('punkt', 'averaged_perceptron_tagger', 'stopwords', 'wordnet'):
    nltk.download(_resource)

# English stop words, held in a set because they are checked per token.
stop_words = set(stopwords.words('english'))
# Single shared lemmatizer, used to reduce inflected verbs to their base form.
lemmatizer = WordNetLemmatizer()
|
20 |
|
21 |
+
# Define example sentences, synonyms, and Korean meanings for the word list
|
22 |
word_data_examples = {
|
23 |
+
"village": ("The village was quiet at night.", "hamlet, community", "λ§μ", "μ골"),
|
24 |
+
"adventure": ("They went on an exciting adventure in the forest.", "expedition, quest", "λͺ¨ν", "μ¬ν"),
|
25 |
+
"map": ("We used a map to find the hidden treasure.", "chart, atlas", "μ§λ", "μ½λ"),
|
26 |
+
"cave": ("They explored a dark cave in the mountains.", "cavern, grotto", "λκ΅΄", "κ΅΄"),
|
27 |
+
"among": ("She found her book among the pile of papers.", "amidst, between", "κ°μ΄λ°", "μ¬μ΄μ"),
|
28 |
+
"mountains": ("The mountains were covered with snow in winter.", "peaks, ranges", "μ°", "μ°λ§₯"),
|
29 |
+
"children": ("The children played games in the park.", "kids, youngsters", "μμ΄λ€", "μ΄λ¦°μ΄"),
|
30 |
+
"known": ("He was known for his kindness and bravery.", "recognized, famous", "μλ €μ§", "μ λͺ
ν"),
|
31 |
+
"hidden": ("They found a hidden door behind the bookshelf.", "concealed, secret", "μ¨κ²¨μ§", "λΉλ°μ"),
|
32 |
+
"local": ("The local market was full of fresh produce.", "regional, native", "μ§μμ", "νμ§μ"),
|
33 |
+
"discovery": ("The discovery of the old map excited everyone.", "finding, revelation", "λ°κ²¬", "νꡬ"),
|
34 |
+
"eagle": ("An eagle soared high above the valley.", "raptor, bird of prey", "λ
μ리", "λ§ΉκΈ"),
|
35 |
+
"villagers": ("The villagers gathered in the square for the festival.", "residents, townsfolk", "λ§μ μ¬λλ€", "μ£Όλ―Όλ€"),
|
36 |
+
"legend": ("The legend of the lost city intrigued the adventurers.", "myth, lore", "μ μ€", "μ ν"),
|
37 |
+
"tales": ("Grandma told us tales of her childhood.", "stories, narratives", "μ΄μΌκΈ°", "λν"),
|
38 |
+
"daring": ("His daring escape from the cave was legendary.", "bold, audacious", "λλ΄ν", "μ©κ°ν"),
|
39 |
+
"spirit": ("The spirit of adventure was alive in their hearts.", "soul, essence", "μ μ ", "νΌ"),
|
40 |
+
"exploring": ("They spent the summer exploring the forest.", "investigating, discovering", "νννλ€", "νꡬνλ€"),
|
41 |
+
"old": ("The old castle was full of secrets.", "ancient, aged", "μ€λλ", "λ‘μ"),
|
42 |
+
"lost": ("He felt lost without his best friend.", "missing, misplaced", "μμ΄λ²λ¦°", "κΈΈμ μμ"),
|
43 |
+
"ancient": ("They discovered ancient artifacts in the desert.", "archaic, antique", "κ³ λμ", "μλ μ"),
|
44 |
+
"inside": ("Inside the box was a beautiful necklace.", "within, interior", "μμͺ½", "λ΄λΆ"),
|
45 |
+
"treasure": ("They dreamed of finding hidden treasure.", "riches, valuables", "보물", "κ·μ€ν"),
|
46 |
+
"whispering": ("The trees were whispering secrets in the wind.", "murmuring, softly speaking", "μμμ΄λ", "μ‘°μ©ν λ§νλ"),
|
47 |
+
"hollow": ("They found a hollow tree to hide in during the storm.", "cavity, void", "λΉ", "κ΅¬λ© λ"),
|
48 |
+
"decided": ("She decided to take the long way home.", "determined, resolved", "κ²°μ νλ€", "κ²°μ¬νλ€"),
|
49 |
+
"journey": ("Their journey took them across the country.", "trip, voyage", "μ¬ν", "μ¬μ "),
|
50 |
+
"together": ("They worked together to solve the mystery.", "jointly, collectively", "ν¨κ»", "κ°μ΄"),
|
51 |
+
"way": ("She found a new way to solve the puzzle.", "method, manner", "λ°©λ²", "λ°©μ"),
|
52 |
+
"reached": ("They finally reached the top of the hill.", "arrived, attained", "λλ¬νλ€", "λμ°©νλ€"),
|
53 |
+
"chest": ("The chest was filled with gold coins.", "trunk, box", "μμ", "κ°μ΄"),
|
54 |
+
"boulder": ("A large boulder blocked the path.", "rock, stone", "λ°μ", "λ"),
|
55 |
+
"artifacts": ("The museum displayed artifacts from ancient Egypt.", "relics, antiquities", "μ λ¬Ό", "κ³ λ μ λ¬Ό"),
|
56 |
+
"legends": ("The legends spoke of a hidden kingdom.", "myths, sagas", "μ μ€", "μ ν"),
|
57 |
+
"explore": ("They wanted to explore the old mansion.", "investigate, examine", "νννλ€", "μ‘°μ¬νλ€"),
|
58 |
+
"secret": ("She kept the secret hidden from everyone.", "confidential, hidden", "λΉλ°", "μ¨κ²¨μ§"),
|
59 |
+
"small": ("The small kitten was very playful.", "tiny, little", "μμ", "μν"),
|
60 |
+
"mountain": ("The mountain was covered in thick forests.", "peak, hill", "μ°", "μ°λ§₯"),
|
61 |
+
"part": ("Each part of the puzzle was important.", "piece, segment", "λΆλΆ", "μ‘°κ°"),
|
62 |
+
"everyday": ("He wore his everyday clothes to the party.", "daily, routine", "μΌμμ μΈ", "맀μΌμ"),
|
63 |
+
"life": ("Life in the village was peaceful.", "existence, being", "μΆ", "μλͺ
"),
|
64 |
+
"nestled": ("The cabin was nestled in the woods.", "tucked, situated", "μ리 μ‘λ€", "μμΉνλ€"),
|
65 |
+
"towering": ("The towering trees made the forest dark and cool.", "lofty, soaring", "μ°λ μμ", "λμ"),
|
66 |
+
"peaks": ("The mountain peaks were covered in snow.", "summits, crests", "μ°λ΄μ°λ¦¬", "μ μ"),
|
67 |
+
"said": ("He said he would be back soon.", "stated, remarked", "λ§νλ€", "μΈκΈνλ€"),
|
68 |
+
"protected": ("The ancient ruins were protected by law.", "guarded, sheltered", "보νΈλ", "μ§μΌμ§"),
|
69 |
+
"massive": ("The massive ship docked at the port.", "enormous, huge", "κ±°λν", "μμ²λ"),
|
70 |
+
"supposedly": ("The treasure was supposedly buried under the tree.", "allegedly, reportedly", "μλ§", "μΆμ μ"),
|
71 |
+
"watched": ("They watched the movie together.", "observed, viewed", "보λ€", "κ΄μ°°νλ€"),
|
72 |
+
"perch": ("The bird found a perch on the windowsill.", "roost, rest", "νλ", "μλ€")
|
73 |
}
|
74 |
|
75 |
# Words to be excluded from both the word cloud and the word list
|
76 |
# Pronouns and possessives, plus what appear to be character/place/title words
# from the source story (NOTE(review): confirm the last row against the text).
exclude_words = {
    'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her', 'us', 'them',
    'my', 'your', 'his', 'its', 'our', 'their', 'mine', 'yours', 'hers', 'ours', 'theirs',
    'alex', 'mia', 'sam', 'echo', 'ridge', 'guardian', 'of', 'the', 'glen',
}
|
81 |
|
82 |
+
def get_wordnet_pos(treebank_tag):
    """Map a POS tag string to the corresponding WordNet POS constant.

    Args:
        treebank_tag: Tag string such as ``'JJ'``, ``'VBD'``, ``'NNS'`` or
            ``'RB'``; only its leading letter decides the result.

    Returns:
        ``wordnet.ADJ``, ``wordnet.VERB``, ``wordnet.NOUN`` or ``wordnet.ADV``
        for tags starting with ``J``, ``V``, ``N`` or ``R`` respectively, and
        ``None`` for every other tag.
    """
    prefix_to_pos = (
        ('J', wordnet.ADJ),
        ('V', wordnet.VERB),
        ('N', wordnet.NOUN),
        ('R', wordnet.ADV),
    )
    for prefix, wn_pos in prefix_to_pos:
        if treebank_tag.startswith(prefix):
            return wn_pos
    return None
|
93 |
+
|
94 |
def process_text(text):
|
95 |
words = nltk.word_tokenize(text)
|
96 |
words = [word.lower() for word in words if word.isalnum() and word.lower() not in stop_words and word.lower() not in exclude_words]
|
|
|
108 |
|
109 |
def translate_and_get_pos(word_freq, pos_tags):
    """Build the vocabulary rows for every counted word that has a curated entry.

    A word is kept only if it is a key of ``word_data_examples``, is not in
    ``exclude_words``, and has at least one tag that ``pos_map`` knows.

    Args:
        word_freq: Mapping of word -> number of occurrences.
        pos_tags: Iterable of ``(word, tag)`` pairs.

    Returns:
        A list of ``(word, pos_str, translation, example_sentence, synonyms)``
        tuples, sorted by descending frequency.
    """
    pos_map = {
        'NN': 'n.', 'NNS': 'n.', 'NNP': 'n.', 'NNPS': 'n.',
        'VB': 'v.', 'VBD': 'v. (과거형)', 'VBG': 'v. (ing형)',
        'VBN': 'v. (과거분사형/수동형)', 'VBP': 'v.', 'VBZ': 'v.',
        'JJ': 'adj.', 'JJR': 'adj.', 'JJS': 'adj.',
        'RB': 'adv.', 'RBR': 'adv.', 'RBS': 'adv.',
        'IN': 'prep.', 'DT': 'det.', 'CC': 'conj.',
        'UH': 'intj.',
    }
    # Tag prefix -> Korean name of the verb form, checked in this order.
    verb_form_names = (
        ('VBD', '과거형'),
        ('VBG', 'ing형'),
        ('VBN', '과거분사형/수동형'),
    )

    # Group the tags per word once instead of rescanning pos_tags for each word.
    tags_by_word = {}
    for tagged in pos_tags:
        tags_by_word.setdefault(tagged[0], []).append(tagged[1])

    seen_verbs = set()  # Base forms that have already been annotated
    word_data = []
    for word in word_freq:
        if word not in word_data_examples or word in exclude_words:
            continue

        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # label string is the same on every run (a set gave arbitrary order).
        word_tags = list(dict.fromkeys(tags_by_word.get(word, ())))
        pos_labels = list(dict.fromkeys(
            pos_map[tag] for tag in word_tags if tag in pos_map
        ))
        if not pos_labels:
            continue  # No recognised part of speech for this word
        pos_str = ", ".join(pos_labels)

        # For an inflected verb, note which base form it derives from; each
        # base form is annotated at most once across the whole table.
        for tag in word_tags:
            if get_wordnet_pos(tag) != wordnet.VERB:
                continue
            lemma = lemmatizer.lemmatize(word, wordnet.VERB)
            if lemma == word or lemma in seen_verbs:
                continue
            for prefix, form_name in verb_form_names:
                if tag.startswith(prefix):
                    pos_str += f" (v. {lemma}의 {form_name})"
                    break
            seen_verbs.add(lemma)

        entry = word_data_examples[word]
        example_sentence, synonyms = entry[:2]
        translation = f"{entry[2]}, {entry[3]}"
        word_data.append((word, pos_str, translation, example_sentence, synonyms))

    # Most frequent words first; ties keep their original relative order.
    word_data.sort(key=lambda row: word_freq[row[0]], reverse=True)

    return word_data
|
153 |
|
154 |
def main(text):
    """Run the full pipeline for one input text.

    Args:
        text: Raw text entered by the user.

    Returns:
        A ``(wordcloud_image, word_data_table)`` pair: the value returned by
        ``generate_wordcloud`` and an HTML ``<table>`` string of the
        vocabulary rows.
    """
    word_freq, pos_tags = process_text(text)
    wordcloud_image = generate_wordcloud(word_freq)
    word_data = translate_and_get_pos(word_freq, pos_tags)

    # Render the rows as an HTML table with bilingual column headers.
    columns = [
        "어휘 (Word)",
        "범주 (Category)",
        "뜻 (Meaning)",
        "예문 (Example)",
        "동의어 (Synonyms)",
    ]
    df = pd.DataFrame(word_data, columns=columns)
    word_data_table = df.to_html(index=False, justify='center')

    return wordcloud_image, word_data_table
|
164 |
|
165 |
# Custom CSS for the Gradio interface
|
166 |
css = """
|
|
|
172 |
background-color: blue !important;
|
173 |
border-color: blue !important;
|
174 |
}
|
175 |
+
table {
|
176 |
+
width: 100%;
|
177 |
+
border-collapse: collapse;
|
178 |
+
text-align: center;
|
179 |
+
}
|
180 |
+
th, td {
|
181 |
+
padding: 8px;
|
182 |
+
border: 1px solid #ddd;
|
183 |
+
}
|
184 |
+
th {
|
185 |
+
background-color: #f2f2f2;
|
186 |
+
}
|
187 |
</style>
|
188 |
"""
|
189 |
|
|
|
191 |
interface = gr.Interface(
|
192 |
fn=main,
|
193 |
inputs="text",
|
194 |
+
outputs=["image", "html"],
|
195 |
title="Wordcloud Vocabulary Learning App",
|
196 |
description="Input text to generate a word cloud and a frequency list with Korean meanings, parts of speech, and example sentences."
|
197 |
"<br><br><b>The full text:</b><br>"
|