import matplotlib.pyplot as plt
from wordcloud import WordCloud
import nltk
from collections import Counter
from nltk.corpus import stopwords, wordnet
from nltk.stem import WordNetLemmatizer
import gradio as gr
import pandas as pd
# Download the NLTK data used below. Recent NLTK releases (3.9+) load the
# tokenizer and tagger models from 'punkt_tab' and
# 'averaged_perceptron_tagger_eng' instead of the older 'punkt' and
# 'averaged_perceptron_tagger' packages, so both generations are fetched to
# keep word_tokenize() and pos_tag() working on either side of that change.
for _resource in (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'stopwords',
    'wordnet',
):
    nltk.download(_resource)

# English stop words; tokens found here are dropped before counting.
stop_words = set(stopwords.words('english'))
# Shared lemmatizer used to recover the base form of inflected verbs.
lemmatizer = WordNetLemmatizer()
# Vocabulary reference table keyed by lower-case word. Each value is a 4-tuple:
#   (example sentence, comma-separated synonyms, Korean meaning, second Korean meaning)
# translate_and_get_pos() only lists words that have an entry here; it joins the
# two Korean meanings into one "meaning" cell and shows the first two fields as-is.
word_data_examples = {
"village": ("The village was quiet at night.", "hamlet, community", "마을", "시골"),
"adventure": ("They went on an exciting adventure in the forest.", "expedition, quest", "모험", "여행"),
"map": ("We used a map to find the hidden treasure.", "chart, atlas", "지도", "약도"),
"cave": ("They explored a dark cave in the mountains.", "cavern, grotto", "동굴", "굴"),
"among": ("She found her book among the pile of papers.", "amidst, between", "가운데", "사이에"),
"mountains": ("The mountains were covered with snow in winter.", "peaks, ranges", "산", "산맥"),
"children": ("The children played games in the park.", "kids, youngsters", "아이들", "어린이"),
"known": ("He was known for his kindness and bravery.", "recognized, famous", "알려진", "유명한"),
"hidden": ("They found a hidden door behind the bookshelf.", "concealed, secret", "숨겨진", "비밀의"),
"local": ("The local market was full of fresh produce.", "regional, native", "지역의", "현지의"),
"discovery": ("The discovery of the old map excited everyone.", "finding, revelation", "발견", "탐구"),
"eagle": ("An eagle soared high above the valley.", "raptor, bird of prey", "독수리", "맹금"),
"villagers": ("The villagers gathered in the square for the festival.", "residents, townsfolk", "마을 사람들", "주민들"),
"legend": ("The legend of the lost city intrigued the adventurers.", "myth, lore", "전설", "신화"),
"tales": ("Grandma told us tales of her childhood.", "stories, narratives", "이야기", "동화"),
"daring": ("His daring escape from the cave was legendary.", "bold, audacious", "대담한", "용감한"),
"spirit": ("The spirit of adventure was alive in their hearts.", "soul, essence", "정신", "혼"),
"exploring": ("They spent the summer exploring the forest.", "investigating, discovering", "탐험하다", "탐구하다"),
"old": ("The old castle was full of secrets.", "ancient, aged", "오래된", "낡은"),
"lost": ("He felt lost without his best friend.", "missing, misplaced", "잃어버린", "길을 잃은"),
"ancient": ("They discovered ancient artifacts in the desert.", "archaic, antique", "고대의", "옛날의"),
"inside": ("Inside the box was a beautiful necklace.", "within, interior", "안쪽", "내부"),
"treasure": ("They dreamed of finding hidden treasure.", "riches, valuables", "보물", "귀중품"),
"whispering": ("The trees were whispering secrets in the wind.", "murmuring, softly speaking", "속삭이는", "조용히 말하는"),
"hollow": ("They found a hollow tree to hide in during the storm.", "cavity, void", "빈", "구멍 난"),
"decided": ("She decided to take the long way home.", "determined, resolved", "결정하다", "결심하다"),
"journey": ("Their journey took them across the country.", "trip, voyage", "여행", "여정"),
"together": ("They worked together to solve the mystery.", "jointly, collectively", "함께", "같이"),
"way": ("She found a new way to solve the puzzle.", "method, manner", "방법", "방식"),
"reached": ("They finally reached the top of the hill.", "arrived, attained", "도달하다", "도착하다"),
"chest": ("The chest was filled with gold coins.", "trunk, box", "상자", "가슴"),
"boulder": ("A large boulder blocked the path.", "rock, stone", "바위", "돌"),
"artifacts": ("The museum displayed artifacts from ancient Egypt.", "relics, antiquities", "유물", "고대 유물"),
"legends": ("The legends spoke of a hidden kingdom.", "myths, sagas", "전설", "신화"),
"explore": ("They wanted to explore the old mansion.", "investigate, examine", "탐험하다", "조사하다"),
"secret": ("She kept the secret hidden from everyone.", "confidential, hidden", "비밀", "숨겨진"),
"small": ("The small kitten was very playful.", "tiny, little", "작은", "소형"),
"mountain": ("The mountain was covered in thick forests.", "peak, hill", "산", "산맥"),
"part": ("Each part of the puzzle was important.", "piece, segment", "부분", "조각"),
"everyday": ("He wore his everyday clothes to the party.", "daily, routine", "일상적인", "매일의"),
"life": ("Life in the village was peaceful.", "existence, being", "삶", "생명"),
"nestled": ("The cabin was nestled in the woods.", "tucked, situated", "자리 잡다", "위치하다"),
"towering": ("The towering trees made the forest dark and cool.", "lofty, soaring", "우뚝 솟은", "높은"),
"peaks": ("The mountain peaks were covered in snow.", "summits, crests", "산봉우리", "정상"),
"said": ("He said he would be back soon.", "stated, remarked", "말하다", "언급하다"),
"protected": ("The ancient ruins were protected by law.", "guarded, sheltered", "보호된", "지켜진"),
"massive": ("The massive ship docked at the port.", "enormous, huge", "거대한", "엄청난"),
"supposedly": ("The treasure was supposedly buried under the tree.", "allegedly, reportedly", "아마", "추정상"),
"watched": ("They watched the movie together.", "observed, viewed", "보다", "관찰하다"),
"perch": ("The bird found a perch on the windowsill.", "roost, rest", "횃대", "앉다")
}
# Tokens that are kept out of both the word cloud and the vocabulary table.
exclude_words = {
    # Personal pronouns
    'i', 'you', 'he', 'she', 'it', 'we', 'they',
    'me', 'him', 'her', 'us', 'them',
    # Possessives
    'my', 'your', 'his', 'its', 'our', 'their',
    'mine', 'yours', 'hers', 'ours', 'theirs',
    # Words from the character and place names in the story text
    'alex', 'mia', 'sam', 'echo', 'ridge',
    'guardian', 'of', 'the', 'glen',
}
def get_wordnet_pos(treebank_tag):
    """Translate a POS tag into the matching WordNet POS constant.

    Only the first character of *treebank_tag* is significant: ``J`` maps to
    adjective, ``V`` to verb, ``N`` to noun and ``R`` to adverb. Any other
    tag (including an empty string) yields ``None``.
    """
    initial = treebank_tag[:1]
    if initial == 'J':
        return wordnet.ADJ
    if initial == 'V':
        return wordnet.VERB
    if initial == 'N':
        return wordnet.NOUN
    if initial == 'R':
        return wordnet.ADV
    return None
def process_text(text):
    """Tokenize *text* and return word frequencies plus POS-tagged words.

    Tokens are kept only when they are purely alphanumeric (this drops
    punctuation and hyphenated tokens) and their lower-case form is in neither
    ``stop_words`` nor ``exclude_words``.

    Args:
        text: Raw input text.

    Returns:
        A ``(word_freq, pos_tags)`` pair. ``word_freq`` is a ``Counter``
        mapping each kept lower-cased word to its number of occurrences.
        ``pos_tags`` is a list of ``(lower-cased word, tag)`` pairs for the
        same kept tokens, in text order.
    """
    # Tag the complete token stream *before* filtering: the tagger uses the
    # neighbouring tokens as context, so tagging a lower-cased list with the
    # stop words already removed gives it much less to work with.
    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(text))

    pos_tags = []
    for token, tag in tagged_tokens:
        lowered = token.lower()
        if token.isalnum() and lowered not in stop_words and lowered not in exclude_words:
            pos_tags.append((lowered, tag))

    word_freq = Counter(word for word, _ in pos_tags)
    return word_freq, pos_tags
def generate_wordcloud(word_freq):
    """Render *word_freq* as a word-cloud PNG and return the file path.

    Args:
        word_freq: Mapping of word -> frequency, passed to
            ``WordCloud.generate_from_frequencies``.

    Returns:
        The path of the written image, always ``'wordcloud.png'`` (the file is
        overwritten on every call).
    """
    output_path = 'wordcloud.png'
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(word_freq)

    fig = plt.figure(figsize=(10, 5))
    try:
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        plt.savefig(output_path)
    finally:
        # pyplot keeps every figure registered until it is explicitly closed;
        # without this each call would leave another open figure behind.
        plt.close(fig)
    return output_path
def translate_and_get_pos(word_freq, pos_tags):
    """Build the vocabulary-table rows for the words found in the text.

    A word produces a row only if it has an entry in ``word_data_examples``,
    is not in ``exclude_words``, and carries at least one tag known to the
    label table below.

    Args:
        word_freq: Mapping of word -> occurrence count.
        pos_tags: Sequence of ``(word, tag)`` pairs; a word may appear several
            times with different tags.

    Returns:
        A list of ``(word, pos_str, translation, example_sentence, synonyms)``
        tuples sorted by descending frequency. ``pos_str`` lists the word's
        part-of-speech labels and, for inflected verb forms, a note naming the
        base verb.
    """
    pos_map = {
        'NN': 'n.', 'NNS': 'n.', 'NNP': 'n.', 'NNPS': 'n.',
        'VB': 'v.', 'VBD': 'v. (과거형)', 'VBG': 'v. (ing형)',
        'VBN': 'v. (과거분사형/수동태)', 'VBP': 'v.', 'VBZ': 'v.',
        'JJ': 'adj.', 'JJR': 'adj.', 'JJS': 'adj.',
        'RB': 'adv.', 'RBR': 'adv.', 'RBS': 'adv.',
        'IN': 'prep.', 'DT': 'det.', 'CC': 'conj.',
        'UH': 'intj.',
    }

    # Group the tags per word once instead of rescanning pos_tags for every
    # word; tags stay in their order of occurrence.
    tags_by_word = {}
    for tagged in pos_tags:
        tags_by_word.setdefault(tagged[0], []).append(tagged[1])

    seen_verbs = set()  # Base verbs that have already received an inflection note
    word_data = []
    for word in word_freq:
        if word not in word_data_examples or word in exclude_words:
            continue

        word_tags = tags_by_word.get(word, [])
        # dict.fromkeys removes duplicate labels while keeping first-seen
        # order, so the joined string is the same on every run (joining a set
        # would make the label order vary between runs).
        pos_labels = list(dict.fromkeys(pos_map[tag] for tag in word_tags if tag in pos_map))
        if not pos_labels:
            continue  # No recognised part of speech for this word
        pos_str = ", ".join(pos_labels)

        # For inflected verb forms, append a note pointing at the base verb.
        for tag in word_tags:
            if get_wordnet_pos(tag) != wordnet.VERB:
                continue
            lemmatized_word = lemmatizer.lemmatize(word, wordnet.VERB)
            if word == lemmatized_word or lemmatized_word in seen_verbs:
                continue
            if tag.startswith('VBD'):
                pos_str += f" (v. {lemmatized_word}의 과거형)"
            elif tag.startswith('VBG'):
                pos_str += f" (v. {lemmatized_word}의 ing형)"
            elif tag.startswith('VBN'):
                pos_str += f" (v. {lemmatized_word}의 과거분사형/수동태)"
            seen_verbs.add(lemmatized_word)

        entry = word_data_examples[word]
        example_sentence, synonyms = entry[:2]
        translation = f"{entry[2]}, {entry[3]}"
        word_data.append((word, pos_str, translation, example_sentence, synonyms))

    # Most frequent words first; the sort is stable, so ties keep text order.
    word_data.sort(key=lambda row: word_freq[row[0]], reverse=True)
    return word_data
def main(text):
    """Run the full pipeline for one input text.

    Counts and tags the words, renders the word cloud, and builds the
    vocabulary table.

    Returns:
        A ``(image_path, html_table)`` pair: the path of the word-cloud image
        and the vocabulary table rendered as an HTML string.
    """
    frequencies, tagged_words = process_text(text)
    image_path = generate_wordcloud(frequencies)
    rows = translate_and_get_pos(frequencies, tagged_words)

    # One column per field of the row tuples, in the same order.
    column_names = [
        "어휘 (Word)",
        "범주 (Category)",
        "뜻 (Meaning)",
        "예문 (Example)",
        "동의어 (Synonyms)",
    ]
    table = pd.DataFrame(rows, columns=column_names)
    return image_path, table.to_html(index=False, justify='center')
# Custom CSS for the Gradio interface (currently empty).
css = """
"""

# Gradio interface: one text input; outputs are the word-cloud image and the
# vocabulary table as HTML. The description embeds the sample story so users
# can copy it into the input box.
interface = gr.Interface(
    fn=main,
    inputs="text",
    outputs=["image", "html"],
    title="Wordcloud Vocabulary Learning App",
    description=(
        "Input text to generate a word cloud and a frequency list with Korean meanings, parts of speech, and example sentences."
        "\nThe full text:\n"
        """
In the small mountain village of Echo Ridge, adventure was a part of everyday life. Nestled among towering peaks, the village was said to be protected by the "Guardian of the Glen," a massive eagle that supposedly watched over the villagers from its perch high in the mountains. The legend inspired many adventurous tales among the villagers, especially the children. Among these children was a bright-eyed eighth grader named Alex. Alex was known for his daring spirit and his love for exploring the rugged landscapes around Echo Ridge. He had a particular fascination with the old maps and tales of hidden treasures that had been lost in the mountains centuries ago. One day, while exploring the local library, Alex stumbled upon an ancient map tucked inside a forgotten book on village lore. The map hinted at the location of a lost treasure, hidden deep within a cave known as Whispering Hollow. Excited by the prospect of a real adventure, Alex decided to seek out the treasure. Knowing the journey would be risky, he enlisted the help of his best friends, Mia and Sam. Together, they prepared for the expedition, gathering supplies and studying the map extensively. They planned their route, took note of landmarks, and readied themselves for any challenges they might face. Their journey began at dawn. They trekked through dense forests, crossed rushing streams, and climbed steep cliffs. Along the way, they encountered various wildlife and navigated through tricky terrains, their map guiding them every step of the way. After hours of hiking, they finally reached Whispering Hollow. The cave was more magnificent than they had imagined, filled with intricate stalactites and echoes of dripping water. Using their flashlights, they ventured deeper into the cave, guided by the markings on the map. As they reached the heart of the cave, they discovered an ancient chest hidden behind a fallen boulder. With hearts pounding, they moved the boulder and opened the chest. 
Inside, instead of gold or jewels, they found a collection of old artifacts: pottery, coins, and a beautifully carved statuette of an eagle — the Guardian of the Glen. Realizing the historical significance of their find, they decided to donate the artifacts to the local museum. The village celebrated their discovery, and the children were hailed as heroes. Their adventure brought the community together, sparking a renewed interest in the history and legends of Echo Ridge. Alex, Mia, and Sam became local legends, known not only for their daring but also for their spirit of discovery and respect for heritage. They continued to explore the mountains, each adventure strengthening their friendship and deepening their connection to their village. The legend of the Guardian of the Glen lived on, not just as a protector but as a symbol of adventure and discovery, inspiring future generations to explore the mysteries of Echo Ridge."""
    ),
    # The stylesheet is handed to the interface itself; a gr.HTML component
    # created outside any layout is never rendered, so it cannot apply CSS.
    css=css,
)

# Launch the interface
interface.launch()
# To try the app, copy the full story text shown in the description and paste it into the input box.