import gradio as gr
import spacy
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Initialize the spaCy NLP model
nlp = spacy.load("en_core_web_sm")

# Load the lexicon of causal cue words and phrases
cues = pd.read_excel('link_cues.xlsx')
list_causalmarkers = cues['causal_markers']
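
# If the Excel lexicon is unavailable, a minimal inline stand-in like the one
# below (illustrative markers only, not the curated lexicon) keeps the app runnable:
# list_causalmarkers = pd.Series(["because", "due to", "therefore", "lead to", "cause"])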

def contains_words_or_phrases(words_list, sentence):
    """
    Check if any word or phrase from words_list is present in the sentence.

    :param words_list: List of words or phrases to check
    :param sentence: The input sentence in which to look for words or phrases
    :return: True if any word or phrase is found, otherwise False
    """
    # Process the sentence with spaCy to obtain the lemmatized form of each token
    processed_sentence = nlp(sentence.lower())
    lemmatized_sentence = " ".join(token.lemma_ for token in processed_sentence)

    # Lemmatize each candidate word or phrase the same way, then substring-match
    for word_or_phrase in words_list:
        processed_word_or_phrase = nlp(word_or_phrase.lower())
        lemmatized_word_or_phrase = " ".join(token.lemma_ for token in processed_word_or_phrase)
        # Return True as soon as any lemmatized word or phrase is found
        if lemmatized_word_or_phrase in lemmatized_sentence:
            return True
    return False  # None of the words or phrases were found
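
# A quick sanity check of the lemma-based matching (results assume en_core_web_sm,
# which lemmatizes "contributed" to "contribute"):
# contains_words_or_phrases(["contribute"], "She contributed to the fund.")  # -> True
# contains_words_or_phrases(["because"], "The sky is blue.")                 # -> False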

class NounExtractor:
    def __init__(self, nlp):
        """
        Initialize the NounExtractor with a pre-loaded spaCy NLP model.
        """
        self.nlp = nlp

    def process_text(self, text):
        """
        Process the text using the spaCy NLP pipeline.
        """
        return self.nlp(text)
    def get_noun_phrases(self, doc):
        """
        Extract and refine noun phrases from the spaCy doc, tracking and using dependency labels accurately.
        """
        noun_phrases = list(doc.noun_chunks)
        merged_phrases = []
        skip_indexes = set()  # Indexes to skip because they have been merged into another phrase
        list_dep_labels = [token.dep_ for token in doc]  # Dependency label of each token

        for i in range(len(noun_phrases)):
            if i in skip_indexes:
                continue
            current = noun_phrases[i]
            # Collect the dependency labels occurring inside the current noun phrase
            deps_in_phrase = {list_dep_labels[tok.i] for tok in current}

            # Merge adjacent noun phrases joined by a prepositional connector
            if i + 1 < len(noun_phrases) and doc[current.end].text in ['of', 'in', 'among', 'on', 'towards', 'to', 'for', 'across']:
                next_phrase = noun_phrases[i + 1]
                if i + 2 < len(noun_phrases) and doc[next_phrase.end].dep_ == 'pcomp':
                    # Three noun phrases chained through a prepositional complement
                    extended_phrase = doc[current.start:noun_phrases[i + 2].end]
                    skip_indexes.update({i + 1, i + 2})
                    extended_deps = {list_dep_labels[tok.i] for tok in extended_phrase}
                    dep_label = self.determine_dep_label(extended_deps)
                    merged_phrases.append((extended_phrase.text, dep_label))
                    continue
                else:
                    merged_phrase = doc[current.start:next_phrase.end]
                    skip_indexes.add(i + 1)
                    merged_deps = {list_dep_labels[tok.i] for tok in merged_phrase}
                    dep_label = self.determine_dep_label(merged_deps)
                    merged_phrases.append((merged_phrase.text, dep_label))
                    continue

            if i not in skip_indexes:
                dep_label = self.determine_dep_label(deps_in_phrase)
                merged_phrases.append((current.text, dep_label))
        return merged_phrases
    def determine_dep_label(self, deps_in_phrase):
        """
        Determine the most appropriate dependency label for a phrase based on its internal dependencies.
        """
        if 'nsubj' in deps_in_phrase:
            return 'ROOTnsubj'
        elif 'nsubjpass' in deps_in_phrase:
            return 'ROOTnsubjpass'
        else:
            # Fall back to an arbitrary member of the set if no clear subject is present
            return deps_in_phrase.pop() if deps_in_phrase else 'unknown'
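
    # For instance, {'det', 'nsubj'} maps to 'ROOTnsubj', while {'det', 'pobj'}
    # yields an arbitrary member such as 'pobj' (set.pop() is unordered).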
    def extract(self, sentence, causative_verb):
        """
        Extract and return noun phrases with their detailed dependency tags from the sentence.
        """
        doc = self.process_text(sentence)
        noun_phrases = self.get_noun_phrases(doc)
        result_dict = {phrase: dep for phrase, dep in noun_phrases}

        # Check for the presence of causative verbs, such as 'cause', in the sentence
        found_verbs = [v for v in causative_verb if v.lower() in sentence.lower()]
        if found_verbs:
            # Adjust dependency labels for noun phrases based on the presence of a causative verb
            for phrase, dep in list(result_dict.items()):  # Work on a copy of the items to safely modify the dict
                if dep == 'ROOTnsubj':
                    result_dict[phrase] = 'dobj'
                elif dep == 'dobj':
                    result_dict[phrase] = 'ROOT'
        return result_dict
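
# Illustrative use (exact phrases and labels depend on the spaCy model, and the
# fallback label in determine_dep_label is not deterministic):
# NounExtractor(nlp).extract("The protests are caused by the new regulation.", ['cause'])
# # e.g. {'The protests': 'ROOTnsubjpass', 'the new regulation': 'pobj'}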

def format_results(results):
    formatted = []
    # Find all roots (central subjects) to structure the phrases around them
    root_keys = [key for key, value in results.items() if value in ('ROOTnsubj', 'ROOTnsubjpass')]

    for key, value in results.items():
        if key in root_keys:
            continue  # Skip the roots themselves when adding to the formatted list
        for root_key in root_keys:
            if results[root_key] == 'ROOTnsubjpass':  # A passive subject is the effect, not the cause
                formatted.append(f"{key} -> {root_key}")
            else:
                formatted.append(f"{root_key} <- {key}")

    # Remove duplicates and return the formatted results
    return list(set(formatted))
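
# A sketch of the expected shape: given {'The protests': 'ROOTnsubjpass',
# 'the new regulation': 'pobj'}, the passive root makes the other phrase the
# cause, so format_results returns ['the new regulation -> The protests'].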

def wrap_label(label):
    """Helper function to wrap node labels onto a new line after every three words."""
    words = label.split()
    return '\n'.join(' '.join(words[i:i + 3]) for i in range(0, len(words), 3))
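
# For example, wrap_label("public support for anti-discrimination laws")
# returns "public support for\nanti-discrimination laws".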

def visualize_cognitive_map(formatted_results):
    G = nx.DiGraph()  # Directed graph to show the direction of relationships

    # Add edges based on the formatted results; both arrow notations
    # encode a cause -> effect edge once the nodes are reordered here
    for result in formatted_results:
        if '<-' in result:
            # "effect <- cause": add the edge in the reverse direction
            nodes = result.split(' <- ')
            G.add_edge(nodes[1], nodes[0])
        elif '->' in result:
            # "cause -> effect": add the edge in the stated direction
            nodes = result.split(' -> ')
            G.add_edge(nodes[0], nodes[1])

    # Position nodes using the spring layout
    pos = nx.spring_layout(G, k=0.50)

    # Set up a larger figure for better visibility
    plt.figure(figsize=(12, 8))

    # Prepare custom labels with wrapped text
    labels = {node: wrap_label(node) for node in G.nodes()}

    # Draw the graph with the custom labels
    nx.draw(G, pos, labels=labels, node_color='skyblue', edge_color='#FF5733',
            node_size=5000, font_size=10, font_weight='bold', with_labels=True,
            arrowstyle='-|>', arrowsize=30)
    return plt
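
# Typical call; the returned pyplot state is rendered by gr.Plot below:
# visualize_cognitive_map(['the new regulation -> The protests'])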

# Instantiate the extractor with the shared spaCy pipeline
extractor = NounExtractor(nlp=nlp)
# Causative verbs and constructions that trigger label adjustment in extract()
causative_verb = ['affect', 'influence', 'increase', 'against', 'bring', 'lead', 'cause', 'is to', 'was to',
                  'are to', 'were to', 'contribute', 'make', 'attribute', 'provoke']

# Define the callback function for the GUI
def CogMapAnalysis(text):
    if contains_words_or_phrases(list_causalmarkers, text):
        result = extractor.extract(text, causative_verb)
        formatted_result = format_results(result)
        plot = visualize_cognitive_map(formatted_result)
        return formatted_result, plot
    else:
        formatted_result = "❌ No causal expression was identified."
        plot = None  # Use None instead of an empty string for the missing plot
        return formatted_result, plot

# Build the GUI with Gradio
with gr.Blocks() as demo:
    gr.Markdown('# CogMApp')
    gr.Markdown('### Generate cognitive maps from text with one click!')
    with gr.Row():
        with gr.Column():
            inputs = gr.Textbox(label="Input", lines=2, placeholder="Enter your text here...")
            submit_button = gr.Button("Submit")
        with gr.Column():
            examples = gr.Examples(examples=[
                "Public support for anti-discrimination laws and the movement to support immigrants grew due to the impact of getting widespread education on social justice issues.",
                "The introduction of new anti-discrimination laws has been driven by an increasing awareness of social injustices and grassroots movements.",
                "The weak law enforcement in this country is due to its citizens' ignorance.",
                "CogMApp is a tool that lets you create cognitive maps from text.",
                "The protests across the country are caused by the announcement of the new regulation."
            ], inputs=inputs)
    with gr.Row():
        output = gr.Textbox(label="Result", lines=5, placeholder=" ")
    with gr.Row():
        cogmap_plot = gr.Plot(label="Generated Cognitive Map")
    with gr.Row():
        gr.Markdown("⚠️ Feel free to flag me if you find any errors. 🙂")
    with gr.Column():
        gr.Markdown('Demo made with ❤ by P.K. Ningrum (2024) | Contact: [https://ningrumdaud.github.io/](https://ningrumdaud.github.io/)')

    # Wire the button to run the analysis when clicked
    submit_button.click(CogMapAnalysis, inputs=[inputs], outputs=[output, cogmap_plot])

if __name__ == "__main__":
    demo.launch(show_api=False)