import gradio as gr
from wordllama import WordLlama

# Load the default WordLlama model once; every handler below shares it.
wl = WordLlama.load()

# Long-form description rendered underneath the tabs. Kept at column 0 so the
# Markdown headings and fenced code blocks keep their line structure.
_ABOUT_MARKDOWN = """
# WordLlama Gradio Demo

**WordLlama** is a fast, lightweight NLP toolkit that handles tasks like fuzzy deduplication, similarity, and ranking with minimal inference-time dependencies and is optimized for CPU hardware.

For more details, visit the [WordLlama GitHub repository](https://github.com/dleemiller/WordLlama).

## Examples

**Calculate Similarity**

```python
from wordllama import WordLlama

# Load the default WordLlama model
wl = WordLlama.load()

# Calculate similarity between two sentences
similarity_score = wl.similarity("i went to the car", "i went to the pawn shop")
print(similarity_score)  # Output: 0.06641249096796882
```

**Rank Documents**

```python
query = "i went to the car"
candidates = ["i went to the park", "i went to the shop", "i went to the truck", "i went to the vehicle"]
ranked_docs = wl.rank(query, candidates)
print(ranked_docs)
# Output:
# [
#   ('i went to the vehicle', 0.7441646856486314),
#   ('i went to the truck', 0.2832691551894259),
#   ('i went to the shop', 0.19732814982305436),
#   ('i went to the park', 0.15101404519322253)
# ]
```

**Additional Inference Methods**

```python
# Fuzzy Deduplication
wl.deduplicate(candidates, threshold=0.8)

# Clustering with K-means
wl.cluster(docs, k=5, max_iterations=100, tolerance=1e-4)

# Filtering Candidates
wl.filter(query, candidates, threshold=0.3)

# Top-k Candidates
wl.topk(query, candidates, k=3)
```
"""


def calculate_similarity(sentence1, sentence2):
    """Return the score ``wl.similarity`` gives for the two sentences."""
    similarity_score = wl.similarity(sentence1, sentence2)
    return similarity_score


def rank_documents(query, candidates):
    """Return the result of ``wl.rank`` for *candidates* against *query*."""
    ranked_docs = wl.rank(query, candidates)
    return ranked_docs


def deduplicate_candidates(candidates, threshold):
    """Return ``wl.deduplicate(candidates, threshold)``."""
    deduplicated = wl.deduplicate(candidates, threshold)
    return deduplicated


def filter_candidates(query, candidates, threshold):
    """Return ``wl.filter(query, candidates, threshold)``."""
    filtered = wl.filter(query, candidates, threshold)
    return filtered


def topk_candidates(query, candidates, k):
    """Return ``wl.topk(query, candidates, k)``."""
    topk = wl.topk(query, candidates, k)
    return topk


def _split_candidates(text):
    """Turn the comma-separated textbox value into a clean list of candidates.

    Surrounding whitespace is stripped from every entry and empty entries are
    dropped, so ``"a, b,"`` yields ``["a", "b"]`` rather than
    ``["a", " b", ""]``.

    Raises:
        gr.Error: if no non-empty candidate remains; Gradio shows the message
            to the user instead of handing an empty list to the model.
    """
    parts = [part.strip() for part in (text or "").split(",")]
    candidates = [part for part in parts if part]
    if not candidates:
        raise gr.Error("Please enter at least one candidate.")
    return candidates


def create_gradio_interface():
    """Build the tabbed Gradio ``Blocks`` demo and return it (not launched).

    One tab per operation: similarity, ranking, deduplication, filtering and
    top-k selection. Components render in the order they are created, so the
    statement order inside each tab is also the on-screen layout.
    """
    # NOTE(review): the original indentation was lost; the nesting below
    # (Row holds only the two sentence boxes, description sits after the tabs)
    # is the most plausible reading -- confirm against the deployed layout.
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("# WordLlama")
        gr.Markdown("## NLP Toolkit")

        with gr.Tab("Similarity"):
            with gr.Row():
                sentence1 = gr.Textbox(
                    label="Sentence 1",
                    placeholder="Enter the first sentence here...",
                )
                sentence2 = gr.Textbox(
                    label="Sentence 2",
                    placeholder="Enter the second sentence here...",
                )
            similarity_output = gr.Number(label="Similarity Score")
            submit_similarity_btn = gr.Button("Calculate Similarity")
            submit_similarity_btn.click(
                fn=calculate_similarity,
                inputs=[sentence1, sentence2],
                outputs=[similarity_output],
            )
            gr.Examples(
                examples=[
                    ["I love programming.", "I enjoy coding."],
                    ["The weather is sunny.", "It's a bright day."],
                    ["I need coffee.", "I'm looking for a coffee shop."],
                ],
                inputs=[sentence1, sentence2],
            )

        with gr.Tab("Rank Documents"):
            query = gr.Textbox(label="Query", placeholder="Enter the query here...")
            candidates = gr.Textbox(
                label="Candidates (comma separated)",
                placeholder="Enter candidate sentences here...",
            )
            ranked_docs_output = gr.Dataframe(headers=["Document", "Score"])
            submit_rank_btn = gr.Button("Rank Documents")
            submit_rank_btn.click(
                fn=lambda q, c: rank_documents(q, _split_candidates(c)),
                inputs=[query, candidates],
                outputs=[ranked_docs_output],
            )
            gr.Examples(
                examples=[
                    [
                        "I went to the car",
                        "I went to the park, I went to the shop, I went to the truck, I went to the vehicle",
                    ],
                    [
                        "Looking for a restaurant",
                        "I need food, I'm hungry, I want to eat, Let's find a place to eat",
                    ],
                    ["Best programming languages", "Python, JavaScript, Java, C++"],
                ],
                inputs=[query, candidates],
            )

        with gr.Tab("Deduplicate Candidates"):
            candidates_dedup = gr.Textbox(
                label="Candidates (comma separated)",
                placeholder="Enter candidate sentences here...",
            )
            threshold_dedup = gr.Slider(
                label="Threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.8
            )
            deduplicated_output = gr.Textbox(label="Deduplicated Candidates")
            submit_dedup_btn = gr.Button("Deduplicate")
            submit_dedup_btn.click(
                fn=lambda c, t: deduplicate_candidates(_split_candidates(c), t),
                inputs=[candidates_dedup, threshold_dedup],
                outputs=[deduplicated_output],
            )
            gr.Examples(
                examples=[
                    ["apple, apple", 0.8],
                    ["delhi, new delhi", 0.87],
                    ["text, textual", 0.7],
                ],
                inputs=[candidates_dedup, threshold_dedup],
            )

        with gr.Tab("Filter Candidates"):
            filter_query = gr.Textbox(
                label="Query", placeholder="Enter the query here..."
            )
            candidates_filter = gr.Textbox(
                label="Candidates (comma separated)",
                placeholder="Enter candidate sentences here...",
            )
            threshold_filter = gr.Slider(
                label="Threshold", minimum=0.0, maximum=1.0, step=0.01, value=0.3
            )
            filtered_output = gr.Textbox(label="Filtered Candidates")
            submit_filter_btn = gr.Button("Filter Candidates")
            submit_filter_btn.click(
                fn=lambda q, c, t: filter_candidates(q, _split_candidates(c), t),
                inputs=[filter_query, candidates_filter, threshold_filter],
                outputs=[filtered_output],
            )
            gr.Examples(
                examples=[
                    [
                        "I went to the car",
                        "I went to the park, I went to the shop, I went to the truck",
                        0.3,
                    ],
                    [
                        "Looking for a restaurant",
                        "I want to eat, I'm hungry, Let's find a place to eat",
                        0.4,
                    ],
                    ["Best programming languages", "Python, JavaScript, Java, C++", 0.5],
                ],
                inputs=[filter_query, candidates_filter, threshold_filter],
            )

        with gr.Tab("Top-k Candidates"):
            topk_query = gr.Textbox(
                label="Query", placeholder="Enter the query here..."
            )
            candidates_topk = gr.Textbox(
                label="Candidates (comma separated)",
                placeholder="Enter candidate sentences here...",
            )
            k_slider = gr.Slider(label="Top-k", minimum=1, maximum=10, step=1, value=3)
            topk_output = gr.Textbox(label="Top-k Candidates")
            submit_topk_btn = gr.Button("Get Top-k Candidates")
            submit_topk_btn.click(
                # The slider value is coerced to int so the count handed to
                # wl.topk is always an integer.
                fn=lambda q, c, k: topk_candidates(q, _split_candidates(c), int(k)),
                inputs=[topk_query, candidates_topk, k_slider],
                outputs=[topk_output],
            )
            gr.Examples(
                examples=[
                    [
                        "I went to the car",
                        "I went to the park, I went to the shop, I went to the truck, I went to the vehicle",
                        3,
                    ],
                    [
                        "Looking for a restaurant",
                        "I want to eat, I'm hungry, Let's find a place to eat",
                        2,
                    ],
                    ["Best programming languages", "Python, JavaScript, Java, C++", 4],
                ],
                inputs=[topk_query, candidates_topk, k_slider],
            )

        gr.Markdown(_ABOUT_MARKDOWN)

    return demo


# Create and launch the Gradio interface
demo = create_gradio_interface()
demo.launch()