Ahsen Khaliq
committed on
Commit
•
04a7a46
1
Parent(s):
3c1ad6f
Update app.py
Browse files
app.py
CHANGED
@@ -1,10 +1,51 @@
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
title = "SimCSE"
|
3 |
-
description = "
|
4 |
-
article = "<p style='text-align: center'><a href='https://
|
5 |
examples = [
|
6 |
-
['
|
7 |
-
|
8 |
-
|
9 |
]
|
10 |
-
|
|
|
|
1 |
+
import torch
|
2 |
+
from scipy.spatial.distance import cosine
|
3 |
+
from transformers import AutoModel, AutoTokenizer
|
4 |
import gradio as gr
|
5 |
+
|
6 |
+
# Load the SimCSE tokenizer and encoder once at start-up; `from_pretrained`
# takes care of downloading the checkpoint automatically.
_CHECKPOINT = "princeton-nlp/sup-simcse-bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModel.from_pretrained(_CHECKPOINT)
|
9 |
+
|
10 |
+
def simcse(text1, text2, text3):
    """Score how similar ``text2`` and ``text3`` each are to ``text1``.

    The three texts are tokenized together as one padded, truncated batch
    with the module-level ``tokenizer`` and encoded with the module-level
    ``model``; the model's ``pooler_output`` is used as the embedding of
    each text.

    Args:
        text1: Reference text.
        text2: Text compared against ``text1``.
        text3: Text compared against ``text1``.

    Returns:
        A pair of single-entry dicts keyed ``"cosine similarity"``. The
        first holds the similarity between texts one and two, the second
        the similarity between texts one and three. Cosine similarities
        are in [-1, 1]; higher means more similar.
    """
    # Named ``encoded`` so it does not shadow the module-level ``inputs`` list.
    encoded = tokenizer(
        [text1, text2, text3],
        padding=True,
        truncation=True,
        return_tensors="pt",
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        # Only ``pooler_output`` is read, so per-layer hidden states are
        # not requested from the model.
        embeddings = model(**encoded, return_dict=True).pooler_output

    # Hand SciPy explicit NumPy rows rather than torch tensors.
    anchor, second, third = embeddings.cpu().numpy()

    # SciPy's ``cosine`` is a distance, so similarity is ``1 - distance``.
    # ``float`` keeps NumPy scalar types out of the returned dicts.
    sim_one_two = float(1 - cosine(anchor, second))
    sim_one_three = float(1 - cosine(anchor, third))
    return (
        {"cosine similarity": sim_one_two},
        {"cosine similarity": sim_one_three},
    )
|
28 |
+
|
29 |
+
|
30 |
+
# Three multi-line text boxes, one per argument of `simcse`.
inputs = [
    gr.inputs.Textbox(lines=5, label=f"Input Text {ordinal}")
    for ordinal in ("One", "Two", "Three")
]

# One label widget per score dict, in the order `simcse` returns them.
outputs = [
    gr.outputs.Label(
        type="confidences",
        label=f"Cosine similarity between text one and {other}",
    )
    for other in ("two", "three")
]
|
40 |
+
|
41 |
+
|
42 |
# Text shown on the demo page.
title = "SimCSE"
description = (
    "demo for Princeton-NLP SimCSE. To use it, simply add your text, "
    "or click one of the examples to load them. Read more at the links below."
)
article = (
    "<p style='text-align: center'>"
    "<a href='https://arxiv.org/abs/2104.08821'>"
    "SimCSE: Simple Contrastive Learning of Sentence Embeddings</a>"
    " | "
    "<a href='https://github.com/princeton-nlp/SimCSE'>Github Repo</a>"
    "</p>"
)

# A single example row: one value for each of the three text inputs.
examples = [
    [
        "There's a kid on a skateboard.",
        "A kid is skateboarding.",
        "A kid is inside the house.",
    ],
]

# Build the interface around `simcse` and start serving it.
demo = gr.Interface(
    simcse,
    inputs,
    outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
)
demo.launch()
|