Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import os, sys
|
|
2 |
import streamlit as st
|
3 |
import pandas as pd
|
4 |
import numpy as np
|
|
|
5 |
from sklearn.metrics.pairwise import paired_cosine_distances
|
6 |
from sklearn.preprocessing import normalize
|
7 |
from rolaser import RoLaserEncoder
|
@@ -21,18 +22,33 @@ c_rolaser_vocab = f"{os.environ['ROLASER']}/models/c-rolaser.cvocab"
|
|
21 |
c_rolaser_tokenizer = 'char'
|
22 |
c_rolaser_model = RoLaserEncoder(model_path=c_rolaser_checkpoint, vocab=c_rolaser_vocab, tokenizer=c_rolaser_tokenizer)
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
def add_text_inputs(i):
|
25 |
col1, col2 = st.columns(2)
|
26 |
with col1:
|
27 |
-
text_input1 = st.text_input('Enter standard text here:', f'std{i}')
|
28 |
with col2:
|
29 |
-
text_input2 = st.text_input('Enter non-standard text here:', f'ugc{i}')
|
30 |
return text_input1, text_input2
|
31 |
|
32 |
def main():
|
33 |
st.title('Pairwise Cosine Distance Calculator')
|
34 |
|
35 |
-
num_pairs = st.sidebar.number_input('Number of Text Input Pairs', min_value=1, max_value=10, value=
|
36 |
|
37 |
std_text_inputs = []
|
38 |
ugc_text_inputs = []
|
@@ -41,11 +57,6 @@ def main():
|
|
41 |
std_text_inputs.append(pair[0])
|
42 |
ugc_text_inputs.append(pair[1])
|
43 |
|
44 |
-
if st.button('Add Text Input Pair'):
|
45 |
-
pair = add_text_inputs(len(std_text_inputs))
|
46 |
-
std_text_inputs.append(pair[0])
|
47 |
-
ugc_text_inputs.append(pair[1])
|
48 |
-
|
49 |
if st.button('Submit'):
|
50 |
X_std_laser = normalize(laser_model.encode(std_text_inputs))
|
51 |
X_ugc_laser = normalize(laser_model.encode(ugc_text_inputs))
|
@@ -60,19 +71,22 @@ def main():
|
|
60 |
X_cos_c_rolaser = paired_cosine_distances(X_std_c_rolaser, X_ugc_c_rolaser)
|
61 |
|
62 |
outputs = pd.DataFrame(columns=[ 'model', 'pair', 'ugc', 'std', 'cos'])
|
63 |
-
outputs['model'] = np.repeat(['LASER', 'RoLASER', '
|
64 |
outputs['pair'] = np.tile(np.arange(1,num_pairs+1), 3)
|
65 |
outputs['std'] = np.tile(std_text_inputs, 3)
|
66 |
outputs['ugc'] = np.tile(ugc_text_inputs, 3)
|
67 |
-
outputs['cos'] = np.concatenate([X_cos_laser, X_cos_rolaser, X_cos_c_rolaser]
|
68 |
|
69 |
st.write('## Cosine Distance Scores:')
|
70 |
-
|
|
|
|
|
|
|
|
|
71 |
|
72 |
st.write('## Average Cosine Distance Scores:')
|
73 |
-
st.write(
|
74 |
-
|
75 |
-
st.write(f'C-RoLASER: {outputs[outputs["model"]=="C-RoLASER"]["cos"].mean()}')
|
76 |
|
77 |
if __name__ == "__main__":
|
78 |
main()
|
|
|
2 |
import streamlit as st
|
3 |
import pandas as pd
|
4 |
import numpy as np
|
5 |
+
import plotly.express as px
|
6 |
from sklearn.metrics.pairwise import paired_cosine_distances
|
7 |
from sklearn.preprocessing import normalize
|
8 |
from rolaser import RoLaserEncoder
|
|
|
22 |
c_rolaser_tokenizer = 'char'
|
23 |
c_rolaser_model = RoLaserEncoder(model_path=c_rolaser_checkpoint, vocab=c_rolaser_vocab, tokenizer=c_rolaser_tokenizer)
|
24 |
|
25 |
+
|
26 |
+
STD_SENTENCES = ['See you tomorrow.'] * 10
|
27 |
+
UGC_SENTENCES = [
|
28 |
+
'See you tmrw.',
|
29 |
+
'See you t03orro3.',
|
30 |
+
'C. U. tomorrow.',
|
31 |
+
'sea you tomorrow.',
|
32 |
+
'See yo utomorrow.',
|
33 |
+
'See you tkmoerow.',
|
34 |
+
'Cu 2moro.',
|
35 |
+
'See yow tomorrow.',
|
36 |
+
'C. Yew tomorrow.',
|
37 |
+
'c ya 2morrow.'
|
38 |
+
]
|
39 |
+
|
40 |
def add_text_inputs(i):
    """Render one row of paired text boxes and return their current values.

    Two side-by-side columns are created. The first holds the standard
    sentence and the second the non-standard one. The boxes are pre-filled
    from ``STD_SENTENCES[i]`` and ``UGC_SENTENCES[i]`` and use the widget
    keys ``std{i}`` and ``ugc{i}``.

    Returns a ``(standard_text, non_standard_text)`` tuple.
    """
    left, right = st.columns(2)
    # Calling text_input on a column object places the widget inside that
    # column, the same as wrapping the call in a `with column:` block.
    standard = left.text_input('Enter standard text here:', key=f'std{i}', value=STD_SENTENCES[i])
    non_standard = right.text_input('Enter non-standard text here:', key=f'ugc{i}', value=UGC_SENTENCES[i])
    return standard, non_standard
|
47 |
|
48 |
def main():
|
49 |
st.title('Pairwise Cosine Distance Calculator')
|
50 |
|
51 |
+
num_pairs = st.sidebar.number_input('Number of Text Input Pairs', min_value=1, max_value=10, value=5)
|
52 |
|
53 |
std_text_inputs = []
|
54 |
ugc_text_inputs = []
|
|
|
57 |
std_text_inputs.append(pair[0])
|
58 |
ugc_text_inputs.append(pair[1])
|
59 |
|
|
|
|
|
|
|
|
|
|
|
60 |
if st.button('Submit'):
|
61 |
X_std_laser = normalize(laser_model.encode(std_text_inputs))
|
62 |
X_ugc_laser = normalize(laser_model.encode(ugc_text_inputs))
|
|
|
71 |
X_cos_c_rolaser = paired_cosine_distances(X_std_c_rolaser, X_ugc_c_rolaser)
|
72 |
|
73 |
outputs = pd.DataFrame(columns=[ 'model', 'pair', 'ugc', 'std', 'cos'])
|
74 |
+
outputs['model'] = np.repeat(['LASER', 'RoLASER', 'c-RoLASER'], num_pairs)
|
75 |
outputs['pair'] = np.tile(np.arange(1,num_pairs+1), 3)
|
76 |
outputs['std'] = np.tile(std_text_inputs, 3)
|
77 |
outputs['ugc'] = np.tile(ugc_text_inputs, 3)
|
78 |
+
outputs['cos'] = np.concatenate([X_cos_laser, X_cos_rolaser, X_cos_c_rolaser])
|
79 |
|
80 |
st.write('## Cosine Distance Scores:')
|
81 |
+
# Grouped bars: one group per text pair, one bar per model. 'pair' and 'cos' are real columns of `outputs`; the previous 'x_column'/'y_column' placeholders do not exist there.
fig = px.bar(outputs, x='pair', y='cos', color='model', barmode='group')
|
82 |
+
fig.update_layout(title='Cosine Distance Scores')
|
83 |
+
fig.update_xaxes(title_text='Text Input Pair')
|
84 |
+
fig.update_yaxes(title_text='Cosine Distance')
|
85 |
+
st.plotly_chart(fig, use_container_width=True)
|
86 |
|
87 |
st.write('## Average Cosine Distance Scores:')
|
88 |
+
st.write(outputs.groupby('model')['cos'].describe())
|
89 |
+
|
|
|
90 |
|
91 |
if __name__ == "__main__":
|
92 |
main()
|