File size: 4,647 Bytes
b38e27d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
770d15a
 
 
b38e27d
 
 
 
 
 
 
770d15a
 
 
 
 
 
 
 
 
 
 
 
 
b38e27d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
770d15a
b38e27d
 
 
 
 
 
 
 
 
ea3796f
b38e27d
 
 
 
 
ea3796f
b38e27d
 
 
770d15a
 
 
b38e27d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import streamlit as st
import os
import pandas as pd
import json
from html import escape
import difflib

def generate_diff_html_word_level(text1, text2):
    """
    Render a word-by-word comparison of text1 and text2 as an HTML snippet.

    Both inputs are tokenised with ``str.split()``, so any run of whitespace
    (including newlines) acts as a single separator.  Words only present in
    text1 are wrapped in a pink ``<del>``, words only present in text2 in a
    green ``<ins>``, and unchanged runs are emitted as plain text.  All word
    content is HTML-escaped.  The pieces are joined with single spaces and the
    result is wrapped in a ``<pre>`` element with ``white-space: pre-wrap``.
    """
    old_words, new_words = text1.split(), text2.split()

    def as_deleted(start, stop):
        # Words taken from the first text, marked as removed.
        return '<del style="background-color: #fbb6ce;">' + escape(' '.join(old_words[start:stop])) + '</del>'

    def as_inserted(start, stop):
        # Words taken from the second text, marked as added.
        return '<ins style="background-color: #b7e4c7;">' + escape(' '.join(new_words[start:stop])) + '</ins>'

    pieces = []
    opcodes = difflib.SequenceMatcher(None, old_words, new_words).get_opcodes()
    for op, a_lo, a_hi, b_lo, b_hi in opcodes:
        if op == 'equal':
            pieces.append(escape(' '.join(old_words[a_lo:a_hi])))
            continue
        # A 'replace' is rendered as a deletion immediately followed by an insertion.
        if op in ('replace', 'delete'):
            pieces.append(as_deleted(a_lo, a_hi))
        if op in ('replace', 'insert'):
            pieces.append(as_inserted(b_lo, b_hi))

    body = ' '.join(pieces)
    # Use a non-breaking space between a deletion and the insertion that follows it.
    body = body.replace('</del> <ins', '</del>&nbsp;<ins')
    return f'<pre style="white-space: pre-wrap;">{body}</pre>'

# Set the protobuf implementation switch to "python" for this process
# (presumably to select the pure-Python protobuf backend).
# NOTE(review): this runs after `import streamlit` at the top of the file; if
# protobuf has already been imported by then the setting may have no effect —
# confirm, and move it above the imports if it is still required.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
# Configure the page to use the wide layout.
st.set_page_config(layout="wide")

@st.cache_data
def convert_df(df):
    """Serialise ``df`` to CSV text (no index column, ``"`` as the quote
    character) and return it encoded as UTF-8 bytes."""
    csv_text = df.to_csv(index=False, quotechar='"')
    return csv_text.encode('utf-8')

@st.cache_data
def load_narratives_data():
    """Read ``narratives.jsonl`` and return its records as a DataFrame.

    Each non-blank line of the file is parsed as one JSON document and becomes
    one row of the result.  The path is relative to the current working
    directory.

    Returns:
        pandas.DataFrame: one row per parsed line.

    Raises:
        FileNotFoundError: if ``narratives.jsonl`` does not exist.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform default
    # encoding, which differs between systems and can corrupt or reject
    # non-ASCII text.
    with open("narratives.jsonl", "r", encoding="utf-8") as f:
        # Skip blank lines (e.g. stray empty lines at the end of the file),
        # which json.loads would otherwise reject.
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)

# Load all narrative records (the loader is wrapped in st.cache_data).
narratives_df = load_narratives_data()

# Extract language from id: the code is one of rus/zho/fas surrounded by
# hyphens; ids without such a segment get a missing value.
narratives_df['language'] = narratives_df['id'].str.extract('-(rus|zho|fas)-')

# Two-column page body with a 1:3 width ratio; col1 holds the navigation
# widgets and col2 the editor/overview.
col1, col2 = st.columns([1, 3], gap="large")

# The sidebar currently only shows a heading.
with st.sidebar:
    st.title("Options")

with col1:
    st.title("Narratives")

    def reset_navigation():
        """Return to the overview when the language filter changes.

        The stored index and ID refer to the previously filtered list; keeping
        them would point at a different (or non-existent) narrative.
        """
        st.session_state.narrative_index = -1
        st.session_state.narrative_number = -1
        st.session_state.selectbox_narrative = "Overview"

    # Add language filter
    selected_language = st.selectbox(
        "Select language",
        ["All", "rus", "zho", "fas"],
        on_change=reset_navigation,
    )

    if selected_language != "All":
        filtered_df = narratives_df[narratives_df['language'] == selected_language]
    else:
        filtered_df = narratives_df

    narrative_ids = filtered_df["id"].tolist()
    container_for_nav = st.container()

    def sync_from_drop():
        """Mirror the ID dropdown's selection into the index widget.

        "Overview" maps to index -1; any other entry maps to its position in
        ``narrative_ids``.
        """
        if st.session_state.selectbox_narrative == "Overview":
            position = -1
        else:
            position = narrative_ids.index(st.session_state.selectbox_narrative)
        st.session_state.narrative_index = position
        # The editor reads the number input's value, so the number widget has
        # to be updated as well; otherwise choosing an ID changes nothing.
        st.session_state.narrative_number = position

    def sync_from_number():
        """Mirror the index widget's value into the ID dropdown."""
        st.session_state.narrative_index = st.session_state.narrative_number
        if st.session_state.narrative_number == -1:
            st.session_state.selectbox_narrative = "Overview"
        else:
            st.session_state.selectbox_narrative = narrative_ids[st.session_state.narrative_number]

    # Index -1 stands for the overview page; 0..len-1 address the narratives.
    narrative_number = container_for_nav.number_input(
        min_value=-1, step=1, max_value=len(narrative_ids) - 1,
        on_change=sync_from_number,
        label=f"Select narrative by index (up to **{len(narrative_ids) - 1}**)",
        key="narrative_number"
    )
    selectbox_narrative = container_for_nav.selectbox(
        "Select narrative by ID",
        ["Overview"] + narrative_ids,
        on_change=sync_from_drop,
        key="selectbox_narrative"
    )
    st.divider()

with col2:
    narrative_index = narrative_number

    # Show the editor only for an index that exists in the currently filtered
    # list.  A negative index means "overview"; an index past the end (e.g. a
    # stale value after the filter shrank the list) also falls back to the
    # overview instead of raising IndexError in .iloc.
    if 0 <= narrative_index < len(filtered_df):
        narrative = filtered_df.iloc[narrative_index]

        st.markdown("<h1 style='text-align: center; color: black;text-decoration: underline;'>Editor</h1>", unsafe_allow_html=True)

        container = st.container()

        container.subheader(f"Narrative ID: {narrative['id']}")
        container.divider()

        # Word-level diff between the two English versions, rendered as HTML.
        container.subheader("Diff: Original English vs Altered English")
        processed_diff = generate_diff_html_word_level(narrative['original_english'].strip(), narrative['altered_english'].strip())
        with container.container(border=True):
            st.markdown(processed_diff, unsafe_allow_html=True)
        container.divider()

        # Editable copy of the original-language text; the edited value is
        # kept in original_input but not written anywhere in this block.
        container.subheader("Original Text")
        original_input = container.text_area("Edit the original text", value=narrative['original'].strip(), height=300)

    else:
        st.title("Overview")
        st.write(f"Total number of narratives: {len(filtered_df)}")
        if selected_language != "All":
            st.write(f"Selected language: {selected_language}")
        st.write("Select a narrative from the sidebar to view and edit its details.")