|
import streamlit as st |
|
import os |
|
import pandas as pd |
|
import json |
|
from html import escape |
|
import difflib |
|
|
|
def generate_diff_html_word_level(text1, text2):
    """Render a word-level diff between two texts as an HTML fragment.

    Both texts are tokenized with ``str.split()``, so any run of whitespace
    (including line breaks) is collapsed: words in the output are always
    separated by a single space. Words present only in ``text1`` are wrapped
    in ``<del>``, words present only in ``text2`` in ``<ins>``, and every
    word is HTML-escaped before being embedded.

    Args:
        text1: The original text.
        text2: The altered text.

    Returns:
        A string holding a ``<pre style="white-space: pre-wrap;">`` element
        with the diff markup inside it.
    """
    words1 = text1.split()
    words2 = text2.split()

    def _deleted(words):
        # Highlight words removed from text1.
        return '<del style="background-color: #fbb6ce;">' + escape(' '.join(words)) + '</del>'

    def _inserted(words):
        # Highlight words added in text2.
        return '<ins style="background-color: #b7e4c7;">' + escape(' '.join(words)) + '</ins>'

    diff = []
    matcher = difflib.SequenceMatcher(None, words1, words2)

    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == 'equal':
            diff.append(escape(' '.join(words1[i1:i2])))
            continue
        # A 'replace' is rendered as the deletion followed by the insertion.
        if tag in ('replace', 'delete'):
            diff.append(_deleted(words1[i1:i2]))
        if tag in ('replace', 'insert'):
            diff.append(_inserted(words2[j1:j2]))

    # Segments are separated by one space, matching the word separator.
    final_html = ' '.join(diff)
    return f'<pre style="white-space: pre-wrap;">{final_html}</pre>'
|
|
|
# Presumably selects the pure-Python protobuf backend.
# NOTE(review): this runs after `streamlit` has already been imported; if the
# variable is only read when protobuf is first imported, it may need to be set
# earlier to take effect — confirm.
os.environ.update({"PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION": "python"})

# Page-level layout setting for the app.
st.set_page_config(layout="wide")
|
|
|
@st.cache_data
def convert_df(df):
    """Serialize a DataFrame to CSV and return it as UTF-8 encoded bytes.

    The index is not written and fields are quoted with double quotes.
    """
    csv_text = df.to_csv(index=False, quotechar='"')
    return csv_text.encode('utf-8')
|
|
|
@st.cache_data
def load_narratives_data():
    """Load ``narratives.jsonl`` into a DataFrame, one row per JSON line.

    The file is read from the current working directory. Blank lines are
    skipped so a trailing newline or spacer line does not abort loading.

    Returns:
        A ``pandas.DataFrame`` built from the decoded JSON objects.

    Raises:
        FileNotFoundError: If ``narratives.jsonl`` does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 rather than the platform default encoding,
    # which can fail on non-ASCII narrative text.
    with open("narratives.jsonl", "r", encoding="utf-8") as f:
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)
|
|
|
# Load every narrative record once per run (the loader is cached).
narratives_df = load_narratives_data()

# Derive the language code from the id: the first "-rus-", "-zho-" or "-fas-"
# segment found; ids without one get a missing value.
narratives_df['language'] = narratives_df['id'].str.extract('-(rus|zho|fas)-', expand=False)
|
|
|
# Two-column page body with relative widths 1:3.
col1, col2 = st.columns(spec=[1, 3], gap="large")

# The sidebar currently only carries a heading.
st.sidebar.title("Options")
|
|
|
# Left column: language filter plus two linked ways of picking a narrative
# (by numeric index or by ID). Index -1 / "Overview" means no narrative.
# NOTE(review): the original indentation was not recoverable; everything up to
# the divider is assumed to belong to this column — confirm.
with col1:
    st.title("Narratives")

    def reset_navigation():
        # Indices and IDs refer to the filtered list, so a selection made
        # under one language filter is meaningless (or out of range) under
        # another: fall back to the overview whenever the filter changes.
        st.session_state.narrative_index = -1
        st.session_state.narrative_number = -1
        st.session_state.selectbox_narrative = "Overview"

    selected_language = st.selectbox(
        "Select language",
        ["All", "rus", "zho", "fas"],
        on_change=reset_navigation,
    )

    if selected_language != "All":
        filtered_df = narratives_df[narratives_df['language'] == selected_language]
    else:
        filtered_df = narratives_df

    narrative_ids = filtered_df["id"].tolist()
    container_for_nav = st.container()

    def sync_from_drop():
        # Callback for the ID dropdown: mirror the choice into the index
        # widget. The page renders from the number input's value, so the
        # widget key itself must be updated, not only `narrative_index`.
        if st.session_state.selectbox_narrative == "Overview":
            index = -1
        else:
            index = narrative_ids.index(st.session_state.selectbox_narrative)
        st.session_state.narrative_index = index
        st.session_state.narrative_number = index

    def sync_from_number():
        # Callback for the index input: mirror the choice into the dropdown.
        st.session_state.narrative_index = st.session_state.narrative_number
        if st.session_state.narrative_number == -1:
            st.session_state.selectbox_narrative = "Overview"
        else:
            st.session_state.selectbox_narrative = narrative_ids[st.session_state.narrative_number]

    narrative_number = container_for_nav.number_input(
        min_value=-1, step=1, max_value=len(narrative_ids) - 1,
        on_change=sync_from_number,
        label=f"Select narrative by index (up to **{len(narrative_ids) - 1}**)",
        key="narrative_number"
    )
    selectbox_narrative = container_for_nav.selectbox(
        "Select narrative by ID",
        ["Overview"] + narrative_ids,
        on_change=sync_from_drop,
        key="selectbox_narrative"
    )
    st.divider()
|
|
|
# Right column: either the overview or the editor for the chosen narrative.
with col2:
    narrative_index = narrative_number

    if narrative_index < 0:
        # No narrative selected: show summary information.
        st.title("Overview")
        st.write(f"Total number of narratives: {len(filtered_df)}")
        if selected_language != "All":
            st.write(f"Selected language: {selected_language}")
        st.write("Select a narrative from the sidebar to view and edit its details.")
    else:
        # Positional lookup within the currently filtered narratives.
        narrative = filtered_df.iloc[narrative_index]

        st.markdown(
            "<h1 style='text-align: center; color: black;text-decoration: underline;'>Editor</h1>",
            unsafe_allow_html=True,
        )

        container = st.container()

        container.subheader(f"Narrative ID: {narrative['id']}")
        container.divider()

        # Word-level diff between the two English versions, in a bordered box.
        container.subheader("Diff: Original English vs Altered English")
        original_english = narrative['original_english'].strip()
        altered_english = narrative['altered_english'].strip()
        processed_diff = generate_diff_html_word_level(original_english, altered_english)
        container.container(border=True).markdown(processed_diff, unsafe_allow_html=True)
        container.divider()

        # Editable copy of the source text.
        container.subheader("Original Text")
        original_input = container.text_area(
            "Edit the original text",
            value=narrative['original'].strip(),
            height=300,
        )