|
import streamlit as st |
|
from transformers import T5ForConditionalGeneration, T5Tokenizer |
|
import opencc |
|
|
|
|
|
# Identifier (or path) of the checkpoint handed to ``from_pretrained``.
local_path = 'utrobinmv/t5_summary_en_ru_zh_base_2048'


@st.cache_resource
def _load_summarizer(path):
    """Load the T5 model and its tokenizer, cached across script reruns.

    Streamlit re-executes the whole script on every widget interaction;
    without ``st.cache_resource`` the checkpoint would be reloaded each
    time. The cached objects are shared, so callers must not mutate them.

    Args:
        path: Model identifier or directory accepted by ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loaded_model = T5ForConditionalGeneration.from_pretrained(path)
    loaded_tokenizer = T5Tokenizer.from_pretrained(path)
    return loaded_model, loaded_tokenizer


# Module-level names are kept so the rest of the script can keep using them.
model, tokenizer = _load_summarizer(local_path)
|
|
|
|
|
|
|
|
|
# Page heading.
st.title(body="中文文章摘要工具")

# OpenCC converter built from the 's2t' configuration; it is applied to the
# generated summary before the summary is displayed.
converter = opencc.OpenCC('s2t')

# Free-text input for the article; starts out empty.
article = st.text_area(label="請輸入文章", value="")
|
|
|
|
|
@st.cache_data
def generate_summary(article):
    """Return a model-generated summary of ``article``.

    The text is prefixed with "摘要:", tokenized with truncation at 1024
    tokens, and passed to the module-level ``model`` for beam-search
    generation. The first returned sequence is decoded with special tokens
    removed. ``st.cache_data`` memoizes the result per distinct ``article``.

    Args:
        article: The text to summarize.

    Returns:
        The decoded summary string.
    """
    # NOTE(review): the published usage for this checkpoint appears to rely
    # on English task prefixes (e.g. 'summary: ') -- confirm that "摘要:"
    # is the intended prompt.
    prompt = "摘要:" + article
    input_ids = tokenizer.encode(
        prompt,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
    )

    generation_options = {
        "max_length": 180,
        "min_length": 60,
        "length_penalty": 2.0,
        "num_beams": 4,
        "early_stopping": True,
    }
    generated = model.generate(input_ids, **generation_options)

    # Only the first (index 0) generated sequence is decoded.
    return tokenizer.decode(generated[0], skip_special_tokens=True)
|
|
|
|
|
# Summarize only when the button is pressed on this run of the script.
if st.button("生成摘要"):
    if article.strip():
        # Generate the summary, then pass it through the OpenCC converter
        # before rendering it under its own subheading.
        summary = generate_summary(article)
        traditional_summary = converter.convert(summary)
        st.subheader("摘要:")
        st.write(traditional_summary)
    else:
        # Nothing but whitespace was entered: show an error instead.
        st.error("請輸入文章。")
|
|