disable baidu
Browse files- README.md +1 -1
- __pycache__/outline.cpython-39.pyc +0 -0
- __pycache__/run.cpython-39.pyc +0 -0
- __pycache__/util.cpython-39.pyc +0 -0
- app.py +4 -11
- outline.py +4 -4
- run.py +1 -16
- util.py +12 -10
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 🐨
|
|
4 |
colorFrom: blue
|
5 |
colorTo: green
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 3.
|
8 |
app_file: app.py
|
9 |
timeout: 300
|
10 |
pinned: false
|
|
|
4 |
colorFrom: blue
|
5 |
colorTo: green
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 3.11
|
8 |
app_file: app.py
|
9 |
timeout: 300
|
10 |
pinned: false
|
__pycache__/outline.cpython-39.pyc
CHANGED
Binary files a/__pycache__/outline.cpython-39.pyc and b/__pycache__/outline.cpython-39.pyc differ
|
|
__pycache__/run.cpython-39.pyc
CHANGED
Binary files a/__pycache__/run.cpython-39.pyc and b/__pycache__/run.cpython-39.pyc differ
|
|
__pycache__/util.cpython-39.pyc
CHANGED
Binary files a/__pycache__/util.cpython-39.pyc and b/__pycache__/util.cpython-39.pyc differ
|
|
app.py
CHANGED
@@ -4,10 +4,6 @@ import textInput
|
|
4 |
from BERT_inference import BertClassificationModel
|
5 |
|
6 |
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
output = []
|
12 |
keys = []
|
13 |
|
@@ -16,7 +12,7 @@ keys = []
|
|
16 |
|
17 |
with gr.Blocks(css = ".output {min-height: 500px}") as demo:
|
18 |
#用markdown语法编辑输出一段话
|
19 |
-
gr.Markdown("#
|
20 |
gr.Markdown("请选择要输入的文件或填入文本")
|
21 |
topic_num = gr.Number(label="主题个数")
|
22 |
max_length = gr.Number(label="摘要最大长度")
|
@@ -42,12 +38,9 @@ with gr.Blocks(css = ".output {min-height: 500px}") as demo:
|
|
42 |
file_txt_output = gr.File(label="txt格式")
|
43 |
file_docx_output = gr.File(label="docx格式")
|
44 |
file_pdf_output = gr.File(label="pdf格式")
|
45 |
-
|
46 |
-
# gr.Markdown("Look at me...")
|
47 |
text_button.click(textInput.text_dump_to_lines, inputs=[text_input,topic_num,max_length], outputs=[text_keys_output,text_ab_output,file_txt_output,file_docx_output,file_pdf_output])
|
48 |
file_button.click(textInput.file_dump_to_lines,inputs=[file_input,topic_num,max_length], outputs=[text_keys_output,text_ab_output,file_txt_output,file_docx_output,file_pdf_output])
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
except Exception as e:
|
53 |
-
print("error",e)
|
|
|
4 |
from BERT_inference import BertClassificationModel
|
5 |
|
6 |
|
|
|
|
|
|
|
|
|
7 |
output = []
|
8 |
keys = []
|
9 |
|
|
|
12 |
|
13 |
with gr.Blocks(css = ".output {min-height: 500px}") as demo:
|
14 |
#用markdown语法编辑输出一段话
|
15 |
+
gr.Markdown("# TSA - 文本整理助手")
|
16 |
gr.Markdown("请选择要输入的文件或填入文本")
|
17 |
topic_num = gr.Number(label="主题个数")
|
18 |
max_length = gr.Number(label="摘要最大长度")
|
|
|
38 |
file_txt_output = gr.File(label="txt格式")
|
39 |
file_docx_output = gr.File(label="docx格式")
|
40 |
file_pdf_output = gr.File(label="pdf格式")
|
41 |
+
|
|
|
42 |
text_button.click(textInput.text_dump_to_lines, inputs=[text_input,topic_num,max_length], outputs=[text_keys_output,text_ab_output,file_txt_output,file_docx_output,file_pdf_output])
|
43 |
file_button.click(textInput.file_dump_to_lines,inputs=[file_input,topic_num,max_length], outputs=[text_keys_output,text_ab_output,file_txt_output,file_docx_output,file_pdf_output])
|
44 |
|
45 |
+
|
46 |
+
demo.queue().launch()
|
|
|
|
outline.py
CHANGED
@@ -65,15 +65,15 @@ def passage_outline(matrix,sentences):
|
|
65 |
structure = {}
|
66 |
for each in result.keys():
|
67 |
structure[each] =[sentences[i] for i in result[each]]
|
68 |
-
|
69 |
outline_list = []
|
70 |
for key in sorted(structure.keys()):
|
71 |
outline_list.append(f"主题:")
|
72 |
-
|
73 |
for sentence in structure[key]:
|
74 |
outline_list.append(sentence)
|
75 |
-
|
76 |
-
return
|
77 |
if __name__ == "__main__":
|
78 |
matrix = np.array([[0.0 ,0.02124888, 0.10647043 ,0.09494194 ,0.0689209 ],
|
79 |
[0.01600688 ,0.0 ,0.05879448 ,0.0331325 , 0.0155093 ],
|
|
|
65 |
structure = {}
|
66 |
for each in result.keys():
|
67 |
structure[each] =[sentences[i] for i in result[each]]
|
68 |
+
outl = []
|
69 |
outline_list = []
|
70 |
for key in sorted(structure.keys()):
|
71 |
outline_list.append(f"主题:")
|
72 |
+
outl.append(f"主题:\n")
|
73 |
for sentence in structure[key]:
|
74 |
outline_list.append(sentence)
|
75 |
+
outl.append(f"- {sentence}\n")
|
76 |
+
return outl,outline_list
|
77 |
if __name__ == "__main__":
|
78 |
matrix = np.array([[0.0 ,0.02124888, 0.10647043 ,0.09494194 ,0.0689209 ],
|
79 |
[0.01600688 ,0.0 ,0.05879448 ,0.0331325 , 0.0155093 ],
|
run.py
CHANGED
@@ -8,21 +8,6 @@ from inference import BertClassificationModel
|
|
8 |
# output:file/text/topic_sentence
|
9 |
|
10 |
|
11 |
-
# file_process:
|
12 |
-
# in util
|
13 |
-
# read file code
|
14 |
-
# file to json_text
|
15 |
-
|
16 |
-
# convert:
|
17 |
-
# in util
|
18 |
-
# convert code
|
19 |
-
# json_text to text
|
20 |
-
|
21 |
-
# process:
|
22 |
-
# in util
|
23 |
-
# text process code
|
24 |
-
# del stop seg
|
25 |
-
|
26 |
def texClear(article):
|
27 |
sentencesCleared = [util.clean_text(sentence) for sentence in article]
|
28 |
sentencesCleared = [string for string in sentencesCleared if string != '' ]
|
@@ -36,7 +21,7 @@ def textToAb(sentences, article, topic_num, max_length):
|
|
36 |
title_dict,title = util.generation(groups, max_length)
|
37 |
# ans:
|
38 |
# {Ai_abstruct:(main_sentence,paragraph)}
|
39 |
-
print(title)
|
40 |
matrix = inference.inference_matrix(title)
|
41 |
|
42 |
outl,outline_list = outline.passage_outline(matrix,title)
|
|
|
8 |
# output:file/text/topic_sentence
|
9 |
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
def texClear(article):
|
12 |
sentencesCleared = [util.clean_text(sentence) for sentence in article]
|
13 |
sentencesCleared = [string for string in sentencesCleared if string != '' ]
|
|
|
21 |
title_dict,title = util.generation(groups, max_length)
|
22 |
# ans:
|
23 |
# {Ai_abstruct:(main_sentence,paragraph)}
|
24 |
+
# print(title)
|
25 |
matrix = inference.inference_matrix(title)
|
26 |
|
27 |
outl,outline_list = outline.passage_outline(matrix,title)
|
util.py
CHANGED
@@ -15,7 +15,7 @@ def post_url(url, headers, payload):
|
|
15 |
|
16 |
def seg(text):
|
17 |
text = text.replace('\n', " ")
|
18 |
-
sentences = re.split(r'(?<=[。!?.!?:])\s*', text)
|
19 |
sentences = [string for string in sentences if string != '']
|
20 |
return sentences
|
21 |
|
@@ -72,16 +72,18 @@ def generation(para, max_length):
|
|
72 |
'Accept': 'application/json'
|
73 |
}
|
74 |
|
75 |
-
response = post_url(url, headers, payload)
|
76 |
-
text_dict = json.loads(response.text)
|
77 |
# print(text_dict)
|
78 |
-
while('summary' not in text_dict.keys()):
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
topic[text_dict['summary']] = (j, k)
|
84 |
-
Ai_abstract.append(text_dict['summary'])
|
|
|
|
|
85 |
return topic,Ai_abstract
|
86 |
def formate_text(title_dict,outline_list):
|
87 |
formated = []
|
|
|
15 |
|
16 |
def seg(text):
|
17 |
text = text.replace('\n', " ")
|
18 |
+
sentences = re.split(r'(?<=[。!?.!?: ])\s*', text)
|
19 |
sentences = [string for string in sentences if string != '']
|
20 |
return sentences
|
21 |
|
|
|
72 |
'Accept': 'application/json'
|
73 |
}
|
74 |
|
75 |
+
# response = post_url(url, headers, payload)
|
76 |
+
# text_dict = json.loads(response.text)
|
77 |
# print(text_dict)
|
78 |
+
# while('summary' not in text_dict.keys()):
|
79 |
+
# response = post_url(url, headers, payload)
|
80 |
+
# text_dict = json.loads(response.text)
|
81 |
+
# print("ReTrying")
|
82 |
+
|
83 |
+
# topic[text_dict['summary']] = (j, k)
|
84 |
+
# Ai_abstract.append(text_dict['summary'])
|
85 |
+
topic[j] = (j, k)
|
86 |
+
Ai_abstract.append(j)
|
87 |
return topic,Ai_abstract
|
88 |
def formate_text(title_dict,outline_list):
|
89 |
formated = []
|