File size: 8,187 Bytes
b576d3c
e05abd0
 
 
672fa81
edadbff
407d034
ffacbee
8886fb4
 
6220981
 
 
 
b214254
 
6220981
 
 
 
 
 
 
8886fb4
edadbff
f95c130
 
 
e05abd0
 
 
 
edadbff
c01bf0a
2aec54c
edadbff
012a8ca
 
 
 
 
a917722
012a8ca
 
 
057af42
012a8ca
 
 
 
 
edadbff
012a8ca
edadbff
012a8ca
edadbff
012a8ca
 
 
cb652c0
012a8ca
 
 
057af42
edadbff
 
08488e6
012a8ca
8fccea5
 
012a8ca
 
 
 
 
 
 
cb652c0
012a8ca
 
 
057af42
012a8ca
 
08488e6
012a8ca
 
 
 
edadbff
 
 
 
 
 
012a8ca
edadbff
 
e05abd0
c01bf0a
 
 
 
 
 
 
 
 
 
 
 
012a8ca
8151f8b
edadbff
e05abd0
08f3fa6
68418ab
 
edadbff
6104c6f
edadbff
3e9caff
b731d08
 
a1f109b
1ff1946
6d2fdfb
6794548
3e9caff
6794548
b731d08
82a0bb9
1ff1946
6d2fdfb
8151f8b
 
012a8ca
b731d08
82a0bb9
1ff1946
6d2fdfb
8151f8b
 
012a8ca
b731d08
8886fb4
 
 
 
 
 
0b128e9
7c12ff5
3937c15
c01bf0a
 
3937c15
6de3b6b
7c3c2eb
 
 
6de3b6b
8fccea5
c14ef23
 
745ad96
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
from transformers import BertTokenizerFast,TFBertForSequenceClassification,TextClassificationPipeline, AutoTokenizer, T5ForConditionalGeneration, pipeline, AutoModelForQuestionAnswering
import numpy as np
import tensorflow as tf
import gradio as gr 
import openai
import os
import torch 

#Summarization Fine Tune Model

# Cache of (tokenizer, model) pairs keyed by checkpoint path, so the weights
# are loaded once per process instead of on every request.
_SUMMARIZER_CACHE = {}


def summarize_text(text, model_path="leadingbridge/summarization"):
    """Summarize ``text`` with a T5-style sequence-to-sequence checkpoint.

    Args:
        text: The passage to summarize.
        model_path: Checkpoint identifier handed to ``from_pretrained`` for
            both the tokenizer and the model.

    Returns:
        The decoded summary with special tokens removed, or an empty string
        when ``text`` is empty, ``None`` or whitespace-only.
    """
    # Nothing to summarize: skip model loading and generation entirely.
    if not text or not text.strip():
        return ""

    if model_path not in _SUMMARIZER_CACHE:
        _SUMMARIZER_CACHE[model_path] = (
            AutoTokenizer.from_pretrained(model_path),
            T5ForConditionalGeneration.from_pretrained(model_path),
        )
    tokenizer, model = _SUMMARIZER_CACHE[model_path]

    # Encode the input as a PyTorch tensor of token ids.
    inputs = tokenizer.encode(text, return_tensors="pt")

    # Generate with the model's default generation settings and decode the
    # first (only) returned sequence.
    summary_ids = model.generate(inputs)
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Sentiment Analysis Pre-Trained Model
# Cache of ready-to-use classification pipelines keyed by checkpoint path and
# label mapping, so the model is loaded once per process rather than per call.
_SENTIMENT_PIPELINES = {}


def sentiment_analysis(text, model_path="leadingbridge/sentiment-analysis", id2label=None):
    """Classify the sentiment of ``text`` with a BERT sequence classifier.

    Args:
        text: The sentence (or list of sentences) passed to the pipeline.
        model_path: Checkpoint identifier handed to ``from_pretrained`` for
            both the tokenizer and the model.
        id2label: Mapping from class index to label name. Defaults to
            ``{0: 'negative', 1: 'positive'}`` when omitted.

    Returns:
        The raw output of ``TextClassificationPipeline`` for ``text``.
    """
    # A None sentinel avoids sharing one mutable dict between calls.
    if id2label is None:
        id2label = {0: 'negative', 1: 'positive'}

    # frozenset makes the mapping hashable and independent of key order.
    cache_key = (model_path, frozenset(id2label.items()))
    pipe = _SENTIMENT_PIPELINES.get(cache_key)
    if pipe is None:
        tokenizer = BertTokenizerFast.from_pretrained(model_path)
        model = TFBertForSequenceClassification.from_pretrained(model_path, id2label=id2label)
        pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer)
        _SENTIMENT_PIPELINES[cache_key] = pipe

    return pipe(text)


# Open AI Model
# The API key is read from the `openai_api` environment variable; if the
# variable is not set, this lookup raises KeyError while the module is loading.
openai.api_key = os.environ['openai_api']

# System prompt shared by both translation directions; only the target
# language differs.
_TRANSLATION_SYSTEM_PROMPT = (
    "As a professional translator, your task is to translate the following "
    "article to {language}, ensuring that the original tone, meaning, and "
    "context are preserved. It's important to provide an accurate and "
    "culturally appropriate translation for the target audience."
)


def _chat_completion(system_prompt, prompt, presence_penalty):
    """Send one system + user exchange to gpt-3.5-turbo and return the reply.

    Args:
        system_prompt: Content of the ``system`` message.
        prompt: Content of the ``user`` message.
        presence_penalty: Value forwarded as the request's presence penalty.

    Returns:
        The message content of the first choice in the response.
    """
    # NOTE(review): max_tokens=3000 presumably leaves limited room for the
    # prompt within the model's context window - confirm against model limits.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        temperature=0.8,
        max_tokens=3000,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=presence_penalty,
    )
    return response.choices[0].message.content


def openai_chatbot(prompt):
    """Return the general-purpose Chinese chatbot's reply to ``prompt``."""
    return _chat_completion(
        "You are a general chatbot that can answer anything in Chinese",
        prompt,
        presence_penalty=0.6,
    )


def openai_translation_ec(prompt):
    """Translate ``prompt`` into Chinese and return the translated text."""
    return _chat_completion(
        _TRANSLATION_SYSTEM_PROMPT.format(language="Chinese"),
        prompt,
        presence_penalty=1,
    )


def openai_translation_ce(prompt):
    """Translate ``prompt`` into English and return the translated text."""
    return _chat_completion(
        _TRANSLATION_SYSTEM_PROMPT.format(language="English"),
        prompt,
        presence_penalty=1,
    )

def chatgpt_clone(input, history):
    """Run one chat turn and record it in the conversation history.

    Args:
        input: The user's new message. The name shadows the builtin but is
            kept so existing keyword callers keep working.
        history: Sequence of ``(user_message, bot_reply)`` pairs from earlier
            turns, or a falsy value (e.g. ``None``) to start a new
            conversation.

    Returns:
        A 2-tuple holding the updated history twice (the same list object in
        both positions).
    """
    history = history or []

    # Flatten the pairs in conversation order. A comprehension is linear in
    # the number of turns and accepts pairs stored as tuples or as lists.
    utterances = [utterance for exchange in history for utterance in exchange]
    utterances.append(input)

    # The whole conversation so far is sent as one space-joined prompt.
    output = openai_chatbot(' '.join(utterances))

    history.append((input, output))
    return history, history

# Pretrained Question Answering Model

# Checkpoint shared by the extractive question-answering model and tokenizer.
_QA_CHECKPOINT = 'uer/roberta-base-chinese-extractive-qa'

model = AutoModelForQuestionAnswering.from_pretrained(_QA_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_QA_CHECKPOINT)
# Module-level question-answering pipeline built from the objects above.
QA = pipeline('question-answering', model=model, tokenizer=tokenizer)
# Switch the model to evaluation mode.
model.eval()

def cqa(question, context):
    """Answer ``question`` from ``context`` with the module-level ``QA`` pipeline.

    Args:
        question: The question text.
        context: The passage the answer is looked up in.

    Returns:
        Whatever the ``QA`` pipeline returns for this question/context pair.
    """
    qa_input = dict(question=question, context=context)
    return QA(qa_input)

"""# **Gradio Model**"""

# Gradio Output Model
# One tab per model. Each tab follows the same layout: heading, input field(s),
# output field, action button, then a short description.
with gr.Blocks() as demo:
    gr.Markdown('Welcome to the Chinese NLP Demo! Please select a model tab to interact with:')

    # Chat tab: `state` carries the conversation history between clicks.
    with gr.Tab("🤖Chatbot"):
        gr.Markdown("""<h4><center>🤖Chatbot</center></h4>""")
        chatbot = gr.Chatbot()
        message = gr.Textbox(placeholder="You can discuss any topic with the Chinese Chatbot assistant by typing any natural language in here", lines=3)
        state = gr.State()
        submit = gr.Button("Send")
        submit.click(chatgpt_clone, inputs=[message, state], outputs=[chatbot, state])
        gr.Markdown("This is a Chinese chatbot powered by the OpenAI language model. Enter your message above in Chinese and the chatbot will respond.")

    with gr.Tab("🤗Sentiment Analysis"):
        gr.Markdown("""<h4><center>🤗Sentiment Analysis</center></h4>""")
        inputs = gr.Textbox(placeholder="Type a Chinese sentence here, either positive or negative in sentiment.", lines=3)
        outputs = gr.Textbox(label="Sentiment Analysis")
        proceed_button = gr.Button("Proceed")
        proceed_button.click(fn=sentiment_analysis, inputs=inputs, outputs=outputs)
        gr.Markdown("This is a self-trained fine-tuned model using Chinese BERT for sentiment analysis. Enter a sentence in Chinese in the input box and click the 'proceed' button to get the sentiment analysis result.")

    with gr.Tab("🀄Chinese Translation"):
        gr.Markdown("""<h4><center>🀄Chinese Translation</center></h4>""")
        inputs = gr.Textbox(placeholder="Enter a short English sentence to translate to Chinese here.", lines=3)
        outputs = gr.Textbox(label="Translation Result")
        proceed_button = gr.Button("Translate")
        proceed_button.click(fn=openai_translation_ec, inputs=inputs, outputs=outputs)
        gr.Markdown("This model translate an English sentence to Chinese using the OpenAI engine. Enter an English short sentence in the input box and click the 'Translate' button to get the translation result in Chinese.")

    with gr.Tab("🔤English Translation"):
        gr.Markdown("""<h4><center>🔤English Translation</center></h4>""")
        inputs = gr.Textbox(placeholder="Enter a short Chinese sentence to translate to English here.", lines=3)
        outputs = gr.Textbox(label="Translation Result")
        proceed_button = gr.Button("Translate")
        proceed_button.click(fn=openai_translation_ce, inputs=inputs, outputs=outputs)
        gr.Markdown("This model translate a Chinese sentence to English using the OpenAI engine. Enter a Chinese short sentence in the input box and click the 'Translate' button to get the translation result in English.")

    with gr.Tab("📑Text Summarization"):
        gr.Markdown("""<h4><center>📑Text Summarization</center></h4>""")
        inputs = gr.Textbox(placeholder="Enter a Chinese text to summarize here.", lines=3)
        outputs = gr.Textbox(label="Summary")
        proceed_button = gr.Button("Summarize")
        proceed_button.click(fn=summarize_text, inputs=inputs, outputs=outputs)
        gr.Markdown("This self-trained fine-tuned model summarizes Chinese text using the MT5 language model. Enter a Chinese text in the input box and click the 'Summarize' button to get the summary.")

    with gr.Tab("❓Chinese Q&A"):
        gr.Markdown("""<h4><center>❓Chinese Q&A</center></h4>""")
        # Fields are declared before the button so they render above it,
        # matching the layout of the other tabs.
        question_box = gr.Textbox(lines=1, label="Question Input", placeholder="Enter the question you want to ask")
        context_box = gr.Textbox(lines=9, label="Answer Source", placeholder="Enter the answer source article in here")
        answer_box = gr.Textbox(label="Answer Output")
        text_button = gr.Button("proceed")
        text_button.click(fn=cqa, inputs=[question_box, context_box], outputs=answer_box)
        gr.Markdown("This is a pre-trained Roberta Base Chinese Extractive question answering model. Enter the answer source and the question you want to ask and click the 'Proceed' button to get the answer of your question")

    gr.Markdown('''
    We are happy to share with you some Chinese language models that we've made using NLP. When we looked online, we noticed that there weren't many resources available for Chinese NLP, so we hope that our models can be useful to you.
    We want to mention that these models aren't perfect and there is still room for improvement. Because of limited resources, there might be some mistakes or limitations in the models.
    However, We hope that you find them helpful and that you can help make them even better.
    ''')



# Start serving the Blocks app defined above.
demo.launch()