Add token streaming
- .gitignore +4 -1
- app.py +56 -66
- requirements.txt +2 -1
.gitignore
CHANGED
@@ -1 +1,4 @@
-.env
+.env
+prompts.txt
+test.py
+app_backup.py
app.py
CHANGED
@@ -2,11 +2,13 @@ import gradio as gr
 import openai
 import os
 import requests
+from transformers import GPT2TokenizerFast
+
 
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD")
 
-
+tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
 
 default_system_message = {"role": "system", "content": "You are a brilliant, helpful assistant, always providing answers to the best of your knowledge. If you are unsure of the answer, you indicate it to the user. Currently, you don't have access to the internet."}
 personalities = {
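Note on the tokenizer: a streamed response carries no usage block, so the old trick of reading completion["usage"]["total_tokens"] no longer works, and the app instead estimates the count locally with a GPT-2 tokenizer. A minimal sketch of that estimate, with a hypothetical estimate_tokens helper (GPT-2's vocabulary only approximates the tokenizer the chat models actually use, so treat the number as a rough gauge):

from transformers import GPT2TokenizerFast

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

def estimate_tokens(text: str) -> int:
    # Encode with the GPT-2 vocabulary and count the resulting IDs;
    # close to, but not identical to, the chat models' own tokenization.
    return len(tokenizer(text)["input_ids"])

print(estimate_tokens("Hello, world!"))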
@@ -26,40 +28,35 @@ def get_completion(model, personality, user_message, message_history, chatlog_hi
     updated_message_history[0] = system_message
     new_history_row = {"role": "user", "content": user_message}
     updated_message_history = updated_message_history + [new_history_row]
-
-    headers = {
-        "Authorization": f"Bearer {OPENAI_API_KEY}",
-    }
-    payload = {
-        "model":model,
-        "messages":updated_message_history,
-        "temperature":temperature,
-        "max_tokens":maximum_length,
-        "top_p":top_p,
-        "frequency_penalty":frequency_penalty,
-        "presence_penalty":presence_penalty,
-    }
-    completion = requests.post(
-        "https://api.openai.com/v1/chat/completions",
-        headers=headers,
-        json=payload,
+    response = openai.ChatCompletion.create(
+        model=model,
+        messages=updated_message_history,
+        temperature=temperature,
+        max_tokens=maximum_length,
+        top_p=top_p,
+        frequency_penalty=frequency_penalty,
+        presence_penalty=presence_penalty,
+        stream=True,
     )
-
-    # completion = openai.ChatCompletion.create(
-    #     model=model,
-    #     messages=updated_message_history,
-    #     temperature=temperature,
-    #     max_tokens=maximum_length,
-    #     top_p=top_p,
-    #     frequency_penalty=frequency_penalty,
-    #     presence_penalty=presence_penalty,
-    # )
-    assistant_message = completion["choices"][0]["message"]["content"]
-    new_history_row = {"role": "assistant", "content": assistant_message}
+    new_history_row = {"role": "assistant", "content": ""}
     updated_message_history = updated_message_history + [new_history_row]
-    updated_chatlog_history = chatlog_history + [[user_message, assistant_message]]
-    token_count = completion["usage"]["total_tokens"]
-    return "", updated_message_history, updated_chatlog_history, updated_chatlog_history, token_count
+    updated_chatlog_history = chatlog_history + [[user_message, ""]]
+    # create variables to collect the stream of chunks
+    collected_chunks = []
+    collected_messages = []
+    # iterate through the stream of events
+    for chunk in response:
+        collected_chunks.append(chunk)  # save the event response
+        chunk_message = chunk['choices'][0]['delta']  # extract the message
+        collected_messages.append(chunk_message)  # save the message
+        assistant_message = ''.join([m.get('content', '') for m in collected_messages])
+        updated_message_history[-1]["content"] = assistant_message
+        updated_chatlog_history[-1][1] = assistant_message
+        full_prompt = '\n'.join([row[0] + row[1] for row in updated_chatlog_history])
+        token_count = len(tokenizer(full_prompt)["input_ids"])  # completion["usage"]["total_tokens"]
+        yield "", updated_message_history, updated_chatlog_history, updated_chatlog_history, token_count
+    # assistant_message = completion["choices"][0]["message"]["content"]
+    # return "", updated_message_history, updated_chatlog_history, updated_chatlog_history, token_count
 
 def retry_completion(model, personality, message_history, chatlog_history, temperature, maximum_length, top_p, frequency_penalty, presence_penalty):
     # set personality
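How the streaming works: with stream=True, the pre-1.0 openai client returns an iterator of chunks, each carrying a delta dict with at most a small fragment of the reply. Because get_completion is now a generator that yields after every chunk, Gradio re-renders the chatbot with each partial message. A stripped-down sketch of the same loop outside Gradio (hypothetical standalone script, same legacy API as in the diff):

import openai

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Tell me a short joke."}],
    stream=True,
)

partial = ""
for chunk in response:
    delta = chunk["choices"][0]["delta"]   # e.g. {"content": " joke"}
    partial += delta.get("content", "")    # role/terminal chunks carry no "content"
    print(delta.get("content", ""), end="", flush=True)
print()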
@@ -72,40 +69,33 @@ def retry_completion(model, personality, message_history, chatlog_history, tempe
     updated_chatlog_history = chatlog_history[:-1]
     # delete latest assistant message from message_history
     updated_message_history = updated_message_history[:-1]
-
-    headers = {
-        "Authorization": f"Bearer {OPENAI_API_KEY}",
-    }
-    payload = {
-        "model":model,
-        "messages":updated_message_history,
-        "temperature":temperature,
-        "max_tokens":maximum_length,
-        "top_p":top_p,
-        "frequency_penalty":frequency_penalty,
-        "presence_penalty":presence_penalty,
-    }
-    completion = requests.post(
-        "https://api.openai.com/v1/chat/completions",
-        headers=headers,
-        json=payload,
+    response = openai.ChatCompletion.create(
+        model=model,
+        messages=updated_message_history,
+        temperature=temperature,
+        max_tokens=maximum_length,
+        top_p=top_p,
+        frequency_penalty=frequency_penalty,
+        presence_penalty=presence_penalty,
+        stream=True,
     )
-
-    # completion = openai.ChatCompletion.create(
-    #     model=model,
-    #     messages=updated_message_history,
-    #     temperature=temperature,
-    #     max_tokens=maximum_length,
-    #     top_p=top_p,
-    #     frequency_penalty=frequency_penalty,
-    #     presence_penalty=presence_penalty,
-    # )
-    assistant_message = completion["choices"][0]["message"]["content"]
-    new_history_row = {"role": "assistant", "content": assistant_message}
+    new_history_row = {"role": "assistant", "content": ""}
     updated_message_history = updated_message_history + [new_history_row]
-    updated_chatlog_history = updated_chatlog_history + [[user_message, assistant_message]]
-    token_count = completion["usage"]["total_tokens"]
-    return "", updated_message_history, updated_chatlog_history, updated_chatlog_history, token_count
+    updated_chatlog_history = updated_chatlog_history + [[chatlog_history[-1][0], ""]]  # reuse the retried user turn; user_message is not in scope here
+    # create variables to collect the stream of chunks
+    collected_chunks = []
+    collected_messages = []
+    # iterate through the stream of events
+    for chunk in response:
+        collected_chunks.append(chunk)  # save the event response
+        chunk_message = chunk['choices'][0]['delta']  # extract the message
+        collected_messages.append(chunk_message)  # save the message
+        assistant_message = ''.join([m.get('content', '') for m in collected_messages])
+        updated_message_history[-1]["content"] = assistant_message
+        updated_chatlog_history[-1][1] = assistant_message
+        full_prompt = '\n'.join([row[0] + row[1] for row in updated_chatlog_history])
+        token_count = len(tokenizer(full_prompt)["input_ids"])  # completion["usage"]["total_tokens"]
+        yield "", updated_message_history, updated_chatlog_history, updated_chatlog_history, token_count
 
 def reset_chat():
     return "", [default_system_message], [], [], 0
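retry_completion repeats get_completion's request-and-stream loop almost verbatim. A possible follow-up cleanup (a sketch, not part of this commit; stream_chat is a hypothetical helper) would factor the loop into one generator that both handlers consume, keeping only the history bookkeeping separate:

def stream_chat(model, messages, temperature, maximum_length, top_p,
                frequency_penalty, presence_penalty):
    # Shared streaming loop: yields the assistant text accumulated so far.
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=maximum_length,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        presence_penalty=presence_penalty,
        stream=True,
    )
    assistant_message = ""
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        assistant_message += delta.get("content", "")
        yield assistant_message

Each handler would then iterate for assistant_message in stream_chat(...) and update its own histories inside the loop.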
@@ -140,4 +130,4 @@ with gr.Blocks(theme=theme) as app:
     retry_button.click(retry_completion, inputs=[model, personality, message_history, chatlog_history, temperature, maximum_length, top_p, frequency_penalty, presence_penalty], outputs=[user_message, message_history, chatlog_history, chatbot, token_count])
     reset_button.click(reset_chat, inputs=[], outputs=[user_message, message_history, chatlog_history, chatbot, token_count])
 
-app.launch(auth=("admin", ADMIN_PASSWORD))
+app.launch(auth=("admin", ADMIN_PASSWORD), enable_queue=True)
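Why enable_queue=True: in Gradio 3.x, generator handlers only stream to the browser when the queue is enabled (otherwise the request would have to finish before anything renders), and enable_queue was the launch() flag for that; later Gradio exposes the same behavior through app.queue() and eventually dropped the flag. A minimal sketch of the pattern, assuming Gradio 3.x:

import time
import gradio as gr

def count_up(n):
    # Generator handler: each yield pushes a partial result to the UI.
    shown = ""
    for i in range(int(n)):
        shown += f"{i} "
        time.sleep(0.2)
        yield shown

with gr.Blocks() as demo:
    n = gr.Number(value=10, label="count to")
    out = gr.Textbox(label="stream")
    gr.Button("go").click(count_up, inputs=n, outputs=out)

demo.queue()   # generators need the queue to stream in Gradio 3.x
demo.launch()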
requirements.txt
CHANGED
@@ -1,2 +1,3 @@
 gradio
-openai
+openai
+transformers