Add token streaming
- .gitignore +4 -1
- app.py +56 -66
- requirements.txt +2 -1
.gitignore
CHANGED
@@ -1 +1,4 @@
-.env
+.env
+prompts.txt
+test.py
+app_backup.py
app.py
CHANGED
@@ -2,11 +2,13 @@ import gradio as gr
 import openai
 import os
 import requests
+from transformers import GPT2TokenizerFast
+
 
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 ADMIN_PASSWORD = os.getenv("ADMIN_PASSWORD")
 
-
+tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
 
 default_system_message = {"role": "system", "content": "You are a brilliant, helpful assistant, always providing answers to the best of your knowledge. If you are unsure of the answer, you indicate it to the user. Currently, you don't have access to the internet."}
 personalities = {
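Note on the tokenizer: a streamed response carries no usage block, so the old trick of reading completion["usage"]["total_tokens"] no longer works, and the app instead estimates the count locally with a GPT-2 tokenizer. A minimal sketch of that estimate, with a hypothetical estimate_tokens helper (GPT-2's vocabulary only approximates the tokenizer the chat models actually use, so treat the number as a rough gauge):

from transformers import GPT2TokenizerFast

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

def estimate_tokens(text: str) -> int:
    # Encode with the GPT-2 vocabulary and count the resulting IDs;
    # close to, but not identical to, the chat models' own tokenization.
    return len(tokenizer(text)["input_ids"])

print(estimate_tokens("Hello, world!"))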
@@ -26,40 +28,35 @@ def get_completion(model, personality, user_message, message_history, chatlog_hi
     updated_message_history[0] = system_message
     new_history_row = {"role": "user", "content": user_message}
     updated_message_history = updated_message_history + [new_history_row]
-
-    headers = {
-        "Authorization": f"Bearer {OPENAI_API_KEY}",
-    }
-    payload = {
-        "model":model,
-        "messages":updated_message_history,
-        "temperature":temperature,
-        "max_tokens":maximum_length,
-        "top_p":top_p,
-        "frequency_penalty":frequency_penalty,
-        "presence_penalty":presence_penalty,
-    }
-    completion = requests.post(
-        "https://api.openai.com/v1/chat/completions",
-        headers=headers,
-        json=payload,
+    response = openai.ChatCompletion.create(
+        model=model,
+        messages=updated_message_history,
+        temperature=temperature,
+        max_tokens=maximum_length,
+        top_p=top_p,
+        frequency_penalty=frequency_penalty,
+        presence_penalty=presence_penalty,
+        stream=True,
     )
-
-    # completion = openai.ChatCompletion.create(
-    #     model=model,
-    #     messages=updated_message_history,
-    #     temperature=temperature,
-    #     max_tokens=maximum_length,
-    #     top_p=top_p,
-    #     frequency_penalty=frequency_penalty,
-    #     presence_penalty=presence_penalty,
-    # )
-    assistant_message = completion["choices"][0]["message"]["content"]
-    new_history_row = {"role": "assistant", "content": assistant_message}
+    new_history_row = {"role": "assistant", "content": ""}
     updated_message_history = updated_message_history + [new_history_row]
-    updated_chatlog_history = chatlog_history + [[user_message, assistant_message]]
-    token_count = completion["usage"]["total_tokens"]
-    return "", updated_message_history, updated_chatlog_history, updated_chatlog_history, token_count
+    updated_chatlog_history = chatlog_history + [[user_message, ""]]
+    # create variables to collect the stream of chunks
+    collected_chunks = []
+    collected_messages = []
+    # iterate through the stream of events
+    for chunk in response:
+        collected_chunks.append(chunk)  # save the event response
+        chunk_message = chunk['choices'][0]['delta']  # extract the message
+        collected_messages.append(chunk_message)  # save the message
+        assistant_message = ''.join([m.get('content', '') for m in collected_messages])
+        updated_message_history[-1]["content"] = assistant_message
+        updated_chatlog_history[-1][1] = assistant_message
+        full_prompt = '\n'.join([row[0] + row[1] for row in updated_chatlog_history])
+        token_count = len(tokenizer(full_prompt)["input_ids"])  # completion["usage"]["total_tokens"]
+        yield "", updated_message_history, updated_chatlog_history, updated_chatlog_history, token_count
+    # assistant_message = completion["choices"][0]["message"]["content"]
+    # return "", updated_message_history, updated_chatlog_history, updated_chatlog_history, token_count
 
 def retry_completion(model, personality, message_history, chatlog_history, temperature, maximum_length, top_p, frequency_penalty, presence_penalty):
     # set personality
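How the streaming works: with stream=True, the pre-1.0 openai client returns an iterator of chunks, each carrying a delta dict with at most a small fragment of the reply. Because get_completion is now a generator that yields after every chunk, Gradio re-renders the chatbot with each partial message. A stripped-down sketch of the same loop outside Gradio (hypothetical standalone script, same legacy API as in the diff):

import openai

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Tell me a short joke."}],
    stream=True,
)

partial = ""
for chunk in response:
    delta = chunk["choices"][0]["delta"]   # e.g. {"content": " joke"}
    partial += delta.get("content", "")    # role/terminal chunks carry no "content"
    print(delta.get("content", ""), end="", flush=True)
print()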
@@ -72,40 +69,33 @@ def retry_completion(model, personality, message_history, chatlog_history, tempe
     updated_chatlog_history = chatlog_history[:-1]
     # delete latest assistant message from message_history
     updated_message_history = updated_message_history[:-1]
-
-    headers = {
-        "Authorization": f"Bearer {OPENAI_API_KEY}",
-    }
-    payload = {
-        "model":model,
-        "messages":updated_message_history,
-        "temperature":temperature,
-        "max_tokens":maximum_length,
-        "top_p":top_p,
-        "frequency_penalty":frequency_penalty,
-        "presence_penalty":presence_penalty,
-    }
-    completion = requests.post(
-        "https://api.openai.com/v1/chat/completions",
-        headers=headers,
-        json=payload,
+    response = openai.ChatCompletion.create(
+        model=model,
+        messages=updated_message_history,
+        temperature=temperature,
+        max_tokens=maximum_length,
+        top_p=top_p,
+        frequency_penalty=frequency_penalty,
+        presence_penalty=presence_penalty,
+        stream=True,
     )
-
-    # completion = openai.ChatCompletion.create(
-    #     model=model,
-    #     messages=updated_message_history,
-    #     temperature=temperature,
-    #     max_tokens=maximum_length,
-    #     top_p=top_p,
-    #     frequency_penalty=frequency_penalty,
-    #     presence_penalty=presence_penalty,
-    # )
-    assistant_message = completion["choices"][0]["message"]["content"]
-    new_history_row = {"role": "assistant", "content": assistant_message}
+    new_history_row = {"role": "assistant", "content": ""}
     updated_message_history = updated_message_history + [new_history_row]
-    updated_chatlog_history = updated_chatlog_history + [[user_message, assistant_message]]
-    token_count = completion["usage"]["total_tokens"]
-    return "", updated_message_history, updated_chatlog_history, updated_chatlog_history, token_count
+    updated_chatlog_history = updated_chatlog_history + [[chatlog_history[-1][0], ""]]  # reuse the retried user turn; user_message is not in scope here
+    # create variables to collect the stream of chunks
+    collected_chunks = []
+    collected_messages = []
+    # iterate through the stream of events
+    for chunk in response:
+        collected_chunks.append(chunk)  # save the event response
+        chunk_message = chunk['choices'][0]['delta']  # extract the message
+        collected_messages.append(chunk_message)  # save the message
+        assistant_message = ''.join([m.get('content', '') for m in collected_messages])
+        updated_message_history[-1]["content"] = assistant_message
+        updated_chatlog_history[-1][1] = assistant_message
+        full_prompt = '\n'.join([row[0] + row[1] for row in updated_chatlog_history])
+        token_count = len(tokenizer(full_prompt)["input_ids"])  # completion["usage"]["total_tokens"]
+        yield "", updated_message_history, updated_chatlog_history, updated_chatlog_history, token_count
 
 def reset_chat():
     return "", [default_system_message], [], [], 0
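retry_completion repeats get_completion's request-and-stream loop almost verbatim. A possible follow-up cleanup (a sketch, not part of this commit; stream_chat is a hypothetical helper) would factor the loop into one generator that both handlers consume, keeping only the history bookkeeping separate:

def stream_chat(model, messages, temperature, maximum_length, top_p,
                frequency_penalty, presence_penalty):
    # Shared streaming loop: yields the assistant text accumulated so far.
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=maximum_length,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        presence_penalty=presence_penalty,
        stream=True,
    )
    assistant_message = ""
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        assistant_message += delta.get("content", "")
        yield assistant_message

Each handler would then iterate for assistant_message in stream_chat(...) and update its own histories inside the loop.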
@@ -140,4 +130,4 @@ with gr.Blocks(theme=theme) as app:
     retry_button.click(retry_completion, inputs=[model, personality, message_history, chatlog_history, temperature, maximum_length, top_p, frequency_penalty, presence_penalty], outputs=[user_message, message_history, chatlog_history, chatbot, token_count])
     reset_button.click(reset_chat, inputs=[], outputs=[user_message, message_history, chatlog_history, chatbot, token_count])
 
-app.launch(auth=("admin", ADMIN_PASSWORD))
+app.launch(auth=("admin", ADMIN_PASSWORD), enable_queue=True)
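Why enable_queue=True: in Gradio 3.x, generator handlers only stream to the browser when the queue is enabled (otherwise the request would have to finish before anything renders), and enable_queue was the launch() flag for that; later Gradio exposes the same behavior through app.queue() and eventually dropped the flag. A minimal sketch of the pattern, assuming Gradio 3.x:

import time
import gradio as gr

def count_up(n):
    # Generator handler: each yield pushes a partial result to the UI.
    shown = ""
    for i in range(int(n)):
        shown += f"{i} "
        time.sleep(0.2)
        yield shown

with gr.Blocks() as demo:
    n = gr.Number(value=10, label="count to")
    out = gr.Textbox(label="stream")
    gr.Button("go").click(count_up, inputs=n, outputs=out)

demo.queue()   # generators need the queue to stream in Gradio 3.x
demo.launch()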
requirements.txt
CHANGED
@@ -1,2 +1,3 @@
 gradio
-openai
+openai
+transformers