# HuggingFace Spaces demo: Flan-T5 text generation in "chat" mode.
import gradio as gr
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Flan-T5 checkpoint sizes: small (80M params), base (250M), large (780M),
# xl, xxl. The xl size runs out of memory on a 16 GB VM.
# All of them take 512-token inputs and produce up to 512-token outputs.
model_name = "large"

# Load tokenizer and weights for the selected checkpoint.
checkpoint = f"google/flan-t5-{model_name}"
tokenizer = T5Tokenizer.from_pretrained(checkpoint)
model = T5ForConditionalGeneration.from_pretrained(checkpoint)

# Unused at the moment; kept so any external reference keeps working.
title = ""
def get_examples ():
    """Return [prompt, target-answer] pairs shown as clickable examples in the UI.

    Each entry is [input_text, correct_label]. The first two targets follow a
    GSM8K-style chain-of-thought format: "question ** reasoning #### answer".
    """
    # NOTE: the trailing backslashes below are INSIDE string literals, so the
    # next line's text is joined directly onto the string (no newline added).
    # Keep continuation lines flush-left or their indent becomes string content.
    return [
        ["Peter goes to the store to buy a soda. The soda costs $.25 an ounce. \
He brought $2 with him and leaves with $.50. How many ounces of soda did he buy?",
         "How much did Peter spend on soda? ** He spend $1.5 on soda because 2 - .5 = <<2-.5=1.5>>1.5 \
How many ounces of soda did Peter buy? ** He bought 6 ounces of soda because 1.5 / .25 = <<6=6>>6 #### 6"
        ],
        ["Krystian works in the library. He borrows an average of 40 books every day. \
Every Friday, his number of borrowed books is about 40% higher than the daily average. How many books does he borrow in a week if the library is open from Monday to Friday?"
        ,"How many books does Krystian borrow on Friday? ** The number of books borrowed \
on Friday is higher by 40 * 40/100 = <<40*40/100=16>>16 books. How many books does Krystian borrow in a week? ** There are 5 days from Monday to Friday inclusive, so Krystian borrows an average of 5 * 40 = <<5*40=200>>200 books during that time. How many books does Krystian borrow in a week? ** With Friday's increase in borrowings, during one week Krystian borrows 200 + 16 = <<200+16=216>>216 books."]
        , ["Jane had $60 but gave $30 to dave and went to movies and spend $2. How much money does Jane has left? Answer by reasoning step by step:", "$28"],
        ["Cat is a friend of a Dog. Are cat and Dog friends?", "Yes"]
    ]
def text2text(input_text):
    """Generate a continuation of ``input_text`` with Flan-T5 and append it.

    Parameters
    ----------
    input_text : str
        The prompt. Because the UI routes the output back into the input box,
        this may already contain previously generated text.

    Returns
    -------
    str
        ``input_text`` plus a space plus the newly generated text.
    """
    # Clip to the model's 512-token training length. T5 uses relative position
    # embeddings, so generate() silently accepts longer inputs (which is why
    # the original code "did not seem to care" past 512) — but quality degrades
    # beyond the trained context, so truncate explicitly.
    input_ids = tokenizer(
        input_text, return_tensors="pt", truncation=True, max_length=512
    ).input_ids
    input_num_tokens = input_ids.shape[1]
    print("Number of input tokens: " + str(input_num_tokens))
    print("Length of input: " + str(len(input_text)))
    list_of_tokens = tokenizer.convert_ids_to_tokens(input_ids.view(-1).tolist())
    print("Tokens : " + ' '.join(list_of_tokens))
    # Generate at most 100 new tokens per click to keep latency low; the user
    # hits Submit again for more.
    # do_sample: sample instead of greedy decoding, for chat-like variation.
    # temperature: how random the sampling is.
    # top_p: nucleus sampling — filters out low-probability tokens first.
    outputs = model.generate(
        input_ids, max_new_tokens=100, do_sample=True, temperature=0.7, top_p=0.8
    )
    # skip_special_tokens removes <pad> and the </s> end-of-sequence token.
    model_output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("Number of output tokens: " + str(outputs.shape[1]))
    print("length of output: " + str(len(model_output_text)))
    print("Output: " + model_output_text)
    # The model does not emit a leading space, so insert one when appending.
    return input_text + " " + model_output_text
# Build the Gradio UI. A single textbox serves as both input and output:
# the click handler writes the prompt + generation back into it, so pressing
# Submit repeatedly continues the generation.
with gr.Blocks() as demo:
    # Intro text shown above the controls.
    gr.Markdown(
        """
# Flan T5 Large Demo (Chat Mode)
780M parameter Large language model fine-tuned on diverse tasks.
Prompt the model in the Input box. The model's output is appended to the input. To get additional generation hit Submit again.
""")
    txt_in = gr.Textbox(label="Input", lines=8)
    # Reference answer shown when one of the examples below is clicked.
    correct_label = gr.Label(label="Correct")
    btn = gr.Button(value="Submit")
    # Send the generated continuation back into the input box.
    btn.click(text2text, inputs=[txt_in], outputs=[txt_in])
    gr.Examples(
        examples=get_examples(),
        inputs=[txt_in, correct_label]
    )

if __name__ == "__main__":
    demo.launch()