import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained(
    'kakaobrain/kogpt',
    revision='KoGPT6B-ryan1.5b-float16',  # or float32 version: revision='KoGPT6B-ryan1.5b'
    bos_token='[BOS]', eos_token='[EOS]', unk_token='[UNK]',
    pad_token='[PAD]', mask_token='[MASK]'
)
model = AutoModelForCausalLM.from_pretrained(
    'kakaobrain/kogpt',
    revision='KoGPT6B-ryan1.5b-float16',  # or float32 version: revision='KoGPT6B-ryan1.5b'
    pad_token_id=tokenizer.eos_token_id,
    torch_dtype='auto',
    low_cpu_mem_usage=True
).to(device='cpu', non_blocking=True)
_ = model.eval()

title = "KoGPT"
description = "Gradio demo for KoGPT (Korean Generative Pre-trained Transformer). To use it, simply add your text, or click one of the examples to load them. Read more at the links below."
article = "<p style='text-align: center'><a href='https://github.com/kakaobrain/kogpt'>KoGPT: KakaoBrain Korean(hangul) Generative Pre-trained Transformer</a> | <a href='https://huggingface.co/kakaobrain/kogpt'>Huggingface Model</a></p>"
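# A minimal sketch, not part of the original demo: a 6B-parameter model is
# very slow on CPU. On a machine with a CUDA GPU, the same float16 checkpoint
# can be kept on the GPU instead; the .to(device='cpu', ...) call inside
# greet() below would then have to target the same device. The `device`
# variable is an illustrative assumption. Uncomment to try (assumes the GPU
# has enough memory for the float16 weights):
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# model = model.to(device)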
" examples=[['인간처럼 생각하고, 행동하는 \'지능\'을 통해 인류가 이제까지 풀지 못했던']] def greet(text): prompt = text with torch.no_grad(): tokens = tokenizer.encode(prompt, return_tensors='pt').to(device='cpu', non_blocking=True) gen_tokens = model.generate(tokens, do_sample=True, temperature=0.8, max_length=64) generated = tokenizer.batch_decode(gen_tokens)[0] print(f"generated {generated}") return generated iface = gr.Interface(fn=greet, inputs="text", outputs="text", title=title, description=description, article=article, examples=examples,enable_queue=True) iface.launch()