# unspool/app.py
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch
# Base model and LoRA adapter to load
model_name = "unsloth/llama-3-8b-bnb-4bit"  # Replace with your base model
adapter_model_name = "DanielWong76/lora_model1"  # Replace with your LoRA adapter
# Load the base model and tokenizer
base_model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
model = PeftModel.from_pretrained(base_model, adapter_model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Define the Alpaca-style prompt
alpaca_prompt = """### Instruction:
{instruction}
### Input:
{input}
### Output:
{output}
"""
# Define the function to generate text
def generate_response(instruction, input_text):
    # Fill the Alpaca-style template with the instruction and input text
    prompt = alpaca_prompt.format(
        instruction=instruction,
        input=input_text,
        output="",  # leave the output blank so the model generates it
    )
    # Tokenize the prompt and move it to the same device as the model
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
    # Generate the continuation
    outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
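    # The call above decodes greedily by default; for more varied answers, sampling can be
    # enabled by passing e.g. do_sample=True, temperature=0.7, top_p=0.9 to generate().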
    # Decode the output into human-readable text
    generated_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    # The decoded text still contains the prompt; keep only what follows the "### Output:" marker
    output_start = generated_text.find("### Output:")
    if output_start != -1:
        generated_text = generated_text[output_start + len("### Output:"):].strip()
    return generated_text
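# Quick sanity check outside the UI (hypothetical inputs; requires the model loaded above):
# print(generate_response("Summarize the text below in one sentence.",
#                         "Gradio makes it simple to wrap a model in a web demo."))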
# Gradio Interface with two inputs: Instruction and Input Text
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(lines=2, label="Instruction", placeholder="Enter the instruction here..."),
        gr.Textbox(lines=5, label="Input", placeholder="Enter the input text here..."),
    ],
    outputs="text",
    title="Alpaca-Style Instruction-Input-Output Model",
)
# Launch the Gradio app
iface.launch()
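# Note: when running outside Hugging Face Spaces, iface.launch(share=True) creates a
# temporary public link, and server_name="0.0.0.0" makes the app reachable on the local network.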