import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# Alternative: use a pipeline as a high-level helper instead of loading the
# model and tokenizer manually (kept here for reference only).
# from transformers import pipeline
# pipe = pipeline("text-generation", model="Qwen/Qwen2.5-Math-1.5B")
# pipe([{"role": "user", "content": "Who are you?"}])

# Load the model and tokenizer
model_name = "Qwen/Qwen2.5-Math-1.5B"
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"  # accelerate handles device placement; no extra .to(device) needed
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define a function for Gradio to handle user input
def solve_math(prompt):
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(device)
    generation_config = GenerationConfig(
        do_sample=False,   # Greedy decoding for deterministic answers
        max_new_tokens=512
    )
    generated_ids = model.generate(
        **model_inputs,
        generation_config=generation_config
    )
    # Remove the input tokens from the output
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    # Decode the generated output and return the result
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response

# Create the Gradio interface
iface = gr.Interface(
    fn=solve_math,    # Function to call
    inputs="text",    # Text input for the user prompt
    outputs="text",   # Text output for the model's response
    title="Math Solver",  # App title
    description="Provide a math problem and the model will solve it."
)

# Launch the app
if __name__ == "__main__":
    iface.launch()
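
# Optional: once the app is running, it can also be queried programmatically.
# This is a minimal sketch using the gradio_client package; it assumes the app
# is served on Gradio's default local URL (http://127.0.0.1:7860) and that the
# Interface exposes its default "/predict" endpoint.
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   answer = client.predict("Solve for x: 2x + 3 = 11", api_name="/predict")
#   print(answer)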