import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# Alternative: use a pipeline as a high-level helper instead of loading the
# model and tokenizer manually (kept here for reference only).
# from transformers import pipeline
# pipe = pipeline("text-generation", model="Qwen/Qwen2.5-Math-1.5B")
# pipe([{"role": "user", "content": "Who are you?"}])

# Load the model and tokenizer
model_name = "Qwen/Qwen2.5-Math-1.5B"
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"  # accelerate handles device placement; no extra .to(device) needed
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define a function for Gradio to handle user input
def solve_math(prompt):
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(device)
    generation_config = GenerationConfig(
        do_sample=False,   # Greedy decoding for deterministic answers
        max_new_tokens=512
    )
    generated_ids = model.generate(
        **model_inputs,
        generation_config=generation_config
    )
    # Remove the input tokens from the output
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    # Decode the generated output and return the result
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response

# Create the Gradio interface
iface = gr.Interface(
    fn=solve_math,    # Function to call
    inputs="text",    # Text input for the user prompt
    outputs="text",   # Text output for the model's response
    title="Math Solver",  # App title
    description="Provide a math problem and the model will solve it."
)

# Launch the app
if __name__ == "__main__":
    iface.launch()
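
# Optional: once the app is running, it can also be queried programmatically.
# This is a minimal sketch using the gradio_client package; it assumes the app
# is served on Gradio's default local URL (http://127.0.0.1:7860) and that the
# Interface exposes its default "/predict" endpoint.
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   answer = client.predict("Solve for x: 2x + 3 = 11", api_name="/predict")
#   print(answer)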