import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained('dicta-il/dictalm-7b-instruct')
model = AutoModelForCausalLM.from_pretrained('dicta-il/dictalm-7b-instruct', trust_remote_code=True).cuda()

# Define the chat function for the model
def chat_with_model(prompt):
    model.eval()
    with torch.inference_mode():
        kwargs = dict(
            inputs=tokenizer(prompt, return_tensors='pt').input_ids.to(model.device),
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.75,
            max_length=100,
            min_new_tokens=5
        )
        output = model.generate(**kwargs)
        response_text = tokenizer.batch_decode(output, skip_special_tokens=True)[0]
    return response_text

# Create the Gradio interface
interface = gr.Interface(fn=chat_with_model, inputs="text", outputs="text", title="Chat with DictaLM Model")
interface.launch()
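
# Optional: chat_with_model can also be called directly for a quick sanity check
# of the generation settings (e.g., from a notebook, or here before interface.launch()).
# This is only a minimal sketch; the prompt string below is an illustrative example
# and assumes the model was successfully moved to a CUDA device above.
# print(chat_with_model("כתוב משפט קצר על ירושלים"))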