# Hugging Face Spaces app (the hosted Space was showing "Runtime error").
import os
import random
import time

import gradio as gr
import torch
import transformers
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# NOTE(review): `Chroma` is used below but was never imported; the app almost
# certainly relied on langchain's Chroma vector store — confirm the exact
# package/version this Space pinned.
from langchain_community.vectorstores import Chroma
with gr.Blocks() as demo:
    # NOTE(review): installing packages at runtime is fragile; presumably
    # needed for `device_map="auto"` sharded loading on the Space.
    # TODO: move accelerate/bitsandbytes into requirements.txt instead.
    os.system("pip install accelerate")
    os.system("pip install -i https://pypi.org/simple/ bitsandbytes")

    # Chat UI: history pane, input box, and a button that clears both.
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    # Load the locally bundled model and tokenizer.
    # NOTE(review): AutoModelForSequenceClassification is a classification
    # head, yet it is handed to a "text-generation" pipeline below — that
    # mismatch is the likely cause of the Space's runtime error. A causal-LM
    # class (e.g. AutoModelForCausalLM) looks intended; left unchanged
    # pending confirmation of what "./modelo" actually contains.
    model = AutoModelForSequenceClassification.from_pretrained("./modelo")
    tokenizer = AutoTokenizer.from_pretrained("./tokenizer")

    query_pipeline = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.float16,
        device_map="auto",
        max_new_tokens=200,
    )

    # Bug fix: `Chroma.load(...)` does not exist in langchain's Chroma API;
    # a persisted store is reopened through the constructor.
    # TODO(review): similarity search needs the same embedding function the
    # index was built with — pass `embedding_function=...` here.
    vectordb = Chroma(persist_directory="./chroma_db")
def _extract_answer(generated_text):
    """Return the text following the last "[/INST]" marker.

    Bug fix: the original did `text[text.rfind("[/INST]") + 8:]`, which
    silently drops the first 7 characters of the output whenever the marker
    is absent (rfind returns -1). If the marker is missing, return the whole
    text unchanged. The +8 offset (marker length 7 plus one character) also
    skips the space the chat template emits right after the marker —
    preserved as-is.
    """
    pos = generated_text.rfind("[/INST]")
    if pos == -1:
        return generated_text
    return generated_text[pos + 8:]


def test_rag(pipeline, query):
    """Answer `query` via RAG over the module-global `vectordb`.

    Retrieves scored documents, keeps the close-enough ones as context,
    and asks the text-generation `pipeline` to answer in Spanish.

    Returns:
        (answer, docs): the generated answer string (or a fixed Spanish
        "no information" message when no document passes the score cut-off)
        and the raw scored search results for provenance inspection.
    """
    docs = vectordb.similarity_search_with_score(query)

    # Keep only sufficiently close documents. NOTE(review): this assumes the
    # score is a distance (lower = closer); 7 is an empirical cut-off —
    # confirm against the embedding/index configuration.
    context = []
    for doc, score in docs:
        if score < 7:
            # to_json() wraps the document fields under "kwargs".
            context.append(doc.to_json()["kwargs"]["page_content"])

    if not context:
        return "No tengo información para responder a esta pregunta", docs

    messages = [{
        "role": "user",
        "content": "Basándote en la siguiente información: "
                   + "\n".join(context)
                   + "\n Responde en castellano a la pregunta: " + query,
    }]
    prompt = pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True)
    outputs = pipeline(prompt, max_new_tokens=256, do_sample=True,
                       temperature=0.7, top_k=50, top_p=0.95)
    return _extract_answer(outputs[0]["generated_text"]), docs
def respond(message, chat_history):
    """Gradio submit callback: answer `message` and extend the chat history.

    Returns ("", chat_history) so the textbox is cleared after submission
    and the chatbot pane shows the new (user, bot) exchange.
    """
    # Removed: `time.sleep(2)` — a leftover from Gradio's streaming-demo
    # template that only added artificial latency here.
    answer, _docs = test_rag(query_pipeline, message)
    chat_history.append((message, answer))
    return "", chat_history
# Wire the textbox: submitting sends (message, history) to respond(), whose
# return values refill the textbox (cleared) and the chatbot pane.
msg.submit(respond, [msg, chatbot], [msg, chatbot])

# Standard script guard: start the Gradio server only when run directly.
if __name__ == "__main__":
    demo.launch()