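"""Streamlit demo for Persian medical question answering.

Retrieves the most similar Q&A pairs from the MeDiaPQA dataset via a FAISS
index over multilingual-e5-small embeddings, then asks a Groq-hosted LLM to
answer in Persian from that retrieved context.
"""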
import os

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'  # work around duplicate-OpenMP crashes when FAISS and PyTorch load together

import streamlit as st
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModel
from datasets import Dataset
from groq import Groq
from huggingface_hub import snapshot_download
st.set_page_config(page_title="Q&A Demo")
# Load the embedding model and tokenizer (cached so Streamlit reruns don't reload them)
model_ckpt = "intfloat/multilingual-e5-small"

@st.cache_resource
def load_embedding_model():
    return AutoTokenizer.from_pretrained(model_ckpt), AutoModel.from_pretrained(model_ckpt)

tokenizer, semantic_model = load_embedding_model()
# Download and cache the Q&A dataset together with its prebuilt FAISS index
@st.cache_resource
def load_dataset_and_index():
    snapshot_download(repo_id="zahraghamari/QA", local_dir='./', repo_type='model')
    snapshot_download(repo_id="zahraghamari/MeDiaPQA", local_dir='./', repo_type='dataset')
    df = pd.read_csv('MeDiaPQA.csv', sep=';')
    dataset = Dataset.from_pandas(df)
    # load_faiss_index attaches the index to the dataset in place (it does not return it)
    dataset.load_faiss_index('embeddings', 'Dindex.faiss')
    return dataset

dataset = load_dataset_and_index()
def cls_pooling(model_output):
    # Use the hidden state of the first ([CLS]) token as the sentence embedding
    return model_output.last_hidden_state[:, 0]
def get_embeddings(text_list):
    # Tokenize, run the encoder without tracking gradients, return CLS embeddings as numpy
    encoded_input = tokenizer(text_list, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        model_output = semantic_model(**encoded_input)
    return cls_pooling(model_output).cpu().numpy()
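# Note: the multilingual-e5 model card recommends prefixing inputs with
# "query: " / "passage: "; retrieval quality may suffer if the stored
# 'embeddings' column was built with a different convention (not shown here).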
def answer_by_llm(question, context):
    # Ask the Groq-hosted LLM to answer the question from the retrieved context
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt_template.format(context=context, question=question)},
    ]
    response = client.chat.completions.create(
        model=GROQ_MODEL,
        messages=messages,
        temperature=0,  # deterministic output
    )
    answer = response.choices[0].message.content
    print(answer)  # log the raw answer to the server console
    return answer
# Set up the Groq client; the API key is read from the environment rather than hard-coded
GROQ_MODEL = "llama3-70b-8192"
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# Prompt template for the LLM. The Persian refusal string means "I don't know
# the answer to this question"; the self-description means "I am a Persian
# medical assistant developed at the Faculty of Mathematics of Ferdowsi University."
prompt_template = """Use the following context to answer the medical question at the end.
If you don't know the answer, just say that you don't know; don't try to make up an answer.
Use the context and keep the answer as concise as possible.
If the question is not related to medicine (for example, the capital of a country or another unrelated topic), do not answer and say "پاسخ این سوال را نمی‌دانم".
If asked who you are, respond: "من دستیار پزشکی فارسی هستم که در دانشکده ریاضی دانشگاه فردوسی توسعه یافته‌ام."

{context}

Question: {question}
Answer in Persian:"""
# Retrieve the most similar Q&A pairs from the index, then answer with the LLM
def get_response(question):
    question_embedding = get_embeddings([question])
    scores, samples = dataset.get_nearest_examples("embeddings", question_embedding, k=5)
    samples_df = pd.DataFrame.from_dict(samples)
    samples_df["scores"] = scores
    samples_df.sort_values("scores", ascending=False, inplace=True)
    context = ""
    for _, row in samples_df.iterrows():
        context += "\n" + row.Q + row.A  # append each retrieved question-answer pair
    return answer_by_llm(question, context)
# Streamlit UI. Persian strings: header "Question-and-Answer App",
# input label "Enter your question:", button "Ask", subheader "Answer".
st.header("برنامه‌ی پرسش و پاسخ")
input_text = st.text_input("سوالت را وارد کن: ", key="input")
submit_button = st.button("بپرس")

if submit_button and input_text:
    response = get_response(input_text)
    st.subheader("پاسخ")
    st.write(response)
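# To run locally (assumes the dependencies above are installed):
#   export GROQ_API_KEY=<your key>
#   streamlit run app.py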