# orangepro_app/app.py
# NOTE: the lines below were Hugging Face Hub page chrome captured with the
# file ("raw / history blame", commit 9547287, 3.89 kB, update by akadhim-ai);
# kept here as a comment so the module remains valid Python.
import re

import streamlit as st
from PyPDF2 import PdfReader
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
# Define a function to load PDF and perform processing
def process_pdf(pdf_path):
    """Extract text from a PDF, index it in FAISS, and build a QA chain.

    Parameters
    ----------
    pdf_path : str or file-like
        Anything accepted by PyPDF2's ``PdfReader`` (a path or the
        uploaded-file buffer Streamlit provides).

    Returns
    -------
    tuple
        ``(document_search, chain)`` — the FAISS vector store built over
        the PDF's text chunks, and a "stuff" question-answering chain
        backed by the OpenAI LLM.
    """
    reader = PdfReader(pdf_path)
    # Concatenate the extracted text of every page.  join() avoids the
    # quadratic cost of repeated `raw_text += content`; pages with no
    # extractable text (extract_text() returns None/"") are skipped.
    raw_text = "".join(
        content for page in reader.pages if (content := page.extract_text())
    )
    # Chunk the text so each piece fits in the model context; the overlap
    # reduces the chance an answer is split across chunk boundaries.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=800,
        chunk_overlap=100,
        length_function=len,
    )
    texts = text_splitter.split_text(raw_text)
    embeddings = OpenAIEmbeddings()
    document_search = FAISS.from_texts(texts, embeddings)
    chain = load_qa_chain(OpenAI(), chain_type="stuff")
    return document_search, chain
# Function to get yes/no emoji based on answer content
def get_answer_emoji(answer):
    """Map a free-text answer to a traffic-light emoji.

    Looks for the standalone words "yes" / "no" (case-insensitive).
    Whole-word matching is used because the previous substring test
    miscounted answers: "know", "nothing" and "cannot" all contain
    "no", and "eyes" contains "yes".  "yes" wins if both appear.

    Returns "βœ…" for yes, "❌" for no, "🟑" when neither word occurs.
    """
    words = set(re.findall(r"[a-z]+", answer.lower()))
    if "yes" in words:
        return "βœ…"
    elif "no" in words:
        return "❌"
    else:
        return "🟑"
# Streamlit UI
st.title("OrangePro.AI LLM Output Testing")

# Upload a PDF file (required) and, optionally, a text file of questions.
uploaded_pdf_file = st.file_uploader("Upload a PDF file for analysis", type=["pdf"])
uploaded_text_file = st.file_uploader("Upload a text file with questions (if available)", type=["txt"])

if uploaded_pdf_file:
    st.subheader("Selected PDF Content")
    # Index the PDF and build the QA chain.  `document_search` is the FAISS
    # vector store (the old name `pdf_reader` was misleading).
    document_search, qa_chain = process_pdf(uploaded_pdf_file)
    # BUG FIX: the previous code did `st.text(pdf_reader)`, which printed the
    # FAISS object's repr under the label "PDF Content:" — not the PDF text.
    st.success("PDF processed and indexed for question answering.")

    if uploaded_text_file:
        st.warning("Questions will be extracted from the uploaded text file. Disabling question input below.")
        text_content = uploaded_text_file.read().decode('utf-8')  # Decode bytes to string
        questions = text_content.splitlines()
    else:
        # Allow the user to enter a list of questions
        questions = st.text_area("Enter a list of questions (one per line):").split('\n')

    if st.button("Analyze Questions"):
        # Perform question answering for each question
        st.subheader("Answers:")
        # Drop blank lines up front.  BUG FIX: blanks used to be skipped in
        # the loop but still counted in `total_questions`, deflating the
        # yes-percentage reported below.
        questions = [q.strip() for q in questions if q.strip()]
        total_questions = len(questions)
        answer_summary = []
        yes_count = 0
        for question in questions:
            # Retrieve the most relevant chunks, then answer from them.
            docs = document_search.similarity_search(question)
            answer = qa_chain.run(input_documents=docs, question=question)
            emoji = get_answer_emoji(answer)
            answer_summary.append([question, answer, emoji])
            if emoji == "βœ…":
                yes_count += 1
        # Calculate and display the percentage of "yes" answers
        yes_percentage = (yes_count / total_questions) * 100 if total_questions > 0 else 0
        answer_summary.append(["Output Quality (βœ…/Total Questions)", f"{yes_percentage:.2f}%", ""])
        # Display the summary in a table
        st.table(answer_summary)
# About section — static marketing copy rendered in the Streamlit sidebar.
st.sidebar.title("About OrangePro AI")
st.sidebar.info(
# The two adjacent string literals below are implicitly concatenated into a
# single info-box message.
"OrangePro AI is an artificial intelligence testing and benchmarking platform for large language models (LLMs). It scores model performance based on real-world scenarios, allowing corporate clients such as Fortune 500 companies to choose the best model for their specific use cases."
"\n\n"
"The platform automates scoring, ranking model performance in real-world scenarios and key criteria like hallucinations and safety. OrangePro AI also automatically generates adversarial test suites at a large scale and benchmarks models to help customers identify the best model for specific use cases."
)
# Footer
st.sidebar.text("Powered by Streamlit and Langchain")