rohan13 committed on
Commit
c1fe6fb
1 Parent(s): 1cc9d33

map-reduce to answer summary-based questions

Browse files
Files changed (3) hide show
  1. app.py +20 -21
  2. grader.py +8 -12
  3. utils.py +64 -19
app.py CHANGED
@@ -1,7 +1,8 @@
1
  import asyncio
 
2
  import os
3
  import time
4
- import glob
5
  import gradio as gr
6
  from dotenv import load_dotenv
7
  from langchain.chat_models import ChatOpenAI
@@ -17,7 +18,7 @@ pickle_file = "vector_stores/canvas-discussions.pkl"
17
  index_file = "vector_stores/canvas-discussions.index"
18
 
19
  grading_model = 'gpt-4'
20
- qa_model = 'gpt-3.5-turbo-16k'
21
 
22
  llm = ChatOpenAI(model_name=qa_model, temperature=0, verbose=True)
23
  embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
@@ -28,7 +29,6 @@ grader_qa = None
28
 
29
  def add_text(history, text):
30
  print("Question asked: " + text)
31
- get_grading_status(history)
32
  response = run_model(text)
33
  history = history + [(text, response)]
34
  print(history)
@@ -43,16 +43,16 @@ def run_model(text):
43
  sources = []
44
  for document in response['source_documents']:
45
  sources.append(str(document.metadata))
46
- print(sources)
47
 
48
  source = ','.join(set(sources))
49
- response = response['answer'] + '\nSources: ' + source
50
  end_time = time.time()
51
  # # If response contains string `SOURCES:`, then add a \n before `SOURCES`
52
  # if "SOURCES:" in response:
53
  # response = response.replace("SOURCES:", "\nSOURCES:")
54
  response = response + "\n\n" + "Time taken: " + str(end_time - start_time)
55
  print(response)
 
56
  print("Time taken: " + str(end_time - start_time))
57
  return response
58
 
@@ -68,16 +68,15 @@ def ingest(url, canvas_api_key, history):
68
  grader = Grader(grading_model)
69
  response = "Ingested canvas data successfully"
70
  history = history + [(text, response)]
71
- return get_grading_status(history)
 
72
 
73
- def start_grading(url, canvas_api_key, history):
74
  global grader, grader_qa
75
  text = f"Start grading discussions from {url}"
76
- if not url or not canvas_api_key:
77
- response = "Please enter all the fields to initiate grading"
78
- elif grader:
79
- if grader.llm.model_name != grading_model:
80
- grader = Grader(grading_model)
81
  # Create a new event loop
82
  loop = asyncio.new_event_loop()
83
  asyncio.set_event_loop(loop)
@@ -108,26 +107,28 @@ def get_first_message(history):
108
  global grader_qa
109
  history = [(None,
110
  'Get feedback on your canvas discussions. Add your discussion url and get your discussions graded in instantly.')]
111
- history = get_grading_status(history)
112
- return history
113
 
114
 
115
  def get_grading_status(history):
116
  global grader, grader_qa
117
  # Check if grading is complete
118
- if os.path.isdir('output') and len(glob.glob("docs/*.json")) > 0 and len(glob.glob("docs/*.html")) > 0:
 
119
  if not grader:
120
  grader = Grader(qa_model)
121
  grader_qa = GraderQA(grader, embeddings)
122
  elif not grader_qa:
123
  grader_qa = GraderQA(grader, embeddings)
124
- history = history + [(None, 'Grading is already complete. You can now ask questions')]
 
125
  enable_fields(False, False, False, False, True, True, True)
126
  # Check if data is ingested
127
  elif len(glob.glob("docs/*.json")) > 0 and len(glob.glob("docs/*.html")):
128
  if not grader_qa:
129
  grader = Grader(qa_model)
130
- history = history + [(None, 'Canvas data is already ingested. You can grade discussions now')]
 
131
  enable_fields(False, False, False, True, True, False, False)
132
  else:
133
  history = history + [(None, 'Please ingest data and start grading')]
@@ -157,7 +158,7 @@ def enable_fields(url_status, canvas_api_key_status, submit_status, grade_status
157
 
158
 
159
  def bot(history):
160
- return history
161
 
162
 
163
  with gr.Blocks() as demo:
@@ -196,7 +197,7 @@ with gr.Blocks() as demo:
196
  bot, chatbot, chatbot
197
  )
198
 
199
- grade.click(start_grading, inputs=[url, canvas_api_key, chatbot], outputs=[chatbot],
200
  postprocess=False).then(
201
  bot, chatbot, chatbot
202
  )
@@ -213,8 +214,6 @@ with gr.Blocks() as demo:
213
  bot, chatbot, chatbot
214
  )
215
 
216
- set_model(chatbot)
217
-
218
  if __name__ == "__main__":
219
  demo.queue()
220
  demo.queue(concurrency_count=5)
 
1
  import asyncio
2
+ import glob
3
  import os
4
  import time
5
+
6
  import gradio as gr
7
  from dotenv import load_dotenv
8
  from langchain.chat_models import ChatOpenAI
 
18
  index_file = "vector_stores/canvas-discussions.index"
19
 
20
  grading_model = 'gpt-4'
21
+ qa_model = 'gpt-4'
22
 
23
  llm = ChatOpenAI(model_name=qa_model, temperature=0, verbose=True)
24
  embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
 
29
 
30
  def add_text(history, text):
31
  print("Question asked: " + text)
 
32
  response = run_model(text)
33
  history = history + [(text, response)]
34
  print(history)
 
43
  sources = []
44
  for document in response['source_documents']:
45
  sources.append(str(document.metadata))
 
46
 
47
  source = ','.join(set(sources))
48
+ response = response['answer'] + '\nSources: ' + str(len(sources))
49
  end_time = time.time()
50
  # # If response contains string `SOURCES:`, then add a \n before `SOURCES`
51
  # if "SOURCES:" in response:
52
  # response = response.replace("SOURCES:", "\nSOURCES:")
53
  response = response + "\n\n" + "Time taken: " + str(end_time - start_time)
54
  print(response)
55
+ print(sources)
56
  print("Time taken: " + str(end_time - start_time))
57
  return response
58
 
 
68
  grader = Grader(grading_model)
69
  response = "Ingested canvas data successfully"
70
  history = history + [(text, response)]
71
+ return history
72
+
73
 
74
+ def start_grading(history):
75
  global grader, grader_qa
76
  text = f"Start grading discussions from {url}"
77
+ if grader:
78
+ # if grader.llm.model_name != grading_model:
79
+ # grader = Grader(grading_model)
 
 
80
  # Create a new event loop
81
  loop = asyncio.new_event_loop()
82
  asyncio.set_event_loop(loop)
 
107
  global grader_qa
108
  history = [(None,
109
  'Get feedback on your canvas discussions. Add your discussion url and get your discussions graded in instantly.')]
110
+ return get_grading_status(history)
 
111
 
112
 
113
  def get_grading_status(history):
114
  global grader, grader_qa
115
  # Check if grading is complete
116
+ if os.path.isdir('output') and len(glob.glob("output/*.csv")) > 0 and len(glob.glob("docs/*.json")) > 0 and len(
117
+ glob.glob("docs/*.html")) > 0:
118
  if not grader:
119
  grader = Grader(qa_model)
120
  grader_qa = GraderQA(grader, embeddings)
121
  elif not grader_qa:
122
  grader_qa = GraderQA(grader, embeddings)
123
+ if len(history) == 1:
124
+ history = history + [(None, 'Grading is already complete. You can now ask questions')]
125
  enable_fields(False, False, False, False, True, True, True)
126
  # Check if data is ingested
127
  elif len(glob.glob("docs/*.json")) > 0 and len(glob.glob("docs/*.html")):
128
  if not grader_qa:
129
  grader = Grader(qa_model)
130
+ if len(history) == 1:
131
+ history = history + [(None, 'Canvas data is already ingested. You can grade discussions now')]
132
  enable_fields(False, False, False, True, True, False, False)
133
  else:
134
  history = history + [(None, 'Please ingest data and start grading')]
 
158
 
159
 
160
  def bot(history):
161
+ return get_grading_status(history)
162
 
163
 
164
  with gr.Blocks() as demo:
 
197
  bot, chatbot, chatbot
198
  )
199
 
200
+ grade.click(start_grading, inputs=[chatbot], outputs=[chatbot],
201
  postprocess=False).then(
202
  bot, chatbot, chatbot
203
  )
 
214
  bot, chatbot, chatbot
215
  )
216
 
 
 
217
  if __name__ == "__main__":
218
  demo.queue()
219
  demo.queue(concurrency_count=5)
grader.py CHANGED
@@ -2,23 +2,19 @@ import asyncio
2
  import csv
3
  import glob
4
  import json
 
5
  import shutil
6
  from datetime import datetime
7
  from typing import Optional
8
 
9
  from langchain import PromptTemplate
10
- from langchain.chains import LLMChain, MapReduceChain
11
- from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain, ReduceDocumentsChain
12
- from langchain.chains.combine_documents.stuff import StuffDocumentsChain
13
- from langchain.chains.summarize import load_summarize_chain
14
  from langchain.chat_models import ChatOpenAI
15
  from langchain.document_loaders import DirectoryLoader, UnstructuredHTMLLoader
16
  from langchain.output_parsers import PydanticOutputParser
17
- from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter, Language
18
  from pathvalidate import sanitize_filename
19
  from pydantic import BaseModel, Field
20
  from tqdm import tqdm
21
- import os
22
 
23
 
24
  class Grader:
@@ -69,16 +65,16 @@ class Grader:
69
  self.title = None # Initialize title
70
  for r in rubric:
71
  if 'description' in r and 'ratings' in r:
72
- rubric_text.append(f"description:{r['description']}\n" + "\n".join(
73
- [f"points:{rating['points']} points: {rating['description']}" for rating in r['ratings']]))
74
  elif 'points_possible' in r:
75
- rubric_text.append(f"points_possible:{r['points_possible']}")
76
  print("added points_possible")
77
  elif 'title' in r: # Check if title exists in rubric
78
  self.title = r['title'] # Save title for later use
79
- rubric_text.append(f"title:{self.title}")
80
  elif 'instruction' in r:
81
- rubric_text.append(f"instruction:{r['instruction']}")
82
 
83
  rubric_text = "\n".join(rubric_text)
84
  # print(rubric_text) Add this to log when moving to application
@@ -100,7 +96,7 @@ class Grader:
100
  def create_reduce_prompt(self):
101
  reduce_template_string = f"""I am a Canvas Discussion Grader! I am here to grade the following summarized sections of canvas discussion responses of the student on the basis of instructions and rubric provided.
102
  --------------------
103
- To grade student discussion, I will follow the rubric below. I will not deviate from the grading scheme.
104
  {self.rubric_text}
105
  --------------------
106
  I will be able to identify each student by name, their key interests, key features pertinent to the discussion intruction and rubric.
 
2
  import csv
3
  import glob
4
  import json
5
+ import os
6
  import shutil
7
  from datetime import datetime
8
  from typing import Optional
9
 
10
  from langchain import PromptTemplate
11
+ from langchain.chains import LLMChain
 
 
 
12
  from langchain.chat_models import ChatOpenAI
13
  from langchain.document_loaders import DirectoryLoader, UnstructuredHTMLLoader
14
  from langchain.output_parsers import PydanticOutputParser
 
15
  from pathvalidate import sanitize_filename
16
  from pydantic import BaseModel, Field
17
  from tqdm import tqdm
 
18
 
19
 
20
  class Grader:
 
65
  self.title = None # Initialize title
66
  for r in rubric:
67
  if 'description' in r and 'ratings' in r:
68
+ rubric_text.append(f"RUBRIC CATEGORY: {r['description']}\n" + "\n".join(
69
+ [f"POINTS: {rating['points']} CRITERIA: {rating['description']}" for rating in r['ratings']]))
70
  elif 'points_possible' in r:
71
+ rubric_text.append(f"MAX POINTS POSSIBLE: {r['points_possible']}")
72
  print("added points_possible")
73
  elif 'title' in r: # Check if title exists in rubric
74
  self.title = r['title'] # Save title for later use
75
+ rubric_text.append(f"TITLE: {self.title}")
76
  elif 'instruction' in r:
77
+ rubric_text.append(f"DISCUSSION INSTRUCTIONS: {r['instruction']}")
78
 
79
  rubric_text = "\n".join(rubric_text)
80
  # print(rubric_text) Add this to log when moving to application
 
96
  def create_reduce_prompt(self):
97
  reduce_template_string = f"""I am a Canvas Discussion Grader! I am here to grade the following summarized sections of canvas discussion responses of the student on the basis of instructions and rubric provided.
98
  --------------------
99
+ To grade student discussion, I will use the discussion instructions and rubric below. I will not deviate from the grading scheme.
100
  {self.rubric_text}
101
  --------------------
102
  I will be able to identify each student by name, their key interests, key features pertinent to the discussion intruction and rubric.
utils.py CHANGED
@@ -2,12 +2,11 @@ import os
2
 
3
  from langchain import FAISS
4
  from langchain.chains import ConversationalRetrievalChain
5
- from langchain.document_loaders import DirectoryLoader, UnstructuredHTMLLoader, TextLoader, CSVLoader
6
- from langchain.memory import ConversationSummaryBufferMemory
 
7
  from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
8
- from langchain.text_splitter import RecursiveCharacterTextSplitter, Language
9
-
10
- from grader import Grader
11
 
12
 
13
  def search_index_from_docs(source_chunks, embeddings):
@@ -86,28 +85,74 @@ class GraderQA:
86
  def create_chain(self, embeddings):
87
  if not self.search_index:
88
  self.search_index = self.load_index(embeddings)
89
- chain = ConversationalRetrievalChain.from_llm(self.llm, self.search_index.as_retriever(search_type='mmr',
90
- search_kwargs={'lambda_mult': 1,
91
- 'fetch_k': 50,
92
- 'k': 30}),
 
 
 
 
 
93
  return_source_documents=True,
94
  verbose=True,
95
- memory=ConversationSummaryBufferMemory(memory_key='chat_history',
96
- llm=self.llm,
97
- max_token_limit=40,
98
- return_messages=True,
99
- output_key='answer'),
100
- get_chat_history=get_chat_history,
101
- combine_docs_chain_kwargs={"prompt": self.create_prompt()})
102
  return chain
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  def create_prompt(self):
105
- system_template = f"""You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the following questions as best you can.
106
- You are a grading assistant who graded the canvas discussions to create the following grading results and feedback. Use the following pieces of the grading results and feedback to answer the users question.
107
- Use the following pieces of context to answer the users question.
108
  ----------------
109
  {self.rubric_text}
110
  ----------------
 
111
  {{context}}"""
112
  messages = [
113
  SystemMessagePromptTemplate.from_template(system_template),
 
2
 
3
  from langchain import FAISS
4
  from langchain.chains import ConversationalRetrievalChain
5
+ from langchain.chat_models import ChatOpenAI
6
+ from langchain.document_loaders import CSVLoader
7
+ from langchain.memory import ConversationBufferMemory
8
  from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
 
 
10
 
11
 
12
  def search_index_from_docs(source_chunks, embeddings):
 
85
  def create_chain(self, embeddings):
86
  if not self.search_index:
87
  self.search_index = self.load_index(embeddings)
88
+
89
+ question_prompt, combine_prompt = self.create_map_reduce_prompt()
90
+
91
+ chain = ConversationalRetrievalChain.from_llm(llm=self.llm, chain_type='map_reduce',
92
+ retriever=self.search_index.as_retriever(search_type='mmr',
93
+ search_kwargs={
94
+ 'lambda_mult': 1,
95
+ 'fetch_k': 50,
96
+ 'k': 30}),
97
  return_source_documents=True,
98
  verbose=True,
99
+ memory=ConversationBufferMemory(memory_key='chat_history',
100
+ return_messages=True,
101
+ output_key='answer'),
102
+ condense_question_llm=ChatOpenAI(temperature=0,
103
+ model='gpt-3.5-turbo'),
104
+ combine_docs_chain_kwargs={"question_prompt": question_prompt,
105
+ "combine_prompt": combine_prompt})
106
  return chain
107
 
108
+ def create_map_reduce_prompt(self):
109
+ system_template = f"""Use the following portion of a long grading results document to answer the question BUT ONLY FOR THE STUDENT MENTIONED. Use the following examples to take guidance on how to answer the question.
110
+ Examples:
111
+ Question: How many students participated in the discussion?
112
+ Answer: This student participated in the discussion./This student did not participate in the discussion.
113
+ Question: What was the average score for the discussion?
114
+ Answer: This student received a score of 10/10 for the discussion.
115
+ Question: How many students received a full score?/How many students did not receive a full score?
116
+ Answer: This student received a full score./This student did not receive a full score.
117
+ Question: How many students lost marks in X category of the rubric?
118
+ Answer: This student lost marks in X category of the rubric./This student did not lose marks in X category of the rubric.
119
+
120
+
121
+ ______________________
122
+ Grading Result For:
123
+ {{context}}
124
+ ______________________
125
+ Following are the instructions and rubric of the discussion post for reference, used to grade the discussion.
126
+ ----------------
127
+ Instructions and Rubric:
128
+ {self.rubric_text}
129
+ """
130
+ messages = [
131
+ SystemMessagePromptTemplate.from_template(system_template),
132
+ HumanMessagePromptTemplate.from_template("{question}"),
133
+ ]
134
+ CHAT_QUESTION_PROMPT = ChatPromptTemplate.from_messages(messages)
135
+ system_template = """You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the questions about the grading results, feedback, answers as accurately as possible.
136
+ Use the following answers for each student to answer the users question as accurately as possible.
137
+ You are an expert at basic calculations and answering questions on grading results and can answer the following questions with ease.
138
+ If you don't know the answer, just say that you don't know. Don't try to make up an answer.
139
+ ______________________
140
+ {summaries}"""
141
+ messages = [
142
+ SystemMessagePromptTemplate.from_template(system_template),
143
+ HumanMessagePromptTemplate.from_template("{question}"),
144
+ ]
145
+ CHAT_COMBINE_PROMPT = ChatPromptTemplate.from_messages(messages)
146
+ return CHAT_QUESTION_PROMPT, CHAT_COMBINE_PROMPT
147
+
148
  def create_prompt(self):
149
+ system_template = f"""You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the questions about the grading results, feedback, answers as accurately as possible.
150
+ You are a grading assistant who graded the canvas discussions to create the following grading results and feedback.
151
+ Use the following instruction, rubric of the discussion which were used to grade the discussions and refine the answer if needed.
152
  ----------------
153
  {self.rubric_text}
154
  ----------------
155
+ Use the following pieces of the grading results, score, feedback and summary of student responses to answer the users question as accurately as possible.
156
  {{context}}"""
157
  messages = [
158
  SystemMessagePromptTemplate.from_template(system_template),