rohan13 committed on
Commit
c1fe6fb
1 Parent(s): 1cc9d33

map-reduce to answer summary-based questions

Browse files
Files changed (3) hide show
  1. app.py +20 -21
  2. grader.py +8 -12
  3. utils.py +64 -19
app.py CHANGED
@@ -1,7 +1,8 @@
1
  import asyncio
 
2
  import os
3
  import time
4
- import glob
5
  import gradio as gr
6
  from dotenv import load_dotenv
7
  from langchain.chat_models import ChatOpenAI
@@ -17,7 +18,7 @@ pickle_file = "vector_stores/canvas-discussions.pkl"
17
  index_file = "vector_stores/canvas-discussions.index"
18
 
19
  grading_model = 'gpt-4'
20
- qa_model = 'gpt-3.5-turbo-16k'
21
 
22
  llm = ChatOpenAI(model_name=qa_model, temperature=0, verbose=True)
23
  embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
@@ -28,7 +29,6 @@ grader_qa = None
28
 
29
  def add_text(history, text):
30
  print("Question asked: " + text)
31
- get_grading_status(history)
32
  response = run_model(text)
33
  history = history + [(text, response)]
34
  print(history)
@@ -43,16 +43,16 @@ def run_model(text):
43
  sources = []
44
  for document in response['source_documents']:
45
  sources.append(str(document.metadata))
46
- print(sources)
47
 
48
  source = ','.join(set(sources))
49
- response = response['answer'] + '\nSources: ' + source
50
  end_time = time.time()
51
  # # If response contains string `SOURCES:`, then add a \n before `SOURCES`
52
  # if "SOURCES:" in response:
53
  # response = response.replace("SOURCES:", "\nSOURCES:")
54
  response = response + "\n\n" + "Time taken: " + str(end_time - start_time)
55
  print(response)
 
56
  print("Time taken: " + str(end_time - start_time))
57
  return response
58
 
@@ -68,16 +68,15 @@ def ingest(url, canvas_api_key, history):
68
  grader = Grader(grading_model)
69
  response = "Ingested canvas data successfully"
70
  history = history + [(text, response)]
71
- return get_grading_status(history)
 
72
 
73
- def start_grading(url, canvas_api_key, history):
74
  global grader, grader_qa
75
  text = f"Start grading discussions from {url}"
76
- if not url or not canvas_api_key:
77
- response = "Please enter all the fields to initiate grading"
78
- elif grader:
79
- if grader.llm.model_name != grading_model:
80
- grader = Grader(grading_model)
81
  # Create a new event loop
82
  loop = asyncio.new_event_loop()
83
  asyncio.set_event_loop(loop)
@@ -108,26 +107,28 @@ def get_first_message(history):
108
  global grader_qa
109
  history = [(None,
110
  'Get feedback on your canvas discussions. Add your discussion url and get your discussions graded in instantly.')]
111
- history = get_grading_status(history)
112
- return history
113
 
114
 
115
  def get_grading_status(history):
116
  global grader, grader_qa
117
  # Check if grading is complete
118
- if os.path.isdir('output') and len(glob.glob("docs/*.json")) > 0 and len(glob.glob("docs/*.html")) > 0:
 
119
  if not grader:
120
  grader = Grader(qa_model)
121
  grader_qa = GraderQA(grader, embeddings)
122
  elif not grader_qa:
123
  grader_qa = GraderQA(grader, embeddings)
124
- history = history + [(None, 'Grading is already complete. You can now ask questions')]
 
125
  enable_fields(False, False, False, False, True, True, True)
126
  # Check if data is ingested
127
  elif len(glob.glob("docs/*.json")) > 0 and len(glob.glob("docs/*.html")):
128
  if not grader_qa:
129
  grader = Grader(qa_model)
130
- history = history + [(None, 'Canvas data is already ingested. You can grade discussions now')]
 
131
  enable_fields(False, False, False, True, True, False, False)
132
  else:
133
  history = history + [(None, 'Please ingest data and start grading')]
@@ -157,7 +158,7 @@ def enable_fields(url_status, canvas_api_key_status, submit_status, grade_status
157
 
158
 
159
  def bot(history):
160
- return history
161
 
162
 
163
  with gr.Blocks() as demo:
@@ -196,7 +197,7 @@ with gr.Blocks() as demo:
196
  bot, chatbot, chatbot
197
  )
198
 
199
- grade.click(start_grading, inputs=[url, canvas_api_key, chatbot], outputs=[chatbot],
200
  postprocess=False).then(
201
  bot, chatbot, chatbot
202
  )
@@ -213,8 +214,6 @@ with gr.Blocks() as demo:
213
  bot, chatbot, chatbot
214
  )
215
 
216
- set_model(chatbot)
217
-
218
  if __name__ == "__main__":
219
  demo.queue()
220
  demo.queue(concurrency_count=5)
 
1
  import asyncio
2
+ import glob
3
  import os
4
  import time
5
+
6
  import gradio as gr
7
  from dotenv import load_dotenv
8
  from langchain.chat_models import ChatOpenAI
 
18
  index_file = "vector_stores/canvas-discussions.index"
19
 
20
  grading_model = 'gpt-4'
21
+ qa_model = 'gpt-4'
22
 
23
  llm = ChatOpenAI(model_name=qa_model, temperature=0, verbose=True)
24
  embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')
 
29
 
30
  def add_text(history, text):
31
  print("Question asked: " + text)
 
32
  response = run_model(text)
33
  history = history + [(text, response)]
34
  print(history)
 
43
  sources = []
44
  for document in response['source_documents']:
45
  sources.append(str(document.metadata))
 
46
 
47
  source = ','.join(set(sources))
48
+ response = response['answer'] + '\nSources: ' + str(len(sources))
49
  end_time = time.time()
50
  # # If response contains string `SOURCES:`, then add a \n before `SOURCES`
51
  # if "SOURCES:" in response:
52
  # response = response.replace("SOURCES:", "\nSOURCES:")
53
  response = response + "\n\n" + "Time taken: " + str(end_time - start_time)
54
  print(response)
55
+ print(sources)
56
  print("Time taken: " + str(end_time - start_time))
57
  return response
58
 
 
68
  grader = Grader(grading_model)
69
  response = "Ingested canvas data successfully"
70
  history = history + [(text, response)]
71
+ return history
72
+
73
 
74
+ def start_grading(history):
75
  global grader, grader_qa
76
  text = f"Start grading discussions from {url}"
77
+ if grader:
78
+ # if grader.llm.model_name != grading_model:
79
+ # grader = Grader(grading_model)
 
 
80
  # Create a new event loop
81
  loop = asyncio.new_event_loop()
82
  asyncio.set_event_loop(loop)
 
107
  global grader_qa
108
  history = [(None,
109
  'Get feedback on your canvas discussions. Add your discussion url and get your discussions graded in instantly.')]
110
+ return get_grading_status(history)
 
111
 
112
 
113
  def get_grading_status(history):
114
  global grader, grader_qa
115
  # Check if grading is complete
116
+ if os.path.isdir('output') and len(glob.glob("output/*.csv")) > 0 and len(glob.glob("docs/*.json")) > 0 and len(
117
+ glob.glob("docs/*.html")) > 0:
118
  if not grader:
119
  grader = Grader(qa_model)
120
  grader_qa = GraderQA(grader, embeddings)
121
  elif not grader_qa:
122
  grader_qa = GraderQA(grader, embeddings)
123
+ if len(history) == 1:
124
+ history = history + [(None, 'Grading is already complete. You can now ask questions')]
125
  enable_fields(False, False, False, False, True, True, True)
126
  # Check if data is ingested
127
  elif len(glob.glob("docs/*.json")) > 0 and len(glob.glob("docs/*.html")):
128
  if not grader_qa:
129
  grader = Grader(qa_model)
130
+ if len(history) == 1:
131
+ history = history + [(None, 'Canvas data is already ingested. You can grade discussions now')]
132
  enable_fields(False, False, False, True, True, False, False)
133
  else:
134
  history = history + [(None, 'Please ingest data and start grading')]
 
158
 
159
 
160
  def bot(history):
161
+ return get_grading_status(history)
162
 
163
 
164
  with gr.Blocks() as demo:
 
197
  bot, chatbot, chatbot
198
  )
199
 
200
+ grade.click(start_grading, inputs=[chatbot], outputs=[chatbot],
201
  postprocess=False).then(
202
  bot, chatbot, chatbot
203
  )
 
214
  bot, chatbot, chatbot
215
  )
216
 
 
 
217
  if __name__ == "__main__":
218
  demo.queue()
219
  demo.queue(concurrency_count=5)
grader.py CHANGED
@@ -2,23 +2,19 @@ import asyncio
2
  import csv
3
  import glob
4
  import json
 
5
  import shutil
6
  from datetime import datetime
7
  from typing import Optional
8
 
9
  from langchain import PromptTemplate
10
- from langchain.chains import LLMChain, MapReduceChain
11
- from langchain.chains.combine_documents.map_reduce import MapReduceDocumentsChain, ReduceDocumentsChain
12
- from langchain.chains.combine_documents.stuff import StuffDocumentsChain
13
- from langchain.chains.summarize import load_summarize_chain
14
  from langchain.chat_models import ChatOpenAI
15
  from langchain.document_loaders import DirectoryLoader, UnstructuredHTMLLoader
16
  from langchain.output_parsers import PydanticOutputParser
17
- from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter, Language
18
  from pathvalidate import sanitize_filename
19
  from pydantic import BaseModel, Field
20
  from tqdm import tqdm
21
- import os
22
 
23
 
24
  class Grader:
@@ -69,16 +65,16 @@ class Grader:
69
  self.title = None # Initialize title
70
  for r in rubric:
71
  if 'description' in r and 'ratings' in r:
72
- rubric_text.append(f"description:{r['description']}\n" + "\n".join(
73
- [f"points:{rating['points']} points: {rating['description']}" for rating in r['ratings']]))
74
  elif 'points_possible' in r:
75
- rubric_text.append(f"points_possible:{r['points_possible']}")
76
  print("added points_possible")
77
  elif 'title' in r: # Check if title exists in rubric
78
  self.title = r['title'] # Save title for later use
79
- rubric_text.append(f"title:{self.title}")
80
  elif 'instruction' in r:
81
- rubric_text.append(f"instruction:{r['instruction']}")
82
 
83
  rubric_text = "\n".join(rubric_text)
84
  # print(rubric_text) Add this to log when moving to application
@@ -100,7 +96,7 @@ class Grader:
100
  def create_reduce_prompt(self):
101
  reduce_template_string = f"""I am a Canvas Discussion Grader! I am here to grade the following summarized sections of canvas discussion responses of the student on the basis of instructions and rubric provided.
102
  --------------------
103
- To grade student discussion, I will follow the rubric below. I will not deviate from the grading scheme.
104
  {self.rubric_text}
105
  --------------------
106
  I will be able to identify each student by name, their key interests, key features pertinent to the discussion intruction and rubric.
 
2
  import csv
3
  import glob
4
  import json
5
+ import os
6
  import shutil
7
  from datetime import datetime
8
  from typing import Optional
9
 
10
  from langchain import PromptTemplate
11
+ from langchain.chains import LLMChain
 
 
 
12
  from langchain.chat_models import ChatOpenAI
13
  from langchain.document_loaders import DirectoryLoader, UnstructuredHTMLLoader
14
  from langchain.output_parsers import PydanticOutputParser
 
15
  from pathvalidate import sanitize_filename
16
  from pydantic import BaseModel, Field
17
  from tqdm import tqdm
 
18
 
19
 
20
  class Grader:
 
65
  self.title = None # Initialize title
66
  for r in rubric:
67
  if 'description' in r and 'ratings' in r:
68
+ rubric_text.append(f"RUBRIC CATEGORY: {r['description']}\n" + "\n".join(
69
+ [f"POINTS: {rating['points']} CRITERIA: {rating['description']}" for rating in r['ratings']]))
70
  elif 'points_possible' in r:
71
+ rubric_text.append(f"MAX POINTS POSSIBLE: {r['points_possible']}")
72
  print("added points_possible")
73
  elif 'title' in r: # Check if title exists in rubric
74
  self.title = r['title'] # Save title for later use
75
+ rubric_text.append(f"TITLE: {self.title}")
76
  elif 'instruction' in r:
77
+ rubric_text.append(f"DISCUSSION INSTRUCTIONS: {r['instruction']}")
78
 
79
  rubric_text = "\n".join(rubric_text)
80
  # print(rubric_text) Add this to log when moving to application
 
96
  def create_reduce_prompt(self):
97
  reduce_template_string = f"""I am a Canvas Discussion Grader! I am here to grade the following summarized sections of canvas discussion responses of the student on the basis of instructions and rubric provided.
98
  --------------------
99
+ To grade student discussion, I will use the discussion instructions and rubric below. I will not deviate from the grading scheme.
100
  {self.rubric_text}
101
  --------------------
102
  I will be able to identify each student by name, their key interests, key features pertinent to the discussion intruction and rubric.
utils.py CHANGED
@@ -2,12 +2,11 @@ import os
2
 
3
  from langchain import FAISS
4
  from langchain.chains import ConversationalRetrievalChain
5
- from langchain.document_loaders import DirectoryLoader, UnstructuredHTMLLoader, TextLoader, CSVLoader
6
- from langchain.memory import ConversationSummaryBufferMemory
 
7
  from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
8
- from langchain.text_splitter import RecursiveCharacterTextSplitter, Language
9
-
10
- from grader import Grader
11
 
12
 
13
  def search_index_from_docs(source_chunks, embeddings):
@@ -86,28 +85,74 @@ class GraderQA:
86
  def create_chain(self, embeddings):
87
  if not self.search_index:
88
  self.search_index = self.load_index(embeddings)
89
- chain = ConversationalRetrievalChain.from_llm(self.llm, self.search_index.as_retriever(search_type='mmr',
90
- search_kwargs={'lambda_mult': 1,
91
- 'fetch_k': 50,
92
- 'k': 30}),
 
 
 
 
 
93
  return_source_documents=True,
94
  verbose=True,
95
- memory=ConversationSummaryBufferMemory(memory_key='chat_history',
96
- llm=self.llm,
97
- max_token_limit=40,
98
- return_messages=True,
99
- output_key='answer'),
100
- get_chat_history=get_chat_history,
101
- combine_docs_chain_kwargs={"prompt": self.create_prompt()})
102
  return chain
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  def create_prompt(self):
105
- system_template = f"""You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the following questions as best you can.
106
- You are a grading assistant who graded the canvas discussions to create the following grading results and feedback. Use the following pieces of the grading results and feedback to answer the users question.
107
- Use the following pieces of context to answer the users question.
108
  ----------------
109
  {self.rubric_text}
110
  ----------------
 
111
  {{context}}"""
112
  messages = [
113
  SystemMessagePromptTemplate.from_template(system_template),
 
2
 
3
  from langchain import FAISS
4
  from langchain.chains import ConversationalRetrievalChain
5
+ from langchain.chat_models import ChatOpenAI
6
+ from langchain.document_loaders import CSVLoader
7
+ from langchain.memory import ConversationBufferMemory
8
  from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
 
 
10
 
11
 
12
  def search_index_from_docs(source_chunks, embeddings):
 
85
  def create_chain(self, embeddings):
86
  if not self.search_index:
87
  self.search_index = self.load_index(embeddings)
88
+
89
+ question_prompt, combine_prompt = self.create_map_reduce_prompt()
90
+
91
+ chain = ConversationalRetrievalChain.from_llm(llm=self.llm, chain_type='map_reduce',
92
+ retriever=self.search_index.as_retriever(search_type='mmr',
93
+ search_kwargs={
94
+ 'lambda_mult': 1,
95
+ 'fetch_k': 50,
96
+ 'k': 30}),
97
  return_source_documents=True,
98
  verbose=True,
99
+ memory=ConversationBufferMemory(memory_key='chat_history',
100
+ return_messages=True,
101
+ output_key='answer'),
102
+ condense_question_llm=ChatOpenAI(temperature=0,
103
+ model='gpt-3.5-turbo'),
104
+ combine_docs_chain_kwargs={"question_prompt": question_prompt,
105
+ "combine_prompt": combine_prompt})
106
  return chain
107
 
108
+ def create_map_reduce_prompt(self):
109
+ system_template = f"""Use the following portion of a long grading results document to answer the question BUT ONLY FOR THE STUDENT MENTIONED. Use the following examples to take guidance on how to answer the question.
110
+ Examples:
111
+ Question: How many students participated in the discussion?
112
+ Answer: This student participated in the discussion./This student did not participate in the discussion.
113
+ Question: What was the average score for the discussion?
114
+ Answer: This student received a score of 10/10 for the discussion.
115
+ Question: How many students received a full score?/How many students did not receive a full score?
116
+ Answer: This student received a full score./This student did not receive a full score.
117
+ Question: How many students lost marks in X category of the rubric?
118
+ Answer: This student lost marks in X category of the rubric./This student did not lose marks in X category of the rubric.
119
+
120
+
121
+ ______________________
122
+ Grading Result For:
123
+ {{context}}
124
+ ______________________
125
+ Following are the instructions and rubric of the discussion post for reference, used to grade the discussion.
126
+ ----------------
127
+ Instructions and Rubric:
128
+ {self.rubric_text}
129
+ """
130
+ messages = [
131
+ SystemMessagePromptTemplate.from_template(system_template),
132
+ HumanMessagePromptTemplate.from_template("{question}"),
133
+ ]
134
+ CHAT_QUESTION_PROMPT = ChatPromptTemplate.from_messages(messages)
135
+ system_template = """You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the questions about the grading results, feedback, answers as accurately as possible.
136
+ Use the following answers for each student to answer the users question as accurately as possible.
137
+ You are an expert at basic calculations and answering questions on grading results and can answer the following questions with ease.
138
+ If you don't know the answer, just say that you don't know. Don't try to make up an answer.
139
+ ______________________
140
+ {summaries}"""
141
+ messages = [
142
+ SystemMessagePromptTemplate.from_template(system_template),
143
+ HumanMessagePromptTemplate.from_template("{question}"),
144
+ ]
145
+ CHAT_COMBINE_PROMPT = ChatPromptTemplate.from_messages(messages)
146
+ return CHAT_QUESTION_PROMPT, CHAT_COMBINE_PROMPT
147
+
148
  def create_prompt(self):
149
+ system_template = f"""You are Canvas Discussions Grading + Feedback QA Bot. Have a conversation with a human, answering the questions about the grading results, feedback, answers as accurately as possible.
150
+ You are a grading assistant who graded the canvas discussions to create the following grading results and feedback.
151
+ Use the following instruction, rubric of the discussion which were used to grade the discussions and refine the answer if needed.
152
  ----------------
153
  {self.rubric_text}
154
  ----------------
155
+ Use the following pieces of the grading results, score, feedback and summary of student responses to answer the users question as accurately as possible.
156
  {{context}}"""
157
  messages = [
158
  SystemMessagePromptTemplate.from_template(system_template),