RAMYASRI-39 committed on
Commit
be3f613
1 Parent(s): b213bee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +230 -230
app.py CHANGED
@@ -1,230 +1,230 @@
1
- # Importing libraries
2
- import pandas as pd
3
- import json
4
- import gradio as gr
5
- from pathlib import Path
6
- from ragatouille import RAGPretrainedModel
7
- from gradio_client import Client
8
- from tempfile import NamedTemporaryFile
9
- from sentence_transformers import CrossEncoder
10
- import numpy as np
11
- from time import perf_counter
12
- from sentence_transformers import CrossEncoder
13
-
14
- #calling functions from other files - to call the knowledge database tables (lancedb for accurate mode) for creating quiz
15
- from backend.semantic_search import table, retriever
16
-
17
- VECTOR_COLUMN_NAME = "vector"
18
- TEXT_COLUMN_NAME = "text"
19
- proj_dir = Path.cwd()
20
-
21
- # Set up logging
22
- import logging
23
- logging.basicConfig(level=logging.INFO)
24
- logger = logging.getLogger(__name__)
25
-
26
- # Replace Mixtral client with Qwen Client
27
- client = Client("Qwen/Qwen1.5-110B-Chat-demo")
28
-
29
- def system_instructions(question_difficulty, topic, documents_str):
30
- return f"""<s> [INST] You are a great teacher and your task is to create 10 questions with 4 choices with {question_difficulty} difficulty about the topic request "{topic}" only from the below given documents, {documents_str}. Then create answers. Index in JSON format, the questions as "Q#":"" to "Q#":"", the four choices as "Q#:C1":"" to "Q#:C4":"", and the answers as "A#":"Q#:C#" to "A#":"Q#:C#". Example: 'A10':'Q10:C3' [/INST]"""
31
-
32
- # Ragatouille database for Colbert ie highly accurate mode
33
- RAG_db = gr.State()
34
- quiz_data = None
35
-
36
-
37
- #defining a function to convert json file to excel file
38
- def json_to_excel(output_json):
39
- # Initialize list for DataFrame
40
- data = []
41
- gr.Warning('Generating Shareable file link..', duration=30)
42
- for i in range(1, 11): # Assuming there are 10 questions
43
- question_key = f"Q{i}"
44
- answer_key = f"A{i}"
45
-
46
- question = output_json.get(question_key, '')
47
- correct_answer_key = output_json.get(answer_key, '')
48
- #correct_answer = correct_answer_key.split(':')[-1] if correct_answer_key else ''
49
- correct_answer = correct_answer_key.split(':')[-1].replace('C', '').strip() if correct_answer_key else ''
50
-
51
- # Extract options
52
- option_keys = [f"{question_key}:C{i}" for i in range(1, 6)]
53
- options = [output_json.get(key, '') for key in option_keys]
54
-
55
- # Add data row
56
- data.append([
57
- question, # Question Text
58
- "Multiple Choice", # Question Type
59
- options[0], # Option 1
60
- options[1], # Option 2
61
- options[2] if len(options) > 2 else '', # Option 3
62
- options[3] if len(options) > 3 else '', # Option 4
63
- options[4] if len(options) > 4 else '', # Option 5
64
- correct_answer, # Correct Answer
65
- 30, # Time in seconds
66
- '' # Image Link
67
- ])
68
-
69
- # Create DataFrame
70
- df = pd.DataFrame(data, columns=[
71
- "Question Text",
72
- "Question Type",
73
- "Option 1",
74
- "Option 2",
75
- "Option 3",
76
- "Option 4",
77
- "Option 5",
78
- "Correct Answer",
79
- "Time in seconds",
80
- "Image Link"
81
- ])
82
-
83
- temp_file = NamedTemporaryFile(delete=False, suffix=".xlsx")
84
- df.to_excel(temp_file.name, index=False)
85
- return temp_file.name
86
- # Define a colorful theme
87
- colorful_theme = gr.themes.Default(
88
- primary_hue="cyan", # Set a bright cyan as primary color
89
- secondary_hue="yellow", # Set a bright magenta as secondary color
90
- neutral_hue="purple" # Optionally set a neutral color
91
-
92
- )
93
-
94
- #gradio app creation for a user interface
95
- with gr.Blocks(title="Quiz Maker", theme=colorful_theme) as QUIZBOT:
96
-
97
-
98
- # Create a single row for the HTML and Image
99
- with gr.Row():
100
- with gr.Column(scale=2):
101
- gr.Image(value='logo.png', height=200, width=200)
102
- with gr.Column(scale=6):
103
- gr.HTML("""
104
- <center>
105
- <h1><span style="color: purple;">GOVERNMENT HIGH SCHOOL,SUTHUKENY</span>STUDENTS QUIZBOT</h1>
106
- <h2>Generative AI-powered Capacity building for STUDENTS</h2>
107
- <i>⚠️ NACIN Faculties create quiz from any topic dynamically for classroom evaluation after their sessions ! ⚠️</i>
108
- </center>
109
- """)
110
-
111
-
112
-
113
-
114
- topic = gr.Textbox(label="Enter the Topic for Quiz", placeholder="Write any topic/details from Customs Manual")
115
-
116
- with gr.Row():
117
- difficulty_radio = gr.Radio(["easy", "average", "hard"], label="How difficult should the quiz be?")
118
- model_radio = gr.Radio(choices=[ '(ACCURATE) BGE reranker', '(HIGH ACCURATE) ColBERT'],
119
- value='(ACCURATE) BGE reranker', label="Embeddings",
120
- info="First query to ColBERT may take a little time")
121
-
122
- generate_quiz_btn = gr.Button("Generate Quiz!🚀")
123
- quiz_msg = gr.Textbox()
124
-
125
- question_radios = [gr.Radio(visible=False) for _ in range(10)]
126
-
127
- @generate_quiz_btn.click(inputs=[difficulty_radio, topic, model_radio], outputs=[quiz_msg] + question_radios + [gr.File(label="Download Excel")])
128
- def generate_quiz(question_difficulty, topic, cross_encoder):
129
- top_k_rank = 10
130
- documents = []
131
- gr.Warning('Generating Quiz may take 1-2 minutes. Please wait.', duration=60)
132
-
133
- if cross_encoder == '(HIGH ACCURATE) ColBERT':
134
- gr.Warning('Retrieving using ColBERT.. First-time query will take 2 minute for model to load.. please wait',duration=100)
135
- RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
136
- RAG_db.value = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
137
- documents_full = RAG_db.value.search(topic, k=top_k_rank)
138
- documents = [item['content'] for item in documents_full]
139
-
140
- else:
141
- document_start = perf_counter()
142
- query_vec = retriever.encode(topic)
143
- doc1 = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank)
144
-
145
- documents = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank).to_list()
146
- documents = [doc[TEXT_COLUMN_NAME] for doc in documents]
147
-
148
- query_doc_pair = [[topic, doc] for doc in documents]
149
-
150
- # if cross_encoder == '(FAST) MiniLM-L6v2':
151
- # cross_encoder1 = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
152
- if cross_encoder == '(ACCURATE) BGE reranker':
153
- cross_encoder1 = CrossEncoder('BAAI/bge-reranker-base')
154
-
155
- cross_scores = cross_encoder1.predict(query_doc_pair)
156
- sim_scores_argsort = list(reversed(np.argsort(cross_scores)))
157
- documents = [documents[idx] for idx in sim_scores_argsort[:top_k_rank]]
158
-
159
- #creating a text prompt to Qwen model combining the documents and system instruction
160
- formatted_prompt = system_instructions(question_difficulty, topic, '\n'.join(documents))
161
- print(' Formatted Prompt : ' ,formatted_prompt)
162
- try:
163
- response = client.predict(query=formatted_prompt, history=[], system="You are a helpful assistant.", api_name="/model_chat")
164
- response1 = response[1][0][1]
165
-
166
- # Extract JSON
167
- start_index = response1.find('{')
168
- end_index = response1.rfind('}')
169
- cleaned_response = response1[start_index:end_index + 1] if start_index != -1 and end_index != -1 else ''
170
- print('Cleaned Response :',cleaned_response)
171
- output_json = json.loads(cleaned_response)
172
- # Assign the extracted JSON to quiz_data for use in the comparison function
173
- global quiz_data
174
- quiz_data = output_json
175
- # Generate the Excel file
176
- excel_file = json_to_excel(output_json)
177
-
178
-
179
- #Create a Quiz display in app
180
- question_radio_list = []
181
- for question_num in range(1, 11):
182
- question_key = f"Q{question_num}"
183
- answer_key = f"A{question_num}"
184
-
185
- question = output_json.get(question_key)
186
- answer = output_json.get(output_json.get(answer_key))
187
-
188
- if not question or not answer:
189
- continue
190
-
191
- choice_keys = [f"{question_key}:C{i}" for i in range(1, 5)]
192
- choice_list = [output_json.get(choice_key, "Choice not found") for choice_key in choice_keys]
193
-
194
- radio = gr.Radio(choices=choice_list, label=question, visible=True, interactive=True)
195
- question_radio_list.append(radio)
196
-
197
- return ['Quiz Generated!'] + question_radio_list + [excel_file]
198
-
199
- except json.JSONDecodeError as e:
200
- print(f"Failed to decode JSON: {e}")
201
-
202
- check_button = gr.Button("Check Score")
203
- score_textbox = gr.Markdown()
204
-
205
- @check_button.click(inputs=question_radios, outputs=score_textbox)
206
- def compare_answers(*user_answers):
207
- user_answer_list = list(user_answers)
208
- answers_list = []
209
-
210
- for question_num in range(1, 20):
211
- answer_key = f"A{question_num}"
212
- answer = quiz_data.get(quiz_data.get(answer_key))
213
- if not answer:
214
- break
215
- answers_list.append(answer)
216
-
217
- score = sum(1 for item in user_answer_list if item in answers_list)
218
-
219
- if score > 7:
220
- message = f"### Excellent! You got {score} out of 10!"
221
- elif score > 5:
222
- message = f"### Good! You got {score} out of 10!"
223
- else:
224
- message = f"### You got {score} out of 10! Don't worry. You can prepare well and try better next time!"
225
-
226
- return message
227
-
228
- QUIZBOT.queue()
229
- QUIZBOT.launch(debug=True)
230
-
 
1
+ # Importing libraries
2
+ import pandas as pd
3
+ import json
4
+ import gradio as gr
5
+ from pathlib import Path
6
+ from ragatouille import RAGPretrainedModel
7
+ from gradio_client import Client
8
+ from tempfile import NamedTemporaryFile
9
+ from sentence_transformers import CrossEncoder
10
+ import numpy as np
11
+ from time import perf_counter
12
+ from sentence_transformers import CrossEncoder
13
+
14
+ #calling functions from other files - to call the knowledge database tables (lancedb for accurate mode) for creating quiz
15
+ from backend.semantic_search import table, retriever
16
+
17
# Column names used when querying `table` (imported from backend.semantic_search).
VECTOR_COLUMN_NAME = "vector"
TEXT_COLUMN_NAME = "text"
# Current working directory at import time.
proj_dir = Path.cwd()

# Set up logging: INFO level on the root logger, plus a module-level logger.
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Gradio client for the hosted Qwen chat demo; generate_quiz calls its
# "/model_chat" endpoint to produce the quiz JSON.
client = Client("Qwen/Qwen1.5-110B-Chat-demo")
28
+
29
def system_instructions(question_difficulty, topic, documents_str):
    """Build the quiz-generation prompt sent to the chat model.

    Args:
        question_difficulty: Difficulty label interpolated into the prompt.
        topic: Topic the questions must be about.
        documents_str: Retrieved source passages, already joined into one string.

    Returns:
        The full instruction string asking for 10 four-choice questions and
        their answers, indexed as JSON keys ("Q#", "Q#:C1".."Q#:C4", "A#").
    """
    # The prompt is assembled from three pieces purely for readability; the
    # concatenation is character-for-character the single original template.
    task = (
        f'<s> [INST] You are a great teacher and your task is to create 10 questions with 4 choices '
        f'with {question_difficulty} difficulty about the topic request "{topic}" '
        f'only from the below given documents, {documents_str}. '
    )
    output_format = (
        'Then create answers. Index in JSON format, the questions as "Q#":"" to "Q#":"", '
        'the four choices as "Q#:C1":"" to "Q#:C4":"", '
        'and the answers as "A#":"Q#:C#" to "A#":"Q#:C#". '
    )
    example = "Example: 'A10':'Q10:C3' [/INST]"
    return task + output_format + example
31
+
32
# Holder for the loaded RAGatouille/ColBERT index used by the
# '(HIGH ACCURATE) ColBERT' mode; generate_quiz assigns and reads its `.value`.
# NOTE(review): this gr.State is created outside the gr.Blocks context and is
# only ever used as a plain mutable global via `.value` — confirm that is intended.
RAG_db = gr.State()
# Parsed JSON of the most recently generated quiz. Written by generate_quiz
# (via `global`) and read by compare_answers to score the user's choices.
quiz_data = None
35
+
36
+
37
+ #defining a function to convert json file to excel file
38
def json_to_excel(output_json, num_questions=10):
    """Export a generated quiz to a temporary Excel workbook.

    Args:
        output_json: Quiz dictionary with keys "Q#" (question text),
            "Q#:C1".."Q#:C5" (choice texts) and "A#" (answer reference such
            as "Q3:C2"). Missing keys are exported as empty cells.
        num_questions: How many question rows to export (defaults to 10, the
            number of questions requested from the model).

    Returns:
        Path of the written ``.xlsx`` file. The file is created with
        ``delete=False`` so it outlives this call; the caller owns cleanup.
    """
    gr.Warning('Generating Shareable file link..', duration=30)

    columns = [
        "Question Text",
        "Question Type",
        "Option 1",
        "Option 2",
        "Option 3",
        "Option 4",
        "Option 5",
        "Correct Answer",
        "Time in seconds",
        "Image Link",
    ]

    rows = []
    for number in range(1, num_questions + 1):
        question_key = f"Q{number}"
        answer_ref = output_json.get(f"A{number}", '')
        # "Q3:C2" -> "2": keep only the choice number. str() guards against a
        # model that returns a non-string answer reference.
        correct_answer = (
            str(answer_ref).split(':')[-1].replace('C', '').strip() if answer_ref else ''
        )
        # The sheet layout has five option columns; the prompt only asks for
        # four choices, so the fifth is normally an empty cell.
        options = [output_json.get(f"{question_key}:C{choice}", '') for choice in range(1, 6)]
        rows.append([
            output_json.get(question_key, ''),  # Question Text
            "Multiple Choice",                  # Question Type
            *options,                           # Option 1..5
            correct_answer,                     # Correct Answer
            30,                                 # Time in seconds
            '',                                 # Image Link
        ])

    df = pd.DataFrame(rows, columns=columns)

    # Reserve a unique path, then close our handle before pandas writes to it:
    # leaving it open leaks a file descriptor and prevents the file from being
    # reopened by name on Windows.
    with NamedTemporaryFile(delete=False, suffix=".xlsx") as temp_file:
        excel_path = temp_file.name
    df.to_excel(excel_path, index=False)
    return excel_path
86
# Define a colorful theme for the app, based on Gradio's default theme.
colorful_theme = gr.themes.Default(
    primary_hue="cyan",  # Cyan as the primary color
    secondary_hue="yellow",  # Yellow as the secondary color
    neutral_hue="purple"  # Purple as the neutral color

)
93
+
94
# Gradio app: build the quiz UI and wire up its two event handlers.

# Number of questions requested from the model and of radio slots in the UI.
NUM_QUESTIONS = 10


def _quiz_items(output_json):
    """Return (question, choices, correct_choice_text) for each usable question, in order.

    A question is usable when its text exists and its answer reference
    (e.g. "Q3:C2") resolves to a non-empty choice text. Unusable questions are
    skipped, so the result may hold fewer than NUM_QUESTIONS entries.
    """
    items = []
    if not isinstance(output_json, dict):
        return items
    for number in range(1, NUM_QUESTIONS + 1):
        question_key = f"Q{number}"
        question = output_json.get(question_key)
        answer_ref = output_json.get(f"A{number}")
        # Resolve the reference to the actual choice text shown in the radio.
        answer = output_json.get(answer_ref) if isinstance(answer_ref, str) else None
        if not question or not answer:
            continue
        choices = [
            output_json.get(f"{question_key}:C{choice}", "Choice not found")
            for choice in range(1, 5)
        ]
        items.append((question, choices, answer))
    return items


def _quiz_outputs(message, items=(), excel_file=None):
    """Build the handler result: status message, NUM_QUESTIONS radios, Excel path.

    Gradio requires exactly one value per declared output, so unused radio
    slots are filled with hidden radios.
    """
    radios = [
        # value=None clears a selection left over from a previous quiz.
        gr.Radio(choices=choices, label=question, value=None, visible=True, interactive=True)
        for question, choices, _ in items
    ]
    radios += [gr.Radio(visible=False) for _ in range(NUM_QUESTIONS - len(radios))]
    return [message] + radios + [excel_file]


with gr.Blocks(title="Quiz Maker", theme=colorful_theme) as QUIZBOT:

    # Create a single row for the HTML and Image
    with gr.Row():
        with gr.Column(scale=2):
            gr.Image(value='logo.png', height=200, width=200)
        with gr.Column(scale=6):
            gr.HTML("""
            <center>
            <h1><span style="color: purple;">GOVERNMENT HIGH SCHOOL,SUTHUKENY</span> STUDENTS QUIZBOT</h1>
            <h2>Generative AI-powered Capacity building for STUDENTS</h2>
            <i>⚠️STUDENTS CAN CREATE QUIZ AND EVALUATE BY THEMSELVES ! ⚠️</i>
            </center>
            """)

    topic = gr.Textbox(label="Enter the Topic for Quiz", placeholder="Write any topic/details from Customs Manual")

    with gr.Row():
        difficulty_radio = gr.Radio(["easy", "average", "hard"], label="How difficult should the quiz be?")
        model_radio = gr.Radio(choices=['(ACCURATE) BGE reranker', '(HIGH ACCURATE) ColBERT'],
                               value='(ACCURATE) BGE reranker', label="Embeddings",
                               info="First query to ColBERT may take a little time")

    generate_quiz_btn = gr.Button("Generate Quiz!🚀")
    quiz_msg = gr.Textbox()

    question_radios = [gr.Radio(visible=False) for _ in range(NUM_QUESTIONS)]
    excel_download = gr.File(label="Download Excel")

    @generate_quiz_btn.click(inputs=[difficulty_radio, topic, model_radio],
                             outputs=[quiz_msg] + question_radios + [excel_download])
    def generate_quiz(question_difficulty, topic, cross_encoder):
        """Retrieve passages for the topic, ask the model for a quiz and display it.

        Args:
            question_difficulty: Selected difficulty label.
            topic: Topic text typed by the user.
            cross_encoder: Selected retrieval mode (one of the model_radio choices).

        Returns:
            A list with one value per output component: status message,
            NUM_QUESTIONS radio updates and the Excel file path (None on failure).
        """
        global quiz_data
        top_k_rank = 10

        if not topic or not topic.strip():
            return _quiz_outputs("Please enter a topic for the quiz.")

        gr.Warning('Generating Quiz may take 1-2 minutes. Please wait.', duration=60)

        try:
            if cross_encoder == '(HIGH ACCURATE) ColBERT':
                gr.Warning('Retrieving using ColBERT.. First-time query will take 2 minute for model to load.. please wait', duration=100)
                # Load the model and index once and reuse them on later clicks.
                if RAG_db.value is None:
                    RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")
                    RAG_db.value = RAG.from_index('.ragatouille/colbert/indexes/cbseclass10index')
                documents_full = RAG_db.value.search(topic, k=top_k_rank)
                documents = [item['content'] for item in documents_full]
            else:
                # Vector search followed by BGE reranking; this is the only
                # non-ColBERT mode the UI offers.
                query_vec = retriever.encode(topic)
                hits = table.search(query_vec, vector_column_name=VECTOR_COLUMN_NAME).limit(top_k_rank).to_list()
                documents = [hit[TEXT_COLUMN_NAME] for hit in hits]
                if documents:
                    reranker = CrossEncoder('BAAI/bge-reranker-base')
                    cross_scores = reranker.predict([[topic, doc] for doc in documents])
                    best_first = np.argsort(cross_scores)[::-1]
                    documents = [documents[idx] for idx in best_first[:top_k_rank]]
        except Exception:
            # UI boundary: log the full traceback, show a readable message.
            logger.exception("Document retrieval failed")
            return _quiz_outputs("Could not retrieve documents for this topic. Please try again.")

        if not documents:
            return _quiz_outputs("No documents were found for this topic. Please try another topic.")

        # Creating a text prompt to Qwen model combining the documents and system instruction
        formatted_prompt = system_instructions(question_difficulty, topic, '\n'.join(documents))
        logger.info("Formatted prompt: %s", formatted_prompt)

        try:
            response = client.predict(query=formatted_prompt, history=[], system="You are a helpful assistant.", api_name="/model_chat")
            reply = response[1][0][1]

            # Keep only the outermost {...} span; the model may wrap the JSON in prose.
            start_index = reply.find('{')
            end_index = reply.rfind('}')
            cleaned_response = reply[start_index:end_index + 1] if start_index != -1 and end_index != -1 else ''
            logger.info("Cleaned response: %s", cleaned_response)
            output_json = json.loads(cleaned_response)
        except json.JSONDecodeError:
            logger.exception("Failed to decode JSON from the model reply")
            return _quiz_outputs("The model did not return a valid quiz. Please try again.")
        except Exception:
            # Network errors or an unexpected response shape from the remote demo.
            logger.exception("Quiz generation request failed")
            return _quiz_outputs("Quiz generation failed. Please try again.")

        items = _quiz_items(output_json)
        if not items:
            return _quiz_outputs("The model's reply contained no usable questions. Please try again.")

        # Keep the quiz for compare_answers.
        # NOTE: this is a module-level global, so it is shared by all users of the app.
        quiz_data = output_json
        excel_file = json_to_excel(output_json)
        return _quiz_outputs('Quiz Generated!', items, excel_file)

    check_button = gr.Button("Check Score")
    score_textbox = gr.Markdown()

    @check_button.click(inputs=question_radios, outputs=score_textbox)
    def compare_answers(*user_answers):
        """Score the selected choices against the last generated quiz.

        Args:
            *user_answers: Current value of each question radio, in display order
                (None when nothing is selected).

        Returns:
            A Markdown message with the score.
        """
        items = _quiz_items(quiz_data)
        if not items:
            return "### Please generate a quiz before checking your score."

        # Radios are filled in the same order as `items`, so compare each
        # selection with the answer of its own question only.
        score = sum(
            1
            for (_, _, correct), chosen in zip(items, user_answers)
            if chosen is not None and chosen == correct
        )
        total = len(items)

        if score > 7:
            message = f"### Excellent! You got {score} out of {total}!"
        elif score > 5:
            message = f"### Good! You got {score} out of {total}!"
        else:
            message = f"### You got {score} out of {total}! Don't worry. You can prepare well and try better next time!"

        return message

QUIZBOT.queue()
QUIZBOT.launch(debug=True)
230
+