Spaces:
Runtime error
Runtime error
merge files from main branch
Browse files- README.md +2 -0
- README_zh.md +2 -0
- llmriddles/questions/executor.py +13 -2
- llmriddles/questions/level1.py +82 -47
- llmriddles/questions/level2.py +7 -12
- llmriddles/questions/level3.py +41 -47
- llmriddles/questions/level4.py +60 -51
- llmriddles/questions/level5.py +1 -26
- llmriddles/questions/math_tools.py +20 -0
- llmriddles/questions/question.py +22 -11
- llmriddles/questions/utility.py +34 -0
README.md
CHANGED
@@ -30,8 +30,10 @@ Welcome to LLM Riddles! This is a game of wits and courage with language models.
|
|
30 |
We provide an online version for players to directly access and try out.
|
31 |
- [Hugging Face][ChatGPT + English(w/o key)](https://huggingface.co/spaces/OpenDILabCommunity/LLMRiddlesChatGPTEN)
|
32 |
- [Hugging Face][ChatGPT + Chinese(w/o key)](https://huggingface.co/spaces/OpenDILabCommunity/LLMRiddlesChatGPTCN)
|
|
|
33 |
- [Hugging Face][ChatGLM + Chinese(w/ key)](https://huggingface.co/spaces/OpenDILabCommunity/LLMRiddlesChatGLMCN)
|
34 |
- [OpenXLab][ChatGPT + Chinese(w/o key)](https://openxlab.org.cn/apps/detail/OpenDILab/LLMRiddlesChatGPTCN)
|
|
|
35 |
- [OpenXLab][ChatGLM + Chinese(w/ key)](https://openxlab.org.cn/apps/detail/OpenDILab/LLMRiddlesChatGLMCN)
|
36 |
- [OpenXLab][ChatGLM + English(w/ key)](https://openxlab.org.cn/apps/detail/OpenDILab/LLMRiddlesChatGLMEN)
|
37 |
- [Private Server][Mistral + English(w/ key)](https://d9b451a97791dd8ef3.gradio.live)
|
|
|
30 |
We provide an online version for players to directly access and try out.
|
31 |
- [Hugging Face][ChatGPT + English(w/o key)](https://huggingface.co/spaces/OpenDILabCommunity/LLMRiddlesChatGPTEN)
|
32 |
- [Hugging Face][ChatGPT + Chinese(w/o key)](https://huggingface.co/spaces/OpenDILabCommunity/LLMRiddlesChatGPTCN)
|
33 |
+
- [Hugging Face][ChatGLM + English(w/ key)](https://huggingface.co/spaces/OpenDILabCommunity/LLMRiddlesChatGLMEN)
|
34 |
- [Hugging Face][ChatGLM + Chinese(w/ key)](https://huggingface.co/spaces/OpenDILabCommunity/LLMRiddlesChatGLMCN)
|
35 |
- [OpenXLab][ChatGPT + Chinese(w/o key)](https://openxlab.org.cn/apps/detail/OpenDILab/LLMRiddlesChatGPTCN)
|
36 |
+
- [OpenXLab][ChatGPT + English(w/o key)](https://openxlab.org.cn/apps/detail/OpenDILab/LLMRiddlesChatGPTEN)
|
37 |
- [OpenXLab][ChatGLM + Chinese(w/ key)](https://openxlab.org.cn/apps/detail/OpenDILab/LLMRiddlesChatGLMCN)
|
38 |
- [OpenXLab][ChatGLM + English(w/ key)](https://openxlab.org.cn/apps/detail/OpenDILab/LLMRiddlesChatGLMEN)
|
39 |
- [Private Server][Mistral + English(w/ key)](https://d9b451a97791dd8ef3.gradio.live)
|
README_zh.md
CHANGED
@@ -17,8 +17,10 @@
|
|
17 |
我们提供了在线版本以供玩家直接访问试玩:
|
18 |
- [Hugging Face][ChatGPT + 英文(需配置api key)](https://huggingface.co/spaces/OpenDILabCommunity/LLMRiddlesChatGPTEN)
|
19 |
- [Hugging Face][ChatGPT + 中文(需配置api key)](https://huggingface.co/spaces/OpenDILabCommunity/LLMRiddlesChatGPTCN)
|
|
|
20 |
- [Hugging Face][ChatGLM + 中文(已预设api key)](https://huggingface.co/spaces/OpenDILabCommunity/LLMRiddlesChatGLMCN)
|
21 |
- [OpenXLab][ChatGPT + 中文(需配置api key)](https://openxlab.org.cn/apps/detail/OpenDILab/LLMRiddlesChatGPTCN)
|
|
|
22 |
- [OpenXLab][ChatGLM + 中文(已预设api key)](https://openxlab.org.cn/apps/detail/OpenDILab/LLMRiddlesChatGLMCN)
|
23 |
- [OpenXLab][ChatGLM + 英文(已预设api key)](https://openxlab.org.cn/apps/detail/OpenDILab/LLMRiddlesChatGLMEN)
|
24 |
- [Private Server][Mistral + 英文(已预设api key)](https://d9b451a97791dd8ef3.gradio.live)
|
|
|
17 |
我们提供了在线版本以供玩家直接访问试玩:
|
18 |
- [Hugging Face][ChatGPT + 英文(需配置api key)](https://huggingface.co/spaces/OpenDILabCommunity/LLMRiddlesChatGPTEN)
|
19 |
- [Hugging Face][ChatGPT + 中文(需配置api key)](https://huggingface.co/spaces/OpenDILabCommunity/LLMRiddlesChatGPTCN)
|
20 |
+
- [Hugging Face][ChatGLM + 英文(已预设api key)](https://huggingface.co/spaces/OpenDILabCommunity/LLMRiddlesChatGLMEN)
|
21 |
- [Hugging Face][ChatGLM + 中文(已预设api key)](https://huggingface.co/spaces/OpenDILabCommunity/LLMRiddlesChatGLMCN)
|
22 |
- [OpenXLab][ChatGPT + 中文(需配置api key)](https://openxlab.org.cn/apps/detail/OpenDILab/LLMRiddlesChatGPTCN)
|
23 |
+
- [OpenXLab][ChatGPT + 英文(需配置api key)](https://openxlab.org.cn/apps/detail/OpenDILab/LLMRiddlesChatGPTEN)
|
24 |
- [OpenXLab][ChatGLM + 中文(已预设api key)](https://openxlab.org.cn/apps/detail/OpenDILab/LLMRiddlesChatGLMCN)
|
25 |
- [OpenXLab][ChatGLM + 英文(已预设api key)](https://openxlab.org.cn/apps/detail/OpenDILab/LLMRiddlesChatGLMEN)
|
26 |
- [Private Server][Mistral + 英文(已预设api key)](https://d9b451a97791dd8ef3.gradio.live)
|
llmriddles/questions/executor.py
CHANGED
@@ -18,14 +18,25 @@ class QuestionExecutor:
|
|
18 |
@property
|
19 |
def question_name(self):
|
20 |
return self.question.names[self.lang]
|
|
|
|
|
|
|
21 |
|
22 |
def check(self, qs_text: str) -> Tuple[str, bool, str]:
|
23 |
-
answer_text =
|
24 |
correct, explanation = self.check_answer(qs_text, answer_text)
|
25 |
return answer_text, correct, explanation
|
26 |
|
27 |
def check_answer(self, user_text: str, answer_text: str) -> Tuple[bool, str]:
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
if explanation is None:
|
30 |
if correct:
|
31 |
explanation = 'LLM的回答满足要求' if self.lang == 'cn' else 'Correct Answer From LLM'
|
|
|
18 |
@property
def question_name(self):
    """Look up the question's name for the current language (``self.lang``)."""
    localized_names = self.question.names
    return localized_names[self.lang]
|
21 |
+
|
22 |
+
def llm_answer(self, qs_text: str) -> str:
    """Send ``qs_text`` to the configured LLM and return its reply.

    The LLM function is resolved from ``self.llm`` via ``get_llm_fn`` and is
    invoked with ``self.llm_cfgs`` expanded as keyword arguments.
    """
    llm_fn = get_llm_fn(self.llm)
    return llm_fn(qs_text, **self.llm_cfgs)
|
24 |
|
25 |
def check(self, qs_text: str) -> Tuple[str, bool, str]:
    """Ask the LLM ``qs_text`` and judge the reply.

    Returns:
        A ``(answer_text, correct, explanation)`` tuple: the LLM reply
        followed by the two values produced by ``self.check_answer``.
    """
    llm_reply = self.llm_answer(qs_text)
    verdict = self.check_answer(qs_text, llm_reply)
    is_correct, reason = verdict
    return llm_reply, is_correct, reason
|
29 |
|
30 |
def check_answer(self, user_text: str, answer_text: str) -> Tuple[bool, str]:
|
31 |
+
inputs = {
|
32 |
+
'question_text': self.question_text,
|
33 |
+
'user_text': user_text,
|
34 |
+
'answer_text': answer_text,
|
35 |
+
'lang': self.lang,
|
36 |
+
'llm_callback': self.llm_answer,
|
37 |
+
# this list can be extended if needed in the future
|
38 |
+
}
|
39 |
+
correct, explanation = self.question.checker(inputs)
|
40 |
if explanation is None:
|
41 |
if correct:
|
42 |
explanation = 'LLM的回答满足要求' if self.lang == 'cn' else 'Correct Answer From LLM'
|
llmriddles/questions/level1.py
CHANGED
@@ -1,5 +1,9 @@
|
|
1 |
from .question import register_question
|
2 |
-
|
|
|
|
|
|
|
|
|
3 |
|
4 |
def count_english_words(text: str):
|
5 |
return len(text.split(' '))
|
@@ -13,31 +17,6 @@ def check_if_chinese(text: str):
|
|
13 |
return all('\u4e00' <= char <= '\u9fff' for char in text)
|
14 |
|
15 |
|
16 |
-
def count_words(text: str, contain_punctuation: bool = False):
|
17 |
-
chinese_words = []
|
18 |
-
english_words = []
|
19 |
-
other_words = []
|
20 |
-
temp_english_words = []
|
21 |
-
for char in text:
|
22 |
-
if '\u4e00' <= char <= '\u9fff':
|
23 |
-
chinese_words.append(char)
|
24 |
-
if len(temp_english_words) > 0:
|
25 |
-
english_words.append(''.join(temp_english_words))
|
26 |
-
temp_english_words = []
|
27 |
-
else:
|
28 |
-
if char.isalpha():
|
29 |
-
temp_english_words.append(char)
|
30 |
-
else:
|
31 |
-
if len(temp_english_words) > 0:
|
32 |
-
english_words.append(''.join(temp_english_words))
|
33 |
-
temp_english_words = []
|
34 |
-
other_words.append(char)
|
35 |
-
if contain_punctuation:
|
36 |
-
return len(chinese_words) + len(english_words) + len(other_words)
|
37 |
-
else:
|
38 |
-
return len(chinese_words) + len(english_words)
|
39 |
-
|
40 |
-
|
41 |
CN_TEXT_1 = """
|
42 |
第一章第一题(初来乍到),请你构造一个问题使模型的回答是一字不差的“1+1=3”(不需要引号)。
|
43 |
|
@@ -175,30 +154,86 @@ register_question(
|
|
175 |
name={'cn': '1-4 短说短话', 'en': '1-4'},
|
176 |
)
|
177 |
|
178 |
-
|
179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
|
181 |
-
|
182 |
-
|
183 |
|
184 |
-
|
185 |
-
|
186 |
|
187 |
-
|
188 |
-
|
189 |
|
190 |
-
# def _checker_5(question_text: str, answer_text: str, lang: str):
|
191 |
-
# _ = question_text, lang
|
192 |
-
# answer_text = answer_text.strip()
|
193 |
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
# return False, 'Answer should be less than 20 words.'
|
198 |
-
# else:
|
199 |
-
# return True, None
|
200 |
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from .question import register_question
|
2 |
+
from .utility import count_words, word_reverse_text
|
3 |
+
from typing import Callable
|
4 |
+
from .question import Checker
|
5 |
+
from typing import Tuple, Optional
|
6 |
+
import re
|
7 |
|
8 |
def count_english_words(text: str):
|
9 |
return len(text.split(' '))
|
|
|
17 |
return all('\u4e00' <= char <= '\u9fff' for char in text)
|
18 |
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
CN_TEXT_1 = """
|
21 |
第一章第一题(初来乍到),请你构造一个问题使模型的回答是一字不差的“1+1=3”(不需要引号)。
|
22 |
|
|
|
154 |
name={'cn': '1-4 短说短话', 'en': '1-4'},
|
155 |
)
|
156 |
|
157 |
+
CN_TEXT_5 = """
|
158 |
+
第一章第五题(回文不变),请输入一个本身不是回文串的问题,使无论正着问还是倒着问,模型的回答是一样的。
|
159 |
+
|
160 |
+
请在下面的输入框内填写你的问题并点击按钮提交。
|
161 |
+
"""
|
162 |
+
|
163 |
+
EN_TEXT_5 = """
|
164 |
+
For the fifth question in chapter 1, please enter a question that is not a palindrome string so that the model's answer is the same whether it is asked forward or backward.
|
165 |
+
|
166 |
+
Please enter your query below and click the submit button
|
167 |
+
"""
|
168 |
+
|
169 |
+
def _checker_5(question_text: str, user_text: str, answer_text: str, lang: str, llm_callback: Callable[[str], str]):
    """Check riddle 1-5: the forward and the reversed question get the same answer.

    Args:
        question_text: Riddle description (unused by this checker).
        user_text: The question submitted by the player.
        answer_text: The LLM's answer to ``user_text``.
        lang: ``'cn'`` selects the Chinese messages; any other value selects
            the English ones.
        llm_callback: Callable that asks the LLM a new question and returns
            its answer.

    Returns:
        ``(True, None)`` on success, otherwise ``(False, explanation)``.
    """
    answer_text = answer_text.strip()
    user_text = user_text.strip()
    reversed_user_text = word_reverse_text(user_text, lang)

    # Reject palindromes first so an invalid question never triggers the
    # extra (slow, possibly billed) LLM request.
    if user_text == reversed_user_text:
        return False, "用户的问题不能是回文串" if lang == 'cn' else 'Question should not be a palindrome string.'

    # The first answer is compared stripped, so strip the second one as well;
    # otherwise stray surrounding whitespace causes a spurious mismatch.
    second_answer_text = llm_callback(reversed_user_text).strip()

    if second_answer_text != answer_text:
        return False, f"正着问和倒着问时,模型的回答应该是一样的\n 问题:{user_text}\n 正着问回答:{answer_text}\n 反着问回答:{second_answer_text}" if lang == 'cn' else f'The model\'s answer should be the same when asked forward or backward.\n Question: {user_text}\n Forward-Asking answer: {answer_text}\n Backward-Asking answer: {second_answer_text}'
    return True, None
|
181 |
+
|
182 |
+
register_question(
|
183 |
+
{
|
184 |
+
'cn': CN_TEXT_5,
|
185 |
+
'en': EN_TEXT_5,
|
186 |
+
},
|
187 |
+
checkers=Checker(_checker_5, required_input_keys=['question_text', 'user_text', 'answer_text', 'lang', 'llm_callback']),
|
188 |
+
name={'cn': '1-5 回文不变', 'en': '1-5'},
|
189 |
+
level=1,
|
190 |
+
)
|
191 |
+
|
192 |
+
CN_TEXT_6 = """
|
193 |
+
第一章第六题(无中生狗),请提一个不包含“狗”这个字的问题,但是回答中至少出现3次“狗”这个字。
|
194 |
|
195 |
+
请在下面的输入框内填写你的问题并点击按钮提交。
|
196 |
+
"""
|
197 |
|
198 |
+
EN_TEXT_6 = """
|
199 |
+
For the sixth question in chapter 1, please ask a question that does not contain the word "dog", but the answer contains the word "dog" at least three times.
|
200 |
|
201 |
+
Please enter your query below and click the submit button
|
202 |
+
"""
|
203 |
|
|
|
|
|
|
|
204 |
|
205 |
+
def _cn_checker_6(question_text: str, user_text: str, answer_text: str) -> Tuple[bool, Optional[str]]:
|
206 |
+
if '狗' in user_text:
|
207 |
+
return False, '问题不得包含“狗”字'
|
|
|
|
|
|
|
208 |
|
209 |
+
dog_count = len(re.findall('狗', answer_text))
|
210 |
+
if dog_count >= 3:
|
211 |
+
return True, f'“狗”字的出现次数为{dog_count}次'
|
212 |
+
else:
|
213 |
+
return False, f'“狗”字的出现次数为{dog_count}次,未达到3次'
|
214 |
+
|
215 |
+
def _en_checker_6(question_text: str, user_text: str, answer_text: str) -> Tuple[bool, Optional[str]]:
|
216 |
+
q_words = re.findall(r'\w+', user_text.lower())
|
217 |
+
if any(word in {'dog', 'dogs'} for word in q_words):
|
218 |
+
return False, 'The question must not contain the word "dog" or "dogs"'
|
219 |
+
|
220 |
+
a_words = re.findall(r'\w+', answer_text.lower())
|
221 |
+
a_dog_count = sum(1 if word in {'dog', 'dogs'} else 0 for word in a_words)
|
222 |
+
if a_dog_count >= 3:
|
223 |
+
return True, f'The word "dog" (or "dogs") appears {a_dog_count} times.'
|
224 |
+
else:
|
225 |
+
return False, f'The word "dog" (or "dogs") appears {a_dog_count} times, ' \
|
226 |
+
f'which is less than 3 times.'
|
227 |
+
|
228 |
+
register_question(
|
229 |
+
{
|
230 |
+
'cn': CN_TEXT_6,
|
231 |
+
'en': EN_TEXT_6,
|
232 |
+
},
|
233 |
+
checkers={
|
234 |
+
'cn': _cn_checker_6,
|
235 |
+
'en': _en_checker_6,
|
236 |
+
},
|
237 |
+
name={'cn': '1-6 无中生狗', 'en': '1-6'},
|
238 |
+
level=1,
|
239 |
+
)
|
llmriddles/questions/level2.py
CHANGED
@@ -4,6 +4,7 @@ from typing import Optional, Tuple
|
|
4 |
import sympy
|
5 |
|
6 |
from .question import register_question
|
|
|
7 |
|
8 |
CN_TEXT_1 = """
|
9 |
第二章第一题(质数长度),你需要提出一个字数是质数的问题,使回答的长度刚好是它的下一个质数。
|
@@ -83,8 +84,7 @@ def _cn_checker_2(question_text: str, user_text: str, answer_text: str) -> Tuple
|
|
83 |
except (TypeError, ValueError):
|
84 |
return False, f'输入内容{user_text!r},并非一个大于1的正整数'
|
85 |
|
86 |
-
for value_item in
|
87 |
-
value_item = int(value_item)
|
88 |
if value_item >= value + 1000:
|
89 |
return True, f'检测到输出中数字{value_item},满足要求'
|
90 |
|
@@ -99,8 +99,7 @@ def _en_checker_2(question_text: str, user_text: str, answer_text: str) -> Tuple
|
|
99 |
except (TypeError, ValueError):
|
100 |
return False, f'You entered {user_text!r}, which is not a positive integer greater than 1'
|
101 |
|
102 |
-
for value_item in
|
103 |
-
value_item = int(value_item)
|
104 |
if value_item >= value + 1000:
|
105 |
return True, f'Detected the number {value_item} in the output, which meets the requirement'
|
106 |
|
@@ -137,8 +136,7 @@ def _cn_checker_3(question_text: str, user_text: str, answer_text: str) -> Tuple
|
|
137 |
return False, f'输入内容{user_text!r},并非一个大于1的正整数'
|
138 |
|
139 |
collected_values = []
|
140 |
-
for value_item in
|
141 |
-
value_item = int(value_item)
|
142 |
if value_item <= value - 1000:
|
143 |
collected_values.append(value_item)
|
144 |
|
@@ -158,8 +156,7 @@ def _en_checker_3(question_text: str, user_text: str, answer_text: str) -> Tuple
|
|
158 |
return False, f'You entered {user_text!r}, which is not a positive integer greater than 1'
|
159 |
|
160 |
collected_values = []
|
161 |
-
for value_item in
|
162 |
-
value_item = int(value_item)
|
163 |
if value_item <= value - 1000:
|
164 |
collected_values.append(value_item)
|
165 |
|
@@ -238,8 +235,7 @@ def _cn_checker_5(question_text: str, user_text: str, answer_text: str) -> Tuple
|
|
238 |
return False, f'输入内容{user_text!r},并非一个大于1的平方数'
|
239 |
|
240 |
sq = int(value ** 0.5)
|
241 |
-
for value_item in
|
242 |
-
value_item = int(value_item)
|
243 |
if value_item == sq:
|
244 |
return True, f'检测到平方根数{sq}'
|
245 |
else:
|
@@ -257,8 +253,7 @@ def _en_checker_5(question_text: str, user_text: str, answer_text: str) -> Tuple
|
|
257 |
return False, f'The input, {user_text!r}, is not a square number greater than 1'
|
258 |
|
259 |
sq = int(value ** 0.5)
|
260 |
-
for value_item in
|
261 |
-
value_item = int(value_item)
|
262 |
if value_item == sq:
|
263 |
return True, f'Detected square root: {sq}'
|
264 |
else:
|
|
|
4 |
import sympy
|
5 |
|
6 |
from .question import register_question
|
7 |
+
from .math_tools import get_all_numbers
|
8 |
|
9 |
CN_TEXT_1 = """
|
10 |
第二章第一题(质数长度),你需要提出一个字数是质数的问题,使回答的长度刚好是它的下一个质数。
|
|
|
84 |
except (TypeError, ValueError):
|
85 |
return False, f'输入内容{user_text!r},并非一个大于1的正整数'
|
86 |
|
87 |
+
for value_item in get_all_numbers(answer_text):
|
|
|
88 |
if value_item >= value + 1000:
|
89 |
return True, f'检测到输出中数字{value_item},满足要求'
|
90 |
|
|
|
99 |
except (TypeError, ValueError):
|
100 |
return False, f'You entered {user_text!r}, which is not a positive integer greater than 1'
|
101 |
|
102 |
+
for value_item in get_all_numbers(answer_text):
|
|
|
103 |
if value_item >= value + 1000:
|
104 |
return True, f'Detected the number {value_item} in the output, which meets the requirement'
|
105 |
|
|
|
136 |
return False, f'输入内容{user_text!r},并非一个大于1的正整数'
|
137 |
|
138 |
collected_values = []
|
139 |
+
for value_item in get_all_numbers(answer_text):
|
|
|
140 |
if value_item <= value - 1000:
|
141 |
collected_values.append(value_item)
|
142 |
|
|
|
156 |
return False, f'You entered {user_text!r}, which is not a positive integer greater than 1'
|
157 |
|
158 |
collected_values = []
|
159 |
+
for value_item in get_all_numbers(answer_text):
|
|
|
160 |
if value_item <= value - 1000:
|
161 |
collected_values.append(value_item)
|
162 |
|
|
|
235 |
return False, f'输入内容{user_text!r},并非一个大于1的平方数'
|
236 |
|
237 |
sq = int(value ** 0.5)
|
238 |
+
for value_item in get_all_numbers(answer_text):
|
|
|
239 |
if value_item == sq:
|
240 |
return True, f'检测到平方根数{sq}'
|
241 |
else:
|
|
|
253 |
return False, f'The input, {user_text!r}, is not a square number greater than 1'
|
254 |
|
255 |
sq = int(value ** 0.5)
|
256 |
+
for value_item in get_all_numbers(answer_text):
|
|
|
257 |
if value_item == sq:
|
258 |
return True, f'Detected square root: {sq}'
|
259 |
else:
|
llmriddles/questions/level3.py
CHANGED
@@ -1,29 +1,7 @@
|
|
1 |
from .question import register_question
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
chinese_words = []
|
6 |
-
english_words = []
|
7 |
-
other_words = []
|
8 |
-
temp_english_words = []
|
9 |
-
for char in text:
|
10 |
-
if '\u4e00' <= char <= '\u9fff':
|
11 |
-
chinese_words.append(char)
|
12 |
-
if len(temp_english_words) > 0:
|
13 |
-
english_words.append(''.join(temp_english_words))
|
14 |
-
temp_english_words = []
|
15 |
-
else:
|
16 |
-
if char.isalpha():
|
17 |
-
temp_english_words.append(char)
|
18 |
-
else:
|
19 |
-
if len(temp_english_words) > 0:
|
20 |
-
english_words.append(''.join(temp_english_words))
|
21 |
-
temp_english_words = []
|
22 |
-
other_words.append(char)
|
23 |
-
if contain_punctuation:
|
24 |
-
return len(chinese_words) + len(english_words) + len(other_words)
|
25 |
-
else:
|
26 |
-
return len(chinese_words) + len(english_words)
|
27 |
|
28 |
|
29 |
CN_TEXT_1 = """
|
@@ -33,7 +11,7 @@ CN_TEXT_1 = """
|
|
33 |
"""
|
34 |
|
35 |
EN_TEXT_1 = """
|
36 |
-
For the first question in chapter 3, please enter a question of at least ten words so that the model's answer is
|
37 |
|
38 |
Please enter your query below and click the submit button
|
39 |
"""
|
@@ -84,14 +62,9 @@ def _checker_2(question_text: str, user_text: str, answer_text: str, lang: str):
|
|
84 |
if count_words(user_text) < 10:
|
85 |
return False, "用户的问题长度应该至少10个字" if lang == 'cn' else 'Question should be no less than 10 words.'
|
86 |
|
87 |
-
|
88 |
-
user_text = user_text[::-1]
|
89 |
-
else:
|
90 |
-
words = user_text.split()
|
91 |
-
words.reverse()
|
92 |
-
user_text = ' '.join(words)
|
93 |
|
94 |
-
if
|
95 |
return True, None
|
96 |
else:
|
97 |
return False, None
|
@@ -144,22 +117,43 @@ register_question(
|
|
144 |
level=3,
|
145 |
)
|
146 |
|
147 |
-
|
148 |
-
|
|
|
|
|
|
|
149 |
|
150 |
-
|
151 |
-
|
152 |
|
153 |
-
|
154 |
-
|
155 |
|
156 |
-
|
157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
|
159 |
-
# def _checker_4(question_text: str, user_text: str, answer_text: str, lang: str):
|
160 |
-
# pass
|
161 |
|
162 |
-
# register_question({
|
163 |
-
# 'cn': CN_TEXT_4,
|
164 |
-
# 'en': EN_TEXT_4,
|
165 |
-
# }, _checker_4, level=3)
|
|
|
1 |
from .question import register_question
|
2 |
+
from .question import Checker
|
3 |
+
from typing import Callable
|
4 |
+
from .utility import count_words, word_reverse_text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
|
7 |
CN_TEXT_1 = """
|
|
|
11 |
"""
|
12 |
|
13 |
EN_TEXT_1 = """
|
14 |
+
For the first question in chapter 3, please enter a question of at least ten words so that the model's answer is exactly the same with the question.
|
15 |
|
16 |
Please enter your query below and click the submit button
|
17 |
"""
|
|
|
62 |
if count_words(user_text) < 10:
|
63 |
return False, "用户的问题长度应该至少10个字" if lang == 'cn' else 'Question should be no less than 10 words.'
|
64 |
|
65 |
+
reversed_user_text = word_reverse_text(user_text, lang)
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
+
if reversed_user_text == answer_text:
|
68 |
return True, None
|
69 |
else:
|
70 |
return False, None
|
|
|
117 |
level=3,
|
118 |
)
|
119 |
|
120 |
+
CN_TEXT_4 = """
|
121 |
+
第三章第四题(回文协变),请你输入一个本身不是回文串的问题,使得正着问和倒着问时,模型的回答本身不是回文且也是逆序。
|
122 |
+
|
123 |
+
请在下面的输入框内填写你的提问并点击按钮提交。
|
124 |
+
"""
|
125 |
|
126 |
+
EN_TEXT_4 = """
|
127 |
+
For the fourth question in chapter 3, please enter a question that is not a palindrome string, so that the model's answer is also not a palindrome and is in reverse order when asked forward or backward.
|
128 |
|
129 |
+
Please enter your query below and click the submit button
|
130 |
+
"""
|
131 |
|
132 |
+
def _checker_4(question_text: str, user_text: str, answer_text: str, lang: str, llm_callback: Callable[[str], str]):
    """Check riddle 3-4: reversing the question must reverse the answer.

    The question must not equal its own reversal, the answer to the reversed
    question must equal the reversal of the original answer, and that answer
    must not equal its own reversal. Reversal is done by
    ``word_reverse_text`` using ``lang``.

    Args:
        question_text: Riddle description (unused by this checker).
        user_text: The question submitted by the player.
        answer_text: The LLM's answer to ``user_text``.
        lang: ``'cn'`` selects the Chinese messages; any other value selects
            the English ones.
        llm_callback: Callable that asks the LLM a new question and returns
            its answer.

    Returns:
        ``(True, None)`` on success, otherwise ``(False, explanation)``.
    """
    answer_text = answer_text.strip()
    user_text = user_text.strip()
    reversed_user_text = word_reverse_text(user_text, lang)

    # Reject palindromes first so an invalid question never triggers the
    # extra (slow, possibly billed) LLM request.
    if user_text == reversed_user_text:
        return False, "用户的问题不能是回文串" if lang == 'cn' else 'Question should not be a palindrome string.'

    # The first answer is compared stripped, so strip the second one as well.
    second_answer_text = llm_callback(reversed_user_text).strip()

    if second_answer_text != word_reverse_text(answer_text, lang):
        return False, f"正着问和倒着问时,模型的回答应该是逆序。\n 用户问题:{user_text}\n 正着问回答: {answer_text} \n 反着问回答: {second_answer_text}" if lang == 'cn' else f'The model\'s answer should be in reverse order when asked forward or backward.\n Question: {user_text}\n Forward-Asking answer: {answer_text} \n Backward-Asking answer: {second_answer_text}'

    # Pass ``lang`` explicitly: without it the helper falls back to its 'cn'
    # default and reverses English answers by character instead of by word.
    if second_answer_text == word_reverse_text(second_answer_text, lang):
        return False, "正着问和倒着问时,模型的回答不能是回文串" if lang == 'cn' else 'The model\'s answer should not be a palindrome when asked forward or backward.'

    return True, None
|
148 |
+
|
149 |
+
register_question(
|
150 |
+
{
|
151 |
+
'cn': CN_TEXT_4,
|
152 |
+
'en': EN_TEXT_4,
|
153 |
+
},
|
154 |
+
checkers=Checker(_checker_4, required_input_keys=['question_text', 'user_text', 'answer_text', 'lang', 'llm_callback']),
|
155 |
+
name={'cn': '3-4 回文协变', 'en': '3-4'},
|
156 |
+
level=3,
|
157 |
+
)
|
158 |
|
|
|
|
|
159 |
|
|
|
|
|
|
|
|
llmriddles/questions/level4.py
CHANGED
@@ -1,78 +1,87 @@
|
|
1 |
import re
|
2 |
|
3 |
from .question import register_question
|
|
|
|
|
|
|
4 |
|
|
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
int(text)
|
9 |
-
return True
|
10 |
-
except ValueError:
|
11 |
-
return False
|
12 |
|
|
|
|
|
13 |
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
# CN_TEXT_1 = """
|
19 |
-
# 第四章第一题,请构造一个问题A,它的回复是不同于A的B,然后以B提问能再次得到A。
|
20 |
-
|
21 |
-
# 请在下面的输入框内填写你构造并点击按钮提交。
|
22 |
-
# """
|
23 |
-
|
24 |
-
# EN_TEXT_1 = """
|
25 |
-
# For the first question in chapter 4, please make a question A whose answer is B that is different from A, and then ask B to get A again.
|
26 |
-
|
27 |
-
# Please enter your query below and click the submit button
|
28 |
-
# """
|
29 |
|
|
|
|
|
30 |
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
# pass
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
|
|
|
|
42 |
|
43 |
-
|
44 |
-
|
45 |
|
46 |
-
|
47 |
-
|
48 |
|
49 |
-
|
50 |
-
|
51 |
|
52 |
-
# Please enter your query below and click the submit button
|
53 |
-
# """
|
54 |
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
# pass
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
|
68 |
CN_TEXT_3 = """
|
69 |
-
|
70 |
|
71 |
请在下面的输入框内填写你构造并点击按钮提交。
|
72 |
"""
|
73 |
|
74 |
EN_TEXT_3 = """
|
75 |
-
For the
|
76 |
|
77 |
Please enter your query below and click the submit button
|
78 |
"""
|
@@ -86,8 +95,8 @@ def _checker_3(question_text: str, user_text: str, answer_text: str, lang: str):
|
|
86 |
return False, "问题应该是一个正整数" if lang == 'cn' else 'Question should be a positive integer.'
|
87 |
elif int(user_text) == 1:
|
88 |
return False, "问题应该是一个大于1的正整数" if lang == 'cn' else 'Question should be a positive integer greater than 1.'
|
89 |
-
elif int(user_text) - 1 not in
|
90 |
-
user_text) + 1 not in
|
91 |
return False, "回答中应该包含一个与问题相差1的数字" if lang == 'cn' else 'Answer should contain a number that is exactly 1 different from the question.'
|
92 |
else:
|
93 |
return True, None
|
|
|
1 |
import re
|
2 |
|
3 |
from .question import register_question
|
4 |
+
from .question import Checker
|
5 |
+
from typing import Callable
|
6 |
+
from .utility import count_words, word_reverse_text
|
7 |
|
8 |
+
from .math_tools import check_if_is_number, get_all_numbers
|
9 |
|
10 |
+
CN_TEXT_1 = """
|
11 |
+
第四章第一题(循环往复),请构造一个问题A,它的回复是不同于A的B,然后以B提问能再次得到A。
|
|
|
|
|
|
|
|
|
12 |
|
13 |
+
请在下面的输入框内填写你构造并点击按钮提交。
|
14 |
+
"""
|
15 |
|
16 |
+
EN_TEXT_1 = """
|
17 |
+
For the first question in chapter 4, please make a question A whose answer is B that is different from A, and then ask B to get A again.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
+
Please enter your query below and click the submit button
|
20 |
+
"""
|
21 |
|
22 |
+
def _checker_1(question_text: str, user_text: str, answer_text: str, lang: str, llm_callback: Callable[[str], str]):
|
23 |
+
answer_text = answer_text.strip()
|
24 |
+
user_text = user_text.strip()
|
25 |
+
second_answer_text = llm_callback(answer_text)
|
|
|
26 |
|
27 |
+
if second_answer_text != user_text:
|
28 |
+
return False, f"B产生的回答和原问题A不一致:\n A: {user_text}\n B: {answer_text}\n Answer from B: {second_answer_text}" if lang == 'cn' else f'The answer from B is not the same as the original question A:\n A: {user_text}\n B: {answer_text}\n Answer from B: {second_answer_text}'
|
29 |
+
|
30 |
+
return True, None
|
31 |
+
|
32 |
+
register_question(
|
33 |
+
{
|
34 |
+
'cn': CN_TEXT_1,
|
35 |
+
'en': EN_TEXT_1,
|
36 |
+
},
|
37 |
+
checkers=Checker(_checker_1, required_input_keys=['question_text', 'user_text', 'answer_text', 'lang', 'llm_callback']),
|
38 |
+
name={'cn': '4-1 循环往复', 'en': '4-1'},
|
39 |
+
level=4,
|
40 |
+
)
|
41 |
|
42 |
+
CN_TEXT_2 = """
|
43 |
+
第四章第二题(惜字如金),本题可能没有答案,你可以自由的先去做其他的题。请输入一个字的问题,使模型的回答在16个字以内。
|
44 |
|
45 |
+
请在下面的输入框内填写你构造并点击按钮提交。
|
46 |
+
"""
|
47 |
|
48 |
+
EN_TEXT_2 = """
|
49 |
+
For the second question in chapter 4, please enter a question of one word so that the model's answer is within 16 words (The Chinese version of this problem may not have a correct answer, but the English version might).
|
50 |
|
51 |
+
Please enter your query below and click the submit button
|
52 |
+
"""
|
53 |
|
|
|
|
|
54 |
|
55 |
+
def _checker_2(question_text: str, user_text: str, answer_text: str, lang: str):
    """Check riddle 4-2: a question of at most one word, an answer of at most 16.

    Word counts come from ``count_words`` applied to the stripped texts.

    Returns:
        ``(True, None)`` on success, otherwise ``(False, explanation)`` in
        Chinese when ``lang == 'cn'`` and in English otherwise.
    """
    use_chinese = lang == 'cn'
    reply = answer_text.strip()
    question = user_text.strip()

    if count_words(question) > 1:
        if use_chinese:
            return False, "用户的问题长度应该在一个字以内"
        return False, 'Question should be within one word.'

    if count_words(reply) <= 16:
        return True, None
    if use_chinese:
        return False, "大语言模型的答案应该在16个字以内"
    return False, 'Answer should be within 16 words.'
|
|
|
65 |
|
66 |
+
register_question(
|
67 |
+
{
|
68 |
+
'cn': CN_TEXT_2,
|
69 |
+
'en': EN_TEXT_2,
|
70 |
+
},
|
71 |
+
checkers=_checker_2,
|
72 |
+
name={'cn': '4-2 惜字如金', 'en': '4-2'},
|
73 |
+
level=4,
|
74 |
+
)
|
75 |
|
76 |
|
77 |
CN_TEXT_3 = """
|
78 |
+
第四章第三题(自然之密),请输入一个大于一的正整数作为问题,使回答里包含和它刚好相差1的数。
|
79 |
|
80 |
请在下面的输入框内填写你构造并点击按钮提交。
|
81 |
"""
|
82 |
|
83 |
EN_TEXT_3 = """
|
84 |
+
For the third question in chapter 4, please enter a positive integer greater than one as the question so that the answer contains a number that is exactly 1 different from it.
|
85 |
|
86 |
Please enter your query below and click the submit button
|
87 |
"""
|
|
|
95 |
return False, "问题应该是一个正整数" if lang == 'cn' else 'Question should be a positive integer.'
|
96 |
elif int(user_text) == 1:
|
97 |
return False, "问题应该是一个大于1的正整数" if lang == 'cn' else 'Question should be a positive integer greater than 1.'
|
98 |
+
elif int(user_text) - 1 not in get_all_numbers(answer_text) and int(
|
99 |
+
user_text) + 1 not in get_all_numbers(answer_text):
|
100 |
return False, "回答中应该包含一个与问题相差1的数字" if lang == 'cn' else 'Answer should contain a number that is exactly 1 different from the question.'
|
101 |
else:
|
102 |
return True, None
|
llmriddles/questions/level5.py
CHANGED
@@ -1,30 +1,5 @@
|
|
1 |
from .question import register_question
|
2 |
-
|
3 |
-
|
4 |
-
def count_words(text: str, contain_punctuation: bool = False):
|
5 |
-
chinese_words = []
|
6 |
-
english_words = []
|
7 |
-
other_words = []
|
8 |
-
temp_english_words = []
|
9 |
-
for char in text:
|
10 |
-
if '\u4e00' <= char <= '\u9fff':
|
11 |
-
chinese_words.append(char)
|
12 |
-
if len(temp_english_words) > 0:
|
13 |
-
english_words.append(''.join(temp_english_words))
|
14 |
-
temp_english_words = []
|
15 |
-
else:
|
16 |
-
if char.isalpha():
|
17 |
-
temp_english_words.append(char)
|
18 |
-
else:
|
19 |
-
if len(temp_english_words) > 0:
|
20 |
-
english_words.append(''.join(temp_english_words))
|
21 |
-
temp_english_words = []
|
22 |
-
other_words.append(char)
|
23 |
-
if contain_punctuation:
|
24 |
-
return len(chinese_words) + len(english_words) + len(other_words)
|
25 |
-
else:
|
26 |
-
return len(chinese_words) + len(english_words)
|
27 |
-
|
28 |
|
29 |
CN_TEXT_1 = """
|
30 |
第五章第一题(口是心非),请构造一个不少于十个字的问题,使得回答中不包含问题中的任意字符。
|
|
|
1 |
from .question import register_question
|
2 |
+
from .utility import count_words
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
CN_TEXT_1 = """
|
5 |
第五章第一题(口是心非),请构造一个不少于十个字的问题,使得回答中不包含问题中的任意字符。
|
llmriddles/questions/math_tools.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
|
4 |
+
def check_if_is_number(text: str):
    """Return True if ``int(text)`` succeeds, False otherwise.

    Anything ``int()`` accepts counts as a number, including a leading sign
    and surrounding whitespace.

    Args:
        text: Candidate text.

    Returns:
        bool: whether the text could be converted to an integer.
    """
    try:
        int(text)
    except (TypeError, ValueError):
        # TypeError covers ``None`` and other non-convertible objects, so a
        # missing value is reported as "not a number" instead of crashing.
        return False
    return True
|
10 |
+
|
11 |
+
def get_all_numbers_in_a_sentence(text: str):
    """Extract every optionally signed run of digits in ``text`` as an int.

    Returns:
        list of ints, in order of appearance.
    """
    digit_runs = re.findall(r'[-+]?\d+', text)
    return list(map(int, digit_runs))
|
13 |
+
|
14 |
+
def get_all_numbers_in_a_sentence_with_comma(text: str):
    """Extract integers after deleting every ASCII comma from ``text``.

    Removing the commas first lets a grouped number such as ``1,000`` be
    read as ``1000``.

    Returns:
        list of ints, in order of appearance.
    """
    without_commas = ''.join(text.split(','))
    return list(map(int, re.findall(r'[-+]?\d+', without_commas)))
|
18 |
+
|
19 |
+
def get_all_numbers(text: str):
    """Collect integers from ``text`` read both with and without commas.

    Returns:
        The plain extraction followed by the comma-stripped extraction, as
        one list; a number found by both readings appears twice.
    """
    plain_numbers = get_all_numbers_in_a_sentence(text)
    comma_joined_numbers = get_all_numbers_in_a_sentence_with_comma(text)
    return plain_numbers + comma_joined_numbers
|
llmriddles/questions/question.py
CHANGED
@@ -17,21 +17,32 @@ class Question:
|
|
17 |
|
18 |
_KNOWN_PROBLEMS = []
|
19 |
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
def register_question(text: Union[Mapping[str, str], str],
|
22 |
checkers: Union[Mapping[str, SingleLangCheckerTyping], MultiLangCheckerTyping],
|
23 |
name=Union[Mapping[str, str], str],
|
24 |
level: int = 1, default_lang='cn'):
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
def _integrated_checker(question_text: str, user_text: str, answer_text: str, lang: str):
|
29 |
-
return _origin_checkers[lang](question_text, user_text, answer_text)
|
30 |
-
|
31 |
-
checker: MultiLangCheckerTyping = _integrated_checker
|
32 |
-
else:
|
33 |
-
checker: MultiLangCheckerTyping = checkers
|
34 |
-
|
35 |
if isinstance(text, str):
|
36 |
texts = {default_lang: text}
|
37 |
else:
|
|
|
17 |
|
18 |
_KNOWN_PROBLEMS = []
|
19 |
|
20 |
+
class Checker:
    """Callable wrapper that feeds a checker function from an ``inputs`` mapping.

    ``checkers`` is either a single callable, or a mapping from language code
    to a per-language callable taking
    ``(question_text, user_text, answer_text)``.
    """

    def __init__(self, checkers, required_input_keys=None) -> None:
        """Store the checker(s) and the input keys to pass on each call.

        Args:
            checkers: A callable, or a mapping ``lang -> callable``.
            required_input_keys: Keys looked up in ``inputs``, in order, and
                passed positionally to the checker. Defaults to
                ``['question_text', 'user_text', 'answer_text', 'lang']``.
        """
        self._origin_checkers = checkers
        if isinstance(checkers, collections.abc.Mapping):
            # Per-language checkers are dispatched on the ``lang`` argument.
            self.checker = self._integrated_checker
        else:
            self.checker = checkers

        # Identity comparison is the correct test for the None sentinel.
        if required_input_keys is None:
            required_input_keys = ['question_text', 'user_text', 'answer_text', 'lang']
        self.required_input_keys = required_input_keys

    def _integrated_checker(self, question_text: str, user_text: str, answer_text: str, lang: str):
        """Dispatch to the checker registered for ``lang``."""
        return self._origin_checkers[lang](question_text, user_text, answer_text)

    def __call__(self, inputs):
        """Run the checker with the values of ``required_input_keys``.

        Args:
            inputs: Mapping holding at least every required key; extra keys
                are ignored.

        Returns:
            Whatever the wrapped checker returns.

        Raises:
            KeyError: If a required key is missing from ``inputs``.
        """
        return self.checker(*(inputs[key] for key in self.required_input_keys))
|
38 |
+
|
39 |
def register_question(text: Union[Mapping[str, str], str],
|
40 |
checkers: Union[Mapping[str, SingleLangCheckerTyping], MultiLangCheckerTyping],
|
41 |
name=Union[Mapping[str, str], str],
|
42 |
level: int = 1, default_lang='cn'):
|
43 |
+
|
44 |
+
checker = checkers if isinstance(checkers, Checker) else Checker(checkers)
|
45 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
if isinstance(text, str):
|
47 |
texts = {default_lang: text}
|
48 |
else:
|
llmriddles/questions/utility.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
|
3 |
+
def word_reverse_text(input_text, lang='cn'):
    """Reverse ``input_text`` at the granularity suited to ``lang``.

    For ``lang == 'cn'`` the string is reversed character by character. For
    any other language it is split on whitespace and the words are re-joined
    in reverse order with single spaces.
    """
    if lang != 'cn':
        return ' '.join(reversed(input_text.split()))
    return input_text[::-1]
|
11 |
+
|
12 |
+
def count_words(text: str, contain_punctuation: bool = False) -> int:
    """Count the words in mixed Chinese/alphabetic text.

    Each character in U+4E00..U+9FFF counts as one word. Each maximal run of
    other alphabetic characters counts as one word. Every remaining character
    (digits, whitespace, punctuation, ...) is counted only when
    ``contain_punctuation`` is true.

    Args:
        text: Text to measure.
        contain_punctuation: Whether to add the non-alphabetic characters to
            the total.

    Returns:
        The number of words, plus the other characters if requested.
    """
    chinese_count = 0
    english_count = 0
    other_count = 0
    in_english_word = False

    for char in text:
        if '\u4e00' <= char <= '\u9fff':
            chinese_count += 1
            in_english_word = False
        elif char.isalpha():
            # Count a word when it starts, so a word that ends the text is
            # included (it used to be dropped because nothing flushed it).
            if not in_english_word:
                english_count += 1
                in_english_word = True
        else:
            other_count += 1
            in_english_word = False

    total = chinese_count + english_count
    if contain_punctuation:
        total += other_count
    return total
|