amphora's picture
Update app.py
847658d verified
import streamlit as st
st.set_page_config(page_title="HAERAE Open Research Questions", layout="wide")
# Title (always in English)
st.title("HAERAE Open Research Questions")
# Language selection below the title
lang = st.radio("Language / ์–ธ์–ด", ["English", "ํ•œ๊ตญ์–ด"])
# Content in both languages
content = {
"English": {
"intro": """
HAERAE is a non-profit research lab focused on the interpretability and evaluation of Korean language models.
Our mission is to advance the field with insightful benchmarks and tools.
We've been doing most of our projects internally, but for those that have been unsolvable,
we are planning to open them to get help from the open-source community.
""",
"challenge_title": "HAERAE-Math Challenge",
"challenge_desc": """
Today we are introducing our first challenge: HAERAE-Math. We've created high-quality instructions on math
but don't have an idea on how to generate high-quality answers for them. We are looking for solutions that
use open-source models with openly available licenses.
We have created a total of 20,000 instructions already and are generating more. We've opened up a preview
of 50 of them in this link: [HAERAE-Math Samples](https://huggingface.co/datasets/HAERAE-HUB/HAERAE-Math-samples)
For those who generate answers for the 50 and share the methodology/results with us, we'll share the
remaining instructions and credit for the resulting dataset.
""",
"example_title": "Example Question",
"how_to_title": "How to Participate",
"how_to": """
1. Access the 50 sample questions from the provided Hugging Face dataset link.
2. Generate high-quality answers for these questions using open-source models.
3. Document your methodology and results.
4. Share your findings with us through [contact information or submission form].
5. If your approach is promising, we'll provide access to the full dataset of 20,000 instructions.
""",
"why_title": "Why Participate?",
"why": """
- Contribute to advancing Korean language model research
- Gain access to a large, high-quality dataset of math instructions
- Collaborate with HAERAE researchers
- Potential for co-authorship on related publications
""",
"contact_title": "Contact Us",
"contact": """
For more information or to submit your results, please contact us at:
[[email protected]]([email protected])
""",
"sidebar_title": "About HAERAE",
"sidebar_content": """
HAERAE is a non-profit research lab dedicated to advancing the field of
Korean language model interpretability and evaluation. Our work focuses on
creating insightful benchmarks and tools to push the boundaries of NLP research.
"""
},
"ํ•œ๊ตญ์–ด": {
"intro": """
HAERAE๋Š” ํ•œ๊ตญ์–ด ์–ธ์–ด ๋ชจ๋ธ์˜ ํ•ด์„๊ณผ ํ‰๊ฐ€์˜ ์—ฐ๊ตฌ๋ฅผ ์œ„ํ•ด ์„ค๋ฆฝ๋œ ๋น„์˜๋ฆฌ ์—ฐ๊ตฌํŒ€์ž…๋‹ˆ๋‹ค.
์ €ํฌ๋Š” ๋‹ค์–‘ํ•œ ๋ฒค์น˜๋งˆํฌ์™€ ์—ฐ๊ตฌ๋ฅผ ํ†ตํ•ด ํ•œ๊ตญ์–ด ์ž์—ฐ์–ด ์ฒ˜๋ฆฌ ์—ฐ๊ตฌ๋ฅผ ๋ฐœ์ „์‹œํ‚ค๊ธฐ ์œ„ํ•ด ๋…ธ๋ ฅํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.
๊ธฐ์กด์—๋Š” ๋Œ€๋ถ€๋ถ„์˜ ํ”„๋กœ์ ํŠธ๋ฅผ ๋‚ด๋ถ€์ ์œผ๋กœ ์ˆ˜ํ–‰ํ•ด ์™”์ง€๋งŒ, ๋‚ด๋ถ€์ ์œผ๋กœ ํ•ด๊ฒฐํ•˜๊ธฐ ์–ด๋ ค์šด ๋ฌธ์ œ๋“ค์— ๋Œ€ํ•ด์„œ๋Š”
์˜คํ”ˆ ์†Œ์Šค ์ปค๋ฎค๋‹ˆํ‹ฐ์˜ ๋„์›€์„ ๋ฐ›๊ณ ์ž Open-Research-Question ํ”„๋กœ๊ทธ๋žจ์„ ์šด์˜ํ•˜๊ฒŒ ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.
""",
"challenge_title": "HAERAE-Math Challenge",
"challenge_desc": """
์ €ํฌ ํŒ€์€ [QARV-Instruct](https://huggingface.co/datasets/HAERAE-HUB/qarv-instruct-ko) ๋ถ€ํ„ฐ ์‹œ์ž‘ํ•ด์„œ ๊ณ ํ’ˆ์งˆ์˜ ํ•œ๊ตญ์–ด ์ง€์‹œ๋ฌธ ๋ฐ์ดํ„ฐ๋ฅผ
๋งŒ๋“ค๊ธฐ ์œ„ํ•ด ๋…ธ๋ ฅ ์ค‘์— ์žˆ์Šต๋‹ˆ๋‹ค. ์ด ๊ณผ์ •์—์„œ ๋งค์šฐ ๋†’์€ ์ˆ˜์ค€์˜ ์ˆ˜ํ•™ ์ง€์‹œ๋ฌธ์„ ์ œ์ž‘ํ•˜์˜€์œผ๋‚˜, ํ•ด๋‹น ์ง€์‹œ๋ฌธ์— ๋Œ€ํ•ด ์ ์ ˆํ•œ ๋‹ต๋ณ€์„ ๋งŒ๋“ค์ง€ ๋ชปํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.
์ €ํฌ๋Š” ์ด๋ฒˆ ์ฑŒ๋ฆฐ์ง€๋ฅผ ํ†ตํ•ด ์˜คํ”ˆ์†Œ์Šค LLM์„ ์‚ฌ์šฉํ•˜์—ฌ ํ•ด๋‹น ๋ฌธ์ œ๋“ค์— ๋Œ€ํ•œ ๋‹ต์„ ์ œ์ž‘ํ•  ์ˆ˜ ์žˆ๋Š” ์†”๋ฃจ์…˜์„ ์ฐพ๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.
ํ˜„์žฌ๋Š” ์ด 20,000๊ฐœ์˜ ์ง€์‹œ๋ฌธ์„ ์ด๋ฏธ ๋งŒ๋“ค์—ˆ์œผ๋ฉฐ ์ถ”๊ฐ€์ ์œผ๋กœ ์ƒ์„ฑํ•˜๋Š” ๊ณผ์ • ์ค‘์— ์žˆ์Šต๋‹ˆ๋‹ค.
์ƒ์„ฑ๋œ ์ง€์‹œ๋ฌธ ์ค‘ ๋žœ๋ค์œผ๋กœ ์ƒ˜ํ”Œ๋ง๋œ 50๊ฐœ์˜ ์งˆ๋ฌธ์„ ๋‹ค์Œ ๋งํฌ์—์„œ ๋ณด์‹ค ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
[HAERAE-Math ์ƒ˜ํ”Œ](https://huggingface.co/datasets/HAERAE-HUB/HAERAE-Math-samples)
50๊ฐœ์— ๋Œ€ํ•œ ๋‹ต๋ณ€์„ ์ƒ์„ฑํ•˜๊ณ  ๋ฐฉ๋ฒ•๋ก /๊ฒฐ๊ณผ๋ฅผ ์ €ํฌ์—๊ฒŒ ๊ณต์œ ํ•ด์ฃผ์‹œ๋Š” ๋ถ„๋“ค๊ผ ์ €ํฌ๊ฐ€ ์ƒ์„ฑํ•œ ์ „์ฒด ์ง€์‹œ๋ฌธ๊ณผ ์ตœ์ข… ๋ฐ์ดํ„ฐ์…‹์— ๋Œ€ํ•œ ๊ธฐ์—ฌ๋„๋ฅผ ์ธ์ •ํ•ด๋“œ๋ฆด ์˜ˆ์ •์ž…๋‹ˆ๋‹ค.
(๊ฒฐ๊ณผ์— ๋”ฐ๋ผ ๋…ผ๋ฌธํ™”๋„ ๊ณ ๋ฏผ ์ค‘์— ์žˆ์Šต๋‹ˆ๋‹ค.)
""",
"example_title": "์˜ˆ์‹œ ์งˆ๋ฌธ",
"how_to_title": "์ฐธ์—ฌ ๋ฐฉ๋ฒ•",
"how_to": """
1. ์ œ๊ณต๋œ Hugging Face ๋ฐ์ดํ„ฐ์…‹ ๋งํฌ์—์„œ 50๊ฐœ์˜ ์ƒ˜ํ”Œ ์งˆ๋ฌธ์„ ํ™•์ธํ•ฉ๋‹ˆ๋‹ค.
2. ์˜คํ”ˆ ์†Œ์Šค ๋ชจ๋ธ์„ ์‚ฌ์šฉํ•˜์—ฌ ์ด ์งˆ๋ฌธ๋“ค์— ๋Œ€ํ•œ ๊ณ ํ’ˆ์งˆ ๋‹ต๋ณ€์„ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
3. ๋ฐฉ๋ฒ•๋ก ๊ณผ ๊ฒฐ๊ณผ๋ฅผ ๋ฌธ์„œํ™”ํ•ฉ๋‹ˆ๋‹ค.
4. [์—ฐ๋ฝ์ฒ˜ ์ •๋ณด ๋˜๋Š” ์ œ์ถœ ์–‘์‹]์„ ํ†ตํ•ด ๊ท€ํ•˜์˜ ๊ฒฐ๊ณผ๋ฅผ ์ €ํฌ์™€ ๊ณต์œ ํ•ฉ๋‹ˆ๋‹ค.
5. ๊ท€ํ•˜์˜ ์ ‘๊ทผ ๋ฐฉ์‹์ด ์œ ์˜๋ฏธ ํ•˜๋‹ค๊ณ  ํŒ๋‹จ ๋œ๋‹ค๋ฉด, ๋‚˜๋จธ์ง€ ์ง€์‹œ๋ฌธ ๋ฐ์ดํ„ฐ์…‹์— ๋Œ€ํ•œ ์ ‘๊ทผ ๊ถŒํ•œ์„ ์ œ๊ณตํ•ด ๋“œ๋ฆฝ๋‹ˆ๋‹ค.
""",
"why_title": "์™œ ์ฐธ์—ฌํ•ด์•ผ ํ•˜๋‚˜์š”?",
"why": """
- ํ•œ๊ตญ์–ด ์–ธ์–ด ๋ชจ๋ธ ์—ฐ๊ตฌ ๋ฐœ์ „์— ๊ธฐ์—ฌ
- ๋Œ€๊ทœ๋ชจ์˜ ๊ณ ํ’ˆ์งˆ ์ˆ˜ํ•™ ์ง€์‹œ๋ฌธ ๋ฐ์ดํ„ฐ์…‹์— ์ ‘๊ทผ
- HAERAE ์—ฐ๊ตฌ์›๋“ค๊ณผ ํ˜‘๋ ฅ
- ๊ด€๋ จ ์ถœํŒ๋ฌผ์˜ ๊ณต๋™ ์ €์ž๊ฐ€ ๋  ๊ฐ€๋Šฅ์„ฑ
""",
"contact_title": "์—ฐ๋ฝ์ฒ˜",
"contact": """
๋” ๋งŽ์€ ์ •๋ณด๋ฅผ ์›ํ•˜์‹œ๊ฑฐ๋‚˜ ๊ฒฐ๊ณผ๋ฅผ ์ œ์ถœํ•˜๋ ค๋ฉด ๋‹ค์Œ ์—ฐ๋ฝ์ฒ˜๋กœ ๋ฌธ์˜ํ•ด ์ฃผ์„ธ์š”:
[[email protected]]([email protected])
""",
"sidebar_title": "HAERAE ์†Œ๊ฐœ",
"sidebar_content": """
HAERAE๋Š” ํ•œ๊ตญ์–ด ์–ธ์–ด ๋ชจ๋ธ์˜ ํ•ด์„๊ณผ ํ‰๊ฐ€์˜ ์—ฐ๊ตฌ๋ฅผ ์œ„ํ•ด ์„ค๋ฆฝ๋œ ๋น„์˜๋ฆฌ ์—ฐ๊ตฌํŒ€์ž…๋‹ˆ๋‹ค.
์ €ํฌ๋Š” ๋‹ค์–‘ํ•œ ๋ฒค์น˜๋งˆํฌ์™€ ์—ฐ๊ตฌ๋ฅผ ํ†ตํ•ด ํ•œ๊ตญ์–ด ์ž์—ฐ์–ด ์ฒ˜๋ฆฌ ์—ฐ๊ตฌ๋ฅผ ๋ฐœ์ „์‹œํ‚ค๊ธฐ ์œ„ํ•ด ๋…ธ๋ ฅํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค.
"""
}
}
# Main content
st.write(content[lang]["intro"])
st.header(content[lang]["challenge_title"])
st.write(content[lang]["challenge_desc"])
st.subheader(content[lang]["example_title"])
example_question = """
ํ•œ๊ตญ์˜ ๋ณด์•ˆ ์ „๋ฌธ๊ฐ€๊ฐ€ ๊ณ ๋„ํ™”๋œ ๋ฐ์ดํ„ฐ ๋ณดํ˜ธ ์‹œ์Šคํ…œ์„ ๊ฐœ๋ฐœํ•˜๊ณ  ์žˆ์Šต๋‹ˆ๋‹ค. ์ด ์‹œ์Šคํ…œ์€ 3์ฐจ์› ๊ธฐํ•˜ํ•™์  ์ž ๊ธˆ ๋ฉ”์ปค๋‹ˆ์ฆ˜์„ ์‚ฌ์šฉํ•˜๋Š”๋ฐ, ์ž ๊ธˆ ์žฅ์น˜๋Š” ์›๋ฟ” ๋ชจ์–‘์œผ๋กœ ๋˜์–ด ์žˆ๊ณ , ๋ฐ‘๋ฉด์˜ ๋ฐ˜์ง€๋ฆ„์€ 6cm, ๋†’์ด๋Š” 8cm์ž…๋‹ˆ๋‹ค. ์ด ์›๋ฟ” ๋ชจ์–‘์˜ ์ž ๊ธˆ ์žฅ์น˜์—๋Š” ์›ํ†ต ๋ชจ์–‘์˜ ์—ด์‡ ๊ฐ€ ๋”ฑ ๋งž๊ฒŒ ๋“ค์–ด๊ฐ€๊ฒŒ ์„ค๊ณ„๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค.
๋ณด์•ˆ ์ „๋ฌธ๊ฐ€๋Š” ๋” ๋†’์€ ์ˆ˜์ค€์˜ ๋ณด์•ˆ์„ ์œ„ํ•ด ์›ํ†ต ๋ชจ์–‘์˜ ์—ด์‡  ์•ˆ์— ๊ตฌ ๋ชจ์–‘์˜ ์ž ๊ธˆ ์žฅ์น˜๋ฅผ ์ถ”๊ฐ€ํ•˜๋ ค๊ณ  ํ•ฉ๋‹ˆ๋‹ค. ์ด ๊ตฌ๋Š” ์›ํ†ต ์•ˆ์— ๋”ฑ ๋“ค์–ด๊ฐ€๋„๋ก ์„ค๊ณ„๋˜์–ด ์žˆ์Šต๋‹ˆ๋‹ค.
๋‹ค์Œ์˜ ์งˆ๋ฌธ๋“ค์„ ํ•ด๊ฒฐํ•˜์‹œ๊ธฐ ๋ฐ”๋ž๋‹ˆ๋‹ค:
1. ์›๋ฟ” ์•ˆ์— ๋”ฑ ๋“ค์–ด๊ฐ€๊ฒŒ ์„ค๊ณ„๋œ ์›ํ†ต์˜ ๋ฐ˜์ง€๋ฆ„์€ ์–ผ๋งˆ์ธ๊ฐ€์š”?
2. ์›ํ†ต ์•ˆ์— ๋”ฑ ๋“ค์–ด๊ฐ€๊ฒŒ ์„ค๊ณ„๋œ ๊ตฌ์˜ ๋ถ€ํ”ผ๋Š” ์–ผ๋งˆ์ธ๊ฐ€์š”?
3. ์›๋ฟ”, ์›ํ†ต, ๊ตฌ๊ฐ€ ๋ชจ๋‘ ๊ฐ™์€ ์ค‘์‹ฌ์ถ•์„ ๊ณต์œ ํ•˜๊ณ  ์žˆ์œผ๋ฉฐ ์›๋ฟ”์˜ ๊ผญ๋Œ€๊ธฐ์ ๊ณผ ์›ํ†ต, ๊ตฌ์˜ ์ค‘์‹ฌ์ ์ด ๋™์ผํ•˜๋‹ค๊ณ  ๊ฐ€์ •ํ•˜๋ฉด, ์›๋ฟ”์—์„œ ์›ํ†ต์ด ์ฐจ์ง€ํ•˜๋Š” ๋น„์œจ์„ ๊ตฌํ•˜์‹œ์˜ค.
4. ์ด์ œ ์›๋ฟ”์˜ ๋†’์ด๋ฅผ 2๋ฐฐ๋กœ ๋Š˜๋ฆฌ์ž. ์›๋ฟ”์˜ ๋†’์ด๊ฐ€ 16cm๊ฐ€ ๋˜์—ˆ์„ ๋•Œ, ์›ํ†ต๊ณผ ๊ตฌ์˜ ํฌ๊ธฐ์™€ ๋ถ€ํ”ผ๋Š” ์–ด๋–ป๊ฒŒ ๋ณ€ํ•˜๋‚˜์š”?
5. ์›๋ฟ”์˜ ๋†’์ด์™€ ๋ฐ‘๋ฉด์˜ ๋ฐ˜์ง€๋ฆ„์„ ๊ฐ๊ฐ h์™€ r์ด๋ผ๊ณ  ํ•  ๋•Œ, ์›ํ†ต๊ณผ ๊ตฌ์˜ ์ตœ๋Œ€ ๋ถ€ํ”ผ๋ฅผ r๊ณผ h๋กœ ํ‘œํ˜„ํ•˜์‹œ์˜ค.
์›๋ฟ”, ์›ํ†ต, ๊ตฌ์˜ ๋ถ€ํ”ผ ๊ณต์‹์„ ์‚ฌ์šฉํ•˜์—ฌ ๋ฌธ์ œ๋ฅผ ํ•ด๊ฒฐํ•˜์‹œ๊ธฐ ๋ฐ”๋ž๋‹ˆ๋‹ค:
์›๋ฟ”์˜ ๋ถ€ํ”ผ: V = 1/3ฯ€rยฒh
์›ํ†ต์˜ ๋ถ€ํ”ผ: V = ฯ€rยฒh
๊ตฌ์˜ ๋ถ€ํ”ผ: V = 4/3ฯ€rยณ
"""
st.code(example_question, language="markdown")
st.header(content[lang]["how_to_title"])
st.write(content[lang]["how_to"])
st.header(content[lang]["why_title"])
st.write(content[lang]["why"])
st.header(content[lang]["contact_title"])
st.write(content[lang]["contact"])
st.sidebar.title(content[lang]["sidebar_title"])
st.sidebar.info(content[lang]["sidebar_content"])