Spaces:

HumanEval-V
/

HumanEval-V-Benchmark-Viewer

Running

File size: 1,890 Bytes

cf3b6c5
 
 
 
 
3dc6935
 
cf3b6c5
 
 
 
 
 
 
 
32e9007
 
 
 
cf3b6c5
 
 
 
 
 
 
 
 
 
32e9007
cf3b6c5
 
32e9007
cf3b6c5

import streamlit as st
import datasets

# Page configuration is issued before any other Streamlit call in this script.
st.set_page_config(layout="wide", page_title="HumanEval-V Viewer")


@st.cache_resource
def _load_benchmark(path="humaneval_v_test_hf"):
    """Load the benchmark dataset saved at *path*, cached across reruns.

    Streamlit re-executes the whole script on each widget interaction; the
    cache keeps the dataset from being re-opened from disk every time the
    index changes.
    """
    return datasets.load_from_disk(path)


humaneval_v_data = _load_benchmark()

# Intro banner; the text contains inline HTML, hence unsafe_allow_html.
st.markdown("> <i>This is a viewer for the HumanEval-V benchmark, which consists of 108 coding tasks. Use the navigation buttons or enter an index to browse the tasks. If you encounter any issues, we encourage you to start a discussion [here](https://huggingface.co/datasets/HumanEval-V/HumanEval-V-Benchmark/discussions)</i>.", unsafe_allow_html=True)
st.markdown("---")

# Upper bound for navigation, taken from the loaded dataset so that the label
# and the widget limit always agree with the data actually on disk.
max_index = len(humaneval_v_data)

# Initialize session state for index if not present
if 'index' not in st.session_state:
    st.session_state.index = 1

# Header row: title, index selector, and an unused spacer column.
buttons = st.columns([2, 1.1, 5.9])

with buttons[0]:
    st.markdown("# HumanEval-V Viewer")

with buttons[1]:
    # Number input for navigation (1-based; converted to a row offset later).
    index_input = st.number_input(
        f"Go to index (1-{max_index}):",
        min_value=1,
        max_value=max_index,
        value=st.session_state.index,
        key="index_input",
        help="Enter an index and jump to that index.",
        step=1,
    )

# The widget value is 1-based, dataset rows are 0-based.
coding_task = humaneval_v_data[index_input - 1]

# Pull out the fields of the selected row that the page displays.
qid, image, function_signature, ground_truth, test_script = (
    coding_task[field]
    for field in (
        "qid",
        "image",
        "function_signature",
        "ground_truth_solution",
        "test_script",
    )
)

def _render_code_section(title, code):
    """Render a level-3 heading *title* followed by *code* as Python source.

    ``st.code`` is used rather than interpolating *code* into a Markdown
    fence, so content that itself contains triple backticks cannot terminate
    the block early and corrupt the rest of the page.
    """
    st.markdown(f"### {title}:")
    st.markdown("")  # empty Markdown element kept between heading and code
    st.code(code, language="python")


# Left column: task id and image. Right column: the three code listings.
upper_columns = st.columns([2, 7])
with upper_columns[0]:
    st.markdown(f"### Question ID: {qid}")
    # NOTE(review): newer Streamlit releases appear to deprecate
    # use_column_width in favour of use_container_width -- confirm against
    # the Streamlit version this Space pins before switching.
    st.image(image, use_column_width=True)
    st.markdown("---")
with upper_columns[1]:
    _render_code_section("Function Signature", function_signature)
    _render_code_section("Test Script", test_script)
    _render_code_section("Ground Truth Solution", ground_truth)