code_eval_octopack

Runtime error

App Files Files Community

Muennighoff committed on Apr 1, 2023

Commit

ae00acd

•

1 Parent(s): 9faf6e5

Add cargo_string kwarg

Browse files

Files changed (2) hide show

code_eval.py +16 -2
execute.py +5 -25

code_eval.py CHANGED Viewed

@@ -131,6 +131,20 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE."""
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class CodeEval(evaluate.Metric):
     def _info(self):
@@ -152,7 +166,7 @@ class CodeEval(evaluate.Metric):
             license=_LICENSE,
         )
-    def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0, language="python"):
         """Returns the scores"""
         if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
@@ -170,7 +184,7 @@ class CodeEval(evaluate.Metric):
             for task_id, (candidates, test_case) in enumerate(zip(predictions, references)):
                 for candidate in candidates:
                     test_program = candidate + "\n" + test_case
-                    args = (test_program, timeout, task_id, completion_id[task_id], language)
                     future = executor.submit(check_correctness, *args)
                     futures.append(future)
                     completion_id[task_id] += 1

 THE SOFTWARE."""
+# https://github.com/THUDM/CodeGeeX/blob/ebeb850f227a90c79de39f7e26b1302f374f3240/codegeex/benchmark/rust/Cargo.toml
+BASE_CARGO = '''[package]
+name = "rust"
+version = "0.1.0"
+edition = "2021"
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+[dependencies]
+rand = "0.4"
+regex = "1"
+md5 = "0.7.0"
+'''
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class CodeEval(evaluate.Metric):
     def _info(self):
             license=_LICENSE,
         )
+    def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0, language="python", cargo_string=BASE_CARGO):
         """Returns the scores"""
         if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
             for task_id, (candidates, test_case) in enumerate(zip(predictions, references)):
                 for candidate in candidates:
                     test_program = candidate + "\n" + test_case
+                    args = (test_program, timeout, task_id, completion_id[task_id], language, cargo_string)
                     future = executor.submit(check_correctness, *args)
                     futures.append(future)
                     completion_id[task_id] += 1

execute.py CHANGED Viewed

@@ -27,22 +27,8 @@ import signal
 import subprocess
 import tempfile
-# https://github.com/THUDM/CodeGeeX/blob/ebeb850f227a90c79de39f7e26b1302f374f3240/codegeex/benchmark/rust/Cargo.toml
-BASE_CARGO = '''[package]
-name = "rust"
-version = "0.1.0"
-edition = "2021"
-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-[dependencies]
-rand = "0.4"
-regex = "1"
-md5 = "0.7.0"
-'''
-def check_correctness(check_program, timeout, task_id, completion_id, language):
     """
     Evaluates the functional correctness of a completion by running the test
     suite provided in the problem.
@@ -64,7 +50,7 @@ def check_correctness(check_program, timeout, task_id, completion_id, language):
     elif language == "javascript":
         p = multiprocessing.Process(target=unsafe_execute_js, args=(check_program, result, timeout))
     elif language == "rust":
-        p = multiprocessing.Process(target=unsafe_execute_rust, args=(check_program, result, timeout))
     else:
         raise ValueError(f"Language {language} not supported. Feel free to add it :)")
@@ -237,7 +223,7 @@ def unsafe_execute_js(check_program, result, timeout):
         except subprocess.TimeoutExpired as e:
             result.append("timed out")
-def unsafe_execute_rust(check_program, result, timeout):
     with create_tempdir():
@@ -255,14 +241,8 @@ def unsafe_execute_rust(check_program, result, timeout):
         os.makedirs(RUST_SRC, exist_ok=True)
         os.makedirs(RUST_BIN, exist_ok=True)
-        # Check if Cargo exists, if so copy it here
-        if os.path.exists("/Cargo.toml"):
-            pass
-        else:
-            # Warn that no Cargo was found in the parent directory
-            logging.warning(f"Cargo.toml not found in root directory ({os.path.abspath('/')}). Creating a new one. Timeout of >300 is recommended.")
-            # Create Cargo.toml
-            open(f"{RUST_DIR}/Cargo.toml", 'w').write(BASE_CARGO)
         with tempfile.NamedTemporaryFile(dir = RUST_BIN, delete=False) as f:
             file_name: str =  "test" + RUST_EXT

 import subprocess
 import tempfile
+def check_correctness(check_program, timeout, task_id, completion_id, language, cargo_string=""):
     """
     Evaluates the functional correctness of a completion by running the test
     suite provided in the problem.
     elif language == "javascript":
         p = multiprocessing.Process(target=unsafe_execute_js, args=(check_program, result, timeout))
     elif language == "rust":
+        p = multiprocessing.Process(target=unsafe_execute_rust, args=(check_program, result, timeout, cargo_string))
     else:
         raise ValueError(f"Language {language} not supported. Feel free to add it :)")
         except subprocess.TimeoutExpired as e:
             result.append("timed out")
+def unsafe_execute_rust(check_program, result, timeout, cargo_string):
     with create_tempdir():
         os.makedirs(RUST_SRC, exist_ok=True)
         os.makedirs(RUST_BIN, exist_ok=True)
+        # Create Cargo.toml file
+        open(f"{RUST_DIR}/Cargo.toml", 'w').write(cargo_string)
         with tempfile.NamedTemporaryFile(dir = RUST_BIN, delete=False) as f:
             file_name: str =  "test" + RUST_EXT