Shaltiel committed
Commit 93a260a
1 Parent(s): 4a85986

Added support for re-submitting rejected

app.py CHANGED
@@ -31,9 +31,6 @@ from src.submission.submit import add_new_eval
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
 
-# def launch_backend():
-#     _ = subprocess.run(["python", "main_backend.py"])
-
 try:
     print(EVAL_REQUESTS_PATH)
     snapshot_download(
@@ -353,6 +350,5 @@ with demo:
 
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
-# scheduler.add_job(launch_backend, "interval", seconds=100) # will only allow one job to be run at the same time
 scheduler.start()
 demo.queue(default_concurrency_limit=40).launch(allowed_paths=["logos/"])
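With main_backend.py gone, the only scheduled job left in app.py is the periodic Space restart. A minimal standalone sketch of that pattern with APScheduler, using a placeholder callback instead of the real API.restart_space(repo_id=REPO_ID) call:

# Minimal sketch of the remaining scheduler setup (assumes the apscheduler package is installed).
# The callback body is a placeholder; the app's restart_space calls API.restart_space(repo_id=REPO_ID).
from apscheduler.schedulers.background import BackgroundScheduler

def restart_space():
    print("restarting space...")  # placeholder for API.restart_space(repo_id=REPO_ID)

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)  # every 30 minutes
scheduler.start()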
main_backend.py DELETED
@@ -1,78 +0,0 @@
-import logging
-import pprint
-
-from huggingface_hub import snapshot_download
-
-logging.getLogger("openai").setLevel(logging.WARNING)
-
-from src.backend.run_eval_suite import run_evaluation
-from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
-from src.backend.sort_queue import sort_models_by_priority
-
-from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, DEVICE, API, LIMIT, TOKEN
-from src.about import Tasks, NUM_FEWSHOT
-TASKS_HARNESS = [task.value.benchmark for task in Tasks]
-
-logging.basicConfig(level=logging.ERROR)
-pp = pprint.PrettyPrinter(width=80)
-
-PENDING_STATUS = "PENDING"
-RUNNING_STATUS = "RUNNING"
-FINISHED_STATUS = "FINISHED"
-FAILED_STATUS = "FAILED"
-
-snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
-snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
-
-def run_auto_eval():
-    current_pending_status = [PENDING_STATUS]
-
-    # pull the eval dataset from the hub and parse any eval requests
-    # check completed evals and set them to finished
-    check_completed_evals(
-        api=API,
-        checked_status=RUNNING_STATUS,
-        completed_status=FINISHED_STATUS,
-        failed_status=FAILED_STATUS,
-        hf_repo=QUEUE_REPO,
-        local_dir=EVAL_REQUESTS_PATH_BACKEND,
-        hf_repo_results=RESULTS_REPO,
-        local_dir_results=EVAL_RESULTS_PATH_BACKEND
-    )
-
-    # Get all eval request that are PENDING, if you want to run other evals, change this parameter
-    eval_requests = get_eval_requests(job_status=current_pending_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
-    # Sort the evals by priority (first submitted first run)
-    eval_requests = sort_models_by_priority(api=API, models=eval_requests)
-
-    print(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests")
-
-    if len(eval_requests) == 0:
-        return
-
-    eval_request = eval_requests[0]
-    pp.pprint(eval_request)
-
-    set_eval_request(
-        api=API,
-        eval_request=eval_request,
-        set_to_status=RUNNING_STATUS,
-        hf_repo=QUEUE_REPO,
-        local_dir=EVAL_REQUESTS_PATH_BACKEND,
-    )
-
-    run_evaluation(
-        eval_request=eval_request,
-        task_names=TASKS_HARNESS,
-        num_fewshot=NUM_FEWSHOT,
-        local_dir=EVAL_RESULTS_PATH_BACKEND,
-        results_repo=RESULTS_REPO,
-        batch_size=1,
-        device=DEVICE,
-        no_cache=True,
-        limit=LIMIT
-    )
-
-
-if __name__ == "__main__":
-    run_auto_eval()
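The deleted backend moved each queued request through a small set of status strings. As a reading aid for the rest of this commit, a sketch of those statuses follows; PENDING/RUNNING/FINISHED/FAILED come from the file above, while REJECTED and the 'NONE' sentinel only appear in the new submit.py logic further down (the RequestStatus and RESUBMITTABLE names are illustrative, not project code):

# Sketch of the request-status strings used across the queue (illustrative only).
from enum import Enum

class RequestStatus(str, Enum):
    PENDING = "PENDING"    # waiting in the queue
    RUNNING = "RUNNING"    # picked up by the backend
    FINISHED = "FINISHED"  # results uploaded
    FAILED = "FAILED"      # evaluation errored out
    REJECTED = "REJECTED"  # turned down by maintainers; may be re-submitted

# Statuses that no longer block a new submission after this commit
RESUBMITTABLE = {"NONE", RequestStatus.REJECTED.value}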
src/submission/check_validity.py CHANGED
@@ -79,7 +79,7 @@ def get_model_arch(model_info: ModelInfo):
 
 def already_submitted_models(requested_models_dir: str) -> set[str]:
     depth = 1
-    file_names = []
+    requested_models = {}
     users_to_submission_dates = defaultdict(list)
 
     for root, _, files in os.walk(requested_models_dir):
@@ -92,7 +92,7 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
                 continue
             with open(file_path, "r") as f:
                 info = json.load(f)
-            file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}")
+            requested_models[f"{info['model']}_{info['revision']}_{info['precision']}"] = info["status"]
 
             # Select organisation
             if info["model"].count("/") == 0 or "submitted_time" not in info:
@@ -100,4 +100,4 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
             organisation, _ = info["model"].split("/")
             users_to_submission_dates[organisation].append(info["submitted_time"])
 
-    return set(file_names), users_to_submission_dates
+    return set(requested_models), users_to_submission_dates
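With this change, already_submitted_models also records each request's status: requested_models is a dict keyed by "model_revision_precision" with the status string as the value, rather than a flat list of names. A minimal sketch of what one parsed request file contributes, assuming the request JSON carries the model, revision, precision, and status fields used above (the concrete values are invented):

# Hypothetical request JSON contents; field names match the diff above, values are made up.
info = {
    "model": "example-org/example-model",
    "revision": "main",
    "precision": "bfloat16",
    "status": "REJECTED",
    "submitted_time": "2024-01-01T00:00:00Z",
}

requested_models = {}
requested_models[f"{info['model']}_{info['revision']}_{info['precision']}"] = info["status"]
print(requested_models)
# {'example-org/example-model_main_bfloat16': 'REJECTED'}

Note that calling set() on a dict keeps only its keys, so the status values are usable only where the dict itself is passed along, which is what the new lookup in submit.py below relies on.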
src/submission/submit.py CHANGED
@@ -107,7 +107,14 @@ def add_new_eval(
     }
 
     # Check for duplicate submission
-    if f"{model}_{revision}_{precision}" in REQUESTED_MODELS or f"{model}_{actual_revision}_{precision}" in REQUESTED_MODELS:
+    request_keys = [f"{model}_{revision}_{precision}", f"{model}_{actual_revision}_{precision}"]
+    requested_status = next((REQUESTED_MODELS[key] for key in request_keys if key in REQUESTED_MODELS), 'NONE')
+
+    # If the status is NONE or REJECTED, let the submission through; otherwise inform the user
+    if requested_status not in ['NONE', 'REJECTED']:
+        # If the previous run failed, spell that out and point the user to a discussion or the support email
+        if requested_status == 'FAILED':
+            return styled_warning("This model has been already submitted and failed to run - please open a discussion or contact the support email.")
         return styled_warning("This model has been already submitted.")
 
     print("Creating eval file")