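"""Backend scheduler for the leaderboard evaluation queue.

On each run it syncs the requests/results datasets from the Hub, picks the
highest-priority PENDING eval request, chooses a GPU instance based on the
model's parameter count, enforces monthly per-instance-type and per-user caps
(requests from leaderboard admins are exempt), then marks the request as
RUNNING and launches the lighteval suite on the selected hardware.
"""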
import logging
import pprint

from huggingface_hub import snapshot_download

logging.getLogger("openai").setLevel(logging.WARNING)

from src.backend.run_eval_suite_lighteval import run_evaluation
from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request, set_requests_seen
from src.backend.sort_queue import sort_models_by_priority
from src.envs import QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO, EVAL_RESULTS_PATH_BACKEND, API, LIMIT, TOKEN, ACCELERATOR, VENDOR, REGION
from src.about import TASKS_LIGHTEVAL

logging.basicConfig(level=logging.ERROR)
pp = pprint.PrettyPrinter(width=80)

PENDING_STATUS = "PENDING"
RUNNING_STATUS = "RUNNING"
FINISHED_STATUS = "FINISHED"
FAILED_STATUS = "FAILED"
REJECTED_STATUS = "REJECTED"

# Sync the results and requests datasets from the Hub into the local backend folders
snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)

def run_auto_eval():
    current_pending_status = [PENDING_STATUS]

    # pull the eval dataset from the hub and parse any eval requests
    # check completed evals and set them to finished
    check_completed_evals(
        api=API,
        checked_status=RUNNING_STATUS,
        completed_status=FINISHED_STATUS,
        failed_status=FAILED_STATUS,
        hf_repo=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH_BACKEND,
        hf_repo_results=RESULTS_REPO,
        local_dir_results=EVAL_RESULTS_PATH_BACKEND,
    )

    # Get all eval requests that are PENDING; to run other evals, change this parameter
    eval_requests, requests_seen = get_eval_requests(job_status=current_pending_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
    # Sort the evals by priority (first submitted, first run)
    eval_requests = sort_models_by_priority(api=API, models=eval_requests)

    print(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests")

    if len(eval_requests) == 0:
        return

    eval_request = eval_requests[0]
    pp.pprint(eval_request)
    # Select hardware based on the parameter count reported in the request metadata
    params_size = eval_request.params
    if params_size is None or params_size < 0:
        raise ValueError("Couldn't detect number of params, please make sure the metadata is available")
    # 4-bit quantized models are treated as half their parameter count for instance selection
    if eval_request.precision == '4bit':
        params_size //= 2

    # For GPU: pick an instance size/type and the monthly request cap for that instance type
    if params_size < 4:
        instance_size, instance_type, cap = "x1", "nvidia-t4", 40
    elif params_size < 8:
        instance_size, instance_type, cap = "x1", "nvidia-a10g", 40
    elif params_size < 30:
        instance_size, instance_type, cap = "x4", "nvidia-a10g", 20
    elif params_size < 45:
        instance_size, instance_type, cap = "x2", "nvidia-a100", 5
    elif params_size < 80:
        instance_size, instance_type, cap = "x4", "nvidia-a100", 5
    else:
        # Model is too large for the available instances: reject the request
        set_eval_request(
            api=API,
            eval_request=eval_request,
            set_to_status=REJECTED_STATUS,
            hf_repo=QUEUE_REPO,
            local_dir=EVAL_REQUESTS_PATH_BACKEND,
        )
        pp.pprint(dict(message="Number of params too big, can't run this model", params=eval_request.params))
        return

    # Counters are ignored for requests submitted by leaderboard admins
    is_admin = any(org['name'] == 'hebrew-llm-leaderboard' for org in eval_request.user_info.get('orgs', []))
    if not is_admin:
        counter_key = f'count_{instance_size}_{instance_type}'
        if counter_key not in requests_seen:
            requests_seen[counter_key] = 0
        if requests_seen[counter_key] >= cap:
            set_eval_request(
                api=API,
                eval_request=eval_request,
                set_to_status=REJECTED_STATUS,
                hf_repo=QUEUE_REPO,
                local_dir=EVAL_REQUESTS_PATH_BACKEND,
            )
            pp.pprint(dict(message="Reached maximum cap for requests of this instance type this month", counter=counter_key, instance_type=instance_type, cap=cap))
            return

        # next, check the user that submitted the request - allow up to 4 per user
        user = eval_request.user_info['name']
        if user in requests_seen and len(requests_seen[user]) >= 4:
            set_eval_request(
                api=API,
                eval_request=eval_request,
                set_to_status=REJECTED_STATUS,
                hf_repo=QUEUE_REPO,
                local_dir=EVAL_REQUESTS_PATH_BACKEND,
            )
            pp.pprint(dict(message="Reached maximum cap for requests for this user this month", counter=counter_key, user=user))
            return

        if user not in requests_seen:
            requests_seen[user] = []
        requests_seen[user].append(dict(model_id=eval_request.model, revision=eval_request.revision))
        requests_seen[counter_key] += 1
        set_requests_seen(
            api=API,
            requests_seen=requests_seen,
            hf_repo=QUEUE_REPO,
            local_dir=EVAL_REQUESTS_PATH_BACKEND,
        )
    # end of counters check for non-admins
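
    # All gates passed: mark the request as RUNNING and launch the evaluation on the selected instance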
    set_eval_request(
        api=API,
        eval_request=eval_request,
        set_to_status=RUNNING_STATUS,
        hf_repo=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH_BACKEND,
    )

    run_evaluation(
        eval_request=eval_request,
        task_names=TASKS_LIGHTEVAL,
        local_dir=EVAL_RESULTS_PATH_BACKEND,
        batch_size=25,
        accelerator=ACCELERATOR,
        region=REGION,
        vendor=VENDOR,
        instance_size=instance_size,
        instance_type=instance_type,
        limit=LIMIT,
    )

if __name__ == "__main__":
    run_auto_eval()