add gpu info

Files changed:
- backend-cli.py +25 -14
- src/backend/manage_requests.py +2 -3
- src/display/utils.py +22 -3
- src/leaderboard/read_evals.py +8 -0
- src/populate.py +1 -14
- src/utils.py +37 -9
backend-cli.py CHANGED

@@ -166,8 +166,13 @@ def process_evaluation(task: Task, eval_request: EvalRequest, limit: Optional[int]
     gpu_info = analyze_gpu_stats(gpu_stats_list)
     for task_name in results['results'].keys():
         for key, value in gpu_info.items():
-            results['results'][task_name][f"{key},none"] = int(value)
+            if "GPU" not in key:
+                results['results'][task_name][f"{key},none"] = int(value)
+            else:
+                results['results'][task_name][f"{key},none"] = value

+        results['results'][task_name]['batch_size,none'] = batch_size
+        print(f"gpu_stats_list: {gpu_stats_list}")
     print("GPU Usage:", gpu_info)

     dumped = json.dumps(results, indent=2, default=lambda o: "<not serializable>")

@@ -430,25 +435,31 @@ if __name__ == "__main__":
     if local_debug:
         # debug_model_names = [args.model] # Use model from arguments
         # debug_task_name = [args.task] # Use task from arguments
-        debug_model_names = ["mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mixtral-8x7B-v0.1"...
+        debug_model_names = ["microsoft/phi-2", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mixtral-8x7B-v0.1",
+                             "databricks/dbrx-instruct", "databricks/dbrx-base",
+                             "mistralai/Mixtral-8x22B-v0.1", "mistralai/Mixtral-8x22B-Instruct-v0.1", "alpindale/WizardLM-2-8x22B",
+                             "CohereForAI/c4ai-command-r-plus"] # Use model from arguments
         debug_task_name = ['mmlu', 'selfcheckgpt'] # Use task from arguments
-        precisions = ['...
+        precisions = ['4bit', 'float16', 'float32', '8bit']
         task_lst = TASKS_HARNESS.copy()
         for precision in precisions:
-            for ...
-                for ...
+            for debug_model_name in debug_model_names:
+                for task in task_lst:
                     task_name = task.benchmark
                     if task_name not in debug_task_name:
                         continue
-                    eval_request = EvalRequest(
-                        model=debug_model_name,
-                        private=False,
-                        status="",
-                        json_filepath="",
-                        precision=precision, # Use precision from arguments
-                        inference_framework=args.inference_framework # Use inference framework from arguments
-                    )
-                    results = process_evaluation(task, eval_request, limit=args.limit)
+                    try:
+                        eval_request = EvalRequest(
+                            model=debug_model_name,
+                            private=False,
+                            status="",
+                            json_filepath="",
+                            precision=precision, # Use precision from arguments
+                            inference_framework=args.inference_framework # Use inference framework from arguments
+                        )
+                        results = process_evaluation(task, eval_request, limit=args.limit)
+                    except Exception as e:
+                        print(f"debug running error: {e}")
     else:
         while True:
             res = False
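To make the merge concrete, here is a small self-contained sketch of what one task entry looks like after this change; the task name, stat values, and batch size below are invented for illustration:

    # Illustrative sketch only: invented task name, GPU stats, and batch size
    results = {"results": {"mmlu": {"acc,none": 0.702}}}
    gpu_info = {"Mem(M)": 21500, "Power(W)": 255.3, "Temp(C)": 61.2, "Util(%)": 87.5, "GPU": "2xH100 80GB"}
    batch_size = 8

    for task_name in results["results"].keys():
        for key, value in gpu_info.items():
            if "GPU" not in key:
                results["results"][task_name][f"{key},none"] = int(value)  # numeric stats truncated to int
            else:
                results["results"][task_name][f"{key},none"] = value       # GPU name kept as a string
        results["results"][task_name]["batch_size,none"] = batch_size

    print(results["results"]["mmlu"])
    # {'acc,none': 0.702, 'Mem(M),none': 21500, 'Power(W),none': 255, 'Temp(C),none': 61,
    #  'Util(%),none': 87, 'GPU,none': '2xH100 80GB', 'batch_size,none': 8}
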
src/backend/manage_requests.py CHANGED

@@ -37,12 +37,11 @@ class EvalRequest:
         # Quantized models need some added config, the install of bits and bytes, etc
         # elif self.precision == "8bit":
         #     model_args += ",load_in_8bit=True"
-
-
+        elif self.precision == "4bit":
+            model_args += ",load_in_4bit=True"
         # elif self.precision == "GPTQ":
         # A GPTQ model does not need dtype to be specified,
         # it will be inferred from the config
-        pass
         elif self.precision == "8bit":
             model_args += ",load_in_8bit=True"
         else:
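The new branch mirrors the existing 8bit one: the chosen precision appends a bitsandbytes flag to the model_args string handed to the harness. A minimal sketch, assuming an illustrative pretrained= prefix and a hypothetical helper name:

    # Minimal sketch; quantization_args and the pretrained= prefix are illustrative,
    # not the exact string the class builds
    def quantization_args(model_args: str, precision: str) -> str:
        if precision == "4bit":
            model_args += ",load_in_4bit=True"   # branch added in this commit
        elif precision == "8bit":
            model_args += ",load_in_8bit=True"   # pre-existing branch
        return model_args

    print(quantization_args("pretrained=mistralai/Mixtral-8x7B-v0.1", "4bit"))
    # pretrained=mistralai/Mixtral-8x7B-v0.1,load_in_4bit=True
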
src/display/utils.py CHANGED

@@ -16,7 +16,24 @@ MULTIPLE_CHOICEs = ["mmlu"]
 GPU_TEMP = 'Temp(C)'
 GPU_Power = 'Power(W)'
 GPU_Mem = 'Mem(M)'
+GPU_Name = "GPU"
 GPU_Util = 'Util(%)'
+BATCH_SIZE = 'bs'
+
+system_metrics_to_name_map = {
+    "end_to_end_time": f"{E2Es}",
+    "prefilling_time": f"{PREs}",
+    "decoding_throughput": f"{TS}",
+}
+
+gpu_metrics_to_name_map = {
+    GPU_Util: GPU_Util,
+    GPU_TEMP: GPU_TEMP,
+    GPU_Power: GPU_Power,
+    GPU_Mem: GPU_Mem,
+    "batch_size": BATCH_SIZE,
+    GPU_Name: GPU_Name,
+}

 @dataclass
 class Task:

@@ -87,14 +104,16 @@ for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
     # System performance metrics
     auto_eval_column_dict.append([f"{task.name}_end_to_end_time", ColumnContent, ColumnContent(f"{task.value.col_name} {E2Es}", "number", True)])
+    auto_eval_column_dict.append([f"{task.name}_batch_size", ColumnContent, ColumnContent(f"{task.value.col_name} {BATCH_SIZE}", "number", True)])
+    auto_eval_column_dict.append([f"{task.name}_gpu", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Name}", "str", True)])
     if task.value.benchmark in MULTIPLE_CHOICEs:
         continue
-    auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", True)])
+    auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", False)])
     auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True)])

     auto_eval_column_dict.append([f"{task.name}_gpu_mem", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Mem}", "number", True)])
-    auto_eval_column_dict.append([f"{task.name}_gpu_power", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Power}", "number", True)])
-    auto_eval_column_dict.append([f"{task.name}_gpu_temp", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_TEMP}", "number", True)])
+    auto_eval_column_dict.append([f"{task.name}_gpu_power", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Power}", "number", False)])
+    auto_eval_column_dict.append([f"{task.name}_gpu_temp", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_TEMP}", "number", False)])
     auto_eval_column_dict.append([f"{task.name}_gpu_util", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Util}", "number", True)])

     # Model information
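For orientation, the loop above yields one display column per metric, with a header that joins the task's col_name to these short labels. A toy rendering for a hypothetical task; the E2Es/PREs/TS label values are assumed here, since they are defined elsewhere in this module:

    # Toy illustration; E2Es/PREs/TS values are assumed, and "MMLU" is a hypothetical col_name
    E2Es, PREs, TS = "E2E(s)", "PRE(s)", "T/s"
    BATCH_SIZE, GPU_Name = "bs", "GPU"
    GPU_Mem, GPU_Power, GPU_TEMP, GPU_Util = "Mem(M)", "Power(W)", "Temp(C)", "Util(%)"

    col_name = "MMLU"
    labels = [E2Es, BATCH_SIZE, GPU_Name, PREs, TS, GPU_Mem, GPU_Power, GPU_TEMP, GPU_Util]
    print([f"{col_name} {label}" for label in labels])
    # ['MMLU E2E(s)', 'MMLU bs', 'MMLU GPU', 'MMLU PRE(s)', 'MMLU T/s', 'MMLU Mem(M)', ...]
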
src/leaderboard/read_evals.py CHANGED

@@ -103,6 +103,10 @@ class EvalResult:

         if to_add is True:
             multiplier = 100.0
+            if "GPU" in metric:
+                results[benchmark][metric] = value
+                continue
+
             if "rouge" in metric and "truthful" not in benchmark:
                 multiplier = 1.0
             if "squad" in benchmark:
@@ -111,6 +115,10 @@ class EvalResult:
                 multiplier = 1.0
             if "throughput" in metric:
                 multiplier = 1.0
+            if "batch_" in metric or "Mem" in metric or "Util" in metric:
+                multiplier = 1
+
+
             # print('RESULTS', data['results'])
             # print('XXX', benchmark, metric, value, multiplier)
             results[benchmark][metric] = value * multiplier
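The net effect of the two additions: GPU name strings skip the percentage scaling entirely, while batch size, memory, and utilization keep their raw magnitude. A condensed sketch of that selection (the rouge/squad/throughput special cases are omitted, and the metric names are illustrative):

    # Condensed sketch of the multiplier selection; metric names are illustrative
    def scaled(metric: str, value):
        if "GPU" in metric:                  # GPU name: stored verbatim
            return value
        multiplier = 100.0                   # default: fractions become percentages
        if "batch_" in metric or "Mem" in metric or "Util" in metric:
            multiplier = 1                   # counters/utilization stay raw
        return value * multiplier

    print(scaled("acc", 0.702))          # 70.2
    print(scaled("GPU", "2xH100 80GB"))  # 2xH100 80GB
    print(scaled("Mem(M)", 21500))       # 21500
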
src/populate.py CHANGED

@@ -12,7 +12,7 @@ from src.leaderboard.read_evals import get_raw_eval_results, EvalResult, update_

 from src.backend.envs import Tasks as BackendTasks
 from src.display.utils import Tasks
-from src.display.utils import ...
+from src.display.utils import system_metrics_to_name_map, gpu_metrics_to_name_map

 def get_leaderboard_df(
     results_path: str,
@@ -45,19 +45,7 @@ def get_leaderboard_df(
         bm = (task.benchmark, task.metric)
         name_to_bm_map[name] = bm

-    # bm_to_name_map = {bm: name for name, bm in name_to_bm_map.items()}
-    system_metrics_to_name_map = {
-        "end_to_end_time": f"{E2Es}",
-        "prefilling_time": f"{PREs}",
-        "decoding_throughput": f"{TS}",
-    }

-    gpu_metrics_to_name_map = {
-        GPU_Util: GPU_Util,
-        GPU_TEMP: GPU_TEMP,
-        GPU_Power: GPU_Power,
-        GPU_Mem: GPU_Mem
-    }

     all_data_json = []
     for entry in all_data_json_:
@@ -73,7 +61,6 @@ def get_leaderboard_df(
         for gpu_metric, metric_namne in gpu_metrics_to_name_map.items():
             if gpu_metric in entry[k]:
                 new_entry[f"{k} {metric_namne}"] = entry[k][gpu_metric]
-
         all_data_json += [new_entry]

         # all_data_json.append(baseline_row)
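The consuming loop itself is unchanged; only the lookup tables now live in display.utils. A toy run of the flattening step, with invented entry contents, shows how per-task GPU stats become flat leaderboard columns:

    # Toy illustration of the flattening loop; the entry values are invented
    gpu_metrics_to_name_map = {"Util(%)": "Util(%)", "Mem(M)": "Mem(M)", "batch_size": "bs", "GPU": "GPU"}

    k = "MMLU"
    entry = {k: {"Util(%)": 87.5, "Mem(M)": 21500, "batch_size": 8, "GPU": "2xH100 80GB"}}
    new_entry = {}
    for gpu_metric, metric_namne in gpu_metrics_to_name_map.items():  # 'metric_namne' typo as in the source
        if gpu_metric in entry[k]:
            new_entry[f"{k} {metric_namne}"] = entry[k][gpu_metric]
    print(new_entry)
    # {'MMLU Util(%)': 87.5, 'MMLU Mem(M)': 21500, 'MMLU bs': 8, 'MMLU GPU': '2xH100 80GB'}
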
src/utils.py CHANGED

@@ -3,10 +3,10 @@ from huggingface_hub import snapshot_download
 import subprocess
 import re
 try:
-    from src.display.utils import GPU_TEMP, GPU_Mem, GPU_Power, GPU_Util
+    from src.display.utils import GPU_TEMP, GPU_Mem, GPU_Power, GPU_Util, GPU_Name
 except:
     print("local debug: from display.utils")
-    from display.utils import GPU_TEMP, GPU_Mem, GPU_Power, GPU_Util
+    from display.utils import GPU_TEMP, GPU_Mem, GPU_Power, GPU_Util, GPU_Name

 def my_snapshot_download(repo_id, revision, local_dir, repo_type, max_workers):
     for i in range(10):
@@ -49,24 +49,36 @@ def parse_nvidia_smi():

     # Regex to extract the relevant data for each GPU
     gpu_info_pattern = re.compile(r'(\d+)C\s+P\d+\s+(\d+)W / \d+W\s+\|\s+(\d+)MiB / \d+MiB\s+\|\s+(\d+)%')
+    gpu_name_pattern = re.compile(r'NVIDIA\s+([\w\s]+?\d+GB)')
     lines = output.split('\n')
-
+    gpu_name = ""
     for line in lines:
         match = gpu_info_pattern.search(line)
+        name_match = gpu_name_pattern.search(line)
+
+        gpu_info = {}
+
+        if name_match:
+            # print(name_match)
+            gpu_name = name_match.group(1).strip()
         if match:
             temp, power_usage, mem_usage, gpu_util = map(int, match.groups())
-            gpu_stats.append({
+            gpu_info.update({
                 GPU_TEMP: temp,
                 GPU_Power: power_usage,
                 GPU_Mem: mem_usage,
                 GPU_Util: gpu_util
             })
-
+        # print(f"gpu_info: {gpu_info}")
+        if len(gpu_info) >= 4:
+            gpu_stats.append(gpu_info)
+    gpu_name = f"{len(gpu_stats)}x{gpu_name}"
     gpu_stats_total = {
         GPU_TEMP: 0,
         GPU_Power: 0,
         GPU_Mem: 0,
-        GPU_Util: 0
+        GPU_Util: 0,
+        GPU_Name: gpu_name
     }
     for gpu_stat in gpu_stats:
         gpu_stats_total[GPU_TEMP] += gpu_stat[GPU_TEMP]
@@ -77,7 +89,6 @@ def parse_nvidia_smi():
     gpu_stats_total[GPU_TEMP] /= len(gpu_stats)
     gpu_stats_total[GPU_Power] /= len(gpu_stats)
     gpu_stats_total[GPU_Util] /= len(gpu_stats)
-
     return [gpu_stats_total]

 def monitor_gpus(stop_event, interval, stats_list):
@@ -88,11 +99,28 @@ def monitor_gpus(stop_event, interval, stats_list):
     stop_event.wait(interval)

 def analyze_gpu_stats(stats_list):
+    # Check if the stats_list is empty, and return None if it is
     if not stats_list:
         return None
-    avg_stats = {key: sum(d[key] for d in stats_list) / len(stats_list) for key in stats_list[0]}
-    return avg_stats

+    # Initialize dictionaries to store the stats
+    avg_stats = {}
+    max_stats = {}
+
+    # Calculate average stats, excluding 'GPU_Mem'
+    for key in stats_list[0].keys():
+        if key != GPU_Mem and key != GPU_Name:
+            total = sum(d[key] for d in stats_list)
+            avg_stats[key] = total / len(stats_list)
+
+    # Calculate max stats for 'GPU_Mem'
+    max_stats[GPU_Mem] = max(d[GPU_Mem] for d in stats_list)
+    if GPU_Name in stats_list[0]:
+        avg_stats[GPU_Name] = stats_list[0][GPU_Name]
+    # Update average stats with max GPU memory usage
+    avg_stats.update(max_stats)
+
+    return avg_stats

 if __name__ == "__main__":
     print(analyze_gpu_stats(parse_nvidia_smi()))
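A quick way to sanity-check the two patterns is to run them against fabricated nvidia-smi-style lines; the device name and numbers below are invented, but the column layout follows the table nvidia-smi prints:

    import re

    # Fabricated nvidia-smi-style lines (all values invented) to exercise the two patterns
    name_line  = "|   0  NVIDIA H100 80GB HBM3          On   | 00000000:19:00.0 Off |                    0 |"
    stats_line = "| N/A   61C    P0             255W / 700W |  21500MiB / 81559MiB |     87%      Default |"

    gpu_info_pattern = re.compile(r'(\d+)C\s+P\d+\s+(\d+)W / \d+W\s+\|\s+(\d+)MiB / \d+MiB\s+\|\s+(\d+)%')
    gpu_name_pattern = re.compile(r'NVIDIA\s+([\w\s]+?\d+GB)')

    print(gpu_name_pattern.search(name_line).group(1).strip())  # H100 80GB
    print(gpu_info_pattern.search(stats_line).groups())         # ('61', '255', '21500', '87')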