Model dropdown

#12
opened by muellerzr (HF staff)
Files changed (5)
  1. README.md +1 -1
  2. requirements.txt +2 -2
  3. src/app.py +23 -56
  4. src/hub_utils.py +4 -4
  5. src/model_utils.py +7 -9
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🚀
  colorFrom: pink
  colorTo: blue
  sdk: gradio
- sdk_version: 4.36.0
+ sdk_version: 3.40.1
  app_file: src/app.py
  pinned: false
  license: apache-2.0
requirements.txt CHANGED
@@ -1,6 +1,6 @@
- accelerate<0.32.0
+ accelerate @ git+https://github.com/huggingface/accelerate
  transformers
  timm
- huggingface_hub==0.19.4
+ huggingface_hub
  tabulate
  einops
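Since `accelerate` now comes from the GitHub main branch and `huggingface_hub` is unpinned, it can be worth confirming that an installed environment actually exposes the helpers the Space imports. A minimal sketch of such a check (the printed versions and the attribute list are illustrative, not part of the Space):

```python
# Hypothetical sanity check for an environment built from requirements.txt.
from importlib.metadata import version

import accelerate.utils as accel_utils
from accelerate.commands import estimate

print("accelerate:", version("accelerate"))
print("huggingface_hub:", version("huggingface_hub"))

# These are the helpers src/model_utils.py imports from accelerate.
for name in ("calculate_maximum_sizes", "convert_bytes"):
    assert hasattr(accel_utils, name), f"missing accelerate.utils.{name}"
for name in ("check_has_model", "create_empty_model"):
    assert hasattr(estimate, name), f"missing accelerate.commands.estimate.{name}"
```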
src/app.py CHANGED
@@ -1,84 +1,45 @@
  import gradio as gr
  import pandas as pd
- from accelerate.utils import convert_bytes
  from hub_utils import check_for_discussion, report_results
- from huggingface_hub.utils import HfHubHTTPError
  from model_utils import calculate_memory, get_model


+ # We need to store them as globals because gradio doesn't have a way for us to pass them in to the button
+ MODEL = None
+
+
  def get_results(model_name: str, library: str, options: list, access_token: str):
-     model = get_model(model_name, library, access_token)
-     # try:
-     #     has_discussion = check_for_discussion(model_name)
-     # except HfHubHTTPError:
-     #     has_discussion = True
+     global MODEL
+     MODEL = get_model(model_name, library, access_token)
+     has_discussion = check_for_discussion(model_name)
      title = f"## Memory usage for '{model_name}'"
-     data = calculate_memory(model, options)
-     stages = {"model": [], "gradients": [], "optimizer": [], "step": []}
-     for i, option in enumerate(data):
-         for stage in stages:
-             stages[stage].append(option["Training using Adam (Peak vRAM)"][stage])
-         value = max(data[i]["Training using Adam (Peak vRAM)"].values())
-         if value == -1:
-             value = "N/A"
-         else:
-             value = convert_bytes(value)
-         data[i]["Training using Adam (Peak vRAM)"] = value
-
-     if any(value != -1 for value in stages["model"]):
-         out_explain = "## Training using Adam explained:\n"
-         out_explain += "When training on a batch size of 1, each stage of the training process is expected to have near the following memory results for each precision you selected:\n"
-         memory_values = pd.DataFrame(
-             columns=["dtype", "Model", "Gradient calculation", "Backward pass", "Optimizer step"]
-         )
-         for i, dtype in enumerate(options):
-             if stages["model"][i] != -1:
-                 memory_values.loc[len(memory_values.index)] = [
-                     dtype,
-                     convert_bytes(stages["model"][i]),
-                     convert_bytes(stages["gradients"][i]),
-                     convert_bytes(stages["optimizer"][i]),
-                     convert_bytes(stages["step"][i]),
-                 ]
-         return [
-             title,
-             gr.update(visible=True, value=pd.DataFrame(data)),
-             gr.update(visible=True, value=out_explain),
-             gr.update(visible=True, value=memory_values),
-         ]
-     else:
-         return [
-             title,
-             gr.update(visible=True, value=pd.DataFrame(data)),
-             gr.update(visible=False, value=""),
-             gr.update(visible=False, value=pd.DataFrame()),
-         ]
+     data = calculate_memory(MODEL, options)
+     return [title, gr.update(visible=True, value=pd.DataFrame(data)), gr.update(visible=not has_discussion)]


  with gr.Blocks() as demo:
      with gr.Column():
          gr.Markdown(
              """<img src="https://huggingface.co/spaces/hf-accelerate/model-memory-usage/resolve/main/measure_model_size.png" style="float: left;" width="250" height="250"><h1>🤗 Model Memory Calculator</h1>
+
  This tool will help you calculate how much vRAM is needed to train and perform big model inference
  on a model hosted on the 🤗 Hugging Face Hub. The minimum recommended vRAM needed for a model
  is denoted as the size of the "largest layer", and training of a model is roughly 4x its size (for Adam).
+
  These calculations are accurate within a few percent at most, such as `bert-base-cased` being 413.68 MB and the calculator estimating 413.18 MB.
+
  When performing inference, expect to add up to an additional 20% to this as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/).
  More tests will be performed in the future to get a more accurate benchmark for each model.
+
  Currently this tool supports all models hosted that use `transformers` and `timm`.
+
  To use this tool pass in the URL or model name of the model you want to calculate the memory usage for,
  select which framework it originates from ("auto" will try and detect it from the model metadata), and
  what precisions you want to use."""
          )
          out_text = gr.Markdown()
          out = gr.DataFrame(
-             headers=["dtype", "Largest Layer", "Total Size", "Training using Adam (Peak vRAM)"],
-             interactive=False,
-             visible=False,
-         )
-         out_explain = gr.Markdown()
-         memory_values = gr.DataFrame(
-             headers=["dtype", "Model", "Gradient calculation", "Backward pass", "Optimizer step"],
+             headers=["dtype", "Largest Layer", "Total Size", "Training using Adam"],
              interactive=False,
              visible=False,
          )
@@ -94,12 +55,18 @@ with gr.Blocks() as demo:
      access_token = gr.Textbox(label="API Token", placeholder="Optional (for gated models)")
      with gr.Row():
          btn = gr.Button("Calculate Memory Usage")
+         post_to_hub = gr.Button(
+             value="Report results in this model repo's discussions!\n(Will open in a new tab)", visible=False
+         )

      btn.click(
          get_results,
          inputs=[inp, library, options, access_token],
-         outputs=[out_text, out, out_explain, memory_values],
-         api_name=False,
+         outputs=[out_text, out, post_to_hub],
+     )
+
+     post_to_hub.click(report_results, inputs=[inp, library, access_token]).then(
+         lambda: gr.Button.update(visible=False), outputs=post_to_hub
      )

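The rewritten `get_results` returns `gr.update(...)` objects so a single click handler can both fill the results table and reveal the hidden `post_to_hub` button, and `.then(...)` chaining hides that button again after it is used. A stripped-down sketch of that wiring, assuming the gradio 3.x API pinned in README.md (`fake_results` and the component labels are placeholders, not the Space's real callbacks):

```python
import gradio as gr


def fake_results(model_name: str):
    # Return a markdown title plus an update that reveals the follow-up button.
    title = f"## Memory usage for '{model_name}'"
    return [title, gr.update(visible=True)]


with gr.Blocks() as demo:
    inp = gr.Textbox(label="Model name")
    out_text = gr.Markdown()
    btn = gr.Button("Calculate")
    post_to_hub = gr.Button("Report results", visible=False)

    # The first click fills the output and un-hides the second button;
    # the second button runs its action, then hides itself again.
    btn.click(fake_results, inputs=[inp], outputs=[out_text, post_to_hub])
    post_to_hub.click(lambda name: print(f"would report {name}"), inputs=[inp]).then(
        lambda: gr.update(visible=False), outputs=post_to_hub
    )

if __name__ == "__main__":
    demo.launch()
```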
src/hub_utils.py CHANGED
@@ -13,7 +13,7 @@ def check_for_discussion(model_name: str):
      model_name = extract_from_url(model_name)
      discussions = list(api.get_repo_discussions(model_name))
      return any(
-         discussion.author == "model-sizer-bot"
+         discussion.title == "[AUTOMATED] Model Memory Requirements" and discussion.author == "model-sizer-bot"
          for discussion in discussions
      )

@@ -27,9 +27,9 @@ def report_results(model_name, library, access_token):
      post = f"""# Model Memory Requirements\n

  You will need about {data[1]} VRAM to load this model for inference, and {data[3]} VRAM to train it using Adam.
-
- These calculations were measured from the [Model Memory Utility Space](https://huggingface.co/spaces/hf-accelerate/model-memory-usage) on the Hub.
-
+
+ These calculations were measured from the [Model Memory Utility Space](https://hf.co/spaces/hf-accelerate/model-memory-utility) on the Hub.
+
  The minimum recommended vRAM needed for this model assumes using [Accelerate or `device_map="auto"`](https://huggingface.co/docs/accelerate/usage_guides/big_modeling) and is denoted by the size of the "largest layer".
  When performing inference, expect to add up to an additional 20% to this, as found by [EleutherAI](https://blog.eleuther.ai/transformer-math/). More tests will be performed in the future to get a more accurate benchmark for each model.

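The tightened `check_for_discussion` now requires both the automated title and the bot author, so an unrelated discussion on a repo no longer suppresses the report. A compact sketch of the same check, assuming `huggingface_hub`'s `HfApi` (the example repo id is arbitrary):

```python
from huggingface_hub import HfApi

api = HfApi()


def has_bot_report(repo_id: str) -> bool:
    # get_repo_discussions yields Discussion objects exposing .title and .author,
    # which is enough to detect an earlier automated post.
    return any(
        d.title == "[AUTOMATED] Model Memory Requirements" and d.author == "model-sizer-bot"
        for d in api.get_repo_discussions(repo_id)
    )


print(has_bot_report("bert-base-cased"))
```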
src/model_utils.py CHANGED
@@ -3,7 +3,7 @@ from urllib.parse import urlparse

  import gradio as gr
  import torch
- from accelerate.commands.estimate import check_has_model, create_empty_model, estimate_training_usage
+ from accelerate.commands.estimate import check_has_model, create_empty_model
  from accelerate.utils import calculate_maximum_sizes, convert_bytes
  from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError

@@ -27,8 +27,8 @@ def extract_from_url(name: str):
      return path[1:]


- def translate_llama(text):
-     "Translates Llama-2 and CodeLlama to its hf counterpart"
+ def translate_llama2(text):
+     "Translates llama-2 to its hf counterpart"
      if not text.endswith("-hf"):
          return text + "-hf"
      return text
@@ -36,8 +36,8 @@ def translate_llama(text):

  def get_model(model_name: str, library: str, access_token: str):
      "Finds and grabs model from the Hub, and initializes on `meta`"
-     if "meta-llama/Llama-2-" in model_name or "meta-llama/CodeLlama-" in model_name:
-         model_name = translate_llama(model_name)
+     if "meta-llama" in model_name:
+         model_name = translate_llama2(model_name)
      if library == "auto":
          library = None
      model_name = extract_from_url(model_name)
@@ -84,12 +84,10 @@ def calculate_memory(model: torch.nn.Module, options: list):
          dtype_largest_layer = largest_layer[0]

          modifier = DTYPE_MODIFIER[dtype]
-         dtype_training_size = estimate_training_usage(
-             dtype_total_size, dtype if dtype != "float16/bfloat16" else "float16"
-         )
          dtype_total_size /= modifier
          dtype_largest_layer /= modifier

+         dtype_training_size = convert_bytes(dtype_total_size * 4)
          dtype_total_size = convert_bytes(dtype_total_size)
          dtype_largest_layer = convert_bytes(dtype_largest_layer)
          data.append(
@@ -97,7 +95,7 @@ def calculate_memory(model: torch.nn.Module, options: list):
                  "dtype": dtype,
                  "Largest Layer or Residual Group": dtype_largest_layer,
                  "Total Size": dtype_total_size,
-                 "Training using Adam (Peak vRAM)": dtype_training_size,
+                 "Training using Adam": dtype_training_size,
              }
          )
      return data
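The new estimate replaces `estimate_training_usage` with a flat `dtype_total_size * 4` rule (weights, gradients, and the two Adam moment buffers each roughly the size of the model). A back-of-the-envelope sketch of that arithmetic, where the `DTYPE_MODIFIER` values and the parameter count are illustrative stand-ins and `convert_bytes` is the accelerate helper the module already imports:

```python
from accelerate.utils import convert_bytes

# Illustrative subset of the dtype divisors used by the Space (fp32 = 1x baseline).
DTYPE_MODIFIER = {"float32": 1, "float16": 2, "int8": 4}


def rough_training_estimate(num_params: int, dtype: str = "float32") -> str:
    total_bytes = num_params * 4 / DTYPE_MODIFIER[dtype]  # fp32 uses 4 bytes per parameter
    # Weights + gradients + two Adam moments ~= 4x the model itself.
    return convert_bytes(total_bytes * 4)


# e.g. a ~110M-parameter model (roughly bert-base-cased) in float32:
print(rough_training_estimate(110_000_000, "float32"))
```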