Refactor
- Makefile +19 -0
- code_samples/accelerate +0 -17
- code_samples/basic +0 -31
- code_samples/calculating_metrics +0 -51
- code_samples/checkpointing +0 -29
- code_samples/experiment_tracking +0 -32
- code_samples/gradient_accumulation +0 -33
- code_samples/initial +0 -11
- code_samples/initial_with_metrics +0 -27
- code_samples/large_scale_training/aws_sagemaker +0 -77
- code_samples/training_configuration/aws_sagemaker +51 -0
- code_samples/{large_scale_training → training_configuration}/deepspeed +33 -43
- code_samples/{large_scale_training → training_configuration}/megatron-lm +57 -68
- code_samples/{large_scale_training → training_configuration}/multi_gpu +21 -37
- code_samples/{large_scale_training → training_configuration}/multi_node_multi_gpu +36 -47
- code_samples/{large_scale_training → training_configuration}/pytorch_fsdp +33 -40
- setup.cfg +19 -0
- src/app.py +90 -48
- src/markup.py +1 -0
- src/template.py +4 -1
Makefile
ADDED
@@ -0,0 +1,19 @@
```
.PHONY: quality style test docs

# Check that source code meets quality standards

extra_quality_checks:
	doc-builder style src --max_len 119

# this target runs checks on all files
quality:
	black --check src
	isort --check-only src
	flake8 src
	doc-builder style src --max_len 119 --check_only

# Format source code automatically and check if there are any problems left that need manual fixing
style:
	black src
	isort src
	doc-builder style src --max_len 119
```
code_samples/accelerate
DELETED
@@ -1,17 +0,0 @@
<pre>
from accelerate import Accelerator
accelerator = Accelerator()
train_dataloader, model, optimizer, scheduler = accelerator.prepare(
    dataloader, model, optimizer, scheduler
)

model.train()
for batch in train_dataloader:
    optimizer.zero_grad()
    inputs, targets = batch
    outputs = model(inputs)
    loss = loss_function(outputs, targets)
    accelerator.backward(loss)
    optimizer.step()
    scheduler.step()
</pre>
code_samples/basic
DELETED
@@ -1,31 +0,0 @@
##
<pre>
+from accelerate import Accelerator
+accelerator = Accelerator()
+dataloader, model, optimizer, scheduler = accelerator.prepare(
+    dataloader, model, optimizer, scheduler
+)

for batch in dataloader:
    optimizer.zero_grad()
    inputs, targets = batch
-   inputs = inputs.to(device)
-   targets = targets.to(device)
    outputs = model(inputs)
    loss = loss_function(outputs, targets)
-   loss.backward()
+   accelerator.backward(loss)
    optimizer.step()
    scheduler.step()</pre>
##
Everything in `accelerate` revolves around the `Accelerator` class. To use it, first create an object, then call `.prepare`, passing in the PyTorch objects you would normally train with. This returns the same objects, but placed on the correct device and wrapped for distributed training if needed. You can then train as normal, except that instead of calling `loss.backward()` you call `accelerator.backward(loss)`. Note that you no longer need to call `model.to(device)` or `inputs.to(device)`, as `accelerator.prepare()` handles this automatically. A runnable version of this loop is sketched below.

##
To learn more, check out the related documentation:
- <a href="https://huggingface.co/docs/accelerate/basic_tutorials/migration" target="_blank">Migrating to 🤗 Accelerate</a>
- <a href="https://huggingface.co/docs/accelerate/package_reference/accelerator" target="_blank">The Accelerator</a>
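The diff above shows only what changes; the following is a minimal, self-contained sketch of the converted loop that can be run directly. The toy model, data, and hyperparameters are illustrative placeholders, not part of this Space.
```
import torch
from accelerate import Accelerator

accelerator = Accelerator()

# Illustrative stand-ins for a real model, data, and schedule
model = torch.nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)
dataset = torch.utils.data.TensorDataset(
    torch.randn(64, 10), torch.randint(0, 2, (64,))
)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=8)
loss_function = torch.nn.CrossEntropyLoss()

# prepare() places everything on the right device and wraps it for
# distributed training when launched with `accelerate launch`
dataloader, model, optimizer, scheduler = accelerator.prepare(
    dataloader, model, optimizer, scheduler
)

for batch in dataloader:
    optimizer.zero_grad()
    inputs, targets = batch  # no manual .to(device) needed
    outputs = model(inputs)
    loss = loss_function(outputs, targets)
    accelerator.backward(loss)  # replaces loss.backward()
    optimizer.step()
    scheduler.step()
```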
code_samples/calculating_metrics
DELETED
@@ -1,51 +0,0 @@
##
<pre>
import evaluate
+from accelerate import Accelerator
+accelerator = Accelerator()
+train_dataloader, eval_dataloader, model, optimizer, scheduler = (
+    accelerator.prepare(
+        train_dataloader, eval_dataloader,
+        model, optimizer, scheduler
+    )
+)
metric = evaluate.load("accuracy")
for batch in train_dataloader:
    optimizer.zero_grad()
    inputs, targets = batch
-   inputs = inputs.to(device)
-   targets = targets.to(device)
    outputs = model(inputs)
    loss = loss_function(outputs, targets)
    loss.backward()
    optimizer.step()
    scheduler.step()

model.eval()
for batch in eval_dataloader:
    inputs, targets = batch
-   inputs = inputs.to(device)
-   targets = targets.to(device)
    with torch.no_grad():
        outputs = model(inputs)
    predictions = outputs.argmax(dim=-1)
+   predictions, targets = accelerator.gather_for_metrics(
+       (predictions, targets)
+   )
    metric.add_batch(
        predictions=predictions,
        references=targets
    )
print(metric.compute())</pre>

##
When calculating metrics on a validation set, you can use the `Accelerator.gather_for_metrics` method to gather the predictions and references from all devices and then calculate the metric on the gathered values. This will also *automatically* drop the padded values from the gathered tensors that were added to ensure that all tensors have the same length. This guarantees that the metric is calculated on the correct values.
##
To learn more, check out the related documentation:

- <a href="https://huggingface.co/docs/accelerate/en/quicktour#distributed-evaluation" target="_blank">Quicktour - Calculating metrics</a>
- <a href="https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.gather_for_metrics" target="_blank">API reference</a>
- <a href="https://github.com/huggingface/accelerate/blob/main/examples/by_feature/multi_process_metrics.py" target="_blank">Example script</a>
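For reference, a minimal runnable sketch of distributed evaluation with `gather_for_metrics`; the toy model and synthetic data are illustrative, and the `evaluate` package is assumed to be installed.
```
import evaluate
import torch
from accelerate import Accelerator

accelerator = Accelerator()
model = torch.nn.Linear(10, 2)
dataset = torch.utils.data.TensorDataset(
    torch.randn(64, 10), torch.randint(0, 2, (64,))
)
eval_dataloader = torch.utils.data.DataLoader(dataset, batch_size=8)
model, eval_dataloader = accelerator.prepare(model, eval_dataloader)

metric = evaluate.load("accuracy")
model.eval()
for batch in eval_dataloader:
    inputs, targets = batch
    with torch.no_grad():
        outputs = model(inputs)
    predictions = outputs.argmax(dim=-1)
    # Gather from all processes; padded duplicates added to even out the last
    # batch are dropped automatically so each sample is counted exactly once
    predictions, targets = accelerator.gather_for_metrics((predictions, targets))
    metric.add_batch(predictions=predictions, references=targets)
print(metric.compute())
```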
code_samples/checkpointing
DELETED
@@ -1,29 +0,0 @@
##
<pre>
from accelerate import Accelerator
accelerator = Accelerator()
dataloader, model, optimizer, scheduler = accelerator.prepare(
    dataloader, model, optimizer, scheduler
)

for batch in dataloader:
    optimizer.zero_grad()
    inputs, targets = batch
    outputs = model(inputs)
    loss = loss_function(outputs, targets)
    accelerator.backward(loss)
    optimizer.step()
    scheduler.step()
+accelerator.save_state("checkpoint_dir")
+accelerator.load_state("checkpoint_dir")</pre>
##
To save or load a checkpoint, `Accelerator` provides the `save_state` and `load_state` methods. These will save or load the state of the model, optimizer, and scheduler, as well as the random states and any custom registered objects, from the main process on each machine to a passed-in folder.
**This API is designed to save and resume training states only from within the same python script or training setup.**
##
To learn more, check out the related documentation:
- <a href="https://huggingface.co/docs/accelerate/usage_guides/checkpoint" target="_blank">Saving and loading training states</a>
- <a href="https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.save_state" target="_blank">`save_state` API reference</a>
- <a href="https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.load_state" target="_blank">`load_state` API reference</a>
- <a href="https://github.com/huggingface/accelerate/blob/main/examples/by_feature/checkpointing.py" target="_blank">Example script</a>
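A runnable sketch of the checkpointing flow; the toy objects and directory name are illustrative placeholders.
```
import torch
from accelerate import Accelerator

accelerator = Accelerator()
model = torch.nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
model, optimizer = accelerator.prepare(model, optimizer)

# Saves model, optimizer, and RNG states (plus any registered objects)
accelerator.save_state("checkpoint_dir")

# ... later, from within the same training setup, restore everything
accelerator.load_state("checkpoint_dir")
```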
code_samples/experiment_tracking
DELETED
@@ -1,32 +0,0 @@
##
<pre>
from accelerate import Accelerator
-accelerator = Accelerator()
+accelerator = Accelerator(log_with="wandb")
train_dataloader, model, optimizer, scheduler = accelerator.prepare(
    dataloader, model, optimizer, scheduler
)
+accelerator.init_trackers()
model.train()
for batch in train_dataloader:
    optimizer.zero_grad()
    inputs, targets = batch
    outputs = model(inputs)
    loss = loss_function(outputs, targets)
+   accelerator.log({"loss": loss})
    accelerator.backward(loss)
    optimizer.step()
    scheduler.step()
+accelerator.end_training()
</pre>
##
To use experiment trackers with `accelerate`, simply pass the desired tracker to the `log_with` parameter when building the `Accelerator` object. Then initialize the tracker(s) by running `Accelerator.init_trackers()`, passing in any configurations they may need. Afterwards, call `Accelerator.log` to log a particular value to your tracker. At the end of training, call `accelerator.end_training()` so that any finalization functions a tracking library may need are run automatically.
##
To learn more, check out the related documentation:
- <a href="https://huggingface.co/docs/accelerate/usage_guides/tracking" target="_blank">Using experiment trackers</a>
- <a href="https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.log" target="_blank">Accelerator API Reference</a>
- <a href="https://huggingface.co/docs/accelerate/package_reference/tracking" target="_blank">Tracking API Reference</a>
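A minimal sketch of the tracking flow, assuming `wandb` is installed and configured; the project name and logged values are illustrative.
```
from accelerate import Accelerator

accelerator = Accelerator(log_with="wandb")
# init_trackers takes a project name plus any tracker-specific configuration
accelerator.init_trackers(project_name="accelerate-demo")

for step in range(10):
    loss = 1.0 / (step + 1)  # stand-in for a real training loss
    accelerator.log({"loss": loss}, step=step)

# Lets each tracking library run its finalization hooks
accelerator.end_training()
```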
code_samples/gradient_accumulation
DELETED
@@ -1,33 +0,0 @@
##
<pre>
from accelerate import Accelerator
accelerator = Accelerator(
+   gradient_accumulation_steps=2,
)
dataloader, model, optimizer, scheduler = accelerator.prepare(
    dataloader, model, optimizer, scheduler
)

for batch in dataloader:
+   with accelerator.accumulate(model):
        optimizer.zero_grad()
        inputs, targets = batch
        outputs = model(inputs)
        loss = loss_function(outputs, targets)
        accelerator.backward(loss)
        optimizer.step()
        scheduler.step()</pre>

##
When performing gradient accumulation in a distributed setup, there are many opportunities for efficiency mistakes to occur. `Accelerator` provides a context manager that will take care of the details for you and ensure that the model is training correctly. Simply wrap the training loop in the `Accelerator.accumulate` context manager, passing in the model you are training; during training, the gradients will then accumulate and synchronize automatically when needed.

##
To learn more, check out the related documentation:
- <a href="https://huggingface.co/docs/accelerate/usage_guides/gradient_accumulation" target="_blank">Performing gradient accumulation</a>
- <a href="https://huggingface.co/docs/accelerate/package_reference/accelerator#accelerate.Accelerator.accumulate" target="_blank">API reference</a>
- <a href="https://github.com/huggingface/accelerate/blob/main/examples/by_feature/gradient_accumulation.py" target="_blank">Example script</a>
- <a href="https://github.com/huggingface/accelerate/blob/main/examples/by_feature/automatic_gradient_accumulation.py" target="_blank">Performing automatic gradient accumulation example script</a>
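A self-contained sketch of the accumulation context manager with toy data; two accumulation steps are an arbitrary choice for illustration.
```
import torch
from accelerate import Accelerator

accelerator = Accelerator(gradient_accumulation_steps=2)
model = torch.nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
dataset = torch.utils.data.TensorDataset(
    torch.randn(64, 10), torch.randint(0, 2, (64,))
)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=8)
loss_function = torch.nn.CrossEntropyLoss()
dataloader, model, optimizer = accelerator.prepare(dataloader, model, optimizer)

for batch in dataloader:
    # Inside accumulate(), gradient synchronization (and the wrapped
    # optimizer's actual step) only takes effect every second batch here
    with accelerator.accumulate(model):
        optimizer.zero_grad()
        inputs, targets = batch
        outputs = model(inputs)
        loss = loss_function(outputs, targets)
        accelerator.backward(loss)
        optimizer.step()
```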
code_samples/initial
DELETED
@@ -1,11 +0,0 @@
<pre>
for batch in dataloader:
    optimizer.zero_grad()
    inputs, targets = batch
    inputs = inputs.to(device)
    targets = targets.to(device)
    outputs = model(inputs)
    loss = loss_function(outputs, targets)
    loss.backward()
    optimizer.step()
    scheduler.step()</pre>
code_samples/initial_with_metrics
DELETED
@@ -1,27 +0,0 @@
<pre>
import evaluate
metric = evaluate.load("accuracy")
for batch in train_dataloader:
    optimizer.zero_grad()
    inputs, targets = batch
    inputs = inputs.to(device)
    targets = targets.to(device)
    outputs = model(inputs)
    loss = loss_function(outputs, targets)
    loss.backward()
    optimizer.step()
    scheduler.step()

model.eval()
for batch in eval_dataloader:
    inputs, targets = batch
    inputs = inputs.to(device)
    targets = targets.to(device)
    with torch.no_grad():
        outputs = model(inputs)
    predictions = outputs.argmax(dim=-1)
    metric.add_batch(
        predictions=predictions,
        references=targets
    )
print(metric.compute())</pre>
code_samples/large_scale_training/aws_sagemaker
DELETED
@@ -1,77 +0,0 @@
##
Run `accelerate config` and answer the questionnaire accordingly.
Below is an example yaml for running code remotely on AWS SageMaker. Replace the `xxxxx` placeholders with appropriate values.

<pre>
base_job_name: accelerate-sagemaker-1
compute_environment: AMAZON_SAGEMAKER
distributed_type: 'NO'
dynamo_backend: 'NO'
ec2_instance_type: ml.p3.2xlarge
gpu_ids: all
iam_role_name: xxxxx
mixed_precision: 'no'
num_machines: 1
profile: xxxxx
py_version: py38
pytorch_version: 1.10.2
region: us-east-1
transformers_version: 4.17.0
use_cpu: false
</pre>
##
<pre>
from accelerate import Accelerator

def parse_args():
    parser = argparse.ArgumentParser(description="sample task")

    parser.add_argument(
        "--pad_to_max_length",
-       action="store_true",
+       type=bool,
+       default=False,
        help="If passed, pad all samples to `max_length`. Otherwise, dynamic padding is used.",
    )

...

+def main():
    accelerator = Accelerator()

    model, optimizer, training_dataloader, scheduler = accelerator.prepare(
        model, optimizer, training_dataloader, scheduler
    )

    for batch in training_dataloader:
        optimizer.zero_grad()
        inputs, targets = batch
        outputs = model(inputs)
        loss = loss_function(outputs, targets)
        accelerator.backward(loss)
        optimizer.step()
        scheduler.step()

-   torch.save('/opt/ml/model')
+   accelerator.save('/opt/ml/model')

+if __name__ == "__main__":
+    main()
</pre>
Launching a script using the default accelerate config file looks like the following:
```
accelerate launch {script_name.py} {--arg1} {--arg2} ...
```
##
SageMaker doesn't support argparse actions. If you want to use, for example, boolean hyperparameters, you need to specify the type as bool in your script and provide an explicit True or False value for this hyperparameter. An example is shown above for the `pad_to_max_length` argument. Another important point is to save all the output artifacts to `/opt/ml/model` or use `os.environ["SM_MODEL_DIR"]` as your save directory. After training, artifacts in this directory are uploaded to S3, as shown in the code snippet above.

You can provide a custom docker image, input channels pointing to S3 data locations, and SageMaker metrics logging as part of the advanced features. Please refer to <a href="https://github.com/huggingface/notebooks/tree/main/sagemaker/22_accelerate_sagemaker_examples" target="_blank">Examples showcasing AWS SageMaker integration of 🤗 Accelerate</a>.

##
To learn more, check out the related documentation:
- <a href="https://huggingface.co/docs/accelerate/usage_guides/sagemaker" target="_blank">How to use 🤗 Accelerate with SageMaker</a>
- <a href="https://github.com/huggingface/notebooks/tree/main/sagemaker/22_accelerate_sagemaker_examples" target="_blank">Examples showcasing AWS SageMaker integration of 🤗 Accelerate</a>
- <a href="https://huggingface.co/docs/accelerate/main/en/package_reference/cli" target="_blank">The Accelerate CLI</a>
code_samples/training_configuration/aws_sagemaker
ADDED
@@ -0,0 +1,51 @@
##
<pre>
+base_job_name: accelerate-sagemaker-1
+compute_environment: AMAZON_SAGEMAKER
distributed_type: 'NO'
dynamo_backend: 'NO'
+ec2_instance_type: ml.p3.2xlarge
+gpu_ids: all
+iam_role_name: MY_IAM_ROLE_NAME
mixed_precision: 'no'
+num_machines: 1
+profile: MY_PROFILE_NAME
+py_version: py38
+pytorch_version: 1.10.2
+region: us-east-1
+transformers_version: 4.17.0
use_cpu: false
</pre>
##
<pre>
def parse_args():
    parser = argparse.ArgumentParser(
        description="sample task"
    )

    parser.add_argument(
        "--some_bool_arg",
-       action="store_true",
+       type=bool,
+       default=False,
    )
</pre>
##
If the YAML was generated through the `accelerate config` command:
```
accelerate launch {script_name.py} {--arg1} {--arg2} ...
```

If the YAML is saved to a `~/config.yaml` file:
```
accelerate launch --config_file ~/config.yaml {script_name.py} {--arg1} {--arg2} ...
```
##
SageMaker does not support argparse actions. As a result, if a script parameter would normally be a boolean, you need to specify the type as `bool` in the script and provide an explicit `True` or `False` value.

Also, when using SageMaker, all output artifacts should use `/opt/ml/model` or `os.environ["SM_MODEL_DIR"]` as the save directory. After training, artifacts in this directory are uploaded to S3.
##
To learn more, check out the related documentation:
- <a href="https://huggingface.co/docs/accelerate/usage_guides/sagemaker" target="_blank">How to use 🤗 Accelerate with SageMaker</a>
- <a href="https://github.com/huggingface/notebooks/tree/main/sagemaker/22_accelerate_sagemaker_examples" target="_blank">Examples showcasing AWS SageMaker integration of 🤗 Accelerate</a>
- <a href="https://huggingface.co/docs/accelerate/main/en/package_reference/cli" target="_blank">The Command Line</a>
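One caveat worth illustrating with a hedged sketch (the argument name here is hypothetical): vanilla `argparse` under `type=bool` converts any non-empty string, including `"False"`, to `True`, so a small string-to-bool converter is a common workaround when SageMaker passes hyperparameters as strings.
```
import argparse


def str_to_bool(value):
    # argparse's type=bool would treat the string "False" as True,
    # so convert explicitly; this helper is an assumption, not part of the Space
    return str(value).lower() in ("true", "1", "yes")


def parse_args():
    parser = argparse.ArgumentParser(description="sample task")
    parser.add_argument("--some_bool_arg", type=str_to_bool, default=False)
    return parser.parse_args()


if __name__ == "__main__":
    print(parse_args().some_bool_arg)
```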
code_samples/{large_scale_training → training_configuration}/deepspeed
RENAMED
##
Below is an example yaml for mixed precision training using DeepSpeed ZeRO Stage-3 with CPU offloading on 8 GPUs.
<pre>
compute_environment: LOCAL_MACHINE
deepspeed_config:
  gradient_accumulation_steps: 1
  gradient_clipping: 1.0
  offload_optimizer_device: cpu
  offload_param_device: cpu
  zero3_init_flag: true
  zero3_save_16bit_model: true
  zero_stage: 3
distributed_type: DEEPSPEED
downcast_bf16: 'no'
dynamo_backend: 'NO'
fsdp_config: {}
machine_rank: 0
main_training_function: main
megatron_lm_config: {}
mixed_precision: fp16
num_machines: 1
num_processes: 8
rdzv_backend: static
same_network: true
use_cpu: false
</pre>
##
Assume that `model` is created utilizing the `transformers` library.
<pre>
from accelerate import Accelerator

def main():
    accelerator = Accelerator()

    model, optimizer, training_dataloader, scheduler = accelerator.prepare(
        model, optimizer, training_dataloader, scheduler
    )

    generated_tokens = accelerator.unwrap_model(model).generate(
        batch["input_ids"],
        attention_mask=batch["attention_mask"],
        **gen_kwargs,
+       synced_gpus=True
    )
    ...

    accelerator.unwrap_model(model).save_pretrained(
        args.output_dir,
        is_main_process=accelerator.is_main_process,
        save_function=accelerator.save,
+       state_dict=accelerator.get_state_dict(model)
    )
    ...
</pre>
##
If the YAML was generated through the `accelerate config` command:
```
accelerate launch {script_name.py} {--arg1} {--arg2} ...
```

If the YAML is saved to a `~/config.yaml` file:
```
accelerate launch --config_file ~/config.yaml {script_name.py} {--arg1} {--arg2} ...
```

Or you can use `accelerate launch` with the right configuration parameters and no `config.yaml` file:
```
accelerate launch \
--use_deepspeed \
...
```

##
For core DeepSpeed features (ZeRO stages 1 and 2), Accelerate requires no code changes. For ZeRO Stage-3, `transformers`' `generate` function requires `synced_gpus=True` and `save_pretrained` requires the `state_dict` param, because the model parameters are sharded across the GPUs.

You can also set most of the fields in the DeepSpeed config file to `auto` and they will be automatically filled in when performing `accelerate launch`.
##
To learn more, check out the related documentation:
- <a href="https://huggingface.co/docs/accelerate/usage_guides/deepspeed" target="_blank">How to use DeepSpeed</a>
- <a href="https://huggingface.co/docs/accelerate/usage_guides/deepspeed#deepspeed-config-file" target="_blank">DeepSpeed Config File</a>
- <a href="https://huggingface.co/blog/accelerate-deepspeed" target="_blank">Accelerate Large Model Training using DeepSpeed</a>
- <a href="https://huggingface.co/docs/accelerate/package_reference/deepspeed" target="_blank">DeepSpeed Utilities</a>
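To make the `auto` mechanism concrete, here is a sketch of writing a DeepSpeed config file from Python; the field names follow DeepSpeed's JSON schema, but the exact set shown here is an assumption for illustration rather than a complete config.
```
import json

ds_config = {
    # "auto" values are filled in from the accelerate launch arguments
    # and the objects passed to accelerator.prepare()
    "train_micro_batch_size_per_gpu": "auto",
    "gradient_accumulation_steps": "auto",
    "gradient_clipping": "auto",
    "zero_optimization": {
        "stage": 3,
        "offload_optimizer": {"device": "cpu"},
        "offload_param": {"device": "cpu"},
    },
}

with open("ds_config.json", "w") as f:
    json.dump(ds_config, f, indent=2)
```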
code_samples/{large_scale_training → training_configuration}/megatron-lm
RENAMED
##
Below is an example yaml for BF16 mixed-precision training using Megatron-LM with 2x Data Parallelism, 2x Pipeline Parallelism, and 2x Tensor Parallelism on 8 GPUs. It is also using Sequence Parallelism, selective activation checkpointing, and a sharded optimizer.
<pre>
compute_environment: LOCAL_MACHINE
deepspeed_config: {}
distributed_type: MEGATRON_LM
downcast_bf16: 'no'
dynamo_backend: 'NO'
fsdp_config: {}
machine_rank: 0
main_training_function: main
megatron_lm_config:
  megatron_lm_gradient_clipping: 1.0
  megatron_lm_num_micro_batches: 2
  megatron_lm_pp_degree: 2
  megatron_lm_recompute_activations: true
  megatron_lm_sequence_parallelism: true
  megatron_lm_tp_degree: 2
  megatron_lm_use_distributed_optimizer: true
mixed_precision: bf16
num_machines: 1
num_processes: 8
rdzv_backend: static
same_network: true
use_cpu: false
</pre>
##
<pre>
from accelerate import Accelerator
from accelerate.utils import MegatronLMDummyScheduler

accelerator = Accelerator()

...

-lr_scheduler = get_scheduler(
-    name=args.lr_scheduler_type,
-    ...
-)
+lr_scheduler = MegatronLMDummyScheduler(
+    optimizer=optimizer,
+    num_warmup_steps=...,
+    num_training_steps=...,
+)
model, optimizer, train_dataloader, eval_dataloader, lr_scheduler = accelerator.prepare(
    model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
)

total_batch_size = (
-    args.per_device_train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps
+    accelerator.state.megatron_lm_plugin.global_batch_size
)
# in evaluation loop
for step, batch in enumerate(eval_dataloader):
    with torch.no_grad():
        outputs = model(**batch)
        loss = outputs.loss
-       losses.append(accelerator.gather_for_metrics(loss.repeat(args.per_device_eval_batch_size)))
+       losses.append(loss)  # For Megatron-LM, the losses are already averaged across the data parallel group
-losses = torch.cat(losses)
+losses = torch.tensor(losses)
</pre>
##
If the YAML was generated through the `accelerate config` command:
```
accelerate launch {script_name.py} {--arg1} {--arg2} ...
```

If the YAML is saved to a `~/config.yaml` file:
```
accelerate launch --config_file ~/config.yaml {script_name.py} {--arg1} {--arg2} ...
```

Or you can use `accelerate launch` with the right configuration parameters and no `config.yaml` file:
```
accelerate launch \
--use_megatron_lm \
...
```
##
For Megatron-LM, the supported models are the Transformers GPT2, Megatron-BERT, and T5 models.
...
3. Losses are already averaged across the data parallel group
4. Save the model using `accelerator.save_state` instead of the transformers `from_pretrained`

The Accelerate Megatron-LM integration supports many advanced features such as:
- Leveraging custom training steps
- Using Megatron-LM indexed datasets
- Checkpoint reshaping and interoperability utilities
- Using `megatron_generate` for text generation using Tensor and Pipeline Parallelism
- Support for ROPE/ALiBi Positional embeddings and Multi-Query Attention

However, each of these requires more changes to your source code than what is presented here.

##
To learn more, check out the related documentation:
code_samples/{large_scale_training → training_configuration}/multi_gpu
RENAMED
##
<pre>
compute_environment: LOCAL_MACHINE
deepspeed_config: {}
distributed_type: MULTI_GPU
downcast_bf16: 'no'
dynamo_backend: 'NO'
fsdp_config: {}
gpu_ids: all
machine_rank: 0
main_training_function: main
megatron_lm_config: {}
mixed_precision: 'no'
num_machines: 1
num_processes: 4
rdzv_backend: static
same_network: true
use_cpu: false</pre>
##
None
##
If the YAML was generated through the `accelerate config` command:
```
accelerate launch {script_name.py} {--arg1} {--arg2} ...
```

If the YAML is saved to a `~/config.yaml` file:
```
accelerate launch --config_file ~/config.yaml {script_name.py} {--arg1} {--arg2} ...
```

Or you can use `accelerate launch` with the right configuration parameters and no `config.yaml` file:
```
accelerate launch --multi_gpu --num_processes=4 {script_name.py} {--arg1} {--arg2} ...
```

##
Launching on multi-GPU instances requires a different launch command than just `python myscript.py`. Accelerate will wrap around the proper launching script to delegate and call, reading in how to set the configuration based on the parameters passed in. It is a passthrough to the `torchrun` command.

**Remember that you can always use the `accelerate launch` functionality, even if the code in your script does not use the `Accelerator`**
##
To learn more, check out the related documentation:
- <a href="https://huggingface.co/docs/accelerate/main/en/basic_tutorials/launch" target="_blank">Launching distributed code</a>
- <a href="https://huggingface.co/docs/accelerate/main/en/package_reference/cli" target="_blank">The Command Line</a>
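Because `accelerate launch` decides the process layout, a script can stay launcher-agnostic and still inspect its distributed context; a minimal sketch:
```
from accelerate import Accelerator

accelerator = Accelerator()
# With `accelerate launch --multi_gpu --num_processes=4 ...` this prints once
# per process; with plain `python script.py` it reports a single process
print(
    f"process {accelerator.process_index} of {accelerator.num_processes} "
    f"on device {accelerator.device}"
)
```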
code_samples/{large_scale_training → training_configuration}/multi_node_multi_gpu
RENAMED
##
Below are example yamls for using multi-gpu training with 8 GPUs on two machines (nodes), where each machine has four GPUs:

On machine 1 (host):
<pre>
compute_environment: LOCAL_MACHINE
deepspeed_config: {}
distributed_type: MULTI_GPU
downcast_bf16: 'no'
dynamo_backend: 'NO'
fsdp_config: {}
gpu_ids: all
machine_rank: 0
main_process_ip: 192.168.20.1
main_process_port: 8080
main_training_function: main
megatron_lm_config: {}
mixed_precision: 'no'
num_machines: 2
num_processes: 8
rdzv_backend: static
same_network: true
use_cpu: false
</pre>

On machine 2:
<pre>
compute_environment: LOCAL_MACHINE
deepspeed_config: {}
distributed_type: MULTI_GPU
downcast_bf16: 'no'
dynamo_backend: 'NO'
fsdp_config: {}
gpu_ids: all
-machine_rank: 0
+machine_rank: 1
main_process_ip: 192.168.20.1
main_process_port: 8080
main_training_function: main
megatron_lm_config: {}
mixed_precision: 'no'
num_machines: 2
num_processes: 8
rdzv_backend: static
same_network: true
use_cpu: false
</pre>
##
None
##
To launch a script, on each machine run one of the following variations:

If the YAML was generated through the `accelerate config` command:
```
accelerate launch {script_name.py} {--arg1} {--arg2} ...
```

If the YAML is saved to a `~/config.yaml` file:
```
accelerate launch --config_file ~/config.yaml {script_name.py} {--arg1} {--arg2} ...
```

Or you can use `accelerate launch` with the right configuration parameters and no `config.yaml` file:

Replace `{node_number}` with the appropriate machine number (0 for the host, 1+ otherwise).
```
accelerate launch --multi_gpu --num_machines=2 --num_processes=8 --main_process_ip="192.168.20.1" --main_process_port=8080
--machine_rank={node_number} {script_name.py} {--arg1} {--arg2} ...
```

##
When utilizing multiple machines (nodes) for training, the config file needs to know how each machine will be able to communicate (the IP address and port), how many *total* GPUs there are, and whether the current machine is the host or a client.

**Remember that you can always use the `accelerate launch` functionality, even if the code in your script does not use the `Accelerator`**
##
To learn more, check out the related documentation:
- <a href="https://huggingface.co/docs/accelerate/main/en/basic_tutorials/launch" target="_blank">Launching distributed code</a>
- <a href="https://huggingface.co/docs/accelerate/main/en/package_reference/cli" target="_blank">The Command Line</a>
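In a multi-node run it is often useful to distinguish the global host process from the per-machine main process; a minimal sketch:
```
from accelerate import Accelerator

accelerator = Accelerator()
if accelerator.is_main_process:
    print("global rank 0, runs on the host machine only")
if accelerator.is_local_main_process:
    print("rank 0 on this machine, one per node")
```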
code_samples/{large_scale_training → training_configuration}/pytorch_fsdp
RENAMED
##
Below is an example yaml for BF16 mixed-precision training using PyTorch Fully Sharded Data Parallelism (FSDP) with CPU offloading on 8 GPUs.
<pre>
compute_environment: LOCAL_MACHINE
deepspeed_config: {}
distributed_type: FSDP
downcast_bf16: 'no'
dynamo_backend: 'NO'
fsdp_config:
  fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
  fsdp_backward_prefetch_policy: BACKWARD_PRE
  fsdp_offload_params: true
  fsdp_sharding_strategy: 1
  fsdp_state_dict_type: FULL_STATE_DICT
  fsdp_transformer_layer_cls_to_wrap: T5Block
machine_rank: 0
main_training_function: main
megatron_lm_config: {}
mixed_precision: bf16
num_machines: 1
num_processes: 8
rdzv_backend: static
same_network: true
use_cpu: false
</pre>
##
<pre>
from accelerate import Accelerator

accelerator = Accelerator()
-model, optimizer, dataloader, scheduler = accelerator.prepare(
-    model, optimizer, dataloader, scheduler
-)
+model = accelerator.prepare(model)
+# Optimizer can be any PyTorch optimizer class
+optimizer = torch.optim.AdamW(params=model.parameters(), lr=lr)
+optimizer, dataloader, scheduler = accelerator.prepare(
+    optimizer, dataloader, scheduler
+)
</pre>
##
If the YAML was generated through the `accelerate config` command:
```
accelerate launch {script_name.py} {--arg1} {--arg2} ...
```

If the YAML is saved to a `~/config.yaml` file:
```
accelerate launch --config_file ~/config.yaml {script_name.py} {--arg1} {--arg2} ...
```

Or you can use `accelerate launch` with the right configuration parameters and no `config.yaml` file:
```
accelerate launch \
--use_fsdp \
...
```

##
For PyTorch FSDP, you need to prepare the model first **before** preparing the optimizer, since FSDP will shard parameters in-place and this will break any previously initialized optimizers.

For transformer models, please use the `TRANSFORMER_BASED_WRAP` auto wrap policy as shown in the config above.
##
To learn more, check out the related documentation:
- <a href="https://huggingface.co/docs/accelerate/usage_guides/fsdp" target="_blank">How to use Fully Sharded Data Parallelism</a>
- <a href="https://huggingface.co/blog/pytorch-fsdp" target="_blank">Accelerate Large Model Training using PyTorch Fully Sharded Data Parallel</a>
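A self-contained sketch of the ordering requirement above, with a toy model; the FSDP sharding itself only happens when the script is launched with the FSDP config.
```
import torch
from accelerate import Accelerator

accelerator = Accelerator()
model = torch.nn.Linear(10, 2)
# Prepare the model first: under FSDP this shards its parameters in-place
model = accelerator.prepare(model)

# Build the optimizer from the *prepared* parameters, then prepare the rest
optimizer = torch.optim.AdamW(params=model.parameters(), lr=1e-3)
dataset = torch.utils.data.TensorDataset(
    torch.randn(16, 10), torch.randint(0, 2, (16,))
)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=4)
optimizer, dataloader = accelerator.prepare(optimizer, dataloader)
```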
setup.cfg
ADDED
@@ -0,0 +1,19 @@
```
[isort]
default_section = FIRSTPARTY
ensure_newline_before_comments = True
force_grid_wrap = 0
include_trailing_comma = True
known_first_party = accelerate
known_third_party =
    numpy
    torch
    torch_xla

line_length = 119
lines_after_imports = 2
multi_line_output = 3
use_parentheses = True

[flake8]
ignore = E203, E722, E501, E741, W503, W605
max-line-length = 119
```
src/app.py
CHANGED
@@ -1,10 +1,35 @@
````
from contextlib import contextmanager

import gradio as gr
from markup import get_text, highlight
from template import get_templates


templates = get_templates()


def fill_tab(title, explanation):
    """
    Fill the tab with the appropriate title and explanation.
    """
    return gr.Markdown(title), gr.Markdown(explanation)


@contextmanager
def new_section():
    """
    A context manager to create a new section in the interface. Equivalent of:
    ```python
    with gr.Row():
        with gr.Column():
            ...
    ```
    """
    with gr.Row():
        with gr.Column():
            yield


def change(inp, textbox):
    """Based on an `inp`, render and highlight the appropriate code sample.
````
@@ -20,65 +45,80 @@
````
    if textbox == "base":
        code, explanation, docs = get_text(inp, textbox)
        if inp == "Basic":
            return (
                highlight(code),
                "## Accelerate Code (Base Integration)",
                explanation,
                docs,
            )
        elif inp == "Calculating Metrics":
            return (highlight(code), f"## Accelerate Code ({inp})", explanation, docs)
        else:
            return (highlight(code), f"## Accelerate Code ({inp})", explanation, docs)
    elif textbox == "training_configuration":
        yaml, changes, command, explanation, docs = get_text(inp, textbox)
        return (highlight(yaml), highlight(changes), command, explanation, docs)
    else:
        raise ValueError(f"Invalid tab name: {textbox}")


default_base = change("Basic", "base")
default_training_config = change("Multi GPU", "training_configuration")


def base_features(textbox):
    inp = gr.Radio(
        [
            "Basic",
            "Calculating Metrics",
            "Checkpointing",
            "Experiment Tracking",
            "Gradient Accumulation",
        ],
        label="Select a feature you would like to integrate",
        value="Basic",
    )
    with new_section():
        feature, out = fill_tab("## Accelerate Code", default_base[0])
    with new_section():
        _, explanation = fill_tab("## Explanation", default_base[2])
    with new_section():
        _, docs = fill_tab("## Documentation Links", default_base[3])
    inp.change(
        fn=change, inputs=[inp, textbox], outputs=[out, feature, explanation, docs]
    )


def training_config(textbox):
    inp = gr.Radio(
        [
            "AWS SageMaker",
            "DeepSpeed",
            "Megatron-LM",
            "Multi GPU",
            "Multi Node Multi GPU",
            "PyTorch FSDP",
        ],
        label="Select a distributed YAML configuration you would like to view.",
        value="Multi GPU",
    )
    with new_section():
        _, yaml = fill_tab("## Example YAML Configuration", default_training_config[0])
    with new_section():
        _, changes = fill_tab(
            "## Changes to Training Script", default_training_config[1]
        )
    with new_section():
        _, command = fill_tab("## Command to Run Training", default_training_config[2])
    with new_section():
        _, explanation = fill_tab("## Explanation", default_training_config[3])
    with new_section():
        _, docs = fill_tab("## Documentation Links", default_training_config[4])
    inp.change(
        fn=change,
        inputs=[inp, textbox],
        outputs=[yaml, changes, command, explanation, docs],
    )


# def big_model_inference():
````
@@ -126,16 +166,18 @@
````
with gr.Blocks() as demo:

    with gr.Tabs():
        with gr.TabItem("Basic Training Integration"):
            textbox = gr.Textbox(label="tab_name", visible=False, value="base")
            base_features(textbox)
        with gr.TabItem("Launch Configuration"):
            textbox = gr.Textbox(
                label="tab_name", visible=False, value="training_configuration"
            )
            training_config(textbox)
        with gr.TabItem("Big Model Inference"):
            # big_model_inference()
            pass
        with gr.TabItem("Launching from Notebooks"):
            # notebook_launcher()
            pass
````
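The refactor's central trick is the `@contextmanager` that collapses the repeated `gr.Row()`/`gr.Column()` nesting; in isolation it looks like this sketch:
```
from contextlib import contextmanager

import gradio as gr


@contextmanager
def new_section():
    # One `with new_section():` replaces two levels of container nesting
    with gr.Row():
        with gr.Column():
            yield


with gr.Blocks() as demo:
    with new_section():
        gr.Markdown("## Explanation")
```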
src/markup.py
CHANGED
@@ -14,6 +14,7 @@
```
from template import get_filename


_remove_color = "rgb(103,6,12)"
_addition_color = "rgb(6,103,12)"
```
src/template.py
CHANGED
@@ -13,6 +13,7 @@
```
# limitations under the License.
import os


TEMPLATES = ["initial", "initial_with_metrics", "accelerate"]
```
@@ -27,4 +28,6 @@
```
    """
    Returns a dictionary of template type to code content
    """
    return {
        template: open(get_filename("base", template)).read() for template in TEMPLATES
    }
```