masanorihirano
commited on
Commit
•
bd9703c
1
Parent(s):
637e1de
add
Browse files
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
checkpoints/checkpoint_6101/state/checkpoint filter=lfs diff=lfs merge=lfs -text
|
checkpoints/checkpoint_6101/metadata/metadata
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"version": 1.1, "train_state_metadata": {"mdl_vars": {"params": {"core_layer": {"freq_emb": {"emb_var": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [3, 1280]}}, "horizon_ff_layer": {"hidden_layer": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "output_layer": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "residual_layer": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}}, "input_ff_layer": {"hidden_layer": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [64, 1280]}}}, "output_layer": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "residual_layer": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [64, 1280]}}}}, "stacked_transformer_layer": {"x_layers_0": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_1": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_10": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_11": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_12": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_13": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_14": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_15": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_16": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_17": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_18": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_19": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_2": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_3": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_4": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_5": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_6": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_7": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_8": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}, "x_layers_9": {"ff_layer": {"ffn_layer1": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "ffn_layer2": {"bias": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "linear": {"w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 1280]}}}, "layer_norm": {"bias": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}}, "layer_norm": {"scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}}, "self_attention": {"key": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "per_dim_scale": {"per_dim_scale": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [80]}}, "post": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "query": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}, "value": {"b": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [16, 80]}, "w": {"_array_metadata_tag": true, "dtype": "float32", "is_optax_masked_node": false, "unpadded_shape": [1280, 16, 80]}}}}}}}}}}
|
checkpoints/checkpoint_6101/state/checkpoint
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3299276dbcfa26789bc2dfc0b8127e329a7ab52e61fd7196e2ac4fce1616dd9a
|
3 |
+
size 1628582507
|