hello #1
by chansung · opened
- .gitattributes +1 -0
- Dockerfile +8 -0
- __init__.py +13 -0
- data/test/cifar10_test.tfrecord +0 -0
- data/train/cifar10_train.tfrecord +0 -0
- data_validation.ipynb +130 -0
- kubeflow_runner.py +91 -0
- local_runner.py +64 -0
- model_analysis.ipynb +105 -0
- models/__init__.py +13 -0
- models/model.py +266 -0
- models/model_test.py +16 -0
- models/preprocessing.py +32 -0
- models/preprocessing_test.py +12 -0
- pipeline.json +745 -0
- pipeline/__init__.py +0 -0
- pipeline/components/__init__.py +0 -0
- pipeline/components/pusher/GHReleasePusher/__init__.py +0 -0
- pipeline/components/pusher/GHReleasePusher/component.py +40 -0
- pipeline/components/pusher/GHReleasePusher/constants.py +8 -0
- pipeline/components/pusher/GHReleasePusher/executor.py +93 -0
- pipeline/components/pusher/GHReleasePusher/executor_test.py +131 -0
- pipeline/components/pusher/GHReleasePusher/runner.py +74 -0
- pipeline/components/pusher/HFModelPusher/__init__.py +0 -0
- pipeline/components/pusher/HFModelPusher/component.py +40 -0
- pipeline/components/pusher/HFModelPusher/constants.py +6 -0
- pipeline/components/pusher/HFModelPusher/executor.py +93 -0
- pipeline/components/pusher/HFModelPusher/runner.py +92 -0
- pipeline/components/pusher/__init__.py +0 -0
- pipeline/components/testdata/trainer/current/Format-Serving/keras_metadata.pb +3 -0
- pipeline/components/testdata/trainer/current/Format-Serving/saved_model.pb +3 -0
- pipeline/components/testdata/trainer/current/Format-Serving/variables/variables.data-00000-of-00001 +3 -0
- pipeline/components/testdata/trainer/current/Format-Serving/variables/variables.index +0 -0
- pipeline/configs.py +121 -0
- pipeline/local_pipeline.py +179 -0
- pipeline/pipeline.py +173 -0
- requirements.txt +8 -0
.gitattributes
CHANGED
@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+pipeline/components/testdata/trainer/current/Format-Serving/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
Dockerfile
ADDED
@@ -0,0 +1,8 @@
+FROM tensorflow/tfx:1.9.1
+
+WORKDIR /pipeline
+COPY ./ ./
+
+RUN pip install -r requirements.txt
+
+ENV PYTHONPATH="/pipeline:${PYTHONPATH}"
__init__.py
ADDED
@@ -0,0 +1,13 @@
+# Copyright 2020 Google LLC. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
data/test/cifar10_test.tfrecord
ADDED
Binary file (290 kB)
data/train/cifar10_train.tfrecord
ADDED
Binary file (296 kB)
data_validation.ipynb
ADDED
@@ -0,0 +1,130 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import required libs\n",
+    "import glob\n",
+    "import os\n",
+    "\n",
+    "import tensorflow as tf\n",
+    "import tensorflow_data_validation as tfdv\n",
+    "print('TF version: {}'.format(tf.version.VERSION))\n",
+    "print('TFDV version: {}'.format(tfdv.version.__version__))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read artifact information from metadata store.\n",
+    "import beam_dag_runner\n",
+    "\n",
+    "from tfx.orchestration import metadata\n",
+    "from tfx.types import standard_artifacts\n",
+    "\n",
+    "metadata_connection_config = metadata.sqlite_metadata_connection_config(\n",
+    "    beam_dag_runner.METADATA_PATH)\n",
+    "with metadata.Metadata(metadata_connection_config) as store:\n",
+    "    stats_artifacts = store.get_artifacts_by_type(standard_artifacts.ExampleStatistics.TYPE_NAME)\n",
+    "    schema_artifacts = store.get_artifacts_by_type(standard_artifacts.Schema.TYPE_NAME)\n",
+    "    anomalies_artifacts = store.get_artifacts_by_type(standard_artifacts.ExampleAnomalies.TYPE_NAME)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# configure output paths\n",
+    "# Exact paths to output artifacts can also be found on KFP Web UI if you are using kubeflow.\n",
+    "stats_path = stats_artifacts[-1].uri\n",
+    "train_stats_file = os.path.join(stats_path, 'train', 'stats_tfrecord')\n",
+    "eval_stats_file = os.path.join(stats_path, 'eval', 'stats_tfrecord')\n",
+    "print(\"Train stats file:{}, Eval stats file:{}\".format(\n",
+    "    train_stats_file, eval_stats_file))\n",
+    "\n",
+    "schema_file = os.path.join(schema_artifacts[-1].uri, 'schema.pbtxt')\n",
+    "print(\"Generated schema file:{}\".format(schema_file))\n",
+    "anomalies_file = os.path.join(anomalies_artifacts[-1].uri, 'anomalies.pbtxt')\n",
+    "print(\"Generated anomalies file:{}\".format(anomalies_file))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load generated statistics from StatisticsGen\n",
+    "train_stats = tfdv.load_statistics(train_stats_file)\n",
+    "eval_stats = tfdv.load_statistics(eval_stats_file)\n",
+    "tfdv.visualize_statistics(lhs_statistics=eval_stats, rhs_statistics=train_stats,\n",
+    "                          lhs_name='EVAL_DATASET', rhs_name='TRAIN_DATASET')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load generated schema from SchemaGen\n",
+    "schema = tfdv.load_schema_text(schema_file)\n",
+    "tfdv.display_schema(schema=schema)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load data validation result from ExampleValidator\n",
+    "anomalies = tfdv.load_anomalies_text(anomalies_file)\n",
+    "tfdv.display_anomalies(anomalies)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.10.4 64-bit",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.4"
+  },
+  "pycharm": {
+   "stem_cell": {
+    "cell_type": "raw",
+    "metadata": {
+     "collapsed": false
+    },
+    "source": []
+   }
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "4f946df053fbf2b937619d3c5458e7af74262f9a954d8797ba0b27400bcafe06"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
kubeflow_runner.py
ADDED
@@ -0,0 +1,91 @@
+import os
+from absl import logging
+
+from tfx import v1 as tfx
+from tfx import proto
+from tfx.orchestration.kubeflow.v2 import kubeflow_v2_dag_runner as runner
+from tfx.orchestration.data_types import RuntimeParameter
+from tfx.proto import pusher_pb2
+from tfx.proto import trainer_pb2
+from tfx.proto import tuner_pb2
+
+from pipeline import configs
+from pipeline import pipeline
+
+"""
+RuntimeParameter could be injected with the TFX CLI:
+    --runtime-parameter output-config='{}' \
+    --runtime-parameter input-config='{"splits": [{"name": "train", "pattern": "span-[12]/train/*.tfrecord"}, {"name": "val", "pattern": "span-[12]/test/*.tfrecord"}]}'
+
+OR it could be injected programmatically:
+    import json
+    from kfp.v2.google import client
+
+    pipelines_client = client.AIPlatformClient(
+        project_id=GOOGLE_CLOUD_PROJECT, region=GOOGLE_CLOUD_REGION,
+    )
+    _ = pipelines_client.create_run_from_job_spec(
+        PIPELINE_DEFINITION_FILE,
+        enable_caching=False,
+        parameter_values={
+            "input-config": json.dumps(
+                {
+                    "splits": [
+                        {"name": "train", "pattern": "span-[12]/train/*.tfrecord"},
+                        {"name": "val", "pattern": "span-[12]/test/*.tfrecord"},
+                    ]
+                }
+            ),
+            "output-config": json.dumps({}),
+        },
+    )
+"""
+
+
+def run():
+    runner_config = runner.KubeflowV2DagRunnerConfig(
+        default_image=configs.PIPELINE_IMAGE
+    )
+
+    runner.KubeflowV2DagRunner(
+        config=runner_config,
+        output_filename=configs.PIPELINE_NAME + "_pipeline.json",
+    ).run(
+        pipeline.create_pipeline(
+            input_config=RuntimeParameter(
+                name="input-config",
+                default='{"input_config": {"splits": [{"name":"train", "pattern":"span-1/train/*"}, {"name":"eval", "pattern":"span-1/test/*"}]}}',
+                ptype=str,
+            ),
+            output_config=RuntimeParameter(
+                name="output-config",
+                default="{}",
+                ptype=str,
+            ),
+            pipeline_name=configs.PIPELINE_NAME,
+            pipeline_root=configs.PIPELINE_ROOT,
+            data_path=configs.DATA_PATH,
+            modules={
+                "preprocessing_fn": configs.PREPROCESSING_FN,
+                "training_fn": configs.TRAINING_FN,
+                "cloud_tuner_fn": configs.CLOUD_TUNER_FN,
+            },
+            train_args=trainer_pb2.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
+            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
+            tuner_args=tuner_pb2.TuneArgs(
+                num_parallel_trials=configs.NUM_PARALLEL_TRIALS
+            ),
+            ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
+            ai_platform_tuner_args=configs.GCP_AI_PLATFORM_TUNER_ARGS,
+            ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
+            gh_release_args=configs.GH_RELEASE_ARGS,
+            hf_model_release_args=configs.HF_MODEL_RELEASE_ARGS,
+        )
+    )
+
+
+if __name__ == "__main__":
+    logging.set_verbosity(logging.INFO)
+    run()
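For reference, the injection path described in the module docstring above can be exercised against the spec this runner emits. A minimal sketch, assuming the spec was already compiled with `python kubeflow_runner.py` (which writes `<PIPELINE_NAME>_pipeline.json`, here `img-classification_pipeline.json`) and that the `kfp.v2.google` client named in the docstring is installed; the project id and region are placeholders:

```python
# Sketch only: submit the compiled spec to Vertex AI Pipelines, mirroring the
# docstring in kubeflow_runner.py. Project/region values are placeholder assumptions.
import json

from kfp.v2.google import client

pipelines_client = client.AIPlatformClient(
    project_id="my-gcp-project",  # placeholder
    region="us-central1",         # placeholder
)

pipelines_client.create_run_from_job_spec(
    "img-classification_pipeline.json",  # output of `python kubeflow_runner.py`
    enable_caching=False,
    parameter_values={
        # Keys match the RuntimeParameter names declared in run() above.
        "input-config": json.dumps(
            {
                "splits": [
                    {"name": "train", "pattern": "span-1/train/*.tfrecord"},
                    {"name": "eval", "pattern": "span-1/test/*.tfrecord"},
                ]
            }
        ),
        "output-config": json.dumps({}),
    },
)
```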
local_runner.py
ADDED
@@ -0,0 +1,64 @@
+import os
+from absl import logging
+
+from tfx import v1 as tfx
+from tfx.orchestration.data_types import RuntimeParameter
+from pipeline import configs
+from pipeline import local_pipeline
+
+# A TFX pipeline produces many output files and metadata. All output data will be
+# stored under this OUTPUT_DIR.
+# NOTE: It is recommended to have a separate OUTPUT_DIR which is *outside* of
+#       the source code structure. Please change OUTPUT_DIR to another location
+#       where we can store outputs of the pipeline.
+OUTPUT_DIR = "."
+
+# TFX produces two types of outputs, files and metadata.
+# - Files will be created under the PIPELINE_ROOT directory.
+# - Metadata will be written to a SQLite database in METADATA_PATH.
+PIPELINE_ROOT = os.path.join(OUTPUT_DIR, "tfx_pipeline_output", configs.PIPELINE_NAME)
+METADATA_PATH = os.path.join(
+    OUTPUT_DIR, "tfx_metadata", configs.PIPELINE_NAME, "metadata.db"
+)
+
+# The last component of the pipeline, "Pusher", will produce the serving model under
+# SERVING_MODEL_DIR.
+SERVING_MODEL_DIR = os.path.join(PIPELINE_ROOT, "serving_model")
+
+# Specifies the data file directory. DATA_PATH should be a directory containing the
+# TFRecord files consumed by ImportExampleGen in this pipeline. By default, data
+# files are in the `data` directory.
+# NOTE: If you upload data files to GCS (which is recommended if you use
+# Kubeflow), you can use a path starting with "gs://YOUR_BUCKET_NAME/path" for
+# DATA_PATH. For example,
+# DATA_PATH = 'gs://bucket/penguin/csv/'.
+# TODO(step 4): Specify the path for your data.
+DATA_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
+
+
+def run():
+    """Define a pipeline."""
+
+    tfx.orchestration.LocalDagRunner().run(
+        local_pipeline.create_pipeline(
+            pipeline_name=configs.PIPELINE_NAME,
+            pipeline_root=PIPELINE_ROOT,
+            data_path=DATA_PATH,
+            modules={
+                "preprocessing_fn": configs.PREPROCESSING_FN,
+                "training_fn": configs.TRAINING_FN,
+                "tuner_fn": configs.TUNER_FN,
+            },
+            train_args=tfx.proto.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
+            eval_args=tfx.proto.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
+            serving_model_dir=SERVING_MODEL_DIR,
+            metadata_connection_config=tfx.orchestration.metadata.sqlite_metadata_connection_config(
+                METADATA_PATH
+            ),
+        )
+    )
+
+
+if __name__ == "__main__":
+    logging.set_verbosity(logging.INFO)
+    run()
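A quick way to smoke-test the whole DAG is to invoke this module directly. A minimal sketch, assuming the repository root is on `PYTHONPATH` so `local_runner` and the `pipeline` package are importable:

```python
# Sketch: kick off the local pipeline programmatically, equivalent to
# `python local_runner.py`. Per the constants above, artifacts land under
# ./tfx_pipeline_output/<PIPELINE_NAME> and ML Metadata under
# ./tfx_metadata/<PIPELINE_NAME>/metadata.db.
import local_runner

local_runner.run()
```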
model_analysis.ipynb
ADDED
@@ -0,0 +1,105 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import required libs\n",
+    "import glob\n",
+    "import os\n",
+    "\n",
+    "import tensorflow as tf\n",
+    "import tensorflow_model_analysis as tfma\n",
+    "print('TF version: {}'.format(tf.version.VERSION))\n",
+    "print('TFMA version: {}'.format(tfma.version.VERSION_STRING))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read artifact information from metadata store.\n",
+    "import beam_dag_runner\n",
+    "\n",
+    "from tfx.orchestration import metadata\n",
+    "from tfx.types import standard_artifacts\n",
+    "\n",
+    "metadata_connection_config = metadata.sqlite_metadata_connection_config(\n",
+    "    beam_dag_runner.METADATA_PATH)\n",
+    "with metadata.Metadata(metadata_connection_config) as store:\n",
+    "    model_eval_artifacts = store.get_artifacts_by_type(standard_artifacts.ModelEvaluation.TYPE_NAME)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# configure output paths\n",
+    "# Exact paths to output artifacts can be found in the execution logs\n",
+    "# or KFP Web UI if you are using kubeflow.\n",
+    "model_eval_path = model_eval_artifacts[-1].uri\n",
+    "print(\"Generated model evaluation result:{}\".format(model_eval_path))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Install Jupyter Extensions\n",
+    "Note: If running in a local Jupyter notebook, then these Jupyter extensions must be installed in the environment before running Jupyter.\n",
+    "\n",
+    "```bash\n",
+    "jupyter nbextension enable --py widgetsnbextension\n",
+    "jupyter nbextension install --py --symlink tensorflow_model_analysis\n",
+    "jupyter nbextension enable --py tensorflow_model_analysis\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eval_result = tfma.load_eval_result(model_eval_path)\n",
+    "tfma.view.render_slicing_metrics(eval_result)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.5rc1"
+  },
+  "pycharm": {
+   "stem_cell": {
+    "cell_type": "raw",
+    "source": [],
+    "metadata": {
+     "collapsed": false
+    }
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
models/__init__.py
ADDED
@@ -0,0 +1,13 @@
+# Copyright 2020 Google LLC. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
models/model.py
ADDED
@@ -0,0 +1,266 @@
+import datetime
+import os
+from typing import List
+import absl
+import keras_tuner
+import tensorflow as tf
+from tensorflow.keras.optimizers import Adam
+import tensorflow_transform as tft
+
+from tensorflow_cloud import CloudTuner
+from tfx.v1.components import TunerFnResult
+from tfx.components.trainer.fn_args_utils import DataAccessor
+from tfx.components.trainer.fn_args_utils import FnArgs
+from tfx.dsl.io import fileio
+from tfx_bsl.tfxio import dataset_options
+import tfx.extensions.google_cloud_ai_platform.constants as vertex_const
+import tfx.extensions.google_cloud_ai_platform.trainer.executor as vertex_training_const
+import tfx.extensions.google_cloud_ai_platform.tuner.executor as vertex_tuner_const
+
+_TRAIN_DATA_SIZE = 128
+_EVAL_DATA_SIZE = 128
+_TRAIN_BATCH_SIZE = 32
+_EVAL_BATCH_SIZE = 32
+_CLASSIFIER_LEARNING_RATE = 1e-3
+_FINETUNE_LEARNING_RATE = 7e-6
+_CLASSIFIER_EPOCHS = 30
+
+_IMAGE_KEY = "image"
+_LABEL_KEY = "label"
+
+
+def INFO(text: str):
+    absl.logging.info(text)
+
+
+def _transformed_name(key: str) -> str:
+    return key + "_xf"
+
+
+def _get_signature(model):
+    signatures = {
+        "serving_default": _get_serve_image_fn(model).get_concrete_function(
+            tf.TensorSpec(
+                shape=[None, 224, 224, 3],
+                dtype=tf.float32,
+                name=_transformed_name(_IMAGE_KEY),
+            )
+        )
+    }
+
+    return signatures
+
+
+def _get_serve_image_fn(model):
+    @tf.function
+    def serve_image_fn(image_tensor):
+        return model(image_tensor)
+
+    return serve_image_fn
+
+
+def _image_augmentation(image_features):
+    batch_size = tf.shape(image_features)[0]
+    image_features = tf.image.random_flip_left_right(image_features)
+    image_features = tf.image.resize_with_crop_or_pad(image_features, 250, 250)
+    image_features = tf.image.random_crop(image_features, (batch_size, 224, 224, 3))
+    return image_features
+
+
+def _data_augmentation(feature_dict):
+    image_features = feature_dict[_transformed_name(_IMAGE_KEY)]
+    image_features = _image_augmentation(image_features)
+    feature_dict[_transformed_name(_IMAGE_KEY)] = image_features
+    return feature_dict
+
+
+def _input_fn(
+    file_pattern: List[str],
+    data_accessor: DataAccessor,
+    tf_transform_output: tft.TFTransformOutput,
+    is_train: bool = False,
+    batch_size: int = 200,
+) -> tf.data.Dataset:
+    dataset = data_accessor.tf_dataset_factory(
+        file_pattern,
+        dataset_options.TensorFlowDatasetOptions(
+            batch_size=batch_size, label_key=_transformed_name(_LABEL_KEY)
+        ),
+        tf_transform_output.transformed_metadata.schema,
+    )
+
+    if is_train:
+        dataset = dataset.map(lambda x, y: (_data_augmentation(x), y))
+
+    return dataset
+
+
+def _get_hyperparameters() -> keras_tuner.HyperParameters:
+    hp = keras_tuner.HyperParameters()
+    hp.Choice("learning_rate", [1e-3, 1e-2], default=1e-3)
+    return hp
+
+
+def _build_keras_model(hparams: keras_tuner.HyperParameters) -> tf.keras.Model:
+    base_model = tf.keras.applications.ResNet50(
+        input_shape=(224, 224, 3), include_top=False, weights="imagenet", pooling="max"
+    )
+    base_model.input_spec = None
+    base_model.trainable = False
+
+    model = tf.keras.Sequential(
+        [
+            tf.keras.layers.InputLayer(
+                input_shape=(224, 224, 3), name=_transformed_name(_IMAGE_KEY)
+            ),
+            base_model,
+            tf.keras.layers.Dropout(0.1),
+            tf.keras.layers.Dense(10, activation="softmax"),
+        ]
+    )
+
+    model.compile(
+        loss="sparse_categorical_crossentropy",
+        optimizer=Adam(learning_rate=hparams.get("learning_rate")),
+        metrics=["sparse_categorical_accuracy"],
+    )
+    model.summary(print_fn=INFO)
+
+    return model
+
+
+def cloud_tuner_fn(fn_args: FnArgs) -> TunerFnResult:
+    TUNING_ARGS_KEY = vertex_tuner_const.TUNING_ARGS_KEY
+    TRAINING_ARGS_KEY = vertex_training_const.TRAINING_ARGS_KEY
+    VERTEX_PROJECT_KEY = "project"
+    VERTEX_REGION_KEY = "region"
+    steps_per_epoch = int(_TRAIN_DATA_SIZE / _TRAIN_BATCH_SIZE)  # used by fit_kwargs below
+    tuner = CloudTuner(
+        _build_keras_model,
+        max_trials=6,
+        hyperparameters=_get_hyperparameters(),
+        project_id=fn_args.custom_config[TUNING_ARGS_KEY][VERTEX_PROJECT_KEY],
+        region=fn_args.custom_config[TUNING_ARGS_KEY][VERTEX_REGION_KEY],
+        objective="val_sparse_categorical_accuracy",
+        directory=fn_args.working_dir,
+    )
+
+    tf_transform_output = tft.TFTransformOutput(fn_args.transform_graph_path)
+
+    train_dataset = _input_fn(
+        fn_args.train_files,
+        fn_args.data_accessor,
+        tf_transform_output,
+        is_train=True,
+        batch_size=_TRAIN_BATCH_SIZE,
+    )
+
+    eval_dataset = _input_fn(
+        fn_args.eval_files,
+        fn_args.data_accessor,
+        tf_transform_output,
+        is_train=False,
+        batch_size=_EVAL_BATCH_SIZE,
+    )
+
+    return TunerFnResult(
+        tuner=tuner,
+        fit_kwargs={
+            "x": train_dataset,
+            "validation_data": eval_dataset,
+            "steps_per_epoch": steps_per_epoch,
+            "validation_steps": fn_args.eval_steps,
+        },
+    )
+
+
+def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
+    steps_per_epoch = int(_TRAIN_DATA_SIZE / _TRAIN_BATCH_SIZE)
+
+    tuner = keras_tuner.RandomSearch(
+        _build_keras_model,
+        max_trials=6,
+        hyperparameters=_get_hyperparameters(),
+        allow_new_entries=False,
+        objective=keras_tuner.Objective("val_sparse_categorical_accuracy", "max"),
+        directory=fn_args.working_dir,
+        project_name="img_classification_tuning",
+    )
+
+    tf_transform_output = tft.TFTransformOutput(fn_args.transform_graph_path)
+
+    train_dataset = _input_fn(
+        fn_args.train_files,
+        fn_args.data_accessor,
+        tf_transform_output,
+        is_train=True,
+        batch_size=_TRAIN_BATCH_SIZE,
+    )
+
+    eval_dataset = _input_fn(
+        fn_args.eval_files,
+        fn_args.data_accessor,
+        tf_transform_output,
+        is_train=False,
+        batch_size=_EVAL_BATCH_SIZE,
+    )
+
+    return TunerFnResult(
+        tuner=tuner,
+        fit_kwargs={
+            "x": train_dataset,
+            "validation_data": eval_dataset,
+            "steps_per_epoch": steps_per_epoch,
+            "validation_steps": fn_args.eval_steps,
+        },
+    )
+
+
+def run_fn(fn_args: FnArgs):
+    steps_per_epoch = int(_TRAIN_DATA_SIZE / _TRAIN_BATCH_SIZE)
+    total_epochs = int(fn_args.train_steps / steps_per_epoch)
+    if _CLASSIFIER_EPOCHS > total_epochs:
+        raise ValueError("Classifier epochs is greater than the total epochs")
+
+    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)
+
+    train_dataset = _input_fn(
+        fn_args.train_files,
+        fn_args.data_accessor,
+        tf_transform_output,
+        is_train=True,
+        batch_size=_TRAIN_BATCH_SIZE,
+    )
+
+    eval_dataset = _input_fn(
+        fn_args.eval_files,
+        fn_args.data_accessor,
+        tf_transform_output,
+        is_train=False,
+        batch_size=_EVAL_BATCH_SIZE,
+    )
+
+    INFO("Tensorboard logging to {}".format(fn_args.model_run_dir))
+    tensorboard_callback = tf.keras.callbacks.TensorBoard(
+        log_dir=fn_args.model_run_dir, update_freq="batch"
+    )
+
+    if fn_args.hyperparameters:
+        hparams = keras_tuner.HyperParameters.from_config(fn_args.hyperparameters)
+    else:
+        hparams = _get_hyperparameters()
+    INFO(f"HyperParameters for training: {hparams.get_config()}")
+
+    model = _build_keras_model(hparams)
+    model.fit(
+        train_dataset,
+        epochs=_CLASSIFIER_EPOCHS,
+        steps_per_epoch=steps_per_epoch,
+        validation_data=eval_dataset,
+        validation_steps=fn_args.eval_steps,
+        callbacks=[tensorboard_callback],
+    )
+
+    model.save(
+        fn_args.serving_model_dir, save_format="tf", signatures=_get_signature(model)
+    )
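`run_fn` exports the model with a single `serving_default` signature built by `_get_signature`, which expects an already-transformed float tensor named `image_xf` of shape `[None, 224, 224, 3]`. A minimal sketch of calling that signature; the SavedModel path here is hypothetical:

```python
# Sketch: invoke the exported serving signature. The input name and shape come
# from _get_signature above; "serving_model_dir" is a hypothetical export path.
import tensorflow as tf

loaded = tf.saved_model.load("serving_model_dir")
serve_fn = loaded.signatures["serving_default"]

# One dummy, already-preprocessed image (the Transform step normally produces this).
batch = tf.zeros([1, 224, 224, 3], dtype=tf.float32)
outputs = serve_fn(image_xf=batch)  # dict holding a (1, 10) softmax tensor
print(outputs)
```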
models/model_test.py
ADDED
@@ -0,0 +1,16 @@
+import tensorflow as tf
+
+from training_pipeline.models import model
+
+
+class ModelTest(tf.test.TestCase):
+    def testBuildKerasModel(self):
+        pass
+        # built_model = model._build_keras_model(
+        #     ["foo", "bar"]
+        # )  # pylint: disable=protected-access
+        # self.assertEqual(len(built_model.inputs), 2)
+
+
+if __name__ == "__main__":
+    tf.test.main()
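The test above is a stub: the assertion is commented out, and it would not match `_build_keras_model`'s actual signature (one `HyperParameters` argument) anyway. A sketch of what a working smoke test might look like, using the module's own `_get_hyperparameters` helper and assuming network access for the ImageNet weights download:

```python
# Sketch only, not part of the diff: build the model from its default
# hyperparameters and check the classifier head shape.
import tensorflow as tf

from training_pipeline.models import model  # import path as used in the diff


class ModelSmokeTest(tf.test.TestCase):
    def testBuildKerasModelOutputShape(self):
        built = model._build_keras_model(model._get_hyperparameters())
        self.assertEqual(built.output_shape, (None, 10))  # 10-way softmax head


if __name__ == "__main__":
    tf.test.main()
```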
models/preprocessing.py
ADDED
@@ -0,0 +1,32 @@
+import tensorflow as tf
+from tensorflow.keras.applications import resnet50
+
+_IMAGE_KEY = "image"
+_LABEL_KEY = "label"
+
+
+def _transformed_name(key: str) -> str:
+    return key + "_xf"
+
+
+def preprocessing_fn(inputs):
+    """tf.transform's callback function for preprocessing inputs.
+    Args:
+      inputs: map from feature keys to raw not-yet-transformed features.
+    Returns:
+      Map from string feature key to transformed feature operations.
+    """
+    outputs = {}
+
+    image_features = tf.map_fn(
+        lambda x: tf.io.decode_png(x[0], channels=3),
+        inputs[_IMAGE_KEY],
+        fn_output_signature=(tf.uint8),
+    )
+    image_features = tf.image.resize(image_features, [224, 224])
+    image_features = resnet50.preprocess_input(image_features)
+
+    outputs[_transformed_name(_IMAGE_KEY)] = image_features
+    outputs[_transformed_name(_LABEL_KEY)] = inputs[_LABEL_KEY]
+
+    return outputs
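Since `preprocessing_fn` is pure TensorFlow, it can be exercised eagerly outside of the Transform component. A minimal sketch with one fabricated example (CIFAR-10 images are 32x32 RGB; the `[batch, 1]` bytes shape mirrors what TFXIO feeds the function):

```python
# Sketch: run preprocessing_fn eagerly on one fake PNG-encoded example.
import tensorflow as tf

from models import preprocessing  # import path assumed

png_bytes = tf.io.encode_png(tf.zeros([32, 32, 3], dtype=tf.uint8))
inputs = {
    "image": tf.reshape(png_bytes, [1, 1]),     # one example, one bytes value
    "label": tf.constant([[3]], dtype=tf.int64),
}

outputs = preprocessing.preprocessing_fn(inputs)
print(outputs["image_xf"].shape)  # (1, 224, 224, 3), ResNet50-normalized floats
print(outputs["label_xf"])        # label passed through unchanged
```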
models/preprocessing_test.py
ADDED
@@ -0,0 +1,12 @@
+import tensorflow as tf
+
+from training_pipeline.models import preprocessing
+
+
+class PreprocessingTest(tf.test.TestCase):
+    def testPreprocessingFn(self):
+        self.assertTrue(callable(preprocessing.preprocessing_fn))
+
+
+if __name__ == "__main__":
+    tf.test.main()
|
pipeline.json
ADDED
@@ -0,0 +1,745 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"displayName": "img-classification",
|
3 |
+
"labels": {
|
4 |
+
"tfx_py_version": "3-8",
|
5 |
+
"tfx_runner": "kubeflow_v2",
|
6 |
+
"tfx_version": "1-9-0",
|
7 |
+
},
|
8 |
+
"pipelineSpec": {
|
9 |
+
"components": {
|
10 |
+
"Evaluator": {
|
11 |
+
"executorLabel": "Evaluator_executor",
|
12 |
+
"inputDefinitions": {
|
13 |
+
"artifacts": {
|
14 |
+
"baseline_model": {
|
15 |
+
"artifactType": {
|
16 |
+
"instanceSchema": "title: tfx.Model\ntype: object\n"
|
17 |
+
}
|
18 |
+
},
|
19 |
+
"examples": {
|
20 |
+
"artifactType": {
|
21 |
+
"instanceSchema": "title: tfx.Examples\ntype: object\nproperties:\n span:\n type: integer\n description: Span for an artifact.\n version:\n type: integer\n description: Version for an artifact.\n split_names:\n type: string\n description: JSON-encoded list of splits for an artifact. Empty string means artifact has no split.\n"
|
22 |
+
}
|
23 |
+
},
|
24 |
+
"model": {
|
25 |
+
"artifactType": {
|
26 |
+
"instanceSchema": "title: tfx.Model\ntype: object\n"
|
27 |
+
}
|
28 |
+
},
|
29 |
+
},
|
30 |
+
"parameters": {
|
31 |
+
"eval_config": {"type": "STRING"},
|
32 |
+
"example_splits": {"type": "STRING"},
|
33 |
+
"fairness_indicator_thresholds": {"type": "STRING"},
|
34 |
+
},
|
35 |
+
},
|
36 |
+
"outputDefinitions": {
|
37 |
+
"artifacts": {
|
38 |
+
"blessing": {
|
39 |
+
"artifactType": {
|
40 |
+
"instanceSchema": "title: tfx.ModelBlessing\ntype: object\n"
|
41 |
+
}
|
42 |
+
},
|
43 |
+
"evaluation": {
|
44 |
+
"artifactType": {
|
45 |
+
"instanceSchema": "title: tfx.ModelEvaluation\ntype: object\n"
|
46 |
+
}
|
47 |
+
},
|
48 |
+
}
|
49 |
+
},
|
50 |
+
},
|
51 |
+
"ImportExampleGen": {
|
52 |
+
"executorLabel": "ImportExampleGen_executor",
|
53 |
+
"inputDefinitions": {
|
54 |
+
"parameters": {
|
55 |
+
"input_base": {"type": "STRING"},
|
56 |
+
"input_config": {"type": "STRING"},
|
57 |
+
"output_config": {"type": "STRING"},
|
58 |
+
"output_data_format": {"type": "INT"},
|
59 |
+
"output_file_format": {"type": "INT"},
|
60 |
+
}
|
61 |
+
},
|
62 |
+
"outputDefinitions": {
|
63 |
+
"artifacts": {
|
64 |
+
"examples": {
|
65 |
+
"artifactType": {
|
66 |
+
"instanceSchema": "title: tfx.Examples\ntype: object\nproperties:\n span:\n type: integer\n description: Span for an artifact.\n version:\n type: integer\n description: Version for an artifact.\n split_names:\n type: string\n description: JSON-encoded list of splits for an artifact. Empty string means artifact has no split.\n"
|
67 |
+
}
|
68 |
+
}
|
69 |
+
}
|
70 |
+
},
|
71 |
+
},
|
72 |
+
"Pusher": {
|
73 |
+
"executorLabel": "Pusher_executor",
|
74 |
+
"inputDefinitions": {
|
75 |
+
"artifacts": {
|
76 |
+
"model": {
|
77 |
+
"artifactType": {
|
78 |
+
"instanceSchema": "title: tfx.Model\ntype: object\n"
|
79 |
+
}
|
80 |
+
},
|
81 |
+
"model_blessing": {
|
82 |
+
"artifactType": {
|
83 |
+
"instanceSchema": "title: tfx.ModelBlessing\ntype: object\n"
|
84 |
+
}
|
85 |
+
},
|
86 |
+
},
|
87 |
+
"parameters": {
|
88 |
+
"custom_config": {"type": "STRING"},
|
89 |
+
"push_destination": {"type": "STRING"},
|
90 |
+
},
|
91 |
+
},
|
92 |
+
"outputDefinitions": {
|
93 |
+
"artifacts": {
|
94 |
+
"pushed_model": {
|
95 |
+
"artifactType": {
|
96 |
+
"instanceSchema": "title: tfx.PushedModel\ntype: object\n"
|
97 |
+
}
|
98 |
+
}
|
99 |
+
}
|
100 |
+
},
|
101 |
+
},
|
102 |
+
"SchemaGen": {
|
103 |
+
"executorLabel": "SchemaGen_executor",
|
104 |
+
"inputDefinitions": {
|
105 |
+
"artifacts": {
|
106 |
+
"statistics": {
|
107 |
+
"artifactType": {
|
108 |
+
"instanceSchema": "title: tfx.ExampleStatistics\ntype: object\nproperties:\n span:\n type: integer\n description: Span for an artifact.\n split_names:\n type: string\n description: JSON-encoded list of splits for an artifact. Empty string means artifact has no split.\n"
|
109 |
+
}
|
110 |
+
}
|
111 |
+
},
|
112 |
+
"parameters": {
|
113 |
+
"exclude_splits": {"type": "STRING"},
|
114 |
+
"infer_feature_shape": {"type": "INT"},
|
115 |
+
},
|
116 |
+
},
|
117 |
+
"outputDefinitions": {
|
118 |
+
"artifacts": {
|
119 |
+
"schema": {
|
120 |
+
"artifactType": {
|
121 |
+
"instanceSchema": "title: tfx.Schema\ntype: object\n"
|
122 |
+
}
|
123 |
+
}
|
124 |
+
}
|
125 |
+
},
|
126 |
+
},
|
127 |
+
"StatisticsGen": {
|
128 |
+
"executorLabel": "StatisticsGen_executor",
|
129 |
+
"inputDefinitions": {
|
130 |
+
"artifacts": {
|
131 |
+
"examples": {
|
132 |
+
"artifactType": {
|
133 |
+
"instanceSchema": "title: tfx.Examples\ntype: object\nproperties:\n span:\n type: integer\n description: Span for an artifact.\n version:\n type: integer\n description: Version for an artifact.\n split_names:\n type: string\n description: JSON-encoded list of splits for an artifact. Empty string means artifact has no split.\n"
|
134 |
+
}
|
135 |
+
}
|
136 |
+
},
|
137 |
+
"parameters": {"exclude_splits": {"type": "STRING"}},
|
138 |
+
},
|
139 |
+
"outputDefinitions": {
|
140 |
+
"artifacts": {
|
141 |
+
"statistics": {
|
142 |
+
"artifactType": {
|
143 |
+
"instanceSchema": "title: tfx.ExampleStatistics\ntype: object\nproperties:\n span:\n type: integer\n description: Span for an artifact.\n split_names:\n type: string\n description: JSON-encoded list of splits for an artifact. Empty string means artifact has no split.\n"
|
144 |
+
}
|
145 |
+
}
|
146 |
+
}
|
147 |
+
},
|
148 |
+
},
|
149 |
+
"Trainer": {
|
150 |
+
"executorLabel": "Trainer_executor",
|
151 |
+
"inputDefinitions": {
|
152 |
+
"artifacts": {
|
153 |
+
"examples": {
|
154 |
+
"artifactType": {
|
155 |
+
"instanceSchema": "title: tfx.Examples\ntype: object\nproperties:\n span:\n type: integer\n description: Span for an artifact.\n version:\n type: integer\n description: Version for an artifact.\n split_names:\n type: string\n description: JSON-encoded list of splits for an artifact. Empty string means artifact has no split.\n"
|
156 |
+
}
|
157 |
+
},
|
158 |
+
"schema": {
|
159 |
+
"artifactType": {
|
160 |
+
"instanceSchema": "title: tfx.Schema\ntype: object\n"
|
161 |
+
}
|
162 |
+
},
|
163 |
+
"transform_graph": {
|
164 |
+
"artifactType": {
|
165 |
+
"instanceSchema": "title: tfx.TransformGraph\ntype: object\n"
|
166 |
+
}
|
167 |
+
},
|
168 |
+
},
|
169 |
+
"parameters": {
|
170 |
+
"custom_config": {"type": "STRING"},
|
171 |
+
"eval_args": {"type": "STRING"},
|
172 |
+
"run_fn": {"type": "STRING"},
|
173 |
+
"train_args": {"type": "STRING"},
|
174 |
+
},
|
175 |
+
},
|
176 |
+
"outputDefinitions": {
|
177 |
+
"artifacts": {
|
178 |
+
"model": {
|
179 |
+
"artifactType": {
|
180 |
+
"instanceSchema": "title: tfx.Model\ntype: object\n"
|
181 |
+
}
|
182 |
+
},
|
183 |
+
"model_run": {
|
184 |
+
"artifactType": {
|
185 |
+
"instanceSchema": "title: tfx.ModelRun\ntype: object\n"
|
186 |
+
}
|
187 |
+
},
|
188 |
+
}
|
189 |
+
},
|
190 |
+
},
|
191 |
+
"Transform": {
|
192 |
+
"executorLabel": "Transform_executor",
|
193 |
+
"inputDefinitions": {
|
194 |
+
"artifacts": {
|
195 |
+
"examples": {
|
196 |
+
"artifactType": {
|
197 |
+
"instanceSchema": "title: tfx.Examples\ntype: object\nproperties:\n span:\n type: integer\n description: Span for an artifact.\n version:\n type: integer\n description: Version for an artifact.\n split_names:\n type: string\n description: JSON-encoded list of splits for an artifact. Empty string means artifact has no split.\n"
|
198 |
+
}
|
199 |
+
},
|
200 |
+
"schema": {
|
201 |
+
"artifactType": {
|
202 |
+
"instanceSchema": "title: tfx.Schema\ntype: object\n"
|
203 |
+
}
|
204 |
+
},
|
205 |
+
},
|
206 |
+
"parameters": {
|
207 |
+
"custom_config": {"type": "STRING"},
|
208 |
+
"disable_statistics": {"type": "INT"},
|
209 |
+
"force_tf_compat_v1": {"type": "INT"},
|
210 |
+
"preprocessing_fn": {"type": "STRING"},
|
211 |
+
},
|
212 |
+
},
|
213 |
+
"outputDefinitions": {
|
214 |
+
"artifacts": {
|
215 |
+
"post_transform_anomalies": {
|
216 |
+
"artifactType": {
|
217 |
+
"instanceSchema": "title: tfx.ExampleAnomalies\ntype: object\nproperties:\n span:\n type: integer\n description: Span for an artifact.\n split_names:\n type: string\n description: JSON-encoded list of splits for an artifact. Empty string means artifact has no split.\n"
|
218 |
+
}
|
219 |
+
},
|
220 |
+
"post_transform_schema": {
|
221 |
+
"artifactType": {
|
222 |
+
"instanceSchema": "title: tfx.Schema\ntype: object\n"
|
223 |
+
}
|
224 |
+
},
|
225 |
+
"post_transform_stats": {
|
226 |
+
"artifactType": {
|
227 |
+
"instanceSchema": "title: tfx.ExampleStatistics\ntype: object\nproperties:\n span:\n type: integer\n description: Span for an artifact.\n split_names:\n type: string\n description: JSON-encoded list of splits for an artifact. Empty string means artifact has no split.\n"
|
228 |
+
}
|
229 |
+
},
|
230 |
+
"pre_transform_schema": {
|
231 |
+
"artifactType": {
|
232 |
+
"instanceSchema": "title: tfx.Schema\ntype: object\n"
|
233 |
+
}
|
234 |
+
},
|
235 |
+
"pre_transform_stats": {
|
236 |
+
"artifactType": {
|
237 |
+
"instanceSchema": "title: tfx.ExampleStatistics\ntype: object\nproperties:\n span:\n type: integer\n description: Span for an artifact.\n split_names:\n type: string\n description: JSON-encoded list of splits for an artifact. Empty string means artifact has no split.\n"
|
238 |
+
}
|
239 |
+
},
|
240 |
+
"transform_graph": {
|
241 |
+
"artifactType": {
|
242 |
+
"instanceSchema": "title: tfx.TransformGraph\ntype: object\n"
|
243 |
+
}
|
244 |
+
},
|
245 |
+
"transformed_examples": {
|
246 |
+
"artifactType": {
|
247 |
+
"instanceSchema": "title: tfx.Examples\ntype: object\nproperties:\n span:\n type: integer\n description: Span for an artifact.\n version:\n type: integer\n description: Version for an artifact.\n split_names:\n type: string\n description: JSON-encoded list of splits for an artifact. Empty string means artifact has no split.\n"
|
248 |
+
}
|
249 |
+
},
|
250 |
+
"updated_analyzer_cache": {
|
251 |
+
"artifactType": {
|
252 |
+
"instanceSchema": "title: tfx.TransformCache\ntype: object\n"
|
253 |
+
}
|
254 |
+
},
|
255 |
+
}
|
256 |
+
},
|
257 |
+
},
|
258 |
+
"latest_blessed_model_resolver-model-blessing-resolver": {
|
259 |
+
"executorLabel": "latest_blessed_model_resolver-model-blessing-resolver_executor",
|
260 |
+
"outputDefinitions": {
|
261 |
+
"artifacts": {
|
262 |
+
"model_blessing": {
|
263 |
+
"artifactType": {
|
264 |
+
"instanceSchema": "title: tfx.ModelBlessing\ntype: object\n"
|
265 |
+
}
|
266 |
+
}
|
267 |
+
}
|
268 |
+
},
|
269 |
+
},
|
270 |
+
"latest_blessed_model_resolver-model-resolver": {
|
271 |
+
"executorLabel": "latest_blessed_model_resolver-model-resolver_executor",
|
272 |
+
"inputDefinitions": {
|
273 |
+
"artifacts": {
|
274 |
+
"input": {
|
275 |
+
"artifactType": {
|
276 |
+
"instanceSchema": "title: tfx.ModelBlessing\ntype: object\n"
|
277 |
+
}
|
278 |
+
}
|
279 |
+
}
|
280 |
+
},
|
281 |
+
"outputDefinitions": {
|
282 |
+
"artifacts": {
|
283 |
+
"model": {
|
284 |
+
"artifactType": {
|
285 |
+
"instanceSchema": "title: tfx.Model\ntype: object\n"
|
286 |
+
}
|
287 |
+
}
|
288 |
+
}
|
289 |
+
},
|
290 |
+
},
|
291 |
+
},
|
292 |
+
"deploymentSpec": {
|
293 |
+
"executors": {
|
294 |
+
"Evaluator_executor": {
|
295 |
+
"container": {
|
296 |
+
"args": [
|
297 |
+
"--executor_class_path",
|
298 |
+
"tfx.components.evaluator.executor.Executor",
|
299 |
+
"--json_serialized_invocation_args",
|
300 |
+
"{{$}}",
|
301 |
+
],
|
302 |
+
"command": [
|
303 |
+
"python",
|
304 |
+
"-m",
|
305 |
+
"tfx.orchestration.kubeflow.v2.container.kubeflow_v2_run_executor",
|
306 |
+
],
|
307 |
+
"image": "gcr.io/gcp-ml-172005/img-classification",
|
308 |
+
}
|
309 |
+
},
|
310 |
+
"ImportExampleGen_executor": {
|
311 |
+
"container": {
|
312 |
+
"args": [
|
313 |
+
"--executor_class_path",
|
314 |
+
"tfx.components.example_gen.import_example_gen.executor.Executor",
|
315 |
+
"--json_serialized_invocation_args",
|
316 |
+
"{{$}}",
|
317 |
+
],
|
318 |
+
"command": [
|
319 |
+
"python",
|
320 |
+
"-m",
|
321 |
+
"tfx.orchestration.kubeflow.v2.container.kubeflow_v2_run_executor",
|
322 |
+
],
|
323 |
+
"image": "gcr.io/gcp-ml-172005/img-classification",
|
324 |
+
"lifecycle": {
|
325 |
+
"preCacheCheck": {
|
326 |
+
"args": ["--json_serialized_invocation_args", "{{$}}"],
|
327 |
+
"command": [
|
328 |
+
"python",
|
329 |
+
"-m",
|
330 |
+
"tfx.orchestration.kubeflow.v2.file_based_example_gen.driver",
|
331 |
+
],
|
332 |
+
}
|
333 |
+
},
|
334 |
+
}
|
335 |
+
},
|
336 |
+
"Pusher_executor": {
|
337 |
+
"container": {
|
338 |
+
"args": [
|
339 |
+
"--executor_class_path",
|
340 |
+
"tfx.components.pusher.executor.Executor",
|
341 |
+
"--json_serialized_invocation_args",
|
342 |
+
"{{$}}",
|
343 |
+
],
|
344 |
+
"command": [
|
345 |
+
"python",
|
346 |
+
"-m",
|
347 |
+
"tfx.orchestration.kubeflow.v2.container.kubeflow_v2_run_executor",
|
348 |
+
],
|
349 |
+
"image": "gcr.io/gcp-ml-172005/img-classification",
|
350 |
+
}
|
351 |
+
},
|
352 |
+
"SchemaGen_executor": {
|
353 |
+
"container": {
|
354 |
+
"args": [
|
355 |
+
"--executor_class_path",
|
356 |
+
"tfx.components.schema_gen.executor.Executor",
|
357 |
+
"--json_serialized_invocation_args",
|
358 |
+
"{{$}}",
|
359 |
+
],
|
360 |
+
"command": [
|
361 |
+
"python",
|
362 |
+
"-m",
|
363 |
+
"tfx.orchestration.kubeflow.v2.container.kubeflow_v2_run_executor",
|
364 |
+
],
|
365 |
+
"image": "gcr.io/gcp-ml-172005/img-classification",
|
366 |
+
}
|
367 |
+
},
|
368 |
+
"StatisticsGen_executor": {
|
369 |
+
"container": {
|
370 |
+
"args": [
|
371 |
+
"--executor_class_path",
|
372 |
+
"tfx.components.statistics_gen.executor.Executor",
|
373 |
+
"--json_serialized_invocation_args",
|
374 |
+
"{{$}}",
|
375 |
+
],
|
376 |
+
"command": [
|
377 |
+
"python",
|
378 |
+
"-m",
|
379 |
+
"tfx.orchestration.kubeflow.v2.container.kubeflow_v2_run_executor",
|
380 |
+
],
|
381 |
+
"image": "gcr.io/gcp-ml-172005/img-classification",
|
382 |
+
}
|
383 |
+
},
|
384 |
+
"Trainer_executor": {
|
385 |
+
"container": {
|
386 |
+
"args": [
|
387 |
+
"--executor_class_path",
|
388 |
+
"tfx.extensions.google_cloud_ai_platform.trainer.executor.GenericExecutor",
|
389 |
+
"--json_serialized_invocation_args",
|
390 |
+
"{{$}}",
|
391 |
+
],
|
392 |
+
"command": [
|
393 |
+
"python",
|
394 |
+
"-m",
|
395 |
+
"tfx.orchestration.kubeflow.v2.container.kubeflow_v2_run_executor",
|
396 |
+
],
|
397 |
+
"image": "gcr.io/gcp-ml-172005/img-classification",
|
398 |
+
}
|
399 |
+
},
|
400 |
+
"Transform_executor": {
|
401 |
+
"container": {
|
402 |
+
"args": [
|
403 |
+
"--executor_class_path",
|
404 |
+
"tfx.components.transform.executor.Executor",
|
405 |
+
"--json_serialized_invocation_args",
|
406 |
+
"{{$}}",
|
407 |
+
],
|
408 |
+
"command": [
|
409 |
+
"python",
|
410 |
+
"-m",
|
411 |
+
"tfx.orchestration.kubeflow.v2.container.kubeflow_v2_run_executor",
|
412 |
+
],
|
413 |
+
"image": "gcr.io/gcp-ml-172005/img-classification",
|
414 |
+
}
|
415 |
+
},
|
416 |
+
"latest_blessed_model_resolver-model-blessing-resolver_executor": {
|
417 |
+
"resolver": {
|
418 |
+
"outputArtifactQueries": {
|
419 |
+
"model_blessing": {
|
420 |
+
"filter": 'schema_title="tfx.ModelBlessing" AND state=LIVE AND metadata.blessed.number_value=1'
|
421 |
+
}
|
422 |
+
}
|
423 |
+
}
|
424 |
+
},
|
425 |
+
"latest_blessed_model_resolver-model-resolver_executor": {
|
426 |
+
"resolver": {
|
427 |
+
"outputArtifactQueries": {
|
428 |
+
"model": {
|
429 |
+
"filter": "schema_title=\"tfx.Model\" AND state=LIVE AND name=\"{{$.inputs.artifacts['input'].metadata['current_model_id']}}\""
|
430 |
+
}
|
431 |
+
}
|
432 |
+
}
|
433 |
+
},
|
434 |
+
}
|
435 |
+
},
|
436 |
+
"pipelineInfo": {"name": "img-classification"},
|
437 |
+
"root": {
|
438 |
+
"dag": {
|
439 |
+
"tasks": {
|
440 |
+
"Evaluator": {
|
441 |
+
"cachingOptions": {"enableCache": true},
|
442 |
+
"componentRef": {"name": "Evaluator"},
|
443 |
+
"dependentTasks": [
|
444 |
+
"Trainer",
|
445 |
+
"Transform",
|
446 |
+
"latest_blessed_model_resolver-model-resolver",
|
447 |
+
],
|
448 |
+
"inputs": {
|
449 |
+
"artifacts": {
|
450 |
+
"baseline_model": {
|
451 |
+
"taskOutputArtifact": {
|
452 |
+
"outputArtifactKey": "model",
|
453 |
+
"producerTask": "latest_blessed_model_resolver-model-resolver",
|
454 |
+
}
|
455 |
+
},
|
456 |
+
"examples": {
|
457 |
+
"taskOutputArtifact": {
|
458 |
+
"outputArtifactKey": "transformed_examples",
|
459 |
+
"producerTask": "Transform",
|
460 |
+
}
|
461 |
+
},
|
462 |
+
"model": {
|
463 |
+
"taskOutputArtifact": {
|
464 |
+
"outputArtifactKey": "model",
|
465 |
+
"producerTask": "Trainer",
|
466 |
+
}
|
467 |
+
},
|
468 |
+
},
|
469 |
+
"parameters": {
|
470 |
+
"eval_config": {
|
471 |
+
"runtimeValue": {
|
472 |
+
"constantValue": {
|
473 |
+
"stringValue": '{\n "metrics_specs": [\n {\n "metrics": [\n {\n "class_name": "SparseCategoricalAccuracy",\n "threshold": {\n "change_threshold": {\n "absolute": -0.001,\n "direction": "HIGHER_IS_BETTER"\n },\n "value_threshold": {\n "lower_bound": 0.55\n }\n }\n }\n ]\n }\n ],\n "model_specs": [\n {\n "label_key": "label_xf"\n }\n ],\n "slicing_specs": [\n {}\n ]\n}'
|
474 |
+
}
|
475 |
+
}
|
476 |
+
},
|
477 |
+
"example_splits": {
|
478 |
+
"runtimeValue": {
|
479 |
+
"constantValue": {"stringValue": "null"}
|
480 |
+
}
|
481 |
+
},
|
482 |
+
"fairness_indicator_thresholds": {
|
483 |
+
"runtimeValue": {
|
484 |
+
"constantValue": {"stringValue": "null"}
|
485 |
+
}
|
486 |
+
},
|
487 |
+
},
|
488 |
+
},
|
489 |
+
"taskInfo": {"name": "Evaluator"},
|
490 |
+
},
|
491 |
+
"ImportExampleGen": {
|
492 |
+
"cachingOptions": {"enableCache": true},
|
493 |
+
"componentRef": {"name": "ImportExampleGen"},
|
494 |
+
"inputs": {
|
495 |
+
"parameters": {
|
496 |
+
"input_base": {
|
497 |
+
"runtimeValue": {
|
498 |
+
"constantValue": {
|
499 |
+
"stringValue": "gs://gcp-ml-172005-complete-mlops/data/"
|
500 |
+
}
|
501 |
+
}
|
502 |
+
},
|
503 |
+
"input_config": {
|
504 |
+
"runtimeValue": {
|
505 |
+
"constantValue": {
|
506 |
+
"stringValue": '{\n "splits": [\n {\n "name": "train",\n "pattern": "train/*"\n },\n {\n "name": "eval",\n "pattern": "test/*"\n }\n ]\n}'
|
507 |
+
}
|
508 |
+
}
|
509 |
+
},
|
510 |
+
"output_config": {
|
511 |
+
"runtimeValue": {
|
512 |
+
"constantValue": {"stringValue": "{}"}
|
513 |
+
}
|
514 |
+
},
|
515 |
+
"output_data_format": {
|
516 |
+
"runtimeValue": {"constantValue": {"intValue": "6"}}
|
517 |
+
},
|
518 |
+
"output_file_format": {
|
519 |
+
"runtimeValue": {"constantValue": {"intValue": "5"}}
|
520 |
+
},
|
521 |
+
}
|
522 |
+
},
|
523 |
+
"taskInfo": {"name": "ImportExampleGen"},
|
524 |
+
},
|
525 |
+
"Pusher": {
|
526 |
+
"cachingOptions": {"enableCache": true},
|
527 |
+
"componentRef": {"name": "Pusher"},
|
528 |
+
"dependentTasks": ["Evaluator", "Trainer"],
|
529 |
+
"inputs": {
|
530 |
+
"artifacts": {
|
531 |
+
"model": {
|
532 |
+
"taskOutputArtifact": {
|
533 |
+
"outputArtifactKey": "model",
|
534 |
+
"producerTask": "Trainer",
|
535 |
+
}
|
536 |
+
},
|
537 |
+
"model_blessing": {
|
538 |
+
"taskOutputArtifact": {
|
539 |
+
"outputArtifactKey": "blessing",
|
540 |
+
"producerTask": "Evaluator",
|
541 |
+
}
|
542 |
+
},
|
543 |
+
},
|
544 |
+
"parameters": {
|
545 |
+
"custom_config": {
|
546 |
+
"runtimeValue": {
|
547 |
+
"constantValue": {"stringValue": "null"}
|
548 |
+
}
|
549 |
+
},
|
550 |
+
"push_destination": {
|
551 |
+
"runtimeValue": {
|
552 |
+
"constantValue": {
|
553 |
+
"stringValue": '{\n "filesystem": {\n "base_directory": "gs://gcp-ml-172005-complete-mlops/tfx_pipeline_output/img-classification/serving_model"\n }\n}'
|
554 |
+
}
|
555 |
+
}
|
556 |
+
},
|
557 |
+
},
|
558 |
+
},
|
559 |
+
"taskInfo": {"name": "Pusher"},
|
560 |
+
},
|
561 |
+
"SchemaGen": {
|
562 |
+
"cachingOptions": {"enableCache": true},
|
563 |
+
"componentRef": {"name": "SchemaGen"},
|
564 |
+
"dependentTasks": ["StatisticsGen"],
|
565 |
+
"inputs": {
|
566 |
+
"artifacts": {
|
567 |
+
"statistics": {
|
568 |
+
"taskOutputArtifact": {
|
569 |
+
"outputArtifactKey": "statistics",
|
570 |
+
"producerTask": "StatisticsGen",
|
571 |
+
}
|
572 |
+
}
|
573 |
+
},
|
574 |
+
"parameters": {
|
575 |
+
"exclude_splits": {
|
576 |
+
"runtimeValue": {
|
577 |
+
"constantValue": {"stringValue": "[]"}
|
578 |
+
}
|
579 |
+
},
|
580 |
+
"infer_feature_shape": {
|
581 |
+
"runtimeValue": {"constantValue": {"intValue": "1"}}
|
582 |
+
},
|
583 |
+
},
|
584 |
+
},
|
585 |
+
"taskInfo": {"name": "SchemaGen"},
|
586 |
+
},
|
587 |
+
"StatisticsGen": {
|
588 |
+
"cachingOptions": {"enableCache": true},
|
589 |
+
"componentRef": {"name": "StatisticsGen"},
|
590 |
+
"dependentTasks": ["ImportExampleGen"],
|
591 |
+
"inputs": {
|
592 |
+
"artifacts": {
|
593 |
+
"examples": {
|
594 |
+
"taskOutputArtifact": {
|
595 |
+
"outputArtifactKey": "examples",
|
596 |
+
"producerTask": "ImportExampleGen",
|
597 |
+
}
|
598 |
+
}
|
599 |
+
},
|
600 |
+
"parameters": {
|
601 |
+
"exclude_splits": {
|
602 |
+
"runtimeValue": {
|
603 |
+
"constantValue": {"stringValue": "[]"}
|
604 |
+
}
|
605 |
+
}
|
606 |
+
},
|
607 |
+
},
|
608 |
+
"taskInfo": {"name": "StatisticsGen"},
|
609 |
+
},
|
610 |
+
"Trainer": {
|
611 |
+
"cachingOptions": {"enableCache": true},
|
612 |
+
"componentRef": {"name": "Trainer"},
|
613 |
+
"dependentTasks": ["SchemaGen", "Transform"],
|
614 |
+
"inputs": {
|
615 |
+
"artifacts": {
|
616 |
+
"examples": {
|
617 |
+
"taskOutputArtifact": {
|
618 |
+
"outputArtifactKey": "transformed_examples",
|
619 |
+
"producerTask": "Transform",
|
620 |
+
}
|
621 |
+
},
|
622 |
+
"schema": {
|
623 |
+
"taskOutputArtifact": {
|
624 |
+
"outputArtifactKey": "schema",
|
625 |
+
"producerTask": "SchemaGen",
|
626 |
+
}
|
627 |
+
},
|
628 |
+
"transform_graph": {
|
629 |
+
"taskOutputArtifact": {
|
630 |
+
"outputArtifactKey": "transform_graph",
|
631 |
+
"producerTask": "Transform",
|
632 |
+
}
|
633 |
+
},
|
634 |
+
},
|
635 |
+
"parameters": {
|
636 |
+
"custom_config": {
|
637 |
+
"runtimeValue": {
|
638 |
+
"constantValue": {
|
639 |
+
"stringValue": '{"ai_platform_enable_vertex": true, "ai_platform_training_args": {"project": "gcp-ml-172005", "worker_pool_specs": [{"container_spec": {"image_uri": "gcr.io/tfx-oss-public/tfx:1.9.0"}, "machine_spec": {"accelerator_count": 1, "accelerator_type": "NVIDIA_TESLA_K80", "machine_type": "n1-standard-4"}, "replica_count": 1}]}, "ai_platform_vertex_region": "us-central1-a", "use_gpu": true}'
|
640 |
+
}
|
641 |
+
}
|
642 |
+
},
|
643 |
+
"eval_args": {
|
644 |
+
"runtimeValue": {
|
645 |
+
"constantValue": {
|
646 |
+
"stringValue": '{\n "num_steps": 4\n}'
|
647 |
+
}
|
648 |
+
}
|
649 |
+
},
|
650 |
+
"run_fn": {
|
651 |
+
"runtimeValue": {
|
652 |
+
"constantValue": {
|
653 |
+
"stringValue": "models.model.run_fn"
|
654 |
+
}
|
655 |
+
}
|
656 |
+
},
|
657 |
+
"train_args": {
|
658 |
+
"runtimeValue": {
|
659 |
+
"constantValue": {
|
660 |
+
"stringValue": '{\n "num_steps": 160\n}'
|
661 |
+
}
|
662 |
+
}
|
663 |
+
},
|
664 |
+
},
|
665 |
+
},
|
666 |
+
"taskInfo": {"name": "Trainer"},
|
667 |
+
},
|
668 |
+
"Transform": {
|
669 |
+
"cachingOptions": {"enableCache": true},
|
670 |
+
"componentRef": {"name": "Transform"},
|
671 |
+
"dependentTasks": ["ImportExampleGen", "SchemaGen"],
|
672 |
+
"inputs": {
|
673 |
+
"artifacts": {
|
674 |
+
"examples": {
|
675 |
+
"taskOutputArtifact": {
|
676 |
+
"outputArtifactKey": "examples",
|
677 |
+
"producerTask": "ImportExampleGen",
|
678 |
+
}
|
679 |
+
},
|
680 |
+
"schema": {
|
681 |
+
"taskOutputArtifact": {
|
682 |
+
"outputArtifactKey": "schema",
|
683 |
+
"producerTask": "SchemaGen",
|
684 |
+
}
|
685 |
+
},
|
686 |
+
},
|
687 |
+
"parameters": {
|
688 |
+
"custom_config": {
|
689 |
+
"runtimeValue": {
|
690 |
+
"constantValue": {"stringValue": "null"}
|
691 |
+
}
|
692 |
+
},
|
693 |
+
"disable_statistics": {
|
694 |
+
"runtimeValue": {"constantValue": {"intValue": "0"}}
|
695 |
+
},
|
696 |
+
"force_tf_compat_v1": {
|
697 |
+
"runtimeValue": {"constantValue": {"intValue": "0"}}
|
698 |
+
},
|
699 |
+
"preprocessing_fn": {
|
700 |
+
"runtimeValue": {
|
701 |
+
"constantValue": {
|
702 |
+
"stringValue": "models.preprocessing.preprocessing_fn"
|
703 |
+
}
|
704 |
+
}
|
705 |
+
},
|
706 |
+
},
|
707 |
+
},
|
708 |
+
"taskInfo": {"name": "Transform"},
|
709 |
+
},
|
710 |
+
"latest_blessed_model_resolver-model-blessing-resolver": {
|
711 |
+
"componentRef": {
|
712 |
+
"name": "latest_blessed_model_resolver-model-blessing-resolver"
|
713 |
+
},
|
714 |
+
"taskInfo": {
|
715 |
+
"name": "latest_blessed_model_resolver-model-blessing-resolver"
|
716 |
+
},
|
717 |
+
},
|
718 |
+
"latest_blessed_model_resolver-model-resolver": {
|
719 |
+
"componentRef": {
|
720 |
+
"name": "latest_blessed_model_resolver-model-resolver"
|
721 |
+
},
|
722 |
+
"inputs": {
|
723 |
+
"artifacts": {
|
724 |
+
"input": {
|
725 |
+
"taskOutputArtifact": {
|
726 |
+
"outputArtifactKey": "model_blessing",
|
727 |
+
"producerTask": "latest_blessed_model_resolver-model-blessing-resolver",
|
728 |
+
}
|
729 |
+
}
|
730 |
+
}
|
731 |
+
},
|
732 |
+
"taskInfo": {
|
733 |
+
"name": "latest_blessed_model_resolver-model-resolver"
|
734 |
+
},
|
735 |
+
},
|
736 |
+
}
|
737 |
+
}
|
738 |
+
},
|
739 |
+
"schemaVersion": "2.0.0",
|
740 |
+
"sdkVersion": "tfx-1.9.0",
|
741 |
+
},
|
742 |
+
"runtimeConfig": {
|
743 |
+
"gcsOutputDirectory": "gs://gcp-ml-172005-complete-mlops/tfx_pipeline_output/img-classification"
|
744 |
+
},
|
745 |
+
}
|
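For reference, the escaped "input_config" value above is just the JSON serialization of ExampleGen's input-split proto. A minimal sketch of reproducing that string (this snippet is not part of the commit; it only illustrates where the embedded value comes from):

# Sketch only: regenerates the JSON embedded in the "input_config" stringValue.
from google.protobuf import json_format
from tfx.proto import example_gen_pb2

input_config = example_gen_pb2.Input(
    splits=[
        example_gen_pb2.Input.Split(name="train", pattern="train/*"),
        example_gen_pb2.Input.Split(name="eval", pattern="test/*"),
    ]
)

# Prints the same JSON text that the compiler embeds (escaped) into pipeline.json.
print(json_format.MessageToJson(input_config))
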
pipeline/__init__.py
ADDED
File without changes

pipeline/components/__init__.py
ADDED
File without changes

pipeline/components/pusher/GHReleasePusher/__init__.py
ADDED
File without changes

pipeline/components/pusher/GHReleasePusher/component.py
ADDED
@@ -0,0 +1,40 @@
from typing import Any, Dict, Optional

from tfx import types
from tfx.components.pusher import component as pusher_component
from tfx.dsl.components.base import executor_spec
from pipeline.components.pusher.GHReleasePusher import executor


class Pusher(pusher_component.Pusher):
    """Component for pushing a model to a GitHub Release."""

    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(executor.Executor)

    def __init__(
        self,
        model: Optional[types.Channel] = None,
        model_blessing: Optional[types.Channel] = None,
        infra_blessing: Optional[types.Channel] = None,
        custom_config: Optional[Dict[str, Any]] = None,
    ):
        """Construct a Pusher component.

        Args:
          model: An optional Channel of type `standard_artifacts.Model`, usually
            produced by a Trainer component, representing the trained model.
          model_blessing: An optional Channel of type
            `standard_artifacts.ModelBlessing`, usually produced by an Evaluator
            component, containing the blessing result.
          infra_blessing: An optional Channel of type
            `standard_artifacts.InfraBlessing`, usually produced by an
            InfraValidator component, containing the validation result.
          custom_config: A dict of release parameters (under the "GH_RELEASE"
            key) passed through to the executor.
        """
        super().__init__(
            model=model,
            model_blessing=model_blessing,
            infra_blessing=infra_blessing,
            custom_config=custom_config,
        )

pipeline/components/pusher/GHReleasePusher/constants.py
ADDED
@@ -0,0 +1,8 @@
GH_RELEASE_KEY = "GH_RELEASE"

ACCESS_TOKEN_KEY = "ACCESS_TOKEN"
BRANCH_KEY = "BRANCH"

USERNAME_KEY = "USERNAME"
REPONAME_KEY = "REPONAME"
ASSETNAME_KEY = "ASSETNAME"

pipeline/components/pusher/GHReleasePusher/executor.py
ADDED
@@ -0,0 +1,93 @@
import time
from typing import Any, Dict, List

from google.api_core import client_options
from googleapiclient import discovery
from tfx import types
from tfx.components.pusher import executor as tfx_pusher_executor
from pipeline.components.pusher.GHReleasePusher import constants
from pipeline.components.pusher.GHReleasePusher import runner
from tfx.types import artifact_utils
from tfx.types import standard_component_specs
from tfx.utils import deprecation_utils
from tfx.utils import io_utils
from tfx.utils import json_utils
from tfx.utils import name_utils
from tfx.utils import telemetry_utils


from tfx.dsl.io import fileio

# Keys for custom_config.
_CUSTOM_CONFIG_KEY = "custom_config"


class Executor(tfx_pusher_executor.Executor):
    """Push a blessed model to a GitHub Release."""

    def Do(
        self,
        input_dict: Dict[str, List[types.Artifact]],
        output_dict: Dict[str, List[types.Artifact]],
        exec_properties: Dict[str, Any],
    ):
        """Overrides the tfx_pusher_executor.

        Args:
          input_dict: Input dict from input key to a list of artifacts, including:
            - model_export: exported model from trainer.
            - model_blessing: model blessing path from evaluator.
          output_dict: Output dict from key to a list of artifacts, including:
            - model_push: A list of 'ModelPushPath' artifact of size one. It will
              include the model in this push execution if the model was pushed.
          exec_properties: Mostly a passthrough input dict for
            tfx.components.Pusher.executor. The following keys in `custom_config`
            are consumed by this class:
            CONFIG = {
                "USERNAME": "deep-diver",
                "REPONAME": "PyGithubTest",
                "ASSETNAME": "saved_model.tar.gz",
            }

        Raises:
          ValueError:
            If one of ACCESS_TOKEN, USERNAME, REPONAME, BRANCH, ASSETNAME is
            not in exec_properties.custom_config.
          RuntimeError: if the GitHub Release job failed.
        """
        self._log_startup(input_dict, output_dict, exec_properties)

        custom_config = json_utils.loads(
            exec_properties.get(_CUSTOM_CONFIG_KEY, "null")
        )

        if custom_config is not None and not isinstance(custom_config, Dict):
            raise ValueError(
                "custom_config in execution properties needs to be a dict."
            )

        gh_release_args = custom_config.get(constants.GH_RELEASE_KEY)
        if not gh_release_args:
            raise ValueError("'GH_RELEASE' is missing in 'custom_config'")
        model_push = artifact_utils.get_single_instance(
            output_dict[standard_component_specs.PUSHED_MODEL_KEY]
        )
        if not self.CheckBlessing(input_dict):
            self._MarkNotPushed(model_push)
            return

        # Copy the blessed model into this push execution's output location.
        io_utils.copy_dir(src=self.GetModelPath(input_dict), dst=model_push.uri)
        model_path = model_push.uri

        executor_class_path = name_utils.get_full_name(self.__class__)
        with telemetry_utils.scoped_labels(
            {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}
        ):
            job_labels = telemetry_utils.make_labels_dict()

            model_name = f"v{int(time.time())}"
            pushed_model_path = runner.release_model_for_github(
                model_path=model_path,
                model_version_name=model_name,
                gh_release_args=gh_release_args,
            )
            self._MarkPushed(model_push, pushed_destination=pushed_model_path)

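The executor relies on `custom_config` arriving as a JSON string. A minimal, self-contained sketch of the round trip it assumes (the token value is a placeholder):

# Sketch only: the component side serializes custom_config with json_utils.dumps,
# and Do() above recovers it with json_utils.loads before reading GH_RELEASE.
from tfx.utils import json_utils

custom_config = {
    "GH_RELEASE": {
        "ACCESS_TOKEN": "<token>",  # placeholder
        "USERNAME": "deep-diver",
        "REPONAME": "PyGithubTest",
        "BRANCH": "main",
        "ASSETNAME": "saved_model.tar.gz",
    }
}

serialized = json_utils.dumps(custom_config)           # what exec_properties carries
assert json_utils.loads(serialized) == custom_config   # what Do() reads back
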
pipeline/components/pusher/GHReleasePusher/executor_test.py
ADDED
@@ -0,0 +1,131 @@
import time
import copy
import os
from typing import Any, Dict
from unittest import mock

import tensorflow as tf
from tfx.dsl.io import fileio
from pipeline.components.pusher.GHReleasePusher import executor
from pipeline.components.pusher.GHReleasePusher import constants
from tfx.types import standard_artifacts
from tfx.types import standard_component_specs
from tfx.utils import json_utils
from tfx.utils import name_utils
from tfx.utils import telemetry_utils

_GH_RELEASE_KEY = "GH_RELEASE"


class ExecutorTest(tf.test.TestCase):
    def setUp(self):
        super().setUp()
        self._source_data_dir = os.path.join(
            os.path.dirname(
                os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
            ),
            "components",
            "testdata",
        )
        self._output_data_dir = os.path.join(
            os.environ.get("TEST_UNDECLARED_OUTPUTS_DIR", self.get_temp_dir()),
            self._testMethodName,
        )
        fileio.makedirs(self._output_data_dir)
        self._model_export = standard_artifacts.Model()
        self._model_export.uri = os.path.join(self._source_data_dir, "trainer/current")
        self._model_blessing = standard_artifacts.ModelBlessing()
        self._input_dict = {
            standard_component_specs.MODEL_KEY: [self._model_export],
            standard_component_specs.MODEL_BLESSING_KEY: [self._model_blessing],
        }

        self._model_push = standard_artifacts.PushedModel()
        self._model_push.uri = os.path.join(self._output_data_dir, "model_push")
        fileio.makedirs(self._model_push.uri)
        self._output_dict = {
            standard_component_specs.PUSHED_MODEL_KEY: [self._model_push],
        }
        # Dict format of exec_properties. custom_config needs to be serialized
        # before being passed into the Do function.
        self._exec_properties = {
            "custom_config": {
                constants.GH_RELEASE_KEY: {
                    "ACCESS_TOKEN": "...",
                    "USERNAME": "deep-diver",
                    "REPONAME": "PyGithubTest",
                    "BRANCH": "main",
                    "ASSETNAME": "saved_model.tar.gz",
                }
            },
            "push_destination": None,
        }
        self._executor = executor.Executor()

    def _serialize_custom_config_under_test(self) -> Dict[str, Any]:
        """Converts self._exec_properties['custom_config'] to string."""
        result = copy.deepcopy(self._exec_properties)
        result["custom_config"] = json_utils.dumps(result["custom_config"])
        return result

    def assertDirectoryEmpty(self, path):
        self.assertEqual(len(fileio.listdir(path)), 0)

    def assertDirectoryNotEmpty(self, path):
        self.assertGreater(len(fileio.listdir(path)), 0)

    def assertPushed(self):
        self.assertDirectoryNotEmpty(self._model_push.uri)
        self.assertEqual(1, self._model_push.get_int_custom_property("pushed"))

    def assertNotPushed(self):
        self.assertDirectoryEmpty(self._model_push.uri)
        self.assertEqual(0, self._model_push.get_int_custom_property("pushed"))

    @mock.patch.object(executor, "runner", autospec=True)
    def testDoBlessed(self, mock_runner):
        self._model_blessing.uri = os.path.join(
            self._source_data_dir, "model_validator/blessed"
        )
        self._model_blessing.set_int_custom_property("blessed", 1)
        version = self._model_push.get_string_custom_property("pushed_version")
        mock_runner.release_model_for_github.return_value = (
            f"https://github.com/username/reponame/releases/tag/{version}"
        )

        self._executor.Do(
            self._input_dict,
            self._output_dict,
            self._serialize_custom_config_under_test(),
        )

        executor_class_path = name_utils.get_full_name(self._executor.__class__)
        with telemetry_utils.scoped_labels(
            {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}
        ):
            job_labels = telemetry_utils.make_labels_dict()

        mock_runner.release_model_for_github.assert_called_once_with(
            model_path=mock.ANY,
            model_version_name=mock.ANY,
            gh_release_args=mock.ANY,
        )
        self.assertPushed()
        self.assertEqual(
            self._model_push.get_string_custom_property("pushed_destination"),
            f"https://github.com/username/reponame/releases/tag/{version}",
        )

    @mock.patch.object(executor, "runner", autospec=True)
    def testDoNotBlessed(self, mock_runner):
        self._model_blessing.uri = os.path.join(
            self._source_data_dir, "model_validator/not_blessed"
        )
        self._model_blessing.set_int_custom_property("blessed", 0)
        self._executor.Do(
            self._input_dict,
            self._output_dict,
            self._serialize_custom_config_under_test(),
        )
        self.assertNotPushed()
        mock_runner.release_model_for_github.assert_not_called()

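Since the repository pins pytest in requirements.txt, the test above can be run standalone; a small sketch of invoking it programmatically (the path assumes the repository root as working directory):

# Sketch only: run the executor tests via pytest.
import pytest

raise SystemExit(
    pytest.main(["pipeline/components/pusher/GHReleasePusher/executor_test.py"])
)
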
pipeline/components/pusher/GHReleasePusher/runner.py
ADDED
@@ -0,0 +1,74 @@
from typing import Any, Dict

import os
import tarfile
from absl import logging

from github import Github
import tensorflow as tf

from pipeline.components.pusher.GHReleasePusher import constants


def release_model_for_github(
    model_path: str,
    model_version_name: str,
    gh_release_args: Dict[str, Any],
) -> str:
    access_token = gh_release_args[constants.ACCESS_TOKEN_KEY]

    username = gh_release_args[constants.USERNAME_KEY]
    reponame = gh_release_args[constants.REPONAME_KEY]
    repo_uri = f"{username}/{reponame}"

    branch_name = gh_release_args[constants.BRANCH_KEY]

    model_archive = gh_release_args[constants.ASSETNAME_KEY]

    gh = Github(access_token)
    repo = gh.get_repo(repo_uri)
    branch = repo.get_branch(branch_name)

    release = repo.create_git_release(
        model_version_name,
        f"model release {model_version_name}",
        "",
        draft=False,
        prerelease=False,
        target_commitish=branch,
    )

    logging.warning(f"model_path: {model_path}")
    if model_path.startswith("gs://"):
        logging.warning("download pushed model")
        root_dir = "saved_model"
        os.mkdir(root_dir)

        blobnames = tf.io.gfile.listdir(model_path)

        for blobname in blobnames:
            blob = f"{model_path}/{blobname}"

            if tf.io.gfile.isdir(blob):
                # Directory entries returned by listdir() end with "/".
                sub_dir = f"{root_dir}/{blobname}"
                os.mkdir(sub_dir)

                sub_blobnames = tf.io.gfile.listdir(blob)
                for sub_blobname in sub_blobnames:
                    sub_blob = f"{blob}{sub_blobname}"

                    logging.warning(f"{sub_dir}{sub_blobname}")
                    tf.io.gfile.copy(sub_blob, f"{sub_dir}{sub_blobname}")
            else:
                logging.warning(f"{root_dir}/{blobname}")
                tf.io.gfile.copy(blob, f"{root_dir}/{blobname}")

        model_path = root_dir

    logging.warning("compress the model")
    with tarfile.open(model_archive, "w:gz") as tar:
        tar.add(model_path)

    logging.warning("upload the model")
    release.upload_asset(model_archive, name=model_archive)
    return f"https://github.com/{username}/{reponame}/releases/tag/{model_version_name}"

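A hypothetical direct invocation of the runner with placeholder values (in the pipeline it is only ever called from the executor above, and a real token must never be hardcoded):

# Sketch only: every argument value below is a placeholder.
from pipeline.components.pusher.GHReleasePusher import runner

release_url = runner.release_model_for_github(
    model_path="serving_model/1",  # a local SavedModel directory or a gs:// URI
    model_version_name="v1660000000",
    gh_release_args={
        "ACCESS_TOKEN": "<github-token>",
        "USERNAME": "deep-diver",
        "REPONAME": "PyGithubTest",
        "BRANCH": "main",
        "ASSETNAME": "saved_model.tar.gz",
    },
)
print(release_url)  # e.g. https://github.com/deep-diver/PyGithubTest/releases/tag/v1660000000
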
pipeline/components/pusher/HFModelPusher/__init__.py
ADDED
File without changes

pipeline/components/pusher/HFModelPusher/component.py
ADDED
@@ -0,0 +1,40 @@
from typing import Any, Dict, Optional

from tfx import types
from tfx.components.pusher import component as pusher_component
from tfx.dsl.components.base import executor_spec
from pipeline.components.pusher.HFModelPusher import executor


class Pusher(pusher_component.Pusher):
    """Component for pushing a model to the Hugging Face Model Hub."""

    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(executor.Executor)

    def __init__(
        self,
        model: Optional[types.Channel] = None,
        model_blessing: Optional[types.Channel] = None,
        infra_blessing: Optional[types.Channel] = None,
        custom_config: Optional[Dict[str, Any]] = None,
    ):
        """Construct a Pusher component.

        Args:
          model: An optional Channel of type `standard_artifacts.Model`, usually
            produced by a Trainer component, representing the trained model.
          model_blessing: An optional Channel of type
            `standard_artifacts.ModelBlessing`, usually produced by an Evaluator
            component, containing the blessing result.
          infra_blessing: An optional Channel of type
            `standard_artifacts.InfraBlessing`, usually produced by an
            InfraValidator component, containing the validation result.
          custom_config: A dict of release parameters (under the
            "HF_MODEL_RELEASE" key) passed through to the executor.
        """
        super().__init__(
            model=model,
            model_blessing=model_blessing,
            infra_blessing=infra_blessing,
            custom_config=custom_config,
        )

pipeline/components/pusher/HFModelPusher/constants.py
ADDED
@@ -0,0 +1,6 @@
HF_MODEL_RELEASE_KEY = "HF_MODEL_RELEASE"

ACCESS_TOKEN_KEY = "ACCESS_TOKEN"

USERNAME_KEY = "USERNAME"
REPONAME_KEY = "REPONAME"

pipeline/components/pusher/HFModelPusher/executor.py
ADDED
@@ -0,0 +1,93 @@
import time
from typing import Any, Dict, List

from google.api_core import client_options
from googleapiclient import discovery
from tfx import types
from tfx.components.pusher import executor as tfx_pusher_executor
from pipeline.components.pusher.HFModelPusher import constants
from pipeline.components.pusher.HFModelPusher import runner
from tfx.types import artifact_utils
from tfx.types import standard_component_specs
from tfx.utils import deprecation_utils
from tfx.utils import io_utils
from tfx.utils import json_utils
from tfx.utils import name_utils
from tfx.utils import telemetry_utils


from tfx.dsl.io import fileio

# Keys for custom_config.
_CUSTOM_CONFIG_KEY = "custom_config"


class Executor(tfx_pusher_executor.Executor):
    """Push a blessed model to the Hugging Face Model Hub."""

    def Do(
        self,
        input_dict: Dict[str, List[types.Artifact]],
        output_dict: Dict[str, List[types.Artifact]],
        exec_properties: Dict[str, Any],
    ):
        """Overrides the tfx_pusher_executor.

        Args:
          input_dict: Input dict from input key to a list of artifacts, including:
            - model_export: exported model from trainer.
            - model_blessing: model blessing path from evaluator.
          output_dict: Output dict from key to a list of artifacts, including:
            - model_push: A list of 'ModelPushPath' artifact of size one. It will
              include the model in this push execution if the model was pushed.
          exec_properties: Mostly a passthrough input dict for
            tfx.components.Pusher.executor. The following keys in `custom_config`
            are consumed by this class:
            CONFIG = {
                "ACCESS_TOKEN": "...",
                "USERNAME": "chansung",
                "REPONAME": "resnet50-tfx-pipeline-hf-model-test6",
            }

        Raises:
          ValueError:
            If one of ACCESS_TOKEN, USERNAME, REPONAME is not in
            exec_properties.custom_config.
          RuntimeError: if the Hugging Face Hub upload failed.
        """
        self._log_startup(input_dict, output_dict, exec_properties)

        custom_config = json_utils.loads(
            exec_properties.get(_CUSTOM_CONFIG_KEY, "null")
        )

        if custom_config is not None and not isinstance(custom_config, Dict):
            raise ValueError(
                "custom_config in execution properties needs to be a dict."
            )

        hf_release_args = custom_config.get(constants.HF_MODEL_RELEASE_KEY)
        if not hf_release_args:
            raise ValueError("'HF_MODEL_RELEASE' is missing in 'custom_config'")
        model_push = artifact_utils.get_single_instance(
            output_dict[standard_component_specs.PUSHED_MODEL_KEY]
        )
        if not self.CheckBlessing(input_dict):
            self._MarkNotPushed(model_push)
            return

        # Copy the blessed model into this push execution's output location.
        io_utils.copy_dir(src=self.GetModelPath(input_dict), dst=model_push.uri)
        model_path = model_push.uri

        executor_class_path = name_utils.get_full_name(self.__class__)
        with telemetry_utils.scoped_labels(
            {telemetry_utils.LABEL_TFX_EXECUTOR: executor_class_path}
        ):
            job_labels = telemetry_utils.make_labels_dict()

            model_name = f"v{int(time.time())}"
            pushed_model_path = runner.release_model_for_hf_model(
                model_path=model_path,
                model_version_name=model_name,
                hf_release_args=hf_release_args,
            )
            self._MarkPushed(model_push, pushed_destination=pushed_model_path)

pipeline/components/pusher/HFModelPusher/runner.py
ADDED
@@ -0,0 +1,92 @@
from typing import Any, Dict

import os
import tarfile
import time
from absl import logging

import tensorflow as tf

from huggingface_hub import HfApi
from requests.exceptions import HTTPError

from pipeline.components.pusher.HFModelPusher import constants


def release_model_for_hf_model(
    model_path: str,
    model_version_name: str,
    hf_release_args: Dict[str, Any],
) -> str:
    access_token = hf_release_args[constants.ACCESS_TOKEN_KEY]

    username = hf_release_args[constants.USERNAME_KEY]
    reponame = hf_release_args[constants.REPONAME_KEY]
    repo_id = f"{username}/{reponame}"

    repo_type = "model"

    hf_api = HfApi()
    hf_api.set_access_token(access_token)

    logging.warning(f"model_path: {model_path}")

    logging.warning("download pushed model")
    model_name = f"v{int(time.time())}"
    root_dir = model_name
    os.mkdir(root_dir)

    blobnames = tf.io.gfile.listdir(model_path)

    for blobname in blobnames:
        blob = f"{model_path}/{blobname}"

        if tf.io.gfile.isdir(blob):
            # Directory entries returned by listdir() end with "/".
            sub_dir = f"{root_dir}/{blobname}"
            os.mkdir(sub_dir)

            sub_blobnames = tf.io.gfile.listdir(blob)
            for sub_blobname in sub_blobnames:
                sub_blob = f"{blob}{sub_blobname}"

                logging.warning(f"{sub_dir}{sub_blobname}")
                tf.io.gfile.copy(sub_blob, f"{sub_dir}{sub_blobname}")
        else:
            logging.warning(f"{root_dir}/{blobname}")
            tf.io.gfile.copy(blob, f"{root_dir}/{blobname}")

    model_path = root_dir

    hf_hub_path = ""
    try:
        hf_api.create_repo(
            token=access_token, repo_id=f"{repo_id}-model", repo_type=repo_type
        )
    except HTTPError as e:
        logging.warning(e)
        logging.warning(f"{repo_id}-model repository may already exist")
    finally:
        try:
            hf_hub_path = hf_api.upload_folder(
                repo_id=f"{repo_id}-model",
                folder_path=model_path,
                token=access_token,
                create_pr=True,
                repo_type=repo_type,
                commit_message=model_name,
            )
            logging.warning(f"file is uploaded at {repo_id}-model")
        except HTTPError as e:
            logging.warning(e)
            raise

    return hf_hub_path

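As with the GitHub runner, a hypothetical direct invocation with placeholder values (the executor above is the only real caller in this pipeline):

# Sketch only: every argument value below is a placeholder.
from pipeline.components.pusher.HFModelPusher import runner

hub_path = runner.release_model_for_hf_model(
    model_path="serving_model/1",  # a local SavedModel directory or a gs:// URI
    model_version_name="v1660000000",
    hf_release_args={
        "ACCESS_TOKEN": "<hf-token>",
        "USERNAME": "chansung",
        "REPONAME": "resnet50-tfx-pipeline-hf-model-test6",
    },
)
print(hub_path)  # location of the upload (a PR on the "-model" repository)
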
pipeline/components/pusher/__init__.py
ADDED
File without changes

pipeline/components/testdata/trainer/current/Format-Serving/keras_metadata.pb
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f861a4ed57982d7ae49316173853abee7f927bf841de038e72001eca637fc490
size 557217

pipeline/components/testdata/trainer/current/Format-Serving/saved_model.pb
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e5b4be0c63050c743e3bddc8b8bab276247b7b83729f20db66696e81e7b15dac
size 3566431

pipeline/components/testdata/trainer/current/Format-Serving/variables/variables.data-00000-of-00001
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f6795f3d1b78b32a79f8dcb91126aa8e647c14e43c5a00cafc865584c65349fb
size 102692910

pipeline/components/testdata/trainer/current/Format-Serving/variables/variables.index
ADDED
Binary file (18.1 kB). View file
pipeline/configs.py
ADDED
@@ -0,0 +1,121 @@
import os  # pylint: disable=unused-import
import tfx
import tfx.extensions.google_cloud_ai_platform.constants as vertex_const
import tfx.extensions.google_cloud_ai_platform.trainer.executor as vertex_training_const
import tfx.extensions.google_cloud_ai_platform.tuner.executor as vertex_tuner_const

PIPELINE_NAME = "resnet50-tfx-pipeline-hf-model-test6"

try:
    import google.auth  # pylint: disable=g-import-not-at-top  # pytype: disable=import-error

    try:
        _, GOOGLE_CLOUD_PROJECT = google.auth.default()
    except google.auth.exceptions.DefaultCredentialsError:
        GOOGLE_CLOUD_PROJECT = "gcp-ml-172005"
except ImportError:
    GOOGLE_CLOUD_PROJECT = "gcp-ml-172005"

GOOGLE_CLOUD_REGION = "us-central1"

GCS_BUCKET_NAME = GOOGLE_CLOUD_PROJECT + "-complete-mlops"
PIPELINE_IMAGE = f"gcr.io/{GOOGLE_CLOUD_PROJECT}/{PIPELINE_NAME}"

OUTPUT_DIR = os.path.join("gs://", GCS_BUCKET_NAME)
PIPELINE_ROOT = os.path.join(OUTPUT_DIR, "tfx_pipeline_output", PIPELINE_NAME)
DATA_PATH = f"gs://{GCS_BUCKET_NAME}/data/"


PREPROCESSING_FN = "models.preprocessing.preprocessing_fn"
TRAINING_FN = "models.model.run_fn"
TUNER_FN = "models.model.tuner_fn"
CLOUD_TUNER_FN = "models.model.tuner_fn"

TRAIN_NUM_STEPS = 160
EVAL_NUM_STEPS = 4
NUM_PARALLEL_TRIALS = 3

EVAL_ACCURACY_THRESHOLD = 0.6

GCP_AI_PLATFORM_TRAINING_ARGS = {
    vertex_const.ENABLE_VERTEX_KEY: True,
    vertex_const.VERTEX_REGION_KEY: GOOGLE_CLOUD_REGION,
    vertex_training_const.TRAINING_ARGS_KEY: {
        "project": GOOGLE_CLOUD_PROJECT,
        "worker_pool_specs": [
            {
                "machine_spec": {
                    "machine_type": "n1-standard-4",
                    "accelerator_type": "NVIDIA_TESLA_K80",
                    "accelerator_count": 1,
                },
                "replica_count": 1,
                "container_spec": {
                    "image_uri": PIPELINE_IMAGE,
                },
            }
        ],
    },
    "use_gpu": True,
}

GCP_AI_PLATFORM_TUNER_ARGS = {
    vertex_const.ENABLE_VERTEX_KEY: True,
    vertex_const.VERTEX_REGION_KEY: GOOGLE_CLOUD_REGION,
    vertex_tuner_const.TUNING_ARGS_KEY: {
        "project": GOOGLE_CLOUD_PROJECT,
        # "serviceAccount": "[email protected]",
        "job_spec": {
            "worker_pool_specs": [
                {
                    "machine_spec": {
                        "machine_type": "n1-standard-4",
                        "accelerator_type": "NVIDIA_TESLA_K80",
                        "accelerator_count": 1,
                    },
                    "replica_count": 1,
                    "container_spec": {
                        "image_uri": PIPELINE_IMAGE,
                    },
                }
            ],
        },
    },
    vertex_tuner_const.REMOTE_TRIALS_WORKING_DIR_KEY: os.path.join(
        PIPELINE_ROOT, "trials"
    ),
    "use_gpu": True,
}

GCP_AI_PLATFORM_SERVING_ARGS = {
    vertex_const.ENABLE_VERTEX_KEY: True,
    vertex_const.VERTEX_REGION_KEY: GOOGLE_CLOUD_REGION,
    vertex_const.VERTEX_CONTAINER_IMAGE_URI_KEY: "us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-8:latest",
    vertex_const.SERVING_ARGS_KEY: {
        "project_id": GOOGLE_CLOUD_PROJECT,
        "deployed_model_display_name": PIPELINE_NAME.replace("-", "_"),
        "endpoint_name": "prediction-" + PIPELINE_NAME.replace("-", "_"),
        "traffic_split": {"0": 100},
        "machine_type": "n1-standard-4",
        "min_replica_count": 1,
        "max_replica_count": 1,
    },
}

GH_RELEASE_ARGS = {
    "GH_RELEASE": {
        "ACCESS_TOKEN": "$GH_ACCESS_TOKEN",
        "USERNAME": "deep-diver",
        "REPONAME": "PyGithubTest",
        "BRANCH": "main",
        "ASSETNAME": "saved_model.tar.gz",
    }
}

HF_MODEL_RELEASE_ARGS = {
    "HF_MODEL_RELEASE": {
        "ACCESS_TOKEN": "$HF_ACCESS_TOKEN",
        "USERNAME": "chansung",
        "REPONAME": PIPELINE_NAME,
    }
}

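The "$GH_ACCESS_TOKEN" and "$HF_ACCESS_TOKEN" values above are placeholders, which suggests the real secrets are substituted in before the pipeline is compiled. One possible helper for that substitution (this function is not part of the commit; it is only an assumption about how the placeholders get resolved):

# Sketch only: resolve "$VAR" placeholders from environment variables.
import os


def resolve_tokens(release_args: dict) -> dict:
    resolved = {}
    for section, cfg in release_args.items():
        resolved[section] = {
            key: os.environ[value[1:]]
            if isinstance(value, str) and value.startswith("$")
            else value
            for key, value in cfg.items()
        }
    return resolved

# Usage: export GH_ACCESS_TOKEN in the environment, then pass
# resolve_tokens(GH_RELEASE_ARGS) to the pipeline instead of the raw dict.
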
pipeline/local_pipeline.py
ADDED
@@ -0,0 +1,179 @@
from typing import Any, Dict, List, Optional, Text

import tensorflow_model_analysis as tfma
from tfx import v1 as tfx

from ml_metadata.proto import metadata_store_pb2
from tfx.proto import example_gen_pb2

import absl
import tensorflow_model_analysis as tfma
from tfx.components import Evaluator
from tfx.components import ExampleValidator
from tfx.components import ImportExampleGen
from tfx.components import Pusher
from tfx.components import SchemaGen
from tfx.components import StatisticsGen
from tfx.components import Trainer
from tfx.components import Tuner
from tfx.extensions.google_cloud_ai_platform.trainer.component import (
    Trainer as VertexTrainer,
)
from tfx.extensions.google_cloud_ai_platform.pusher.component import (
    Pusher as VertexPusher,
)
from tfx.components import Transform
from tfx.dsl.components.common import resolver
from tfx.dsl.experimental import latest_blessed_model_resolver
from tfx.orchestration import pipeline
from tfx.proto import example_gen_pb2
from tfx.proto import trainer_pb2
from tfx.types import Channel
from tfx.types.standard_artifacts import Model
from tfx.types.standard_artifacts import ModelBlessing
from tfx.orchestration.data_types import RuntimeParameter

from components.pusher.GHReleasePusher.component import Pusher as GHPusher


def create_pipeline(
    pipeline_name: Text,
    pipeline_root: Text,
    data_path: Text,
    modules: Dict[Text, Text],
    train_args: trainer_pb2.TrainArgs,
    eval_args: trainer_pb2.EvalArgs,
    serving_model_dir: Text,
    metadata_connection_config: Optional[metadata_store_pb2.ConnectionConfig] = None,
) -> tfx.dsl.Pipeline:
    components = []

    input_config = example_gen_pb2.Input(
        splits=[
            example_gen_pb2.Input.Split(name="train", pattern="train/*.tfrecord"),
            example_gen_pb2.Input.Split(name="eval", pattern="test/*.tfrecord"),
        ]
    )
    example_gen = ImportExampleGen(input_base=data_path, input_config=input_config)
    components.append(example_gen)

    statistics_gen = StatisticsGen(examples=example_gen.outputs["examples"])
    components.append(statistics_gen)

    schema_gen = SchemaGen(
        statistics=statistics_gen.outputs["statistics"], infer_feature_shape=True
    )
    components.append(schema_gen)

    # example_validator = tfx.components.ExampleValidator(
    #     statistics=statistics_gen.outputs['statistics'],
    #     schema=schema_gen.outputs['schema'])
    # components.append(example_validator)

    transform = Transform(
        examples=example_gen.outputs["examples"],
        schema=schema_gen.outputs["schema"],
        preprocessing_fn=modules["preprocessing_fn"],
    )
    components.append(transform)

    tuner = Tuner(
        tuner_fn=modules["tuner_fn"],
        examples=transform.outputs["transformed_examples"],
        schema=schema_gen.outputs["schema"],
        transform_graph=transform.outputs["transform_graph"],
        train_args=train_args,
        eval_args=eval_args,
    )
    components.append(tuner)

    trainer_args = {
        "run_fn": modules["training_fn"],
        "transformed_examples": transform.outputs["transformed_examples"],
        "schema": schema_gen.outputs["schema"],
        "hyperparameters": tuner.outputs["best_hyperparameters"],
        "transform_graph": transform.outputs["transform_graph"],
        "train_args": train_args,
        "eval_args": eval_args,
    }
    trainer = Trainer(**trainer_args)
    components.append(trainer)

    model_resolver = resolver.Resolver(
        strategy_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing),
    ).with_id("latest_blessed_model_resolver")
    components.append(model_resolver)

    # Uses TFMA to compute evaluation statistics over features of a model and
    # perform quality validation of a candidate model (compare to a baseline).
    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key="label_xf", prediction_key="label_xf")],
        slicing_specs=[tfma.SlicingSpec()],
        metrics_specs=[
            tfma.MetricsSpec(
                metrics=[
                    tfma.MetricConfig(
                        class_name="SparseCategoricalAccuracy",
                        threshold=tfma.MetricThreshold(
                            value_threshold=tfma.GenericValueThreshold(
                                lower_bound={"value": 0.55}
                            ),
                            # Change threshold will be ignored if there is no
                            # baseline model resolved from MLMD (first run).
                            change_threshold=tfma.GenericChangeThreshold(
                                direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                                absolute={"value": -1e-3},
                            ),
                        ),
                    )
                ]
            )
        ],
    )

    evaluator = Evaluator(
        examples=transform.outputs["transformed_examples"],
        model=trainer.outputs["model"],
        baseline_model=model_resolver.outputs["model"],
        eval_config=eval_config,
    )
    components.append(evaluator)

    pusher_args = {
        "model": trainer.outputs["model"],
        "model_blessing": evaluator.outputs["blessing"],
        "push_destination": tfx.proto.PushDestination(
            filesystem=tfx.proto.PushDestination.Filesystem(
                base_directory=serving_model_dir
            )
        ),
    }
    pusher = Pusher(**pusher_args)  # pylint: disable=unused-variable
    components.append(pusher)

    pusher_args = {
        "model": trainer.outputs["model"],
        "model_blessing": evaluator.outputs["blessing"],
        "custom_config": {
            "GH_RELEASE": {
                "ACCESS_TOKEN": "$GH_ACCESS_TOKEN",  # injected at run time; never commit a real token
                "USERNAME": "deep-diver",
                "REPONAME": "PyGithubTest",
                "BRANCH": "main",
                "ASSETNAME": "saved_model.tar.gz",
            }
        },
    }

    gh_pusher = GHPusher(**pusher_args).with_id("gh_release_pusher")
    components.append(gh_pusher)

    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=components,
        enable_cache=False,
        metadata_connection_config=metadata_connection_config,
    )

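A minimal sketch of running this local variant end to end (the repository's local_runner.py presumably plays this role; the argument values below are illustrative assumptions, and the pipeline package must be importable from the working directory):

# Sketch only: run the local pipeline with TFX's LocalDagRunner.
from tfx import v1 as tfx
from tfx.proto import trainer_pb2

from pipeline import local_pipeline

tfx.orchestration.LocalDagRunner().run(
    local_pipeline.create_pipeline(
        pipeline_name="img-classification-local",
        pipeline_root="tfx_pipeline_output",
        data_path="data",
        modules={
            "preprocessing_fn": "models.preprocessing.preprocessing_fn",
            "training_fn": "models.model.run_fn",
            "tuner_fn": "models.model.tuner_fn",
        },
        train_args=trainer_pb2.TrainArgs(num_steps=160),
        eval_args=trainer_pb2.EvalArgs(num_steps=4),
        serving_model_dir="serving_model",
    )
)
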
pipeline/pipeline.py
ADDED
@@ -0,0 +1,173 @@
from typing import Any, Dict, List, Optional, Text

import tensorflow_model_analysis as tfma
from tfx import v1 as tfx

from ml_metadata.proto import metadata_store_pb2
from tfx.proto import example_gen_pb2

import absl
import tensorflow_model_analysis as tfma
from tfx.components import Evaluator
from tfx.components import ExampleValidator
from tfx.components import ImportExampleGen
from tfx.components import Pusher
from tfx.components import SchemaGen
from tfx.components import StatisticsGen
from tfx.components import Trainer
from tfx.components import Tuner
from tfx.extensions.google_cloud_ai_platform.trainer.component import (
    Trainer as VertexTrainer,
)
from tfx.extensions.google_cloud_ai_platform.pusher.component import (
    Pusher as VertexPusher,
)
from tfx.extensions.google_cloud_ai_platform.tuner.component import Tuner as VertexTuner
from pipeline.components.pusher.GHReleasePusher.component import Pusher as GHPusher
from pipeline.components.pusher.HFModelPusher.component import Pusher as HFModelPusher
from tfx.components import Transform
from tfx.dsl.components.common import resolver
from tfx.dsl.experimental import latest_blessed_model_resolver
from tfx.orchestration import pipeline
from tfx.proto import example_gen_pb2
from tfx.proto import trainer_pb2
from tfx.proto import tuner_pb2
from tfx.types import Channel
from tfx.types.standard_artifacts import Model
from tfx.types.standard_artifacts import ModelBlessing
from tfx.orchestration.data_types import RuntimeParameter


def create_pipeline(
    input_config: RuntimeParameter,
    output_config: RuntimeParameter,
    pipeline_name: Text,
    pipeline_root: Text,
    data_path: Text,
    modules: Dict[Text, Text],
    train_args: trainer_pb2.TrainArgs,
    eval_args: trainer_pb2.EvalArgs,
    tuner_args: tuner_pb2.TuneArgs,
    metadata_connection_config: Optional[metadata_store_pb2.ConnectionConfig] = None,
    ai_platform_training_args: Optional[Dict[Text, Text]] = None,
    ai_platform_tuner_args: Optional[Dict[Text, Text]] = None,
    ai_platform_serving_args: Optional[Dict[Text, Any]] = None,
    gh_release_args: Optional[Dict[Text, Any]] = None,
    hf_model_release_args: Optional[Dict[Text, Any]] = None,
) -> tfx.dsl.Pipeline:
    components = []

    example_gen = ImportExampleGen(
        input_base=data_path, input_config=input_config, output_config=output_config
    )
    components.append(example_gen)

    statistics_gen = StatisticsGen(examples=example_gen.outputs["examples"])
    components.append(statistics_gen)

    schema_gen = SchemaGen(
        statistics=statistics_gen.outputs["statistics"], infer_feature_shape=True
    )
    components.append(schema_gen)

    # example_validator = tfx.components.ExampleValidator(
    #     statistics=statistics_gen.outputs['statistics'],
    #     schema=schema_gen.outputs['schema'])
    # components.append(example_validator)

    transform = Transform(
        examples=example_gen.outputs["examples"],
        schema=schema_gen.outputs["schema"],
        preprocessing_fn=modules["preprocessing_fn"],
    )
    components.append(transform)

    tuner = VertexTuner(
        tuner_fn=modules["cloud_tuner_fn"],
        examples=transform.outputs["transformed_examples"],
        transform_graph=transform.outputs["transform_graph"],
        train_args=train_args,
        eval_args=eval_args,
        tune_args=tuner_args,
        custom_config=ai_platform_tuner_args,
    )
    components.append(tuner)

    trainer_args = {
        "run_fn": modules["training_fn"],
        "transformed_examples": transform.outputs["transformed_examples"],
        "schema": schema_gen.outputs["schema"],
        "hyperparameters": tuner.outputs["best_hyperparameters"],
        "transform_graph": transform.outputs["transform_graph"],
        "train_args": train_args,
        "eval_args": eval_args,
        "custom_config": ai_platform_training_args,
    }
    trainer = VertexTrainer(**trainer_args)
    components.append(trainer)

    model_resolver = resolver.Resolver(
        strategy_class=latest_blessed_model_resolver.LatestBlessedModelResolver,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing),
    ).with_id("latest_blessed_model_resolver")
    components.append(model_resolver)

    # Uses TFMA to compute evaluation statistics over features of a model and
    # perform quality validation of a candidate model (compare to a baseline).
    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key="label_xf", prediction_key="label_xf")],
        slicing_specs=[tfma.SlicingSpec()],
        metrics_specs=[
            tfma.MetricsSpec(
                metrics=[
                    tfma.MetricConfig(
                        class_name="SparseCategoricalAccuracy",
                        threshold=tfma.MetricThreshold(
                            value_threshold=tfma.GenericValueThreshold(
                                lower_bound={"value": 0.55}
                            ),
                            # Change threshold will be ignored if there is no
                            # baseline model resolved from MLMD (first run).
                            change_threshold=tfma.GenericChangeThreshold(
                                direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                                absolute={"value": -1e-3},
                            ),
                        ),
                    )
                ]
            )
        ],
    )

    evaluator = Evaluator(
        examples=transform.outputs["transformed_examples"],
        model=trainer.outputs["model"],
        baseline_model=model_resolver.outputs["model"],
        eval_config=eval_config,
    )
    components.append(evaluator)

    pusher_args = {
        "model": trainer.outputs["model"],
        "model_blessing": evaluator.outputs["blessing"],
        "custom_config": ai_platform_serving_args,
    }
    pusher = VertexPusher(**pusher_args)  # pylint: disable=unused-variable
    components.append(pusher)

    pusher_args["custom_config"] = gh_release_args
    gh_pusher = GHPusher(**pusher_args).with_id("GHReleasePusher")
    components.append(gh_pusher)

    pusher_args["custom_config"] = hf_model_release_args
    hf_model_pusher = HFModelPusher(**pusher_args).with_id("HFModelPusher")
    components.append(hf_model_pusher)

    return pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=components,
        enable_cache=True,
        metadata_connection_config=metadata_connection_config,
    )

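For the Vertex AI variant above, a minimal compile sketch (the real entrypoint is kubeflow_runner.py, which is outside this excerpt, so the exact wiring here is an assumption):

# Sketch only: compile the pipeline into the pipeline.json shown earlier.
from tfx import v1 as tfx

from pipeline import configs

runner = tfx.orchestration.experimental.KubeflowV2DagRunner(
    config=tfx.orchestration.experimental.KubeflowV2DagRunnerConfig(
        default_image=configs.PIPELINE_IMAGE
    ),
    output_filename="pipeline.json",
)
# runner.run(create_pipeline(...)) then writes pipeline.json, which is
# submitted to Vertex AI Pipelines.
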
requirements.txt
ADDED
@@ -0,0 +1,8 @@
pytest
kfp
tensorflow-cloud
keras-tuner
PyGithub
wget
oauth2client
huggingface-hub