Spaces:
Sleeping
Sleeping
# -*- coding: utf-8 -*- | |
# Copyright (c) Facebook, Inc. and its affiliates. | |
import inspect | |
import numpy as np | |
import pprint | |
from typing import Any, List, Optional, Tuple, Union | |
from fvcore.transforms.transform import Transform, TransformList | |
""" | |
See "Data Augmentation" tutorial for an overview of the system: | |
https://detectron2.readthedocs.io/tutorials/augmentation.html | |
""" | |
# Public names of this module, i.e. what ``from <this module> import *`` exposes.
__all__ = [
    "Augmentation",
    "AugmentationList",
    "AugInput",
    "TransformGen",
    "apply_transform_gens",
    "StandardAugInput",
    "apply_augmentations",
]
def _check_img_dtype(img):
    """
    Validate that ``img`` is an image array the augmentation system can consume.

    Args:
        img: the object to validate.

    Raises:
        AssertionError: if ``img`` is not a ``numpy.ndarray``, if it has an integer
            dtype other than ``uint8``, or if it is neither 2- nor 3-dimensional.
    """
    assert isinstance(img, np.ndarray), "[Augmentation] Needs an numpy array, but got a {}!".format(
        type(img)
    )
    # ``img.dtype`` is an ``np.dtype`` instance and is never an instance of the scalar
    # class ``np.integer``, so an ``isinstance`` test can never detect integer images.
    # ``np.issubdtype`` is the check that actually recognizes integer dtypes.
    assert not np.issubdtype(img.dtype, np.integer) or (
        img.dtype == np.uint8
    ), "[Augmentation] Got image of type {}, use uint8 or floating points instead!".format(
        img.dtype
    )
    assert img.ndim in [2, 3], img.ndim
def _get_aug_input_args(aug, aug_input) -> List[Any]:
    """
    Collect the values to pass to ``aug.get_transform`` out of ``aug_input``.

    When ``aug.input_args`` is ``None`` the attribute names are inferred from the
    signature of ``aug.get_transform`` and stored back on ``aug.input_args``.

    Args:
        aug: object with an ``input_args`` attribute and a ``get_transform`` method.
        aug_input: object whose attributes supply the argument values.

    Returns:
        list: one attribute value of ``aug_input`` per name in ``aug.input_args``,
        in the same order.

    Raises:
        TypeError: if the names have to be inferred, ``get_transform`` takes more or
            fewer than one parameter, and one of them is ``*args`` or ``**kwargs``.
        AttributeError: if ``aug_input`` lacks one of the required attributes.
    """
    if aug.input_args is None:
        # Nothing was declared explicitly: derive the names from the signature.
        signature_params = inspect.signature(aug.get_transform).parameters
        if len(signature_params) == 1:
            # A single parameter is always treated as "image", whatever it is called
            # (covers the majority of use cases and avoids breaking older code).
            inferred = ("image",)
        else:
            variadic_kinds = (
                inspect.Parameter.VAR_POSITIONAL,
                inspect.Parameter.VAR_KEYWORD,
            )
            inferred = []
            for param_name, param in signature_params.items():
                if param.kind in variadic_kinds:
                    raise TypeError(
                        f""" \
The default implementation of `{type(aug)}.__call__` does not allow \
`{type(aug)}.get_transform` to use variable-length arguments (*args, **kwargs)! \
If arguments are unknown, reimplement `__call__` instead. \
"""
                    )
                inferred.append(param_name)
        aug.input_args = tuple(inferred)

    values = []
    for attr_name in aug.input_args:
        try:
            value = getattr(aug_input, attr_name)
        except AttributeError as e:
            raise AttributeError(
                f"{type(aug)}.get_transform needs input attribute '{attr_name}', "
                f"but it is not an attribute of {type(aug_input)}!"
            ) from e
        values.append(value)
    return values
class Augmentation:
    """
    Augmentation defines (often random) policies/strategies to generate :class:`Transform`
    from data. It is often used for pre-processing of input data.

    A "policy" that generates a :class:`Transform` may, in the most general case,
    need arbitrary information from input data in order to determine what transforms
    to apply. Therefore, each :class:`Augmentation` instance defines the arguments
    needed by its :meth:`get_transform` method. When called with the positional arguments,
    the :meth:`get_transform` method executes the policy.

    Note that :class:`Augmentation` defines the policies to create a :class:`Transform`,
    but not how to execute the actual transform operations to those data.
    Its :meth:`__call__` method will use :meth:`AugInput.transform` to execute the transform.

    The returned `Transform` object is meant to describe deterministic transformation, which means
    it can be re-applied on associated data, e.g. the geometry of an image and its segmentation
    masks need to be transformed together.
    (If such re-application is not needed, then determinism is not a crucial requirement.)
    """

    # A tuple of any length: one attribute name per ``get_transform`` argument.
    input_args: Optional[Tuple[str, ...]] = None
    """
    Stores the attribute names needed by :meth:`get_transform`, e.g. ``("image", "sem_seg")``.
    By default, it is just a tuple of argument names in :meth:`self.get_transform`, which often only
    contain "image". As long as the argument name convention is followed, there is no need for
    users to touch this attribute.
    """

    def _init(self, params=None):
        """
        Copy entries of ``params`` onto ``self`` as attributes.

        Keys equal to ``"self"`` or starting with an underscore are skipped.
        Does nothing when ``params`` is empty or None.

        Args:
            params (dict or None): mapping from attribute name to value
                (presumably ``locals()`` of a subclass constructor; verify against callers).
        """
        if params:
            for k, v in params.items():
                if k != "self" and not k.startswith("_"):
                    setattr(self, k, v)

    def get_transform(self, *args) -> Transform:
        """
        Execute the policy based on input data, and decide what transform to apply to inputs.

        Args:
            args: Any fixed-length positional arguments. By default, the name of the arguments
                should exist in the :class:`AugInput` to be used.

        Returns:
            Transform: Returns the deterministic transform to apply to the input.

        Raises:
            NotImplementedError: always, in this base class; subclasses must override.

        Examples:
        ::
            class MyAug(Augmentation):
                # if a policy needs to know both image and semantic segmentation
                def get_transform(self, image, sem_seg) -> T.Transform:
                    pass
            tfm: Transform = MyAug().get_transform(image, sem_seg)
            new_image = tfm.apply_image(image)

        Notes:
            Users can freely use arbitrary new argument names in custom
            :meth:`get_transform` method, as long as they are available in the
            input data. In detectron2 we use the following convention:

            * image: (H,W) or (H,W,C) ndarray of type uint8 in range [0, 255], or
              floating point in range [0, 1] or [0, 255].
            * boxes: (N,4) ndarray of float32. It represents the instance bounding boxes
              of N instances. Each is in XYXY format in unit of absolute coordinates.
            * sem_seg: (H,W) ndarray of type uint8. Each element is an integer label of pixel.

            We do not specify convention for other types and do not include builtin
            :class:`Augmentation` that uses other types in detectron2.
        """
        raise NotImplementedError

    def __call__(self, aug_input) -> Transform:
        """
        Augment the given `aug_input` **in-place**, and return the transform that's used.

        This method will be called to apply the augmentation. In most augmentation, it
        is enough to use the default implementation, which calls :meth:`get_transform`
        using the inputs. But a subclass can overwrite it to have more complicated logic.

        Args:
            aug_input (AugInput): an object that has attributes needed by this augmentation
                (defined by ``self.get_transform``). Its ``transform`` method will be called
                to in-place transform it.

        Returns:
            Transform: the transform that is applied on the input.
        """
        args = _get_aug_input_args(self, aug_input)
        tfm = self.get_transform(*args)
        assert isinstance(tfm, (Transform, TransformList)), (
            f"{type(self)}.get_transform must return an instance of Transform! "
            f"Got {type(tfm)} instead."
        )
        aug_input.transform(tfm)
        return tfm

    def _rand_range(self, low=1.0, high=None, size=None):
        """
        Uniform float random number between low and high.

        When ``high`` is None the range is ``0`` to ``low``. When ``size`` is None
        an empty shape (``[]``) is passed to ``np.random.uniform``.
        """
        if high is None:
            low, high = 0, low
        if size is None:
            size = []
        return np.random.uniform(low, high, size)

    def __repr__(self):
        """
        Produce something like:
        "MyAugmentation(field1={self.field1}, field2={self.field2})"

        Constructor parameters whose attribute is the very same object as the
        parameter default are omitted. Falls back to the default ``object`` repr when
        the constructor uses ``*args``/``**kwargs`` or an attribute named after a
        constructor parameter is missing.
        """
        try:
            sig = inspect.signature(self.__init__)
            classname = type(self).__name__
            argstr = []
            for name, param in sig.parameters.items():
                assert (
                    param.kind != param.VAR_POSITIONAL and param.kind != param.VAR_KEYWORD
                ), "The default __repr__ doesn't support *args or **kwargs"
                assert hasattr(self, name), (
                    "Attribute {} not found! "
                    "Default __repr__ only works if attributes match the constructor.".format(name)
                )
                attr = getattr(self, name)
                default = param.default
                # Identity (not equality) comparison: only skip untouched defaults.
                if default is attr:
                    continue
                attr_str = pprint.pformat(attr)
                if "\n" in attr_str:
                    # don't show it if pformat decides to use >1 lines
                    attr_str = "..."
                argstr.append("{}={}".format(name, attr_str))
            return "{}({})".format(classname, ", ".join(argstr))
        except AssertionError:
            return super().__repr__()

    __str__ = __repr__
class _TransformToAug(Augmentation):
    """
    Adapter exposing one fixed :class:`Transform` through the :class:`Augmentation` interface.
    """

    def __init__(self, tfm: Transform):
        """
        Args:
            tfm (Transform): the transform handed back by every :meth:`get_transform` call.
        """
        self.tfm = tfm

    def get_transform(self, *args):
        # The wrapped transform is fixed, so whatever inputs are given are ignored.
        return self.tfm

    def __repr__(self):
        # Display exactly as the wrapped transform does.
        return repr(self.tfm)

    __str__ = __repr__
def _transform_to_aug(tfm_or_aug):
    """
    Return ``tfm_or_aug`` as an :class:`Augmentation`.

    An :class:`Augmentation` is returned unchanged; a :class:`Transform` is wrapped
    in :class:`_TransformToAug`. Private, used internally to implement augmentations.

    Raises:
        AssertionError: if the argument is neither a Transform nor an Augmentation.
    """
    assert isinstance(tfm_or_aug, (Transform, Augmentation)), tfm_or_aug
    already_aug = isinstance(tfm_or_aug, Augmentation)
    return tfm_or_aug if already_aug else _TransformToAug(tfm_or_aug)
class AugmentationList(Augmentation):
    """
    Apply a sequence of augmentations, one after another.

    Use :meth:`__call__` to apply them. :meth:`get_transform` is not available for
    :class:`AugmentationList` (it will throw an error if called): the kth augmentation
    has to be applied before the inputs needed by the (k+1)th augmentation exist.
    """

    def __init__(self, augs):
        """
        Args:
            augs (list[Augmentation or Transform]): items to apply, in order.
                Each one is converted with ``_transform_to_aug``.
        """
        super().__init__()
        self.augs = list(map(_transform_to_aug, augs))

    def __call__(self, aug_input) -> TransformList:
        """
        Call every augmentation on ``aug_input`` in order.

        Returns:
            TransformList: the values returned by the individual augmentations.
        """
        applied = [aug(aug_input) for aug in self.augs]
        return TransformList(applied)

    def __repr__(self):
        joined = ", ".join(map(str, self.augs))
        return "AugmentationList[{}]".format(joined)

    __str__ = __repr__
class AugInput:
    """
    Input that can be used with :meth:`Augmentation.__call__`.
    This is a standard implementation for the majority of use cases.

    The class carries the standard attributes **"image", "boxes", "sem_seg"**
    set in :meth:`__init__`, which different augmentations may need.
    Most augmentation policies do not need attributes beyond these three.

    After augmentations have been applied to these attributes (through
    :meth:`AugInput.transform`), the returned transforms can be used to transform
    any other data structures that users have.

    Examples:
    ::
        input = AugInput(image, boxes=boxes)
        tfms = augmentation(input)
        transformed_image = input.image
        transformed_boxes = input.boxes
        transformed_other_data = tfms.apply_other(other_data)

    An extended project that works with new data types may implement augmentation policies
    that need other inputs. An algorithm may need to transform inputs in a way different
    from the standard approach defined in this class. In those rare situations, users can
    implement a class similar to this class, that satisfies the following conditions:

    * The input must provide access to these data in the form of attribute access
      (``getattr``). For example, if an :class:`Augmentation` to be applied needs "image"
      and "sem_seg" arguments, its input must have the attribute "image" and "sem_seg".
    * The input must have a ``transform(tfm: Transform) -> None`` method which
      in-place transforms all its attributes.
    """

    # TODO maybe should support more builtin data types here
    def __init__(
        self,
        image: np.ndarray,
        *,
        boxes: Optional[np.ndarray] = None,
        sem_seg: Optional[np.ndarray] = None,
    ):
        """
        Args:
            image (ndarray): (H,W) or (H,W,C) ndarray of type uint8 in range [0, 255], or
                floating point in range [0, 1] or [0, 255]. The meaning of C is up
                to users.
            boxes (ndarray or None): Nx4 float32 boxes in XYXY_ABS mode
            sem_seg (ndarray or None): HxW uint8 semantic segmentation mask. Each element
                is an integer label of pixel.
        """
        # Only the image is validated; boxes and sem_seg are stored as given.
        _check_img_dtype(image)
        self.image, self.boxes, self.sem_seg = image, boxes, sem_seg

    def transform(self, tfm: Transform) -> None:
        """
        Transform every attribute held by this object, in place.

        "In place" means that after this call an attribute such as ``self.image``
        holds the transformed data. ``boxes`` and ``sem_seg`` are left alone
        while they are None.
        """
        self.image = tfm.apply_image(self.image)

        has_boxes = self.boxes is not None
        if has_boxes:
            self.boxes = tfm.apply_box(self.boxes)

        has_sem_seg = self.sem_seg is not None
        if has_sem_seg:
            self.sem_seg = tfm.apply_segmentation(self.sem_seg)

    def apply_augmentations(
        self, augmentations: List[Union[Augmentation, Transform]]
    ) -> TransformList:
        """
        Equivalent of ``AugmentationList(augmentations)(self)``
        """
        aug_list = AugmentationList(augmentations)
        return aug_list(self)
def apply_augmentations(augmentations: List[Union[Transform, Augmentation]], inputs):
    """
    Use ``T.AugmentationList(augmentations)(inputs)`` instead.

    Args:
        augmentations: the augmentations or transforms to apply.
        inputs: either a ``numpy.ndarray`` image, which is wrapped in an
            :class:`AugInput`, or an object providing ``apply_augmentations``.

    Returns:
        tuple: ``(result, tfms)``. ``result`` is the ``image`` attribute of the
        wrapper when an ndarray was given, otherwise ``inputs`` itself; ``tfms`` is
        what ``inputs.apply_augmentations(augmentations)`` returned.
    """
    # Bare ndarray: the common image-only case, also kept for backward compatibility.
    image_only = isinstance(inputs, np.ndarray)
    if image_only:
        inputs = AugInput(inputs)

    tfms = inputs.apply_augmentations(augmentations)
    result = inputs.image if image_only else inputs
    return result, tfms
# Aliases: each name below is bound to the very same object as the name on its
# right-hand side, so both spellings can be used interchangeably.
apply_transform_gens = apply_augmentations
"""
Alias for backward-compatibility.
"""
TransformGen = Augmentation
"""
Alias for Augmentation, since it is something that generates :class:`Transform`s
"""
StandardAugInput = AugInput
"""
Alias for compatibility. It's not worth the complexity to have two classes.
"""