# Source code for tensorpack.dataflow.imgaug.imgproc

# -*- coding: utf-8 -*-
# File: imgproc.py


import numpy as np
import cv2

from ...utils.develop import log_deprecated
from .base import PhotometricAugmentor

# Public names exported by ``from <module> import *``: one entry per
# augmentor class defined in this file.
__all__ = ['Hue', 'Brightness', 'BrightnessScale', 'Contrast', 'MeanVarianceNormalize',
           'GaussianBlur', 'Gamma', 'Clip', 'Saturation', 'Lighting', 'MinMaxNormalize']


class Hue(PhotometricAugmentor):
    """ Randomly shift the hue of a color image.
    """

    def __init__(self, range=(0, 180), rgb=True):
        """
        Args:
            range(list or tuple): interval from which the hue offset is sampled
                (maximum range can be [-90,90] for both uint8 and float32)
            rgb (bool): True when the input channel order is RGB, False for BGR.
        """
        super(Hue, self).__init__()
        rgb = bool(rgb)
        # NOTE: every local name here is handed to _init(); keep the set of locals stable.
        self._init(locals())

    def _get_augment_params(self, _):
        # One scalar hue offset per image, drawn from the configured interval.
        return self._rand_range(*self.range)

    def _augment(self, img, hue):
        # Choose the forward/backward conversion codes together so that they
        # always agree on the channel order.
        if self.rgb:
            to_hsv, from_hsv = cv2.COLOR_RGB2HSV, cv2.COLOR_HSV2RGB
        else:
            to_hsv, from_hsv = cv2.COLOR_BGR2HSV, cv2.COLOR_HSV2BGR

        hsv = cv2.cvtColor(img, to_hsv)
        # https://docs.opencv.org/3.2.0/de/d25/imgproc_color_conversions.html#color_convert_rgb_hsv
        # OpenCV stores hue as 0-179 for 8-bit images and as 0-360 for floating
        # point images, so the offset is doubled in the floating point case.
        if hsv.dtype.itemsize == 1:
            offset, period = hue, 180
        else:
            offset, period = 2 * hue, 360
        hsv[..., 0] = (hsv[..., 0] + offset) % period
        return cv2.cvtColor(hsv, from_hsv)


class Brightness(PhotometricAugmentor):
    """
    Change brightness by adding one random offset to every pixel.
    """
    def __init__(self, delta, clip=True):
        """
        Args:
            delta (float): the offset is sampled from [-delta,delta]
            clip (bool): clip results to [0,255] even when data type is not uint8.
        """
        super(Brightness, self).__init__()
        assert delta > 0
        # NOTE: every local name here is handed to _init(); keep the set of locals stable.
        self._init(locals())

    def _get_augment_params(self, _):
        bound = self.delta
        return self._rand_range(-bound, bound)

    def _augment(self, img, v):
        src_dtype = img.dtype
        # Work on a float32 copy so the in-place addition cannot wrap around
        # in an integer dtype.
        shifted = img.astype('float32')
        shifted += v
        # uint8 inputs are always clipped; other dtypes only when requested.
        needs_clip = self.clip or src_dtype == np.uint8
        if needs_clip:
            shifted = np.clip(shifted, 0, 255)
        return shifted.astype(src_dtype)


class BrightnessScale(PhotometricAugmentor):
    """
    Change brightness by multiplying every pixel with one random factor.
    """
    def __init__(self, range, clip=True):
        """
        Args:
            range (tuple): the scale factor is sampled from (range[0], range[1])
            clip (bool): clip results to [0,255] even when data type is not uint8.
        """
        super(BrightnessScale, self).__init__()
        # NOTE: every local name here is handed to _init(); keep the set of locals stable.
        self._init(locals())

    def _get_augment_params(self, _):
        # One scalar scale factor per image.
        return self._rand_range(*self.range)

    def _augment(self, img, v):
        src_dtype = img.dtype
        # Work on a float32 copy so the in-place multiplication cannot wrap
        # around in an integer dtype.
        scaled = img.astype('float32')
        scaled *= v
        # uint8 inputs are always clipped; other dtypes only when requested.
        needs_clip = self.clip or src_dtype == np.uint8
        if needs_clip:
            scaled = np.clip(scaled, 0, 255)
        return scaled.astype(src_dtype)


class Contrast(PhotometricAugmentor):
    """
    Apply ``x = (x - mean) * contrast_factor + mean`` to the image, where the
    mean is either the per-channel mean or the mean of the grayscale image.
    """

    def __init__(self, factor_range, rgb=None, clip=True):
        """
        Args:
            factor_range (list or tuple): an interval to randomly sample the `contrast_factor`.
            rgb (bool or None): if None, use the mean per-channel. Otherwise the
                image is converted to grayscale (as RGB if True, as BGR if False)
                and the mean of the grayscale image is used.
            clip (bool): clip to [0, 255] even when data type is not uint8.
        """
        super(Contrast, self).__init__()
        # NOTE: every local name here is handed to _init(); keep the set of locals stable.
        self._init(locals())

    def _get_augment_params(self, _):
        # One scalar contrast factor per image.
        return self._rand_range(*self.factor_range)

    def _augment(self, img, r):
        src_dtype = img.dtype

        if img.ndim != 3:
            # No channel axis: a single global mean.
            mean = np.mean(img)
        elif self.rgb is None:
            # One mean per channel, kept broadcastable against the image.
            mean = np.mean(img, axis=(0, 1), keepdims=True)
        else:
            code = cv2.COLOR_RGB2GRAY if self.rgb else cv2.COLOR_BGR2GRAY
            mean = np.mean(cv2.cvtColor(img.astype('float32'), code))

        # Algebraically the same as (img - mean) * r + mean.
        out = img * r + mean * (1 - r)
        if self.clip or src_dtype == np.uint8:
            out = np.clip(out, 0, 255)
        return out.astype(src_dtype)


class MeanVarianceNormalize(PhotometricAugmentor):
    """
    Linearly scales the image to have zero mean and unit variance.
    ``x = (x - mean) / adjusted_stddev``
    where ``adjusted_stddev = max(stddev, 1.0/sqrt(num_pixels * channels))``

    This augmentor always returns float32 images.
    """

    def __init__(self, all_channel=True):
        """
        Args:
            all_channel (bool): if True, normalize all channels together. else separately.
        """
        # Run the base-class initializer like the other augmentors in this file.
        # The explicit two-argument form is deliberate: zero-argument super()
        # would add a ``__class__`` entry to ``locals()``, which is passed on below.
        super(MeanVarianceNormalize, self).__init__()
        self._init(locals())

    def _augment(self, img, _):
        img = img.astype('float32')
        if self.all_channel:
            # Single mean/std over every element.
            mean = np.mean(img)
            std = np.std(img)
        else:
            # Statistics over the first two axes only, kept broadcastable.
            mean = np.mean(img, axis=(0, 1), keepdims=True)
            std = np.std(img, axis=(0, 1), keepdims=True)
        # Lower-bound the std so a constant image does not divide by zero.
        std = np.maximum(std, 1.0 / np.sqrt(np.prod(img.shape)))
        img = (img - mean) / std
        return img


class GaussianBlur(PhotometricAugmentor):
    """ Blur the image with a Gaussian kernel of random window size"""

    def __init__(self, size_range=(0, 3), sigma_range=(0, 0), symmetric=True, max_size=None):
        """
        Args:
            size_range (tuple[int]): Gaussian window size would be 2 * size +
                1, where size is randomly sampled from this [low, high) range.
                A non-sequence value ``s`` is treated as ``(0, s)``.
            sigma_range (tuple[float]): min,max of the sigma value. 0 means
                opencv's default.
            symmetric (bool): whether to use the same size & sigma for x and y.
            max_size (int): deprecated
        """
        super(GaussianBlur, self).__init__()
        assert isinstance(sigma_range, (list, tuple)), sigma_range
        if max_size is not None:
            # The deprecated argument, when given, overrides size_range.
            log_deprecated("GaussianBlur(max_size=)", "Use size_range= instead!", "2020-09-01")
            size_range = (0, max_size)
        elif not isinstance(size_range, (list, tuple)):
            size_range = (0, size_range)
        # NOTE: every local name here is handed to _init(); keep the set of locals stable.
        self._init(locals())

    def _get_augment_params(self, _):
        low, high = self.size_range[0], self.size_range[1]
        # 2 * size + 1 keeps both kernel dimensions odd.
        ksize = self.rng.randint(low, high, size=(2,)) * 2 + 1
        sigma = self._rand_range(*self.sigma_range, size=(2,))
        if self.symmetric:
            # Reuse the x values for y.
            ksize[1] = ksize[0]
            sigma[1] = sigma[0]
        return tuple(ksize), tuple(sigma)

    def _augment(self, img, prm):
        ksize, sigma = prm
        blurred = cv2.GaussianBlur(img, ksize, sigmaX=sigma[0], sigmaY=sigma[1],
                                   borderType=cv2.BORDER_REPLICATE)
        # Reshape back to the input shape in case the blur result differs in shape.
        return np.reshape(blurred, img.shape)


class Gamma(PhotometricAugmentor):
    """ Apply a random gamma adjustment """
    def __init__(self, range=(-0.5, 0.5)):
        """
        Args:
            range(list or tuple): interval the gamma value is sampled from
        """
        super(Gamma, self).__init__()
        # NOTE: every local name here is handed to _init(); keep the set of locals stable.
        self._init(locals())

    def _get_augment_params(self, _):
        # One scalar gamma per image.
        return self._rand_range(*self.range)

    def _augment(self, img, gamma):
        src_dtype = img.dtype
        # 256-entry lookup table: normalized intensity raised to 1 / (1 + gamma),
        # rescaled to 0..255.
        exponent = 1. / (1. + gamma)
        ramp = np.arange(256, dtype='float32') / 255
        lut = (ramp ** exponent * 255).astype('uint8')
        # The table is indexed by uint8 values, so the input is clipped and cast first.
        img = np.clip(img, 0, 255).astype('uint8')
        ret = cv2.LUT(img, lut).astype(src_dtype)
        if img.ndim == 3 and ret.ndim == 2:
            # Restore the channel axis if the lookup result came back 2-D.
            ret = np.expand_dims(ret, axis=2)
        return ret


class Clip(PhotometricAugmentor):
    """ Clip the pixel values """

    def __init__(self, min=0, max=255):
        """
        Args:
            min, max: the clip range
        """
        # Run the base-class initializer like the other augmentors in this file.
        # The explicit two-argument form is deliberate: zero-argument super()
        # would add a ``__class__`` entry to ``locals()``, which is passed on below.
        super(Clip, self).__init__()
        self._init(locals())

    def _augment(self, img, _):
        # No random parameters: values are limited to [self.min, self.max].
        return np.clip(img, self.min, self.max)


class Saturation(PhotometricAugmentor):
    """ Randomly adjust saturation.
        Follows the implementation in `fb.resnet.torch
        <https://github.com/facebook/fb.resnet.torch/blob/master/datasets/transforms.lua#L218>`__.
    """

    def __init__(self, alpha=0.4, rgb=True, clip=True):
        """
        Args:
            alpha(float): maximum saturation change.
            rgb (bool): whether input is RGB or BGR.
            clip (bool): clip results to [0,255] even when data type is not uint8.
        """
        # Explicit two-argument super(), matching the other augmentors in this
        # file. The zero-argument form makes the compiler add an implicit
        # ``__class__`` cell to this method, which then shows up in ``locals()``
        # and would be handed to ``_init`` along with the real parameters.
        super(Saturation, self).__init__()
        rgb = bool(rgb)
        assert alpha < 1
        self._init(locals())

    def _get_augment_params(self, _):
        # Blend weight in [1 - alpha, 1 + alpha]; 1 leaves the image unchanged.
        return 1 + self._rand_range(-self.alpha, self.alpha)

    def _augment(self, img, v):
        old_dtype = img.dtype
        m = cv2.COLOR_RGB2GRAY if self.rgb else cv2.COLOR_BGR2GRAY
        grey = cv2.cvtColor(img, m)
        # Blend the image with its grayscale version; the grayscale term gets a
        # trailing axis so it broadcasts over the channels.
        ret = img * v + (grey * (1 - v))[:, :, np.newaxis]
        if self.clip or old_dtype == np.uint8:
            ret = np.clip(ret, 0, 255)
        return ret.astype(old_dtype)


class Lighting(PhotometricAugmentor):
    """ Lighting noise, as in the paper
        `ImageNet Classification with Deep Convolutional Neural Networks
        <https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf>`_.
        The implementation follows `fb.resnet.torch
        <https://github.com/facebook/fb.resnet.torch/blob/master/datasets/transforms.lua#L184>`__.
    """

    def __init__(self, std, eigval, eigvec, clip=True):
        """
        Args:
            std (float): maximum standard deviation
            eigval: a vector of (3,). The eigenvalues of 3 channels.
            eigvec: a 3x3 matrix. Each column is one eigen vector.
            clip (bool): clip results to [0,255] even when data type is not uint8.
        """
        super(Lighting, self).__init__()
        # Both are stored as float32 arrays and shape-checked up front.
        eigval = np.asarray(eigval, dtype="float32")
        eigvec = np.asarray(eigvec, dtype="float32")
        assert eigval.shape == (3,)
        assert eigvec.shape == (3, 3)
        # NOTE: every local name here is handed to _init(); keep the set of locals stable.
        self._init(locals())

    def _get_augment_params(self, img):
        # Requires exactly three entries along the third axis.
        assert img.shape[2] == 3
        noise = self.rng.randn(3) * self.std
        return noise.astype("float32")

    def _augment(self, img, v):
        src_dtype = img.dtype
        # Weight each random coefficient by its eigenvalue, then combine the
        # eigenvector columns into one offset per channel.
        weighted = (v * self.eigval).reshape((3, 1))
        inc = np.dot(self.eigvec, weighted).reshape((3,))
        out = np.add(img, inc)
        if self.clip or src_dtype == np.uint8:
            out = np.clip(out, 0, 255)
        return out.astype(src_dtype)


class MinMaxNormalize(PhotometricAugmentor):
    """
    Linearly scales the image to the range [min, max].

    A constant image (or, with ``all_channel=False``, a constant channel) has
    no value range to stretch; it is mapped to ``min`` instead of producing NaN.

    This augmentor always returns float32 images.
    """
    def __init__(self, min=0, max=255, all_channel=True):
        """
        Args:
            max (float): The new maximum value
            min (float): The new minimum value
            all_channel (bool): if True, normalize all channels together. else separately.
        """
        # Run the base-class initializer like the other augmentors in this file.
        # The explicit two-argument form is deliberate: zero-argument super()
        # would add a ``__class__`` entry to ``locals()``, which is passed on below.
        super(MinMaxNormalize, self).__init__()
        self._init(locals())

    def _augment(self, img, _):
        img = img.astype('float32')
        if self.all_channel:
            minimum = np.min(img)
            maximum = np.max(img)
        else:
            # Extrema over the first two axes only, kept broadcastable.
            minimum = np.min(img, axis=(0, 1), keepdims=True)
            maximum = np.max(img, axis=(0, 1), keepdims=True)
        # Guard against 0/0: where the value range is empty, divide by 1 so the
        # (all-zero) numerator yields ``self.min``. Non-degenerate inputs are
        # divided by exactly the same value as before.
        span = np.asarray(maximum - minimum, dtype='float32')
        span = np.where(span > 0, span, np.float32(1))
        img = (self.max - self.min) * (img - minimum) / span + self.min
        return img