Source code for tensorpack.dataflow.imgaug.imgproc

# -*- coding: utf-8 -*-
# File: imgproc.py


import numpy as np
import cv2

from ...utils.develop import log_deprecated
from .base import PhotometricAugmentor

# Names exported by a star-import of this module.
__all__ = [
    'Hue',
    'Brightness',
    'BrightnessScale',
    'Contrast',
    'MeanVarianceNormalize',
    'GaussianBlur',
    'Gamma',
    'Clip',
    'Saturation',
    'Lighting',
    'MinMaxNormalize',
]


class Hue(PhotometricAugmentor):
    """
    Shift the hue channel of a color image by a random offset.
    """

    def __init__(self, range=(0, 180), rgb=True):
        """
        Args:
            range (list or tuple): interval from which the hue offset is drawn
                (maximum range can be [-90,90] for both uint8 and float32).
            rgb (bool): True when the input is RGB, False when it is BGR.
        """
        super(Hue, self).__init__()
        rgb = bool(rgb)
        # The methods below read ``self.range`` / ``self.rgb``, so the local
        # names handed to ``_init`` must stay exactly as they are.
        self._init(locals())

    def _get_augment_params(self, _):
        """Draw the hue offset for one image from the configured range."""
        return self._rand_range(*self.range)

    def _augment(self, img, hue):
        """Convert to HSV, rotate the hue channel by ``hue``, convert back."""
        if self.rgb:
            to_hsv, from_hsv = cv2.COLOR_RGB2HSV, cv2.COLOR_HSV2RGB
        else:
            to_hsv, from_hsv = cv2.COLOR_BGR2HSV, cv2.COLOR_HSV2BGR

        hsv = cv2.cvtColor(img, to_hsv)
        # https://docs.opencv.org/3.2.0/de/d25/imgproc_color_conversions.html#color_convert_rgb_hsv
        if hsv.dtype.itemsize == 1:
            # OpenCV uses 0-179 for 8-bit images
            offset, period = hue, 180
        else:
            # OpenCV uses 0-360 for floating point images
            offset, period = 2 * hue, 360
        hsv[..., 0] = (hsv[..., 0] + offset) % period

        return cv2.cvtColor(hsv, from_hsv)
class Brightness(PhotometricAugmentor):
    """
    Change brightness by adding one random offset to every pixel.
    """

    def __init__(self, delta, clip=True):
        """
        Args:
            delta (float): the offset is sampled within [-delta,delta].
            clip (bool): clip results to [0,255] even when data type is not uint8.
        """
        super(Brightness, self).__init__()
        assert delta > 0
        # ``self.delta`` / ``self.clip`` are read back below; keep local names.
        self._init(locals())

    def _get_augment_params(self, _):
        """Sample the additive offset for one image."""
        bound = self.delta
        return self._rand_range(-bound, bound)

    def _augment(self, img, v):
        """Add ``v`` in float32, optionally clip, and restore the input dtype."""
        src_dtype = img.dtype
        shifted = img.astype('float32')
        # In-place add keeps the working buffer float32.
        shifted += v
        must_clip = self.clip or src_dtype == np.uint8
        if must_clip:
            shifted = np.clip(shifted, 0, 255)
        return shifted.astype(src_dtype)
class BrightnessScale(PhotometricAugmentor):
    """
    Change brightness by multiplying every pixel with one random factor.
    """

    def __init__(self, range, clip=True):
        """
        Args:
            range (tuple): the scale factor is sampled in (range[0], range[1]).
            clip (bool): clip results to [0,255] even when data type is not uint8.
        """
        super(BrightnessScale, self).__init__()
        # ``self.range`` / ``self.clip`` are read back below; keep local names.
        self._init(locals())

    def _get_augment_params(self, _):
        """Sample the multiplicative factor for one image."""
        return self._rand_range(*self.range)

    def _augment(self, img, v):
        """Scale by ``v`` in float32, optionally clip, and restore the input dtype."""
        src_dtype = img.dtype
        scaled = img.astype('float32')
        # In-place multiply keeps the working buffer float32.
        scaled *= v
        must_clip = self.clip or src_dtype == np.uint8
        if must_clip:
            scaled = np.clip(scaled, 0, 255)
        return scaled.astype(src_dtype)
class Contrast(PhotometricAugmentor):
    """
    Apply ``x = (x - mean) * contrast_factor + mean`` to each channel.
    """

    def __init__(self, factor_range, rgb=None, clip=True):
        """
        Args:
            factor_range (list or tuple): interval from which `contrast_factor` is sampled.
            rgb (bool or None): channel order used to compute a single grey mean;
                if None, use the mean per-channel.
            clip (bool): clip to [0, 255] even when data type is not uint8.
        """
        super(Contrast, self).__init__()
        # ``self.factor_range`` / ``self.rgb`` / ``self.clip`` are read back below.
        self._init(locals())

    def _get_augment_params(self, _):
        """Sample the contrast factor for one image."""
        return self._rand_range(*self.factor_range)

    def _augment(self, img, r):
        """Blend the image with its mean according to the factor ``r``."""
        src_dtype = img.dtype

        if img.ndim != 3:
            # No channel axis: one global mean.
            mean = np.mean(img)
        elif self.rgb is None:
            # One mean per channel, kept broadcastable against the image.
            mean = np.mean(img, axis=(0, 1), keepdims=True)
        else:
            # A single mean taken from the grey-scale version of the image.
            code = cv2.COLOR_RGB2GRAY if self.rgb else cv2.COLOR_BGR2GRAY
            grey = cv2.cvtColor(img.astype('float32'), code)
            mean = np.mean(grey)

        # Algebraically the same as (img - mean) * r + mean.
        out = img * r + mean * (1 - r)
        must_clip = self.clip or src_dtype == np.uint8
        if must_clip:
            out = np.clip(out, 0, 255)
        return out.astype(src_dtype)
class MeanVarianceNormalize(PhotometricAugmentor):
    """
    Linearly scales the image to have zero mean and unit variance.

    ``x = (x - mean) / adjusted_stddev``

    where ``adjusted_stddev = max(stddev, 1.0/sqrt(num_pixels * channels))``

    This augmentor always returns float32 images.
    """

    def __init__(self, all_channel=True):
        """
        Args:
            all_channel (bool): if True, normalize all channels together. else separately.
        """
        # Run the base initializer first, as the other augmentors in this
        # module do before calling ``_init``.
        super(MeanVarianceNormalize, self).__init__()
        # ``self.all_channel`` is read back in ``_augment``; keep the local name.
        self._init(locals())

    def _augment(self, img, _):
        """
        Normalize ``img`` to zero mean and unit variance.

        Args:
            img: input image array; it is converted to float32 before use.
            _: unused augmentation parameters.

        Returns:
            A float32 array with the same shape as ``img``.
        """
        img = img.astype('float32')
        if self.all_channel:
            mean = np.mean(img)
            std = np.std(img)
        else:
            # Per-channel statistics, kept broadcastable against the image.
            mean = np.mean(img, axis=(0, 1), keepdims=True)
            std = np.std(img, axis=(0, 1), keepdims=True)

        # Lower bound on the deviation so a constant image does not divide by
        # zero. It is cast to float32 explicitly: as a float64 scalar it would
        # promote the whole result to float64 under NumPy >= 2 scalar
        # promotion rules, breaking the documented float32 output.
        floor = np.float32(1.0 / np.sqrt(np.prod(img.shape)))
        std = np.maximum(std, floor)
        return (img - mean) / std
class GaussianBlur(PhotometricAugmentor):
    """
    Blur the image with a Gaussian kernel of random window size.
    """

    def __init__(self, size_range=(0, 3), sigma_range=(0, 0), symmetric=True, max_size=None):
        """
        Args:
            size_range (tuple[int]): Gaussian window size would be 2 * size + 1,
                where size is randomly sampled from this [low, high) range.
                A value that is not a list or tuple is treated as the upper bound.
            sigma_range (tuple[float]): min,max of the sigma value. 0 means opencv's default.
            symmetric (bool): whether to use the same size & sigma for x and y.
            max_size (int): deprecated
        """
        super(GaussianBlur, self).__init__()
        if not isinstance(size_range, (list, tuple)):
            size_range = (0, size_range)
        assert isinstance(sigma_range, (list, tuple)), sigma_range
        if max_size is not None:
            log_deprecated("GaussianBlur(max_size=)", "Use size_range= instead!", "2020-09-01")
            size_range = (0, max_size)
        # The attributes read below come from these local names; keep them.
        self._init(locals())

    def _get_augment_params(self, _):
        """Sample the (x, y) kernel sizes and the (x, y) sigmas for one image."""
        low, high = self.size_range[0], self.size_range[1]
        half_width = self.rng.randint(low, high, size=(2,))
        # 2 * size + 1 is always odd.
        size_xy = half_width * 2 + 1
        sigma_xy = self._rand_range(*self.sigma_range, size=(2,))
        if self.symmetric:
            # Reuse the x values for y.
            size_xy[1] = size_xy[0]
            sigma_xy[1] = sigma_xy[0]
        return tuple(size_xy), tuple(sigma_xy)

    def _augment(self, img, prm):
        """Blur ``img`` with the sampled kernel and restore the input shape."""
        ksize, sigma = prm
        blurred = cv2.GaussianBlur(
            img, ksize,
            sigmaX=sigma[0], sigmaY=sigma[1],
            borderType=cv2.BORDER_REPLICATE)
        # Reshape so the result has exactly the shape of the input.
        return np.reshape(blurred, img.shape)
class Gamma(PhotometricAugmentor):
    """
    Apply a random gamma adjustment through a 256-entry lookup table.
    """

    def __init__(self, range=(-0.5, 0.5)):
        """
        Args:
            range (list or tuple): gamma range
        """
        super(Gamma, self).__init__()
        # ``self.range`` is read back below; keep the local name.
        self._init(locals())

    def _get_augment_params(self, _):
        """Sample the gamma value for one image."""
        return self._rand_range(*self.range)

    def _augment(self, img, gamma):
        """Map pixel values through the curve ``(x / 255) ** (1 / (1 + gamma)) * 255``."""
        src_dtype = img.dtype

        exponent = 1. / (1. + gamma)
        levels = np.arange(256, dtype='float32') / 255
        lut = (levels ** exponent * 255).astype('uint8')

        # The table is indexed by 8-bit values, so quantize the input first.
        quantized = np.clip(img, 0, 255).astype('uint8')
        mapped = cv2.LUT(quantized, lut).astype(src_dtype)
        if quantized.ndim == 3 and mapped.ndim == 2:
            # Put back the channel axis if the lookup returned a 2-D result.
            mapped = mapped[..., np.newaxis]
        return mapped
class Clip(PhotometricAugmentor):
    """
    Clip the pixel values
    """

    def __init__(self, min=0, max=255):
        """
        Args:
            min, max: the clip range
        """
        # Run the base initializer first, as the other augmentors in this
        # module do before calling ``_init``.
        super(Clip, self).__init__()
        # ``self.min`` / ``self.max`` are read back in ``_augment``; keep the
        # local names.
        self._init(locals())

    def _augment(self, img, _):
        """
        Limit every value of ``img`` to the configured range.

        Args:
            img: input image array.
            _: unused augmentation parameters.

        Returns:
            The result of ``np.clip(img, self.min, self.max)``.
        """
        return np.clip(img, self.min, self.max)
class Saturation(PhotometricAugmentor):
    """
    Randomly adjust saturation by blending the image with its grey-scale version.
    Follows the implementation in `fb.resnet.torch
    <https://github.com/facebook/fb.resnet.torch/blob/master/datasets/transforms.lua#L218>`__.
    """

    def __init__(self, alpha=0.4, rgb=True, clip=True):
        """
        Args:
            alpha (float): maximum saturation change.
            rgb (bool): True when the input is RGB, False when it is BGR.
            clip (bool): clip results to [0,255] even when data type is not uint8.
        """
        super().__init__()
        rgb = bool(rgb)
        assert alpha < 1
        # ``self.alpha`` / ``self.rgb`` / ``self.clip`` are read back below.
        self._init(locals())

    def _get_augment_params(self, _):
        """Sample the blend weight, centred on 1 (no change)."""
        return 1 + self._rand_range(-self.alpha, self.alpha)

    def _augment(self, img, v):
        """Mix the image (weight ``v``) with its grey version (weight ``1 - v``)."""
        src_dtype = img.dtype
        code = cv2.COLOR_RGB2GRAY if self.rgb else cv2.COLOR_BGR2GRAY
        grey = cv2.cvtColor(img, code)
        # Give the 2-D grey image a trailing axis so it broadcasts over channels.
        grey_part = (grey * (1 - v))[:, :, np.newaxis]
        blended = img * v + grey_part
        must_clip = self.clip or src_dtype == np.uint8
        if must_clip:
            blended = np.clip(blended, 0, 255)
        return blended.astype(src_dtype)
class Lighting(PhotometricAugmentor):
    """
    Lighting noise, as in the paper
    `ImageNet Classification with Deep Convolutional Neural Networks
    <https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf>`_.
    The implementation follows `fb.resnet.torch
    <https://github.com/facebook/fb.resnet.torch/blob/master/datasets/transforms.lua#L184>`__.
    """

    def __init__(self, std, eigval, eigvec, clip=True):
        """
        Args:
            std (float): maximum standard deviation
            eigval: a vector of (3,). The eigenvalues of 3 channels.
            eigvec: a 3x3 matrix. Each column is one eigen vector.
            clip (bool): clip results to [0,255] even when data type is not uint8.
        """
        super(Lighting, self).__init__()
        eigval = np.asarray(eigval, dtype="float32")
        eigvec = np.asarray(eigvec, dtype="float32")
        assert eigval.shape == (3,)
        assert eigvec.shape == (3, 3)
        # The converted arrays, ``std`` and ``clip`` are read back below as
        # attributes; keep the local names.
        self._init(locals())

    def _get_augment_params(self, img):
        """Sample three normally distributed coefficients scaled by ``std``."""
        assert img.shape[2] == 3
        coeffs = self.rng.randn(3) * self.std
        return coeffs.astype("float32")

    def _augment(self, img, v):
        """Add the eigenvector combination weighted by ``v * eigval`` to every pixel."""
        src_dtype = img.dtype
        # Weight each coefficient by its eigenvalue and shape it as a column.
        weighted = (v * self.eigval).reshape((3, 1))
        inc = np.reshape(np.dot(self.eigvec, weighted), (3,))
        # ``inc`` has one entry per channel and broadcasts over the image.
        lit = np.add(img, inc)
        must_clip = self.clip or src_dtype == np.uint8
        if must_clip:
            lit = np.clip(lit, 0, 255)
        return lit.astype(src_dtype)
class MinMaxNormalize(PhotometricAugmentor):
    """
    Linearly scales the image to the range [min, max].

    An image (or, with ``all_channel=False``, a channel) whose values are all
    equal has no range to stretch; it is mapped to ``min`` rather than
    producing NaN from a division by zero.

    This augmentor always returns float32 images.
    """

    def __init__(self, min=0, max=255, all_channel=True):
        """
        Args:
            max (float): The new maximum value
            min (float): The new minimum value
            all_channel (bool): if True, normalize all channels together. else separately.
        """
        # Run the base initializer first, as the other augmentors in this
        # module do before calling ``_init``.
        super(MinMaxNormalize, self).__init__()
        # ``self.min`` / ``self.max`` / ``self.all_channel`` are read back in
        # ``_augment``; keep the local names.
        self._init(locals())

    def _augment(self, img, _):
        """
        Rescale ``img`` so its smallest value becomes ``min`` and its largest ``max``.

        Args:
            img: input image array; it is converted to float32 before use.
            _: unused augmentation parameters.

        Returns:
            A float32 array with the same shape as ``img``.
        """
        img = img.astype('float32')
        if self.all_channel:
            minimum = np.min(img)
            maximum = np.max(img)
        else:
            # Per-channel extrema, kept broadcastable against the image.
            minimum = np.min(img, axis=(0, 1), keepdims=True)
            maximum = np.max(img, axis=(0, 1), keepdims=True)

        span = maximum - minimum
        # Where the span is zero the numerator (img - minimum) is zero as well,
        # so dividing by 1 instead yields ``min`` instead of 0/0 = NaN.
        span = np.where(span == 0, np.float32(1), span)

        img = (self.max - self.min) * (img - minimum) / span + self.min
        # No copy when already float32; guards the documented output type when
        # ``min``/``max`` were given as wider NumPy scalars.
        return img.astype('float32', copy=False)