# Source code for tensorpack.utils.viz

# -*- coding: utf-8 -*-
# File: viz.py
# Credit: zxytim

import numpy as np
import os
import sys

from ..utils.develop import create_dummy_func  # noqa
from .argtools import shape2d
from .fs import mkdir_p

try:
    import cv2
except ImportError:
    pass


# Names exported by a star-import of this module.
# NOTE: ``draw_text`` is defined in this file but is not listed here.
__all__ = ['interactive_imshow',
           'stack_patches', 'gen_stack_patches',
           'dump_dataflow_images', 'intensity_to_rgb',
           'draw_boxes']


def interactive_imshow(img, lclick_cb=None, rclick_cb=None, **kwargs):
    """
    Show an image in an OpenCV window and react to one key press.

    Args:
        img (np.ndarray): an image (expect BGR) to show.
        lclick_cb, rclick_cb: a callback ``func(img, x, y)`` invoked when the
            left/right mouse button is released.
        kwargs: keypress callbacks, given as
            ``{key_cb_a: callback_img, key_cb_b: callback_img}``. Each is a
            ``func(img)`` and takes priority over the built-in keys below.

    Built-in keypress handlers:

    * q: destroy the current window
    * x: execute ``sys.exit()``
    * s: save image to "out.png"
    * + or =: enlarge the image by 1.3x and show it again
    * -: shrink the image to 0.7x and show it again
    """
    window = 'tensorpack_viz_window'
    cv2.imshow(window, img)

    def mouse_cb(event, x, y, *args):
        # Pick the user callback that matches the button-release event.
        if event == cv2.EVENT_LBUTTONUP:
            handler = lclick_cb
        elif event == cv2.EVENT_RBUTTONUP:
            handler = rclick_cb
        else:
            handler = None
        if handler is not None:
            handler(img, x, y)

    cv2.setMouseCallback(window, mouse_cb)

    # Block until a key code below 128 arrives.
    while True:
        code = cv2.waitKey(-1)
        if code < 128:
            break
    key = chr(code & 0xff)

    user_cb = 'key_cb_' + key
    zoom_factor = {'+': 1.3, '=': 1.3, '-': 0.7}

    if user_cb in kwargs:
        kwargs[user_cb](img)
    elif key == 'q':
        cv2.destroyWindow(window)
    elif key == 'x':
        sys.exit()
    elif key == 's':
        cv2.imwrite('out.png', img)
    elif key in zoom_factor:
        factor = zoom_factor[key]
        img = cv2.resize(img, None, fx=factor, fy=factor,
                         interpolation=cv2.INTER_CUBIC)
        interactive_imshow(img, lclick_cb, rclick_cb, **kwargs)
def _preprocess_patch_list(plist):
    """
    Convert a collection of patches into a single 4-D (N, H, W, C) array.

    Args:
        plist (list[ndarray] or ndarray): NHW or NHWC patches of equal shape.

    Returns:
        np.ndarray: a 4-D array whose last dimension is 1 or 3.
    """
    plist = np.asarray(plist)
    # The ``np.object`` alias no longer exists in recent NumPy releases;
    # the builtin ``object`` compares equal to the object dtype.
    assert plist.dtype != object
    if plist.ndim == 3:
        # NHW -> NHW1
        plist = plist[:, :, :, np.newaxis]
    assert plist.ndim == 4 and plist.shape[3] in [1, 3], plist.shape
    return plist


def _pad_patch_list(plist, bgcolor):
    """
    Pad patches of different sizes to a common size, centering each patch.

    Every patch is first expanded to 3 channels; the result is filled with
    ``bgcolor`` outside of the patches.

    Args:
        plist (list[ndarray]): HW, HW1 or HW3 patches.
        bgcolor (int or 3-tuple): background color.

    Returns:
        np.ndarray: array of shape (N, max_height, max_width, 3) with the
        dtype of the first patch.
    """
    if isinstance(bgcolor, int):
        bgcolor = (bgcolor, bgcolor, bgcolor)

    def _pad_channel(plist):
        ret = []
        for p in plist:
            if len(p.shape) == 2:
                p = p[:, :, np.newaxis]
            if p.shape[2] == 1:
                p = np.repeat(p, 3, 2)
            ret.append(p)
        return ret

    plist = _pad_channel(plist)
    shapes = [x.shape for x in plist]
    ph = max(s[0] for s in shapes)
    pw = max(s[1] for s in shapes)

    ret = np.zeros((len(plist), ph, pw, 3), dtype=plist[0].dtype)
    ret[:, :, :] = bgcolor
    for idx, p in enumerate(plist):
        s = p.shape
        # offsets that center the patch inside the (ph, pw) cell
        sh = (ph - s[0]) // 2
        sw = (pw - s[1]) // 2
        ret[idx, sh:sh + s[0], sw:sw + s[1], :] = p
    return ret


class Canvas(object):
    """
    A uint8 image holding a ``nr_row`` x ``nr_col`` grid of equally sized
    patches, separated by ``border`` pixels of background.
    """

    def __init__(self, ph, pw,
                 nr_row, nr_col,
                 channel, border, bgcolor):
        """
        Args:
            ph, pw (int): height and width of one patch.
            nr_row, nr_col (int): number of rows and columns of the grid.
            channel (int): number of channels of the patches.
            border (int or None): gap between patches. None means
                ``int(0.05 * min(ph, pw))``.
            bgcolor (int or 3-tuple): background color. A non-int value
                makes the canvas 3-channel.
        """
        self.ph = ph
        self.pw = pw
        self.nr_row = nr_row
        self.nr_col = nr_col

        if border is None:
            border = int(0.05 * min(ph, pw))
        self.border = border

        if isinstance(bgcolor, int):
            bgchannel = 1
        else:
            bgchannel = 3
        self.bgcolor = bgcolor
        self.channel = max(channel, bgchannel)

        # No border after the last row / column.
        self.canvas = np.zeros((nr_row * (ph + border) - border,
                                nr_col * (pw + border) - border,
                                self.channel), dtype='uint8')

    def draw_patches(self, plist):
        """
        Reset the canvas to the background color and draw the patches in
        row-major order.

        Args:
            plist (np.ndarray): NHWC patches, at most ``nr_row * nr_col``.
        """
        assert self.nr_row * self.nr_col >= len(plist), \
            "{}*{} < {}".format(self.nr_row, self.nr_col, len(plist))
        if self.channel == 3 and plist.shape[3] == 1:
            plist = np.repeat(plist, 3, axis=3)
        cur_row, cur_col = 0, 0
        if self.channel == 1:
            self.canvas.fill(self.bgcolor)
        else:
            self.canvas[:, :, :] = self.bgcolor
        for patch in plist:
            r0 = cur_row * (self.ph + self.border)
            c0 = cur_col * (self.pw + self.border)
            self.canvas[r0:r0 + self.ph, c0:c0 + self.pw] = patch
            cur_col += 1
            if cur_col == self.nr_col:
                cur_col = 0
                cur_row += 1

    def get_patchid_from_coord(self, x, y):
        """
        Map a pixel coordinate on the canvas to the row-major index of the
        grid cell containing it.

        Args:
            x, y (int): column and row of the pixel.

        Returns:
            int: index of the grid cell. It may be larger than the number of
            patches actually drawn if the cell is empty.
        """
        col = x // (self.pw + self.border)
        # Rows are spaced by the patch height, not its width.
        row = y // (self.ph + self.border)
        return row * self.nr_col + col
def stack_patches(
        patch_list, nr_row, nr_col, border=None,
        pad=False, bgcolor=255, viz=False, lclick_cb=None):
    """
    Stack patches into a grid, to produce visualizations like the following:

    .. image:: https://github.com/tensorpack/tensorpack/raw/master/examples/GAN/demo/BEGAN-CelebA-samples.jpg

    Args:
        patch_list(list[ndarray] or ndarray): NHW or NHWC images in [0,255].
        nr_row(int), nr_col(int): rows and cols of the grid.
            ``nr_col * nr_row`` must be no less than ``len(patch_list)``.
        border(int): border length between images.
            Defaults to ``0.05 * min(patch_width, patch_height)``.
        pad (boolean): when `patch_list` is a list, pad all patches to the
            maximum height and width. This option allows stacking patches of
            different shapes together.
        bgcolor(int or 3-tuple): background color in [0, 255]. Either an int
            or a BGR tuple.
        viz(bool): whether to use :func:`interactive_imshow` to visualize the
            results. Forced to True when ``lclick_cb`` is given.
        lclick_cb: A callback function ``f(patch, patch index in patch_list)``
            to get called when a patch gets clicked in imshow. Clicks on grid
            cells that hold no patch are ignored.

    Returns:
        np.ndarray: the stacked image.
    """
    if pad:
        patch_list = _pad_patch_list(patch_list, bgcolor)
    patch_list = _preprocess_patch_list(patch_list)

    if lclick_cb is not None:
        viz = True
    ph, pw = patch_list.shape[1:3]

    canvas = Canvas(ph, pw, nr_row, nr_col,
                    patch_list.shape[-1], border, bgcolor)

    if lclick_cb is not None:
        def lclick_callback(img, x, y):
            idx = canvas.get_patchid_from_coord(x, y)
            # The grid may have more cells than patches; an empty cell has
            # no patch to hand to the user callback.
            if idx < len(patch_list):
                lclick_cb(patch_list[idx], idx)
    else:
        lclick_callback = None

    canvas.draw_patches(patch_list)
    if viz:
        interactive_imshow(canvas.canvas, lclick_cb=lclick_callback)
    return canvas.canvas
def gen_stack_patches(patch_list,
                      nr_row=None, nr_col=None, border=None,
                      max_width=1000, max_height=1000,
                      bgcolor=255, viz=False, lclick_cb=None):
    """
    Similar to :func:`stack_patches` but with a generator interface.
    It takes a much-longer list and yields stacked results one by one.
    For example, if ``patch_list`` contains 1000 images and
    ``nr_row==nr_col==10``, this generator yields 10 stacked images.

    Note that the same underlying array is redrawn and yielded every time;
    copy it if a result must outlive the next iteration.

    Args:
        nr_row(int), nr_col(int): rows and cols of each result.
        max_width(int), max_height(int): Maximum allowed size of the stacked
            image. If ``nr_row/nr_col`` are None, this number will be used to
            infer the rows and cols. Otherwise the option is ignored.
        patch_list, border, viz, lclick_cb: same as in :func:`stack_patches`.

    Yields:
        np.ndarray: the stacked image.
    """
    # setup parameters
    patch_list = _preprocess_patch_list(patch_list)
    if lclick_cb is not None:
        viz = True
    ph, pw = patch_list.shape[1:3]

    if border is None:
        border = int(0.05 * min(ph, pw))
    if nr_row is None:
        nr_row = int(max_height / (ph + border))
    if nr_col is None:
        nr_col = int(max_width / (pw + border))
    canvas = Canvas(ph, pw, nr_row, nr_col,
                    patch_list.shape[-1], border, bgcolor)

    nr_patch = nr_row * nr_col
    start = 0

    if lclick_cb is not None:
        def lclick_callback(img, x, y):
            # ``start`` / ``end`` are read at click time, so they describe
            # the page currently on screen.
            idx = canvas.get_patchid_from_coord(x, y) + start
            # On the last page ``end`` can run past the list, so bound the
            # index by the real number of patches as well.
            if idx < min(end, len(patch_list)):
                lclick_cb(patch_list[idx], idx)
    else:
        lclick_callback = None

    while True:
        end = start + nr_patch
        cur_list = patch_list[start:end]
        if not len(cur_list):
            return
        canvas.draw_patches(cur_list)
        if viz:
            interactive_imshow(canvas.canvas, lclick_cb=lclick_callback)
        yield canvas.canvas
        start = end
def dump_dataflow_images(df, index=0, batched=True,
                         number=1000, output_dir=None,
                         scale=1, resize=None, viz=None,
                         flipRGB=False):
    """
    Dump or visualize images of a :class:`DataFlow`.

    The DataFlow is iterated repeatedly until ``number`` images have been
    processed. The function returns early if a full pass over the DataFlow
    produces no image at all.

    Args:
        df (DataFlow): the DataFlow.
        index (int): the index of the image component.
        batched (bool): whether the component contains batched images (NHW or
            NHWC) or not (HW or HWC).
        number (int): how many images to take from the DataFlow.
        output_dir (str): output directory to save images, default to not save.
            Files are named ``001.jpg``, ``002.jpg``, ...
        scale (float): scale the value, usually either 1 or 255.
        resize (tuple or None): tuple of (h, w) to resize the images to.
        viz (tuple or None): tuple of (h, w) determining the grid size to use
            with :func:`stack_patches` for visualization. No visualization
            will happen by default.
        flipRGB (bool): apply a RGB<->BGR conversion or not.
    """
    if output_dir:
        mkdir_p(output_dir)
    if viz is not None:
        viz = shape2d(viz)
        vizsize = viz[0] * viz[1]
    if resize is not None:
        resize = tuple(shape2d(resize))
        # ``resize`` is documented as (h, w) while cv2.resize takes its
        # target size as (width, height).
        # NOTE(review): assumes shape2d keeps the (h, w) order -- confirm.
        dsize = (resize[1], resize[0])
    vizlist = []

    df.reset_state()
    if number <= 0:
        return
    cnt = 0
    while True:
        got_image = False
        for dp in df:
            imgbatch = dp[index] if batched else [dp[index]]
            for img in imgbatch:
                got_image = True
                cnt += 1
                if scale != 1:
                    img = img * scale
                if resize is not None:
                    img = cv2.resize(img, dsize)
                if flipRGB:
                    img = img[:, :, ::-1]
                if output_dir:
                    fname = os.path.join(output_dir, '{:03d}.jpg'.format(cnt))
                    cv2.imwrite(fname, img)
                if viz is not None:
                    vizlist.append(img)
                    if len(vizlist) >= vizsize:
                        stack_patches(
                            vizlist[:vizsize],
                            nr_row=viz[0], nr_col=viz[1], viz=True)
                        vizlist = vizlist[vizsize:]
                # Stop only after the ``number``-th image has been handled.
                if cnt >= number:
                    return
        if not got_image:
            # An empty DataFlow would otherwise be re-iterated forever.
            return
def intensity_to_rgb(intensity, cmap='cubehelix', normalize=False):
    """
    Convert a 1-channel matrix of intensities to an RGB image employing a
    colormap. This function requires matplotlib. See `matplotlib colormaps
    <http://matplotlib.org/examples/color/colormaps_reference.html>`_ for a
    list of available colormap.

    Args:
        intensity (np.ndarray): 2-D array of intensities such as saliency.
        cmap (str): name of the colormap to use.
        normalize (bool): if True, will normalize the intensity so that it has
            minimum 0 and maximum 1. A constant map is only shifted to 0,
            because it has no range to rescale.

    Returns:
        np.ndarray: an RGB float32 image in range [0, 255], a colored heatmap.
    """
    assert intensity.ndim == 2, intensity.shape
    # astype returns a copy, so the in-place operations below never touch
    # the caller's array.
    intensity = intensity.astype("float")

    if normalize:
        intensity -= intensity.min()
        peak = intensity.max()
        # Dividing an all-zero map by its maximum would yield NaNs.
        if peak > 0:
            intensity /= peak

    cmap = plt.get_cmap(cmap)
    # The colormap returns RGBA; drop the alpha channel.
    intensity = cmap(intensity)[..., :3]
    return intensity.astype('float32') * 255.0
def draw_text(img, pos, text, color, font_scale=0.4):
    """
    Draw a text label on a filled background rectangle.

    The input is converted to uint8 first and the drawing happens on that
    converted array, which is returned.

    Args:
        img (np.ndarray): the image to annotate.
        pos (tuple): x, y; the position of the text
        text (str): the text to draw.
        color (tuple): a 3-tuple BGR color in [0, 255], used for the
            background rectangle.
        font_scale (float): scale passed to the OpenCV font routines.

    Returns:
        np.ndarray: the uint8 image with the text drawn.
    """
    img = img.astype(np.uint8)
    font = cv2.FONT_HERSHEY_SIMPLEX
    (text_w, text_h), _ = cv2.getTextSize(text, font, font_scale, 1)

    # Shift the anchor left / down when the label would otherwise leave the
    # image on the right or at the top.
    x0 = min(int(pos[0]), img.shape[1] - text_w)
    y0 = max(int(pos[1]), int(1.15 * text_h))

    # Filled background rectangle.
    cv2.rectangle(
        img,
        (x0, y0 - int(1.3 * text_h)),
        (x0 + text_w, y0),
        color, -1)

    # The text itself, in light gray.
    cv2.putText(
        img, text, (x0, y0 - int(0.25 * text_h)),
        font, font_scale, (222, 222, 222), lineType=cv2.LINE_AA)
    return img
def draw_boxes(im, boxes, labels=None, color=None):
    """
    Draw boxes, and optionally their labels, on a copy of an image.

    Args:
        im (np.ndarray): a BGR image in range [0,255]. It will not be modified.
            A grayscale (HW or HW1) image is converted to BGR first.
        boxes (np.ndarray): a numpy array of shape Nx4 where each row is
            [x1, y1, x2, y2]. May be empty, in which case nothing is drawn.
        labels: (list[str] or None)
        color: a 3-tuple BGR color (in range [0, 255]). Defaults to a dark
            green.

    Returns:
        np.ndarray: a new image.
    """
    boxes = np.asarray(boxes, dtype='int32')
    if labels is not None:
        assert len(labels) == len(boxes), "{} != {}".format(len(labels), len(boxes))

    if boxes.size == 0:
        # Nothing to validate or draw; min()/max() would fail on empty data.
        sorted_inds = []
    else:
        areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
        sorted_inds = np.argsort(-areas)    # draw large ones first
        assert areas.min() > 0, areas.min()
        # allow equal, because we are not very strict about rounding error here
        assert boxes[:, 0].min() >= 0 and boxes[:, 1].min() >= 0 \
            and boxes[:, 2].max() <= im.shape[1] and boxes[:, 3].max() <= im.shape[0], \
            "Image shape: {}\n Boxes:\n{}".format(str(im.shape), str(boxes))

    im = im.copy()
    if color is None:
        color = (15, 128, 15)
    if im.ndim == 2 or (im.ndim == 3 and im.shape[2] == 1):
        im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)
    for i in sorted_inds:
        # Built-in ints, so the drawing calls do not depend on how the
        # OpenCV bindings treat NumPy scalar types.
        x1, y1, x2, y2 = (int(v) for v in boxes[i, :4])
        if labels is not None:
            im = draw_text(im, (x1, y1), labels[i], color=color)
        cv2.rectangle(im, (x1, y1), (x2, y2),
                      color=color, thickness=1)
    return im
# matplotlib is optional: when it cannot be imported, replace
# ``intensity_to_rgb`` with the placeholder built by ``create_dummy_func``.
try:
    import matplotlib.pyplot as plt
except (ImportError, RuntimeError):
    intensity_to_rgb = create_dummy_func('intensity_to_rgb', 'matplotlib')  # noqa


if __name__ == '__main__':
    # Manual demos; switch a guard to True to run one of them.
    if False:
        # Stack 000.png ... 099.png page by page and save every page.
        imglist = [cv2.imread("{:03d}.png".format(i)) for i in range(100)]
        pages = gen_stack_patches(imglist, max_width=500, max_height=200)
        for idx, patch in enumerate(pages):
            of = "patch{:02d}.png".format(idx)
            cv2.imwrite(of, patch)

    if False:
        # Stack two differently sized images using padding.
        imglist = []
        img = cv2.imread('out.png')
        img2 = cv2.resize(img, (300, 300))
        viz = stack_patches([img, img2], 1, 2, pad=True, viz=True)

    if False:
        # Draw two labelled boxes and show the result.
        img = cv2.imread('cat.jpg')
        boxes = np.asarray([
            [10, 30, 200, 100],
            [20, 80, 250, 250],
        ])
        img = draw_boxes(img, boxes, ['asdfasdf', '11111111111111'])
        interactive_imshow(img)