Source code for tensorpack.dataflow.dataset.ilsvrc

# -*- coding: utf-8 -*-
# File: ilsvrc.py

import numpy as np
import os
import tarfile
import tqdm
from pathlib import Path

from ...utils import logger
from ...utils.fs import download, get_dataset_path, mkdir_p
from ...utils.loadcaffe import get_caffe_pb
from ...utils.timer import timed_operation
from ..base import RNGDataFlow

__all__ = ['ILSVRCMeta', 'ILSVRC12', 'ILSVRC12Files', 'TinyImageNet']

CAFFE_ILSVRC12_URL = ("http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz", 17858008)


class ILSVRCMeta(object):
    """
    Provide methods to access metadata for :class:`ILSVRC12` dataset.

    The metadata files (``synsets.txt``, ``synset_words.txt``, ``<split>.txt``,
    ``imagenet_mean.binaryproto``) are read from ``self.dir``. If ``synsets.txt``
    is missing when the object is constructed, the archive at
    ``CAFFE_ILSVRC12_URL`` is downloaded and extracted into that directory.
    """

    def __init__(self, dir=None):
        """
        Args:
            dir (str): directory holding the metadata files.
                Defaults to ``get_dataset_path('ilsvrc_metadata')``.
        """
        if dir is None:
            dir = get_dataset_path('ilsvrc_metadata')
        self.dir = os.path.expanduser(dir)
        mkdir_p(self.dir)
        f = os.path.join(self.dir, 'synsets.txt')
        if not os.path.isfile(f):
            self._download_caffe_meta()
        # Loaded lazily by get_per_pixel_mean().
        self.caffepb = None

    @staticmethod
    def _read_stripped_lines(fname):
        """Return all lines of ``fname`` with surrounding whitespace removed."""
        # Use a context manager so the file handle is closed deterministically.
        with open(fname) as f:
            return [line.strip() for line in f]

    def get_synset_words_1000(self):
        """
        Returns:
            dict: {cls_number: cls_name}
        """
        fname = os.path.join(self.dir, 'synset_words.txt')
        assert os.path.isfile(fname), fname
        return dict(enumerate(self._read_stripped_lines(fname)))

    def get_synset_1000(self):
        """
        Returns:
            dict: {cls_number: synset_id}
        """
        fname = os.path.join(self.dir, 'synsets.txt')
        assert os.path.isfile(fname), fname
        return dict(enumerate(self._read_stripped_lines(fname)))

    def _download_caffe_meta(self):
        """Download the metadata archive and extract it into ``self.dir``."""
        fpath = download(CAFFE_ILSVRC12_URL[0], self.dir, expect_size=CAFFE_ILSVRC12_URL[1])
        # Close the archive once extraction is done instead of leaking the handle.
        with tarfile.open(fpath, 'r:gz') as tar:
            tar.extractall(self.dir)

    def get_image_list(self, name, dir_structure='original'):
        """
        Args:
            name (str): 'train' or 'val' or 'test'
            dir_structure (str): same as in :meth:`ILSVRC12.__init__()`.
        Returns:
            list: list of (image filename, label)
        """
        assert name in ['train', 'val', 'test']
        assert dir_structure in ['original', 'train']
        # For non-train splits stored in the two-level 'train' layout, the
        # synset directory has to be prepended to each listed filename.
        add_label_to_fname = (name != 'train' and dir_structure != 'original')
        if add_label_to_fname:
            synset = self.get_synset_1000()

        fname = os.path.join(self.dir, name + '.txt')
        assert os.path.isfile(fname), fname
        ret = []
        with open(fname) as f:
            for line in f:
                # Each line is "<image filename> <integer class id>".
                img_name, cls = line.strip().split()
                cls = int(cls)

                if add_label_to_fname:
                    img_name = os.path.join(synset[cls], img_name)

                ret.append((img_name, cls))
        assert len(ret), fname
        return ret

    def get_per_pixel_mean(self, size=None):
        """
        Args:
            size (tuple): image size in (h, w). Defaults to (256, 256).
        Returns:
            np.ndarray: per-pixel mean of shape (h, w, 3 (BGR)) in range [0, 255].
        """
        if self.caffepb is None:
            self.caffepb = get_caffe_pb()
        obj = self.caffepb.BlobProto()

        mean_file = os.path.join(self.dir, 'imagenet_mean.binaryproto')
        with open(mean_file, 'rb') as f:
            obj.ParseFromString(f.read())
        # The blob is stored channel-first; convert it to (h, w, channel).
        arr = np.array(obj.data).reshape((3, 256, 256)).astype('float32')
        arr = np.transpose(arr, [1, 2, 0])
        if size is not None:
            # Import locally so a missing OpenCV surfaces as a clear ImportError
            # here rather than a NameError on the module-level name.
            import cv2
            # cv2.resize takes (w, h), hence the reversal.
            arr = cv2.resize(arr, size[::-1])
        return arr

    @staticmethod
    def guess_dir_structure(dir):
        """
        Return the directory structure of "dir".

        Only the first entry returned by ``os.listdir`` is inspected: if it is a
        directory whose name starts with 'n', the structure is taken to be
        'train'; otherwise 'original'.

        Args:
            dir(str): something like '/path/to/imagenet/val'

        Returns:
            either 'train' or 'original'
        """
        subdir = os.listdir(dir)[0]
        # find a subdir starting with 'n'
        if subdir.startswith('n') and \
                os.path.isdir(os.path.join(dir, subdir)):
            dir_structure = 'train'
        else:
            dir_structure = 'original'
        logger.info(
            "[ILSVRC12] Assuming directory {} has '{}' structure.".format(
                dir, dir_structure))
        return dir_structure


class ILSVRC12Files(RNGDataFlow):
    """
    Same as :class:`ILSVRC12`, but produces filenames of the images instead of nparrays.
    This could be useful when ``cv2.imread`` is a bottleneck and you want to
    decode it in smarter ways (e.g. in parallel).

    Each datapoint is ``[filename, label]``, where ``filename`` is joined onto
    ``dir/name``.
    """
    def __init__(self, dir, name, meta_dir=None,
                 shuffle=None, dir_structure=None):
        """
        Same as in :class:`ILSVRC12`.
        """
        assert name in ['train', 'test', 'val'], name
        dir = os.path.expanduser(dir)
        assert os.path.isdir(dir), dir
        self.full_dir = os.path.join(dir, name)
        self.name = name
        assert os.path.isdir(self.full_dir), self.full_dir
        if meta_dir is not None:
            # Expand '~' before validating, consistent with how `dir` is handled
            # above (ILSVRCMeta expands the path again, which is harmless).
            meta_dir = os.path.expanduser(meta_dir)
            assert os.path.isdir(meta_dir), meta_dir
        if shuffle is None:
            shuffle = name == 'train'
        self.shuffle = shuffle

        # The training split always uses the two-level layout; for other
        # splits the layout is guessed unless given explicitly.
        if name == 'train':
            dir_structure = 'train'
        if dir_structure is None:
            dir_structure = ILSVRCMeta.guess_dir_structure(self.full_dir)

        meta = ILSVRCMeta(meta_dir)
        self.imglist = meta.get_image_list(name, dir_structure)

        # Sanity-check that the first few listed files exist on disk.
        for fname, _ in self.imglist[:10]:
            fname = os.path.join(self.full_dir, fname)
            assert os.path.isfile(fname), fname

    def __len__(self):
        return len(self.imglist)

    def __iter__(self):
        idxs = np.arange(len(self.imglist))
        if self.shuffle:
            self.rng.shuffle(idxs)
        for k in idxs:
            fname, label = self.imglist[k]
            yield [os.path.join(self.full_dir, fname), label]


class ILSVRC12(ILSVRC12Files):
    """
    The ILSVRC12 classification dataset, aka the commonly used 1000 classes ImageNet subset.
    This dataflow produces uint8 images of shape [h, w, 3(BGR)], and a label between [0, 999].
    The label map follows the synsets.txt file in
    http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz,
    which can also be queried using :class:`ILSVRCMeta`.
    """
    def __init__(self, dir, name, meta_dir=None,
                 shuffle=None, dir_structure=None):
        """
        Args:
            dir (str): A directory containing a subdir named ``name``,
                containing the images in a structure described below.
            name (str): One of 'train' or 'val' or 'test'.
            meta_dir (str): directory with the metadata files, passed to
                :class:`ILSVRCMeta`. Defaults to None.
            shuffle (bool): shuffle the dataset. Defaults to True if name=='train'.
            dir_structure (str): One of 'original' or 'train'.
                The directory structure for the 'val' directory.
                'original' means the original decompressed directory, which only has list of image files (as below).
                If set to 'train', it expects the same two-level directory structure similar to 'dir/train/'.
                By default, it tries to automatically detect the structure.
                You probably do not need to care about this option because 'original' is what people usually have.

        Example:

        When `dir_structure=='original'`, `dir` should have the following structure:

        .. code-block:: none

            dir/
              train/
                n02134418/
                  n02134418_198.JPEG
                  ...
                ...
              val/
                ILSVRC2012_val_00000001.JPEG
                ...
              test/
                ILSVRC2012_test_00000001.JPEG
                ...

        With the downloaded ILSVRC12_img_*.tar, you can use the following
        command to build the above structure:

        .. code-block:: none

            mkdir val && tar xvf ILSVRC12_img_val.tar -C val
            mkdir test && tar xvf ILSVRC12_img_test.tar -C test
            mkdir train && tar xvf ILSVRC12_img_train.tar -C train && cd train
            find -type f -name '*.tar' | parallel -P 10 'echo {} && mkdir -p {/.} && tar xf {} -C {/.}'

        When `dir_structure=='train'`, `dir` should have the following structure:

        .. code-block:: none

            dir/
              train/
                n02134418/
                  n02134418_198.JPEG
                  ...
                ...
              val/
                n01440764/
                  ILSVRC2012_val_00000293.JPEG
                  ...
                ...
              test/
                ILSVRC2012_test_00000001.JPEG
                ...
        """
        super(ILSVRC12, self).__init__(
            dir, name, meta_dir, shuffle, dir_structure)

    # There are some CMYK / png images, but cv2 seems robust to them.
    # https://github.com/tensorflow/models/blob/c0cd713f59cfe44fa049b3120c417cc4079c17e3/research/inception/inception/data/build_imagenet_data.py#L264-L300
    def __iter__(self):
        for fname, label in super(ILSVRC12, self).__iter__():
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            assert im is not None, fname
            yield [im, label]

    @staticmethod
    def get_training_bbox(bbox_dir, imglist):
        """
        Load one bounding box per image from XML annotation files.

        Args:
            bbox_dir (str): directory containing the XML annotations, laid out
                with the same relative paths as the image filenames.
            imglist (list): list of (image filename, label), e.g. as returned by
                :meth:`ILSVRCMeta.get_image_list`.

        Returns:
            list: same length as ``imglist``. Each element is a float32
            np.ndarray with the values of the children of the first
            ``object/bndbox`` element, in document order (presumably
            xmin, ymin, xmax, ymax -- confirm against the annotation format),
            or None if the annotation could not be loaded.
        """
        import xml.etree.ElementTree as ET
        ret = []

        def parse_bbox(fname):
            root = ET.parse(fname).getroot()
            bndbox = root.find('object').find('bndbox')
            # Iterate the element directly: Element.getchildren() was removed
            # in Python 3.9, which made every parse fail silently.
            return np.asarray([float(x.text) for x in bndbox], dtype='float32')

        with timed_operation('Loading Bounding Boxes ...'):
            cnt = 0
            for item in tqdm.tqdm(imglist):
                # Replace the trailing 4 characters with 'xml'
                # (assumes filenames end in 'JPEG').
                fname = item[0][:-4] + 'xml'
                fname = os.path.join(bbox_dir, fname)
                try:
                    ret.append(parse_bbox(fname))
                    cnt += 1
                except Exception:
                    # Best effort: a missing or malformed annotation is
                    # recorded as None rather than aborting.
                    ret.append(None)
            logger.info("{}/{} images have bounding box.".format(cnt, len(imglist)))
        return ret


class TinyImageNet(RNGDataFlow):
    """
    The TinyImageNet classification dataset, with 200 classes and 500 images
    per class. See https://tiny-imagenet.herokuapp.com/.

    It produces [image, label] where image is a 64x64x3(BGR) image, label is an
    integer in [0, 200).
    """
    def __init__(self, dir, name, shuffle=None):
        """
        Args:
            dir (str): a directory
            name (str): one of 'train' or 'val'
            shuffle (bool): shuffle the dataset.
                Defaults to True if name=='train'.
        """
        assert name in ['train', 'val'], name
        dir = Path(os.path.expanduser(dir))
        assert os.path.isdir(dir), dir
        self.full_dir = dir / name
        if shuffle is None:
            shuffle = name == 'train'
        self.shuffle = shuffle

        # Class ids follow the line order of wnids.txt; blank lines are ignored.
        with open(dir / "wnids.txt") as f:
            wnids = [x.strip() for x in f if x.strip()]
        cls_to_id = {wnid: idx for idx, wnid in enumerate(wnids)}
        assert len(cls_to_id) == 200

        self.imglist = []
        if name == 'train':
            # Layout: train/<wnid>/images/<image file>
            for clsid, cls in enumerate(wnids):
                cls_dir = self.full_dir / cls / "images"
                for img in cls_dir.iterdir():
                    self.imglist.append((str(img), clsid))
        else:
            # Layout: val/images/<image file>, with labels in val_annotations.txt.
            with open(self.full_dir / "val_annotations.txt") as f:
                for line in f:
                    fields = line.strip().split()
                    if not fields:
                        # Tolerate blank lines instead of raising IndexError.
                        continue
                    # Only the first two columns (filename, wnid) are used.
                    img, cls = fields[0], fields[1]
                    img = self.full_dir / "images" / img
                    clsid = cls_to_id[cls]
                    self.imglist.append((str(img), clsid))

    def __len__(self):
        return len(self.imglist)

    def __iter__(self):
        idxs = np.arange(len(self.imglist))
        if self.shuffle:
            self.rng.shuffle(idxs)
        for k in idxs:
            fname, label = self.imglist[k]
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            assert im is not None, fname
            yield [im, label]


# cv2 is imported at the end of the module: the classes above only look up the
# name ``cv2`` when their methods run, so a late import is sufficient.
try:
    import cv2
except ImportError:
    # Without OpenCV, rebind the image-decoding dataflows to placeholders built
    # by create_dummy_class.
    # NOTE(review): presumably the placeholders raise an informative error on
    # use -- confirm in utils.develop.
    from ...utils.develop import create_dummy_class
    ILSVRC12 = create_dummy_class('ILSVRC12', 'cv2')  # noqa
    TinyImageNet = create_dummy_class('TinyImageNet', 'cv2')  # noqa

if __name__ == '__main__':
    # Ad-hoc manual check: construct the metadata helper, then open an IPython
    # shell on the first datapoint of the TinyImageNet validation split.
    meta = ILSVRCMeta()
    # print(meta.get_synset_words_1000())

    ds = TinyImageNet('~/data/tiny-imagenet-200', 'val', shuffle=False)
    ds.reset_state()

    for _ in ds:
        # Only the first datapoint is inspected; the loop exits right after.
        from IPython import embed
        embed()
        break