# -*- coding: utf-8 -*-
# File: ilsvrc.py
import numpy as np
import os
import tarfile
import tqdm
from pathlib import Path
from ...utils import logger
from ...utils.fs import download, get_dataset_path, mkdir_p
from ...utils.loadcaffe import get_caffe_pb
from ...utils.timer import timed_operation
from ..base import RNGDataFlow
# Public names of this module.
__all__ = ['ILSVRCMeta', 'ILSVRC12', 'ILSVRC12Files', 'TinyImageNet']
# (url, integer) pair for the caffe_ilsvrc12.tar.gz archive.
# NOTE(review): the integer is presumably the expected size of the download
# in bytes -- confirm against the code that consumes this constant.
CAFFE_ILSVRC12_URL = ("http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz", 17858008)
class ILSVRC12Files(RNGDataFlow):
    """
    Same as :class:`ILSVRC12`, but produces filenames of the images instead of nparrays.
    This could be useful when ``cv2.imread`` is a bottleneck and you want to
    decode it in smarter ways (e.g. in parallel).
    """

    def __init__(self, dir, name, meta_dir=None,
                 shuffle=None, dir_structure=None):
        """
        Same as in :class:`ILSVRC12`.

        ``dir`` is user-expanded and must contain a sub-directory called ``name``.
        ``meta_dir``, when given, must be an existing directory; it is handed to
        :class:`ILSVRCMeta`, which supplies the list of (filename, label) pairs.
        """
        assert name in ['train', 'test', 'val'], name
        dir = os.path.expanduser(dir)
        assert os.path.isdir(dir), dir
        self.full_dir = os.path.join(dir, name)
        self.name = name
        assert os.path.isdir(self.full_dir), self.full_dir
        assert meta_dir is None or os.path.isdir(meta_dir), meta_dir

        # Shuffling defaults to on for the training split only.
        self.shuffle = (name == 'train') if shuffle is None else shuffle

        # The training split always uses the 'train' layout; for the other
        # splits the layout is guessed unless the caller specified it.
        if name == 'train':
            dir_structure = 'train'
        elif dir_structure is None:
            dir_structure = ILSVRCMeta.guess_dir_structure(self.full_dir)

        self.imglist = ILSVRCMeta(meta_dir).get_image_list(name, dir_structure)

        # Cheap sanity check: the first few listed files must exist on disk.
        for rel_path, _ in self.imglist[:10]:
            abs_path = os.path.join(self.full_dir, rel_path)
            assert os.path.isfile(abs_path), abs_path

    def __len__(self):
        """Number of entries in the image list."""
        return len(self.imglist)

    def __iter__(self):
        """Yield ``[filename, label]``, with filename joined onto the split directory."""
        order = np.arange(len(self.imglist))
        if self.shuffle:
            self.rng.shuffle(order)
        for idx in order:
            rel_path, label = self.imglist[idx]
            yield [os.path.join(self.full_dir, rel_path), label]
class ILSVRC12(ILSVRC12Files):
    """
    The ILSVRC12 classification dataset, aka the commonly used 1000 classes ImageNet subset.
    This dataflow produces uint8 images of shape [h, w, 3(BGR)], and a label between [0, 999].
    The label map follows the synsets.txt file in
    http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz,
    which can also be queried using :class:`ILSVRCMeta`.
    """

    def __init__(self, dir, name, meta_dir=None,
                 shuffle=None, dir_structure=None):
        """
        Args:
            dir (str): A directory containing a subdir named ``name``,
                containing the images in a structure described below.
            name (str): One of 'train' or 'val' or 'test'.
            meta_dir (str): Forwarded unchanged to :class:`ILSVRC12Files`.
            shuffle (bool): shuffle the dataset. Defaults to True if name=='train'.
            dir_structure (str): One of 'original' or 'train'.
                The directory structure for the 'val' directory.
                'original' means the original decompressed directory, which only has list of image files (as below).
                If set to 'train', it expects the same two-level directory structure similar to 'dir/train/'.
                By default, it tries to automatically detect the structure.
                You probably do not need to care about this option because 'original' is what people usually have.

        Example:

        When `dir_structure=='original'`, `dir` should have the following structure:

        .. code-block:: none

            dir/
              train/
                n02134418/
                  n02134418_198.JPEG
                  ...
                ...
              val/
                ILSVRC2012_val_00000001.JPEG
                ...
              test/
                ILSVRC2012_test_00000001.JPEG
                ...

        With the downloaded ILSVRC12_img_*.tar, you can use the following
        command to build the above structure:

        .. code-block:: none

            mkdir val && tar xvf ILSVRC12_img_val.tar -C val
            mkdir test && tar xvf ILSVRC12_img_test.tar -C test
            mkdir train && tar xvf ILSVRC12_img_train.tar -C train && cd train
            find -type f -name '*.tar' | parallel -P 10 'echo {} && mkdir -p {/.} && tar xf {} -C {/.}'

        When `dir_structure=='train'`, `dir` should have the following structure:

        .. code-block:: none

            dir/
              train/
                n02134418/
                  n02134418_198.JPEG
                  ...
                ...
              val/
                n01440764/
                  ILSVRC2012_val_00000293.JPEG
                  ...
                ...
              test/
                ILSVRC2012_test_00000001.JPEG
                ...
        """
        super().__init__(dir, name, meta_dir, shuffle, dir_structure)

    # There are some CMYK / png images, but cv2 seems robust to them.
    # https://github.com/tensorflow/models/blob/c0cd713f59cfe44fa049b3120c417cc4079c17e3/research/inception/inception/data/build_imagenet_data.py#L264-L300

    def __iter__(self):
        """Yield ``[image, label]`` where image is decoded with ``cv2.imread``."""
        for fname, label in super().__iter__():
            im = cv2.imread(fname, cv2.IMREAD_COLOR)
            assert im is not None, fname
            yield [im, label]

    @staticmethod
    def get_training_bbox(bbox_dir, imglist):
        """
        Load one bounding box per image from XML annotation files.

        Args:
            bbox_dir (str): directory that annotation paths are joined onto.
            imglist (list): image list; item ``[0]`` of each entry is the image
                filename. The annotation file is that filename with its
                extension replaced by ``.xml``, looked up under ``bbox_dir``.

        Returns:
            list: one item per entry of ``imglist``, in the same order. An item
            is a float32 array with the values of the children of the first
            ``object/bndbox`` element of the annotation (presumably
            xmin, ymin, xmax, ymax -- confirm against the annotation files),
            or None when the annotation is missing or cannot be parsed.
        """
        import xml.etree.ElementTree as ET

        # Failures that mean "no usable annotation for this image": missing or
        # unreadable file, malformed XML, missing <object>/<bndbox> element
        # (``find`` returns None), or a non-numeric / empty coordinate.
        # Anything else is a programming error and is allowed to propagate.
        expected_errors = (OSError, ET.ParseError, AttributeError, ValueError, TypeError)

        def parse_bbox(fname):
            root = ET.parse(fname).getroot()
            bndbox = root.find('object').find('bndbox')
            # Iterate the element directly: ``Element.getchildren()`` was
            # removed in Python 3.9.
            return np.asarray([float(x.text) for x in bndbox], dtype='float32')

        ret = []
        with timed_operation('Loading Bounding Boxes ...'):
            cnt = 0
            for entry in tqdm.tqdm(imglist):
                # splitext handles any extension length, not only ".JPEG".
                xml_name = os.path.splitext(entry[0])[0] + '.xml'
                xml_path = os.path.join(bbox_dir, xml_name)
                try:
                    box = parse_bbox(xml_path)
                except expected_errors:
                    ret.append(None)
                else:
                    ret.append(box)
                    cnt += 1
            logger.info("{}/{} images have bounding box.".format(cnt, len(imglist)))
        return ret
class TinyImageNet(RNGDataFlow):
    """
    The TinyImageNet classification dataset, with 200 classes and 500 images
    per class. See https://tiny-imagenet.herokuapp.com/.

    It produces [image, label] where image is a 64x64x3(BGR) image, label is an
    integer in [0, 200).
    """

    def __init__(self, dir, name, shuffle=None):
        """
        Args:
            dir (str): a directory. It must contain ``wnids.txt`` (200 distinct
                class ids, one per line) and a sub-directory called ``name``.
            name (str): one of 'train' or 'val'
            shuffle (bool): shuffle the dataset.
                Defaults to True if name=='train'.
        """
        assert name in ['train', 'val'], name
        dir = Path(os.path.expanduser(dir))
        assert os.path.isdir(dir), dir
        self.full_dir = dir / name
        self.shuffle = (name == 'train') if shuffle is None else shuffle

        # The label of a class is its line index in wnids.txt.
        with open(dir / "wnids.txt") as f:
            wnids = [line.strip() for line in f.readlines()]
        cls_to_id = {wnid: idx for idx, wnid in enumerate(wnids)}
        assert len(cls_to_id) == 200

        self.imglist = []
        if name == 'train':
            # Layout: <dir>/train/<wnid>/images/<image files>
            for clsid, wnid in enumerate(wnids):
                images_dir = self.full_dir / wnid / "images"
                self.imglist.extend(
                    (str(img_path), clsid) for img_path in images_dir.iterdir())
        else:
            # Layout: <dir>/val/images/<image files>, labelled by
            # val_annotations.txt (first field: file name, second: wnid).
            with open(self.full_dir / "val_annotations.txt") as f:
                for row in f:
                    fields = row.strip().split()
                    img_name, wnid = fields[0], fields[1]
                    img_path = self.full_dir / "images" / img_name
                    self.imglist.append((str(img_path), cls_to_id[wnid]))

    def __len__(self):
        """Number of images in the selected split."""
        return len(self.imglist)

    def __iter__(self):
        """Yield ``[image, label]`` with the image decoded by ``cv2.imread``."""
        order = np.arange(len(self.imglist))
        if self.shuffle:
            self.rng.shuffle(order)
        for idx in order:
            path, label = self.imglist[idx]
            im = cv2.imread(path, cv2.IMREAD_COLOR)
            assert im is not None, path
            yield [im, label]
# cv2 is an optional dependency. When it cannot be imported, the two
# classes that decode images with it are replaced by placeholders
# created with ``create_dummy_class``.
try:
    import cv2
except ImportError:
    from ...utils.develop import create_dummy_class

    ILSVRC12 = create_dummy_class('ILSVRC12', 'cv2')  # noqa
    TinyImageNet = create_dummy_class('TinyImageNet', 'cv2')  # noqa
if __name__ == '__main__':
    # Manual smoke test: build the metadata helper and the TinyImageNet
    # validation flow, then open an IPython shell on the first datapoint.
    meta = ILSVRCMeta()
    # print(meta.get_synset_words_1000())
    ds = TinyImageNet('~/data/tiny-imagenet-200', 'val', shuffle=False)
    ds.reset_state()
    for _ in ds:
        # IPython is imported only once a datapoint has actually been produced.
        from IPython import embed
        embed()
        break