Source code for tensorpack.callbacks.inference

# -*- coding: utf-8 -*-
# File: inference.py


import numpy as np
from abc import ABCMeta
import six

from ..tfutils.common import get_op_tensor_name
from ..utils import logger
from ..utils.stats import BinaryStatistics, RatioCounter
from .base import Callback

__all__ = ['ScalarStats', 'Inferencer',
           'ClassificationError', 'BinaryClassificationStats']


@six.add_metaclass(ABCMeta)
class Inferencer(Callback):
    """ Base class of Inferencer.
    Inferencer is a special kind of callback that should be called by
    :class:`InferenceRunner`. It has the methods ``_get_fetches`` and
    ``_on_fetches`` which are like :class:`SessionRunHooks`, except that
    they will be used only by :class:`InferenceRunner`.

    .. document private functions
    .. automethod:: _before_inference
    .. automethod:: _after_inference
    .. automethod:: _get_fetches
    .. automethod:: _on_fetches
    """

    def _before_epoch(self):
        self._before_inference()

    def _before_inference(self):
        """
        Called before a new round of inference starts.
        """
        pass

    def _trigger_epoch(self):
        ret = self._after_inference()
        if ret is None:
            return
        for k, v in six.iteritems(ret):
            try:
                v = float(v)
            except ValueError:
                logger.warn("{} returns a non-scalar statistics!".format(type(self).__name__))
                continue
            else:
                self.trainer.monitors.put_scalar(k, v)

    def _after_inference(self):
        """
        Called after a round of inference ends.
        Returns a dict of scalar statistics which will be logged to monitors.
        """
        pass

    def get_fetches(self):
        """
        Return the list of tensor names (guaranteed to be tensor names, not op
        names) this inferencer needs.
        """
        ret = self._get_fetches()
        return [get_op_tensor_name(n)[1] for n in ret]

    def _get_fetches(self):
        """ To be implemented by subclasses """
        raise NotImplementedError()

    def on_fetches(self, results):
        """
        Called after each new datapoint finished the forward inference.

        Args:
            results(list): list of results this inferencer fetched. Has the same
                length as ``self._get_fetches()``.
        """
        self._on_fetches(results)

    def _on_fetches(self, results):
        """ To be implemented by subclasses """
        raise NotImplementedError()
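
# Illustrative sketch (not part of the original module): a minimal Inferencer
# subclass showing how the hooks fit together. The class name and the tensor
# name 'total_cost' are assumptions made only for this example.
#
#     class MeanCost(Inferencer):
#         def _before_inference(self):
#             self._sum, self._count = 0.0, 0
#
#         def _get_fetches(self):
#             return ['total_cost']          # fetched for every datapoint
#
#         def _on_fetches(self, results):
#             self._sum += float(results[0])
#             self._count += 1
#
#         def _after_inference(self):
#             # the returned dict is written to the monitors by _trigger_epoch()
#             return {'val_mean_cost': self._sum / max(self._count, 1)}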


class ScalarStats(Inferencer):
    """
    Statistics of some scalar tensor.
    The value will be averaged over all given datapoints.

    Note that the average of accuracy over all batches is not necessarily the
    accuracy of the whole dataset. See :class:`ClassificationError` for details.
    """

    def __init__(self, names, prefix='validation'):
        """
        Args:
            names(list or str): list of names or just one name. The
                corresponding tensors have to be scalar.
            prefix(str): a prefix for logging
        """
        if not isinstance(names, list):
            self.names = [names]
        else:
            self.names = names
        self.prefix = prefix

    def _before_inference(self):
        self.stats = []

    def _get_fetches(self):
        return self.names

    def _on_fetches(self, output):
        self.stats.append(output)

    def _after_inference(self):
        if len(self.stats):
            self.stats = np.mean(self.stats, axis=0)
            assert len(self.stats) == len(self.names)

        ret = {}
        for stat, name in zip(self.stats, self.names):
            opname, _ = get_op_tensor_name(name)
            name = '{}_{}'.format(self.prefix, opname) if self.prefix else opname
            ret[name] = stat
        return ret
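
# Illustrative usage sketch (not part of the original module): attaching
# ScalarStats to an InferenceRunner callback. `val_dataflow` and the tensor
# name 'total_cost' are placeholders assumed for this example.
#
#     from tensorpack import InferenceRunner, ScalarStats
#
#     callbacks = [
#         InferenceRunner(val_dataflow, [ScalarStats(['total_cost'])]),
#     ]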


class ClassificationError(Inferencer):
    """
    Compute **true** classification error in batch mode, from a ``wrong`` tensor.

    The ``wrong`` tensor is supposed to be a binary vector containing
    whether each sample in the batch is *incorrectly* classified.
    You can use ``tf.nn.in_top_k`` to produce this vector.

    This Inferencer produces the "true" error, which could be different from
    ``ScalarStats('error_rate')``.
    It takes into account the fact that batches might not have the same size
    during testing (because the size of the test set might not be a multiple of
    the batch size), so the result can be different from averaging the error
    rate of each batch.

    You can also use the "correct prediction" tensor, in which case this
    inferencer will give you "classification accuracy" instead of error.
    """

    def __init__(self, wrong_tensor_name='incorrect_vector', summary_name='validation_error'):
        """
        Args:
            wrong_tensor_name(str): name of the ``wrong`` binary vector tensor.
            summary_name(str): the name to log the error with.
        """
        self.wrong_tensor_name = wrong_tensor_name
        self.summary_name = summary_name

    def _before_inference(self):
        self.err_stat = RatioCounter()

    def _get_fetches(self):
        return [self.wrong_tensor_name]

    def _on_fetches(self, outputs):
        vec = outputs[0]
        # TODO put shape assertion into inference-runner
        assert vec.ndim == 1, "{} is not a vector!".format(self.wrong_tensor_name)
        batch_size = len(vec)
        wrong = np.sum(vec)
        self.err_stat.feed(wrong, batch_size)

    def _after_inference(self):
        return {self.summary_name: self.err_stat.ratio}
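
# Illustrative sketch (not part of the original module): building the
# ``wrong`` vector with ``tf.nn.in_top_k`` in the graph and logging the true
# error. `logits`, `label` and `val_dataflow` are placeholders assumed here.
#
#     wrong = tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, 1)),
#                     tf.float32, name='incorrect_vector')
#     ...
#     callbacks = [
#         InferenceRunner(val_dataflow,
#                         [ClassificationError('incorrect_vector', 'val-error-top1')]),
#     ]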


class BinaryClassificationStats(Inferencer):
    """
    Compute precision / recall in binary classification, given the
    prediction vector and the label vector.
    """

    def __init__(self, pred_tensor_name, label_tensor_name, prefix='val'):
        """
        Args:
            pred_tensor_name(str): name of the 0/1 prediction tensor.
            label_tensor_name(str): name of the 0/1 label tensor.
            prefix(str): a prefix for logging.
        """
        self.pred_tensor_name = pred_tensor_name
        self.label_tensor_name = label_tensor_name
        self.prefix = prefix

    def _before_inference(self):
        self.stat = BinaryStatistics()

    def _get_fetches(self):
        return [self.pred_tensor_name, self.label_tensor_name]

    def _on_fetches(self, outputs):
        pred, label = outputs
        self.stat.feed(pred, label)

    def _after_inference(self):
        return {self.prefix + '_precision': self.stat.precision,
                self.prefix + '_recall': self.stat.recall}
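
# Illustrative usage sketch (not part of the original module): 'prediction'
# and 'label' are assumed to be the names of 0/1 tensors in the graph, and
# `val_dataflow` is a placeholder DataFlow.
#
#     callbacks = [
#         InferenceRunner(val_dataflow,
#                         [BinaryClassificationStats('prediction', 'label')]),
#     ]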