# Source code for tensorpack.callbacks.graph

# -*- coding: utf-8 -*-
# File: graph.py


""" Graph related callbacks"""

import numpy as np
import os

from ..compat import tfv1 as tf
from ..tfutils.common import get_op_tensor_name
from ..utils import logger
from .base import Callback

# Public names of this module. 'DumpTensor' and 'DumpParamAsImage' are
# aliases that are assigned at the very end of the file.
__all__ = ['RunOp', 'RunUpdateOps', 'ProcessTensors', 'DumpTensors',
           'DumpTensor', 'DumpTensorAsImage', 'DumpParamAsImage', 'CheckNumerics']


class RunOp(Callback):
    """ Run an Op. """

    _chief_only = False

    def __init__(self, op,
                 run_before=True, run_as_trigger=True,
                 run_step=False, verbose=False):
        """
        Args:
            op (tf.Operation or function): an Op, or a function that returns the Op in the graph.
                The function will be called after the main graph has been created (in the :meth:`setup_graph` callback).
            run_before (bool): run the Op before training
            run_as_trigger (bool): run the Op on every :meth:`trigger()` call.
            run_step (bool): run the Op every step (along with training)
            verbose (bool): print logs when the op is run.

        Example:
            The `DQN Example
            <https://github.com/tensorpack/tensorpack/blob/master/examples/DeepQNetwork/>`_
            uses this callback to update target network.
        """
        # Always store a zero-argument factory, so that `_setup_graph` can
        # obtain the Op the same way whether an Op or a function was given.
        if not callable(op):
            self.setup_func = lambda: op  # noqa
        else:
            self.setup_func = op
        self.run_before = run_before
        self.run_as_trigger = run_as_trigger
        self.run_step = run_step
        self.verbose = verbose

    def _setup_graph(self):
        """Resolve the Op and, when it runs every step, build its fetch args."""
        self._op = self.setup_func()
        if self.run_step:
            self._fetch = tf.train.SessionRunArgs(fetches=self._op)

    def _before_train(self):
        """Run the Op once if ``run_before`` is set."""
        if self.run_before:
            self._print()
            # No session is passed, so `Operation.run` uses the default session.
            self._op.run()

    def _trigger(self):
        """Run the Op if ``run_as_trigger`` is set."""
        if self.run_as_trigger:
            self._print()
            self._op.run()

    def _before_run(self, _):
        """Return the fetch args if ``run_step`` is set, otherwise ``None``."""
        if self.run_step:
            self._print()
            return self._fetch
        return None

    def _print(self):
        """Log the name of the Op when ``verbose`` is enabled."""
        if self.verbose:
            logger.info("Running Op {} ...".format(self._op.name))


class RunUpdateOps(RunOp):
    """
    Run ops from the collection UPDATE_OPS every step.
    The ops will be hooked to ``trainer.hooked_sess`` and run along with
    each ``hooked_sess.run`` call.

    Be careful when using ``UPDATE_OPS`` if your model contains more than one sub-networks.
    Perhaps not all updates are supposed to be executed in every iteration.

    This callback is one of the :func:`DEFAULT_CALLBACKS()`.
    """

    def __init__(self, collection=None):
        """
        Args:
            collection (str): collection of ops to run. Defaults to ``tf.GraphKeys.UPDATE_OPS``
        """
        if collection is None:
            collection = tf.GraphKeys.UPDATE_OPS
        # Short label used only in the log message below.
        name = 'UPDATE_OPS' if collection == tf.GraphKeys.UPDATE_OPS else collection

        def make_update_op():
            # The collection is read only when this factory is called, not
            # when the callback is constructed.
            ops = tf.get_collection(collection)
            if ops:
                logger.info("Applying collection {} of {} ops.".format(name, len(ops)))
                return tf.group(*ops, name='update_ops')
            # Empty collection: return a no-op so there is still an Op to fetch.
            return tf.no_op(name='empty_update_ops')

        super(RunUpdateOps, self).__init__(
            make_update_op, run_before=False, run_as_trigger=False, run_step=True)


class ProcessTensors(Callback):
    """
    Fetch extra tensors **along with** each training step,
    and call some function over the values.
    It uses ``_{before,after}_run`` method to inject ``tf.train.SessionRunHooks``
    to the session.
    You can use it to print tensors, save tensors to file, etc.

    Example:

    .. code-block:: python

        ProcessTensors(['mycost1', 'mycost2'], lambda c1, c2: print(c1, c2, c1 + c2))
    """

    def __init__(self, names, fn):
        """
        Args:
            names (list[str]): names of tensors
            fn: a function taking all requested tensors as input
        """
        assert isinstance(names, (list, tuple)), names
        self._names = names
        self._fn = fn

    def _setup_graph(self):
        """Look up the named tensors and build the fetch args once."""
        tensors = self.get_tensors_maybe_in_tower(self._names)
        self._fetch = tf.train.SessionRunArgs(fetches=tensors)

    def _before_run(self, _):
        """Request the extra tensors for the upcoming run."""
        return self._fetch

    def _after_run(self, _, rv):
        """Pass the fetched values to ``fn`` as positional arguments."""
        self._fn(*rv.results)


class DumpTensors(ProcessTensors):
    """
    Dump some tensors to a file.
    Every step this callback fetches tensors and write them to a npz file
    under ``logger.get_logger_dir``.
    The dump can be loaded by ``dict(np.load(filename).items())``.
    """

    def __init__(self, names):
        """
        Args:
            names (list[str]): names of tensors
        """
        assert isinstance(names, (list, tuple)), names
        # The logger directory is read once, when the callback is constructed.
        log_dir = logger.get_logger_dir()

        def fn(*args):
            # `self._names` is set by ProcessTensors.__init__ below, before
            # this function can ever be called; values arrive in that order.
            dic = dict(zip(self._names, args))
            fname = os.path.join(
                log_dir, 'DumpTensor-{}.npz'.format(self.global_step))
            np.savez(fname, **dic)

        super(DumpTensors, self).__init__(names, fn)


class DumpTensorAsImage(Callback):
    """
    Dump a tensor to image(s) to ``logger.get_logger_dir()`` once triggered.

    Note that it requires the tensor is directly evaluable, i.e. either inputs
    are not its dependency (e.g. the weights of the model), or the inputs are
    feedfree (in which case this callback will take an extra datapoint from the input pipeline).

    Files are named ``<prefix>-ep<epoch>.png`` for a single image and
    ``<prefix>-ep<epoch>-<idx>.png`` for each image of a list or 4D array.
    """

    def __init__(self, tensor_name, prefix=None, map_func=None, scale=255):
        """
        Args:
            tensor_name (str): the name of the tensor.
            prefix (str): the filename prefix for saved images. Defaults to the Op name.
            map_func: map the value of the tensor to an image or list of
                 images of shape [h, w] or [h, w, c]. If None, will use identity.
            scale (float): a multiplier on pixel values, applied after map_func.
        """
        op_name, self.tensor_name = get_op_tensor_name(tensor_name)
        self.func = map_func
        self.prefix = op_name if prefix is None else prefix
        # The logger directory is read once, when the callback is constructed.
        self.log_dir = logger.get_logger_dir()
        self.scale = scale

    def _before_train(self):
        """Look up the tensor in the graph by its name."""
        self._tensor = self.graph.get_tensor_by_name(self.tensor_name)

    def _trigger(self):
        """Evaluate the tensor, write the image file(s), and hand the value to the monitors."""
        val = self.trainer.sess.run(self._tensor)
        if self.func is not None:
            val = self.func(val)
        # A list or a 4D array is treated as several images, one file each.
        if isinstance(val, list) or val.ndim == 4:
            for idx, im in enumerate(val):
                self._dump_image(im, idx)
        else:
            self._dump_image(val)
        # The monitors receive the value without `scale` applied.
        self.trainer.monitors.put_image(self.prefix, val)

    def _dump_image(self, im, idx=None):
        """
        Write one image to a PNG file in the log directory.

        Args:
            im: a 2D or 3D array.
            idx (int or None): index of the image within a batch, or None
                when the tensor value is a single image.
        """
        assert im.ndim in [2, 3], str(im.ndim)
        # Compare against None explicitly: index 0 is a valid batch index and
        # must get its "-0" suffix like every other image of the batch.
        suffix = '' if idx is None else '-' + str(idx)
        fname = os.path.join(
            self.log_dir,
            self.prefix + '-ep{:03d}{}.png'.format(self.epoch_num, suffix))
        # Scale, then clamp to [0, 255] before the cast to uint8.
        res = np.clip(im * self.scale, 0, 255)
        # `cv2` is imported at module level, at the bottom of this file.
        cv2.imwrite(fname, res.astype('uint8'))


class CheckNumerics(RunOp):
    """
    Check trainable variables in the graph for NaN and Inf.
    Raise an exception if such an error is found.
    """
    _chief_only = True

    def __init__(self, run_as_trigger=True, run_step=False):
        """
        Args: same as in :class:`RunOp`.
        """
        # `run_before` and `verbose` are not passed, so they keep the
        # defaults of `RunOp.__init__`.
        super().__init__(
            self._get_op,
            run_as_trigger=run_as_trigger,
            run_step=run_step)

    def _get_op(self):
        """Build one Op grouping a ``tf.check_numerics`` per trainable variable."""
        variables = tf.trainable_variables()
        ops = [tf.check_numerics(v, "CheckNumerics['{}']".format(v.op.name)).op
               for v in variables]
        return tf.group(*ops, name="CheckAllNumerics")


# cv2 is an optional dependency. It is imported here, at module level, and
# DumpTensorAsImage._dump_image refers to the name `cv2` when it is called.
try:
    import cv2
except ImportError:
    # cv2 is unavailable: rebind the class name to a placeholder produced by
    # create_dummy_class (presumably one that reports the missing 'cv2'
    # dependency when used -- confirm in utils.develop).
    from ..utils.develop import create_dummy_class
    DumpTensorAsImage = create_dummy_class('DumpTensorAsImage', 'cv2')  # noqa

# Aliases listed in __all__. They are assigned after the try/except above so
# that DumpParamAsImage also refers to the placeholder when cv2 is missing.
DumpParamAsImage = DumpTensorAsImage
DumpTensor = DumpTensors