Source code for tensorpack.callbacks.misc

# -*- coding: utf-8 -*-
# File: misc.py


import numpy as np
import os
import time
from collections import deque

from ..utils import logger
from ..utils.utils import humanize_time_delta
from .base import Callback

# Public API of this module: the names exported by a wildcard import.
__all__ = ['SendStat', 'InjectShell', 'EstimatedTimeLeft']


class SendStat(Callback):
    """ An equivalent of :class:`SendMonitorData`, but as a normal callback.

    Each time the callback is triggered, it reads the latest value of every
    requested statistic from ``self.trainer.monitors``, substitutes those
    values into a command template, and runs the result with
    :func:`os.system`. A non-zero return value is logged as an error; it
    does not raise.
    """

    def __init__(self, command, names):
        """
        Args:
            command (str): a command template. It is expanded with
                ``command.format(**stats)``, where ``stats`` maps each name
                in ``names`` to its latest value, so fields are referenced
                as ``{name}``.
            names (str or list/tuple of str): the name(s) of the statistics
                to substitute into ``command``. A single name is treated as
                a one-element list.
        """
        self.command = command
        # A tuple used to be wrapped whole into a one-element list, which
        # could never work: `format(**v)` rejects a non-string key. Treat
        # tuples like lists; anything else is a single name.
        if isinstance(names, (list, tuple)):
            names = list(names)
        else:
            names = [names]
        self.names = names

    def _trigger(self):
        monitors = self.trainer.monitors
        latest = {name: monitors.get_latest(name) for name in self.names}
        cmd = self.command.format(**latest)
        ret = os.system(cmd)
        if ret != 0:
            logger.error("Command {} failed with ret={}!".format(cmd, ret))
class InjectShell(Callback):
    """
    Pause training on demand and debug it interactively.

    The signal is the existence of a specific file that the user creates.
    Once the :meth:`trigger` method is called, the callback checks whether
    that file exists and, if so, opens an IPython/pdb shell. In the shell,
    ``self`` is this callback and ``self.trainer`` is the trainer; everything
    else can be reached from those.

    Example:

    .. code-block:: none

        callbacks=[InjectShell('/path/to/pause-training.tmp'), ...]

        # the following command will pause the training and start a shell when the epoch finishes:
        $ touch /path/to/pause-training.tmp

    """

    def __init__(self, file='INJECT_SHELL.tmp', shell='ipython'):
        """
        Args:
            file (str): if this file exists, will open a shell.
            shell (str): one of 'ipython', 'pdb'
        """
        self._file = file
        assert shell in ('ipython', 'pdb')
        self._shell = shell
        logger.info("Create a file '{}' to open {} shell.".format(file, shell))

    def _trigger(self):
        # Nothing to do unless the user has created the signal file.
        if not os.path.isfile(self._file):
            return
        logger.info("File {} exists, entering shell.".format(self._file))
        self._inject()

    def _inject(self):
        # `trainer` is bound only so that it is available as a local name
        # inside the interactive shell opened from this frame.
        trainer = self.trainer   # noqa
        if self._shell == 'ipython':
            import IPython as IP    # noqa
            IP.embed()
        elif self._shell == 'pdb':
            import pdb   # noqa
            pdb.set_trace()

    def _after_train(self):
        # Remove a leftover signal file, if there is one.
        if not os.path.isfile(self._file):
            return
        os.unlink(self._file)
class EstimatedTimeLeft(Callback):
    """
    Estimate the time left until completion of training.

    The estimate is the number of epochs still to run multiplied by a
    typical per-epoch duration, taken from the most recent epochs.
    """

    def __init__(self, last_k_epochs=5, median=True):
        """
        Args:
            last_k_epochs (int): Use the time spent on last k epochs to
                estimate total time left.
            median (bool): If True, use the median time spent on the last k
                epochs; otherwise use the mean.
        """
        self._median = median
        # Bounded window: only the `last_k_epochs` newest durations are kept.
        self._times = deque(maxlen=last_k_epochs)

    def _before_train(self):
        self._max_epoch = self.trainer.max_epoch
        self._last_time = time.time()

    def _trigger_epoch(self):
        elapsed = time.time() - self._last_time
        self._last_time = time.time()
        self._times.append(elapsed)

        if self._median:
            per_epoch = np.median(self._times)
        else:
            per_epoch = np.mean(self._times)

        remaining = (self._max_epoch - self.epoch_num) * per_epoch
        # Written as `> 0` on purpose: a zero, negative or NaN estimate
        # is silently skipped.
        if remaining > 0:
            logger.info("Estimated Time Left: " + humanize_time_delta(remaining))