Source code for msdnet.train

#-----------------------------------------------------------------------
#Copyright 2019 Centrum Wiskunde & Informatica, Amsterdam
#
#Author: Daniel M. Pelt
#Contact: D.M.Pelt@cwi.nl
#Website: http://dmpelt.github.io/msdnet/
#License: MIT
#
#This file is part of MSDNet, a Python implementation of the
#Mixed-Scale Dense Convolutional Neural Network.
#-----------------------------------------------------------------------

"""Module for training networks."""

from . import store
import numpy as np
import abc
import tqdm

class TrainAlgorithm(abc.ABC):
    """Base class implementing a training algorithm."""

    @abc.abstractmethod
    def step(self, n, dlist):
        """Take a single algorithm step.

        :param n: :class:`.network.Network` to train with
        :param dlist: list of :class:`.data.DataPoint` to train with
        """
        pass

    @abc.abstractmethod
    def to_dict(self):
        """Save algorithm state to dictionary"""
        pass

    @abc.abstractmethod
    def load_dict(self, dct):
        """Load algorithm state from dictionary"""
        pass

    @classmethod
    @abc.abstractmethod
    def from_dict(cls, dct):
        """Load algorithm from dictionary"""
        pass

    @classmethod
    def from_file(cls, fn):
        """Load algorithm from file"""
        dct = store.get_dict(fn, 'trainalgorithm')
        return cls.from_dict(dct)

    def to_file(self, fn):
        """Save algorithm state to file"""
        store.store_dict(fn, 'trainalgorithm', self.to_dict())
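# Example (illustrative sketch, not part of the original module): a minimal
# custom TrainAlgorithm subclass implementing plain stochastic gradient
# descent. It uses only Network calls that also appear in AdamAlgorithm.step
# below (gradient_zero, forward, backward, gradient, getgradients,
# updategradients); the class name and the learning-rate parameter lr are
# hypothetical. Masked pixels are ignored here for brevity; see
# AdamAlgorithm.step for the full mask handling.
class SGDAlgorithm(TrainAlgorithm):
    """Plain SGD with a fixed learning rate."""

    def __init__(self, lr=0.001):
        self.lr = lr

    def step(self, n, dlist):
        n.gradient_zero()
        tpix = 0
        for d in dlist:
            inp, tar, msk = d.getall()
            err = tar - n.forward(inp)
            tpix += err.size
            n.backward(err)
            n.gradient()
        # Average the accumulated gradient over all pixels and take a
        # fixed-size step.
        n.updategradients(self.lr * n.getgradients() / tpix)

    def to_dict(self):
        return {'lr': self.lr}

    def load_dict(self, dct):
        self.lr = dct['lr']

    @classmethod
    def from_dict(cls, dct):
        t = cls()
        t.load_dict(dct)
        return t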
class AdamAlgorithm(TrainAlgorithm):
    """Implementation of the ADAM algorithm.

    :param network: :class:`.network.Network` to train with
    :param a: ADAM parameter
    :param b1: ADAM parameter
    :param b2: ADAM parameter
    :param e: ADAM parameter
    """

    def __init__(self, network, a=0.001, b1=0.9, b2=0.999, e=10**-8):
        self.a = a
        self.b1 = b1
        self.b1t = b1
        self.b2 = b2
        self.b2t = b2
        self.e = e
        if network:
            self.npars = network.getgradients().shape[0]
            # First and second moment estimates, one entry per parameter.
            self.m = np.zeros(self.npars)
            self.v = np.zeros(self.npars)

    def step(self, n, dlist):
        n.gradient_zero()
        tpix = 0
        for d in dlist:
            inp, tar, msk = d.getall()
            out = n.forward(inp)
            err = tar - out
            if msk is None:
                tpix += err.size
            else:
                # Zero the error at masked pixels and exclude them from the
                # pixel count used for averaging.
                msk = (msk == 0)
                err[:, msk] = 0
                tpix += err.size - err.shape[0]*msk.sum()
            n.backward(err)
            n.gradient()
        g = n.getgradients()
        g /= tpix
        # Update the biased moment estimates...
        self.m *= self.b1
        self.m += (1 - self.b1)*g
        self.v *= self.b2
        self.v += (1 - self.b2)*(g**2)
        # ...and correct their initialization bias.
        mhat = self.m/(1 - self.b1t)
        vhat = self.v/(1 - self.b2t)
        self.b1t *= self.b1
        self.b2t *= self.b2
        upd = self.a * mhat/(np.sqrt(vhat) + self.e)
        n.updategradients(upd)

    def to_dict(self):
        dct = {}
        dct['a'] = self.a
        dct['b1'] = self.b1
        dct['b1t'] = self.b1t
        dct['b2'] = self.b2
        dct['b2t'] = self.b2t
        dct['e'] = self.e
        dct['npars'] = self.npars
        dct['m'] = self.m.copy()
        dct['v'] = self.v.copy()
        return dct

    def load_dict(self, dct):
        self.a = dct['a']
        self.b1 = dct['b1']
        self.b1t = dct['b1t']
        self.b2 = dct['b2']
        self.b2t = dct['b2t']
        self.e = dct['e']
        self.npars = dct['npars']
        self.m = dct['m'].copy()
        self.v = dct['v'].copy()

    @classmethod
    def from_dict(cls, dct):
        t = cls(None)
        t.load_dict(dct)
        return t
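# Example (illustrative sketch): round-trip optimiser state through a file
# using the to_file/from_file methods inherited from TrainAlgorithm. The
# filename is an assumed placeholder; the defaults above (a=0.001, b1=0.9,
# b2=0.999, e=1e-8) are the standard ADAM values of Kingma & Ba.
def _example_adam_roundtrip(network):
    t = AdamAlgorithm(network)
    t.to_file('adam_state.h5')   # stores m, v and the decay products b1t, b2t
    return AdamAlgorithm.from_file('adam_state.h5')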
def restore_training(fn, netclass, trainclass, valclass, valdata, gpu=True):
    """Restore training from file.

    :param fn: filename to load
    :param netclass: :class:`.network.Network` class to use
    :param trainclass: :class:`TrainAlgorithm` class to use
    :param valclass: :class:`.validate.Validation` class to use
    :param valdata: list of :class:`.data.DataPoint` to validate with
    :param gpu: (optional) whether to use GPU or CPU
    :return: network object, training algorithm object, and validation object
    """
    n = netclass.from_file(fn, groupname='checkpoint', gpu=gpu)
    t = trainclass.from_file(fn)
    v = valclass.from_file(fn)
    v.d = valdata
    return n, t, v
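# Example (illustrative sketch): resume an interrupted run from the
# '.checkpoint' file that train() writes alongside its output file. The
# filename is an assumed placeholder; the network and validation classes
# follow the pattern of the msdnet example scripts.
def _example_restore(valdata):
    import msdnet
    n, t, v = restore_training('params.checkpoint', msdnet.network.MSDNet,
                               AdamAlgorithm, msdnet.validate.MSEValidation,
                               valdata)
    return n, t, v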
def train(network, trainalg, validation, dataprov, outputfile,
          val_every=None, loggers=None, stopcrit=np.inf, progress=False):
    """Train network.

    :param network: :class:`.network.Network` to train with
    :param trainalg: :class:`TrainAlgorithm` object that performs training.
    :param validation: :class:`.validate.Validation` object that performs validation.
    :param dataprov: :class:`.data.BatchProvider` object that generates training batches.
    :param outputfile: file to store trained network parameters in
    :param val_every: (optional) number of training steps before each validation step
    :param loggers: (optional) list of :class:`loggers.Logger` objects to perform logging.
    :param stopcrit: (optional) number of validation steps without improvement before stopping training
    :param progress: (optional) whether to show progress during training
    """
    if not val_every:
        val_every = len(validation.d)
    # Derive the checkpoint filename by replacing the output file's extension.
    parts = outputfile.split('.')
    if len(parts) > 1:
        checkpointfile = '.'.join(parts[:-1]) + '.checkpoint'
    else:
        checkpointfile = outputfile + '.checkpoint'
    nworse = 0
    nstep = 0
    if progress:
        pbar = tqdm.tqdm(total=val_every)
    while True:
        trainalg.step(network, dataprov.getbatch())
        nstep += 1
        if progress:
            pbar.update()
        if nstep >= val_every:
            if progress:
                pbar.clear()
                pbar.close()
            nstep = 0
            network.to_file(checkpointfile, groupname='checkpoint')
            trainalg.to_file(checkpointfile)
            validation.to_file(checkpointfile)
            if validation.validate(network):
                # Validation improved: store the current best parameters.
                network.to_file(outputfile)
                nworse = 0
            else:
                # Early stopping after stopcrit validations without improvement.
                nworse += 1
                if nworse >= stopcrit:
                    return
            if loggers:
                try:
                    for log in loggers:
                        log.log(validation)
                except TypeError:
                    loggers.log(validation)
            if progress:
                pbar = tqdm.tqdm(total=val_every)
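# Example (illustrative sketch following the pattern of the msdnet example
# scripts; file names, network shape, and data layout are assumptions, and
# exact constructor signatures may differ between versions).
def _example_train():
    import msdnet

    # Mixed-scale dense network: 100 layers, dilations cycling through 1..10,
    # one input channel, one output channel.
    dilations = msdnet.dilations.IncrementDilations(10)
    n = msdnet.network.MSDNet(100, dilations, 1, 1, gpu=True)
    n.initialize()

    # Training and validation data points read from image files.
    dats = [msdnet.data.ImageFileDataPoint('train_in.tiff', 'train_tar.tiff')]
    datsv = [msdnet.data.ImageFileDataPoint('val_in.tiff', 'val_tar.tiff')]
    bprov = msdnet.data.BatchProvider(dats, 1)

    t = AdamAlgorithm(n)
    val = msdnet.validate.MSEValidation(datsv)
    log = msdnet.loggers.ConsoleLogger()

    # Runs until stopped (or until stopcrit validations without improvement);
    # the best parameters so far are kept in 'params.h5'.
    train(n, t, val, bprov, 'params.h5', loggers=[log], val_every=len(datsv))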