Source code for nnfbp.TrainingData

#-----------------------------------------------------------------------
#Copyright 2013 Centrum Wiskunde & Informatica, Amsterdam
#
#Author: Daniel M. Pelt
#Contact: D.M.Pelt@cwi.nl
#Website: http://dmpelt.github.io/pynnfbp/
#
#
#This file is part of the PyNN-FBP, a Python implementation of the
#NN-FBP tomographic reconstruction method.
#
#PyNN-FBP is free software: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, either version 3 of the License, or
#(at your option) any later version.
#
#PyNN-FBP is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with PyNN-FBP. If not, see <http://www.gnu.org/licenses/>.
#
#-----------------------------------------------------------------------

import tempfile
import numpy as np

hastables = True
try:
    import tables as ts
except ImportError:
    hastables = False

import sys
import os
import math
import random
class TrainingData(object):
    '''Base class for objects that represent training or validation data
    used during training of a network.

    An implementing class should define ``getDataBlock``, ``addDataBlock``
    and ``normalizeData`` methods. See, for example, :class:`HDF5TrainingData`;
    a minimal in-memory sketch also follows this class.

    :param data: Dataset to pick pixels from. (see :mod:`nnfbp.DataSet`)
    :type data: DataSet
    :param nPoints: Number of pixels to pick.
    :type nPoints: :class:`int`
    :param blockSize: Size of each data block.
    :type blockSize: :class:`int`
    '''

    def __setupIDX(self, size):
        '''Create a variable ``idx`` that gives the locations of pixels that can be picked.'''
        ym, xm = np.ogrid[-(size - 1.) / 2.:(size - 1.) / 2.:complex(0, size),
                          -(size - 1.) / 2.:(size - 1.) / 2.:complex(0, size)]
        bnd = (size)**2 / 4
        self.mask = xm**2 + ym**2 <= bnd
        x, y = np.where(self.mask == True)
        self.idx = zip(x, y)

    def __getPickedIndices(self, nToPick):
        '''Return the locations of ``nToPick`` randomly selected pixels.'''
        nTimesToDo = int(math.ceil(nToPick / float(len(self.idx))))
        iList = []
        for i in xrange(nTimesToDo):
            iList.extend(self.idx)
        return zip(*random.sample(iList, nToPick))

    def __getPickedIndicesWithMask(self, nToPick, mask):
        '''Return the locations of ``nToPick`` randomly selected pixels that lie inside ``mask``.'''
        maskCombined = self.mask + mask
        x, y = np.where(maskCombined > 1)
        idx = zip(x, y)
        nTimesToDo = int(math.ceil(nToPick / float(len(idx))))
        iList = []
        for i in xrange(nTimesToDo):
            iList.extend(idx)
        return zip(*random.sample(iList, nToPick))

    def __init__(self, data, nPoints, network, blockSize=10000):
        tmpFl = tempfile.mkstemp(dir=network.tmpDir)
        self.fn = tmpFl[1]
        os.close(tmpFl[0])
        pickArray = np.histogram(np.floor(data.nImages * np.random.rand(nPoints)),
                                 data.nImages, (0, data.nImages))[0]
        if np.max(pickArray) > blockSize:
            raise Exception('Buffer size is too small!')
        nParameters = network.red.outSize
        self.nPar = nParameters
        curData = np.empty((np.max(pickArray), nParameters + 1))
        outData = np.empty((blockSize, nParameters + 1))
        self.__setupIDX(network.proj.recSize)
        self.nBlocks = 0
        nInBlock = 0
        i = 0
        for i in xrange(len(data)):
            example = data[i]
            nToPick = pickArray[i]
            if nToPick == 0:
                continue
            image = example[0]
            sino = example[1]
            angles = example[2]
            if len(example) > 3:
                pickedIndices = self.__getPickedIndicesWithMask(nToPick, example[3])
            else:
                pickedIndices = self.__getPickedIndices(nToPick)
            for j in xrange(nParameters):
                backImage = network.proj.reconstructWithFilter(sino, network.red.filters[:, j])
                curData[:nToPick, j] = backImage[pickedIndices]
            curData[:nToPick, nParameters] = image[pickedIndices]
            if nInBlock + nToPick < blockSize:
                outData[nInBlock:nInBlock + nToPick, :] = curData[:nToPick, :].copy()
                nInBlock += nToPick
            else:
                nToWrite = blockSize - nInBlock
                nLeft = nToPick - nToWrite
                if nToWrite > 0:
                    outData[nInBlock:blockSize, :] = curData[0:nToWrite, :].copy()
                self.addDataBlock(outData, self.nBlocks)
                self.nBlocks += 1
                nInBlock = 0
                if nLeft > 0:
                    outData[0:nLeft, :] = curData[nToWrite:nToPick, :].copy()
                    nInBlock += nLeft
            percDone = float(blockSize * self.nBlocks + nInBlock) / nPoints
            nTicksDone = int(percDone * 60)
            sys.stdout.write('\r[%s>%s] %d%% %s' % ('-' * nTicksDone, ' ' * (60 - nTicksDone), 100 * percDone, 50 * ' '))
            sys.stdout.flush()
        if nInBlock > 0:
            self.addDataBlock(outData[0:nInBlock, :], self.nBlocks)
            self.nBlocks += 1
        sys.stdout.write('\n')
        sys.stdout.flush()

    def addDataBlock(self, data, i):
        '''Add a block of data to the set.

        :param data: Block of data to add.
        :type data: :class:`numpy.ndarray`
        :param i: Position to add block to.
        :type i: :class:`int`
        '''
        raise NotImplementedError("TrainingData: Subclass should implement this method.")

    def getDataBlock(self, i):
        '''Get a block of data from the set.

        :param i: Position of block to get.
        :type i: :class:`int`
        :returns: :class:`numpy.ndarray` -- Block of data.
        '''
        raise NotImplementedError("TrainingData: Subclass should implement this method.")

    def getMinMax(self):
        '''Return the minimum and maximum values of each column of the entire set.

        :returns: :class:`tuple` with:

            - ``minL`` -- :class:`numpy.ndarray` with minimum value of each column except the last.
            - ``maxL`` -- :class:`numpy.ndarray` with maximum value of each column except the last.
            - ``minIn`` -- :class:`float` minimum value of the last column.
            - ``maxIn`` -- :class:`float` maximum value of the last column.
        '''
        minL = np.empty(self.nPar)
        minL.fill(np.inf)
        maxL = np.empty(self.nPar)
        maxL.fill(-np.inf)
        maxIn = -np.inf
        minIn = np.inf
        for i in xrange(self.nBlocks):
            data = self.getDataBlock(i)
            if data is None:
                continue
            maxL = np.maximum(maxL, data[:, 0:self.nPar].max(0))
            minL = np.minimum(minL, data[:, 0:self.nPar].min(0))
            maxIn = np.max([maxIn, data[:, self.nPar].max()])
            minIn = np.min([minIn, data[:, self.nPar].min()])
        return (minL, maxL, minIn, maxIn)

    def normalizeData(self, minL, maxL, minIn, maxIn):
        '''Normalize the set such that every column is in the range (-1,1),
        except for the last column, which will be normalized to (0.25,0.75).

        Parameters are like the return values of ``getMinMax()``.
        '''
        raise NotImplementedError("TrainingData: Subclass should implement this method.")

    def close(self):
        '''Close the underlying file.'''
        os.remove(self.fn)
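
# A minimal sketch of an implementing subclass, showing the contract described in
# the ``TrainingData`` docstring (``addDataBlock``, ``getDataBlock`` and
# ``normalizeData``). ``ListTrainingData`` is a hypothetical name and is not part
# of PyNN-FBP; it keeps every block in memory, which is only practical for small sets.
class ListTrainingData(TrainingData):
    def __init__(self, data, nPoints, network, blockSize=10000):
        self.__blocks = {}
        super(ListTrainingData, self).__init__(data, nPoints, network, blockSize)

    def addDataBlock(self, data, i):
        # Store a copy of block ``i``; the base class hands blocks over in order.
        self.__blocks[i] = data.copy()

    def getDataBlock(self, i):
        # Return ``None`` for missing blocks, mirroring HDF5TrainingData.
        return self.__blocks.get(i)

    def normalizeData(self, minL, maxL, minIn, maxIn):
        # Scale the filter columns to (-1,1) and the target column to (0.25,0.75),
        # as HDF5TrainingData does below.
        for blk in self.__blocks.values():
            blk[:, 0:self.nPar] = 2 * (blk[:, 0:self.nPar] - minL) / (maxL - minL) - 1
            blk[:, self.nPar] = 0.25 + (blk[:, self.nPar] - minIn) / (2 * (maxIn - minIn))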

class HDF5TrainingData(TrainingData):
    '''Implementation of :class:`TrainingData` that uses an HDF5 file to store data.

    :param compression: Which PyTables compression option to use.
    :type compression: :class:`string`
    :param comprl: Which PyTables compression level to use.
    :type comprl: :class:`int`
    '''

    def getDataBlock(self, i):
        h5file = ts.openFile(self.fn, mode='r', title="")
        try:
            data = h5file.getNode(h5file.root, "data%d" % i).read()
        except ts.exceptions.NoSuchNodeError:
            data = None
        h5file.close()
        return data

    def addDataBlock(self, data, i):
        h5file = ts.openFile(self.fn, mode='a', title="")
        atom = ts.Atom.from_dtype(data.dtype)
        filters = ts.Filters(complib=self.compression, complevel=self.comprl)
        ds = h5file.createCArray(h5file.root, "data%d" % i, atom, data.shape, filters=filters)
        ds[:] = data
        h5file.close()

    def normalizeData(self, minL, maxL, minIn, maxIn):
        h5file = ts.openFile(self.fn, mode='a', title="")
        for i in xrange(self.nBlocks):
            data = h5file.getNode(h5file.root, "data%d" % i)
            tileM = np.tile(minL, (data.shape[0], 1))
            maxmin = np.tile(maxL - minL, (data.shape[0], 1))
            data[:, 0:self.nPar] = 2 * (data[:, 0:self.nPar] - tileM) / maxmin - 1
            data[:, self.nPar] = 0.25 + (data[:, self.nPar] - minIn) / (2 * (maxIn - minIn))
        h5file.close()
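
    # For example, with minL = 0, maxL = 10, minIn = 0 and maxIn = 100, a filter
    # value of 5 maps to 2*(5-0)/10 - 1 = 0.0 (range (-1,1)), and a target pixel
    # value of 50 maps to 0.25 + (50-0)/200 = 0.5 (range (0.25,0.75)).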

    def __init__(self, data, nPoints, network, blockSize=10000, compression='blosc', comprl=9):
        if not hastables:
            raise Exception("PyTables has to be installed to use HDF5TrainingData")
        self.compression = compression
        self.comprl = comprl
        super(HDF5TrainingData, self).__init__(data, nPoints, network, blockSize)
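
# Sketch of typical use, assuming ``data`` and ``network`` objects from elsewhere
# in PyNN-FBP. From ``TrainingData.__init__`` above, ``data`` must provide
# ``nImages``, ``len()`` and indexing that yields ``(image, sinogram, angles)``
# tuples (optionally with a mask as a fourth element), and ``network`` must provide
# ``tmpDir``, ``red.outSize``, ``red.filters``, ``proj.recSize`` and
# ``proj.reconstructWithFilter``. The point count (here 1000000) is only an example.
#
#   train = HDF5TrainingData(data, 1000000, network)
#   minL, maxL, minIn, maxIn = train.getMinMax()
#   train.normalizeData(minL, maxL, minIn, maxIn)
#   # ... read the normalized blocks via train.getDataBlock(i) ...
#   train.close()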