#-----------------------------------------------------------------------
#Copyright 2013 Centrum Wiskunde & Informatica, Amsterdam
#
#Author: Daniel M. Pelt
#Contact: D.M.Pelt@cwi.nl
#Website: http://dmpelt.github.io/pynnfbp/
#
#
#This file is part of the PyNN-FBP, a Python implementation of the
#NN-FBP tomographic reconstruction method.
#
#PyNN-FBP is free software: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, either version 3 of the License, or
#(at your option) any later version.
#
#PyNN-FBP is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with PyNN-FBP. If not, see <http://www.gnu.org/licenses/>.
#
#-----------------------------------------------------------------------
import tempfile
import numpy as np
hastables=True
try:
import tables as ts
except ImportError:
hastables=False
import sys
import os
import math
import random
[docs]class TrainingData(object):
'''Base object of a class that represents training or validation data used
during training of a network.
An implementing class should define ``getDataBlock``, ``addDataBlock`` and ``normalizeData`` methods. See,
for example, :class:`HDF5TrainingData`.
:param data: Dataset to pick pixels from. (see :mod:`nnfbp.DataSet`)
:type data: DataSet
:param nPoints: Number of pixels to pick.
:type nPoints: :class:`int`
:param blockSize: Size of each data block.
:type blockSize: :class:`int`
'''
def __setupIDX(self,size):
'''Create a variable ``idx`` that gives location of pixels that can be picked.'''
ym,xm = np.ogrid[-(size-1.)/2.:(size-1.)/2.:complex(0,size),-(size-1.)/2.:(size-1.)/2.:complex(0,size)]
bnd = (size)**2/4
self.mask = xm**2+ym**2<=bnd
x,y = np.where(self.mask==True)
self.idx = zip(x,y)
def __getPickedIndices(self,nToPick):
'''Return a list of the location of ``nToPick`` randomly selected pixels.'''
nTimesToDo = int(math.ceil(nToPick/float(len(self.idx))))
iList = []
for i in xrange(nTimesToDo):
iList.extend(self.idx)
return zip(*random.sample(iList,nToPick))
def __getPickedIndicesWithMask(self,nToPick,mask):
'''Return a list of the location of ``nToPick`` randomly selected pixels.'''
maskCombined = self.mask+mask
x,y = np.where(maskCombined>1)
idx = zip(x,y)
nTimesToDo = int(math.ceil(nToPick/float(len(idx))))
iList = []
for i in xrange(nTimesToDo):
iList.extend(idx)
return zip(*random.sample(iList,nToPick))
def __init__(self,data,nPoints,network,blockSize=10000):
tmpFl = tempfile.mkstemp(dir=network.tmpDir)
self.fn= tmpFl[1]
os.close(tmpFl[0])
pickArray = np.histogram(np.floor(data.nImages*np.random.rand(nPoints)), data.nImages, (0,data.nImages))[0]
if np.max(pickArray)>blockSize:
raise Exception('Buffer size is too small!')
nParameters = network.red.outSize
self.nPar = nParameters
curData = np.empty((np.max(pickArray),nParameters+1))
outData = np.empty((blockSize,nParameters+1))
self.__setupIDX(network.proj.recSize)
self.nBlocks=0
nInBlock=0
i=0
for i in xrange(len(data)):
example = data[i]
nToPick = pickArray[i]
if nToPick==0: continue
image = example[0]
sino = example[1]
angles = example[2]
if len(example)>3:
pickedIndices = self.__getPickedIndicesWithMask(nToPick,example[3])
else:
pickedIndices = self.__getPickedIndices(nToPick)
for j in xrange(nParameters):
backImage = network.proj.reconstructWithFilter(sino,network.red.filters[:,j])
curData[:nToPick,j] = backImage[pickedIndices]
curData[:nToPick,nParameters] = image[pickedIndices]
if nInBlock+nToPick<blockSize:
outData[nInBlock:nInBlock+nToPick,:]=curData[:nToPick,:].copy()
nInBlock+=nToPick
else:
nToWrite = blockSize-nInBlock
nLeft = nToPick - nToWrite
if nToWrite>0:
outData[nInBlock:blockSize,:] = curData[0:nToWrite,:].copy()
self.addDataBlock(outData,self.nBlocks)
self.nBlocks+=1
nInBlock=0
if nLeft>0:
outData[0:nLeft,:] = curData[nToWrite:nToPick,:].copy()
nInBlock+=nLeft
percDone = float(blockSize*self.nBlocks + nInBlock)/nPoints
nTicksDone = (int)(percDone*60)
sys.stdout.write('\r[%s>%s] %d%% %s' % ('-'*nTicksDone, ' '*(60-nTicksDone), 100*percDone,50*' '))
sys.stdout.flush()
if nInBlock>0:
self.addDataBlock(outData[0:nInBlock,:], self.nBlocks)
self.nBlocks+=1
sys.stdout.write('\n')
sys.stdout.flush()
[docs] def addDataBlock(self,data,i):
'''Add a block of data to the set.
:param data: Block of data to add.
:type data: :class:`numpy.ndarray`
:param i: Position to add block to.
:type i: :class:`int`
'''
raise NotImplementedError("TrainingData: Subclass should implement this method.")
[docs] def getDataBlock(self,i):
'''Get a block of data from the set.
:param i: Position of block to get.
:type i: :class:`int`
:returns: :class:`numpy.ndarray` -- Block of data.
'''
raise NotImplementedError("TrainingData: Subclass should implement this method.")
[docs] def getMinMax(self):
'''Returns the minimum and maximum values of each column of the entire set.
:returns: :class:`tuple` with:
- ``minL`` -- :class:`numpy.ndarray` with minimum value of each column except last.
- ``maxL`` -- :class:`numpy.ndarray` with minimum value of each column except last.
- ``minIn`` -- :class:`float` minimum values of last column.
- ``maxIn`` -- :class:`float` maximum values of last column.
'''
minL = np.empty(self.nPar)
minL.fill(np.inf)
maxL = np.empty(self.nPar)
maxL.fill(-np.inf)
maxIn = -np.inf
minIn = np.inf
for i in xrange(self.nBlocks):
data = self.getDataBlock(i)
if data == None:
continue
maxL = np.maximum(maxL, data[:, 0:self.nPar].max(0))
minL = np.minimum(maxL, data[:, 0:self.nPar].min(0))
maxIn = np.max([maxIn, data[:, self.nPar].max()])
minIn = np.min([minIn, data[:, self.nPar].min()])
return (minL,maxL,minIn,maxIn)
[docs] def normalizeData(self,minL,maxL,minIn,maxIn):
'''Normalize the set such that every column is in range (0,1), except for the last column,
which will be normalized to (0.25,0.75). Parameters are like ``getMinMax()``.
'''
raise NotImplementedError("TrainingData: Subclass should implement this method.")
[docs] def close(self):
'''Close the underlying file.'''
os.remove(self.fn)
[docs]class HDF5TrainingData(TrainingData):
'''Implementation of :class:`TrainingData` that uses a HDF5 file to store data.
:param compression: Which PyTables compression option to use.
:type compression: :class:`string`
:param comprl: Which PyTables compression level to use.
:type comprl: :class:`int`
'''
[docs] def getDataBlock(self,i):
h5file = ts.openFile(self.fn, mode='r', title="")
try:
data = h5file.getNode(h5file.root, "data%d" % i).read()
except ts.exceptions.NoSuchNodeError:
data = None
h5file.close()
return data
[docs] def addDataBlock(self,data,i):
h5file = ts.openFile(self.fn, mode='a', title="")
atom = ts.Atom.from_dtype(data.dtype)
filters = ts.Filters(complib=self.compression, complevel=self.comprl)
ds = h5file.createCArray(h5file.root, "data%d" % i, atom,data.shape,filters=filters)
ds[:] = data
h5file.close()
[docs] def normalizeData(self,minL,maxL,minIn,maxIn):
h5file = ts.openFile(self.fn, mode='a', title="")
for i in xrange(self.nBlocks):
data = h5file.getNode(h5file.root, "data%d" % i)
tileM = np.tile(minL, (data.shape[0],1))
maxmin = np.tile(maxL-minL, (data.shape[0],1))
data[:,0:self.nPar] =2*(data[:,0:self.nPar]-tileM)/maxmin - 1
data[:,self.nPar] = 0.25+(data[:,self.nPar]-minIn)/(2*(maxIn-minIn))
h5file.close()
def __init__(self,data,nPoints,network,blockSize=10000,compression='blosc',comprl=9):
if not hastables:
raise Exception("PyTables has to be installed to use HDF5TrainingData")
self.compression = compression
self.comprl = comprl
super(HDF5TrainingData, self).__init__(data,nPoints,network,blockSize)