__author__ = "Simon Andermatt"
__copyright__ = "Copyright (C) 2017 Simon Andermatt"
import copy
import logging
import os
import pickle
import time
from abc import abstractmethod

import numpy as np

from mdgru.helper import argget, compile_arguments, generate_defaults_info


class SupervisedEvaluation(object):
_defaults = {
'dropout_rate': {'value': 0.5,
'help': '"keep rate" for weights using dropconnect. The higher the value, the closer the sampled models to the full model.'},
'namespace': {'value': 'default',
'help': "override default model name (if no ckpt is provided). Probably not a good idea!",
'alt': ['modelname']},
'only_save_labels': {'value': False, 'help': 'save only labels and no probability distributions'},
# 'batch_size': {'value': 1, 'help': 'determines batch size to be used during training'}
'validate_same': {'value': True, 'help': 'always pick other random samples for validation!',
'invert_meaning': 'dont_'},
'evaluate_uncertainty_times': {'value': 1, 'type': int,
                                       'help': 'Number of times we want to evaluate one volume. This only makes sense '
                                               'when using a keep rate of less than 1 during evaluation (dropout_during_evaluation '
                                               'less than 1)', 'name': 'number_of_evaluation_samples'},
'evaluate_uncertainty_dropout': {'value': 1.0, 'type': float,
                                         'help': 'Keep rate of weights during evaluation. Useful to visualize uncertainty '
'in conjunction with a number of samples per volume',
'name': 'dropout_during_evaluation'},
'evaluate_uncertainty_saveall': {'value': False,
                                     'help': 'Save each evaluation sample per volume. Without this flag, only the '
                                             'standard deviation and mean over all samples are kept.',
'name': 'save_individual_evaluations'},
'show_f05': True,
'show_f1': True,
'show_f2': True,
'show_l2': True,
'show_cross_entropy': True,
        'print_each': {'value': 1, 'help': 'print execution time and losses every # iterations', 'type': int},
        'batch_size': {'value': 1, 'help': 'minibatch size', 'type': int, 'name': 'batchsize', 'short': 'b'},
'datapath': {
            'help': 'path where the training, validation and testing folders lie. Can also be some other path, as long as the other locations are provided as absolute paths. An experiments folder will be created in this folder, where all runs and checkpoint files will be saved.'},
'locationtraining': {'value': None,
                             'help': 'absolute path, or path relative to datapath, to the training data. Either a list of paths to the sample folders or one path to a folder where samples should be automatically determined.',
'nargs': '+'},
'locationtesting': {'value': None,
                            'help': 'absolute path, or path relative to datapath, to the testing data. Either a list of paths to the sample folders or one path to a folder where samples should be automatically determined.',
'nargs': '+'},
'locationvalidation': {'value': None,
                               'help': 'absolute path, or path relative to datapath, to the validation data. Either a list of paths to the sample folders or one path to a folder where samples should be automatically determined.',
'nargs': '+'},
        'output_dims': {'help': 'number of output channels, e.g. number of classes the model needs to create a probability distribution over.', 'type': int, 'alt': ['nclasses']},
        'windowsize': {'type': int, 'short': 'w', 'help': 'window size to be used during training, validation and testing, if not specified otherwise', 'nargs': '+'},
        'padding': {'help': 'padding to be used during training, validation and testing, if not specified otherwise. During training, the padding specifies the amount a patch is allowed to reach outside of the image along all dimensions; during testing, it also specifies the amount of overlap needed between patches.', 'value': [0], 'nargs': '+', 'short': 'p', 'type': int},
        'windowsizetesting': {'value': None, 'help': 'override windowsize for testing', 'nargs': '+', 'type': int},
        'windowsizevalidation': None,  # {'value': None, 'help': 'override windowsize for validation', 'nargs': '+'},
        'paddingtesting': {'value': None, 'help': 'override padding for testing', 'nargs': '+', 'type': int},
        'paddingvalidation': None,  # {'value': None, 'help': 'override padding for validation', 'nargs': '+'},
        'testbatchsize': {'value': 1, 'help': 'batch size for testing'}
}

    def __init__(self, modelcls, datacls, kw):
        """
        Handler for the evaluation of the model defined in modelcls using data coming from datacls.

        Parameters
        ----------
        modelcls : cls
            Python class defining the model to evaluate
        datacls : cls
            Python class implementing the data loading and storing
        kw : dict
            keyword arguments, consumed here and passed on to modelcls and datacls
        """
self.origargs = copy.copy(kw)
eval_kw, kw = compile_arguments(SupervisedEvaluation, kw, transitive=False)
for k, v in eval_kw.items():
setattr(self, k, v)
self.w = self.windowsize
self.p = self.padding
self.use_tensorboard = False
# self.dropout_rate = argget(kw, "dropout_rate", 0.5)
self.current_epoch = 0
self.current_iteration = 0
# create datasets for training, validation and testing:
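        # each location is either a list of sample folders (used as-is) or a single
        # folder relative to datapath in which the samples are determined automatically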
locs = [[None, l] if l is None or len(l) > 1 else [os.path.join(self.datapath, l[0]), None] for l in
[self.locationtraining, self.locationvalidation, self.locationtesting]]
paramstraining = [self.w, self.p] + locs[0]
paramsvalidation = [self.windowsizevalidation if self.windowsizevalidation is not None else self.w,
self.paddingvalidation if self.paddingvalidation is not None else self.p] + locs[1]
paramstesting = [self.windowsizetesting if self.windowsizetesting is not None else self.w,
self.paddingtesting if self.paddingtesting is not None else self.p] + locs[2]
kwdata, kw = compile_arguments(datacls, kw, True, keep_entries=True)
kwcopy = copy.copy(kwdata)
kwcopy['nclasses'] = self.output_dims
kwcopy['batch_size'] = self.batch_size
self.trdc = datacls(*paramstraining, kw=copy.copy(kwcopy))
testkw = copy.copy(kwcopy)
testkw['batch_size'] = testkw['batch_size'] if not self.testbatchsize else self.testbatchsize
valkw = copy.copy(testkw)
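        # test data may lack ground truth; tolerate missing masks for the test
        # collection only (valkw was copied before the flag is set)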
testkw['ignore_missing_mask'] = True
self.tedc = datacls(*paramstesting, kw=testkw)
self.valdc = datacls(*paramsvalidation, kw=valkw)
self.currit = 0
self.show_dice = argget(kw, "show_dice", not self.show_f1)
self.binary_evaluation = self.show_dice or self.show_f1 or self.show_f05 or self.show_f2
self.estimatefilename = argget(kw, "estimatefilename", "estimate")
self.gpu = argget(kw, "gpus", [0])
self.get_train_session = lambda: self
self.get_test_session = lambda: self

    @abstractmethod
    def _train(self, batch, batchlabs):
        """ Performs one training iteration in the respective framework and returns the loss(es) """
        raise NotImplementedError("This needs to be implemented depending on the framework")

    @abstractmethod
    def _predict(self, batch, dropout, testing):
        """
        Predict the given batch with keep rate dropout.

        Parameters
        ----------
        batch : ndarray
            image data to predict on
        dropout : float
            keep rate for dropconnect
        testing : bool
            whether inference is run in testing mode

        Returns
        -------
        ndarray : prediction based on data batch
        """
pass

    @abstractmethod
    def _predict_with_loss(self, batch, batchlabs):
        """
        Predict for the given batch and return the loss compared to the labels in batchlabs.

        Parameters
        ----------
        batch : ndarray
            image data
        batchlabs : ndarray
            corresponding label data

        Returns
        -------
        tuple of ndarray prediction and losses
        """
pass

    @abstractmethod
    def _set_session(self, sess, cachefolder):
        """ Set the framework-specific session to use for computation """
        pass

    @abstractmethod
    def _save(self, f):
        """
        Save the model to file f in the current framework.

        Parameters
        ----------
        f : str
            location to save the model at
        """
pass

    @abstractmethod
    def _load(self, f):
        """
        Load the model in the current framework from f.

        Parameters
        ----------
        f : str
            location of the stored model
        """
pass

    @abstractmethod
    def get_globalstep(self):
        """
        Return the number of iterations this model has been trained for.

        Returns
        -------
        int : iteration count
        """
pass

    def train(self):
        """
        Performs one training iteration and logs the time spent on data sampling and processing.

        Returns
        -------
        loss(es) as returned by _train
        """
start_time = time.time()
batch, batchlabs = self.trdc.random_sample(batch_size=self.batch_size)
time_after_loading = time.time()
loss = self._train(batch, batchlabs)
self.currit += 1
end_time = time.time()
        if self.currit % self.print_each == 0:
logging.getLogger("eval").info("it: {}, time: [i/o: {}, processing: {}, all: {}], loss: {}"
.format(self.currit,
np.round(time_after_loading - start_time, 6),
np.round(end_time - time_after_loading, 6),
np.round(end_time - start_time, 6),
loss))
return loss

    def test_scores(self, pred, ref):
        """
        Evaluates all selected scores between reference data ref and prediction pred.

        Parameters
        ----------
        pred : ndarray
            prediction, as probability distributions per pixel / voxel
        ref : ndarray
            reference, either as probability distributions per pixel / voxel or as a label map

        Returns
        -------
        dict mapping each selected score name to its computed value
        """
ref = np.int32(np.expand_dims(ref.squeeze(), 0))
pred = np.expand_dims(pred.squeeze(), 0)
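        # if ref arrived as a label map rather than one-hot encoded, expand it to pred's shape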
if pred.shape != ref.shape:
tar2 = np.zeros((np.prod(pred.shape[:-1]), pred.shape[-1]))
tar2[np.arange(np.prod(pred.shape[:-1])), ref.flatten()] = 1
ref = tar2.reshape(pred.shape)
res = {}
eps = 1e-8
nclasses = self.model.nclasses
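        # jointly encode reference and predicted labels so that a single bincount
        # over enc_both yields the full nclasses x nclasses confusion matrix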
if self.binary_evaluation:
enc_ref = np.argmax(ref, -1)
enc_pred = nclasses * np.argmax(pred, -1)
enc_both = enc_ref + enc_pred
bins = np.bincount(enc_both.flatten(), minlength=nclasses ** 2).reshape((nclasses, nclasses))
if self.show_dice:
res["dice"] = [bins[c, c] * 2 / (np.sum(bins, -1)[c] + np.sum(bins, -2)[c] + eps) for c in
range(nclasses)]
if self.show_f05 or self.show_f2:
precision = np.array([bins[c, c] / (np.sum(bins, -1)[c] + eps) for c in range(nclasses)])
recall = np.array([bins[c, c] / (np.sum(bins, -2)[c] + eps) for c in range(nclasses)])
if self.show_f05:
beta2 = 0.5 ** 2
res["f05"] = (1 + beta2) * precision * recall / ((beta2 * precision) + recall + eps)
if self.show_f1:
res["f1"] = [bins[c, c] * 2 / (np.sum(bins, -2)[c] + np.sum(bins, -1)[c] + eps) for c in
range(nclasses)]
if self.show_f2:
beta2 = 2 ** 2
res["f2"] = (1 + beta2) * precision * recall / (beta2 * precision + recall + eps)
if self.show_cross_entropy:
res["cross_entropy"] = np.mean(np.sum(ref * np.log(pred + eps), -1))
if self.show_l2:
res["l2"] = np.mean(np.sum((ref - pred) ** 2, -1))
return res

    def test_all_random(self, batch_size=None, dc=None, resample=True):
        """
        Test random samples.

        Parameters
        ----------
        batch_size : int
            minibatch size to compute on
        dc : datacollection instance, optional
            datacollection to sample from
        resample : bool
            indicates if we need to sample before evaluating

        Returns
        -------
        tuple of loss and prediction ndarray
        """
if dc is None:
dc = self.valdc
if batch_size is None:
batch_size = self.batch_size
if self.validate_same:
dc.randomstate.seed(12345677)
if resample:
self.testbatch, self.testbatchlabs = dc.random_sample(batch_size=batch_size)
loss, prediction = self._predict_with_loss(self.testbatch, self.testbatchlabs)
return loss, prediction

    def test_all_available(self, batch_size=None, dc=None, return_results=False, dropout=None, testing=False):
        """
        Completely evaluates each full image in the given data collection using grid sampling.

        Parameters
        ----------
        batch_size : int
            minibatch size to compute on (currently only 1 is supported)
        dc : datacollection instance, optional
            datacollection to sample from
        return_results : bool
            should results be returned or stored right away?
        dropout : float
            keep rate of dropconnect for inference
        testing : bool
            whether inference is run in testing mode

        Returns
        -------
        either tuple of predictions and errors or only errors, depending on the return_results flag
        """
if dc is None:
dc = self.tedc
        if batch_size is not None and batch_size > 1:
            logging.getLogger('eval').error('batch sizes larger than 1 are not supported yet')
volgens = dc.get_volume_batch_generators()
if dropout is None:
dropout = self.evaluate_uncertainty_dropout
full_vols = []
errs = []
lasttime = time.time()
for volgen, file, shape, w, p in volgens:
logging.getLogger('eval').info(
'evaluating file {} of shape {} with w {} and p {}'.format(file, shape, w, p))
if len(shape) > 3:
shape = np.asarray([s for s in shape if s > 1])
res = np.zeros(list(shape) + [self.model.nclasses], dtype=np.float32)
if self.evaluate_uncertainty_times > 1:
uncertres = np.zeros(res.shape)
if self.evaluate_uncertainty_saveall:
allres = np.zeros([self.evaluate_uncertainty_times] + list(res.shape))
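            # build a per-voxel weight that ramps down linearly towards the patch
            # border along every padded dimension, so that overlapping patches
            # blend smoothly when their weighted predictions are summed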
certainty = np.ones(w)
for ind, pp in enumerate(p):
if pp > 0:
slicesa = [slice(None) for _ in range(len(p))]
slicesb = [slice(None) for _ in range(len(p))]
reshapearr = [1 for _ in range(len(p))]
reshapearr[ind] = pp
slicesa[ind] = slice(None, pp)
slicesb[ind] = slice(-pp, None)
slicesa = tuple(slicesa)
slicesb = tuple(slicesb)
certainty[slicesa] *= np.arange(1.0 / pp, 1, 1.0 / (pp + 1)).reshape(reshapearr)
certainty[slicesb] *= np.arange(1.0 / pp, 1, 1.0 / (pp + 1))[::-1].reshape(reshapearr)
certainty = certainty.reshape([1] + w + [1])
# read, compute, merge, write back
for subvol, _, imin, imax in volgen:
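                # Monte Carlo sampling: with a keep rate < 1, repeated stochastic
                # forward passes differ; their mean serves as the prediction and
                # their standard deviation as a voxel-wise uncertainty estimate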
if self.evaluate_uncertainty_times > 1:
preds = []
for i in range(self.evaluate_uncertainty_times):
preds.append(self._predict(subvol, dropout, testing))
logging.getLogger('eval').debug(
'evaluated run {} of subvolume from {} to {}'.format(i, imin, imax))
pred = np.mean(np.asarray(preds), 0)
uncert = np.std(np.asarray(preds), 0)
preds = [x * certainty for x in preds]
else:
pred = self._predict(subvol, dropout, testing)
logging.getLogger('eval').debug('evaluated subvolume from {} to {}'.format(imin, imax))
pred *= certainty
# now reembed it into array
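                # patches may reach past the volume border: clip the target slice to
                # the valid region (mimin/mimax) and shift the source slice into the
                # patch accordingly (wrongmin/wrongmax)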
wrongmin = [int(abs(x)) if x < 0 else 0 for x in imin]
wrongmax = [int(x) if x < 0 else None for x in (shape - imax)]
mimin = np.asarray(np.maximum(0, imin), dtype=np.int32)
mimax = np.asarray(np.minimum(shape, imax), dtype=np.int32)
slicesaa = [slice(mimina, miminb) for mimina, miminb in zip(mimin, mimax)]
slicesaa.append(slice(None))
slicesaa = tuple(slicesaa)
slicesbb = [0]
slicesbb.extend(slice(wrongmina, wrongminb) for wrongmina, wrongminb in zip(wrongmin, wrongmax))
slicesbb.append(slice(None))
slicesbb = tuple(slicesbb)
res[slicesaa] += pred[slicesbb]
if self.evaluate_uncertainty_times > 1:
uncert *= certainty
uncertres[slicesaa] += \
uncert[slicesbb]
if self.evaluate_uncertainty_saveall:
for j in range(self.evaluate_uncertainty_times):
                            allres[(j,) + slicesaa] += preds[j][slicesbb]
# normalize again:
if self.evaluate_uncertainty_times > 1 and not return_results:
uncertres /= np.sum(res, -1).reshape(list(res.shape[:-1]) + [1])
dc.save(uncertres, os.path.join(file, "std-" + self.estimatefilename), tporigin=file)
if self.evaluate_uncertainty_saveall:
for j in range(self.evaluate_uncertainty_times):
dc.save(allres[j], os.path.join(file, "iter{}-".format(j) + self.estimatefilename),
tporigin=file)
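            # voxels that no patch covered (possible with negative padding) still hold
            # an all-zero distribution; assign them to the first (background) class
            # before normalizing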
if np.min(p) < 0:
res[np.where(np.sum(res, -1) < 1e-8)] = [1] + [0 for _ in range(self.model.nclasses - 1)]
res /= np.sum(res, -1).reshape(list(res.shape[:-1]) + [1])
# evaluate accuracy...
name = os.path.split(file)
name = name[-1] if len(name[-1]) else os.path.basename(name[0])
try:
if len(dc.maskfiles) > 0:
mfile = os.path.join(file, dc.maskfiles[0])
if os.path.exists(mfile):
mf = np.expand_dims(dc.load(mfile).squeeze(), 0)
errs.append([name, self.test_scores(res, mf)])
except Exception as e:
logging.getLogger('eval').warning(
                    'was not able to compute test scores, is ground truth available?')
logging.getLogger('eval').warning('{}'.format(e))
if return_results:
full_vols.append([name, file, res])
else:
if not self.only_save_labels:
dc.save(res, os.path.join(file, self.estimatefilename + "-probdist"), tporigin=file)
dc.save(np.uint8(np.argmax(res, -1)), os.path.join(file, self.estimatefilename + "-labels"),
tporigin=file)
logging.getLogger('eval').info('evaluation took {} seconds'.format(time.time() - lasttime))
lasttime = time.time()
if return_results:
return full_vols, errs
else:
return errs

    def load(self, f):
        """
        Loads the model at location f from disk.

        Parameters
        ----------
        f : str
            location of the stored model
        """
        self._load(f)
        states = {}
        pickle_name = f.rsplit('-', 1)[0] + ".pickle"
        try:
            states = pickle.load(open(pickle_name, "rb"))
        except Exception:
            logging.getLogger('eval').warning('there was no randomstate pickle named {} around'.format(pickle_name))
if "trdc" in states:
self.trdc.set_states(states['trdc'])
else:
self.trdc.set_states(None)
if "tedc" in states:
self.tedc.set_states(states['tedc'])
else:
self.tedc.set_states(None)
if "valdc" in states:
self.valdc.set_states(states['valdc'])
else:
self.valdc.set_states(None)
if 'epoch' in states:
self.current_epoch = states['epoch']
else:
self.current_epoch = 0
if 'iteration' in states:
self.current_iteration = states['iteration']
else:
self.current_iteration = 0
self.current_iteration = self.current_iteration

    def save(self, f):
        """
        Saves the model to disk at location f, along with the data collection states and the current epoch and iteration.

        Parameters
        ----------
        f : str
            location to save the model to

        Returns
        -------
        checkpoint information as returned by _save
        """
ckpt = self._save(f)
trdc = self.trdc.get_states()
tedc = self.tedc.get_states()
valdc = self.valdc.get_states()
states = {}
if trdc:
states['trdc'] = trdc
if tedc:
states['tedc'] = tedc
if valdc:
states['valdc'] = valdc
states['epoch'] = self.current_epoch
states['iteration'] = self.current_iteration
pickle.dump(states, open(f + ".pickle", "wb"))
return ckpt

    def add_summary_simple_value(self, text, value):
        raise NotImplementedError("this needs to be implemented and only works with the tensorflow backend.")

    def set_session(self, sess, cachefolder, train=False):
        return None

    def __enter__(self):
        pass

    def __exit__(self, exc_type, exc_val, exc_tb):
        pass


generate_defaults_info(SupervisedEvaluation)
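
# Minimal usage sketch (hypothetical subclass and data collection names; concrete
# implementations are provided by the framework-specific backends):
#
#     ev = MySupervisedEvaluation(MyModel, MyDataCollection, kw=vars(args))
#     for _ in range(iterations):
#         loss = ev.train()           # one sampling + optimization step
#     errs = ev.test_all_available()  # grid-sample, predict and store each volume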