Source code for mdgru.eval

__author__ = "Simon Andermatt"
__copyright__ = "Copyright (C) 2017 Simon Andermatt"

import copy
import logging
import os
import pickle
import time
from abc import abstractmethod

import numpy as np

from mdgru.helper import argget, compile_arguments, generate_defaults_info


class SupervisedEvaluation(object):
    _defaults = {
        'dropout_rate': {'value': 0.5, 'help': '"keep rate" for weights using dropconnect. The higher the value, the closer the sampled models to the full model.'},
        'namespace': {'value': 'default', 'help': "override default model name (if no ckpt is provided). Probably not a good idea!", 'alt': ['modelname']},
        'only_save_labels': {'value': False, 'help': 'save only labels and no probability distributions'},
        # 'batch_size': {'value': 1, 'help': 'determines batch size to be used during training'}
        'validate_same': {'value': True, 'help': 'always pick other random samples for validation!', 'invert_meaning': 'dont_'},
        'evaluate_uncertainty_times': {'value': 1, 'type': int,
                                       'help': 'Number times we want to evaluate one volume. This only makes sense '
                                               'using a keep rate of less than 1 during evaluation (dropout_during_evaluation '
                                               'less than 1)',
                                       'name': 'number_of_evaluation_samples'},
        'evaluate_uncertainty_dropout': {'value': 1.0, 'type': float,
                                         'help': 'Keeprate of weights during evaluation. Useful to visualize uncertainty '
                                                 'in conjunction with a number of samples per volume',
                                         'name': 'dropout_during_evaluation'},
        'evaluate_uncertainty_saveall': {'value': False,
                                         'help': 'Save each evaluation sample per volume. Without this flag, only the '
                                                 'standard deviation and mean over all samples is kept.',
                                         'name': 'save_individual_evaluations'},
        'show_f05': True,
        'show_f1': True,
        'show_f2': True,
        'show_l2': True,
        'show_cross_entropy': True,
        'print_each': {'value': 1, 'help': 'print execution time and losses each # iterations', 'type': int},
        'batch_size': {'value': 1, 'help': 'Minibatchsize', 'type': int, 'name': 'batchsize', 'short': 'b'},
        'datapath': {'help': 'path where training, validation and testing folders lie. Can also be some other path, as long as the other locations are provided as absolute paths. An experimentsfolder will be created in this folder, where all runs and checkpoint files will be saved.'},
        'locationtraining': {'value': None, 'help': 'absolute or relative path to datapath to the training data. Either a list of paths to the sample folders or one path to a folder where samples should be automatically determined.', 'nargs': '+'},
        'locationtesting': {'value': None, 'help': 'absolute or relative path to datapath to the testing data. Either a list of paths to the sample folders or one path to a folder where samples should be automatically determined.', 'nargs': '+'},
        'locationvalidation': {'value': None, 'help': 'absolute or relative path to datapath to the validation data. Either a list of paths to the sample folders or one path to a folder where samples should be automatically determined.', 'nargs': '+'},
        'output_dims': {'help': 'number of output channels, e.g. number of classes the model needs to create a probability distribution over.', 'type': int, 'alt': ['nclasses']},
        'windowsize': {'type': int, 'short': 'w', 'help': 'window size to be used during training, validation and testing, if not specified otherwise', 'nargs': '+'},
        'padding': {'help': 'padding to be used during training, validation and testing, if not specified otherwise. '
                            'During training, the padding specifies the amount a patch is allowed to reach outside of the image along all dimensions, during testing, it specifies also the amount of overlap needed between patches.',
                    'value': [0], 'nargs': '+', 'short': 'p', 'type': int},
        'windowsizetesting': {'value': None, 'help': 'override windowsize for testing', 'nargs': '+', 'type': int},
        'windowsizevalidation': None,  # {'value': None, 'help': 'override windowsize for validation', 'nargs': '+'},
        'paddingtesting': {'value': None, 'help': 'override padding for testing', 'nargs': '+', 'type': int},
        'paddingvalidation': None,  # {'value': None, 'help': 'override padding for validation', 'nargs': '+'},
        'testbatchsize': {'value': 1, 'help': 'batchsize for testing'},
    }

    def __init__(self, modelcls, datacls, kw):
        """ Handler for the evaluation of the model defined in modelcls using data coming from datacls.

        Parameters
        ----------
        modelcls : cls
            Python class defining the model to evaluate
        datacls : cls
            Python class implementing the data loading and storing
        """
        self.origargs = copy.copy(kw)
        eval_kw, kw = compile_arguments(SupervisedEvaluation, kw, transitive=False)
        for k, v in eval_kw.items():
            setattr(self, k, v)

        self.w = self.windowsize
        self.p = self.padding
        self.use_tensorboard = False
        # self.dropout_rate = argget(kw, "dropout_rate", 0.5)
        self.current_epoch = 0
        self.current_iteration = 0

        # create datasets for training, validation and testing:
        locs = [[None, l] if l is None or len(l) > 1 else [os.path.join(self.datapath, l[0]), None]
                for l in [self.locationtraining, self.locationvalidation, self.locationtesting]]
        paramstraining = [self.w, self.p] + locs[0]
        paramsvalidation = [self.windowsizevalidation if self.windowsizevalidation is not None else self.w,
                            self.paddingvalidation if self.paddingvalidation is not None else self.p] + locs[1]
        paramstesting = [self.windowsizetesting if self.windowsizetesting is not None else self.w,
                         self.paddingtesting if self.paddingtesting is not None else self.p] + locs[2]
        kwdata, kw = compile_arguments(datacls, kw, True, keep_entries=True)
        kwcopy = copy.copy(kwdata)
        kwcopy['nclasses'] = self.output_dims
        kwcopy['batch_size'] = self.batch_size
        self.trdc = datacls(*paramstraining, kw=copy.copy(kwcopy))
        testkw = copy.copy(kwcopy)
        testkw['batch_size'] = testkw['batch_size'] if not self.testbatchsize else self.testbatchsize
        valkw = copy.copy(testkw)
        testkw['ignore_missing_mask'] = True
        self.tedc = datacls(*paramstesting, kw=testkw)
        self.valdc = datacls(*paramsvalidation, kw=valkw)
        self.currit = 0
        self.show_dice = argget(kw, "show_dice", not self.show_f1)
        self.binary_evaluation = self.show_dice or self.show_f1 or self.show_f05 or self.show_f2
        self.estimatefilename = argget(kw, "estimatefilename", "estimate")
        self.gpu = argget(kw, "gpus", [0])
        self.get_train_session = lambda: self
        self.get_test_session = lambda: self
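    # Illustrative note (not part of the original source): in __init__ above, `locs` maps each of
    # the three location arguments onto the two positional location parameters handed to datacls.
    # Assuming a single relative folder is given, e.g. locationtraining == ['train'], the entry
    # becomes [os.path.join(datapath, 'train'), None]; for None or a list of several sample folders
    # it stays [None, locationtraining] and the data collection class is expected to resolve the
    # individual samples itself. The exact meaning of the two slots depends on datacls.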
    @abstractmethod
    def _train(self, batch, batchlabs):
        """ Performs one training iteration in the respective framework and returns the loss(es). """
        raise Exception("This needs to be implemented depending on the framework")
    @abstractmethod
    def _predict(self, batch, dropout, testing):
        """ Predict given batch with keep rate dropout.

        Parameters
        ----------
        batch : ndarray
            image data to evaluate
        dropout : float
            keep rate for dropconnect
        testing : bool
            whether we are in testing mode

        Returns
        -------
        ndarray : prediction based on data batch
        """
        pass
    @abstractmethod
    def _predict_with_loss(self, batch, batchlabs):
        """ Predict given batch and return the loss compared to the labels in batchlabs.

        Parameters
        ----------
        batch : ndarray
            image data
        batchlabs : ndarray
            corresponding label data

        Returns
        -------
        tuple of ndarray
            prediction and losses
        """
        pass
    @abstractmethod
    def _set_session(self, sess, cachefolder):
        pass
    @abstractmethod
    def _save(self, f):
        """ Save the model to file f in the current framework.

        Parameters
        ----------
        f : str
            location to save the model at
        """
        pass
    @abstractmethod
    def _load(self, f):
        """ Load the model in the current framework from f.

        Parameters
        ----------
        f : str
            location of the stored model
        """
        pass
    @abstractmethod
    def get_globalstep(self):
        """ Return the number of iterations this model has been trained for.

        Returns
        -------
        int : iteration count
        """
        pass
    def train(self):
        """ Perform one training iteration, measuring and logging the time spent on data sampling and processing. """
        start_time = time.time()
        batch, batchlabs = self.trdc.random_sample(batch_size=self.batch_size)
        time_after_loading = time.time()
        loss = self._train(batch, batchlabs)
        self.currit += 1
        end_time = time.time()
        if self.currit % self.print_each == 0:
            logging.getLogger("eval").info(
                "it: {}, time: [i/o: {}, processing: {}, all: {}], loss: {}"
                .format(self.currit, np.round(time_after_loading - start_time, 6),
                        np.round(end_time - time_after_loading, 6),
                        np.round(end_time - start_time, 6), loss))
        return loss
    def test_scores(self, pred, ref):
        """ Evaluate all selected scores between reference data ref and prediction pred.

        Parameters
        ----------
        pred : ndarray
            prediction, as probability distributions per pixel / voxel
        ref : ndarray
            reference, either as probability distributions per pixel / voxel or as a label map
        """
        ref = np.int32(np.expand_dims(ref.squeeze(), 0))
        pred = np.expand_dims(pred.squeeze(), 0)
        if pred.shape != ref.shape:
            # one-hot encode the label map so it matches the shape of the predicted distributions
            tar2 = np.zeros((np.prod(pred.shape[:-1]), pred.shape[-1]))
            tar2[np.arange(np.prod(pred.shape[:-1])), ref.flatten()] = 1
            ref = tar2.reshape(pred.shape)
        res = {}
        eps = 1e-8
        nclasses = self.model.nclasses
        if self.binary_evaluation:
            # encode each (reference, prediction) class pair as a single integer and count them
            enc_ref = np.argmax(ref, -1)
            enc_pred = nclasses * np.argmax(pred, -1)
            enc_both = enc_ref + enc_pred
            bins = np.bincount(enc_both.flatten(), minlength=nclasses ** 2).reshape((nclasses, nclasses))
        if self.show_dice:
            res["dice"] = [bins[c, c] * 2 / (np.sum(bins, -1)[c] + np.sum(bins, -2)[c] + eps) for c in range(nclasses)]
        if self.show_f05 or self.show_f2:
            precision = np.array([bins[c, c] / (np.sum(bins, -1)[c] + eps) for c in range(nclasses)])
            recall = np.array([bins[c, c] / (np.sum(bins, -2)[c] + eps) for c in range(nclasses)])
        if self.show_f05:
            beta2 = 0.5 ** 2
            res["f05"] = (1 + beta2) * precision * recall / ((beta2 * precision) + recall + eps)
        if self.show_f1:
            res["f1"] = [bins[c, c] * 2 / (np.sum(bins, -2)[c] + np.sum(bins, -1)[c] + eps) for c in range(nclasses)]
        if self.show_f2:
            beta2 = 2 ** 2
            res["f2"] = (1 + beta2) * precision * recall / (beta2 * precision + recall + eps)
        if self.show_cross_entropy:
            res["cross_entropy"] = np.mean(np.sum(ref * np.log(pred + eps), -1))
        if self.show_l2:
            res["l2"] = np.mean(np.sum((ref - pred) ** 2, -1))
        return res
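    # Illustrative sketch (not part of the original source): the bincount trick above builds a
    # confusion matrix by encoding each (reference, prediction) class pair as the single integer
    # ref_class + nclasses * pred_class. For nclasses = 2, a hypothetical example:
    #
    #   enc_ref  = np.array([0, 1, 1, 0])         # reference classes
    #   enc_pred = 2 * np.array([0, 1, 0, 1])     # predicted classes, scaled by nclasses
    #   bins = np.bincount(enc_ref + enc_pred, minlength=4).reshape((2, 2))
    #   # -> [[1, 1],
    #   #     [1, 1]]   rows index the prediction, columns the reference; bins[c, c] counts the
    #   #               true positives of class c
    #
    # Dice / F1 then follow as 2 * TP / (row sum + column sum), and precision / recall as
    # TP / row sum and TP / column sum, which is exactly what the list comprehensions above compute
    # per class.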
    def test_all_random(self, batch_size=None, dc=None, resample=True):
        """ Test random samples.

        Parameters
        ----------
        batch_size : int
            minibatch size to compute on
        dc : datacollection instance, optional
            datacollection to sample from
        resample : bool
            indicates if we need to sample before evaluating

        Returns
        -------
        tuple of loss and prediction ndarray
        """
        if dc is None:
            dc = self.valdc
        if batch_size is None:
            batch_size = self.batch_size
        if self.validate_same:
            dc.randomstate.seed(12345677)
        if resample:
            self.testbatch, self.testbatchlabs = dc.random_sample(batch_size=batch_size)
        loss, prediction = self._predict_with_loss(self.testbatch, self.testbatchlabs)
        return loss, prediction
    def test_all_available(self, batch_size=None, dc=None, return_results=False, dropout=None, testing=False):
        """ Completely evaluate each full image in the datacollection using grid sampling.

        Parameters
        ----------
        batch_size : int
            minibatch size to compute on
        dc : datacollection instance, optional
            datacollection to sample from
        return_results : bool
            should results be returned or stored right away?
        dropout : float
            keep rate of dropconnect for inference
        testing : bool
            whether we are in testing mode

        Returns
        -------
        either tuple of predictions and errors or only errors, depending on the return_results flag
        """
        if dc is None:
            dc = self.tedc
        if batch_size is not None and batch_size > 1:
            logging.getLogger('eval').error('not supported yet to have more than batchsize 1')
        volgens = dc.get_volume_batch_generators()
        if dropout is None:
            dropout = self.evaluate_uncertainty_dropout
        full_vols = []
        errs = []

        lasttime = time.time()
        for volgen, file, shape, w, p in volgens:
            logging.getLogger('eval').info(
                'evaluating file {} of shape {} with w {} and p {}'.format(file, shape, w, p))
            if len(shape) > 3:
                shape = np.asarray([s for s in shape if s > 1])
            res = np.zeros(list(shape) + [self.model.nclasses], dtype=np.float32)
            if self.evaluate_uncertainty_times > 1:
                uncertres = np.zeros(res.shape)
                if self.evaluate_uncertainty_saveall:
                    allres = np.zeros([self.evaluate_uncertainty_times] + list(res.shape))
            # build a per-patch weight map that linearly downweights voxels towards the padded borders
            certainty = np.ones(w)
            for ind, pp in enumerate(p):
                if pp > 0:
                    slicesa = [slice(None) for _ in range(len(p))]
                    slicesb = [slice(None) for _ in range(len(p))]
                    reshapearr = [1 for _ in range(len(p))]
                    reshapearr[ind] = pp
                    slicesa[ind] = slice(None, pp)
                    slicesb[ind] = slice(-pp, None)
                    slicesa = tuple(slicesa)
                    slicesb = tuple(slicesb)
                    certainty[slicesa] *= np.arange(1.0 / pp, 1, 1.0 / (pp + 1)).reshape(reshapearr)
                    certainty[slicesb] *= np.arange(1.0 / pp, 1, 1.0 / (pp + 1))[::-1].reshape(reshapearr)
            certainty = certainty.reshape([1] + w + [1])

            # read, compute, merge, write back
            for subvol, _, imin, imax in volgen:
                if self.evaluate_uncertainty_times > 1:
                    preds = []
                    for i in range(self.evaluate_uncertainty_times):
                        preds.append(self._predict(subvol, dropout, testing))
                        logging.getLogger('eval').debug(
                            'evaluated run {} of subvolume from {} to {}'.format(i, imin, imax))
                    pred = np.mean(np.asarray(preds), 0)
                    uncert = np.std(np.asarray(preds), 0)
                    preds = [x * certainty for x in preds]
                else:
                    pred = self._predict(subvol, dropout, testing)
                    logging.getLogger('eval').debug('evaluated subvolume from {} to {}'.format(imin, imax))
                pred *= certainty

                # now reembed it into the result array
                wrongmin = [int(abs(x)) if x < 0 else 0 for x in imin]
                wrongmax = [int(x) if x < 0 else None for x in (shape - imax)]
                mimin = np.asarray(np.maximum(0, imin), dtype=np.int32)
                mimax = np.asarray(np.minimum(shape, imax), dtype=np.int32)
                slicesaa = [slice(mimina, miminb) for mimina, miminb in zip(mimin, mimax)]
                slicesaa.append(slice(None))
                slicesaa = tuple(slicesaa)
                slicesbb = [0]
                slicesbb.extend(slice(wrongmina, wrongminb) for wrongmina, wrongminb in zip(wrongmin, wrongmax))
                slicesbb.append(slice(None))
                slicesbb = tuple(slicesbb)

                res[slicesaa] += pred[slicesbb]
                if self.evaluate_uncertainty_times > 1:
                    uncert *= certainty
                    uncertres[slicesaa] += uncert[slicesbb]
                    if self.evaluate_uncertainty_saveall:
                        for j in range(self.evaluate_uncertainty_times):
                            allres[j][slicesaa] += preds[j][slicesbb]

            # normalize again:
            if self.evaluate_uncertainty_times > 1 and not return_results:
                uncertres /= np.sum(res, -1).reshape(list(res.shape[:-1]) + [1])
                dc.save(uncertres, os.path.join(file, "std-" + self.estimatefilename), tporigin=file)
                if self.evaluate_uncertainty_saveall:
                    for j in range(self.evaluate_uncertainty_times):
                        dc.save(allres[j], os.path.join(file, "iter{}-".format(j) + self.estimatefilename),
                                tporigin=file)
            if np.min(p) < 0:
                res[np.where(np.sum(res, -1) < 1e-8)] = [1] + [0 for _ in range(self.model.nclasses - 1)]
            res /= np.sum(res, -1).reshape(list(res.shape[:-1]) + [1])

            # evaluate accuracy...
            name = os.path.split(file)
            name = name[-1] if len(name[-1]) else os.path.basename(name[0])
            try:
                if len(dc.maskfiles) > 0:
                    mfile = os.path.join(file, dc.maskfiles[0])
                    if os.path.exists(mfile):
                        mf = np.expand_dims(dc.load(mfile).squeeze(), 0)
                        errs.append([name, self.test_scores(res, mf)])
            except Exception as e:
                logging.getLogger('eval').warning(
                    'was not able to compute test scores, is ground truth available?')
                logging.getLogger('eval').warning('{}'.format(e))
            if return_results:
                full_vols.append([name, file, res])
            else:
                if not self.only_save_labels:
                    dc.save(res, os.path.join(file, self.estimatefilename + "-probdist"), tporigin=file)
                dc.save(np.uint8(np.argmax(res, -1)), os.path.join(file, self.estimatefilename + "-labels"),
                        tporigin=file)
            logging.getLogger('eval').info('evaluation took {} seconds'.format(time.time() - lasttime))
            lasttime = time.time()
        if return_results:
            return full_vols, errs
        else:
            return errs
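    # Illustrative sketch (not part of the original source): `certainty` above ramps linearly from
    # 1/p at the padded border of each patch towards 1 in its interior along every padded
    # dimension, so overlapping grid patches are blended smoothly before the accumulated result
    # `res` is re-normalized by its channel sum. For a padding of p = 3 along one axis, the
    # one-sided ramp used above evaluates to
    #
    #   np.arange(1.0 / 3, 1, 1.0 / 4)   # -> array([0.3333, 0.5833, 0.8333]) (approximately)
    #
    # and the mirrored ramp is applied at the opposite border of the same axis.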
    def load(self, f):
        """ Load the model at location f from disk.

        Parameters
        ----------
        f : str
            location of the stored model
        """
        self._load(f)
        states = {}
        pickle_name = f.rsplit('-', 1)[0] + ".pickle"
        try:
            states = pickle.load(open(pickle_name, "rb"))
        except Exception:
            logging.getLogger('eval').warning('there was no randomstate pickle named {} around'.format(pickle_name))
        self.trdc.set_states(states.get('trdc', None))
        self.tedc.set_states(states.get('tedc', None))
        self.valdc.set_states(states.get('valdc', None))
        self.current_epoch = states.get('epoch', 0)
        self.current_iteration = states.get('iteration', 0)
    def save(self, f):
        """ Save the model to disk at location f.

        Parameters
        ----------
        f : str
            location to save the model to
        """
        ckpt = self._save(f)
        trdc = self.trdc.get_states()
        tedc = self.tedc.get_states()
        valdc = self.valdc.get_states()
        states = {}
        if trdc:
            states['trdc'] = trdc
        if tedc:
            states['tedc'] = tedc
        if valdc:
            states['valdc'] = valdc
        states['epoch'] = self.current_epoch
        states['iteration'] = self.current_iteration
        pickle.dump(states, open(f + ".pickle", "wb"))
        return ckpt
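    # Illustrative note (not part of the original source): save() stores the framework checkpoint
    # via _save(f) plus a companion "<f>.pickle" holding the data-collection random states and the
    # epoch / iteration counters, while load() re-derives that name via f.rsplit('-', 1)[0] +
    # ".pickle". This assumes that the checkpoint path passed to load() carries a "-<step>" suffix,
    # as is the case when the backend appends the global step to the checkpoint name.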
    def add_summary_simple_value(self, text, value):
        raise NotImplementedError("this needs to be implemented and only works with tensorflow backend.")
    def set_session(self, sess, cachefolder, train=False):
        return None
    def __enter__(self):
        pass

    def __exit__(self, exc_type, exc_val, exc_tb):
        pass


generate_defaults_info(SupervisedEvaluation)
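
# Illustrative sketch (not part of the original source): a backend subclasses SupervisedEvaluation
# and fills in the framework-specific hooks. The commented-out, numpy-only stub below is a
# hypothetical minimal example of which methods need to be provided; the real mdgru backends
# implement them with their respective deep learning framework.
#
# class DummyEvaluation(SupervisedEvaluation):
#     def __init__(self, modelcls, datacls, kw):
#         super(DummyEvaluation, self).__init__(modelcls, datacls, kw)
#         self.model = modelcls  # assumed to expose an `nclasses` attribute
#
#     def _train(self, batch, batchlabs):
#         return 0.0  # would run one optimization step and return the loss
#
#     def _predict(self, batch, dropout, testing):
#         # would return per-voxel class probabilities, here a uniform distribution
#         return np.ones(list(batch.shape[:-1]) + [self.model.nclasses]) / self.model.nclasses
#
#     def _predict_with_loss(self, batch, batchlabs):
#         return 0.0, self._predict(batch, 1.0, False)
#
#     def _set_session(self, sess, cachefolder):
#         pass
#
#     def _save(self, f):
#         return f  # would write a checkpoint and return its path
#
#     def _load(self, f):
#         pass
#
#     def get_globalstep(self):
#         return self.current_iteration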