diff --git a/.gitignore b/.gitignore
index f891e40e..fc3baf17 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+.idea
 code/*.pyc
 code/*_plots
 code/tmp*
@@ -13,3 +14,5 @@ html
 *.pyc
 *~
 *.swp
+# This directory may be created by scripts from segmentation tutorials.
+save_models
diff --git a/README.rst b/README.rst
index 85de179c..81252fc0 100644
--- a/README.rst
+++ b/README.rst
@@ -37,4 +37,4 @@ Subdirectories:
 Build instructions
 ------------------
 
-To build the html version of the tutorials, install sphinx and run doc/Makefile
+To build the html version of the tutorials, run python doc/scripts/docgen.py
diff --git a/code/cnn_1D_segm/data_loader/__init__.py b/code/cnn_1D_segm/data_loader/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/code/cnn_1D_segm/data_loader/cortical_layers.py b/code/cnn_1D_segm/data_loader/cortical_layers.py
new file mode 100644
index 00000000..a0b3a2f9
--- /dev/null
+++ b/code/cnn_1D_segm/data_loader/cortical_layers.py
@@ -0,0 +1,185 @@
+import os
+import time
+
+import numpy as np
+from PIL import Image
+import re
+import warnings
+
+from dataset_loaders.parallel_loader import ThreadedDataset
+from parallel_loader_1D import ThreadedDataset_1D
+
+floatX = 'float32'
+
+
+class Cortical6LayersDataset(ThreadedDataset_1D):
+    '''The Cortical Layers Dataset.
+
+    Parameters
+    ----------
+    which_set: string
+        A string in ['train', 'val', 'valid', 'test'], corresponding to
+        the set to be returned.
+    split: float
+        A float indicating the dataset split between training and validation.
+        For example, if split=0.85, 85% of the images will be used for
+        training, whereas 15% will be used for validation.
+    '''
+    name = 'cortical_layers'
+
+    non_void_nclasses = 7
+    GTclasses = [0, 1, 2, 3, 4, 5, 6]
+    _cmap = {
+        0: (128, 128, 128),    # padding
+        1: (128, 0, 0),        # layer 1
+        2: (128, 64, ),        # layer 2
+        3: (128, 64, 128),     # layer 3
+        4: (0, 0, 128),        # layer 4
+        5: (0, 0, 64),         # layer 5
+        6: (64, 64, 128),      # layer 6
+    }
+    _mask_labels = {0: 'padding', 1: 'layer1', 2: 'layer2', 3: 'layer3',
+                    4: 'layer4', 5: 'layer5', 6: 'layer6'}
+    _void_labels = []
+
+    _filenames = None
+
+    @property
+    def filenames(self):
+
+        if self._filenames is None:
+            # Load filenames
+            nfiles = sum(1 for line in open(self.mask_path))
+            filenames = range(nfiles)
+            np.random.seed(1609)
+            np.random.shuffle(filenames)
+
+            if self.which_set == 'train':
+                filenames = filenames[:int(nfiles*self.split)]
+            elif self.which_set == 'val':
+                filenames = filenames[-(nfiles - int(nfiles*self.split)):]
+
+            # Save the filenames list
+            self._filenames = filenames
+
+        return self._filenames
+
+    def __init__(self,
+                 which_set="train",
+                 split=0.85,
+                 shuffle_at_each_epoch=True,
+                 smooth_or_raw='both',
+                 *args, **kwargs):
+
+        self.task = 'segmentation'
+
+        self.n_layers = 6
+        n_layers_path = str(self.n_layers) + "layers_segmentation"
+
+        self.which_set = "val" if which_set == "valid" else which_set
+        if self.which_set not in ("train", "val", 'test'):
+            raise ValueError("Unknown argument to which_set %s" %
+                             self.which_set)
+
+        self.split = split
+
+        self.image_path_raw = os.path.join(self.path, n_layers_path,
+                                           "training_raw.txt")
+        self.image_path_smooth = os.path.join(self.path, n_layers_path,
+                                              "training_geo.txt")
+        self.mask_path = os.path.join(self.path, n_layers_path,
+                                      "training_cls.txt")
+        self.regions_path = os.path.join(self.path, n_layers_path,
+                                         "training_regions.txt")
+
+        self.smooth_raw_both = smooth_or_raw
+
+        if smooth_or_raw == 'both':
+            self.data_shape = (200, 2)
+        else:
+            self.data_shape = (200, 1)
+
+        super(Cortical6LayersDataset, self).__init__(*args, **kwargs)
+
+    def get_names(self):
+        """Return a dict of names, per prefix/subset."""
+
+        return {'default': self.filenames}
+
+
+def test_6layers():
+    train_iter = Cortical6LayersDataset(
+        which_set='train',
+        smooth_or_raw='both',
+        batch_size=500,
+        data_augm_kwargs={},
+        return_one_hot=False,
+        return_01c=False,
+        return_list=True,
+        use_threads=False)
+
+    valid_iter = Cortical6LayersDataset(
+        which_set='valid',
+        smooth_or_raw='smooth',
+        batch_size=500,
+        data_augm_kwargs={},
+        return_one_hot=False,
+        return_01c=False,
+        return_list=True,
+        use_threads=False)
+
+    valid_iter2 = Cortical6LayersDataset(
+        which_set='valid',
+        smooth_or_raw='raw',
+        batch_size=500,
+        data_augm_kwargs={},
+        return_one_hot=False,
+        return_01c=False,
+        return_list=True,
+        use_threads=False)
+
+    train_nsamples = train_iter.nsamples
+    train_nbatches = train_iter.nbatches
+    valid_nbatches = valid_iter.nbatches
+    valid_nbatches2 = valid_iter2.nbatches
+
+    # Simulate training
+    max_epochs = 1
+    print "Simulate training for", str(max_epochs), "epochs"
+    start_training = time.time()
+    for epoch in range(max_epochs):
+        print "Epoch #", str(epoch)
+
+        start_epoch = time.time()
+
+        print "Iterate on the training set", train_nbatches, "minibatches"
+        for mb in range(train_nbatches):
+            start_batch = time.time()
+            batch = train_iter.next()
+            if mb % 5 == 0:
+                print("Minibatch train {}: {} sec".format(mb, (time.time() -
+                                                               start_batch)))
+
+        print "Iterate on the validation set", valid_nbatches, "minibatches"
+        for mb in range(valid_nbatches):
+            start_batch = time.time()
+            batch = valid_iter.next()
+            if mb % 5 == 0:
+                print("Minibatch valid {}: {} sec".format(mb, (time.time() -
+                                                               start_batch)))
+
+        print "Iterate on the validation set (second time)", valid_nbatches2, "minibatches"
+        for mb in range(valid_nbatches2):
+            start_batch = time.time()
+            batch = valid_iter2.next()
+            if mb % 5 == 0:
+                print("Minibatch valid {}: {} sec".format(mb, (time.time() -
+                                                               start_batch)))
+
+        print("Epoch time: %s" % str(time.time() - start_epoch))
+    print("Training time: %s" % str(time.time() - start_training))
+
+
+if __name__ == '__main__':
+    print "Loading the dataset 1 batch at a time"
+    test_6layers()
+    print "Success!"
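+
+# A minimal usage sketch (assuming the 6layers_segmentation text files are
+# in place, as set up in the dataset_loaders config.ini):
+#
+#     train_iter = Cortical6LayersDataset(which_set='train',
+#                                         smooth_or_raw='both',
+#                                         batch_size=500,
+#                                         return_list=True)
+#     x, y = train_iter.next()
+#     # x: float32 of shape (500, 2, 200) -- (batch, channels, ray length)
+#     # y: int32 of shape (500, 200) -- one class label per ray point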
diff --git a/code/cnn_1D_segm/data_loader/parallel_loader_1D.py b/code/cnn_1D_segm/data_loader/parallel_loader_1D.py
new file mode 100644
index 00000000..272c8d00
--- /dev/null
+++ b/code/cnn_1D_segm/data_loader/parallel_loader_1D.py
@@ -0,0 +1,619 @@
+import ConfigParser
+import os
+from os.path import realpath
+try:
+    import Queue
+except ImportError:
+    import queue as Queue
+import shutil
+import sys
+from threading import Thread
+from time import sleep
+import weakref
+
+import re
+import numpy as np
+from numpy.random import RandomState
+from dataset_loaders.data_augmentation import random_transform
+from dataset_loaders.parallel_loader import ThreadedDataset
+
+import dataset_loaders
+from dataset_loaders.utils_parallel_loader import classproperty, grouper, overlap_grouper
+from dataset_loaders.parallel_loader import threaded_fetch
+
+floatX = 'float32'
+
+
+class ThreadedDataset_1D(ThreadedDataset):
+    _wait_time = 0.05
+    __version__ = '1'
+    """
+    Threaded dataset.
+
+    This is an abstract class and should not be used as is. Each
+    specific dataset class should implement its `get_names` and
+    `load_sequence` functions to load the list of filenames to be
+    loaded and define how to load the data from the dataset,
+    respectively.
+
+    See `example_dataset.py` for an example on how to implement a
+    specific instance of a dataset.
+
+    Parameters
+    ----------
+    seq_per_subset: int
+        The *maximum* number of sequences per each subset (a.k.a. prefix
+        or video). If 0, all sequences will be used. If greater than 0
+        and `shuffle_at_each_epoch` is True, at each epoch a new
+        selection of sequences per subset will be randomly picked. Default: 0.
+    seq_length: int
+        The number of frames per sequence. If 0, 4D arrays will be
+        returned (not a sequence), else 5D arrays will be returned.
+        Default: 0.
+    overlap: int
+        The number of frames of overlap between the first frame of one
+        sample and the first frame of the next. Note that a negative
+        overlap will instead specify the number of frames that are
+        *skipped* between the last frame of one sample and the first
+        frame of the next. None is equivalent to seq_length - 1.
+        Default: None.
+    batch_size: int
+        The size of the batch.
+    queues_size: int
+        The size of the buffers used in the threaded case. Default: 50.
+    return_one_hot: bool
+        If True the labels will be returned in one-hot format, i.e. as
+        an array of `nclasses` elements all set to 0 except from the id
+        of the correct class which is set to 1. Default: False.
+    return_01c: bool
+        If True the last axis will be the channel axis (01c format),
+        else the channel axis will be the third to last (c01 format).
+        Default: False.
+    return_extended_sequences: bool
+        If True the first and last sequence of a batch will be extended
+        so that the first frame is repeated `seq_length/2` times. This is
+        useful to perform middle frame prediction, i.e., where the current
+        frame has to be the middle one and the previous and next ones
+        are used as context. Default: False.
+    return_middle_frame_only: bool
+        If True only the middle frame of the ground truth will be returned.
+        Default: False.
+    return_0_255: bool
+        If True the images will be returned in the range [0, 255] with
+        dtype `uint8`. Otherwise the images will be returned in the
+        range [0, 1] as dtype `float32`. Default: False.
+    use_threads: bool
+        If True threads will be used to fetch the data from the dataset.
+        Default: False.
+    nthreads: int
+        The number of threads to use when `use_threads` is True. Default: 1.
+    shuffle_at_each_epoch: bool
+        If True, at the end of each epoch a new set of batches will be
+        prepared and shuffled. Default: True.
+    infinite_iterator: bool
+        If False a `StopIteration` exception will be raised at the end of an
+        epoch. If True no exception will be raised and the dataset will
+        behave as an infinite iterator. Default: True.
+    return_list: bool
+        If True, each call to `next()` will return a list of two numpy arrays
+        containing the data and the labels respectively. If False, the
+        dataset will instead return a dictionary with the following
+        keys:
+            * `data`: the augmented/cropped sequence/image
+            * `labels`: the corresponding potentially cropped labels
+            * `filenames`: the filenames of the frames/images
+            * `subset`: the name of the subset the sequence/image belongs to
+            * `raw_data`: the original unprocessed sequence/image
+        Depending on the dataset, additional keys might be available.
+        Default: False.
+    data_augm_kwargs: dict
+        A dictionary of arguments to be passed to the data augmentation
+        function. Default: no data augmentation. See
+        :func:`~data_augmentation.random_transform` for a complete list
+        of parameters.
+    remove_mean: bool
+        If True, the statistics computed dataset-wise will be used to
+        remove the dataset mean from the data. Default: False.
+    divide_by_std: bool
+        If True, the statistics computed dataset-wise will be used to
+        divide the data by the dataset standard deviation. Default: False.
+    remove_per_img_mean: bool
+        If True, each image will be processed to have zero-mean.
+        Default: False.
+    divide_by_per_img_std: bool
+        If True, each image will be processed to have unit variance.
+        Default: False.
+    raise_IOErrors: bool
+        If False in case of an IOError a message will be printed on
+        screen but no Exception will be raised. Default: False.
+    rng: :class:`numpy.random.RandomState` instance
+        The random number generator to use. If None, one will be created.
+        Default: None.
+
+    Notes
+    -----
+    The parallel loader will automatically map all non-void classes to be
+    sequential starting from 0 and then map all void classes to the
+    next class. E.g., suppose non_void_nclasses = 4 and _void_classes = [3, 5];
+    the non-void classes will be mapped to 0, 1, 2, 3 and the void
+    classes will be mapped to 4, as follows:
+        0 --> 0
+        1 --> 1
+        2 --> 2
+        3 --> 4
+        4 --> 3
+        5 --> 4
+
+    Note also that in case the original labels are not sequential, it
+    suffices to list all the original labels as a list in GTclasses for
+    parallel_loader to map the non-void classes sequentially starting
+    from 0 and all the void classes to the next class. E.g., suppose
+    non_void_nclasses = 5, GTclasses = [0, 2, 5, 9, 11, 12, 99] and
+    _void_labels = [2, 99]; then this will be the mapping:
+        0 --> 0
+        2 --> 5
+        5 --> 1
+        9 --> 2
+        11 --> 3
+        12 --> 4
+        99 --> 5
+    """
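+
+    # A worked instance of the second mapping above: with
+    # GTclasses = [0, 2, 5, 9, 11, 12, 99] and _void_labels = [2, 99],
+    # a ground-truth row [0, 2, 5, 99] leaves the loader as [0, 5, 1, 5]
+    # (non-void classes renumbered 0..4, both void classes sent to 5).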
+
+    def __init__(self,
+                 seq_per_subset=0,  # if 0 all sequences (or frames, if 4D)
+                 seq_length=0,  # if 0, return 4D
+                 overlap=None,
+                 batch_size=1,
+                 queues_size=20,
+                 return_one_hot=False,
+                 return_01c=False,
+                 return_extended_sequences=False,
+                 return_middle_frame_only=False,
+                 return_0_255=False,
+                 use_threads=False,
+                 nthreads=1,
+                 shuffle_at_each_epoch=True,
+                 infinite_iterator=True,
+                 return_list=False,  # for keras, return X,Y only
+                 data_augm_kwargs={},
+                 remove_mean=False,  # dataset stats
+                 divide_by_std=False,  # dataset stats
+                 remove_per_img_mean=False,  # img stats
+                 divide_by_per_img_std=False,  # img stats
+                 raise_IOErrors=False,
+                 rng=None,
+                 preload=False,
+                 **kwargs):
+
+        if len(kwargs):
+            print('Unknown arguments: {}'.format(kwargs.keys()))
+
+        # Set default values for the data augmentation params if not specified
+        default_data_augm_kwargs = {
+            'crop_size': None,
+            'rotation_range': 0,
+            'width_shift_range': 0,
+            'height_shift_range': 0,
+            'shear_range': 0,
+            'zoom_range': 0,
+            'channel_shift_range': 0,
+            'fill_mode': 'nearest',
+            'cval': 0,
+            'cval_mask': 0,
+            'horizontal_flip': False,
+            'vertical_flip': False,
+            'rescale': None,
+            'spline_warp': False,
+            'warp_sigma': 0.1,
+            'warp_grid_size': 3,
+            'gamma': 0,
+            'gain': 1}
+
+        default_data_augm_kwargs.update(data_augm_kwargs)
+        self.data_augm_kwargs = default_data_augm_kwargs
+        del(default_data_augm_kwargs, data_augm_kwargs)
+
+        # Put crop_size into canonical form [c1, c2]
+        cs = self.data_augm_kwargs['crop_size']
+        if cs is not None:
+            # Convert to list
+            if isinstance(cs, int):
+                cs = [cs, cs]
+            elif isinstance(cs, tuple):
+                cs = list(cs)
+            # set 0, 0 to None
+            if cs == [0, 0]:
+                cs = None
+            self.data_augm_kwargs['crop_size'] = cs
+
+        # Do not support multithread without shuffling
+        if use_threads and nthreads > 1 and not shuffle_at_each_epoch:
+            raise NotImplementedError('Multiple threads are not order '
+                                      'preserving')
+
+        # Check that the implementing class has all the mandatory attributes
+        mandatory_attrs = ['name', 'non_void_nclasses', '_void_labels']
+        missing_attrs = [attr for attr in mandatory_attrs if not
+                         hasattr(self, attr)]
+        if missing_attrs != []:
+            raise NameError('Mandatory argument(s) missing: {}'.format(
+                missing_attrs))
+        if hasattr(self, 'GT_classes'):
+            raise NameError('GTclasses misspelled as GT_classes')
+
+        # If variable sized dataset --> either batch_size 1 or crop
+        if (not hasattr(self, 'data_shape') and batch_size > 1 and
+                not self.data_augm_kwargs['crop_size']):
+            raise ValueError(
+                '{} has no `data_shape` attribute, this means that the '
+                'shape of the samples varies across the dataset. You '
+                'must either set `batch_size = 1` or specify a '
+                '`crop_size`'.format(self.name))
+
+        if seq_length and overlap and overlap >= seq_length:
+            raise ValueError('`overlap` should be smaller than `seq_length`')
+
+        # Copy the data to the local path if not existing
+        if not os.path.exists(self.path):
+            print('The local path {} does not exist. Copying '
+                  'the dataset...'.format(self.path))
+            shutil.copytree(self.shared_path, self.path)
+            for r, d, f in os.walk(self.path):
+                os.chmod(r, 0775)
+            print('Done.')
+        else:
+            try:
+                with open(os.path.join(self.path, '__version__')) as f:
+                    if f.read() != self.__version__:
+                        raise IOError
+            except IOError:
+                print('The local path {} exists, but is outdated. I will '
+                      'replace the old files with the new ones...'.format(
+                          self.path))
+                if not os.path.exists(self.shared_path):
+                    print('The shared_path {} for {} does not exist. Please '
+                          'edit the config.ini file with a valid path, as '
+                          'specified in the README.'.format(self.shared_path,
+                                                            self.name))
+                if realpath(self.path) != realpath(self.shared_path):
+                    shutil.rmtree(self.path)
+                    shutil.copytree(self.shared_path, self.path)
+                    for r, d, f in os.walk(self.path):
+                        os.chmod(r, 0775)
+                with open(os.path.join(self.path, '__version__'), 'w') as f:
+                    f.write(self.__version__)
+                print('Done.')
+
+        # Save parameters in object
+        self.seq_per_subset = seq_per_subset
+        self.return_sequence = seq_length != 0
+        self.seq_length = seq_length if seq_length else 1
+        self.overlap = overlap if overlap is not None else self.seq_length - 1
+        self.one_subset_per_batch = False
+        self.batch_size = batch_size
+        self.queues_size = queues_size
+        self.return_one_hot = return_one_hot
+        self.return_01c = return_01c
+        self.return_extended_sequences = return_extended_sequences
+        self.return_middle_frame_only = return_middle_frame_only
+        self.return_0_255 = return_0_255
+        self.use_threads = use_threads
+        self.nthreads = nthreads
+        self.shuffle_at_each_epoch = shuffle_at_each_epoch
+        self.infinite_iterator = infinite_iterator
+        self.return_list = return_list
+        self.remove_mean = remove_mean
+        self.divide_by_std = divide_by_std
+        self.remove_per_img_mean = remove_per_img_mean
+        self.divide_by_per_img_std = divide_by_per_img_std
+        self.raise_IOErrors = raise_IOErrors
+        self.rng = rng if rng is not None else RandomState(0xbeef)
+        self.preload = preload
+
+        self.set_has_GT = getattr(self, 'set_has_GT', True)
+        self.mean = getattr(self, 'mean', [])
+        self.std = getattr(self, 'std', [])
+
+        # ...01c
+        data_shape = list(getattr(self.__class__, 'data_shape',
+                                  (None, None, 3)))
+        if self.data_augm_kwargs['crop_size']:
+            data_shape[-3:-1] = self.data_augm_kwargs['crop_size']  # change 01
+        if self.return_01c:
+            self.data_shape = data_shape
+        else:
+            self.data_shape = [data_shape[i] for i in
+                               [1] + range(1) + range(2, len(data_shape))]
+
+        # Load a dict of names, per video/subset/prefix/...
+        self.names_per_subset = self.get_names()
+
+        # Fill the sequences/batches lists and initialize everything
+        self._fill_names_sequences()
+        if len(self.names_sequences) == 0:
+            raise RuntimeError('The name list cannot be empty')
+        self._fill_names_batches(shuffle_at_each_epoch)
+
+        # Cache for already loaded data
+        if self.preload:
+            self.image_raw = self._preload_data(
+                self.image_path_raw, dtype='floatX', expand=True)
+            self.image_smooth = self._preload_data(
+                self.image_path_smooth, dtype='floatX', expand=True)
+            self.mask = self._preload_data(self.mask_path, dtype='int32')
+            self.regions = self._preload_data(self.regions_path, dtype='int32')
+        else:
+            self.image_raw = None
+            self.image_smooth = None
+            self.mask = None
+            self.regions = None
+
+        if self.use_threads:
+            # Initialize the queues
+            self.names_queue = Queue.Queue(maxsize=self.queues_size)
+            self.data_queue = Queue.Queue(maxsize=self.queues_size)
+            self._init_names_queue()  # Fill the names queue
+
+            # Start the data fetcher threads
+            self.sentinel = object()  # guaranteed unique reference
+            self.data_fetchers = []
+            for _ in range(self.nthreads):
+                data_fetcher = Thread(
+                    target=threaded_fetch,
+                    args=(weakref.ref(self),))
+                data_fetcher.setDaemon(True)  # Die when main dies
+                data_fetcher.start()
+                data_fetcher = weakref.ref(data_fetcher)
+                self.data_fetchers.append(data_fetcher)
+            # Give time to the data fetcher to die, in case of errors
+            # sleep(1)
+
+        # super(ThreadedDataset_1D, self).__init__(*args, **kwargs)
+
+    def _preload_data(self, path, dtype, expand=False):
+        if dtype == 'floatX':
+            py_type = float
+            dtype = floatX
+        elif dtype == 'int32':
+            py_type = int
+        else:
+            raise ValueError('dtype not supported', dtype)
+        ret = []
+        with open(path) as fp:
+            for i, line in enumerate(fp):
+                line = re.split(' ', line)
+                line = np.array([py_type(el) for el in line], dtype=dtype)
+                ret.append(line)
+        ret = np.vstack(ret)
+        if expand:
+            # b,0 to b,0,c
+            ret = np.expand_dims(ret, axis=2)
+        return ret
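+
+    # The files parsed above hold one sample per line as space-separated
+    # numbers, e.g. a 200-point ray (values abridged):
+    #     0.12 0.48 ... 0.05
+    # so np.vstack yields an array of shape (n_samples, 200), with a
+    # trailing channel axis added when expand=True.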
+ """ + batch_ret = {} + batch_to_load = [el for el in batch_to_load if el is not None] + batch_to_load = [element[1] for tupl in batch_to_load for element in tupl] + # Create batches + ret = {} + # Load data + ret['data'] = [] + + ret['indices'] = []#np.sort(batch_to_load) + + if self.smooth_raw_both=='raw' or self.smooth_raw_both=='both': + if self.preload: + raw = self.image_raw[batch_to_load] + else: + raw=[] + with open(self.image_path_raw) as fp: + for i, line in enumerate(fp): + if i in batch_to_load: + line = re.split(' ', line) + line = np.array([float(el) for el in line]) + line = line.astype(floatX) + raw.append(line) + if len(raw) == len(batch_to_load): + break + raw = np.vstack(raw) + # b,0 to b,0,c + raw = np.expand_dims(raw, axis=2) + + if self.smooth_raw_both=='smooth' or self.smooth_raw_both=='both': + if self.preload: + smooth = self.image_smooth[batch_to_load] + else: + smooth=[] + with open(self.image_path_smooth) as fp: + for i, line in enumerate(fp): + if i in batch_to_load: + line = re.split(' ', line) + line = np.array([float(el) for el in line]) + line = line.astype(floatX) + smooth.append(line) + if len(smooth) == len(batch_to_load): + break + + smooth = np.vstack(smooth) + # b,0 to b,0,c + smooth = np.expand_dims(smooth, axis=2) + + if self.smooth_raw_both=='raw': + ret['data'] = raw + elif self.smooth_raw_both == 'smooth': + ret['data'] = smooth + elif self.smooth_raw_both == 'both': + ret['data']=np.concatenate([smooth,raw],axis=2) + + + + # Load mask + ret['labels'] = [] + if self.task=='segmentation': + if self.preload: + ret['labels'] = self.mask[batch_to_load] + else: + with open(self.mask_path) as fp: + for i, line in enumerate(fp): + if i in batch_to_load: + line = re.split(' ', line) + line = np.array([int(el) for el in line]) + line = line.astype('int32') + ret['labels'].append(line) + if len(ret['labels']) == len(batch_to_load): + break + ret['labels'] = np.vstack(ret['labels']) + + elif self.task =='classification': + if self.preload: + ret['labels'] = self.mask[batch_to_load] + else: + with open(self.mask_path) as fp: + for i, line in enumerate(fp): + if i in batch_to_load: + line = re.split(' ', line) + line = np.array([int(el) for el in line]) + line = line.astype('int32') + ret['labels'].append(line) + if len(ret['labels']) == len(batch_to_load): + break + ret['labels'] = np.vstack(ret['labels']) + + + ret['filenames'] = batch_to_load + + ret['subset'] = 'default' + + assert all(el in ret.keys() + for el in ('data', 'labels', 'filenames', 'subset')), ( + 'Keys: {}'.format(ret.keys())) + assert all(isinstance(el, np.ndarray) + for el in (ret['data'], ret['labels'])) + raw_data = ret['data'].copy() + seq_x, seq_y = ret['data'], ret['labels'] + + # Per-data normalization + if self.remove_per_img_mean: + seq_x -= seq_x.mean(axis=1, keepdims=True) + if self.divide_by_per_img_std: + seq_x /= seq_x.std(axis=1, keepdims=True) + + # Dataset statistics normalization + if self.remove_mean: + seq_x -= getattr(self, 'mean', 0) + if self.divide_by_std: + seq_x /= getattr(self, 'std', 1) + + assert seq_x.ndim == 3 + assert seq_y.ndim == 2 + + # from b,0(,c) to b,0,1(,c) + seq_x = np.expand_dims(seq_x, axis=2) + seq_y = np.expand_dims(seq_y, axis=2) + + # Perform data augmentation, if needed + seq_x, seq_y = random_transform( + seq_x, seq_y, + nclasses=self.nclasses, + void_label=self.void_labels, + **self.data_augm_kwargs) + + # from b,0,1(,c) to b,0(,c) + sh = seq_x.shape + seq_x = seq_x.reshape((sh[0], sh[1], sh[3])) + + if self.task == 'segmentation': + seq_y 
+        elif self.task == 'classification':
+            # print seq_y.shape
+            seq_y = seq_y.reshape((sh[0]))
+            # print seq_y.shape
+
+        if self.set_has_GT and self._void_labels != []:
+            # Map all void classes to non_void_nclasses and shift the other
+            # values accordingly, so that the valid values are between 0
+            # and non_void_nclasses-1 and the void_classes are all equal to
+            # non_void_nclasses.
+            void_l = self._void_labels
+            void_l.sort(reverse=True)
+            mapping = self._mapping
+
+            # Apply the mapping
+            tmp_class = (-1 if not hasattr(self, 'GTclasses') else
+                         max(self.GTclasses) + 1)
+            seq_y[seq_y == self.non_void_nclasses] = tmp_class
+            for i in sorted(mapping.keys()):
+                if i == self.non_void_nclasses:
+                    continue
+                seq_y[seq_y == i] = mapping[i]
+            try:
+                seq_y[seq_y == tmp_class] = mapping[self.non_void_nclasses]
+            except KeyError:
+                # none of the original classes was self.non_void_nclasses
+                pass
+        elif max(self._cmap.keys()) > self.non_void_nclasses - 1:
+            # Shift values of labels, so that the valid values are between 0
+            # and non_void_nclasses-1.
+            mapping = self._mapping
+
+            # Apply the mapping
+            tmp_class = (-1 if not hasattr(self, 'GTclasses') else
+                         max(self.GTclasses) + 1)
+            seq_y[seq_y == self.non_void_nclasses] = tmp_class
+            for i in sorted(mapping.keys()):
+                if i == self.non_void_nclasses:
+                    continue
+                seq_y[seq_y == i] = mapping[i]
+            try:
+                seq_y[seq_y == tmp_class] = mapping[self.non_void_nclasses]
+            except KeyError:
+                # none of the original classes was self.non_void_nclasses
+                pass
+
+        # Transform targets seq_y to one hot code if return_one_hot
+        # is True
+        if self.set_has_GT and self.return_one_hot:
+            nc = (self.non_void_nclasses if self._void_labels == [] else
+                  self.non_void_nclasses + 1)
+            sh = seq_y.shape
+            seq_y = seq_y.flatten()
+            seq_y_hot = np.zeros((seq_y.shape[0], nc),
+                                 dtype='int32')
+            seq_y = seq_y.astype('int32')
+            seq_y_hot[range(seq_y.shape[0]), seq_y] = 1
+            seq_y_hot = seq_y_hot.reshape(sh + (nc,))
+            seq_y = seq_y_hot
+
+        # Dimshuffle if return_01c is False
+        if not self.return_01c:
+            # b,0,c --> b,c,0
+            seq_x = seq_x.transpose([0, 2, 1])
+            if self.set_has_GT and self.return_one_hot:
+                seq_y = seq_y.transpose([0, 2, 1])
+            raw_data = raw_data.transpose([0, 2, 1])
+
+        if self.return_0_255:
+            seq_x = (seq_x * 255).astype('uint8')
+        ret['data'], ret['labels'] = seq_x, seq_y
+        ret['raw_data'] = raw_data
+        # Append the data of this batch to the minibatch array
+        for k, v in ret.iteritems():
+            batch_ret.setdefault(k, []).append(v)
+
+        for k, v in batch_ret.iteritems():
+            try:
+                batch_ret[k] = np.array(v)
+            except ValueError:
+                # Variable shape: cannot wrap with a numpy array
+                pass
+
+        batch_ret['data'] = batch_ret['data'].squeeze(0)
+        batch_ret['labels'] = batch_ret['labels'].squeeze(0)
+
+        if self.seq_length > 0 and self.return_middle_frame_only:
+            batch_ret['labels'] = batch_ret['labels'][:, self.seq_length//2]
+        if self.return_list:
+            return [batch_ret['data'], batch_ret['labels']]
+        else:
+            return batch_ret
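+
+# Output format sketch: for a segmentation batch of B rays with
+# smooth_or_raw='both' and return_01c=False, fetch_from_dataset yields
+# float32 `data` of shape (B, 2, 200) and int32 `labels` of shape (B, 200),
+# or labels of shape (B, nclasses, 200) when return_one_hot=True.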
diff --git a/code/cnn_1D_segm/fcn1D.py b/code/cnn_1D_segm/fcn1D.py
new file mode 100644
index 00000000..35d50c7f
--- /dev/null
+++ b/code/cnn_1D_segm/fcn1D.py
@@ -0,0 +1,109 @@
+import numpy as np
+import theano.tensor as T
+import lasagne
+from lasagne.layers import InputLayer, DropoutLayer, ReshapeLayer, \
+    NonlinearityLayer, DimshuffleLayer, ConcatLayer
+from lasagne.layers import batch_norm, BatchNormLayer
+from lasagne.layers import Pool1DLayer as PoolLayer
+from lasagne.layers import Conv1DLayer as ConvLayer
+from lasagne.layers import Upscale1DLayer as UpscaleLayer
+from lasagne.layers import PadLayer
+from lasagne.layers import ElemwiseSumLayer, ElemwiseMergeLayer
+from lasagne.nonlinearities import softmax, linear, rectify
+
+
+def conv_bn_relu(net, incoming_layer, depth, num_filters, filter_size, pad='same'):
+    net['conv'+str(depth)] = ConvLayer(net[incoming_layer],
+                                       num_filters=num_filters, filter_size=filter_size,
+                                       pad=pad, nonlinearity=None)
+    net['bn'+str(depth)] = BatchNormLayer(net['conv'+str(depth)])
+    net['relu'+str(depth)] = NonlinearityLayer(net['bn'+str(depth)], nonlinearity=rectify)
+    incoming_layer = 'relu'+str(depth)
+
+    return incoming_layer
+
+
+# start-snippet-bn_relu_conv
+def bn_relu_conv(net, incoming_layer, depth, num_filters, filter_size, pad='same'):
+
+    net['bn'+str(depth)] = BatchNormLayer(net[incoming_layer])
+    net['relu'+str(depth)] = NonlinearityLayer(net['bn'+str(depth)], nonlinearity=rectify)
+    net['conv'+str(depth)] = ConvLayer(net['relu'+str(depth)],
+                                       num_filters=num_filters, filter_size=filter_size,
+                                       pad=pad, nonlinearity=None)
+    incoming_layer = 'conv'+str(depth)
+
+    return incoming_layer
+# end-snippet-bn_relu_conv
+
+
+# start-snippet-convolutions
+def build_model(input_var,
+                n_classes=6,
+                nb_in_channels=2,
+                filter_size=25,
+                n_filters=64,
+                depth=8,
+                last_filter_size=1,
+                block='bn_relu_conv',
+                out_nonlin=softmax):
+    '''
+    Parameters
+    ----------
+    input_var : theano 3D tensor of shape (n_samples, n_in_channels, ray_length)
+    filter_size : odd int (to fit with 'same' padding)
+    n_filters : int, number of filters for each convLayer
+    n_classes : int, number of classes to segment
+    depth : int, number of stacked convolutions before concatenation
+    last_filter_size : int, last convolution filter size to obtain n_classes feature maps
+    out_nonlin : default=softmax, nonlinearity function
+    '''
+
+    net = {}
+
+    net['input'] = InputLayer((None, nb_in_channels, 200), input_var)
+    incoming_layer = 'input'
+
+    # Convolution layers
+    for d in range(depth):
+        if block == 'bn_relu_conv':
+            incoming_layer = bn_relu_conv(net, incoming_layer, depth=d,
+                                          num_filters=n_filters, filter_size=filter_size)
+        # end-snippet-convolutions
+        elif block == 'conv_bn_relu':
+            incoming_layer = conv_bn_relu(net, incoming_layer, depth=d,
+                                          num_filters=n_filters, filter_size=filter_size)
+
+    # start-snippet-output
+    # Output layer
+    net['final_conv'] = ConvLayer(net[incoming_layer],
+                                  num_filters=n_classes,
+                                  filter_size=last_filter_size,
+                                  pad='same')
+    incoming_layer = 'final_conv'
+
+    # DimshuffleLayer and ReshapeLayer to fit the softmax implementation
+    # (it needs a 1D or 2D tensor, not a 3D tensor)
+    net['final_dimshuffle'] = DimshuffleLayer(net[incoming_layer], (0, 2, 1))
+    incoming_layer = 'final_dimshuffle'
+
+    layerSize = lasagne.layers.get_output(net[incoming_layer]).shape
+    net['final_reshape'] = ReshapeLayer(net[incoming_layer],
+                                        (T.prod(layerSize[0:2]),
+                                         layerSize[2]))
+                                        # (200*batch_size, n_classes)
+    incoming_layer = 'final_reshape'
+
+    # This is the layer that computes the prediction
+    net['last_layer'] = NonlinearityLayer(net[incoming_layer],
+                                          nonlinearity=out_nonlin)
+    incoming_layer = 'last_layer'
+
+    # Layers needed to visualize the prediction of the network
+    net['probs_reshape'] = ReshapeLayer(net[incoming_layer],
+                                        (layerSize[0], layerSize[1], n_classes))
+    incoming_layer = 'probs_reshape'
+
+    net['probs_dimshuffle'] = DimshuffleLayer(net[incoming_layer], (0, 2, 1))
+
+    return [net[l] for l in ['last_layer']], net
+    # end-snippet-output
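+
+# A minimal build-and-compile sketch (shapes assume the 200-point rays of
+# this tutorial; `batch` stands for any float32 array of shape (B, 2, 200)):
+#
+#     import theano
+#     input_var = T.tensor3('input_var')
+#     _, net = build_model(input_var, n_classes=7, nb_in_channels=2)
+#     probs = lasagne.layers.get_output(net['probs_dimshuffle'],
+#                                       deterministic=True)
+#     predict = theano.function([input_var], probs)
+#     # predict(batch) has shape (B, 7, 200): per-point class probabilities.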
diff --git a/code/cnn_1D_segm/train_fcn1D.py b/code/cnn_1D_segm/train_fcn1D.py
new file mode 100644
index 00000000..d58c31d4
--- /dev/null
+++ b/code/cnn_1D_segm/train_fcn1D.py
@@ -0,0 +1,381 @@
+#!/usr/bin/env python2
+from __future__ import absolute_import, print_function, division
+
+import os
+import argparse
+import json
+import time
+from distutils.dir_util import copy_tree
+
+import lasagne
+import numpy as np
+import theano
+import theano.tensor as T
+from data_loader.cortical_layers import Cortical6LayersDataset
+from fcn1D import build_model
+from lasagne.objectives import categorical_crossentropy
+from lasagne.regularization import regularize_network_params
+from theano import config
+
+_FLOATX = config.floatX
+
+
+def accuracy_metric(y_pred, y_true, void_labels, one_hot=False):
+    assert (y_pred.ndim == 2) or (y_pred.ndim == 1)
+
+    # y_pred to indices
+    if y_pred.ndim == 2:
+        y_pred = T.argmax(y_pred, axis=1)
+
+    if one_hot:
+        y_true = T.argmax(y_true, axis=1)
+
+    # Compute accuracy
+    acc = T.eq(y_pred, y_true).astype(_FLOATX)
+
+    # Create mask
+    mask = T.ones_like(y_true, dtype=_FLOATX)
+    for el in void_labels:
+        indices = T.eq(y_true, el).nonzero()
+        if any(indices):
+            mask = T.set_subtensor(mask[indices], 0.)
+
+    # Apply mask
+    acc *= mask
+    acc = T.sum(acc) / T.sum(mask)
+
+    return acc
+
+
+def jaccard(y_pred, y_true, n_classes, one_hot=False):
+    assert (y_pred.ndim == 2) or (y_pred.ndim == 1)
+
+    # y_pred to indices
+    if y_pred.ndim == 2:
+        y_pred = T.argmax(y_pred, axis=1)
+
+    if one_hot:
+        y_true = T.argmax(y_true, axis=1)
+
+    # Compute confusion matrix
+    cm = T.zeros((n_classes, n_classes))
+    for i in range(n_classes):
+        for j in range(n_classes):
+            cm = T.set_subtensor(
+                cm[i, j], T.sum(T.eq(y_pred, i) * T.eq(y_true, j)))
+
+    # Compute Jaccard Index
+    TP_perclass = T.cast(cm.diagonal(), _FLOATX)
+    FP_perclass = cm.sum(1) - TP_perclass
+    FN_perclass = cm.sum(0) - TP_perclass
+
+    num = TP_perclass
+    denom = TP_perclass + FP_perclass + FN_perclass
+
+    return T.stack([num, denom], axis=0)
+
+
+SAVEPATH = 'save_models/'
+LOADPATH = SAVEPATH
+WEIGHTS_PATH = SAVEPATH
+
+
+def train(dataset, learning_rate=0.0005,
+          weight_decay=0.001, num_epochs=500,
+          max_patience=25, data_augmentation={},
+          savepath=None, loadpath=None,
+          batch_size=None, resume=False):
+
+    if savepath is None:
+        raise ValueError('A saving directory must be specified')
+
+    if batch_size is None:
+        batch_size = [1024, 1024, 1]
+
+    # Model hyperparameters
+    n_filters = 64
+    filter_size = 25
+    depth = 8
+    block = 'bn_relu_conv'
+
+    # Hyperparameters for the dataset loader
+    smooth_or_raw = 'both'  # use both input channels
+    shuffle_at_each_epoch = True
+
+    #
+    # Prepare load/save directories
+    #
+    exp_name = 'fcn1D'
+    exp_name += '_lrate=' + str(learning_rate)
+    exp_name += '_fil=' + str(n_filters)
+    exp_name += '_fsizes=' + str(filter_size)
+    exp_name += '_depth=' + str(depth)
+    exp_name += '_data=' + smooth_or_raw
+    exp_name += '_decay=' + str(weight_decay)
+    exp_name += '_pat=' + str(max_patience)
+
+    savepath = os.path.join(savepath, dataset, exp_name)
+    loadpath = os.path.join(loadpath, dataset, exp_name)
+    print('Savepath : ')
+    print(savepath)
+    print('Loadpath : ')
+    print(loadpath)
+
+    if not os.path.exists(savepath):
+        os.makedirs(savepath)
+    else:
+        print('\033[93m The following folder already exists {}. '
+              'It will be overwritten in a few seconds...\033[0m'.format(
+                  savepath))
+
+    print('Saving directory : ' + savepath)
+    with open(os.path.join(savepath, "config.txt"), "w") as f:
+        for key, value in locals().items():
+            f.write('{} = {}\n'.format(key, value))
+
+    #
+    # Define symbolic variables
+    #
+    input_var = T.tensor3('input_var')  # n_example*nb_in_channels*ray_size
+    target_var = T.ivector('target_var')  # n_example*ray_size
+    # learning rate is defined below as a theano variable.
+    learn_step = theano.shared(np.array(learning_rate, dtype=theano.config.floatX))
+
+    #
+    # Build dataset iterator
+    #
+    if smooth_or_raw == 'both':
+        nb_in_channels = 2
+        use_threads = False
+    else:
+        nb_in_channels = 1
+        use_threads = True
+
+    train_iter = Cortical6LayersDataset(
+        which_set='train',
+        smooth_or_raw=smooth_or_raw,
+        batch_size=batch_size[0],
+        data_augm_kwargs=data_augmentation,
+        shuffle_at_each_epoch=True,
+        return_one_hot=False,
+        return_01c=False,
+        return_list=False,
+        use_threads=use_threads,
+        preload=True)
+
+    val_iter = Cortical6LayersDataset(
+        which_set='valid',
+        smooth_or_raw=smooth_or_raw,
+        batch_size=batch_size[1],
+        shuffle_at_each_epoch=True,
+        return_one_hot=False,
+        return_01c=False,
+        return_list=False,
+        use_threads=use_threads,
+        preload=True)
+
+    test_iter = None
+
+    n_batches_train = train_iter.nbatches
+    n_batches_val = val_iter.nbatches
+    n_batches_test = test_iter.nbatches if test_iter is not None else 0
+    n_classes = train_iter.non_void_nclasses
+    void_labels = train_iter.void_labels
+
+    #
+    # Build network
+    #
+    simple_net_output, net = build_model(input_var,
+                                         filter_size=filter_size,
+                                         n_filters=n_filters,
+                                         depth=depth,
+                                         block=block,
+                                         nb_in_channels=nb_in_channels,
+                                         n_classes=n_classes)
+
+    #
+    # Define and compile theano functions
+    #
+    print("Defining and compiling training functions")
+
+    prediction = lasagne.layers.get_output(simple_net_output[0])
+    loss = categorical_crossentropy(prediction, target_var)
+    loss = loss.mean()
+
+    if weight_decay > 0:
+        weightsl2 = regularize_network_params(
+            simple_net_output, lasagne.regularization.l2)
+        loss += weight_decay * weightsl2
+
+    train_acc = accuracy_metric(prediction, target_var, void_labels)
+
+    params = lasagne.layers.get_all_params(simple_net_output, trainable=True)
+    updates = lasagne.updates.adam(loss, params, learning_rate=learn_step)
+
+    train_fn = theano.function([input_var, target_var], [loss, train_acc], updates=updates)
+
+    print("Done")
+
+    print("Defining and compiling valid functions")
+    valid_prediction = lasagne.layers.get_output(simple_net_output[0], deterministic=True)
+    valid_loss = categorical_crossentropy(valid_prediction, target_var).mean()
+    valid_acc = accuracy_metric(valid_prediction, target_var, void_labels)
+    valid_jacc = jaccard(valid_prediction, target_var, n_classes)
+
+    valid_fn = theano.function([input_var, target_var], [valid_loss, valid_acc, valid_jacc])
+    print("Done")
+
+    #
+    # Train loop
+    #
+    err_train = []
+    acc_train = []
+
+    err_valid = []
+    acc_valid = []
+    jacc_valid = []
+    patience = 0
+
+    # Training main loop
+    print("Start training")
+
+    for epoch in range(num_epochs):
+        learn_step.set_value((learn_step.get_value() * 0.99).astype(theano.config.floatX))
+
+        # Single epoch training and validation
+        start_time = time.time()
+        # Cost train and acc train for this epoch
+        cost_train_epoch = 0
+        acc_train_epoch = 0
+
+        for i in range(n_batches_train):
+            # Get minibatch (comment the next line if only 1 minibatch in training)
+            train_batch = train_iter.next()
+            X_train_batch, L_train_batch, idx_train_batch = train_batch['data'], train_batch['labels'], \
+                train_batch['filenames'][0]
+            L_train_batch = np.reshape(L_train_batch, np.prod(L_train_batch.shape))
+
+            # Training step
+            cost_train_batch, acc_train_batch = train_fn(X_train_batch, L_train_batch)
+
+            # Update epoch results
+            cost_train_epoch += cost_train_batch
+            acc_train_epoch += acc_train_batch
+
+        # Add epoch results
+        err_train += [cost_train_epoch / n_batches_train]
+        acc_train += [acc_train_epoch / n_batches_train]
+
+        # Validation
+        cost_val_epoch = 0
+        acc_val_epoch = 0
+        jacc_val_epoch = np.zeros((2, n_classes))
+
+        for i in range(n_batches_val):
+            # Get minibatch (comment the next line if only 1 minibatch in training)
+            val_batch = val_iter.next()
+            X_val_batch, L_val_batch, idx_val_batch = val_batch['data'], val_batch['labels'], val_batch['filenames'][0]
+            L_val_batch = np.reshape(L_val_batch, np.prod(L_val_batch.shape))
+
+            # Validation step
+            cost_val_batch, acc_val_batch, jacc_val_batch = valid_fn(X_val_batch, L_val_batch)
+
+            # Update epoch results
+            cost_val_epoch += cost_val_batch
+            acc_val_epoch += acc_val_batch
+            jacc_val_epoch += jacc_val_batch
+
+        # Add epoch results
+        err_valid += [cost_val_epoch / n_batches_val]
+        acc_valid += [acc_val_epoch / n_batches_val]
+        jacc_perclass_valid = jacc_val_epoch[0, :] / jacc_val_epoch[1, :]
+        jacc_valid += [np.mean(jacc_perclass_valid)]
+        # worse_indices_valid += [worse_indices_val_epoch]
+
+        # Print results (once per epoch)
+        out_str = ("EPOCH %i: Avg cost train %f, acc train %f" +
+                   ", cost val %f, acc val %f, jacc val per class %s, "
+                   "jacc val %f took %f s")
+        out_str = out_str % (epoch, err_train[epoch],
+                             acc_train[epoch],
+                             err_valid[epoch],
+                             acc_valid[epoch],
+                             ['%d: %f' % (i, j)
+                              for i, j in enumerate(jacc_perclass_valid)],
+                             jacc_valid[epoch],
+                             time.time() - start_time)
+        print(out_str)
+
+        # Early stopping and saving stuff
+        with open(os.path.join(savepath, "fcn1D_output.log"), "a") as f:
+            f.write(out_str + "\n")
+
+        if epoch == 0:
+            best_jacc_val = jacc_valid[epoch]
+        elif epoch > 1 and jacc_valid[epoch] > best_jacc_val:
+            print('saving best (and last) model')
+            best_jacc_val = jacc_valid[epoch]
+            patience = 0
+            np.savez(os.path.join(savepath, 'new_fcn1D_model_best.npz'),
+                     *lasagne.layers.get_all_param_values(simple_net_output))
+            np.savez(os.path.join(savepath, "fcn1D_errors_best.npz"),
+                     err_train=err_train, acc_train=acc_train,
+                     err_valid=err_valid, acc_valid=acc_valid, jacc_valid=jacc_valid)
+        else:
+            patience += 1
+            print('saving last model')
+
+        np.savez(os.path.join(savepath, 'new_fcn1D_model_last.npz'),
+                 *lasagne.layers.get_all_param_values(simple_net_output))
+        np.savez(os.path.join(savepath, "fcn1D_errors_last.npz"),
+                 err_train=err_train, acc_train=acc_train,
+                 err_valid=err_valid, acc_valid=acc_valid, jacc_valid=jacc_valid)
+
+        # Finish training if patience has expired or max number of epochs reached
+        if patience == max_patience or epoch == num_epochs - 1:
+            if savepath != loadpath:
+                print('Copying model and other training files to {}'.format(loadpath))
+                copy_tree(savepath, loadpath)
+            break
+
+
+def main():
+    parser = argparse.ArgumentParser(description='FCN-1D model training')
+    parser.add_argument('-dataset',
+                        default='cortical_layers',
+                        help='Dataset.')
+    parser.add_argument('-learning_rate',
+                        default=0.0005,
+                        help='Learning Rate')
+    parser.add_argument('--num_epochs',
+                        '-ne',
+                        type=int,
+                        default=500,
+                        help='Optional. Int to indicate the max '
+                             'number of epochs.')
+    parser.add_argument('-max_patience',
+                        type=int,
+                        default=25,
+                        help='Max patience')
+    parser.add_argument('-batch_size',
+                        type=int,
+                        nargs='+',
+                        default=[1024, 1024, 1],
+                        help='Batch size [train, val, test]. Default: -batch_size 1024 1024 1')
+    parser.add_argument('-data_augmentation',
+                        type=json.loads,
+                        default={},
+                        help='use data augmentation')
+    args = parser.parse_args()
+
+    train(dataset=args.dataset, learning_rate=args.learning_rate,
+          num_epochs=args.num_epochs, max_patience=args.max_patience, data_augmentation=args.data_augmentation,
+          batch_size=args.batch_size, savepath=SAVEPATH, loadpath=LOADPATH)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/code/fcn_2D_segm/__init__.py b/code/fcn_2D_segm/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/code/fcn_2D_segm/fcn8.py b/code/fcn_2D_segm/fcn8.py
new file mode 100644
index 00000000..5b19f320
--- /dev/null
+++ b/code/fcn_2D_segm/fcn8.py
@@ -0,0 +1,152 @@
+import numpy as np
+import scipy.io as sio
+import theano.tensor as T
+import lasagne
+from lasagne.layers import InputLayer, DropoutLayer, ReshapeLayer,\
+    DimshuffleLayer
+from lasagne.layers import Pool2DLayer as PoolLayer
+from lasagne.layers import Conv2DLayer as ConvLayer
+from lasagne.layers import ElemwiseSumLayer, ElemwiseMergeLayer
+from lasagne.layers import Deconv2DLayer as DeconvLayer
+from lasagne.nonlinearities import softmax, linear
+
+
+def freezeParameters(net, single=True):
+    """
+    Freeze parameters of a layer or a network so that they are not trainable
+    anymore.
+
+    Parameters
+    ----------
+    net: a network layer
+    single: whether to freeze a single layer or all of the layers below as well
+    """
+    all_layers = lasagne.layers.get_all_layers(net)
+
+    if single:
+        all_layers = [all_layers[-1]]
+
+    for layer in all_layers:
+        layer_params = layer.get_params()
+        for p in layer_params:
+            try:
+                layer.params[p].remove('trainable')
+            except KeyError:
+                pass
+
+
+# start-snippet-1
+def buildFCN8(nb_in_channels, input_var,
+              path_weights='/Tmp/romerosa/itinf/models/' +
+              'camvid/new_fcn8_model_best.npz',
+              n_classes=21, load_weights=True,
+              void_labels=[], trainable=False,
+              layer=['probs_dimshuffle'], pascal=False,
+              temperature=1.0, dropout=0.5):
+    '''
+    Build fcn8 model
+    '''
+
+    net = {}
+
+    # Contracting path
+    net['input'] = InputLayer((None, nb_in_channels, None, None), input_var)
+
+    # pool 1
+    net['conv1_1'] = ConvLayer(net['input'], 64, 3, pad=100, flip_filters=False)
+    net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad='same', flip_filters=False)
+    net['pool1'] = PoolLayer(net['conv1_2'], 2)
+
+    # pool 2
+    net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, pad='same', flip_filters=False)
+    net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, pad='same', flip_filters=False)
+    net['pool2'] = PoolLayer(net['conv2_2'], 2)
+
+    # pool 3
+    net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, pad='same', flip_filters=False)
+    net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, pad='same', flip_filters=False)
+    net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, pad='same', flip_filters=False)
+    net['pool3'] = PoolLayer(net['conv3_3'], 2)
+
+    # pool 4
+    net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad='same', flip_filters=False)
+    net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, pad='same', flip_filters=False)
+    net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, pad='same', flip_filters=False)
+    net['pool4'] = PoolLayer(net['conv4_3'], 2)
+
+    # pool 5
+    net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, pad='same', flip_filters=False)
+    net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, pad='same', flip_filters=False)
+    net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, pad='same', flip_filters=False)
+    net['pool5'] = PoolLayer(net['conv5_3'], 2)
+
+    # fc6
+    net['fc6'] = ConvLayer(net['pool5'], 4096, 7, pad='valid', flip_filters=False)
+    net['fc6_dropout'] = DropoutLayer(net['fc6'], p=dropout)
+
+    # fc7
+    net['fc7'] = ConvLayer(net['fc6_dropout'], 4096, 1, pad='valid', flip_filters=False)
+    net['fc7_dropout'] = DropoutLayer(net['fc7'], p=dropout)
+
+    net['score_fr'] = ConvLayer(net['fc7_dropout'], n_classes, 1, pad='valid', flip_filters=False)
+
+    # Upsampling path
+
+    # Unpool
+    net['score2'] = DeconvLayer(net['score_fr'], n_classes, 4,
+                                stride=2, crop='valid', nonlinearity=linear)
+    net['score_pool4'] = ConvLayer(net['pool4'], n_classes, 1, pad='same')
+    net['score_fused'] = ElemwiseSumLayer((net['score2'], net['score_pool4']),
+                                          cropping=[None, None, 'center', 'center'])
+
+    # Unpool
+    net['score4'] = DeconvLayer(net['score_fused'], n_classes, 4,
+                                stride=2, crop='valid', nonlinearity=linear)
+    net['score_pool3'] = ConvLayer(net['pool3'], n_classes, 1, pad='valid')
+    net['score_final'] = ElemwiseSumLayer((net['score4'], net['score_pool3']),
+                                          cropping=[None, None, 'center', 'center'])
+
+    # Unpool
+    net['upsample'] = DeconvLayer(net['score_final'], n_classes, 16,
+                                  stride=8, crop='valid', nonlinearity=linear)
+    upsample_shape = lasagne.layers.get_output_shape(net['upsample'])[1]
+    net['input_tmp'] = InputLayer((None, upsample_shape, None, None), input_var)
+
+    net['score'] = ElemwiseMergeLayer((net['input_tmp'], net['upsample']),
+                                      merge_function=lambda input, deconv:
+                                      deconv,
+                                      cropping=[None, None, 'center',
+                                                'center'])
+
+    # Final dimshuffle, reshape and softmax
+    net['final_dimshuffle'] = \
+        lasagne.layers.DimshuffleLayer(net['score'], (0, 2, 3, 1))
+    laySize = lasagne.layers.get_output(net['final_dimshuffle']).shape
+    net['final_reshape'] = \
+        lasagne.layers.ReshapeLayer(net['final_dimshuffle'],
+                                    (T.prod(laySize[0:3]),
+                                     laySize[3]))
+    net['probs'] = lasagne.layers.NonlinearityLayer(net['final_reshape'],
+                                                    nonlinearity=softmax)
+    # end-snippet-1
+
+    # Do not train
+    if not trainable:
+        freezeParameters(net['probs'])
+
+    # Go back to 4D
+    net['probs_reshape'] = ReshapeLayer(net['probs'], (laySize[0], laySize[1],
+                                                       laySize[2], n_classes))
+
+    net['probs_dimshuffle'] = DimshuffleLayer(net['probs_reshape'],
+                                              (0, 3, 1, 2))
+
+    # Apply temperature
+    if load_weights:
+        soft_value = net['upsample'].W.get_value() / temperature
+        net['upsample'].W.set_value(soft_value)
+        soft_value = net['upsample'].b.get_value() / temperature
+        net['upsample'].b.set_value(soft_value)
+
+    return [net[el] for el in layer]
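+
+# A minimal usage sketch (random weights; `x` stands for any float32 array
+# of shape (1, 3, H, W)):
+#
+#     import theano
+#     input_var = T.tensor4('input_var')
+#     probs_layer = buildFCN8(3, input_var, n_classes=2, load_weights=False,
+#                             trainable=True, layer=['probs'])[0]
+#     f = theano.function([input_var],
+#                         lasagne.layers.get_output(probs_layer,
+#                                                   deterministic=True))
+#     # f(x) has shape (1*H*W, n_classes): one softmax row per pixel.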
diff --git a/code/fcn_2D_segm/train_fcn8.py b/code/fcn_2D_segm/train_fcn8.py
new file mode 100644
index 00000000..d106baee
--- /dev/null
+++ b/code/fcn_2D_segm/train_fcn8.py
@@ -0,0 +1,420 @@
+#!/usr/bin/env python2
+from __future__ import absolute_import, print_function, division
+import os
+import argparse
+import time
+import json
+
+import numpy as np
+import theano
+import theano.tensor as T
+from theano import config
+import lasagne
+from lasagne.regularization import regularize_network_params
+
+from dataset_loaders.images.polyps912 import Polyps912Dataset
+from fcn8 import buildFCN8
+
+
+_FLOATX = config.floatX
+_EPSILON = 10e-7
+
+
+def jaccard_metric(y_pred, y_true, n_classes, one_hot=False):
+
+    assert (y_pred.ndim == 2) or (y_pred.ndim == 1)
+
+    # y_pred to indices
+    if y_pred.ndim == 2:
+        y_pred = T.argmax(y_pred, axis=1)
+
+    if one_hot:
+        y_true = T.argmax(y_true, axis=1)
+
+    # Compute confusion matrix
+    cm = T.zeros((n_classes, n_classes))
+    for i in range(n_classes):
+        for j in range(n_classes):
+            cm = T.set_subtensor(
+                cm[i, j], T.sum(T.eq(y_pred, i) * T.eq(y_true, j)))
+
+    # Compute Jaccard Index
+    TP_perclass = T.cast(cm.diagonal(), _FLOATX)
+    FP_perclass = cm.sum(1) - TP_perclass
+    FN_perclass = cm.sum(0) - TP_perclass
+
+    num = TP_perclass
+    denom = TP_perclass + FP_perclass + FN_perclass
+
+    return T.stack([num, denom], axis=0)
+
+
+def accuracy_metric(y_pred, y_true, void_labels, one_hot=False):
+
+    assert (y_pred.ndim == 2) or (y_pred.ndim == 1)
+
+    # y_pred to indices
+    if y_pred.ndim == 2:
+        y_pred = T.argmax(y_pred, axis=1)
+
+    if one_hot:
+        y_true = T.argmax(y_true, axis=1)
+
+    # Compute accuracy
+    acc = T.eq(y_pred, y_true).astype(_FLOATX)
+
+    # Create mask
+    mask = T.ones_like(y_true, dtype=_FLOATX)
+    for el in void_labels:
+        indices = T.eq(y_true, el).nonzero()
+        if any(indices):
+            mask = T.set_subtensor(mask[indices], 0.)
+
+    # Apply mask
+    acc *= mask
+    acc = T.sum(acc) / T.sum(mask)
+
+    return acc
+
+
+def crossentropy_metric(y_pred, y_true, void_labels, one_hot=False):
+    # Clip predictions
+    y_pred = T.clip(y_pred, _EPSILON, 1.0 - _EPSILON)
+
+    if one_hot:
+        y_true = T.argmax(y_true, axis=1)
+
+    # Create mask
+    mask = T.ones_like(y_true, dtype=_FLOATX)
+    for el in void_labels:
+        mask = T.set_subtensor(mask[T.eq(y_true, el).nonzero()], 0.)
+
+    # Modify y_true temporarily
+    y_true_tmp = y_true * mask
+    y_true_tmp = y_true_tmp.astype('int32')
+
+    # Compute cross-entropy
+    loss = T.nnet.categorical_crossentropy(y_pred, y_true_tmp)
+
+    # Compute masked mean loss
+    loss *= mask
+    loss = T.sum(loss) / T.sum(mask)
+
+    return loss
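+
+# Masking sketch: with void_labels=[2] and y_true=[0, 2, 1], the mask built
+# in the metrics above is [1, 0, 1], so the void pixel contributes neither
+# to the summed loss/accuracy nor to the normalizer T.sum(mask).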
+
+
+SAVEPATH = 'save_models/'
+LOADPATH = SAVEPATH
+WEIGHTS_PATH = SAVEPATH
+
+
+def train(dataset, learn_step=0.005,
+          weight_decay=1e-4, num_epochs=500,
+          max_patience=100, data_augmentation={},
+          savepath=None,  # loadpath=None,
+          early_stop_class=None,
+          batch_size=None,
+          resume=False,
+          train_from_0_255=False):
+
+    #
+    # Prepare load/save directories
+    #
+    exp_name = 'fcn8_' + ('data_aug' if bool(data_augmentation) else '')
+
+    if savepath is None:
+        raise ValueError('A saving directory must be specified')
+
+    savepath = os.path.join(savepath, dataset, exp_name)
+    # loadpath = os.path.join(loadpath, dataset, exp_name)
+    print(savepath)
+    # print loadpath
+
+    if not os.path.exists(savepath):
+        os.makedirs(savepath)
+    else:
+        print('\033[93m The following folder already exists {}. '
+              'It will be overwritten in a few seconds...\033[0m'.format(
+                  savepath))
+
+    print('Saving directory : ' + savepath)
+    with open(os.path.join(savepath, "config.txt"), "w") as f:
+        for key, value in locals().items():
+            f.write('{} = {}\n'.format(key, value))
+
+    #
+    # Define symbolic variables
+    #
+    input_var = T.tensor4('input_var')
+    target_var = T.ivector('target_var')
+
+    #
+    # Build dataset iterator
+    #
+    if batch_size is None:
+        batch_size = [10, 1, 1]
+    train_iter = Polyps912Dataset(which_set='train',
+                                  batch_size=batch_size[0],
+                                  seq_per_subset=0,
+                                  seq_length=0,
+                                  data_augm_kwargs=data_augmentation,
+                                  return_one_hot=False,
+                                  return_01c=False,
+                                  overlap=0,
+                                  use_threads=False,
+                                  shuffle_at_each_epoch=True,
+                                  return_list=True,
+                                  return_0_255=False)
+    val_iter = Polyps912Dataset(which_set='val',
+                                batch_size=batch_size[1],
+                                seq_per_subset=0,
+                                seq_length=0,
+                                return_one_hot=False,
+                                return_01c=False,
+                                overlap=0,
+                                use_threads=False,
+                                shuffle_at_each_epoch=False,
+                                return_list=True,
+                                return_0_255=False)
+    test_iter = Polyps912Dataset(which_set='test',
+                                 batch_size=batch_size[2],
+                                 seq_per_subset=0,
+                                 seq_length=0,
+                                 return_one_hot=False,
+                                 return_01c=False,
+                                 overlap=0,
+                                 use_threads=False,
+                                 shuffle_at_each_epoch=False,
+                                 return_list=True,
+                                 return_0_255=False)
+
+    n_batches_train = train_iter.nbatches
+    n_batches_val = val_iter.nbatches
+    n_batches_test = test_iter.nbatches if test_iter is not None else 0
+    n_classes = train_iter.non_void_nclasses
+    void_labels = train_iter.void_labels
+    nb_in_channels = train_iter.data_shape[0]
+
+    print("Batch. train: %d, val %d, test %d" % (n_batches_train, n_batches_val, n_batches_test))
+    print("Nb of classes: %d" % (n_classes))
+    print("Nb. of input channels: %d" % (nb_in_channels))
+
+    #
+    # Build network
+    #
+    convmodel = buildFCN8(nb_in_channels, input_var, n_classes=n_classes,
+                          void_labels=void_labels, trainable=True,
+                          load_weights=resume, pascal=True, layer=['probs'])
+
+    #
+    # Define and compile theano functions
+    #
+    print("Defining and compiling training functions")
+    prediction = lasagne.layers.get_output(convmodel)[0]
+    loss = crossentropy_metric(prediction, target_var, void_labels)
+
+    if weight_decay > 0:
+        weightsl2 = regularize_network_params(
+            convmodel, lasagne.regularization.l2)
+        loss += weight_decay * weightsl2
+
+    params = lasagne.layers.get_all_params(convmodel, trainable=True)
+    updates = lasagne.updates.adam(loss, params, learning_rate=learn_step)
+
+    train_fn = theano.function([input_var, target_var], loss, updates=updates)
+
+    print("Defining and compiling test functions")
+    test_prediction = lasagne.layers.get_output(convmodel, deterministic=True)[0]
+    test_loss = crossentropy_metric(test_prediction, target_var, void_labels)
+    test_acc = accuracy_metric(test_prediction, target_var, void_labels)
+    test_jacc = jaccard_metric(test_prediction, target_var, n_classes)
+
+    val_fn = theano.function([input_var, target_var], [test_loss, test_acc, test_jacc])
+
+    #
+    # Train
+    #
+    err_train = []
+    err_valid = []
+    acc_valid = []
+    jacc_valid = []
+    patience = 0
+
+    ## Uncomment this to test the training
+    # n_batches_train = 1
+    # n_batches_val = 1
+    # n_batches_test = 1
+    # num_epochs = 1
+
+    # Training main loop
+    print("Start training")
+    for epoch in range(num_epochs):
+        # Single epoch training and validation
+        start_time = time.time()
+        cost_train_tot = 0
+
+        # Train
+        for i in range(n_batches_train):
+            print('Training batch ', i)
+            # Get minibatch
+            X_train_batch, L_train_batch = train_iter.next()
+            L_train_batch = np.reshape(L_train_batch, np.prod(L_train_batch.shape))
+
+            # Training step
+            cost_train = train_fn(X_train_batch, L_train_batch)
+            out_str = "cost %f" % (cost_train)
+            cost_train_tot += cost_train
+
+        err_train += [cost_train_tot/n_batches_train]
+
+        # Validation
+        cost_val_tot = 0
+        acc_val_tot = 0
+        jacc_val_tot = np.zeros((2, n_classes))
+        for i in range(n_batches_val):
+            print('Valid batch ', i)
+            # Get minibatch
+            X_val_batch, L_val_batch = val_iter.next()
+            L_val_batch = np.reshape(L_val_batch, np.prod(L_val_batch.shape))
+
+            # Validation step
+            cost_val, acc_val, jacc_val = val_fn(X_val_batch, L_val_batch)
+
+            acc_val_tot += acc_val
+            cost_val_tot += cost_val
+            jacc_val_tot += jacc_val
+
+        err_valid += [cost_val_tot/n_batches_val]
+        acc_valid += [acc_val_tot/n_batches_val]
+        jacc_perclass_valid = jacc_val_tot[0, :] / jacc_val_tot[1, :]
+        if early_stop_class is None:
+            jacc_valid += [np.mean(jacc_perclass_valid)]
+        else:
+            jacc_valid += [jacc_perclass_valid[early_stop_class]]
+
+        out_str = "EPOCH %i: Avg epoch training cost train %f, cost val %f" +\
+                  ", acc val %f, jacc val class 0 %f, jacc val class 1 %f, jacc val %f took %f s"
+        out_str = out_str % (epoch, err_train[epoch],
+                             err_valid[epoch],
+                             acc_valid[epoch],
+                             jacc_perclass_valid[0],
+                             jacc_perclass_valid[1],
+                             jacc_valid[epoch],
+                             time.time()-start_time)
+        print(out_str)
+
+        with open(os.path.join(savepath, "fcn8_output.log"), "a") as f:
+            f.write(out_str + "\n")
+
+        # Early stopping and saving stuff
+        if epoch == 0:
+            best_jacc_val = jacc_valid[epoch]
+        elif epoch > 1 and jacc_valid[epoch] > best_jacc_val:
+            best_jacc_val = jacc_valid[epoch]
+            patience = 0
+            np.savez(os.path.join(savepath, 'new_fcn8_model_best.npz'), *lasagne.layers.get_all_param_values(convmodel))
+            np.savez(os.path.join(savepath, "fcn8_errors_best.npz"), err_valid, err_train, acc_valid, jacc_valid)
+        else:
+            patience += 1
+
+        np.savez(os.path.join(savepath, 'new_fcn8_model_last.npz'), *lasagne.layers.get_all_param_values(convmodel))
+        np.savez(os.path.join(savepath, "fcn8_errors_last.npz"), err_valid, err_train, acc_valid, jacc_valid)
+
+        # Finish training if patience has expired or max number of epochs reached
+        if patience == max_patience or epoch == num_epochs-1:
+            if test_iter is not None:
+                # Load best model weights
+                with np.load(os.path.join(savepath, 'new_fcn8_model_best.npz')) as f:
+                    param_values = [f['arr_%d' % i] for i in range(len(f.files))]
+                nlayers = len(lasagne.layers.get_all_params(convmodel))
+                lasagne.layers.set_all_param_values(convmodel, param_values[:nlayers])
+
+                # Test
+                cost_test_tot = 0
+                acc_test_tot = 0
+                jacc_test_tot = np.zeros((2, n_classes))
+                for i in range(n_batches_test):
+                    # Get minibatch
+                    X_test_batch, L_test_batch = test_iter.next()
+                    L_test_batch = np.reshape(L_test_batch, np.prod(L_test_batch.shape))
+
+                    # Test step
+                    cost_test, acc_test, jacc_test = val_fn(X_test_batch, L_test_batch)
+
+                    acc_test_tot += acc_test
+                    cost_test_tot += cost_test
+                    jacc_test_tot += jacc_test
+
+                err_test = cost_test_tot/n_batches_test
+                acc_test = acc_test_tot/n_batches_test
+                jacc_test_perclass = jacc_test_tot[0, :] / jacc_test_tot[1, :]
+                jacc_test = np.mean(jacc_test_perclass)
+
+                out_str = "FINAL MODEL: err test % f, acc test %f, "
+                out_str += "jacc test class 0 % f, jacc test class 1 %f, jacc test %f"
+                out_str = out_str % (err_test,
+                                     acc_test,
+                                     jacc_test_perclass[0],
+                                     jacc_test_perclass[1],
+                                     jacc_test)
+                print(out_str)
+            # if savepath != loadpath:
+            #     print('Copying model and other training files to {}'.format(loadpath))
+
+    # End
+    return
+
+
+def main():
+    parser = argparse.ArgumentParser(description='FCN8 model training')
+    parser.add_argument('-dataset',
+                        default='polyps',
+                        help='Dataset.')
+    parser.add_argument('-learning_rate',
+                        default=0.0001,
+                        help='Learning Rate')
+    parser.add_argument('-penal_cst',
+                        default=0.0,
+                        help='regularization constant')
+    parser.add_argument('--num_epochs',
+                        '-ne',
+                        type=int,
+                        default=750,
+                        help='Optional. Int to indicate the max '
+                             'number of epochs.')
+    parser.add_argument('-max_patience',
+                        type=int,
+                        default=100,
+                        help='Max patience')
+    parser.add_argument('-batch_size',
+                        type=int,
+                        nargs='+',
+                        default=[10, 1, 1],
+                        help='Batch size [train, val, test]. Default: -batch_size 10 1 1')
+    parser.add_argument('-data_augmentation',
+                        type=json.loads,
+                        default={'crop_size': (224, 224), 'horizontal_flip': True, 'fill_mode': 'constant'},
+                        help='use data augmentation')
+    parser.add_argument('-early_stop_class',
+                        type=int,
+                        default=None,
+                        help='class to early stop on')
+    parser.add_argument('-train_from_0_255',
+                        type=bool,
+                        default=False,
+                        help='Whether to train from images within 0-255 range')
+    args = parser.parse_args()
+
+    train(args.dataset, float(args.learning_rate),
+          float(args.penal_cst), int(args.num_epochs), int(args.max_patience),
+          data_augmentation=args.data_augmentation, batch_size=args.batch_size,
+          early_stop_class=args.early_stop_class, savepath=SAVEPATH,
+          train_from_0_255=args.train_from_0_255)  # , loadpath=LOADPATH)
+
+if __name__ == "__main__":
+    main()
diff --git a/code/guidelines_segm_tutos_with_conda.sh b/code/guidelines_segm_tutos_with_conda.sh
new file mode 100644
index 00000000..93057b38
--- /dev/null
+++ b/code/guidelines_segm_tutos_with_conda.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+### Base installation.
+
+# Create and enter main directory.
+mkdir main_directory
+cd main_directory
+# Create and activate conda environment.
+conda create --yes -n tuto python=2
+source activate tuto
+# Install theano.
+conda install --yes -c mila-udem theano
+# Install Lasagne.
+git clone https://github.com/Lasagne/Lasagne.git
+cd Lasagne/
+pip install -e .
+cd ..
+# Install dataset_loaders.
+conda install --yes matplotlib numpy Pillow scipy scikit-image seaborn h5py
+git clone https://github.com/fvisin/dataset_loaders.git
+cd dataset_loaders/
+pip install -e .
+cd ..
+# Create config.ini.
+cd dataset_loaders/dataset_loaders
+touch config.ini
+cd ../../
+# Get tutorials code.
+git clone https://github.com/lisa-lab/DeepLearningTutorials.git
+
+# NB: Don't forget to correctly set config.ini with section [general]
+# and other relevant sections for the segmentation tutorials before
+# running the following lines.
+# Field `datasets_local_path` in the [general] section should indicate a working
+# directory for the dataset_loaders module. You can use a directory within
+# the main directory, for example main_directory/datasets_local_dir.
+# If the specified folder does not exist, it will be created.
+
+# NB: The following lines should be executed in the main directory created above.
+# If any problem occurs, consider deleting the save_models folder (created by
+# tutorial scripts) and the working directory you specified for dataset_loaders:
+# rm -rf save_models datasets_local_dir
+
+### Tutorial FCN 2D.
+## Get polyps_split7.zip from https://drive.google.com/file/d/0B_60jvsCt1hhZWNfcW4wbHE5N3M/view +## Directory for [polyps912] section in config.ini should be full path to main_directory/polyps_split7 +unzip polyps_split7.zip +THEANO_FLAGS=device=cuda,floatX=float32 python DeepLearningTutorials/code/fcn_2D_segm/train_fcn8.py --num_epochs 60 + +### Tutorial UNET. +## Get test-volume.tif, train-labels.tif, train-volume.tif from ISBI challenge: http://brainiac2.mit.edu/isbi_challenge/home +## Directory for [isbi_em_stacks] section in config.ini should be full path to main_directory/isbi +pip install simpleitk +mkdir isbi +mv test-volume.tif train-labels.tif train-volume.tif isbi +THEANO_FLAGS=device=cuda,floatX=float32 python DeepLearningTutorials/code/unet/train_unet.py --num_epochs 60 + +### Tutorial FCN 1D. +## Get TrainingData190417.tar.gz from https://drive.google.com/file/d/0B3tbeSUS2FsVOVlIamlDdkNBQUE/edit +## Directory for [cortical_layers] section in config.ini should be full path to main_directory/cortical_layers +mkdir cortical_layers +cd cortical_layers/ +tar -xvf ../TrainingData190417.tar.gz +mv TrainingData 6layers_segmentation +cd .. +THEANO_FLAGS=device=cuda,floatX=float32 python DeepLearningTutorials/code/cnn_1D_segm/train_fcn1D.py --num_epochs 60 diff --git a/code/unet/Unet_lasagne_recipes.py b/code/unet/Unet_lasagne_recipes.py new file mode 100644 index 00000000..ff7a02f0 --- /dev/null +++ b/code/unet/Unet_lasagne_recipes.py @@ -0,0 +1,75 @@ +# start-snippet-1 +__author__ = 'Fabian Isensee' +from collections import OrderedDict +from lasagne.layers import (InputLayer, ConcatLayer, Pool2DLayer, ReshapeLayer, DimshuffleLayer, NonlinearityLayer, + DropoutLayer, Deconv2DLayer, batch_norm) +try: + from lasagne.layers.dnn import Conv2DDNNLayer as ConvLayer +except ImportError: + from lasagne.layers import Conv2DLayer as ConvLayer +import lasagne +from lasagne.init import HeNormal +# end-snippet-1 + +# start-snippet-downsampling +def build_UNet(n_input_channels=1, BATCH_SIZE=None, num_output_classes=2, pad='same', nonlinearity=lasagne.nonlinearities.elu, input_dim=(None, None), base_n_filters=64, do_dropout=False): + net = OrderedDict() + net['input'] = InputLayer((BATCH_SIZE, n_input_channels, input_dim[0], input_dim[1])) + + net['contr_1_1'] = batch_norm(ConvLayer(net['input'], base_n_filters, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['contr_1_2'] = batch_norm(ConvLayer(net['contr_1_1'], base_n_filters, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['pool1'] = Pool2DLayer(net['contr_1_2'], 2) + + net['contr_2_1'] = batch_norm(ConvLayer(net['pool1'], base_n_filters*2, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['contr_2_2'] = batch_norm(ConvLayer(net['contr_2_1'], base_n_filters*2, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['pool2'] = Pool2DLayer(net['contr_2_2'], 2) + + net['contr_3_1'] = batch_norm(ConvLayer(net['pool2'], base_n_filters*4, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['contr_3_2'] = batch_norm(ConvLayer(net['contr_3_1'], base_n_filters*4, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['pool3'] = Pool2DLayer(net['contr_3_2'], 2) + + net['contr_4_1'] = batch_norm(ConvLayer(net['pool3'], base_n_filters*8, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['contr_4_2'] = batch_norm(ConvLayer(net['contr_4_1'], base_n_filters*8, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + 
l = net['pool4'] = Pool2DLayer(net['contr_4_2'], 2) + # end-snippet-downsampling + + # start-snippet-bottleneck + # the paper does not really describe where and how dropout is added. Feel free to try more options + if do_dropout: + l = DropoutLayer(l, p=0.4) + + net['encode_1'] = batch_norm(ConvLayer(l, base_n_filters*16, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['encode_2'] = batch_norm(ConvLayer(net['encode_1'], base_n_filters*16, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + # end-snippet-bottleneck + + # start-snippet-upsampling + net['upscale1'] = batch_norm(Deconv2DLayer(net['encode_2'], base_n_filters*16, 2, 2, crop="valid", nonlinearity=nonlinearity, W=HeNormal(gain="relu"))) + net['concat1'] = ConcatLayer([net['upscale1'], net['contr_4_2']], cropping=(None, None, "center", "center")) + net['expand_1_1'] = batch_norm(ConvLayer(net['concat1'], base_n_filters*8, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['expand_1_2'] = batch_norm(ConvLayer(net['expand_1_1'], base_n_filters*8, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + + net['upscale2'] = batch_norm(Deconv2DLayer(net['expand_1_2'], base_n_filters*8, 2, 2, crop="valid", nonlinearity=nonlinearity, W=HeNormal(gain="relu"))) + net['concat2'] = ConcatLayer([net['upscale2'], net['contr_3_2']], cropping=(None, None, "center", "center")) + net['expand_2_1'] = batch_norm(ConvLayer(net['concat2'], base_n_filters*4, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['expand_2_2'] = batch_norm(ConvLayer(net['expand_2_1'], base_n_filters*4, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + + net['upscale3'] = batch_norm(Deconv2DLayer(net['expand_2_2'], base_n_filters*4, 2, 2, crop="valid", nonlinearity=nonlinearity, W=HeNormal(gain="relu"))) + net['concat3'] = ConcatLayer([net['upscale3'], net['contr_2_2']], cropping=(None, None, "center", "center")) + net['expand_3_1'] = batch_norm(ConvLayer(net['concat3'], base_n_filters*2, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['expand_3_2'] = batch_norm(ConvLayer(net['expand_3_1'], base_n_filters*2, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + + net['upscale4'] = batch_norm(Deconv2DLayer(net['expand_3_2'], base_n_filters*2, 2, 2, crop="valid", nonlinearity=nonlinearity, W=HeNormal(gain="relu"))) + net['concat4'] = ConcatLayer([net['upscale4'], net['contr_1_2']], cropping=(None, None, "center", "center")) + net['expand_4_1'] = batch_norm(ConvLayer(net['concat4'], base_n_filters, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + net['expand_4_2'] = batch_norm(ConvLayer(net['expand_4_1'], base_n_filters, 3, nonlinearity=nonlinearity, pad=pad, W=HeNormal(gain="relu"))) + # end-snippet-upsampling + + # start-snippet-output + net['output_segmentation'] = ConvLayer(net['expand_4_2'], num_output_classes, 1, nonlinearity=None) + net['dimshuffle'] = DimshuffleLayer(net['output_segmentation'], (1, 0, 2, 3)) + net['reshapeSeg'] = ReshapeLayer(net['dimshuffle'], (num_output_classes, -1)) + net['dimshuffle2'] = DimshuffleLayer(net['reshapeSeg'], (1, 0)) + net['output_flattened'] = NonlinearityLayer(net['dimshuffle2'], nonlinearity=lasagne.nonlinearities.softmax) + + return net +# end-snippet-output diff --git a/code/unet/train_unet.py b/code/unet/train_unet.py new file mode 100644 index 00000000..87136e27 --- /dev/null +++ b/code/unet/train_unet.py @@ -0,0 +1,419 @@ +#!/usr/bin/env python2 +from __future__ import 
absolute_import, print_function, division
+import os
+import argparse
+import time
+import json
+from distutils.dir_util import copy_tree
+
+import numpy as np
+import theano
+import theano.tensor as T
+from theano import config
+import lasagne
+from lasagne.regularization import regularize_network_params
+
+
+from dataset_loaders.images.isbi_em_stacks import IsbiEmStacksDataset
+from Unet_lasagne_recipes import build_UNet
+
+
+_FLOATX = config.floatX
+_EPSILON = 10e-7
+
+
+def jaccard_metric(y_pred, y_true, n_classes, one_hot=False):
+
+    assert (y_pred.ndim == 2) or (y_pred.ndim == 1)
+
+    # y_pred to indices
+    if y_pred.ndim == 2:
+        y_pred = T.argmax(y_pred, axis=1)
+
+    if one_hot:
+        y_true = T.argmax(y_true, axis=1)
+
+    # Compute confusion matrix
+    cm = T.zeros((n_classes, n_classes))
+    for i in range(n_classes):
+        for j in range(n_classes):
+            cm = T.set_subtensor(
+                cm[i, j], T.sum(T.eq(y_pred, i) * T.eq(y_true, j)))
+
+    # Compute Jaccard Index
+    TP_perclass = T.cast(cm.diagonal(), _FLOATX)
+    FP_perclass = cm.sum(1) - TP_perclass
+    FN_perclass = cm.sum(0) - TP_perclass
+
+    num = TP_perclass
+    denom = TP_perclass + FP_perclass + FN_perclass
+
+    return T.stack([num, denom], axis=0)
+
+
+def accuracy_metric(y_pred, y_true, void_labels, one_hot=False):
+
+    assert (y_pred.ndim == 2) or (y_pred.ndim == 1)
+
+    # y_pred to indices
+    if y_pred.ndim == 2:
+        y_pred = T.argmax(y_pred, axis=1)
+
+    if one_hot:
+        y_true = T.argmax(y_true, axis=1)
+
+    # Compute accuracy
+    acc = T.eq(y_pred, y_true).astype(_FLOATX)
+
+    # Create mask to zero out void labels (same pattern as in
+    # crossentropy_metric below; set_subtensor on an empty index set
+    # is a no-op, so no guard is needed)
+    mask = T.ones_like(y_true, dtype=_FLOATX)
+    for el in void_labels:
+        mask = T.set_subtensor(mask[T.eq(y_true, el).nonzero()], 0.)
+
+    # Apply mask
+    acc *= mask
+    acc = T.sum(acc) / T.sum(mask)
+
+    return acc
+
+
+def crossentropy_metric(y_pred, y_true, void_labels, one_hot=False):
+    # Clip predictions
+    y_pred = T.clip(y_pred, _EPSILON, 1.0 - _EPSILON)
+
+    if one_hot:
+        y_true = T.argmax(y_true, axis=1)
+
+    # Create mask
+    mask = T.ones_like(y_true, dtype=_FLOATX)
+    for el in void_labels:
+        mask = T.set_subtensor(mask[T.eq(y_true, el).nonzero()], 0.)
+
+    # Modify y_true temporarily
+    y_true_tmp = y_true * mask
+    y_true_tmp = y_true_tmp.astype('int32')
+
+    # Compute cross-entropy
+    loss = T.nnet.categorical_crossentropy(y_pred, y_true_tmp)
+
+    # Compute masked mean loss
+    loss *= mask
+    loss = T.sum(loss) / T.sum(mask)
+
+    return loss
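+
+# Note: jaccard_metric returns a symbolic (2, n_classes) stack holding the
+# per-class intersection (TP) and union (TP + FP + FN) pixel counts. The
+# training loop below sums these counts over an epoch and divides only at
+# the end, so the Jaccard index is computed over the whole set rather than
+# averaged over batches.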
+
+SAVEPATH = 'save_models/'
+LOADPATH = SAVEPATH
+WEIGHTS_PATH = SAVEPATH
+
+
+def train(dataset, learn_step=0.005,
+          weight_decay=1e-4, num_epochs=500,
+          max_patience=100, data_augmentation={},
+          savepath=None, loadpath=None,
+          early_stop_class=None,
+          batch_size=None,
+          resume=False,
+          train_from_0_255=False):
+
+    #
+    # Prepare load/save directories
+    #
+    exp_name = 'unet_' + ('data_aug' if bool(data_augmentation) else '')
+
+    if savepath is None:
+        raise ValueError('A saving directory must be specified')
+
+    savepath = os.path.join(savepath, dataset, exp_name)
+    # loadpath = os.path.join(loadpath, dataset, exp_name)
+    print(savepath)
+    # print(loadpath)
+
+    if not os.path.exists(savepath):
+        os.makedirs(savepath)
+    else:
+        print('\033[93m The following folder already exists {}. '
+              'It will be overwritten in a few seconds...\033[0m'.format(
+                  savepath))
+
+    print('Saving directory : ' + savepath)
+    with open(os.path.join(savepath, "config.txt"), "w") as f:
+        for key, value in locals().items():
+            f.write('{} = {}\n'.format(key, value))
+
+    #
+    # Define symbolic variables
+    #
+    input_var = T.tensor4('input_var')
+    target_var = T.ivector('target_var')
+
+    #
+    # Build dataset iterator
+    #
+    if batch_size is not None:
+        bs = batch_size
+    else:
+        bs = [10, 1, 1]
+
+
+    train_iter = IsbiEmStacksDataset(which_set='train',
+                                     batch_size=bs[0],
+                                     seq_per_subset=0,
+                                     seq_length=0,
+                                     data_augm_kwargs=data_augmentation,
+                                     return_one_hot=False,
+                                     return_01c=False,
+                                     overlap=0,
+                                     use_threads=True,
+                                     shuffle_at_each_epoch=True,
+                                     return_list=True,
+                                     return_0_255=False)
+
+    val_iter = IsbiEmStacksDataset(which_set='val',
+                                   batch_size=bs[1],
+                                   seq_per_subset=0,
+                                   seq_length=0,
+                                   return_one_hot=False,
+                                   return_01c=False,
+                                   use_threads=True,
+                                   shuffle_at_each_epoch=False,
+                                   return_list=True,
+                                   return_0_255=False)
+    test_iter = None
+
+    batch = train_iter.next()
+    input_dim = (np.shape(batch[0])[2], np.shape(batch[0])[3])  # (x, y) image shape
+
+
+    n_batches_train = train_iter.nbatches
+    n_batches_val = val_iter.nbatches
+    n_batches_test = test_iter.nbatches if test_iter is not None else 0
+    n_classes = train_iter.non_void_nclasses
+    void_labels = train_iter.void_labels
+    nb_in_channels = train_iter.data_shape[0]
+
+    print("Batch. train: %d, val %d, test %d" % (n_batches_train, n_batches_val, n_batches_test))
+    print("Nb of classes: %d" % (n_classes))
+    print("Nb. of input channels: %d" % (nb_in_channels))
+
+    #
+    # Build network
+    #
+
+    net = build_UNet(n_input_channels=nb_in_channels,  # BATCH_SIZE=batch_size,
+                     num_output_classes=n_classes, base_n_filters=64, do_dropout=False,
+                     input_dim=(None, None))
+
+    output_layer = net["output_flattened"]
+    #
+    # Define and compile theano functions
+    #
+    print("Defining and compiling training functions")
+    prediction = lasagne.layers.get_output(output_layer, input_var)
+    loss = crossentropy_metric(prediction, target_var, void_labels)
+
+    if weight_decay > 0:
+        weightsl2 = regularize_network_params(output_layer, lasagne.regularization.l2)
+        loss += weight_decay * weightsl2
+
+    params = lasagne.layers.get_all_params(output_layer, trainable=True)
+    updates = lasagne.updates.adam(loss, params, learning_rate=learn_step)
+
+    train_fn = theano.function([input_var, target_var], loss, updates=updates)
+
+    print("Defining and compiling test functions")
+    test_prediction = lasagne.layers.get_output(output_layer, input_var, deterministic=True)
+    test_loss = crossentropy_metric(test_prediction, target_var, void_labels)
+    test_acc = accuracy_metric(test_prediction, target_var, void_labels)
+    test_jacc = jaccard_metric(test_prediction, target_var, n_classes)
+
+    val_fn = theano.function([input_var, target_var], [test_loss, test_acc, test_jacc])
+
+    #
+    # Train
+    #
+    err_train = []
+    err_valid = []
+    acc_valid = []
+    jacc_valid = []
+    patience = 0
+
+    # Training main loop
+    print("Start training")
+    for epoch in range(num_epochs):
+        # Single epoch training and validation
+        start_time = time.time()
+        cost_train_tot = 0
+        # Train
+        print('Training steps ')
+        for i in range(n_batches_train):
+            print(i)
+            # Get minibatch
+            X_train_batch, L_train_batch = train_iter.next()
+            L_train_batch = np.reshape(L_train_batch, np.prod(L_train_batch.shape))
+
+            # Training step
+            cost_train = train_fn(X_train_batch, L_train_batch)
+            out_str = "cost %f" % (cost_train)
+            cost_train_tot += cost_train
+
+        err_train += [cost_train_tot/n_batches_train]
+
+        # Validation
+        cost_val_tot = 0
+        acc_val_tot = 0
+        jacc_val_tot = np.zeros((2, n_classes))
+
+        print('Validation steps')
+        for i in range(n_batches_val):
+            print(i)
+            # Get minibatch
+            X_val_batch, L_val_batch = val_iter.next()
+            L_val_batch = np.reshape(L_val_batch, np.prod(L_val_batch.shape))
+
+            # Validation step
+            cost_val, acc_val, jacc_val = val_fn(X_val_batch, L_val_batch)
+
+            acc_val_tot += acc_val
+            cost_val_tot += cost_val
+            jacc_val_tot += jacc_val
+
+        err_valid += [cost_val_tot/n_batches_val]
+        acc_valid += [acc_val_tot/n_batches_val]
+        jacc_perclass_valid = jacc_val_tot[0, :] / jacc_val_tot[1, :]
+        if early_stop_class is None:
+            jacc_valid += [np.mean(jacc_perclass_valid)]
+        else:
+            jacc_valid += [jacc_perclass_valid[early_stop_class]]
+
+        out_str = "EPOCH %i: Avg epoch training cost train %f, cost val %f" +\
+            ", acc val %f, jacc val class 0 %f, jacc val class 1 %f, jacc val %f took %f s"
+        out_str = out_str % (epoch, err_train[epoch],
+                             err_valid[epoch],
+                             acc_valid[epoch],
+                             jacc_perclass_valid[0],
+                             jacc_perclass_valid[1],
+                             jacc_valid[epoch],
+                             time.time()-start_time)
+        print(out_str)
+
+        with open(os.path.join(savepath, "unet_output.log"), "a") as f:
+            f.write(out_str + "\n")
+
+        # Early stopping and saving stuff
+        if epoch == 0:
+            best_jacc_val = jacc_valid[epoch]
+        elif epoch > 1 and jacc_valid[epoch] > best_jacc_val:
+            best_jacc_val = jacc_valid[epoch]
+            patience = 0
+            np.savez(os.path.join(savepath, 'new_unet_model_best.npz'), *lasagne.layers.get_all_param_values(output_layer))
+            np.savez(os.path.join(savepath, 'unet_errors_best.npz'), err_valid, err_train, acc_valid, jacc_valid)
+        else:
+            patience += 1
+
+        np.savez(os.path.join(savepath, 'new_unet_model_last.npz'), *lasagne.layers.get_all_param_values(output_layer))
+        np.savez(os.path.join(savepath, 'unet_errors_last.npz'), err_valid, err_train, acc_valid, jacc_valid)
+        # Finish training if patience has expired or the max number of
+        # epochs is reached
+        if patience == max_patience or epoch == num_epochs-1:
+            if test_iter is not None:
+                # Load best model weights
+                with np.load(os.path.join(savepath, 'new_unet_model_best.npz')) as f:
+                    param_values = [f['arr_%d' % i] for i in range(len(f.files))]
+                nlayers = len(lasagne.layers.get_all_params(output_layer))
+                lasagne.layers.set_all_param_values(output_layer, param_values[:nlayers])
+                # Test
+                cost_test_tot = 0
+                acc_test_tot = 0
+                jacc_test_tot = np.zeros((2, n_classes))
+                for i in range(n_batches_test):
+                    # Get minibatch
+                    X_test_batch, L_test_batch = test_iter.next()
+                    L_test_batch = np.reshape(L_test_batch, np.prod(L_test_batch.shape))
+
+                    # Test step
+                    cost_test, acc_test, jacc_test = val_fn(X_test_batch, L_test_batch)
+
+                    acc_test_tot += acc_test
+                    cost_test_tot += cost_test
+                    jacc_test_tot += jacc_test
+
+                err_test = cost_test_tot/n_batches_test
+                acc_test = acc_test_tot/n_batches_test
+                jacc_test_perclass = jacc_test_tot[0, :] / jacc_test_tot[1, :]
+                jacc_test = np.mean(jacc_test_perclass)
+
+                out_str = "FINAL MODEL: err test %f, acc test %f, " +\
+                    "jacc test class 0 %f, jacc test class 1 %f, jacc test %f"
+                out_str = out_str % (err_test, acc_test, jacc_test_perclass[0],
+                                     jacc_test_perclass[1], jacc_test)
+                print(out_str)
+            if savepath != loadpath:
+                print('Copying model and other training files to {}'.format(loadpath))
+                copy_tree(savepath, loadpath)
+
+    # End
+    return
+
+
+def main():
+    parser = argparse.ArgumentParser(description='U-Net model training')
+    parser.add_argument('-dataset',
+                        default='em',
+                        help='Dataset.')
+    parser.add_argument('-learning_rate',
+                        default=0.0001,
+                        help='Learning Rate')
+    parser.add_argument('-penal_cst',
+                        default=0.0,
+                        help='regularization constant')
+    parser.add_argument('--num_epochs',
+                        '-ne',
+                        type=int,
+                        default=750,
+                        help='Optional. Int to indicate the max '
+                             'number of epochs.')
+    parser.add_argument('-max_patience',
+                        type=int,
+                        default=100,
+                        help='Max patience')
+    parser.add_argument('-batch_size',
+                        type=int,
+                        nargs='+',
+                        default=[5, 5, 1],
+                        help='Batch size [train, val, test]. Default: -batch_size 5 5 1')
+    parser.add_argument('-data_augmentation',
+                        type=json.loads,
+                        default={'rotation_range': 25,
+                                 'shear_range': 0.41,
+                                 'horizontal_flip': True,
+                                 'vertical_flip': True,
+                                 'fill_mode': 'reflect',
+                                 'spline_warp': True,
+                                 'warp_sigma': 10,
+                                 'warp_grid_size': 3,
+                                 'crop_size': (224, 224)},
+                        help='use data augmentation')
+    parser.add_argument('-early_stop_class',
+                        type=int,
+                        default=None,
+                        help='class to early stop on')
+    parser.add_argument('-train_from_0_255',
+                        type=bool,
+                        default=False,
+                        help='Whether to train from images within 0-255 range')
+    args = parser.parse_args()
+
+    train(args.dataset, float(args.learning_rate),
+          float(args.penal_cst), int(args.num_epochs), int(args.max_patience),
+          data_augmentation=args.data_augmentation, batch_size=args.batch_size,
+          early_stop_class=args.early_stop_class, savepath=SAVEPATH,
+          train_from_0_255=args.train_from_0_255, loadpath=LOADPATH)
+
+if __name__ == "__main__":
+    main()
diff --git a/doc/cnn_1D_segm.txt b/doc/cnn_1D_segm.txt
new file mode 100644
index 00000000..f81ea164
--- /dev/null
+++ b/doc/cnn_1D_segm.txt
@@ -0,0 +1,243 @@
+.. _cnn_1D_segm:
+
+Network for 1D segmentation
+***************************
+
+.. note::
+    This section assumes the reader has already read through :doc:`lenet` for
+    convolutional networks motivation and :doc:`fcn_2D_segm` for a standard
+    segmentation network.
+
+
+Summary
++++++++
+
+The fundamental notions behind segmentation have been explained in :doc:`fcn_2D_segm`.
+A particularity here is that some of these notions will be applied to 1D
+segmentation. However, almost every Lasagne layer used for 2D segmentation has
+a corresponding 1D layer, so the implementation would look very similar if the
+same model were used.
+
+
+Data
+++++
+
+The `BigBrain `__ dataset is a 3D ultra-high resolution model of the brain reconstructed from 2D sections.
+We are interested in the outer part of the brain, the cortex.
+More precisely, we are interested in segmenting the 6 different layers of the cortex in 3D.
+Creating an expertly labelled training dataset for each 2D section (such as the one shown in Figure 1) is infeasible.
+Instead of giving as input a 2D image of one section of the brain, we give as input 1D vectors
+with information from across the cortex, extracted from smaller portions of manually labelled cortex,
+as shown in Figure 2. The final dataset is not available yet; a preliminary version
+is available `here `_.
+
+.. figure:: images/big_brain_section.png
+    :align: center
+    :scale: 100%
+
+    **Figure 1** : Big Brain section
+
+.. figure:: images/ray.png
+    :align: center
+    :scale: 50%
+
+    **Figure 2** : Ray extraction from segmented cortex
+
+We will call *rays* the vectors of size 200 going from outside the brain and
+through the cortex. As the images were stained for cell bodies, the intensity of each pixel of these rays represents the cell densities
+and sizes contained in the cortical layer to which the pixel belongs. Since the 6 cortical layers
+have different properties (cell density and size), the intensity profile can be used to
+detect the boundaries of the cortical layers.
+
+Each ray has 2 input channels, one representing the smoothed intensity and the other
+the raw version, as shown in Figure 3. The next figure, Figure 4, shows the
+ground truth segmentation map, where each color represents
+a different label. The purple color indicates that these pixels are
+outside the cortex, while the 6 other colors represent the 6 cortical layers.
+For example, the first layer of the cortex is between pixels ~ 35-55. The cortex
+for this sample starts at pixel ~35 and ends at pixel ~170.
+
+
+.. figure:: images/raw_smooth.png
+    :align: center
+    :scale: 100%
+
+    **Figure 3** : Raw and smooth intensity profiles (input channels)
+
+
+.. figure:: images/labels.png
+    :align: center
+    :scale: 100%
+
+    **Figure 4** : Cortical layers labels for this ray
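+For illustration, the rays can be inspected with plain NumPy. This is a minimal
+sketch, assuming each line of the text files holds the whitespace-separated
+values of one ray; the tutorial itself loads the data through the
+``dataset_loaders`` module:
+
+.. code-block:: python
+
+    import numpy as np
+
+    def load_rays(path):
+        # One ray per line, values separated by whitespace.
+        with open(path) as f:
+            return np.array([line.split() for line in f], dtype='float32')
+
+    raw = load_rays('6layers_segmentation/training_raw.txt')  # (n_rays, 200)
+    labels = load_rays('6layers_segmentation/training_cls.txt').astype('int32')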
+
+
+Model
++++++
+
+We first experimented with more complex models, but we finally found that
+the simpler model presented here had enough capacity to learn where the layer boundaries are.
+This model (depicted in Figure 5) is composed of 8 identical blocks, followed by a
+last convolution and a softmax nonlinearity.
+
+Each block is composed of:
+
+* Batch normalization layer
+* Rectify nonlinearity layer
+* Convolution layer, with kernel size 25, with enough padding such that the convolution does not change the feature resolution, and 64 feature maps
+
+The last convolution has kernel size 1 and *number of classes* feature maps.
+The softmax is then used to detect which of these classes is most likely for each pixel.
+Note that any input image size could be used here, since the model is built
+exclusively from locally connected layers.
+
+.. figure:: images/cortical_layers_net.png
+    :align: center
+    :scale: 100%
+
+    **Figure 5** : Model
+
+Note that we didn't use any pooling, because it was not needed. However, if
+pooling layers were used, an upsampling path would have been necessary to recover the full
+spatial size of the input ray. Also, since each pixel of the output prediction has
+a receptive field that spans most of the input ray, the network is able to extract
+enough contextual information.
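+
+The receptive field can be checked with a quick computation (ours, not part of
+the tutorial code): each stride-1 convolution widens the receptive field by
+``filter_size - 1`` pixels, and the final 1x1 convolution adds nothing.
+
+.. code-block:: python
+
+    depth, filter_size = 8, 25
+    receptive_field = 1 + depth * (filter_size - 1)
+    print(receptive_field)  # 193 pixels of the 200-pixel ray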
+
+
+Results
++++++++
+
+The model outputs a vector of the same size as the input (here, 200).
+There are 7 class labels, including the 6 cortical layers and the 'not in the brain yet'
+label. You can see in Figure 6 below the output of the model for one ray. The top
+of the plot represents the ground truth segmentation, while the bottom represents
+the predicted segmentation. As you can see, only a small number of pixels
+are incorrectly segmented.
+
+.. figure:: images/cortical_ray_result.png
+    :align: center
+    :scale: 100%
+
+    **Figure 6** : Ground truth (top) vs prediction (bottom) for 1 ray
+
+However, since the purpose was to do 3D segmentation by using 1D segmentation
+of the rays, we needed to map the rays back onto the brain section. After interpolation
+between those rays and smoothing, we get the results shown in Figure 7. The colored
+lines are from 3D meshes based on the prediction from the model, intersected with a 2D section, and the grayscale stripes correspond to the
+ground truth. As you can see, the model achieves very good results on the small manually labelled
+sample, and these extend well to previously unsegmented cortex.
+
+
+
+.. figure:: images/cortical_valid1.png
+    :align: center
+    :scale: 40%
+
+    **Figure 7** : Results put on the brain section
+
+
+Code
+++++
+
+.. warning::
+
+    * Current code works with Python 2 only.
+    * If you use Theano with GPU backend (e.g. with Theano flag ``device=cuda``),
+      you will need at least 12GB free in your video RAM.
+
+The FCN implementation can be found in the following files:
+
+* `fcn1D.py <../code/cnn_1D_segm/fcn1D.py>`_ : Defines the model.
+* `train_fcn1D.py <../code/cnn_1D_segm/train_fcn1D.py>`_ : Training loop (main script to use).
+
+Change the ``dataset_loaders/config.ini`` file and add the right path for the dataset:
+
+.. code-block:: cfg
+
+    [cortical_layers]
+    shared_path = /path/to/DeepLearningTutorials/data/cortical_layers/
+
+The folder indicated at section ``[cortical_layers]`` should contain a sub-folder named ``6layers_segmentation``
+(you can obtain it by just renaming the folder extracted from ``TrainingData190417.tar.gz``) which should
+itself contain the files:
+
+* ``training_cls_indices.txt``
+* ``training_cls.txt``
+* ``training_geo.txt``
+* ``training_raw.txt``
+* ``training_regions.txt``
+
+
+First, define a *bn+relu+conv* block that returns the name of the last layer of
+the block. Since the implementation uses a dictionary variable *net* that keeps
+the layer's name as key and the actual layer object as value, the name of the
+last layer is sufficient.
+
+.. literalinclude:: ../code/cnn_1D_segm/fcn1D.py
+  :start-after: start-snippet-bn_relu_conv
+  :end-before: end-snippet-bn_relu_conv
+
+The model is composed of 8 of these blocks, as seen below. Note that the
+model implementation is very tweakable, since the depth (number of blocks), the
+type of block, the filter size and the number of filters can all be changed by the user.
+However, the hyperparameters used here were:
+
+* filter_size = 25
+* n_filters = 64
+* depth = 8
+* block = bn_relu_conv
+
+.. literalinclude:: ../code/cnn_1D_segm/fcn1D.py
+  :start-after: start-snippet-convolutions
+  :end-before: end-snippet-convolutions
+
+Finally, the last convolution and softmax are achieved by:
+
+.. literalinclude:: ../code/cnn_1D_segm/fcn1D.py
+  :start-after: start-snippet-output
+  :end-before: end-snippet-output
+
+Running ``train_fcn1D.py`` on a Titan X took around 4 hours, ending with the following:
+
+.. code-block:: text
+
+    THEANO_FLAGS=device=cuda0,floatX=float32,dnn.conv.algo_fwd=time_once,dnn.conv.algo_bwd_data=time_once,dnn.conv.algo_bwd_filter=time_once,gpuarray.preallocate=1 python train_fcn1D.py
+    [...]
+    EPOCH 412: Avg cost train 0.065615, acc train 0.993349, cost val 0.041758, acc val 0.984398, jacc val per class ['0: 0.981183', '1: 0.953546', '2: 0.945765', '3: 0.980471', '4: 0.914617', '5: 0.968710', '6: 0.971049'], jacc val 0.959335 took 31.422823 s
+    saving last model
+
+
+References
+++++++++++
+
+If you use this tutorial, please cite the following papers:
+
+* References for BigBrain:
+
+  * `[pdf] `__ Lewis, L.B. et al.: BigBrain: Initial Tissue Classification and Surface Extraction, HBM 2014.
+  * `[website] `__ Amunts, K. et al.: "BigBrain: An Ultrahigh-Resolution 3D Human Brain Model", Science (2013) 340 no. 6139 1472-1475, June 2013.
+  * `[pdf] `__ Bludau, S. et al.: Two new Cytoarchitectonic Areas of the Human Frontal Pole, OHBM 2012.
+  * `[pdf] `__ Lepage, C. et al.: Automatic Repair of Acquisition Defects in Reconstruction of Histology Sections of a Human Brain, HBM 2010.
+
+* `[GitHub Repo] `__ Francesco Visin, Adriana Romero - Dataset loaders: a python library to load and preprocess datasets. 2017
+
+Papers related to Theano/Lasagne:
+
+* `[pdf] `_ Theano Development Team. Theano: A Python framework for fast computation of mathematical expressions. May 2016.
+* `[website] `__ Sander Dieleman, Jan Schluter, Colin Raffel, Eben Olson, Søren Kaae Sønderby, Daniel Nouri, Daniel Maturana, Martin Thoma, Eric Battenberg, Jack Kelly, Jeffrey De Fauw, Michael Heilman, diogo149, Brian McFee, Hendrik Weideman, takacsg84, peterderivaz, Jon, instagibbs, Dr. Kashif Rasul, CongLiu, Britefury, and Jonas Degrave, “Lasagne: First release.” (2015).
+
+
+Acknowledgements
+================
+
+This work was done in collaboration with Konrad Wagstyl, PhD student, University of Cambridge.
+We would like to thank Professor Alan Evans' `[MCIN lab] `_ and Professor Katrin Amunts' `[INM-1 lab] `_.
+
+Thank you!
diff --git a/doc/conf.py b/doc/conf.py
index 52631d51..0f35bb34 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -25,11 +25,14 @@
 extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo']
 try:
-    from sphinx.ext import pngmath
-    extensions.append('sphinx.ext.pngmath')
+    from sphinx.ext import imgmath
+    extensions.append('sphinx.ext.imgmath')
 except ImportError:
-    print >>sys.stderr, 'Warning: could not import sphinx.ext.pngmath'
-    pass
+    try:
+        from sphinx.ext import pngmath
+        extensions.append('sphinx.ext.pngmath')
+    except ImportError:
+        pass
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['.templates']
@@ -155,12 +158,16 @@
 # Options for LaTeX output
 # ------------------------
 
+latex_elements = {
+    # The paper size ('letter' or 'a4').
+    #'papersize': 'letter',
 
-# The paper size ('letter' or 'a4').
-#latex_paper_size = 'letter'
+    # The font size ('10pt', '11pt' or '12pt').
+    'pointsize': '11pt',
 
-# The font size ('10pt', '11pt' or '12pt').
-latex_font_size = '11pt'
+    # Additional stuff for the LaTeX preamble.
+    #'preamble': '',
+}
 
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, document class [howto/manual]).
diff --git a/doc/contents.txt b/doc/contents.txt
index 3b7a16eb..3246aec1 100644
--- a/doc/contents.txt
+++ b/doc/contents.txt
@@ -24,3 +24,6 @@
     rnnrbm
     utilities
     references
+    fcn_2D_segm
+    cnn_1D_segm
+    unet
diff --git a/doc/fcn_2D_segm.txt b/doc/fcn_2D_segm.txt
new file mode 100644
index 00000000..eb462eab
--- /dev/null
+++ b/doc/fcn_2D_segm.txt
@@ -0,0 +1,254 @@
+.. _fcn_2D_segm:
+
+Fully Convolutional Networks (FCN) for 2D segmentation
+******************************************************
+
+.. note::
+    This section assumes the reader has already read through :doc:`lenet` for
+    convolutional networks motivation.
+
+Summary
++++++++
+
+The segmentation task differs from the classification task because it requires predicting
+a class for each pixel of the input image, instead of only one class for the whole input.
+Classification needs to understand *what* is in the input (namely, the context). However,
+in order to predict what is in the input for each pixel, segmentation needs to recover
+not only *what* is in the input, but also *where*.
+
+.. figure:: images/cat_segmentation.png
+    :align: center
+    :scale: 35%
+
+    **Figure 1** : Segmentation network (from FCN paper)
+
+**Fully Convolutional Networks** (FCNs) owe their name to their architecture, which is
+built only from locally connected layers, such as convolution, pooling and upsampling.
+Note that no dense layer is used in this kind of architecture. This reduces the number
+of parameters and computation time. Also, the network can work regardless of the original
+image size, without requiring any fixed number of units at any stage, given that all
+connections are local. To obtain a segmentation map (output), segmentation
+networks usually have two parts:
+
+* Downsampling path : captures semantic/contextual information
+* Upsampling path : recovers spatial information
+
+The **downsampling path** is used to extract and interpret the context (*what*), while the
+**upsampling path** is used to enable precise localization (*where*). Furthermore, to fully
+recover the fine-grained spatial information lost in the pooling or downsampling layers, we
+often use skip connections.
+
+A skip connection is a connection that bypasses at least one layer. Here, it
+is often used to transfer local information by concatenating or summing feature
+maps from the downsampling path with feature maps from the upsampling path. Merging features
+from various resolution levels helps combine context information with spatial information.
+
+
+Data
+++++
+
+The polyps dataset can be found `here `__.
+There is a total of 912 images taken from 36 patients.
+
+* Training set : 20 patients and 547 frames
+* Validation set : 8 patients and 183 frames
+* Test set : 8 patients and 182 frames
+
+Each pixel is labelled between 2 classes : polyp or background.
+The size of the images varies. We use data augmentation for training, as specified
+in the default arguments in the code given below. Note that
+data augmentation with random cropping is necessary for training with a batch size
+greater than 1, so that all images in a batch have the same size. Without random
+cropping, the batch size for the training set must be set to 1, as for the validation
+and test sets (where there is no data augmentation).
+
+
+In each of the training, validation and test directories, the input images are in the
+``/images`` directory and the polyps masks (segmentation maps) are in ``/masks2``. The
+segmentation maps in the ``/masks2`` directory indicate the presence or absence
+of polyps for each pixel. The other subdirectories (``/masks3`` and ``/masks4``) are,
+respectively, for a segmentation task with 3 and 4 classes, but will not be
+presented here.
+
+
+Model
++++++
+
+There are variants of the FCN architecture, which mainly differ in the spatial precision of
+their output. For example, the figures below show the FCN-32, FCN-16 and FCN-8 variants. In the
+figures, convolutional layers are represented as vertical lines between pooling layers, which
+explicitly show the relative size of the feature maps.
+
+.. figure:: images/fcn.png
+    :align: center
+    :scale: 50%
+
+    **Figure 2** : FCN architecture (from FCN paper)
+
+**Difference between the 3 FCN variants**
+
+As shown below, these 3 different architectures differ in the stride of the last convolution,
+and the skip connections used to obtain the output segmentation maps. We will use the term
+*downsampling path* to refer to the network up to the *conv7* layer and we will use the term
+*upsampling path* to refer to the network composed of all layers after *conv7*. It is worth
+noting that the 3 FCN architectures share the same downsampling path, but differ in their
+respective upsampling paths.
+
+
+1. **FCN-32** : Directly produces the segmentation map from *conv7*, by using a
+transposed convolution layer with stride 32.
+
+2. **FCN-16** : Sums the 2x upsampled prediction from *conv7*
+(using a transposed convolution with stride 2) with *pool4* and then
+produces the segmentation map, by using a transposed convolution layer with stride 16
+on top of that.
+
+3. **FCN-8** : Sums the 2x upsampled *conv7* (with a stride 2 transposed convolution)
+with *pool4*, upsamples them with a stride 2 transposed convolution and sums them
+with *pool3*, and applies a transposed convolution layer with stride 8 on the resulting
+feature maps to obtain the segmentation map.
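+A quick way to see how these strides fit together is to track feature-map
+resolutions. The sketch below is ours, for illustration only, and follows the
+FCN-8 upsampling path for a hypothetical 512x512 input:
+
+.. code-block:: python
+
+    def fcn8_resolutions(h=512, w=512):
+        pool3 = (h // 8, w // 8)     # after 3 stride-2 poolings
+        pool4 = (h // 16, w // 16)   # after 4 stride-2 poolings
+        conv7 = (h // 32, w // 32)   # coarsest feature maps
+        up1 = (conv7[0] * 2, conv7[1] * 2)  # stride-2 deconv, summed with pool4
+        up2 = (up1[0] * 2, up1[1] * 2)      # stride-2 deconv, summed with pool3
+        out = (up2[0] * 8, up2[1] * 8)      # final stride-8 deconv
+        return pool3, pool4, conv7, up1, up2, out
+
+    # ((64, 64), (32, 32), (16, 16), (32, 32), (64, 64), (512, 512))
+    print(fcn8_resolutions())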
+
+
+.. figure:: images/fcn_schema.png
+    :align: center
+    :scale: 65%
+
+    **Figure 3** : FCN architecture (from FCN paper)
+
+As explained above, the upsampling paths of the FCN variants are different, since they
+use different skip connection layers and strides for the last convolution, yielding
+different segmentations, as shown in Figure 4. Combining layers that have different
+precision helps retrieve fine-grained spatial information, as well as coarse
+contextual information.
+
+.. figure:: images/fcn32_16_8.png
+    :align: center
+    :scale: 30%
+
+    **Figure 4** : FCN results (from FCN paper)
+
+Note that the FCN-8 architecture was used on the polyps dataset below,
+since it produces more precise segmentation maps.
+
+
+Metrics
+=======
+
+**Per pixel accuracy**
+
+This metric is self-explanatory, since it outputs the class prediction accuracy
+per pixel.
+
+.. math::
+   :label: accuracy
+
+   acc(P, GT) = \frac{|\text{pixels correctly predicted}|}{|\text{total nb of pixels}|}
+
+
+**Jaccard (Intersection over Union)**
+
+This evaluation metric is often used for image segmentation, since it better
+reflects how well the predicted regions overlap the ground truth.
+The Jaccard index is a per-class evaluation metric: for a given class, it computes
+the number of pixels in the intersection between the
+predicted and ground truth segmentation maps,
+divided by the number of pixels in the union of those two segmentation maps.
+
+.. math::
+   :label: jaccard_equation
+
+   jacc(P(class), GT(class)) = \frac{|P(class)\cap GT(class)|}{|P(class)\cup GT(class)|}
+
+where `P` is the predicted segmentation map and `GT` is the ground
+truth segmentation map. `P(class)` is then the binary mask indicating if each
+pixel is predicted as *class* or not. In general, the closer to 1, the better.
+
+.. figure:: images/jaccard.png
+    :align: center
+    :scale: 40%
+
+    **Figure 5** : Jaccard visualisation (from this `website `__)
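+
+As a minimal NumPy sketch of both metrics (for illustration only; the tutorial
+code computes them symbolically in Theano):
+
+.. code-block:: python
+
+    import numpy as np
+
+    def pixel_accuracy(pred, gt):
+        # pred, gt: integer label maps of identical shape
+        return np.mean(pred == gt)
+
+    def jaccard(pred, gt, cls):
+        p, g = (pred == cls), (gt == cls)
+        union = np.logical_or(p, g).sum()
+        return np.logical_and(p, g).sum() / float(union) if union else np.nan
+
+    pred = np.array([[0, 1], [1, 1]])
+    gt = np.array([[0, 1], [0, 1]])
+    print(pixel_accuracy(pred, gt))   # 0.75
+    print(jaccard(pred, gt, cls=1))   # 2 / 3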
+
+Code
+++++
+
+.. warning::
+
+    * Current code works with Python 2 only.
+    * If you use Theano with GPU backend (e.g. with Theano flag ``device=cuda``),
+      you will need at least 12GB free in your video RAM.
+
+The FCN-8 implementation can be found in the following files:
+
+* `fcn8.py <../code/fcn_2D_segm/fcn8.py>`_ : Defines the model.
+* `train_fcn8.py <../code/fcn_2D_segm/train_fcn8.py>`_ : Training loop (main script to use).
+
+
+The user must install `Lasagne `_ ,
+and clone the GitHub repo `Dataset Loaders `_.
+
+.. code-block:: bash
+
+    ## Installation of dataset_loaders.
+
+    # dataset_loaders depends on the Python modules matplotlib, numpy, scipy, Pillow, scikit-image, seaborn, and h5py.
+    # They can all be installed via conda.
+    conda install matplotlib numpy Pillow scipy scikit-image seaborn h5py
+
+    git clone https://github.com/fvisin/dataset_loaders.git
+
+    cd dataset_loaders/
+
+    pip install -e .
+
+
+Change the ``dataset_loaders/config.ini`` file and add the right path for the dataset:
+
+.. code-block:: bash
+
+    ## Inside the `dataset_loaders` git folder.
+
+    # If ``config.ini`` does not yet exist, create it:
+    cd dataset_loaders
+    touch config.ini
+
+    # ``config.ini`` must have at least the section ``[general]`` which indicates a work directory.
+
+.. code-block:: cfg
+
+    [general]
+    datasets_local_path = /the/local/path/where/the/datasets/will/be/copied
+
+    [polyps912]
+    shared_path = /path/to/DeepLearningTutorials/data/polyps_split7/
+
+The folder indicated at section ``[polyps912]`` should be the unzipped dataset archive ``polyps_split7.zip``, with the sub-folders:
+
+* ``test``
+* ``train``
+* ``valid``
+
+We used Lasagne layers, as you can see in the code below.
+
+.. literalinclude:: ../code/fcn_2D_segm/fcn8.py
+  :start-after: start-snippet-1
+  :end-before: end-snippet-1
+
+References
+++++++++++
+
+If you use this tutorial, please cite the following papers.
+
+* `[pdf] `__ Long, J., Shelhamer, E., Darrell, T. Fully Convolutional Networks for Semantic Segmentation. 2014.
+* `[pdf] `__ David Vázquez, Jorge Bernal, F. Javier Sánchez, Gloria Fernández-Esparrach, Antonio M. López, Adriana Romero, Michal Drozdzal, Aaron Courville. A Benchmark for Endoluminal Scene Segmentation of Colonoscopy Images. (2016).
+* `[GitHub Repo] `__ Francesco Visin, Adriana Romero - Dataset loaders: a python library to load and preprocess datasets. 2017.
+
+Papers related to Theano/Lasagne:
+
+* `[pdf] `__ Theano Development Team. Theano: A Python framework for fast computation of mathematical expressions. May 2016.
+* `[website] `__ Sander Dieleman, Jan Schluter, Colin Raffel, Eben Olson, Søren Kaae Sønderby, Daniel Nouri, Daniel Maturana, Martin Thoma, Eric Battenberg, Jack Kelly, Jeffrey De Fauw, Michael Heilman, diogo149, Brian McFee, Hendrik Weideman, takacsg84, peterderivaz, Jon, instagibbs, Dr. Kashif Rasul, CongLiu, Britefury, and Jonas Degrave, “Lasagne: First release.” (2015).
+
+
+Thank you!
+ + diff --git a/doc/images/big_brain.png b/doc/images/big_brain.png new file mode 100644 index 00000000..5725346b Binary files /dev/null and b/doc/images/big_brain.png differ diff --git a/doc/images/big_brain_section.png b/doc/images/big_brain_section.png new file mode 100644 index 00000000..16612c0d Binary files /dev/null and b/doc/images/big_brain_section.png differ diff --git a/doc/images/cat_segmentation.png b/doc/images/cat_segmentation.png new file mode 100644 index 00000000..490a2118 Binary files /dev/null and b/doc/images/cat_segmentation.png differ diff --git a/doc/images/cortical_layers_net.png b/doc/images/cortical_layers_net.png new file mode 100644 index 00000000..50c7ea20 Binary files /dev/null and b/doc/images/cortical_layers_net.png differ diff --git a/doc/images/cortical_ray_result.png b/doc/images/cortical_ray_result.png new file mode 100644 index 00000000..31799798 Binary files /dev/null and b/doc/images/cortical_ray_result.png differ diff --git a/doc/images/cortical_valid1.png b/doc/images/cortical_valid1.png new file mode 100644 index 00000000..9f76d7b2 Binary files /dev/null and b/doc/images/cortical_valid1.png differ diff --git a/doc/images/cortical_valid2.png b/doc/images/cortical_valid2.png new file mode 100644 index 00000000..1369b757 Binary files /dev/null and b/doc/images/cortical_valid2.png differ diff --git a/doc/images/cortical_valid3_v1.png b/doc/images/cortical_valid3_v1.png new file mode 100644 index 00000000..d25a3cd2 Binary files /dev/null and b/doc/images/cortical_valid3_v1.png differ diff --git a/doc/images/cortical_valid4.png b/doc/images/cortical_valid4.png new file mode 100644 index 00000000..4276d198 Binary files /dev/null and b/doc/images/cortical_valid4.png differ diff --git a/doc/images/fcn.png b/doc/images/fcn.png new file mode 100644 index 00000000..69ec4933 Binary files /dev/null and b/doc/images/fcn.png differ diff --git a/doc/images/fcn32_16_8.png b/doc/images/fcn32_16_8.png new file mode 100644 index 00000000..bbc92b32 Binary files /dev/null and b/doc/images/fcn32_16_8.png differ diff --git a/doc/images/fcn_schema.png b/doc/images/fcn_schema.png new file mode 100644 index 00000000..fce8add9 Binary files /dev/null and b/doc/images/fcn_schema.png differ diff --git a/doc/images/jaccard.png b/doc/images/jaccard.png new file mode 100644 index 00000000..2e7d6847 Binary files /dev/null and b/doc/images/jaccard.png differ diff --git a/doc/images/labels.png b/doc/images/labels.png new file mode 100644 index 00000000..35f84e94 Binary files /dev/null and b/doc/images/labels.png differ diff --git a/doc/images/polyps_results.png b/doc/images/polyps_results.png new file mode 100644 index 00000000..19c8d3ab Binary files /dev/null and b/doc/images/polyps_results.png differ diff --git a/doc/images/raw_smooth.png b/doc/images/raw_smooth.png new file mode 100644 index 00000000..748d9ae9 Binary files /dev/null and b/doc/images/raw_smooth.png differ diff --git a/doc/images/ray.png b/doc/images/ray.png new file mode 100644 index 00000000..c4564676 Binary files /dev/null and b/doc/images/ray.png differ diff --git a/doc/images/unet.jpg b/doc/images/unet.jpg new file mode 100644 index 00000000..49cce6ff Binary files /dev/null and b/doc/images/unet.jpg differ diff --git a/doc/index.txt b/doc/index.txt index e01e79fc..27962583 100644 --- a/doc/index.txt +++ b/doc/index.txt @@ -10,7 +10,7 @@ and an `introduction to Deep Learning algorithms `_ (Foundations & Trends in Machine Learning, 2009). 
@@ -60,11 +60,26 @@ LSTM network for sentiment analysis:
 
 Energy-based recurrent neural network (RNN-RBM):
   * :ref:`Modeling and generating sequences of polyphonic music `
 
+Segmentation for medical imagery (meant to be read in order):
+  * :ref:`Fully Convolutional Networks (FCN) for 2D segmentation `
+  * :ref:`U-Net `
+  * :ref:`1D segmentation `
+
+
 .. _Theano: http://deeplearning.net/software/theano
 .. _Theano basic tutorial: http://deeplearning.net/software/theano/tutorial
 .. _Contractive auto-encoders: https://github.com/lisa-lab/DeepLearningTutorials/blob/master/code/cA.py
 
-Note that the tutorials here are all compatible with Python 2 and 3, with the exception of :ref:`rnnrbm` which is only available for Python 2.
+
+
+
+Note that the tutorials here are all compatible with Python 2 and 3,
+with the exception of :ref:`rnnrbm`, which is only available for Python 2,
+as are the medical imagery segmentation tutorials.
+
+If you work with ``conda``, `these command-line guidelines <../code/guidelines_segm_tutos_with_conda.sh>`__
+may also help you run the segmentation tutorials.
+
diff --git a/doc/unet.txt b/doc/unet.txt
new file mode 100644
index 00000000..7f0446b6
--- /dev/null
+++ b/doc/unet.txt
@@ -0,0 +1,194 @@
+.. _unet:
+
+U-Net
+**********************************************
+
+.. note::
+    This section assumes the reader has already read through :doc:`lenet` for
+    convolutional networks motivation and :doc:`fcn_2D_segm` for segmentation
+    networks.
+
+Summary
++++++++
+
+This tutorial provides a brief explanation of the U-Net architecture as well as a way to implement
+it using Theano and Lasagne. U-Net is a Fully Convolutional Network (FCN) for image segmentation:
+its goal is to predict each pixel's class. See :doc:`fcn_2D_segm` for the differences between
+network architectures for classification and segmentation tasks.
+
+Data
+++++
+
+The data is from the ISBI challenge and can be found `here `_.
+We use data augmentation for training, as specified
+in the default arguments in the code given below.
+
+Model
++++++
+
+The U-Net architecture is built upon the Fully Convolutional Network and modified
+so that it yields better segmentation in medical imaging.
+Compared to FCN-8, the two main differences are that (1) U-Net is symmetric and (2) the skip
+connections between the downsampling path and the upsampling path apply a concatenation
+operator instead of a sum. These skip connections are intended to provide local information
+to the global information while upsampling.
+Because of its symmetry, the network has a large number of feature maps in the upsampling
+path, which allows information to be transferred. By comparison, the basic FCN architecture only had
+*number of classes* feature maps in its upsampling path.
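+
+The difference between the two styles of skip connection amounts to a one-layer
+choice in Lasagne. Below is a small self-contained sketch (with illustrative
+input layers, not the tutorial's code):
+
+.. code-block:: python
+
+    from lasagne.layers import InputLayer, ConcatLayer, ElemwiseSumLayer
+
+    up = InputLayer((None, 64, 32, 32))    # upsampled feature maps
+    down = InputLayer((None, 64, 32, 32))  # feature maps from the downsampling path
+
+    # FCN-style skip connection: element-wise sum, channel count stays at 64.
+    fcn_skip = ElemwiseSumLayer([up, down])
+
+    # U-Net-style skip connection: concatenation along channels, giving 128 maps.
+    unet_skip = ConcatLayer([up, down], cropping=(None, None, "center", "center"))
+
+    print(fcn_skip.output_shape)   # (None, 64, 32, 32)
+    print(unet_skip.output_shape)  # (None, 128, 32, 32)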
+
+The U-Net owes its name to its symmetric shape, which is different from other FCN variants.
+
+The U-Net architecture is separated into 3 parts:
+
+- 1 : The contracting/downsampling path
+- 2 : Bottleneck
+- 3 : The expanding/upsampling path
+
+.. figure:: images/unet.jpg
+    :align: center
+    :scale: 60%
+
+    **Figure 1** : Illustration of U-Net architecture (from U-Net paper)
+
+
+Contracting/downsampling path
+=============================
+
+The contracting path is composed of 4 blocks. Each block is composed of:
+
+* 3x3 Convolution layer + activation function (with batch normalization)
+* 3x3 Convolution layer + activation function (with batch normalization)
+* 2x2 Max Pooling
+
+Note that the number of feature maps doubles at each pooling, starting with
+64 feature maps for the first block, 128 for the second, and so on.
+The purpose of this contracting path is to capture the context of the input image
+in order to be able to do segmentation. This coarse contextual information will
+then be transferred to the upsampling path by means of skip connections.
+
+
+Bottleneck
+==========
+
+This part of the network is between the contracting and expanding paths.
+The bottleneck is simply built from 2 convolutional layers (with batch
+normalization), with dropout.
+
+
+Expanding/upsampling path
+=========================
+
+The expanding path is also composed of 4 blocks. Each of these blocks is composed of:
+
+* Deconvolution layer with stride 2
+* Concatenation with the corresponding cropped feature map from the contracting path
+* 3x3 Convolution layer + activation function (with batch normalization)
+* 3x3 Convolution layer + activation function (with batch normalization)
+
+
+The purpose of this expanding path is to enable precise localization combined
+with contextual information from the contracting path.
+
+Advantages
+==========
+
+* The U-Net combines the location information from the downsampling path with the contextual information in the upsampling path, finally obtaining information that combines localisation and context, which is necessary to predict a good segmentation map.
+
+* No dense layer is used, so images of different sizes can be used as input (since the only parameters to learn on convolution layers are the kernel weights, and the size of the kernel is independent of the input image size).
+
+* The use of massive data augmentation is important in domains like biomedical segmentation, since the number of annotated samples is usually limited.
+
+
+Code
+++++
+
+.. warning::
+
+    * Current code works with Python 2 only.
+    * If you use Theano with GPU backend (e.g. with Theano flag ``device=cuda``),
+      you will need at least 12GB free in your video RAM.
+
+The U-Net implementation can be found in the following files:
+
+* `Unet_lasagne_recipes.py <../code/unet/Unet_lasagne_recipes.py>`_, adapted from the original script
+  `Unet.py `_. Defines the model.
+
+* `train_unet.py <../code/unet/train_unet.py>`_ : Training loop (main script to use).
+
+
+The user must install `Lasagne `_ ,
+`SimpleITK `_ and
+clone the GitHub repo `Dataset Loaders `_.
+
+Change the ``dataset_loaders/config.ini`` file to set the right path for the dataset:
+
+.. code-block:: cfg
+
+    [isbi_em_stacks]
+    shared_path = /path/to/DeepLearningTutorials/data/isbi_challenge_em_stacks/
+
+The folder indicated at section ``[isbi_em_stacks]`` should contain the files:
+
+* ``test-volume.tif``
+* ``train-labels.tif``
+* ``train-volume.tif``
+
+The user can now build a U-Net with a specified number of input channels and number of classes.
+First include the Lasagne layers needed to define the U-Net architecture:
+
+.. literalinclude:: ../code/unet/Unet_lasagne_recipes.py
+  :start-after: start-snippet-1
+  :end-before: end-snippet-1
+
+The *net* variable will be an ordered dictionary containing layer names as keys and layer instances as values.
+This is needed to be able to concatenate the feature maps from the contracting path to the expanding path.
+
+
+First, the contracting path:
+
+.. literalinclude:: ../code/unet/Unet_lasagne_recipes.py
+  :start-after: start-snippet-downsampling
+  :end-before: end-snippet-downsampling
+
+And then the bottleneck:
+
+.. literalinclude:: ../code/unet/Unet_lasagne_recipes.py
+  :start-after: start-snippet-bottleneck
+  :end-before: end-snippet-bottleneck
+
+Followed by the expanding path:
+
+.. literalinclude:: ../code/unet/Unet_lasagne_recipes.py
+  :start-after: start-snippet-upsampling
+  :end-before: end-snippet-upsampling
+
+And finally the output path (to obtain *number of classes* feature maps):
+
+.. literalinclude:: ../code/unet/Unet_lasagne_recipes.py
+  :start-after: start-snippet-output
+  :end-before: end-snippet-output
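+
+Putting it together, a model instance can then be obtained as follows. This is
+a usage sketch; the keyword values simply restate the defaults shown in the
+snippets above:
+
+.. code-block:: python
+
+    from Unet_lasagne_recipes import build_UNet
+
+    # Build the network; `net` maps layer names to layer instances.
+    net = build_UNet(n_input_channels=1, num_output_classes=2,
+                     base_n_filters=64, do_dropout=False,
+                     input_dim=(None, None))
+    output_layer = net['output_flattened']  # per-pixel softmax output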
+
+Running ``train_unet.py`` on a Titan X took around 60 minutes, ending with the following:
+
+.. code-block:: text
+
+    $ THEANO_FLAGS=device=cuda0,floatX=float32,dnn.conv.algo_fwd=time_once,dnn.conv.algo_bwd_data=time_once,dnn.conv.algo_bwd_filter=time_once,gpuarray.preallocate=1 python train_unet.py
+    [...]
+    EPOCH 364: Avg epoch training cost train 0.160667, cost val 0.265909, acc val 0.888796, jacc val class 0 0.636058, jacc val class 1 0.861970, jacc val 0.749014 took 4.379772 s
+
+
+References
+++++++++++
+
+If you use this tutorial, please cite the following papers.
+
+* `[pdf] `__ Olaf Ronneberger, Philipp Fischer, Thomas Brox. U-Net: Convolutional Networks for Biomedical Image Segmentation. May 2015.
+* `[GitHub Repo] `__ Francesco Visin, Adriana Romero - Dataset loaders: a python library to load and preprocess datasets. 2017.
+
+Papers related to Theano/Lasagne:
+
+* `[pdf] `__ Theano Development Team. Theano: A Python framework for fast computation of mathematical expressions. May 2016.
+* `[website] `__ Sander Dieleman, Jan Schluter, Colin Raffel, Eben Olson, Søren Kaae Sønderby, Daniel Nouri, Daniel Maturana, Martin Thoma, Eric Battenberg, Jack Kelly, Jeffrey De Fauw, Michael Heilman, diogo149, Brian McFee, Hendrik Weideman, takacsg84, peterderivaz, Jon, instagibbs, Dr. Kashif Rasul, CongLiu, Britefury, and Jonas Degrave, “Lasagne: First release.” (2015).
+
+
+Thank you!