diff --git a/latex/chap_segmentation.tex b/latex/chap_segmentation.tex index 48a87f9..9f2bfc9 100644 --- a/latex/chap_segmentation.tex +++ b/latex/chap_segmentation.tex @@ -18,37 +18,49 @@ \chapter{Deformable Shape Models using Deep Learning} \newpage %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Template models} +\section{Template Models} -An approach to the segmentation of medical images is to use a previously acquired template and deform it to match the image to segment. +An approach to the segmentation of medical images is to use a previously acquired template and deform it to match the image to segment. Formally, given a template $\phi_0$ and an image $I$, we are interested in obtaining the segmentation mask $\phi$ by finding a deformation field $\psi$, estimated from the image, to apply to the template: +\begin{equation} + \phi = \phi_0 \cdot \psi \left( I \right) +\end{equation} + +\subsection{Constructing the template} + +There are various strategies to construct the template. One can select the template as the image most similar to the one to segment in an existing database (\textcite{commowick2007MICCAI}) or build a mean template from multiple images (\textcite{joshi2004}). +Another strategy is to use multiple templates, which increases the robustness of the method (\textcite{heckemann2006}), and to fuse their predictions (\textcite{warfield2004}). -% http://www.iro.umontreal.ca/~sherknie/articles/medImageRegAnOverview/brussel_bvz.pdf -% https://rd.springer.com/chapter/10.1007/978-3-540-75759-7_25 +A review of the existing methods of template construction can be found in~\textcite{cabezas2011}. +\subsection{Finding the deformation fields} -\subsection{Before deep learning} +Many methods such as \textit{Active Shape Models} (\textcite{cootes1995}), \textit{Active Appearance Models} (\textcite{cootes1998ECCV}) or \textit{Implicit Template Deformation} (\textcite{mory2012MICCAI}) have been proposed in the past. As this thesis focuses on deep learning, they are out of scope. We refer the interested reader to~\textcite{heimann2009} for a review of these methods. -\begin{itemize} - \item Link with registration ? -\end{itemize} +To the best of our knowledge, deep learning has not yet been used in the context of template models. It has, however, been used in the context of registration, i.e. the spatial alignment of two medical images. -\subsection{Building a shape model} +Convolutional neural networks have been used to regress the parameters of the registration transform from the input images (\textcite{miao2016},~\textcite{yang2016}). -\subsection{Deforming the shape model} +Another approach is to estimate a similarity measure with a neural network, to be used in an iterative optimization strategy (\textcite{wu2013MICCAI},~\textcite{cheng2015},~\textcite{simonovosky2016MICCAI}). + +Recently, methods using GANs (\textcite{goodfellow2014}) have been proposed. + +[TODO describe GANs approaches] An Unsupervised Learning Model for Deformable Medical Image Registration (voxelmorph) Adversarial Similarity Network for Evaluating Image Alignment in Deep Learning Based Registration +
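To make the template-deformation formulation above concrete, the following minimal sketch shows how a binary template can be warped by a dense deformation field. It is an illustration only, not the implementation used in this thesis: the representation of $\psi$ as a voxel-wise displacement, the array shapes and the use of SciPy are all assumptions.

\begin{verbatim}
# Minimal sketch of phi = phi_0 . psi: warp a binary template with a dense
# deformation field. Assumptions (not from the thesis): psi is stored as a
# displacement from the identity grid, and the warp samples the template at
# the displaced coordinates (backward warping).
import numpy as np
from scipy.ndimage import map_coordinates

def warp_template(template, displacement):
    """template: (D, H, W) binary mask; displacement: (3, D, H, W) voxel offsets."""
    identity = np.indices(template.shape).astype(float)  # identity sampling grid
    coords = identity + displacement                      # psi = Id + displacement
    # order=0 (nearest neighbour) keeps the warped template binary
    return map_coordinates(template.astype(float), coords, order=0)
\end{verbatim}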
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Template Models for Kidney Segmentation in 3D Ultrasound} +\section{Kidney Segmentation in 3D Ultrasound} The problem addressed is the same as in Section~\ref{sec:kidney}: kidney capsule segmentation in 3D ultrasound data from potentially ill children. The difference is that we are not in a transfer learning setting and we have access to images of both adults and children simultaneously. The contribution of this work is the novel model-based segmentation method presented in Section~\ref{sec:deformable_dl}. We compare the performance of the method to that of a baseline 3D U-Net and discuss the results in Section~\ref{sec:seg_result}. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\subsection{Dataset} +%\subsection{Dataset} \begin{figure}[htb] \begin{subfigure}[b]{0.245\textwidth} @@ -140,7 +152,13 @@ \section{Deformable shape models and deep learning} \label{fig:deform_network} \end{figure} -Figure~\ref{fig:deform_network} presents the process. The network predict a geometric transformation and a deformation field from the image, which are applied to a fixed shape model. The deformed shape model then corresponds to the correct segmentation for the image. The shape model is simply the ground truth segmentation from an image not included in the training, validation or test sets. +Figure~\ref{fig:deform_network} presents the process. The network predicts a geometric transformation $G$ and a deformation field $\psi$ from the image, which are applied to a template $\phi_0$. The deformed template then corresponds to the correct segmentation for the image. + +\begin{equation} + \phi = \phi_0 \cdot G \cdot \psi +\end{equation} + +In our case, the template is the ground truth segmentation from an image not included in the training, validation or test sets. The next sections explain how to predict and apply the transformation and the deformation field. @@ -171,29 +189,32 @@ \subsection{Predicting a geometric transformation} \end{equation*} Finally, we have one rotation matrix in each direction ($R_x$, $R_y$ and $R_z$) built from one parameter each: -\begin{align*} - R_x &= +\begin{equation*} + R_x = \begin{bmatrix} 1 & 0 & 0 & 0 \\ 0 & \cos{r_x} & -\sin{r_x} & 0 \\ 0 & \sin{r_x} & \cos{r_x} & 0 \\ 0 & 0 & 0 & 1 - \end{bmatrix} \\ - R_y &= + \end{bmatrix} +\end{equation*} +\begin{equation*} + R_y = \begin{bmatrix} \cos{r_y} & 0 & - \sin{r_y} & 0 \\ 0 & 1 & 0 & 0 \\ \sin{r_y} & 0 & \cos{r_y} & 0 \\ 0 & 0 & 0 & 1 - \end{bmatrix} \\ - R_z &= + \end{bmatrix} + \mkern20mu + R_z = \begin{bmatrix} \cos{r_z} & -\sin{r_z} & 0 & 0 \\ \sin{r_z} & \cos{r_z} & 0 & 0 \\ 0 & 0 & 1 & 0 \\ 0 & 0 & 0 & 1 \end{bmatrix} -\end{align*} +\end{equation*} We also need to center the image around zero before applying the rotations, which requires no parameters except knowing the center of the image: \begin{equation*} @@ -225,11 +246,11 @@ \subsection{Predicting a deformation field} The deformation fields are predicted by a convolutional layer with three $3 \times 3 \times 3$ filters, one filter per dimension. Each field is then smoothed with a $3 \times 3 \times 3$ mean filter, before being resized to the shape model size with tri-linear interpolation. This resizing step allows predicting deformation fields at a lower resolution than the shape model, saving time and parameters to learn. -We added an $L_2$ penalty term to the deformation fields $F$ in the loss function: +We added an $L_2$ penalty term to the deformation fields $\psi$ in the loss function: \begin{equation} - P = \lambda \sum_x \left( F - I \right)(x)^2 + P = \lambda \sum_x \left( \psi - Id \right)(x)^2 \end{equation} -$I$ is the identity matrix and $\lambda$ is the strength of this term, chosen as $10^{-3}$. The goal of this penalty term is to constrain the size of the deformation fields. +$Id$ is the identity transformation and $\lambda$ is the strength of this term, chosen as $10^{-3}$. The goal of this penalty term is to constrain the magnitude of the deformation fields.
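The following is a minimal sketch of this penalty, under the assumption (for illustration only) that $\psi$ is stored as a dense displacement field, so that $\left( \psi - Id \right)(x)$ is simply the displacement vector at voxel $x$; the function name and array layout are not taken from the thesis implementation.

\begin{verbatim}
# Sketch of the penalty P = lambda * sum_x (psi - Id)(x)^2, assuming the
# network output is already the displacement psi - Id, one 3D offset per
# voxel, stored as an array of shape (3, D, H, W).
import numpy as np

def deformation_penalty(displacement, lam=1e-3):
    # lam corresponds to the lambda = 1e-3 chosen above
    return lam * np.sum(displacement ** 2)
\end{verbatim}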
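Similarly, the geometric transformation of the previous subsection can be illustrated numerically. The sketch below assembles a $4 \times 4$ matrix from per-axis translation, rotation and scaling parameters, centering the volume before rotating; the composition order and the function names are assumptions made for illustration, not the order used in the thesis.

\begin{verbatim}
# Illustrative sketch: build a 4x4 geometric transformation G from per-axis
# translation t, rotation r and scaling s (9 parameters in total), using a
# centering matrix C so that rotations are applied about the image center.
import numpy as np

def rot_x(r):
    c, s = np.cos(r), np.sin(r)
    return np.array([[1, 0, 0, 0], [0, c, -s, 0], [0, s, c, 0], [0, 0, 0, 1]], float)

def rot_y(r):
    c, s = np.cos(r), np.sin(r)
    return np.array([[c, 0, -s, 0], [0, 1, 0, 0], [s, 0, c, 0], [0, 0, 0, 1]], float)

def rot_z(r):
    c, s = np.cos(r), np.sin(r)
    return np.array([[c, -s, 0, 0], [s, c, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]], float)

def geometric_transform(t, r, s, center):
    T = np.eye(4); T[:3, 3] = t                        # translation
    S = np.diag([s[0], s[1], s[2], 1.0])               # scaling
    C = np.eye(4); C[:3, 3] = -np.asarray(center)      # move image center to the origin
    C_inv = np.eye(4); C_inv[:3, 3] = np.asarray(center)
    R = rot_z(r[2]) @ rot_y(r[1]) @ rot_x(r[0])        # rotations about the origin
    return T @ C_inv @ R @ S @ C                       # assumed composition order
\end{verbatim}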
% \subsection{Distance map and loss} @@ -293,5 +314,21 @@ \section{Results and Discussion} \label{fig:transfo_matrix} \end{figure} +Since the geometric transformation provided an important boost in performance, we look at the distribution of each parameter predicted on the test set in Figure~\ref{fig:transfo_matrix}. + +First, we note that the distributions are almost identical for all parameters. Looking at the values for individual images reveals that the parameters are correlated, i.e. if a parameter falls on the lower side of its distribution for an image, the other parameters also fall on the lower side of their distributions for the same image. This is likely due to using only one convolutional layer to predict the parameters, i.e. a lack of capacity. + +The second point of interest is that the distributions for adults and children are very different. The children tend to have lower parameter values, while the values for adults are spread over a larger range. This makes sense for the scaling parameters: as the template is based on an adult's kidney, it is necessary to shrink it more to match the size of a child's kidney. However, there is no reason why this should be true for the translation and rotation. This is possibly a side effect of the parameters being so correlated: if the network lacked capacity, it would make sense for it to focus on the most important parameters, the scaling, with the others following. + +Finally, looking at the actual values of the parameters, it seems that the network relies on a very specific transformation to work. Every parameter falls into a narrow range of values. For all images, the template is translated, rotated and scaled in roughly the same direction and with roughly the same amplitude. + +The very low values of the scaling mean that the template is heavily shrunk. As a result, the deformation fields must have very high values to match the target. The goal of the $L_2$ penalty is to change this by penalizing high values in the deformation fields. + +[TODO why are high values bad ?] + +[TODO geometric transfo distributions for penalty model] + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{Conclusion} + +- multiple conv layers for geo pred diff --git a/latex/main.tex b/latex/main.tex index b3f0f60..651dae2 100644 --- a/latex/main.tex +++ b/latex/main.tex @@ -142,7 +142,7 @@ \addbibresource{references.bib} % \includeonly{chap_introduction, chap_segmentation, chap_transfer_learning, chap_hyperopt, chap_conclusion} -\includeonly{chap_segmentation} +\includeonly{chap_introduction} \pagenumbering{roman} @@ -152,7 +152,10 @@ \end{titlepage} \dominitoc -\tableofcontents +{ + \setstretch{1.1} + \tableofcontents +} \clearpage diff --git a/latex/references.bib b/latex/references.bib index d985a54..7285a76 100644 --- a/latex/references.bib +++ b/latex/references.bib @@ -118,6 +118,14 @@ @article{buda2017 author = "Mateusz Buda and Atsuto Maki and Maciej A.
Mazurowski" } +@article{cabezas2011, + title = "A review of atlas-based segmentation for magnetic resonance brain images", + journal = "Computer Methods and Programs in Biomedicine", + year = "2011", + doi = "https://doi.org/10.1016/j.cmpb.2011.07.015", + author = "Mariano Cabezas and Arnau Oliver and Xavier Lladó and Jordi Freixenet and Meritxell Bach Cuadra" +} + @incollection{caruana1995NIPS, title = {Learning Many Related Tasks at the Same Time with Backpropagation}, author = {Caruana, Rich}, @@ -151,6 +159,17 @@ @article{chang2018 url = "https://www.ncbi.nlm.nih.gov/pubmed/28129148" } +@article{cheng2015, + author = {Xi Cheng and Li Zhang and Yefeng Zheng}, + title = {Deep similarity learning for multimodal medical images}, + journal = {Computer Methods in Biomechanics and Biomedical Engineering: Imaging \& Visualization}, + volume = {6}, + number = {3}, + pages = {248-252}, + year = {2015}, + doi = {10.1080/21681163.2015.1135299} +} + @misc{chollet2015keras, title={Keras}, author={Fran\c{c}ois Chollet and others}, @@ -174,6 +193,35 @@ @inproceedings{collobert2008ICML doi = {10.1145/1390156.1390177} } +@InProceedings{commowick2007MICCAI, + author="Commowick, Olivier + and Malandain, Gr{\'e}goire", + title="Efficient Selection of the Most Similar Image in a Database for Critical Structures Segmentation", + booktitle="Medical Image Computing and Computer-Assisted Intervention", + year="2007", + isbn="978-3-540-75759-7" +} + +@article{cootes1995, + title = "Active Shape Models-Their Training and Application", + journal = "Computer Vision and Image Understanding", + year = "1995", + doi = "https://doi.org/10.1006/cviu.1995.1004", + author = "T.F. Cootes and C.J. Taylor and D.H. Cooper and J. Graham" +} + +@InProceedings{cootes1998ECCV, + author="Cootes, T. F. + and Edwards, G. J. + and Taylor, C. J.", + editor="Burkhardt, Hans + and Neumann, Bernd", + title="Active appearance models", + booktitle="European Conference on Computer Vision", + year="1998", + isbn="978-3-540-69235-5" +} + @article{crammer2008JMLR, author = {Crammer, Koby and Kearns, Michael and Wortman, Jennifer}, title = {Learning from Multiple Sources}, @@ -295,6 +343,23 @@ @inproceedings{hazan2018ICLR url={https://openreview.net/forum?id=H1zriGeCZ}, } +@article{heckemann2006, + title = "Automatic anatomical brain MRI segmentation combining label propagation and decision fusion", + journal = "NeuroImage", + year = "2006", + doi = "https://doi.org/10.1016/j.neuroimage.2006.05.061", + author = "Rolf A. Heckemann and Joseph V. Hajnal and Paul Aljabar and Daniel Rueckert and Alexander Hammers" +} + +@article{heimann2009, + title = "Statistical shape models for 3D medical image segmentation: A review", + journal = "Medical Image Analysis", + year = "2009", + issn = "1361-8415", + doi = "https://doi.org/10.1016/j.media.2009.05.004", + author = "Tobias Heimann and Hans-Peter Meinzer" +} + @article{hinton2006, author = {Hinton, Geoffrey E. 
and Osindero, Simon and Teh, Yee-Whye}, title = {A Fast Learning Algorithm for Deep Belief Nets}, @@ -319,6 +384,14 @@ @incollection{jaderberg2015NIPS url = {http://papers.nips.cc/paper/5854-spatial-transformer-networks.pdf} } +@article{joshi2004, + title = "Unbiased diffeomorphic atlas construction for computational anatomy", + journal = "NeuroImage", + year = "2004", + doi = "https://doi.org/10.1016/j.neuroimage.2004.07.068", + author = "Sarang Joshi and Brad Davis and Matthieu Jomier and Guido Gerig" +} + @article{jones2001, title = {A {Taxonomy} of {Global} {Optimization} {Methods} {Based} on {Response} {Surfaces}}, url = {http://dx.doi.org/10.1023/A:1012771025575}, @@ -416,6 +489,14 @@ @article{marsousi2017 year = {2017} } +@article{miao2016, + author={S. Miao and Z. J. Wang and R. Liao}, + journal={IEEE Transactions on Medical Imaging}, + title={A CNN Regression Approach for Real-Time 2D/3D Registration}, + year={2016}, + doi={10.1109/TMI.2016.2521800} +} + @article{miikkulainen2017, author = {Risto Miikkulainen and Jason Zhi Liang and @@ -442,6 +523,21 @@ @inproceedings{miller1989 url = {http://dl.acm.org/citation.cfm?id=93126.94034} } +@InProceedings{mory2012MICCAI, + author="Mory, Beno{\^i}t + and Somphone, Oudom + and Prevost, Raphael + and Ardon, Roberto", + editor="Ayache, Nicholas + and Delingette, Herv{\'e} + and Golland, Polina + and Mori, Kensaku", + title="Real-Time 3D Image Segmentation by User-Constrained Template Deformation", + booktitle="Medical Image Computing and Computer-Assisted Intervention", + year="2012", + isbn="978-3-642-33415-3" +} + @inproceedings{murray2010NIPS, author = {Murray, Iain and Adams, Ryan P.}, title = {Slice Sampling Covariance Hyperparameters of Latent Gaussian Models}, @@ -608,6 +704,23 @@ @article{shin2016 url = "https://www.ncbi.nlm.nih.gov/pubmed/26886976" } +@InProceedings{simonovosky2016MICCAI, + author="Simonovsky, Martin + and Guti{\'e}rrez-Becker, Benjam{\'i}n + and Mateus, Diana + and Navab, Nassir + and Komodakis, Nikos", + editor="Ourselin, Sebastien + and Joskowicz, Leo + and Sabuncu, Mert R. + and Unal, Gozde + and Wells, William", + title="A Deep Metric for Multimodal Registration", + booktitle="Medical Image Computing and Computer-Assisted Intervention", + year="2016", + isbn="978-3-319-46726-9" +} + @article{simonyan2014, author = {Simonyan, Karen and Zisserman, Andrew}, year = {2014}, @@ -752,6 +865,43 @@ @inproceedings{vincent2008ICML doi = {10.1145/1390156.1390294} } +@article{warfield2004, + author={S. K. Warfield and K. H. Zou and W. M. Wells}, + journal={IEEE Transactions on Medical Imaging}, + title={Simultaneous truth and performance level estimation (STAPLE): an algorithm for the validation of image segmentation}, + year={2004}, + doi={10.1109/TMI.2004.828354}, +} + +@article{wu2013MICCAI, + title={Unsupervised Deep Feature Learning for Deformable Registration of MR Brain Images}, + author={Guorong Wu and Minjeong Kim and Qian Wang and Yaozong Gao and Shu Liao and Dinggang Shen}, + journal={International Conference on Medical Image Computing and Computer-Assisted Intervention}, + year={2013}, +} + +@InProceedings{yang2016, + author="Yang, Xiao + and Kwitt, Roland + and Niethammer, Marc", + editor="Carneiro, Gustavo + and Mateus, Diana + and Peter, Lo{\"i}c + and Bradley, Andrew + and Tavares, Jo{\~a}o Manuel R. S. + and Belagiannis, Vasileios + and Papa, Jo{\~a}o Paulo + and Nascimento, Jacinto C. + and Loog, Marco + and Lu, Zhi + and Cardoso, Jaime S. 
+ and Cornebise, Julien", + title="Fast Predictive Image Registration", + booktitle="Deep Learning and Data Labeling for Medical Applications", + year="2016", + isbn="978-3-319-46976-8" +} + @inproceedings{yosinski2014NIPS, author = {Yosinski, Jason and Clune, Jeff and Bengio, Yoshua and Lipson, Hod}, title = {How Transferable Are Features in Deep Neural Networks?},