[FIX] Fixes to DWIDTI and DWIConnectome pipelines (#1083)
* some fixes and improvements to dwi dti

* load specific modules rather than everything

* add bids filename

* improve error messages

* simplify pipeline

* add statistics to valid suffixes

* bad extension spec

* simplify statistics_on_atlas

* fix test

* update tol a bit

* update test again

* simplify tests

* update regexp for preproc brainmask

* add diffmodel to suffixes

* add missing entity

* add suffix parcellation

* relax test

* update unit tests

* small fix in pet query

* refactor atlases

* use enumerations for atlas names

* specify modules for linux based tests also

* modify enumeration names

* monkeypatch FSLDIR

* small fix

* small fix again

* add unit tests

* monkeypatch FSLDIR again

* monkeypatch FSLDIR again 2

* refactor tests to avoid FSL errors

* fix remaining test

* fix bad name

* rework documentation

* load specific modules rather than everything
NicolasGensollen authored Feb 23, 2024
1 parent f2482f2 commit c6ad37b
Showing 26 changed files with 1,125 additions and 731 deletions.
14 changes: 8 additions & 6 deletions .github/workflows/test_pipelines_anat.yml
@@ -26,7 +26,7 @@ jobs:
source ~/miniconda3/etc/profile.d/conda.sh
conda activate "${{ github.workspace }}"/env
source "$(brew --prefix)/opt/modules/init/bash"
-module load clinica.all
+module load clinica/ants/2.4.4
make install
cd test
poetry run pytest --verbose \
@@ -36,7 +36,7 @@ jobs:
--junitxml=./test-reports/non_regression_anat_t1_linear_mac.xml \
--disable-warnings \
./nonregression/pipelines/anat/test_t1_linear.py
test-t1-volume-MacOS:
runs-on:
- self-hosted
@@ -51,7 +51,8 @@ jobs:
source ~/miniconda3/etc/profile.d/conda.sh
conda activate "${{ github.workspace }}"/env
source "$(brew --prefix)/opt/modules/init/bash"
-module load clinica.all
+module load clinica/matlab/2017a
+module load clinica/spm12/r7771
make install
cd test
poetry run pytest --verbose \
@@ -83,7 +84,7 @@ jobs:
source /builds/miniconda/etc/profile.d/conda.sh
conda activate "${{ github.workspace }}"/env
source /usr/local/Modules/init/profile.sh
-module load clinica.all
+module load clinica/ants/2.4.4
make install
cd test
poetry run pytest --verbose \
@@ -93,7 +94,7 @@ jobs:
--junitxml=./test-reports/non_regression_anat_t1_linear_linux.xml \
--disable-warnings \
./nonregression/pipelines/anat/test_t1_linear.py
test-t1-volume-Linux:
runs-on:
- self-hosted
@@ -115,7 +116,8 @@ jobs:
source /builds/miniconda/etc/profile.d/conda.sh
conda activate "${{ github.workspace }}"/env
source /usr/local/Modules/init/profile.sh
-module load clinica.all
+module load clinica/matlab/2017a
+module load clinica/spm12/r7771
make install
cd test
poetry run pytest --verbose \
4 changes: 2 additions & 2 deletions .github/workflows/test_pipelines_anat_freesurfer.yml
@@ -26,7 +26,7 @@ jobs:
source ~/miniconda3/etc/profile.d/conda.sh
conda activate "${{ github.workspace }}"/env
source "$(brew --prefix)/opt/modules/init/bash"
-module load clinica.all
+module load clinica/freesurfer/6.0.0
make install
cd test
poetry run pytest --verbose \
@@ -58,7 +58,7 @@ jobs:
source /builds/miniconda/etc/profile.d/conda.sh
conda activate "${{ github.workspace }}"/env
source /usr/local/Modules/init/profile.sh
-module load clinica.all
+module load clinica/freesurfer/6.0.0
make install
cd test
poetry run pytest --verbose \
8 changes: 6 additions & 2 deletions .github/workflows/test_pipelines_dwi.yml
@@ -26,7 +26,9 @@ jobs:
source ~/miniconda3/etc/profile.d/conda.sh
conda activate "${{ github.workspace }}"/env
source "$(brew --prefix)/opt/modules/init/bash"
-module load clinica.all
+module load clinica/fsl/6.0.5
+module load clinica/ants/2.4.4
+module load clinica/freesurfer/6.0.0
make install
cd test
poetry run pytest --verbose \
@@ -58,7 +60,9 @@ jobs:
source /builds/miniconda/etc/profile.d/conda.sh
conda activate "${{ github.workspace }}"/env
source /usr/local/Modules/init/profile.sh
-module load clinica.all
+module load clinica/fsl/6.0.5
+module load clinica/ants/2.4.4
+module load clinica/freesurfer/6.0.0
make install
cd test
poetry run pytest --verbose \
36 changes: 9 additions & 27 deletions clinica/pipelines/dwi_dti/pipeline.py
@@ -252,7 +252,6 @@ def _build_core_nodes(self):
from nipype.interfaces.mrtrix3 import TensorMetrics

from clinica.utils.check_dependency import check_environment_variable
-from clinica.utils.dwi import extract_bids_identifier_from_filename

from .utils import (
get_ants_transforms,
@@ -262,17 +261,6 @@ def _build_core_nodes(self):
statistics_on_atlases,
)

-# Nodes creation
-# ==============
-get_bids_identifier = npe.Node(
-    interface=nutil.Function(
-        input_names=["dwi_filename"],
-        output_names=["bids_identifier"],
-        function=extract_bids_identifier_from_filename,
-    ),
-    name="0-Get_BIDS_Identifier",
-)

get_caps_filenames = npe.Node(
interface=nutil.Function(
input_names=["caps_dwi_filename"],
@@ -342,7 +330,7 @@ def _build_core_nodes(self):

scalar_analysis = npe.Node(
interface=nutil.Function(
input_names=["in_registered_map", "name_map", "prefix_file"],
input_names=["in_registered_map", "name_map", "dwi_preprocessed_file"],
output_names=["atlas_statistics_list"],
function=statistics_on_atlases,
),
@@ -393,12 +381,6 @@ def _build_core_nodes(self):
print_begin_message,
[("preproc_dwi", "in_bids_or_caps_file")],
),
-# Get BIDS/CAPS identifier from filename
-(
-    self.input_node,
-    get_bids_identifier,
-    [("preproc_dwi", "caps_dwi_filename")],
-),
# Convert FSL gradient files (bval/bvec) to MRtrix format
(
self.input_node,
@@ -483,39 +465,39 @@
),
# Generate regional TSV files
(
-get_bids_identifier,
+self.input_node,
scalar_analysis_fa,
[("bids_identifier", "prefix_file")],
[("preproc_dwi", "dwi_preprocessed_file")],
),
(
thres_norm_fa,
scalar_analysis_fa,
[("out_file", "in_registered_map")],
),
(
-get_bids_identifier,
+self.input_node,
scalar_analysis_md,
[("bids_identifier", "prefix_file")],
[("preproc_dwi", "dwi_preprocessed_file")],
),
(
thres_norm_md,
scalar_analysis_md,
[("out_file", "in_registered_map")],
),
(
-get_bids_identifier,
+self.input_node,
scalar_analysis_ad,
[("bids_identifier", "prefix_file")],
[("preproc_dwi", "dwi_preprocessed_file")],
),
(
thres_norm_ad,
scalar_analysis_ad,
[("out_file", "in_registered_map")],
),
(
-get_bids_identifier,
+self.input_node,
scalar_analysis_rd,
[("bids_identifier", "prefix_file")],
[("preproc_dwi", "dwi_preprocessed_file")],
),
(
thres_norm_rd,
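For readers less familiar with Nipype, each connection tuple above has the form (source_node, destination_node, [(output_field, input_field)]): the rewiring simply feeds the pipeline's preproc_dwi input straight into each scalar_analysis_* node instead of routing it through the deleted get_bids_identifier node. A minimal, self-contained sketch of this node-and-connection pattern (toy function and names, not the pipeline's actual ones):

```python
import nipype.interfaces.utility as nutil
import nipype.pipeline.engine as npe


def add_one(x):
    return x + 1


# Wrap a plain Python function as a workflow node.
node_a = npe.Node(
    interface=nutil.Function(
        input_names=["x"], output_names=["y"], function=add_one
    ),
    name="a",
)
node_b = node_a.clone(name="b")

wf = npe.Workflow(name="demo")
# Each connection: (source, destination, [(output_field, input_field)]).
wf.connect([(node_a, node_b, [("y", "x")])])
node_a.inputs.x = 1
wf.run()  # node_b receives node_a's output "y" as its input "x"
```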
74 changes: 35 additions & 39 deletions clinica/pipelines/dwi_dti/utils.py
@@ -1,47 +1,41 @@
-def statistics_on_atlases(in_registered_map, name_map, prefix_file=None):
+def statistics_on_atlases(
+    in_registered_map: str, name_map: str, dwi_preprocessed_file: str
+) -> list:
"""Computes a list of statistics files for each atlas.
-Args:
-    in_registered_map (str): Map already registered on atlases.
-    name_map (str): Name of the registered map in CAPS format.
-    prefix_file (Opt[str]):
-        <prefix_file>_space-<atlas_name>_map-<name_map>_statistics.tsv
-Returns:
+Parameters
+----------
+in_registered_map : str
+    Map already registered on atlases.
+name_map : str
+    Name of the registered map in CAPS format.
+dwi_preprocessed_file : str
+    The preprocessed DWI file name which contains the entities to be
+    used for building the statistics file names.
+Returns
+-------
+list of str :
+    List of paths leading to the statistics TSV files.
"""
from pathlib import Path

-from nipype.utils.filemanip import split_filename

-from clinica.utils.atlas import (
-    AtlasAbstract,
-    JHUDTI811mm,
-    JHUTracts01mm,
-    JHUTracts251mm,
-)
+from clinica.utils.atlas import atlas_factory
+from clinica.utils.bids import BIDSFileName
from clinica.utils.statistics import statistics_on_atlas

-in_atlas_list = [JHUDTI811mm(), JHUTracts01mm(), JHUTracts251mm()]

atlas_statistics_list = []
-for atlas in in_atlas_list:
-    if not isinstance(atlas, AtlasAbstract):
-        raise TypeError("Atlas element must be an AtlasAbstract type")
-
-    if prefix_file:
-        filename = (
-            f"{prefix_file}_space-{atlas.get_name_atlas()}"
-            f"_res-{atlas.get_spatial_resolution()}_map-{name_map}_statistics.tsv"
-        )
-    else:
-        _, base, _ = split_filename(in_registered_map)
-        filename = (
-            f"{base}_space-{atlas.get_name_atlas()}"
-            f"_res-{atlas.get_spatial_resolution()}_map-{name_map}_statistics.tsv"
-        )
-
-    out_atlas_statistics = str((Path.cwd() / filename).resolve())
+for atlas_name in ("JHUDTI81", "JHUTracts0", "JHUTracts25"):
+    atlas = atlas_factory(atlas_name)
+    source = BIDSFileName.from_name(dwi_preprocessed_file)
+    source.update_entity("space", atlas.name)
+    source.update_entity("res", atlas.spatial_resolution)
+    source.update_entity("map", name_map)
+    source.suffix = "statistics"
+    source.extension = ".tsv"
+    out_atlas_statistics = str((Path.cwd() / source.name).resolve())
statistics_on_atlas(in_registered_map, atlas, out_atlas_statistics)
atlas_statistics_list.append(out_atlas_statistics)
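The net effect of this rewrite is that statistics file names are now derived from the BIDS entities of the preprocessed DWI file itself rather than from a free-form prefix_file string. The BIDSFileName internals are not part of this diff, so the following is only a rough, self-contained sketch of the entity manipulation it appears to perform (filename, resolution value, and output purely illustrative):

```python
import re
from pathlib import Path


def build_statistics_filename(
    dwi_preprocessed_file: str, atlas_name: str, resolution: str, name_map: str
) -> str:
    """Hypothetical stand-in for the BIDSFileName entity updates above."""
    stem = Path(dwi_preprocessed_file).name
    stem = re.sub(r"\.nii(\.gz)?$", "", stem)  # drop the extension
    stem = re.sub(r"_dwi$", "", stem)  # drop the suffix
    entities = dict(p.split("-", 1) for p in stem.split("_") if "-" in p)
    entities["space"] = atlas_name  # overwrite the space entity
    entities["res"] = resolution  # add the atlas spatial resolution
    entities["map"] = name_map  # e.g. FA, MD, AD or RD
    return "_".join(f"{k}-{v}" for k, v in entities.items()) + "_statistics.tsv"


print(build_statistics_filename(
    "sub-01_ses-M000_space-T1w_desc-preproc_dwi.nii.gz",
    atlas_name="JHUDTI81", resolution="1x1x1", name_map="FA",
))
# sub-01_ses-M000_space-JHUDTI81_desc-preproc_res-1x1x1_map-FA_statistics.tsv
```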

@@ -53,7 +47,7 @@ def get_caps_filenames(caps_dwi_filename: str):
import re

m = re.search(
r"(sub-[a-zA-Z0-9]+)_(ses-[a-zA-Z0-9]+).*_dwi_space-[a-zA-Z0-9]+",
r"(sub-[a-zA-Z0-9]+)_(ses-[a-zA-Z0-9]+).*_space-[a-zA-Z0-9]+_desc-preproc",
caps_dwi_filename,
)
if not m:
@@ -62,7 +56,7 @@ def get_caps_filenames(caps_dwi_filename: str):
)

caps_prefix = m.group(0)
-bids_source = f"{m.group(1)}_{m.group(2)}_dwi"
+bids_source = f"{m.group(1)}_{m.group(2)}"

out_dti = f"{caps_prefix}_model-DTI_diffmodel.nii.gz"
out_fa = f"{caps_prefix}_FA.nii.gz"
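The updated pattern anchors on the _space-&lt;label&gt;_desc-preproc fragment produced by the current preprocessing outputs instead of the older _dwi_space-&lt;label&gt; layout. A quick sanity check of the new expression (filename chosen for illustration):

```python
import re

pattern = r"(sub-[a-zA-Z0-9]+)_(ses-[a-zA-Z0-9]+).*_space-[a-zA-Z0-9]+_desc-preproc"
filename = "sub-01_ses-M000_space-T1w_desc-preproc_dwi.nii.gz"  # illustrative

m = re.search(pattern, filename)
assert m is not None
print(m.group(0))  # caps_prefix: sub-01_ses-M000_space-T1w_desc-preproc
print(f"{m.group(1)}_{m.group(2)}")  # bids_source: sub-01_ses-M000
```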
@@ -115,20 +109,22 @@ def rename_into_caps(
)


-def print_begin_pipeline(in_bids_or_caps_file):
+def print_begin_pipeline(in_bids_or_caps_file: str):
from clinica.utils.filemanip import get_subject_id
from clinica.utils.ux import print_begin_image

print_begin_image(get_subject_id(in_bids_or_caps_file))


-def print_end_pipeline(in_bids_or_caps_file, final_file_1, final_file_2):
+def print_end_pipeline(in_bids_or_caps_file: str, final_file_1: str, final_file_2: str):
from clinica.utils.filemanip import get_subject_id
from clinica.utils.ux import print_end_image

print_end_image(get_subject_id(in_bids_or_caps_file))


-def get_ants_transforms(in_affine_transformation, in_bspline_transformation):
+def get_ants_transforms(
+    in_affine_transformation: str, in_bspline_transformation: str
+) -> list:
"""Combine transformations for antsApplyTransforms interface."""
return [in_bspline_transformation, in_affine_transformation]
49 changes: 38 additions & 11 deletions clinica/pipelines/machine_learning/classification_cli.py
@@ -5,7 +5,7 @@
from clinica import option
from clinica.pipelines import cli_param
from clinica.pipelines.engine import clinica_pipeline
-from clinica.utils.atlas import T1_VOLUME_ATLASES
+from clinica.utils.atlas import T1AndPetVolumeAtlasName

pipeline_name = "machinelearning-classification"

@@ -47,7 +47,7 @@
@cli_param.option_group.option(
"-atlas",
"--atlas",
-type=click.Choice(T1_VOLUME_ATLASES),
+type=click.Choice(T1AndPetVolumeAtlasName),
help="One of the atlases generated by t1-volume or pet-volume pipeline.",
)
@option.global_option_group
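Passing an enumeration to click.Choice ties the CLI's accepted values to a single definition in clinica.utils.atlas (the point of the "use enumerations for atlas names" commit). The members of T1AndPetVolumeAtlasName are not shown in this diff, so the sketch below uses placeholder names to illustrate the pattern:

```python
from enum import Enum

import click


class T1AndPetVolumeAtlasName(str, Enum):
    """Placeholder members; the real enumeration lives in clinica.utils.atlas."""

    AAL2 = "AAL2"
    JHUDTI81 = "JHUDTI81"


@click.command()
@click.option(
    "--atlas",
    type=click.Choice([a.value for a in T1AndPetVolumeAtlasName]),
    help="One of the atlases generated by t1-volume or pet-volume pipeline.",
)
def cli(atlas: str) -> None:
    click.echo(f"Selected atlas: {atlas}")
```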
@@ -69,21 +69,47 @@ def cli(
) -> None:
"""Classification based on machine learning using scikit-learn.
-GROUP_LABEL is a string defining the group label for the current analysis, which helps you keep track of different analyses.
-The third positional argument defines the type of features for classification. It can be 'RegionBased' or 'VoxelBased'.
-The fourth positional argument defines the studied modality ('T1w' or 'PET')
-The fifth positional argument defines the algorithm. It can be 'DualSVM', 'LogisticRegression' or 'RandomForest'.
-The sixth positional argument defines the validation method. It can be 'RepeatedHoldOut' or 'RepeatedKFoldCV'.
-SUBJECTS_VISITS_TSV is a TSV file containing the participant_id and the session_id columns
-DIAGNOSES_TSV is a TSV file where the diagnosis for each participant (identified by a participant ID) is reported (e.g. AD, CN). It allows the algorithm to perform the dual classification (between the two labels reported).
+Parameters
+----------
+caps_directory : str
+group_label : str
+    String defining the group label for the current analysis, which helps you keep track of different analyses.
+orig_input_data : str
+    Defines the type of features for classification.
+    It can be 'RegionBased' or 'VoxelBased'.
+image_type : str
+    Defines the studied modality ('T1w' or 'PET')
+algorithm : str
+    Defines the algorithm. It can be 'DualSVM', 'LogisticRegression' or 'RandomForest'.
+validation : str
+    Defines the validation method. It can be 'RepeatedHoldOut' or 'RepeatedKFoldCV'.
+subjects_visits_tsv : str
+    TSV file containing the participant_id and the session_id columns.
+diagnoses_tsv : str
+    TSV file where the diagnosis for each participant (identified by a participant ID) is reported (e.g. AD, CN).
+    It allows the algorithm to perform the dual classification (between the two labels reported).
+output_directory : str
+    The output folder path.
+acq_label : str, optional
+suvr_reference_region : str, optional
+atlas : str, optional
+n_procs : int, optional
+    The number of processes to be used by the pipeline.
+Notes
+-----
See https://aramislab.paris.inria.fr/clinica/docs/public/latest/Pipelines/MachineLearning_Classification/
"""
from clinica.utils.exceptions import ClinicaException
@@ -115,10 +141,11 @@ def cli(
"Clinica will now exit."
)

if algorithm in ["LogisticRegression", "RandomForest"]:
if algorithm in ("LogisticRegression", "RandomForest"):
if orig_input_data != "RegionBased" or validation != "RepeatedHoldOut":
raise ClinicaException(
"LogisticRegression or RandomForest algorithm can only work on region-based featured or RepeatedHoldOut algorithm. "
"LogisticRegression or RandomForest algorithm can only work "
"on region-based featured or RepeatedHoldOut algorithm. "
"Clinica will now exit."
)
