diff --git a/.circleci/main.yml b/.circleci/main.yml index a13300a78d..91add3529f 100644 --- a/.circleci/main.yml +++ b/.circleci/main.yml @@ -68,7 +68,9 @@ commands: steps: - run: name: Getting Sample BIDS Data - command: git clone https://github.com/bids-standard/bids-examples.git + command: | + mkdir -p /home/circleci/project/dev/circleci_data/.pytest_cache/d/bids-examples + git clone https://github.com/bids-standard/bids-examples.git /home/circleci/project/dev/circleci_data/.pytest_cache/d/bids-examples get-singularity: parameters: version: @@ -156,7 +158,7 @@ commands: then TAG=nightly else - TAG="${CIRCLE_BRANCH//\//_}" + TAG=`echo ${CIRCLE_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'` fi DOCKER_TAG="ghcr.io/${CIRCLE_PROJECT_USERNAME,,}/${CIRCLE_PROJECT_REPONAME,,}:${TAG,,}" if [[ -n "<< parameters.variant >>" ]] @@ -172,7 +174,7 @@ commands: name: Testing Singularity installation command: | pip install -r dev/circleci_data/requirements.txt - coverage run -m pytest --junitxml=test-results/junit.xml --continue-on-collection-errors dev/circleci_data/test_install.py + coverage run -m pytest --capture=no --junitxml=test-results/junit.xml --continue-on-collection-errors dev/circleci_data/test_install.py jobs: combine-coverage: diff --git a/.github/workflows/build_C-PAC.yml b/.github/workflows/build_C-PAC.yml index d126f6a778..ef7a196cef 100644 --- a/.github/workflows/build_C-PAC.yml +++ b/.github/workflows/build_C-PAC.yml @@ -42,7 +42,7 @@ jobs: GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-) if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! $GITHUB_BRANCH == 'develop' ]] then - TAG=${GITHUB_BRANCH//\//_} + TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'` DOCKERFILE=.github/Dockerfiles/C-PAC.develop$VARIANT-$OS.Dockerfile elif [[ $GITHUB_BRANCH == 'develop' ]] then diff --git a/.github/workflows/regression_test_full.yml b/.github/workflows/regression_test_full.yml index 6dba2d1bf2..20d25a9316 100644 --- a/.github/workflows/regression_test_full.yml +++ b/.github/workflows/regression_test_full.yml @@ -13,7 +13,7 @@ jobs: GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-) if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! $GITHUB_BRANCH == 'develop' ]] then - TAG=${GITHUB_BRANCH//\//_} + TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'` elif [[ $GITHUB_BRANCH == 'develop' ]] then TAG=nightly diff --git a/.github/workflows/regression_test_lite.yml b/.github/workflows/regression_test_lite.yml index 4e6b5a46f6..87aba8a5bd 100644 --- a/.github/workflows/regression_test_lite.yml +++ b/.github/workflows/regression_test_lite.yml @@ -37,7 +37,7 @@ jobs: run: | if [[ ! $GITHUB_REF_NAME == 'main' ]] && [[ ! $GITHUB_REF_NAME == 'develop' ]] then - TAG=${GITHUB_REF_NAME//\//_} + TAG=`echo ${GITHUB_REF_NAME} | sed 's/[^a-zA-Z0-9._]/-/g'` elif [[ $GITHUB_REF_NAME == 'develop' ]] then TAG=nightly diff --git a/.github/workflows/smoke_test_participant.yml b/.github/workflows/smoke_test_participant.yml index 3fde0de8aa..6b7e219775 100644 --- a/.github/workflows/smoke_test_participant.yml +++ b/.github/workflows/smoke_test_participant.yml @@ -68,7 +68,7 @@ jobs: GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-) if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! $GITHUB_BRANCH == 'develop' ]] then - TAG=${GITHUB_BRANCH//\//_} + TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'` elif [[ $GITHUB_BRANCH == 'develop' ]] then TAG=nightly @@ -133,7 +133,7 @@ jobs: GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-) if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! 
$GITHUB_BRANCH == 'develop' ]] then - TAG=${GITHUB_BRANCH//\//_} + TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'` elif [[ $GITHUB_BRANCH == 'develop' ]] then TAG=nightly @@ -192,7 +192,7 @@ jobs: GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-) if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! $GITHUB_BRANCH == 'develop' ]] then - TAG=${GITHUB_BRANCH//\//_} + TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'` elif [[ $GITHUB_BRANCH == 'develop' ]] then TAG=nightly diff --git a/.ruff.toml b/.ruff.toml index d690751b02..1f2ac8a9ab 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -11,7 +11,9 @@ external = ["T20"] # Don't autoremove 'noqa` comments for these rules [lint.per-file-ignores] "CPAC/func_preproc/func_preproc.py" = ["E402"] "CPAC/utils/sklearn.py" = ["RUF003"] +"CPAC/utils/tests/old_functions.py" = ["C", "D", "E", "EM", "PLW", "RET"] "CPAC/utils/utils.py" = ["T201"] # until `repickle` is removed +"dev/circleci_data/conftest.py" = ["F401"] "setup.py" = ["D1"] [lint.flake8-import-conventions.extend-aliases] @@ -32,7 +34,7 @@ section-order = ["future", "standard-library", "third-party", "collab", "other-f [lint.isort.sections] "collab" = ["nibabel", "nilearn", "nipype", "PyBASC", "pybids", "scipy", "spython"] -"other-first-party" = ["flowdump", "indi_aws", "indi_schedulers", "PyPEER"] +"other-first-party" = ["bids2table", "flowdump", "indi_aws", "indi_schedulers", "PyPEER"] [lint.pydocstyle] convention = "numpy" diff --git a/.stubs/bids2table/__init__.pyi b/.stubs/bids2table/__init__.pyi new file mode 100644 index 0000000000..00915d5b3f --- /dev/null +++ b/.stubs/bids2table/__init__.pyi @@ -0,0 +1,3 @@ +from ._b2t import bids2table + +__all__ = ["bids2table"] diff --git a/.stubs/bids2table/_b2t.pyi b/.stubs/bids2table/_b2t.pyi new file mode 100644 index 0000000000..94ccb3ff57 --- /dev/null +++ b/.stubs/bids2table/_b2t.pyi @@ -0,0 +1,50 @@ +# Copyright (C) 2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +# """Specific typing stubs for bids2table.""" +from typing import Literal, Optional, overload + +from elbow.typing import StrOrPath +from bids2table.table import BIDSTable + +@overload +def bids2table( + root: StrOrPath, + *, + with_meta: bool = True, + persistent: bool = False, + index_path: Optional[StrOrPath] = None, + exclude: Optional[list[str]] = None, + incremental: bool = False, + overwrite: bool = False, + workers: Optional[int] = None, + worker_id: Optional[int] = None, + return_table: Literal[True] = True, +) -> BIDSTable: ... +@overload +def bids2table( + root: StrOrPath, + *, + with_meta: bool = True, + persistent: bool = False, + index_path: Optional[StrOrPath] = None, + exclude: Optional[list[str]] = None, + incremental: bool = False, + overwrite: bool = False, + workers: Optional[int] = None, + worker_id: Optional[int] = None, + return_table: Literal[False], +) -> None: ... 
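The stub above distinguishes the two bids2table overloads by return_table. For reference, a minimal call-site sketch consistent with those signatures (the dataset path is hypothetical; the keyword names and return types come from the stub, not from the changed C-PAC code):

    from bids2table import bids2table

    # return_table defaults to True, so this call is typed as returning a BIDSTable
    table = bids2table("/data/bids_dataset", with_meta=True, workers=4)

    # return_table=False selects the overload typed as returning None;
    # persistent=True keeps the on-disk index for later reuse
    bids2table("/data/bids_dataset", persistent=True, return_table=False)

CPAC/_entrypoints/run.py (changed further down in this diff) consumes the returned table and groups it by ["sub", "ses"].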
diff --git a/CHANGELOG.md b/CHANGELOG.md index df8f40a666..a8bb98da0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Moved `pygraphviz` from requirements to `graphviz` optional dependencies group. +- Split `ResourcePool` into three classes: `Resource`, `ResourcePool`, and `StratPool`. ### Fixed diff --git a/CPAC/_entrypoints/run.py b/CPAC/_entrypoints/run.py index d7bc6812e4..7cb673e359 100755 --- a/CPAC/_entrypoints/run.py +++ b/CPAC/_entrypoints/run.py @@ -29,6 +29,7 @@ from warnings import simplefilter import yaml +from bids2table import bids2table from CPAC import __version__, license_notice from CPAC.pipeline import AVAILABLE_PIPELINE_CONFIGS @@ -37,7 +38,6 @@ from CPAC.utils.bids_utils import ( cl_strip_brackets, create_cpac_data_config, - load_cpac_data_config, load_yaml_config, sub_list_filter_by_labels, ) @@ -51,8 +51,6 @@ from CPAC.utils.monitoring import failed_to_start, FMLOGGER, log_nodes_cb, WFLOGGER from CPAC.utils.utils import update_nested_dict -from bids2table import bids2table - simplefilter(action="ignore", category=FutureWarning) DEFAULT_TMP_DIR = "/tmp" @@ -786,9 +784,9 @@ def run_main(): try: # fillna - bids_table['ses'] = bids_table['ses'].fillna('None') + bids_table["ses"] = bids_table["ses"].fillna("None") grouped_tab = bids_table.groupby(["sub", "ses"]) - except Exception as e: + except Exception as e: # TODO: raise exception WFLOGGER.warning("Could not create bids table: %s", e) print("Could not create bids table: %s", e) sys.exit(1) diff --git a/CPAC/alff/alff.py b/CPAC/alff/alff.py index 4fe03cb2cc..e26342ffb5 100644 --- a/CPAC/alff/alff.py +++ b/CPAC/alff/alff.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright (C) 2012-2024 C-PAC Developers +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import os from nipype.interfaces.afni import preprocess @@ -7,8 +22,9 @@ from CPAC.alff.utils import get_opt_string from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.registration.registration import apply_transform +from CPAC.utils.interfaces import Function from CPAC.utils.utils import check_prov_for_regtool @@ -177,7 +193,7 @@ def create_alff(wf_name="alff_workflow"): wf.connect(input_node, "rest_res", bandpass, "in_file") get_option_string = pe.Node( - util.Function( + Function( input_names=["mask"], output_names=["option_string"], function=get_opt_string, diff --git a/CPAC/alff/utils.py b/CPAC/alff/utils.py index f89e0c8ca4..d7532373bf 100644 --- a/CPAC/alff/utils.py +++ b/CPAC/alff/utils.py @@ -3,7 +3,10 @@ from pathlib import Path +from CPAC.utils.interfaces.function import Function + +@Function.sig_imports(["from pathlib import Path"]) def get_opt_string(mask: Path | str) -> str: """ Return option string for 3dTstat. 
diff --git a/CPAC/anat_preproc/anat_preproc.py b/CPAC/anat_preproc/anat_preproc.py index 8e24b54b81..5a6acd286e 100644 --- a/CPAC/anat_preproc/anat_preproc.py +++ b/CPAC/anat_preproc/anat_preproc.py @@ -15,7 +15,6 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . -# from copy import deepcopy import os from nipype.interfaces import afni, ants, freesurfer, fsl @@ -35,7 +34,8 @@ wb_command, ) from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock +from CPAC.utils.interfaces import Function from CPAC.utils.interfaces.fsl import Merge as fslMerge @@ -138,7 +138,7 @@ def acpc_alignment( aff_to_rig_imports = ["import os", "from numpy import *"] aff_to_rig = pe.Node( - util.Function( + Function( input_names=["in_xfm", "out_name"], output_names=["out_mat"], function=fsl_aff_to_rigid, @@ -319,7 +319,7 @@ def T1wmulT2w_brain_norm_s_string(sigma, in_file): return "-s %f -div %s" % (sigma, in_file) T1wmulT2w_brain_norm_s_string = pe.Node( - util.Function( + Function( input_names=["sigma", "in_file"], output_names=["out_str"], function=T1wmulT2w_brain_norm_s_string, @@ -378,7 +378,7 @@ def form_lower_string(mean, std): return "-thr %s -bin -ero -mul 255" % (lower) form_lower_string = pe.Node( - util.Function( + Function( input_names=["mean", "std"], output_names=["out_str"], function=form_lower_string, @@ -444,7 +444,7 @@ def file_to_a_list(infile_1, infile_2): return [infile_1, infile_2] file_to_a_list = pe.Node( - util.Function( + Function( input_names=["infile_1", "infile_2"], output_names=["out_list"], function=file_to_a_list, @@ -544,7 +544,7 @@ def afni_brain_connector(wf, cfg, strat_pool, pipe_num, opt): ) skullstrip_args = pe.Node( - util.Function( + Function( input_names=[ "spat_norm", "spat_norm_dxyz", @@ -762,7 +762,7 @@ def fsl_brain_connector(wf, cfg, strat_pool, pipe_num, opt): anat_robustfov.inputs.output_type = "NIFTI_GZ" anat_pad_RobustFOV_cropped = pe.Node( - util.Function( + Function( input_names=["cropped_image_path", "target_image_path"], output_names=["padded_image_path"], function=pad, @@ -902,7 +902,7 @@ def unet_brain_connector(wf, cfg, strat_pool, pipe_num, opt): from CPAC.unet.function import predict_volumes unet_mask = pe.Node( - util.Function( + Function( input_names=["model_path", "cimg_in"], output_names=["out_path"], function=predict_volumes, @@ -1083,7 +1083,7 @@ def freesurfer_brain_connector(wf, cfg, strat_pool, pipe_num, opt): # convert brain mask file from .mgz to .nii.gz fs_brain_mask_to_nifti = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=mri_convert ), name=f"fs_brainmask_to_nifti_{pipe_num}", @@ -1119,7 +1119,7 @@ def freesurfer_abcd_brain_connector(wf, cfg, strat_pool, pipe_num, opt): Ref: https://github.com/DCAN-Labs/DCAN-HCP/blob/7927754/PostFreeSurfer/PostFreeSurferPipeline.sh#L151-L156 """ wmparc_to_nifti = pe.Node( - util.Function( + Function( input_names=["in_file", "reslice_like", "args"], output_names=["out_file"], function=mri_convert, @@ -1130,7 +1130,7 @@ def freesurfer_abcd_brain_connector(wf, cfg, strat_pool, pipe_num, opt): # Register wmparc file if ingressing FreeSurfer data if strat_pool.check_rpool("pipeline-fs_xfm"): wmparc_to_native = pe.Node( - util.Function( + Function( input_names=["source_file", "target_file", "xfm", "out_file"], output_names=["transformed_file"], function=normalize_wmparc, @@ -1168,7 +1168,7 @@ def 
freesurfer_abcd_brain_connector(wf, cfg, strat_pool, pipe_num, opt): wf.connect(wmparc_to_nifti, "out_file", binary_mask, "in_file") wb_command_fill_holes = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=wb_command ), name=f"wb_command_fill_holes_{pipe_num}", @@ -1206,7 +1206,7 @@ def freesurfer_fsl_brain_connector(wf, cfg, strat_pool, pipe_num, opt): # mri_convert -it mgz ${SUBJECTS_DIR}/${subject}/mri/brainmask.mgz -ot nii brainmask.nii.gz convert_fs_brainmask_to_nifti = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=mri_convert ), name=f"convert_fs_brainmask_to_nifti_{node_id}", @@ -1217,7 +1217,7 @@ def freesurfer_fsl_brain_connector(wf, cfg, strat_pool, pipe_num, opt): # mri_convert -it mgz ${SUBJECTS_DIR}/${subject}/mri/T1.mgz -ot nii T1.nii.gz convert_fs_T1_to_nifti = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=mri_convert ), name=f"convert_fs_T1_to_nifti_{node_id}", @@ -2888,7 +2888,7 @@ def freesurfer_abcd_preproc(wf, cfg, strat_pool, pipe_num, opt=None): # fslmaths "$T1wImageFile"_1mm.nii.gz -div $Mean -mul 150 -abs "$T1wImageFile"_1mm.nii.gz normalize_head = pe.Node( - util.Function( + Function( input_names=["in_file", "number", "out_file_suffix"], output_names=["out_file"], function=fslmaths_command, diff --git a/CPAC/anat_preproc/lesion_preproc.py b/CPAC/anat_preproc/lesion_preproc.py index 2ef58c3d2a..07871ae32d 100644 --- a/CPAC/anat_preproc/lesion_preproc.py +++ b/CPAC/anat_preproc/lesion_preproc.py @@ -1,13 +1,30 @@ # -*- coding: utf-8 -*- +# Copyright (C) 2019-2023 C-PAC Developers +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . from nipype.interfaces import afni import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function def inverse_lesion(lesion_path): - """ + """Replace non-zeroes with zeroes and zeroes with ones. + Check if the image contains more zeros than non-zeros, if so, replaces non-zeros by zeros and zeros by ones. @@ -38,13 +55,12 @@ def inverse_lesion(lesion_path): nii = nu.inverse_nifti_values(image=lesion_path) nib.save(nii, lesion_out) return lesion_out - else: - return lesion_out + return lesion_out def create_lesion_preproc(wf_name="lesion_preproc"): - """ - The main purpose of this workflow is to process lesions masks. + """Process lesions masks. + Lesion mask file is deobliqued and reoriented in the same way as the T1 in the anat_preproc function. 
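A recurring change in these hunks replaces nipype's util.Function with C-PAC's Function wrapper from CPAC.utils.interfaces, which can attach the imports a wrapped function needs via sig_imports (as done for get_opt_string in CPAC/alff/utils.py above). A minimal sketch of the pattern, with a hypothetical helper; only the import paths, the decorator, and the pe.Node construction mirror this diff:

    from pathlib import Path

    from CPAC.pipeline import nipype_pipeline_engine as pe
    from CPAC.utils.interfaces.function import Function

    @Function.sig_imports(["from pathlib import Path"])  # imports available to the node at runtime
    def mask_to_opt_string(mask: Path | str) -> str:
        """Hypothetical helper: build a '-mask <file>' option string."""
        return f"-mask {Path(mask)}"

    get_option_string = pe.Node(
        Function(
            input_names=["mask"],
            output_names=["option_string"],
            function=mask_to_opt_string,
        ),
        name="get_option_string",
    )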
@@ -95,7 +111,7 @@ def create_lesion_preproc(wf_name="lesion_preproc"): lesion_deoblique.inputs.deoblique = True lesion_inverted = pe.Node( - interface=util.Function( + interface=Function( input_names=["lesion_path"], output_names=["lesion_out"], function=inverse_lesion, diff --git a/CPAC/anat_preproc/utils.py b/CPAC/anat_preproc/utils.py index b3246fc41a..39904bbb66 100644 --- a/CPAC/anat_preproc/utils.py +++ b/CPAC/anat_preproc/utils.py @@ -1,73 +1,34 @@ # -*- coding: utf-8 -*- -from numpy import zeros -from nibabel import load as nib_load, Nifti1Image -import nipype.interfaces.utility as util - -from CPAC.pipeline import nipype_pipeline_engine as pe - - -def get_shape(nifti_image): - return nib_load(nifti_image).shape - - -def pad(cropped_image_path, target_image_path): - """ - Pad a cropped image to match the dimensions of a target image along the z-axis, - while keeping padded image aligned with target_image. - - Parameters - ---------- - - cropped_image_path (str): The file path to the cropped image (NIfTI format). - - target_image_path (str): The file path to the target image (NIfTI format). - - Returns - ------- - - str: The file path to the saved padded image (NIfTI format). +# Copyright (C) 2018-2023 C-PAC Developers - The function loads cropped and target iamges, calculates the z-dimension shift required for alignment such - that the mask generated from padded image will work correctly on the target image. The result padded image is - saved as an NIfTI file in the working directory/node and file path is returned as output. +# This file is part of C-PAC. - Note: The function assumes that the input images are in NIfTI format and have compatible dimensions. The cropped - and target image should only differ in z-axis dimension. - """ - from os import getcwd, path - from typing import Optional +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. - from numpy import asanyarray, ndarray, zeros_like - from nibabel import load, Nifti1Image, save +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. - cropped_image: Optional[ndarray] = asanyarray(load(cropped_image_path).dataobj) - target_image: Optional[ndarray] = asanyarray(load(target_image_path).dataobj) +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
+import os - # Taking 1 slice to calculate the z dimension shift from top - center_row: int = target_image.shape[0] // 2 - center_column: int = target_image.shape[1] // 2 - z_slice_cropped_image: Optional[ndarray] = cropped_image[ - center_row, center_column, : - ] - z_slice_target_image: Optional[ndarray] = target_image[center_row, center_column, :] - - for z_shift in range(len(z_slice_target_image) - len(z_slice_cropped_image) + 1): - if ( - z_slice_target_image[z_shift : z_shift + len(z_slice_cropped_image)] - == z_slice_cropped_image - ).all(): - break +from numpy import * +from nibabel import load as nib_load +from nipype.interfaces.base import CommandLineInputSpec, File, TraitedSpec +import nipype.interfaces.utility as util +from nipype.interfaces.workbench.base import WBCommand - padded_image_matrix: Optional[ndarray] = zeros_like(target_image) - padded_image_matrix[:, :, z_shift : cropped_image.shape[2] + z_shift] = ( - cropped_image - ) - padded_image_path: str = path.join(getcwd(), "padded_image_T1w.nii.gz") - cropped_image = load(cropped_image_path) - save( - Nifti1Image(padded_image_matrix, affine=cropped_image.affine), padded_image_path - ) - return padded_image_path +from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function def get_shape(nifti_image): + """Return the shape of a NIfTI image.""" return nib_load(nifti_image).shape @@ -286,7 +247,7 @@ def split_hemi(multi_file): def split_hemi_interface() -> util.Function: """Return a function interface for split_hemi.""" - return util.Function( + return Function( input_names=["multi_file"], output_names=["lh", "rh"], function=split_hemi ) @@ -587,12 +548,9 @@ def normalize_wmparc(source_file, target_file, xfm, out_file): return os.path.join(os.getcwd(), out_file) -"""This module provides interfaces for workbench -volume-remove-islands commands""" -from nipype.interfaces.base import CommandLineInputSpec, File, TraitedSpec -from nipype.interfaces.workbench.base import WBCommand - - class VolumeRemoveIslandsInputSpec(CommandLineInputSpec): + """InputSpec for workbench -volume-remove-islands commands.""" + in_file = File( exists=True, mandatory=True, @@ -610,14 +568,14 @@ class VolumeRemoveIslandsInputSpec(CommandLineInputSpec): class VolumeRemoveIslandsOutputSpec(TraitedSpec): + """OutputSpec for workbench -volume-remove-islands commands.""" + out_file = File(exists=True, desc="the output ROI volume") class VolumeRemoveIslands(WBCommand): - """ - workbench - -volume-remove-islands - REMOVE ISLANDS FROM AN ROI VOLUME + """Remove islandes from an ROI volume. + wb_command -volume-remove-islands - the input ROI volume - output - the output ROI volume. diff --git a/CPAC/conftest.py b/CPAC/conftest.py new file mode 100644 index 0000000000..52113ebd40 --- /dev/null +++ b/CPAC/conftest.py @@ -0,0 +1,32 @@ +# Copyright (C) 2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
+"""Global pytest configuration.""" + +from pathlib import Path + +import pytest + + +@pytest.fixture +def bids_examples(cache: pytest.Cache) -> Path: + """Get cached example BIDS directories.""" + bids_dir = cache.mkdir("bids-examples").absolute() + if not (bids_dir.exists() and list(bids_dir.iterdir())): + from git import Repo + + Repo.clone_from("https://github.com/bids-standard/bids-examples.git", bids_dir) + return bids_dir diff --git a/CPAC/distortion_correction/distortion_correction.py b/CPAC/distortion_correction/distortion_correction.py index 79b8400bb1..5f0728b628 100644 --- a/CPAC/distortion_correction/distortion_correction.py +++ b/CPAC/distortion_correction/distortion_correction.py @@ -32,7 +32,7 @@ run_fsl_topup, ) from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.utils import function from CPAC.utils.datasource import match_epi_fmaps from CPAC.utils.interfaces.function import Function @@ -131,7 +131,7 @@ def distcor_phasediff_fsl_fugue(wf, cfg, strat_pool, pipe_num, opt=None): == "AFNI" ): skullstrip_args = pe.Node( - util.Function( + Function( input_names=["shrink_fac"], output_names=["expr"], function=create_afni_arg, @@ -165,7 +165,7 @@ def distcor_phasediff_fsl_fugue(wf, cfg, strat_pool, pipe_num, opt=None): == "BET" ): bet = pe.Node( - interface=fsl.BET(), name="distcor_phasediff_bet_skullstrip_{pipe_num}" + interface=fsl.BET(), name=f"distcor_phasediff_bet_skullstrip_{pipe_num}" ) bet.inputs.output_type = "NIFTI_GZ" bet.inputs.frac = cfg.functional_preproc["distortion_correction"]["PhaseDiff"][ @@ -438,11 +438,6 @@ def distcor_blip_afni_qwarp(wf, cfg, strat_pool, pipe_num, opt=None): node, out = strat_pool.get_data("pe-direction") wf.connect(node, out, match_epi_fmaps_node, "bold_pedir") - # interface = {'bold': (match_epi_fmaps_node, 'opposite_pe_epi'), - # 'desc-brain_bold': 'opposite_pe_epi_brain'} - # wf, strat_pool = wrap_block([bold_mask_afni, bold_masking], - # interface, wf, cfg, strat_pool, pipe_num, opt) - func_get_brain_mask = pe.Node( interface=preprocess.Automask(), name=f"afni_mask_opposite_pe_{pipe_num}" ) @@ -530,10 +525,6 @@ def distcor_blip_afni_qwarp(wf, cfg, strat_pool, pipe_num, opt=None): wf.connect(node, out, undistort_func_mean, "reference_image") wf.connect(convert_afni_warp, "ants_warp", undistort_func_mean, "transforms") - # interface = {'desc-preproc_bold': (undistort_func_mean, 'output_image')} - # wf, strat_pool = wrap_block([bold_mask_afni], - # interface, wf, cfg, strat_pool, pipe_num, opt) - remask = pe.Node( interface=preprocess.Automask(), name=f"afni_remask_boldmask_{pipe_num}" ) @@ -667,7 +658,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): "import sys", ] phase_encoding = pe.Node( - util.Function( + Function( input_names=[ "unwarp_dir", "phase_one", @@ -710,7 +701,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): topup_imports = ["import os", "import subprocess"] run_topup = pe.Node( - util.Function( + Function( input_names=["merged_file", "acqparams"], output_names=[ "out_fieldcoef", @@ -732,7 +723,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): wf.connect(phase_encoding, "acq_params", run_topup, "acqparams") choose_phase = pe.Node( - util.Function( + Function( input_names=["phase_imgs", "unwarp_dir"], output_names=["out_phase_image", "vnum"], function=choose_phase_image, @@ -746,7 +737,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, 
pipe_num, opt=None): wf.connect(node, out, choose_phase, "unwarp_dir") vnum_base = pe.Node( - util.Function( + Function( input_names=[ "vnum", "motion_mat_list", @@ -764,7 +755,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): wf.connect(run_topup, "out_jacs", vnum_base, "jac_matrix_list") wf.connect(run_topup, "out_warps", vnum_base, "warp_field_list") - mean_bold = strat_pool.node_data("sbref") + mean_bold = strat_pool.get_data("sbref") flirt = pe.Node(interface=fsl.FLIRT(), name="flirt") flirt.inputs.dof = 6 @@ -797,7 +788,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): name = "PhaseTwo_aw" vnum_base_two = pe.Node( - util.Function( + Function( input_names=[ "vnum", "motion_mat_list", @@ -840,7 +831,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): name = "PhaseOne_aw" vnum_base_one = pe.Node( - util.Function( + Function( input_names=[ "vnum", "motion_mat_list", diff --git a/CPAC/distortion_correction/utils.py b/CPAC/distortion_correction/utils.py index 2b78dbfa4d..b76acba074 100644 --- a/CPAC/distortion_correction/utils.py +++ b/CPAC/distortion_correction/utils.py @@ -1,3 +1,19 @@ +# Copyright (C) 2021-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import os import subprocess import sys @@ -12,6 +28,8 @@ import nipype.interfaces.utility as util from nipype.pipeline import engine as pe +from CPAC.utils.interfaces import Function + def run_HCP_gradient_unwarp(phase_vol, input_coeffs): import os @@ -49,7 +67,7 @@ def run_convertwarp(cw_trilinear, cw_fullWarp_abs): f"--warp1={cw_fullWarp_abs}", "--relout", f"--out={out_file}", - f"--j={jac_out}", + f"--j={out_jac}", ] subprocess.check_output(cmd) @@ -64,7 +82,7 @@ def gradient_distortion_correction(wf, inp_image, name): grad_unwarp_imports = ["import os", "import subprocess"] grad_unwarp = pe.Node( - util.Function( + Function( input_names=["phase_vol", "input_coeffs"], output_names=["trilinear", "abs_fullWarp"], function=run_HCP_gradient_unwarp, @@ -78,7 +96,7 @@ def gradient_distortion_correction(wf, inp_image, name): convertwarp_imports = ["import os", "import subprocess"] convert_warp = pe.Node( - util.Function( + Function( input_names=["cw_trilinear", "cw_fullWarp_abs"], output_names=["out_file_cw", "out_jac_cw"], function=run_convertwarp, @@ -248,8 +266,9 @@ def phase_encode( def z_pad(name="z_pad"): - """Pad in Z by one slice if odd so that topup does not complain - (slice consists of zeros that will be dilated by following step). + """Pad in Z by one slice if odd so that topup does not complain. + + (Slice consists of zeros that will be dilated by following step). 
""" wf = pe.Workflow(name=name) diff --git a/CPAC/easy_thresh/easy_thresh.py b/CPAC/easy_thresh/easy_thresh.py index d514d51c54..20918c08a9 100644 --- a/CPAC/easy_thresh/easy_thresh.py +++ b/CPAC/easy_thresh/easy_thresh.py @@ -1,3 +1,19 @@ +# Copyright (C) 2012-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import os import re import subprocess @@ -7,12 +23,11 @@ import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function def easy_thresh(wf_name): - """ - Workflow for carrying out cluster-based thresholding - and colour activation overlaying. + """Carry out cluster-based thresholding and colour activation overlaying. Parameters ---------- @@ -213,7 +228,7 @@ def easy_thresh(wf_name): # or qform/sform info) from one image to another geo_imports = ["import subprocess"] copy_geometry = pe.MapNode( - util.Function( + Function( input_names=["infile_a", "infile_b"], output_names=["out_file"], function=copy_geom, @@ -246,7 +261,7 @@ def easy_thresh(wf_name): cluster_imports = ["import os", "import re", "import subprocess"] cluster = pe.MapNode( - util.Function( + Function( input_names=[ "in_file", "volume", @@ -271,7 +286,7 @@ def easy_thresh(wf_name): # create tuple of z_threshold and max intensity value of threshold file create_tuple = pe.MapNode( - util.Function( + Function( input_names=["infile_a", "infile_b"], output_names=["out_file"], function=get_tuple, @@ -299,7 +314,7 @@ def easy_thresh(wf_name): # as FSLDIR,MNI and voxel size get_bg_imports = ["import os", "import nibabel as nib"] get_backgroundimage = pe.MapNode( - util.Function( + Function( input_names=["in_file", "file_parameters"], output_names=["out_file"], function=get_standard_background_img, @@ -312,7 +327,7 @@ def easy_thresh(wf_name): # function node to get the standard fsl brain image # outputs single file get_backgroundimage2 = pe.Node( - util.Function( + Function( input_names=["in_file", "file_parameters"], output_names=["out_file"], function=get_standard_background_img, @@ -412,10 +427,9 @@ def call_cluster(in_file, volume, dlh, threshold, pthreshold, parameters): def copy_geom(infile_a, infile_b): - """ - Method to call fsl fslcpgeom command to copy - certain parts of the header information (image dimensions, - voxel dimensions, voxel dimensions units string, image + """Call fsl fslcpgeom command to copy certain parts of the header information. + + Copy (image dimensions, voxel dimensions, voxel dimensions units string, image orientation/origin or qform/sform info) from one image to another. Parameters @@ -449,9 +463,7 @@ def copy_geom(infile_a, infile_b): def get_standard_background_img(in_file, file_parameters): - """ - Method to get the standard brain image from FSL - standard data directory. + """Get the standard brain image from FSL standard data directory. 
Parameters ---------- @@ -487,10 +499,7 @@ def get_standard_background_img(in_file, file_parameters): def get_tuple(infile_a, infile_b): - """ - Simple method to return tuple of z_threhsold - maximum intensity values of Zstatistic image - for input to the overlay. + """Return tuple of z_threhsold maximum intensity values of Zstatistic image for input to the overlay. Parameters ---------- diff --git a/CPAC/func_preproc/func_ingress.py b/CPAC/func_preproc/func_ingress.py index 60c8ccf5c9..2105503a19 100644 --- a/CPAC/func_preproc/func_ingress.py +++ b/CPAC/func_preproc/func_ingress.py @@ -14,12 +14,21 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . -from CPAC.utils.datasource import create_func_datasource, ingress_func_metadata +"""Ingress functional data for preprocessing.""" + +from CPAC.utils.strategy import Strategy def connect_func_ingress( - workflow, strat_list, c, sub_dict, subject_id, input_creds_path, unique_id=None + workflow, + strat_list: list[Strategy], + c, + sub_dict, + subject_id, + input_creds_path, + unique_id=None, ): + """Connect functional ingress workflow.""" for num_strat, strat in enumerate(strat_list): if "func" in sub_dict: func_paths_dict = sub_dict["func"] @@ -31,7 +40,9 @@ def connect_func_ingress( else: workflow_name = f"func_gather_{unique_id}_{num_strat}" - func_wf = create_func_datasource(func_paths_dict, workflow_name) + func_wf = strat._resource_pool.create_func_datasource( + func_paths_dict, workflow_name + ) func_wf.inputs.inputnode.set( subject=subject_id, @@ -47,8 +58,6 @@ def connect_func_ingress( } ) - (workflow, strat.rpool, diff, blip, fmap_rp_list) = ingress_func_metadata( - workflow, c, strat.rpool, sub_dict, subject_id, input_creds_path, unique_id - ) + diff, blip, fmap_rp_list = strat.rpool.ingress_func_metadata() - return (workflow, diff, blip, fmap_rp_list) + return strat.rpool.wf, diff, blip, fmap_rp_list diff --git a/CPAC/func_preproc/func_motion.py b/CPAC/func_preproc/func_motion.py index 21fdb86a50..dfec8ab91c 100644 --- a/CPAC/func_preproc/func_motion.py +++ b/CPAC/func_preproc/func_motion.py @@ -31,7 +31,7 @@ motion_power_statistics, ) from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.pipeline.schema import valid_options from CPAC.utils.interfaces.function import Function from CPAC.utils.utils import check_prov_for_motion_tool @@ -423,7 +423,7 @@ def get_motion_ref(wf, cfg, strat_pool, pipe_num, opt=None): elif opt == "fmriprep_reference": func_get_RPI = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=estimate_reference_image, @@ -830,7 +830,7 @@ def motion_estimate_filter(wf, cfg, strat_pool, pipe_num, opt=None): notch.inputs.lowpass_cutoff = opt.get("lowpass_cutoff") notch.inputs.filter_order = opt.get("filter_order") - movement_parameters = strat_pool.node_data("desc-movementParameters_motion") + movement_parameters = strat_pool.get_data("desc-movementParameters_motion") wf.connect( movement_parameters.node, movement_parameters.out, notch, "motion_params" ) diff --git a/CPAC/func_preproc/func_preproc.py b/CPAC/func_preproc/func_preproc.py index 4d0fe73c9e..69b856509a 100644 --- a/CPAC/func_preproc/func_preproc.py +++ b/CPAC/func_preproc/func_preproc.py @@ -22,7 +22,8 @@ from CPAC.func_preproc.utils import nullify from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import 
nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock +from CPAC.utils.interfaces import Function from CPAC.utils.interfaces.ants import ( AI, # niworkflows PrintHeader, @@ -343,7 +344,7 @@ def create_wf_edit_func(wf_name="edit_func"): # allocate a node to check that the requested edits are # reasonable given the data func_get_idx = pe.Node( - util.Function( + Function( input_names=["in_files", "stop_idx", "start_idx"], output_names=["stopidx", "startidx"], function=get_idx, @@ -877,7 +878,7 @@ def form_thr_string(thr): return "-thr %s" % (threshold_z) form_thr_string = pe.Node( - util.Function( + Function( input_names=["thr"], output_names=["out_str"], function=form_thr_string, @@ -992,7 +993,7 @@ def bold_mask_fsl_afni(wf, cfg, strat_pool, pipe_num, opt=None): # and this function has been changed. # CHANGES: - # * Converted from a plain function to a CPAC.pipeline.nodeblock.NodeBlockFunction + # * Converted from a plain function to a CPAC.pipeline.engine.nodeblock.NodeBlockFunction # * Removed Registration version check # * Hardcoded Registration parameters instead of loading epi_atlasbased_brainmask.json # * Uses C-PAC's ``FSL-AFNI-brain-probseg`` template in place of ``templateflow.api.get("MNI152NLin2009cAsym", resolution=1, label="brain", suffix="probseg")`` diff --git a/CPAC/func_preproc/tests/test_preproc_connections.py b/CPAC/func_preproc/tests/test_preproc_connections.py index f58380a7fd..9b7da2ed4c 100644 --- a/CPAC/func_preproc/tests/test_preproc_connections.py +++ b/CPAC/func_preproc/tests/test_preproc_connections.py @@ -36,7 +36,6 @@ ) from CPAC.func_preproc.func_preproc import func_normalize from CPAC.nuisance.nuisance import choose_nuisance_blocks -from CPAC.pipeline.cpac_pipeline import connect_pipeline from CPAC.pipeline.engine import ResourcePool from CPAC.pipeline.nipype_pipeline_engine import Workflow from CPAC.registration.registration import ( @@ -81,7 +80,7 @@ "from-template_to-T1w_mode-image_desc-linear_xfm", ] -NUM_TESTS = 48 # number of parameterizations to run for many-parameter tests +NUM_TESTS = 8 # number of parameterizations to run for many-parameter tests def _filter_assertion_message( @@ -268,7 +267,7 @@ def test_motion_filter_connections( if not rpool.check_rpool("desc-cleaned_bold"): pipeline_blocks += choose_nuisance_blocks(c, generate_only) wf = Workflow(re.sub(r"[\[\]\-\:\_ \'\",]", "", str(rpool))) - connect_pipeline(wf, c, rpool, pipeline_blocks) + rpool.connect_pipeline(wf, c, pipeline_blocks) # Check that filtering is happening as expected filter_switch_key = [ "functional_preproc", diff --git a/CPAC/group_analysis/group_analysis.py b/CPAC/group_analysis/group_analysis.py index d3e78c4698..6da81ff37e 100644 --- a/CPAC/group_analysis/group_analysis.py +++ b/CPAC/group_analysis/group_analysis.py @@ -1,14 +1,29 @@ +# Copyright (C) 2012-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
from nipype.interfaces import fsl import nipype.interfaces.utility as util from CPAC.easy_thresh import easy_thresh from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function def get_operation(in_file): - """ - Method to create operation string - for fslmaths. + """Create operation string for fslmaths. Parameters ---------- @@ -39,7 +54,9 @@ def get_operation(in_file): def label_zstat_files(zstat_list, con_file): - """Take in the z-stat file outputs of FSL FLAME and rename them after the + """Rename z-stat file outputs from FSL FLAME using contrast labels. + + Take in the z-stat file outputs of FSL FLAME and rename them after the contrast labels of the contrasts provided. """ cons = [] @@ -64,9 +81,7 @@ def label_zstat_files(zstat_list, con_file): def create_fsl_flame_wf(ftest=False, wf_name="groupAnalysis"): - """ - FSL `FEAT `_ - BASED Group Analysis. + """Run FSL `FEAT `_ BASED Group Analysis. Parameters ---------- @@ -313,7 +328,7 @@ def create_fsl_flame_wf(ftest=False, wf_name="groupAnalysis"): # easier interpretation label_zstat_imports = ["import os"] label_zstat = pe.Node( - util.Function( + Function( input_names=["zstat_list", "con_file"], output_names=["new_zstat_list"], function=label_zstat_files, @@ -341,7 +356,7 @@ def create_fsl_flame_wf(ftest=False, wf_name="groupAnalysis"): # function node to get the operation string for fslmaths command get_opstring = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=get_operation ), name="get_opstring", diff --git a/CPAC/longitudinal_pipeline/longitudinal_preproc.py b/CPAC/longitudinal_pipeline/longitudinal_preproc.py index dfead14d59..9fbe31c6b5 100644 --- a/CPAC/longitudinal_pipeline/longitudinal_preproc.py +++ b/CPAC/longitudinal_pipeline/longitudinal_preproc.py @@ -24,9 +24,9 @@ import numpy as np import nibabel as nib from nipype.interfaces import fsl -import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function from CPAC.utils.monitoring import IFLOGGER from CPAC.utils.nifti_utils import nifti_image_input @@ -617,7 +617,7 @@ def subject_specific_template( ] if method == "flirt": template_gen_node = pe.Node( - util.Function( + Function( input_names=[ "input_brain_list", "input_skull_list", diff --git a/CPAC/longitudinal_pipeline/longitudinal_workflow.py b/CPAC/longitudinal_pipeline/longitudinal_workflow.py index 9006f95698..962d444a4e 100644 --- a/CPAC/longitudinal_pipeline/longitudinal_workflow.py +++ b/CPAC/longitudinal_pipeline/longitudinal_workflow.py @@ -21,17 +21,16 @@ import nipype.interfaces.io as nio from indi_aws import aws_utils +from CPAC.func_preproc.func_ingress import connect_func_ingress from CPAC.longitudinal_pipeline.longitudinal_preproc import subject_specific_template from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.cpac_pipeline import ( build_anat_preproc_stack, build_segmentation_stack, build_T1w_registration_stack, - connect_pipeline, - initialize_nipype_wf, ) -from CPAC.pipeline.engine.engine import ingress_output_dir, initiate_rpool -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine import ResourcePool +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.registration import ( create_fsl_flirt_linear_reg, create_fsl_fnirt_nonlinear_reg, @@ -428,16 +427,13 @@ def anat_longitudinal_wf(subject_id, sub_list, config): except KeyError: input_creds_path = None - workflow = 
initialize_nipype_wf( - config, - sub_list[0], - # just grab the first one for the name - name="anat_longitudinal_pre-preproc", + rpool = ResourcePool( + cfg=config, + data_paths=session, + pipeline_name="anat_longitudinal_pre-preproc", ) - - workflow, rpool = initiate_rpool(workflow, config, session) pipeline_blocks = build_anat_preproc_stack(rpool, config) - workflow = connect_pipeline(workflow, config, rpool, pipeline_blocks) + workflow = rpool.connect_pipeline(rpool.wf, config, pipeline_blocks) session_wfs[unique_id] = rpool @@ -473,13 +469,6 @@ def anat_longitudinal_wf(subject_id, sub_list, config): ) for strat in strats_brain_dct.keys(): - wf = initialize_nipype_wf( - config, - sub_list[0], - # just grab the first one for the name - name=f"template_node_{strat}", - ) - config.pipeline_setup["pipeline_name"] = f"longitudinal_{orig_pipe_name}" template_node_name = f"longitudinal_anat_template_{strat}" @@ -507,9 +496,9 @@ def anat_longitudinal_wf(subject_id, sub_list, config): template_node.inputs.input_skull_list = strats_head_dct[strat] long_id = f"longitudinal_{subject_id}_strat-{strat}" - - wf, rpool = initiate_rpool(wf, config, part_id=long_id) - + rpool = ResourcePool( + cfg=config, part_id=long_id, pipeline_name=f"template_node_{strat}" + ) rpool.set_data( "space-longitudinal_desc-brain_T1w", template_node, @@ -552,7 +541,7 @@ def anat_longitudinal_wf(subject_id, sub_list, config): pipeline_blocks = build_segmentation_stack(rpool, config, pipeline_blocks) - wf = connect_pipeline(wf, config, rpool, pipeline_blocks) + wf = rpool.connect_pipeline(rpool.wf, config, pipeline_blocks) excl = [ "space-longitudinal_desc-brain_T1w", @@ -574,7 +563,7 @@ def anat_longitudinal_wf(subject_id, sub_list, config): creds_path = session["creds_path"] if creds_path and "none" not in creds_path.lower(): if os.path.exists(creds_path): - input_creds_path = os.path.abspath(creds_path) + session["creds_path"] = os.path.abspath(creds_path) else: err_msg = ( 'Credentials path: "%s" for subject "%s" ' @@ -583,18 +572,14 @@ def anat_longitudinal_wf(subject_id, sub_list, config): ) raise Exception(err_msg) else: - input_creds_path = None + session["creds_path"] = None except KeyError: - input_creds_path = None - - wf = initialize_nipype_wf(config, sub_list[0]) - - wf, rpool = initiate_rpool(wf, config, session) + session["creds_path"] = None config.pipeline_setup["pipeline_name"] = f"longitudinal_{orig_pipe_name}" - rpool = ingress_output_dir( - config, rpool, long_id, creds_path=input_creds_path - ) + rpool = ResourcePool(cfg=config, data_paths=session) + wf = rpool.wf + rpool.ingress_output_dir() select_node_name = f"select_{unique_id}" select_sess = pe.Node( @@ -654,17 +639,14 @@ def anat_longitudinal_wf(subject_id, sub_list, config): input_creds_path = None except KeyError: input_creds_path = None - - wf = initialize_nipype_wf(config, sub_list[0]) - - wf, rpool = initiate_rpool(wf, config, session) - + session["creds_path"] = input_creds_path + rpool = ResourcePool(cfg=config, data_paths=session) pipeline_blocks = [ warp_longitudinal_T1w_to_template, warp_longitudinal_seg_to_T1w, ] - wf = connect_pipeline(wf, config, rpool, pipeline_blocks) + wf = rpool.connect_pipeline(rpool.wf, config, pipeline_blocks) rpool.gather_pipes(wf, config) diff --git a/CPAC/median_angle/median_angle.py b/CPAC/median_angle/median_angle.py index 1433df8ac8..de4fd683cb 100644 --- a/CPAC/median_angle/median_angle.py +++ b/CPAC/median_angle/median_angle.py @@ -1,12 +1,29 @@ +# Copyright (C) 2012-2023 C-PAC Developers + +# This file 
is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function def median_angle_correct(target_angle_deg, realigned_file): - """ - Performs median angle correction on fMRI data. Median angle correction algorithm - based on [1]_. + """Perform median angle correction on fMRI data. + + Median angle correction algorithm based on [1]_. Parameters ---------- @@ -89,8 +106,7 @@ def writeToFile(data, nii, fname): def calc_median_angle_params(subject): - """ - Calculates median angle parameters of a subject. + """Calculate median angle parameters of a subject. Parameters ---------- @@ -133,8 +149,7 @@ def calc_median_angle_params(subject): def calc_target_angle(mean_bolds, median_angles): """ - Calculates a target angle based on median angle parameters of - the group. + Calculate a target angle based on median angle parameters of the group. Parameters ---------- @@ -229,7 +244,7 @@ def create_median_angle_correction(name="median_angle_correction"): ) mac = pe.Node( - util.Function( + Function( input_names=["target_angle_deg", "realigned_file"], output_names=["corrected_file", "angles_file"], function=median_angle_correct, @@ -305,7 +320,7 @@ def create_target_angle(name="target_angle"): ) cmap = pe.MapNode( - util.Function( + Function( input_names=["subject"], output_names=["mean_bold", "median_angle"], function=calc_median_angle_params, @@ -315,7 +330,7 @@ def create_target_angle(name="target_angle"): ) cta = pe.Node( - util.Function( + Function( input_names=["mean_bolds", "median_angles"], output_names=["target_angle"], function=calc_target_angle, diff --git a/CPAC/network_centrality/pipeline.py b/CPAC/network_centrality/pipeline.py index e486f8eff0..407489fd9f 100644 --- a/CPAC/network_centrality/pipeline.py +++ b/CPAC/network_centrality/pipeline.py @@ -19,7 +19,7 @@ from CPAC.network_centrality.network_centrality import create_centrality_wf from CPAC.network_centrality.utils import check_centrality_params, create_merge_node from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.pipeline.schema import valid_options diff --git a/CPAC/nuisance/nuisance.py b/CPAC/nuisance/nuisance.py index 3c990e1de1..04807755b7 100644 --- a/CPAC/nuisance/nuisance.py +++ b/CPAC/nuisance/nuisance.py @@ -37,8 +37,8 @@ TR_string_to_float, ) from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.engine.resource_pool import ResourcePool -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock +from CPAC.pipeline.engine.resource import StratPool from CPAC.registration.registration import ( apply_transform, warp_timeseries_to_EPItemplate, @@ -125,7 +125,7 @@ def form_mask_erosion_prop(erosion_prop): ] eroded_mask = pe.Node( - util.Function( + Function( 
input_names=[ "roi_mask", "skullstrip_mask", @@ -156,7 +156,7 @@ def form_mask_erosion_prop(erosion_prop): wf.connect(eroded_mask, "output_roi_mask", outputspec, "eroded_mask") if segmentmap: erosion_segmentmap = pe.Node( - util.Function( + Function( input_names=["roi_mask", "erosion_mm", "erosion_prop"], output_names=["eroded_roi_mask"], function=erosion, @@ -1357,7 +1357,7 @@ def create_regressor_workflow( ] cosfilter_node = pe.Node( - util.Function( + Function( input_names=["input_image_path", "timestep"], output_names=["cosfiltered_img"], function=cosine_filter, @@ -1374,7 +1374,7 @@ def create_regressor_workflow( "input_image_path", ) tr_string2float_node = pe.Node( - util.Function( + Function( input_names=["tr"], output_names=["tr_float"], function=TR_string_to_float, @@ -1887,7 +1887,7 @@ def filtering_bold_and_regressors( bandpass_ts.inputs.outputtype = "NIFTI_GZ" tr_string2float_node = pe.Node( - util.Function( + Function( input_names=["tr"], output_names=["tr_float"], function=TR_string_to_float, @@ -2363,7 +2363,7 @@ def erode_mask_WM(wf, cfg, strat_pool, pipe_num, opt=None): outputs=["desc-confounds_timeseries", "censor-indices"], ) def nuisance_regressors_generation_EPItemplate(wf, cfg, strat_pool, pipe_num, opt=None): - return nuisance_regressors_generation(wf, cfg, strat_pool, pipe_num, opt, "bold") + return nuisance_regressors_generation(wf, cfg, strat_pool, pipe_num, "bold") @nodeblock( @@ -2407,39 +2407,22 @@ def nuisance_regressors_generation_EPItemplate(wf, cfg, strat_pool, pipe_num, op outputs=["desc-confounds_timeseries", "censor-indices"], ) def nuisance_regressors_generation_T1w(wf, cfg, strat_pool, pipe_num, opt=None): - return nuisance_regressors_generation(wf, cfg, strat_pool, pipe_num, opt, "T1w") + return nuisance_regressors_generation(wf, cfg, strat_pool, pipe_num, "T1w") def nuisance_regressors_generation( wf: Workflow, cfg: Configuration, - strat_pool: ResourcePool, + strat_pool: StratPool, pipe_num: int, - opt: dict, space: Literal["T1w", "bold"], ) -> tuple[Workflow, dict]: - """ - Parameters - ---------- - wf : ~nipype.pipeline.engine.workflows.Workflow - - cfg : ~CPAC.utils.configuration.Configuration - - strat_pool : ~CPAC.pipeline.engine.ResourcePool - - pipe_num : int - - opt : dict - - space : str - T1w or bold - - Returns - ------- - wf : nipype.pipeline.engine.workflows.Workflow - - outputs : dict - """ + """Generate nuisance regressors.""" + try: + opt = strat_pool.regressor_dct + except LookupError: + # no regressors to generate + return wf, {} prefixes = [f"space-{space}_"] * 2 reg_tool = None if space == "T1w": @@ -2663,7 +2646,7 @@ def nuisance_regressors_generation( return (wf, outputs) -def nuisance_regression(wf, cfg, strat_pool, pipe_num, opt, space, res=None): +def nuisance_regression(wf, cfg, strat_pool: StratPool, pipe_num, opt, space, res=None): """Nuisance regression in native (BOLD) or template space. 
Parameters @@ -2680,7 +2663,11 @@ def nuisance_regression(wf, cfg, strat_pool, pipe_num, opt, space, res=None): outputs : dict """ - opt = strat_pool.regressor_dct(cfg) + try: + opt = strat_pool.regressor_dct + except LookupError: + # no regressors + return wf, {} bandpass = "Bandpass" in opt bandpass_before = ( bandpass diff --git a/CPAC/nuisance/utils/utils.py b/CPAC/nuisance/utils/utils.py index 92499523a8..db6667dcb3 100644 --- a/CPAC/nuisance/utils/utils.py +++ b/CPAC/nuisance/utils/utils.py @@ -499,7 +499,7 @@ def generate_summarize_tissue_mask_ventricles_masking( # generate inverse transform flags, which depends on the number of transforms inverse_transform_flags = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["inverse_transform_flags"], function=generate_inverse_transform_flags, diff --git a/CPAC/pipeline/cpac_pipeline.py b/CPAC/pipeline/cpac_pipeline.py index 029bc1984e..b6300f9aa8 100644 --- a/CPAC/pipeline/cpac_pipeline.py +++ b/CPAC/pipeline/cpac_pipeline.py @@ -25,12 +25,16 @@ import sys import time from time import strftime +from typing import Any import yaml -import nipype +import nipype # type: ignore [import-untyped] from nipype import config, logging -from flowdump import save_workflow_json, WorkflowJSONMeta -from indi_aws import aws_utils, fetch_creds +from flowdump import ( # type: ignore [import-untyped] + save_workflow_json, + WorkflowJSONMeta, +) +from indi_aws import aws_utils, fetch_creds # type: ignore [import-untyped] import CPAC from CPAC.alff.alff import alff_falff, alff_falff_space_template @@ -126,11 +130,8 @@ ingress_regressors, nuisance_regression_template, ) - -# pylint: disable=wrong-import-order -from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.check_outputs import check_outputs -from CPAC.pipeline.engine.engine import initiate_rpool, NodeBlock +from CPAC.pipeline.engine import ResourcePool from CPAC.pipeline.nipype_pipeline_engine.plugins import ( LegacyMultiProcPlugin, MultiProcPlugin, @@ -197,16 +198,14 @@ from CPAC.utils.docs import version_report from CPAC.utils.monitoring import ( FMLOGGER, - getLogger, log_nodes_cb, log_nodes_initial, - LOGTAIL, set_up_logger, - WARNING_FREESURFER_OFF_WITH_DATA, WFLOGGER, ) from CPAC.utils.monitoring.draw_gantt_chart import resource_report from CPAC.utils.trimmer import the_trimmer +from CPAC.utils.typing import SUB_GROUP from CPAC.utils.utils import ( check_config_resources, check_system_deps, @@ -221,7 +220,7 @@ def run_workflow( - sub_group, + sub_group: SUB_GROUP, c, run, pipeline_timing_info=None, @@ -422,9 +421,10 @@ def run_workflow( license_notice=CPAC.license_notice.replace("\n", "\n "), ), ) - subject_info = {} + subject_info: dict[str, Any] = {} subject_info["subject_id"] = subject_id subject_info["start_time"] = pipeline_start_time + check_centrality_degree = c.network_centrality["run"] and ( len(c.network_centrality["degree_centrality"]["weight_options"]) != 0 or len(c.network_centrality["eigenvector_centrality"]["weight_options"]) != 0 @@ -547,7 +547,9 @@ def run_workflow( workflow, _ = the_trimmer( workflow, output_dir=c.pipeline_setup["output_directory"]["path"], - s3_creds_path=input_creds_path, + s3_creds_path=c.pipeline_setup["Amazon-AWS"][ + "aws_output_bucket_credentials" + ], ) pipeline_start_datetime = strftime("%Y-%m-%d %H:%M:%S") @@ -559,7 +561,7 @@ def run_workflow( # for strat_no, strat in enumerate(strat_list): # strat_label = 'strat_%d' % strat_no - # subject_info[strat_label] = strat.get_name() + # 
subject_info[strat_label] = strat.name # subject_info['resource_pool'].append(strat.get_resource_pool()) subject_info["status"] = "Running" @@ -709,21 +711,24 @@ def run_workflow( ] timeHeader = dict(zip(gpaTimeFields, gpaTimeFields)) - with open( - os.path.join( - c.pipeline_setup["log_directory"]["path"], - "cpac_individual_timing" - f"_{c.pipeline_setup['pipeline_name']}.csv", - ), - "a", - ) as timeCSV, open( - os.path.join( - c.pipeline_setup["log_directory"]["path"], - "cpac_individual_timing_%s.csv" - % c.pipeline_setup["pipeline_name"], - ), - "r", - ) as readTimeCSV: + with ( + open( + os.path.join( + c.pipeline_setup["log_directory"]["path"], + "cpac_individual_timing" + f"_{c.pipeline_setup['pipeline_name']}.csv", + ), + "a", + ) as timeCSV, + open( + os.path.join( + c.pipeline_setup["log_directory"]["path"], + "cpac_individual_timing_%s.csv" + % c.pipeline_setup["pipeline_name"], + ), + "r", + ) as readTimeCSV, + ): timeWriter = csv.DictWriter(timeCSV, fieldnames=gpaTimeFields) timeReader = csv.DictReader(readTimeCSV) @@ -750,7 +755,10 @@ def run_workflow( os.path.basename(log_dir), ) bucket_name = c.pipeline_setup["output_directory"]["path"].split("/")[2] - bucket = fetch_creds.return_bucket(creds_path, bucket_name) + bucket = fetch_creds.return_bucket( + c.pipeline_setup["Amazon-AWS"]["aws_output_bucket_credentials"], + bucket_name, + ) # Collect local log files local_log_files = [] @@ -852,22 +860,6 @@ def remove_workdir(wdpath: str) -> None: FMLOGGER.warning("Could not remove working directory %s", wdpath) -def initialize_nipype_wf(cfg, sub_data_dct, name=""): - """Initialize a new nipype workflow.""" - if name: - name = f"_{name}" - - workflow_name = f"cpac{name}_{sub_data_dct[0][0]}_{sub_data_dct[0][1]}" - wf = pe.Workflow(name=workflow_name) - wf.base_dir = cfg.pipeline_setup["working_directory"]["path"] - wf.config["execution"] = { - "hash_method": "timestamp", - "crashdump_dir": os.path.abspath(cfg.pipeline_setup["log_directory"]["path"]), - } - - return wf - - def load_cpac_pipe_config(pipe_config): """Load in pipeline config file.""" config_file = os.path.realpath(pipe_config) @@ -1125,99 +1117,10 @@ def build_segmentation_stack(rpool, cfg, pipeline_blocks=None): return pipeline_blocks -def list_blocks(pipeline_blocks, indent=None): - """List node blocks line by line. 
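The timing-CSV hunk above only regroups the two open() calls inside a single parenthesized with-statement (Python 3.10+ syntax); the file handling itself is unchanged. The pattern in isolation, with throwaway file names:

from pathlib import Path

Path("in.csv").write_text("subject,elapsed\n")  # placeholder input for the sketch

with (
    open("out.csv", "a") as out_csv,  # append target
    open("in.csv", "r") as in_csv,    # read source
):
    out_csv.write(in_csv.read())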
- - Parameters - ---------- - pipeline_blocks : list or tuple - - indent : int or None - number of spaces after a tab indent - - Returns - ------- - str - """ - blockstring = yaml.dump( - [ - getattr( - block, - "__name__", - getattr( - block, - "name", - yaml.safe_load(list_blocks(list(block))) - if isinstance(block, (tuple, list, set)) - else str(block), - ), - ) - for block in pipeline_blocks - ] - ) - if isinstance(indent, int): - blockstring = "\n".join( - [ - "\t" + " " * indent + line.replace("- - ", "- ") - for line in blockstring.split("\n") - ] - ) - return blockstring - - -def connect_pipeline(wf, cfg, rpool, pipeline_blocks): - """Connect the pipeline blocks to the workflow.""" - WFLOGGER.info( - "Connecting pipeline blocks:\n%s", list_blocks(pipeline_blocks, indent=1) - ) - - previous_nb = None - for block in pipeline_blocks: - try: - nb = NodeBlock(block, debug=cfg["pipeline_setup", "Debugging", "verbose"]) - wf = nb.connect_block(wf, cfg, rpool) - except LookupError as e: - if nb.name == "freesurfer_postproc": - WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA) - LOGTAIL["warnings"].append(WARNING_FREESURFER_OFF_WITH_DATA) - continue - previous_nb_str = ( - (f"after node block '{previous_nb.get_name()}':") - if previous_nb - else "at beginning:" - ) - # Alert user to block that raises error - if isinstance(block, list): - node_block_names = str([NodeBlock(b).get_name() for b in block]) - e.args = ( - f"When trying to connect one of the node blocks " - f"{node_block_names} " - f"to workflow '{wf}' {previous_nb_str} {e.args[0]}", - ) - else: - node_block_names = NodeBlock(block).get_name() - e.args = ( - f"When trying to connect node block " - f"'{node_block_names}' " - f"to workflow '{wf}' {previous_nb_str} {e.args[0]}", - ) - if cfg.pipeline_setup["Debugging"]["verbose"]: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug(e.args[0]) - verbose_logger.debug(rpool) - raise - previous_nb = nb - - return wf - - -def build_workflow(subject_id, sub_group, cfg, pipeline_name=None): +def build_workflow(subject_id, sub_group: SUB_GROUP, cfg, pipeline_name=None): """Build a C-PAC workflow for a single subject.""" from CPAC.utils.datasource import gather_extraction_maps - # Workflow setup - wf = initialize_nipype_wf(cfg, sub_group, name=pipeline_name) - # Extract credentials path if it exists # try: # creds_path = sub_group["creds_path"] @@ -1241,8 +1144,7 @@ def build_workflow(subject_id, sub_group, cfg, pipeline_name=None): # PREPROCESSING # """"""""""""""""""""""""""""""""""""""""""""""""""" - wf, rpool = initiate_rpool(wf, cfg, sub_group) - + rpool = ResourcePool(cfg=cfg, data_paths=sub_group, pipeline_name=pipeline_name) pipeline_blocks = build_anat_preproc_stack(rpool, cfg) # Anatomical to T1 template registration @@ -1287,7 +1189,7 @@ def build_workflow(subject_id, sub_group, cfg, pipeline_name=None): # Distortion/Susceptibility Correction distcor_blocks = [] if "fmap" in sub_group[0]: - fmap_keys = sub_group[1]["ent__suffix"].values + fmap_keys = sub_group[1]["suffix"].values if "phasediff" in fmap_keys or "phase1" in fmap_keys: if "magnitude" in fmap_keys or "magnitude1" in fmap_keys: distcor_blocks.append(distcor_phasediff_fsl_fugue) @@ -1434,7 +1336,7 @@ def build_workflow(subject_id, sub_group, cfg, pipeline_name=None): if rpool.check_rpool(func): apply_func_warp["T1"] = False - target_space_nuis = cfg.nuisance_corrections["2-nuisance_regression"]["space"] + # target_space_nuis = cfg.nuisance_corrections["2-nuisance_regression"]["space"] target_space_alff = 
cfg.amplitude_low_frequency_fluctuation["target_space"] target_space_reho = cfg.regional_homogeneity["target_space"] @@ -1609,7 +1511,7 @@ def build_workflow(subject_id, sub_group, cfg, pipeline_name=None): # Connect the entire pipeline! try: - wf = connect_pipeline(wf, cfg, rpool, pipeline_blocks) + wf = rpool.connect_pipeline(rpool.wf, cfg, pipeline_blocks) except LookupError as lookup_error: missing_key = None errorstrings = [arg for arg in lookup_error.args[0].split("\n") if arg.strip()] diff --git a/CPAC/pipeline/cpac_runner.py b/CPAC/pipeline/cpac_runner.py index 375f1d2027..5c3ab82890 100644 --- a/CPAC/pipeline/cpac_runner.py +++ b/CPAC/pipeline/cpac_runner.py @@ -19,6 +19,7 @@ from time import strftime import warnings +from pandas.core.groupby import DataFrameGroupBy from voluptuous.error import Invalid import yaml @@ -261,7 +262,7 @@ def run_T1w_longitudinal(sublist, cfg): def run( - bids_table, + bids_table: DataFrameGroupBy, config_file=None, p_name=None, plugin=None, diff --git a/CPAC/pipeline/engine/__init__.py b/CPAC/pipeline/engine/__init__.py index e69de29bb2..534c9f7450 100644 --- a/CPAC/pipeline/engine/__init__.py +++ b/CPAC/pipeline/engine/__init__.py @@ -0,0 +1,26 @@ +# Copyright (C) 2021-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""C-PAC engine.""" + +from .nodeblock import NodeBlock +from .resource import ResourcePool, StratPool + +__all__ = [ + "NodeBlock", + "ResourcePool", + "StratPool", +] diff --git a/CPAC/pipeline/engine/engine.py b/CPAC/pipeline/engine/engine.py deleted file mode 100644 index a6cd44024a..0000000000 --- a/CPAC/pipeline/engine/engine.py +++ /dev/null @@ -1,1458 +0,0 @@ -# Copyright (C) 2021-2024 C-PAC Developers - -# This file is part of C-PAC. - -# C-PAC is free software: you can redistribute it and/or modify it under -# the terms of the GNU Lesser General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. - -# C-PAC is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public -# License along with C-PAC. If not, see . 
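Taken together, the cpac_pipeline.py hunks above replace the removed initialize_nipype_wf / initiate_rpool / connect_pipeline helpers with a ResourcePool that carries its own workflow. A condensed sketch of the new call pattern as it now appears in build_workflow (cfg, sub_group, pipeline_name and pipeline_blocks are assumed to be in scope, as they are in that function):

from CPAC.pipeline.engine import ResourcePool

# The pool builds the nipype workflow itself (rpool.wf) and connects the
# node blocks directly, instead of the removed module-level helpers.
rpool = ResourcePool(cfg=cfg, data_paths=sub_group, pipeline_name=pipeline_name)
wf = rpool.connect_pipeline(rpool.wf, cfg, pipeline_blocks)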
-import ast -import copy -import hashlib -from itertools import chain -import json -import logging -import os -import re -from typing import Optional -import warnings -import pandas as pd - - -from nipype import config -from nipype.interfaces.utility import Rename - -from CPAC.image_utils.spatial_smoothing import spatial_smoothing -from CPAC.image_utils.statistical_transforms import ( - fisher_z_score_standardize, - z_score_standardize, -) -from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.check_outputs import ExpectedOutputs -from CPAC.pipeline.nodeblock import NodeBlockFunction -from CPAC.pipeline.utils import MOVEMENT_FILTER_KEYS, name_fork, source_set -from CPAC.registration.registration import transform_derivative -from CPAC.resources.templates.lookup_table import lookup_identifier -from CPAC.utils.bids_utils import res_in_filename -from CPAC.utils.configuration import Configuration -from CPAC.utils.datasource import ( - create_anat_datasource, - create_func_datasource, - create_general_datasource, - ingress_func_metadata, - resolve_resolution, -) -from CPAC.utils.interfaces.datasink import DataSink -from CPAC.utils.interfaces.function import Function -from CPAC.utils.monitoring import ( - getLogger, - LOGTAIL, - WARNING_FREESURFER_OFF_WITH_DATA, - WFLOGGER, -) -from CPAC.utils.outputs import Outputs -from CPAC.utils.utils import ( - check_prov_for_regtool, - create_id_string, - get_last_prov_entry, - read_json, - write_output_json, -) - - - -class NodeBlock: - def __init__(self, node_block_functions, debug=False): - if not isinstance(node_block_functions, list): - node_block_functions = [node_block_functions] - - self.node_blocks = {} - - for node_block_function in node_block_functions: # <---- sets up the NodeBlock object in case you gave it a list of node blocks instead of a single one - for option forking. 
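The inline comment above refers to option forking: an entry in a pipeline-blocks sequence may itself be a list of node block functions, and the NodeBlock built from that list lets each option produce its own strategy. Schematically, with throwaway placeholder callables rather than real node blocks:

# Placeholders with the shared node block signature; real entries are
# NodeBlockFunction objects defined elsewhere in C-PAC.
def single_block(wf, cfg, strat_pool, pipe_num, opt=None):
    return wf, {}


def option_a(wf, cfg, strat_pool, pipe_num, opt=None):
    return wf, {}


def option_b(wf, cfg, strat_pool, pipe_num, opt=None):
    return wf, {}


pipeline_blocks = [
    single_block,          # one block, one strategy
    [option_a, option_b],  # forked options built into a single NodeBlock
]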
- self.input_interface = [] - if isinstance(node_block_function, tuple): - self.input_interface = node_block_function[1] - node_block_function = node_block_function[0] - if not isinstance(self.input_interface, list): - self.input_interface = [self.input_interface] - - if not isinstance(node_block_function, NodeBlockFunction): - # If the object is a plain function `__name__` will be more useful than `str()` - obj_str = ( - node_block_function.__name__ - if hasattr(node_block_function, "__name__") - else str(node_block_function) - ) - msg = f'Object is not a nodeblock: "{obj_str}"' - raise TypeError(msg) - - name = node_block_function.name - self.name = name - self.node_blocks[name] = {} - - if self.input_interface: - for interface in self.input_interface: - for orig_input in node_block_function.inputs: - if isinstance(orig_input, tuple): - list_tup = list(orig_input) - if interface[0] in list_tup: - list_tup.remove(interface[0]) - list_tup.append(interface[1]) - node_block_function.inputs.remove(orig_input) - node_block_function.inputs.append(tuple(list_tup)) - elif orig_input == interface[0]: - node_block_function.inputs.remove(interface[0]) - node_block_function.inputs.append(interface[1]) - - for key, val in node_block_function.legacy_nodeblock_dict().items(): - self.node_blocks[name][key] = val - - self.node_blocks[name]["block_function"] = node_block_function - - # TODO: fix/replace below - self.outputs = {} - for out in node_block_function.outputs: - self.outputs[out] = None - - self.options = ["base"] - if node_block_function.outputs is not None: - self.options = node_block_function.outputs - - WFLOGGER.info("Connecting %s...", name) - if debug: - config.update_config({"logging": {"workflow_level": "DEBUG"}}) - logging.update_logging(config) - WFLOGGER.debug( - '"inputs": %s\n\t "outputs": %s%s', - node_block_function.inputs, - list(self.outputs.keys()), - f'\n\t"options": {self.options}' - if self.options != ["base"] - else "", - ) - config.update_config({"logging": {"workflow_level": "INFO"}}) - logging.update_logging(config) - - def get_name(self): - return self.name - - def check_null(self, val): - if isinstance(val, str): - val = None if val.lower() == "none" else val - return val - - def check_output(self, outputs, label, name): - if label not in outputs: - msg = ( - f'\n[!] Output name "{label}" in the block ' - "function does not match the outputs list " - f'{outputs} in Node Block "{name}"\n' - ) - raise NameError(msg) - - def grab_tiered_dct(self, cfg, key_list): - cfg_dct = cfg.dict() - for key in key_list: - try: - cfg_dct = cfg_dct.get(key, {}) - except KeyError as ke: - msg = "[!] 
The config provided to the node block is not valid" - raise KeyError(msg) from ke - return cfg_dct - - def connect_block(self, wf, cfg, rpool): - debug = cfg.pipeline_setup["Debugging"]["verbose"] - all_opts = [] - for name, block_dct in self.node_blocks.items(): - opts = [] - config = self.check_null(block_dct["config"]) - option_key = self.check_null(block_dct["option_key"]) - option_val = self.check_null(block_dct["option_val"]) - if option_key and option_val: - if not isinstance(option_key, list): - option_key = [option_key] - if not isinstance(option_val, list): - option_val = [option_val] - if config: - key_list = config + option_key - else: - key_list = option_key - if "USER-DEFINED" in option_val: - # load custom config data into each 'opt' - opts = self.grab_tiered_dct(cfg, key_list) - else: - for option in option_val: - try: - if option in self.grab_tiered_dct(cfg, key_list): - # goes over the option_vals in the node block docstring, and checks if the user's pipeline config included it in the forking list - opts.append(option) - except AttributeError as err: - msg = f"{err}\nNode Block: {name}" - raise Exception(msg) - - if opts is None: - opts = [opts] - - elif option_key and not option_val: - # enables multiple config forking entries - if not isinstance(option_key[0], list): - msg = ( - f"[!] The option_key field ({option_key}) " - f"for {name} exists but there is no " - "option_val.\n\nIf you are trying to " - "populate multiple option keys, the " - "option_val field must contain a list of " - "a list.\n" - ) - raise ValueError(msg) - for option_config in option_key: - # option_config is a list of pipe config levels down to the option - if config: - key_list = config + option_config - else: - key_list = option_config - option_val = option_config[-1] - if option_val in self.grab_tiered_dct(cfg, key_list[:-1]): - opts.append(option_val) - else: # AND, if there are multiple option-val's (in a list) in the docstring, it gets iterated below in 'for opt in option' etc. AND THAT'S WHEN YOU HAVE TO DELINEATE WITHIN THE NODE BLOCK CODE!!! - opts = [None] - all_opts += opts - - sidecar_additions = { - "CpacConfigHash": hashlib.sha1( - json.dumps(cfg.dict(), sort_keys=True).encode("utf-8") - ).hexdigest(), - "CpacConfig": cfg.dict(), - } - - if cfg["pipeline_setup"]["output_directory"].get("user_defined"): - sidecar_additions["UserDefined"] = cfg["pipeline_setup"][ - "output_directory" - ]["user_defined"] - - for name, block_dct in self.node_blocks.items(): - # iterates over either the single node block in the sequence, or a list of node blocks within the list of node blocks, i.e. for option forking. 
- switch = self.check_null(block_dct["switch"]) - config = self.check_null(block_dct["config"]) - option_key = self.check_null(block_dct["option_key"]) - option_val = self.check_null(block_dct["option_val"]) - inputs = self.check_null(block_dct["inputs"]) - outputs = self.check_null(block_dct["outputs"]) - - block_function = block_dct["block_function"] - - opts = [] - if option_key and option_val: - if not isinstance(option_key, list): - option_key = [option_key] - if not isinstance(option_val, list): - option_val = [option_val] - if config: - key_list = config + option_key - else: - key_list = option_key - if "USER-DEFINED" in option_val: - # load custom config data into each 'opt' - opts = self.grab_tiered_dct(cfg, key_list) - else: - for option in option_val: - if option in self.grab_tiered_dct(cfg, key_list): - # goes over the option_vals in the node block docstring, and checks if the user's pipeline config included it in the forking list - opts.append(option) - else: # AND, if there are multiple option-val's (in a list) in the docstring, it gets iterated below in 'for opt in option' etc. AND THAT'S WHEN YOU HAVE TO DELINEATE WITHIN THE NODE BLOCK CODE!!! - opts = [None] - # THIS ALSO MEANS the multiple option-val's in docstring node blocks can be entered once in the entire node-block sequence, not in a list of multiples - if not opts: - # for node blocks where the options are split into different - # block functions - opts will be empty for non-selected - # options, and would waste the get_strats effort below - continue - - if not switch: - switch = [True] - else: - if config: - try: - key_list = config + switch - except TypeError as te: - msg = ( - "\n\n[!] Developer info: Docstring error " - f"for {name}, make sure the 'config' or " - "'switch' fields are lists.\n\n" - ) - raise TypeError(msg) from te - switch = self.grab_tiered_dct(cfg, key_list) - elif isinstance(switch[0], list): - # we have multiple switches, which is designed to only work if - # config is set to "None" - switch_list = [] - for key_list in switch: - val = self.grab_tiered_dct(cfg, key_list) - if isinstance(val, list): - # fork switches - if True in val: - switch_list.append(True) - if False in val: - switch_list.append(False) - else: - switch_list.append(val) - if False in switch_list: - switch = [False] - else: - switch = [True] - else: - # if config is set to "None" - key_list = switch - switch = self.grab_tiered_dct(cfg, key_list) - if not isinstance(switch, list): - switch = [switch] - if True in switch: - for ( - pipe_idx, - strat_pool, # strat_pool is a ResourcePool like {'desc-preproc_T1w': { 'json': info, 'data': (node, out) }, 'desc-brain_mask': etc.} - ) in rpool.get_strats(inputs, debug).items(): - # keep in mind rpool.get_strats(inputs) = {pipe_idx1: {'desc-preproc_T1w': etc.}, pipe_idx2: {..} } - fork = False in switch - for opt in opts: # it's a dictionary of ResourcePools called strat_pools, except those sub-ResourcePools only have one level! no pipe_idx strat keys. - # remember, you can get 'data' or 'json' from strat_pool with member functions - # strat_pool has all of the JSON information of all the inputs! - # so when we set_data below for the TOP-LEVEL MAIN RPOOL (not the strat_pool), we can generate new merged JSON information for each output. - # particularly, our custom 'CpacProvenance' field. 
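For orientation, the sidecar information assembled in connect_block is written out as ordinary JSON next to each output. The keys below are the ones used by the surrounding code (sidecar_additions and new_json_info); the values are invented purely for illustration:

example_sidecar = {
    "Description": "Example description for one derived output.",
    "Sources": ["desc-preproc_T1w", "desc-brain_mask"],  # inputs seen by the node block
    "CpacVariant": {"desc-preproc_T1w": ["example_node"]},  # recorded when strategies fork
    "CpacProvenance": ["desc-preproc_T1w:anat_ingress"],  # strategy lineage entries
    "CpacConfigHash": "0123abcd",  # sha1 of the serialized pipeline config
}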
- node_name = name - pipe_x = rpool.get_pipe_number(pipe_idx) - - replaced_inputs = [] - for interface in self.input_interface: - if isinstance(interface[1], list): - for input_name in interface[1]: - if strat_pool.check_rpool(input_name): - break - else: - input_name = interface[1] - strat_pool.copy_resource(input_name, interface[0]) - replaced_inputs.append(interface[0]) - try: - wf, outs = block_function(wf, cfg, strat_pool, pipe_x, opt) - except IOError as e: # duplicate node - WFLOGGER.warning(e) - continue - - if not outs: - if block_function.__name__ == "freesurfer_postproc": - WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA) - LOGTAIL["warnings"].append( - WARNING_FREESURFER_OFF_WITH_DATA - ) - continue - - if opt and len(option_val) > 1: - node_name = f"{node_name}_{opt}" - elif opt and "USER-DEFINED" in option_val: - node_name = f'{node_name}_{opt["Name"]}' - - if debug: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("\n=======================") - verbose_logger.debug("Node name: %s", node_name) - prov_dct = rpool.get_resource_strats_from_prov( - ast.literal_eval(pipe_idx) - ) - for key, val in prov_dct.items(): - verbose_logger.debug("-------------------") - verbose_logger.debug("Input - %s:", key) - sub_prov_dct = rpool.get_resource_strats_from_prov(val) - for sub_key, sub_val in sub_prov_dct.items(): - sub_sub_dct = rpool.get_resource_strats_from_prov( - sub_val - ) - verbose_logger.debug(" sub-input - %s:", sub_key) - verbose_logger.debug(" prov = %s", sub_val) - verbose_logger.debug( - " sub_sub_inputs = %s", sub_sub_dct.keys() - ) - - for label, connection in outs.items(): - self.check_output(outputs, label, name) - new_json_info = copy.deepcopy(strat_pool.get("json")) - - # transfer over data-specific json info - # for example, if the input data json is _bold and the output is also _bold - data_type = label.split("_")[-1] - if data_type in new_json_info["subjson"]: - if ( - "SkullStripped" - in new_json_info["subjson"][data_type] - ): - new_json_info["SkullStripped"] = new_json_info[ - "subjson" - ][data_type]["SkullStripped"] - - # determine sources for the outputs, i.e. 
all input data into the node block - new_json_info["Sources"] = [ - x - for x in strat_pool.get_entire_rpool() - if x != "json" and x not in replaced_inputs - ] - - if isinstance(outputs, dict): - new_json_info.update(outputs[label]) - if "Description" not in outputs[label]: - # don't propagate old Description - try: - del new_json_info["Description"] - except KeyError: - pass - if "Template" in outputs[label]: - template_key = outputs[label]["Template"] - if template_key in new_json_info["Sources"]: - # only if the pipeline config template key is entered as the 'Template' field - # otherwise, skip this and take in the literal 'Template' string - try: - new_json_info["Template"] = new_json_info[ - "subjson" - ][template_key]["Description"] - except KeyError: - pass - try: - new_json_info["Resolution"] = new_json_info[ - "subjson" - ][template_key]["Resolution"] - except KeyError: - pass - else: - # don't propagate old Description - try: - del new_json_info["Description"] - except KeyError: - pass - - if "Description" in new_json_info: - new_json_info["Description"] = " ".join( - new_json_info["Description"].split() - ) - - for sidecar_key, sidecar_value in sidecar_additions.items(): - if sidecar_key not in new_json_info: - new_json_info[sidecar_key] = sidecar_value - - try: - del new_json_info["subjson"] - except KeyError: - pass - - if fork or len(opts) > 1 or len(all_opts) > 1: - if "CpacVariant" not in new_json_info: - new_json_info["CpacVariant"] = {} - raw_label = rpool.get_raw_label(label) - if raw_label not in new_json_info["CpacVariant"]: - new_json_info["CpacVariant"][raw_label] = [] - new_json_info["CpacVariant"][raw_label].append( - node_name - ) - - rpool.set_data( - label, - connection[0], - connection[1], - new_json_info, - pipe_idx, - node_name, - fork, - ) - - wf, post_labels = rpool.post_process( - wf, - label, - connection, - new_json_info, - pipe_idx, - pipe_x, - outs, - ) - - if rpool.func_reg: - for postlabel in post_labels: - connection = (postlabel[1], postlabel[2]) - wf = rpool.derivative_xfm( - wf, - postlabel[0], - connection, - new_json_info, - pipe_idx, - pipe_x, - ) - return wf - - -def wrap_block(node_blocks, interface, wf, cfg, strat_pool, pipe_num, opt): - """Wrap a list of node block functions to use within other node blocks. - - Example usage: - - # This calls the 'bold_mask_afni' and 'bold_masking' node blocks to - # skull-strip an EPI field map, without having to invoke the NodeBlock - # connection system. - - # The interface dictionary tells wrap_block to set the EPI field map - # in the parent node block's throw-away strat_pool as 'bold', so that - # the 'bold_mask_afni' and 'bold_masking' node blocks will see that as - # the 'bold' input. - - # It also tells wrap_block to set the 'desc-brain_bold' output of - # the 'bold_masking' node block to 'opposite_pe_epi_brain' (what it - # actually is) in the parent node block's strat_pool, which gets - # returned. - - # Note 'bold' and 'desc-brain_bold' (all on the left side) are the - # labels that 'bold_mask_afni' and 'bold_masking' understand/expect - # through their interfaces and docstrings. - - # The right-hand side (the values of the 'interface' dictionary) are - # what 'make sense' within the current parent node block - in this - # case, the distortion correction node block dealing with field maps. 
- - interface = {'bold': (match_epi_fmaps_node, 'opposite_pe_epi'), - 'desc-brain_bold': 'opposite_pe_epi_brain'} - wf, strat_pool = wrap_block([bold_mask_afni, bold_masking], - interface, wf, cfg, strat_pool, - pipe_num, opt) - - ...further downstream in the parent node block: - - node, out = strat_pool.get_data('opposite_pe_epi_brain') - - # The above line will connect the output of the 'bold_masking' node - # block (which is the skull-stripped version of 'opposite_pe_epi') to - # the next node. - - """ - for block in node_blocks: - # new_pool = copy.deepcopy(strat_pool) - for in_resource, val in interface.items(): - if isinstance(val, tuple): - strat_pool.set_data( - in_resource, val[0], val[1], {}, "", "", fork=True - ) # - if "sub_num" not in strat_pool.get_pool_info(): - strat_pool.set_pool_info({"sub_num": 0}) - sub_num = strat_pool.get_pool_info()["sub_num"] - - wf, outputs = block(wf, cfg, strat_pool, f"{pipe_num}-{sub_num}", opt) # - for out, val in outputs.items(): - if out in interface and isinstance(interface[out], str): - strat_pool.set_data( - interface[out], outputs[out][0], outputs[out][1], {}, "", "" - ) - else: - strat_pool.set_data(out, outputs[out][0], outputs[out][1], {}, "", "") - sub_num += 1 - strat_pool.set_pool_info({"sub_num": sub_num}) - - return (wf, strat_pool) - - -def ingress_raw_anat_data(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id): - if "anat" not in data_paths[1]["ent__datatype"].values: - WFLOGGER.warning("No anatomical data present.") - return rpool - - # if "creds_path" not in data_paths: - # data_paths["creds_path"] = None - - anat_flow = create_anat_datasource(f"anat_T1w_gather_{part_id}_{ses_id}") - - anat = {} - anat_data = data_paths[1].loc[data_paths[1]["ent__datatype"] == "anat"] - if "T1w" in anat_data["ent__suffix"].values: - anat["T1"] = anat_data["finfo__file_path"].values[0] - - # if isinstance(data_paths["anat"], str): - # anat["T1"] = data_paths["anat"] - # elif "T1w" in data_paths["anat"]: - # anat["T1"] = data_paths["anat"]["T1w"] - - if "T1" in anat: - anat_flow.inputs.inputnode.set( - subject=part_id, - anat=anat["T1"], - creds_path=None, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - img_type="anat", - ) - rpool.set_data("T1w", anat_flow, "outputspec.anat", {}, "", "anat_ingress") - - # if "T2w" in data_paths["anat"]: - # anat_flow_T2 = create_anat_datasource(f"anat_T2w_gather_{part_id}_{ses_id}") - # anat_flow_T2.inputs.inputnode.set( - # subject=part_id, - # anat=data_paths["anat"]["T2w"], - # creds_path=data_paths["creds_path"], - # dl_dir=cfg.pipeline_setup["working_directory"]["path"], - # img_type="anat", - # ) - # rpool.set_data("T2w", anat_flow_T2, "outputspec.anat", {}, "", "anat_ingress") - - if cfg.surface_analysis["freesurfer"]["ingress_reconall"]: - rpool = ingress_freesurfer( - wf, rpool, cfg, data_paths, unique_id, part_id, ses_id - ) - - return rpool - - -def ingress_freesurfer(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id): - try: - fs_path = os.path.join(cfg.pipeline_setup["freesurfer_dir"], part_id) - except KeyError: - WFLOGGER.warning("No FreeSurfer data present.") - return rpool - - # fs_path = os.path.join(cfg.pipeline_setup['freesurfer_dir'], part_id) - if not os.path.exists(fs_path): - if "sub" in part_id: - fs_path = os.path.join( - cfg.pipeline_setup["freesurfer_dir"], part_id.replace("sub-", "") - ) - else: - fs_path = os.path.join( - cfg.pipeline_setup["freesurfer_dir"], ("sub-" + part_id) - ) - - # patch for flo-specific data - if not os.path.exists(fs_path): - subj_ses 
= part_id + "-" + ses_id - fs_path = os.path.join(cfg.pipeline_setup["freesurfer_dir"], subj_ses) - if not os.path.exists(fs_path): - WFLOGGER.info("No FreeSurfer data found for subject %s", part_id) - return rpool - - # Check for double nested subj names - if os.path.exists(os.path.join(fs_path, os.path.basename(fs_path))): - fs_path = os.path.join(fs_path, part_id) - - fs_ingress = create_general_datasource("gather_freesurfer_dir") - fs_ingress.inputs.inputnode.set( - unique_id=unique_id, - data=fs_path, - creds_path=data_paths["creds_path"], - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - "freesurfer-subject-dir", - fs_ingress, - "outputspec.data", - {}, - "", - "freesurfer_config_ingress", - ) - - recon_outs = { - "pipeline-fs_raw-average": "mri/rawavg.mgz", - "pipeline-fs_subcortical-seg": "mri/aseg.mgz", - "pipeline-fs_brainmask": "mri/brainmask.mgz", - "pipeline-fs_wmparc": "mri/wmparc.mgz", - "pipeline-fs_T1": "mri/T1.mgz", - "pipeline-fs_hemi-L_desc-surface_curv": "surf/lh.curv", - "pipeline-fs_hemi-R_desc-surface_curv": "surf/rh.curv", - "pipeline-fs_hemi-L_desc-surfaceMesh_pial": "surf/lh.pial", - "pipeline-fs_hemi-R_desc-surfaceMesh_pial": "surf/rh.pial", - "pipeline-fs_hemi-L_desc-surfaceMesh_smoothwm": "surf/lh.smoothwm", - "pipeline-fs_hemi-R_desc-surfaceMesh_smoothwm": "surf/rh.smoothwm", - "pipeline-fs_hemi-L_desc-surfaceMesh_sphere": "surf/lh.sphere", - "pipeline-fs_hemi-R_desc-surfaceMesh_sphere": "surf/rh.sphere", - "pipeline-fs_hemi-L_desc-surfaceMap_sulc": "surf/lh.sulc", - "pipeline-fs_hemi-R_desc-surfaceMap_sulc": "surf/rh.sulc", - "pipeline-fs_hemi-L_desc-surfaceMap_thickness": "surf/lh.thickness", - "pipeline-fs_hemi-R_desc-surfaceMap_thickness": "surf/rh.thickness", - "pipeline-fs_hemi-L_desc-surfaceMap_volume": "surf/lh.volume", - "pipeline-fs_hemi-R_desc-surfaceMap_volume": "surf/rh.volume", - "pipeline-fs_hemi-L_desc-surfaceMesh_white": "surf/lh.white", - "pipeline-fs_hemi-R_desc-surfaceMesh_white": "surf/rh.white", - "pipeline-fs_xfm": "mri/transforms/talairach.lta", - } - - for key, outfile in recon_outs.items(): - fullpath = os.path.join(fs_path, outfile) - if os.path.exists(fullpath): - fs_ingress = create_general_datasource(f"gather_fs_{key}_dir") - fs_ingress.inputs.inputnode.set( - unique_id=unique_id, - data=fullpath, - creds_path=data_paths["creds_path"], - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - key, fs_ingress, "outputspec.data", {}, "", f"fs_{key}_ingress" - ) - else: - warnings.warn( - str(LookupError(f"\n[!] 
Path does not exist for {fullpath}.\n")) - ) - - return rpool - - -def ingress_raw_func_data(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id): - func_paths_dct = data_paths[1].loc[data_paths[1]["ent__datatype"] == "func"] - - func_wf = create_func_datasource( - func_paths_dct, rpool, f"func_ingress_{part_id}_{ses_id}" - ) - func_wf.inputs.inputnode.set( - subject=part_id, - creds_path=None, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - func_wf.get_node("inputnode").iterables = ("scan", list(func_paths_dct.keys())) - - rpool.set_data("subject", func_wf, "outputspec.subject", {}, "", "func_ingress") - rpool.set_data("bold", func_wf, "outputspec.rest", {}, "", "func_ingress") - rpool.set_data("scan", func_wf, "outputspec.scan", {}, "", "func_ingress") - rpool.set_data( - "scan-params", func_wf, "outputspec.scan_params", {}, "", "scan_params_ingress" - ) - - # TODO: CHECK FOR PARAMETERS - diff = None - blip = None - fmap_rp_list = None - # wf, rpool, diff, blip, fmap_rp_list = ingress_func_metadata( - # wf, cfg, rpool, data_paths, part_id, None, ses_id - # ) - - # Memoize list of local functional scans - # TODO: handle S3 files - # Skip S3 files for now - - local_func_scans = ( - [file_path for file_path in func_paths_dct["finfo__file_path"].values] - if not func_paths_dct.empty - else [] - ) - - # local_func_scans = [ - # func_paths_dct[scan]["scan"] - # for scan in func_paths_dct.keys() - # # if not func_paths_dct[scan]["scan"].startswith("s3://") - # ] - if local_func_scans: - # pylint: disable=protected-access - wf._local_func_scans = local_func_scans - if cfg.pipeline_setup["Debugging"]["verbose"]: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("local_func_scans: %s", local_func_scans) - del local_func_scans - - return (wf, rpool, diff, blip, fmap_rp_list) - - -def ingress_output_dir( - wf, cfg, rpool, unique_id, data_paths, part_id, ses_id, creds_path=None -): - dir_path = data_paths["derivatives_dir"] - - WFLOGGER.info("\nPulling outputs from %s.\n", dir_path) - - anat = os.path.join(dir_path, "anat") - func = os.path.join(dir_path, "func") - - exts = [".nii", ".gz", ".mat", ".1D", ".txt", ".csv", ".rms", ".tsv"] - - outdir_anat = [] - outdir_func = [] - func_paths = {} - func_dict = {} - - for subdir in [anat, func]: - if os.path.isdir(subdir): - for filename in os.listdir(subdir): - for ext in exts: - if ext in filename: - if subdir == anat: - outdir_anat.append(os.path.join(subdir, filename)) - else: - outdir_func.append(os.path.join(subdir, filename)) - - # Add derivatives directory to rpool - ingress = create_general_datasource("gather_derivatives_dir") - ingress.inputs.inputnode.set( - unique_id=unique_id, - data=dir_path, - creds_path=creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - "derivatives-dir", ingress, "outputspec.data", {}, "", "outdir_config_ingress" - ) - - for subdir in [outdir_anat, outdir_func]: - for filepath in subdir: - filename = str(filepath) - for ext in exts: - filename = filename.split("/")[-1].replace(ext, "") - - data_label = filename.split(unique_id)[1].lstrip("_") - - if len(filename) == len(data_label): - msg = ( - "\n\n[!] 
Possibly wrong participant or " - "session in this directory?\n\n" - f"Filepath: {filepath}\n\n" - ) - raise Exception(msg) - - bidstag = "" - for tag in data_label.split("_"): - for prefix in ["task-", "run-", "acq-", "rec"]: - if tag.startswith(prefix): - bidstag += f"{tag}_" - data_label = data_label.replace(f"{tag}_", "") - data_label, json = strip_template(data_label, dir_path, filename) - - rpool, json_info, pipe_idx, node_name, data_label = json_outdir_ingress( - rpool, filepath, exts, data_label, json - ) - - if ( - "template" in data_label - and not json_info["Template"] - == cfg.pipeline_setup["outdir_ingress"]["Template"] - ): - continue - # Rename confounds to avoid confusion in nuisance regression - if data_label.endswith("desc-confounds_timeseries"): - data_label = "pipeline-ingress_desc-confounds_timeseries" - - if len(bidstag) > 1: - # Remove tail symbol - bidstag = bidstag[:-1] - if bidstag.startswith("task-"): - bidstag = bidstag.replace("task-", "") - - # Rename bold mask for CPAC naming convention - # and to avoid collision with anat brain mask - if data_label.endswith("desc-brain_mask") and filepath in outdir_func: - data_label = data_label.replace("brain_mask", "bold_mask") - - try: - pipe_x = rpool.get_pipe_number(pipe_idx) - except ValueError: - pipe_x = len(rpool.pipe_list) - if filepath in outdir_anat: - ingress = create_general_datasource( - f"gather_anat_outdir_{data_label!s}_{pipe_x}" - ) - ingress.inputs.inputnode.set( - unique_id=unique_id, - data=filepath, - creds_path=creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - data_label, - ingress, - "outputspec.data", - json_info, - pipe_idx, - node_name, - f"outdir_{data_label}_ingress", - inject=True, - ) - else: - if data_label.endswith("desc-preproc_bold"): - func_key = data_label - func_dict[bidstag] = {} - func_dict[bidstag]["scan"] = str(filepath) - func_dict[bidstag]["scan_parameters"] = json_info - func_dict[bidstag]["pipe_idx"] = pipe_idx - if data_label.endswith("desc-brain_mask"): - data_label = data_label.replace("brain_mask", "bold_mask") - try: - func_paths[data_label].append(filepath) - except: - func_paths[data_label] = [] - func_paths[data_label].append(filepath) - - if func_dict: - wf, rpool = func_outdir_ingress( - wf, - cfg, - func_dict, - rpool, - unique_id, - creds_path, - part_id, - func_key, - func_paths, - ) - - if cfg.surface_analysis["freesurfer"]["ingress_reconall"]: - rpool = ingress_freesurfer( - wf, rpool, cfg, data_paths, unique_id, part_id, ses_id - ) - return wf, rpool - - -def json_outdir_ingress(rpool, filepath, exts, data_label, json): - desc_val = None - for tag in data_label.split("_"): - if "desc-" in tag: - desc_val = tag - break - jsonpath = str(filepath) - for ext in exts: - jsonpath = jsonpath.replace(ext, "") - jsonpath = f"{jsonpath}.json" - - if not os.path.exists(jsonpath): - WFLOGGER.info( - "\n\n[!] No JSON found for file %s.\nCreating %s..\n\n", filepath, jsonpath - ) - json_info = { - "Description": "This data was generated elsewhere and " - "supplied by the user into this C-PAC run's " - "output directory. This JSON file was " - "automatically generated by C-PAC because a " - "JSON file was not supplied with the data." 
- } - json_info = {**json_info, **json} - write_output_json(json_info, jsonpath) - else: - json_info = read_json(jsonpath) - json_info = {**json_info, **json} - if "CpacProvenance" in json_info: - if desc_val: - # it's a C-PAC output, let's check for pipe_idx/strat integer - # suffixes in the desc- entries. - only_desc = str(desc_val) - - if only_desc[-1].isdigit(): - for idx in range(0, 3): - # let's stop at 3, please don't run >999 strategies okay? - if only_desc[-1].isdigit(): - only_desc = only_desc[:-1] - - if only_desc[-1] == "-": - only_desc = only_desc.rstrip("-") - else: - msg = ( - "\n[!] Something went wrong with either " - "reading in the output directory or when " - "it was written out previously.\n\nGive " - "this to your friendly local C-PAC " - f"developer:\n\n{data_label!s}\n" - ) - raise IOError(msg) - - # remove the integer at the end of the desc-* variant, we will - # get the unique pipe_idx from the CpacProvenance below - data_label = data_label.replace(desc_val, only_desc) - - # preserve cpac provenance/pipe_idx - pipe_idx = rpool.generate_prov_string(json_info["CpacProvenance"]) - node_name = "" - - else: - json_info["CpacProvenance"] = [f"{data_label}:Non-C-PAC Origin: {filepath}"] - if "Description" not in json_info: - json_info["Description"] = ( - "This data was generated elsewhere and " - "supplied by the user into this C-PAC run's " - "output directory. This JSON file was " - "automatically generated by C-PAC because a " - "JSON file was not supplied with the data." - ) - pipe_idx = rpool.generate_prov_string(json_info["CpacProvenance"]) - node_name = f"{data_label}_ingress" - - return rpool, json_info, pipe_idx, node_name, data_label - - -def func_outdir_ingress( - wf, cfg, func_dict, rpool, unique_id, creds_path, part_id, key, func_paths -): - pipe_x = len(rpool.pipe_list) - ingress = create_func_datasource( - func_dict, rpool, f"gather_func_outdir_{key}_{pipe_x}" - ) - ingress.inputs.inputnode.set( - subject=unique_id, - creds_path=creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data("subject", ingress, "outputspec.subject", {}, "", "func_ingress") - ingress.get_node("inputnode").iterables = ("scan", list(func_dict.keys())) - rpool.set_data(key, ingress, "outputspec.rest", {}, "", "func_ingress") - - rpool.set_data("scan", ingress, "outputspec.scan", {}, "", "func_ingress") - rpool.set_data( - "scan-params", ingress, "outputspec.scan_params", {}, "", "scan_params_ingress" - ) - wf, rpool, diff, blip, fmap_rp_list = ingress_func_metadata( - wf, cfg, rpool, func_dict, part_id, creds_path, key - ) - - # Have to do it this weird way to save the parsed BIDS tag & filepath - mask_paths_key = ( - "desc-bold_mask" - if "desc-bold_mask" in func_paths - else "space-template_desc-bold_mask" - ) - ts_paths_key = "pipeline-ingress_desc-confounds_timeseries" - - # Connect func data with approproate scan name - iterables = pe.Node( - Function( - input_names=["scan", "mask_paths", "ts_paths"], - output_names=["out_scan", "mask", "confounds"], - function=set_iterables, - ), - name=f"set_iterables_{pipe_x}", - ) - iterables.inputs.mask_paths = func_paths[mask_paths_key] - iterables.inputs.ts_paths = func_paths[ts_paths_key] - wf.connect(ingress, "outputspec.scan", iterables, "scan") - - for key in func_paths: - if key in (mask_paths_key, ts_paths_key): - ingress_func = create_general_datasource(f"ingress_func_data_{key}") - ingress_func.inputs.inputnode.set( - unique_id=unique_id, - creds_path=creds_path, - 
dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - wf.connect(iterables, "out_scan", ingress_func, "inputnode.scan") - if key == mask_paths_key: - wf.connect(iterables, "mask", ingress_func, "inputnode.data") - rpool.set_data( - key, ingress_func, "inputnode.data", {}, "", f"outdir_{key}_ingress" - ) - elif key == ts_paths_key: - wf.connect(iterables, "confounds", ingress_func, "inputnode.data") - rpool.set_data( - key, ingress_func, "inputnode.data", {}, "", f"outdir_{key}_ingress" - ) - - return wf, rpool - - -def set_iterables(scan, mask_paths=None, ts_paths=None): - # match scan with filepath to get filepath - mask_path = [path for path in mask_paths if scan in path] - ts_path = [path for path in ts_paths if scan in path] - - return (scan, mask_path[0], ts_path[0]) - - -def strip_template(data_label, dir_path, filename): - json = {} - # rename to template - for prefix in ["space-", "from-", "to-"]: - for bidstag in data_label.split("_"): - if bidstag.startswith(prefix): - template_key, template_val = bidstag.split("-") - template_name, _template_desc = lookup_identifier(template_val) - if template_name: - json["Template"] = template_val - data_label = data_label.replace(template_val, "template") - elif bidstag.startswith("res-"): - res_key, res_val = bidstag.split("-") - json["Resolution"] = res_val - data_label = data_label.replace(bidstag, "") - if data_label.find("__"): - data_label = data_label.replace("__", "_") - return data_label, json - - -def ingress_pipeconfig_paths(cfg, rpool, unique_id, creds_path=None): - # ingress config file paths - # TODO: may want to change the resource keys for each to include one level up in the YAML as well - - import pandas as pd - import pkg_resources as p - - template_csv = p.resource_filename("CPAC", "resources/cpac_templates.csv") - template_df = pd.read_csv(template_csv, keep_default_na=False) - - for row in template_df.itertuples(): - key = row.Key - val = row.Pipeline_Config_Entry - val = cfg.get_nested(cfg, [x.lstrip() for x in val.split(",")]) - resolution = row.Intended_Resolution_Config_Entry - desc = row.Description - - if not val: - continue - - if resolution: - res_keys = [x.lstrip() for x in resolution.split(",")] - tag = res_keys[-1] - json_info = {} - - if "$FSLDIR" in val: - val = val.replace("$FSLDIR", cfg.pipeline_setup["system_config"]["FSLDIR"]) - if "$priors_path" in val: - priors_path = ( - cfg.segmentation["tissue_segmentation"]["FSL-FAST"]["use_priors"][ - "priors_path" - ] - or "" - ) - if "$FSLDIR" in priors_path: - priors_path = priors_path.replace( - "$FSLDIR", cfg.pipeline_setup["system_config"]["FSLDIR"] - ) - val = val.replace("$priors_path", priors_path) - if "${resolution_for_anat}" in val: - val = val.replace( - "${resolution_for_anat}", - cfg.registration_workflows["anatomical_registration"][ - "resolution_for_anat" - ], - ) - if "${func_resolution}" in val: - val = val.replace( - "${func_resolution}", - cfg.registration_workflows["functional_registration"][ - "func_registration_to_template" - ]["output_resolution"][tag], - ) - - if desc: - template_name, _template_desc = lookup_identifier(val) - if template_name: - desc = f"{template_name} - {desc}" - json_info["Description"] = f"{desc} - {val}" - if resolution: - resolution = cfg.get_nested(cfg, res_keys) - json_info["Resolution"] = resolution - - resampled_template = pe.Node( - Function( - input_names=["resolution", "template", "template_name", "tag"], - output_names=["resampled_template"], - function=resolve_resolution, - as_module=True, - ), - 
name="resampled_" + key, - ) - - resampled_template.inputs.resolution = resolution - resampled_template.inputs.template = val - resampled_template.inputs.template_name = key - resampled_template.inputs.tag = tag - - # the set_data below is set up a little differently, because we are - # injecting and also over-writing already-existing entries - # other alternative would have been to ingress into the - # resampled_template node from the already existing entries, but we - # didn't do that here - rpool.set_data( - key, - resampled_template, - "resampled_template", - json_info, - "", - "template_resample", - ) # pipe_idx (after the blank json {}) should be the previous strat that you want deleted! because you're not connecting this the regular way, you have to do it manually - - elif val: - config_ingress = create_general_datasource(f"gather_{key}") - config_ingress.inputs.inputnode.set( - unique_id=unique_id, - data=val, - creds_path=creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - key, - config_ingress, - "outputspec.data", - json_info, - "", - f"{key}_config_ingress", - ) - # templates, resampling from config - """ - template_keys = [ - ("anat", ["network_centrality", "template_specification_file"]), - ("anat", ["nuisance_corrections", "2-nuisance_regression", - "lateral_ventricles_mask"]), - ("anat", - ["segmentation", "tissue_segmentation", "FSL-FAST", "use_priors", - "CSF_path"]), - ("anat", - ["segmentation", "tissue_segmentation", "FSL-FAST", "use_priors", - "GM_path"]), - ("anat", - ["segmentation", "tissue_segmentation", "FSL-FAST", "use_priors", - "WM_path"]), - ("anat", - ["segmentation", "tissue_segmentation", "Template_Based", "CSF"]), - ("anat", - ["segmentation", "tissue_segmentation", "Template_Based", "GRAY"]), - ("anat", - ["segmentation", "tissue_segmentation", "Template_Based", "WHITE"]), - ("anat", ["anatomical_preproc", "acpc_alignment", "T1w_ACPC_template"]), - ("anat", ["anatomical_preproc", "acpc_alignment", "T1w_brain_ACPC_template"]), - ("anat", ["anatomical_preproc", "acpc_alignment", "T2w_ACPC_template"]), - ("anat", ["anatomical_preproc", "acpc_alignment", "T2w_brain_ACPC_template"])] - - def get_nested_attr(c, template_key): - attr = getattr(c, template_key[0]) - keys = template_key[1:] - - def _get_nested(attr, keys): - if len(keys) > 1: - return (_get_nested(attr[keys[0]], keys[1:])) - elif len(keys): - return (attr[keys[0]]) - else: - return (attr) - - return (_get_nested(attr, keys)) - - def set_nested_attr(c, template_key, value): - attr = getattr(c, template_key[0]) - keys = template_key[1:] - - def _set_nested(attr, keys): - if len(keys) > 1: - return (_set_nested(attr[keys[0]], keys[1:])) - elif len(keys): - attr[keys[0]] = value - else: - return (attr) - - return (_set_nested(attr, keys)) - - for key_type, key in template_keys: - attr = cfg.get_nested(cfg, key) - if isinstance(attr, str) or attr == None: - node = create_check_for_s3_node( - key[-1], - attr, key_type, - data_paths['creds_path'], - cfg.pipeline_setup['working_directory']['path'], - map_node=False - ) - cfg.set_nested(cfg, key, node) - - template_keys_in_list = [ - ("anat", - ["segmentation", "tissue_segmentation", "ANTs_Prior_Based", - "template_brain_list"]), - ("anat", - ["segmentation", "tissue_segmentation", "ANTs_Prior_Based", - "template_segmentation_list"]), - ] - - for key_type, key in template_keys_in_list: - node = create_check_for_s3_node( - key[-1], - cfg.get_nested(cfg, key), key_type, - data_paths['creds_path'], - 
cfg.pipeline_setup['working_directory']['path'], - map_node=True - ) - cfg.set_nested(cfg, key, node) - """ - - return rpool - -def ingress_all_data(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id): - - - - - #### One way to do it - - # for data in data_paths[1].iterrows(): - # suffix = data[1]["ent__suffix"] - # datatype = data[1]["ent__datatype"] - # filepath = data[1]["finfo__file_path"] - # desc = data[1]["ent__desc"] - - # data_flow = create_general_datasource(f"gather_{datatype}_{suffix}") - # data_flow.inputs.inputnode.set( - # unique_id=unique_id, - # data=filepath, - # creds_path=None, - # dl_dir=cfg.pipeline_setup["working_directory"]["path"], - # ) - # rpool.set_data( - # f"{datatype}_{suffix}", - # data_flow, - # "outputspec.data", - # {}, - # "", - # f"{datatype}_{suffix}_ingress", - # ) - - return rpool - -def initiate_rpool(wf, cfg, data_paths=None, part_id=None): - """ - Initialize a new ResourcePool. - - data_paths format: - {'anat': { - 'T1w': '{T1w path}', - 'T2w': '{T2w path}' - }, - 'creds_path': {None OR path to credentials CSV}, - 'func': { - '{scan ID}': - { - 'scan': '{path to BOLD}', - 'scan_parameters': {scan parameter dictionary} - } - }, - 'site_id': 'site-ID', - 'subject_id': 'sub-01', - 'unique_id': 'ses-1', - 'derivatives_dir': '{derivatives_dir path}'} - """ - # TODO: refactor further, integrate with the ingress_data functionality - # TODO: used for BIDS-Derivatives (below), and possible refactoring of - # TODO: the raw data config to use 'T1w' label instead of 'anat' etc. - - if data_paths: - part_id = data_paths[0][0] - ses_id = data_paths[0][1] - unique_id = f"{part_id}_{ses_id}" - - elif part_id: - unique_id = part_id - creds_path = None - from .resource_pool import ResourcePool - rpool = ResourcePool(name=unique_id, cfg=cfg) - - # if data_paths: - # rpool = ingress_all_data( - # wf, rpool, cfg, data_paths, unique_id, part_id, ses_id - # ) - rpool.build_rpool(data_paths) - - # grab any file paths from the pipeline config YAML - # creds_path = None - # rpool = ingress_pipeconfig_paths(cfg, rpool, unique_id, creds_path) - - # output files with 4 different scans - resource_description = { - "suffix": "T1w", - #"desc": "preproc", - #"space": "MNI152NLin6ASym" - } - resource_content = rpool.get_resource(resource_description) - #print(dir(rpool.get_resource("T1w")[0])) - #rpool.write_to_disk(cfg.pipeline_setup["working_directory"]["path"]) - #print(rpool.get_resource("T1w")) - - # Ensure the directory exists - os.makedirs('/code/output', exist_ok=True) - - # Now, safely open the file. It will be created if it does not exist. - with open('/code/output/output.txt', 'w') as file: - - # Write the content to the file - file.write(str(resource_content)) - import sys - sys.exit() - - - return (wf, rpool) - - -def run_node_blocks(blocks, data_paths, cfg=None): - import os - - from CPAC.pipeline import nipype_pipeline_engine as pe - from CPAC.pipeline.engine import NodeBlock - - if not cfg: - cfg = { - "pipeline_setup": { - "working_directory": {"path": os.getcwd()}, - "log_directory": {"path": os.getcwd()}, - } - } - - # TODO: WE HAVE TO PARSE OVER UNIQUE ID'S!!! 
- _, rpool = initiate_rpool(cfg, data_paths) - - wf = pe.Workflow(name="node_blocks") - wf.base_dir = cfg.pipeline_setup["working_directory"]["path"] - wf.config["execution"] = { - "hash_method": "timestamp", - "crashdump_dir": cfg.pipeline_setup["log_directory"]["path"], - } - - run_blocks = [] - if rpool.check_rpool("desc-preproc_T1w"): - WFLOGGER.info("Preprocessed T1w found, skipping anatomical preprocessing.") - else: - run_blocks += blocks[0] - if rpool.check_rpool("desc-preproc_bold"): - WFLOGGER.info("Preprocessed BOLD found, skipping functional preprocessing.") - else: - run_blocks += blocks[1] - - for block in run_blocks: - wf = NodeBlock( - block, debug=cfg["pipeline_setup", "Debugging", "verbose"] - ).connect_block(wf, cfg, rpool) - rpool.gather_pipes(wf, cfg) - - wf.run() - - -class NodeData: - r"""Attribute access for ResourcePool.get_data outputs. - - Class to hold outputs of CPAC.pipeline.engine.ResourcePool().get_data(), so one can - do ``node_data = strat_pool.node_data(resource)`` and have ``node_data.node`` and - ``node_data.out`` instead of doing ``node, out = strat_pool.get_data(resource)`` - and needing two variables (``node`` and ``out``) to store that information. - - Also includes ``variant`` attribute providing the resource's self-keyed value - within its ``CpacVariant`` dictionary. - - Examples - -------- - >>> rp = ResourcePool() - >>> rp.node_data(None) - NotImplemented (NotImplemented) - - >>> rp.set_data('test', - ... pe.Node(Function(input_names=[]), 'test'), - ... 'b', [], 0, 'test') - >>> rp.node_data('test') - test (b) - >>> rp.node_data('test').out - 'b' - - >>> try: - ... rp.node_data('b') - ... except LookupError as lookup_error: - ... print(str(lookup_error).strip().split('\n')[0].strip()) - [!] C-PAC says: None of the listed resources are in the resource pool: - """ - - # pylint: disable=too-few-public-methods - def __init__(self, strat_pool=None, resource=None, **kwargs): - self.node = NotImplemented - self.out = NotImplemented - if strat_pool is not None and resource is not None: - self.node, self.out = strat_pool.get_data(resource, **kwargs) - - def __repr__(self): # noqa: D105 - return f'{getattr(self.node, "name", str(self.node))} ({self.out})' diff --git a/CPAC/pipeline/engine/nodeblock.py b/CPAC/pipeline/engine/nodeblock.py index e69de29bb2..e68bfbf0d2 100644 --- a/CPAC/pipeline/engine/nodeblock.py +++ b/CPAC/pipeline/engine/nodeblock.py @@ -0,0 +1,349 @@ +# Copyright (C) 2023-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
+"""Classes and decorator for :py:class:`NodeBlock` s and :py:class:`NodeBlockFunction` s.""" + +from typing import Any, Callable, Optional, TYPE_CHECKING + +import yaml +from nipype import config, logging # type: ignore [import-untyped] +from nipype.pipeline.engine import Workflow # type: ignore[import-untyped] + +from CPAC.utils.configuration.configuration import Configuration +from CPAC.utils.monitoring import ( + WFLOGGER, +) + +if TYPE_CHECKING: + from CPAC.pipeline.engine.resource import ResourceData, StratPool + +NODEBLOCK_INPUTS = list[str | list | tuple] +NODEBLOCK_OUTPUTS = list[str] | dict[str, Any] +PIPELINE_BLOCKS = list["NodeBlockFunction | PIPELINE_BLOCKS"] + + +class NodeBlockFunction: + """Store a reference to the nodeblock function and all of its metadata.""" + + def __init__( + self, + func: Callable, + name: str, + config: Optional[list[str]] = None, + switch: Optional[list[str] | list[list[str]]] = None, + option_key: Optional[str | list[str]] = None, + option_val: Optional[str | list[str]] = None, + inputs: Optional[NODEBLOCK_INPUTS] = None, + outputs: Optional[NODEBLOCK_OUTPUTS] = None, + ) -> None: + self.func = func + """:py:class:`Nodeblock` function reference.""" + self.name: str = name + """Used in the graph and logging to identify the :py:class:`NodeBlock` and its + component :py:class:`~nipype.pipeline.engine.Node` s.""" + self.config: Optional[list[str]] = config + """ + Indicates the nested keys in a C-PAC pipeline + :py:class:`~CPAC.utils.configuration.Configuration` should configure a + :py:class:`NodeBlock` built from this function. If `config` is set to ``None``, + then all other :py:class:`~CPAC.utils.configuration.Configuration` -related + entities must be specified from the root of the :py:class:`~CPAC.utils.configuration.Configuration` . + """ + self.switch: Optional[list[str] | list[list[str]]] = switch + """ + Indicates any keys that should evaluate to ``True`` for this :py:class:`NodeBlock` + to be active. A list of lists of strings indicates multiple `switch` es + that must all be ``True`` to run, and is currently only an option if `config` is + set to ``None``. + """ + self.option_key: Optional[str | list[str]] = option_key + """ + Indicates the nested keys (starting at the nested key indicated by `config`) + that should configure this :py:class:`NodeBlock`. + """ + self.option_val: Optional[str | list[str]] = option_val + """Indicates values for which this :py:class:`NodeBlock` should be active.""" + self.inputs: list[str | list | tuple] = inputs if inputs else [] + """:py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating + resources needed for the :py:class:`NodeBlock`'s functionality.""" + self.outputs: list[str] | dict[str, Any] = outputs if outputs else [] + """ + :py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating + resources generated or updated by the :py:class:`NodeBlock`, optionally + including metadata for the outputs' respective sidecars. 
+ """ + + # Forward function attributes similar to functools.update_wrapper: + # https://docs.python.org/3/library/functools.html#functools.update_wrapper + self.__module__ = func.__module__ + self.__name__ = func.__name__ + self.__qualname__ = func.__qualname__ + self.__annotations__ = func.__annotations__ + self.__doc__ = "".join( + [ + _.replace(" ", "") + for _ in [func.__doc__, "", "", NodeBlockFunction.__call__.__doc__] + if _ is not None + ] + ).rstrip() + + def __call__( + self, + wf: Workflow, + cfg: Configuration, + strat_pool: "StratPool", + pipe_num: Optional[int | str], + opt: Optional[str] = None, + ) -> tuple[Workflow, dict[str, "ResourceData"]]: + """Call a :py:class:`NodeBlockFunction`. + + All :py:class:`NodeBlockFunction` s have the same signature. + """ + return self.func(wf, cfg, strat_pool, pipe_num, opt) + + def legacy_nodeblock_dict(self): + """Return :py:class:`NodeBlock` metadata as a dictionary. + + Helper for compatibility reasons. + """ + return { + "name": self.name, + "config": self.config, + "switch": self.switch, + "option_key": self.option_key, + "option_val": self.option_val, + "inputs": self.inputs, + "outputs": self.outputs, + } + + def __repr__(self) -> str: + """Return reproducible string representation of a :py:class:`NodeBlockFunction`.""" + return ( + f"NodeBlockFunction({self.func.__module__}." + f'{self.func.__name__}, "{self.name}", ' + f"config={self.config}, switch={self.switch}, " + f"option_key={self.option_key}, option_val=" + f"{self.option_val}, inputs={self.inputs}, " + f"outputs={self.outputs})" + ) + + def __str__(self) -> str: + """Return string representation of a :py:class:`NodeBlockFunction`.""" + return f"NodeBlockFunction({self.name})" + + +class NodeBlock: + """A :py:class:`~nipype.pipeline.engine.Workflow` subgraph composed of :py:class:`NodeBlockFunction` s.""" + + def __init__( + self, + node_block_functions: NodeBlockFunction | PIPELINE_BLOCKS, + debug: bool = False, + ) -> None: + """Create a :py:class:`NodeBlock` from a list of :py:class:`NodeBlockFunction` s.""" + if not isinstance(node_block_functions, list): + node_block_functions = [node_block_functions] + + self.node_blocks: dict[str, Any] = {} + + for node_block_function in node_block_functions: # <---- sets up the NodeBlock object in case you gave it a list of node blocks instead of a single one - for option forking. 
+ self.input_interface = [] + if isinstance(node_block_function, tuple): + self.input_interface = node_block_function[1] + node_block_function = node_block_function[0] # noqa: PLW2901 + if not isinstance(self.input_interface, list): + self.input_interface = [self.input_interface] + + if not isinstance(node_block_function, NodeBlockFunction): + # If the object is a plain function `__name__` will be more useful than `str()` + obj_str = ( + node_block_function.__name__ # type: ignore [attr-defined] + if hasattr(node_block_function, "__name__") + else str(node_block_function) + ) + msg = f'Object is not a nodeblock: "{obj_str}"' + raise TypeError(msg) + + name = node_block_function.name + self.name = name + self.node_blocks[name] = {} + + if self.input_interface: + for interface in self.input_interface: + for orig_input in node_block_function.inputs: + if isinstance(orig_input, tuple): + list_tup = list(orig_input) + if interface[0] in list_tup: + list_tup.remove(interface[0]) + list_tup.append(interface[1]) + node_block_function.inputs.remove(orig_input) + node_block_function.inputs.append(tuple(list_tup)) + elif orig_input == interface[0]: + node_block_function.inputs.remove(interface[0]) + node_block_function.inputs.append(interface[1]) + + for key, val in node_block_function.legacy_nodeblock_dict().items(): + self.node_blocks[name][key] = val + + self.node_blocks[name]["block_function"] = node_block_function + + # TODO: fix/replace below + self.outputs: dict[str, Optional[str]] = {} + for out in node_block_function.outputs: + self.outputs[out] = None + + self.options: list[str] | dict[str, Any] = ["base"] + if node_block_function.outputs is not None: + self.options = node_block_function.outputs + + WFLOGGER.info("Connecting %s...", name) + if debug: + config.update_config({"logging": {"workflow_level": "DEBUG"}}) + logging.update_logging(config) + WFLOGGER.debug( + '"inputs": %s\n\t "outputs": %s%s', + node_block_function.inputs, + list(self.outputs.keys()), + f'\n\t"options": {self.options}' + if self.options != ["base"] + else "", + ) + config.update_config({"logging": {"workflow_level": "INFO"}}) + logging.update_logging(config) + + def check_output(self, outputs: NODEBLOCK_OUTPUTS, label: str, name: str) -> None: + """Check if a label is listed in a :py:class:`NodeBlock` 's `outputs`. + + Raises + ------ + NameError + If a mismatch is found. + """ + if label not in outputs: + msg = ( + f'\n[!] Output name "{label}" in the block ' + "function does not match the outputs list " + f'{outputs} in Node Block "{name}"\n' + ) + raise NameError(msg) + + @staticmethod + def list_blocks( + pipeline_blocks: PIPELINE_BLOCKS, indent: Optional[int] = None + ) -> str: + """List :py:class:`NodeBlockFunction` s line by line. 
+ + Parameters + ---------- + pipeline_blocks + list of :py:class:`NodeBlockFunction` s + + indent + number of spaces after a tab indent + + Returns + ------- + str + formatted list of :py:class:`NodeBlockFunction` s + """ + blockstring = yaml.dump( + [ + getattr( + block, + "__name__", + getattr( + block, + "name", + yaml.safe_load(NodeBlock.list_blocks(list(block))) + if isinstance(block, (tuple, list, set)) + else str(block), + ), + ) + for block in pipeline_blocks + ] + ) + if isinstance(indent, int): + blockstring = "\n".join( + [ + "\t" + " " * indent + line.replace("- - ", "- ") + for line in blockstring.split("\n") + ] + ) + return blockstring + + +def nodeblock( + name: Optional[str] = None, + config: Optional[list[str]] = None, + switch: Optional[list[str] | list[list[str]]] = None, + option_key: Optional[str | list[str]] = None, + option_val: Optional[str | list[str]] = None, + inputs: Optional[NODEBLOCK_INPUTS] = None, + outputs: Optional[list[str] | dict[str, Any]] = None, +): + """Define a :py:class:`NodeBlockFunction` . + + Connections to the pipeline :py:class:`~CPAC.utils.configuration.Configuration` and to other :py:class:`NodeBlockFunction` s. + + Parameters + ---------- + name + Used in the graph and logging to identify the :py:class:`NodeBlock` and its + component :py:class:`~nipype.pipeline.engine.Node` s. + The :py:class:`NodeBlockFunction`'s `.__name__` is used if `name` is not + provided. + + config + Indicates the nested keys in a C-PAC pipeline + :py:class:`~CPAC.pipeline.configuration.Configuration` should configure a + :py:class:`NodeBlock` built from this :py:class:`NodeBlockFunction`. If `config` + is set to ``None``, then all other + :py:class:`~CPAC.pipeline.configuration.Configuration` -related entities + must be specified from the root of the + :py:class:`~CPAC.pipeline.configuration.Configuration` . + + switch + Indicates any keys that should evaluate to ``True`` for this + :py:class:`NodeBlock` to be active. A list of lists of strings indicates + multiple switches that must all be ``True`` to run, and is currently only an + option if config is set to ``None``. + + option_key + Indicates the nested keys (starting at the nested key indicated by `config`) + that should configure this :py:class:`NodeBlock`. + + option_val + Indicates values for which this :py:class:`NodeBlock` should be active. + + inputs + :py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating files needed for the :py:class:`NodeBlock` 's + functionality. + + outputs + :py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating files + generated or updated by the :py:class:`NodeBlock`, optionally including metadata + for the `outputs` ' respective sidecars. + """ + return lambda func: NodeBlockFunction( + func, + name if name is not None else func.__name__, + config, + switch, + option_key, + option_val, + inputs, + outputs, + ) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 04d434a07e..ddbabdec56 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -1,9 +1,226 @@ +# Copyright (C) 2021-2024 C-PAC Developers -from bids2table import BIDSTable, BIDSFile, join_bids_path, parse_bids_entities -from dataclasses import dataclass +# This file is part of C-PAC. 
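In practice node block functions are declared with the nodeblock decorator above and grouped into a NodeBlock; passing a (function, (old_input, new_input)) tuple remaps one of the function's inputs for that block only. A hedged sketch with hypothetical names, assuming this branch of C-PAC is importable:

from CPAC.pipeline.engine.nodeblock import NodeBlock, nodeblock

@nodeblock(
    name="example_brain_mask",
    config=["anatomical_preproc", "brain_extraction"],
    switch=["run"],
    inputs=["desc-preproc_T1w"],
    outputs=["space-T1w_desc-brain_mask"],
)
def example_brain_mask(wf, cfg, strat_pool, pipe_num, opt=None):
    node, out = strat_pool.get_data("desc-preproc_T1w")
    return wf, {"space-T1w_desc-brain_mask": (node, out)}

block = NodeBlock(example_brain_mask)
# feed the same function a different upstream resource without editing it
remapped = NodeBlock([(example_brain_mask, ("desc-preproc_T1w", "desc-restore_T1w"))])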
+ +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +""":py:class:`Resource` s and :py:class:`ResourcePool` s for C-PAC.""" + +import ast +from collections.abc import KeysView +from copy import deepcopy +import hashlib +from itertools import chain +import json +import os +from pathlib import Path +import re +from types import NoneType +from typing import Any, cast, Literal, NamedTuple, Optional, overload + +import numpy as np import pandas as pd +from nipype.interfaces import utility as util # type: ignore [import-untyped] +from nipype.interfaces.utility import Rename # type: ignore [import-untyped] +from nipype.pipeline import engine as pe # type: ignore [import-untyped] + +from CPAC.image_utils.spatial_smoothing import spatial_smoothing +from CPAC.image_utils.statistical_transforms import ( + fisher_z_score_standardize, + z_score_standardize, +) +from CPAC.pipeline.check_outputs import ExpectedOutputs +from CPAC.pipeline.engine.nodeblock import ( + NodeBlock, + NODEBLOCK_INPUTS, + NODEBLOCK_OUTPUTS, + NodeBlockFunction, + PIPELINE_BLOCKS, +) +from CPAC.pipeline.utils import name_fork, source_set +from CPAC.registration.registration import transform_derivative +from CPAC.resources.templates.lookup_table import lookup_identifier +from CPAC.utils.bids_utils import res_in_filename +from CPAC.utils.configuration.configuration import Configuration, Preconfiguration +from CPAC.utils.datasource import ( + calc_delta_te_and_asym_ratio, + check_for_s3, + check_func_scan, + create_anat_datasource, + create_fmap_datasource, + create_general_datasource, + gather_echo_times, + get_fmap_phasediff_metadata, + get_rest, + resolve_resolution, +) +from CPAC.utils.interfaces.datasink import DataSink +from CPAC.utils.interfaces.function import Function +from CPAC.utils.monitoring import ( + getLogger, + LOGTAIL, + UTLOGGER, + WARNING_FREESURFER_OFF_WITH_DATA, + WFLOGGER, +) +from CPAC.utils.outputs import Outputs +from CPAC.utils.typing import LIST_OF_LIST_OF_STR, PIPE_IDX, SUB_GROUP +from CPAC.utils.utils import ( + check_prov_for_regtool, + create_id_string, + get_last_prov_entry, + get_scan_params, + read_json, + write_output_json, +) + +EXTS = [".nii", ".gz", ".mat", ".1D", ".txt", ".csv", ".rms", ".tsv"] +POOL_DICT = dict[str | tuple, "STRAT_DICT | list[ResourceIO]"] +STRAT_DICT = dict[str | tuple, "Resource"] + + +class DataPaths: + """Store subject-session specific data paths.""" + + def __init__( + self, *, data_paths: Optional[dict] = None, part_id: Optional[str] = "" + ) -> None: + """Initialize a `DataPaths` instance.""" + if not data_paths: + data_paths = {} + if part_id and "part_id" in data_paths and part_id != data_paths["part_id"]: + WFLOGGER.warning( + "both 'part_id' (%s) and data_paths['part_id'] (%s) provided. 
" + "Using '%s'.", + part_id, + data_paths["part_id"], + part_id, + ) + anat: dict[str, str] | str = data_paths.get("anat", {}) + if isinstance(anat, str): + anat = {"T1": anat} + self.anat: dict[str, str] = anat + self.creds_path: Optional[str] = data_paths.get("creds_path") + self.fmap: Optional[dict] = data_paths.get("fmap") + self.func: dict[str, dict[str, str | dict]] = data_paths.get("func", {}) + self.part_id: str = data_paths.get("subject_id", "") + self.site_id: str = data_paths.get("site_id", "") + self.ses_id: str = data_paths.get("unique_id", "") + self.derivatives_dir: Optional[str] = data_paths.get("derivatives_dir") + + def __repr__(self) -> str: + """Return reproducible string representation of `DataPaths` instance.""" + return f"DataPaths(data_paths={self.as_dict()})" + + def __str__(self) -> str: + """Return string representation of a `DataPaths` instance.""" + return repr(self) + + def as_dict(self) -> dict: + """Return a `data_paths` dictionary. + + `data_paths` format:: + + {"anat": {"T1w": "{T1w path}", "T2w": "{T2w path}"}, + "creds_path": {None OR path to credentials CSV}, + "func": { + "{scan ID}": { + "scan": "{path to BOLD}", + "scan_parameters": {scan parameter dictionary}, + } + }, + "site_id": "site-ID", + "subject_id": "sub-01", + "unique_id": "ses-1", + "derivatives_dir": "{derivatives_dir path}",} + """ + return { + **{ + k: v + for k, v in { + key: getattr(self, key, None) + for key in [ + "anat", + "creds_path", + "func", + "site_id", + "derivatives_dir", + ] + }.items() + if v + }, + **{ + k: v + for k, v in { + key[0]: getattr(self, key[1], None) + for key in [ + ("subject_id", "part_id"), + ("unique_id", "ses_id"), + ] + }.items() + if v + }, + } + + +@Function.sig_imports(["from typing import Optional"]) +def set_iterables( + scan: str, + mask_paths: Optional[list[str]] = None, + ts_paths: Optional[list[str]] = None, +) -> tuple[str, str, str]: + """Match scan with filepath to get filepath.""" + if mask_paths is None: + mask_paths = [] + if ts_paths is None: + ts_paths = [] + mask_path = [path for path in mask_paths if scan in path] + ts_path = [path for path in ts_paths if scan in path] + + return (scan, mask_path[0], ts_path[0]) + + +def strip_template(data_label: str) -> tuple[str, dict[str, str]]: + """Strip a template name from a data label to use as a :py:class:`Resource` key.""" + json = {} + # rename to template + for prefix in ["space-", "from-", "to-"]: + for bidstag in data_label.split("_"): + if bidstag.startswith(prefix): + _template_key, template_val = bidstag.split("-") + template_name, _template_desc = lookup_identifier(template_val) + if template_name: + json["Template"] = template_val + data_label = data_label.replace(template_val, "template") + elif bidstag.startswith("res-"): + _res_key, res_val = bidstag.split("-") + json["Resolution"] = res_val + data_label = data_label.replace(bidstag, "") + if data_label.find("__"): + data_label = data_label.replace("__", "_") + return data_label, json -class Resource(): + +class ResourceData(NamedTuple): + """Attribute and tuple access for `ResourceData`.""" + + node: pe.Node + """Resource :py:class:`~nipype.pipeline.engine.Node`.""" + out: str + """Output key.""" + + +class ResourceIO: row: dict CpacProvenance: tuple ds: dict @@ -15,13 +232,12 @@ class Resource(): rel_path: str def __init__(self, row, CpacProvenance): - self.cpac_provenance = CpacProvenance - self.metadata = {} # replace with >> row['json'] if isinstance(row['json'], dict) else {} + self.metadata = {} # replace with >> 
row['json'] if isinstance(row['json'], dict) else {} self.row = row for key, value in self.row.items(): setattr(self, key, value) - + self.filename = self.file_path.split("/")[-1] self.rel_path = f"sub-{self.sub}" if self.ses != "None": @@ -29,34 +245,3188 @@ def __init__(self, row, CpacProvenance): self.rel_path += f"/{self.datatype}" self.suffix = self.suffix - + self.name = self.filename.split(".")[0] - self.strats = { - str(self.cpac_provenance) : self.file_path - } + self.strats = {str(self.cpac_provenance): self.file_path} for key, value in self.metadata.items(): setattr(self, key, value) def __repr__(self): - exclude_list = ['CpacConfig', 'CpacConfigHash', 'CpacProvenance', 'metadata', 'cpac_provenance', 'ds', 'entity', 'finfo', 'row', 'filename', 'file_path', 'rel_path', 'entities', 'path', 'entity_to_bids_key', ] # Add attribute names to exclude - attributes = {attr: value for attr, value in self.__dict__.items() if attr not in exclude_list and value is not None} + exclude_list = [ + "CpacConfig", + "CpacConfigHash", + "CpacProvenance", + "metadata", + "cpac_provenance", + "ds", + "entity", + "finfo", + "row", + "filename", + "file_path", + "rel_path", + "entities", + "path", + "entity_to_bids_key", + ] # Add attribute names to exclude + attributes = { + attr: value + for attr, value in self.__dict__.items() + if attr not in exclude_list and value is not None + } return f"{self.__class__.__name__}({attributes})" - # write to disk + @staticmethod + def subset(df: pd.DataFrame, column: str, value: Any) -> pd.DataFrame: + """Return a subset of a DataFrame where column == value.""" + return df.loc[df[column] == value] + def write_to_disk(self, path): + """Write to disk.""" import shutil + try: path_to_write = os.path.join(path, self.rel_path) os.makedirs(path_to_write, exist_ok=True) # Copy the NIFTI file - shutil.copy(self.finfo['file_path'], path_to_write) + shutil.copy(self.finfo["file_path"], path_to_write) # Write the JSON file only if the ext is .nii.gz - if self.filename.endswith('.nii.gz'): - json_path = os.path.join(path_to_write, f"{self.filename.replace('.nii.gz', '.json')}") - with open(json_path, 'w') as f: + if self.filename.endswith(".nii.gz"): + json_path = os.path.join( + path_to_write, f"{self.filename.replace('.nii.gz', '.json')}" + ) + with open(json_path, "w") as f: f.write(json.dumps(self.metadata, indent=4)) return f"successfully written to {path_to_write}" except Exception as e: WFLOGGER.error(f"Error writing to disk: {e}") print(f"Error writing to disk: {e}") - \ No newline at end of file + + +class Resource: + """A single `Resource` and its methods.""" + + def __init__(self, data: tuple[pe.Node, str], json: dict) -> None: + """Initialize a `Resource`.""" + self.data = ResourceData(*data) + """Tuple of source :py:class:`~nipype.pipeline.engine.Node` and output key.""" + self._json: dict = json + """Metadata.""" + self._keys = {"data", "json"} + """Dictionary-style subscriptable keys.""" + + def keys(self) -> list[str]: + """Return list of subscriptable keys.""" + return list(self._keys) + + def __contains__(self, item: Any) -> bool: + """Return ``True`` if `item` in :py:meth:`Resource.keys()`, ``False`` otherwise.""" + return item in self.keys() + + def __getitem__(self, name: str) -> Any: + """Provide legacy dict-style get access.""" + if name in self.keys(): + return getattr(self, name) + msg = f"Key '{name}' not set in {self}." 
+ raise KeyError(msg) + + def __repr__(self) -> str: + """Return reproducible string for `Resource`.""" + positional = f"Resource(data={self.data}, json={self.json}" + kw = ", ".join( + f"{key}={getattr(self, key)}" + for key in self.keys() + if key not in ["data", "json"] + ) + return f"{positional}{kw})" + + def __setitem__(self, name: str, value: Any) -> None: + """Provide legacy dict-style set access for `Resource`.""" + setattr(self, name, value) + if name not in self.keys(): + self._keys.add(name) + + def __str__(self) -> str: + """Return string representation of `Resource`.""" + return f"{self.data[0]}" + + def get_json(self) -> dict[str | tuple, Any]: + """Return a deep copy of `Resource` JSON.""" + UTLOGGER.debug( + "%s is a deep copy of the attached JSON. Assign it to a variable before modifying or the changes will be ephemeral.", + self.__class__.__name__, + ) + return json.loads(json.dumps(self._json)) + + def set_json(self, value=dict) -> None: + """Update `Resource` JSON.""" + self._json.update(value) + + json = property(get_json, set_json, doc=get_json.__doc__) + + @property + def cpac_provenance(self) -> list: + """Get "CpacProvenance" of a `Resource`.""" + return self.json["CpacProvenance"] + + +class _Pool: + """All Resources.""" + + def __init__(self) -> None: + """Initialize a :py:class:`ResourcePool` or :py:class:`StratPool` .""" + self.ants_interp: str + self.cfg: Configuration + self.creds_paths: Optional[str] + self.data_paths: DataPaths | SUB_GROUP + self.fsl_interp: str + self.func_reg: bool + self.fwhm: list[int] + self.info: dict = {} + self.logdir: Optional[str] + self.name: list[str] | str + self.num_ants_cores: int + self.num_cpus = int + self.part_id: str + self.pipe_list: list + self.ses_id: str + self.smoothing_bool: bool + self.smooth_opts: list[str] + self.regressors: dict | list + self.rpool: dict + self.run_smoothing: bool + self.run_zscoring: bool + self.unique_id: str + self.zscoring_bool: bool + self.wf: pe.Workflow + + def __repr__(self) -> str: + """Return reproducible `_Pool` string.""" + params = [ + f"{param}={getattr(self, param)}" + for param in ["rpool", "name", "cfg", "pipe_list"] + if getattr(self, param, None) + ] + return f'{self.__class__.__name__}({", ".join(params)})' + + def __str__(self) -> str: + """Return string representation of a `_Pool`.""" + if self.name: + return f"{self.__class__.__name__}({self.name}): {list(self.rpool)}" + return f"{self.__class__.__name__}: {list(self.rpool)}" + + @staticmethod + def generate_prov_string(prov: LIST_OF_LIST_OF_STR | tuple) -> tuple[str, str]: + """Generate a string from a SINGLE RESOURCE'S dictionary of MULTIPLE PRECEDING RESOURCES (or single, if just one). + + NOTE: this DOES NOT merge multiple resources!!! (i.e. for merging-strat pipe_idx generation). + """ + if not isinstance(prov, list): + msg = ( + "\n[!] 
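A Resource pairs a nipype node/output with its JSON sidecar and keeps legacy dict-style access alongside attribute access. A small sketch; the node name and metadata are illustrative, and the import assumes this branch of C-PAC:

from nipype.interfaces.utility import IdentityInterface
from nipype.pipeline import engine as pe
from CPAC.pipeline.engine.resource import Resource

node = pe.Node(IdentityInterface(fields=["out_file"]), name="example_ingress")
resource = Resource(
    data=(node, "out_file"),
    json={"CpacProvenance": ["desc-preproc_T1w:example_ingress"]},
)
src_node, out_key = resource.data             # ResourceData is a NamedTuple
meta = resource.json                          # returns a deep copy of the sidecar
resource.json = {"Description": "example"}    # the setter merges into the stored JSON
resource["fork"] = True                       # legacy dict-style set adds a new key
assert "fork" in resource and resource["data"] is resource.data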
Developer info: the CpacProvenance " + f"entry for {prov} has to be a list.\n" + ) + raise TypeError(msg) + last_entry = get_last_prov_entry(prov) + resource = last_entry.split(":")[0] + return (resource, str(prov)) + + def check_rpool(self, resource: list[str] | str) -> bool: + """Check if a `resource` is present in the `_Pool`.""" + if not isinstance(resource, list): + resource = [resource] + for name in resource: + if name in self.rpool: + return True + return False + + def keys(self) -> KeysView: + """Return `rpool`'s keys.""" + return self.rpool.keys() + + def __contains__(self, key) -> bool: + """Return ``True`` if key in `_Pool`, ``False`` otherwise.""" + return key in self.keys() + + @staticmethod + def get_resource_from_prov(prov: LIST_OF_LIST_OF_STR) -> Optional[str]: + """Return the last item in the provenance list. + + Each resource (i.e. "desc-cleaned_bold" AKA nuisance-regressed BOLD + data) has its own provenance list. the name of the resource, and + the node that produced it, is always the last item in the provenance + list, with the two separated by a colon (`:`) + """ + if not len(prov): + return None + if isinstance(prov[-1], list): + last_item_in_list = prov[-1][-1] + assert isinstance(last_item_in_list, str) + return last_item_in_list.split(":")[0] + if isinstance(prov[-1], str): + return prov[-1].split(":")[0] + return None + + def set_data( + self, + resource: str, + node: pe.Node | pe.Workflow, + output: str, + json_info: dict[str | tuple, Any], + pipe_idx: PIPE_IDX, + node_name: str, + fork: bool = False, + inject: bool = False, + ) -> None: + """Plug a :py:class:`Resource` into a `_Pool`.""" + json_info = json_info.copy() + cpac_prov: LIST_OF_LIST_OF_STR = [] + if "CpacProvenance" in json_info: + cpac_prov = json_info["CpacProvenance"] + current_prov_list = list(cpac_prov) + new_prov_list = list(cpac_prov) # <---- making a copy, it was already a list + if not inject: + new_prov_list.append(f"{resource}:{node_name}") + try: + _resource, new_pipe_idx = self.generate_prov_string(new_prov_list) + except IndexError: + msg = ( + f"\n\nThe set_data() call for {resource} has no " + "provenance information and should not be an " + "injection." 
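The provenance helpers above rely on a simple convention: CpacProvenance is a (possibly nested) list whose last entry is "resource:node_name", and the stringified list doubles as the strategy key (pipe_idx). A standalone illustration with made-up entries:

# provenance for a nuisance-regressed BOLD built from two upstream resources
cpac_prov = [
    ["bold:func_ingress", "desc-preproc_bold:func_reorient"],
    ["T1w:anat_ingress", "desc-preproc_T1w:anatomical_init"],
    "desc-cleaned_bold:nuisance_regression",
]

last_entry = cpac_prov[-1]            # what get_resource_from_prov inspects
resource = last_entry.split(":")[0]   # "desc-cleaned_bold"
pipe_idx = str(cpac_prov)             # strategy key used throughout the ResourcePool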
+ ) + raise IndexError(msg) + if not json_info: + json_info = { + "RawSources": [ + resource # <---- this will be repopulated to the full file path at the end of the pipeline building, in gather_pipes() + ] + } + json_info["CpacProvenance"] = new_prov_list + + if resource not in self.keys(): + self.rpool[resource] = {} + elif not fork: # <--- in the event of multiple strategies/options, this will run for every option; just keep in mind + search = False + if self.get_resource_from_prov(current_prov_list) == resource: + # CHANGING PIPE_IDX, BE CAREFUL DOWNSTREAM IN THIS FUNCTION + pipe_idx = self.generate_prov_string(current_prov_list)[1] + if pipe_idx not in self.rpool[resource].keys(): + search = True + else: + search = True + if search: + for idx in current_prov_list: + if self.get_resource_from_prov(idx) == resource: + if isinstance(idx, list): + # CHANGING PIPE_IDX, BE CAREFUL DOWNSTREAM IN THIS FUNCTION + pipe_idx = self.generate_prov_string(idx)[1] + elif isinstance(idx, str): + pipe_idx = idx + break + if pipe_idx in self.rpool[resource].keys(): + # in case the resource name is now new, and not the original + # remove old keys so we don't end up with a new strat for every new node unit (unless we fork) + del self.rpool[resource][pipe_idx] + if new_pipe_idx not in self.rpool[resource]: + self.rpool[resource][new_pipe_idx] = Resource( + data=ResourceData(node, output), json=json_info + ) + if new_pipe_idx not in self.pipe_list: + self.pipe_list.append(new_pipe_idx) + + def get( + self, + resource: LIST_OF_LIST_OF_STR | str | list[str], + pipe_idx: Optional[PIPE_IDX], + report_fetched: bool, + optional: bool, + ) -> ( + Optional[Resource | STRAT_DICT | dict] + | tuple[Optional[Resource | STRAT_DICT], Optional[str]] + ): + """Return a dictionary of strats or a single :py:class:`Resource` .""" + if not isinstance(resource, list): + resource = [resource] + # if a list of potential inputs are given, pick the first one found + for label in resource: + if label in self.keys(): + _found = self.rpool[label] + if pipe_idx: + _found = _found[pipe_idx] + if report_fetched: + return _found, label + return _found + if optional: + if report_fetched: + return (None, None) + return None + msg = ( + "\n\n[!] C-PAC says: None of the listed resources are in " + f"the resource pool:\n\n {resource}\n\nOptions:\n- You " + "can enable a node block earlier in the pipeline which " + "produces these resources. 
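set_data above is how ingress and node-block outputs are plugged into the pool: the node/output pair is stored under the resource key, and "resource:node_name" is appended to the provenance to derive the strategy key. A hedged fragment, assuming rpool is an already-initialized ResourcePool and using an illustrative identity node:

from nipype.interfaces.utility import IdentityInterface
from nipype.pipeline import engine as pe

ingress = pe.Node(IdentityInterface(fields=["anat"]), name="example_anat_ingress")
# ingress-style call: no prior provenance, empty JSON, empty pipe_idx
rpool.set_data("T1w", ingress, "anat", {}, "", "anat_ingress")
assert rpool.check_rpool("T1w")

# a downstream step carries provenance forward, producing a new strategy key
rpool.set_data(
    "desc-preproc_T1w",
    ingress,
    "anat",
    {"CpacProvenance": ["T1w:anat_ingress"]},
    "",
    "anatomical_init",
)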
Check the 'outputs:' field in " + "a node block's documentation.\n- You can directly " + "provide this required data by pulling it from another " + "BIDS directory using 'source_outputs_dir:' in the " + "pipeline configuration, or by placing it directly in " + "your C-PAC output directory.\n- If you have done these, " + "and you still get this message, please let us know " + "through any of our support channels at: " + "https://fcp-indi.github.io/\n" + ) + raise LookupError(msg) + + +class ResourcePool(_Pool): + """A pool of :py:class:`Resource` s.""" + + def __init__( + self, + name: str = "", + cfg: Optional[Configuration] = None, + pipe_list: Optional[list] = None, + *, + data_paths: Optional[DataPaths | dict | SUB_GROUP] = None, + part_id: Optional[str] = None, + pipeline_name: str = "", + wf: Optional[pe.Workflow] = None, + ) -> None: + """Initialize a `ResourcePool`.""" + self.name = name + super().__init__() + if isinstance(data_paths, dict): + data_paths = DataPaths(data_paths=data_paths) + elif data_paths is None: + data_paths = DataPaths(part_id=part_id) + self.data_paths = data_paths + + if cfg: + self.cfg = cfg + else: + self.cfg = Preconfiguration("blank") + + # pass-through for convenient access + self.creds_path: Optional[str] + self.part_id: str + self.ses_id: str + self.unique_id: str + if isinstance(self.data_paths, DataPaths): + self.creds_path = self.data_paths.creds_path + self.part_id = self.data_paths.part_id + self.ses_id = self.data_paths.ses_id + else: + self.creds_path = self.cfg[ + "pipeline_setup", "Amazon-AWS", "aws_output_bucket_credentials" + ] + self.part_id = self.data_paths[0][0] + self.ses_id = self.data_paths[0][1] + self.unique_id: str = f"{self.part_id}_{self.ses_id}" + self.rpool: POOL_DICT = {} + if not isinstance(self.data_paths, DataPaths): + self.build_rpool() + + if not pipe_list: + self.pipe_list = [] + else: + self.pipe_list = pipe_list + + self.logdir = self._config_lookup(["pipeline_setup", "log_directory", "path"]) + self.num_cpus = self._config_lookup( + ["pipeline_setup", "system_config", "max_cores_per_participant"] + ) + self.num_ants_cores = self._config_lookup( + ["pipeline_setup", "system_config", "num_ants_threads"] + ) + + self.ants_interp = self._config_lookup( + [ + "registration_workflows", + "functional_registration", + "func_registration_to_template", + "ANTs_pipelines", + "interpolation", + ] + ) + self.fsl_interp = self._config_lookup( + [ + "registration_workflows", + "functional_registration", + "func_registration_to_template", + "FNIRT_pipelines", + "interpolation", + ] + ) + self.func_reg = self._config_lookup( + [ + "registration_workflows", + "functional_registration", + "func_registration_to_template", + "run", + ] + ) + + self.run_smoothing = "smoothed" in self._config_lookup( + ["post_processing", "spatial_smoothing", "output"], list + ) + self.smoothing_bool = self._config_lookup( + ["post_processing", "spatial_smoothing", "run"] + ) + self.run_zscoring = "z-scored" in self._config_lookup( + ["post_processing", "z-scoring", "output"], list + ) + self.zscoring_bool = self._config_lookup( + ["post_processing", "z-scoring", "run"] + ) + self.fwhm = self._config_lookup( + ["post_processing", "spatial_smoothing", "fwhm"] + ) + self.smooth_opts = self._config_lookup( + ["post_processing", "spatial_smoothing", "smoothing_method"] + ) + + if wf: + self.wf = wf + else: + self.initialize_nipype_wf(pipeline_name) + + self.xfm = [ + "alff", + "desc-sm_alff", + "desc-zstd_alff", + "desc-sm-zstd_alff", + "falff", + 
"desc-sm_falff", + "desc-zstd_falff", + "desc-sm-zstd_falff", + "reho", + "desc-sm_reho", + "desc-zstd_reho", + "desc-sm-zstd_reho", + ] + ingress_derivatives = False + try: + if self.data_paths.derivatives_dir and self._config_lookup( + ["pipeline_setup", "outdir_ingress", "run"], bool + ): + ingress_derivatives = True + except (AttributeError, KeyError, TypeError): + pass + if ingress_derivatives: + self.ingress_output_dir() + else: + self.ingress_raw_anat_data() + # if getattr(data_paths, 'func', False) or : # TODO: include conditional in ingress_raw_func_data + self.ingress_raw_func_data() + self.ingress_pipeconfig_paths() + + def back_propogate_template_name( + self, resource_idx: str, json_info: dict, id_string: pe.Node + ) -> None: + """Find and apply the template name from a :py:class:`Resource` 's provenance.""" + if "template" in resource_idx and self.check_rpool("derivatives-dir"): + if self.check_rpool("template"): + node, out = self.get_data("template") + self.wf.connect(node, out, id_string, "template_desc") + elif "Template" in json_info: + id_string.inputs.template_desc = json_info["Template"] + elif ( + "template" in resource_idx and len(json_info.get("CpacProvenance", [])) > 1 + ): + for resource in source_set(json_info["CpacProvenance"]): + source, value = resource.split(":", 1) + if value.startswith("template_") and source != "FSL-AFNI-bold-ref": + # 'FSL-AFNI-bold-ref' is currently allowed to be in + # a different space, so don't use it as the space for + # descendents + try: + ancestors = self.rpool.get(source) + assert ancestors is not None + anscestor_json = next(iter(ancestors.items()))[1].json + if "Description" in anscestor_json: + id_string.inputs.template_desc = anscestor_json[ + "Description" + ] + return + except (IndexError, KeyError): + pass + return + + def build_rpool(self, default_CpacProvenance="ingress"): + assert isinstance(self.data_paths, tuple) + # count = 1 + for index, row in self.data_paths[1].iterrows(): + # Check if 'json' is not None and contains 'CpacProvenance' + if row.get("json") and row["json"].get("CpacProvenance"): + CpacProvenance = row["json"]["CpacProvenance"] + else: + CpacProvenance = default_CpacProvenance + resource_io = ResourceIO(row, CpacProvenance) + # making the rpool a list so that the duplicates are appended rather than overwritten + _default_list: list["ResourceIO"] = [] + self.rpool.setdefault(resource_io.suffix, _default_list) + _new_resource = cast(list["ResourceIO"], self.rpool[resource_io.suffix]) + _new_resource.append(resource_io) + # count += 1 + # if count >10: + # break + + def write_to_disk(self, path): + for resources in self.rpool.values(): + for item in resources: + print(item["resource"].write_to_disk(path)) + + def get_resource(self, description): + matching_resources = [] + for resources in self.rpool.get(description["suffix"], []): + # Initialize a flag to True, assuming the resource matches until proven otherwise + is_match = True + for key, val in description.items(): + # Skip the 'suffix' key as it's used to select the pool, not to match resources + if key == "suffix": + continue + # Check if the resource matches the description criteria + # Use getattr for object attributes or resources.get for dictionary keys + resource_val = getattr(resources, key, None) + if resource_val.lower() != val.lower(): + is_match = False + break # Break out of the inner loop if any criteria does not match + if is_match: + # If the resource matches all criteria, append its name to the matching_resources list + 
matching_resources.append(resources.name) + for items in matching_resources: + print(items) + return matching_resources + + def set_resource(self, name, value): + self.rpool[name] = value + + def gather_pipes( # noqa: PLR0915 + self, + wf: pe.Workflow, + cfg: Configuration, + all_types: bool = False, + add_excl: Optional[list[str]] = None, + ) -> None: + """Gather pipes including naming, postproc, and expected outputs.""" + excl: list[str] = [] + # substring_excl: list[str] = [] + outputs_logger = getLogger(f"{self.part_id}_expectedOutputs") + expected_outputs = ExpectedOutputs() + + if add_excl: + excl += add_excl + + if "nonsmoothed" not in cfg.post_processing["spatial_smoothing"]["output"]: # type: ignore [attr-defined] + excl += Outputs.native_nonsmooth + excl += Outputs.template_nonsmooth + + if "raw" not in cfg.post_processing["z-scoring"]["output"]: # type: ignore [attr-defined] + excl += Outputs.native_raw + excl += Outputs.template_raw + + if not cfg.pipeline_setup["output_directory"]["write_debugging_outputs"]: # type: ignore [attr-defined] + # substring_excl.append(['bold']) + excl += Outputs.debugging + + for resource in self.keys(): + if resource in excl or resource not in Outputs.any: + continue + + # drop = False + # for substring_list in substring_excl: + # bool_list = [] + # for substring in substring_list: + # if substring in resource: + # bool_list.append(True) + # else: + # bool_list.append(False) + # for item in bool_list: + # if not item: + # break + # else: + # drop = True + # if drop: + # break + # if drop: + # continue + + subdir = "other" + if resource in Outputs.anat: + subdir = "anat" + # TODO: get acq- etc. + elif resource in Outputs.func: + subdir = "func" + # TODO: other stuff like acq- etc. + + for pipe_idx in self.rpool[resource]: + unique_id = self.unique_id + part_id = self.part_id + ses_id = self.ses_id + + if "ses-" not in ses_id: + ses_id = f"ses-{ses_id}" + + out_dir = cfg.pipeline_setup["output_directory"]["path"] # type: ignore [attr-defined] + pipe_name = cfg.pipeline_setup["pipeline_name"] # type: ignore [attr-defined] + container = os.path.join(f"pipeline_{pipe_name}", part_id, ses_id) + filename = f"{unique_id}_{res_in_filename(self.cfg, resource)}" + + out_path = os.path.join(out_dir, container, subdir, filename) + + out_dct = { + "unique_id": unique_id, + "out_dir": out_dir, + "container": container, + "subdir": subdir, + "filename": filename, + "out_path": out_path, + } + self.rpool[resource][pipe_idx]["out"] = out_dct + + # TODO: have to link the pipe_idx's here. and call up 'desc-preproc_T1w' from a Sources in a json and replace. here. + # TODO: can do the pipeline_description.json variants here too! 
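For pools built from a bids2table index (build_rpool above), get_resource matches ingressed files by BIDS entities, keyed on "suffix". A hedged fragment, assuming rpool was built from such an index and that these entity columns exist in it:

# all resting-state BOLD runs ingressed for this subject/session
matches = rpool.get_resource({"suffix": "bold", "task": "rest", "datatype": "func"})
# each match is the ResourceIO name (the filename without its extension)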
+ + num_variant: Optional[int | str] = 0 + if len(self.rpool[resource]) == 1: + num_variant = "" + unlabelled = self._get_unlabelled(resource) + for pipe_idx in self.rpool[resource]: + pipe_x = self._get_pipe_number(pipe_idx) + json_info = self.rpool[resource][pipe_idx]["json"] + out_dct = self.rpool[resource][pipe_idx]["out"] + + try: + if unlabelled: + assert isinstance(num_variant, int) + num_variant += 1 + except TypeError: + pass + + try: + del json_info["subjson"] + except KeyError: + pass + + if out_dct["subdir"] == "other" and not all_types: + continue + + unique_id = out_dct["unique_id"] + resource_idx = resource + + if isinstance(num_variant, int): + resource_idx, out_dct = name_fork( + resource_idx, cfg, json_info, out_dct + ) + if unlabelled: + if "desc-" in out_dct["filename"]: + for key in out_dct["filename"].split("_")[::-1]: + # final `desc` entity + if key.startswith("desc-"): + out_dct["filename"] = out_dct["filename"].replace( + key, f"{key}-{num_variant}" + ) + resource_idx = resource_idx.replace( + key, f"{key}-{num_variant}" + ) + break + else: + suff = resource.split("_")[-1] + newdesc_suff = f"desc-{num_variant}_{suff}" + resource_idx = resource_idx.replace(suff, newdesc_suff) + id_string = pe.Node( + Function( + input_names=[ + "cfg", + "unique_id", + "resource", + "scan_id", + "template_desc", + "atlas_id", + "fwhm", + "subdir", + "extension", + ], + output_names=["out_filename"], + function=create_id_string, + ), + name=f"id_string_{resource_idx}_{pipe_x}", + ) + id_string.inputs.cfg = self.cfg + id_string.inputs.unique_id = unique_id + id_string.inputs.resource = resource_idx + id_string.inputs.subdir = out_dct["subdir"] + + # grab the iterable scan ID + if out_dct["subdir"] == "func": + node, out = self.rpool["scan"]["['scan:func_ingress']"]["data"] + wf.connect(node, out, id_string, "scan_id") + + self.back_propogate_template_name(resource_idx, json_info, id_string) + # grab the FWHM if smoothed + for tag in resource.split("_"): + if "desc-" in tag and "-sm" in tag: + fwhm_idx = str(pipe_idx).replace(f"{resource}:", "fwhm:") + try: + node, out = self.rpool["fwhm"][fwhm_idx]["data"] + wf.connect(node, out, id_string, "fwhm") + except KeyError: + # smoothing was not done for this resource in the + # engine.py smoothing + pass + break + atlas_suffixes = ["timeseries", "correlations", "statmap"] + # grab the iterable atlas ID + atlas_id = None + if not resource.endswith("desc-confounds_timeseries"): + if resource.split("_")[-1] in atlas_suffixes: + atlas_idx = str(pipe_idx).replace(resource, "atlas_name") + # need the single quote and the colon inside the double + # quotes - it's the encoded pipe_idx + # atlas_idx = new_idx.replace(f"'{temp_rsc}:", + # "'atlas_name:") + if atlas_idx in self.rpool["atlas_name"]: + node, out = self.rpool["atlas_name"][atlas_idx]["data"] + wf.connect(node, out, id_string, "atlas_id") + elif "atlas-" in resource: + for tag in resource.split("_"): + if "atlas-" in tag: + atlas_id = tag.replace("atlas-", "") + id_string.inputs.atlas_id = atlas_id + else: + WFLOGGER.warning( + "\n[!] 
No atlas ID found for %s.\n", out_dct["filename"] + ) + nii_name = pe.Node(Rename(), name=f"nii_{resource_idx}_{pipe_x}") + nii_name.inputs.keep_ext = True + + if resource in Outputs.ciftis: + nii_name.inputs.keep_ext = False + id_string.inputs.extension = Outputs.ciftis[resource] + else: + nii_name.inputs.keep_ext = True + + if resource in Outputs.giftis: + nii_name.inputs.keep_ext = False + id_string.inputs.extension = f"{Outputs.giftis[resource]}.gii" + + else: + nii_name.inputs.keep_ext = True + + wf.connect(id_string, "out_filename", nii_name, "format_string") + + node, out = self.rpool[resource][pipe_idx]["data"] + try: + wf.connect(node, out, nii_name, "in_file") + except OSError as os_error: + WFLOGGER.warning(os_error) + continue + + write_json = pe.Node( + Function( + input_names=["json_data", "filename"], + output_names=["json_file"], + function=write_output_json, + ), + name=f"json_{resource_idx}_{pipe_x}", + ) + write_json.inputs.json_data = json_info + + wf.connect(id_string, "out_filename", write_json, "filename") + ds = pe.Node(DataSink(), name=f"sinker_{resource_idx}_{pipe_x}") + ds.inputs.parameterization = False + ds.inputs.base_directory = out_dct["out_dir"] + ds.inputs.encrypt_bucket_keys = cfg.pipeline_setup["Amazon-AWS"][ # type: ignore[attr-defined] + "s3_encryption" + ] + ds.inputs.container = out_dct["container"] + + if cfg.pipeline_setup["Amazon-AWS"]["aws_output_bucket_credentials"]: # type: ignore[attr-defined] + ds.inputs.creds_path = cfg.pipeline_setup["Amazon-AWS"][ # type: ignore[attr-defined] + "aws_output_bucket_credentials" + ] + expected_outputs += ( + out_dct["subdir"], + create_id_string( + self.cfg, + unique_id, + resource_idx, + template_desc=id_string.inputs.template_desc, + atlas_id=atlas_id, + subdir=out_dct["subdir"], + ), + ) + wf.connect(nii_name, "out_file", ds, f'{out_dct["subdir"]}.@data') + wf.connect(write_json, "json_file", ds, f'{out_dct["subdir"]}.@json') + outputs_logger.info(expected_outputs) + + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: None = None, + report_fetched: Literal[False] = False, + *, + optional: Literal[True], + ) -> Optional[STRAT_DICT]: ... + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: PIPE_IDX, + report_fetched: Literal[False] = False, + *, + optional: Literal[True], + ) -> Optional[Resource]: ... + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: None = None, + *, + report_fetched: Literal[True], + optional: Literal[True], + ) -> tuple[Optional[STRAT_DICT], Optional[str]]: ... + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: PIPE_IDX, + report_fetched: Literal[True], + optional: Literal[True], + ) -> tuple[Optional[Resource], Optional[str]]: ... + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: None = None, + report_fetched: Literal[False] = False, + optional: Literal[False] = False, + ) -> STRAT_DICT: ... + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: PIPE_IDX, + report_fetched: Literal[False] = False, + optional: Literal[False] = False, + ) -> Resource: ... + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: None = None, + *, + report_fetched: Literal[True], + optional: bool = False, + ) -> tuple[Optional[STRAT_DICT], Optional[str]]: ... + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: PIPE_IDX, + report_fetched: Literal[True], + optional: Literal[False] = False, + ) -> tuple[Resource, str]: ... 
+ @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: Optional[PIPE_IDX] = None, + report_fetched: bool = False, + optional: bool = False, + ) -> ( + Optional[Resource | STRAT_DICT] + | tuple[Optional[Resource | STRAT_DICT], Optional[str]] + ): ... + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: Optional[PIPE_IDX] = None, + report_fetched: bool = False, + optional: bool = False, + ): + """Return a dictionary of strats. + + Inside those are dictionaries like ``{'data': (node, out), 'json': info}``. + """ + return super().get(resource, pipe_idx, report_fetched, optional) + + @overload + def get_data( + self, + resource: list[str] | str, + pipe_idx: None = None, + report_fetched: bool = False, + quick_single: bool = False, + ) -> ResourceData: ... + @overload + def get_data( + self, + resource: list[str] | str, + pipe_idx: PIPE_IDX, + report_fetched: Literal[True], + quick_single: Literal[False] = False, + ) -> tuple[ResourceData, str]: ... + @overload + def get_data( + self, + resource: list[str] | str, + pipe_idx: PIPE_IDX, + report_fetched: Literal[False] = False, + quick_single: bool = False, + ) -> ResourceData: ... + @overload + def get_data( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX], + report_fetched: bool, + quick_single: Literal[True], + ) -> ResourceData: ... + def get_data( + self, + resource, + pipe_idx=None, + report_fetched=False, + quick_single=False, + ): + """Get :py:class:`ResourceData` from `ResourcePool`.""" + _resource = self.get(resource, pipe_idx=pipe_idx, report_fetched=report_fetched) + if report_fetched: + if pipe_idx: + connect, fetched = _resource + assert isinstance(connect, Resource) and isinstance(fetched, str) + return connect.data, fetched + if quick_single or len(resource) == 1: + assert isinstance(_resource, dict) + for value in _resource.values(): + return value.data + assert isinstance(_resource, Resource) + return _resource.data + + def get_json(self, resource: str, strat: str | tuple) -> dict: + """Get JSON metadata from a :py:class:`Resource` in a strategy.""" + return self.get(resource, pipe_idx=strat).json + + def get_json_info(self, resource: str, key: str) -> Any: + """Get a metadata value from a matching from any strategy.""" + # TODO: key checks + for val in self.rpool[resource].values(): + if key in val.json: + return val.json[key] + msg = f"{key} not found in any strategy for {resource} in {self}." 
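The overloads above boil down to two common calls: get returns a strategy dictionary (or a single Resource when pipe_idx is given), and get_data unwraps straight to the (node, out) pair. A hedged fragment, assuming rpool already holds a "desc-preproc_T1w" resource:

strats = rpool.get("desc-preproc_T1w")                        # {pipe_idx: Resource, ...}
pipe_idx = next(iter(strats))
resource = rpool.get("desc-preproc_T1w", pipe_idx=pipe_idx)   # a single Resource
node, out = rpool.get_data("desc-preproc_T1w", pipe_idx=pipe_idx)
missing = rpool.get("desc-nonexistent_T1w", optional=True)    # None instead of LookupError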
+ raise KeyError(msg) + + @staticmethod + def get_raw_label(resource: str) -> str: + """Remove ``desc-*`` label.""" + for tag in resource.split("_"): + if "desc-" in tag: + resource = resource.replace(f"{tag}_", "") + break + return resource + + def get_strats( # noqa: PLR0912,PLR0915 + self, resources: NODEBLOCK_INPUTS, debug: bool = False + ) -> dict[str | tuple, "StratPool"]: + """Get a dictionary of :py:class:`StratPool` s.""" + # TODO: NOTE: NOT COMPATIBLE WITH SUB-RPOOL/STRAT_POOLS + # TODO: (and it doesn't have to be) + import itertools + + linked_resources = [] + resource_list: list[str | list[str]] = [] + if debug: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("\nresources: %s", resources) + for resource in resources: + # grab the linked-input tuples + if isinstance(resource, tuple): + linked: list[str] = [] + for label in list(resource): + rp_dct, fetched_resource = self.get( + label, report_fetched=True, optional=True + ) + if not rp_dct: + continue + assert fetched_resource is not None + linked.append(fetched_resource) + resource_list += linked + if len(linked) < 2: # noqa: PLR2004 + continue + linked_resources.append(linked) + else: + resource_list.append(resource) + + total_pool = [] + variant_pool: dict = {} + len_inputs = len(resource_list) + if debug: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("linked_resources: %s", linked_resources) + verbose_logger.debug("resource_list: %s", resource_list) + for resource in resource_list: + ( + rp_dct, # <---- rp_dct has the strats/pipe_idxs as the keys on first level, then 'data' and 'json' on each strat level underneath + fetched_resource, + ) = self.get( + resource, + report_fetched=True, + optional=True, # oh, and we make the resource fetching in get_strats optional so we can have optional inputs, but they won't be optional in the node block unless we want them to be + ) + if not rp_dct: + len_inputs -= 1 + continue + assert isinstance(rp_dct, dict) and fetched_resource is not None + sub_pool = [] + if debug: + verbose_logger.debug("len(rp_dct): %s\n", len(rp_dct)) + for strat in rp_dct.keys(): + json_info = self.get_json(fetched_resource, strat) + cpac_prov = json_info["CpacProvenance"] + sub_pool.append(cpac_prov) + if fetched_resource not in variant_pool: + variant_pool[fetched_resource] = [] + if "CpacVariant" in json_info: + for key, val in json_info["CpacVariant"].items(): + if val not in variant_pool[fetched_resource]: + variant_pool[fetched_resource] += val + variant_pool[fetched_resource].append(f"NO-{val[0]}") + + if debug: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("%s sub_pool: %s\n", resource, sub_pool) + total_pool.append(sub_pool) + + if not total_pool: + raise LookupError( + "\n\n[!] C-PAC says: None of the listed " + "resources in the node block being connected " + "exist in the resource pool.\n\nResources:\n" + "%s\n\n" % resource_list + ) + + # TODO: right now total_pool is: + # TODO: [[[T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-preproc_T1w:acpc_alignment], [T1w:anat_ingress,desc-preproc_T1w:anatomical_init]], + # TODO: [[T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-preproc_T1w:acpc_alignment, desc-brain_mask:brain_mask_afni], [T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-brain_mask:brain_mask_afni]]] + + # TODO: and the code below thinks total_pool is a list of lists, like [[pipe_idx, pipe_idx], [pipe_idx, pipe_idx, pipe_idx], etc.] 
+ # TODO: and the actual resource is encoded in the tag: of the last item, every time! + # keying the strategies to the resources, inverting it + if len_inputs > 1: + strats = itertools.product(*total_pool) + + # we now currently have "strats", the combined permutations of all the strategies, as a list of tuples, each tuple combining one version of input each, being one of the permutations. + # OF ALL THE DIFFERENT INPUTS. and they are tagged by their fetched inputs with {name}:{strat}. + # so, each tuple has ONE STRAT FOR EACH INPUT, so if there are three inputs, each tuple will have 3 items. + new_strats: dict[str | tuple, StratPool] = {} + + # get rid of duplicates - TODO: refactor .product + strat_str_list = [] + strat_list_list = [] + for strat_tuple in strats: + strat_list = list(deepcopy(strat_tuple)) + strat_str = str(strat_list) + if strat_str not in strat_str_list: + strat_str_list.append(strat_str) + strat_list_list.append(strat_list) + + if debug: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("len(strat_list_list): %s\n", len(strat_list_list)) + for strat_list in strat_list_list: + json_dct = {} + for strat in strat_list: + # strat is a prov list for a single resource/input + prov_resource, strat_idx = self.generate_prov_string(strat) + strat_json = self.get_json(prov_resource, strat=strat_idx) + json_dct[prov_resource] = strat_json + + drop = False + if linked_resources: + for linked in linked_resources: # <--- 'linked' is each tuple + if drop: + break + for xlabel in linked: + if drop or xlabel is None: + break + xjson = json.loads(json.dumps(json_dct[xlabel])) + for ylabel in linked: + if xlabel == ylabel or ylabel is None: + continue + yjson = json.loads(json.dumps(json_dct[ylabel])) + + if "CpacVariant" not in xjson: + xjson["CpacVariant"] = {} + if "CpacVariant" not in yjson: + yjson["CpacVariant"] = {} + + current_strat = [] + for val in xjson["CpacVariant"].values(): + if isinstance(val, list): + current_strat.append(val[0]) + else: + current_strat.append(val) + current_spread = list(set(variant_pool[xlabel])) + for spread_label in current_spread: + if "NO-" in spread_label: + continue + if spread_label not in current_strat: + current_strat.append(f"NO-{spread_label}") + + other_strat = [] + for val in yjson["CpacVariant"].values(): + if isinstance(val, list): + other_strat.append(val[0]) + else: + other_strat.append(val) + other_spread = list(set(variant_pool[ylabel])) + for spread_label in other_spread: + if "NO-" in spread_label: + continue + if spread_label not in other_strat: + other_strat.append(f"NO-{spread_label}") + + for variant in current_spread: + in_current_strat = False + in_other_strat = False + in_other_spread = False + + if variant is None: + in_current_strat = True + if None in other_spread: + in_other_strat = True + if variant in current_strat: + in_current_strat = True + if variant in other_strat: + in_other_strat = True + if variant in other_spread: + in_other_spread = True + + if not in_other_strat: + if in_other_spread: + if in_current_strat: + drop = True + break + + if in_other_strat: + if in_other_spread: + if not in_current_strat: + drop = True + break + if drop: + break + if drop: + continue + + # make the merged strat label from the multiple inputs + # strat_list is actually the merged CpacProvenance lists + pipe_idx = str(strat_list) + new_strats[pipe_idx] = StratPool(name=pipe_idx, cfg=self.cfg) + # new_strats is A DICTIONARY OF StratPool OBJECTS! 
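As the comments here note, get_strats ultimately returns a dictionary of StratPool objects keyed by the merged provenance string. A typical consumer looks roughly like the hedged fragment below; the resource names are illustrative and rpool is assumed to be an initialized ResourcePool:

for pipe_idx, strat_pool in rpool.get_strats(
    ["desc-preproc_T1w", "space-T1w_desc-brain_mask"]
).items():
    # each StratPool is one self-consistent slice of the ResourcePool
    node, out = strat_pool.get_data("desc-preproc_T1w")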
+ new_strats[pipe_idx].json = {"CpacProvenance": strat_list} + + # now just invert resource:strat to strat:resource for each resource:strat + for cpac_prov in strat_list: + resource, strat = self.generate_prov_string(cpac_prov) + strat_resource = self.rpool[resource][strat] + # remember, `strat_resource` is a Resource. + new_strats[pipe_idx].rpool[resource] = strat_resource + # `new_strats` is A DICTIONARY OF RESOURCEPOOL OBJECTS! each one is a new slice of the resource pool combined together. + self.pipe_list.append(pipe_idx) + if "CpacVariant" in strat_resource["json"]: + if "CpacVariant" not in new_strats[pipe_idx]._json: + new_strats[pipe_idx]._json["CpacVariant"] = {} + for younger_resource, variant_list in ( + new_strats[pipe_idx]._json["CpacVariant"].items() + ): + if ( + younger_resource + not in new_strats[pipe_idx]._json["CpacVariant"] + ): + new_strats[pipe_idx]._json["CpacVariant"][ + younger_resource + ] = variant_list + # preserve each input's JSON info also + new_strats[pipe_idx].preserve_json_info(resource, strat_resource) + else: + new_strats = {} + for resource_strat_list in total_pool: + # total_pool will have only one list of strats, for the one input + for cpac_prov in resource_strat_list: # <------- cpac_prov here doesn't need to be modified, because it's not merging with other inputs + resource, pipe_idx = self.generate_prov_string(cpac_prov) + strat_resource = self.rpool[resource][pipe_idx] + # remember, `strat_resource` is a Resource. + new_strats[pipe_idx] = StratPool( + rpool={resource: strat_resource}, name=pipe_idx, cfg=self.cfg + ) # <----- again, new_strats is A DICTIONARY OF StratPool OBJECTS! + new_strats[pipe_idx].json = strat_resource.json + new_strats[pipe_idx].json["subjson"] = {} + new_strats[pipe_idx].json["CpacProvenance"] = cpac_prov + # preserve each input's JSON info also + new_strats[pipe_idx].preserve_json_info(resource, strat_resource) + return new_strats + + def initialize_nipype_wf(self, name: str = "") -> None: + """Initialize a new nipype :py:class:`~nipype.pipeline.engine.Workflow` .""" + if name: + name = f"_{name}" + workflow_name = f"cpac{name}_{self.unique_id}" + self.wf = pe.Workflow(name=workflow_name) + self.wf.base_dir = self.cfg.pipeline_setup["working_directory"]["path"] # type: ignore[attr-defined] + self.wf.config["execution"] = { + "hash_method": "timestamp", + "crashdump_dir": os.path.abspath( + self.cfg.pipeline_setup["log_directory"]["path"] # type: ignore[attr-defined] + ), + } + + def ingress_freesurfer(self) -> None: + """Ingress FreeSurfer data.""" + try: + fs_path = os.path.join( + self.cfg.pipeline_setup["freesurfer_dir"], # type: ignore[attr-defined] + self.part_id, + ) + except KeyError: + WFLOGGER.warning("No FreeSurfer data present.") + return + + # fs_path = os.path.join(cfg.pipeline_setup['freesurfer_dir'], part_id) + if not os.path.exists(fs_path): + if "sub" in self.part_id: + fs_path = os.path.join( + self.cfg.pipeline_setup["freesurfer_dir"], # type: ignore[attr-defined] + self.part_id.replace("sub-", ""), + ) + else: + fs_path = os.path.join( + self.cfg.pipeline_setup["freesurfer_dir"], # type: ignore[attr-defined] + ("sub-" + self.part_id), + ) + + # patch for flo-specific data + if not os.path.exists(fs_path): + subj_ses = f"{self.part_id}-{self.ses_id}" + fs_path = os.path.join( + self.cfg.pipeline_setup["freesurfer_dir"], # type: ignore[attr-defined] + subj_ses, + ) + if not os.path.exists(fs_path): + WFLOGGER.info( + "No FreeSurfer data found for subject %s", self.part_id + ) + return + + # Check for 
double nested subj names + if os.path.exists(os.path.join(fs_path, os.path.basename(fs_path))): + fs_path = os.path.join(fs_path, self.part_id) + + fs_ingress = create_general_datasource("gather_freesurfer_dir") + fs_ingress.inputs.inputnode.set( + unique_id=self.unique_id, + data=fs_path, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] + ) + self.set_data( + "freesurfer-subject-dir", + fs_ingress, + "outputspec.data", + {}, + "", + "freesurfer_config_ingress", + ) + + recon_outs = { + "pipeline-fs_raw-average": "mri/rawavg.mgz", + "pipeline-fs_subcortical-seg": "mri/aseg.mgz", + "pipeline-fs_brainmask": "mri/brainmask.mgz", + "pipeline-fs_wmparc": "mri/wmparc.mgz", + "pipeline-fs_T1": "mri/T1.mgz", + "pipeline-fs_hemi-L_desc-surface_curv": "surf/lh.curv", + "pipeline-fs_hemi-R_desc-surface_curv": "surf/rh.curv", + "pipeline-fs_hemi-L_desc-surfaceMesh_pial": "surf/lh.pial", + "pipeline-fs_hemi-R_desc-surfaceMesh_pial": "surf/rh.pial", + "pipeline-fs_hemi-L_desc-surfaceMesh_smoothwm": "surf/lh.smoothwm", + "pipeline-fs_hemi-R_desc-surfaceMesh_smoothwm": "surf/rh.smoothwm", + "pipeline-fs_hemi-L_desc-surfaceMesh_sphere": "surf/lh.sphere", + "pipeline-fs_hemi-R_desc-surfaceMesh_sphere": "surf/rh.sphere", + "pipeline-fs_hemi-L_desc-surfaceMap_sulc": "surf/lh.sulc", + "pipeline-fs_hemi-R_desc-surfaceMap_sulc": "surf/rh.sulc", + "pipeline-fs_hemi-L_desc-surfaceMap_thickness": "surf/lh.thickness", + "pipeline-fs_hemi-R_desc-surfaceMap_thickness": "surf/rh.thickness", + "pipeline-fs_hemi-L_desc-surfaceMap_volume": "surf/lh.volume", + "pipeline-fs_hemi-R_desc-surfaceMap_volume": "surf/rh.volume", + "pipeline-fs_hemi-L_desc-surfaceMesh_white": "surf/lh.white", + "pipeline-fs_hemi-R_desc-surfaceMesh_white": "surf/rh.white", + "pipeline-fs_xfm": "mri/transforms/talairach.lta", + } + + for key, outfile in recon_outs.items(): + fullpath = os.path.join(fs_path, outfile) + if os.path.exists(fullpath): + fs_ingress = create_general_datasource(f"gather_fs_{key}_dir") + fs_ingress.inputs.inputnode.set( + unique_id=self.unique_id, + data=fullpath, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] + ) + self.set_data( + key, fs_ingress, "outputspec.data", {}, "", f"fs_{key}_ingress" + ) + else: + WFLOGGER.warning("\n[!] 
Path does not exist for %s.\n", fullpath) + + return + + def ingress_output_dir(self) -> None: + """Ingress an output directory into a `ResourcePool`.""" + dir_path = self.data_paths.derivatives_dir + assert dir_path is not None + WFLOGGER.info("\nPulling outputs from %s.\n", dir_path) + + anat = os.path.join(dir_path, "anat") + func = os.path.join(dir_path, "func") + + outdir_anat: list[str] = [] + outdir_func: list[str] = [] + func_paths: dict = {} + func_dict: dict = {} + func_key = "" + + for subdir in [anat, func]: + if os.path.isdir(subdir): + for filename in os.listdir(subdir): + for ext in EXTS: + if ext in filename: + if subdir == anat: + outdir_anat.append(os.path.join(subdir, filename)) + else: + outdir_func.append(os.path.join(subdir, filename)) + + # Add derivatives directory to rpool + ingress = create_general_datasource("gather_derivatives_dir") + ingress.inputs.inputnode.set( + unique_id=self.unique_id, + data=dir_path, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] + ) + self.set_data( + "derivatives-dir", + ingress, + "outputspec.data", + {}, + "", + "outdir_config_ingress", + ) + + for subdirs in [outdir_anat, outdir_func]: + for filepath in subdirs: + filename = str(filepath) + for ext in EXTS: + filename = filename.split("/")[-1].replace(ext, "") + + data_label = filename.split(self.unique_id)[1].lstrip("_") + + if len(filename) == len(data_label): + msg = ( + "\n\n[!] Possibly wrong participant or " + "session in this directory?\n\n" + f"Filepath: {filepath}\n\n" + ) + raise Exception(msg) + + bidstag = "" + for tag in data_label.split("_"): + for prefix in ["task-", "run-", "acq-", "rec"]: + if tag.startswith(prefix): + bidstag += f"{tag}_" + data_label = data_label.replace(f"{tag}_", "") + data_label, json = strip_template(data_label) + + json_info, pipe_idx, node_name, data_label = self.json_outdir_ingress( + filepath, data_label, json + ) + + if ( + "template" in data_label + and not json_info["Template"] + == self.cfg.pipeline_setup["outdir_ingress"]["Template"] # type: ignore[attr-defined] + ): + continue + # Rename confounds to avoid confusion in nuisance regression + if data_label.endswith("desc-confounds_timeseries"): + data_label = "pipeline-ingress_desc-confounds_timeseries" + + if len(bidstag) > 1: + # Remove tail symbol + bidstag = bidstag[:-1] + if bidstag.startswith("task-"): + bidstag = bidstag.replace("task-", "") + + # Rename bold mask for CPAC naming convention + # and to avoid collision with anat brain mask + if data_label.endswith("desc-brain_mask") and filepath in outdir_func: + data_label = data_label.replace("brain_mask", "bold_mask") + + try: + pipe_x = self._get_pipe_number(pipe_idx) + except ValueError: + pipe_x = len(self.pipe_list) + if filepath in outdir_anat: + ingress = create_general_datasource( + f"gather_anat_outdir_{data_label!s}_{pipe_x}" + ) + ingress.inputs.inputnode.set( + unique_id=self.unique_id, + data=filepath, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] + ) + self.set_data( + data_label, + ingress, + "outputspec.data", + json_info, + pipe_idx, + node_name=f"outdir_{data_label}_ingress", + inject=True, + ) + else: + if data_label.endswith("desc-preproc_bold"): + func_key = data_label + func_dict[bidstag] = {} + func_dict[bidstag]["scan"] = str(filepath) + func_dict[bidstag]["scan_parameters"] = json_info + func_dict[bidstag]["pipe_idx"] = pipe_idx + if 
data_label.endswith("desc-brain_mask"): + data_label = data_label.replace("brain_mask", "bold_mask") + try: + func_paths[data_label].append(filepath) + except (AttributeError, KeyError, TypeError): + func_paths[data_label] = [] + func_paths[data_label].append(filepath) + + if func_dict: + self.func_outdir_ingress( + func_dict, + func_key, + func_paths, + ) + + if self.cfg.surface_analysis["freesurfer"]["ingress_reconall"]: # type: ignore[attr-defined] + self.ingress_freesurfer() + + def ingress_func_metadata( + self, + num_strat=None, + ) -> tuple[bool, bool, list[str]]: + """Ingress metadata for functional scans.""" + name_suffix = "" + for suffix_part in (self.unique_id, num_strat): + if suffix_part is not None: + name_suffix += f"_{suffix_part}" + # Grab field maps + diff = False + blip = False + fmap_rp_list = [] + fmap_TE_list = [] + if self.data_paths.fmap: + second = False + for orig_key in self.data_paths.fmap: + gather_fmap = create_fmap_datasource( + self.data_paths.fmap, f"fmap_gather_{orig_key}_{self.part_id}" + ) + gather_fmap.inputs.inputnode.set( + subject=self.part_id, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] + ) + gather_fmap.inputs.inputnode.scan = orig_key + + key = orig_key + if "epi" in key and not second: + key = "epi-1" + second = True + elif "epi" in key and second: + key = "epi-2" + + self.set_data( + key, gather_fmap, "outputspec.rest", {}, "", "fmap_ingress" + ) + self.set_data( + f"{key}-scan-params", + gather_fmap, + "outputspec.scan_params", + {}, + "", + "fmap_params_ingress", + ) + + fmap_rp_list.append(key) + + get_fmap_metadata_imports = ["import json"] + get_fmap_metadata = pe.Node( + Function( + input_names=["data_config_scan_params"], + output_names=[ + "dwell_time", + "pe_direction", + "total_readout", + "echo_time", + "echo_time_one", + "echo_time_two", + ], + function=get_fmap_phasediff_metadata, + imports=get_fmap_metadata_imports, + ), + name=f"{key}_get_metadata{name_suffix}", + ) + + self.wf.connect( + gather_fmap, + "outputspec.scan_params", + get_fmap_metadata, + "data_config_scan_params", + ) + + if "phase" in key: + # leave it open to all three options, in case there is a + # phasediff image with either a single EchoTime field (which + # usually matches one of the magnitude EchoTimes), OR + # a phasediff with an EchoTime1 and EchoTime2 + + # at least one of these rpool keys will have a None value, + # which will be sorted out in gather_echo_times below + self.set_data( + f"{key}-TE", + get_fmap_metadata, + "echo_time", + {}, + "", + "fmap_TE_ingress", + ) + fmap_TE_list.append(f"{key}-TE") + + self.set_data( + f"{key}-TE1", + get_fmap_metadata, + "echo_time_one", + {}, + "", + "fmap_TE1_ingress", + ) + fmap_TE_list.append(f"{key}-TE1") + + self.set_data( + f"{key}-TE2", + get_fmap_metadata, + "echo_time_two", + {}, + "", + "fmap_TE2_ingress", + ) + fmap_TE_list.append(f"{key}-TE2") + + elif "magnitude" in key: + self.set_data( + f"{key}-TE", + get_fmap_metadata, + "echo_time", + {}, + "", + "fmap_TE_ingress", + ) + fmap_TE_list.append(f"{key}-TE") + + self.set_data( + f"{key}-dwell", + get_fmap_metadata, + "dwell_time", + {}, + "", + "fmap_dwell_ingress", + ) + self.set_data( + f"{key}-pedir", + get_fmap_metadata, + "pe_direction", + {}, + "", + "fmap_pedir_ingress", + ) + self.set_data( + f"{key}-total-readout", + get_fmap_metadata, + "total_readout", + {}, + "", + "fmap_readout_ingress", + ) + + if "phase" in key or "mag" in key: + diff = True + + if 
re.match("epi_[AP]{2}", orig_key): + blip = True + + if diff: + calc_delta_ratio = pe.Node( + Function( + input_names=["effective_echo_spacing", "echo_times"], + output_names=["deltaTE", "ees_asym_ratio"], + function=calc_delta_te_and_asym_ratio, + imports=["from typing import Optional"], + ), + name=f"diff_distcor_calc_delta{name_suffix}", + ) + + gather_echoes = pe.Node( + Function( + input_names=[ + "echotime_1", + "echotime_2", + "echotime_3", + "echotime_4", + ], + output_names=["echotime_list"], + function=gather_echo_times, + ), + name="fugue_gather_echo_times", + ) + + for idx, fmap_file in enumerate(fmap_TE_list, start=1): + try: + node, out_file = self.get_data( + fmap_file, f"['{fmap_file}:fmap_TE_ingress']" + ) + self.wf.connect( + node, out_file, gather_echoes, f"echotime_{idx}" + ) + except KeyError: + pass + + self.wf.connect( + gather_echoes, "echotime_list", calc_delta_ratio, "echo_times" + ) + + # Add in nodes to get parameters from configuration file + # a node which checks if scan_parameters are present for each scan + scan_params = pe.Node( + Function( + input_names=[ + "data_config_scan_params", + "subject_id", + "scan", + "pipeconfig_tr", + "pipeconfig_tpattern", + "pipeconfig_start_indx", + "pipeconfig_stop_indx", + ], + output_names=[ + "tr", + "tpattern", + "template", + "ref_slice", + "start_indx", + "stop_indx", + "pe_direction", + "effective_echo_spacing", + ], + function=get_scan_params, + imports=["from CPAC.utils.utils import check, try_fetch_parameter"], + ), + name=f"bold_scan_params_{self.part_id}{name_suffix}", + ) + scan_params.inputs.subject_id = self.part_id + scan_params.inputs.set( + pipeconfig_start_indx=self.cfg.functional_preproc["truncation"]["start_tr"], # type: ignore[attr-defined] + pipeconfig_stop_indx=self.cfg.functional_preproc["truncation"]["stop_tr"], # type: ignore[attr-defined] + ) + + node, out = self.get_data("scan", "['scan:func_ingress']") + self.wf.connect(node, out, scan_params, "scan") + + # Workaround for extracting metadata with ingress + if self.check_rpool("derivatives-dir"): + selectrest_json = pe.Node( + Function( + input_names=["scan", "rest_dict", "resource"], + output_names=["file_path"], + function=get_rest, + as_module=True, + ), + name="selectrest_json", + ) + selectrest_json.inputs.rest_dict = self.data_paths.as_dict() + selectrest_json.inputs.resource = "scan_parameters" + self.wf.connect(node, out, selectrest_json, "scan") + self.wf.connect( + selectrest_json, "file_path", scan_params, "data_config_scan_params" + ) + + else: + # wire in the scan parameter workflow + node, out = self.get_data( + "scan-params", "['scan-params:scan_params_ingress']" + ) + self.wf.connect(node, out, scan_params, "data_config_scan_params") + + self.set_data("TR", scan_params, "tr", {}, "", "func_metadata_ingress") + self.set_data( + "tpattern", scan_params, "tpattern", {}, "", "func_metadata_ingress" + ) + self.set_data( + "template", scan_params, "template", {}, "", "func_metadata_ingress" + ) + self.set_data( + "start-tr", scan_params, "start_indx", {}, "", "func_metadata_ingress" + ) + self.set_data( + "stop-tr", scan_params, "stop_indx", {}, "", "func_metadata_ingress" + ) + self.set_data( + "pe-direction", scan_params, "pe_direction", {}, "", "func_metadata_ingress" + ) + + if diff: + # Connect EffectiveEchoSpacing from functional metadata + self.set_data( + "effectiveEchoSpacing", + scan_params, + "effective_echo_spacing", + {}, + "", + "func_metadata_ingress", + ) + node, out_file = self.get_data( + "effectiveEchoSpacing", 
"['effectiveEchoSpacing:func_metadata_ingress']" + ) + self.wf.connect(node, out_file, calc_delta_ratio, "effective_echo_spacing") + self.set_data( + "deltaTE", calc_delta_ratio, "deltaTE", {}, "", "deltaTE_ingress" + ) + self.set_data( + "ees-asym-ratio", + calc_delta_ratio, + "ees_asym_ratio", + {}, + "", + "ees_asym_ratio_ingress", + ) + + return diff, blip, fmap_rp_list + + def ingress_pipeconfig_paths(self): + """Ingress config file paths.""" + # TODO: may want to change the resource keys for each to include one level up in the YAML as well + + import pkg_resources as p + + template_csv = p.resource_filename("CPAC", "resources/cpac_templates.csv") + template_df = pd.read_csv(template_csv, keep_default_na=False) + + for row in template_df.itertuples(): + key = row.Key + val = row.Pipeline_Config_Entry + val = self.cfg.get_nested(self.cfg, [x.lstrip() for x in val.split(",")]) + resolution = row.Intended_Resolution_Config_Entry + desc = row.Description + + if not val: + continue + + if resolution: + res_keys = [x.lstrip() for x in resolution.split(",")] + tag = res_keys[-1] + json_info = {} + + if "$FSLDIR" in val: + val = val.replace( + "$FSLDIR", self.cfg.pipeline_setup["system_config"]["FSLDIR"] + ) + if "$priors_path" in val: + priors_path = ( + self.cfg.segmentation["tissue_segmentation"]["FSL-FAST"][ + "use_priors" + ]["priors_path"] + or "" + ) + if "$FSLDIR" in priors_path: + priors_path = priors_path.replace( + "$FSLDIR", self.cfg.pipeline_setup["system_config"]["FSLDIR"] + ) + val = val.replace("$priors_path", priors_path) + if "${resolution_for_anat}" in val: + val = val.replace( + "${resolution_for_anat}", + self.cfg.registration_workflows["anatomical_registration"][ + "resolution_for_anat" + ], + ) + if "${func_resolution}" in val: + val = val.replace( + "${func_resolution}", + self.cfg.registration_workflows["functional_registration"][ + "func_registration_to_template" + ]["output_resolution"][tag], + ) + + if desc: + template_name, _template_desc = lookup_identifier(val) + if template_name: + desc = f"{template_name} - {desc}" + json_info["Description"] = f"{desc} - {val}" + if resolution: + resolution = self.cfg.get_nested(self.cfg, res_keys) + json_info["Resolution"] = resolution + + resampled_template = pe.Node( + Function( + input_names=["resolution", "template", "template_name", "tag"], + output_names=["resampled_template"], + function=resolve_resolution, + as_module=True, + ), + name="resampled_" + key, + ) + + resampled_template.inputs.resolution = resolution + resampled_template.inputs.template = val + resampled_template.inputs.template_name = key + resampled_template.inputs.tag = tag + + # the set_data below is set up a little differently, because we are + # injecting and also over-writing already-existing entries + # other alternative would have been to ingress into the + # resampled_template node from the already existing entries, but we + # didn't do that here + self.set_data( + key, + resampled_template, + "resampled_template", + json_info, + "", + "template_resample", + ) # pipe_idx (after the blank json {}) should be the previous strat that you want deleted! 
because you're not connecting this the regular way, you have to do it manually + + elif val: + config_ingress = create_general_datasource(f"gather_{key}") + config_ingress.inputs.inputnode.set( + unique_id=self.unique_id, + data=val, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + ) + self.set_data( + key, + config_ingress, + "outputspec.data", + json_info, + "", + f"{key}_config_ingress", + ) + + def create_func_datasource( + self, rest_dict: dict | pd.DataFrame, wf_name="func_datasource" + ) -> pe.Workflow: + """Create a :py:class:`~nipype.pipeline.engine.Workflow` to gather timeseries data. + + Return the functional timeseries-related file paths for each series/scan from the + dictionary of functional files described in the data configuration (sublist) YAML + file. + + Scan input (from inputnode) is an iterable. + """ + wf = pe.Workflow(name=wf_name) + + inputnode = pe.Node( + util.IdentityInterface( + fields=["subject", "scan", "creds_path", "dl_dir"], + mandatory_inputs=True, + ), + name="inputnode", + ) + + outputnode = pe.Node( + util.IdentityInterface( + fields=[ + "subject", + "rest", + "scan", + "scan_params", + "phase_diff", + "magnitude", + ] + ), + name="outputspec", + ) + + # have this here for now because of the big change in the data + # configuration format + # (Not necessary with ingress - format does not comply) + if not self.check_rpool("derivatives-dir"): + check_scan = pe.Node( + Function( + input_names=["func_scan_dct", "scan"], + output_names=[], + function=check_func_scan, + as_module=True, + ), + name="check_func_scan", + ) + + check_scan.inputs.func_scan_dct = rest_dict + wf.connect(inputnode, "scan", check_scan, "scan") + + # get the functional scan itself + selectrest = pe.Node( + Function( + input_names=["scan", "rest_dict", "resource"], + output_names=["file_path"], + function=get_rest, + as_module=True, + ), + name="selectrest", + ) + selectrest.inputs.rest_dict = rest_dict + selectrest.inputs.resource = "scan" + wf.connect(inputnode, "scan", selectrest, "scan") + + # check to see if it's on an Amazon AWS S3 bucket, and download it, if it + # is - otherwise, just return the local file path + check_s3_node = pe.Node( + Function( + input_names=["file_path", "creds_path", "dl_dir", "img_type"], + output_names=["local_path"], + function=check_for_s3, + as_module=True, + ), + name="check_for_s3", + ) + + wf.connect(selectrest, "file_path", check_s3_node, "file_path") + wf.connect(inputnode, "creds_path", check_s3_node, "creds_path") + wf.connect(inputnode, "dl_dir", check_s3_node, "dl_dir") + check_s3_node.inputs.img_type = "func" + + wf.connect(inputnode, "subject", outputnode, "subject") + wf.connect(check_s3_node, "local_path", outputnode, "rest") + wf.connect(inputnode, "scan", outputnode, "scan") + + # scan parameters CSV + select_scan_params = pe.Node( + Function( + input_names=["scan", "rest_dict", "resource"], + output_names=["file_path"], + function=get_rest, + as_module=True, + ), + name="select_scan_params", + ) + select_scan_params.inputs.rest_dict = rest_dict + select_scan_params.inputs.resource = "scan_parameters" + wf.connect(inputnode, "scan", select_scan_params, "scan") + + # if the scan parameters file is on AWS S3, download it + s3_scan_params = pe.Node( + Function( + input_names=["file_path", "creds_path", "dl_dir", "img_type"], + output_names=["local_path"], + function=check_for_s3, + as_module=True, + ), + name="s3_scan_params", + ) + + wf.connect(select_scan_params, "file_path", s3_scan_params, 
"file_path") + wf.connect(inputnode, "creds_path", s3_scan_params, "creds_path") + wf.connect(inputnode, "dl_dir", s3_scan_params, "dl_dir") + wf.connect(s3_scan_params, "local_path", outputnode, "scan_params") + + return wf + + def ingress_raw_func_data(self): + """Ingress raw functional data.""" + func_paths_dct: dict | pd.DataFrame + local_func_scans: list[str] | np.ndarray # TODO: array typing + if isinstance(self.data_paths, DataPaths): + func_paths_dct = self.data_paths.func + local_func_scans = [ + func_paths_dct[scan]["scan"] + for scan in func_paths_dct.keys() + if not func_paths_dct[scan]["scan"].startswith("s3://") + ] + else: + func_paths_dct = ResourceIO.subset(self.data_paths[1], "datatype", "func") + local_func_scans = cast(np.ndarray, func_paths_dct["file_path"].values) + func_wf = self.create_func_datasource( + func_paths_dct, f"func_ingress_{self.part_id}_{self.ses_id}" + ) + func_wf.inputs.inputnode.set( + subject=self.part_id, + creds_path=self.creds_path, + dl_dir=self.cfg["pipeline_setup", "working_directory", "path"], + ) + func_wf.get_node("inputnode").iterables = ("scan", list(func_paths_dct.keys())) + + self.set_data("subject", func_wf, "outputspec.subject", {}, "", "func_ingress") + self.set_data("bold", func_wf, "outputspec.rest", {}, "", "func_ingress") + self.set_data("scan", func_wf, "outputspec.scan", {}, "", "func_ingress") + self.set_data( + "scan-params", + func_wf, + "outputspec.scan_params", + {}, + "", + "scan_params_ingress", + ) + + # TODO: CHECK FOR PARAMETERS + + diff, blip, fmap_rp_list = self.ingress_func_metadata() + + # Memoize list of local functional scans + # TODO: handle S3 files + # Skip S3 files for now + + if local_func_scans: + # pylint: disable=protected-access + self.wf._local_func_scans = local_func_scans + if self.cfg.pipeline_setup["Debugging"]["verbose"]: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("local_func_scans: %s", local_func_scans) + del local_func_scans + + return diff, blip, fmap_rp_list + + def func_outdir_ingress(self, func_dict: dict, key: str, func_paths: dict) -> None: + """Ingress a functional output directory.""" + pipe_x = len(self.pipe_list) + ingress = self.create_func_datasource( + func_dict, f"gather_func_outdir_{key}_{pipe_x}" + ) + ingress.inputs.inputnode.set( + subject=self.unique_id, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] + ) + self.set_data("subject", ingress, "outputspec.subject", {}, "", "func_ingress") + ingress.get_node("inputnode").iterables = ("scan", list(func_dict.keys())) + self.set_data(key, ingress, "outputspec.rest", {}, "", "func_ingress") + + self.set_data("scan", ingress, "outputspec.scan", {}, "", "func_ingress") + self.set_data( + "scan-params", + ingress, + "outputspec.scan_params", + {}, + "", + "scan_params_ingress", + ) + self.ingress_func_metadata() + + # Have to do it this weird way to save the parsed BIDS tag & filepath + mask_paths_key = ( + "desc-bold_mask" + if "desc-bold_mask" in func_paths + else "space-template_desc-bold_mask" + ) + ts_paths_key = "pipeline-ingress_desc-confounds_timeseries" + + # Connect func data with approproate scan name + iterables = pe.Node( + Function( + input_names=["scan", "mask_paths", "ts_paths"], + output_names=["out_scan", "mask", "confounds"], + function=set_iterables, + ), + name=f"set_iterables_{pipe_x}", + ) + iterables.inputs.mask_paths = func_paths[mask_paths_key] + iterables.inputs.ts_paths = func_paths[ts_paths_key] + 
self.wf.connect(ingress, "outputspec.scan", iterables, "scan") + + for key in func_paths: + if key in (mask_paths_key, ts_paths_key): + ingress_func = create_general_datasource(f"ingress_func_data_{key}") + ingress_func.inputs.inputnode.set( + unique_id=self.unique_id, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] + ) + self.wf.connect(iterables, "out_scan", ingress_func, "inputnode.scan") + if key == mask_paths_key: + self.wf.connect(iterables, "mask", ingress_func, "inputnode.data") + self.set_data( + key, + ingress_func, + "inputnode.data", + {}, + "", + f"outdir_{key}_ingress", + ) + elif key == ts_paths_key: + self.wf.connect( + iterables, "confounds", ingress_func, "inputnode.data" + ) + self.set_data( + key, + ingress_func, + "inputnode.data", + {}, + "", + f"outdir_{key}_ingress", + ) + + def json_outdir_ingress( + self, filepath: Path | str, data_label: str, json: dict + ) -> tuple[dict, tuple[str, str], str, str]: + """Ingress sidecars from a BIDS derivatives directory.""" + desc_val = None + for tag in data_label.split("_"): + if "desc-" in tag: + desc_val = tag + break + jsonpath = str(filepath) + for ext in EXTS: + jsonpath = jsonpath.replace(ext, "") + jsonpath = f"{jsonpath}.json" + + if not os.path.exists(jsonpath): + WFLOGGER.info( + "\n\n[!] No JSON found for file %s.\nCreating %s..\n\n", + filepath, + jsonpath, + ) + json_info = { + "Description": "This data was generated elsewhere and " + "supplied by the user into this C-PAC run's " + "output directory. This JSON file was " + "automatically generated by C-PAC because a " + "JSON file was not supplied with the data." + } + json_info = {**json_info, **json} + write_output_json(json_info, jsonpath) + else: + json_info = read_json(jsonpath) + json_info = {**json_info, **json} + if "CpacProvenance" in json_info: + if desc_val: + # it's a C-PAC output, let's check for pipe_idx/strat integer + # suffixes in the desc- entries. + only_desc = str(desc_val) + + if only_desc[-1].isdigit(): + for _strat_idx in range(0, 3): + # let's stop at 3, please don't run >999 strategies okay? + if only_desc[-1].isdigit(): + only_desc = only_desc[:-1] + + if only_desc[-1] == "-": + only_desc = only_desc.rstrip("-") + else: + msg = ( + "\n[!] Something went wrong with either " + "reading in the output directory or when " + "it was written out previously.\n\nGive " + "this to your friendly local C-PAC " + f"developer:\n\n{data_label!s}\n" + ) + raise IOError(msg) + + # remove the integer at the end of the desc-* variant, we will + # get the unique pipe_idx from the CpacProvenance below + data_label = data_label.replace(desc_val, only_desc) + + # preserve cpac provenance/pipe_idx + pipe_idx = self.generate_prov_string(json_info["CpacProvenance"]) + node_name = "" + else: + json_info["CpacProvenance"] = [f"{data_label}:Non-C-PAC Origin: {filepath}"] # type: ignore [assignment] + if "Description" not in json_info: + json_info["Description"] = ( + "This data was generated elsewhere and " + "supplied by the user into this C-PAC run's " + "output directory. This JSON file was " + "automatically generated by C-PAC because a " + "JSON file was not supplied with the data." 
+ ) + pipe_idx = self.generate_prov_string(json_info["CpacProvenance"]) + node_name = f"{data_label}_ingress" + + return json_info, pipe_idx, node_name, data_label + + def ingress_raw_anat_data(self) -> None: + """Ingress raw anatomical data.""" + if (isinstance(self.data_paths, DataPaths) and not self.data_paths.anat) or ( + not isinstance(self.data_paths, DataPaths) + and "anat" not in self.data_paths[1]["datatype"].values + ): + WFLOGGER.warning("No anatomical data present.") + return + + anat_flow = create_anat_datasource(f"anat_T1w_gather_{self.unique_id}") + + anat = {} + if isinstance(self.data_paths, DataPaths): + if "T1w" in self.data_paths.anat: + anat["T1"] = self.data_paths.anat["T1w"] + else: + anat_data = ResourceIO.subset(self.data_paths[1], "datatype", "anat") + if "T1w" in anat_data["suffix"].values: + anat["T1"] = anat_data["file_path"].values[0] + + if "T1" in anat: + anat_flow.inputs.inputnode.set( + subject=self.part_id, + anat=anat["T1"], + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] + img_type="anat", + ) + self.set_data("T1w", anat_flow, "outputspec.anat", {}, "", "anat_ingress") + + if isinstance(self.data_paths, DataPaths) and "T2w" in self.data_paths.anat: + anat_flow_T2 = create_anat_datasource( + f"anat_T2w_gather_{self.part_id}_{self.ses_id}" + ) + anat_flow_T2.inputs.inputnode.set( + subject=self.part_id, + anat=self.data_paths.anat["T2w"], + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] + img_type="anat", + ) + self.set_data( + "T2w", anat_flow_T2, "outputspec.anat", {}, "", "anat_ingress" + ) + + if self.cfg.surface_analysis["freesurfer"]["ingress_reconall"]: # type: ignore[attr-defined] + self.ingress_freesurfer() + + def connect_block(self, wf: pe.Workflow, block: NodeBlock) -> pe.Workflow: # noqa: PLR0912,PLR0915 + """Connect a :py:class:`~CPAC.pipeline.engine.nodeblock.NodeBlock` via the `ResourcePool`.""" + debug = bool(self.cfg.pipeline_setup["Debugging"]["verbose"]) # type: ignore [attr-defined] + all_opts: list[str] = [] + + sidecar_additions = { + "CpacConfigHash": hashlib.sha1( + json.dumps(self.cfg.dict(), sort_keys=True).encode("utf-8") + ).hexdigest(), + "CpacConfig": self.cfg.dict(), + } + + if self.cfg["pipeline_setup"]["output_directory"].get("user_defined"): + sidecar_additions["UserDefined"] = self.cfg["pipeline_setup"][ + "output_directory" + ]["user_defined"] + + for name, block_dct in block.node_blocks.items(): + # iterates over either the single node block in the sequence, or a list of node blocks within the list of node blocks, i.e. for option forking. 
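+ # For orientation, a minimal sketch (values are invented, not taken from any
+ # real node block) of the fields this loop reads from each `block_dct`; the
+ # keys mirror the lookups below ("switch", "config", "option_key",
+ # "option_val", "inputs", "outputs", "block_function"):
+ #     {"switch": ["run"], "config": ["example_section"],
+ #      "option_key": "None", "option_val": "None",
+ #      "inputs": ["desc-preproc_T1w"], "outputs": ["desc-brain_mask"],
+ #      "block_function": some_nodeblock_function}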
+ switch = _check_null(block_dct["switch"]) + config = _check_null(block_dct["config"]) + option_key = _check_null(block_dct["option_key"]) + option_val = _check_null(block_dct["option_val"]) + inputs: NODEBLOCK_INPUTS = _check_null(block_dct["inputs"]) + outputs: NODEBLOCK_OUTPUTS = _check_null(block_dct["outputs"]) + + block_function: NodeBlockFunction = block_dct["block_function"] + + opts = [] + if option_key and option_val: + if not isinstance(option_key, list): + option_key = [option_key] + if not isinstance(option_val, list): + option_val = [option_val] + if config: + key_list = config + option_key + else: + key_list = option_key + if "USER-DEFINED" in option_val: + # load custom config data into each 'opt' + opts = self.cfg[key_list] + else: + for option in option_val: + try: + if option in self.cfg[key_list]: + # goes over the option_vals in the node block docstring, and checks if the user's pipeline config included it in the forking list + opts.append(option) + except AttributeError as err: + msg = f"{err}\nNode Block: {name}" + raise Exception(msg) + + if opts is None: + opts = [opts] + + elif option_key and not option_val: + # enables multiple config forking entries + if not isinstance(option_key[0], list): + msg = ( + f"[!] The option_key field ({option_key}) " + f"for {name} exists but there is no " + "option_val.\n\nIf you are trying to " + "populate multiple option keys, the " + "option_val field must contain a list of " + "a list.\n" + ) + raise ValueError(msg) + for option_config in option_key: + # option_config is a list of pipe config levels down to the option + if config: + key_list = config + option_config + else: + key_list = option_config + option_val = option_config[-1] + if option_val in self.cfg[key_list[:-1]]: + opts.append(option_val) + else: # AND, if there are multiple option-val's (in a list) in the docstring, it gets iterated below in 'for opt in option' etc. AND THAT'S WHEN YOU HAVE TO DELINEATE WITHIN THE NODE BLOCK CODE!!! + opts = [None] + # THIS ALSO MEANS the multiple option-val's in docstring node blocks can be entered once in the entire node-block sequence, not in a list of multiples + if not opts: + # for node blocks where the options are split into different + # block functions - opts will be empty for non-selected + # options, and would waste the get_strats effort below + continue + all_opts += opts + + if not switch: + switch = [True] + else: + if config: + try: + key_list = config + switch + except TypeError as te: + msg = ( + "\n\n[!] 
Developer info: Docstring error " + f"for {name}, make sure the 'config' or " + "'switch' fields are lists.\n\n" + ) + raise TypeError(msg) from te + switch = self.cfg[key_list] + elif isinstance(switch[0], list): + # we have multiple switches, which is designed to only work if + # config is set to "None" + switch_list = [] + for key_list in switch: + val = self.cfg[key_list] + if isinstance(val, list): + # fork switches + if True in val: + switch_list.append(True) + if False in val: + switch_list.append(False) + else: + switch_list.append(val) + if False in switch_list: + switch = [False] + else: + switch = [True] + else: + # if config is set to "None" + key_list = switch + switch = self.cfg[key_list] + if not isinstance(switch, list): + switch = [switch] + if True in switch: + for ( + pipe_idx, + strat_pool, # strat_pool is a ResourcePool like {'desc-preproc_T1w': { 'json': info, 'data': (node, out) }, 'desc-brain_mask': etc.} + ) in self.get_strats(inputs, debug).items(): + # keep in mind rpool.get_strats(inputs) = {pipe_idx1: {'desc-preproc_T1w': etc.}, pipe_idx2: {..} } + fork = False in switch + for opt in opts: # it's a dictionary of ResourcePools called strat_pools, except those sub-ResourcePools only have one level! no pipe_idx strat keys. + # remember, you can get 'data' or 'json' from strat_pool with member functions + # strat_pool has all of the JSON information of all the inputs! + # so when we set_data below for the TOP-LEVEL MAIN RPOOL (not the strat_pool), we can generate new merged JSON information for each output. + # particularly, our custom 'CpacProvenance' field. + node_name = name + pipe_x = self._get_pipe_number(pipe_idx) + + replaced_inputs = [] + for interface in block.input_interface: + if isinstance(interface[1], list): + for input_name in interface[1]: + if strat_pool.check_rpool(input_name): + break + else: + input_name = interface[1] + strat_pool.copy_resource(input_name, interface[0]) + replaced_inputs.append(interface[0]) + try: + wf, outs = block_function( + wf, self.cfg, strat_pool, pipe_x, opt + ) + except IOError as e: # duplicate node + WFLOGGER.warning(e) + continue + + if not outs: + if block_function.__name__ == "freesurfer_postproc": + WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA) + LOGTAIL["warnings"].append( + WARNING_FREESURFER_OFF_WITH_DATA + ) + continue + + if opt and len(option_val) > 1: + node_name = f"{node_name}_{opt}" + elif opt and "USER-DEFINED" in option_val: + node_name = f'{node_name}_{opt["Name"]}' + + if debug: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("\n=======================") + verbose_logger.debug("Node name: %s", node_name) + prov_dct = self.get_resource_strats_from_prov( + ast.literal_eval(str(pipe_idx)) + ) + for key, val in prov_dct.items(): + verbose_logger.debug("-------------------") + verbose_logger.debug("Input - %s:", key) + sub_prov_dct = self.get_resource_strats_from_prov(val) + for sub_key, sub_val in sub_prov_dct.items(): + sub_sub_dct = self.get_resource_strats_from_prov( + sub_val + ) + verbose_logger.debug(" sub-input - %s:", sub_key) + verbose_logger.debug(" prov = %s", sub_val) + verbose_logger.debug( + " sub_sub_inputs = %s", sub_sub_dct.keys() + ) + + for label, connection in outs.items(): + block.check_output(outputs, label, name) + new_json_info = strat_pool.json + + # transfer over data-specific json info + # for example, if the input data json is _bold and the output is also _bold + data_type = label.split("_")[-1] + if data_type in new_json_info["subjson"]: + if ( + 
"SkullStripped" + in new_json_info["subjson"][data_type] + ): + new_json_info["SkullStripped"] = new_json_info[ + "subjson" + ][data_type]["SkullStripped"] + + # determine sources for the outputs, i.e. all input data into the node block + new_json_info["Sources"] = [ + x + for x in strat_pool.rpool + if x != "json" and x not in replaced_inputs + ] + + if isinstance(outputs, dict): + new_json_info.update(outputs[label]) + if "Description" not in outputs[label]: + # don't propagate old Description + try: + del new_json_info["Description"] + except KeyError: + pass + if "Template" in outputs[label]: + template_key = outputs[label]["Template"] + if template_key in new_json_info["Sources"]: + # only if the pipeline config template key is entered as the 'Template' field + # otherwise, skip this and take in the literal 'Template' string + try: + new_json_info["Template"] = new_json_info[ + "subjson" + ][template_key]["Description"] + except KeyError: + pass + try: + new_json_info["Resolution"] = new_json_info[ + "subjson" + ][template_key]["Resolution"] + except KeyError: + pass + else: + # don't propagate old Description + try: + del new_json_info["Description"] + except KeyError: + pass + + if "Description" in new_json_info: + new_json_info["Description"] = " ".join( + new_json_info["Description"].split() + ) + + for sidecar_key, sidecar_value in sidecar_additions.items(): + if sidecar_key not in new_json_info: + new_json_info[sidecar_key] = sidecar_value + + try: + del new_json_info["subjson"] + except KeyError: + pass + + if fork or len(opts) > 1 or len(all_opts) > 1: + if "CpacVariant" not in new_json_info: + new_json_info["CpacVariant"] = {} + raw_label = self.get_raw_label(label) + if raw_label not in new_json_info["CpacVariant"]: + new_json_info["CpacVariant"][raw_label] = [] + new_json_info["CpacVariant"][raw_label].append( + node_name + ) + + self.set_data( + label, + connection[0], + connection[1], + new_json_info, + pipe_idx, + node_name, + fork, + ) + + wf, post_labels = self.post_process( + wf, + label, + connection, + new_json_info, + pipe_idx, + pipe_x, + outs, + ) + + if self.func_reg: + for postlabel in post_labels: + connection = ResourceData( # noqa: PLW2901 + postlabel[1], postlabel[2] + ) + wf = self.derivative_xfm( + wf, + postlabel[0], + connection, + new_json_info, + pipe_idx, + pipe_x, + ) + return wf + + def connect_pipeline( + self, + wf: pe.Workflow, + cfg: Configuration, + pipeline_blocks: PIPELINE_BLOCKS, + ) -> pe.Workflow: + """Connect the pipeline blocks to the workflow.""" + from CPAC.pipeline.engine.nodeblock import NodeBlockFunction, PIPELINE_BLOCKS + + WFLOGGER.info( + "Connecting pipeline blocks:\n%s", + NodeBlock.list_blocks(pipeline_blocks, indent=1), + ) + previous_nb: Optional[NodeBlockFunction | PIPELINE_BLOCKS] = None + for block in pipeline_blocks: + try: + wf = self.connect_block( + wf, + NodeBlock( + block, debug=cfg["pipeline_setup", "Debugging", "verbose"] + ), + ) + except LookupError as e: + if getattr(block, "name", "") == "freesurfer_postproc": + WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA) + LOGTAIL["warnings"].append(WARNING_FREESURFER_OFF_WITH_DATA) + continue + previous_nb_str = ( + (f"after node block '{previous_nb.name}':") + if isinstance(previous_nb, NodeBlockFunction) + else "at beginning:" + ) + # Alert user to block that raises error + if isinstance(block, list): + node_block_names = str([NodeBlock(b).name for b in block]) + e.args = ( + f"When trying to connect one of the node blocks " + f"{node_block_names} " + f"to workflow 
'{wf}' {previous_nb_str} {e.args[0]}", + ) + else: + node_block_names = NodeBlock(block).name + e.args = ( + f"When trying to connect node block " + f"'{node_block_names}' " + f"to workflow '{wf}' {previous_nb_str} {e.args[0]}", + ) + if cfg.pipeline_setup["Debugging"]["verbose"]: # type: ignore [attr-defined] + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug(e.args[0]) + verbose_logger.debug(self) + raise + previous_nb = block + + return wf + + def derivative_xfm( + self, + wf: pe.Workflow, + label: str, + connection: ResourceData | tuple[pe.Node | pe.Workflow, str], + json_info: dict, + pipe_idx: str | tuple, + pipe_x: int, + ) -> pe.Workflow: + """Find the appropriate bold-to-template transform for given `pipe_idx`.""" + if label in self.xfm: + json_info = dict(json_info) + + # get the bold-to-template transform from the current strat_pool info + xfm_idx: Optional[str | tuple] = None + xfm_label = "from-bold_to-template_mode-image_xfm" + for entry in json_info["CpacProvenance"]: + if isinstance(entry, list): + if entry[-1].split(":")[0] == xfm_label: + xfm_prov = entry + xfm_idx = self.generate_prov_string(xfm_prov)[1] + break + + # but if the resource doesn't have the bold-to-template transform + # in its provenance/strategy, find the appropriate one for this + # current pipe_idx/strat + xfm_info: list[tuple[str | tuple, list]] + if not xfm_idx: + xfm_info = [] + for pipe_idx, entry in self.get(xfm_label).items(): + xfm_info.append((pipe_idx, entry.cpac_provenance)) + else: + xfm_info = [(xfm_idx, xfm_prov)] + + for num, xfm_entry in enumerate(xfm_info): + xfm_idx, xfm_prov = xfm_entry + reg_tool = check_prov_for_regtool(xfm_prov) + + xfm = transform_derivative( + f"{label}_xfm_{pipe_x}_{num}", + label, + reg_tool, + self.num_cpus, + self.num_ants_cores, + ants_interp=self.ants_interp, + fsl_interp=self.fsl_interp, + opt=None, + ) + wf.connect(connection[0], connection[1], xfm, "inputspec.in_file") + + node, out = self.get_data("T1w-brain-template-deriv", quick_single=True) + wf.connect(node, out, xfm, "inputspec.reference") + + node, out = self.get_data( + "from-bold_to-template_mode-image_xfm", pipe_idx=xfm_idx + ) + wf.connect(node, out, xfm, "inputspec.transform") + + label = f"space-template_{label}" + json_info["Template"] = self.get_json_info( + "T1w-brain-template-deriv", "Description" + ) + new_prov = json_info["CpacProvenance"] + xfm_prov + json_info["CpacProvenance"] = new_prov + new_pipe_idx = self.generate_prov_string(new_prov) + self.set_data( + label, + xfm, + "outputspec.out_file", + json_info, + new_pipe_idx, + f"{label}_xfm_{num}", + fork=True, + ) + + return wf + + def post_process( + self, + wf: pe.Workflow, + label: str, + connection: ResourceData | tuple[pe.Node | pe.Workflow, str], + json_info: dict, + pipe_idx: str | tuple, + pipe_x: int, + outs: dict[str, ResourceData], + ) -> tuple[pe.Workflow, list[tuple[str, pe.Node | pe.Workflow, str]]]: + """Connect smoothing and z-scoring, if configured.""" + input_type = "func_derivative" + + post_labels = [(label, connection[0], connection[1])] + + if re.match(r"(.*_)?[ed]c[bw]$", label) or re.match(r"(.*_)?lfcd[bw]$", label): + # suffix: [eigenvector or degree] centrality [binarized or weighted] + # or lfcd [binarized or weighted] + mask = "template-specification-file" + elif "space-template" in label: + if "space-template_res-derivative_desc-bold_mask" in self.keys(): + mask = "space-template_res-derivative_desc-bold_mask" + else: + mask = "space-template_desc-bold_mask" + else: + mask = 
"space-bold_desc-brain_mask" + + mask_idx = None + for entry in json_info["CpacProvenance"]: + if isinstance(entry, list): + if entry[-1].split(":")[0] == mask: + mask_prov = entry + mask_idx = self.generate_prov_string(mask_prov)[1] + break + + if self.smoothing_bool: + if label in Outputs.to_smooth: + for smooth_opt in self.smooth_opts: + sm = spatial_smoothing( + f"{label}_smooth_{smooth_opt}_{pipe_x}", + self.fwhm, + input_type, + smooth_opt, + ) + wf.connect(connection[0], connection[1], sm, "inputspec.in_file") + node, out = self.get_data( + mask, pipe_idx=mask_idx, quick_single=mask_idx is None + ) + wf.connect(node, out, sm, "inputspec.mask") + + if "desc-" not in label: + if "space-" in label: + for tag in label.split("_"): + if "space-" in tag: + smlabel = label.replace(tag, f"{tag}_desc-sm") + break + else: + smlabel = f"desc-sm_{label}" + else: + for tag in label.split("_"): + if "desc-" in tag: + newtag = f"{tag}-sm" + smlabel = label.replace(tag, newtag) + break + + post_labels.append((smlabel, sm, "outputspec.out_file")) + + self.set_data( + smlabel, + sm, + "outputspec.out_file", + json_info, + pipe_idx, + f"spatial_smoothing_{smooth_opt}", + fork=True, + ) + self.set_data( + "fwhm", + sm, + "outputspec.fwhm", + json_info, + pipe_idx, + f"spatial_smoothing_{smooth_opt}", + fork=True, + ) + + if self.zscoring_bool: + for label_con_tpl in post_labels: + label = label_con_tpl[0] + connection = (label_con_tpl[1], label_con_tpl[2]) + if label in Outputs.to_zstd: + zstd = z_score_standardize(f"{label}_zstd_{pipe_x}", input_type) + + wf.connect(connection[0], connection[1], zstd, "inputspec.in_file") + + node, out = self.get_data(mask, pipe_idx=mask_idx) + wf.connect(node, out, zstd, "inputspec.mask") + + if "desc-" not in label: + if "space-template" in label: + new_label = label.replace( + "space-template", "space-template_desc-zstd" + ) + else: + new_label = f"desc-zstd_{label}" + else: + for tag in label.split("_"): + if "desc-" in tag: + newtag = f"{tag}-zstd" + new_label = label.replace(tag, newtag) + break + + post_labels.append((new_label, zstd, "outputspec.out_file")) + + self.set_data( + new_label, + zstd, + "outputspec.out_file", + json_info, + pipe_idx, + "zscore_standardize", + fork=True, + ) + + elif label in Outputs.to_fisherz: + zstd = fisher_z_score_standardize( + f"{label}_zstd_{pipe_x}", label, input_type + ) + + wf.connect( + connection[0], connection[1], zstd, "inputspec.correlation_file" + ) + + # if the output is 'space-template_desc-MeanSCA_correlations', we want 'desc-MeanSCA_timeseries' + oned = label.replace("correlations", "timeseries") + + node, out = outs[oned] + wf.connect(node, out, zstd, "inputspec.timeseries_oned") + + post_labels.append((new_label, zstd, "outputspec.out_file")) + + self.set_data( + new_label, + zstd, + "outputspec.out_file", + json_info, + pipe_idx, + "fisher_zscore_standardize", + fork=True, + ) + + return wf, post_labels + + @staticmethod + def get_resource_strats_from_prov(prov: list | str) -> dict[str, list | str]: + """Return all entries that led to this provenance. 
+ + If you provide the provenance of a `ResourcePool` output, this will + return a dictionary of all the preceding `ResourcePool` entries that + led to that one specific output:: + {rpool entry}: {that entry's provenance} + {rpool entry}: {that entry's provenance} + """ + strat_resource: dict[str, list | str] = {} + if isinstance(prov, str): + resource = prov.split(":")[0] + strat_resource[resource] = prov + else: + for entry in prov: + if isinstance(entry, list): + resource = entry[-1].split(":")[0] + strat_resource[resource] = entry + elif isinstance(entry, str): + resource = entry.split(":")[0] + strat_resource[resource] = entry + return strat_resource + + def _config_lookup( + self, keylist: str | list[str], fallback_type: type = NoneType + ) -> Any: + """Lookup a :py:class:`~CPAC.utils.configuration.Configuration` key, return ``None`` if not found.""" + try: + return self.cfg[keylist] + except (AttributeError, KeyError): + return fallback_type() + + def _get_pipe_number(self, pipe_idx: str | tuple) -> int: + """Return the index of a strategy in `self.pipe_list`.""" + return self.pipe_list.index(pipe_idx) + + def _get_unlabelled(self, resource: str) -> set[str]: + """Get unlabelled :py:class:`Resource` s. + + These :py:class:`Resource` s need integer suffixes to differentiate. + """ + from CPAC.func_preproc.func_motion import motion_estimate_filter + + all_jsons = [ + self.rpool[resource][pipe_idx]._json for pipe_idx in self.rpool[resource] + ] + unlabelled = { + key + for json_info in all_jsons + for key in json_info.get("CpacVariant", {}).keys() + if key not in (*motion_estimate_filter.outputs, "regressors") + } + if "bold" in unlabelled: + all_bolds = list( + chain.from_iterable( + json_info["CpacVariant"]["bold"] + for json_info in all_jsons + if "CpacVariant" in json_info and "bold" in json_info["CpacVariant"] + ) + ) + if all( + re.match(r"apply_(phasediff|blip)_to_timeseries_separately_.*", _bold) + for _bold in all_bolds + ): + # this fork point should only result in 0 or 1 forks + unlabelled.remove("bold") + del all_bolds + all_forks = { + key: set( + chain.from_iterable( + json_info["CpacVariant"][key] + for json_info in all_jsons + if "CpacVariant" in json_info and key in json_info["CpacVariant"] + ) + ) + for key in unlabelled + } + del all_jsons + for key, forks in all_forks.items(): + if len(forks) < 2: # noqa: PLR2004 + # no int suffix needed if only one fork + unlabelled.remove(key) + del all_forks + return unlabelled + + +class StratPool(_Pool): + """A pool of :py:class:`ResourcePool` s keyed by strategy.""" + + def __init__( + self, + cfg: Configuration, + *, + rpool: Optional[dict] = None, + name: str | list[str] = "", + ) -> None: + """Initialize a `StratPool`.""" + super().__init__() + self.rpool: STRAT_DICT + if not rpool: + self.rpool = {} + else: + self.rpool = rpool + self._json: dict[str, dict] = {"subjson": {}} + self.cfg = cfg + if not isinstance(name, list): + name = [name] + self.name: list[str] = name # type: ignore # pyright: ignore[reportIncompatibleVariantOverride] + self._regressor_dct: dict = {} + + def append_name(self, name: str) -> None: + """Append a name to the `StratPool`.""" + self.name.append(name) + + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX] = None, + report_fetched: Literal[False] = False, + *, + optional: Literal[True], + ) -> Optional[Resource]: ... 
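+ # These @overload stubs (above and below) only narrow the static return type
+ # for type checkers; the single runtime implementation follows after them.
+ # A hypothetical caller (resource names are illustrative) would see, e.g.:
+ #     pool.get("desc-preproc_T1w")                   # -> Resource
+ #     pool.get("desc-preproc_T1w", optional=True)    # -> Optional[Resource]
+ #     pool.get(["T1w", "T2w"], report_fetched=True)  # -> (Resource, str)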
+ @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX], + report_fetched: Literal[True], + optional: Literal[True], + ) -> tuple[Optional[Resource], Optional[str]]: ... + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX] = None, + *, + report_fetched: Literal[True], + optional: Literal[False], + ) -> tuple[Resource, str]: ... + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX] = None, + report_fetched: bool = False, + *, + optional: Literal[True], + ) -> Optional[Resource] | tuple[Optional[Resource], Optional[str]]: ... + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX] = None, + report_fetched: Literal[False] = False, + optional: Literal[False] = False, + ) -> Resource: ... + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX] = None, + *, + report_fetched: Literal[True], + optional: Literal[False] = False, + ) -> tuple[Resource, str]: ... + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX] = None, + report_fetched: bool = False, + optional: bool = False, + ) -> Optional[Resource] | tuple[Optional[Resource], Optional[str]]: ... + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX] = None, + report_fetched: bool = False, + optional: bool = False, + ): + """Return a :py:class:`Resource` .""" + return super().get(resource, pipe_idx, report_fetched, optional) + + @overload + def get_data( + self, resource: list[str] | str, report_fetched: Literal[True] + ) -> tuple[ResourceData, str]: ... + @overload + def get_data( + self, resource: list[str] | str, report_fetched: Literal[False] = False + ) -> ResourceData: ... + def get_data(self, resource, report_fetched=False): + """Get :py:class:`ResourceData` from a `StratPool`.""" + _resource = self.get(resource, report_fetched=report_fetched) + if report_fetched: + assert isinstance(_resource, tuple) + connect, fetched = _resource + assert isinstance(connect, Resource) and isinstance(fetched, str) + return connect.data, fetched + assert isinstance(_resource, Resource) + return _resource.data + + def get_json(self, resource: str) -> dict: + """Get JSON metadata from a :py:class:`Resource` in a `StratPool`.""" + return self.get(resource).json + + json = property( + fget=Resource.get_json, + fset=Resource.set_json, + doc="""Return a deep copy of full-`StratPool`-strategy-specific JSON.""", + ) + + def get_cpac_provenance(self, resource: list[str] | str) -> list: + """Get "CpacProvenance" for a given :py:class:`Resource` .""" + # NOTE: strat_resource has to be entered properly by the developer + # it has to either be rpool[resource][strat] or strat_pool[resource] + if isinstance(resource, list): + for _resource in resource: + try: + return self.get_cpac_provenance(_resource) + except KeyError: + continue + return self.get(resource).cpac_provenance + + def copy_resource(self, resource: str, new_name: str): + """Copy a :py:class:`Resource` within a `StratPool`.""" + try: + self.rpool[new_name] = self.rpool[resource] + except KeyError: + msg = f"[!] {resource} not in the resource pool." + raise Exception(msg) + + def filter_name(self, cfg: Configuration) -> str: + """ + Return the name of the filter for this strategy. + + In a `StratPool` with filtered movement parameters. 
+ """ + motion_filters = cfg[ + "functional_preproc", + "motion_estimates_and_correction", + "motion_estimate_filter", + "filters", + ] + if len(motion_filters) == 1 and cfg.switch_is_on( + [ + "functional_preproc", + "motion_estimates_and_correction", + "motion_estimate_filter", + "run", + ], + exclusive=True, + ): + return motion_filters[0]["Name"] + try: + key = "motion" + sidecar = self.get_json("desc-movementParameters_motion") + except KeyError: + sidecar = None + if sidecar is not None and "CpacVariant" in sidecar: + if sidecar["CpacVariant"][key]: + return sidecar["CpacVariant"][key][0][::-1].split("_", 1)[0][::-1] + return "none" + + def preserve_json_info(self, resource: str, strat_resource: Resource) -> None: + """Preserve JSON info when updating a `StratPool`.""" + data_type = resource.split("_")[-1] + if data_type not in self._json["subjson"]: + self._json["subjson"][data_type] = {} + self._json["subjson"][data_type].update(strat_resource.json) + + @property + def regressor_dct(self) -> dict: + """Return the regressor dictionary for the current strategy if one exists. + + Raises + ------ + KeyError + If regressor dictionary does not exist in current strategy. + """ + # pylint: disable=attribute-defined-outside-init + if hasattr(self, "_regressor_dct") and self._regressor_dct: # memoized + # pylint: disable=access-member-before-definition + return self._regressor_dct + key_error = KeyError( + "[!] No regressors in resource pool. \n\n" + "Try turning on create_regressors or " + "ingress_regressors." + ) + _nr = self.cfg["nuisance_corrections", "2-nuisance_regression"] + if not hasattr(self, "timeseries"): + if _nr["Regressors"]: + self.regressors = {reg["Name"]: reg for reg in _nr["Regressors"]} + else: + self.regressors = [] + if self.check_rpool("parsed_regressors"): # ingressed regressor + # name regressor workflow without regressor_prov + strat_name = _nr["ingress_regressors"]["Regressors"]["Name"] + if strat_name in self.regressors: + self._regressor_dct = self.regressors[strat_name] + return self._regressor_dct + self._regressor_dct = _nr["ingress_regressors"]["Regressors"] + return self._regressor_dct + prov = self.get_cpac_provenance("desc-confounds_timeseries") + strat_name_components = prov[-1].split("_") + for _ in list(range(prov[-1].count("_"))): + reg_name = "_".join(strat_name_components[-_:]) + if isinstance(self.regressors, dict) and reg_name in self.regressors: + self._regressor_dct = self.regressors[reg_name] + return self._regressor_dct + raise key_error + + @property + def filtered_movement(self) -> bool: + """Check if the movement parameters have been filtered in this `StratPool`.""" + try: + return "motion_estimate_filter" in str( + self.get_cpac_provenance("desc-movementParameters_motion") + ) + except KeyError: + # not a strat_pool or no movement parameters in strat_pool + return False + + +def _check_null(val: Any) -> Any: + """Return ``None`` if `val` == "none" (case-insensitive).""" + if isinstance(val, str): + val = None if val.lower() == "none" else val + return val diff --git a/CPAC/pipeline/engine/resource_pool.py b/CPAC/pipeline/engine/resource_pool.py deleted file mode 100644 index ba31bbf618..0000000000 --- a/CPAC/pipeline/engine/resource_pool.py +++ /dev/null @@ -1,1469 +0,0 @@ - -# This file is part of C-PAC. 
- -# C-PAC is free software: you can redistribute it and/or modify it under -# the terms of the GNU Lesser General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. - -# C-PAC is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public -# License along with C-PAC. If not, see . -import ast -import copy -import hashlib -from itertools import chain -import json -import logging -import os -import re -from typing import Optional -import warnings -import pandas as pd - - -from nipype import config -from nipype.interfaces.utility import Rename - -from CPAC.image_utils.spatial_smoothing import spatial_smoothing -from CPAC.image_utils.statistical_transforms import ( - fisher_z_score_standardize, - z_score_standardize, -) -from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.check_outputs import ExpectedOutputs -from CPAC.pipeline.nodeblock import NodeBlockFunction -from CPAC.pipeline.utils import MOVEMENT_FILTER_KEYS, name_fork, source_set -from CPAC.registration.registration import transform_derivative -from CPAC.resources.templates.lookup_table import lookup_identifier -from CPAC.utils.bids_utils import res_in_filename -from CPAC.utils.configuration import Configuration -from CPAC.utils.datasource import ( - create_anat_datasource, - create_func_datasource, - create_general_datasource, - ingress_func_metadata, - resolve_resolution, -) -from CPAC.utils.interfaces.datasink import DataSink -from CPAC.utils.interfaces.function import Function -from CPAC.utils.monitoring import ( - getLogger, - LOGTAIL, - WARNING_FREESURFER_OFF_WITH_DATA, - WFLOGGER, -) -from CPAC.utils.outputs import Outputs -from CPAC.utils.utils import ( - check_prov_for_regtool, - create_id_string, - get_last_prov_entry, - read_json, - write_output_json, -) - -from typing import Optional -from .resource import Resource - -class ResourcePool: - def __init__(self, rpool=None, name=None, cfg=None, pipe_list=None): - if not rpool: - self.rpool = {} - else: - self.rpool = rpool - - if not pipe_list: - self.pipe_list = [] - else: - self.pipe_list = pipe_list - - self.name = name - self.info = {} - - if cfg: - self.cfg = cfg - self.logdir = cfg.pipeline_setup["log_directory"]["path"] - - self.num_cpus = cfg.pipeline_setup["system_config"][ - "max_cores_per_participant" - ] - self.num_ants_cores = cfg.pipeline_setup["system_config"][ - "num_ants_threads" - ] - - self.ants_interp = cfg.registration_workflows["functional_registration"][ - "func_registration_to_template" - ]["ANTs_pipelines"]["interpolation"] - self.fsl_interp = cfg.registration_workflows["functional_registration"][ - "func_registration_to_template" - ]["FNIRT_pipelines"]["interpolation"] - - self.func_reg = cfg.registration_workflows["functional_registration"][ - "func_registration_to_template" - ]["run"] - - self.run_smoothing = ( - "smoothed" in cfg.post_processing["spatial_smoothing"]["output"] - ) - self.smoothing_bool = cfg.post_processing["spatial_smoothing"]["run"] - self.run_zscoring = "z-scored" in cfg.post_processing["z-scoring"]["output"] - self.zscoring_bool = cfg.post_processing["z-scoring"]["run"] - self.fwhm = cfg.post_processing["spatial_smoothing"]["fwhm"] - self.smooth_opts = 
cfg.post_processing["spatial_smoothing"][ - "smoothing_method" - ] - - self.xfm = [ - "alff", - "desc-sm_alff", - "desc-zstd_alff", - "desc-sm-zstd_alff", - "falff", - "desc-sm_falff", - "desc-zstd_falff", - "desc-sm-zstd_falff", - "reho", - "desc-sm_reho", - "desc-zstd_reho", - "desc-sm-zstd_reho", - ] - - def __repr__(self) -> str: - params = [ - f"{param}={getattr(self, param)}" - for param in ["rpool", "name", "cfg", "pipe_list"] - if getattr(self, param, None) is not None - ] - return f'ResourcePool({", ".join(params)})' - - def __str__(self) -> str: - if self.name: - return f"ResourcePool({self.name}): {list(self.rpool)}" - return f"ResourcePool: {list(self.rpool)}" - - def append_name(self, name): - self.name.append(name) - - def back_propogate_template_name( - self, wf, resource_idx: str, json_info: dict, id_string: "pe.Node" - ) -> None: - """Find and apply the template name from a resource's provenance. - - Parameters - ---------- - resource_idx : str - - json_info : dict - - id_string : pe.Node - - Returns - ------- - None - """ - if "template" in resource_idx and self.check_rpool("derivatives-dir"): - if self.check_rpool("template"): - node, out = self.get_data("template") - wf.connect(node, out, id_string, "template_desc") - elif "Template" in json_info: - id_string.inputs.template_desc = json_info["Template"] - elif ( - "template" in resource_idx and len(json_info.get("CpacProvenance", [])) > 1 - ): - for resource in source_set(json_info["CpacProvenance"]): - source, value = resource.split(":", 1) - if value.startswith("template_") and source != "FSL-AFNI-bold-ref": - # 'FSL-AFNI-bold-ref' is currently allowed to be in - # a different space, so don't use it as the space for - # descendents - try: - anscestor_json = next(iter(self.rpool.get(source).items()))[ - 1 - ].get("json", {}) - if "Description" in anscestor_json: - id_string.inputs.template_desc = anscestor_json[ - "Description" - ] - return - except (IndexError, KeyError): - pass - return - - def get_name(self): - return self.name - - def check_rpool(self, resource): - if not isinstance(resource, list): - resource = [resource] - for name in resource: - if name in self.rpool: - return True - return False - - def get_pipe_number(self, pipe_idx): - return self.pipe_list.index(pipe_idx) - - def get_pool_info(self): - return self.info - - def set_pool_info(self, info_dct): - self.info.update(info_dct) - - def get_entire_rpool(self): - return self.rpool - - def get_resources(self): - return self.rpool.keys() - - def copy_rpool(self): - return ResourcePool( - rpool=copy.deepcopy(self.get_entire_rpool()), - name=self.name, - cfg=self.cfg, - pipe_list=copy.deepcopy(self.pipe_list), - ) - - @staticmethod - def get_raw_label(resource: str) -> str: - """Remove ``desc-*`` label.""" - for tag in resource.split("_"): - if "desc-" in tag: - resource = resource.replace(f"{tag}_", "") - break - return resource - - def get_strat_info(self, prov, label=None, logdir=None): - strat_info = {} - for entry in prov: - if isinstance(entry, list): - strat_info[entry[-1].split(":")[0]] = entry - elif isinstance(entry, str): - strat_info[entry.split(":")[0]] = entry.split(":")[1] - if label: - if not logdir: - logdir = self.logdir - WFLOGGER.info( - "\n\nPrinting out strategy info for %s in %s\n", label, logdir - ) - write_output_json( - strat_info, f"{label}_strat_info", indent=4, basedir=logdir - ) - - def set_json_info(self, resource, pipe_idx, key, val): - # TODO: actually should probably be able to inititialize resource/pipe_idx - if pipe_idx 
not in self.rpool[resource]: - msg = ( - "\n[!] DEV: The pipeline/strat ID does not exist " - f"in the resource pool.\nResource: {resource}" - f"Pipe idx: {pipe_idx}\nKey: {key}\nVal: {val}\n" - ) - raise Exception(msg) - if "json" not in self.rpool[resource][pipe_idx]: - self.rpool[resource][pipe_idx]["json"] = {} - self.rpool[resource][pipe_idx]["json"][key] = val - - def get_json_info(self, resource, pipe_idx, key): - # TODO: key checks - if not pipe_idx: - for pipe_idx, val in self.rpool[resource].items(): - return val["json"][key] - return self.rpool[resource][pipe_idx][key] - - @staticmethod - def get_resource_from_prov(prov): - # each resource (i.e. "desc-cleaned_bold" AKA nuisance-regressed BOLD - # data) has its own provenance list. the name of the resource, and - # the node that produced it, is always the last item in the provenance - # list, with the two separated by a colon : - if not len(prov): - return None - if isinstance(prov[-1], list): - return prov[-1][-1].split(":")[0] - if isinstance(prov[-1], str): - return prov[-1].split(":")[0] - return None - - def regressor_dct(self, cfg) -> dict: - """Return the regressor dictionary for the current strategy if one exists. - - Raises KeyError otherwise. - """ - # pylint: disable=attribute-defined-outside-init - if hasattr(self, "_regressor_dct"): # memoized - # pylint: disable=access-member-before-definition - return self._regressor_dct - key_error = KeyError( - "[!] No regressors in resource pool. \n\n" - "Try turning on create_regressors or " - "ingress_regressors." - ) - _nr = cfg["nuisance_corrections", "2-nuisance_regression"] - if not hasattr(self, "timeseries"): - if _nr["Regressors"]: - self.regressors = {reg["Name"]: reg for reg in _nr["Regressors"]} - else: - self.regressors = [] - if self.check_rpool("parsed_regressors"): # ingressed regressor - # name regressor workflow without regressor_prov - strat_name = _nr["ingress_regressors"]["Regressors"]["Name"] - if strat_name in self.regressors: - self._regressor_dct = self.regressors[strat_name] - return self._regressor_dct - self.regressor_dct = _nr["ingress_regressors"]["Regressors"] - return self.regressor_dct - prov = self.get_cpac_provenance("desc-confounds_timeseries") - strat_name_components = prov[-1].split("_") - for _ in list(range(prov[-1].count("_"))): - reg_name = "_".join(strat_name_components[-_:]) - if reg_name in self.regressors: - self._regressor_dct = self.regressors[reg_name] - return self._regressor_dct - raise key_error - - def set_data( - self, - resource, - node, - output, - json_info, - pipe_idx, - node_name, - fork=False, - inject=False, - ): - json_info = json_info.copy() - cpac_prov = [] - if "CpacProvenance" in json_info: - cpac_prov = json_info["CpacProvenance"] - current_prov_list = list(cpac_prov) - new_prov_list = list(cpac_prov) # <---- making a copy, it was already a list - if not inject: - new_prov_list.append(f"{resource}:{node_name}") - try: - res, new_pipe_idx = self.generate_prov_string(new_prov_list) - except IndexError: - msg = ( - f"\n\nThe set_data() call for {resource} has no " - "provenance information and should not be an " - "injection." 
- ) - raise IndexError(msg) - if not json_info: - json_info = { - "RawSources": [ - resource # <---- this will be repopulated to the full file path at the end of the pipeline building, in gather_pipes() - ] - } - json_info["CpacProvenance"] = new_prov_list - - if resource not in self.rpool.keys(): - self.rpool[resource] = {} - elif not fork: # <--- in the event of multiple strategies/options, this will run for every option; just keep in mind - search = False - if self.get_resource_from_prov(current_prov_list) == resource: - # CHANGING PIPE_IDX, BE CAREFUL DOWNSTREAM IN THIS FUNCTION - pipe_idx = self.generate_prov_string(current_prov_list)[1] - if pipe_idx not in self.rpool[resource].keys(): - search = True - else: - search = True - if search: - for idx in current_prov_list: - if self.get_resource_from_prov(idx) == resource: - if isinstance(idx, list): - # CHANGING PIPE_IDX, BE CAREFUL DOWNSTREAM IN THIS FUNCTION - pipe_idx = self.generate_prov_string(idx)[1] - elif isinstance(idx, str): - pipe_idx = idx - break - if pipe_idx in self.rpool[resource].keys(): - # in case the resource name is now new, and not the original - # remove old keys so we don't end up with a new strat for every new node unit (unless we fork) - del self.rpool[resource][pipe_idx] - if new_pipe_idx not in self.rpool[resource]: - self.rpool[resource][new_pipe_idx] = {} - if new_pipe_idx not in self.pipe_list: - self.pipe_list.append(new_pipe_idx) - - self.rpool[resource][new_pipe_idx]["data"] = (node, output) - self.rpool[resource][new_pipe_idx]["json"] = json_info - - def get( - self, - resource: list[str] | str, - pipe_idx: Optional[str] = None, - report_fetched: Optional[bool] = False, - optional: Optional[bool] = False, - ) -> tuple[Optional[dict], Optional[str]] | Optional[dict]: - # NOTE!!! - # if this is the main rpool, this will return a dictionary of strats, and inside those, are dictionaries like {'data': (node, out), 'json': info} - # BUT, if this is a sub rpool (i.e. a strat_pool), this will return a one-level dictionary of {'data': (node, out), 'json': info} WITHOUT THE LEVEL OF STRAT KEYS ABOVE IT - if not isinstance(resource, list): - resource = [resource] - # if a list of potential inputs are given, pick the first one found - for label in resource: - if label in self.rpool.keys(): - _found = self.rpool[label] - if pipe_idx: - _found = _found[pipe_idx] - if report_fetched: - return _found, label - return _found - if optional: - if report_fetched: - return (None, None) - return None - msg = ( - "\n\n[!] C-PAC says: None of the listed resources are in " - f"the resource pool:\n\n {resource}\n\nOptions:\n- You " - "can enable a node block earlier in the pipeline which " - "produces these resources. 
Check the 'outputs:' field in " - "a node block's documentation.\n- You can directly " - "provide this required data by pulling it from another " - "BIDS directory using 'source_outputs_dir:' in the " - "pipeline configuration, or by placing it directly in " - "your C-PAC output directory.\n- If you have done these, " - "and you still get this message, please let us know " - "through any of our support channels at: " - "https://fcp-indi.github.io/\n" - ) - raise LookupError(msg) - - def get_data( - self, resource, pipe_idx=None, report_fetched=False, quick_single=False - ): - if report_fetched: - if pipe_idx: - connect, fetched = self.get( - resource, pipe_idx=pipe_idx, report_fetched=report_fetched - ) - return (connect["data"], fetched) - connect, fetched = self.get(resource, report_fetched=report_fetched) - return (connect["data"], fetched) - if pipe_idx: - return self.get(resource, pipe_idx=pipe_idx)["data"] - if quick_single or len(self.get(resource)) == 1: - for _key, val in self.get(resource).items(): - return val["data"] - return self.get(resource)["data"] - - def copy_resource(self, resource, new_name): - try: - self.rpool[new_name] = self.rpool[resource] - except KeyError: - msg = f"[!] {resource} not in the resource pool." - raise Exception(msg) - - def update_resource(self, resource, new_name): - # move over any new pipe_idx's - self.rpool[new_name].update(self.rpool[resource]) - - def get_pipe_idxs(self, resource): - return self.rpool[resource].keys() - - def get_json(self, resource, strat=None): - # NOTE: resource_strat_dct has to be entered properly by the developer - # it has to either be rpool[resource][strat] or strat_pool[resource] - if strat: - resource_strat_dct = self.rpool[resource][strat] - else: - # for strat_pools mainly, where there is no 'strat' key level - resource_strat_dct = self.rpool[resource] - - # TODO: the below hits the exception if you use get_cpac_provenance on - # TODO: the main rpool (i.e. if strat=None) - if "json" in resource_strat_dct: - strat_json = resource_strat_dct["json"] - else: - msg = ( - "\n[!] Developer info: the JSON " - f"information for {resource} and {strat} " - f"is incomplete.\n" - ) - raise Exception(msg) - return strat_json - - def get_cpac_provenance(self, resource, strat=None): - # NOTE: resource_strat_dct has to be entered properly by the developer - # it has to either be rpool[resource][strat] or strat_pool[resource] - if isinstance(resource, list): - for _resource in resource: - try: - return self.get_cpac_provenance(_resource, strat) - except KeyError: - continue - json_data = self.get_json(resource, strat) - return json_data["CpacProvenance"] - - @staticmethod - def generate_prov_string(prov): - # this will generate a string from a SINGLE RESOURCE'S dictionary of - # MULTIPLE PRECEDING RESOURCES (or single, if just one) - # NOTE: this DOES NOT merge multiple resources!!! (i.e. for merging-strat pipe_idx generation) - if not isinstance(prov, list): - msg = ( - "\n[!] Developer info: the CpacProvenance " - f"entry for {prov} has to be a list.\n" - ) - raise TypeError(msg) - last_entry = get_last_prov_entry(prov) - resource = last_entry.split(":")[0] - return (resource, str(prov)) - - @staticmethod - def generate_prov_list(prov_str): - if not isinstance(prov_str, str): - msg = ( - "\n[!] 
Developer info: the CpacProvenance " - f"entry for {prov_str!s} has to be a string.\n" - ) - raise TypeError(msg) - return ast.literal_eval(prov_str) - - @staticmethod - def get_resource_strats_from_prov(prov): - # if you provide the provenance of a resource pool output, this will - # return a dictionary of all the preceding resource pool entries that - # led to that one specific output: - # {rpool entry}: {that entry's provenance} - # {rpool entry}: {that entry's provenance} - resource_strat_dct = {} - if isinstance(prov, str): - resource = prov.split(":")[0] - resource_strat_dct[resource] = prov - else: - for spot, entry in enumerate(prov): - if isinstance(entry, list): - resource = entry[-1].split(":")[0] - resource_strat_dct[resource] = entry - elif isinstance(entry, str): - resource = entry.split(":")[0] - resource_strat_dct[resource] = entry - return resource_strat_dct - - def flatten_prov(self, prov): - if isinstance(prov, str): - return [prov] - if isinstance(prov, list): - flat_prov = [] - for entry in prov: - if isinstance(entry, list): - flat_prov += self.flatten_prov(entry) - else: - flat_prov.append(entry) - return flat_prov - return None - - def get_strats(self, resources, debug=False): - # TODO: NOTE: NOT COMPATIBLE WITH SUB-RPOOL/STRAT_POOLS - # TODO: (and it doesn't have to be) - - import itertools - - linked_resources = [] - resource_list = [] - if debug: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("\nresources: %s", resources) - for resource in resources: - # grab the linked-input tuples - if isinstance(resource, tuple): - linked = [] - for label in list(resource): - rp_dct, fetched_resource = self.get( - label, report_fetched=True, optional=True - ) - if not rp_dct: - continue - linked.append(fetched_resource) - resource_list += linked - if len(linked) < 2: # noqa: PLR2004 - continue - linked_resources.append(linked) - else: - resource_list.append(resource) - - total_pool = [] - variant_pool = {} - len_inputs = len(resource_list) - if debug: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("linked_resources: %s", linked_resources) - verbose_logger.debug("resource_list: %s", resource_list) - for resource in resource_list: - ( - rp_dct, # <---- rp_dct has the strats/pipe_idxs as the keys on first level, then 'data' and 'json' on each strat level underneath - fetched_resource, - ) = self.get( - resource, - report_fetched=True, - optional=True, # oh, and we make the resource fetching in get_strats optional so we can have optional inputs, but they won't be optional in the node block unless we want them to be - ) - if not rp_dct: - len_inputs -= 1 - continue - sub_pool = [] - if debug: - verbose_logger.debug("len(rp_dct): %s\n", len(rp_dct)) - for strat in rp_dct.keys(): - json_info = self.get_json(fetched_resource, strat) - cpac_prov = json_info["CpacProvenance"] - sub_pool.append(cpac_prov) - if fetched_resource not in variant_pool: - variant_pool[fetched_resource] = [] - if "CpacVariant" in json_info: - for key, val in json_info["CpacVariant"].items(): - if val not in variant_pool[fetched_resource]: - variant_pool[fetched_resource] += val - variant_pool[fetched_resource].append(f"NO-{val[0]}") - - if debug: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("%s sub_pool: %s\n", resource, sub_pool) - total_pool.append(sub_pool) - - if not total_pool: - raise LookupError( - "\n\n[!] 
C-PAC says: None of the listed " - "resources in the node block being connected " - "exist in the resource pool.\n\nResources:\n" - "%s\n\n" % resource_list - ) - - # TODO: right now total_pool is: - # TODO: [[[T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-preproc_T1w:acpc_alignment], [T1w:anat_ingress,desc-preproc_T1w:anatomical_init]], - # TODO: [[T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-preproc_T1w:acpc_alignment, desc-brain_mask:brain_mask_afni], [T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-brain_mask:brain_mask_afni]]] - - # TODO: and the code below thinks total_pool is a list of lists, like [[pipe_idx, pipe_idx], [pipe_idx, pipe_idx, pipe_idx], etc.] - # TODO: and the actual resource is encoded in the tag: of the last item, every time! - # keying the strategies to the resources, inverting it - if len_inputs > 1: - strats = itertools.product(*total_pool) - - # we now currently have "strats", the combined permutations of all the strategies, as a list of tuples, each tuple combining one version of input each, being one of the permutations. - # OF ALL THE DIFFERENT INPUTS. and they are tagged by their fetched inputs with {name}:{strat}. - # so, each tuple has ONE STRAT FOR EACH INPUT, so if there are three inputs, each tuple will have 3 items. - new_strats = {} - - # get rid of duplicates - TODO: refactor .product - strat_str_list = [] - strat_list_list = [] - for strat_tuple in strats: - strat_list = list(copy.deepcopy(strat_tuple)) - strat_str = str(strat_list) - if strat_str not in strat_str_list: - strat_str_list.append(strat_str) - strat_list_list.append(strat_list) - - if debug: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("len(strat_list_list): %s\n", len(strat_list_list)) - for strat_list in strat_list_list: - json_dct = {} - for strat in strat_list: - # strat is a prov list for a single resource/input - strat_resource, strat_idx = self.generate_prov_string(strat) - strat_json = self.get_json(strat_resource, strat=strat_idx) - json_dct[strat_resource] = strat_json - - drop = False - if linked_resources: - for linked in linked_resources: # <--- 'linked' is each tuple - if drop: - break - for xlabel in linked: - if drop: - break - xjson = copy.deepcopy(json_dct[xlabel]) - for ylabel in linked: - if xlabel == ylabel: - continue - yjson = copy.deepcopy(json_dct[ylabel]) - - if "CpacVariant" not in xjson: - xjson["CpacVariant"] = {} - if "CpacVariant" not in yjson: - yjson["CpacVariant"] = {} - - current_strat = [] - for key, val in xjson["CpacVariant"].items(): - if isinstance(val, list): - current_strat.append(val[0]) - else: - current_strat.append(val) - current_spread = list(set(variant_pool[xlabel])) - for spread_label in current_spread: - if "NO-" in spread_label: - continue - if spread_label not in current_strat: - current_strat.append(f"NO-{spread_label}") - - other_strat = [] - for key, val in yjson["CpacVariant"].items(): - if isinstance(val, list): - other_strat.append(val[0]) - else: - other_strat.append(val) - other_spread = list(set(variant_pool[ylabel])) - for spread_label in other_spread: - if "NO-" in spread_label: - continue - if spread_label not in other_strat: - other_strat.append(f"NO-{spread_label}") - - for variant in current_spread: - in_current_strat = False - in_other_strat = False - in_other_spread = False - - if variant is None: - in_current_strat = True - if None in other_spread: - in_other_strat = True - if variant in current_strat: - in_current_strat = True - if variant in other_strat: - 
in_other_strat = True - if variant in other_spread: - in_other_spread = True - - if not in_other_strat: - if in_other_spread: - if in_current_strat: - drop = True - break - - if in_other_strat: - if in_other_spread: - if not in_current_strat: - drop = True - break - if drop: - break - if drop: - continue - - # make the merged strat label from the multiple inputs - # strat_list is actually the merged CpacProvenance lists - pipe_idx = str(strat_list) - new_strats[pipe_idx] = ResourcePool() - # new_strats is A DICTIONARY OF RESOURCEPOOL OBJECTS! - # placing JSON info at one level higher only for copy convenience - new_strats[pipe_idx].rpool["json"] = {} - new_strats[pipe_idx].rpool["json"]["subjson"] = {} - new_strats[pipe_idx].rpool["json"]["CpacProvenance"] = strat_list - - # now just invert resource:strat to strat:resource for each resource:strat - for cpac_prov in strat_list: - resource, strat = self.generate_prov_string(cpac_prov) - resource_strat_dct = self.rpool[resource][strat] - # remember, `resource_strat_dct` is the dct of 'data' and 'json'. - new_strats[pipe_idx].rpool[resource] = resource_strat_dct - # `new_strats` is A DICTIONARY OF RESOURCEPOOL OBJECTS! each one is a new slice of the resource pool combined together. - self.pipe_list.append(pipe_idx) - if "CpacVariant" in resource_strat_dct["json"]: - if "CpacVariant" not in new_strats[pipe_idx].rpool["json"]: - new_strats[pipe_idx].rpool["json"]["CpacVariant"] = {} - for younger_resource, variant_list in resource_strat_dct[ - "json" - ]["CpacVariant"].items(): - if ( - younger_resource - not in new_strats[pipe_idx].rpool["json"]["CpacVariant"] - ): - new_strats[pipe_idx].rpool["json"]["CpacVariant"][ - younger_resource - ] = variant_list - # preserve each input's JSON info also - data_type = resource.split("_")[-1] - if data_type not in new_strats[pipe_idx].rpool["json"]["subjson"]: - new_strats[pipe_idx].rpool["json"]["subjson"][data_type] = {} - new_strats[pipe_idx].rpool["json"]["subjson"][data_type].update( - copy.deepcopy(resource_strat_dct["json"]) - ) - else: - new_strats = {} - for resource_strat_list in total_pool: - # total_pool will have only one list of strats, for the one input - for cpac_prov in resource_strat_list: # <------- cpac_prov here doesn't need to be modified, because it's not merging with other inputs - resource, pipe_idx = self.generate_prov_string(cpac_prov) - resource_strat_dct = self.rpool[resource][pipe_idx] - # remember, `resource_strat_dct` is the dct of 'data' and 'json'. - new_strats[pipe_idx] = ResourcePool( - rpool={resource: resource_strat_dct} - ) # <----- again, new_strats is A DICTIONARY OF RESOURCEPOOL OBJECTS! 
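As an aside for readers of the removed `get_strats` logic above: a minimal, self-contained sketch (illustrative names and values, not C-PAC code) of the core idea, combining each input's available strategies with `itertools.product` and keying each combination by its merged, stringified provenance:

```python
from itertools import product

# Hypothetical per-input strategies: each required input maps to a list of
# CpacProvenance-style lists, one per strategy that can supply that input.
per_input_strats = {
    "desc-preproc_T1w": [
        ["T1w:anat_ingress", "desc-preproc_T1w:anatomical_init"],
        [
            "T1w:anat_ingress",
            "desc-preproc_T1w:anatomical_init",
            "desc-preproc_T1w:acpc_alignment",
        ],
    ],
    "desc-brain_mask": [
        ["T1w:anat_ingress", "desc-brain_mask:brain_mask_afni"],
    ],
}

new_strats = {}
for combo in product(*per_input_strats.values()):
    # One strategy is chosen for each input; the stringified list of
    # provenance lists is the unique key for the combined strategy.
    pipe_idx = str(list(combo))
    new_strats[pipe_idx] = combo  # the real pool stores 'data'/'json' here

print(len(new_strats))  # -> 2 combined strategies for the inputs above
```

The real implementation additionally deduplicates combinations and drops those whose `CpacVariant` metadata is incompatible across linked inputs, as the removed code continues to show.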
- # placing JSON info at one level higher only for copy convenience - new_strats[pipe_idx].rpool["json"] = resource_strat_dct["json"] - # TODO: WARNING- THIS IS A LEVEL HIGHER THAN THE ORIGINAL 'JSON' FOR EASE OF ACCESS IN CONNECT_BLOCK WITH THE .GET(JSON) - new_strats[pipe_idx].rpool["json"]["subjson"] = {} - new_strats[pipe_idx].rpool["json"]["CpacProvenance"] = cpac_prov - # preserve each input's JSON info also - data_type = resource.split("_")[-1] - if data_type not in new_strats[pipe_idx].rpool["json"]["subjson"]: - new_strats[pipe_idx].rpool["json"]["subjson"][data_type] = {} - new_strats[pipe_idx].rpool["json"]["subjson"][data_type].update( - copy.deepcopy(resource_strat_dct["json"]) - ) - return new_strats - - def derivative_xfm(self, wf, label, connection, json_info, pipe_idx, pipe_x): - if label in self.xfm: - json_info = dict(json_info) - - # get the bold-to-template transform from the current strat_pool info - xfm_idx = None - xfm_label = "from-bold_to-template_mode-image_xfm" - for entry in json_info["CpacProvenance"]: - if isinstance(entry, list): - if entry[-1].split(":")[0] == xfm_label: - xfm_prov = entry - xfm_idx = self.generate_prov_string(xfm_prov)[1] - break - - # but if the resource doesn't have the bold-to-template transform - # in its provenance/strategy, find the appropriate one for this - # current pipe_idx/strat - if not xfm_idx: - xfm_info = [] - for pipe_idx, entry in self.get(xfm_label).items(): - xfm_info.append((pipe_idx, entry["json"]["CpacProvenance"])) - else: - xfm_info = [(xfm_idx, xfm_prov)] - - for num, xfm_entry in enumerate(xfm_info): - xfm_idx, xfm_prov = xfm_entry - reg_tool = check_prov_for_regtool(xfm_prov) - - xfm = transform_derivative( - f"{label}_xfm_{pipe_x}_{num}", - label, - reg_tool, - self.num_cpus, - self.num_ants_cores, - ants_interp=self.ants_interp, - fsl_interp=self.fsl_interp, - opt=None, - ) - wf.connect(connection[0], connection[1], xfm, "inputspec.in_file") - - node, out = self.get_data("T1w-brain-template-deriv", quick_single=True) - wf.connect(node, out, xfm, "inputspec.reference") - - node, out = self.get_data( - "from-bold_to-template_mode-image_xfm", pipe_idx=xfm_idx - ) - wf.connect(node, out, xfm, "inputspec.transform") - - label = f"space-template_{label}" - json_info["Template"] = self.get_json_info( - "T1w-brain-template-deriv", None, "Description" - ) - new_prov = json_info["CpacProvenance"] + xfm_prov - json_info["CpacProvenance"] = new_prov - new_pipe_idx = self.generate_prov_string(new_prov) - self.set_data( - label, - xfm, - "outputspec.out_file", - json_info, - new_pipe_idx, - f"{label}_xfm_{num}", - fork=True, - ) - - return wf - - - @property - def filtered_movement(self) -> bool: - """ - Check if the movement parameters have been filtered in this strat_pool. - - Returns - ------- - bool - """ - try: - return "motion_estimate_filter" in str( - self.get_cpac_provenance("desc-movementParameters_motion") - ) - except KeyError: - # not a strat_pool or no movement parameters in strat_pool - return False - - def filter_name(self, cfg: Configuration) -> str: - """ - Return the name of the filter for this strategy. - - In a strat_pool with filtered movement parameters. 
- """ - motion_filters = cfg[ - "functional_preproc", - "motion_estimates_and_correction", - "motion_estimate_filter", - "filters", - ] - if len(motion_filters) == 1 and cfg.switch_is_on( - [ - "functional_preproc", - "motion_estimates_and_correction", - "motion_estimate_filter", - "run", - ], - exclusive=True, - ): - return motion_filters[0]["Name"] - try: - key = "motion" - sidecar = self.get_json("desc-movementParameters_motion") - except KeyError: - sidecar = None - if sidecar is not None and "CpacVariant" in sidecar: - if sidecar["CpacVariant"][key]: - return sidecar["CpacVariant"][key][0][::-1].split("_", 1)[0][::-1] - return "none" - - def post_process(self, wf, label, connection, json_info, pipe_idx, pipe_x, outs): - input_type = "func_derivative" - - post_labels = [(label, connection[0], connection[1])] - - if re.match(r"(.*_)?[ed]c[bw]$", label) or re.match(r"(.*_)?lfcd[bw]$", label): - # suffix: [eigenvector or degree] centrality [binarized or weighted] - # or lfcd [binarized or weighted] - mask = "template-specification-file" - elif "space-template" in label: - if "space-template_res-derivative_desc-bold_mask" in self.rpool.keys(): - mask = "space-template_res-derivative_desc-bold_mask" - else: - mask = "space-template_desc-bold_mask" - else: - mask = "space-bold_desc-brain_mask" - - mask_idx = None - for entry in json_info["CpacProvenance"]: - if isinstance(entry, list): - if entry[-1].split(":")[0] == mask: - mask_prov = entry - mask_idx = self.generate_prov_string(mask_prov)[1] - break - - if self.smoothing_bool: - if label in Outputs.to_smooth: - for smooth_opt in self.smooth_opts: - sm = spatial_smoothing( - f"{label}_smooth_{smooth_opt}_{pipe_x}", - self.fwhm, - input_type, - smooth_opt, - ) - wf.connect(connection[0], connection[1], sm, "inputspec.in_file") - node, out = self.get_data( - mask, pipe_idx=mask_idx, quick_single=mask_idx is None - ) - wf.connect(node, out, sm, "inputspec.mask") - - if "desc-" not in label: - if "space-" in label: - for tag in label.split("_"): - if "space-" in tag: - smlabel = label.replace(tag, f"{tag}_desc-sm") - break - else: - smlabel = f"desc-sm_{label}" - else: - for tag in label.split("_"): - if "desc-" in tag: - newtag = f"{tag}-sm" - smlabel = label.replace(tag, newtag) - break - - post_labels.append((smlabel, sm, "outputspec.out_file")) - - self.set_data( - smlabel, - sm, - "outputspec.out_file", - json_info, - pipe_idx, - f"spatial_smoothing_{smooth_opt}", - fork=True, - ) - self.set_data( - "fwhm", - sm, - "outputspec.fwhm", - json_info, - pipe_idx, - f"spatial_smoothing_{smooth_opt}", - fork=True, - ) - - if self.zscoring_bool: - for label_con_tpl in post_labels: - label = label_con_tpl[0] - connection = (label_con_tpl[1], label_con_tpl[2]) - if label in Outputs.to_zstd: - zstd = z_score_standardize(f"{label}_zstd_{pipe_x}", input_type) - - wf.connect(connection[0], connection[1], zstd, "inputspec.in_file") - - node, out = self.get_data(mask, pipe_idx=mask_idx) - wf.connect(node, out, zstd, "inputspec.mask") - - if "desc-" not in label: - if "space-template" in label: - new_label = label.replace( - "space-template", "space-template_desc-zstd" - ) - else: - new_label = f"desc-zstd_{label}" - else: - for tag in label.split("_"): - if "desc-" in tag: - newtag = f"{tag}-zstd" - new_label = label.replace(tag, newtag) - break - - post_labels.append((new_label, zstd, "outputspec.out_file")) - - self.set_data( - new_label, - zstd, - "outputspec.out_file", - json_info, - pipe_idx, - "zscore_standardize", - fork=True, - ) - - elif label 
in Outputs.to_fisherz: - zstd = fisher_z_score_standardize( - f"{label}_zstd_{pipe_x}", label, input_type - ) - - wf.connect( - connection[0], connection[1], zstd, "inputspec.correlation_file" - ) - - # if the output is 'space-template_desc-MeanSCA_correlations', we want 'desc-MeanSCA_timeseries' - oned = label.replace("correlations", "timeseries") - - node, out = outs[oned] - wf.connect(node, out, zstd, "inputspec.timeseries_oned") - - post_labels.append((new_label, zstd, "outputspec.out_file")) - - self.set_data( - new_label, - zstd, - "outputspec.out_file", - json_info, - pipe_idx, - "fisher_zscore_standardize", - fork=True, - ) - - return (wf, post_labels) - - def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): - excl = [] - substring_excl = [] - outputs_logger = getLogger(f'{cfg["subject_id"]}_expectedOutputs') - expected_outputs = ExpectedOutputs() - - if add_excl: - excl += add_excl - - if "nonsmoothed" not in cfg.post_processing["spatial_smoothing"]["output"]: - excl += Outputs.native_nonsmooth - excl += Outputs.template_nonsmooth - - if "raw" not in cfg.post_processing["z-scoring"]["output"]: - excl += Outputs.native_raw - excl += Outputs.template_raw - - if not cfg.pipeline_setup["output_directory"]["write_debugging_outputs"]: - # substring_excl.append(['bold']) - excl += Outputs.debugging - - for resource in self.rpool.keys(): - if resource not in Outputs.any: - continue - - if resource in excl: - continue - - drop = False - for substring_list in substring_excl: - bool_list = [] - for substring in substring_list: - if substring in resource: - bool_list.append(True) - else: - bool_list.append(False) - for item in bool_list: - if not item: - break - else: - drop = True - if drop: - break - if drop: - continue - - subdir = "other" - if resource in Outputs.anat: - subdir = "anat" - # TODO: get acq- etc. - elif resource in Outputs.func: - subdir = "func" - # TODO: other stuff like acq- etc. - - for pipe_idx in self.rpool[resource]: - unique_id = self.get_name() - part_id = unique_id.split("_")[0] - ses_id = unique_id.split("_")[1] - - if "ses-" not in ses_id: - ses_id = f"ses-{ses_id}" - - out_dir = cfg.pipeline_setup["output_directory"]["path"] - pipe_name = cfg.pipeline_setup["pipeline_name"] - container = os.path.join(f"pipeline_{pipe_name}", part_id, ses_id) - filename = f"{unique_id}_{res_in_filename(self.cfg, resource)}" - - out_path = os.path.join(out_dir, container, subdir, filename) - - out_dct = { - "unique_id": unique_id, - "out_dir": out_dir, - "container": container, - "subdir": subdir, - "filename": filename, - "out_path": out_path, - } - self.rpool[resource][pipe_idx]["out"] = out_dct - - # TODO: have to link the pipe_idx's here. and call up 'desc-preproc_T1w' from a Sources in a json and replace. here. - # TODO: can do the pipeline_description.json variants here too! 
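For orientation, the output-path assembly removed in the hunk above composes the sink location from the pipeline name, participant/session IDs, and output sub-directory. A minimal sketch with illustrative values (the `res_in_filename` refinement of the resource label is omitted here):

```python
import os

# Illustrative values; in C-PAC these come from the pipeline configuration
# and the participant/session currently being processed.
out_dir = "/output"
pipe_name = "cpac-default-pipeline"
part_id, ses_id = "sub-01", "ses-1"
unique_id = f"{part_id}_{ses_id}"
subdir = "func"
resource = "desc-preproc_bold"  # res_in_filename() would refine this label

container = os.path.join(f"pipeline_{pipe_name}", part_id, ses_id)
filename = f"{unique_id}_{resource}"
out_path = os.path.join(out_dir, container, subdir, filename)
print(out_path)
# /output/pipeline_cpac-default-pipeline/sub-01/ses-1/func/sub-01_ses-1_desc-preproc_bold
```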
- - for resource in self.rpool.keys(): - if resource not in Outputs.any: - continue - - if resource in excl: - continue - - drop = False - for substring_list in substring_excl: - bool_list = [] - for substring in substring_list: - if substring in resource: - bool_list.append(True) - else: - bool_list.append(False) - for item in bool_list: - if not item: - break - else: - drop = True - if drop: - break - if drop: - continue - - num_variant = 0 - if len(self.rpool[resource]) == 1: - num_variant = "" - all_jsons = [ - self.rpool[resource][pipe_idx]["json"] - for pipe_idx in self.rpool[resource] - ] - unlabelled = { - key - for json_info in all_jsons - for key in json_info.get("CpacVariant", {}).keys() - if key not in (*MOVEMENT_FILTER_KEYS, "regressors") - } - if "bold" in unlabelled: - all_bolds = list( - chain.from_iterable( - json_info["CpacVariant"]["bold"] - for json_info in all_jsons - if "CpacVariant" in json_info - and "bold" in json_info["CpacVariant"] - ) - ) - # not any(not) because all is overloaded as a parameter here - if not any( - not re.match( - r"apply_(phasediff|blip)_to_timeseries_separately_.*", _bold - ) - for _bold in all_bolds - ): - # this fork point should only result in 0 or 1 forks - unlabelled.remove("bold") - del all_bolds - all_forks = { - key: set( - chain.from_iterable( - json_info["CpacVariant"][key] - for json_info in all_jsons - if "CpacVariant" in json_info - and key in json_info["CpacVariant"] - ) - ) - for key in unlabelled - } - # del all_jsons - for key, forks in all_forks.items(): - if len(forks) < 2: # noqa: PLR2004 - # no int suffix needed if only one fork - unlabelled.remove(key) - # del all_forks - for pipe_idx in self.rpool[resource]: - pipe_x = self.get_pipe_number(pipe_idx) - json_info = self.rpool[resource][pipe_idx]["json"] - out_dct = self.rpool[resource][pipe_idx]["out"] - - try: - if unlabelled: - num_variant += 1 - except TypeError: - pass - - try: - del json_info["subjson"] - except KeyError: - pass - - if out_dct["subdir"] == "other" and not all: - continue - - unique_id = out_dct["unique_id"] - resource_idx = resource - - if isinstance(num_variant, int): - resource_idx, out_dct = name_fork( - resource_idx, cfg, json_info, out_dct - ) - if unlabelled: - if "desc-" in out_dct["filename"]: - for key in out_dct["filename"].split("_")[::-1]: - # final `desc` entity - if key.startswith("desc-"): - out_dct["filename"] = out_dct["filename"].replace( - key, f"{key}-{num_variant}" - ) - resource_idx = resource_idx.replace( - key, f"{key}-{num_variant}" - ) - break - else: - suff = resource.split("_")[-1] - newdesc_suff = f"desc-{num_variant}_{suff}" - resource_idx = resource_idx.replace(suff, newdesc_suff) - id_string = pe.Node( - Function( - input_names=[ - "cfg", - "unique_id", - "resource", - "scan_id", - "template_desc", - "atlas_id", - "fwhm", - "subdir", - "extension", - ], - output_names=["out_filename"], - function=create_id_string, - ), - name=f"id_string_{resource_idx}_{pipe_x}", - ) - id_string.inputs.cfg = self.cfg - id_string.inputs.unique_id = unique_id - id_string.inputs.resource = resource_idx - id_string.inputs.subdir = out_dct["subdir"] - - # grab the iterable scan ID - if out_dct["subdir"] == "func": - node, out = self.rpool["scan"]["['scan:func_ingress']"]["data"] - wf.connect(node, out, id_string, "scan_id") - - self.back_propogate_template_name( - wf, resource_idx, json_info, id_string - ) - # grab the FWHM if smoothed - for tag in resource.split("_"): - if "desc-" in tag and "-sm" in tag: - fwhm_idx = 
pipe_idx.replace(f"{resource}:", "fwhm:") - try: - node, out = self.rpool["fwhm"][fwhm_idx]["data"] - wf.connect(node, out, id_string, "fwhm") - except KeyError: - # smoothing was not done for this resource in the - # engine.py smoothing - pass - break - atlas_suffixes = ["timeseries", "correlations", "statmap"] - # grab the iterable atlas ID - atlas_id = None - if not resource.endswith("desc-confounds_timeseries"): - if resource.split("_")[-1] in atlas_suffixes: - atlas_idx = pipe_idx.replace(resource, "atlas_name") - # need the single quote and the colon inside the double - # quotes - it's the encoded pipe_idx - # atlas_idx = new_idx.replace(f"'{temp_rsc}:", - # "'atlas_name:") - if atlas_idx in self.rpool["atlas_name"]: - node, out = self.rpool["atlas_name"][atlas_idx]["data"] - wf.connect(node, out, id_string, "atlas_id") - elif "atlas-" in resource: - for tag in resource.split("_"): - if "atlas-" in tag: - atlas_id = tag.replace("atlas-", "") - id_string.inputs.atlas_id = atlas_id - else: - warnings.warn( - str( - LookupError( - "\n[!] No atlas ID found for " - f"{out_dct['filename']}.\n" - ) - ) - ) - nii_name = pe.Node(Rename(), name=f"nii_{resource_idx}_{pipe_x}") - nii_name.inputs.keep_ext = True - - if resource in Outputs.ciftis: - nii_name.inputs.keep_ext = False - id_string.inputs.extension = Outputs.ciftis[resource] - else: - nii_name.inputs.keep_ext = True - - if resource in Outputs.giftis: - nii_name.inputs.keep_ext = False - id_string.inputs.extension = f"{Outputs.giftis[resource]}.gii" - - else: - nii_name.inputs.keep_ext = True - - wf.connect(id_string, "out_filename", nii_name, "format_string") - - node, out = self.rpool[resource][pipe_idx]["data"] - try: - wf.connect(node, out, nii_name, "in_file") - except OSError as os_error: - WFLOGGER.warning(os_error) - continue - - write_json_imports = ["import os", "import json"] - write_json = pe.Node( - Function( - input_names=["json_data", "filename"], - output_names=["json_file"], - function=write_output_json, - imports=write_json_imports, - ), - name=f"json_{resource_idx}_{pipe_x}", - ) - write_json.inputs.json_data = json_info - - wf.connect(id_string, "out_filename", write_json, "filename") - ds = pe.Node(DataSink(), name=f"sinker_{resource_idx}_{pipe_x}") - ds.inputs.parameterization = False - ds.inputs.base_directory = out_dct["out_dir"] - ds.inputs.encrypt_bucket_keys = cfg.pipeline_setup["Amazon-AWS"][ - "s3_encryption" - ] - ds.inputs.container = out_dct["container"] - - if cfg.pipeline_setup["Amazon-AWS"]["aws_output_bucket_credentials"]: - ds.inputs.creds_path = cfg.pipeline_setup["Amazon-AWS"][ - "aws_output_bucket_credentials" - ] - expected_outputs += ( - out_dct["subdir"], - create_id_string( - self.cfg, - unique_id, - resource_idx, - template_desc=id_string.inputs.template_desc, - atlas_id=atlas_id, - subdir=out_dct["subdir"], - ), - ) - wf.connect(nii_name, "out_file", ds, f'{out_dct["subdir"]}.@data') - wf.connect(write_json, "json_file", ds, f'{out_dct["subdir"]}.@json') - outputs_logger.info(expected_outputs) - - def node_data(self, resource, **kwargs): - """Create NodeData objects. 
- - Parameters - ---------- - resource : str - - Returns - ------- - NodeData - """ - return NodeData(self, resource, **kwargs) - - def build_rpool( - self, - data_paths, - default_CpacProvenance = ('ingress')): - count = 1 - for index, row in data_paths[1].iterrows(): - # Check if 'meta__json' is not None and contains 'CpacProvenance' - if row.get('meta__json') and row['meta__json'].get('CpacProvenance'): - CpacProvenance = row['meta__json']['CpacProvenance'] - else: - CpacProvenance = default_CpacProvenance - resource = Resource(row, CpacProvenance) - # making the rpool a list so that the duplicates are appended rather than overwritten - self.rpool.setdefault(resource.suffix, []) - self.rpool[resource.suffix].append(resource) - # count += 1 - # if count >10: - # break - - - def write_to_disk(self, path): - for resources in self.rpool.values(): - for item in resources: - print(item['resource'].write_to_disk(path)) - - def get_resource(self, description): - matching_resources = [] - for resources in self.rpool.get(description['suffix'], []): - # Initialize a flag to True, assuming the resource matches until proven otherwise - is_match = True - for key, val in description.items(): - # Skip the 'suffix' key as it's used to select the pool, not to match resources - if key == 'suffix': - continue - # Check if the resource matches the description criteria - # Use getattr for object attributes or resources.get for dictionary keys - resource_val = getattr(resources, key, None) - if resource_val.lower() != val.lower(): - is_match = False - break # Break out of the inner loop if any criteria does not match - if is_match: - # If the resource matches all criteria, append its name to the matching_resources list - matching_resources.append(resources.name) - for items in matching_resources: - print(items) - return matching_resources - - - def set_resource(self, name, value): - self.rpool[name] = value - diff --git a/CPAC/pipeline/engine/strat_pool.py b/CPAC/pipeline/engine/strat_pool.py deleted file mode 100644 index de20677326..0000000000 --- a/CPAC/pipeline/engine/strat_pool.py +++ /dev/null @@ -1,3 +0,0 @@ - -class StratPool(ResourcePool): - pass diff --git a/CPAC/pipeline/nodeblock.py b/CPAC/pipeline/nodeblock.py deleted file mode 100644 index 53b9db1330..0000000000 --- a/CPAC/pipeline/nodeblock.py +++ /dev/null @@ -1,180 +0,0 @@ -# Copyright (C) 2023-2024 C-PAC Developers - -# This file is part of C-PAC. - -# C-PAC is free software: you can redistribute it and/or modify it under -# the terms of the GNU Lesser General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. - -# C-PAC is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public -# License along with C-PAC. If not, see . 
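The module deleted below defined `NodeBlockFunction` and the `nodeblock` decorator (its full body follows; the PR moves this functionality to `CPAC.pipeline.engine.nodeblock`, as the later import changes show). As a reader's reference, a minimal sketch of how a node block function is declared with this decorator; the block name, config keys, inputs, and outputs here are illustrative, not taken from a real block:

```python
from CPAC.pipeline.engine.nodeblock import nodeblock  # new import path per this PR


@nodeblock(
    name="example_smoothing",  # illustrative block name
    config=["post_processing", "spatial_smoothing"],  # nested config keys for this block
    switch=["run"],  # key(s) under `config` that must be True for the block to run
    inputs=["desc-preproc_bold"],  # ResourcePool keys the block consumes
    outputs=["desc-sm_bold"],  # ResourcePool keys the block produces
)
def example_smoothing(wf, cfg, strat_pool, pipe_num, opt=None):
    """Every node block function shares this signature."""
    node, out = strat_pool.get_data("desc-preproc_bold")
    # ... build nipype nodes and connect them to `wf` here ...
    outputs = {"desc-sm_bold": (node, out)}  # placeholder wiring for this sketch
    return wf, outputs
```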
-"""Class and decorator for NodeBlock functions.""" - -from typing import Any, Callable, Optional - - -class NodeBlockFunction: - """Store a reference to the nodeblock function and all of its meta-data.""" - - def __init__( - self, - func: Callable, - name: Optional[str] = None, - config: Optional[list[str]] = None, - switch: Optional[list[str] | list[list[str]]] = None, - option_key: Optional[str | list[str]] = None, - option_val: Optional[str | list[str]] = None, - inputs: Optional[list[str | list | tuple]] = None, - outputs: Optional[list[str] | dict[str, Any]] = None, - ) -> None: - self.func = func - """Nodeblock function reference.""" - self.name: Optional[str] = name - """Used in the graph and logging to identify the NodeBlock and its component nodes.""" - self.config: Optional[list[str]] = config - """ - Indicates the nested keys in a C-PAC pipeline configuration should configure a NodeBlock built from this - function. If config is set to ``None``, then all other configuration-related entities must be specified from the - root of the configuration. - """ - self.switch: Optional[list[str] | list[list[str]]] = switch - """ - Indicates any keys that should evaluate to True for this NodeBlock to be active. A list of lists of strings - indicates multiple switches that must all be True to run, and is currently only an option if config is set to - ``None``. - """ - self.option_key: Optional[str | list[str]] = option_key - """ - Indicates the nested keys (starting at the nested key indicated by config) that should configure this NodeBlock. - """ - self.option_val: Optional[str | list[str]] = option_val - """Indicates values for which this NodeBlock should be active.""" - self.inputs: Optional[list[str | list | tuple]] = inputs - """ResourcePool keys indicating resources needed for the NodeBlock's functionality.""" - self.outputs: Optional[list[str] | dict[str, Any]] = outputs - """ - ResourcePool keys indicating resources generated or updated by the NodeBlock, optionally including metadata - for the outputs' respective sidecars. - """ - - # Forward function attributes similar to functools.update_wrapper: - # https://docs.python.org/3/library/functools.html#functools.update_wrapper - self.__module__ = func.__module__ - self.__name__ = func.__name__ - self.__qualname__ = func.__qualname__ - self.__annotations__ = func.__annotations__ - self.__doc__ = "".join( - [ - _.replace(" ", "") - for _ in [func.__doc__, "", "", NodeBlockFunction.__call__.__doc__] - if _ is not None - ] - ).rstrip() - - # all node block functions have this signature - def __call__(self, wf, cfg, strat_pool, pipe_num, opt=None): - """ - - Parameters - ---------- - wf : ~nipype.pipeline.engine.workflows.Workflow - - cfg : ~CPAC.utils.configuration.Configuration - - strat_pool - - pipe_num : int - - opt : str, optional - - Returns - ------- - wf : ~nipype.pipeline.engine.workflows.Workflow - - out : dict - """ - return self.func(wf, cfg, strat_pool, pipe_num, opt) - - def legacy_nodeblock_dict(self): - """Return nodeblock metadata as a dictionary. - - Helper for compatibility reasons. - """ - return { - "name": self.name, - "config": self.config, - "switch": self.switch, - "option_key": self.option_key, - "option_val": self.option_val, - "inputs": self.inputs, - "outputs": self.outputs, - } - - def __repr__(self) -> str: - """Return reproducible string representation of a NodeBlockFunction.""" - return ( - f"NodeBlockFunction({self.func.__module__}." 
- f'{self.func.__name__}, "{self.name}", ' - f"config={self.config}, switch={self.switch}, " - f"option_key={self.option_key}, option_val=" - f"{self.option_val}, inputs={self.inputs}, " - f"outputs={self.outputs})" - ) - - def __str__(self) -> str: - """Return string representation of a NodeBlockFunction.""" - return f"NodeBlockFunction({self.name})" - - -def nodeblock( - name: Optional[str] = None, - config: Optional[list[str]] = None, - switch: Optional[list[str] | list[list[str]]] = None, - option_key: Optional[str | list[str]] = None, - option_val: Optional[str | list[str]] = None, - inputs: Optional[list[str | list | tuple]] = None, - outputs: Optional[list[str] | dict[str, Any]] = None, -): - """ - Define a node block. - - Connections to the pipeline configuration and to other node blocks. - - Parameters - ---------- - name - Used in the graph and logging to identify the NodeBlock and its component nodes. - config - Indicates the nested keys in a C-PAC pipeline configuration should configure a NodeBlock built from this - function. If config is set to ``None``, then all other configuration-related entities must be specified from the - root of the configuration. - switch - Indicates any keys that should evaluate to True for this NodeBlock to be active. A list of lists of strings - indicates multiple switches that must all be True to run, and is currently only an option if config is set to - ``None``. - option_key - Indicates the nested keys (starting at the nested key indicated by config) that should configure this NodeBlock. - option_val - Indicates values for which this NodeBlock should be active. - inputs - ResourcePool keys indicating files needed for the NodeBlock's functionality. - outputs - ResourcePool keys indicating files generated or updated by the NodeBlock, optionally including metadata - for the outputs' respective sidecars. - """ - return lambda func: NodeBlockFunction( - func, - name if name is not None else func.__name__, - config, - switch, - option_key, - option_val, - inputs, - outputs, - ) diff --git a/CPAC/pipeline/schema.py b/CPAC/pipeline/schema.py index 915cb47045..6dc11326d5 100644 --- a/CPAC/pipeline/schema.py +++ b/CPAC/pipeline/schema.py @@ -21,6 +21,7 @@ from itertools import chain, permutations import re from subprocess import CalledProcessError +from typing import Any as TypeAny, Optional as TypeOptional import numpy as np from pathvalidate import sanitize_filename @@ -63,18 +64,12 @@ Number = Any(float, int, All(str, Match(SCIENTIFIC_NOTATION_STR_REGEX))) -def str_to_bool1_1(x): # pylint: disable=invalid-name - """Convert strings to Booleans for YAML1.1 syntax. +def str_to_bool1_1(x: TypeAny) -> bool: # pylint: disable=invalid-name + """Convert strings to Booleans for YAML1.1 syntax [1]_. - Ref https://yaml.org/type/bool.html - - Parameters + References ---------- - x : any - - Returns - ------- - bool + .. [1] 2005-01-18. Oren Ben-Kiki, Clark Evans & Brian Ingerson. `"Boolean Language-Independent Type for YAML™ Version 1.1" [Working Draft] `_. Copyright © 2001-2005 Oren Ben-Kiki, Clark Evans, Brian Ingerson. """ if isinstance(x, str): try: @@ -316,19 +311,9 @@ def str_to_bool1_1(x): # pylint: disable=invalid-name ) -def name_motion_filter(mfilter, mfilters=None): +def name_motion_filter(mfilter: dict, mfilters: TypeOptional[list] = None) -> str: """Given a motion filter, create a short string for the filename. 
- Parameters - ---------- - mfilter : dict - - mfliters : list or None - - Returns - ------- - str - Examples -------- >>> name_motion_filter({'filter_type': 'notch', 'filter_order': 2, @@ -385,19 +370,8 @@ def name_motion_filter(mfilter, mfilters=None): return name -def permutation_message(key, options): - """Give a human-readable error message for keys that accept permutation values. - - Parameters - ---------- - key: str - - options: list or set - - Returns - ------- - msg: str - """ +def permutation_message(key: str, options: list | set) -> str: + """Give a human-readable error message for keys that accept permutation values.""" return f""" \'{key}\' takes a dictionary with paths to region-of-interest (ROI) @@ -412,7 +386,7 @@ def permutation_message(key, options): """ -def sanitize(filename): +def sanitize(filename: str) -> str: """Sanitize a filename and replace whitespaces with underscores.""" return re.sub(r"\s+", "_", sanitize_filename(filename)) @@ -1253,20 +1227,12 @@ def sanitize(filename): ) -def schema(config_dict): +def schema(config_dict: dict) -> dict: """Validate a participant-analysis pipeline configuration. Validate against the latest validation schema by first applying backwards- compatibility patches, then applying Voluptuous validation, then handling complex - configuration interaction checks before returning validated config_dict. - - Parameters - ---------- - config_dict : dict - - Returns - ------- - dict + configuration interaction checks before returning validated `config_dict`. """ from CPAC.utils.utils import _changes_1_8_0_to_1_8_1 diff --git a/CPAC/pipeline/test/test_cpac_pipeline.py b/CPAC/pipeline/test/test_cpac_pipeline.py index 7c81a31923..1eef42faf6 100644 --- a/CPAC/pipeline/test/test_cpac_pipeline.py +++ b/CPAC/pipeline/test/test_cpac_pipeline.py @@ -1,10 +1,14 @@ """Tests for cpac_pipeline.py.""" +from typing import cast + +import pandas as pd import pytest from CPAC.pipeline.cpac_pipeline import run_workflow from CPAC.pipeline.nipype_pipeline_engine.plugins import MultiProcPlugin from CPAC.utils.configuration import Configuration +from CPAC.utils.typing import SUB_GROUP @pytest.mark.parametrize("plugin", [MultiProcPlugin(), False, "MultiProc", None]) @@ -16,7 +20,8 @@ def test_plugin_param(plugin): cfg = Configuration() with pytest.raises((TypeError, KeyError)) as e: - exitcode = run_workflow({}, cfg, False, plugin=plugin) + sub_group = cast(SUB_GROUP, ((("", ""), pd.DataFrame([])))) + exitcode = run_workflow(sub_group, cfg, run=False, plugin=plugin) assert exitcode != 0 if isinstance(plugin, str) or plugin is None: assert e.typename == "KeyError" diff --git a/CPAC/pipeline/test/test_engine.py b/CPAC/pipeline/test/test_engine.py index c228fc3640..14721291d0 100644 --- a/CPAC/pipeline/test/test_engine.py +++ b/CPAC/pipeline/test/test_engine.py @@ -1,154 +1,102 @@ -import os +# Copyright (C) 2021-2024 C-PAC Developers -import pytest - -from CPAC.pipeline.cpac_pipeline import ( - build_anat_preproc_stack, - build_workflow, - connect_pipeline, - initialize_nipype_wf, - load_cpac_pipe_config, -) -from CPAC.pipeline.engine import ( - ingress_pipeconfig_paths, - ingress_raw_anat_data, - ingress_raw_func_data, - initiate_rpool, - ResourcePool, -) -from CPAC.utils.bids_utils import create_cpac_data_config - - -@pytest.mark.skip(reason="not a pytest test") -def test_ingress_func_raw_data(pipe_config, bids_dir, test_dir): - sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0] - cfg = load_cpac_pipe_config(pipe_config) - - 
cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out") - cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work") - - wf = initialize_nipype_wf(cfg, sub_data_dct) - - part_id = sub_data_dct["subject_id"] - ses_id = sub_data_dct["unique_id"] - - unique_id = f"{part_id}_{ses_id}" - - rpool = ResourcePool(name=unique_id, cfg=cfg) +# This file is part of C-PAC. - if "func" in sub_data_dct: - wf, rpool, diff, blip, fmap_rp_list = ingress_raw_func_data( - wf, rpool, cfg, sub_data_dct, unique_id, part_id, ses_id - ) +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. - rpool.gather_pipes(wf, cfg, all=True) +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. - wf.run() +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Tests for C-PAC pipeline engine.""" +from pathlib import Path -@pytest.mark.skip(reason="not a pytest test") -def test_ingress_anat_raw_data(pipe_config, bids_dir, test_dir): - sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0] - cfg = load_cpac_pipe_config(pipe_config) - - cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out") - cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work") - - wf = initialize_nipype_wf(cfg, sub_data_dct) - - part_id = sub_data_dct["subject_id"] - ses_id = sub_data_dct["unique_id"] - - unique_id = f"{part_id}_{ses_id}" - - rpool = ResourcePool(name=unique_id, cfg=cfg) +import pytest +from bids2table import bids2table - rpool = ingress_raw_anat_data( - wf, rpool, cfg, sub_data_dct, unique_id, part_id, ses_id +from CPAC.pipeline.cpac_pipeline import ( + build_anat_preproc_stack, + build_workflow, +) +from CPAC.pipeline.engine import ResourcePool +from CPAC.utils.configuration import Configuration, Preconfiguration + + +def _set_up_test( + bids_examples: Path, preconfig: str, tmp_path: Path +) -> tuple[Configuration, dict]: + """Set up `cfg` and `sub_data` for engine tests.""" + bids_dir = str(bids_examples / "ds051") + bids_table = bids2table(bids_dir).flat + bids_table["ses"] = bids_table["ses"].fillna("None") + sub_data = bids_table.groupby(["sub", "ses"]) + cfg = Preconfiguration(preconfig) + cfg.pipeline_setup["output_directory"]["path"] = str(tmp_path / "out") + cfg.pipeline_setup["working_directory"]["path"] = str(tmp_path / "work") + cfg.pipeline_setup["log_directory"]["path"] = str(tmp_path / "logs") + return cfg, list(sub_data)[0] + + +@pytest.mark.parametrize("preconfig", ["default"]) +def test_ingress_func_raw_data( + bids_examples: Path, preconfig: str, tmp_path: Path +) -> None: + """Test :py:meth:`~CPAC.pipeline.engine.resource.ResourcePool.ingress_raw_func_data` .""" + cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) + rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) + rpool.gather_pipes(rpool.wf, cfg, all_types=True) + + +@pytest.mark.parametrize("preconfig", ["default"]) +def test_ingress_anat_raw_data( + bids_examples: Path, preconfig: str, tmp_path: Path +) -> None: + """Test 
:py:meth:`~CPAC.pipeline.engine.resource.ResourcePool.ingress_raw_anat_data` .""" + cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) + rpool = ResourcePool( + cfg=cfg, + data_paths=sub_data_dct, ) + rpool.ingress_raw_anat_data() + rpool.gather_pipes(rpool.wf, cfg, all_types=True) + + +@pytest.mark.parametrize("preconfig", ["default"]) +def test_ingress_pipeconfig_data( + bids_examples: Path, preconfig: str, tmp_path: Path +) -> None: + """Test :py:meth:`~CPAC.pipeline.engine.resource.ResourcePool.ingress_pipeconfig_paths` .""" + cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) + rpool = ResourcePool( + cfg=cfg, + data_paths=sub_data_dct, + ) + rpool.gather_pipes(rpool.wf, cfg, all_types=True) - rpool.gather_pipes(wf, cfg, all=True) - - wf.run() - - -@pytest.mark.skip(reason="not a pytest test") -def test_ingress_pipeconfig_data(pipe_config, bids_dir, test_dir): - sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0] - cfg = load_cpac_pipe_config(pipe_config) - - cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out") - cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work") - cfg.pipeline_setup["log_directory"]["path"] = os.path.join(test_dir, "logs") - - wf = initialize_nipype_wf(cfg, sub_data_dct) - - part_id = sub_data_dct["subject_id"] - ses_id = sub_data_dct["unique_id"] - - unique_id = f"{part_id}_{ses_id}" - - rpool = ResourcePool(name=unique_id, cfg=cfg) - - rpool = ingress_pipeconfig_paths(cfg, rpool, sub_data_dct, unique_id) - - rpool.gather_pipes(wf, cfg, all=True) - - wf.run() - - -@pytest.mark.skip(reason="not a pytest test") -def test_build_anat_preproc_stack(pipe_config, bids_dir, test_dir): - sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0] - cfg = load_cpac_pipe_config(pipe_config) - - cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out") - cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work") - cfg.pipeline_setup["log_directory"]["path"] = os.path.join(test_dir, "logs") - - wf = initialize_nipype_wf(cfg, sub_data_dct) - - wf, rpool = initiate_rpool(wf, cfg, sub_data_dct) +@pytest.mark.parametrize("preconfig", ["anat-only"]) +def test_build_anat_preproc_stack( + bids_examples: Path, preconfig: str, tmp_path: Path +) -> None: + """Test :py:func:`~CPAC.pipeline.cpac_pipeline.build_anat_preproc_stack` .""" + cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) + rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) pipeline_blocks = build_anat_preproc_stack(rpool, cfg) - wf = connect_pipeline(wf, cfg, rpool, pipeline_blocks) - + wf = rpool.connect_pipeline(rpool.wf, cfg, pipeline_blocks) rpool.gather_pipes(wf, cfg) - wf.run() - - -@pytest.mark.skip(reason="not a pytest test") -def test_build_workflow(pipe_config, bids_dir, test_dir): - sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0] - cfg = load_cpac_pipe_config(pipe_config) - - cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out") - cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work") - cfg.pipeline_setup["log_directory"]["path"] = os.path.join(test_dir, "logs") - - wf = initialize_nipype_wf(cfg, sub_data_dct) - - wf, rpool = initiate_rpool(wf, cfg, sub_data_dct) - - wf, _, _ = build_workflow(sub_data_dct["subject_id"], sub_data_dct, cfg) +@pytest.mark.parametrize("preconfig", ["default"]) +def test_build_workflow(bids_examples: Path, 
preconfig: str, tmp_path: Path) -> None: + """Test :py:func:`~CPAC.pipeline.cpac_pipeline.build_workflow` .""" + cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) + rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) + wf = build_workflow(sub_data_dct["subject_id"], sub_data_dct, cfg) rpool.gather_pipes(wf, cfg) - - wf.run() - - -# bids_dir = "/Users/steven.giavasis/data/HBN-SI_dataset/rawdata" -# test_dir = "/test_dir" - -# cfg = "/Users/hecheng.jin/GitHub/DevBranch/CPAC/resources/configs/pipeline_config_monkey-ABCD.yml" -cfg = "/Users/hecheng.jin/GitHub/pipeline_config_monkey-ABCDlocal.yml" -bids_dir = "/Users/hecheng.jin/Monkey/monkey_data_oxford/site-ucdavis" -test_dir = "/Users/hecheng.jin/GitHub/Test/T2preproc" - -# test_ingress_func_raw_data(cfg, bids_dir, test_dir) -# test_ingress_anat_raw_data(cfg, bids_dir, test_dir) -# test_ingress_pipeconfig_data(cfg, bids_dir, test_dir) -# test_build_anat_preproc_stack(cfg, bids_dir, test_dir) -if __name__ == "__main__": - test_build_workflow(cfg, bids_dir, test_dir) diff --git a/CPAC/pipeline/utils.py b/CPAC/pipeline/utils.py index 39acb6429f..6f6953fef2 100644 --- a/CPAC/pipeline/utils.py +++ b/CPAC/pipeline/utils.py @@ -18,31 +18,16 @@ from itertools import chain -from CPAC.func_preproc.func_motion import motion_estimate_filter from CPAC.utils.bids_utils import insert_entity +from CPAC.utils.configuration.configuration import Configuration -MOVEMENT_FILTER_KEYS = motion_estimate_filter.outputs +def name_fork( + resource_idx: str, cfg: Configuration, json_info: dict, out_dct: dict +) -> tuple[str, dict]: + """Create and insert entities for forkpoints.""" + from CPAC.func_preproc.func_motion import motion_estimate_filter -def name_fork(resource_idx, cfg, json_info, out_dct): - """Create and insert entities for forkpoints. - - Parameters - ---------- - resource_idx : str - - cfg : CPAC.utils.configuration.Configuration - - json_info : dict - - out_dct : dict - - Returns - ------- - resource_idx : str - - out_dct : dict - """ if cfg.switch_is_on( [ "functional_preproc", @@ -54,7 +39,7 @@ def name_fork(resource_idx, cfg, json_info, out_dct): filt_value = None _motion_variant = { _key: json_info["CpacVariant"][_key] - for _key in MOVEMENT_FILTER_KEYS + for _key in motion_estimate_filter.outputs if _key in json_info.get("CpacVariant", {}) } if "unfiltered-" in resource_idx: @@ -105,12 +90,6 @@ def present_outputs(outputs: dict, keys: list) -> dict: NodeBlocks that differ only by configuration options and relevant output keys. 
- Parameters - ---------- - outputs : dict - - keys : list of str - Returns ------- dict diff --git a/CPAC/qc/pipeline.py b/CPAC/qc/pipeline.py index 15d6b35e09..fd39ed5193 100644 --- a/CPAC/qc/pipeline.py +++ b/CPAC/qc/pipeline.py @@ -1,7 +1,7 @@ import pkg_resources as p from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.qc.qc import ( afni_Edge3, create_montage, diff --git a/CPAC/qc/xcp.py b/CPAC/qc/xcp.py index 95cb870430..61bb008a0e 100644 --- a/CPAC/qc/xcp.py +++ b/CPAC/qc/xcp.py @@ -67,13 +67,15 @@ import pandas as pd import nibabel as nib from nipype.interfaces import afni, fsl +from nipype.pipeline.engine import Node, Workflow from CPAC.generate_motion_statistics.generate_motion_statistics import ( DVARS_strip_t0, ImageTo1D, ) from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock +from CPAC.pipeline.engine.resource import StratPool from CPAC.qc.qcmetrics import regisQ from CPAC.utils.interfaces.function import Function @@ -85,33 +87,29 @@ ] -def _connect_motion(wf, nodes, strat_pool, qc_file, pipe_num): +def _connect_motion( + wf: Workflow, nodes: dict, strat_pool: StratPool, qc_file: Node, pipe_num: int +) -> Workflow: """ Connect the motion metrics to the workflow. Parameters ---------- - wf : nipype.pipeline.engine.Workflow + wf The workflow to connect the motion metrics to. - nodes : dict + nodes Dictionary of nodes already collected from the strategy pool. - strat_pool : CPAC.pipeline.engine.ResourcePool + strat_pool The current strategy pool. - qc_file : nipype.pipeline.engine.Node - A function node with the function ``generate_xcp_qc``. - - pipe_num : int - - Returns - ------- - wf : nipype.pipeline.engine.Workflow + qc_file + A function node with the function :py:func:`generate_xcp_qc` . 
""" # pylint: disable=invalid-name, too-many-arguments try: - nodes = {**nodes, "censor-indices": strat_pool.node_data("censor-indices")} + nodes = {**nodes, "censor-indices": strat_pool.get_data("censor-indices")} wf.connect( nodes["censor-indices"].node, nodes["censor-indices"].out, @@ -501,7 +499,7 @@ def qc_xcp(wf, cfg, strat_pool, pipe_num, opt=None): ) qc_file.inputs.desc = "preproc" qc_file.inputs.regressors = ( - strat_pool.node_data("regressors") + strat_pool.get_data("regressors") .node.name.split("regressors_")[-1][::-1] .split("_", 1)[-1][::-1] ) @@ -511,7 +509,7 @@ def qc_xcp(wf, cfg, strat_pool, pipe_num, opt=None): op_string="-bin ", ) nodes = { - key: strat_pool.node_data(key) + key: strat_pool.get_data(key) for key in [ "bold", "desc-preproc_bold", @@ -526,13 +524,13 @@ def qc_xcp(wf, cfg, strat_pool, pipe_num, opt=None): ] if strat_pool.check_rpool(key) } - nodes["bold2template_mask"] = strat_pool.node_data( + nodes["bold2template_mask"] = strat_pool.get_data( ["space-template_desc-bold_mask", "space-EPItemplate_desc-bold_mask"] ) - nodes["template_mask"] = strat_pool.node_data( + nodes["template_mask"] = strat_pool.get_data( ["T1w-brain-template-mask", "EPI-template-mask"] ) - nodes["template"] = strat_pool.node_data( + nodes["template"] = strat_pool.get_data( ["T1w-brain-template-funcreg", "EPI-brain-template-funcreg"] ) resample_bold_mask_to_template = pe.Node( diff --git a/CPAC/randomise/randomise.py b/CPAC/randomise/randomise.py index 8c2351c9f0..b3144685aa 100644 --- a/CPAC/randomise/randomise.py +++ b/CPAC/randomise/randomise.py @@ -15,6 +15,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function from CPAC.utils.monitoring import IFLOGGER @@ -53,7 +54,6 @@ def prep_randomise_workflow( ): from nipype.interfaces import fsl import nipype.interfaces.io as nio - import nipype.interfaces.utility as util wf = pe.Workflow(name="randomise_workflow") wf.base_dir = c.work_dir @@ -74,7 +74,7 @@ def prep_randomise_workflow( randomise.inputs.fcon = fts_file select_tcorrp_files = pe.Node( - util.Function( + Function( input_names=["input_list"], output_names=["out_file"], function=select ), name="select_t_corrp", @@ -83,7 +83,7 @@ def prep_randomise_workflow( wf.connect(randomise, "t_corrected_p_files", select_tcorrp_files, "input_list") select_tstat_files = pe.Node( - util.Function( + Function( input_names=["input_list"], output_names=["out_file"], function=select ), name="select_t_stat", @@ -147,6 +147,10 @@ def run(group_config_path): import os from CPAC.pipeline.cpac_group_runner import load_config_yml + from CPAC.pipeline.cpac_randomise_pipeline import ( + randomise_merged_file, + randomise_merged_mask, + ) group_config_obj = load_config_yml(group_config_path) pipeline_output_folder = group_config_obj.pipeline_dir diff --git a/CPAC/registration/output_func_to_standard.py b/CPAC/registration/output_func_to_standard.py index 6cf172f76d..bafea7d8d0 100644 --- a/CPAC/registration/output_func_to_standard.py +++ b/CPAC/registration/output_func_to_standard.py @@ -374,7 +374,7 @@ def ants_apply_warps_func_mni( itk_imports = ["import os"] change_transform = pe.Node( - util.Function( + Function( input_names=["input_affine_file"], output_names=["updated_affine_file"], function=change_itk_transform_type, @@ -534,7 +534,7 @@ def ants_apply_warps_func_mni( # check transform list (if missing any init/rig/affine) and exclude Nonetype 
check_transform = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["checked_transform_list", "list_length"], function=check_transforms, @@ -546,7 +546,7 @@ def ants_apply_warps_func_mni( # generate inverse transform flags, which depends on the number of transforms inverse_transform_flags = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["inverse_transform_flags"], function=generate_inverse_transform_flags, diff --git a/CPAC/registration/registration.py b/CPAC/registration/registration.py index b6cc9892ea..3673b267cf 100644 --- a/CPAC/registration/registration.py +++ b/CPAC/registration/registration.py @@ -17,7 +17,7 @@ # pylint: disable=too-many-lines,ungrouped-imports,wrong-import-order """Workflows for registration.""" -from typing import Optional +from typing import Optional, TYPE_CHECKING from voluptuous import RequiredFieldInvalid from nipype.interfaces import afni, ants, c3, fsl, utility as util @@ -26,7 +26,7 @@ from CPAC.anat_preproc.lesion_preproc import create_lesion_preproc from CPAC.func_preproc.utils import chunk_ts, split_ts_chunks from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.registration.utils import ( change_itk_transform_type, check_transforms, @@ -39,9 +39,14 @@ seperate_warps_list, single_ants_xfm_to_list, ) +from CPAC.utils.configuration.configuration import Configuration +from CPAC.utils.interfaces import Function from CPAC.utils.interfaces.fsl import Merge as fslMerge from CPAC.utils.utils import check_prov_for_motion_tool, check_prov_for_regtool +if TYPE_CHECKING: + from CPAC.pipeline.engine.resource import StratPool + def apply_transform( wf_name, @@ -104,7 +109,7 @@ def apply_transform( wf.connect(inputNode, "reference", apply_warp, "reference_image") interp_string = pe.Node( - util.Function( + Function( input_names=["interpolation", "reg_tool"], output_names=["interpolation"], function=interpolation_string, @@ -118,7 +123,7 @@ def apply_transform( wf.connect(interp_string, "interpolation", apply_warp, "interpolation") ants_xfm_list = pe.Node( - util.Function( + Function( input_names=["transform"], output_names=["transform_list"], function=single_ants_xfm_to_list, @@ -135,7 +140,7 @@ def apply_transform( if int(num_cpus) > 1 and time_series: chunk_imports = ["import nibabel as nib"] chunk = pe.Node( - util.Function( + Function( input_names=["func_file", "n_chunks", "chunk_size"], output_names=["TR_ranges"], function=chunk_ts, @@ -154,7 +159,7 @@ def apply_transform( split_imports = ["import os", "import subprocess"] split = pe.Node( - util.Function( + Function( input_names=["func_file", "tr_ranges"], output_names=["split_funcs"], function=split_ts_chunks, @@ -196,7 +201,7 @@ def apply_transform( ) interp_string = pe.Node( - util.Function( + Function( input_names=["interpolation", "reg_tool"], output_names=["interpolation"], function=interpolation_string, @@ -222,7 +227,7 @@ def apply_transform( if int(num_cpus) > 1 and time_series: chunk_imports = ["import nibabel as nib"] chunk = pe.Node( - util.Function( + Function( input_names=["func_file", "n_chunks", "chunk_size"], output_names=["TR_ranges"], function=chunk_ts, @@ -241,7 +246,7 @@ def apply_transform( split_imports = ["import os", "import subprocess"] split = pe.Node( - util.Function( + Function( input_names=["func_file", "tr_ranges"], output_names=["split_funcs"], function=split_ts_chunks, @@ -761,7 +766,7 @@ def 
create_register_func_to_anat( if phase_diff_distcor: conv_pedir = pe.Node( - interface=util.Function( + interface=Function( input_names=["pedir", "convert"], output_names=["pedir"], function=convert_pedir, @@ -1067,7 +1072,7 @@ def bbreg_args(bbreg_target): if phase_diff_distcor: conv_pedir = pe.Node( - interface=util.Function( + interface=Function( input_names=["pedir", "convert"], output_names=["pedir"], function=convert_pedir, @@ -1276,7 +1281,7 @@ def create_wf_calculate_ants_warp( """ reg_imports = ["import os", "import subprocess"] calculate_ants_warp = pe.Node( - interface=util.Function( + interface=Function( input_names=[ "moving_brain", "reference_brain", @@ -1302,7 +1307,7 @@ def create_wf_calculate_ants_warp( calculate_ants_warp.interface.num_threads = num_threads select_forward_initial = pe.Node( - util.Function( + Function( input_names=["warp_list", "selection"], output_names=["selected_warp"], function=seperate_warps_list, @@ -1313,7 +1318,7 @@ def create_wf_calculate_ants_warp( select_forward_initial.inputs.selection = "Initial" select_forward_rigid = pe.Node( - util.Function( + Function( input_names=["warp_list", "selection"], output_names=["selected_warp"], function=seperate_warps_list, @@ -1324,7 +1329,7 @@ def create_wf_calculate_ants_warp( select_forward_rigid.inputs.selection = "Rigid" select_forward_affine = pe.Node( - util.Function( + Function( input_names=["warp_list", "selection"], output_names=["selected_warp"], function=seperate_warps_list, @@ -1335,7 +1340,7 @@ def create_wf_calculate_ants_warp( select_forward_affine.inputs.selection = "Affine" select_forward_warp = pe.Node( - util.Function( + Function( input_names=["warp_list", "selection"], output_names=["selected_warp"], function=seperate_warps_list, @@ -1346,7 +1351,7 @@ def create_wf_calculate_ants_warp( select_forward_warp.inputs.selection = "Warp" select_inverse_warp = pe.Node( - util.Function( + Function( input_names=["warp_list", "selection"], output_names=["selected_warp"], function=seperate_warps_list, @@ -1788,7 +1793,7 @@ def ANTs_registration_connector( # check transform list to exclude Nonetype (missing) init/rig/affine check_transform = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["checked_transform_list", "list_length"], function=check_transforms, @@ -1851,7 +1856,7 @@ def ANTs_registration_connector( # check transform list to exclude Nonetype (missing) init/rig/affine check_invlinear_transform = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["checked_transform_list", "list_length"], function=check_transforms, @@ -1873,7 +1878,7 @@ def ANTs_registration_connector( # generate inverse transform flags, which depends on the # number of transforms inverse_transform_flags = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["inverse_transform_flags"], function=generate_inverse_transform_flags, @@ -1935,7 +1940,7 @@ def ANTs_registration_connector( # check transform list to exclude Nonetype (missing) init/rig/affine check_all_transform = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["checked_transform_list", "list_length"], function=check_transforms, @@ -2004,7 +2009,7 @@ def ANTs_registration_connector( # check transform list to exclude Nonetype (missing) init/rig/affine check_all_inv_transform = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["checked_transform_list", "list_length"], function=check_transforms, @@ -2026,7 
+2031,7 @@ def ANTs_registration_connector( # generate inverse transform flags, which depends on the # number of transforms inverse_all_transform_flags = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["inverse_transform_flags"], function=generate_inverse_transform_flags, @@ -2122,7 +2127,7 @@ def bold_to_T1template_xfm_connector( itk_imports = ["import os"] change_transform = pe.Node( - util.Function( + Function( input_names=["input_affine_file"], output_names=["updated_affine_file"], function=change_itk_transform_type, @@ -2615,7 +2620,7 @@ def register_ANTs_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None): node, out = connect wf.connect(node, out, ants_rc, "inputspec.input_brain") - t1w_brain_template = strat_pool.node_data("T1w-brain-template") + t1w_brain_template = strat_pool.get_data("T1w-brain-template") wf.connect( t1w_brain_template.node, t1w_brain_template.out, @@ -2634,10 +2639,10 @@ def register_ANTs_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None): ) wf.connect(node, out, ants_rc, "inputspec.input_head") - t1w_template = strat_pool.node_data("T1w-template") + t1w_template = strat_pool.get_data("T1w-template") wf.connect(t1w_template.node, t1w_template.out, ants_rc, "inputspec.reference_head") - brain_mask = strat_pool.node_data( + brain_mask = strat_pool.get_data( [ "space-T1w_desc-brain_mask", "space-longitudinal_desc-brain_mask", @@ -2964,7 +2969,7 @@ def overwrite_transform_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None # c4d -mcs ${WD}/xfms/ANTs_CombinedWarp.nii.gz -oo ${WD}/xfms/e1.nii.gz ${WD}/xfms/e2.nii.gz ${WD}/xfms/e3.nii.gz # -mcs: -multicomponent-split, -oo: -output-multiple split_combined_warp = pe.Node( - util.Function( + Function( input_names=["input_name", "output_name"], output_names=["output1", "output2", "output3"], function=run_c4d, @@ -2982,7 +2987,7 @@ def overwrite_transform_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None # c4d -mcs ${WD}/xfms/ANTs_CombinedInvWarp.nii.gz -oo ${WD}/xfms/e1inv.nii.gz ${WD}/xfms/e2inv.nii.gz ${WD}/xfms/e3inv.nii.gz split_combined_inv_warp = pe.Node( - util.Function( + Function( input_names=["input_name", "output_name"], output_names=["output1", "output2", "output3"], function=run_c4d, @@ -3678,7 +3683,7 @@ def apply_phasediff_to_timeseries_separately(wf, cfg, strat_pool, pipe_num, opt= wf.connect(warp_fmap, "out_file", mask_fmap, "in_file") conv_pedir = pe.Node( - interface=util.Function( + interface=Function( input_names=["pedir", "convert"], output_names=["pedir"], function=convert_pedir, @@ -4819,7 +4824,7 @@ def single_step_resample_timeseries_to_T1template( reg_tool = check_prov_for_regtool(xfm_prov) bbr2itk = pe.Node( - util.Function( + Function( input_names=["reference_file", "source_file", "transform_file"], output_names=["itk_transform"], function=run_c3d, @@ -4860,7 +4865,7 @@ def single_step_resample_timeseries_to_T1template( ### Loop starts! 
### motionxfm2itk = pe.MapNode( - util.Function( + Function( input_names=["reference_file", "source_file", "transform_file"], output_names=["itk_transform"], function=run_c3d, @@ -4881,7 +4886,7 @@ def single_step_resample_timeseries_to_T1template( wf.connect(node, out, motionxfm2itk, "transform_file") elif motion_correct_tool == "3dvolreg": convert_transform = pe.Node( - util.Function( + Function( input_names=["one_d_filename"], output_names=["transform_directory"], function=one_d_to_mat, @@ -5415,8 +5420,8 @@ def warp_tissuemask_to_template(wf, cfg, strat_pool, pipe_num, xfm, template_spa def warp_resource_to_template( wf: pe.Workflow, - cfg, - strat_pool, + cfg: Configuration, + strat_pool: "StratPool", pipe_num: int, input_resource: list[str] | str, xfm: str, @@ -5427,24 +5432,24 @@ def warp_resource_to_template( Parameters ---------- - wf : pe.Workflow + wf - cfg : CPAC.utils.configuration.Configuration + cfg - strat_pool : CPAC.pipeline.engine.ResourcePool + strat_pool - pipe_num : int + pipe_num - input_resource : str or list + input_resource key for the resource to warp to template - xfm : str + xfm key for the transform to apply - reference : str, optional + reference key for reference if not using f'{template_space}-template' - time_series : boolean, optional + time_series resource to transform is 4D? Returns diff --git a/CPAC/reho/reho.py b/CPAC/reho/reho.py index 80e6599d10..b19ad9ecc7 100644 --- a/CPAC/reho/reho.py +++ b/CPAC/reho/reho.py @@ -1,9 +1,26 @@ # coding: utf-8 +# Copyright (C) 2012-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.reho.utils import * +from CPAC.utils.interfaces import Function def create_reho(wf_name): @@ -99,7 +116,7 @@ def create_reho(wf_name): "from CPAC.reho.utils import f_kendall", ] raw_reho_map = pe.Node( - util.Function( + Function( input_names=["in_file", "mask_file", "cluster_size"], output_names=["out_file"], function=compute_reho, diff --git a/CPAC/resources/tests/test_templates.py b/CPAC/resources/tests/test_templates.py index 13a4f72745..0c70370f7f 100644 --- a/CPAC/resources/tests/test_templates.py +++ b/CPAC/resources/tests/test_templates.py @@ -16,27 +16,37 @@ # License along with C-PAC. If not, see . 
"""Tests for packaged templates.""" +from importlib.util import find_spec import os import pytest from CPAC.pipeline import ALL_PIPELINE_CONFIGS -from CPAC.pipeline.engine import ingress_pipeconfig_paths, ResourcePool +from CPAC.pipeline.engine import ResourcePool from CPAC.utils.configuration import Preconfiguration from CPAC.utils.datasource import get_highest_local_res -@pytest.mark.parametrize("pipeline", ALL_PIPELINE_CONFIGS) +@pytest.mark.parametrize( + "pipeline", + [ + pytest.param( + config, + marks=pytest.mark.skipif( + not find_spec("torch"), reason="torch required for NHP configs." + ), + ) + if config in ["monkey", "nhp-macaque"] + else config + for config in ALL_PIPELINE_CONFIGS + ], +) def test_packaged_path_exists(pipeline): - """ - Check that all local templates are included in image at at - least one resolution. - """ - rpool = ingress_pipeconfig_paths( - Preconfiguration(pipeline), ResourcePool(), "pytest" - ) + """Check that all local templates are included in at least one resolution.""" + rpool = ResourcePool(cfg=Preconfiguration(pipeline), part_id="pytest") + rpool.ingress_pipeconfig_paths() for resource in rpool.rpool.values(): - node = next(iter(resource.values())).get("data")[0] + node = next(iter(resource.values())).data[0] if hasattr(node.inputs, "template") and not node.inputs.template.startswith( "s3:" ): diff --git a/CPAC/sca/sca.py b/CPAC/sca/sca.py index 8e714dbd5f..bf855d578a 100644 --- a/CPAC/sca/sca.py +++ b/CPAC/sca/sca.py @@ -18,7 +18,7 @@ from nipype.interfaces.afni import preprocess from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.sca.utils import * from CPAC.timeseries.timeseries_analysis import ( get_roi_timeseries, @@ -30,11 +30,15 @@ create_spatial_map_dataflow, resample_func_roi, ) +from CPAC.utils.interfaces import Function def create_sca(name_sca="sca"): """ - Map of the correlations of the Region of Interest(Seed in native or MNI space) with the rest of brain voxels. + Create map of the correlations of the Region of Interest with the rest of brain voxels. + + (Seed in native or MNI space) + The map is normalized to contain Z-scores, mapped in standard space and treated with spatial smoothing. Parameters @@ -150,8 +154,8 @@ def create_sca(name_sca="sca"): def create_temporal_reg(wflow_name="temporal_reg", which="SR"): - r""" - Temporal multiple regression workflow + r"""Create temporal multiple regression workflow. + Provides a spatial map of parameter estimates corresponding to each provided timeseries in a timeseries.txt file as regressors. 
@@ -280,9 +284,7 @@ def create_temporal_reg(wflow_name="temporal_reg", which="SR"): ) check_timeseries = pe.Node( - util.Function( - input_names=["in_file"], output_names=["out_file"], function=check_ts - ), + Function(input_names=["in_file"], output_names=["out_file"], function=check_ts), name="check_timeseries", ) @@ -325,7 +327,7 @@ def create_temporal_reg(wflow_name="temporal_reg", which="SR"): map_roi_imports = ['import os', 'import numpy as np'] # get roi order and send to output node for raw outputs - get_roi_order = pe.Node(util.Function(input_names=['maps', + get_roi_order = pe.Node(Function(input_names=['maps', 'timeseries'], output_names=['labels', 'maps'], @@ -350,7 +352,7 @@ def create_temporal_reg(wflow_name="temporal_reg", which="SR"): outputNode, 'temp_reg_map_files') # get roi order and send to output node for z-stat outputs - get_roi_order_zstat = pe.Node(util.Function(input_names=['maps', + get_roi_order_zstat = pe.Node(Function(input_names=['maps', 'timeseries'], output_names=['labels', 'maps'], @@ -396,7 +398,7 @@ def SCA_AVG(wf, cfg, strat_pool, pipe_num, opt=None): # same workflow, except to run TSE and send it to the resource # pool so that it will not get sent to SCA resample_functional_roi_for_sca = pe.Node( - util.Function( + Function( input_names=["in_func", "in_roi", "realignment", "identity_matrix"], output_names=["out_func", "out_roi"], function=resample_func_roi, diff --git a/CPAC/scrubbing/scrubbing.py b/CPAC/scrubbing/scrubbing.py index ed85ef1024..e08b816edc 100644 --- a/CPAC/scrubbing/scrubbing.py +++ b/CPAC/scrubbing/scrubbing.py @@ -1,13 +1,29 @@ +# Copyright (C) 2012-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function def create_scrubbing_preproc(wf_name="scrubbing"): - """ - This workflow essentially takes the list of offending timepoints that are to be removed - and removes it from the motion corrected input image. Also, it removes the information - of discarded time points from the movement parameters file obtained during motion correction. + """Take the list of offending timepoints that are to be removed and remove it from the motion corrected input image. + + Also remove the information of discarded time points from the movement parameters file obtained during motion correction. 
Parameters ---------- @@ -94,7 +110,7 @@ def create_scrubbing_preproc(wf_name="scrubbing"): ) craft_scrub_input = pe.Node( - util.Function( + Function( input_names=["scrub_input", "frames_in_1D_file"], output_names=["scrub_input_string"], function=get_indx, @@ -103,7 +119,7 @@ def create_scrubbing_preproc(wf_name="scrubbing"): ) scrubbed_movement_parameters = pe.Node( - util.Function( + Function( input_names=["infile_a", "infile_b"], output_names=["out_file"], function=get_mov_parameters, @@ -120,7 +136,7 @@ def create_scrubbing_preproc(wf_name="scrubbing"): # scrubbed_preprocessed.inputs.outputtype = 'NIFTI_GZ' scrubbed_preprocessed = pe.Node( - util.Function( + Function( input_names=["scrub_input"], output_names=["scrubbed_image"], function=scrub_image, @@ -152,9 +168,8 @@ def create_scrubbing_preproc(wf_name="scrubbing"): def get_mov_parameters(infile_a, infile_b): - """ - Method to get the new movement parameters - file after removing the offending time frames + """Get the new movement parameters file after removing the offending time frames. + (i.e., those exceeding FD 0.5mm/0.2mm threshold). Parameters @@ -192,7 +207,7 @@ def get_mov_parameters(infile_a, infile_b): raise Exception(msg) f = open(out_file, "a") - for l in l1: + for l in l1: # noqa: E741 data = l2[int(l.strip())] f.write(data) f.close() @@ -200,9 +215,7 @@ def get_mov_parameters(infile_a, infile_b): def get_indx(scrub_input, frames_in_1D_file): - """ - Method to get the list of time - frames that are to be included. + """Get the list of time frames that are to be included. Parameters ---------- @@ -230,10 +243,10 @@ def get_indx(scrub_input, frames_in_1D_file): def scrub_image(scrub_input): - """ - Method to run 3dcalc in order to scrub the image. This is used instead of - the Nipype interface for 3dcalc because functionality is needed for - specifying an input file with specifically-selected volumes. For example: + """Run 3dcalc in order to scrub the image. + + This is used instead of the Nipype interface for 3dcalc because functionality is + needed for specifying an input file with specifically-selected volumes. For example: input.nii.gz[2,3,4,..98], etc. Parameters diff --git a/CPAC/seg_preproc/seg_preproc.py b/CPAC/seg_preproc/seg_preproc.py index a66990f1e6..1fe3f4045f 100644 --- a/CPAC/seg_preproc/seg_preproc.py +++ b/CPAC/seg_preproc/seg_preproc.py @@ -1,9 +1,25 @@ +# Copyright (C) 2012-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
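The registration hunks above and the seg_preproc hunks below repeatedly swap `util.Function` for C-PAC's own `Function` interface, re-exported from `CPAC.utils.interfaces`. A rough sketch of that node-construction pattern, using a throwaway function rather than any real C-PAC utility:

from CPAC.pipeline import nipype_pipeline_engine as pe
from CPAC.utils.interfaces import Function


def _append_suffix(path: str, suffix: str) -> str:
    """Toy function standing in for a real utility."""
    return f"{path}{suffix}"


append_suffix_node = pe.Node(
    Function(
        input_names=["path", "suffix"],
        output_names=["out"],
        function=_append_suffix,
    ),
    name="append_suffix_example",
)
append_suffix_node.inputs.suffix = "_desc-example"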
from nipype.interfaces import ants, freesurfer, fsl, utility as util from nipype.interfaces.utility import Function from CPAC.anat_preproc.utils import mri_convert from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.registration.registration import apply_transform from CPAC.registration.utils import check_transforms, generate_inverse_transform_flags from CPAC.seg_preproc.utils import ( @@ -23,10 +39,10 @@ def process_segment_map(wf_name, use_priors, use_custom_threshold, reg_tool): - """This is a sub workflow used inside segmentation workflow to process - probability maps obtained in segmentation. Steps include overlapping - of the prior tissue with probability maps, thresholding and binarizing - it and creating a mask that is used in further analysis. + """Create a sub workflow used inside segmentation workflow to process probability maps obtained in segmentation. + + Steps include overlapping of the prior tissue with probability maps, thresholding + and binarizing it and creating a mask that is used in further analysis. Parameters ---------- @@ -274,7 +290,7 @@ def tissue_mask_template_to_t1(wf_name, use_ants): # check transform list to exclude Nonetype (missing) init/rig/affine check_transform = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["checked_transform_list", "list_length"], function=check_transforms, @@ -289,7 +305,7 @@ def tissue_mask_template_to_t1(wf_name, use_ants): # generate inverse transform flags, which depends on the # number of transforms inverse_transform_flags = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["inverse_transform_flags"], function=generate_inverse_transform_flags, @@ -356,9 +372,7 @@ def tissue_mask_template_to_t1(wf_name, use_ants): def create_seg_preproc_antsJointLabel_method(wf_name="seg_preproc_templated_based"): - """ - Generate the subject's cerebral spinal fluids, - white matter and gray matter mask based on provided template, if selected to do so. + """Generate the subject's cerebral spinal fluids, white matter and gray matter mask based on provided template, if selected to do so. Parameters ---------- @@ -417,7 +431,7 @@ def create_seg_preproc_antsJointLabel_method(wf_name="seg_preproc_templated_base ) seg_preproc_antsJointLabel = pe.Node( - util.Function( + Function( input_names=[ "anatomical_brain", "anatomical_brain_mask", @@ -700,7 +714,7 @@ def tissue_seg_fsl_fast(wf, cfg, strat_pool, pipe_num, opt=None): ) get_csf = pe.Node( - util.Function( + Function( input_names=["probability_maps"], output_names=["filename"], function=pick_wm_prob_0, @@ -945,7 +959,7 @@ def tissue_seg_freesurfer(wf, cfg, strat_pool, pipe_num, opt=None): wf.connect(node, out, fs_aseg_to_native, "target_file") fs_aseg_to_nifti = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=mri_convert ), name=f"fs_aseg_to_nifti_{pipe_num}", diff --git a/CPAC/surface/surf_preproc.py b/CPAC/surface/surf_preproc.py index 2229e24b5a..7959688f86 100644 --- a/CPAC/surface/surf_preproc.py +++ b/CPAC/surface/surf_preproc.py @@ -1,10 +1,25 @@ -import os +# Copyright (C) 2021-2023 C-PAC Developers + +# This file is part of C-PAC. 
+ +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. -import nipype.interfaces.utility as util +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +import os from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.surface.PostFreeSurfer.surf_reho import run_surf_reho +from CPAC.utils.interfaces import Function def run_surface( @@ -1026,7 +1041,7 @@ def run_surface( ) def surface_postproc(wf, cfg, strat_pool, pipe_num, opt=None): surf = pe.Node( - util.Function( + Function( input_names=[ "post_freesurfer_folder", "freesurfer_folder", @@ -1369,7 +1384,7 @@ def surface_postproc(wf, cfg, strat_pool, pipe_num, opt=None): ) def surface_falff(wf, cfg, strat_pool, pipe_num, opt): falff = pe.Node( - util.Function( + Function( input_names=["subject", "dtseries"], output_names=["surf_falff"], function=run_surf_falff, @@ -1394,7 +1409,7 @@ def surface_falff(wf, cfg, strat_pool, pipe_num, opt): ) def surface_alff(wf, cfg, strat_pool, pipe_num, opt): alff = pe.Node( - util.Function( + Function( input_names=["subject", "dtseries"], output_names=["surf_alff"], function=run_surf_alff, @@ -1427,7 +1442,7 @@ def surface_alff(wf, cfg, strat_pool, pipe_num, opt): ) def surface_reho(wf, cfg, strat_pool, pipe_num, opt): L_cortex_file = pe.Node( - util.Function( + Function( input_names=["subject", "dtseries", "structure", "cortex_filename"], output_names=["L_cortex_file"], function=run_get_cortex, @@ -1442,7 +1457,7 @@ def surface_reho(wf, cfg, strat_pool, pipe_num, opt): wf.connect(node, out, L_cortex_file, "dtseries") R_cortex_file = pe.Node( - util.Function( + Function( input_names=["subject", "dtseries", "structure", "cortex_filename"], output_names=["R_cortex_file"], function=run_get_cortex, @@ -1456,7 +1471,7 @@ def surface_reho(wf, cfg, strat_pool, pipe_num, opt): wf.connect(node, out, R_cortex_file, "dtseries") mean_timeseries = pe.Node( - util.Function( + Function( input_names=["subject", "dtseries"], output_names=["mean_timeseries"], function=run_mean_timeseries, @@ -1468,7 +1483,7 @@ def surface_reho(wf, cfg, strat_pool, pipe_num, opt): wf.connect(node, out, mean_timeseries, "dtseries") L_reho = pe.Node( - util.Function( + Function( input_names=[ "subject", "dtseries", @@ -1499,7 +1514,7 @@ def surface_reho(wf, cfg, strat_pool, pipe_num, opt): wf.connect(node, out, L_reho, "dtseries") R_reho = pe.Node( - util.Function( + Function( input_names=[ "subject", "dtseries", @@ -1545,7 +1560,7 @@ def surface_reho(wf, cfg, strat_pool, pipe_num, opt): ) def surface_connectivity_matrix(wf, cfg, strat_pool, pipe_num, opt): connectivity_parcellation = pe.Node( - util.Function( + Function( input_names=["subject", "dtseries", "surf_atlaslabel"], output_names=["parcellation_file"], function=run_ciftiparcellate, @@ -1561,7 +1576,7 @@ def surface_connectivity_matrix(wf, cfg, strat_pool, pipe_num, opt): ]["surface_parcellation_template"] correlation_matrix = pe.Node( - util.Function( + Function( 
input_names=["subject", "ptseries"], output_names=["correlation_matrix"], function=run_cifticorrelation, diff --git a/CPAC/timeseries/timeseries_analysis.py b/CPAC/timeseries/timeseries_analysis.py index 14547bc79b..18b1a4851a 100644 --- a/CPAC/timeseries/timeseries_analysis.py +++ b/CPAC/timeseries/timeseries_analysis.py @@ -15,7 +15,6 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . from nipype.interfaces import afni, fsl, utility as util -from nipype.interfaces.utility import Function from CPAC.connectome.connectivity_matrix import ( create_connectome_afni, @@ -23,12 +22,13 @@ get_connectome_method, ) from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.utils.datasource import ( create_roi_mask_dataflow, create_spatial_map_dataflow, resample_func_roi, ) +from CPAC.utils.interfaces import Function from CPAC.utils.monitoring import FMLOGGER @@ -86,7 +86,7 @@ def get_voxel_timeseries(wf_name: str = "voxel_timeseries") -> pe.Workflow: ) timeseries_voxel = pe.Node( - util.Function( + Function( input_names=["data_file", "template"], output_names=["oneD_file"], function=gen_voxel_timeseries, @@ -241,7 +241,7 @@ def get_roi_timeseries(wf_name: str = "roi_timeseries") -> pe.Workflow: clean_csv_imports = ["import os"] clean_csv = pe.Node( - util.Function( + Function( input_names=["roi_csv"], output_names=["roi_array", "edited_roi_csv"], function=clean_roi_csv, @@ -382,7 +382,7 @@ def get_vertices_timeseries(wf_name="vertices_timeseries"): ) timeseries_surface = pe.Node( - util.Function( + Function( input_names=["rh_surface_file", "lh_surface_file"], output_names=["out_file"], function=gen_vertices_timeseries, diff --git a/CPAC/utils/bids_utils.py b/CPAC/utils/bids_utils.py index 34e72d430e..08e6edb989 100755 --- a/CPAC/utils/bids_utils.py +++ b/CPAC/utils/bids_utils.py @@ -14,6 +14,9 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . +"""Utilities for using BIDS data.""" + +from base64 import b64decode import json import os import re @@ -91,8 +94,7 @@ def bids_decode_fname(file_path, dbg=False, raise_error=True): ) if raise_error: raise ValueError(msg) - else: - UTLOGGER.error(msg) + UTLOGGER.error(msg) elif not f_dict["scantype"]: msg = ( f"Filename ({fname}) does not appear to contain" @@ -100,8 +102,7 @@ def bids_decode_fname(file_path, dbg=False, raise_error=True): ) if raise_error: raise ValueError(msg) - else: - UTLOGGER.error(msg) + UTLOGGER.error(msg) elif "bold" in f_dict["scantype"] and not f_dict["task"]: msg = ( f"Filename ({fname}) is a BOLD file, but doesn't contain a task, does" @@ -109,15 +110,13 @@ def bids_decode_fname(file_path, dbg=False, raise_error=True): ) if raise_error: raise ValueError(msg) - else: - UTLOGGER.error(msg) + UTLOGGER.error(msg) return f_dict def bids_entities_from_filename(filename): - """Function to collect a list of BIDS entities from a given - filename. + """Collect a list of BIDS entities from a given filename. Parameters ---------- @@ -142,7 +141,7 @@ def bids_entities_from_filename(filename): def bids_match_entities(file_list, entities, suffix): - """Function to subset a list of filepaths by a passed BIDS entity. + """Subset a list of filepaths by a passed BIDS entity. 
Parameters ---------- @@ -250,10 +249,9 @@ def bids_remove_entity(name, key): def bids_retrieve_params(bids_config_dict, f_dict, dbg=False): - """ + """Retrieve BIDS parameters for BIDS file corresponding to f_dict. - Retrieve the BIDS parameters from bids_config_dict for BIDS file - corresponding to f_dict. If an exact match for f_dict is not found + If an exact match for f_dict is not found the nearest match is returned, corresponding to the BIDS inheritance principle. @@ -316,12 +314,10 @@ def bids_retrieve_params(bids_config_dict, f_dict, dbg=False): return params -def bids_parse_sidecar(config_dict, dbg=False, raise_error=True): - # type: (dict, bool) -> dict - """ - Uses the BIDS principle of inheritance to build a data structure that - maps parameters in side car .json files to components in the names of - corresponding nifti files. +def bids_parse_sidecar( + config_dict: dict, dbg: bool = False, raise_error: bool = True +) -> dict: + """Use BIDS inheritance to map parameters in sidecar to corresponding NIfTI files. :param config_dict: dictionary that maps paths of sidecar json files (the key) to a dictionary containing the contents of the files (the values) @@ -428,9 +424,9 @@ def bids_parse_sidecar(config_dict, dbg=False, raise_error=True): def bids_shortest_entity(file_list): - """Function to return the single file with the shortest chain of - BIDS entities from a given list, returning the first if more than - one have the same minimum length. + """Return the single file with the shortest chain of BIDS entities from a list. + + Return the first if more than one have the same minimum length. Parameters ---------- @@ -553,9 +549,7 @@ def bids_gen_cpac_sublist( raise_error=True, only_one_anat=True, ): - """ - Generates a CPAC formatted subject list from information contained in a - BIDS formatted set of data. + """Generate a CPAC formatted subject list from a BIDS dataset. Parameters ---------- @@ -910,8 +904,9 @@ def camelCase(string: str) -> str: # pylint: disable=invalid-name def combine_multiple_entity_instances(bids_str: str) -> str: - """Combines mutliple instances of a key in a BIDS string to a single - instance by camelCasing and concatenating the values. + """Combine mutliple instances of a key in a BIDS string to a single instance. + + camelCase and concatenate the values. Parameters ---------- @@ -950,8 +945,7 @@ def combine_multiple_entity_instances(bids_str: str) -> str: def insert_entity(resource, key, value): - """Insert a `f'{key}-{value}'` BIDS entity before `desc-` if - present or before the suffix otherwise. + """Insert a BIDS entity before `desc-` if present or before the suffix otherwise. Parameters ---------- @@ -983,7 +977,8 @@ def insert_entity(resource, key, value): return "_".join([*new_entities[0], f"{key}-{value}", *new_entities[1], suff]) -def load_yaml_config(config_filename, aws_input_creds): +def load_yaml_config(config_filename: str, aws_input_creds: str) -> dict | list: + """Load a YAML configuration file, locally or from AWS.""" if config_filename.lower().startswith("data:"): try: header, encoded = config_filename.split(",", 1) @@ -1020,8 +1015,7 @@ def load_yaml_config(config_filename, aws_input_creds): def cl_strip_brackets(arg_list): - """Removes '[' from before first and ']' from after final - arguments in a list of commandline arguments. + """Remove '[' from before first and ']' from after final arguments. 
Parameters ---------- @@ -1051,7 +1045,7 @@ def create_cpac_data_config( aws_input_creds=None, skip_bids_validator=False, only_one_anat=True, -): +) -> list[dict]: """ Create a C-PAC data config YAML file from a BIDS directory. @@ -1111,8 +1105,7 @@ def create_cpac_data_config( def load_cpac_data_config(data_config_file, participant_labels, aws_input_creds): - """ - Loads the file as a check to make sure it is available and readable. + """Load the file to make sure it is available and readable. Parameters ---------- @@ -1210,8 +1203,7 @@ def res_in_filename(cfg, label): def sub_list_filter_by_labels(sub_list, labels): - """Function to filter a sub_list by provided BIDS labels for - specified suffixes. + """Filter a sub_list by provided BIDS labels for specified suffixes. Parameters ---------- @@ -1287,7 +1279,7 @@ def without_key(entity: str, key: str) -> str: def _t1w_filter(anat, shortest_entity, label): - """Helper function to filter T1w paths. + """Filter T1w paths. Parameters ---------- @@ -1318,7 +1310,7 @@ def _t1w_filter(anat, shortest_entity, label): def _sub_anat_filter(anat, shortest_entity, label): - """Helper function to filter anat paths in sub_list. + """Filter anat paths in sub_list. Parameters ---------- @@ -1341,7 +1333,7 @@ def _sub_anat_filter(anat, shortest_entity, label): def _sub_list_filter_by_label(sub_list, label_type, label): - """Function to filter a sub_list by a CLI-provided label. + """Filter a sub_list by a CLI-provided label. Parameters ---------- @@ -1410,7 +1402,7 @@ def _sub_list_filter_by_label(sub_list, label_type, label): def _match_functional_scan(sub_list_func_dict, scan_file_to_match): - """Function to subset a scan from a sub_list_func_dict by a scan filename. + """Subset a scan from a sub_list_func_dict by a scan filename. Parameters ---------- diff --git a/CPAC/utils/configuration/configuration.py b/CPAC/utils/configuration/configuration.py index 2b1da15fdf..8999662989 100644 --- a/CPAC/utils/configuration/configuration.py +++ b/CPAC/utils/configuration/configuration.py @@ -25,6 +25,7 @@ import pkg_resources as p import yaml +from CPAC.utils.typing import SUB_GROUP from .diff import dct_diff CONFIG_KEY_TYPE = str | list[str] @@ -622,7 +623,7 @@ def key_type_error(self, key): ) -def check_pname(p_name: str, pipe_config: Configuration) -> str: +def check_pname(p_name: Optional[str], pipe_config: Configuration) -> str: """Check / set `p_name`, the str representation of a pipeline for use in filetrees. Parameters @@ -756,13 +757,13 @@ def __init__(self, preconfig, skip_env_check=False): def set_subject( - sub_group, pipe_config: "Configuration", p_name: Optional[str] = None + sub_group: SUB_GROUP, pipe_config: "Configuration", p_name: Optional[str] = None ) -> tuple[str, str, str]: """Set pipeline name and log directory path for a given sub_dict. 
Parameters ---------- - sub_dict : dict + sub_group : dict pipe_config : CPAC.utils.configuration.Configuration diff --git a/CPAC/utils/datasource.py b/CPAC/utils/datasource.py index 50bb61d770..8eba26bf21 100644 --- a/CPAC/utils/datasource.py +++ b/CPAC/utils/datasource.py @@ -20,6 +20,7 @@ import json from pathlib import Path import re +from typing import Optional from voluptuous import RequiredFieldInvalid from nipype.interfaces import utility as util @@ -30,7 +31,6 @@ from CPAC.utils.bids_utils import bids_remove_entity from CPAC.utils.interfaces.function import Function from CPAC.utils.monitoring import FMLOGGER -from CPAC.utils.utils import get_scan_params def bidsier_prefix(unique_id): @@ -64,7 +64,8 @@ def bidsier_prefix(unique_id): return "_".join(components) -def get_rest(scan, rest_dict, resource="scan"): +@Function.sig_imports(["from pathlib import Path"]) +def get_rest(scan: str, rest_dict: dict, resource: str = "scan") -> Path | str: """Return the path of the chosen resource in the functional file dictionary. scan: the scan/series name or label @@ -127,7 +128,7 @@ def select_model_files(model, ftest, model_name): return fts_file, con_file, grp_file, mat_file -def check_func_scan(func_scan_dct, scan): +def check_func_scan(func_scan_dct: dict, scan: str) -> None: """Run some checks on the functional timeseries-related files. For a given series/scan name or label. @@ -168,119 +169,6 @@ def check_func_scan(func_scan_dct, scan): raise ValueError(msg) -def create_func_datasource(rest_dict, rpool, wf_name="func_datasource"): - """Return the functional timeseries-related file paths for each series/scan... - - ...from the dictionary of functional files described in the data - configuration (sublist) YAML file. - - Scan input (from inputnode) is an iterable. 
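Earlier in this datasource.py hunk, `get_rest` gains a `@Function.sig_imports([...])` decorator, which records the import statements the function needs when it later runs inside an otherwise empty Function-node namespace (see the `sig_imports` docstring further down in this patch). A brief sketch with a hypothetical helper:

from pathlib import Path

from CPAC.utils.interfaces import Function


@Function.sig_imports(["from pathlib import Path"])
def _nifti_path(directory: str, basename: str) -> Path:
    """Hypothetical helper; the decorator supplies its import at execution time."""
    return Path(directory) / f"{basename}.nii.gz"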
- """ - import nipype.interfaces.utility as util - - from CPAC.pipeline import nipype_pipeline_engine as pe - - wf = pe.Workflow(name=wf_name) - - inputnode = pe.Node( - util.IdentityInterface( - fields=["subject", "scan", "creds_path", "dl_dir"], mandatory_inputs=True - ), - name="inputnode", - ) - - outputnode = pe.Node( - util.IdentityInterface( - fields=["subject", "rest", "scan", "scan_params", "phase_diff", "magnitude"] - ), - name="outputspec", - ) - - # have this here for now because of the big change in the data - # configuration format - # (Not necessary with ingress - format does not comply) - if not rpool.check_rpool("derivatives-dir"): - check_scan = pe.Node( - function.Function( - input_names=["func_scan_dct", "scan"], - output_names=[], - function=check_func_scan, - as_module=True, - ), - name="check_func_scan", - ) - - check_scan.inputs.func_scan_dct = rest_dict - wf.connect(inputnode, "scan", check_scan, "scan") - - # get the functional scan itself - selectrest = pe.Node( - function.Function( - input_names=["scan", "rest_dict", "resource"], - output_names=["file_path"], - function=get_rest, - as_module=True, - ), - name="selectrest", - ) - selectrest.inputs.rest_dict = rest_dict - selectrest.inputs.resource = "scan" - wf.connect(inputnode, "scan", selectrest, "scan") - - # check to see if it's on an Amazon AWS S3 bucket, and download it, if it - # is - otherwise, just return the local file path - check_s3_node = pe.Node( - function.Function( - input_names=["file_path", "creds_path", "dl_dir", "img_type"], - output_names=["local_path"], - function=check_for_s3, - as_module=True, - ), - name="check_for_s3", - ) - - wf.connect(selectrest, "file_path", check_s3_node, "file_path") - wf.connect(inputnode, "creds_path", check_s3_node, "creds_path") - wf.connect(inputnode, "dl_dir", check_s3_node, "dl_dir") - check_s3_node.inputs.img_type = "func" - - wf.connect(inputnode, "subject", outputnode, "subject") - wf.connect(check_s3_node, "local_path", outputnode, "rest") - wf.connect(inputnode, "scan", outputnode, "scan") - - # scan parameters CSV - select_scan_params = pe.Node( - function.Function( - input_names=["scan", "rest_dict", "resource"], - output_names=["file_path"], - function=get_rest, - as_module=True, - ), - name="select_scan_params", - ) - select_scan_params.inputs.rest_dict = rest_dict - select_scan_params.inputs.resource = "scan_parameters" - wf.connect(inputnode, "scan", select_scan_params, "scan") - - # if the scan parameters file is on AWS S3, download it - s3_scan_params = pe.Node( - function.Function( - input_names=["file_path", "creds_path", "dl_dir", "img_type"], - output_names=["local_path"], - function=check_for_s3, - as_module=True, - ), - name="s3_scan_params", - ) - - wf.connect(select_scan_params, "file_path", s3_scan_params, "file_path") - wf.connect(inputnode, "creds_path", s3_scan_params, "creds_path") - wf.connect(inputnode, "dl_dir", s3_scan_params, "dl_dir") - wf.connect(s3_scan_params, "local_path", outputnode, "scan_params") - - return wf - - def create_fmap_datasource(fmap_dct, wf_name="fmap_datasource"): """Return the field map files... 
@@ -374,7 +262,7 @@ def create_fmap_datasource(fmap_dct, wf_name="fmap_datasource"): return wf -def get_fmap_phasediff_metadata(data_config_scan_params): +def get_fmap_phasediff_metadata(data_config_scan_params: dict | str): """Return the scan parameters for a field map phasediff scan.""" if ( not isinstance(data_config_scan_params, dict) @@ -513,299 +401,6 @@ def match_epi_fmaps( return (opposite_pe_epi, same_pe_epi) -def ingress_func_metadata( - wf, - cfg, - rpool, - sub_dict, - subject_id, - input_creds_path, - unique_id=None, - num_strat=None, -): - """Ingress metadata for functional scans.""" - name_suffix = "" - for suffix_part in (unique_id, num_strat): - if suffix_part is not None: - name_suffix += f"_{suffix_part}" - # Grab field maps - diff = False - blip = False - fmap_rp_list = [] - fmap_TE_list = [] - if "fmap" in sub_dict[1]["ent__datatype"].values: - second = False - for orig_key in sub_dict["fmap"]: - gather_fmap = create_fmap_datasource( - sub_dict["fmap"], f"fmap_gather_{orig_key}_{subject_id}" - ) - gather_fmap.inputs.inputnode.set( - subject=subject_id, - creds_path=input_creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - gather_fmap.inputs.inputnode.scan = orig_key - - key = orig_key - if "epi" in key and not second: - key = "epi-1" - second = True - elif "epi" in key and second: - key = "epi-2" - - rpool.set_data(key, gather_fmap, "outputspec.rest", {}, "", "fmap_ingress") - rpool.set_data( - f"{key}-scan-params", - gather_fmap, - "outputspec.scan_params", - {}, - "", - "fmap_params_ingress", - ) - - fmap_rp_list.append(key) - - get_fmap_metadata_imports = ["import json"] - get_fmap_metadata = pe.Node( - Function( - input_names=["data_config_scan_params"], - output_names=[ - "dwell_time", - "pe_direction", - "total_readout", - "echo_time", - "echo_time_one", - "echo_time_two", - ], - function=get_fmap_phasediff_metadata, - imports=get_fmap_metadata_imports, - ), - name=f"{key}_get_metadata{name_suffix}", - ) - - wf.connect( - gather_fmap, - "outputspec.scan_params", - get_fmap_metadata, - "data_config_scan_params", - ) - - if "phase" in key: - # leave it open to all three options, in case there is a - # phasediff image with either a single EchoTime field (which - # usually matches one of the magnitude EchoTimes), OR - # a phasediff with an EchoTime1 and EchoTime2 - - # at least one of these rpool keys will have a None value, - # which will be sorted out in gather_echo_times below - rpool.set_data( - f"{key}-TE", - get_fmap_metadata, - "echo_time", - {}, - "", - "fmap_TE_ingress", - ) - fmap_TE_list.append(f"{key}-TE") - - rpool.set_data( - f"{key}-TE1", - get_fmap_metadata, - "echo_time_one", - {}, - "", - "fmap_TE1_ingress", - ) - fmap_TE_list.append(f"{key}-TE1") - - rpool.set_data( - f"{key}-TE2", - get_fmap_metadata, - "echo_time_two", - {}, - "", - "fmap_TE2_ingress", - ) - fmap_TE_list.append(f"{key}-TE2") - - elif "magnitude" in key: - rpool.set_data( - f"{key}-TE", - get_fmap_metadata, - "echo_time", - {}, - "", - "fmap_TE_ingress", - ) - fmap_TE_list.append(f"{key}-TE") - - rpool.set_data( - f"{key}-dwell", - get_fmap_metadata, - "dwell_time", - {}, - "", - "fmap_dwell_ingress", - ) - rpool.set_data( - f"{key}-pedir", - get_fmap_metadata, - "pe_direction", - {}, - "", - "fmap_pedir_ingress", - ) - rpool.set_data( - f"{key}-total-readout", - get_fmap_metadata, - "total_readout", - {}, - "", - "fmap_readout_ingress", - ) - - if "phase" in key or "mag" in key: - diff = True - - if re.match("epi_[AP]{2}", orig_key): - blip = True - - if 
diff: - calc_delta_ratio = pe.Node( - Function( - input_names=["effective_echo_spacing", "echo_times"], - output_names=["deltaTE", "ees_asym_ratio"], - function=calc_delta_te_and_asym_ratio, - imports=["from typing import Optional"], - ), - name=f"diff_distcor_calc_delta{name_suffix}", - ) - - gather_echoes = pe.Node( - Function( - input_names=[ - "echotime_1", - "echotime_2", - "echotime_3", - "echotime_4", - ], - output_names=["echotime_list"], - function=gather_echo_times, - ), - name="fugue_gather_echo_times", - ) - - for idx, fmap_file in enumerate(fmap_TE_list, start=1): - try: - node, out_file = rpool.get(fmap_file)[ - f"['{fmap_file}:fmap_TE_ingress']" - ]["data"] - wf.connect(node, out_file, gather_echoes, f"echotime_{idx}") - except KeyError: - pass - - wf.connect(gather_echoes, "echotime_list", calc_delta_ratio, "echo_times") - - # Add in nodes to get parameters from configuration file - # a node which checks if scan_parameters are present for each scan - scan_params = pe.Node( - Function( - input_names=[ - "data_config_scan_params", - "subject_id", - "scan", - "pipeconfig_tr", - "pipeconfig_tpattern", - "pipeconfig_start_indx", - "pipeconfig_stop_indx", - ], - output_names=[ - "tr", - "tpattern", - "template", - "ref_slice", - "start_indx", - "stop_indx", - "pe_direction", - "effective_echo_spacing", - ], - function=get_scan_params, - imports=["from CPAC.utils.utils import check, try_fetch_parameter"], - ), - name=f"bold_scan_params_{subject_id}{name_suffix}", - ) - scan_params.inputs.subject_id = subject_id - scan_params.inputs.set( - pipeconfig_start_indx=cfg.functional_preproc["truncation"]["start_tr"], - pipeconfig_stop_indx=cfg.functional_preproc["truncation"]["stop_tr"], - ) - - node, out = rpool.get("scan")["['scan:func_ingress']"]["data"] - wf.connect(node, out, scan_params, "scan") - - # Workaround for extracting metadata with ingress - if rpool.check_rpool("derivatives-dir"): - selectrest_json = pe.Node( - function.Function( - input_names=["scan", "rest_dict", "resource"], - output_names=["file_path"], - function=get_rest, - as_module=True, - ), - name="selectrest_json", - ) - selectrest_json.inputs.rest_dict = sub_dict - selectrest_json.inputs.resource = "scan_parameters" - wf.connect(node, out, selectrest_json, "scan") - wf.connect(selectrest_json, "file_path", scan_params, "data_config_scan_params") - - else: - # wire in the scan parameter workflow - node, out = rpool.get("scan-params")["['scan-params:scan_params_ingress']"][ - "data" - ] - wf.connect(node, out, scan_params, "data_config_scan_params") - - rpool.set_data("TR", scan_params, "tr", {}, "", "func_metadata_ingress") - rpool.set_data("tpattern", scan_params, "tpattern", {}, "", "func_metadata_ingress") - rpool.set_data("template", scan_params, "template", {}, "", "func_metadata_ingress") - rpool.set_data( - "start-tr", scan_params, "start_indx", {}, "", "func_metadata_ingress" - ) - rpool.set_data("stop-tr", scan_params, "stop_indx", {}, "", "func_metadata_ingress") - rpool.set_data( - "pe-direction", scan_params, "pe_direction", {}, "", "func_metadata_ingress" - ) - - if diff: - # Connect EffectiveEchoSpacing from functional metadata - rpool.set_data( - "effectiveEchoSpacing", - scan_params, - "effective_echo_spacing", - {}, - "", - "func_metadata_ingress", - ) - node, out_file = rpool.get("effectiveEchoSpacing")[ - "['effectiveEchoSpacing:func_metadata_ingress']" - ]["data"] - wf.connect(node, out_file, calc_delta_ratio, "effective_echo_spacing") - rpool.set_data( - "deltaTE", calc_delta_ratio, 
"deltaTE", {}, "", "deltaTE_ingress" - ) - rpool.set_data( - "ees-asym-ratio", - calc_delta_ratio, - "ees_asym_ratio", - {}, - "", - "ees_asym_ratio_ingress", - ) - - return wf, rpool, diff, blip, fmap_rp_list - - def create_general_datasource(wf_name): """Create a general-purpose datasource node.""" import nipype.interfaces.utility as util @@ -881,9 +476,16 @@ def create_check_for_s3_node( return check_s3_node +@function.Function.sig_imports( + ["from pathlib import Path", "from typing import Optional"] +) def check_for_s3( - file_path, creds_path=None, dl_dir=None, img_type="other", verbose=False -): + file_path: Path | str, + creds_path: Optional[Path | str] = None, + dl_dir: Optional[Path | str] = None, + img_type: str = "other", + verbose: bool = False, +) -> Path | str: """Check if passed-in file is on S3.""" # Import packages import os diff --git a/CPAC/utils/interfaces/__init__.py b/CPAC/utils/interfaces/__init__.py index 126bb1c22b..6716a562f5 100644 --- a/CPAC/utils/interfaces/__init__.py +++ b/CPAC/utils/interfaces/__init__.py @@ -1,7 +1,27 @@ +# Copyright (C) 2010-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Custom interfaces for C-PAC.""" + from . import brickstat, datasink, function, pc +from .function import Function __all__ = [ "function", + "Function", "pc", "brickstat", "datasink", diff --git a/CPAC/utils/interfaces/function/function.py b/CPAC/utils/interfaces/function/function.py index 34d01373d5..2df6741717 100644 --- a/CPAC/utils/interfaces/function/function.py +++ b/CPAC/utils/interfaces/function/function.py @@ -156,28 +156,28 @@ class Function(NipypeFunction): def __init__( self, - input_names=None, - output_names="out", - function=None, - imports=None, - as_module=False, + input_names: Optional[str | list[str]] = None, + output_names: Optional[str | list[str]] = "out", + function: Optional[Callable] = None, + imports: Optional[list[str]] = None, + as_module: bool = False, **inputs, ): - """Initialize a :py:func`~CPAC.utils.interfaces.function.Function` interface. + """Initialize a :py:func:`~CPAC.utils.interfaces.function.Function` interface. Parameters ---------- - input_names : single str or list or None + input_names names corresponding to function inputs if ``None``, derive input names from function argument names - output_names : single str or list + output_names names corresponding to function outputs (default: 'out'). if list of length > 1, has to match the number of outputs - function : callable + function callable python object. must be able to execute in an - isolated namespace (possibly in concert with the ``imports`` + isolated namespace (possibly in concert with the `imports` parameter) - imports : list of strings + imports list of import statements that allow the function to execute in an otherwise empty namespace. 
If these collide with imports defined via the :py:meth:`Function.sig_imports` @@ -244,12 +244,11 @@ def sig_imports(imports: list[str]) -> Callable: Parameters ---------- - imports : list of str + imports import statements to import the function in an otherwise empty namespace. If these collide with imports defined via the - :py:meth:`Function.__init__` initialization method, the - imports given as a parameter here will be overridden by - those from the initializer. + :py:meth:`Function.__init__` method, the imports given as a parameter here + will be overridden by those from the initializer. Returns ------- diff --git a/CPAC/utils/interfaces/function/seg_preproc.py b/CPAC/utils/interfaces/function/seg_preproc.py index d220781f48..f3cb06840b 100644 --- a/CPAC/utils/interfaces/function/seg_preproc.py +++ b/CPAC/utils/interfaces/function/seg_preproc.py @@ -1,11 +1,26 @@ +# Copyright (C) 2022-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . """Function interfaces for seg_preproc.""" -from nipype.interfaces import utility as util +from CPAC.utils.interfaces.function.function import Function def pick_tissue_from_labels_file_interface(input_names=None): - """Function to create a Function interface for - CPAC.seg_preproc.utils.pick_tissue_from_labels_file. + """Create a Function interface for ~CPAC.seg_preproc.utils.pick_tissue_from_labels_file. Parameters ---------- @@ -20,7 +35,7 @@ def pick_tissue_from_labels_file_interface(input_names=None): if input_names is None: input_names = ["multiatlas_Labels", "csf_label", "gm_label", "wm_label"] - return util.Function( + return Function( input_names=input_names, output_names=["csf_mask", "gm_mask", "wm_mask"], function=pick_tissue_from_labels_file, diff --git a/CPAC/utils/strategy.py b/CPAC/utils/strategy.py index 5c2b8cf4eb..d294bc0e60 100644 --- a/CPAC/utils/strategy.py +++ b/CPAC/utils/strategy.py @@ -15,13 +15,13 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . 
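Because `pick_tissue_from_labels_file_interface` now returns the C-PAC `Function` instead of `nipype.interfaces.utility.Function`, call sites can keep passing `imports=` to the initializer exactly as before. A small sketch of the constructor documented above; `count_voxels` and its import list are illustrative, not part of this diff:

```python
from CPAC.utils.interfaces import Function


def count_voxels(mask_path):
    """Toy function body; it runs in an isolated namespace, so `nib` comes from `imports`."""
    img = nib.load(mask_path)  # calling this directly would need `import nibabel as nib` in scope
    return int((img.get_fdata() > 0).sum())


count_iface = Function(
    input_names=["mask_path"],
    output_names=["n_voxels"],
    function=count_voxels,
    imports=["import nibabel as nib"],
)
```

Per the two docstrings above, imports passed to the initializer take precedence over any declared with `Function.sig_imports` when the two collide.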
-from CPAC.pipeline.engine.resource_pool import ResourcePool +from CPAC.pipeline.engine.resource import ResourcePool from CPAC.utils.monitoring import WFLOGGER class Strategy: def __init__(self): - self._resource_pool = ResourcePool({}) + self._resource_pool = ResourcePool() self.leaf_node = None self.leaf_out_file = None self.name = [] @@ -29,9 +29,6 @@ def __init__(self): def append_name(self, name): self.name.append(name) - def get_name(self): - return self.name - def set_leaf_properties(self, node, out_file): self.leaf_node = node self.leaf_out_file = out_file @@ -57,7 +54,7 @@ def get_node_from_resource_pool(self, resource_key): @property def resource_pool(self): """Strategy's ResourcePool dict.""" - return self._resource_pool.get_entire_rpool() + return self._resource_pool.rpool @property def rpool(self): diff --git a/CPAC/utils/tests/old_functions.py b/CPAC/utils/tests/old_functions.py new file mode 100644 index 0000000000..80171db77b --- /dev/null +++ b/CPAC/utils/tests/old_functions.py @@ -0,0 +1,67 @@ +# Copyright (C) 2012-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Functions from before refactoring.""" + + +def check(params_dct, subject_id, scan_id, val_to_check, throw_exception): + """https://github.com/FCP-INDI/C-PAC/blob/96db8b0b65ab1d5f55fb3b895855af34d72c17e4/CPAC/utils/utils.py#L630-L653""" + if val_to_check not in params_dct: + if throw_exception: + raise Exception( + f"Missing Value for {val_to_check} for participant " f"{subject_id}" + ) + return None + if isinstance(params_dct[val_to_check], dict): + ret_val = params_dct[val_to_check][scan_id] + else: + ret_val = params_dct[val_to_check] + if ret_val == "None": + if throw_exception: + raise Exception( + f"'None' Parameter Value for {val_to_check} for participant " + f"{subject_id}" + ) + else: + ret_val = None + if ret_val == "" and throw_exception: + raise Exception( + f"Missing Value for {val_to_check} for participant " f"{subject_id}" + ) + return ret_val + + +def check2(val): + """https://github.com/FCP-INDI/C-PAC/blob/96db8b0b65ab1d5f55fb3b895855af34d72c17e4/CPAC/utils/utils.py#L745-L746""" + return val if val == None or val == "" or isinstance(val, str) else int(val) + + +def try_fetch_parameter(scan_parameters, subject, scan, keys): + """https://github.com/FCP-INDI/C-PAC/blob/96db8b0b65ab1d5f55fb3b895855af34d72c17e4/CPAC/utils/utils.py#L679-L703""" + scan_parameters = dict((k.lower(), v) for k, v in scan_parameters.items()) + for key in keys: + key = key.lower() + if key not in scan_parameters: + continue + if isinstance(scan_parameters[key], dict): + value = scan_parameters[key][scan] + else: + value = scan_parameters[key] + if value == "None": + return None + if value is not None: + return value + return None diff --git a/CPAC/utils/tests/test_datasource.py b/CPAC/utils/tests/test_datasource.py index 9842310bb1..be7c2255c2 100644 --- a/CPAC/utils/tests/test_datasource.py +++ 
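The new `old_functions.py` pins the pre-refactor helpers (with links to the exact historical lines) so new code paths can be checked against the old behavior. A sketch of the kind of parity assertion this enables; the parameter values here are illustrative:

```python
# Illustrative parity check using the archived pre-refactor helpers.
from CPAC.utils.tests import old_functions

params = {"tr": 2.5, "last_TR": ""}

# try_fetch_parameter lower-cases keys, so "TR" matches "tr".
assert old_functions.try_fetch_parameter(params, "sub-01", "scan-01", ["TR", "RepetitionTime"]) == 2.5

# check() returns "" for last_TR when not asked to raise; check2() passes strings through unchanged.
assert old_functions.check(params, "sub-01", "scan-01", "last_TR", False) == ""
assert old_functions.check2("") == ""
```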
b/CPAC/utils/tests/test_datasource.py @@ -1,10 +1,26 @@ +# Copyright (C) 2019-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import json import pytest -import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.utils.datasource import match_epi_fmaps +from CPAC.utils.interfaces import Function from CPAC.utils.test_resources import setup_test_wf @@ -48,7 +64,7 @@ def test_match_epi_fmaps(): } match_fmaps = pe.Node( - util.Function( + Function( input_names=["fmap_dct", "bold_pedir"], output_names=["opposite_pe_epi", "same_pe_epi"], function=match_epi_fmaps, diff --git a/CPAC/utils/tests/test_utils.py b/CPAC/utils/tests/test_utils.py index f9a4cb73e4..4d8f18dabe 100644 --- a/CPAC/utils/tests/test_utils.py +++ b/CPAC/utils/tests/test_utils.py @@ -3,35 +3,52 @@ import multiprocessing from unittest import mock +from _pytest.logging import LogCaptureFixture import pytest from CPAC.func_preproc import get_motion_ref -from CPAC.pipeline.nodeblock import NodeBlockFunction +from CPAC.pipeline.engine.nodeblock import NodeBlockFunction from CPAC.utils.configuration import Configuration from CPAC.utils.monitoring.custom_logging import log_subprocess +from CPAC.utils.tests import old_functions from CPAC.utils.utils import ( check_config_resources, check_system_deps, - try_fetch_parameter, + ScanParameters, ) -scan_params_bids = { - "RepetitionTime": 2.0, - "ScanOptions": "FS", - "SliceAcquisitionOrder": "Interleaved Ascending", -} -scan_params_cpac = { - "tr": 2.5, - "acquisition": "seq+z", - "reference": "24", - "first_tr": "", - "last_tr": "", +SCAN_PARAMS = { + "BIDS": { + "params": { + "RepetitionTime": 2.0, + "ScanOptions": "FS", + "SliceAcquisitionOrder": "Interleaved Ascending", + }, + "expected_TR": 2.0, + }, + "C-PAC": { + "params": { + "tr": 2.5, + "acquisition": "seq+z", + "reference": "24", + "first_TR": 1, + "last_TR": "", + }, + "expected_TR": 2.5, + }, + "nested": { + "params": { + "TR": {"scan": 3}, + "first_TR": {"scan": 0}, + "last_TR": {"scan": 450}, + }, + "expected_TR": 3, + }, } def _installation_check(command: str, flag: str) -> None: - """Test that command is installed by running specified version or - help flag. + """Test that command is installed by running specified version or help flag. 
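The new `SCAN_PARAMS` fixture covers three shapes: BIDS-style keys, C-PAC-style lower-case keys, and a nested form in which each parameter maps scan IDs to values. The nested form is resolved against the scan ID supplied at construction time by the `ScanParameters` class introduced later in this diff; a brief sketch with illustrative IDs:

```python
# Illustrative: per-scan nested parameters resolve via the scan ID given at construction.
from CPAC.utils.utils import ScanParameters

params = ScanParameters({"TR": {"scan": 3}}, "sub-01", "scan")
assert params.fetch_and_convert(["TR", "RepetitionTime"], float) == 3.0
```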
Parameters ---------- @@ -56,9 +73,10 @@ def _installation_check(command: str, flag: str) -> None: def test_check_config_resources(): """Test check_config_resources function.""" - with mock.patch.object(multiprocessing, "cpu_count", return_value=2), pytest.raises( - SystemError - ) as system_error: + with ( + mock.patch.object(multiprocessing, "cpu_count", return_value=2), + pytest.raises(SystemError) as system_error, + ): check_config_resources( Configuration( {"pipeline_setup": {"system_config": {"max_cores_per_participant": 10}}} @@ -69,12 +87,62 @@ def test_check_config_resources(): assert "threads available (2)" in error_string -def test_function(): - TR = try_fetch_parameter(scan_params_bids, "0001", "scan", ["TR", "RepetitionTime"]) - assert TR == 2.0 - - TR = try_fetch_parameter(scan_params_cpac, "0001", "scan", ["TR", "RepetitionTime"]) - assert TR == 2.5 +@pytest.mark.parametrize("scan_params", ["BIDS", "C-PAC", "nested"]) +@pytest.mark.parametrize("convert_to", [int, float, str]) +def test_fetch_and_convert( + caplog: LogCaptureFixture, scan_params: str, convert_to: type +) -> None: + """Test functionality to fetch and convert scan parameters.""" + params = ScanParameters(SCAN_PARAMS[scan_params]["params"], "subject", "scan") + TR = params.fetch_and_convert( + keys=["TR", "RepetitionTime"], + convert_to=convert_to, + ) + if TR and "RepetitionTime" in params.params: + old_TR = convert_to( + old_functions.check( + params.params, params.subject, params.scan, "RepetitionTime", False + ) + ) + assert TR == old_TR + try: + old_TR = convert_to( + old_functions.try_fetch_parameter( + params.params, params.subject, params.scan, ["TR", "RepetitionTime"] + ) + ) + except TypeError: + old_TR = None + assert ( + (TR == convert_to(SCAN_PARAMS[scan_params]["expected_TR"])) + and isinstance(TR, convert_to) + and TR == old_TR + ) + if scan_params == "C-PAC": + assert "Using case-insensitive match: 'TR' ≅ 'tr'." in caplog.text + else: + assert "Using case-insensitive match: 'TR' ≅ 'tr'." not in caplog.text + not_TR = params.fetch_and_convert( + keys=["NotTR", "NotRepetitionTime"], + convert_to=convert_to, + ) + assert not_TR is None + if "first_TR" in params.params: + first_tr = params.fetch_and_convert(["first_TR"], int, 1, False) + old_first_tr = old_functions.check( + params.params, params.subject, params.scan, "first_TR", False + ) + if old_first_tr: + old_first_tr = old_functions.check2(old_first_tr) + assert first_tr == old_first_tr + if "last_TR" in params.params: + last_tr = params.fetch_and_convert(["last_TR"], int, "", False) + old_last_tr = old_functions.check( + params.params, params.subject, params.scan, "last_TR", False + ) + if old_last_tr: + old_last_tr = old_functions.check2(old_last_tr) + assert last_tr == old_last_tr @pytest.mark.parametrize("executable", ["Xvfb"]) @@ -96,6 +164,7 @@ def test_NodeBlock_option_SSOT(): # pylint: disable=invalid-name def test_system_deps(): """Test system dependencies. + Raises an exception if dependencies are not met. """ check_system_deps(*([True] * 4)) diff --git a/CPAC/utils/typing.py b/CPAC/utils/typing.py new file mode 100644 index 0000000000..8e44556a1e --- /dev/null +++ b/CPAC/utils/typing.py @@ -0,0 +1,27 @@ +# Copyright (C) 2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version.
+ +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Type aliases for C-PAC.""" + +from typing import ForwardRef + +from pandas import DataFrame + +LIST_OF_LIST_OF_STR = str | list[ForwardRef("LIST_OF_LIST_OF_STR")] +# _PIPE_IDX = list[ForwardRef("PIPE_IDX")] | str | tuple[ForwardRef("PIPE_IDX"), ...] +# PIPE_IDX = TypeVar("PIPE_IDX", bound=_PIPE_IDX) +PIPE_IDX = list[str | tuple] | str | tuple +SUB_GROUP = tuple[tuple[str, str], DataFrame] diff --git a/CPAC/utils/utils.py b/CPAC/utils/utils.py index 4ba3285218..8e179411ae 100644 --- a/CPAC/utils/utils.py +++ b/CPAC/utils/utils.py @@ -25,7 +25,7 @@ import numbers import os import pickle -from typing import Any +from typing import Any, Literal, Optional, overload import numpy as np from voluptuous.error import Invalid @@ -33,6 +33,7 @@ from CPAC.utils.configuration import Configuration from CPAC.utils.docs import deprecated +from CPAC.utils.interfaces.function import Function from CPAC.utils.monitoring import FMLOGGER, WFLOGGER CONFIGS_DIR = os.path.abspath( @@ -46,6 +47,7 @@ os.path.join(CONFIGS_DIR, "1.7-1.8-deprecations.yml"), "r", encoding="utf-8" ) as _f: NESTED_CONFIG_DEPRECATIONS = yaml.safe_load(_f) +PE_DIRECTION = Literal["i", "i-", "j", "j-", "k", "k-", ""] VALID_PATTERNS = [ "alt+z", "altplus", @@ -130,13 +132,13 @@ def get_flag_wf(wf_name="get_flag"): input_node = pe.Node(util.IdentityInterface(fields=["in_flag"]), name="inputspec") get_flag = pe.Node( - util.Function(input_names=["in_flag"], function=_get_flag), name="get_flag" + Function(input_names=["in_flag"], function=_get_flag), name="get_flag" ) wf.connect(input_node, "in_flag", get_flag, "in_flag") -def read_json(json_file): +def read_json(json_file: str) -> dict: """Read a JSON file and return the contents as a dictionary.""" try: with open(json_file, "r") as f: @@ -222,6 +224,7 @@ def create_id_string( return combine_multiple_entity_instances(res_in_filename(cfg, out_filename)) +@Function.sig_imports(["import os", "import json"]) def write_output_json(json_data, filename, indent=3, basedir=None): """Write a dictionary to a JSON file.""" if not basedir: @@ -320,7 +323,7 @@ def get_zscore(map_node=False, wf_name="z_score"): ) op_string = pe.MapNode( - util.Function( + Function( input_names=["mean", "std_dev"], output_names=["op_string"], function=get_operand_string, @@ -343,7 +346,7 @@ def get_zscore(map_node=False, wf_name="z_score"): ) op_string = pe.Node( - util.Function( + Function( input_names=["mean", "std_dev"], output_names=["op_string"], function=get_operand_string, @@ -398,7 +401,7 @@ def get_fisher_zscore(input_name, map_node=False, wf_name="fisher_z_score"): if map_node: # node to separate out fisher_z_score = pe.MapNode( - util.Function( + Function( input_names=["correlation_file", "timeseries_one_d", "input_name"], output_names=["out_file"], function=compute_fisher_z_score, @@ -408,7 +411,7 @@ def get_fisher_zscore(input_name, map_node=False, wf_name="fisher_z_score"): ) else: fisher_z_score = pe.Node( - util.Function( + Function( input_names=["correlation_file", "timeseries_one_d", "input_name"], output_names=["out_file"], function=compute_fisher_z_score, @@ -471,18 +474,172 @@ def compute_fisher_z_score(correlation_file, timeseries_one_d, 
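The new `CPAC/utils/typing.py` aliases and the `PE_DIRECTION` literal in `utils.py` exist for annotations rather than runtime behavior. A small sketch of how they might be used; `describe_scan` is hypothetical:

```python
# Illustrative annotations using the new aliases; the function itself is hypothetical.
from CPAC.utils.typing import PIPE_IDX
from CPAC.utils.utils import PE_DIRECTION


def describe_scan(pipe_idx: PIPE_IDX, pe_direction: PE_DIRECTION = "") -> str:
    """Toy helper: format a pipeline index and phase-encoding direction."""
    return f"{pipe_idx}: PhaseEncodingDirection={pe_direction or 'unknown'}"


print(describe_scan(("desc-preproc_bold", "space-template"), "j-"))
```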
input_name): return out_file -def fetch_and_convert( - scan_parameters: dict, scan: str, keys: list[str], convert_to: type, fallback: Any -) -> Any: - """Fetch a parameter from a scan parameters dictionary and convert it to a given type. +class ScanParameters: + """A dictionary of scan parameters and access methods.""" - Catch TypeError exceptions and return a fallback value in those cases. - """ - try: - value = convert_to(scan_parameters, None, scan, keys) - except TypeError: - value = fallback - return value + def __init__(self, scan_parameters: str | dict, subject_id: str, scan: str): + """Initialize ScanParameters dict and metadata.""" + self.subject = subject_id + self.scan = scan + if ".json" in scan_parameters: + if not os.path.exists(scan_parameters): + err = ( + "\n[!] WARNING: Scan parameters JSON file listed in your data" + f" configuration file does not exist:\n{scan_parameters}" + ) + raise FileNotFoundError(err) + with open(scan_parameters, "r") as f: + self.params: dict = json.load(f) + elif isinstance(scan_parameters, dict): + self.params = scan_parameters + else: + err = ( + "\n\n[!] Could not read the format of the scan parameters " + "information included in the data configuration file for " + f"the participant {self.subject}.\n\n" + ) + raise OSError(err) + + def check(self, val_to_check: str, throw_exception: bool): + """Check that a value is populated for a given key in a parameters dictionary.""" + if val_to_check not in self.params: + if throw_exception: + msg = f"Missing Value for {val_to_check} for participant {self.subject}" + raise ValueError(msg) + return None + + if isinstance(self.params[val_to_check], dict): + ret_val = self.params[val_to_check][self.scan] + else: + ret_val = self.params[val_to_check] + + if ret_val == "None": + if throw_exception: + msg = ( + f"'None' parameter value for {val_to_check} for" + f" participant {self.subject}." + ) + raise ValueError(msg) + ret_val = None + + if ret_val == "" and throw_exception: + msg = f"Missing value for {val_to_check} for participant {self.subject}." + raise ValueError(msg) + + return ret_val + + @overload + def fetch( + self, + keys: Optional[list[str]] = None, + *, + match_case: Literal[False], + throw_exception: bool, + ) -> Any: ... + @overload + def fetch( + self, + keys: Optional[list[str]] = None, + *, + match_case: Literal[True], + throw_exception: bool, + ) -> tuple[Any, tuple[str, str]]: ... + def fetch(self, keys, *, match_case=False, throw_exception=True): + """Fetch the first found parameter from a scan params dictionary. + + Returns + ------- + value + The value of the parameter. + + keys, optional + The matched keys (only if ``match_case is True``) + + throw_exception + Raise an exception if value is ``""`` or ``None``? + """ + if match_case: + keys = {key.lower(): key for key in keys} + scan_param_keys = {key.lower(): key for key in self.params.keys()} + scan_parameters = {key.lower(): value for key, value in self.params.items()} + else: + scan_parameters = self.params + for key in keys: + if key in scan_parameters: + if match_case: + return self.check(key, throw_exception), ( + keys[key], + scan_param_keys[key], + ) + return self.check(key, throw_exception) + msg = f"None of {keys} found in {list(scan_parameters.keys())}." 
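`ScanParameters` accepts either a path to a scan-parameters JSON file or an already-loaded dict, and `check` normalizes placeholder values. A hedged sketch against the behavior shown above; the parameter values and IDs are illustrative:

```python
# Illustrative: ScanParameters takes a loaded dict (or a JSON sidecar path); `check`
# returns None for the "None" placeholder and raises only when asked to.
from CPAC.utils.utils import ScanParameters

params = ScanParameters(
    {"RepetitionTime": 2.0, "SliceTiming": "None"},
    subject_id="sub-01",
    scan="task-rest_run-1",
)
assert params.check("RepetitionTime", throw_exception=False) == 2.0
assert params.check("SliceTiming", throw_exception=False) is None  # "None" placeholder
try:
    params.check("EffectiveEchoSpacing", throw_exception=True)
except ValueError as error:
    print(error)  # Missing Value for EffectiveEchoSpacing for participant sub-01
```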
+ raise KeyError(msg) + + def fetch_and_convert( + self, + keys: list[str], + convert_to: Optional[type] = None, + fallback: Optional[Any] = None, + warn_typeerror: bool = True, + throw_exception: bool = False, + ) -> Any: + """Fetch a parameter from a scan params dictionary and convert it to a given type. + + Catch TypeError exceptions and return a fallback value in those cases. + + Parameters + ---------- + keys + if multiple keys provided, the value corresponding to the first found will be + returned + + convert_to + the type to return if possible + + fallback + a value to return if the keys are not found in ``scan_parameters`` + + warn_typeerror + log a warning if value cannot be converted to ``convert_to`` type? + + throw_exception + raise an error for empty string or NoneTypes? + + Returns + ------- + value + The gathered parameter coerced to the specified type, if possible. + ``fallback`` otherwise. + """ + value: Any = fallback + fallback_message = f"Falling back to {fallback} ({type(fallback)})." + + try: + raw_value = self.fetch(keys, throw_exception=throw_exception) + except KeyError: + try: + raw_value, matched_keys = self.fetch( + keys, match_case=True, throw_exception=throw_exception + ) + except KeyError: + WFLOGGER.warning( + f"None of {keys} found in {list(self.params.keys())}. " + f"{fallback_message}" + ) + return fallback + WFLOGGER.warning( + f"No exact match found. Using case-insensitive match: '{matched_keys[0]}'" + f" ≅ '{matched_keys[1]}'." + ) + if convert_to: + try: + value = convert_to(raw_value) + except (TypeError, ValueError): + if warn_typeerror: + WFLOGGER.warning( + f"Could not convert {raw_value} to {convert_to}. {fallback_message}" + ) + return value def get_operand_string(mean, std_dev): @@ -570,35 +727,6 @@ def correlation(matrix1, matrix2, match_rows=False, z_scored=False, symmetric=Fa return r -def check(params_dct, subject_id, scan_id, val_to_check, throw_exception): - """Check that a value is populated for a given key in a parameters dictionary.""" - if val_to_check not in params_dct: - if throw_exception: - msg = f"Missing Value for {val_to_check} for participant {subject_id}" - raise ValueError(msg) - return None - - if isinstance(params_dct[val_to_check], dict): - ret_val = params_dct[val_to_check][scan_id] - else: - ret_val = params_dct[val_to_check] - - if ret_val == "None": - if throw_exception: - msg = ( - f"'None' Parameter Value for {val_to_check} for" - f" participant {subject_id}" - ) - raise ValueError(msg) - ret_val = None - - if ret_val == "" and throw_exception: - msg = f"Missing Value for {val_to_check} for participant {subject_id}" - raise ValueError(msg) - - return ret_val - - def check_random_state(seed): """ Turn seed into a np.random.RandomState instance.
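A short usage sketch of `fetch_and_convert` as implemented above: first-match lookup, case-insensitive fallback, type coercion, and the fallback value on failed conversion. The dictionary contents are illustrative:

```python
# Illustrative: fetch_and_convert with case-insensitive fallback and a typed fallback value.
from CPAC.utils.utils import ScanParameters

params = ScanParameters({"tr": "2.5", "first_TR": ""}, "sub-01", "task-rest_run-1")

# Exact keys miss, the case-insensitive match on "tr" succeeds and is coerced to float.
tr = params.fetch_and_convert(["TR", "RepetitionTime"], convert_to=float)
assert tr == 2.5

# Keys absent entirely: the fallback is returned and a warning is logged.
echo_spacing = params.fetch_and_convert(["EffectiveEchoSpacing"], float, fallback=None)
assert echo_spacing is None

# An empty string cannot be coerced to int, so the fallback survives.
first_tr = params.fetch_and_convert(["first_TR"], int, fallback=1, warn_typeerror=False)
assert first_tr == 1
```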
@@ -624,172 +752,94 @@ ) -def try_fetch_parameter(scan_parameters, subject, scan, keys): - """Try to fetch a parameter from a scan parameters dictionary.""" - scan_parameters = {k.lower(): v for k, v in scan_parameters.items()} - - for _key in keys: - key = _key.lower() - - if key not in scan_parameters: - continue - - if isinstance(scan_parameters[key], dict): - value = scan_parameters[key][scan] - else: - value = scan_parameters[key] - - # Explicit none value - if value == "None": - return None - - if value is not None: - return value - return None - - +@Function.sig_imports( + [ + "import json", + "import os", + "from typing import Literal, Optional", + "from CPAC.utils.utils import ScanParameters, PE_DIRECTION, VALID_PATTERNS", + ] +) def get_scan_params( - subject_id, - scan, - pipeconfig_start_indx, - pipeconfig_stop_indx, - data_config_scan_params=None, -): + subject_id: str, + scan: str, + pipeconfig_start_indx: Optional[int | str], + pipeconfig_stop_indx: Optional[int | str], + data_config_scan_params: Optional[dict | str] = None, +) -> tuple[ + Optional[str], + Optional[str], + Optional[int], + Optional[int], + Optional[int], + Optional[int], + PE_DIRECTION, + Optional[float], +]: """Extract slice timing correction parameters and scan parameters. Parameters ---------- - subject_id : str + subject_id subject id - scan : str + scan scan id - pipeconfig_start_indx : int + pipeconfig_start_indx starting volume index as provided in the pipeline config yaml file - pipeconfig_stop_indx : int + pipeconfig_stop_indx ending volume index as provided in the pipeline config yaml file - data_config_scan_params : str - file path to scan parameter JSON file listed in data config yaml file + data_config_scan_params + file path to scan parameter JSON file listed in data config yaml file or loaded + parameter dictionary Returns ------- - TR : a string + tr TR value - pattern : a string + tpattern slice aquisition pattern string or file path - ref_slice : an integer - reference slice which is used to allign all other slices - first_tr : an integer - starting TR or starting volume index - last_tr : an integer - ending TR or ending volume index - pe_direction : str - effective_echo_spacing : float + ref_slice + index of reference slice which is used to align all other slices + first_tr + index of starting TR or starting volume index + last_tr + index of ending TR or ending volume index + pe_direction + https://bids-specification.readthedocs.io/en/stable/glossary.html#phaseencodingdirection-metadata + effective_echo_spacing + https://bids-specification.readthedocs.io/en/stable/glossary.html#effectiveechospacing-metadata """ - import json - import os - import warnings - - def check2(val): - return val if val is None or val == "" or isinstance(val, str) else int(val) - - # initialize vars to empty - TR = pattern = ref_slice = first_tr = last_tr = pe_direction = "" - unit = "s" - effective_echo_spacing = template = None + unit: Literal["ms", "s"] = "s" if isinstance(pipeconfig_stop_indx, str): if "End" in pipeconfig_stop_indx or "end" in pipeconfig_stop_indx: pipeconfig_stop_indx = None - if data_config_scan_params: - if ".json" in data_config_scan_params: - if not os.path.exists(data_config_scan_params): - err = ( - "\n[!]
WARNING: Scan parameters JSON file listed in your data" - f" configuration file does not exist:\n{data_config_scan_params}" - ) - raise FileNotFoundError(err) - - with open(data_config_scan_params, "r") as f: - params_dct = json.load(f) - - # get details from the configuration - # if this is a JSON file, the key values are the BIDS format - # standard - # TODO: better handling of errant key values!!! - if "RepetitionTime" in params_dct.keys(): - TR = float(check(params_dct, subject_id, scan, "RepetitionTime", False)) - if "SliceTiming" in params_dct.keys(): - pattern = str(check(params_dct, subject_id, scan, "SliceTiming", False)) - elif "SliceAcquisitionOrder" in params_dct.keys(): - pattern = str( - check(params_dct, subject_id, scan, "SliceAcquisitionOrder", False) - ) - if "PhaseEncodingDirection" in params_dct.keys(): - pe_direction = str( - check(params_dct, subject_id, scan, "PhaseEncodingDirection", False) - ) - try: - "EffectiveEchoSpacing" in params_dct.keys() - effective_echo_spacing = float( - check(params_dct, subject_id, scan, "EffectiveEchoSpacing", False) - ) - except TypeError: - pass - - elif len(data_config_scan_params) > 0 and isinstance( - data_config_scan_params, dict - ): - params_dct = data_config_scan_params - - # TODO: better handling of errant key values!!! - # TODO: use schema validator to deal with it - # get details from the configuration - TR = fetch_and_convert( - params_dct, scan, ["TR", "RepetitionTime"], float, None - ) - template = fetch_and_convert( - params_dct, scan, ["Template", "template"], str, None - ) - - pattern = str( - try_fetch_parameter( - params_dct, - subject_id, - scan, - ["acquisition", "SliceTiming", "SliceAcquisitionOrder"], - ) - ) - - ref_slice = check(params_dct, subject_id, scan, "reference", False) - ref_slice = int(ref_slice) if ref_slice else ref_slice - - first_tr = check(params_dct, subject_id, scan, "first_TR", False) - first_tr = check2(first_tr) if first_tr else first_tr - - last_tr = check(params_dct, subject_id, scan, "last_TR", False) - last_tr = check2(last_tr) if last_tr else last_tr - - pe_direction = check( - params_dct, subject_id, scan, "PhaseEncodingDirection", False - ) - effective_echo_spacing = fetch_and_convert( - params_dct, - scan, - ["EffectiveEchoSpacing"], - float, - effective_echo_spacing, - ) - - else: - err = ( - "\n\n[!] Could not read the format of the scan parameters " - "information included in the data configuration file for " - f"the participant {subject_id}.\n\n" - ) - raise OSError(err) - first_tr = pipeconfig_start_indx if first_tr == "" or first_tr is None else first_tr - last_tr = pipeconfig_stop_indx if last_tr == "" or last_tr is None else last_tr - pattern = None if "None" in pattern or "none" in pattern else pattern + params = ScanParameters(data_config_scan_params, subject_id, scan) + # TODO: better handling of errant key values!!! 
+ # TODO: use schema validator to deal with it + # get details from the configuration + tr: float | Literal[""] = params.fetch_and_convert( + ["RepetitionTime", "TR"], float, "" + ) + template: Optional[str] = params.fetch_and_convert(["Template", "template"], str) + pattern: Optional[str] = params.fetch_and_convert( + ["acquisition", "SliceTiming", "SliceAcquisitionOrder"], + str, + None, + ) + ref_slice: Optional[int | str] = params.fetch_and_convert(["reference"], int, None) + first_tr: Optional[int | str] = params.fetch_and_convert( + ["first_TR"], int, pipeconfig_start_indx, False + ) + last_tr: Optional[int | str] = params.fetch_and_convert( + ["last_TR"], int, pipeconfig_stop_indx, False + ) + pe_direction: PE_DIRECTION = params.fetch_and_convert( + ["PhaseEncodingDirection"], str, "" + ) + effective_echo_spacing: Optional[float] = params.fetch_and_convert( + ["EffectiveEchoSpacing"], float + ) """ if not pattern: @@ -865,26 +915,26 @@ def check2(val): slice_timings.sort() max_slice_offset = slice_timings[-1] - # checking if the unit of TR and slice timing match or not - # if slice timing in ms convert TR to ms as well - if TR and max_slice_offset > TR: - warnings.warn( + # checking if the unit of tr and slice timing match or not + # if slice timing in ms convert tr to ms as well + if tr and max_slice_offset > tr: + WFLOGGER.warning( "TR is in seconds and slice timings are in " "milliseconds. Converting TR into milliseconds" ) - TR = TR * 1000 - WFLOGGER.info("New TR value %s ms", TR) + tr = tr * 1000 + WFLOGGER.info("New tr value %s ms", tr) unit = "ms" - elif TR and TR > 10: # noqa: PLR2004 + elif tr and tr > 10: # noqa: PLR2004 # check to see, if TR is in milliseconds, convert it into seconds - warnings.warn("TR is in milliseconds, Converting it into seconds") - TR = TR / 1000.0 - WFLOGGER.info("New TR value %s s", TR) + WFLOGGER.warning("TR is in milliseconds, Converting it into seconds") + tr = tr / 1000.0 + WFLOGGER.info("New TR value %s s", tr) unit = "s" # swap back in - tr = f"{TR!s}{unit}" if TR else "" + tr = f"{tr!s}{unit}" if tr else "" tpattern = pattern start_indx = first_tr stop_indx = last_tr diff --git a/CPAC/vmhc/vmhc.py b/CPAC/vmhc/vmhc.py index 3c547a8e2f..e09f156dfb 100644 --- a/CPAC/vmhc/vmhc.py +++ b/CPAC/vmhc/vmhc.py @@ -3,7 +3,7 @@ from CPAC.image_utils import spatial_smoothing from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.registration.registration import apply_transform from CPAC.utils.utils import check_prov_for_regtool from CPAC.vmhc import * diff --git a/dev/circleci_data/conftest.py b/dev/circleci_data/conftest.py new file mode 100644 index 0000000000..4d67fdac05 --- /dev/null +++ b/dev/circleci_data/conftest.py @@ -0,0 +1,19 @@ +# Copyright (C) 2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
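The unit check a few lines above converts TR to milliseconds when the largest slice-timing offset exceeds it, and back to seconds when TR itself looks like milliseconds. A standalone restatement of that arithmetic for illustration; `normalize_tr` is not a function in this diff:

```python
# Illustrative recap of the TR/slice-timing unit reconciliation (standalone, no C-PAC imports).
def normalize_tr(tr: float, max_slice_offset: float) -> tuple[float, str]:
    """Return (tr, unit) after reconciling TR with the largest slice-timing offset."""
    unit = "s"
    if tr and max_slice_offset > tr:
        # slice timings look like milliseconds, so express TR in ms too
        tr, unit = tr * 1000, "ms"
    elif tr and tr > 10:
        # a TR this large is assumed to be milliseconds; convert to seconds
        tr, unit = tr / 1000.0, "s"
    return tr, unit


assert normalize_tr(2.0, 1900.0) == (2000.0, "ms")  # 2 s TR, slice timings in ms
assert normalize_tr(2500.0, 2.4) == (2.5, "s")      # 2500 ms TR
```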
+"""Pytest configuration for CircleCI-specific tests.""" + +from CPAC.conftest import bids_examples diff --git a/dev/circleci_data/test_external_utils.py b/dev/circleci_data/test_external_utils.py index f516b0c903..d4892fee3b 100644 --- a/dev/circleci_data/test_external_utils.py +++ b/dev/circleci_data/test_external_utils.py @@ -25,9 +25,9 @@ import pytest import semver -CPAC_DIR = str(Path(__file__).parent.parent.parent) -sys.path.append(CPAC_DIR) -DATA_DIR = os.path.join(CPAC_DIR, "dev", "circleci_data") +CPAC_DIR = Path(__file__).parent.parent.parent +sys.path.append(str(CPAC_DIR)) +DATA_DIR = CPAC_DIR / "dev/circleci_data" from CPAC.__main__ import utils as CPAC_main_utils # noqa: E402 @@ -70,9 +70,8 @@ def test_build_data_config(caplog, cli_runner, multiword_connector): caplog.set_level(INFO) if multiword_connector == "-" and _BACKPORT_CLICK: return - os.chdir(DATA_DIR) - test_yaml = os.path.join(DATA_DIR, "data_settings.yml") - _delete_test_yaml(test_yaml) + os.chdir(str(DATA_DIR)) + test_yaml = DATA_DIR / "data_settings.yml" if multiword_connector == "_": data_config = CPAC_main_utils.commands[ _click_backport(CPAC_main_utils, "data-config") @@ -89,49 +88,50 @@ def test_build_data_config(caplog, cli_runner, multiword_connector): assert "\n".join(caplog.messages).startswith( "\nGenerated a default data_settings YAML file for editing" ) - assert os.path.exists(test_yaml) + assert test_yaml.exists() _delete_test_yaml(test_yaml) -def test_new_settings_template(caplog, cli_runner): +def test_new_settings_template(bids_examples, caplog, cli_runner): """Test CLI ``utils new-settings-template``.""" caplog.set_level(INFO) - os.chdir(CPAC_DIR) - - example_dir = os.path.join(CPAC_DIR, "bids-examples") - if not os.path.exists(example_dir): - from git import Repo - - Repo.clone_from( - "https://github.com/bids-standard/bids-examples.git", example_dir - ) + example_dir = Path(CPAC_DIR / "bids-examples") + if not example_dir.exists(): + example_dir.symlink_to(bids_examples) + os.chdir(str(CPAC_DIR)) result = cli_runner.invoke( CPAC_main_utils.commands[ _click_backport(CPAC_main_utils, "data-config") ].commands["build"], - [os.path.join(DATA_DIR, "data_settings_bids_examples_ds051_default_BIDS.yml")], + [str(DATA_DIR / "data_settings_bids_examples_ds051_default_BIDS.yml")], ) - participant_yaml = os.path.join(DATA_DIR, "data_config_ds051.yml") - group_yaml = os.path.join(DATA_DIR, "group_analysis_participants_ds051.txt") + participant_yaml = DATA_DIR / "data_config_ds051.yml" + group_yaml = DATA_DIR / "group_analysis_participants_ds051.txt" + + if example_dir.is_symlink() or example_dir.is_file(): + example_dir.unlink() + else: + from shutil import rmtree + rmtree(example_dir) assert result.exit_code == 0 assert "\n".join(caplog.messages).startswith( "\nGenerating data configuration file.." ) - assert os.path.exists(participant_yaml) - assert os.path.exists(group_yaml) + assert participant_yaml.exists() + assert group_yaml.exists() _delete_test_yaml(participant_yaml) _delete_test_yaml(group_yaml) def test_repickle(cli_runner): # noqa fn = "python_2_pickle.pkl" - pickle_path = os.path.join(DATA_DIR, fn) + pickle_path = str(DATA_DIR / fn) backups = [_Backup(pickle_path), _Backup(f"{pickle_path}z")] - result = cli_runner.invoke(CPAC_main_utils.commands["repickle"], [DATA_DIR]) + result = cli_runner.invoke(CPAC_main_utils.commands["repickle"], [str(DATA_DIR)]) assert result.exit_code == 0 assert ( @@ -139,7 +139,7 @@ def test_repickle(cli_runner): # noqa "pickle." 
in result.output ) - result = cli_runner.invoke(CPAC_main_utils.commands["repickle"], [DATA_DIR]) + result = cli_runner.invoke(CPAC_main_utils.commands["repickle"], [str(DATA_DIR)]) assert result.exit_code == 0 assert f"Pickle {fn} is a Python 3 pickle." in result.output @@ -157,9 +157,10 @@ def restore(self): w.write(self.data) -def _delete_test_yaml(test_yaml): - if os.path.exists(test_yaml): - os.remove(test_yaml) +def _delete_test_yaml(test_yaml: Path) -> None: + """Delete test YAML file.""" + if test_yaml.exists(): + os.remove(str(test_yaml)) def _test_repickle(pickle_path, gzipped=False): diff --git a/dev/circleci_data/test_in_image.sh b/dev/circleci_data/test_in_image.sh index b62de84994..9420d7c1ab 100755 --- a/dev/circleci_data/test_in_image.sh +++ b/dev/circleci_data/test_in_image.sh @@ -4,7 +4,7 @@ export PATH=$PATH:/home/$(whoami)/.local/bin pip install -r /code/dev/circleci_data/requirements.txt # run test with coverage as module -python -m coverage run --include */CPAC/*,*/run.py,*/dev/docker_data/* -m pytest --ignore-glob=*test_install.py --junitxml=test-results/junit.xml --doctest-modules dev/circleci_data /code/CPAC +python -m coverage run --include */CPAC/*,*/run.py,*/dev/docker_data/* -m pytest --ignore-glob=*test_install.py --capture=no --junitxml=test-results/junit.xml --doctest-modules dev/circleci_data /code/CPAC echo "$?" > test-results/exitcode diff --git a/py.typed b/py.typed new file mode 100644 index 0000000000..b648ac9233 --- /dev/null +++ b/py.typed @@ -0,0 +1 @@ +partial diff --git a/pyproject.toml b/pyproject.toml index 13181c224b..b5d583edbc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,3 +18,9 @@ [build-system] requires = ["nipype==1.8.6", "numpy==1.25.1", "pyyaml==6.0", "setuptools<60.0", "voluptuous==0.13.1"] build-backend = "setuptools.build_meta" + +[tool.mypy] +mypy_path = ".stubs" + +[tool.pyright] +stubPath = ".stubs" diff --git a/requirements.txt b/requirements.txt index 11e1be79f6..61b0279d01 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,7 +28,7 @@ prov==2.0.0 psutil==5.9.5 PyBASC==0.6.1 pybids==0.15.6 -PyPEER @ https://github.com/shnizzedy/PyPEER/archive/6965d2b2bea0fef824e885fec33a8e0e6bd50a97.zip +PyPEER @ git+https://git@github.com/ChildMindInstitute/PyPEER.git@6965d2b2bea0fef824e885fec33a8e0e6bd50a97 python-dateutil==2.8.2 PyYAML==6.0 requests==2.32.0 @@ -46,7 +46,7 @@ h5py==3.8.0 importlib-metadata==6.8.0 lxml==4.9.2 pip==23.3 -setuptools<60.0 -urllib3==1.26.19 +setuptools==70.0.0 +urllib3==1.26.18 wheel==0.40.0 -zipp==3.16.0 +zipp==3.19.1