From e64309c108fa2c15395228592599fb77c3d8634e Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 3 Jul 2024 18:15:43 -0400 Subject: [PATCH 01/93] :bug: Fix import (probably merge error) [run reg-suite] --- CPAC/pipeline/engine.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CPAC/pipeline/engine.py b/CPAC/pipeline/engine.py index 3c739ae1ab..d7f53f7029 100644 --- a/CPAC/pipeline/engine.py +++ b/CPAC/pipeline/engine.py @@ -19,13 +19,12 @@ import hashlib from itertools import chain import json -import logging import os import re from typing import Optional import warnings -from nipype import config +from nipype import config, logging from nipype.interfaces.utility import Rename from CPAC.image_utils.spatial_smoothing import spatial_smoothing From 74c09505a62b00dc2bb30ff425cca38ce9c5afa9 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Fri, 5 Jul 2024 09:56:33 -0400 Subject: [PATCH 02/93] :bug: Import local variables in `get_scan_params` [run reg-suite] --- CPAC/utils/utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CPAC/utils/utils.py b/CPAC/utils/utils.py index 4ba3285218..b8b84ec186 100644 --- a/CPAC/utils/utils.py +++ b/CPAC/utils/utils.py @@ -689,6 +689,13 @@ def get_scan_params( import os import warnings + from CPAC.utils.utils import ( + check, + fetch_and_convert, + try_fetch_parameter, + VALID_PATTERNS, + ) + def check2(val): return val if val is None or val == "" or isinstance(val, str) else int(val) From 17257e3fdc885dd14517991116db3a0901644ad3 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Fri, 5 Jul 2024 14:26:27 -0400 Subject: [PATCH 03/93] :recycle: Dedupe function node imports --- CPAC/utils/datasource.py | 1 - CPAC/utils/utils.py | 23 +++++++++++------------ 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/CPAC/utils/datasource.py b/CPAC/utils/datasource.py index d0089d8afe..008e674c2d 100644 --- a/CPAC/utils/datasource.py +++ b/CPAC/utils/datasource.py @@ -730,7 +730,6 @@ def ingress_func_metadata( "effective_echo_spacing", ], function=get_scan_params, - imports=["from CPAC.utils.utils import check, try_fetch_parameter"], ), name=f"bold_scan_params_{subject_id}{name_suffix}", ) diff --git a/CPAC/utils/utils.py b/CPAC/utils/utils.py index b8b84ec186..b84c847515 100644 --- a/CPAC/utils/utils.py +++ b/CPAC/utils/utils.py @@ -33,6 +33,7 @@ from CPAC.utils.configuration import Configuration from CPAC.utils.docs import deprecated +from CPAC.utils.interfaces.function import Function from CPAC.utils.monitoring import FMLOGGER, WFLOGGER CONFIGS_DIR = os.path.abspath( @@ -648,6 +649,14 @@ def try_fetch_parameter(scan_parameters, subject, scan, keys): return None +@Function.sig_imports( + [ + "import json", + "import os", + "from CPAC.utils.utils import check, fetch_and_convert," + " try_fetch_parameter, VALID_PATTERNS", + ] +) def get_scan_params( subject_id, scan, @@ -685,16 +694,6 @@ def get_scan_params( pe_direction : str effective_echo_spacing : float """ - import json - import os - import warnings - - from CPAC.utils.utils import ( - check, - fetch_and_convert, - try_fetch_parameter, - VALID_PATTERNS, - ) def check2(val): return val if val is None or val == "" or isinstance(val, str) else int(val) @@ -875,7 +874,7 @@ def check2(val): # checking if the unit of TR and slice timing match or not # if slice timing in ms convert TR to ms as well if TR and max_slice_offset > TR: - warnings.warn( + WFLOGGER.warn( "TR is in seconds and slice timings are in " "milliseconds. 
Converting TR into milliseconds" ) @@ -885,7 +884,7 @@ def check2(val): elif TR and TR > 10: # noqa: PLR2004 # check to see, if TR is in milliseconds, convert it into seconds - warnings.warn("TR is in milliseconds, Converting it into seconds") + WFLOGGER.warn("TR is in milliseconds, Converting it into seconds") TR = TR / 1000.0 WFLOGGER.info("New TR value %s s", TR) unit = "s" From ddf21035bd3bc86cb5eeac525122802217a1f908 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Sat, 6 Jul 2024 00:27:06 -0400 Subject: [PATCH 04/93] :construction: WIP :bug: Fix `get_scan_params` [run reg-suite] --- CPAC/utils/tests/test_utils.py | 77 +++++++++---- CPAC/utils/utils.py | 197 ++++++++++++++++++++++----------- 2 files changed, 184 insertions(+), 90 deletions(-) diff --git a/CPAC/utils/tests/test_utils.py b/CPAC/utils/tests/test_utils.py index f9a4cb73e4..43539d9a57 100644 --- a/CPAC/utils/tests/test_utils.py +++ b/CPAC/utils/tests/test_utils.py @@ -3,6 +3,7 @@ import multiprocessing from unittest import mock +from _pytest.logging import LogCaptureFixture import pytest from CPAC.func_preproc import get_motion_ref @@ -12,26 +13,33 @@ from CPAC.utils.utils import ( check_config_resources, check_system_deps, - try_fetch_parameter, + fetch_and_convert, ) -scan_params_bids = { - "RepetitionTime": 2.0, - "ScanOptions": "FS", - "SliceAcquisitionOrder": "Interleaved Ascending", -} -scan_params_cpac = { - "tr": 2.5, - "acquisition": "seq+z", - "reference": "24", - "first_tr": "", - "last_tr": "", +SCAN_PARAMS = { + "BIDS": { + "params": { + "RepetitionTime": 2.0, + "ScanOptions": "FS", + "SliceAcquisitionOrder": "Interleaved Ascending", + }, + "expected_TR": 2.0, + }, + "C-PAC": { + "params": { + "tr": 2.5, + "acquisition": "seq+z", + "reference": "24", + "first_tr": "", + "last_tr": "", + }, + "expected_TR": 2.5, + }, } def _installation_check(command: str, flag: str) -> None: - """Test that command is installed by running specified version or - help flag. + """Test that command is installed by running specified version or help flag. 
Parameters ---------- @@ -56,9 +64,10 @@ def _installation_check(command: str, flag: str) -> None: def test_check_config_resources(): """Test check_config_resources function.""" - with mock.patch.object(multiprocessing, "cpu_count", return_value=2), pytest.raises( - SystemError - ) as system_error: + with ( + mock.patch.object(multiprocessing, "cpu_count", return_value=2), + pytest.raises(SystemError) as system_error, + ): check_config_resources( Configuration( {"pipeline_setup": {"system_config": {"max_cores_per_participant": 10}}} @@ -69,12 +78,33 @@ def test_check_config_resources(): assert "threads available (2)" in error_string -def test_function(): - TR = try_fetch_parameter(scan_params_bids, "0001", "scan", ["TR", "RepetitionTime"]) - assert TR == 2.0 - - TR = try_fetch_parameter(scan_params_cpac, "0001", "scan", ["TR", "RepetitionTime"]) - assert TR == 2.5 +@pytest.mark.parametrize("scan_params", ["BIDS", "C-PAC"]) +@pytest.mark.parametrize("convert_to", [int, float, str]) +def test_fetch_and_convert( + caplog: LogCaptureFixture, scan_params: str, convert_to: type +) -> None: + """Test functionality to fetch and convert scan parameters.""" + params = SCAN_PARAMS[scan_params]["params"] + TR = fetch_and_convert( + scan_parameters=params, + scan="scan", + keys=["TR", "RepetitionTime"], + convert_to=convert_to, + ) + assert (TR == convert_to(SCAN_PARAMS[scan_params]["expected_TR"])) and isinstance( + TR, convert_to + ) + if scan_params == "C-PAC": + assert "Using case-insenitive match: 'TR' ≅ 'tr'." in caplog.text + else: + assert "Using case-insenitive match: 'TR' ≅ 'tr'." not in caplog.text + not_TR = fetch_and_convert( + scan_parameters=params, + scan="scan", + keys=["NotTR", "NotRepetitionTime"], + convert_to=convert_to, + ) + assert not_TR is None @pytest.mark.parametrize("executable", ["Xvfb"]) @@ -96,6 +126,7 @@ def test_NodeBlock_option_SSOT(): # pylint: disable=invalid-name def test_system_deps(): """Test system dependencies. + Raises an exception if dependencies are not met. """ check_system_deps(*([True] * 4)) diff --git a/CPAC/utils/utils.py b/CPAC/utils/utils.py index b84c847515..47528b7f36 100644 --- a/CPAC/utils/utils.py +++ b/CPAC/utils/utils.py @@ -25,7 +25,7 @@ import numbers import os import pickle -from typing import Any +from typing import Any, Literal, Optional, overload import numpy as np from voluptuous.error import Invalid @@ -47,6 +47,7 @@ os.path.join(CONFIGS_DIR, "1.7-1.8-deprecations.yml"), "r", encoding="utf-8" ) as _f: NESTED_CONFIG_DEPRECATIONS = yaml.safe_load(_f) +PE_DIRECTION = Literal["i", "i-", "j", "j-", "k", "k-", ""] VALID_PATTERNS = [ "alt+z", "altplus", @@ -472,17 +473,92 @@ def compute_fisher_z_score(correlation_file, timeseries_one_d, input_name): return out_file +@overload +def fetch( + scan_parameters: dict, + scan: Optional[str] = None, + keys: Optional[list[str]] = None, + *, + match_case: Literal[False], +) -> Any: ... +@overload +def fetch( + scan_parameters: dict, + scan: Optional[str] = None, + keys: Optional[list[str]] = None, + *, + match_case: Literal[True], +) -> tuple[Any, tuple[str, str]]: ... +def fetch(scan_parameters, scan, keys, *, match_case=False): + """Fetch the first found parameter from a scan params dictionary. + + Returns + ------- + value + The value of the parameter. 
+ + keys, optional + The matched keys (only if ``match_case is True``) + """ + if match_case: + keys = {key.lower(): key for key in keys} + scan_param_keys = {key.lower(): key for key in scan_parameters.keys()} + scan_parameters = {key.lower(): value for key, value in scan_parameters.items()} + for key in keys: + if key in scan_parameters: + if match_case: + return check(scan_parameters, None, scan, key, True), ( + keys[key], + scan_param_keys[key], + ) + return check(scan_parameters, None, scan, key, True) + msg = f"None of {keys} found in {list(scan_parameters.keys())}." + raise KeyError(msg) + + def fetch_and_convert( - scan_parameters: dict, scan: str, keys: list[str], convert_to: type, fallback: Any + scan_parameters: dict, + scan: str, + keys: list[str], + convert_to: type, + fallback: Optional[Any] = None, ) -> Any: - """Fetch a parameter from a scan parameters dictionary and convert it to a given type. + """Fetch a parameter from a scan params dictionary and convert it to a given type. Catch TypeError exceptions and return a fallback value in those cases. + + Returns + ------- + value + The gathered parameter coerced to the specified type, if possible. + ``fallback`` otherwise. """ + value: Any = fallback + fallback_message = f"Falling back to {fallback} ({type(fallback)})." + + try: + raw_value = fetch(scan_parameters, scan, keys) + except KeyError: + try: + raw_value, matched_keys = fetch( + scan_parameters, scan, keys, match_case=True + ) + except KeyError: + WFLOGGER.warning( + f"None of {keys} found in {list(scan_parameters.keys())}. " + f"{fallback_message}" + ) + return fallback + WFLOGGER.warning( + f"None exact match found. Using case-insenitive match: '{matched_keys[0]}'" + f" ≅ '{matched_keys[1]}'." + ) try: - value = convert_to(scan_parameters, None, scan, keys) + value = convert_to(raw_value) except TypeError: - value = fallback + WFLOGGER.warning( + f"Could not convert {value} to {convert_to}. {fallback_message}" + ) return value @@ -625,74 +701,61 @@ def check_random_state(seed): ) -def try_fetch_parameter(scan_parameters, subject, scan, keys): - """Try to fetch a parameter from a scan parameters dictionary.""" - scan_parameters = {k.lower(): v for k, v in scan_parameters.items()} - - for _key in keys: - key = _key.lower() - - if key not in scan_parameters: - continue - - if isinstance(scan_parameters[key], dict): - value = scan_parameters[key][scan] - else: - value = scan_parameters[key] - - # Explicit none value - if value == "None": - return None - - if value is not None: - return value - return None - - @Function.sig_imports( [ "import json", "import os", - "from CPAC.utils.utils import check, fetch_and_convert," - " try_fetch_parameter, VALID_PATTERNS", + "from CPAC.utils.utils import check, fetch_and_convert," " VALID_PATTERNS", ] ) def get_scan_params( - subject_id, - scan, - pipeconfig_start_indx, - pipeconfig_stop_indx, - data_config_scan_params=None, -): + subject_id: str, + scan: str, + pipeconfig_start_indx: int, + pipeconfig_stop_indx: Optional[int | str], + data_config_scan_params: Optional[dict | str] = None, +) -> tuple[ + Optional[str], + Optional[str], + Optional[int], + Optional[int], + Optional[int], + Optional[int], + PE_DIRECTION, + Optional[float], +]: """Extract slice timing correction parameters and scan parameters. 
Parameters ---------- - subject_id : str + subject_id subject id - scan : str + scan scan id - pipeconfig_start_indx : int + pipeconfig_start_indx starting volume index as provided in the pipeline config yaml file - pipeconfig_stop_indx : int + pipeconfig_stop_indx ending volume index as provided in the pipeline config yaml file - data_config_scan_params : str - file path to scan parameter JSON file listed in data config yaml file + data_config_scan_params + file path to scan parameter JSON file listed in data config yaml file or loaded + paramater dictionary Returns ------- - TR : a string + TR TR value - pattern : a string + tpattern slice aquisition pattern string or file path - ref_slice : an integer - reference slice which is used to allign all other slices - first_tr : an integer - starting TR or starting volume index - last_tr : an integer - ending TR or ending volume index - pe_direction : str - effective_echo_spacing : float + ref_slice + index of reference slice which is used to allign all other slices + first_tr + index of starting TR or starting volume index + last_tr + index of ending TR or ending volume index + pe_direction + https://bids-specification.readthedocs.io/en/stable/glossary.html#phaseencodingdirection-metadata + effective_echo_spacing + https://bids-specification.readthedocs.io/en/stable/glossary.html#effectiveechospacing-metadata """ def check2(val): @@ -700,7 +763,7 @@ def check2(val): # initialize vars to empty TR = pattern = ref_slice = first_tr = last_tr = pe_direction = "" - unit = "s" + unit: Literal["ms", "s"] = "s" effective_echo_spacing = template = None if isinstance(pipeconfig_stop_indx, str): @@ -750,24 +813,24 @@ def check2(val): # TODO: better handling of errant key values!!! # TODO: use schema validator to deal with it # get details from the configuration - TR = fetch_and_convert( + TR: Optional[float] = fetch_and_convert( params_dct, scan, ["TR", "RepetitionTime"], float, None ) - template = fetch_and_convert( + template: Optional[str] = fetch_and_convert( params_dct, scan, ["Template", "template"], str, None ) - pattern = str( - try_fetch_parameter( - params_dct, - subject_id, - scan, - ["acquisition", "SliceTiming", "SliceAcquisitionOrder"], - ) + pattern: str = fetch_and_convert( + params_dct, + scan, + ["acquisition", "SliceTiming", "SliceAcquisitionOrder"], + str, + "", ) - ref_slice = check(params_dct, subject_id, scan, "reference", False) - ref_slice = int(ref_slice) if ref_slice else ref_slice + ref_slice: Optional[int] = fetch_and_convert( + params_dct, scan, ["reference"], int, None + ) first_tr = check(params_dct, subject_id, scan, "first_TR", False) first_tr = check2(first_tr) if first_tr else first_tr @@ -874,7 +937,7 @@ def check2(val): # checking if the unit of TR and slice timing match or not # if slice timing in ms convert TR to ms as well if TR and max_slice_offset > TR: - WFLOGGER.warn( + WFLOGGER.warning( "TR is in seconds and slice timings are in " "milliseconds. 
Converting TR into milliseconds" ) @@ -884,7 +947,7 @@ def check2(val): elif TR and TR > 10: # noqa: PLR2004 # check to see, if TR is in milliseconds, convert it into seconds - WFLOGGER.warn("TR is in milliseconds, Converting it into seconds") + WFLOGGER.warning("TR is in milliseconds, Converting it into seconds") TR = TR / 1000.0 WFLOGGER.info("New TR value %s s", TR) unit = "s" From 52aa366f69523a02e2b609af83d18483351db3b0 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 8 Jul 2024 10:05:09 -0400 Subject: [PATCH 05/93] :recycle: DRY `fetch_and_convert`|`fetch`|`check`|`check2` [run reg-suite] --- CPAC/utils/utils.py | 191 +++++++++++++++++++++----------------------- 1 file changed, 89 insertions(+), 102 deletions(-) diff --git a/CPAC/utils/utils.py b/CPAC/utils/utils.py index 47528b7f36..dd20b14b43 100644 --- a/CPAC/utils/utils.py +++ b/CPAC/utils/utils.py @@ -476,6 +476,7 @@ def compute_fisher_z_score(correlation_file, timeseries_one_d, input_name): @overload def fetch( scan_parameters: dict, + subject: Optional[str] = None, scan: Optional[str] = None, keys: Optional[list[str]] = None, *, @@ -484,12 +485,13 @@ def fetch( @overload def fetch( scan_parameters: dict, + subject: Optional[str] = None, scan: Optional[str] = None, keys: Optional[list[str]] = None, *, match_case: Literal[True], ) -> tuple[Any, tuple[str, str]]: ... -def fetch(scan_parameters, scan, keys, *, match_case=False): +def fetch(scan_parameters, subject, scan, keys, *, match_case=False): """Fetch the first found parameter from a scan params dictionary. Returns @@ -507,26 +509,52 @@ def fetch(scan_parameters, scan, keys, *, match_case=False): for key in keys: if key in scan_parameters: if match_case: - return check(scan_parameters, None, scan, key, True), ( + return check(scan_parameters, subject, scan, key, True), ( keys[key], scan_param_keys[key], ) - return check(scan_parameters, None, scan, key, True) + return check(scan_parameters, subject, scan, key, True) msg = f"None of {keys} found in {list(scan_parameters.keys())}." raise KeyError(msg) def fetch_and_convert( scan_parameters: dict, + subject: str, scan: str, keys: list[str], convert_to: type, fallback: Optional[Any] = None, + warn_typeerror: bool = True, ) -> Any: """Fetch a parameter from a scan params dictionary and convert it to a given type. Catch TypeError exceptions and return a fallback value in those cases. + Parameters + ---------- + scan_parameters + dictionary of scan metadata + + subject + the subject ID + + scan + the scan ID + + keys + if multiple keys provided, the value corresponding to the first found will be + returned + + convert_to + the type to return if possible + + fallback + a value to return if the keys are not found in ``scan_parameters`` + + warn_typeerror + log a warning if value cannot be converted to ``convert_to`` type? + Returns ------- value @@ -537,11 +565,11 @@ def fetch_and_convert( fallback_message = f"Falling back to {fallback} ({type(fallback)})." try: - raw_value = fetch(scan_parameters, scan, keys) + raw_value = fetch(scan_parameters, subject, scan, keys) except KeyError: try: raw_value, matched_keys = fetch( - scan_parameters, scan, keys, match_case=True + scan_parameters, subject, scan, keys, match_case=True ) except KeyError: WFLOGGER.warning( @@ -556,9 +584,10 @@ def fetch_and_convert( try: value = convert_to(raw_value) except TypeError: - WFLOGGER.warning( - f"Could not convert {value} to {convert_to}. {fallback_message}" - ) + if warn_typeerror: + WFLOGGER.warning( + f"Could not convert {value} to {convert_to}. 
{fallback_message}" + ) return value @@ -705,13 +734,14 @@ def check_random_state(seed): [ "import json", "import os", - "from CPAC.utils.utils import check, fetch_and_convert," " VALID_PATTERNS", + "from typing import Literal, Optional", + "from CPAC.utils.utils import fetch_and_convert, PE_DIRECTION, VALID_PATTERNS", ] ) def get_scan_params( subject_id: str, scan: str, - pipeconfig_start_indx: int, + pipeconfig_start_indx: Optional[int | str], pipeconfig_stop_indx: Optional[int | str], data_config_scan_params: Optional[dict | str] = None, ) -> tuple[ @@ -742,7 +772,7 @@ def get_scan_params( Returns ------- - TR + tr TR value tpattern slice aquisition pattern string or file path @@ -757,12 +787,8 @@ def get_scan_params( effective_echo_spacing https://bids-specification.readthedocs.io/en/stable/glossary.html#effectiveechospacing-metadata """ - - def check2(val): - return val if val is None or val == "" or isinstance(val, str) else int(val) - # initialize vars to empty - TR = pattern = ref_slice = first_tr = last_tr = pe_direction = "" + tr = pattern = ref_slice = first_tr = last_tr = pe_direction = "" unit: Literal["ms", "s"] = "s" effective_echo_spacing = template = None @@ -777,78 +803,10 @@ def check2(val): f" configuration file does not exist:\n{data_config_scan_params}" ) raise FileNotFoundError(err) - with open(data_config_scan_params, "r") as f: - params_dct = json.load(f) - - # get details from the configuration - # if this is a JSON file, the key values are the BIDS format - # standard - # TODO: better handling of errant key values!!! - if "RepetitionTime" in params_dct.keys(): - TR = float(check(params_dct, subject_id, scan, "RepetitionTime", False)) - if "SliceTiming" in params_dct.keys(): - pattern = str(check(params_dct, subject_id, scan, "SliceTiming", False)) - elif "SliceAcquisitionOrder" in params_dct.keys(): - pattern = str( - check(params_dct, subject_id, scan, "SliceAcquisitionOrder", False) - ) - if "PhaseEncodingDirection" in params_dct.keys(): - pe_direction = str( - check(params_dct, subject_id, scan, "PhaseEncodingDirection", False) - ) - try: - "EffectiveEchoSpacing" in params_dct.keys() - effective_echo_spacing = float( - check(params_dct, subject_id, scan, "EffectiveEchoSpacing", False) - ) - except TypeError: - pass - - elif len(data_config_scan_params) > 0 and isinstance( - data_config_scan_params, dict - ): + params_dct: dict = json.load(f) + elif isinstance(data_config_scan_params, dict): params_dct = data_config_scan_params - - # TODO: better handling of errant key values!!! 
- # TODO: use schema validator to deal with it - # get details from the configuration - TR: Optional[float] = fetch_and_convert( - params_dct, scan, ["TR", "RepetitionTime"], float, None - ) - template: Optional[str] = fetch_and_convert( - params_dct, scan, ["Template", "template"], str, None - ) - - pattern: str = fetch_and_convert( - params_dct, - scan, - ["acquisition", "SliceTiming", "SliceAcquisitionOrder"], - str, - "", - ) - - ref_slice: Optional[int] = fetch_and_convert( - params_dct, scan, ["reference"], int, None - ) - - first_tr = check(params_dct, subject_id, scan, "first_TR", False) - first_tr = check2(first_tr) if first_tr else first_tr - - last_tr = check(params_dct, subject_id, scan, "last_TR", False) - last_tr = check2(last_tr) if last_tr else last_tr - - pe_direction = check( - params_dct, subject_id, scan, "PhaseEncodingDirection", False - ) - effective_echo_spacing = fetch_and_convert( - params_dct, - scan, - ["EffectiveEchoSpacing"], - float, - effective_echo_spacing, - ) - else: err = ( "\n\n[!] Could not read the format of the scan parameters " @@ -856,9 +814,38 @@ def check2(val): f"the participant {subject_id}.\n\n" ) raise OSError(err) - first_tr = pipeconfig_start_indx if first_tr == "" or first_tr is None else first_tr - last_tr = pipeconfig_stop_indx if last_tr == "" or last_tr is None else last_tr - pattern = None if "None" in pattern or "none" in pattern else pattern + # TODO: better handling of errant key values!!! + # TODO: use schema validator to deal with it + # get details from the configuration + tr: float | Literal[""] = fetch_and_convert( + params_dct, subject_id, scan, ["RepetitionTime", "TR"], float, "" + ) + template: Optional[str] = fetch_and_convert( + params_dct, subject_id, scan, ["Template", "template"], str + ) + pattern: Optional[str] = fetch_and_convert( + params_dct, + subject_id, + scan, + ["acquisition", "SliceTiming", "SliceAcquisitionOrder"], + str, + None, + ) + ref_slice: Optional[int | str] = fetch_and_convert( + params_dct, subject_id, scan, ["reference"], int, None + ) + first_tr: Optional[int | str] = fetch_and_convert( + params_dct, subject_id, scan, ["first_TR"], int, pipeconfig_start_indx + ) + last_tr: Optional[int | str] = fetch_and_convert( + params_dct, subject_id, scan, ["last_TR"], int, pipeconfig_stop_indx + ) + pe_direction: PE_DIRECTION = fetch_and_convert( + params_dct, subject_id, scan, ["PhaseEncodingDirection"], str, "" + ) + effective_echo_spacing: Optional[float] = fetch_and_convert( + params_dct, subject_id, scan, ["EffectiveEchoSpacing"], float + ) """ if not pattern: @@ -934,26 +921,26 @@ def check2(val): slice_timings.sort() max_slice_offset = slice_timings[-1] - # checking if the unit of TR and slice timing match or not - # if slice timing in ms convert TR to ms as well - if TR and max_slice_offset > TR: + # checking if the unit of tr and slice timing match or not + # if slice timing in ms convert tr to ms as well + if tr and max_slice_offset > tr: WFLOGGER.warning( - "TR is in seconds and slice timings are in " - "milliseconds. Converting TR into milliseconds" + "tr is in seconds and slice timings are in " + "milliseconds. 
Converting tr into milliseconds" ) - TR = TR * 1000 - WFLOGGER.info("New TR value %s ms", TR) + tr = tr * 1000 + WFLOGGER.info("New tr value %s ms", tr) unit = "ms" - elif TR and TR > 10: # noqa: PLR2004 - # check to see, if TR is in milliseconds, convert it into seconds - WFLOGGER.warning("TR is in milliseconds, Converting it into seconds") - TR = TR / 1000.0 - WFLOGGER.info("New TR value %s s", TR) + elif tr and tr > 10: # noqa: PLR2004 + # check to see, if tr is in milliseconds, convert it into seconds + WFLOGGER.warning("tr is in milliseconds, Converting it into seconds") + tr = tr / 1000.0 + WFLOGGER.info("New tr value %s s", tr) unit = "s" # swap back in - tr = f"{TR!s}{unit}" if TR else "" + tr = f"{tr!s}{unit}" if tr else "" tpattern = pattern start_indx = first_tr stop_indx = last_tr From c5c39b08c38c6b50ebd6c2a4d5791022e199ad41 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 8 Jul 2024 10:49:11 -0400 Subject: [PATCH 06/93] :bug: Tell Nipype to import typehint type [run reg-suite] --- CPAC/alff/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CPAC/alff/utils.py b/CPAC/alff/utils.py index f89e0c8ca4..d7532373bf 100644 --- a/CPAC/alff/utils.py +++ b/CPAC/alff/utils.py @@ -3,7 +3,10 @@ from pathlib import Path +from CPAC.utils.interfaces.function import Function + +@Function.sig_imports(["from pathlib import Path"]) def get_opt_string(mask: Path | str) -> str: """ Return option string for 3dTstat. From 6a5b7230209455f69bbbf05404e9a01a1eca6351 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 8 Jul 2024 12:01:02 -0400 Subject: [PATCH 07/93] :recycle: DRY params, sub, scan --- CPAC/utils/tests/test_utils.py | 12 +- CPAC/utils/utils.py | 341 ++++++++++++++++----------------- 2 files changed, 167 insertions(+), 186 deletions(-) diff --git a/CPAC/utils/tests/test_utils.py b/CPAC/utils/tests/test_utils.py index 43539d9a57..750b883758 100644 --- a/CPAC/utils/tests/test_utils.py +++ b/CPAC/utils/tests/test_utils.py @@ -13,7 +13,7 @@ from CPAC.utils.utils import ( check_config_resources, check_system_deps, - fetch_and_convert, + ScanParameters, ) SCAN_PARAMS = { @@ -84,10 +84,8 @@ def test_fetch_and_convert( caplog: LogCaptureFixture, scan_params: str, convert_to: type ) -> None: """Test functionality to fetch and convert scan parameters.""" - params = SCAN_PARAMS[scan_params]["params"] - TR = fetch_and_convert( - scan_parameters=params, - scan="scan", + params = ScanParameters(SCAN_PARAMS[scan_params]["params"], "subject", "scan") + TR = params.fetch_and_convert( keys=["TR", "RepetitionTime"], convert_to=convert_to, ) @@ -98,9 +96,7 @@ def test_fetch_and_convert( assert "Using case-insenitive match: 'TR' ≅ 'tr'." in caplog.text else: assert "Using case-insenitive match: 'TR' ≅ 'tr'." not in caplog.text - not_TR = fetch_and_convert( - scan_parameters=params, - scan="scan", + not_TR = params.fetch_and_convert( keys=["NotTR", "NotRepetitionTime"], convert_to=convert_to, ) diff --git a/CPAC/utils/utils.py b/CPAC/utils/utils.py index dd20b14b43..814303f249 100644 --- a/CPAC/utils/utils.py +++ b/CPAC/utils/utils.py @@ -473,122 +473,161 @@ def compute_fisher_z_score(correlation_file, timeseries_one_d, input_name): return out_file -@overload -def fetch( - scan_parameters: dict, - subject: Optional[str] = None, - scan: Optional[str] = None, - keys: Optional[list[str]] = None, - *, - match_case: Literal[False], -) -> Any: ... 
-@overload -def fetch( - scan_parameters: dict, - subject: Optional[str] = None, - scan: Optional[str] = None, - keys: Optional[list[str]] = None, - *, - match_case: Literal[True], -) -> tuple[Any, tuple[str, str]]: ... -def fetch(scan_parameters, subject, scan, keys, *, match_case=False): - """Fetch the first found parameter from a scan params dictionary. +class ScanParameters: + """A dictionary of scan parameters and access methods.""" + + def __init__(self, scan_parameters: str | dict, subject_id: str, scan: str): + """Initialize ScanParameters dict and metadata.""" + self.subject = subject_id + self.scan = scan + if ".json" in scan_parameters: + if not os.path.exists(scan_parameters): + err = ( + "\n[!] WARNING: Scan parameters JSON file listed in your data" + f" configuration file does not exist:\n{scan_parameters}" + ) + raise FileNotFoundError(err) + with open(scan_parameters, "r") as f: + self.params: dict = json.load(f) + elif isinstance(scan_parameters, dict): + self.params = scan_parameters + else: + err = ( + "\n\n[!] Could not read the format of the scan parameters " + "information included in the data configuration file for " + f"the participant {self.subject}.\n\n" + ) + raise OSError(err) - Returns - ------- - value - The value of the parameter. + def check(self, val_to_check: str, throw_exception: bool): + """Check that a value is populated for a given key in a parameters dictionary.""" + if val_to_check not in self.params: + if throw_exception: + msg = f"Missing Value for {val_to_check} for participant {self.subject}" + raise ValueError(msg) + return None - keys, optional - The matched keys (only if ``match_case is True``) - """ - if match_case: - keys = {key.lower(): key for key in keys} - scan_param_keys = {key.lower(): key for key in scan_parameters.keys()} - scan_parameters = {key.lower(): value for key, value in scan_parameters.items()} - for key in keys: - if key in scan_parameters: - if match_case: - return check(scan_parameters, subject, scan, key, True), ( - keys[key], - scan_param_keys[key], + if isinstance(self.params[val_to_check], dict): + ret_val = self.params[val_to_check][self.scan] + else: + ret_val = self.params[val_to_check] + + if ret_val == "None": + if throw_exception: + msg = ( + f"'None' parameter value for {val_to_check} for" + f" participant {self.subject}." ) - return check(scan_parameters, subject, scan, key, True) - msg = f"None of {keys} found in {list(scan_parameters.keys())}." - raise KeyError(msg) + raise ValueError(msg) + ret_val = None + if ret_val == "" and throw_exception: + msg = f"Missing value for {val_to_check} for participant {self.subject}." + raise ValueError(msg) -def fetch_and_convert( - scan_parameters: dict, - subject: str, - scan: str, - keys: list[str], - convert_to: type, - fallback: Optional[Any] = None, - warn_typeerror: bool = True, -) -> Any: - """Fetch a parameter from a scan params dictionary and convert it to a given type. + return ret_val + + @overload + def fetch( + self, + keys: Optional[list[str]] = None, + *, + match_case: Literal[False], + ) -> Any: ... + @overload + def fetch( + self, + keys: Optional[list[str]] = None, + *, + match_case: Literal[True], + ) -> tuple[Any, tuple[str, str]]: ... + def fetch(self, keys, *, match_case=False): + """Fetch the first found parameter from a scan params dictionary. - Catch TypeError exceptions and return a fallback value in those cases. + Returns + ------- + value + The value of the parameter. 
- Parameters - ---------- - scan_parameters - dictionary of scan metadata + keys, optional + The matched keys (only if ``match_case is True``) + """ + if match_case: + keys = {key.lower(): key for key in keys} + scan_param_keys = {key.lower(): key for key in self.params.keys()} + scan_parameters = {key.lower(): value for key, value in self.params.items()} + else: + scan_parameters = self.params + for key in keys: + if key in scan_parameters: + if match_case: + return self.check(key, True), ( + keys[key], + scan_param_keys[key], + ) + return self.check(key, True) + msg = f"None of {keys} found in {list(scan_parameters.keys())}." + raise KeyError(msg) - subject - the subject ID + def fetch_and_convert( + self, + keys: list[str], + convert_to: Optional[type] = None, + fallback: Optional[Any] = None, + warn_typeerror: bool = True, + ) -> Any: + """Fetch a parameter from a scan params dictionary and convert it to a given type. - scan - the scan ID + Catch TypeError exceptions and return a fallback value in those cases. - keys - if multiple keys provided, the value corresponding to the first found will be - returned + Parameters + ---------- + keys + if multiple keys provided, the value corresponding to the first found will be + returned - convert_to - the type to return if possible + convert_to + the type to return if possible - fallback - a value to return if the keys are not found in ``scan_parameters`` + fallback + a value to return if the keys are not found in ``scan_parameters`` - warn_typeerror - log a warning if value cannot be converted to ``convert_to`` type? + warn_typeerror + log a warning if value cannot be converted to ``convert_to`` type? - Returns - ------- - value - The gathered parameter coerced to the specified type, if possible. - ``fallback`` otherwise. - """ - value: Any = fallback - fallback_message = f"Falling back to {fallback} ({type(fallback)})." + Returns + ------- + value + The gathered parameter coerced to the specified type, if possible. + ``fallback`` otherwise. + """ + value: Any = fallback + fallback_message = f"Falling back to {fallback} ({type(fallback)})." - try: - raw_value = fetch(scan_parameters, subject, scan, keys) - except KeyError: try: - raw_value, matched_keys = fetch( - scan_parameters, subject, scan, keys, match_case=True - ) + raw_value = self.fetch(keys) except KeyError: + try: + raw_value, matched_keys = self.fetch(keys, match_case=True) + except KeyError: + WFLOGGER.warning( + f"None of {keys} found in {list(self.params.keys())}. " + f"{fallback_message}" + ) + return fallback WFLOGGER.warning( - f"None of {keys} found in {list(scan_parameters.keys())}. " - f"{fallback_message}" - ) - return fallback - WFLOGGER.warning( - f"None exact match found. Using case-insenitive match: '{matched_keys[0]}'" - f" ≅ '{matched_keys[1]}'." - ) - try: - value = convert_to(raw_value) - except TypeError: - if warn_typeerror: - WFLOGGER.warning( - f"Could not convert {value} to {convert_to}. {fallback_message}" + f"None exact match found. Using case-insenitive match: '{matched_keys[0]}'" + f" ≅ '{matched_keys[1]}'." ) - return value + if convert_to: + try: + value = convert_to(raw_value) + except TypeError: + if warn_typeerror: + WFLOGGER.warning( + f"Could not convert {value} to {convert_to}. 
{fallback_message}" + ) + return value def get_operand_string(mean, std_dev): @@ -676,35 +715,6 @@ def correlation(matrix1, matrix2, match_rows=False, z_scored=False, symmetric=Fa return r -def check(params_dct, subject_id, scan_id, val_to_check, throw_exception): - """Check that a value is populated for a given key in a parameters dictionary.""" - if val_to_check not in params_dct: - if throw_exception: - msg = f"Missing Value for {val_to_check} for participant {subject_id}" - raise ValueError(msg) - return None - - if isinstance(params_dct[val_to_check], dict): - ret_val = params_dct[val_to_check][scan_id] - else: - ret_val = params_dct[val_to_check] - - if ret_val == "None": - if throw_exception: - msg = ( - f"'None' Parameter Value for {val_to_check} for" - f" participant {subject_id}" - ) - raise ValueError(msg) - ret_val = None - - if ret_val == "" and throw_exception: - msg = f"Missing Value for {val_to_check} for participant {subject_id}" - raise ValueError(msg) - - return ret_val - - def check_random_state(seed): """ Turn seed into a np.random.RandomState instance. @@ -735,7 +745,7 @@ def check_random_state(seed): "import json", "import os", "from typing import Literal, Optional", - "from CPAC.utils.utils import fetch_and_convert, PE_DIRECTION, VALID_PATTERNS", + "from CPAC.utils.utils import ScanParameters, PE_DIRECTION, VALID_PATTERNS", ] ) def get_scan_params( @@ -795,57 +805,32 @@ def get_scan_params( if isinstance(pipeconfig_stop_indx, str): if "End" in pipeconfig_stop_indx or "end" in pipeconfig_stop_indx: pipeconfig_stop_indx = None - if data_config_scan_params: - if ".json" in data_config_scan_params: - if not os.path.exists(data_config_scan_params): - err = ( - "\n[!] WARNING: Scan parameters JSON file listed in your data" - f" configuration file does not exist:\n{data_config_scan_params}" - ) - raise FileNotFoundError(err) - with open(data_config_scan_params, "r") as f: - params_dct: dict = json.load(f) - elif isinstance(data_config_scan_params, dict): - params_dct = data_config_scan_params - else: - err = ( - "\n\n[!] Could not read the format of the scan parameters " - "information included in the data configuration file for " - f"the participant {subject_id}.\n\n" - ) - raise OSError(err) - # TODO: better handling of errant key values!!! - # TODO: use schema validator to deal with it - # get details from the configuration - tr: float | Literal[""] = fetch_and_convert( - params_dct, subject_id, scan, ["RepetitionTime", "TR"], float, "" - ) - template: Optional[str] = fetch_and_convert( - params_dct, subject_id, scan, ["Template", "template"], str - ) - pattern: Optional[str] = fetch_and_convert( - params_dct, - subject_id, - scan, - ["acquisition", "SliceTiming", "SliceAcquisitionOrder"], - str, - None, - ) - ref_slice: Optional[int | str] = fetch_and_convert( - params_dct, subject_id, scan, ["reference"], int, None - ) - first_tr: Optional[int | str] = fetch_and_convert( - params_dct, subject_id, scan, ["first_TR"], int, pipeconfig_start_indx - ) - last_tr: Optional[int | str] = fetch_and_convert( - params_dct, subject_id, scan, ["last_TR"], int, pipeconfig_stop_indx - ) - pe_direction: PE_DIRECTION = fetch_and_convert( - params_dct, subject_id, scan, ["PhaseEncodingDirection"], str, "" - ) - effective_echo_spacing: Optional[float] = fetch_and_convert( - params_dct, subject_id, scan, ["EffectiveEchoSpacing"], float - ) + params = ScanParameters(data_config_scan_params, subject_id, scan) + # TODO: better handling of errant key values!!! 
+ # TODO: use schema validator to deal with it + # get details from the configuration + tr: float | Literal[""] = params.fetch_and_convert( + ["RepetitionTime", "TR"], float, "" + ) + template: Optional[str] = params.fetch_and_convert(["Template", "template"], str) + pattern: Optional[str] = params.fetch_and_convert( + ["acquisition", "SliceTiming", "SliceAcquisitionOrder"], + str, + None, + ) + ref_slice: Optional[int | str] = params.fetch_and_convert(["reference"], int, None) + first_tr: Optional[int | str] = params.fetch_and_convert( + ["first_TR"], int, pipeconfig_start_indx + ) + last_tr: Optional[int | str] = params.fetch_and_convert( + ["last_TR"], int, pipeconfig_stop_indx + ) + pe_direction: PE_DIRECTION = params.fetch_and_convert( + ["PhaseEncodingDirection"], str, "" + ) + effective_echo_spacing: Optional[float] = params.fetch_and_convert( + ["EffectiveEchoSpacing"], float + ) """ if not pattern: From 3ebb9f4d8b790bb2eb8c9514be63c8642cd2f42b Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 8 Jul 2024 12:54:48 -0400 Subject: [PATCH 08/93] :white_check_mark: Add tests for `fetch` refactor [rebuild base-lite] [rebuild base-standard] [run reg-suite] --- .ruff.toml | 1 + CPAC/utils/tests/old_functions.py | 67 +++++++++++++++++++++++++++++++ CPAC/utils/tests/test_utils.py | 52 +++++++++++++++++++++--- CPAC/utils/utils.py | 27 +++++++++---- 4 files changed, 134 insertions(+), 13 deletions(-) create mode 100644 CPAC/utils/tests/old_functions.py diff --git a/.ruff.toml b/.ruff.toml index d690751b02..265427a1ab 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -11,6 +11,7 @@ external = ["T20"] # Don't autoremove 'noqa` comments for these rules [lint.per-file-ignores] "CPAC/func_preproc/func_preproc.py" = ["E402"] "CPAC/utils/sklearn.py" = ["RUF003"] +"CPAC/utils/tests/old_functions.py" = ["C", "D", "E", "EM", "PLW", "RET"] "CPAC/utils/utils.py" = ["T201"] # until `repickle` is removed "setup.py" = ["D1"] diff --git a/CPAC/utils/tests/old_functions.py b/CPAC/utils/tests/old_functions.py new file mode 100644 index 0000000000..80171db77b --- /dev/null +++ b/CPAC/utils/tests/old_functions.py @@ -0,0 +1,67 @@ +# Copyright (C) 2012-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
+"""Functions from before refactoring.""" + + +def check(params_dct, subject_id, scan_id, val_to_check, throw_exception): + """https://github.com/FCP-INDI/C-PAC/blob/96db8b0b65ab1d5f55fb3b895855af34d72c17e4/CPAC/utils/utils.py#L630-L653""" + if val_to_check not in params_dct: + if throw_exception: + raise Exception( + f"Missing Value for {val_to_check} for participant " f"{subject_id}" + ) + return None + if isinstance(params_dct[val_to_check], dict): + ret_val = params_dct[val_to_check][scan_id] + else: + ret_val = params_dct[val_to_check] + if ret_val == "None": + if throw_exception: + raise Exception( + f"'None' Parameter Value for {val_to_check} for participant " + f"{subject_id}" + ) + else: + ret_val = None + if ret_val == "" and throw_exception: + raise Exception( + f"Missing Value for {val_to_check} for participant " f"{subject_id}" + ) + return ret_val + + +def check2(val): + """https://github.com/FCP-INDI/C-PAC/blob/96db8b0b65ab1d5f55fb3b895855af34d72c17e4/CPAC/utils/utils.py#L745-L746""" + return val if val == None or val == "" or isinstance(val, str) else int(val) + + +def try_fetch_parameter(scan_parameters, subject, scan, keys): + """https://github.com/FCP-INDI/C-PAC/blob/96db8b0b65ab1d5f55fb3b895855af34d72c17e4/CPAC/utils/utils.py#L679-L703""" + scan_parameters = dict((k.lower(), v) for k, v in scan_parameters.items()) + for key in keys: + key = key.lower() + if key not in scan_parameters: + continue + if isinstance(scan_parameters[key], dict): + value = scan_parameters[key][scan] + else: + value = scan_parameters[key] + if value == "None": + return None + if value is not None: + return value + return None diff --git a/CPAC/utils/tests/test_utils.py b/CPAC/utils/tests/test_utils.py index 750b883758..ab896c6029 100644 --- a/CPAC/utils/tests/test_utils.py +++ b/CPAC/utils/tests/test_utils.py @@ -10,6 +10,7 @@ from CPAC.pipeline.nodeblock import NodeBlockFunction from CPAC.utils.configuration import Configuration from CPAC.utils.monitoring.custom_logging import log_subprocess +from CPAC.utils.tests import old_functions from CPAC.utils.utils import ( check_config_resources, check_system_deps, @@ -30,11 +31,19 @@ "tr": 2.5, "acquisition": "seq+z", "reference": "24", - "first_tr": "", - "last_tr": "", + "first_TR": 1, + "last_TR": "", }, "expected_TR": 2.5, }, + "nested": { + "params": { + "TR": {"scan": 3}, + "first_TR": {"scan": 0}, + "last_TR": {"scan": 450}, + }, + "expected_TR": 3, + }, } @@ -78,7 +87,7 @@ def test_check_config_resources(): assert "threads available (2)" in error_string -@pytest.mark.parametrize("scan_params", ["BIDS", "C-PAC"]) +@pytest.mark.parametrize("scan_params", ["BIDS", "C-PAC", "nested"]) @pytest.mark.parametrize("convert_to", [int, float, str]) def test_fetch_and_convert( caplog: LogCaptureFixture, scan_params: str, convert_to: type @@ -89,8 +98,25 @@ def test_fetch_and_convert( keys=["TR", "RepetitionTime"], convert_to=convert_to, ) - assert (TR == convert_to(SCAN_PARAMS[scan_params]["expected_TR"])) and isinstance( - TR, convert_to + if TR and "RepetitionTime" in params.params: + old_TR = convert_to( + old_functions.check( + params.params, params.subject, params.scan, "RepetitionTime", False + ) + ) + assert TR == old_TR + try: + old_TR = convert_to( + old_functions.try_fetch_parameter( + params.params, params.subject, params.scan, ["TR", "RepetitionTime"] + ) + ) + except TypeError: + old_TR = None + assert ( + (TR == convert_to(SCAN_PARAMS[scan_params]["expected_TR"])) + and isinstance(TR, convert_to) + and TR == old_TR ) if scan_params == 
"C-PAC": assert "Using case-insenitive match: 'TR' ≅ 'tr'." in caplog.text @@ -101,6 +127,22 @@ def test_fetch_and_convert( convert_to=convert_to, ) assert not_TR is None + if "first_TR" in params.params: + first_tr = params.fetch_and_convert(["first_TR"], int, 1, False) + old_first_tr = old_functions.check( + params.params, params.subject, params.scan, "first_TR", False + ) + if old_first_tr: + old_first_tr = old_functions.check2(old_first_tr) + assert first_tr == old_first_tr + if "last_TR" in params.params: + last_tr = params.fetch_and_convert(["last_TR"], int, "", False) + old_last_tr = old_functions.check( + params.params, params.subject, params.scan, "last_TR", False + ) + if old_last_tr: + old_last_tr = old_functions.check2(old_last_tr) + assert last_tr == old_last_tr @pytest.mark.parametrize("executable", ["Xvfb"]) diff --git a/CPAC/utils/utils.py b/CPAC/utils/utils.py index 814303f249..29201d779d 100644 --- a/CPAC/utils/utils.py +++ b/CPAC/utils/utils.py @@ -533,6 +533,7 @@ def fetch( keys: Optional[list[str]] = None, *, match_case: Literal[False], + throw_exception: bool, ) -> Any: ... @overload def fetch( @@ -540,8 +541,9 @@ def fetch( keys: Optional[list[str]] = None, *, match_case: Literal[True], + throw_exception: bool, ) -> tuple[Any, tuple[str, str]]: ... - def fetch(self, keys, *, match_case=False): + def fetch(self, keys, *, match_case=False, throw_exception=True): """Fetch the first found parameter from a scan params dictionary. Returns @@ -551,6 +553,9 @@ def fetch(self, keys, *, match_case=False): keys, optional The matched keys (only if ``match_case is True``) + + throw_exception + Raise an exception if value is ``""`` or ``None``? """ if match_case: keys = {key.lower(): key for key in keys} @@ -561,11 +566,11 @@ def fetch(self, keys, *, match_case=False): for key in keys: if key in scan_parameters: if match_case: - return self.check(key, True), ( + return self.check(key, throw_exception), ( keys[key], scan_param_keys[key], ) - return self.check(key, True) + return self.check(key, throw_exception) msg = f"None of {keys} found in {list(scan_parameters.keys())}." raise KeyError(msg) @@ -575,6 +580,7 @@ def fetch_and_convert( convert_to: Optional[type] = None, fallback: Optional[Any] = None, warn_typeerror: bool = True, + throw_exception: bool = False, ) -> Any: """Fetch a parameter from a scan params dictionary and convert it to a given type. @@ -595,6 +601,9 @@ def fetch_and_convert( warn_typeerror log a warning if value cannot be converted to ``convert_to`` type? + throw_exception + raise an error for empty string or NoneTypes? + Returns ------- value @@ -605,10 +614,12 @@ def fetch_and_convert( fallback_message = f"Falling back to {fallback} ({type(fallback)})." try: - raw_value = self.fetch(keys) + raw_value = self.fetch(keys, throw_exception=throw_exception) except KeyError: try: - raw_value, matched_keys = self.fetch(keys, match_case=True) + raw_value, matched_keys = self.fetch( + keys, match_case=True, throw_exception=throw_exception + ) except KeyError: WFLOGGER.warning( f"None of {keys} found in {list(self.params.keys())}. " @@ -622,7 +633,7 @@ def fetch_and_convert( if convert_to: try: value = convert_to(raw_value) - except TypeError: + except (TypeError, ValueError): if warn_typeerror: WFLOGGER.warning( f"Could not convert {value} to {convert_to}. 
{fallback_message}" @@ -820,10 +831,10 @@ def get_scan_params( ) ref_slice: Optional[int | str] = params.fetch_and_convert(["reference"], int, None) first_tr: Optional[int | str] = params.fetch_and_convert( - ["first_TR"], int, pipeconfig_start_indx + ["first_TR"], int, pipeconfig_start_indx, False ) last_tr: Optional[int | str] = params.fetch_and_convert( - ["last_TR"], int, pipeconfig_stop_indx + ["last_TR"], int, pipeconfig_stop_indx, False ) pe_direction: PE_DIRECTION = params.fetch_and_convert( ["PhaseEncodingDirection"], str, "" From 7d6f0eeb0a09894798ae7b0578ec1ca0aba0c9a6 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 8 Jul 2024 12:59:12 -0400 Subject: [PATCH 09/93] :pencil2: Fix TR capitalization --- CPAC/utils/utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/CPAC/utils/utils.py b/CPAC/utils/utils.py index 29201d779d..41937056c6 100644 --- a/CPAC/utils/utils.py +++ b/CPAC/utils/utils.py @@ -921,18 +921,18 @@ def get_scan_params( # if slice timing in ms convert tr to ms as well if tr and max_slice_offset > tr: WFLOGGER.warning( - "tr is in seconds and slice timings are in " - "milliseconds. Converting tr into milliseconds" + "TR is in seconds and slice timings are in " + "milliseconds. Converting TR into milliseconds" ) tr = tr * 1000 WFLOGGER.info("New tr value %s ms", tr) unit = "ms" elif tr and tr > 10: # noqa: PLR2004 - # check to see, if tr is in milliseconds, convert it into seconds - WFLOGGER.warning("tr is in milliseconds, Converting it into seconds") + # check to see, if TR is in milliseconds, convert it into seconds + WFLOGGER.warning("TR is in milliseconds, Converting it into seconds") tr = tr / 1000.0 - WFLOGGER.info("New tr value %s s", tr) + WFLOGGER.info("New TR value %s s", tr) unit = "s" # swap back in From b19907a8aa8ca909db66de81c3b3b575b17a078a Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 8 Jul 2024 13:40:19 -0400 Subject: [PATCH 10/93] :bug: Use C-PAC Function node --- CPAC/alff/alff.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/CPAC/alff/alff.py b/CPAC/alff/alff.py index 4fe03cb2cc..f8bfc1a0b8 100644 --- a/CPAC/alff/alff.py +++ b/CPAC/alff/alff.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright (C) 2012-2024 C-PAC Developers +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
import os from nipype.interfaces.afni import preprocess @@ -9,6 +24,7 @@ from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.nodeblock import nodeblock from CPAC.registration.registration import apply_transform +from CPAC.utils.interfaces import Function from CPAC.utils.utils import check_prov_for_regtool @@ -177,7 +193,7 @@ def create_alff(wf_name="alff_workflow"): wf.connect(input_node, "rest_res", bandpass, "in_file") get_option_string = pe.Node( - util.Function( + Function( input_names=["mask"], output_names=["option_string"], function=get_opt_string, From b013cccb49c05e4297a0b088727ebb41e99e8988 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 8 Jul 2024 13:54:48 -0400 Subject: [PATCH 11/93] :package: Init `Function` --- CPAC/utils/interfaces/__init__.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/CPAC/utils/interfaces/__init__.py b/CPAC/utils/interfaces/__init__.py index 126bb1c22b..6716a562f5 100644 --- a/CPAC/utils/interfaces/__init__.py +++ b/CPAC/utils/interfaces/__init__.py @@ -1,7 +1,27 @@ +# Copyright (C) 2010-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Custom interfaces for C-PAC.""" + from . 
import brickstat, datasink, function, pc +from .function import Function __all__ = [ "function", + "Function", "pc", "brickstat", "datasink", From c7819d1aefbb9ebd1b352c71b2f92cb980e907e1 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 8 Jul 2024 14:16:53 -0400 Subject: [PATCH 12/93] :art: Remove unnecessary initializations --- CPAC/utils/utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/CPAC/utils/utils.py b/CPAC/utils/utils.py index 41937056c6..bfd313a56c 100644 --- a/CPAC/utils/utils.py +++ b/CPAC/utils/utils.py @@ -808,10 +808,7 @@ def get_scan_params( effective_echo_spacing https://bids-specification.readthedocs.io/en/stable/glossary.html#effectiveechospacing-metadata """ - # initialize vars to empty - tr = pattern = ref_slice = first_tr = last_tr = pe_direction = "" unit: Literal["ms", "s"] = "s" - effective_echo_spacing = template = None if isinstance(pipeconfig_stop_indx, str): if "End" in pipeconfig_stop_indx or "end" in pipeconfig_stop_indx: From f1943771af4bf1061f87dd79288fb464992e1b94 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 8 Jul 2024 16:07:15 -0400 Subject: [PATCH 13/93] :recycle: Exclusively use custom `Function` Nodes + :rotating_light: Lint --- CPAC/anat_preproc/anat_preproc.py | 30 +++--- CPAC/anat_preproc/lesion_preproc.py | 28 ++++-- CPAC/anat_preproc/utils.py | 98 ++++++------------- .../distortion_correction.py | 14 +-- CPAC/distortion_correction/utils.py | 29 +++++- CPAC/easy_thresh/easy_thresh.py | 47 +++++---- CPAC/func_preproc/func_motion.py | 2 +- CPAC/func_preproc/func_preproc.py | 5 +- CPAC/group_analysis/group_analysis.py | 33 +++++-- .../longitudinal_preproc.py | 4 +- CPAC/median_angle/median_angle.py | 35 +++++-- CPAC/nuisance/nuisance.py | 13 +-- CPAC/nuisance/utils/utils.py | 2 +- CPAC/randomise/randomise.py | 10 +- CPAC/registration/output_func_to_standard.py | 6 +- CPAC/registration/registration.py | 57 +++++------ CPAC/reho/reho.py | 19 +++- CPAC/sca/sca.py | 20 ++-- CPAC/scrubbing/scrubbing.py | 49 ++++++---- CPAC/seg_preproc/seg_preproc.py | 38 ++++--- CPAC/surface/surf_preproc.py | 39 +++++--- CPAC/timeseries/timeseries_analysis.py | 8 +- CPAC/utils/interfaces/function/seg_preproc.py | 23 ++++- CPAC/utils/tests/test_datasource.py | 20 +++- CPAC/utils/utils.py | 10 +- 25 files changed, 385 insertions(+), 254 deletions(-) diff --git a/CPAC/anat_preproc/anat_preproc.py b/CPAC/anat_preproc/anat_preproc.py index 8e24b54b81..0f4e770f97 100644 --- a/CPAC/anat_preproc/anat_preproc.py +++ b/CPAC/anat_preproc/anat_preproc.py @@ -15,7 +15,6 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . 
-# from copy import deepcopy import os from nipype.interfaces import afni, ants, freesurfer, fsl @@ -36,6 +35,7 @@ ) from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.nodeblock import nodeblock +from CPAC.utils.interfaces import Function from CPAC.utils.interfaces.fsl import Merge as fslMerge @@ -138,7 +138,7 @@ def acpc_alignment( aff_to_rig_imports = ["import os", "from numpy import *"] aff_to_rig = pe.Node( - util.Function( + Function( input_names=["in_xfm", "out_name"], output_names=["out_mat"], function=fsl_aff_to_rigid, @@ -319,7 +319,7 @@ def T1wmulT2w_brain_norm_s_string(sigma, in_file): return "-s %f -div %s" % (sigma, in_file) T1wmulT2w_brain_norm_s_string = pe.Node( - util.Function( + Function( input_names=["sigma", "in_file"], output_names=["out_str"], function=T1wmulT2w_brain_norm_s_string, @@ -378,7 +378,7 @@ def form_lower_string(mean, std): return "-thr %s -bin -ero -mul 255" % (lower) form_lower_string = pe.Node( - util.Function( + Function( input_names=["mean", "std"], output_names=["out_str"], function=form_lower_string, @@ -444,7 +444,7 @@ def file_to_a_list(infile_1, infile_2): return [infile_1, infile_2] file_to_a_list = pe.Node( - util.Function( + Function( input_names=["infile_1", "infile_2"], output_names=["out_list"], function=file_to_a_list, @@ -544,7 +544,7 @@ def afni_brain_connector(wf, cfg, strat_pool, pipe_num, opt): ) skullstrip_args = pe.Node( - util.Function( + Function( input_names=[ "spat_norm", "spat_norm_dxyz", @@ -762,7 +762,7 @@ def fsl_brain_connector(wf, cfg, strat_pool, pipe_num, opt): anat_robustfov.inputs.output_type = "NIFTI_GZ" anat_pad_RobustFOV_cropped = pe.Node( - util.Function( + Function( input_names=["cropped_image_path", "target_image_path"], output_names=["padded_image_path"], function=pad, @@ -902,7 +902,7 @@ def unet_brain_connector(wf, cfg, strat_pool, pipe_num, opt): from CPAC.unet.function import predict_volumes unet_mask = pe.Node( - util.Function( + Function( input_names=["model_path", "cimg_in"], output_names=["out_path"], function=predict_volumes, @@ -1083,7 +1083,7 @@ def freesurfer_brain_connector(wf, cfg, strat_pool, pipe_num, opt): # convert brain mask file from .mgz to .nii.gz fs_brain_mask_to_nifti = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=mri_convert ), name=f"fs_brainmask_to_nifti_{pipe_num}", @@ -1119,7 +1119,7 @@ def freesurfer_abcd_brain_connector(wf, cfg, strat_pool, pipe_num, opt): Ref: https://github.com/DCAN-Labs/DCAN-HCP/blob/7927754/PostFreeSurfer/PostFreeSurferPipeline.sh#L151-L156 """ wmparc_to_nifti = pe.Node( - util.Function( + Function( input_names=["in_file", "reslice_like", "args"], output_names=["out_file"], function=mri_convert, @@ -1130,7 +1130,7 @@ def freesurfer_abcd_brain_connector(wf, cfg, strat_pool, pipe_num, opt): # Register wmparc file if ingressing FreeSurfer data if strat_pool.check_rpool("pipeline-fs_xfm"): wmparc_to_native = pe.Node( - util.Function( + Function( input_names=["source_file", "target_file", "xfm", "out_file"], output_names=["transformed_file"], function=normalize_wmparc, @@ -1168,7 +1168,7 @@ def freesurfer_abcd_brain_connector(wf, cfg, strat_pool, pipe_num, opt): wf.connect(wmparc_to_nifti, "out_file", binary_mask, "in_file") wb_command_fill_holes = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=wb_command ), name=f"wb_command_fill_holes_{pipe_num}", @@ -1206,7 +1206,7 @@ def freesurfer_fsl_brain_connector(wf, cfg, strat_pool, pipe_num, 
opt): # mri_convert -it mgz ${SUBJECTS_DIR}/${subject}/mri/brainmask.mgz -ot nii brainmask.nii.gz convert_fs_brainmask_to_nifti = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=mri_convert ), name=f"convert_fs_brainmask_to_nifti_{node_id}", @@ -1217,7 +1217,7 @@ def freesurfer_fsl_brain_connector(wf, cfg, strat_pool, pipe_num, opt): # mri_convert -it mgz ${SUBJECTS_DIR}/${subject}/mri/T1.mgz -ot nii T1.nii.gz convert_fs_T1_to_nifti = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=mri_convert ), name=f"convert_fs_T1_to_nifti_{node_id}", @@ -2888,7 +2888,7 @@ def freesurfer_abcd_preproc(wf, cfg, strat_pool, pipe_num, opt=None): # fslmaths "$T1wImageFile"_1mm.nii.gz -div $Mean -mul 150 -abs "$T1wImageFile"_1mm.nii.gz normalize_head = pe.Node( - util.Function( + Function( input_names=["in_file", "number", "out_file_suffix"], output_names=["out_file"], function=fslmaths_command, diff --git a/CPAC/anat_preproc/lesion_preproc.py b/CPAC/anat_preproc/lesion_preproc.py index 2ef58c3d2a..07871ae32d 100644 --- a/CPAC/anat_preproc/lesion_preproc.py +++ b/CPAC/anat_preproc/lesion_preproc.py @@ -1,13 +1,30 @@ # -*- coding: utf-8 -*- +# Copyright (C) 2019-2023 C-PAC Developers +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . from nipype.interfaces import afni import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function def inverse_lesion(lesion_path): - """ + """Replace non-zeroes with zeroes and zeroes with ones. + Check if the image contains more zeros than non-zeros, if so, replaces non-zeros by zeros and zeros by ones. @@ -38,13 +55,12 @@ def inverse_lesion(lesion_path): nii = nu.inverse_nifti_values(image=lesion_path) nib.save(nii, lesion_out) return lesion_out - else: - return lesion_out + return lesion_out def create_lesion_preproc(wf_name="lesion_preproc"): - """ - The main purpose of this workflow is to process lesions masks. + """Process lesions masks. + Lesion mask file is deobliqued and reoriented in the same way as the T1 in the anat_preproc function. 
@@ -95,7 +111,7 @@ def create_lesion_preproc(wf_name="lesion_preproc"): lesion_deoblique.inputs.deoblique = True lesion_inverted = pe.Node( - interface=util.Function( + interface=Function( input_names=["lesion_path"], output_names=["lesion_out"], function=inverse_lesion, diff --git a/CPAC/anat_preproc/utils.py b/CPAC/anat_preproc/utils.py index b3246fc41a..39904bbb66 100644 --- a/CPAC/anat_preproc/utils.py +++ b/CPAC/anat_preproc/utils.py @@ -1,73 +1,34 @@ # -*- coding: utf-8 -*- -from numpy import zeros -from nibabel import load as nib_load, Nifti1Image -import nipype.interfaces.utility as util - -from CPAC.pipeline import nipype_pipeline_engine as pe - - -def get_shape(nifti_image): - return nib_load(nifti_image).shape - - -def pad(cropped_image_path, target_image_path): - """ - Pad a cropped image to match the dimensions of a target image along the z-axis, - while keeping padded image aligned with target_image. - - Parameters - ---------- - - cropped_image_path (str): The file path to the cropped image (NIfTI format). - - target_image_path (str): The file path to the target image (NIfTI format). - - Returns - ------- - - str: The file path to the saved padded image (NIfTI format). +# Copyright (C) 2018-2023 C-PAC Developers - The function loads cropped and target iamges, calculates the z-dimension shift required for alignment such - that the mask generated from padded image will work correctly on the target image. The result padded image is - saved as an NIfTI file in the working directory/node and file path is returned as output. +# This file is part of C-PAC. - Note: The function assumes that the input images are in NIfTI format and have compatible dimensions. The cropped - and target image should only differ in z-axis dimension. - """ - from os import getcwd, path - from typing import Optional +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. - from numpy import asanyarray, ndarray, zeros_like - from nibabel import load, Nifti1Image, save +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. - cropped_image: Optional[ndarray] = asanyarray(load(cropped_image_path).dataobj) - target_image: Optional[ndarray] = asanyarray(load(target_image_path).dataobj) +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
+import os - # Taking 1 slice to calculate the z dimension shift from top - center_row: int = target_image.shape[0] // 2 - center_column: int = target_image.shape[1] // 2 - z_slice_cropped_image: Optional[ndarray] = cropped_image[ - center_row, center_column, : - ] - z_slice_target_image: Optional[ndarray] = target_image[center_row, center_column, :] - - for z_shift in range(len(z_slice_target_image) - len(z_slice_cropped_image) + 1): - if ( - z_slice_target_image[z_shift : z_shift + len(z_slice_cropped_image)] - == z_slice_cropped_image - ).all(): - break +from numpy import * +from nibabel import load as nib_load +from nipype.interfaces.base import CommandLineInputSpec, File, TraitedSpec +import nipype.interfaces.utility as util +from nipype.interfaces.workbench.base import WBCommand - padded_image_matrix: Optional[ndarray] = zeros_like(target_image) - padded_image_matrix[:, :, z_shift : cropped_image.shape[2] + z_shift] = ( - cropped_image - ) - padded_image_path: str = path.join(getcwd(), "padded_image_T1w.nii.gz") - cropped_image = load(cropped_image_path) - save( - Nifti1Image(padded_image_matrix, affine=cropped_image.affine), padded_image_path - ) - return padded_image_path +from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function def get_shape(nifti_image): + """Return the shape of a NIfTI image.""" return nib_load(nifti_image).shape @@ -286,7 +247,7 @@ def split_hemi(multi_file): def split_hemi_interface() -> util.Function: """Return a function interface for split_hemi.""" - return util.Function( + return Function( input_names=["multi_file"], output_names=["lh", "rh"], function=split_hemi ) @@ -587,12 +548,9 @@ def normalize_wmparc(source_file, target_file, xfm, out_file): return os.path.join(os.getcwd(), out_file) -"""This module provides interfaces for workbench -volume-remove-islands commands""" -from nipype.interfaces.base import CommandLineInputSpec, File, TraitedSpec -from nipype.interfaces.workbench.base import WBCommand - - class VolumeRemoveIslandsInputSpec(CommandLineInputSpec): + """InputSpec for workbench -volume-remove-islands commands.""" + in_file = File( exists=True, mandatory=True, @@ -610,14 +568,14 @@ class VolumeRemoveIslandsInputSpec(CommandLineInputSpec): class VolumeRemoveIslandsOutputSpec(TraitedSpec): + """OutputSpec for workbench -volume-remove-islands commands.""" + out_file = File(exists=True, desc="the output ROI volume") class VolumeRemoveIslands(WBCommand): - """ - workbench - -volume-remove-islands - REMOVE ISLANDS FROM AN ROI VOLUME + """Remove islandes from an ROI volume. + wb_command -volume-remove-islands - the input ROI volume - output - the output ROI volume. 
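The hunks above and below in this patch swap nipype's generic `util.Function` interface for C-PAC's custom `Function` interface (exported from `CPAC.utils.interfaces`). Judging from the call sites in these diffs, the custom interface appears to be constructor-compatible with `nipype.interfaces.utility.Function`, so each node only needs its import changed. A minimal sketch of the resulting node pattern follows; the node name, wrapped function, and the behavior of the `imports` argument are illustrative assumptions, not part of the patch:

    from CPAC.pipeline import nipype_pipeline_engine as pe
    from CPAC.utils.interfaces import Function

    def _scale(in_value):
        """Toy function body; run in an isolated namespace by the Function node."""
        return in_value * 2

    scale_node = pe.Node(
        Function(
            input_names=["in_value"],
            output_names=["scaled"],
            function=_scale,
            # assumed to behave like util.Function's ``imports`` list
            imports=["import os"],
        ),
        name="scale_node",
    )

The design benefit suggested by these patches is that wrapped-function import handling can be managed by the interface itself rather than repeated ad hoc at every call site.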
diff --git a/CPAC/distortion_correction/distortion_correction.py b/CPAC/distortion_correction/distortion_correction.py index 79b8400bb1..91b379b0a7 100644 --- a/CPAC/distortion_correction/distortion_correction.py +++ b/CPAC/distortion_correction/distortion_correction.py @@ -131,7 +131,7 @@ def distcor_phasediff_fsl_fugue(wf, cfg, strat_pool, pipe_num, opt=None): == "AFNI" ): skullstrip_args = pe.Node( - util.Function( + Function( input_names=["shrink_fac"], output_names=["expr"], function=create_afni_arg, @@ -667,7 +667,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): "import sys", ] phase_encoding = pe.Node( - util.Function( + Function( input_names=[ "unwarp_dir", "phase_one", @@ -710,7 +710,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): topup_imports = ["import os", "import subprocess"] run_topup = pe.Node( - util.Function( + Function( input_names=["merged_file", "acqparams"], output_names=[ "out_fieldcoef", @@ -732,7 +732,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): wf.connect(phase_encoding, "acq_params", run_topup, "acqparams") choose_phase = pe.Node( - util.Function( + Function( input_names=["phase_imgs", "unwarp_dir"], output_names=["out_phase_image", "vnum"], function=choose_phase_image, @@ -746,7 +746,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): wf.connect(node, out, choose_phase, "unwarp_dir") vnum_base = pe.Node( - util.Function( + Function( input_names=[ "vnum", "motion_mat_list", @@ -797,7 +797,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): name = "PhaseTwo_aw" vnum_base_two = pe.Node( - util.Function( + Function( input_names=[ "vnum", "motion_mat_list", @@ -840,7 +840,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): name = "PhaseOne_aw" vnum_base_one = pe.Node( - util.Function( + Function( input_names=[ "vnum", "motion_mat_list", diff --git a/CPAC/distortion_correction/utils.py b/CPAC/distortion_correction/utils.py index 2b78dbfa4d..b76acba074 100644 --- a/CPAC/distortion_correction/utils.py +++ b/CPAC/distortion_correction/utils.py @@ -1,3 +1,19 @@ +# Copyright (C) 2021-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
import os import subprocess import sys @@ -12,6 +28,8 @@ import nipype.interfaces.utility as util from nipype.pipeline import engine as pe +from CPAC.utils.interfaces import Function + def run_HCP_gradient_unwarp(phase_vol, input_coeffs): import os @@ -49,7 +67,7 @@ def run_convertwarp(cw_trilinear, cw_fullWarp_abs): f"--warp1={cw_fullWarp_abs}", "--relout", f"--out={out_file}", - f"--j={jac_out}", + f"--j={out_jac}", ] subprocess.check_output(cmd) @@ -64,7 +82,7 @@ def gradient_distortion_correction(wf, inp_image, name): grad_unwarp_imports = ["import os", "import subprocess"] grad_unwarp = pe.Node( - util.Function( + Function( input_names=["phase_vol", "input_coeffs"], output_names=["trilinear", "abs_fullWarp"], function=run_HCP_gradient_unwarp, @@ -78,7 +96,7 @@ def gradient_distortion_correction(wf, inp_image, name): convertwarp_imports = ["import os", "import subprocess"] convert_warp = pe.Node( - util.Function( + Function( input_names=["cw_trilinear", "cw_fullWarp_abs"], output_names=["out_file_cw", "out_jac_cw"], function=run_convertwarp, @@ -248,8 +266,9 @@ def phase_encode( def z_pad(name="z_pad"): - """Pad in Z by one slice if odd so that topup does not complain - (slice consists of zeros that will be dilated by following step). + """Pad in Z by one slice if odd so that topup does not complain. + + (Slice consists of zeros that will be dilated by following step). """ wf = pe.Workflow(name=name) diff --git a/CPAC/easy_thresh/easy_thresh.py b/CPAC/easy_thresh/easy_thresh.py index d514d51c54..20918c08a9 100644 --- a/CPAC/easy_thresh/easy_thresh.py +++ b/CPAC/easy_thresh/easy_thresh.py @@ -1,3 +1,19 @@ +# Copyright (C) 2012-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import os import re import subprocess @@ -7,12 +23,11 @@ import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function def easy_thresh(wf_name): - """ - Workflow for carrying out cluster-based thresholding - and colour activation overlaying. + """Carry out cluster-based thresholding and colour activation overlaying. 
Parameters ---------- @@ -213,7 +228,7 @@ def easy_thresh(wf_name): # or qform/sform info) from one image to another geo_imports = ["import subprocess"] copy_geometry = pe.MapNode( - util.Function( + Function( input_names=["infile_a", "infile_b"], output_names=["out_file"], function=copy_geom, @@ -246,7 +261,7 @@ def easy_thresh(wf_name): cluster_imports = ["import os", "import re", "import subprocess"] cluster = pe.MapNode( - util.Function( + Function( input_names=[ "in_file", "volume", @@ -271,7 +286,7 @@ def easy_thresh(wf_name): # create tuple of z_threshold and max intensity value of threshold file create_tuple = pe.MapNode( - util.Function( + Function( input_names=["infile_a", "infile_b"], output_names=["out_file"], function=get_tuple, @@ -299,7 +314,7 @@ def easy_thresh(wf_name): # as FSLDIR,MNI and voxel size get_bg_imports = ["import os", "import nibabel as nib"] get_backgroundimage = pe.MapNode( - util.Function( + Function( input_names=["in_file", "file_parameters"], output_names=["out_file"], function=get_standard_background_img, @@ -312,7 +327,7 @@ def easy_thresh(wf_name): # function node to get the standard fsl brain image # outputs single file get_backgroundimage2 = pe.Node( - util.Function( + Function( input_names=["in_file", "file_parameters"], output_names=["out_file"], function=get_standard_background_img, @@ -412,10 +427,9 @@ def call_cluster(in_file, volume, dlh, threshold, pthreshold, parameters): def copy_geom(infile_a, infile_b): - """ - Method to call fsl fslcpgeom command to copy - certain parts of the header information (image dimensions, - voxel dimensions, voxel dimensions units string, image + """Call fsl fslcpgeom command to copy certain parts of the header information. + + Copy (image dimensions, voxel dimensions, voxel dimensions units string, image orientation/origin or qform/sform info) from one image to another. Parameters @@ -449,9 +463,7 @@ def copy_geom(infile_a, infile_b): def get_standard_background_img(in_file, file_parameters): - """ - Method to get the standard brain image from FSL - standard data directory. + """Get the standard brain image from FSL standard data directory. Parameters ---------- @@ -487,10 +499,7 @@ def get_standard_background_img(in_file, file_parameters): def get_tuple(infile_a, infile_b): - """ - Simple method to return tuple of z_threhsold - maximum intensity values of Zstatistic image - for input to the overlay. + """Return tuple of z_threhsold maximum intensity values of Zstatistic image for input to the overlay. 
Parameters ---------- diff --git a/CPAC/func_preproc/func_motion.py b/CPAC/func_preproc/func_motion.py index 21fdb86a50..bea7d2e29c 100644 --- a/CPAC/func_preproc/func_motion.py +++ b/CPAC/func_preproc/func_motion.py @@ -423,7 +423,7 @@ def get_motion_ref(wf, cfg, strat_pool, pipe_num, opt=None): elif opt == "fmriprep_reference": func_get_RPI = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=estimate_reference_image, diff --git a/CPAC/func_preproc/func_preproc.py b/CPAC/func_preproc/func_preproc.py index 4d0fe73c9e..7004b4f025 100644 --- a/CPAC/func_preproc/func_preproc.py +++ b/CPAC/func_preproc/func_preproc.py @@ -23,6 +23,7 @@ from CPAC.func_preproc.utils import nullify from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.nodeblock import nodeblock +from CPAC.utils.interfaces import Function from CPAC.utils.interfaces.ants import ( AI, # niworkflows PrintHeader, @@ -343,7 +344,7 @@ def create_wf_edit_func(wf_name="edit_func"): # allocate a node to check that the requested edits are # reasonable given the data func_get_idx = pe.Node( - util.Function( + Function( input_names=["in_files", "stop_idx", "start_idx"], output_names=["stopidx", "startidx"], function=get_idx, @@ -877,7 +878,7 @@ def form_thr_string(thr): return "-thr %s" % (threshold_z) form_thr_string = pe.Node( - util.Function( + Function( input_names=["thr"], output_names=["out_str"], function=form_thr_string, diff --git a/CPAC/group_analysis/group_analysis.py b/CPAC/group_analysis/group_analysis.py index d3e78c4698..6da81ff37e 100644 --- a/CPAC/group_analysis/group_analysis.py +++ b/CPAC/group_analysis/group_analysis.py @@ -1,14 +1,29 @@ +# Copyright (C) 2012-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . from nipype.interfaces import fsl import nipype.interfaces.utility as util from CPAC.easy_thresh import easy_thresh from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function def get_operation(in_file): - """ - Method to create operation string - for fslmaths. + """Create operation string for fslmaths. Parameters ---------- @@ -39,7 +54,9 @@ def get_operation(in_file): def label_zstat_files(zstat_list, con_file): - """Take in the z-stat file outputs of FSL FLAME and rename them after the + """Rename z-stat file outputs from FSL FLAME using contrast labels. + + Take in the z-stat file outputs of FSL FLAME and rename them after the contrast labels of the contrasts provided. """ cons = [] @@ -64,9 +81,7 @@ def label_zstat_files(zstat_list, con_file): def create_fsl_flame_wf(ftest=False, wf_name="groupAnalysis"): - """ - FSL `FEAT `_ - BASED Group Analysis. + """Run FSL `FEAT `_ BASED Group Analysis. 
Parameters ---------- @@ -313,7 +328,7 @@ def create_fsl_flame_wf(ftest=False, wf_name="groupAnalysis"): # easier interpretation label_zstat_imports = ["import os"] label_zstat = pe.Node( - util.Function( + Function( input_names=["zstat_list", "con_file"], output_names=["new_zstat_list"], function=label_zstat_files, @@ -341,7 +356,7 @@ def create_fsl_flame_wf(ftest=False, wf_name="groupAnalysis"): # function node to get the operation string for fslmaths command get_opstring = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=get_operation ), name="get_opstring", diff --git a/CPAC/longitudinal_pipeline/longitudinal_preproc.py b/CPAC/longitudinal_pipeline/longitudinal_preproc.py index dfead14d59..9fbe31c6b5 100644 --- a/CPAC/longitudinal_pipeline/longitudinal_preproc.py +++ b/CPAC/longitudinal_pipeline/longitudinal_preproc.py @@ -24,9 +24,9 @@ import numpy as np import nibabel as nib from nipype.interfaces import fsl -import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function from CPAC.utils.monitoring import IFLOGGER from CPAC.utils.nifti_utils import nifti_image_input @@ -617,7 +617,7 @@ def subject_specific_template( ] if method == "flirt": template_gen_node = pe.Node( - util.Function( + Function( input_names=[ "input_brain_list", "input_skull_list", diff --git a/CPAC/median_angle/median_angle.py b/CPAC/median_angle/median_angle.py index 1433df8ac8..de4fd683cb 100644 --- a/CPAC/median_angle/median_angle.py +++ b/CPAC/median_angle/median_angle.py @@ -1,12 +1,29 @@ +# Copyright (C) 2012-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function def median_angle_correct(target_angle_deg, realigned_file): - """ - Performs median angle correction on fMRI data. Median angle correction algorithm - based on [1]_. + """Perform median angle correction on fMRI data. + + Median angle correction algorithm based on [1]_. Parameters ---------- @@ -89,8 +106,7 @@ def writeToFile(data, nii, fname): def calc_median_angle_params(subject): - """ - Calculates median angle parameters of a subject. + """Calculate median angle parameters of a subject. Parameters ---------- @@ -133,8 +149,7 @@ def calc_median_angle_params(subject): def calc_target_angle(mean_bolds, median_angles): """ - Calculates a target angle based on median angle parameters of - the group. + Calculate a target angle based on median angle parameters of the group. 
Parameters ---------- @@ -229,7 +244,7 @@ def create_median_angle_correction(name="median_angle_correction"): ) mac = pe.Node( - util.Function( + Function( input_names=["target_angle_deg", "realigned_file"], output_names=["corrected_file", "angles_file"], function=median_angle_correct, @@ -305,7 +320,7 @@ def create_target_angle(name="target_angle"): ) cmap = pe.MapNode( - util.Function( + Function( input_names=["subject"], output_names=["mean_bold", "median_angle"], function=calc_median_angle_params, @@ -315,7 +330,7 @@ def create_target_angle(name="target_angle"): ) cta = pe.Node( - util.Function( + Function( input_names=["mean_bolds", "median_angles"], output_names=["target_angle"], function=calc_target_angle, diff --git a/CPAC/nuisance/nuisance.py b/CPAC/nuisance/nuisance.py index c547ff6b01..45337a0c23 100644 --- a/CPAC/nuisance/nuisance.py +++ b/CPAC/nuisance/nuisance.py @@ -125,7 +125,7 @@ def form_mask_erosion_prop(erosion_prop): ] eroded_mask = pe.Node( - util.Function( + Function( input_names=[ "roi_mask", "skullstrip_mask", @@ -156,7 +156,7 @@ def form_mask_erosion_prop(erosion_prop): wf.connect(eroded_mask, "output_roi_mask", outputspec, "eroded_mask") if segmentmap: erosion_segmentmap = pe.Node( - util.Function( + Function( input_names=["roi_mask", "erosion_mm", "erosion_prop"], output_names=["eroded_roi_mask"], function=erosion, @@ -1357,7 +1357,7 @@ def create_regressor_workflow( ] cosfilter_node = pe.Node( - util.Function( + Function( input_names=["input_image_path", "timestep"], output_names=["cosfiltered_img"], function=cosine_filter, @@ -1374,7 +1374,7 @@ def create_regressor_workflow( "input_image_path", ) tr_string2float_node = pe.Node( - util.Function( + Function( input_names=["tr"], output_names=["tr_float"], function=TR_string_to_float, @@ -1887,7 +1887,7 @@ def filtering_bold_and_regressors( bandpass_ts.inputs.outputtype = "NIFTI_GZ" tr_string2float_node = pe.Node( - util.Function( + Function( input_names=["tr"], output_names=["tr_float"], function=TR_string_to_float, @@ -2418,7 +2418,8 @@ def nuisance_regressors_generation( opt: dict, space: Literal["T1w", "bold"], ) -> tuple[Workflow, dict]: - """ + """Generate nuisance regressors. + Parameters ---------- wf : ~nipype.pipeline.engine.workflows.Workflow diff --git a/CPAC/nuisance/utils/utils.py b/CPAC/nuisance/utils/utils.py index 92499523a8..db6667dcb3 100644 --- a/CPAC/nuisance/utils/utils.py +++ b/CPAC/nuisance/utils/utils.py @@ -499,7 +499,7 @@ def generate_summarize_tissue_mask_ventricles_masking( # generate inverse transform flags, which depends on the number of transforms inverse_transform_flags = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["inverse_transform_flags"], function=generate_inverse_transform_flags, diff --git a/CPAC/randomise/randomise.py b/CPAC/randomise/randomise.py index 8c2351c9f0..b3144685aa 100644 --- a/CPAC/randomise/randomise.py +++ b/CPAC/randomise/randomise.py @@ -15,6 +15,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . 
from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function from CPAC.utils.monitoring import IFLOGGER @@ -53,7 +54,6 @@ def prep_randomise_workflow( ): from nipype.interfaces import fsl import nipype.interfaces.io as nio - import nipype.interfaces.utility as util wf = pe.Workflow(name="randomise_workflow") wf.base_dir = c.work_dir @@ -74,7 +74,7 @@ def prep_randomise_workflow( randomise.inputs.fcon = fts_file select_tcorrp_files = pe.Node( - util.Function( + Function( input_names=["input_list"], output_names=["out_file"], function=select ), name="select_t_corrp", @@ -83,7 +83,7 @@ def prep_randomise_workflow( wf.connect(randomise, "t_corrected_p_files", select_tcorrp_files, "input_list") select_tstat_files = pe.Node( - util.Function( + Function( input_names=["input_list"], output_names=["out_file"], function=select ), name="select_t_stat", @@ -147,6 +147,10 @@ def run(group_config_path): import os from CPAC.pipeline.cpac_group_runner import load_config_yml + from CPAC.pipeline.cpac_randomise_pipeline import ( + randomise_merged_file, + randomise_merged_mask, + ) group_config_obj = load_config_yml(group_config_path) pipeline_output_folder = group_config_obj.pipeline_dir diff --git a/CPAC/registration/output_func_to_standard.py b/CPAC/registration/output_func_to_standard.py index 6cf172f76d..bafea7d8d0 100644 --- a/CPAC/registration/output_func_to_standard.py +++ b/CPAC/registration/output_func_to_standard.py @@ -374,7 +374,7 @@ def ants_apply_warps_func_mni( itk_imports = ["import os"] change_transform = pe.Node( - util.Function( + Function( input_names=["input_affine_file"], output_names=["updated_affine_file"], function=change_itk_transform_type, @@ -534,7 +534,7 @@ def ants_apply_warps_func_mni( # check transform list (if missing any init/rig/affine) and exclude Nonetype check_transform = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["checked_transform_list", "list_length"], function=check_transforms, @@ -546,7 +546,7 @@ def ants_apply_warps_func_mni( # generate inverse transform flags, which depends on the number of transforms inverse_transform_flags = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["inverse_transform_flags"], function=generate_inverse_transform_flags, diff --git a/CPAC/registration/registration.py b/CPAC/registration/registration.py index b6cc9892ea..da63e694e4 100644 --- a/CPAC/registration/registration.py +++ b/CPAC/registration/registration.py @@ -39,6 +39,7 @@ seperate_warps_list, single_ants_xfm_to_list, ) +from CPAC.utils.interfaces import Function from CPAC.utils.interfaces.fsl import Merge as fslMerge from CPAC.utils.utils import check_prov_for_motion_tool, check_prov_for_regtool @@ -104,7 +105,7 @@ def apply_transform( wf.connect(inputNode, "reference", apply_warp, "reference_image") interp_string = pe.Node( - util.Function( + Function( input_names=["interpolation", "reg_tool"], output_names=["interpolation"], function=interpolation_string, @@ -118,7 +119,7 @@ def apply_transform( wf.connect(interp_string, "interpolation", apply_warp, "interpolation") ants_xfm_list = pe.Node( - util.Function( + Function( input_names=["transform"], output_names=["transform_list"], function=single_ants_xfm_to_list, @@ -135,7 +136,7 @@ def apply_transform( if int(num_cpus) > 1 and time_series: chunk_imports = ["import nibabel as nib"] chunk = pe.Node( - util.Function( + Function( input_names=["func_file", "n_chunks", "chunk_size"], output_names=["TR_ranges"], 
function=chunk_ts, @@ -154,7 +155,7 @@ def apply_transform( split_imports = ["import os", "import subprocess"] split = pe.Node( - util.Function( + Function( input_names=["func_file", "tr_ranges"], output_names=["split_funcs"], function=split_ts_chunks, @@ -196,7 +197,7 @@ def apply_transform( ) interp_string = pe.Node( - util.Function( + Function( input_names=["interpolation", "reg_tool"], output_names=["interpolation"], function=interpolation_string, @@ -222,7 +223,7 @@ def apply_transform( if int(num_cpus) > 1 and time_series: chunk_imports = ["import nibabel as nib"] chunk = pe.Node( - util.Function( + Function( input_names=["func_file", "n_chunks", "chunk_size"], output_names=["TR_ranges"], function=chunk_ts, @@ -241,7 +242,7 @@ def apply_transform( split_imports = ["import os", "import subprocess"] split = pe.Node( - util.Function( + Function( input_names=["func_file", "tr_ranges"], output_names=["split_funcs"], function=split_ts_chunks, @@ -761,7 +762,7 @@ def create_register_func_to_anat( if phase_diff_distcor: conv_pedir = pe.Node( - interface=util.Function( + interface=Function( input_names=["pedir", "convert"], output_names=["pedir"], function=convert_pedir, @@ -1067,7 +1068,7 @@ def bbreg_args(bbreg_target): if phase_diff_distcor: conv_pedir = pe.Node( - interface=util.Function( + interface=Function( input_names=["pedir", "convert"], output_names=["pedir"], function=convert_pedir, @@ -1276,7 +1277,7 @@ def create_wf_calculate_ants_warp( """ reg_imports = ["import os", "import subprocess"] calculate_ants_warp = pe.Node( - interface=util.Function( + interface=Function( input_names=[ "moving_brain", "reference_brain", @@ -1302,7 +1303,7 @@ def create_wf_calculate_ants_warp( calculate_ants_warp.interface.num_threads = num_threads select_forward_initial = pe.Node( - util.Function( + Function( input_names=["warp_list", "selection"], output_names=["selected_warp"], function=seperate_warps_list, @@ -1313,7 +1314,7 @@ def create_wf_calculate_ants_warp( select_forward_initial.inputs.selection = "Initial" select_forward_rigid = pe.Node( - util.Function( + Function( input_names=["warp_list", "selection"], output_names=["selected_warp"], function=seperate_warps_list, @@ -1324,7 +1325,7 @@ def create_wf_calculate_ants_warp( select_forward_rigid.inputs.selection = "Rigid" select_forward_affine = pe.Node( - util.Function( + Function( input_names=["warp_list", "selection"], output_names=["selected_warp"], function=seperate_warps_list, @@ -1335,7 +1336,7 @@ def create_wf_calculate_ants_warp( select_forward_affine.inputs.selection = "Affine" select_forward_warp = pe.Node( - util.Function( + Function( input_names=["warp_list", "selection"], output_names=["selected_warp"], function=seperate_warps_list, @@ -1346,7 +1347,7 @@ def create_wf_calculate_ants_warp( select_forward_warp.inputs.selection = "Warp" select_inverse_warp = pe.Node( - util.Function( + Function( input_names=["warp_list", "selection"], output_names=["selected_warp"], function=seperate_warps_list, @@ -1788,7 +1789,7 @@ def ANTs_registration_connector( # check transform list to exclude Nonetype (missing) init/rig/affine check_transform = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["checked_transform_list", "list_length"], function=check_transforms, @@ -1851,7 +1852,7 @@ def ANTs_registration_connector( # check transform list to exclude Nonetype (missing) init/rig/affine check_invlinear_transform = pe.Node( - util.Function( + Function( input_names=["transform_list"], 
output_names=["checked_transform_list", "list_length"], function=check_transforms, @@ -1873,7 +1874,7 @@ def ANTs_registration_connector( # generate inverse transform flags, which depends on the # number of transforms inverse_transform_flags = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["inverse_transform_flags"], function=generate_inverse_transform_flags, @@ -1935,7 +1936,7 @@ def ANTs_registration_connector( # check transform list to exclude Nonetype (missing) init/rig/affine check_all_transform = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["checked_transform_list", "list_length"], function=check_transforms, @@ -2004,7 +2005,7 @@ def ANTs_registration_connector( # check transform list to exclude Nonetype (missing) init/rig/affine check_all_inv_transform = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["checked_transform_list", "list_length"], function=check_transforms, @@ -2026,7 +2027,7 @@ def ANTs_registration_connector( # generate inverse transform flags, which depends on the # number of transforms inverse_all_transform_flags = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["inverse_transform_flags"], function=generate_inverse_transform_flags, @@ -2122,7 +2123,7 @@ def bold_to_T1template_xfm_connector( itk_imports = ["import os"] change_transform = pe.Node( - util.Function( + Function( input_names=["input_affine_file"], output_names=["updated_affine_file"], function=change_itk_transform_type, @@ -2964,7 +2965,7 @@ def overwrite_transform_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None # c4d -mcs ${WD}/xfms/ANTs_CombinedWarp.nii.gz -oo ${WD}/xfms/e1.nii.gz ${WD}/xfms/e2.nii.gz ${WD}/xfms/e3.nii.gz # -mcs: -multicomponent-split, -oo: -output-multiple split_combined_warp = pe.Node( - util.Function( + Function( input_names=["input_name", "output_name"], output_names=["output1", "output2", "output3"], function=run_c4d, @@ -2982,7 +2983,7 @@ def overwrite_transform_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None # c4d -mcs ${WD}/xfms/ANTs_CombinedInvWarp.nii.gz -oo ${WD}/xfms/e1inv.nii.gz ${WD}/xfms/e2inv.nii.gz ${WD}/xfms/e3inv.nii.gz split_combined_inv_warp = pe.Node( - util.Function( + Function( input_names=["input_name", "output_name"], output_names=["output1", "output2", "output3"], function=run_c4d, @@ -3678,7 +3679,7 @@ def apply_phasediff_to_timeseries_separately(wf, cfg, strat_pool, pipe_num, opt= wf.connect(warp_fmap, "out_file", mask_fmap, "in_file") conv_pedir = pe.Node( - interface=util.Function( + interface=Function( input_names=["pedir", "convert"], output_names=["pedir"], function=convert_pedir, @@ -4819,7 +4820,7 @@ def single_step_resample_timeseries_to_T1template( reg_tool = check_prov_for_regtool(xfm_prov) bbr2itk = pe.Node( - util.Function( + Function( input_names=["reference_file", "source_file", "transform_file"], output_names=["itk_transform"], function=run_c3d, @@ -4860,7 +4861,7 @@ def single_step_resample_timeseries_to_T1template( ### Loop starts! 
### motionxfm2itk = pe.MapNode( - util.Function( + Function( input_names=["reference_file", "source_file", "transform_file"], output_names=["itk_transform"], function=run_c3d, @@ -4881,7 +4882,7 @@ def single_step_resample_timeseries_to_T1template( wf.connect(node, out, motionxfm2itk, "transform_file") elif motion_correct_tool == "3dvolreg": convert_transform = pe.Node( - util.Function( + Function( input_names=["one_d_filename"], output_names=["transform_directory"], function=one_d_to_mat, diff --git a/CPAC/reho/reho.py b/CPAC/reho/reho.py index 80e6599d10..870d3fa36d 100644 --- a/CPAC/reho/reho.py +++ b/CPAC/reho/reho.py @@ -1,9 +1,26 @@ # coding: utf-8 +# Copyright (C) 2012-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.nodeblock import nodeblock from CPAC.reho.utils import * +from CPAC.utils.interfaces import Function def create_reho(wf_name): @@ -99,7 +116,7 @@ def create_reho(wf_name): "from CPAC.reho.utils import f_kendall", ] raw_reho_map = pe.Node( - util.Function( + Function( input_names=["in_file", "mask_file", "cluster_size"], output_names=["out_file"], function=compute_reho, diff --git a/CPAC/sca/sca.py b/CPAC/sca/sca.py index 8e714dbd5f..d12aae7de9 100644 --- a/CPAC/sca/sca.py +++ b/CPAC/sca/sca.py @@ -30,11 +30,15 @@ create_spatial_map_dataflow, resample_func_roi, ) +from CPAC.utils.interfaces import Function def create_sca(name_sca="sca"): """ - Map of the correlations of the Region of Interest(Seed in native or MNI space) with the rest of brain voxels. + Create map of the correlations of the Region of Interest with the rest of brain voxels. + + (Seed in native or MNI space) + The map is normalized to contain Z-scores, mapped in standard space and treated with spatial smoothing. Parameters @@ -150,8 +154,8 @@ def create_sca(name_sca="sca"): def create_temporal_reg(wflow_name="temporal_reg", which="SR"): - r""" - Temporal multiple regression workflow + r"""Create temporal multiple regression workflow. + Provides a spatial map of parameter estimates corresponding to each provided timeseries in a timeseries.txt file as regressors. 
@@ -280,9 +284,7 @@ def create_temporal_reg(wflow_name="temporal_reg", which="SR"): ) check_timeseries = pe.Node( - util.Function( - input_names=["in_file"], output_names=["out_file"], function=check_ts - ), + Function(input_names=["in_file"], output_names=["out_file"], function=check_ts), name="check_timeseries", ) @@ -325,7 +327,7 @@ def create_temporal_reg(wflow_name="temporal_reg", which="SR"): map_roi_imports = ['import os', 'import numpy as np'] # get roi order and send to output node for raw outputs - get_roi_order = pe.Node(util.Function(input_names=['maps', + get_roi_order = pe.Node(Function(input_names=['maps', 'timeseries'], output_names=['labels', 'maps'], @@ -350,7 +352,7 @@ def create_temporal_reg(wflow_name="temporal_reg", which="SR"): outputNode, 'temp_reg_map_files') # get roi order and send to output node for z-stat outputs - get_roi_order_zstat = pe.Node(util.Function(input_names=['maps', + get_roi_order_zstat = pe.Node(Function(input_names=['maps', 'timeseries'], output_names=['labels', 'maps'], @@ -396,7 +398,7 @@ def SCA_AVG(wf, cfg, strat_pool, pipe_num, opt=None): # same workflow, except to run TSE and send it to the resource # pool so that it will not get sent to SCA resample_functional_roi_for_sca = pe.Node( - util.Function( + Function( input_names=["in_func", "in_roi", "realignment", "identity_matrix"], output_names=["out_func", "out_roi"], function=resample_func_roi, diff --git a/CPAC/scrubbing/scrubbing.py b/CPAC/scrubbing/scrubbing.py index ed85ef1024..e08b816edc 100644 --- a/CPAC/scrubbing/scrubbing.py +++ b/CPAC/scrubbing/scrubbing.py @@ -1,13 +1,29 @@ +# Copyright (C) 2012-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function def create_scrubbing_preproc(wf_name="scrubbing"): - """ - This workflow essentially takes the list of offending timepoints that are to be removed - and removes it from the motion corrected input image. Also, it removes the information - of discarded time points from the movement parameters file obtained during motion correction. + """Take the list of offending timepoints that are to be removed and remove it from the motion corrected input image. + + Also remove the information of discarded time points from the movement parameters file obtained during motion correction. 
Parameters ---------- @@ -94,7 +110,7 @@ def create_scrubbing_preproc(wf_name="scrubbing"): ) craft_scrub_input = pe.Node( - util.Function( + Function( input_names=["scrub_input", "frames_in_1D_file"], output_names=["scrub_input_string"], function=get_indx, @@ -103,7 +119,7 @@ def create_scrubbing_preproc(wf_name="scrubbing"): ) scrubbed_movement_parameters = pe.Node( - util.Function( + Function( input_names=["infile_a", "infile_b"], output_names=["out_file"], function=get_mov_parameters, @@ -120,7 +136,7 @@ def create_scrubbing_preproc(wf_name="scrubbing"): # scrubbed_preprocessed.inputs.outputtype = 'NIFTI_GZ' scrubbed_preprocessed = pe.Node( - util.Function( + Function( input_names=["scrub_input"], output_names=["scrubbed_image"], function=scrub_image, @@ -152,9 +168,8 @@ def create_scrubbing_preproc(wf_name="scrubbing"): def get_mov_parameters(infile_a, infile_b): - """ - Method to get the new movement parameters - file after removing the offending time frames + """Get the new movement parameters file after removing the offending time frames. + (i.e., those exceeding FD 0.5mm/0.2mm threshold). Parameters @@ -192,7 +207,7 @@ def get_mov_parameters(infile_a, infile_b): raise Exception(msg) f = open(out_file, "a") - for l in l1: + for l in l1: # noqa: E741 data = l2[int(l.strip())] f.write(data) f.close() @@ -200,9 +215,7 @@ def get_mov_parameters(infile_a, infile_b): def get_indx(scrub_input, frames_in_1D_file): - """ - Method to get the list of time - frames that are to be included. + """Get the list of time frames that are to be included. Parameters ---------- @@ -230,10 +243,10 @@ def get_indx(scrub_input, frames_in_1D_file): def scrub_image(scrub_input): - """ - Method to run 3dcalc in order to scrub the image. This is used instead of - the Nipype interface for 3dcalc because functionality is needed for - specifying an input file with specifically-selected volumes. For example: + """Run 3dcalc in order to scrub the image. + + This is used instead of the Nipype interface for 3dcalc because functionality is + needed for specifying an input file with specifically-selected volumes. For example: input.nii.gz[2,3,4,..98], etc. Parameters diff --git a/CPAC/seg_preproc/seg_preproc.py b/CPAC/seg_preproc/seg_preproc.py index a66990f1e6..f769cf14b3 100644 --- a/CPAC/seg_preproc/seg_preproc.py +++ b/CPAC/seg_preproc/seg_preproc.py @@ -1,3 +1,19 @@ +# Copyright (C) 2012-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . from nipype.interfaces import ants, freesurfer, fsl, utility as util from nipype.interfaces.utility import Function @@ -23,10 +39,10 @@ def process_segment_map(wf_name, use_priors, use_custom_threshold, reg_tool): - """This is a sub workflow used inside segmentation workflow to process - probability maps obtained in segmentation. 
Steps include overlapping - of the prior tissue with probability maps, thresholding and binarizing - it and creating a mask that is used in further analysis. + """Create a sub workflow used inside segmentation workflow to process probability maps obtained in segmentation. + + Steps include overlapping of the prior tissue with probability maps, thresholding + and binarizing it and creating a mask that is used in further analysis. Parameters ---------- @@ -274,7 +290,7 @@ def tissue_mask_template_to_t1(wf_name, use_ants): # check transform list to exclude Nonetype (missing) init/rig/affine check_transform = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["checked_transform_list", "list_length"], function=check_transforms, @@ -289,7 +305,7 @@ def tissue_mask_template_to_t1(wf_name, use_ants): # generate inverse transform flags, which depends on the # number of transforms inverse_transform_flags = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["inverse_transform_flags"], function=generate_inverse_transform_flags, @@ -356,9 +372,7 @@ def tissue_mask_template_to_t1(wf_name, use_ants): def create_seg_preproc_antsJointLabel_method(wf_name="seg_preproc_templated_based"): - """ - Generate the subject's cerebral spinal fluids, - white matter and gray matter mask based on provided template, if selected to do so. + """Generate the subject's cerebral spinal fluids, white matter and gray matter mask based on provided template, if selected to do so. Parameters ---------- @@ -417,7 +431,7 @@ def create_seg_preproc_antsJointLabel_method(wf_name="seg_preproc_templated_base ) seg_preproc_antsJointLabel = pe.Node( - util.Function( + Function( input_names=[ "anatomical_brain", "anatomical_brain_mask", @@ -700,7 +714,7 @@ def tissue_seg_fsl_fast(wf, cfg, strat_pool, pipe_num, opt=None): ) get_csf = pe.Node( - util.Function( + Function( input_names=["probability_maps"], output_names=["filename"], function=pick_wm_prob_0, @@ -945,7 +959,7 @@ def tissue_seg_freesurfer(wf, cfg, strat_pool, pipe_num, opt=None): wf.connect(node, out, fs_aseg_to_native, "target_file") fs_aseg_to_nifti = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=mri_convert ), name=f"fs_aseg_to_nifti_{pipe_num}", diff --git a/CPAC/surface/surf_preproc.py b/CPAC/surface/surf_preproc.py index 2229e24b5a..1defe4e2d1 100644 --- a/CPAC/surface/surf_preproc.py +++ b/CPAC/surface/surf_preproc.py @@ -1,10 +1,25 @@ -import os +# Copyright (C) 2021-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. -import nipype.interfaces.utility as util +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
+import os from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.nodeblock import nodeblock from CPAC.surface.PostFreeSurfer.surf_reho import run_surf_reho +from CPAC.utils.interfaces import Function def run_surface( @@ -1026,7 +1041,7 @@ def run_surface( ) def surface_postproc(wf, cfg, strat_pool, pipe_num, opt=None): surf = pe.Node( - util.Function( + Function( input_names=[ "post_freesurfer_folder", "freesurfer_folder", @@ -1369,7 +1384,7 @@ def surface_postproc(wf, cfg, strat_pool, pipe_num, opt=None): ) def surface_falff(wf, cfg, strat_pool, pipe_num, opt): falff = pe.Node( - util.Function( + Function( input_names=["subject", "dtseries"], output_names=["surf_falff"], function=run_surf_falff, @@ -1394,7 +1409,7 @@ def surface_falff(wf, cfg, strat_pool, pipe_num, opt): ) def surface_alff(wf, cfg, strat_pool, pipe_num, opt): alff = pe.Node( - util.Function( + Function( input_names=["subject", "dtseries"], output_names=["surf_alff"], function=run_surf_alff, @@ -1427,7 +1442,7 @@ def surface_alff(wf, cfg, strat_pool, pipe_num, opt): ) def surface_reho(wf, cfg, strat_pool, pipe_num, opt): L_cortex_file = pe.Node( - util.Function( + Function( input_names=["subject", "dtseries", "structure", "cortex_filename"], output_names=["L_cortex_file"], function=run_get_cortex, @@ -1442,7 +1457,7 @@ def surface_reho(wf, cfg, strat_pool, pipe_num, opt): wf.connect(node, out, L_cortex_file, "dtseries") R_cortex_file = pe.Node( - util.Function( + Function( input_names=["subject", "dtseries", "structure", "cortex_filename"], output_names=["R_cortex_file"], function=run_get_cortex, @@ -1456,7 +1471,7 @@ def surface_reho(wf, cfg, strat_pool, pipe_num, opt): wf.connect(node, out, R_cortex_file, "dtseries") mean_timeseries = pe.Node( - util.Function( + Function( input_names=["subject", "dtseries"], output_names=["mean_timeseries"], function=run_mean_timeseries, @@ -1468,7 +1483,7 @@ def surface_reho(wf, cfg, strat_pool, pipe_num, opt): wf.connect(node, out, mean_timeseries, "dtseries") L_reho = pe.Node( - util.Function( + Function( input_names=[ "subject", "dtseries", @@ -1499,7 +1514,7 @@ def surface_reho(wf, cfg, strat_pool, pipe_num, opt): wf.connect(node, out, L_reho, "dtseries") R_reho = pe.Node( - util.Function( + Function( input_names=[ "subject", "dtseries", @@ -1545,7 +1560,7 @@ def surface_reho(wf, cfg, strat_pool, pipe_num, opt): ) def surface_connectivity_matrix(wf, cfg, strat_pool, pipe_num, opt): connectivity_parcellation = pe.Node( - util.Function( + Function( input_names=["subject", "dtseries", "surf_atlaslabel"], output_names=["parcellation_file"], function=run_ciftiparcellate, @@ -1561,7 +1576,7 @@ def surface_connectivity_matrix(wf, cfg, strat_pool, pipe_num, opt): ]["surface_parcellation_template"] correlation_matrix = pe.Node( - util.Function( + Function( input_names=["subject", "ptseries"], output_names=["correlation_matrix"], function=run_cifticorrelation, diff --git a/CPAC/timeseries/timeseries_analysis.py b/CPAC/timeseries/timeseries_analysis.py index 14547bc79b..a56bc33c74 100644 --- a/CPAC/timeseries/timeseries_analysis.py +++ b/CPAC/timeseries/timeseries_analysis.py @@ -15,7 +15,6 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . 
from nipype.interfaces import afni, fsl, utility as util -from nipype.interfaces.utility import Function from CPAC.connectome.connectivity_matrix import ( create_connectome_afni, @@ -29,6 +28,7 @@ create_spatial_map_dataflow, resample_func_roi, ) +from CPAC.utils.interfaces import Function from CPAC.utils.monitoring import FMLOGGER @@ -86,7 +86,7 @@ def get_voxel_timeseries(wf_name: str = "voxel_timeseries") -> pe.Workflow: ) timeseries_voxel = pe.Node( - util.Function( + Function( input_names=["data_file", "template"], output_names=["oneD_file"], function=gen_voxel_timeseries, @@ -241,7 +241,7 @@ def get_roi_timeseries(wf_name: str = "roi_timeseries") -> pe.Workflow: clean_csv_imports = ["import os"] clean_csv = pe.Node( - util.Function( + Function( input_names=["roi_csv"], output_names=["roi_array", "edited_roi_csv"], function=clean_roi_csv, @@ -382,7 +382,7 @@ def get_vertices_timeseries(wf_name="vertices_timeseries"): ) timeseries_surface = pe.Node( - util.Function( + Function( input_names=["rh_surface_file", "lh_surface_file"], output_names=["out_file"], function=gen_vertices_timeseries, diff --git a/CPAC/utils/interfaces/function/seg_preproc.py b/CPAC/utils/interfaces/function/seg_preproc.py index d220781f48..5fe9152b23 100644 --- a/CPAC/utils/interfaces/function/seg_preproc.py +++ b/CPAC/utils/interfaces/function/seg_preproc.py @@ -1,11 +1,26 @@ +# Copyright (C) 2022-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . """Function interfaces for seg_preproc.""" -from nipype.interfaces import utility as util +from CPAC.utils.interfaces import Function def pick_tissue_from_labels_file_interface(input_names=None): - """Function to create a Function interface for - CPAC.seg_preproc.utils.pick_tissue_from_labels_file. + """Create a Function interface for ~CPAC.seg_preproc.utils.pick_tissue_from_labels_file. Parameters ---------- @@ -20,7 +35,7 @@ def pick_tissue_from_labels_file_interface(input_names=None): if input_names is None: input_names = ["multiatlas_Labels", "csf_label", "gm_label", "wm_label"] - return util.Function( + return Function( input_names=input_names, output_names=["csf_mask", "gm_mask", "wm_mask"], function=pick_tissue_from_labels_file, diff --git a/CPAC/utils/tests/test_datasource.py b/CPAC/utils/tests/test_datasource.py index 9842310bb1..be7c2255c2 100644 --- a/CPAC/utils/tests/test_datasource.py +++ b/CPAC/utils/tests/test_datasource.py @@ -1,10 +1,26 @@ +# Copyright (C) 2019-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import json import pytest -import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.utils.datasource import match_epi_fmaps +from CPAC.utils.interfaces import Function from CPAC.utils.test_resources import setup_test_wf @@ -48,7 +64,7 @@ def test_match_epi_fmaps(): } match_fmaps = pe.Node( - util.Function( + Function( input_names=["fmap_dct", "bold_pedir"], output_names=["opposite_pe_epi", "same_pe_epi"], function=match_epi_fmaps, diff --git a/CPAC/utils/utils.py b/CPAC/utils/utils.py index bfd313a56c..b459262993 100644 --- a/CPAC/utils/utils.py +++ b/CPAC/utils/utils.py @@ -132,7 +132,7 @@ def get_flag_wf(wf_name="get_flag"): input_node = pe.Node(util.IdentityInterface(fields=["in_flag"]), name="inputspec") get_flag = pe.Node( - util.Function(input_names=["in_flag"], function=_get_flag), name="get_flag" + Function(input_names=["in_flag"], function=_get_flag), name="get_flag" ) wf.connect(input_node, "in_flag", get_flag, "in_flag") @@ -322,7 +322,7 @@ def get_zscore(map_node=False, wf_name="z_score"): ) op_string = pe.MapNode( - util.Function( + Function( input_names=["mean", "std_dev"], output_names=["op_string"], function=get_operand_string, @@ -345,7 +345,7 @@ def get_zscore(map_node=False, wf_name="z_score"): ) op_string = pe.Node( - util.Function( + Function( input_names=["mean", "std_dev"], output_names=["op_string"], function=get_operand_string, @@ -400,7 +400,7 @@ def get_fisher_zscore(input_name, map_node=False, wf_name="fisher_z_score"): if map_node: # node to separate out fisher_z_score = pe.MapNode( - util.Function( + Function( input_names=["correlation_file", "timeseries_one_d", "input_name"], output_names=["out_file"], function=compute_fisher_z_score, @@ -410,7 +410,7 @@ def get_fisher_zscore(input_name, map_node=False, wf_name="fisher_z_score"): ) else: fisher_z_score = pe.Node( - util.Function( + Function( input_names=["correlation_file", "timeseries_one_d", "input_name"], output_names=["out_file"], function=compute_fisher_z_score, From af65a2e850b1c3ed0438bf4284325ce88c44129f Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 8 Jul 2024 17:01:06 -0400 Subject: [PATCH 14/93] :bug: Fix circular import --- CPAC/utils/interfaces/function/seg_preproc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CPAC/utils/interfaces/function/seg_preproc.py b/CPAC/utils/interfaces/function/seg_preproc.py index 5fe9152b23..f3cb06840b 100644 --- a/CPAC/utils/interfaces/function/seg_preproc.py +++ b/CPAC/utils/interfaces/function/seg_preproc.py @@ -16,7 +16,7 @@ # License along with C-PAC. If not, see . 
"""Function interfaces for seg_preproc.""" -from CPAC.utils.interfaces import Function +from CPAC.utils.interfaces.function.function import Function def pick_tissue_from_labels_file_interface(input_names=None): From b414d172ac6fa208d48a4010d484d5440c2da721 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 8 Jul 2024 17:15:27 -0400 Subject: [PATCH 15/93] :truck: Move engine resources to own submodule --- CPAC/pipeline/engine/__init__.py | 50 + CPAC/pipeline/engine/engine.py | 1279 ++++++++++++++++ .../{engine.py => engine/resource.py} | 1341 +---------------- 3 files changed, 1380 insertions(+), 1290 deletions(-) create mode 100644 CPAC/pipeline/engine/__init__.py create mode 100644 CPAC/pipeline/engine/engine.py rename CPAC/pipeline/{engine.py => engine/resource.py} (54%) diff --git a/CPAC/pipeline/engine/__init__.py b/CPAC/pipeline/engine/__init__.py new file mode 100644 index 0000000000..3d00ebde72 --- /dev/null +++ b/CPAC/pipeline/engine/__init__.py @@ -0,0 +1,50 @@ +# Copyright (C) 2021-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""C-PAC engine.""" + +from .engine import ( + func_outdir_ingress, + ingress_freesurfer, + ingress_output_dir, + ingress_pipeconfig_paths, + ingress_raw_anat_data, + json_outdir_ingress, + NodeBlock, + run_node_blocks, + set_iterables, + strip_template, + wrap_block, +) +from .resource import initiate_rpool, NodeData, ResourcePool + +__all__ = [ + "func_outdir_ingress", + "ingress_freesurfer", + "ingress_raw_anat_data", + "ingress_output_dir", + "ingress_pipeconfig_paths", + "ingress_raw_func_data", + "initiate_rpool", + "json_outdir_ingress", + "NodeBlock", + "NodeData", + "ResourcePool", + "run_node_blocks", + "set_iterables", + "strip_template", + "wrap_block", +] diff --git a/CPAC/pipeline/engine/engine.py b/CPAC/pipeline/engine/engine.py new file mode 100644 index 0000000000..4e941b05d9 --- /dev/null +++ b/CPAC/pipeline/engine/engine.py @@ -0,0 +1,1279 @@ +# Copyright (C) 2021-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
+import ast +import copy +import hashlib +import json +import logging +import os +import warnings + +from nipype import config + +from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.pipeline.nodeblock import NodeBlockFunction +from CPAC.resources.templates.lookup_table import lookup_identifier +from CPAC.utils.datasource import ( + create_anat_datasource, + create_func_datasource, + create_general_datasource, + ingress_func_metadata, + resolve_resolution, +) +from CPAC.utils.interfaces.function import Function +from CPAC.utils.monitoring import ( + getLogger, + LOGTAIL, + WARNING_FREESURFER_OFF_WITH_DATA, + WFLOGGER, +) +from CPAC.utils.utils import ( + read_json, + write_output_json, +) + + +class NodeBlock: + def __init__(self, node_block_functions, debug=False): + if not isinstance(node_block_functions, list): + node_block_functions = [node_block_functions] + + self.node_blocks = {} + + for node_block_function in node_block_functions: # <---- sets up the NodeBlock object in case you gave it a list of node blocks instead of a single one - for option forking. + self.input_interface = [] + if isinstance(node_block_function, tuple): + self.input_interface = node_block_function[1] + node_block_function = node_block_function[0] + if not isinstance(self.input_interface, list): + self.input_interface = [self.input_interface] + + if not isinstance(node_block_function, NodeBlockFunction): + # If the object is a plain function `__name__` will be more useful than `str()` + obj_str = ( + node_block_function.__name__ + if hasattr(node_block_function, "__name__") + else str(node_block_function) + ) + msg = f'Object is not a nodeblock: "{obj_str}"' + raise TypeError(msg) + + name = node_block_function.name + self.name = name + self.node_blocks[name] = {} + + if self.input_interface: + for interface in self.input_interface: + for orig_input in node_block_function.inputs: + if isinstance(orig_input, tuple): + list_tup = list(orig_input) + if interface[0] in list_tup: + list_tup.remove(interface[0]) + list_tup.append(interface[1]) + node_block_function.inputs.remove(orig_input) + node_block_function.inputs.append(tuple(list_tup)) + elif orig_input == interface[0]: + node_block_function.inputs.remove(interface[0]) + node_block_function.inputs.append(interface[1]) + + for key, val in node_block_function.legacy_nodeblock_dict().items(): + self.node_blocks[name][key] = val + + self.node_blocks[name]["block_function"] = node_block_function + + # TODO: fix/replace below + self.outputs = {} + for out in node_block_function.outputs: + self.outputs[out] = None + + self.options = ["base"] + if node_block_function.outputs is not None: + self.options = node_block_function.outputs + + WFLOGGER.info("Connecting %s...", name) + if debug: + config.update_config({"logging": {"workflow_level": "DEBUG"}}) + logging.update_logging(config) + WFLOGGER.debug( + '"inputs": %s\n\t "outputs": %s%s', + node_block_function.inputs, + list(self.outputs.keys()), + f'\n\t"options": {self.options}' + if self.options != ["base"] + else "", + ) + config.update_config({"logging": {"workflow_level": "INFO"}}) + logging.update_logging(config) + + def get_name(self): + return self.name + + def check_null(self, val): + if isinstance(val, str): + val = None if val.lower() == "none" else val + return val + + def check_output(self, outputs, label, name): + if label not in outputs: + msg = ( + f'\n[!] 
Output name "{label}" in the block ' + "function does not match the outputs list " + f'{outputs} in Node Block "{name}"\n' + ) + raise NameError(msg) + + def grab_tiered_dct(self, cfg, key_list): + cfg_dct = cfg.dict() + for key in key_list: + try: + cfg_dct = cfg_dct.get(key, {}) + except KeyError as ke: + msg = "[!] The config provided to the node block is not valid" + raise KeyError(msg) from ke + return cfg_dct + + def connect_block(self, wf, cfg, rpool): + debug = cfg.pipeline_setup["Debugging"]["verbose"] + all_opts = [] + for name, block_dct in self.node_blocks.items(): + opts = [] + config = self.check_null(block_dct["config"]) + option_key = self.check_null(block_dct["option_key"]) + option_val = self.check_null(block_dct["option_val"]) + if option_key and option_val: + if not isinstance(option_key, list): + option_key = [option_key] + if not isinstance(option_val, list): + option_val = [option_val] + if config: + key_list = config + option_key + else: + key_list = option_key + if "USER-DEFINED" in option_val: + # load custom config data into each 'opt' + opts = self.grab_tiered_dct(cfg, key_list) + else: + for option in option_val: + try: + if option in self.grab_tiered_dct(cfg, key_list): + # goes over the option_vals in the node block docstring, and checks if the user's pipeline config included it in the forking list + opts.append(option) + except AttributeError as err: + msg = f"{err}\nNode Block: {name}" + raise Exception(msg) + + if opts is None: + opts = [opts] + + elif option_key and not option_val: + # enables multiple config forking entries + if not isinstance(option_key[0], list): + msg = ( + f"[!] The option_key field ({option_key}) " + f"for {name} exists but there is no " + "option_val.\n\nIf you are trying to " + "populate multiple option keys, the " + "option_val field must contain a list of " + "a list.\n" + ) + raise ValueError(msg) + for option_config in option_key: + # option_config is a list of pipe config levels down to the option + if config: + key_list = config + option_config + else: + key_list = option_config + option_val = option_config[-1] + if option_val in self.grab_tiered_dct(cfg, key_list[:-1]): + opts.append(option_val) + else: # AND, if there are multiple option-val's (in a list) in the docstring, it gets iterated below in 'for opt in option' etc. AND THAT'S WHEN YOU HAVE TO DELINEATE WITHIN THE NODE BLOCK CODE!!! + opts = [None] + all_opts += opts + + sidecar_additions = { + "CpacConfigHash": hashlib.sha1( + json.dumps(cfg.dict(), sort_keys=True).encode("utf-8") + ).hexdigest(), + "CpacConfig": cfg.dict(), + } + + if cfg["pipeline_setup"]["output_directory"].get("user_defined"): + sidecar_additions["UserDefined"] = cfg["pipeline_setup"][ + "output_directory" + ]["user_defined"] + + for name, block_dct in self.node_blocks.items(): + # iterates over either the single node block in the sequence, or a list of node blocks within the list of node blocks, i.e. for option forking. 
+ switch = self.check_null(block_dct["switch"]) + config = self.check_null(block_dct["config"]) + option_key = self.check_null(block_dct["option_key"]) + option_val = self.check_null(block_dct["option_val"]) + inputs = self.check_null(block_dct["inputs"]) + outputs = self.check_null(block_dct["outputs"]) + + block_function = block_dct["block_function"] + + opts = [] + if option_key and option_val: + if not isinstance(option_key, list): + option_key = [option_key] + if not isinstance(option_val, list): + option_val = [option_val] + if config: + key_list = config + option_key + else: + key_list = option_key + if "USER-DEFINED" in option_val: + # load custom config data into each 'opt' + opts = self.grab_tiered_dct(cfg, key_list) + else: + for option in option_val: + if option in self.grab_tiered_dct(cfg, key_list): + # goes over the option_vals in the node block docstring, and checks if the user's pipeline config included it in the forking list + opts.append(option) + else: # AND, if there are multiple option-val's (in a list) in the docstring, it gets iterated below in 'for opt in option' etc. AND THAT'S WHEN YOU HAVE TO DELINEATE WITHIN THE NODE BLOCK CODE!!! + opts = [None] + # THIS ALSO MEANS the multiple option-val's in docstring node blocks can be entered once in the entire node-block sequence, not in a list of multiples + if not opts: + # for node blocks where the options are split into different + # block functions - opts will be empty for non-selected + # options, and would waste the get_strats effort below + continue + + if not switch: + switch = [True] + else: + if config: + try: + key_list = config + switch + except TypeError as te: + msg = ( + "\n\n[!] Developer info: Docstring error " + f"for {name}, make sure the 'config' or " + "'switch' fields are lists.\n\n" + ) + raise TypeError(msg) from te + switch = self.grab_tiered_dct(cfg, key_list) + elif isinstance(switch[0], list): + # we have multiple switches, which is designed to only work if + # config is set to "None" + switch_list = [] + for key_list in switch: + val = self.grab_tiered_dct(cfg, key_list) + if isinstance(val, list): + # fork switches + if True in val: + switch_list.append(True) + if False in val: + switch_list.append(False) + else: + switch_list.append(val) + if False in switch_list: + switch = [False] + else: + switch = [True] + else: + # if config is set to "None" + key_list = switch + switch = self.grab_tiered_dct(cfg, key_list) + if not isinstance(switch, list): + switch = [switch] + if True in switch: + for ( + pipe_idx, + strat_pool, # strat_pool is a ResourcePool like {'desc-preproc_T1w': { 'json': info, 'data': (node, out) }, 'desc-brain_mask': etc.} + ) in rpool.get_strats(inputs, debug).items(): + # keep in mind rpool.get_strats(inputs) = {pipe_idx1: {'desc-preproc_T1w': etc.}, pipe_idx2: {..} } + fork = False in switch + for opt in opts: # it's a dictionary of ResourcePools called strat_pools, except those sub-ResourcePools only have one level! no pipe_idx strat keys. + # remember, you can get 'data' or 'json' from strat_pool with member functions + # strat_pool has all of the JSON information of all the inputs! + # so when we set_data below for the TOP-LEVEL MAIN RPOOL (not the strat_pool), we can generate new merged JSON information for each output. + # particularly, our custom 'CpacProvenance' field. 
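A rough sketch of the shapes the comments above describe, using invented resource and node-block names (only the method names `get_strats`, `get_data`, and `get` come from this file):

    strats = rpool.get_strats(["desc-preproc_T1w", "desc-brain_mask"], debug)
    # keyed by a stringified provenance tuple, one entry per strategy, e.g.:
    #   "('desc-preproc_T1w:anatomical_init', 'desc-brain_mask:brain_mask_afni')"
    for pipe_idx, strat_pool in strats.items():
        node, out = strat_pool.get_data("desc-preproc_T1w")  # (pe.Node, output field name)
        json_info = strat_pool.get("json")  # merged sidecar metadata, incl. "CpacProvenance"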
+ node_name = name + pipe_x = rpool.get_pipe_number(pipe_idx) + + replaced_inputs = [] + for interface in self.input_interface: + if isinstance(interface[1], list): + for input_name in interface[1]: + if strat_pool.check_rpool(input_name): + break + else: + input_name = interface[1] + strat_pool.copy_resource(input_name, interface[0]) + replaced_inputs.append(interface[0]) + try: + wf, outs = block_function(wf, cfg, strat_pool, pipe_x, opt) + except IOError as e: # duplicate node + WFLOGGER.warning(e) + continue + + if not outs: + if block_function.__name__ == "freesurfer_postproc": + WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA) + LOGTAIL["warnings"].append( + WARNING_FREESURFER_OFF_WITH_DATA + ) + continue + + if opt and len(option_val) > 1: + node_name = f"{node_name}_{opt}" + elif opt and "USER-DEFINED" in option_val: + node_name = f'{node_name}_{opt["Name"]}' + + if debug: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("\n=======================") + verbose_logger.debug("Node name: %s", node_name) + prov_dct = rpool.get_resource_strats_from_prov( + ast.literal_eval(pipe_idx) + ) + for key, val in prov_dct.items(): + verbose_logger.debug("-------------------") + verbose_logger.debug("Input - %s:", key) + sub_prov_dct = rpool.get_resource_strats_from_prov(val) + for sub_key, sub_val in sub_prov_dct.items(): + sub_sub_dct = rpool.get_resource_strats_from_prov( + sub_val + ) + verbose_logger.debug(" sub-input - %s:", sub_key) + verbose_logger.debug(" prov = %s", sub_val) + verbose_logger.debug( + " sub_sub_inputs = %s", sub_sub_dct.keys() + ) + + for label, connection in outs.items(): + self.check_output(outputs, label, name) + new_json_info = copy.deepcopy(strat_pool.get("json")) + + # transfer over data-specific json info + # for example, if the input data json is _bold and the output is also _bold + data_type = label.split("_")[-1] + if data_type in new_json_info["subjson"]: + if ( + "SkullStripped" + in new_json_info["subjson"][data_type] + ): + new_json_info["SkullStripped"] = new_json_info[ + "subjson" + ][data_type]["SkullStripped"] + + # determine sources for the outputs, i.e. 
all input data into the node block + new_json_info["Sources"] = [ + x + for x in strat_pool.get_entire_rpool() + if x != "json" and x not in replaced_inputs + ] + + if isinstance(outputs, dict): + new_json_info.update(outputs[label]) + if "Description" not in outputs[label]: + # don't propagate old Description + try: + del new_json_info["Description"] + except KeyError: + pass + if "Template" in outputs[label]: + template_key = outputs[label]["Template"] + if template_key in new_json_info["Sources"]: + # only if the pipeline config template key is entered as the 'Template' field + # otherwise, skip this and take in the literal 'Template' string + try: + new_json_info["Template"] = new_json_info[ + "subjson" + ][template_key]["Description"] + except KeyError: + pass + try: + new_json_info["Resolution"] = new_json_info[ + "subjson" + ][template_key]["Resolution"] + except KeyError: + pass + else: + # don't propagate old Description + try: + del new_json_info["Description"] + except KeyError: + pass + + if "Description" in new_json_info: + new_json_info["Description"] = " ".join( + new_json_info["Description"].split() + ) + + for sidecar_key, sidecar_value in sidecar_additions.items(): + if sidecar_key not in new_json_info: + new_json_info[sidecar_key] = sidecar_value + + try: + del new_json_info["subjson"] + except KeyError: + pass + + if fork or len(opts) > 1 or len(all_opts) > 1: + if "CpacVariant" not in new_json_info: + new_json_info["CpacVariant"] = {} + raw_label = rpool.get_raw_label(label) + if raw_label not in new_json_info["CpacVariant"]: + new_json_info["CpacVariant"][raw_label] = [] + new_json_info["CpacVariant"][raw_label].append( + node_name + ) + + rpool.set_data( + label, + connection[0], + connection[1], + new_json_info, + pipe_idx, + node_name, + fork, + ) + + wf, post_labels = rpool.post_process( + wf, + label, + connection, + new_json_info, + pipe_idx, + pipe_x, + outs, + ) + + if rpool.func_reg: + for postlabel in post_labels: + connection = (postlabel[1], postlabel[2]) + wf = rpool.derivative_xfm( + wf, + postlabel[0], + connection, + new_json_info, + pipe_idx, + pipe_x, + ) + return wf + + +def wrap_block(node_blocks, interface, wf, cfg, strat_pool, pipe_num, opt): + """Wrap a list of node block functions to use within other node blocks. + + Example usage: + + # This calls the 'bold_mask_afni' and 'bold_masking' node blocks to + # skull-strip an EPI field map, without having to invoke the NodeBlock + # connection system. + + # The interface dictionary tells wrap_block to set the EPI field map + # in the parent node block's throw-away strat_pool as 'bold', so that + # the 'bold_mask_afni' and 'bold_masking' node blocks will see that as + # the 'bold' input. + + # It also tells wrap_block to set the 'desc-brain_bold' output of + # the 'bold_masking' node block to 'opposite_pe_epi_brain' (what it + # actually is) in the parent node block's strat_pool, which gets + # returned. + + # Note 'bold' and 'desc-brain_bold' (all on the left side) are the + # labels that 'bold_mask_afni' and 'bold_masking' understand/expect + # through their interfaces and docstrings. + + # The right-hand side (the values of the 'interface' dictionary) are + # what 'make sense' within the current parent node block - in this + # case, the distortion correction node block dealing with field maps. 
+ + interface = {'bold': (match_epi_fmaps_node, 'opposite_pe_epi'), + 'desc-brain_bold': 'opposite_pe_epi_brain'} + wf, strat_pool = wrap_block([bold_mask_afni, bold_masking], + interface, wf, cfg, strat_pool, + pipe_num, opt) + + ...further downstream in the parent node block: + + node, out = strat_pool.get_data('opposite_pe_epi_brain') + + # The above line will connect the output of the 'bold_masking' node + # block (which is the skull-stripped version of 'opposite_pe_epi') to + # the next node. + + """ + for block in node_blocks: + # new_pool = copy.deepcopy(strat_pool) + for in_resource, val in interface.items(): + if isinstance(val, tuple): + strat_pool.set_data( + in_resource, val[0], val[1], {}, "", "", fork=True + ) # + if "sub_num" not in strat_pool.get_pool_info(): + strat_pool.set_pool_info({"sub_num": 0}) + sub_num = strat_pool.get_pool_info()["sub_num"] + + wf, outputs = block(wf, cfg, strat_pool, f"{pipe_num}-{sub_num}", opt) # + for out, val in outputs.items(): + if out in interface and isinstance(interface[out], str): + strat_pool.set_data( + interface[out], outputs[out][0], outputs[out][1], {}, "", "" + ) + else: + strat_pool.set_data(out, outputs[out][0], outputs[out][1], {}, "", "") + sub_num += 1 + strat_pool.set_pool_info({"sub_num": sub_num}) + + return (wf, strat_pool) + + +def ingress_raw_anat_data(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id): + if "anat" not in data_paths: + WFLOGGER.warning("No anatomical data present.") + return rpool + + if "creds_path" not in data_paths: + data_paths["creds_path"] = None + + anat_flow = create_anat_datasource(f"anat_T1w_gather_{part_id}_{ses_id}") + + anat = {} + if isinstance(data_paths["anat"], str): + anat["T1"] = data_paths["anat"] + elif "T1w" in data_paths["anat"]: + anat["T1"] = data_paths["anat"]["T1w"] + + if "T1" in anat: + anat_flow.inputs.inputnode.set( + subject=part_id, + anat=anat["T1"], + creds_path=data_paths["creds_path"], + dl_dir=cfg.pipeline_setup["working_directory"]["path"], + img_type="anat", + ) + rpool.set_data("T1w", anat_flow, "outputspec.anat", {}, "", "anat_ingress") + + if "T2w" in data_paths["anat"]: + anat_flow_T2 = create_anat_datasource(f"anat_T2w_gather_{part_id}_{ses_id}") + anat_flow_T2.inputs.inputnode.set( + subject=part_id, + anat=data_paths["anat"]["T2w"], + creds_path=data_paths["creds_path"], + dl_dir=cfg.pipeline_setup["working_directory"]["path"], + img_type="anat", + ) + rpool.set_data("T2w", anat_flow_T2, "outputspec.anat", {}, "", "anat_ingress") + + if cfg.surface_analysis["freesurfer"]["ingress_reconall"]: + rpool = ingress_freesurfer( + wf, rpool, cfg, data_paths, unique_id, part_id, ses_id + ) + + return rpool + + +def ingress_freesurfer(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id): + try: + fs_path = os.path.join(cfg.pipeline_setup["freesurfer_dir"], part_id) + except KeyError: + WFLOGGER.warning("No FreeSurfer data present.") + return rpool + + # fs_path = os.path.join(cfg.pipeline_setup['freesurfer_dir'], part_id) + if not os.path.exists(fs_path): + if "sub" in part_id: + fs_path = os.path.join( + cfg.pipeline_setup["freesurfer_dir"], part_id.replace("sub-", "") + ) + else: + fs_path = os.path.join( + cfg.pipeline_setup["freesurfer_dir"], ("sub-" + part_id) + ) + + # patch for flo-specific data + if not os.path.exists(fs_path): + subj_ses = part_id + "-" + ses_id + fs_path = os.path.join(cfg.pipeline_setup["freesurfer_dir"], subj_ses) + if not os.path.exists(fs_path): + WFLOGGER.info("No FreeSurfer data found for subject %s", part_id) + return rpool + 
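The chain of fallbacks above amounts to probing a short, ordered list of candidate directories and keeping the first one that exists. A minimal sketch for a hypothetical participant (IDs and paths invented for illustration):

    fs_root = cfg.pipeline_setup["freesurfer_dir"]
    candidates = [
        os.path.join(fs_root, "sub-01"),        # part_id as given
        os.path.join(fs_root, "01"),            # "sub-" prefix stripped
        os.path.join(fs_root, "sub-01-ses-1"),  # part_id-ses_id patch for flo-specific data
    ]
    fs_path = next((path for path in candidates if os.path.exists(path)), None)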
+ # Check for double nested subj names + if os.path.exists(os.path.join(fs_path, os.path.basename(fs_path))): + fs_path = os.path.join(fs_path, part_id) + + fs_ingress = create_general_datasource("gather_freesurfer_dir") + fs_ingress.inputs.inputnode.set( + unique_id=unique_id, + data=fs_path, + creds_path=data_paths["creds_path"], + dl_dir=cfg.pipeline_setup["working_directory"]["path"], + ) + rpool.set_data( + "freesurfer-subject-dir", + fs_ingress, + "outputspec.data", + {}, + "", + "freesurfer_config_ingress", + ) + + recon_outs = { + "pipeline-fs_raw-average": "mri/rawavg.mgz", + "pipeline-fs_subcortical-seg": "mri/aseg.mgz", + "pipeline-fs_brainmask": "mri/brainmask.mgz", + "pipeline-fs_wmparc": "mri/wmparc.mgz", + "pipeline-fs_T1": "mri/T1.mgz", + "pipeline-fs_hemi-L_desc-surface_curv": "surf/lh.curv", + "pipeline-fs_hemi-R_desc-surface_curv": "surf/rh.curv", + "pipeline-fs_hemi-L_desc-surfaceMesh_pial": "surf/lh.pial", + "pipeline-fs_hemi-R_desc-surfaceMesh_pial": "surf/rh.pial", + "pipeline-fs_hemi-L_desc-surfaceMesh_smoothwm": "surf/lh.smoothwm", + "pipeline-fs_hemi-R_desc-surfaceMesh_smoothwm": "surf/rh.smoothwm", + "pipeline-fs_hemi-L_desc-surfaceMesh_sphere": "surf/lh.sphere", + "pipeline-fs_hemi-R_desc-surfaceMesh_sphere": "surf/rh.sphere", + "pipeline-fs_hemi-L_desc-surfaceMap_sulc": "surf/lh.sulc", + "pipeline-fs_hemi-R_desc-surfaceMap_sulc": "surf/rh.sulc", + "pipeline-fs_hemi-L_desc-surfaceMap_thickness": "surf/lh.thickness", + "pipeline-fs_hemi-R_desc-surfaceMap_thickness": "surf/rh.thickness", + "pipeline-fs_hemi-L_desc-surfaceMap_volume": "surf/lh.volume", + "pipeline-fs_hemi-R_desc-surfaceMap_volume": "surf/rh.volume", + "pipeline-fs_hemi-L_desc-surfaceMesh_white": "surf/lh.white", + "pipeline-fs_hemi-R_desc-surfaceMesh_white": "surf/rh.white", + "pipeline-fs_xfm": "mri/transforms/talairach.lta", + } + + for key, outfile in recon_outs.items(): + fullpath = os.path.join(fs_path, outfile) + if os.path.exists(fullpath): + fs_ingress = create_general_datasource(f"gather_fs_{key}_dir") + fs_ingress.inputs.inputnode.set( + unique_id=unique_id, + data=fullpath, + creds_path=data_paths["creds_path"], + dl_dir=cfg.pipeline_setup["working_directory"]["path"], + ) + rpool.set_data( + key, fs_ingress, "outputspec.data", {}, "", f"fs_{key}_ingress" + ) + else: + warnings.warn( + str(LookupError(f"\n[!] 
Path does not exist for {fullpath}.\n")) + ) + + return rpool + + +def ingress_raw_func_data(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id): + func_paths_dct = data_paths["func"] + + func_wf = create_func_datasource( + func_paths_dct, rpool, f"func_ingress_{part_id}_{ses_id}" + ) + func_wf.inputs.inputnode.set( + subject=part_id, + creds_path=data_paths["creds_path"], + dl_dir=cfg.pipeline_setup["working_directory"]["path"], + ) + func_wf.get_node("inputnode").iterables = ("scan", list(func_paths_dct.keys())) + + rpool.set_data("subject", func_wf, "outputspec.subject", {}, "", "func_ingress") + rpool.set_data("bold", func_wf, "outputspec.rest", {}, "", "func_ingress") + rpool.set_data("scan", func_wf, "outputspec.scan", {}, "", "func_ingress") + rpool.set_data( + "scan-params", func_wf, "outputspec.scan_params", {}, "", "scan_params_ingress" + ) + + # TODO: CHECK FOR PARAMETERS + + wf, rpool, diff, blip, fmap_rp_list = ingress_func_metadata( + wf, cfg, rpool, data_paths, part_id, data_paths["creds_path"], ses_id + ) + + # Memoize list of local functional scans + # TODO: handle S3 files + # Skip S3 files for now + + local_func_scans = [ + func_paths_dct[scan]["scan"] + for scan in func_paths_dct.keys() + if not func_paths_dct[scan]["scan"].startswith("s3://") + ] + if local_func_scans: + # pylint: disable=protected-access + wf._local_func_scans = local_func_scans + if cfg.pipeline_setup["Debugging"]["verbose"]: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("local_func_scans: %s", local_func_scans) + del local_func_scans + + return (wf, rpool, diff, blip, fmap_rp_list) + + +def ingress_output_dir( + wf, cfg, rpool, unique_id, data_paths, part_id, ses_id, creds_path=None +): + dir_path = data_paths["derivatives_dir"] + + WFLOGGER.info("\nPulling outputs from %s.\n", dir_path) + + anat = os.path.join(dir_path, "anat") + func = os.path.join(dir_path, "func") + + exts = [".nii", ".gz", ".mat", ".1D", ".txt", ".csv", ".rms", ".tsv"] + + outdir_anat = [] + outdir_func = [] + func_paths = {} + func_dict = {} + + for subdir in [anat, func]: + if os.path.isdir(subdir): + for filename in os.listdir(subdir): + for ext in exts: + if ext in filename: + if subdir == anat: + outdir_anat.append(os.path.join(subdir, filename)) + else: + outdir_func.append(os.path.join(subdir, filename)) + + # Add derivatives directory to rpool + ingress = create_general_datasource("gather_derivatives_dir") + ingress.inputs.inputnode.set( + unique_id=unique_id, + data=dir_path, + creds_path=creds_path, + dl_dir=cfg.pipeline_setup["working_directory"]["path"], + ) + rpool.set_data( + "derivatives-dir", ingress, "outputspec.data", {}, "", "outdir_config_ingress" + ) + + for subdir in [outdir_anat, outdir_func]: + for filepath in subdir: + filename = str(filepath) + for ext in exts: + filename = filename.split("/")[-1].replace(ext, "") + + data_label = filename.split(unique_id)[1].lstrip("_") + + if len(filename) == len(data_label): + msg = ( + "\n\n[!] 
Possibly wrong participant or " + "session in this directory?\n\n" + f"Filepath: {filepath}\n\n" + ) + raise Exception(msg) + + bidstag = "" + for tag in data_label.split("_"): + for prefix in ["task-", "run-", "acq-", "rec"]: + if tag.startswith(prefix): + bidstag += f"{tag}_" + data_label = data_label.replace(f"{tag}_", "") + data_label, json = strip_template(data_label, dir_path, filename) + + rpool, json_info, pipe_idx, node_name, data_label = json_outdir_ingress( + rpool, filepath, exts, data_label, json + ) + + if ( + "template" in data_label + and not json_info["Template"] + == cfg.pipeline_setup["outdir_ingress"]["Template"] + ): + continue + # Rename confounds to avoid confusion in nuisance regression + if data_label.endswith("desc-confounds_timeseries"): + data_label = "pipeline-ingress_desc-confounds_timeseries" + + if len(bidstag) > 1: + # Remove tail symbol + bidstag = bidstag[:-1] + if bidstag.startswith("task-"): + bidstag = bidstag.replace("task-", "") + + # Rename bold mask for CPAC naming convention + # and to avoid collision with anat brain mask + if data_label.endswith("desc-brain_mask") and filepath in outdir_func: + data_label = data_label.replace("brain_mask", "bold_mask") + + try: + pipe_x = rpool.get_pipe_number(pipe_idx) + except ValueError: + pipe_x = len(rpool.pipe_list) + if filepath in outdir_anat: + ingress = create_general_datasource( + f"gather_anat_outdir_{data_label!s}_{pipe_x}" + ) + ingress.inputs.inputnode.set( + unique_id=unique_id, + data=filepath, + creds_path=creds_path, + dl_dir=cfg.pipeline_setup["working_directory"]["path"], + ) + rpool.set_data( + data_label, + ingress, + "outputspec.data", + json_info, + pipe_idx, + node_name, + f"outdir_{data_label}_ingress", + inject=True, + ) + else: + if data_label.endswith("desc-preproc_bold"): + func_key = data_label + func_dict[bidstag] = {} + func_dict[bidstag]["scan"] = str(filepath) + func_dict[bidstag]["scan_parameters"] = json_info + func_dict[bidstag]["pipe_idx"] = pipe_idx + if data_label.endswith("desc-brain_mask"): + data_label = data_label.replace("brain_mask", "bold_mask") + try: + func_paths[data_label].append(filepath) + except: + func_paths[data_label] = [] + func_paths[data_label].append(filepath) + + if func_dict: + wf, rpool = func_outdir_ingress( + wf, + cfg, + func_dict, + rpool, + unique_id, + creds_path, + part_id, + func_key, + func_paths, + ) + + if cfg.surface_analysis["freesurfer"]["ingress_reconall"]: + rpool = ingress_freesurfer( + wf, rpool, cfg, data_paths, unique_id, part_id, ses_id + ) + return wf, rpool + + +def json_outdir_ingress(rpool, filepath, exts, data_label, json): + desc_val = None + for tag in data_label.split("_"): + if "desc-" in tag: + desc_val = tag + break + jsonpath = str(filepath) + for ext in exts: + jsonpath = jsonpath.replace(ext, "") + jsonpath = f"{jsonpath}.json" + + if not os.path.exists(jsonpath): + WFLOGGER.info( + "\n\n[!] No JSON found for file %s.\nCreating %s..\n\n", filepath, jsonpath + ) + json_info = { + "Description": "This data was generated elsewhere and " + "supplied by the user into this C-PAC run's " + "output directory. This JSON file was " + "automatically generated by C-PAC because a " + "JSON file was not supplied with the data." 
+ } + json_info = {**json_info, **json} + write_output_json(json_info, jsonpath) + else: + json_info = read_json(jsonpath) + json_info = {**json_info, **json} + if "CpacProvenance" in json_info: + if desc_val: + # it's a C-PAC output, let's check for pipe_idx/strat integer + # suffixes in the desc- entries. + only_desc = str(desc_val) + + if only_desc[-1].isdigit(): + for idx in range(0, 3): + # let's stop at 3, please don't run >999 strategies okay? + if only_desc[-1].isdigit(): + only_desc = only_desc[:-1] + + if only_desc[-1] == "-": + only_desc = only_desc.rstrip("-") + else: + msg = ( + "\n[!] Something went wrong with either " + "reading in the output directory or when " + "it was written out previously.\n\nGive " + "this to your friendly local C-PAC " + f"developer:\n\n{data_label!s}\n" + ) + raise IOError(msg) + + # remove the integer at the end of the desc-* variant, we will + # get the unique pipe_idx from the CpacProvenance below + data_label = data_label.replace(desc_val, only_desc) + + # preserve cpac provenance/pipe_idx + pipe_idx = rpool.generate_prov_string(json_info["CpacProvenance"]) + node_name = "" + + else: + json_info["CpacProvenance"] = [f"{data_label}:Non-C-PAC Origin: {filepath}"] + if "Description" not in json_info: + json_info["Description"] = ( + "This data was generated elsewhere and " + "supplied by the user into this C-PAC run's " + "output directory. This JSON file was " + "automatically generated by C-PAC because a " + "JSON file was not supplied with the data." + ) + pipe_idx = rpool.generate_prov_string(json_info["CpacProvenance"]) + node_name = f"{data_label}_ingress" + + return rpool, json_info, pipe_idx, node_name, data_label + + +def func_outdir_ingress( + wf, cfg, func_dict, rpool, unique_id, creds_path, part_id, key, func_paths +): + pipe_x = len(rpool.pipe_list) + ingress = create_func_datasource( + func_dict, rpool, f"gather_func_outdir_{key}_{pipe_x}" + ) + ingress.inputs.inputnode.set( + subject=unique_id, + creds_path=creds_path, + dl_dir=cfg.pipeline_setup["working_directory"]["path"], + ) + rpool.set_data("subject", ingress, "outputspec.subject", {}, "", "func_ingress") + ingress.get_node("inputnode").iterables = ("scan", list(func_dict.keys())) + rpool.set_data(key, ingress, "outputspec.rest", {}, "", "func_ingress") + + rpool.set_data("scan", ingress, "outputspec.scan", {}, "", "func_ingress") + rpool.set_data( + "scan-params", ingress, "outputspec.scan_params", {}, "", "scan_params_ingress" + ) + wf, rpool, diff, blip, fmap_rp_list = ingress_func_metadata( + wf, cfg, rpool, func_dict, part_id, creds_path, key + ) + + # Have to do it this weird way to save the parsed BIDS tag & filepath + mask_paths_key = ( + "desc-bold_mask" + if "desc-bold_mask" in func_paths + else "space-template_desc-bold_mask" + ) + ts_paths_key = "pipeline-ingress_desc-confounds_timeseries" + + # Connect func data with approproate scan name + iterables = pe.Node( + Function( + input_names=["scan", "mask_paths", "ts_paths"], + output_names=["out_scan", "mask", "confounds"], + function=set_iterables, + ), + name=f"set_iterables_{pipe_x}", + ) + iterables.inputs.mask_paths = func_paths[mask_paths_key] + iterables.inputs.ts_paths = func_paths[ts_paths_key] + wf.connect(ingress, "outputspec.scan", iterables, "scan") + + for key in func_paths: + if key in (mask_paths_key, ts_paths_key): + ingress_func = create_general_datasource(f"ingress_func_data_{key}") + ingress_func.inputs.inputnode.set( + unique_id=unique_id, + creds_path=creds_path, + 
dl_dir=cfg.pipeline_setup["working_directory"]["path"], + ) + wf.connect(iterables, "out_scan", ingress_func, "inputnode.scan") + if key == mask_paths_key: + wf.connect(iterables, "mask", ingress_func, "inputnode.data") + rpool.set_data( + key, ingress_func, "inputnode.data", {}, "", f"outdir_{key}_ingress" + ) + elif key == ts_paths_key: + wf.connect(iterables, "confounds", ingress_func, "inputnode.data") + rpool.set_data( + key, ingress_func, "inputnode.data", {}, "", f"outdir_{key}_ingress" + ) + + return wf, rpool + + +def set_iterables(scan, mask_paths=None, ts_paths=None): + # match scan with filepath to get filepath + mask_path = [path for path in mask_paths if scan in path] + ts_path = [path for path in ts_paths if scan in path] + + return (scan, mask_path[0], ts_path[0]) + + +def strip_template(data_label, dir_path, filename): + json = {} + # rename to template + for prefix in ["space-", "from-", "to-"]: + for bidstag in data_label.split("_"): + if bidstag.startswith(prefix): + template_key, template_val = bidstag.split("-") + template_name, _template_desc = lookup_identifier(template_val) + if template_name: + json["Template"] = template_val + data_label = data_label.replace(template_val, "template") + elif bidstag.startswith("res-"): + res_key, res_val = bidstag.split("-") + json["Resolution"] = res_val + data_label = data_label.replace(bidstag, "") + if data_label.find("__"): + data_label = data_label.replace("__", "_") + return data_label, json + + +def ingress_pipeconfig_paths(cfg, rpool, unique_id, creds_path=None): + # ingress config file paths + # TODO: may want to change the resource keys for each to include one level up in the YAML as well + + import pandas as pd + import pkg_resources as p + + template_csv = p.resource_filename("CPAC", "resources/cpac_templates.csv") + template_df = pd.read_csv(template_csv, keep_default_na=False) + + for row in template_df.itertuples(): + key = row.Key + val = row.Pipeline_Config_Entry + val = cfg.get_nested(cfg, [x.lstrip() for x in val.split(",")]) + resolution = row.Intended_Resolution_Config_Entry + desc = row.Description + + if not val: + continue + + if resolution: + res_keys = [x.lstrip() for x in resolution.split(",")] + tag = res_keys[-1] + json_info = {} + + if "$FSLDIR" in val: + val = val.replace("$FSLDIR", cfg.pipeline_setup["system_config"]["FSLDIR"]) + if "$priors_path" in val: + priors_path = ( + cfg.segmentation["tissue_segmentation"]["FSL-FAST"]["use_priors"][ + "priors_path" + ] + or "" + ) + if "$FSLDIR" in priors_path: + priors_path = priors_path.replace( + "$FSLDIR", cfg.pipeline_setup["system_config"]["FSLDIR"] + ) + val = val.replace("$priors_path", priors_path) + if "${resolution_for_anat}" in val: + val = val.replace( + "${resolution_for_anat}", + cfg.registration_workflows["anatomical_registration"][ + "resolution_for_anat" + ], + ) + if "${func_resolution}" in val: + val = val.replace( + "${func_resolution}", + cfg.registration_workflows["functional_registration"][ + "func_registration_to_template" + ]["output_resolution"][tag], + ) + + if desc: + template_name, _template_desc = lookup_identifier(val) + if template_name: + desc = f"{template_name} - {desc}" + json_info["Description"] = f"{desc} - {val}" + if resolution: + resolution = cfg.get_nested(cfg, res_keys) + json_info["Resolution"] = resolution + + resampled_template = pe.Node( + Function( + input_names=["resolution", "template", "template_name", "tag"], + output_names=["resampled_template"], + function=resolve_resolution, + as_module=True, + ), + 
name="resampled_" + key, + ) + + resampled_template.inputs.resolution = resolution + resampled_template.inputs.template = val + resampled_template.inputs.template_name = key + resampled_template.inputs.tag = tag + + # the set_data below is set up a little differently, because we are + # injecting and also over-writing already-existing entries + # other alternative would have been to ingress into the + # resampled_template node from the already existing entries, but we + # didn't do that here + rpool.set_data( + key, + resampled_template, + "resampled_template", + json_info, + "", + "template_resample", + ) # pipe_idx (after the blank json {}) should be the previous strat that you want deleted! because you're not connecting this the regular way, you have to do it manually + + elif val: + config_ingress = create_general_datasource(f"gather_{key}") + config_ingress.inputs.inputnode.set( + unique_id=unique_id, + data=val, + creds_path=creds_path, + dl_dir=cfg.pipeline_setup["working_directory"]["path"], + ) + rpool.set_data( + key, + config_ingress, + "outputspec.data", + json_info, + "", + f"{key}_config_ingress", + ) + # templates, resampling from config + """ + template_keys = [ + ("anat", ["network_centrality", "template_specification_file"]), + ("anat", ["nuisance_corrections", "2-nuisance_regression", + "lateral_ventricles_mask"]), + ("anat", + ["segmentation", "tissue_segmentation", "FSL-FAST", "use_priors", + "CSF_path"]), + ("anat", + ["segmentation", "tissue_segmentation", "FSL-FAST", "use_priors", + "GM_path"]), + ("anat", + ["segmentation", "tissue_segmentation", "FSL-FAST", "use_priors", + "WM_path"]), + ("anat", + ["segmentation", "tissue_segmentation", "Template_Based", "CSF"]), + ("anat", + ["segmentation", "tissue_segmentation", "Template_Based", "GRAY"]), + ("anat", + ["segmentation", "tissue_segmentation", "Template_Based", "WHITE"]), + ("anat", ["anatomical_preproc", "acpc_alignment", "T1w_ACPC_template"]), + ("anat", ["anatomical_preproc", "acpc_alignment", "T1w_brain_ACPC_template"]), + ("anat", ["anatomical_preproc", "acpc_alignment", "T2w_ACPC_template"]), + ("anat", ["anatomical_preproc", "acpc_alignment", "T2w_brain_ACPC_template"])] + + def get_nested_attr(c, template_key): + attr = getattr(c, template_key[0]) + keys = template_key[1:] + + def _get_nested(attr, keys): + if len(keys) > 1: + return (_get_nested(attr[keys[0]], keys[1:])) + elif len(keys): + return (attr[keys[0]]) + else: + return (attr) + + return (_get_nested(attr, keys)) + + def set_nested_attr(c, template_key, value): + attr = getattr(c, template_key[0]) + keys = template_key[1:] + + def _set_nested(attr, keys): + if len(keys) > 1: + return (_set_nested(attr[keys[0]], keys[1:])) + elif len(keys): + attr[keys[0]] = value + else: + return (attr) + + return (_set_nested(attr, keys)) + + for key_type, key in template_keys: + attr = cfg.get_nested(cfg, key) + if isinstance(attr, str) or attr == None: + node = create_check_for_s3_node( + key[-1], + attr, key_type, + data_paths['creds_path'], + cfg.pipeline_setup['working_directory']['path'], + map_node=False + ) + cfg.set_nested(cfg, key, node) + + template_keys_in_list = [ + ("anat", + ["segmentation", "tissue_segmentation", "ANTs_Prior_Based", + "template_brain_list"]), + ("anat", + ["segmentation", "tissue_segmentation", "ANTs_Prior_Based", + "template_segmentation_list"]), + ] + + for key_type, key in template_keys_in_list: + node = create_check_for_s3_node( + key[-1], + cfg.get_nested(cfg, key), key_type, + data_paths['creds_path'], + 
cfg.pipeline_setup['working_directory']['path'], + map_node=True + ) + cfg.set_nested(cfg, key, node) + """ + + return rpool + + +def run_node_blocks(blocks, data_paths, cfg=None): + import os + + from CPAC.pipeline import nipype_pipeline_engine as pe + from CPAC.pipeline.engine import NodeBlock + from CPAC.pipeline.engine.resource import initiate_rpool + + if not cfg: + cfg = { + "pipeline_setup": { + "working_directory": {"path": os.getcwd()}, + "log_directory": {"path": os.getcwd()}, + } + } + + # TODO: WE HAVE TO PARSE OVER UNIQUE ID'S!!! + _, rpool = initiate_rpool(cfg, data_paths) + + wf = pe.Workflow(name="node_blocks") + wf.base_dir = cfg.pipeline_setup["working_directory"]["path"] + wf.config["execution"] = { + "hash_method": "timestamp", + "crashdump_dir": cfg.pipeline_setup["log_directory"]["path"], + } + + run_blocks = [] + if rpool.check_rpool("desc-preproc_T1w"): + WFLOGGER.info("Preprocessed T1w found, skipping anatomical preprocessing.") + else: + run_blocks += blocks[0] + if rpool.check_rpool("desc-preproc_bold"): + WFLOGGER.info("Preprocessed BOLD found, skipping functional preprocessing.") + else: + run_blocks += blocks[1] + + for block in run_blocks: + wf = NodeBlock( + block, debug=cfg["pipeline_setup", "Debugging", "verbose"] + ).connect_block(wf, cfg, rpool) + rpool.gather_pipes(wf, cfg) + + wf.run() diff --git a/CPAC/pipeline/engine.py b/CPAC/pipeline/engine/resource.py similarity index 54% rename from CPAC/pipeline/engine.py rename to CPAC/pipeline/engine/resource.py index 3c739ae1ab..16d9761f91 100644 --- a/CPAC/pipeline/engine.py +++ b/CPAC/pipeline/engine/resource.py @@ -14,18 +14,16 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . +"""Resources and ResourcePools for C-PAC.""" + import ast import copy -import hashlib from itertools import chain -import json -import logging import os import re from typing import Optional import warnings -from nipype import config from nipype.interfaces.utility import Rename from CPAC.image_utils.spatial_smoothing import spatial_smoothing @@ -35,25 +33,20 @@ ) from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.check_outputs import ExpectedOutputs -from CPAC.pipeline.nodeblock import NodeBlockFunction +from CPAC.pipeline.engine.engine import ( + ingress_output_dir, + ingress_pipeconfig_paths, + ingress_raw_anat_data, + ingress_raw_func_data, +) from CPAC.pipeline.utils import MOVEMENT_FILTER_KEYS, name_fork, source_set from CPAC.registration.registration import transform_derivative -from CPAC.resources.templates.lookup_table import lookup_identifier from CPAC.utils.bids_utils import res_in_filename from CPAC.utils.configuration import Configuration -from CPAC.utils.datasource import ( - create_anat_datasource, - create_func_datasource, - create_general_datasource, - ingress_func_metadata, - resolve_resolution, -) from CPAC.utils.interfaces.datasink import DataSink from CPAC.utils.interfaces.function import Function from CPAC.utils.monitoring import ( getLogger, - LOGTAIL, - WARNING_FREESURFER_OFF_WITH_DATA, WFLOGGER, ) from CPAC.utils.outputs import Outputs @@ -61,11 +54,53 @@ check_prov_for_regtool, create_id_string, get_last_prov_entry, - read_json, write_output_json, ) +class NodeData: + r"""Attribute access for ResourcePool.get_data outputs. 
+ + Class to hold outputs of CPAC.pipeline.engine.ResourcePool().get_data(), so one can + do ``node_data = strat_pool.node_data(resource)`` and have ``node_data.node`` and + ``node_data.out`` instead of doing ``node, out = strat_pool.get_data(resource)`` + and needing two variables (``node`` and ``out``) to store that information. + + Also includes ``variant`` attribute providing the resource's self-keyed value + within its ``CpacVariant`` dictionary. + + Examples + -------- + >>> rp = ResourcePool() + >>> rp.node_data(None) + NotImplemented (NotImplemented) + + >>> rp.set_data('test', + ... pe.Node(Function(input_names=[]), 'test'), + ... 'b', [], 0, 'test') + >>> rp.node_data('test') + test (b) + >>> rp.node_data('test').out + 'b' + + >>> try: + ... rp.node_data('b') + ... except LookupError as lookup_error: + ... print(str(lookup_error).strip().split('\n')[0].strip()) + [!] C-PAC says: None of the listed resources are in the resource pool: + """ + + # pylint: disable=too-few-public-methods + def __init__(self, strat_pool=None, resource=None, **kwargs): + self.node = NotImplemented + self.out = NotImplemented + if strat_pool is not None and resource is not None: + self.node, self.out = strat_pool.get_data(resource, **kwargs) + + def __repr__(self): # noqa: D105 + return f'{getattr(self.node, "name", str(self.node))} ({self.out})' + + class ResourcePool: def __init__(self, rpool=None, name=None, cfg=None, pipe_list=None): if not rpool: @@ -1413,1194 +1448,6 @@ def node_data(self, resource, **kwargs): return NodeData(self, resource, **kwargs) -class NodeBlock: - def __init__(self, node_block_functions, debug=False): - if not isinstance(node_block_functions, list): - node_block_functions = [node_block_functions] - - self.node_blocks = {} - - for node_block_function in node_block_functions: # <---- sets up the NodeBlock object in case you gave it a list of node blocks instead of a single one - for option forking. 
- self.input_interface = [] - if isinstance(node_block_function, tuple): - self.input_interface = node_block_function[1] - node_block_function = node_block_function[0] - if not isinstance(self.input_interface, list): - self.input_interface = [self.input_interface] - - if not isinstance(node_block_function, NodeBlockFunction): - # If the object is a plain function `__name__` will be more useful than `str()` - obj_str = ( - node_block_function.__name__ - if hasattr(node_block_function, "__name__") - else str(node_block_function) - ) - msg = f'Object is not a nodeblock: "{obj_str}"' - raise TypeError(msg) - - name = node_block_function.name - self.name = name - self.node_blocks[name] = {} - - if self.input_interface: - for interface in self.input_interface: - for orig_input in node_block_function.inputs: - if isinstance(orig_input, tuple): - list_tup = list(orig_input) - if interface[0] in list_tup: - list_tup.remove(interface[0]) - list_tup.append(interface[1]) - node_block_function.inputs.remove(orig_input) - node_block_function.inputs.append(tuple(list_tup)) - elif orig_input == interface[0]: - node_block_function.inputs.remove(interface[0]) - node_block_function.inputs.append(interface[1]) - - for key, val in node_block_function.legacy_nodeblock_dict().items(): - self.node_blocks[name][key] = val - - self.node_blocks[name]["block_function"] = node_block_function - - # TODO: fix/replace below - self.outputs = {} - for out in node_block_function.outputs: - self.outputs[out] = None - - self.options = ["base"] - if node_block_function.outputs is not None: - self.options = node_block_function.outputs - - WFLOGGER.info("Connecting %s...", name) - if debug: - config.update_config({"logging": {"workflow_level": "DEBUG"}}) - logging.update_logging(config) - WFLOGGER.debug( - '"inputs": %s\n\t "outputs": %s%s', - node_block_function.inputs, - list(self.outputs.keys()), - f'\n\t"options": {self.options}' - if self.options != ["base"] - else "", - ) - config.update_config({"logging": {"workflow_level": "INFO"}}) - logging.update_logging(config) - - def get_name(self): - return self.name - - def check_null(self, val): - if isinstance(val, str): - val = None if val.lower() == "none" else val - return val - - def check_output(self, outputs, label, name): - if label not in outputs: - msg = ( - f'\n[!] Output name "{label}" in the block ' - "function does not match the outputs list " - f'{outputs} in Node Block "{name}"\n' - ) - raise NameError(msg) - - def grab_tiered_dct(self, cfg, key_list): - cfg_dct = cfg.dict() - for key in key_list: - try: - cfg_dct = cfg_dct.get(key, {}) - except KeyError as ke: - msg = "[!] 
The config provided to the node block is not valid" - raise KeyError(msg) from ke - return cfg_dct - - def connect_block(self, wf, cfg, rpool): - debug = cfg.pipeline_setup["Debugging"]["verbose"] - all_opts = [] - for name, block_dct in self.node_blocks.items(): - opts = [] - config = self.check_null(block_dct["config"]) - option_key = self.check_null(block_dct["option_key"]) - option_val = self.check_null(block_dct["option_val"]) - if option_key and option_val: - if not isinstance(option_key, list): - option_key = [option_key] - if not isinstance(option_val, list): - option_val = [option_val] - if config: - key_list = config + option_key - else: - key_list = option_key - if "USER-DEFINED" in option_val: - # load custom config data into each 'opt' - opts = self.grab_tiered_dct(cfg, key_list) - else: - for option in option_val: - try: - if option in self.grab_tiered_dct(cfg, key_list): - # goes over the option_vals in the node block docstring, and checks if the user's pipeline config included it in the forking list - opts.append(option) - except AttributeError as err: - msg = f"{err}\nNode Block: {name}" - raise Exception(msg) - - if opts is None: - opts = [opts] - - elif option_key and not option_val: - # enables multiple config forking entries - if not isinstance(option_key[0], list): - msg = ( - f"[!] The option_key field ({option_key}) " - f"for {name} exists but there is no " - "option_val.\n\nIf you are trying to " - "populate multiple option keys, the " - "option_val field must contain a list of " - "a list.\n" - ) - raise ValueError(msg) - for option_config in option_key: - # option_config is a list of pipe config levels down to the option - if config: - key_list = config + option_config - else: - key_list = option_config - option_val = option_config[-1] - if option_val in self.grab_tiered_dct(cfg, key_list[:-1]): - opts.append(option_val) - else: # AND, if there are multiple option-val's (in a list) in the docstring, it gets iterated below in 'for opt in option' etc. AND THAT'S WHEN YOU HAVE TO DELINEATE WITHIN THE NODE BLOCK CODE!!! - opts = [None] - all_opts += opts - - sidecar_additions = { - "CpacConfigHash": hashlib.sha1( - json.dumps(cfg.dict(), sort_keys=True).encode("utf-8") - ).hexdigest(), - "CpacConfig": cfg.dict(), - } - - if cfg["pipeline_setup"]["output_directory"].get("user_defined"): - sidecar_additions["UserDefined"] = cfg["pipeline_setup"][ - "output_directory" - ]["user_defined"] - - for name, block_dct in self.node_blocks.items(): - # iterates over either the single node block in the sequence, or a list of node blocks within the list of node blocks, i.e. for option forking. 
- switch = self.check_null(block_dct["switch"]) - config = self.check_null(block_dct["config"]) - option_key = self.check_null(block_dct["option_key"]) - option_val = self.check_null(block_dct["option_val"]) - inputs = self.check_null(block_dct["inputs"]) - outputs = self.check_null(block_dct["outputs"]) - - block_function = block_dct["block_function"] - - opts = [] - if option_key and option_val: - if not isinstance(option_key, list): - option_key = [option_key] - if not isinstance(option_val, list): - option_val = [option_val] - if config: - key_list = config + option_key - else: - key_list = option_key - if "USER-DEFINED" in option_val: - # load custom config data into each 'opt' - opts = self.grab_tiered_dct(cfg, key_list) - else: - for option in option_val: - if option in self.grab_tiered_dct(cfg, key_list): - # goes over the option_vals in the node block docstring, and checks if the user's pipeline config included it in the forking list - opts.append(option) - else: # AND, if there are multiple option-val's (in a list) in the docstring, it gets iterated below in 'for opt in option' etc. AND THAT'S WHEN YOU HAVE TO DELINEATE WITHIN THE NODE BLOCK CODE!!! - opts = [None] - # THIS ALSO MEANS the multiple option-val's in docstring node blocks can be entered once in the entire node-block sequence, not in a list of multiples - if not opts: - # for node blocks where the options are split into different - # block functions - opts will be empty for non-selected - # options, and would waste the get_strats effort below - continue - - if not switch: - switch = [True] - else: - if config: - try: - key_list = config + switch - except TypeError as te: - msg = ( - "\n\n[!] Developer info: Docstring error " - f"for {name}, make sure the 'config' or " - "'switch' fields are lists.\n\n" - ) - raise TypeError(msg) from te - switch = self.grab_tiered_dct(cfg, key_list) - elif isinstance(switch[0], list): - # we have multiple switches, which is designed to only work if - # config is set to "None" - switch_list = [] - for key_list in switch: - val = self.grab_tiered_dct(cfg, key_list) - if isinstance(val, list): - # fork switches - if True in val: - switch_list.append(True) - if False in val: - switch_list.append(False) - else: - switch_list.append(val) - if False in switch_list: - switch = [False] - else: - switch = [True] - else: - # if config is set to "None" - key_list = switch - switch = self.grab_tiered_dct(cfg, key_list) - if not isinstance(switch, list): - switch = [switch] - if True in switch: - for ( - pipe_idx, - strat_pool, # strat_pool is a ResourcePool like {'desc-preproc_T1w': { 'json': info, 'data': (node, out) }, 'desc-brain_mask': etc.} - ) in rpool.get_strats(inputs, debug).items(): - # keep in mind rpool.get_strats(inputs) = {pipe_idx1: {'desc-preproc_T1w': etc.}, pipe_idx2: {..} } - fork = False in switch - for opt in opts: # it's a dictionary of ResourcePools called strat_pools, except those sub-ResourcePools only have one level! no pipe_idx strat keys. - # remember, you can get 'data' or 'json' from strat_pool with member functions - # strat_pool has all of the JSON information of all the inputs! - # so when we set_data below for the TOP-LEVEL MAIN RPOOL (not the strat_pool), we can generate new merged JSON information for each output. - # particularly, our custom 'CpacProvenance' field. 
- node_name = name - pipe_x = rpool.get_pipe_number(pipe_idx) - - replaced_inputs = [] - for interface in self.input_interface: - if isinstance(interface[1], list): - for input_name in interface[1]: - if strat_pool.check_rpool(input_name): - break - else: - input_name = interface[1] - strat_pool.copy_resource(input_name, interface[0]) - replaced_inputs.append(interface[0]) - try: - wf, outs = block_function(wf, cfg, strat_pool, pipe_x, opt) - except IOError as e: # duplicate node - WFLOGGER.warning(e) - continue - - if not outs: - if block_function.__name__ == "freesurfer_postproc": - WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA) - LOGTAIL["warnings"].append( - WARNING_FREESURFER_OFF_WITH_DATA - ) - continue - - if opt and len(option_val) > 1: - node_name = f"{node_name}_{opt}" - elif opt and "USER-DEFINED" in option_val: - node_name = f'{node_name}_{opt["Name"]}' - - if debug: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("\n=======================") - verbose_logger.debug("Node name: %s", node_name) - prov_dct = rpool.get_resource_strats_from_prov( - ast.literal_eval(pipe_idx) - ) - for key, val in prov_dct.items(): - verbose_logger.debug("-------------------") - verbose_logger.debug("Input - %s:", key) - sub_prov_dct = rpool.get_resource_strats_from_prov(val) - for sub_key, sub_val in sub_prov_dct.items(): - sub_sub_dct = rpool.get_resource_strats_from_prov( - sub_val - ) - verbose_logger.debug(" sub-input - %s:", sub_key) - verbose_logger.debug(" prov = %s", sub_val) - verbose_logger.debug( - " sub_sub_inputs = %s", sub_sub_dct.keys() - ) - - for label, connection in outs.items(): - self.check_output(outputs, label, name) - new_json_info = copy.deepcopy(strat_pool.get("json")) - - # transfer over data-specific json info - # for example, if the input data json is _bold and the output is also _bold - data_type = label.split("_")[-1] - if data_type in new_json_info["subjson"]: - if ( - "SkullStripped" - in new_json_info["subjson"][data_type] - ): - new_json_info["SkullStripped"] = new_json_info[ - "subjson" - ][data_type]["SkullStripped"] - - # determine sources for the outputs, i.e. 
all input data into the node block - new_json_info["Sources"] = [ - x - for x in strat_pool.get_entire_rpool() - if x != "json" and x not in replaced_inputs - ] - - if isinstance(outputs, dict): - new_json_info.update(outputs[label]) - if "Description" not in outputs[label]: - # don't propagate old Description - try: - del new_json_info["Description"] - except KeyError: - pass - if "Template" in outputs[label]: - template_key = outputs[label]["Template"] - if template_key in new_json_info["Sources"]: - # only if the pipeline config template key is entered as the 'Template' field - # otherwise, skip this and take in the literal 'Template' string - try: - new_json_info["Template"] = new_json_info[ - "subjson" - ][template_key]["Description"] - except KeyError: - pass - try: - new_json_info["Resolution"] = new_json_info[ - "subjson" - ][template_key]["Resolution"] - except KeyError: - pass - else: - # don't propagate old Description - try: - del new_json_info["Description"] - except KeyError: - pass - - if "Description" in new_json_info: - new_json_info["Description"] = " ".join( - new_json_info["Description"].split() - ) - - for sidecar_key, sidecar_value in sidecar_additions.items(): - if sidecar_key not in new_json_info: - new_json_info[sidecar_key] = sidecar_value - - try: - del new_json_info["subjson"] - except KeyError: - pass - - if fork or len(opts) > 1 or len(all_opts) > 1: - if "CpacVariant" not in new_json_info: - new_json_info["CpacVariant"] = {} - raw_label = rpool.get_raw_label(label) - if raw_label not in new_json_info["CpacVariant"]: - new_json_info["CpacVariant"][raw_label] = [] - new_json_info["CpacVariant"][raw_label].append( - node_name - ) - - rpool.set_data( - label, - connection[0], - connection[1], - new_json_info, - pipe_idx, - node_name, - fork, - ) - - wf, post_labels = rpool.post_process( - wf, - label, - connection, - new_json_info, - pipe_idx, - pipe_x, - outs, - ) - - if rpool.func_reg: - for postlabel in post_labels: - connection = (postlabel[1], postlabel[2]) - wf = rpool.derivative_xfm( - wf, - postlabel[0], - connection, - new_json_info, - pipe_idx, - pipe_x, - ) - return wf - - -def wrap_block(node_blocks, interface, wf, cfg, strat_pool, pipe_num, opt): - """Wrap a list of node block functions to use within other node blocks. - - Example usage: - - # This calls the 'bold_mask_afni' and 'bold_masking' node blocks to - # skull-strip an EPI field map, without having to invoke the NodeBlock - # connection system. - - # The interface dictionary tells wrap_block to set the EPI field map - # in the parent node block's throw-away strat_pool as 'bold', so that - # the 'bold_mask_afni' and 'bold_masking' node blocks will see that as - # the 'bold' input. - - # It also tells wrap_block to set the 'desc-brain_bold' output of - # the 'bold_masking' node block to 'opposite_pe_epi_brain' (what it - # actually is) in the parent node block's strat_pool, which gets - # returned. - - # Note 'bold' and 'desc-brain_bold' (all on the left side) are the - # labels that 'bold_mask_afni' and 'bold_masking' understand/expect - # through their interfaces and docstrings. - - # The right-hand side (the values of the 'interface' dictionary) are - # what 'make sense' within the current parent node block - in this - # case, the distortion correction node block dealing with field maps. 
- - interface = {'bold': (match_epi_fmaps_node, 'opposite_pe_epi'), - 'desc-brain_bold': 'opposite_pe_epi_brain'} - wf, strat_pool = wrap_block([bold_mask_afni, bold_masking], - interface, wf, cfg, strat_pool, - pipe_num, opt) - - ...further downstream in the parent node block: - - node, out = strat_pool.get_data('opposite_pe_epi_brain') - - # The above line will connect the output of the 'bold_masking' node - # block (which is the skull-stripped version of 'opposite_pe_epi') to - # the next node. - - """ - for block in node_blocks: - # new_pool = copy.deepcopy(strat_pool) - for in_resource, val in interface.items(): - if isinstance(val, tuple): - strat_pool.set_data( - in_resource, val[0], val[1], {}, "", "", fork=True - ) # - if "sub_num" not in strat_pool.get_pool_info(): - strat_pool.set_pool_info({"sub_num": 0}) - sub_num = strat_pool.get_pool_info()["sub_num"] - - wf, outputs = block(wf, cfg, strat_pool, f"{pipe_num}-{sub_num}", opt) # - for out, val in outputs.items(): - if out in interface and isinstance(interface[out], str): - strat_pool.set_data( - interface[out], outputs[out][0], outputs[out][1], {}, "", "" - ) - else: - strat_pool.set_data(out, outputs[out][0], outputs[out][1], {}, "", "") - sub_num += 1 - strat_pool.set_pool_info({"sub_num": sub_num}) - - return (wf, strat_pool) - - -def ingress_raw_anat_data(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id): - if "anat" not in data_paths: - WFLOGGER.warning("No anatomical data present.") - return rpool - - if "creds_path" not in data_paths: - data_paths["creds_path"] = None - - anat_flow = create_anat_datasource(f"anat_T1w_gather_{part_id}_{ses_id}") - - anat = {} - if isinstance(data_paths["anat"], str): - anat["T1"] = data_paths["anat"] - elif "T1w" in data_paths["anat"]: - anat["T1"] = data_paths["anat"]["T1w"] - - if "T1" in anat: - anat_flow.inputs.inputnode.set( - subject=part_id, - anat=anat["T1"], - creds_path=data_paths["creds_path"], - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - img_type="anat", - ) - rpool.set_data("T1w", anat_flow, "outputspec.anat", {}, "", "anat_ingress") - - if "T2w" in data_paths["anat"]: - anat_flow_T2 = create_anat_datasource(f"anat_T2w_gather_{part_id}_{ses_id}") - anat_flow_T2.inputs.inputnode.set( - subject=part_id, - anat=data_paths["anat"]["T2w"], - creds_path=data_paths["creds_path"], - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - img_type="anat", - ) - rpool.set_data("T2w", anat_flow_T2, "outputspec.anat", {}, "", "anat_ingress") - - if cfg.surface_analysis["freesurfer"]["ingress_reconall"]: - rpool = ingress_freesurfer( - wf, rpool, cfg, data_paths, unique_id, part_id, ses_id - ) - - return rpool - - -def ingress_freesurfer(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id): - try: - fs_path = os.path.join(cfg.pipeline_setup["freesurfer_dir"], part_id) - except KeyError: - WFLOGGER.warning("No FreeSurfer data present.") - return rpool - - # fs_path = os.path.join(cfg.pipeline_setup['freesurfer_dir'], part_id) - if not os.path.exists(fs_path): - if "sub" in part_id: - fs_path = os.path.join( - cfg.pipeline_setup["freesurfer_dir"], part_id.replace("sub-", "") - ) - else: - fs_path = os.path.join( - cfg.pipeline_setup["freesurfer_dir"], ("sub-" + part_id) - ) - - # patch for flo-specific data - if not os.path.exists(fs_path): - subj_ses = part_id + "-" + ses_id - fs_path = os.path.join(cfg.pipeline_setup["freesurfer_dir"], subj_ses) - if not os.path.exists(fs_path): - WFLOGGER.info("No FreeSurfer data found for subject %s", part_id) - return rpool - 
- # Check for double nested subj names - if os.path.exists(os.path.join(fs_path, os.path.basename(fs_path))): - fs_path = os.path.join(fs_path, part_id) - - fs_ingress = create_general_datasource("gather_freesurfer_dir") - fs_ingress.inputs.inputnode.set( - unique_id=unique_id, - data=fs_path, - creds_path=data_paths["creds_path"], - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - "freesurfer-subject-dir", - fs_ingress, - "outputspec.data", - {}, - "", - "freesurfer_config_ingress", - ) - - recon_outs = { - "pipeline-fs_raw-average": "mri/rawavg.mgz", - "pipeline-fs_subcortical-seg": "mri/aseg.mgz", - "pipeline-fs_brainmask": "mri/brainmask.mgz", - "pipeline-fs_wmparc": "mri/wmparc.mgz", - "pipeline-fs_T1": "mri/T1.mgz", - "pipeline-fs_hemi-L_desc-surface_curv": "surf/lh.curv", - "pipeline-fs_hemi-R_desc-surface_curv": "surf/rh.curv", - "pipeline-fs_hemi-L_desc-surfaceMesh_pial": "surf/lh.pial", - "pipeline-fs_hemi-R_desc-surfaceMesh_pial": "surf/rh.pial", - "pipeline-fs_hemi-L_desc-surfaceMesh_smoothwm": "surf/lh.smoothwm", - "pipeline-fs_hemi-R_desc-surfaceMesh_smoothwm": "surf/rh.smoothwm", - "pipeline-fs_hemi-L_desc-surfaceMesh_sphere": "surf/lh.sphere", - "pipeline-fs_hemi-R_desc-surfaceMesh_sphere": "surf/rh.sphere", - "pipeline-fs_hemi-L_desc-surfaceMap_sulc": "surf/lh.sulc", - "pipeline-fs_hemi-R_desc-surfaceMap_sulc": "surf/rh.sulc", - "pipeline-fs_hemi-L_desc-surfaceMap_thickness": "surf/lh.thickness", - "pipeline-fs_hemi-R_desc-surfaceMap_thickness": "surf/rh.thickness", - "pipeline-fs_hemi-L_desc-surfaceMap_volume": "surf/lh.volume", - "pipeline-fs_hemi-R_desc-surfaceMap_volume": "surf/rh.volume", - "pipeline-fs_hemi-L_desc-surfaceMesh_white": "surf/lh.white", - "pipeline-fs_hemi-R_desc-surfaceMesh_white": "surf/rh.white", - "pipeline-fs_xfm": "mri/transforms/talairach.lta", - } - - for key, outfile in recon_outs.items(): - fullpath = os.path.join(fs_path, outfile) - if os.path.exists(fullpath): - fs_ingress = create_general_datasource(f"gather_fs_{key}_dir") - fs_ingress.inputs.inputnode.set( - unique_id=unique_id, - data=fullpath, - creds_path=data_paths["creds_path"], - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - key, fs_ingress, "outputspec.data", {}, "", f"fs_{key}_ingress" - ) - else: - warnings.warn( - str(LookupError(f"\n[!] 
Path does not exist for {fullpath}.\n")) - ) - - return rpool - - -def ingress_raw_func_data(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id): - func_paths_dct = data_paths["func"] - - func_wf = create_func_datasource( - func_paths_dct, rpool, f"func_ingress_{part_id}_{ses_id}" - ) - func_wf.inputs.inputnode.set( - subject=part_id, - creds_path=data_paths["creds_path"], - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - func_wf.get_node("inputnode").iterables = ("scan", list(func_paths_dct.keys())) - - rpool.set_data("subject", func_wf, "outputspec.subject", {}, "", "func_ingress") - rpool.set_data("bold", func_wf, "outputspec.rest", {}, "", "func_ingress") - rpool.set_data("scan", func_wf, "outputspec.scan", {}, "", "func_ingress") - rpool.set_data( - "scan-params", func_wf, "outputspec.scan_params", {}, "", "scan_params_ingress" - ) - - # TODO: CHECK FOR PARAMETERS - - wf, rpool, diff, blip, fmap_rp_list = ingress_func_metadata( - wf, cfg, rpool, data_paths, part_id, data_paths["creds_path"], ses_id - ) - - # Memoize list of local functional scans - # TODO: handle S3 files - # Skip S3 files for now - - local_func_scans = [ - func_paths_dct[scan]["scan"] - for scan in func_paths_dct.keys() - if not func_paths_dct[scan]["scan"].startswith("s3://") - ] - if local_func_scans: - # pylint: disable=protected-access - wf._local_func_scans = local_func_scans - if cfg.pipeline_setup["Debugging"]["verbose"]: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("local_func_scans: %s", local_func_scans) - del local_func_scans - - return (wf, rpool, diff, blip, fmap_rp_list) - - -def ingress_output_dir( - wf, cfg, rpool, unique_id, data_paths, part_id, ses_id, creds_path=None -): - dir_path = data_paths["derivatives_dir"] - - WFLOGGER.info("\nPulling outputs from %s.\n", dir_path) - - anat = os.path.join(dir_path, "anat") - func = os.path.join(dir_path, "func") - - exts = [".nii", ".gz", ".mat", ".1D", ".txt", ".csv", ".rms", ".tsv"] - - outdir_anat = [] - outdir_func = [] - func_paths = {} - func_dict = {} - - for subdir in [anat, func]: - if os.path.isdir(subdir): - for filename in os.listdir(subdir): - for ext in exts: - if ext in filename: - if subdir == anat: - outdir_anat.append(os.path.join(subdir, filename)) - else: - outdir_func.append(os.path.join(subdir, filename)) - - # Add derivatives directory to rpool - ingress = create_general_datasource("gather_derivatives_dir") - ingress.inputs.inputnode.set( - unique_id=unique_id, - data=dir_path, - creds_path=creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - "derivatives-dir", ingress, "outputspec.data", {}, "", "outdir_config_ingress" - ) - - for subdir in [outdir_anat, outdir_func]: - for filepath in subdir: - filename = str(filepath) - for ext in exts: - filename = filename.split("/")[-1].replace(ext, "") - - data_label = filename.split(unique_id)[1].lstrip("_") - - if len(filename) == len(data_label): - msg = ( - "\n\n[!] 
Possibly wrong participant or " - "session in this directory?\n\n" - f"Filepath: {filepath}\n\n" - ) - raise Exception(msg) - - bidstag = "" - for tag in data_label.split("_"): - for prefix in ["task-", "run-", "acq-", "rec"]: - if tag.startswith(prefix): - bidstag += f"{tag}_" - data_label = data_label.replace(f"{tag}_", "") - data_label, json = strip_template(data_label, dir_path, filename) - - rpool, json_info, pipe_idx, node_name, data_label = json_outdir_ingress( - rpool, filepath, exts, data_label, json - ) - - if ( - "template" in data_label - and not json_info["Template"] - == cfg.pipeline_setup["outdir_ingress"]["Template"] - ): - continue - # Rename confounds to avoid confusion in nuisance regression - if data_label.endswith("desc-confounds_timeseries"): - data_label = "pipeline-ingress_desc-confounds_timeseries" - - if len(bidstag) > 1: - # Remove tail symbol - bidstag = bidstag[:-1] - if bidstag.startswith("task-"): - bidstag = bidstag.replace("task-", "") - - # Rename bold mask for CPAC naming convention - # and to avoid collision with anat brain mask - if data_label.endswith("desc-brain_mask") and filepath in outdir_func: - data_label = data_label.replace("brain_mask", "bold_mask") - - try: - pipe_x = rpool.get_pipe_number(pipe_idx) - except ValueError: - pipe_x = len(rpool.pipe_list) - if filepath in outdir_anat: - ingress = create_general_datasource( - f"gather_anat_outdir_{data_label!s}_{pipe_x}" - ) - ingress.inputs.inputnode.set( - unique_id=unique_id, - data=filepath, - creds_path=creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - data_label, - ingress, - "outputspec.data", - json_info, - pipe_idx, - node_name, - f"outdir_{data_label}_ingress", - inject=True, - ) - else: - if data_label.endswith("desc-preproc_bold"): - func_key = data_label - func_dict[bidstag] = {} - func_dict[bidstag]["scan"] = str(filepath) - func_dict[bidstag]["scan_parameters"] = json_info - func_dict[bidstag]["pipe_idx"] = pipe_idx - if data_label.endswith("desc-brain_mask"): - data_label = data_label.replace("brain_mask", "bold_mask") - try: - func_paths[data_label].append(filepath) - except: - func_paths[data_label] = [] - func_paths[data_label].append(filepath) - - if func_dict: - wf, rpool = func_outdir_ingress( - wf, - cfg, - func_dict, - rpool, - unique_id, - creds_path, - part_id, - func_key, - func_paths, - ) - - if cfg.surface_analysis["freesurfer"]["ingress_reconall"]: - rpool = ingress_freesurfer( - wf, rpool, cfg, data_paths, unique_id, part_id, ses_id - ) - return wf, rpool - - -def json_outdir_ingress(rpool, filepath, exts, data_label, json): - desc_val = None - for tag in data_label.split("_"): - if "desc-" in tag: - desc_val = tag - break - jsonpath = str(filepath) - for ext in exts: - jsonpath = jsonpath.replace(ext, "") - jsonpath = f"{jsonpath}.json" - - if not os.path.exists(jsonpath): - WFLOGGER.info( - "\n\n[!] No JSON found for file %s.\nCreating %s..\n\n", filepath, jsonpath - ) - json_info = { - "Description": "This data was generated elsewhere and " - "supplied by the user into this C-PAC run's " - "output directory. This JSON file was " - "automatically generated by C-PAC because a " - "JSON file was not supplied with the data." 
- } - json_info = {**json_info, **json} - write_output_json(json_info, jsonpath) - else: - json_info = read_json(jsonpath) - json_info = {**json_info, **json} - if "CpacProvenance" in json_info: - if desc_val: - # it's a C-PAC output, let's check for pipe_idx/strat integer - # suffixes in the desc- entries. - only_desc = str(desc_val) - - if only_desc[-1].isdigit(): - for idx in range(0, 3): - # let's stop at 3, please don't run >999 strategies okay? - if only_desc[-1].isdigit(): - only_desc = only_desc[:-1] - - if only_desc[-1] == "-": - only_desc = only_desc.rstrip("-") - else: - msg = ( - "\n[!] Something went wrong with either " - "reading in the output directory or when " - "it was written out previously.\n\nGive " - "this to your friendly local C-PAC " - f"developer:\n\n{data_label!s}\n" - ) - raise IOError(msg) - - # remove the integer at the end of the desc-* variant, we will - # get the unique pipe_idx from the CpacProvenance below - data_label = data_label.replace(desc_val, only_desc) - - # preserve cpac provenance/pipe_idx - pipe_idx = rpool.generate_prov_string(json_info["CpacProvenance"]) - node_name = "" - - else: - json_info["CpacProvenance"] = [f"{data_label}:Non-C-PAC Origin: {filepath}"] - if "Description" not in json_info: - json_info["Description"] = ( - "This data was generated elsewhere and " - "supplied by the user into this C-PAC run's " - "output directory. This JSON file was " - "automatically generated by C-PAC because a " - "JSON file was not supplied with the data." - ) - pipe_idx = rpool.generate_prov_string(json_info["CpacProvenance"]) - node_name = f"{data_label}_ingress" - - return rpool, json_info, pipe_idx, node_name, data_label - - -def func_outdir_ingress( - wf, cfg, func_dict, rpool, unique_id, creds_path, part_id, key, func_paths -): - pipe_x = len(rpool.pipe_list) - ingress = create_func_datasource( - func_dict, rpool, f"gather_func_outdir_{key}_{pipe_x}" - ) - ingress.inputs.inputnode.set( - subject=unique_id, - creds_path=creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data("subject", ingress, "outputspec.subject", {}, "", "func_ingress") - ingress.get_node("inputnode").iterables = ("scan", list(func_dict.keys())) - rpool.set_data(key, ingress, "outputspec.rest", {}, "", "func_ingress") - - rpool.set_data("scan", ingress, "outputspec.scan", {}, "", "func_ingress") - rpool.set_data( - "scan-params", ingress, "outputspec.scan_params", {}, "", "scan_params_ingress" - ) - wf, rpool, diff, blip, fmap_rp_list = ingress_func_metadata( - wf, cfg, rpool, func_dict, part_id, creds_path, key - ) - - # Have to do it this weird way to save the parsed BIDS tag & filepath - mask_paths_key = ( - "desc-bold_mask" - if "desc-bold_mask" in func_paths - else "space-template_desc-bold_mask" - ) - ts_paths_key = "pipeline-ingress_desc-confounds_timeseries" - - # Connect func data with approproate scan name - iterables = pe.Node( - Function( - input_names=["scan", "mask_paths", "ts_paths"], - output_names=["out_scan", "mask", "confounds"], - function=set_iterables, - ), - name=f"set_iterables_{pipe_x}", - ) - iterables.inputs.mask_paths = func_paths[mask_paths_key] - iterables.inputs.ts_paths = func_paths[ts_paths_key] - wf.connect(ingress, "outputspec.scan", iterables, "scan") - - for key in func_paths: - if key in (mask_paths_key, ts_paths_key): - ingress_func = create_general_datasource(f"ingress_func_data_{key}") - ingress_func.inputs.inputnode.set( - unique_id=unique_id, - creds_path=creds_path, - 
dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - wf.connect(iterables, "out_scan", ingress_func, "inputnode.scan") - if key == mask_paths_key: - wf.connect(iterables, "mask", ingress_func, "inputnode.data") - rpool.set_data( - key, ingress_func, "inputnode.data", {}, "", f"outdir_{key}_ingress" - ) - elif key == ts_paths_key: - wf.connect(iterables, "confounds", ingress_func, "inputnode.data") - rpool.set_data( - key, ingress_func, "inputnode.data", {}, "", f"outdir_{key}_ingress" - ) - - return wf, rpool - - -def set_iterables(scan, mask_paths=None, ts_paths=None): - # match scan with filepath to get filepath - mask_path = [path for path in mask_paths if scan in path] - ts_path = [path for path in ts_paths if scan in path] - - return (scan, mask_path[0], ts_path[0]) - - -def strip_template(data_label, dir_path, filename): - json = {} - # rename to template - for prefix in ["space-", "from-", "to-"]: - for bidstag in data_label.split("_"): - if bidstag.startswith(prefix): - template_key, template_val = bidstag.split("-") - template_name, _template_desc = lookup_identifier(template_val) - if template_name: - json["Template"] = template_val - data_label = data_label.replace(template_val, "template") - elif bidstag.startswith("res-"): - res_key, res_val = bidstag.split("-") - json["Resolution"] = res_val - data_label = data_label.replace(bidstag, "") - if data_label.find("__"): - data_label = data_label.replace("__", "_") - return data_label, json - - -def ingress_pipeconfig_paths(cfg, rpool, unique_id, creds_path=None): - # ingress config file paths - # TODO: may want to change the resource keys for each to include one level up in the YAML as well - - import pandas as pd - import pkg_resources as p - - template_csv = p.resource_filename("CPAC", "resources/cpac_templates.csv") - template_df = pd.read_csv(template_csv, keep_default_na=False) - - for row in template_df.itertuples(): - key = row.Key - val = row.Pipeline_Config_Entry - val = cfg.get_nested(cfg, [x.lstrip() for x in val.split(",")]) - resolution = row.Intended_Resolution_Config_Entry - desc = row.Description - - if not val: - continue - - if resolution: - res_keys = [x.lstrip() for x in resolution.split(",")] - tag = res_keys[-1] - json_info = {} - - if "$FSLDIR" in val: - val = val.replace("$FSLDIR", cfg.pipeline_setup["system_config"]["FSLDIR"]) - if "$priors_path" in val: - priors_path = ( - cfg.segmentation["tissue_segmentation"]["FSL-FAST"]["use_priors"][ - "priors_path" - ] - or "" - ) - if "$FSLDIR" in priors_path: - priors_path = priors_path.replace( - "$FSLDIR", cfg.pipeline_setup["system_config"]["FSLDIR"] - ) - val = val.replace("$priors_path", priors_path) - if "${resolution_for_anat}" in val: - val = val.replace( - "${resolution_for_anat}", - cfg.registration_workflows["anatomical_registration"][ - "resolution_for_anat" - ], - ) - if "${func_resolution}" in val: - val = val.replace( - "${func_resolution}", - cfg.registration_workflows["functional_registration"][ - "func_registration_to_template" - ]["output_resolution"][tag], - ) - - if desc: - template_name, _template_desc = lookup_identifier(val) - if template_name: - desc = f"{template_name} - {desc}" - json_info["Description"] = f"{desc} - {val}" - if resolution: - resolution = cfg.get_nested(cfg, res_keys) - json_info["Resolution"] = resolution - - resampled_template = pe.Node( - Function( - input_names=["resolution", "template", "template_name", "tag"], - output_names=["resampled_template"], - function=resolve_resolution, - as_module=True, - ), - 
name="resampled_" + key, - ) - - resampled_template.inputs.resolution = resolution - resampled_template.inputs.template = val - resampled_template.inputs.template_name = key - resampled_template.inputs.tag = tag - - # the set_data below is set up a little differently, because we are - # injecting and also over-writing already-existing entries - # other alternative would have been to ingress into the - # resampled_template node from the already existing entries, but we - # didn't do that here - rpool.set_data( - key, - resampled_template, - "resampled_template", - json_info, - "", - "template_resample", - ) # pipe_idx (after the blank json {}) should be the previous strat that you want deleted! because you're not connecting this the regular way, you have to do it manually - - elif val: - config_ingress = create_general_datasource(f"gather_{key}") - config_ingress.inputs.inputnode.set( - unique_id=unique_id, - data=val, - creds_path=creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - key, - config_ingress, - "outputspec.data", - json_info, - "", - f"{key}_config_ingress", - ) - # templates, resampling from config - """ - template_keys = [ - ("anat", ["network_centrality", "template_specification_file"]), - ("anat", ["nuisance_corrections", "2-nuisance_regression", - "lateral_ventricles_mask"]), - ("anat", - ["segmentation", "tissue_segmentation", "FSL-FAST", "use_priors", - "CSF_path"]), - ("anat", - ["segmentation", "tissue_segmentation", "FSL-FAST", "use_priors", - "GM_path"]), - ("anat", - ["segmentation", "tissue_segmentation", "FSL-FAST", "use_priors", - "WM_path"]), - ("anat", - ["segmentation", "tissue_segmentation", "Template_Based", "CSF"]), - ("anat", - ["segmentation", "tissue_segmentation", "Template_Based", "GRAY"]), - ("anat", - ["segmentation", "tissue_segmentation", "Template_Based", "WHITE"]), - ("anat", ["anatomical_preproc", "acpc_alignment", "T1w_ACPC_template"]), - ("anat", ["anatomical_preproc", "acpc_alignment", "T1w_brain_ACPC_template"]), - ("anat", ["anatomical_preproc", "acpc_alignment", "T2w_ACPC_template"]), - ("anat", ["anatomical_preproc", "acpc_alignment", "T2w_brain_ACPC_template"])] - - def get_nested_attr(c, template_key): - attr = getattr(c, template_key[0]) - keys = template_key[1:] - - def _get_nested(attr, keys): - if len(keys) > 1: - return (_get_nested(attr[keys[0]], keys[1:])) - elif len(keys): - return (attr[keys[0]]) - else: - return (attr) - - return (_get_nested(attr, keys)) - - def set_nested_attr(c, template_key, value): - attr = getattr(c, template_key[0]) - keys = template_key[1:] - - def _set_nested(attr, keys): - if len(keys) > 1: - return (_set_nested(attr[keys[0]], keys[1:])) - elif len(keys): - attr[keys[0]] = value - else: - return (attr) - - return (_set_nested(attr, keys)) - - for key_type, key in template_keys: - attr = cfg.get_nested(cfg, key) - if isinstance(attr, str) or attr == None: - node = create_check_for_s3_node( - key[-1], - attr, key_type, - data_paths['creds_path'], - cfg.pipeline_setup['working_directory']['path'], - map_node=False - ) - cfg.set_nested(cfg, key, node) - - template_keys_in_list = [ - ("anat", - ["segmentation", "tissue_segmentation", "ANTs_Prior_Based", - "template_brain_list"]), - ("anat", - ["segmentation", "tissue_segmentation", "ANTs_Prior_Based", - "template_segmentation_list"]), - ] - - for key_type, key in template_keys_in_list: - node = create_check_for_s3_node( - key[-1], - cfg.get_nested(cfg, key), key_type, - data_paths['creds_path'], - 
cfg.pipeline_setup['working_directory']['path'], - map_node=True - ) - cfg.set_nested(cfg, key, node) - """ - - return rpool - - def initiate_rpool(wf, cfg, data_paths=None, part_id=None): """ Initialize a new ResourcePool. @@ -2674,89 +1521,3 @@ def initiate_rpool(wf, cfg, data_paths=None, part_id=None): # output files with 4 different scans return (wf, rpool) - - -def run_node_blocks(blocks, data_paths, cfg=None): - import os - - from CPAC.pipeline import nipype_pipeline_engine as pe - from CPAC.pipeline.engine import NodeBlock - - if not cfg: - cfg = { - "pipeline_setup": { - "working_directory": {"path": os.getcwd()}, - "log_directory": {"path": os.getcwd()}, - } - } - - # TODO: WE HAVE TO PARSE OVER UNIQUE ID'S!!! - _, rpool = initiate_rpool(cfg, data_paths) - - wf = pe.Workflow(name="node_blocks") - wf.base_dir = cfg.pipeline_setup["working_directory"]["path"] - wf.config["execution"] = { - "hash_method": "timestamp", - "crashdump_dir": cfg.pipeline_setup["log_directory"]["path"], - } - - run_blocks = [] - if rpool.check_rpool("desc-preproc_T1w"): - WFLOGGER.info("Preprocessed T1w found, skipping anatomical preprocessing.") - else: - run_blocks += blocks[0] - if rpool.check_rpool("desc-preproc_bold"): - WFLOGGER.info("Preprocessed BOLD found, skipping functional preprocessing.") - else: - run_blocks += blocks[1] - - for block in run_blocks: - wf = NodeBlock( - block, debug=cfg["pipeline_setup", "Debugging", "verbose"] - ).connect_block(wf, cfg, rpool) - rpool.gather_pipes(wf, cfg) - - wf.run() - - -class NodeData: - r"""Attribute access for ResourcePool.get_data outputs. - - Class to hold outputs of CPAC.pipeline.engine.ResourcePool().get_data(), so one can - do ``node_data = strat_pool.node_data(resource)`` and have ``node_data.node`` and - ``node_data.out`` instead of doing ``node, out = strat_pool.get_data(resource)`` - and needing two variables (``node`` and ``out``) to store that information. - - Also includes ``variant`` attribute providing the resource's self-keyed value - within its ``CpacVariant`` dictionary. - - Examples - -------- - >>> rp = ResourcePool() - >>> rp.node_data(None) - NotImplemented (NotImplemented) - - >>> rp.set_data('test', - ... pe.Node(Function(input_names=[]), 'test'), - ... 'b', [], 0, 'test') - >>> rp.node_data('test') - test (b) - >>> rp.node_data('test').out - 'b' - - >>> try: - ... rp.node_data('b') - ... except LookupError as lookup_error: - ... print(str(lookup_error).strip().split('\n')[0].strip()) - [!] 
C-PAC says: None of the listed resources are in the resource pool: - """ - - # pylint: disable=too-few-public-methods - def __init__(self, strat_pool=None, resource=None, **kwargs): - self.node = NotImplemented - self.out = NotImplemented - if strat_pool is not None and resource is not None: - self.node, self.out = strat_pool.get_data(resource, **kwargs) - - def __repr__(self): # noqa: D105 - return f'{getattr(self.node, "name", str(self.node))} ({self.out})' From 821bcaa0987a3f91cf5f7a5a98ff0419103d953c Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 8 Jul 2024 17:20:09 -0400 Subject: [PATCH 16/93] :construction: Split ResourcePool into three classes with docstrings --- CPAC/pipeline/engine/resource.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 16d9761f91..af69d3cd7e 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -101,7 +101,13 @@ def __repr__(self): # noqa: D105 return f'{getattr(self.node, "name", str(self.node))} ({self.out})' +class Resource: + """A single Resource and its methods.""" + + class ResourcePool: + """All Resources.""" + def __init__(self, rpool=None, name=None, cfg=None, pipe_list=None): if not rpool: self.rpool = {} @@ -1521,3 +1527,7 @@ def initiate_rpool(wf, cfg, data_paths=None, part_id=None): # output files with 4 different scans return (wf, rpool) + + +class StratPool(ResourcePool): + """All resources for a strategy.""" From 979b0a9c715c8398b55134ca6d46db707800084c Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Tue, 9 Jul 2024 09:39:03 -0400 Subject: [PATCH 17/93] :pencil2: Fix f-string missing `f` --- CPAC/distortion_correction/distortion_correction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CPAC/distortion_correction/distortion_correction.py b/CPAC/distortion_correction/distortion_correction.py index 91b379b0a7..a7f0eaefcc 100644 --- a/CPAC/distortion_correction/distortion_correction.py +++ b/CPAC/distortion_correction/distortion_correction.py @@ -165,7 +165,7 @@ def distcor_phasediff_fsl_fugue(wf, cfg, strat_pool, pipe_num, opt=None): == "BET" ): bet = pe.Node( - interface=fsl.BET(), name="distcor_phasediff_bet_skullstrip_{pipe_num}" + interface=fsl.BET(), name=f"distcor_phasediff_bet_skullstrip_{pipe_num}" ) bet.inputs.output_type = "NIFTI_GZ" bet.inputs.frac = cfg.functional_preproc["distortion_correction"]["PhaseDiff"][ From 06612ffa1a465cfcb9c50bf3eee4797188274f95 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Tue, 9 Jul 2024 16:38:34 -0400 Subject: [PATCH 18/93] :white_check_mark: Move BIDS examples to a fixture --- CPAC/conftest.py | 33 +++++++++++++++ dev/circleci_data/conftest.py | 17 ++++++++ dev/circleci_data/test_external_utils.py | 52 +++++++++++------------- 3 files changed, 74 insertions(+), 28 deletions(-) create mode 100644 CPAC/conftest.py create mode 100644 dev/circleci_data/conftest.py diff --git a/CPAC/conftest.py b/CPAC/conftest.py new file mode 100644 index 0000000000..e27b86a85a --- /dev/null +++ b/CPAC/conftest.py @@ -0,0 +1,33 @@ +# Copyright (C) 2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. 
+ +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Global pytest configuration.""" +from pathlib import Path +import pytest + + +@pytest.fixture +def bids_examples(cache) -> Path: + """Get cached example BIDS directories.""" + example_dir = cache.makedir("bids-examples") + bids_dirs = Path(example_dir / "bids-examples") + if not bids_dirs.exists(): + from git import Repo + + Repo.clone_from( + "https://github.com/bids-standard/bids-examples.git", bids_dirs + ) + return bids_dirs diff --git a/dev/circleci_data/conftest.py b/dev/circleci_data/conftest.py new file mode 100644 index 0000000000..0b39d51b82 --- /dev/null +++ b/dev/circleci_data/conftest.py @@ -0,0 +1,17 @@ +# Copyright (C) 2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Pytest configuration for CircleCI-specific tests.""" diff --git a/dev/circleci_data/test_external_utils.py b/dev/circleci_data/test_external_utils.py index f516b0c903..e747767032 100644 --- a/dev/circleci_data/test_external_utils.py +++ b/dev/circleci_data/test_external_utils.py @@ -25,9 +25,9 @@ import pytest import semver -CPAC_DIR = str(Path(__file__).parent.parent.parent) -sys.path.append(CPAC_DIR) -DATA_DIR = os.path.join(CPAC_DIR, "dev", "circleci_data") +CPAC_DIR = Path(__file__).parent.parent.parent +sys.path.append(str(CPAC_DIR)) +DATA_DIR = CPAC_DIR / "dev/circleci_data" from CPAC.__main__ import utils as CPAC_main_utils # noqa: E402 @@ -70,9 +70,8 @@ def test_build_data_config(caplog, cli_runner, multiword_connector): caplog.set_level(INFO) if multiword_connector == "-" and _BACKPORT_CLICK: return - os.chdir(DATA_DIR) - test_yaml = os.path.join(DATA_DIR, "data_settings.yml") - _delete_test_yaml(test_yaml) + os.chdir(str(DATA_DIR)) + test_yaml = DATA_DIR / "data_settings.yml" if multiword_connector == "_": data_config = CPAC_main_utils.commands[ _click_backport(CPAC_main_utils, "data-config") @@ -89,49 +88,45 @@ def test_build_data_config(caplog, cli_runner, multiword_connector): assert "\n".join(caplog.messages).startswith( "\nGenerated a default data_settings YAML file for editing" ) - assert os.path.exists(test_yaml) + assert test_yaml.exists() _delete_test_yaml(test_yaml) -def test_new_settings_template(caplog, cli_runner): +def test_new_settings_template(bids_examples, caplog, cli_runner): """Test CLI ``utils new-settings-template``.""" caplog.set_level(INFO) - os.chdir(CPAC_DIR) - - example_dir = os.path.join(CPAC_DIR, "bids-examples") - if not os.path.exists(example_dir): - from git import Repo - - Repo.clone_from( - "https://github.com/bids-standard/bids-examples.git", example_dir - ) + example_dir = Path(CPAC_DIR / 
"bids-examples") + if not example_dir.exists(): + example_dir.symlink_to(bids_examples) + os.chdir(str(CPAC_DIR)) result = cli_runner.invoke( CPAC_main_utils.commands[ _click_backport(CPAC_main_utils, "data-config") ].commands["build"], - [os.path.join(DATA_DIR, "data_settings_bids_examples_ds051_default_BIDS.yml")], + [str(DATA_DIR / "data_settings_bids_examples_ds051_default_BIDS.yml")], ) - participant_yaml = os.path.join(DATA_DIR, "data_config_ds051.yml") - group_yaml = os.path.join(DATA_DIR, "group_analysis_participants_ds051.txt") + participant_yaml = DATA_DIR / "data_config_ds051.yml" + group_yaml = DATA_DIR / "group_analysis_participants_ds051.txt" + os.remove(str(example_dir)) assert result.exit_code == 0 assert "\n".join(caplog.messages).startswith( "\nGenerating data configuration file.." ) - assert os.path.exists(participant_yaml) - assert os.path.exists(group_yaml) + assert participant_yaml.exists() + assert group_yaml.exists() _delete_test_yaml(participant_yaml) _delete_test_yaml(group_yaml) def test_repickle(cli_runner): # noqa fn = "python_2_pickle.pkl" - pickle_path = os.path.join(DATA_DIR, fn) + pickle_path = str(DATA_DIR / fn) backups = [_Backup(pickle_path), _Backup(f"{pickle_path}z")] - result = cli_runner.invoke(CPAC_main_utils.commands["repickle"], [DATA_DIR]) + result = cli_runner.invoke(CPAC_main_utils.commands["repickle"], [str(DATA_DIR)]) assert result.exit_code == 0 assert ( @@ -139,7 +134,7 @@ def test_repickle(cli_runner): # noqa "pickle." in result.output ) - result = cli_runner.invoke(CPAC_main_utils.commands["repickle"], [DATA_DIR]) + result = cli_runner.invoke(CPAC_main_utils.commands["repickle"], [str(DATA_DIR)]) assert result.exit_code == 0 assert f"Pickle {fn} is a Python 3 pickle." in result.output @@ -157,9 +152,10 @@ def restore(self): w.write(self.data) -def _delete_test_yaml(test_yaml): - if os.path.exists(test_yaml): - os.remove(test_yaml) +def _delete_test_yaml(test_yaml: Path) -> None: + """Delete test YAML file.""" + if test_yaml.exists(): + os.remove(str(test_yaml)) def _test_repickle(pickle_path, gzipped=False): From 7a766032e321be63575afb066e988ed56d357f26 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 10 Jul 2024 12:51:42 -0400 Subject: [PATCH 19/93] :construction: WIP :recycle: Move `ResourcePool` init from functions to methods --- CPAC/conftest.py | 12 +- CPAC/func_preproc/func_ingress.py | 18 +- .../longitudinal_workflow.py | 33 +- CPAC/pipeline/cpac_pipeline.py | 37 +- CPAC/pipeline/engine/__init__.py | 17 - CPAC/pipeline/engine/engine.py | 719 +--------- CPAC/pipeline/engine/resource.py | 1240 ++++++++++++++++- CPAC/utils/bids_utils.py | 72 +- CPAC/utils/datasource.py | 426 +----- CPAC/utils/utils.py | 2 +- 10 files changed, 1275 insertions(+), 1301 deletions(-) diff --git a/CPAC/conftest.py b/CPAC/conftest.py index e27b86a85a..c252a8b74f 100644 --- a/CPAC/conftest.py +++ b/CPAC/conftest.py @@ -15,7 +15,9 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . 
"""Global pytest configuration.""" + from pathlib import Path + import pytest @@ -23,11 +25,9 @@ def bids_examples(cache) -> Path: """Get cached example BIDS directories.""" example_dir = cache.makedir("bids-examples") - bids_dirs = Path(example_dir / "bids-examples") - if not bids_dirs.exists(): + bids_dir = Path(example_dir / "bids-examples") + if not bids_dir.exists(): from git import Repo - Repo.clone_from( - "https://github.com/bids-standard/bids-examples.git", bids_dirs - ) - return bids_dirs + Repo.clone_from("https://github.com/bids-standard/bids-examples.git", bids_dir) + return bids_dir diff --git a/CPAC/func_preproc/func_ingress.py b/CPAC/func_preproc/func_ingress.py index 60c8ccf5c9..4b995a18ba 100644 --- a/CPAC/func_preproc/func_ingress.py +++ b/CPAC/func_preproc/func_ingress.py @@ -14,12 +14,22 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . -from CPAC.utils.datasource import create_func_datasource, ingress_func_metadata +"""Ingress functional data for preprocessing.""" + +from CPAC.utils.datasource import create_func_datasource +from CPAC.utils.strategy import Strategy def connect_func_ingress( - workflow, strat_list, c, sub_dict, subject_id, input_creds_path, unique_id=None + workflow, + strat_list: list[Strategy], + c, + sub_dict, + subject_id, + input_creds_path, + unique_id=None, ): + """Connect functional ingress workflow.""" for num_strat, strat in enumerate(strat_list): if "func" in sub_dict: func_paths_dict = sub_dict["func"] @@ -47,8 +57,8 @@ def connect_func_ingress( } ) - (workflow, strat.rpool, diff, blip, fmap_rp_list) = ingress_func_metadata( - workflow, c, strat.rpool, sub_dict, subject_id, input_creds_path, unique_id + (workflow, diff, blip, fmap_rp_list) = strat.rpool.ingress_func_metadata( + workflow, sub_dict ) return (workflow, diff, blip, fmap_rp_list) diff --git a/CPAC/longitudinal_pipeline/longitudinal_workflow.py b/CPAC/longitudinal_pipeline/longitudinal_workflow.py index 4229fc30c6..829e123de4 100644 --- a/CPAC/longitudinal_pipeline/longitudinal_workflow.py +++ b/CPAC/longitudinal_pipeline/longitudinal_workflow.py @@ -21,6 +21,7 @@ import nipype.interfaces.io as nio from indi_aws import aws_utils +from CPAC.func_preproc.func_ingress import connect_func_ingress from CPAC.longitudinal_pipeline.longitudinal_preproc import subject_specific_template from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.cpac_pipeline import ( @@ -30,7 +31,7 @@ connect_pipeline, initialize_nipype_wf, ) -from CPAC.pipeline.engine import ingress_output_dir, initiate_rpool +from CPAC.pipeline.engine import initiate_rpool from CPAC.pipeline.nodeblock import nodeblock from CPAC.registration import ( create_fsl_flirt_linear_reg, @@ -430,12 +431,12 @@ def anat_longitudinal_wf(subject_id, sub_list, config): workflow = initialize_nipype_wf( config, - sub_list[0], + session, # just grab the first one for the name name="anat_longitudinal_pre-preproc", ) - workflow, rpool = initiate_rpool(workflow, config, session) + rpool = initiate_rpool(workflow, config, session) pipeline_blocks = build_anat_preproc_stack(rpool, config) workflow = connect_pipeline(workflow, config, rpool, pipeline_blocks) @@ -507,9 +508,7 @@ def anat_longitudinal_wf(subject_id, sub_list, config): template_node.inputs.input_skull_list = strats_head_dct[strat] long_id = f"longitudinal_{subject_id}_strat-{strat}" - - wf, rpool = initiate_rpool(wf, config, part_id=long_id) - + rpool = initiate_rpool(wf, config, part_id=long_id) 
rpool.set_data( "space-longitudinal_desc-brain_T1w", template_node, @@ -574,7 +573,7 @@ def anat_longitudinal_wf(subject_id, sub_list, config): creds_path = session["creds_path"] if creds_path and "none" not in creds_path.lower(): if os.path.exists(creds_path): - input_creds_path = os.path.abspath(creds_path) + session["creds_path"] = os.path.abspath(creds_path) else: err_msg = ( 'Credentials path: "%s" for subject "%s" ' @@ -583,18 +582,15 @@ def anat_longitudinal_wf(subject_id, sub_list, config): ) raise Exception(err_msg) else: - input_creds_path = None + session["creds_path"] = None except KeyError: - input_creds_path = None - - wf = initialize_nipype_wf(config, sub_list[0]) + session["creds_path"] = None - wf, rpool = initiate_rpool(wf, config, session) + wf = initialize_nipype_wf(config, session) + rpool = initiate_rpool(wf, config, session) config.pipeline_setup["pipeline_name"] = f"longitudinal_{orig_pipe_name}" - rpool = ingress_output_dir( - config, rpool, long_id, creds_path=input_creds_path - ) + rpool.ingress_output_dir() select_node_name = f"select_{unique_id}" select_sess = pe.Node( @@ -654,10 +650,9 @@ def anat_longitudinal_wf(subject_id, sub_list, config): input_creds_path = None except KeyError: input_creds_path = None - - wf = initialize_nipype_wf(config, sub_list[0]) - - wf, rpool = initiate_rpool(wf, config, session) + session["creds_path"] = input_creds_path + wf = initialize_nipype_wf(config, session) + rpool = initiate_rpool(wf, config, session) pipeline_blocks = [ warp_longitudinal_T1w_to_template, diff --git a/CPAC/pipeline/cpac_pipeline.py b/CPAC/pipeline/cpac_pipeline.py index 40811b9e77..f0baaa323c 100644 --- a/CPAC/pipeline/cpac_pipeline.py +++ b/CPAC/pipeline/cpac_pipeline.py @@ -710,21 +710,24 @@ def run_workflow( ] timeHeader = dict(zip(gpaTimeFields, gpaTimeFields)) - with open( - os.path.join( - c.pipeline_setup["log_directory"]["path"], - "cpac_individual_timing" - f"_{c.pipeline_setup['pipeline_name']}.csv", - ), - "a", - ) as timeCSV, open( - os.path.join( - c.pipeline_setup["log_directory"]["path"], - "cpac_individual_timing_%s.csv" - % c.pipeline_setup["pipeline_name"], - ), - "r", - ) as readTimeCSV: + with ( + open( + os.path.join( + c.pipeline_setup["log_directory"]["path"], + "cpac_individual_timing" + f"_{c.pipeline_setup['pipeline_name']}.csv", + ), + "a", + ) as timeCSV, + open( + os.path.join( + c.pipeline_setup["log_directory"]["path"], + "cpac_individual_timing_%s.csv" + % c.pipeline_setup["pipeline_name"], + ), + "r", + ) as readTimeCSV, + ): timeWriter = csv.DictWriter(timeCSV, fieldnames=gpaTimeFields) timeReader = csv.DictReader(readTimeCSV) @@ -1244,7 +1247,7 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None): # PREPROCESSING # """"""""""""""""""""""""""""""""""""""""""""""""""" - wf, rpool = initiate_rpool(wf, cfg, sub_dict) + rpool = initiate_rpool(wf, cfg, sub_dict) pipeline_blocks = build_anat_preproc_stack(rpool, cfg) @@ -1437,7 +1440,7 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None): if rpool.check_rpool(func): apply_func_warp["T1"] = False - target_space_nuis = cfg.nuisance_corrections["2-nuisance_regression"]["space"] + # target_space_nuis = cfg.nuisance_corrections["2-nuisance_regression"]["space"] target_space_alff = cfg.amplitude_low_frequency_fluctuation["target_space"] target_space_reho = cfg.regional_homogeneity["target_space"] diff --git a/CPAC/pipeline/engine/__init__.py b/CPAC/pipeline/engine/__init__.py index 3d00ebde72..1350e2bb36 100644 --- a/CPAC/pipeline/engine/__init__.py 
+++ b/CPAC/pipeline/engine/__init__.py @@ -17,34 +17,17 @@ """C-PAC engine.""" from .engine import ( - func_outdir_ingress, - ingress_freesurfer, - ingress_output_dir, - ingress_pipeconfig_paths, - ingress_raw_anat_data, - json_outdir_ingress, NodeBlock, run_node_blocks, - set_iterables, - strip_template, wrap_block, ) from .resource import initiate_rpool, NodeData, ResourcePool __all__ = [ - "func_outdir_ingress", - "ingress_freesurfer", - "ingress_raw_anat_data", - "ingress_output_dir", - "ingress_pipeconfig_paths", - "ingress_raw_func_data", "initiate_rpool", - "json_outdir_ingress", "NodeBlock", "NodeData", "ResourcePool", "run_node_blocks", - "set_iterables", - "strip_template", "wrap_block", ] diff --git a/CPAC/pipeline/engine/engine.py b/CPAC/pipeline/engine/engine.py index 4e941b05d9..e6280ace5f 100644 --- a/CPAC/pipeline/engine/engine.py +++ b/CPAC/pipeline/engine/engine.py @@ -14,37 +14,25 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . +"""C-PAC pipeline engine.""" + import ast import copy import hashlib import json import logging import os -import warnings from nipype import config from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.nodeblock import NodeBlockFunction -from CPAC.resources.templates.lookup_table import lookup_identifier -from CPAC.utils.datasource import ( - create_anat_datasource, - create_func_datasource, - create_general_datasource, - ingress_func_metadata, - resolve_resolution, -) -from CPAC.utils.interfaces.function import Function from CPAC.utils.monitoring import ( getLogger, LOGTAIL, WARNING_FREESURFER_OFF_WITH_DATA, WFLOGGER, ) -from CPAC.utils.utils import ( - read_json, - write_output_json, -) class NodeBlock: @@ -537,708 +525,7 @@ def wrap_block(node_blocks, interface, wf, cfg, strat_pool, pipe_num, opt): return (wf, strat_pool) -def ingress_raw_anat_data(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id): - if "anat" not in data_paths: - WFLOGGER.warning("No anatomical data present.") - return rpool - - if "creds_path" not in data_paths: - data_paths["creds_path"] = None - - anat_flow = create_anat_datasource(f"anat_T1w_gather_{part_id}_{ses_id}") - - anat = {} - if isinstance(data_paths["anat"], str): - anat["T1"] = data_paths["anat"] - elif "T1w" in data_paths["anat"]: - anat["T1"] = data_paths["anat"]["T1w"] - - if "T1" in anat: - anat_flow.inputs.inputnode.set( - subject=part_id, - anat=anat["T1"], - creds_path=data_paths["creds_path"], - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - img_type="anat", - ) - rpool.set_data("T1w", anat_flow, "outputspec.anat", {}, "", "anat_ingress") - - if "T2w" in data_paths["anat"]: - anat_flow_T2 = create_anat_datasource(f"anat_T2w_gather_{part_id}_{ses_id}") - anat_flow_T2.inputs.inputnode.set( - subject=part_id, - anat=data_paths["anat"]["T2w"], - creds_path=data_paths["creds_path"], - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - img_type="anat", - ) - rpool.set_data("T2w", anat_flow_T2, "outputspec.anat", {}, "", "anat_ingress") - - if cfg.surface_analysis["freesurfer"]["ingress_reconall"]: - rpool = ingress_freesurfer( - wf, rpool, cfg, data_paths, unique_id, part_id, ses_id - ) - - return rpool - - -def ingress_freesurfer(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id): - try: - fs_path = os.path.join(cfg.pipeline_setup["freesurfer_dir"], part_id) - except KeyError: - WFLOGGER.warning("No FreeSurfer data present.") - return rpool - - # fs_path = 
os.path.join(cfg.pipeline_setup['freesurfer_dir'], part_id) - if not os.path.exists(fs_path): - if "sub" in part_id: - fs_path = os.path.join( - cfg.pipeline_setup["freesurfer_dir"], part_id.replace("sub-", "") - ) - else: - fs_path = os.path.join( - cfg.pipeline_setup["freesurfer_dir"], ("sub-" + part_id) - ) - - # patch for flo-specific data - if not os.path.exists(fs_path): - subj_ses = part_id + "-" + ses_id - fs_path = os.path.join(cfg.pipeline_setup["freesurfer_dir"], subj_ses) - if not os.path.exists(fs_path): - WFLOGGER.info("No FreeSurfer data found for subject %s", part_id) - return rpool - - # Check for double nested subj names - if os.path.exists(os.path.join(fs_path, os.path.basename(fs_path))): - fs_path = os.path.join(fs_path, part_id) - - fs_ingress = create_general_datasource("gather_freesurfer_dir") - fs_ingress.inputs.inputnode.set( - unique_id=unique_id, - data=fs_path, - creds_path=data_paths["creds_path"], - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - "freesurfer-subject-dir", - fs_ingress, - "outputspec.data", - {}, - "", - "freesurfer_config_ingress", - ) - - recon_outs = { - "pipeline-fs_raw-average": "mri/rawavg.mgz", - "pipeline-fs_subcortical-seg": "mri/aseg.mgz", - "pipeline-fs_brainmask": "mri/brainmask.mgz", - "pipeline-fs_wmparc": "mri/wmparc.mgz", - "pipeline-fs_T1": "mri/T1.mgz", - "pipeline-fs_hemi-L_desc-surface_curv": "surf/lh.curv", - "pipeline-fs_hemi-R_desc-surface_curv": "surf/rh.curv", - "pipeline-fs_hemi-L_desc-surfaceMesh_pial": "surf/lh.pial", - "pipeline-fs_hemi-R_desc-surfaceMesh_pial": "surf/rh.pial", - "pipeline-fs_hemi-L_desc-surfaceMesh_smoothwm": "surf/lh.smoothwm", - "pipeline-fs_hemi-R_desc-surfaceMesh_smoothwm": "surf/rh.smoothwm", - "pipeline-fs_hemi-L_desc-surfaceMesh_sphere": "surf/lh.sphere", - "pipeline-fs_hemi-R_desc-surfaceMesh_sphere": "surf/rh.sphere", - "pipeline-fs_hemi-L_desc-surfaceMap_sulc": "surf/lh.sulc", - "pipeline-fs_hemi-R_desc-surfaceMap_sulc": "surf/rh.sulc", - "pipeline-fs_hemi-L_desc-surfaceMap_thickness": "surf/lh.thickness", - "pipeline-fs_hemi-R_desc-surfaceMap_thickness": "surf/rh.thickness", - "pipeline-fs_hemi-L_desc-surfaceMap_volume": "surf/lh.volume", - "pipeline-fs_hemi-R_desc-surfaceMap_volume": "surf/rh.volume", - "pipeline-fs_hemi-L_desc-surfaceMesh_white": "surf/lh.white", - "pipeline-fs_hemi-R_desc-surfaceMesh_white": "surf/rh.white", - "pipeline-fs_xfm": "mri/transforms/talairach.lta", - } - - for key, outfile in recon_outs.items(): - fullpath = os.path.join(fs_path, outfile) - if os.path.exists(fullpath): - fs_ingress = create_general_datasource(f"gather_fs_{key}_dir") - fs_ingress.inputs.inputnode.set( - unique_id=unique_id, - data=fullpath, - creds_path=data_paths["creds_path"], - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - key, fs_ingress, "outputspec.data", {}, "", f"fs_{key}_ingress" - ) - else: - warnings.warn( - str(LookupError(f"\n[!] 
Path does not exist for {fullpath}.\n")) - ) - - return rpool - - -def ingress_raw_func_data(wf, rpool, cfg, data_paths, unique_id, part_id, ses_id): - func_paths_dct = data_paths["func"] - - func_wf = create_func_datasource( - func_paths_dct, rpool, f"func_ingress_{part_id}_{ses_id}" - ) - func_wf.inputs.inputnode.set( - subject=part_id, - creds_path=data_paths["creds_path"], - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - func_wf.get_node("inputnode").iterables = ("scan", list(func_paths_dct.keys())) - - rpool.set_data("subject", func_wf, "outputspec.subject", {}, "", "func_ingress") - rpool.set_data("bold", func_wf, "outputspec.rest", {}, "", "func_ingress") - rpool.set_data("scan", func_wf, "outputspec.scan", {}, "", "func_ingress") - rpool.set_data( - "scan-params", func_wf, "outputspec.scan_params", {}, "", "scan_params_ingress" - ) - - # TODO: CHECK FOR PARAMETERS - - wf, rpool, diff, blip, fmap_rp_list = ingress_func_metadata( - wf, cfg, rpool, data_paths, part_id, data_paths["creds_path"], ses_id - ) - - # Memoize list of local functional scans - # TODO: handle S3 files - # Skip S3 files for now - - local_func_scans = [ - func_paths_dct[scan]["scan"] - for scan in func_paths_dct.keys() - if not func_paths_dct[scan]["scan"].startswith("s3://") - ] - if local_func_scans: - # pylint: disable=protected-access - wf._local_func_scans = local_func_scans - if cfg.pipeline_setup["Debugging"]["verbose"]: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("local_func_scans: %s", local_func_scans) - del local_func_scans - - return (wf, rpool, diff, blip, fmap_rp_list) - - -def ingress_output_dir( - wf, cfg, rpool, unique_id, data_paths, part_id, ses_id, creds_path=None -): - dir_path = data_paths["derivatives_dir"] - - WFLOGGER.info("\nPulling outputs from %s.\n", dir_path) - - anat = os.path.join(dir_path, "anat") - func = os.path.join(dir_path, "func") - - exts = [".nii", ".gz", ".mat", ".1D", ".txt", ".csv", ".rms", ".tsv"] - - outdir_anat = [] - outdir_func = [] - func_paths = {} - func_dict = {} - - for subdir in [anat, func]: - if os.path.isdir(subdir): - for filename in os.listdir(subdir): - for ext in exts: - if ext in filename: - if subdir == anat: - outdir_anat.append(os.path.join(subdir, filename)) - else: - outdir_func.append(os.path.join(subdir, filename)) - - # Add derivatives directory to rpool - ingress = create_general_datasource("gather_derivatives_dir") - ingress.inputs.inputnode.set( - unique_id=unique_id, - data=dir_path, - creds_path=creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - "derivatives-dir", ingress, "outputspec.data", {}, "", "outdir_config_ingress" - ) - - for subdir in [outdir_anat, outdir_func]: - for filepath in subdir: - filename = str(filepath) - for ext in exts: - filename = filename.split("/")[-1].replace(ext, "") - - data_label = filename.split(unique_id)[1].lstrip("_") - - if len(filename) == len(data_label): - msg = ( - "\n\n[!] 
Possibly wrong participant or " - "session in this directory?\n\n" - f"Filepath: {filepath}\n\n" - ) - raise Exception(msg) - - bidstag = "" - for tag in data_label.split("_"): - for prefix in ["task-", "run-", "acq-", "rec"]: - if tag.startswith(prefix): - bidstag += f"{tag}_" - data_label = data_label.replace(f"{tag}_", "") - data_label, json = strip_template(data_label, dir_path, filename) - - rpool, json_info, pipe_idx, node_name, data_label = json_outdir_ingress( - rpool, filepath, exts, data_label, json - ) - - if ( - "template" in data_label - and not json_info["Template"] - == cfg.pipeline_setup["outdir_ingress"]["Template"] - ): - continue - # Rename confounds to avoid confusion in nuisance regression - if data_label.endswith("desc-confounds_timeseries"): - data_label = "pipeline-ingress_desc-confounds_timeseries" - - if len(bidstag) > 1: - # Remove tail symbol - bidstag = bidstag[:-1] - if bidstag.startswith("task-"): - bidstag = bidstag.replace("task-", "") - - # Rename bold mask for CPAC naming convention - # and to avoid collision with anat brain mask - if data_label.endswith("desc-brain_mask") and filepath in outdir_func: - data_label = data_label.replace("brain_mask", "bold_mask") - - try: - pipe_x = rpool.get_pipe_number(pipe_idx) - except ValueError: - pipe_x = len(rpool.pipe_list) - if filepath in outdir_anat: - ingress = create_general_datasource( - f"gather_anat_outdir_{data_label!s}_{pipe_x}" - ) - ingress.inputs.inputnode.set( - unique_id=unique_id, - data=filepath, - creds_path=creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - data_label, - ingress, - "outputspec.data", - json_info, - pipe_idx, - node_name, - f"outdir_{data_label}_ingress", - inject=True, - ) - else: - if data_label.endswith("desc-preproc_bold"): - func_key = data_label - func_dict[bidstag] = {} - func_dict[bidstag]["scan"] = str(filepath) - func_dict[bidstag]["scan_parameters"] = json_info - func_dict[bidstag]["pipe_idx"] = pipe_idx - if data_label.endswith("desc-brain_mask"): - data_label = data_label.replace("brain_mask", "bold_mask") - try: - func_paths[data_label].append(filepath) - except: - func_paths[data_label] = [] - func_paths[data_label].append(filepath) - - if func_dict: - wf, rpool = func_outdir_ingress( - wf, - cfg, - func_dict, - rpool, - unique_id, - creds_path, - part_id, - func_key, - func_paths, - ) - - if cfg.surface_analysis["freesurfer"]["ingress_reconall"]: - rpool = ingress_freesurfer( - wf, rpool, cfg, data_paths, unique_id, part_id, ses_id - ) - return wf, rpool - - -def json_outdir_ingress(rpool, filepath, exts, data_label, json): - desc_val = None - for tag in data_label.split("_"): - if "desc-" in tag: - desc_val = tag - break - jsonpath = str(filepath) - for ext in exts: - jsonpath = jsonpath.replace(ext, "") - jsonpath = f"{jsonpath}.json" - - if not os.path.exists(jsonpath): - WFLOGGER.info( - "\n\n[!] No JSON found for file %s.\nCreating %s..\n\n", filepath, jsonpath - ) - json_info = { - "Description": "This data was generated elsewhere and " - "supplied by the user into this C-PAC run's " - "output directory. This JSON file was " - "automatically generated by C-PAC because a " - "JSON file was not supplied with the data." 
- } - json_info = {**json_info, **json} - write_output_json(json_info, jsonpath) - else: - json_info = read_json(jsonpath) - json_info = {**json_info, **json} - if "CpacProvenance" in json_info: - if desc_val: - # it's a C-PAC output, let's check for pipe_idx/strat integer - # suffixes in the desc- entries. - only_desc = str(desc_val) - - if only_desc[-1].isdigit(): - for idx in range(0, 3): - # let's stop at 3, please don't run >999 strategies okay? - if only_desc[-1].isdigit(): - only_desc = only_desc[:-1] - - if only_desc[-1] == "-": - only_desc = only_desc.rstrip("-") - else: - msg = ( - "\n[!] Something went wrong with either " - "reading in the output directory or when " - "it was written out previously.\n\nGive " - "this to your friendly local C-PAC " - f"developer:\n\n{data_label!s}\n" - ) - raise IOError(msg) - - # remove the integer at the end of the desc-* variant, we will - # get the unique pipe_idx from the CpacProvenance below - data_label = data_label.replace(desc_val, only_desc) - - # preserve cpac provenance/pipe_idx - pipe_idx = rpool.generate_prov_string(json_info["CpacProvenance"]) - node_name = "" - - else: - json_info["CpacProvenance"] = [f"{data_label}:Non-C-PAC Origin: {filepath}"] - if "Description" not in json_info: - json_info["Description"] = ( - "This data was generated elsewhere and " - "supplied by the user into this C-PAC run's " - "output directory. This JSON file was " - "automatically generated by C-PAC because a " - "JSON file was not supplied with the data." - ) - pipe_idx = rpool.generate_prov_string(json_info["CpacProvenance"]) - node_name = f"{data_label}_ingress" - - return rpool, json_info, pipe_idx, node_name, data_label - - -def func_outdir_ingress( - wf, cfg, func_dict, rpool, unique_id, creds_path, part_id, key, func_paths -): - pipe_x = len(rpool.pipe_list) - ingress = create_func_datasource( - func_dict, rpool, f"gather_func_outdir_{key}_{pipe_x}" - ) - ingress.inputs.inputnode.set( - subject=unique_id, - creds_path=creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data("subject", ingress, "outputspec.subject", {}, "", "func_ingress") - ingress.get_node("inputnode").iterables = ("scan", list(func_dict.keys())) - rpool.set_data(key, ingress, "outputspec.rest", {}, "", "func_ingress") - - rpool.set_data("scan", ingress, "outputspec.scan", {}, "", "func_ingress") - rpool.set_data( - "scan-params", ingress, "outputspec.scan_params", {}, "", "scan_params_ingress" - ) - wf, rpool, diff, blip, fmap_rp_list = ingress_func_metadata( - wf, cfg, rpool, func_dict, part_id, creds_path, key - ) - - # Have to do it this weird way to save the parsed BIDS tag & filepath - mask_paths_key = ( - "desc-bold_mask" - if "desc-bold_mask" in func_paths - else "space-template_desc-bold_mask" - ) - ts_paths_key = "pipeline-ingress_desc-confounds_timeseries" - - # Connect func data with approproate scan name - iterables = pe.Node( - Function( - input_names=["scan", "mask_paths", "ts_paths"], - output_names=["out_scan", "mask", "confounds"], - function=set_iterables, - ), - name=f"set_iterables_{pipe_x}", - ) - iterables.inputs.mask_paths = func_paths[mask_paths_key] - iterables.inputs.ts_paths = func_paths[ts_paths_key] - wf.connect(ingress, "outputspec.scan", iterables, "scan") - - for key in func_paths: - if key in (mask_paths_key, ts_paths_key): - ingress_func = create_general_datasource(f"ingress_func_data_{key}") - ingress_func.inputs.inputnode.set( - unique_id=unique_id, - creds_path=creds_path, - 
dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - wf.connect(iterables, "out_scan", ingress_func, "inputnode.scan") - if key == mask_paths_key: - wf.connect(iterables, "mask", ingress_func, "inputnode.data") - rpool.set_data( - key, ingress_func, "inputnode.data", {}, "", f"outdir_{key}_ingress" - ) - elif key == ts_paths_key: - wf.connect(iterables, "confounds", ingress_func, "inputnode.data") - rpool.set_data( - key, ingress_func, "inputnode.data", {}, "", f"outdir_{key}_ingress" - ) - - return wf, rpool - - -def set_iterables(scan, mask_paths=None, ts_paths=None): - # match scan with filepath to get filepath - mask_path = [path for path in mask_paths if scan in path] - ts_path = [path for path in ts_paths if scan in path] - - return (scan, mask_path[0], ts_path[0]) - - -def strip_template(data_label, dir_path, filename): - json = {} - # rename to template - for prefix in ["space-", "from-", "to-"]: - for bidstag in data_label.split("_"): - if bidstag.startswith(prefix): - template_key, template_val = bidstag.split("-") - template_name, _template_desc = lookup_identifier(template_val) - if template_name: - json["Template"] = template_val - data_label = data_label.replace(template_val, "template") - elif bidstag.startswith("res-"): - res_key, res_val = bidstag.split("-") - json["Resolution"] = res_val - data_label = data_label.replace(bidstag, "") - if data_label.find("__"): - data_label = data_label.replace("__", "_") - return data_label, json - - -def ingress_pipeconfig_paths(cfg, rpool, unique_id, creds_path=None): - # ingress config file paths - # TODO: may want to change the resource keys for each to include one level up in the YAML as well - - import pandas as pd - import pkg_resources as p - - template_csv = p.resource_filename("CPAC", "resources/cpac_templates.csv") - template_df = pd.read_csv(template_csv, keep_default_na=False) - - for row in template_df.itertuples(): - key = row.Key - val = row.Pipeline_Config_Entry - val = cfg.get_nested(cfg, [x.lstrip() for x in val.split(",")]) - resolution = row.Intended_Resolution_Config_Entry - desc = row.Description - - if not val: - continue - - if resolution: - res_keys = [x.lstrip() for x in resolution.split(",")] - tag = res_keys[-1] - json_info = {} - - if "$FSLDIR" in val: - val = val.replace("$FSLDIR", cfg.pipeline_setup["system_config"]["FSLDIR"]) - if "$priors_path" in val: - priors_path = ( - cfg.segmentation["tissue_segmentation"]["FSL-FAST"]["use_priors"][ - "priors_path" - ] - or "" - ) - if "$FSLDIR" in priors_path: - priors_path = priors_path.replace( - "$FSLDIR", cfg.pipeline_setup["system_config"]["FSLDIR"] - ) - val = val.replace("$priors_path", priors_path) - if "${resolution_for_anat}" in val: - val = val.replace( - "${resolution_for_anat}", - cfg.registration_workflows["anatomical_registration"][ - "resolution_for_anat" - ], - ) - if "${func_resolution}" in val: - val = val.replace( - "${func_resolution}", - cfg.registration_workflows["functional_registration"][ - "func_registration_to_template" - ]["output_resolution"][tag], - ) - - if desc: - template_name, _template_desc = lookup_identifier(val) - if template_name: - desc = f"{template_name} - {desc}" - json_info["Description"] = f"{desc} - {val}" - if resolution: - resolution = cfg.get_nested(cfg, res_keys) - json_info["Resolution"] = resolution - - resampled_template = pe.Node( - Function( - input_names=["resolution", "template", "template_name", "tag"], - output_names=["resampled_template"], - function=resolve_resolution, - as_module=True, - ), - 
name="resampled_" + key, - ) - - resampled_template.inputs.resolution = resolution - resampled_template.inputs.template = val - resampled_template.inputs.template_name = key - resampled_template.inputs.tag = tag - - # the set_data below is set up a little differently, because we are - # injecting and also over-writing already-existing entries - # other alternative would have been to ingress into the - # resampled_template node from the already existing entries, but we - # didn't do that here - rpool.set_data( - key, - resampled_template, - "resampled_template", - json_info, - "", - "template_resample", - ) # pipe_idx (after the blank json {}) should be the previous strat that you want deleted! because you're not connecting this the regular way, you have to do it manually - - elif val: - config_ingress = create_general_datasource(f"gather_{key}") - config_ingress.inputs.inputnode.set( - unique_id=unique_id, - data=val, - creds_path=creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - rpool.set_data( - key, - config_ingress, - "outputspec.data", - json_info, - "", - f"{key}_config_ingress", - ) - # templates, resampling from config - """ - template_keys = [ - ("anat", ["network_centrality", "template_specification_file"]), - ("anat", ["nuisance_corrections", "2-nuisance_regression", - "lateral_ventricles_mask"]), - ("anat", - ["segmentation", "tissue_segmentation", "FSL-FAST", "use_priors", - "CSF_path"]), - ("anat", - ["segmentation", "tissue_segmentation", "FSL-FAST", "use_priors", - "GM_path"]), - ("anat", - ["segmentation", "tissue_segmentation", "FSL-FAST", "use_priors", - "WM_path"]), - ("anat", - ["segmentation", "tissue_segmentation", "Template_Based", "CSF"]), - ("anat", - ["segmentation", "tissue_segmentation", "Template_Based", "GRAY"]), - ("anat", - ["segmentation", "tissue_segmentation", "Template_Based", "WHITE"]), - ("anat", ["anatomical_preproc", "acpc_alignment", "T1w_ACPC_template"]), - ("anat", ["anatomical_preproc", "acpc_alignment", "T1w_brain_ACPC_template"]), - ("anat", ["anatomical_preproc", "acpc_alignment", "T2w_ACPC_template"]), - ("anat", ["anatomical_preproc", "acpc_alignment", "T2w_brain_ACPC_template"])] - - def get_nested_attr(c, template_key): - attr = getattr(c, template_key[0]) - keys = template_key[1:] - - def _get_nested(attr, keys): - if len(keys) > 1: - return (_get_nested(attr[keys[0]], keys[1:])) - elif len(keys): - return (attr[keys[0]]) - else: - return (attr) - - return (_get_nested(attr, keys)) - - def set_nested_attr(c, template_key, value): - attr = getattr(c, template_key[0]) - keys = template_key[1:] - - def _set_nested(attr, keys): - if len(keys) > 1: - return (_set_nested(attr[keys[0]], keys[1:])) - elif len(keys): - attr[keys[0]] = value - else: - return (attr) - - return (_set_nested(attr, keys)) - - for key_type, key in template_keys: - attr = cfg.get_nested(cfg, key) - if isinstance(attr, str) or attr == None: - node = create_check_for_s3_node( - key[-1], - attr, key_type, - data_paths['creds_path'], - cfg.pipeline_setup['working_directory']['path'], - map_node=False - ) - cfg.set_nested(cfg, key, node) - - template_keys_in_list = [ - ("anat", - ["segmentation", "tissue_segmentation", "ANTs_Prior_Based", - "template_brain_list"]), - ("anat", - ["segmentation", "tissue_segmentation", "ANTs_Prior_Based", - "template_segmentation_list"]), - ] - - for key_type, key in template_keys_in_list: - node = create_check_for_s3_node( - key[-1], - cfg.get_nested(cfg, key), key_type, - data_paths['creds_path'], - 
cfg.pipeline_setup['working_directory']['path'], - map_node=True - ) - cfg.set_nested(cfg, key, node) - """ - - return rpool - - def run_node_blocks(blocks, data_paths, cfg=None): - import os - - from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.engine import NodeBlock from CPAC.pipeline.engine.resource import initiate_rpool @@ -1251,9 +538,9 @@ def run_node_blocks(blocks, data_paths, cfg=None): } # TODO: WE HAVE TO PARSE OVER UNIQUE ID'S!!! - _, rpool = initiate_rpool(cfg, data_paths) wf = pe.Workflow(name="node_blocks") + rpool = initiate_rpool(wf, cfg, data_paths) wf.base_dir = cfg.pipeline_setup["working_directory"]["path"] wf.config["execution"] = { "hash_method": "timestamp", diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index af69d3cd7e..5fc9add3db 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -20,10 +20,12 @@ import copy from itertools import chain import os +from pathlib import Path import re from typing import Optional import warnings +from nipype.interfaces import utility as util from nipype.interfaces.utility import Rename from CPAC.image_utils.spatial_smoothing import spatial_smoothing @@ -33,16 +35,23 @@ ) from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.check_outputs import ExpectedOutputs -from CPAC.pipeline.engine.engine import ( - ingress_output_dir, - ingress_pipeconfig_paths, - ingress_raw_anat_data, - ingress_raw_func_data, -) from CPAC.pipeline.utils import MOVEMENT_FILTER_KEYS, name_fork, source_set from CPAC.registration.registration import transform_derivative +from CPAC.resources.templates.lookup_table import lookup_identifier from CPAC.utils.bids_utils import res_in_filename from CPAC.utils.configuration import Configuration +from CPAC.utils.datasource import ( + calc_delta_te_and_asym_ratio, + check_for_s3, + check_func_scan, + create_anat_datasource, + create_fmap_datasource, + create_general_datasource, + gather_echo_times, + get_fmap_phasediff_metadata, + get_rest, + resolve_resolution, +) from CPAC.utils.interfaces.datasink import DataSink from CPAC.utils.interfaces.function import Function from CPAC.utils.monitoring import ( @@ -54,9 +63,143 @@ check_prov_for_regtool, create_id_string, get_last_prov_entry, + get_scan_params, + read_json, write_output_json, ) +EXTS = [".nii", ".gz", ".mat", ".1D", ".txt", ".csv", ".rms", ".tsv"] + + +def generate_prov_string(prov: list[str]) -> tuple[str, str]: + """Generate a string from a SINGLE RESOURCE'S dictionary of MULTIPLE PRECEDING RESOURCES (or single, if just one). + + NOTE: this DOES NOT merge multiple resources!!! (i.e. for merging-strat pipe_idx generation). + """ + if not isinstance(prov, list): + msg = ( + "\n[!] Developer info: the CpacProvenance " + f"entry for {prov} has to be a list.\n" + ) + raise TypeError(msg) + last_entry = get_last_prov_entry(prov) + resource = last_entry.split(":")[0] + return (resource, str(prov)) + + +def json_outdir_ingress( + filepath: Path | str, data_label: str, json: dict +) -> tuple[dict, tuple[str, str], str, str]: + """Ingress sidecars from a BIDS derivatives directory.""" + desc_val = None + for tag in data_label.split("_"): + if "desc-" in tag: + desc_val = tag + break + jsonpath = str(filepath) + for ext in EXTS: + jsonpath = jsonpath.replace(ext, "") + jsonpath = f"{jsonpath}.json" + + if not os.path.exists(jsonpath): + WFLOGGER.info( + "\n\n[!] 
No JSON found for file %s.\nCreating %s..\n\n", + filepath, + jsonpath, + ) + json_info = { + "Description": "This data was generated elsewhere and " + "supplied by the user into this C-PAC run's " + "output directory. This JSON file was " + "automatically generated by C-PAC because a " + "JSON file was not supplied with the data." + } + json_info = {**json_info, **json} + write_output_json(json_info, jsonpath) + else: + json_info = read_json(jsonpath) + json_info = {**json_info, **json} + if "CpacProvenance" in json_info: + if desc_val: + # it's a C-PAC output, let's check for pipe_idx/strat integer + # suffixes in the desc- entries. + only_desc = str(desc_val) + + if only_desc[-1].isdigit(): + for _strat_idx in range(0, 3): + # let's stop at 3, please don't run >999 strategies okay? + if only_desc[-1].isdigit(): + only_desc = only_desc[:-1] + + if only_desc[-1] == "-": + only_desc = only_desc.rstrip("-") + else: + msg = ( + "\n[!] Something went wrong with either " + "reading in the output directory or when " + "it was written out previously.\n\nGive " + "this to your friendly local C-PAC " + f"developer:\n\n{data_label!s}\n" + ) + raise IOError(msg) + + # remove the integer at the end of the desc-* variant, we will + # get the unique pipe_idx from the CpacProvenance below + data_label = data_label.replace(desc_val, only_desc) + + # preserve cpac provenance/pipe_idx + pipe_idx = generate_prov_string(json_info["CpacProvenance"]) + node_name = "" + + else: + json_info["CpacProvenance"] = [f"{data_label}:Non-C-PAC Origin: {filepath}"] + if "Description" not in json_info: + json_info["Description"] = ( + "This data was generated elsewhere and " + "supplied by the user into this C-PAC run's " + "output directory. This JSON file was " + "automatically generated by C-PAC because a " + "JSON file was not supplied with the data." + ) + pipe_idx = generate_prov_string(json_info["CpacProvenance"]) + node_name = f"{data_label}_ingress" + + return json_info, pipe_idx, node_name, data_label + + +@Function.sig_imports(["from typing import Optional"]) +def set_iterables( + scan: str, + mask_paths: Optional[list[str]] = None, + ts_paths: Optional[list[str]] = None, +) -> tuple[str, str, str]: + """Match scan with filepath to get filepath.""" + mask_path = [path for path in mask_paths if scan in path] + ts_path = [path for path in ts_paths if scan in path] + + return (scan, mask_path[0], ts_path[0]) + + +def strip_template(data_label: str) -> tuple[str, dict[str, str]]: + """Strip a template name from a data label to use as a Resource key.""" + json = {} + # rename to template + for prefix in ["space-", "from-", "to-"]: + for bidstag in data_label.split("_"): + if bidstag.startswith(prefix): + _template_key, template_val = bidstag.split("-") + template_name, _template_desc = lookup_identifier(template_val) + if template_name: + json["Template"] = template_val + data_label = data_label.replace(template_val, "template") + elif bidstag.startswith("res-"): + _res_key, res_val = bidstag.split("-") + json["Resolution"] = res_val + data_label = data_label.replace(bidstag, "") + if data_label.find("__"): + data_label = data_label.replace("__", "_") + return data_label, json + class NodeData: r"""Attribute access for ResourcePool.get_data outputs. 
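# Illustrative behavior of two of the new module-level helpers above — a minimal
# sketch, not part of the patch itself. Values are hypothetical and assume that
# get_last_prov_entry returns the final entry of a flat provenance list and that
# lookup_identifier recognizes the space label used here:
#
#   >>> generate_prov_string(["T1w:anat_ingress", "desc-preproc_T1w:anatomical_init"])
#   ('desc-preproc_T1w', "['T1w:anat_ingress', 'desc-preproc_T1w:anatomical_init']")
#
#   >>> strip_template("space-MNI152NLin6_res-2_desc-preproc_bold")
#   ('space-template_desc-preproc_bold', {'Template': 'MNI152NLin6', 'Resolution': '2'})
#
# i.e. the provenance string doubles as a pipe_idx key, and strip_template swaps a
# recognized template identifier for the generic "template" token while recording
# the original name and resolution in the returned JSON dict.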
@@ -108,7 +251,28 @@ class Resource: class ResourcePool: """All Resources.""" - def __init__(self, rpool=None, name=None, cfg=None, pipe_list=None): + def __init__( + self, + rpool: Optional[dict] = None, + name: str = "", + cfg: Optional[Configuration] = None, + pipe_list: Optional[list] = None, + *, + creds_path: Optional[str] = None, + data_paths: Optional[dict] = None, + part_id: Optional[str] = None, + ses_id: Optional[str] = None, + unique_id: Optional[str] = None, + wf: Optional[pe.Workflow] = None, + **kwargs, + ): + """Initialize a ResourcePool.""" + self.creds_path = creds_path + self.data_paths = data_paths + self.part_id = part_id + self.ses_id = ses_id + self.unique_id = unique_id + self._init_wf = wf if not rpool: self.rpool = {} else: @@ -171,23 +335,22 @@ def __init__(self, rpool=None, name=None, cfg=None, pipe_list=None): ] def __repr__(self) -> str: + """Return reproducible ResourcePool string.""" params = [ f"{param}={getattr(self, param)}" for param in ["rpool", "name", "cfg", "pipe_list"] - if getattr(self, param, None) is not None + if getattr(self, param, None) ] return f'ResourcePool({", ".join(params)})' def __str__(self) -> str: + """Return string representation of ResourcePool.""" if self.name: return f"ResourcePool({self.name}): {list(self.rpool)}" return f"ResourcePool: {list(self.rpool)}" - def append_name(self, name): - self.name.append(name) - def back_propogate_template_name( - self, wf, resource_idx: str, json_info: dict, id_string: "pe.Node" + self, resource_idx: str, json_info: dict, id_string: "pe.Node" ) -> None: """Find and apply the template name from a resource's provenance. @@ -206,7 +369,7 @@ def back_propogate_template_name( if "template" in resource_idx and self.check_rpool("derivatives-dir"): if self.check_rpool("template"): node, out = self.get_data("template") - wf.connect(node, out, id_string, "template_desc") + self._init_wf.connect(node, out, id_string, "template_desc") elif "Template" in json_info: id_string.inputs.template_desc = json_info["Template"] elif ( @@ -242,6 +405,125 @@ def check_rpool(self, resource): return True return False + def create_func_datasource( + self, rest_dict: dict, wf_name="func_datasource" + ) -> pe.Workflow: + """Create a workflow to gather timeseries data. + + Return the functional timeseries-related file paths for each series/scan from the + dictionary of functional files described in the data configuration (sublist) YAML + file. + + Scan input (from inputnode) is an iterable. 
+ """ + wf = pe.Workflow(name=wf_name) + + inputnode = pe.Node( + util.IdentityInterface( + fields=["subject", "scan", "creds_path", "dl_dir"], + mandatory_inputs=True, + ), + name="inputnode", + ) + + outputnode = pe.Node( + util.IdentityInterface( + fields=[ + "subject", + "rest", + "scan", + "scan_params", + "phase_diff", + "magnitude", + ] + ), + name="outputspec", + ) + + # have this here for now because of the big change in the data + # configuration format + # (Not necessary with ingress - format does not comply) + if not self.check_rpool("derivatives-dir"): + check_scan = pe.Node( + Function( + input_names=["func_scan_dct", "scan"], + output_names=[], + function=check_func_scan, + as_module=True, + ), + name="check_func_scan", + ) + + check_scan.inputs.func_scan_dct = rest_dict + wf.connect(inputnode, "scan", check_scan, "scan") + + # get the functional scan itself + selectrest = pe.Node( + Function( + input_names=["scan", "rest_dict", "resource"], + output_names=["file_path"], + function=get_rest, + as_module=True, + ), + name="selectrest", + ) + selectrest.inputs.rest_dict = rest_dict + selectrest.inputs.resource = "scan" + wf.connect(inputnode, "scan", selectrest, "scan") + + # check to see if it's on an Amazon AWS S3 bucket, and download it, if it + # is - otherwise, just return the local file path + check_s3_node = pe.Node( + Function( + input_names=["file_path", "creds_path", "dl_dir", "img_type"], + output_names=["local_path"], + function=check_for_s3, + as_module=True, + ), + name="check_for_s3", + ) + + wf.connect(selectrest, "file_path", check_s3_node, "file_path") + wf.connect(inputnode, "creds_path", check_s3_node, "creds_path") + wf.connect(inputnode, "dl_dir", check_s3_node, "dl_dir") + check_s3_node.inputs.img_type = "func" + + wf.connect(inputnode, "subject", outputnode, "subject") + wf.connect(check_s3_node, "local_path", outputnode, "rest") + wf.connect(inputnode, "scan", outputnode, "scan") + + # scan parameters CSV + select_scan_params = pe.Node( + Function( + input_names=["scan", "rest_dict", "resource"], + output_names=["file_path"], + function=get_rest, + as_module=True, + ), + name="select_scan_params", + ) + select_scan_params.inputs.rest_dict = rest_dict + select_scan_params.inputs.resource = "scan_parameters" + wf.connect(inputnode, "scan", select_scan_params, "scan") + + # if the scan parameters file is on AWS S3, download it + s3_scan_params = pe.Node( + Function( + input_names=["file_path", "creds_path", "dl_dir", "img_type"], + output_names=["local_path"], + function=check_for_s3, + as_module=True, + ), + name="s3_scan_params", + ) + + wf.connect(select_scan_params, "file_path", s3_scan_params, "file_path") + wf.connect(inputnode, "creds_path", s3_scan_params, "creds_path") + wf.connect(inputnode, "dl_dir", s3_scan_params, "dl_dir") + wf.connect(s3_scan_params, "local_path", outputnode, "scan_params") + + return wf + def get_pipe_number(self, pipe_idx): return self.pipe_list.index(pipe_idx) @@ -382,7 +664,7 @@ def set_data( if not inject: new_prov_list.append(f"{resource}:{node_name}") try: - res, new_pipe_idx = self.generate_prov_string(new_prov_list) + _res, new_pipe_idx = generate_prov_string(new_prov_list) except IndexError: msg = ( f"\n\nThe set_data() call for {resource} has no " @@ -404,7 +686,7 @@ def set_data( search = False if self.get_resource_from_prov(current_prov_list) == resource: # CHANGING PIPE_IDX, BE CAREFUL DOWNSTREAM IN THIS FUNCTION - pipe_idx = self.generate_prov_string(current_prov_list)[1] + pipe_idx = 
generate_prov_string(current_prov_list)[1] if pipe_idx not in self.rpool[resource].keys(): search = True else: @@ -414,7 +696,7 @@ def set_data( if self.get_resource_from_prov(idx) == resource: if isinstance(idx, list): # CHANGING PIPE_IDX, BE CAREFUL DOWNSTREAM IN THIS FUNCTION - pipe_idx = self.generate_prov_string(idx)[1] + pipe_idx = generate_prov_string(idx)[1] elif isinstance(idx, str): pipe_idx = idx break @@ -537,21 +819,6 @@ def get_cpac_provenance(self, resource, strat=None): json_data = self.get_json(resource, strat) return json_data["CpacProvenance"] - @staticmethod - def generate_prov_string(prov): - # this will generate a string from a SINGLE RESOURCE'S dictionary of - # MULTIPLE PRECEDING RESOURCES (or single, if just one) - # NOTE: this DOES NOT merge multiple resources!!! (i.e. for merging-strat pipe_idx generation) - if not isinstance(prov, list): - msg = ( - "\n[!] Developer info: the CpacProvenance " - f"entry for {prov} has to be a list.\n" - ) - raise TypeError(msg) - last_entry = get_last_prov_entry(prov) - resource = last_entry.split(":")[0] - return (resource, str(prov)) - @staticmethod def generate_prov_list(prov_str): if not isinstance(prov_str, str): @@ -704,7 +971,7 @@ def get_strats(self, resources, debug=False): json_dct = {} for strat in strat_list: # strat is a prov list for a single resource/input - strat_resource, strat_idx = self.generate_prov_string(strat) + strat_resource, strat_idx = generate_prov_string(strat) strat_json = self.get_json(strat_resource, strat=strat_idx) json_dct[strat_resource] = strat_json @@ -797,7 +1064,7 @@ def get_strats(self, resources, debug=False): # now just invert resource:strat to strat:resource for each resource:strat for cpac_prov in strat_list: - resource, strat = self.generate_prov_string(cpac_prov) + resource, strat = generate_prov_string(cpac_prov) resource_strat_dct = self.rpool[resource][strat] # remember, `resource_strat_dct` is the dct of 'data' and 'json'. new_strats[pipe_idx].rpool[resource] = resource_strat_dct @@ -828,7 +1095,7 @@ def get_strats(self, resources, debug=False): for resource_strat_list in total_pool: # total_pool will have only one list of strats, for the one input for cpac_prov in resource_strat_list: # <------- cpac_prov here doesn't need to be modified, because it's not merging with other inputs - resource, pipe_idx = self.generate_prov_string(cpac_prov) + resource, pipe_idx = generate_prov_string(cpac_prov) resource_strat_dct = self.rpool[resource][pipe_idx] # remember, `resource_strat_dct` is the dct of 'data' and 'json'. 
new_strats[pipe_idx] = ResourcePool( @@ -859,7 +1126,7 @@ def derivative_xfm(self, wf, label, connection, json_info, pipe_idx, pipe_x): if isinstance(entry, list): if entry[-1].split(":")[0] == xfm_label: xfm_prov = entry - xfm_idx = self.generate_prov_string(xfm_prov)[1] + xfm_idx = generate_prov_string(xfm_prov)[1] break # but if the resource doesn't have the bold-to-template transform @@ -902,7 +1169,7 @@ def derivative_xfm(self, wf, label, connection, json_info, pipe_idx, pipe_x): ) new_prov = json_info["CpacProvenance"] + xfm_prov json_info["CpacProvenance"] = new_prov - new_pipe_idx = self.generate_prov_string(new_prov) + new_pipe_idx = generate_prov_string(new_prov) self.set_data( label, xfm, @@ -986,7 +1253,7 @@ def post_process(self, wf, label, connection, json_info, pipe_idx, pipe_x, outs) if isinstance(entry, list): if entry[-1].split(":")[0] == mask: mask_prov = entry - mask_idx = self.generate_prov_string(mask_prov)[1] + mask_idx = generate_prov_string(mask_prov)[1] break if self.smoothing_bool: @@ -1110,7 +1377,7 @@ def post_process(self, wf, label, connection, json_info, pipe_idx, pipe_x, outs) def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): excl = [] substring_excl = [] - outputs_logger = getLogger(f'{cfg["subject_id"]}_expectedOutputs') + outputs_logger = getLogger(f"{self.part_id}_expectedOutputs") expected_outputs = ExpectedOutputs() if add_excl: @@ -1333,9 +1600,7 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): node, out = self.rpool["scan"]["['scan:func_ingress']"]["data"] wf.connect(node, out, id_string, "scan_id") - self.back_propogate_template_name( - wf, resource_idx, json_info, id_string - ) + self.back_propogate_template_name(resource_idx, json_info, id_string) # grab the FWHM if smoothed for tag in resource.split("_"): if "desc-" in tag and "-sm" in tag: @@ -1453,12 +1718,846 @@ def node_data(self, resource, **kwargs): """ return NodeData(self, resource, **kwargs) + def ingress_freesurfer(self) -> None: + """Ingress FreeSurfer data.""" + try: + fs_path = os.path.join( + self.cfg.pipeline_setup["freesurfer_dir"], self.part_id + ) + except KeyError: + WFLOGGER.warning("No FreeSurfer data present.") + return + + # fs_path = os.path.join(cfg.pipeline_setup['freesurfer_dir'], part_id) + if not os.path.exists(fs_path): + if "sub" in self.part_id: + fs_path = os.path.join( + self.cfg.pipeline_setup["freesurfer_dir"], + self.part_id.replace("sub-", ""), + ) + else: + fs_path = os.path.join( + self.cfg.pipeline_setup["freesurfer_dir"], ("sub-" + self.part_id) + ) + + # patch for flo-specific data + if not os.path.exists(fs_path): + subj_ses = f"{self.part_id}-{self.ses_id}" + fs_path = os.path.join( + self.cfg.pipeline_setup["freesurfer_dir"], subj_ses + ) + if not os.path.exists(fs_path): + WFLOGGER.info( + "No FreeSurfer data found for subject %s", self.part_id + ) + return + + # Check for double nested subj names + if os.path.exists(os.path.join(fs_path, os.path.basename(fs_path))): + fs_path = os.path.join(fs_path, self.part_id) + + fs_ingress = create_general_datasource("gather_freesurfer_dir") + fs_ingress.inputs.inputnode.set( + unique_id=self.unique_id, + data=fs_path, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + ) + self.set_data( + "freesurfer-subject-dir", + fs_ingress, + "outputspec.data", + {}, + "", + "freesurfer_config_ingress", + ) + + recon_outs = { + "pipeline-fs_raw-average": "mri/rawavg.mgz", + "pipeline-fs_subcortical-seg": "mri/aseg.mgz", + 
"pipeline-fs_brainmask": "mri/brainmask.mgz", + "pipeline-fs_wmparc": "mri/wmparc.mgz", + "pipeline-fs_T1": "mri/T1.mgz", + "pipeline-fs_hemi-L_desc-surface_curv": "surf/lh.curv", + "pipeline-fs_hemi-R_desc-surface_curv": "surf/rh.curv", + "pipeline-fs_hemi-L_desc-surfaceMesh_pial": "surf/lh.pial", + "pipeline-fs_hemi-R_desc-surfaceMesh_pial": "surf/rh.pial", + "pipeline-fs_hemi-L_desc-surfaceMesh_smoothwm": "surf/lh.smoothwm", + "pipeline-fs_hemi-R_desc-surfaceMesh_smoothwm": "surf/rh.smoothwm", + "pipeline-fs_hemi-L_desc-surfaceMesh_sphere": "surf/lh.sphere", + "pipeline-fs_hemi-R_desc-surfaceMesh_sphere": "surf/rh.sphere", + "pipeline-fs_hemi-L_desc-surfaceMap_sulc": "surf/lh.sulc", + "pipeline-fs_hemi-R_desc-surfaceMap_sulc": "surf/rh.sulc", + "pipeline-fs_hemi-L_desc-surfaceMap_thickness": "surf/lh.thickness", + "pipeline-fs_hemi-R_desc-surfaceMap_thickness": "surf/rh.thickness", + "pipeline-fs_hemi-L_desc-surfaceMap_volume": "surf/lh.volume", + "pipeline-fs_hemi-R_desc-surfaceMap_volume": "surf/rh.volume", + "pipeline-fs_hemi-L_desc-surfaceMesh_white": "surf/lh.white", + "pipeline-fs_hemi-R_desc-surfaceMesh_white": "surf/rh.white", + "pipeline-fs_xfm": "mri/transforms/talairach.lta", + } + + for key, outfile in recon_outs.items(): + fullpath = os.path.join(fs_path, outfile) + if os.path.exists(fullpath): + fs_ingress = create_general_datasource(f"gather_fs_{key}_dir") + fs_ingress.inputs.inputnode.set( + unique_id=self.unique_id, + data=fullpath, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + ) + self.set_data( + key, fs_ingress, "outputspec.data", {}, "", f"fs_{key}_ingress" + ) + else: + warnings.warn( + str(LookupError(f"\n[!] Path does not exist for {fullpath}.\n")) + ) + + return + + def ingress_output_dir(self) -> None: + """Ingress an output directory into a ResourcePool.""" + dir_path = self.data_paths["derivatives_dir"] + + WFLOGGER.info("\nPulling outputs from %s.\n", dir_path) + + anat = os.path.join(dir_path, "anat") + func = os.path.join(dir_path, "func") + + outdir_anat = [] + outdir_func = [] + func_paths = {} + func_dict = {} + func_key = "" + + for subdir in [anat, func]: + if os.path.isdir(subdir): + for filename in os.listdir(subdir): + for ext in EXTS: + if ext in filename: + if subdir == anat: + outdir_anat.append(os.path.join(subdir, filename)) + else: + outdir_func.append(os.path.join(subdir, filename)) + + # Add derivatives directory to rpool + ingress = create_general_datasource("gather_derivatives_dir") + ingress.inputs.inputnode.set( + unique_id=self.unique_id, + data=dir_path, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + ) + self.set_data( + "derivatives-dir", + ingress, + "outputspec.data", + {}, + "", + "outdir_config_ingress", + ) + + for subdir in [outdir_anat, outdir_func]: + for filepath in subdir: + filename = str(filepath) + for ext in EXTS: + filename = filename.split("/")[-1].replace(ext, "") + + data_label = filename.split(self.unique_id)[1].lstrip("_") + + if len(filename) == len(data_label): + msg = ( + "\n\n[!] 
Possibly wrong participant or " + "session in this directory?\n\n" + f"Filepath: {filepath}\n\n" + ) + raise Exception(msg) + + bidstag = "" + for tag in data_label.split("_"): + for prefix in ["task-", "run-", "acq-", "rec"]: + if tag.startswith(prefix): + bidstag += f"{tag}_" + data_label = data_label.replace(f"{tag}_", "") + data_label, json = strip_template(data_label) + + json_info, pipe_idx, node_name, data_label = json_outdir_ingress( + filepath, data_label, json + ) + + if ( + "template" in data_label + and not json_info["Template"] + == self.cfg.pipeline_setup["outdir_ingress"]["Template"] + ): + continue + # Rename confounds to avoid confusion in nuisance regression + if data_label.endswith("desc-confounds_timeseries"): + data_label = "pipeline-ingress_desc-confounds_timeseries" + + if len(bidstag) > 1: + # Remove tail symbol + bidstag = bidstag[:-1] + if bidstag.startswith("task-"): + bidstag = bidstag.replace("task-", "") + + # Rename bold mask for CPAC naming convention + # and to avoid collision with anat brain mask + if data_label.endswith("desc-brain_mask") and filepath in outdir_func: + data_label = data_label.replace("brain_mask", "bold_mask") + + try: + pipe_x = self.get_pipe_number(pipe_idx) + except ValueError: + pipe_x = len(self.pipe_list) + if filepath in outdir_anat: + ingress = create_general_datasource( + f"gather_anat_outdir_{data_label!s}_{pipe_x}" + ) + ingress.inputs.inputnode.set( + unique_id=self.unique_id, + data=filepath, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + ) + self.set_data( + data_label, + ingress, + "outputspec.data", + json_info, + pipe_idx, + node_name, + f"outdir_{data_label}_ingress", + inject=True, + ) + else: + if data_label.endswith("desc-preproc_bold"): + func_key = data_label + func_dict[bidstag] = {} + func_dict[bidstag]["scan"] = str(filepath) + func_dict[bidstag]["scan_parameters"] = json_info + func_dict[bidstag]["pipe_idx"] = pipe_idx + if data_label.endswith("desc-brain_mask"): + data_label = data_label.replace("brain_mask", "bold_mask") + try: + func_paths[data_label].append(filepath) + except (AttributeError, KeyError, TypeError): + func_paths[data_label] = [] + func_paths[data_label].append(filepath) + + if func_dict: + self.func_outdir_ingress( + func_dict, + func_key, + func_paths, + ) + + if self.cfg.surface_analysis["freesurfer"]["ingress_reconall"]: + self.ingress_freesurfer() + + def ingress_func_metadata( + self, + num_strat=None, + ) -> tuple[bool, bool, list[str]]: + """Ingress metadata for functional scans.""" + name_suffix = "" + for suffix_part in (self.unique_id, num_strat): + if suffix_part is not None: + name_suffix += f"_{suffix_part}" + # Grab field maps + diff = False + blip = False + fmap_rp_list = [] + fmap_TE_list = [] + if "fmap" in self.data_paths: + second = False + for orig_key in self.data_paths["fmap"]: + gather_fmap = create_fmap_datasource( + self.data_paths["fmap"], f"fmap_gather_{orig_key}_{self.part_id}" + ) + gather_fmap.inputs.inputnode.set( + subject=self.part_id, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + ) + gather_fmap.inputs.inputnode.scan = orig_key + + key = orig_key + if "epi" in key and not second: + key = "epi-1" + second = True + elif "epi" in key and second: + key = "epi-2" + + self.set_data( + key, gather_fmap, "outputspec.rest", {}, "", "fmap_ingress" + ) + self.set_data( + f"{key}-scan-params", + gather_fmap, + "outputspec.scan_params", + {}, + "", + "fmap_params_ingress", + ) 
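# For context, a hypothetical phasediff sidecar of the kind parsed by the
# get_fmap_phasediff_metadata node constructed below (field names follow BIDS;
# the values here are illustrative only, not taken from a real dataset):
#
#   {
#       "EchoTime1": 0.00492,
#       "EchoTime2": 0.00738,
#       "PhaseEncodingDirection": "j-",
#       "EffectiveEchoSpacing": 0.00058,
#       "TotalReadoutTime": 0.0476
#   }
#
# A phasediff may instead carry a single "EchoTime" field, so at least one of the
# {key}-TE / {key}-TE1 / {key}-TE2 resources set further down is expected to be
# None; gather_echo_times reconciles whichever combination is present downstream.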
+ + fmap_rp_list.append(key) + + get_fmap_metadata_imports = ["import json"] + get_fmap_metadata = pe.Node( + Function( + input_names=["data_config_scan_params"], + output_names=[ + "dwell_time", + "pe_direction", + "total_readout", + "echo_time", + "echo_time_one", + "echo_time_two", + ], + function=get_fmap_phasediff_metadata, + imports=get_fmap_metadata_imports, + ), + name=f"{key}_get_metadata{name_suffix}", + ) + + self._init_wf.connect( + gather_fmap, + "outputspec.scan_params", + get_fmap_metadata, + "data_config_scan_params", + ) + + if "phase" in key: + # leave it open to all three options, in case there is a + # phasediff image with either a single EchoTime field (which + # usually matches one of the magnitude EchoTimes), OR + # a phasediff with an EchoTime1 and EchoTime2 + + # at least one of these rpool keys will have a None value, + # which will be sorted out in gather_echo_times below + self.set_data( + f"{key}-TE", + get_fmap_metadata, + "echo_time", + {}, + "", + "fmap_TE_ingress", + ) + fmap_TE_list.append(f"{key}-TE") + + self.set_data( + f"{key}-TE1", + get_fmap_metadata, + "echo_time_one", + {}, + "", + "fmap_TE1_ingress", + ) + fmap_TE_list.append(f"{key}-TE1") -def initiate_rpool(wf, cfg, data_paths=None, part_id=None): + self.set_data( + f"{key}-TE2", + get_fmap_metadata, + "echo_time_two", + {}, + "", + "fmap_TE2_ingress", + ) + fmap_TE_list.append(f"{key}-TE2") + + elif "magnitude" in key: + self.set_data( + f"{key}-TE", + get_fmap_metadata, + "echo_time", + {}, + "", + "fmap_TE_ingress", + ) + fmap_TE_list.append(f"{key}-TE") + + self.set_data( + f"{key}-dwell", + get_fmap_metadata, + "dwell_time", + {}, + "", + "fmap_dwell_ingress", + ) + self.set_data( + f"{key}-pedir", + get_fmap_metadata, + "pe_direction", + {}, + "", + "fmap_pedir_ingress", + ) + self.set_data( + f"{key}-total-readout", + get_fmap_metadata, + "total_readout", + {}, + "", + "fmap_readout_ingress", + ) + + if "phase" in key or "mag" in key: + diff = True + + if re.match("epi_[AP]{2}", orig_key): + blip = True + + if diff: + calc_delta_ratio = pe.Node( + Function( + input_names=["effective_echo_spacing", "echo_times"], + output_names=["deltaTE", "ees_asym_ratio"], + function=calc_delta_te_and_asym_ratio, + imports=["from typing import Optional"], + ), + name=f"diff_distcor_calc_delta{name_suffix}", + ) + + gather_echoes = pe.Node( + Function( + input_names=[ + "echotime_1", + "echotime_2", + "echotime_3", + "echotime_4", + ], + output_names=["echotime_list"], + function=gather_echo_times, + ), + name="fugue_gather_echo_times", + ) + + for idx, fmap_file in enumerate(fmap_TE_list, start=1): + try: + node, out_file = self.get(fmap_file)[ + f"['{fmap_file}:fmap_TE_ingress']" + ]["data"] + self._init_wf.connect( + node, out_file, gather_echoes, f"echotime_{idx}" + ) + except KeyError: + pass + + self._init_wf.connect( + gather_echoes, "echotime_list", calc_delta_ratio, "echo_times" + ) + + # Add in nodes to get parameters from configuration file + # a node which checks if scan_parameters are present for each scan + scan_params = pe.Node( + Function( + input_names=[ + "data_config_scan_params", + "subject_id", + "scan", + "pipeconfig_tr", + "pipeconfig_tpattern", + "pipeconfig_start_indx", + "pipeconfig_stop_indx", + ], + output_names=[ + "tr", + "tpattern", + "template", + "ref_slice", + "start_indx", + "stop_indx", + "pe_direction", + "effective_echo_spacing", + ], + function=get_scan_params, + imports=["from CPAC.utils.utils import check, try_fetch_parameter"], + ), + 
name=f"bold_scan_params_{self.part_id}{name_suffix}", + ) + scan_params.inputs.subject_id = self.part_id + scan_params.inputs.set( + pipeconfig_start_indx=self.cfg.functional_preproc["truncation"]["start_tr"], + pipeconfig_stop_indx=self.cfg.functional_preproc["truncation"]["stop_tr"], + ) + + node, out = self.get("scan")["['scan:func_ingress']"]["data"] + self._init_wf.connect(node, out, scan_params, "scan") + + # Workaround for extracting metadata with ingress + if self.check_rpool("derivatives-dir"): + selectrest_json = pe.Node( + Function( + input_names=["scan", "rest_dict", "resource"], + output_names=["file_path"], + function=get_rest, + as_module=True, + ), + name="selectrest_json", + ) + selectrest_json.inputs.rest_dict = self.data_paths + selectrest_json.inputs.resource = "scan_parameters" + self._init_wf.connect(node, out, selectrest_json, "scan") + self._init_wf.connect( + selectrest_json, "file_path", scan_params, "data_config_scan_params" + ) + + else: + # wire in the scan parameter workflow + node, out = self.get("scan-params")["['scan-params:scan_params_ingress']"][ + "data" + ] + self._init_wf.connect(node, out, scan_params, "data_config_scan_params") + + self.set_data("TR", scan_params, "tr", {}, "", "func_metadata_ingress") + self.set_data( + "tpattern", scan_params, "tpattern", {}, "", "func_metadata_ingress" + ) + self.set_data( + "template", scan_params, "template", {}, "", "func_metadata_ingress" + ) + self.set_data( + "start-tr", scan_params, "start_indx", {}, "", "func_metadata_ingress" + ) + self.set_data( + "stop-tr", scan_params, "stop_indx", {}, "", "func_metadata_ingress" + ) + self.set_data( + "pe-direction", scan_params, "pe_direction", {}, "", "func_metadata_ingress" + ) + + if diff: + # Connect EffectiveEchoSpacing from functional metadata + self.set_data( + "effectiveEchoSpacing", + scan_params, + "effective_echo_spacing", + {}, + "", + "func_metadata_ingress", + ) + node, out_file = self.get("effectiveEchoSpacing")[ + "['effectiveEchoSpacing:func_metadata_ingress']" + ]["data"] + self._init_wf.connect( + node, out_file, calc_delta_ratio, "effective_echo_spacing" + ) + self.set_data( + "deltaTE", calc_delta_ratio, "deltaTE", {}, "", "deltaTE_ingress" + ) + self.set_data( + "ees-asym-ratio", + calc_delta_ratio, + "ees_asym_ratio", + {}, + "", + "ees_asym_ratio_ingress", + ) + + return diff, blip, fmap_rp_list + + def ingress_pipeconfig_paths(self): + """Ingress config file paths.""" + # TODO: may want to change the resource keys for each to include one level up in the YAML as well + + import pandas as pd + import pkg_resources as p + + template_csv = p.resource_filename("CPAC", "resources/cpac_templates.csv") + template_df = pd.read_csv(template_csv, keep_default_na=False) + + for row in template_df.itertuples(): + key = row.Key + val = row.Pipeline_Config_Entry + val = self.cfg.get_nested(self.cfg, [x.lstrip() for x in val.split(",")]) + resolution = row.Intended_Resolution_Config_Entry + desc = row.Description + + if not val: + continue + + if resolution: + res_keys = [x.lstrip() for x in resolution.split(",")] + tag = res_keys[-1] + json_info = {} + + if "$FSLDIR" in val: + val = val.replace( + "$FSLDIR", self.cfg.pipeline_setup["system_config"]["FSLDIR"] + ) + if "$priors_path" in val: + priors_path = ( + self.cfg.segmentation["tissue_segmentation"]["FSL-FAST"][ + "use_priors" + ]["priors_path"] + or "" + ) + if "$FSLDIR" in priors_path: + priors_path = priors_path.replace( + "$FSLDIR", self.cfg.pipeline_setup["system_config"]["FSLDIR"] + ) + val = 
val.replace("$priors_path", priors_path) + if "${resolution_for_anat}" in val: + val = val.replace( + "${resolution_for_anat}", + self.cfg.registration_workflows["anatomical_registration"][ + "resolution_for_anat" + ], + ) + if "${func_resolution}" in val: + val = val.replace( + "${func_resolution}", + self.cfg.registration_workflows["functional_registration"][ + "func_registration_to_template" + ]["output_resolution"][tag], + ) + + if desc: + template_name, _template_desc = lookup_identifier(val) + if template_name: + desc = f"{template_name} - {desc}" + json_info["Description"] = f"{desc} - {val}" + if resolution: + resolution = self.cfg.get_nested(self.cfg, res_keys) + json_info["Resolution"] = resolution + + resampled_template = pe.Node( + Function( + input_names=["resolution", "template", "template_name", "tag"], + output_names=["resampled_template"], + function=resolve_resolution, + as_module=True, + ), + name="resampled_" + key, + ) + + resampled_template.inputs.resolution = resolution + resampled_template.inputs.template = val + resampled_template.inputs.template_name = key + resampled_template.inputs.tag = tag + + # the set_data below is set up a little differently, because we are + # injecting and also over-writing already-existing entries + # other alternative would have been to ingress into the + # resampled_template node from the already existing entries, but we + # didn't do that here + self.set_data( + key, + resampled_template, + "resampled_template", + json_info, + "", + "template_resample", + ) # pipe_idx (after the blank json {}) should be the previous strat that you want deleted! because you're not connecting this the regular way, you have to do it manually + + elif val: + config_ingress = create_general_datasource(f"gather_{key}") + config_ingress.inputs.inputnode.set( + unique_id=self.unique_id, + data=val, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + ) + self.set_data( + key, + config_ingress, + "outputspec.data", + json_info, + "", + f"{key}_config_ingress", + ) + + def ingress_raw_func_data(self): + """Ingress raw functional data.""" + func_paths_dct = self.data_paths["func"] + + func_wf = self.create_func_datasource( + func_paths_dct, f"func_ingress_{self.part_id}_{self.ses_id}" + ) + func_wf.inputs.inputnode.set( + subject=self.part_id, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + ) + func_wf.get_node("inputnode").iterables = ("scan", list(func_paths_dct.keys())) + + self.set_data("subject", func_wf, "outputspec.subject", {}, "", "func_ingress") + self.set_data("bold", func_wf, "outputspec.rest", {}, "", "func_ingress") + self.set_data("scan", func_wf, "outputspec.scan", {}, "", "func_ingress") + self.set_data( + "scan-params", + func_wf, + "outputspec.scan_params", + {}, + "", + "scan_params_ingress", + ) + + # TODO: CHECK FOR PARAMETERS + + diff, blip, fmap_rp_list = self.ingress_func_metadata() + + # Memoize list of local functional scans + # TODO: handle S3 files + # Skip S3 files for now + + local_func_scans = [ + func_paths_dct[scan]["scan"] + for scan in func_paths_dct.keys() + if not func_paths_dct[scan]["scan"].startswith("s3://") + ] + if local_func_scans: + # pylint: disable=protected-access + self._init_wf._local_func_scans = local_func_scans + if self.cfg.pipeline_setup["Debugging"]["verbose"]: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("local_func_scans: %s", local_func_scans) + del local_func_scans + + return diff, blip, 
fmap_rp_list + + def func_outdir_ingress(self, func_dict: dict, key: str, func_paths: dict) -> None: + """Ingress a functional output directory.""" + pipe_x = len(self.pipe_list) + ingress = self.create_func_datasource( + func_dict, f"gather_func_outdir_{key}_{pipe_x}" + ) + ingress.inputs.inputnode.set( + subject=self.unique_id, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + ) + self.set_data("subject", ingress, "outputspec.subject", {}, "", "func_ingress") + ingress.get_node("inputnode").iterables = ("scan", list(func_dict.keys())) + self.set_data(key, ingress, "outputspec.rest", {}, "", "func_ingress") + + self.set_data("scan", ingress, "outputspec.scan", {}, "", "func_ingress") + self.set_data( + "scan-params", + ingress, + "outputspec.scan_params", + {}, + "", + "scan_params_ingress", + ) + self.ingress_func_metadata() + + # Have to do it this weird way to save the parsed BIDS tag & filepath + mask_paths_key = ( + "desc-bold_mask" + if "desc-bold_mask" in func_paths + else "space-template_desc-bold_mask" + ) + ts_paths_key = "pipeline-ingress_desc-confounds_timeseries" + + # Connect func data with approproate scan name + iterables = pe.Node( + Function( + input_names=["scan", "mask_paths", "ts_paths"], + output_names=["out_scan", "mask", "confounds"], + function=set_iterables, + ), + name=f"set_iterables_{pipe_x}", + ) + iterables.inputs.mask_paths = func_paths[mask_paths_key] + iterables.inputs.ts_paths = func_paths[ts_paths_key] + self._init_wf.connect(ingress, "outputspec.scan", iterables, "scan") + + for key in func_paths: + if key in (mask_paths_key, ts_paths_key): + ingress_func = create_general_datasource(f"ingress_func_data_{key}") + ingress_func.inputs.inputnode.set( + unique_id=self.unique_id, + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + ) + self._init_wf.connect( + iterables, "out_scan", ingress_func, "inputnode.scan" + ) + if key == mask_paths_key: + self._init_wf.connect( + iterables, "mask", ingress_func, "inputnode.data" + ) + self.set_data( + key, + ingress_func, + "inputnode.data", + {}, + "", + f"outdir_{key}_ingress", + ) + elif key == ts_paths_key: + self._init_wf.connect( + iterables, "confounds", ingress_func, "inputnode.data" + ) + self.set_data( + key, + ingress_func, + "inputnode.data", + {}, + "", + f"outdir_{key}_ingress", + ) + + def ingress_raw_anat_data(self) -> None: + """Ingress raw anatomical data.""" + if "anat" not in self.data_paths: + WFLOGGER.warning("No anatomical data present.") + return + + anat_flow = create_anat_datasource( + f"anat_T1w_gather_{self.part_id}_{self.ses_id}" + ) + + anat = {} + if isinstance(self.data_paths["anat"], str): + anat["T1"] = self.data_paths["anat"] + elif "T1w" in self.data_paths["anat"]: + anat["T1"] = self.data_paths["anat"]["T1w"] + + if "T1" in anat: + anat_flow.inputs.inputnode.set( + subject=self.part_id, + anat=anat["T1"], + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + img_type="anat", + ) + self.set_data("T1w", anat_flow, "outputspec.anat", {}, "", "anat_ingress") + + if "T2w" in self.data_paths["anat"]: + anat_flow_T2 = create_anat_datasource( + f"anat_T2w_gather_{self.part_id}_{self.ses_id}" + ) + anat_flow_T2.inputs.inputnode.set( + subject=self.part_id, + anat=self.data_paths["anat"]["T2w"], + creds_path=self.creds_path, + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + img_type="anat", + ) + self.set_data( + "T2w", anat_flow_T2, 
"outputspec.anat", {}, "", "anat_ingress" + ) + + if self.cfg.surface_analysis["freesurfer"]["ingress_reconall"]: + self.ingress_freesurfer() + + +def initiate_rpool( + wf: pe.Workflow, + cfg: Configuration, + data_paths: Optional[dict] = None, + part_id: Optional[str] = None, +) -> ResourcePool: """ Initialize a new ResourcePool. - data_paths format: + data_paths format:: + {'anat': { 'T1w': '{T1w path}', 'T2w': '{T2w path}' @@ -1480,20 +2579,32 @@ def initiate_rpool(wf, cfg, data_paths=None, part_id=None): # TODO: used for BIDS-Derivatives (below), and possible refactoring of # TODO: the raw data config to use 'T1w' label instead of 'anat' etc. + kwargs = {"cfg": cfg, "wf": wf} if data_paths: - part_id = data_paths["subject_id"] - ses_id = data_paths["unique_id"] + part_id: str = data_paths["subject_id"] + ses_id: str = data_paths["unique_id"] if "creds_path" not in data_paths: creds_path = None else: - creds_path = data_paths["creds_path"] - unique_id = f"{part_id}_{ses_id}" - + creds_path: Optional[Path | str] = data_paths["creds_path"] + unique_id: str = f"{part_id}_{ses_id}" + kwargs.update( + { + "part_id": part_id, + "ses_id": ses_id, + "creds_path": creds_path, + "data_paths": data_paths, + } + ) elif part_id: unique_id = part_id creds_path = None + kwargs.update({"part_id": part_id, "creds_path": creds_path}) + else: + unique_id = "" + kwargs.update({"unique_id": unique_id}) - rpool = ResourcePool(name=unique_id, cfg=cfg) + rpool = ResourcePool(name=unique_id, **kwargs) if data_paths: # ingress outdir @@ -1502,32 +2613,25 @@ def initiate_rpool(wf, cfg, data_paths=None, part_id=None): data_paths["derivatives_dir"] and cfg.pipeline_setup["outdir_ingress"]["run"] ): - wf, rpool = ingress_output_dir( - wf, - cfg, - rpool, - unique_id, - data_paths, - part_id, - ses_id, - creds_path=None, - ) - except: - rpool = ingress_raw_anat_data( - wf, rpool, cfg, data_paths, unique_id, part_id, ses_id - ) + rpool.ingress_output_dir() + except (AttributeError, KeyError): + rpool.ingress_raw_anat_data() if "func" in data_paths: - wf, rpool, diff, blip, fmap_rp_list = ingress_raw_func_data( - wf, rpool, cfg, data_paths, unique_id, part_id, ses_id - ) + rpool.ingress_raw_func_data() # grab any file paths from the pipeline config YAML - rpool = ingress_pipeconfig_paths(cfg, rpool, unique_id, creds_path) + rpool.ingress_pipeconfig_paths() # output files with 4 different scans - return (wf, rpool) + return rpool._init_wf, rpool class StratPool(ResourcePool): """All resources for a strategy.""" + + def __init__(self): + """Initialize a ResourcePool.""" + + def append_name(self, name): + self.name.append(name) diff --git a/CPAC/utils/bids_utils.py b/CPAC/utils/bids_utils.py index 34e72d430e..08e6edb989 100755 --- a/CPAC/utils/bids_utils.py +++ b/CPAC/utils/bids_utils.py @@ -14,6 +14,9 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . 
+"""Utilities for using BIDS data.""" + +from base64 import b64decode import json import os import re @@ -91,8 +94,7 @@ def bids_decode_fname(file_path, dbg=False, raise_error=True): ) if raise_error: raise ValueError(msg) - else: - UTLOGGER.error(msg) + UTLOGGER.error(msg) elif not f_dict["scantype"]: msg = ( f"Filename ({fname}) does not appear to contain" @@ -100,8 +102,7 @@ def bids_decode_fname(file_path, dbg=False, raise_error=True): ) if raise_error: raise ValueError(msg) - else: - UTLOGGER.error(msg) + UTLOGGER.error(msg) elif "bold" in f_dict["scantype"] and not f_dict["task"]: msg = ( f"Filename ({fname}) is a BOLD file, but doesn't contain a task, does" @@ -109,15 +110,13 @@ def bids_decode_fname(file_path, dbg=False, raise_error=True): ) if raise_error: raise ValueError(msg) - else: - UTLOGGER.error(msg) + UTLOGGER.error(msg) return f_dict def bids_entities_from_filename(filename): - """Function to collect a list of BIDS entities from a given - filename. + """Collect a list of BIDS entities from a given filename. Parameters ---------- @@ -142,7 +141,7 @@ def bids_entities_from_filename(filename): def bids_match_entities(file_list, entities, suffix): - """Function to subset a list of filepaths by a passed BIDS entity. + """Subset a list of filepaths by a passed BIDS entity. Parameters ---------- @@ -250,10 +249,9 @@ def bids_remove_entity(name, key): def bids_retrieve_params(bids_config_dict, f_dict, dbg=False): - """ + """Retrieve BIDS parameters for BIDS file corresponding to f_dict. - Retrieve the BIDS parameters from bids_config_dict for BIDS file - corresponding to f_dict. If an exact match for f_dict is not found + If an exact match for f_dict is not found the nearest match is returned, corresponding to the BIDS inheritance principle. @@ -316,12 +314,10 @@ def bids_retrieve_params(bids_config_dict, f_dict, dbg=False): return params -def bids_parse_sidecar(config_dict, dbg=False, raise_error=True): - # type: (dict, bool) -> dict - """ - Uses the BIDS principle of inheritance to build a data structure that - maps parameters in side car .json files to components in the names of - corresponding nifti files. +def bids_parse_sidecar( + config_dict: dict, dbg: bool = False, raise_error: bool = True +) -> dict: + """Use BIDS inheritance to map parameters in sidecar to corresponding NIfTI files. :param config_dict: dictionary that maps paths of sidecar json files (the key) to a dictionary containing the contents of the files (the values) @@ -428,9 +424,9 @@ def bids_parse_sidecar(config_dict, dbg=False, raise_error=True): def bids_shortest_entity(file_list): - """Function to return the single file with the shortest chain of - BIDS entities from a given list, returning the first if more than - one have the same minimum length. + """Return the single file with the shortest chain of BIDS entities from a list. + + Return the first if more than one have the same minimum length. Parameters ---------- @@ -553,9 +549,7 @@ def bids_gen_cpac_sublist( raise_error=True, only_one_anat=True, ): - """ - Generates a CPAC formatted subject list from information contained in a - BIDS formatted set of data. + """Generate a CPAC formatted subject list from a BIDS dataset. Parameters ---------- @@ -910,8 +904,9 @@ def camelCase(string: str) -> str: # pylint: disable=invalid-name def combine_multiple_entity_instances(bids_str: str) -> str: - """Combines mutliple instances of a key in a BIDS string to a single - instance by camelCasing and concatenating the values. 
+ """Combine mutliple instances of a key in a BIDS string to a single instance. + + camelCase and concatenate the values. Parameters ---------- @@ -950,8 +945,7 @@ def combine_multiple_entity_instances(bids_str: str) -> str: def insert_entity(resource, key, value): - """Insert a `f'{key}-{value}'` BIDS entity before `desc-` if - present or before the suffix otherwise. + """Insert a BIDS entity before `desc-` if present or before the suffix otherwise. Parameters ---------- @@ -983,7 +977,8 @@ def insert_entity(resource, key, value): return "_".join([*new_entities[0], f"{key}-{value}", *new_entities[1], suff]) -def load_yaml_config(config_filename, aws_input_creds): +def load_yaml_config(config_filename: str, aws_input_creds: str) -> dict | list: + """Load a YAML configuration file, locally or from AWS.""" if config_filename.lower().startswith("data:"): try: header, encoded = config_filename.split(",", 1) @@ -1020,8 +1015,7 @@ def load_yaml_config(config_filename, aws_input_creds): def cl_strip_brackets(arg_list): - """Removes '[' from before first and ']' from after final - arguments in a list of commandline arguments. + """Remove '[' from before first and ']' from after final arguments. Parameters ---------- @@ -1051,7 +1045,7 @@ def create_cpac_data_config( aws_input_creds=None, skip_bids_validator=False, only_one_anat=True, -): +) -> list[dict]: """ Create a C-PAC data config YAML file from a BIDS directory. @@ -1111,8 +1105,7 @@ def create_cpac_data_config( def load_cpac_data_config(data_config_file, participant_labels, aws_input_creds): - """ - Loads the file as a check to make sure it is available and readable. + """Load the file to make sure it is available and readable. Parameters ---------- @@ -1210,8 +1203,7 @@ def res_in_filename(cfg, label): def sub_list_filter_by_labels(sub_list, labels): - """Function to filter a sub_list by provided BIDS labels for - specified suffixes. + """Filter a sub_list by provided BIDS labels for specified suffixes. Parameters ---------- @@ -1287,7 +1279,7 @@ def without_key(entity: str, key: str) -> str: def _t1w_filter(anat, shortest_entity, label): - """Helper function to filter T1w paths. + """Filter T1w paths. Parameters ---------- @@ -1318,7 +1310,7 @@ def _t1w_filter(anat, shortest_entity, label): def _sub_anat_filter(anat, shortest_entity, label): - """Helper function to filter anat paths in sub_list. + """Filter anat paths in sub_list. Parameters ---------- @@ -1341,7 +1333,7 @@ def _sub_anat_filter(anat, shortest_entity, label): def _sub_list_filter_by_label(sub_list, label_type, label): - """Function to filter a sub_list by a CLI-provided label. + """Filter a sub_list by a CLI-provided label. Parameters ---------- @@ -1410,7 +1402,7 @@ def _sub_list_filter_by_label(sub_list, label_type, label): def _match_functional_scan(sub_list_func_dict, scan_file_to_match): - """Function to subset a scan from a sub_list_func_dict by a scan filename. + """Subset a scan from a sub_list_func_dict by a scan filename. 
Parameters ---------- diff --git a/CPAC/utils/datasource.py b/CPAC/utils/datasource.py index d0089d8afe..2633b56b69 100644 --- a/CPAC/utils/datasource.py +++ b/CPAC/utils/datasource.py @@ -20,6 +20,7 @@ import json from pathlib import Path import re +from typing import Optional from voluptuous import RequiredFieldInvalid from nipype.interfaces import utility as util @@ -28,9 +29,7 @@ from CPAC.resources.templates.lookup_table import format_identifier, lookup_identifier from CPAC.utils import function from CPAC.utils.bids_utils import bids_remove_entity -from CPAC.utils.interfaces.function import Function from CPAC.utils.monitoring import FMLOGGER -from CPAC.utils.utils import get_scan_params def bidsier_prefix(unique_id): @@ -64,7 +63,7 @@ def bidsier_prefix(unique_id): return "_".join(components) -def get_rest(scan, rest_dict, resource="scan"): +def get_rest(scan: str, rest_dict: dict, resource: str = "scan") -> Path | str: """Return the path of the chosen resource in the functional file dictionary. scan: the scan/series name or label @@ -127,7 +126,7 @@ def select_model_files(model, ftest, model_name): return fts_file, con_file, grp_file, mat_file -def check_func_scan(func_scan_dct, scan): +def check_func_scan(func_scan_dct: dict, scan: str) -> None: """Run some checks on the functional timeseries-related files. For a given series/scan name or label. @@ -168,119 +167,6 @@ def check_func_scan(func_scan_dct, scan): raise ValueError(msg) -def create_func_datasource(rest_dict, rpool, wf_name="func_datasource"): - """Return the functional timeseries-related file paths for each series/scan... - - ...from the dictionary of functional files described in the data - configuration (sublist) YAML file. - - Scan input (from inputnode) is an iterable. - """ - import nipype.interfaces.utility as util - - from CPAC.pipeline import nipype_pipeline_engine as pe - - wf = pe.Workflow(name=wf_name) - - inputnode = pe.Node( - util.IdentityInterface( - fields=["subject", "scan", "creds_path", "dl_dir"], mandatory_inputs=True - ), - name="inputnode", - ) - - outputnode = pe.Node( - util.IdentityInterface( - fields=["subject", "rest", "scan", "scan_params", "phase_diff", "magnitude"] - ), - name="outputspec", - ) - - # have this here for now because of the big change in the data - # configuration format - # (Not necessary with ingress - format does not comply) - if not rpool.check_rpool("derivatives-dir"): - check_scan = pe.Node( - function.Function( - input_names=["func_scan_dct", "scan"], - output_names=[], - function=check_func_scan, - as_module=True, - ), - name="check_func_scan", - ) - - check_scan.inputs.func_scan_dct = rest_dict - wf.connect(inputnode, "scan", check_scan, "scan") - - # get the functional scan itself - selectrest = pe.Node( - function.Function( - input_names=["scan", "rest_dict", "resource"], - output_names=["file_path"], - function=get_rest, - as_module=True, - ), - name="selectrest", - ) - selectrest.inputs.rest_dict = rest_dict - selectrest.inputs.resource = "scan" - wf.connect(inputnode, "scan", selectrest, "scan") - - # check to see if it's on an Amazon AWS S3 bucket, and download it, if it - # is - otherwise, just return the local file path - check_s3_node = pe.Node( - function.Function( - input_names=["file_path", "creds_path", "dl_dir", "img_type"], - output_names=["local_path"], - function=check_for_s3, - as_module=True, - ), - name="check_for_s3", - ) - - wf.connect(selectrest, "file_path", check_s3_node, "file_path") - wf.connect(inputnode, "creds_path", check_s3_node, 
"creds_path") - wf.connect(inputnode, "dl_dir", check_s3_node, "dl_dir") - check_s3_node.inputs.img_type = "func" - - wf.connect(inputnode, "subject", outputnode, "subject") - wf.connect(check_s3_node, "local_path", outputnode, "rest") - wf.connect(inputnode, "scan", outputnode, "scan") - - # scan parameters CSV - select_scan_params = pe.Node( - function.Function( - input_names=["scan", "rest_dict", "resource"], - output_names=["file_path"], - function=get_rest, - as_module=True, - ), - name="select_scan_params", - ) - select_scan_params.inputs.rest_dict = rest_dict - select_scan_params.inputs.resource = "scan_parameters" - wf.connect(inputnode, "scan", select_scan_params, "scan") - - # if the scan parameters file is on AWS S3, download it - s3_scan_params = pe.Node( - function.Function( - input_names=["file_path", "creds_path", "dl_dir", "img_type"], - output_names=["local_path"], - function=check_for_s3, - as_module=True, - ), - name="s3_scan_params", - ) - - wf.connect(select_scan_params, "file_path", s3_scan_params, "file_path") - wf.connect(inputnode, "creds_path", s3_scan_params, "creds_path") - wf.connect(inputnode, "dl_dir", s3_scan_params, "dl_dir") - wf.connect(s3_scan_params, "local_path", outputnode, "scan_params") - - return wf - - def create_fmap_datasource(fmap_dct, wf_name="fmap_datasource"): """Return the field map files... @@ -374,7 +260,7 @@ def create_fmap_datasource(fmap_dct, wf_name="fmap_datasource"): return wf -def get_fmap_phasediff_metadata(data_config_scan_params): +def get_fmap_phasediff_metadata(data_config_scan_params: dict | str): """Return the scan parameters for a field map phasediff scan.""" if ( not isinstance(data_config_scan_params, dict) @@ -513,299 +399,6 @@ def match_epi_fmaps( return (opposite_pe_epi, same_pe_epi) -def ingress_func_metadata( - wf, - cfg, - rpool, - sub_dict, - subject_id, - input_creds_path, - unique_id=None, - num_strat=None, -): - """Ingress metadata for functional scans.""" - name_suffix = "" - for suffix_part in (unique_id, num_strat): - if suffix_part is not None: - name_suffix += f"_{suffix_part}" - # Grab field maps - diff = False - blip = False - fmap_rp_list = [] - fmap_TE_list = [] - if "fmap" in sub_dict: - second = False - for orig_key in sub_dict["fmap"]: - gather_fmap = create_fmap_datasource( - sub_dict["fmap"], f"fmap_gather_{orig_key}_{subject_id}" - ) - gather_fmap.inputs.inputnode.set( - subject=subject_id, - creds_path=input_creds_path, - dl_dir=cfg.pipeline_setup["working_directory"]["path"], - ) - gather_fmap.inputs.inputnode.scan = orig_key - - key = orig_key - if "epi" in key and not second: - key = "epi-1" - second = True - elif "epi" in key and second: - key = "epi-2" - - rpool.set_data(key, gather_fmap, "outputspec.rest", {}, "", "fmap_ingress") - rpool.set_data( - f"{key}-scan-params", - gather_fmap, - "outputspec.scan_params", - {}, - "", - "fmap_params_ingress", - ) - - fmap_rp_list.append(key) - - get_fmap_metadata_imports = ["import json"] - get_fmap_metadata = pe.Node( - Function( - input_names=["data_config_scan_params"], - output_names=[ - "dwell_time", - "pe_direction", - "total_readout", - "echo_time", - "echo_time_one", - "echo_time_two", - ], - function=get_fmap_phasediff_metadata, - imports=get_fmap_metadata_imports, - ), - name=f"{key}_get_metadata{name_suffix}", - ) - - wf.connect( - gather_fmap, - "outputspec.scan_params", - get_fmap_metadata, - "data_config_scan_params", - ) - - if "phase" in key: - # leave it open to all three options, in case there is a - # phasediff image with either a 
single EchoTime field (which - # usually matches one of the magnitude EchoTimes), OR - # a phasediff with an EchoTime1 and EchoTime2 - - # at least one of these rpool keys will have a None value, - # which will be sorted out in gather_echo_times below - rpool.set_data( - f"{key}-TE", - get_fmap_metadata, - "echo_time", - {}, - "", - "fmap_TE_ingress", - ) - fmap_TE_list.append(f"{key}-TE") - - rpool.set_data( - f"{key}-TE1", - get_fmap_metadata, - "echo_time_one", - {}, - "", - "fmap_TE1_ingress", - ) - fmap_TE_list.append(f"{key}-TE1") - - rpool.set_data( - f"{key}-TE2", - get_fmap_metadata, - "echo_time_two", - {}, - "", - "fmap_TE2_ingress", - ) - fmap_TE_list.append(f"{key}-TE2") - - elif "magnitude" in key: - rpool.set_data( - f"{key}-TE", - get_fmap_metadata, - "echo_time", - {}, - "", - "fmap_TE_ingress", - ) - fmap_TE_list.append(f"{key}-TE") - - rpool.set_data( - f"{key}-dwell", - get_fmap_metadata, - "dwell_time", - {}, - "", - "fmap_dwell_ingress", - ) - rpool.set_data( - f"{key}-pedir", - get_fmap_metadata, - "pe_direction", - {}, - "", - "fmap_pedir_ingress", - ) - rpool.set_data( - f"{key}-total-readout", - get_fmap_metadata, - "total_readout", - {}, - "", - "fmap_readout_ingress", - ) - - if "phase" in key or "mag" in key: - diff = True - - if re.match("epi_[AP]{2}", orig_key): - blip = True - - if diff: - calc_delta_ratio = pe.Node( - Function( - input_names=["effective_echo_spacing", "echo_times"], - output_names=["deltaTE", "ees_asym_ratio"], - function=calc_delta_te_and_asym_ratio, - imports=["from typing import Optional"], - ), - name=f"diff_distcor_calc_delta{name_suffix}", - ) - - gather_echoes = pe.Node( - Function( - input_names=[ - "echotime_1", - "echotime_2", - "echotime_3", - "echotime_4", - ], - output_names=["echotime_list"], - function=gather_echo_times, - ), - name="fugue_gather_echo_times", - ) - - for idx, fmap_file in enumerate(fmap_TE_list, start=1): - try: - node, out_file = rpool.get(fmap_file)[ - f"['{fmap_file}:fmap_TE_ingress']" - ]["data"] - wf.connect(node, out_file, gather_echoes, f"echotime_{idx}") - except KeyError: - pass - - wf.connect(gather_echoes, "echotime_list", calc_delta_ratio, "echo_times") - - # Add in nodes to get parameters from configuration file - # a node which checks if scan_parameters are present for each scan - scan_params = pe.Node( - Function( - input_names=[ - "data_config_scan_params", - "subject_id", - "scan", - "pipeconfig_tr", - "pipeconfig_tpattern", - "pipeconfig_start_indx", - "pipeconfig_stop_indx", - ], - output_names=[ - "tr", - "tpattern", - "template", - "ref_slice", - "start_indx", - "stop_indx", - "pe_direction", - "effective_echo_spacing", - ], - function=get_scan_params, - imports=["from CPAC.utils.utils import check, try_fetch_parameter"], - ), - name=f"bold_scan_params_{subject_id}{name_suffix}", - ) - scan_params.inputs.subject_id = subject_id - scan_params.inputs.set( - pipeconfig_start_indx=cfg.functional_preproc["truncation"]["start_tr"], - pipeconfig_stop_indx=cfg.functional_preproc["truncation"]["stop_tr"], - ) - - node, out = rpool.get("scan")["['scan:func_ingress']"]["data"] - wf.connect(node, out, scan_params, "scan") - - # Workaround for extracting metadata with ingress - if rpool.check_rpool("derivatives-dir"): - selectrest_json = pe.Node( - function.Function( - input_names=["scan", "rest_dict", "resource"], - output_names=["file_path"], - function=get_rest, - as_module=True, - ), - name="selectrest_json", - ) - selectrest_json.inputs.rest_dict = sub_dict - selectrest_json.inputs.resource = 
"scan_parameters" - wf.connect(node, out, selectrest_json, "scan") - wf.connect(selectrest_json, "file_path", scan_params, "data_config_scan_params") - - else: - # wire in the scan parameter workflow - node, out = rpool.get("scan-params")["['scan-params:scan_params_ingress']"][ - "data" - ] - wf.connect(node, out, scan_params, "data_config_scan_params") - - rpool.set_data("TR", scan_params, "tr", {}, "", "func_metadata_ingress") - rpool.set_data("tpattern", scan_params, "tpattern", {}, "", "func_metadata_ingress") - rpool.set_data("template", scan_params, "template", {}, "", "func_metadata_ingress") - rpool.set_data( - "start-tr", scan_params, "start_indx", {}, "", "func_metadata_ingress" - ) - rpool.set_data("stop-tr", scan_params, "stop_indx", {}, "", "func_metadata_ingress") - rpool.set_data( - "pe-direction", scan_params, "pe_direction", {}, "", "func_metadata_ingress" - ) - - if diff: - # Connect EffectiveEchoSpacing from functional metadata - rpool.set_data( - "effectiveEchoSpacing", - scan_params, - "effective_echo_spacing", - {}, - "", - "func_metadata_ingress", - ) - node, out_file = rpool.get("effectiveEchoSpacing")[ - "['effectiveEchoSpacing:func_metadata_ingress']" - ]["data"] - wf.connect(node, out_file, calc_delta_ratio, "effective_echo_spacing") - rpool.set_data( - "deltaTE", calc_delta_ratio, "deltaTE", {}, "", "deltaTE_ingress" - ) - rpool.set_data( - "ees-asym-ratio", - calc_delta_ratio, - "ees_asym_ratio", - {}, - "", - "ees_asym_ratio_ingress", - ) - - return wf, rpool, diff, blip, fmap_rp_list - - def create_general_datasource(wf_name): """Create a general-purpose datasource node.""" import nipype.interfaces.utility as util @@ -881,9 +474,16 @@ def create_check_for_s3_node( return check_s3_node +@function.Function.sig_imports( + ["from pathlib import Path", "from typing import Optional"] +) def check_for_s3( - file_path, creds_path=None, dl_dir=None, img_type="other", verbose=False -): + file_path: Path | str, + creds_path: Optional[Path | str] = None, + dl_dir: Optional[Path | str] = None, + img_type: str = "other", + verbose: bool = False, +) -> Path | str: """Check if passed-in file is on S3.""" # Import packages import os diff --git a/CPAC/utils/utils.py b/CPAC/utils/utils.py index 4ba3285218..7d90593cc7 100644 --- a/CPAC/utils/utils.py +++ b/CPAC/utils/utils.py @@ -136,7 +136,7 @@ def get_flag_wf(wf_name="get_flag"): wf.connect(input_node, "in_flag", get_flag, "in_flag") -def read_json(json_file): +def read_json(json_file: str) -> dict: """Read a JSON file and return the contents as a dictionary.""" try: with open(json_file, "r") as f: From 646f49da79f7257992f58aceec1987f409f7f848 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 10 Jul 2024 12:52:05 -0400 Subject: [PATCH 20/93] :white_check_mark: Restore engine unit tests --- CPAC/pipeline/test/test_engine.py | 194 +++++++++++-------------- CPAC/resources/tests/test_templates.py | 12 +- 2 files changed, 90 insertions(+), 116 deletions(-) diff --git a/CPAC/pipeline/test/test_engine.py b/CPAC/pipeline/test/test_engine.py index c228fc3640..46df0a2dec 100644 --- a/CPAC/pipeline/test/test_engine.py +++ b/CPAC/pipeline/test/test_engine.py @@ -1,4 +1,22 @@ -import os +# Copyright (C) 2021-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. 
+ +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Tests for C-PAC pipeline engine.""" + +from pathlib import Path import pytest @@ -7,148 +25,108 @@ build_workflow, connect_pipeline, initialize_nipype_wf, - load_cpac_pipe_config, ) from CPAC.pipeline.engine import ( - ingress_pipeconfig_paths, - ingress_raw_anat_data, - ingress_raw_func_data, initiate_rpool, ResourcePool, ) from CPAC.utils.bids_utils import create_cpac_data_config - - -@pytest.mark.skip(reason="not a pytest test") -def test_ingress_func_raw_data(pipe_config, bids_dir, test_dir): - sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0] - cfg = load_cpac_pipe_config(pipe_config) - - cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out") - cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work") - +from CPAC.utils.configuration import Configuration, Preconfiguration + + +def _set_up_test( + bids_examples: Path, preconfig: str, tmp_path: Path +) -> tuple[Configuration, dict]: + """Set up ``cfg`` and ``sub_data`` for engine tests.""" + bids_dir = str(bids_examples / "ds051") + sub_data = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0] + cfg = Preconfiguration(preconfig) + cfg.pipeline_setup["output_directory"]["path"] = str(tmp_path / "out") + cfg.pipeline_setup["working_directory"]["path"] = str(tmp_path / "work") + cfg.pipeline_setup["log_directory"]["path"] = str(tmp_path / "logs") + return (cfg, sub_data) + + +@pytest.mark.parametrize("preconfig", ["default"]) +def test_ingress_func_raw_data( + bids_examples: Path, preconfig: str, tmp_path: Path +) -> None: + """Test :py:method:~`CPAC.pipeline.engine.resource.ResourcePool.ingress_raw_func_data`.""" + cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) wf = initialize_nipype_wf(cfg, sub_data_dct) - part_id = sub_data_dct["subject_id"] ses_id = sub_data_dct["unique_id"] - unique_id = f"{part_id}_{ses_id}" - - rpool = ResourcePool(name=unique_id, cfg=cfg) - + rpool = ResourcePool(name=unique_id, cfg=cfg, data_paths=sub_data_dct, wf=wf) if "func" in sub_data_dct: - wf, rpool, diff, blip, fmap_rp_list = ingress_raw_func_data( - wf, rpool, cfg, sub_data_dct, unique_id, part_id, ses_id - ) - + rpool.ingress_raw_func_data() rpool.gather_pipes(wf, cfg, all=True) - wf.run() - - -@pytest.mark.skip(reason="not a pytest test") -def test_ingress_anat_raw_data(pipe_config, bids_dir, test_dir): - sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0] - cfg = load_cpac_pipe_config(pipe_config) - - cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out") - cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work") +@pytest.mark.parametrize("preconfig", ["default"]) +def test_ingress_anat_raw_data( + bids_examples: Path, preconfig: str, tmp_path: Path +) -> None: + """Test :py:method:~`CPAC.pipeline.engine.resource.ResourcePool.ingress_raw_anat_data`.""" + cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) wf = initialize_nipype_wf(cfg, sub_data_dct) - part_id = sub_data_dct["subject_id"] ses_id = sub_data_dct["unique_id"] - unique_id = f"{part_id}_{ses_id}" - - rpool = ResourcePool(name=unique_id, 
cfg=cfg) - - rpool = ingress_raw_anat_data( - wf, rpool, cfg, sub_data_dct, unique_id, part_id, ses_id + rpool = ResourcePool( + name=unique_id, + cfg=cfg, + data_paths=sub_data_dct, + unique_id=unique_id, + part_id=part_id, + ses_id=ses_id, + wf=wf, ) - + rpool.ingress_raw_anat_data() rpool.gather_pipes(wf, cfg, all=True) - wf.run() - - -@pytest.mark.skip(reason="not a pytest test") -def test_ingress_pipeconfig_data(pipe_config, bids_dir, test_dir): - sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0] - cfg = load_cpac_pipe_config(pipe_config) - - cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out") - cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work") - cfg.pipeline_setup["log_directory"]["path"] = os.path.join(test_dir, "logs") +@pytest.mark.parametrize("preconfig", ["default"]) +def test_ingress_pipeconfig_data( + bids_examples: Path, preconfig: str, tmp_path: Path +) -> None: + """Test :py:method:~`CPAC.pipeline.engine.resource.ResourcePool.ingress_pipeconfig_paths`.""" + cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) wf = initialize_nipype_wf(cfg, sub_data_dct) - part_id = sub_data_dct["subject_id"] ses_id = sub_data_dct["unique_id"] - unique_id = f"{part_id}_{ses_id}" - - rpool = ResourcePool(name=unique_id, cfg=cfg) - - rpool = ingress_pipeconfig_paths(cfg, rpool, sub_data_dct, unique_id) - + rpool = ResourcePool( + name=unique_id, + cfg=cfg, + data_paths=sub_data_dct, + part_id=part_id, + ses_id=ses_id, + unique_id=unique_id, + ) + rpool.ingress_pipeconfig_paths() rpool.gather_pipes(wf, cfg, all=True) - wf.run() - - -@pytest.mark.skip(reason="not a pytest test") -def test_build_anat_preproc_stack(pipe_config, bids_dir, test_dir): - sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0] - cfg = load_cpac_pipe_config(pipe_config) - cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out") - cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work") - cfg.pipeline_setup["log_directory"]["path"] = os.path.join(test_dir, "logs") +@pytest.mark.parametrize("preconfig", ["anat-only"]) +def test_build_anat_preproc_stack( + bids_examples: Path, preconfig: str, tmp_path: Path +) -> None: + """Test :py:func:~`CPAC.pipeline.cpac_pipeline.build_anat_preproc_stack`.""" + cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) wf = initialize_nipype_wf(cfg, sub_data_dct) - - wf, rpool = initiate_rpool(wf, cfg, sub_data_dct) - + rpool = initiate_rpool(wf, cfg, sub_data_dct) pipeline_blocks = build_anat_preproc_stack(rpool, cfg) wf = connect_pipeline(wf, cfg, rpool, pipeline_blocks) - rpool.gather_pipes(wf, cfg) - wf.run() - - -@pytest.mark.skip(reason="not a pytest test") -def test_build_workflow(pipe_config, bids_dir, test_dir): - sub_data_dct = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0] - cfg = load_cpac_pipe_config(pipe_config) - - cfg.pipeline_setup["output_directory"]["path"] = os.path.join(test_dir, "out") - cfg.pipeline_setup["working_directory"]["path"] = os.path.join(test_dir, "work") - cfg.pipeline_setup["log_directory"]["path"] = os.path.join(test_dir, "logs") +@pytest.mark.parametrize("preconfig", ["default"]) +def test_build_workflow(bids_examples: Path, preconfig: str, tmp_path: Path) -> None: + """Test :py:func:~`CPAC.pipeline.cpac_pipeline.build_workflow`.""" + cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) wf = initialize_nipype_wf(cfg, sub_data_dct) - - wf, 
rpool = initiate_rpool(wf, cfg, sub_data_dct) - - wf, _, _ = build_workflow(sub_data_dct["subject_id"], sub_data_dct, cfg) - + rpool = initiate_rpool(wf, cfg, sub_data_dct) + wf = build_workflow(sub_data_dct["subject_id"], sub_data_dct, cfg) rpool.gather_pipes(wf, cfg) - - wf.run() - - -# bids_dir = "/Users/steven.giavasis/data/HBN-SI_dataset/rawdata" -# test_dir = "/test_dir" - -# cfg = "/Users/hecheng.jin/GitHub/DevBranch/CPAC/resources/configs/pipeline_config_monkey-ABCD.yml" -cfg = "/Users/hecheng.jin/GitHub/pipeline_config_monkey-ABCDlocal.yml" -bids_dir = "/Users/hecheng.jin/Monkey/monkey_data_oxford/site-ucdavis" -test_dir = "/Users/hecheng.jin/GitHub/Test/T2preproc" - -# test_ingress_func_raw_data(cfg, bids_dir, test_dir) -# test_ingress_anat_raw_data(cfg, bids_dir, test_dir) -# test_ingress_pipeconfig_data(cfg, bids_dir, test_dir) -# test_build_anat_preproc_stack(cfg, bids_dir, test_dir) -if __name__ == "__main__": - test_build_workflow(cfg, bids_dir, test_dir) diff --git a/CPAC/resources/tests/test_templates.py b/CPAC/resources/tests/test_templates.py index 13a4f72745..d9f5fa9f3c 100644 --- a/CPAC/resources/tests/test_templates.py +++ b/CPAC/resources/tests/test_templates.py @@ -21,20 +21,16 @@ import pytest from CPAC.pipeline import ALL_PIPELINE_CONFIGS -from CPAC.pipeline.engine import ingress_pipeconfig_paths, ResourcePool +from CPAC.pipeline.engine import ResourcePool from CPAC.utils.configuration import Preconfiguration from CPAC.utils.datasource import get_highest_local_res @pytest.mark.parametrize("pipeline", ALL_PIPELINE_CONFIGS) def test_packaged_path_exists(pipeline): - """ - Check that all local templates are included in image at at - least one resolution. - """ - rpool = ingress_pipeconfig_paths( - Preconfiguration(pipeline), ResourcePool(), "pytest" - ) + """Check that all local templates are included in at least one resolution.""" + rpool = ResourcePool(cfg=Preconfiguration(pipeline), unique_id="pytest") + rpool.ingress_pipeconfig_paths() for resource in rpool.rpool.values(): node = next(iter(resource.values())).get("data")[0] if hasattr(node.inputs, "template") and not node.inputs.template.startswith( From 3334a2a40e0308ccada6fa9d7d44e170aa695c7f Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 10 Jul 2024 13:00:10 -0400 Subject: [PATCH 21/93] :construction_worker: Replace non-`\w`-non-dot characters with hyphens --- .circleci/main.yml | 2 +- .github/workflows/build_C-PAC.yml | 2 +- .github/workflows/regression_test_full.yml | 2 +- .github/workflows/regression_test_lite.yml | 2 +- .github/workflows/smoke_test_participant.yml | 6 +++--- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.circleci/main.yml b/.circleci/main.yml index a13300a78d..c1cb4bc391 100644 --- a/.circleci/main.yml +++ b/.circleci/main.yml @@ -156,7 +156,7 @@ commands: then TAG=nightly else - TAG="${CIRCLE_BRANCH//\//_}" + TAG=`echo ${CIRCLE_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'` fi DOCKER_TAG="ghcr.io/${CIRCLE_PROJECT_USERNAME,,}/${CIRCLE_PROJECT_REPONAME,,}:${TAG,,}" if [[ -n "<< parameters.variant >>" ]] diff --git a/.github/workflows/build_C-PAC.yml b/.github/workflows/build_C-PAC.yml index d126f6a778..ef7a196cef 100644 --- a/.github/workflows/build_C-PAC.yml +++ b/.github/workflows/build_C-PAC.yml @@ -42,7 +42,7 @@ jobs: GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-) if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! 
$GITHUB_BRANCH == 'develop' ]] then - TAG=${GITHUB_BRANCH//\//_} + TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'` DOCKERFILE=.github/Dockerfiles/C-PAC.develop$VARIANT-$OS.Dockerfile elif [[ $GITHUB_BRANCH == 'develop' ]] then diff --git a/.github/workflows/regression_test_full.yml b/.github/workflows/regression_test_full.yml index 6dba2d1bf2..20d25a9316 100644 --- a/.github/workflows/regression_test_full.yml +++ b/.github/workflows/regression_test_full.yml @@ -13,7 +13,7 @@ jobs: GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-) if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! $GITHUB_BRANCH == 'develop' ]] then - TAG=${GITHUB_BRANCH//\//_} + TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'` elif [[ $GITHUB_BRANCH == 'develop' ]] then TAG=nightly diff --git a/.github/workflows/regression_test_lite.yml b/.github/workflows/regression_test_lite.yml index 4e6b5a46f6..87aba8a5bd 100644 --- a/.github/workflows/regression_test_lite.yml +++ b/.github/workflows/regression_test_lite.yml @@ -37,7 +37,7 @@ jobs: run: | if [[ ! $GITHUB_REF_NAME == 'main' ]] && [[ ! $GITHUB_REF_NAME == 'develop' ]] then - TAG=${GITHUB_REF_NAME//\//_} + TAG=`echo ${GITHUB_REF_NAME} | sed 's/[^a-zA-Z0-9._]/-/g'` elif [[ $GITHUB_REF_NAME == 'develop' ]] then TAG=nightly diff --git a/.github/workflows/smoke_test_participant.yml b/.github/workflows/smoke_test_participant.yml index 3fde0de8aa..6b7e219775 100644 --- a/.github/workflows/smoke_test_participant.yml +++ b/.github/workflows/smoke_test_participant.yml @@ -68,7 +68,7 @@ jobs: GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-) if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! $GITHUB_BRANCH == 'develop' ]] then - TAG=${GITHUB_BRANCH//\//_} + TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'` elif [[ $GITHUB_BRANCH == 'develop' ]] then TAG=nightly @@ -133,7 +133,7 @@ jobs: GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-) if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! $GITHUB_BRANCH == 'develop' ]] then - TAG=${GITHUB_BRANCH//\//_} + TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'` elif [[ $GITHUB_BRANCH == 'develop' ]] then TAG=nightly @@ -192,7 +192,7 @@ jobs: GITHUB_BRANCH=$(echo ${GITHUB_REF} | cut -d '/' -f 3-) if [[ ! $GITHUB_BRANCH == 'main' ]] && [[ ! 
$GITHUB_BRANCH == 'develop' ]] then - TAG=${GITHUB_BRANCH//\//_} + TAG=`echo ${GITHUB_BRANCH} | sed 's/[^a-zA-Z0-9._]/-/g'` elif [[ $GITHUB_BRANCH == 'develop' ]] then TAG=nightly From bbf3e97a239c8cf4402048a555cc73e0ae5614b4 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 10 Jul 2024 17:01:03 -0400 Subject: [PATCH 22/93] :recycle: Fold `initiate_rpool` into `ResourcePool.__init__` --- .../longitudinal_workflow.py | 40 +- CPAC/pipeline/cpac_pipeline.py | 31 +- CPAC/pipeline/engine/__init__.py | 3 +- CPAC/pipeline/engine/engine.py | 4 +- CPAC/pipeline/engine/resource.py | 406 ++++++++++-------- CPAC/pipeline/test/test_engine.py | 46 +- CPAC/utils/configuration/configuration.py | 7 + 7 files changed, 263 insertions(+), 274 deletions(-) diff --git a/CPAC/longitudinal_pipeline/longitudinal_workflow.py b/CPAC/longitudinal_pipeline/longitudinal_workflow.py index 829e123de4..aacbea6b8d 100644 --- a/CPAC/longitudinal_pipeline/longitudinal_workflow.py +++ b/CPAC/longitudinal_pipeline/longitudinal_workflow.py @@ -29,9 +29,8 @@ build_segmentation_stack, build_T1w_registration_stack, connect_pipeline, - initialize_nipype_wf, ) -from CPAC.pipeline.engine import initiate_rpool +from CPAC.pipeline.engine import ResourcePool from CPAC.pipeline.nodeblock import nodeblock from CPAC.registration import ( create_fsl_flirt_linear_reg, @@ -429,16 +428,13 @@ def anat_longitudinal_wf(subject_id, sub_list, config): except KeyError: input_creds_path = None - workflow = initialize_nipype_wf( - config, - session, - # just grab the first one for the name - name="anat_longitudinal_pre-preproc", + rpool = ResourcePool( + cfg=config, + data_paths=session, + pipeline_name="anat_longitudinal_pre-preproc", ) - - rpool = initiate_rpool(workflow, config, session) pipeline_blocks = build_anat_preproc_stack(rpool, config) - workflow = connect_pipeline(workflow, config, rpool, pipeline_blocks) + workflow = connect_pipeline(rpool.wf, config, rpool, pipeline_blocks) session_wfs[unique_id] = rpool @@ -474,13 +470,6 @@ def anat_longitudinal_wf(subject_id, sub_list, config): ) for strat in strats_brain_dct.keys(): - wf = initialize_nipype_wf( - config, - sub_list[0], - # just grab the first one for the name - name=f"template_node_{strat}", - ) - config.pipeline_setup["pipeline_name"] = f"longitudinal_{orig_pipe_name}" template_node_name = f"longitudinal_anat_template_{strat}" @@ -508,7 +497,9 @@ def anat_longitudinal_wf(subject_id, sub_list, config): template_node.inputs.input_skull_list = strats_head_dct[strat] long_id = f"longitudinal_{subject_id}_strat-{strat}" - rpool = initiate_rpool(wf, config, part_id=long_id) + rpool = ResourcePool( + cfg=config, part_id=long_id, pipeline_name=f"template_node_{strat}" + ) rpool.set_data( "space-longitudinal_desc-brain_T1w", template_node, @@ -551,7 +542,7 @@ def anat_longitudinal_wf(subject_id, sub_list, config): pipeline_blocks = build_segmentation_stack(rpool, config, pipeline_blocks) - wf = connect_pipeline(wf, config, rpool, pipeline_blocks) + wf = connect_pipeline(rpool.wf, config, rpool, pipeline_blocks) excl = [ "space-longitudinal_desc-brain_T1w", @@ -586,10 +577,9 @@ def anat_longitudinal_wf(subject_id, sub_list, config): except KeyError: session["creds_path"] = None - wf = initialize_nipype_wf(config, session) - rpool = initiate_rpool(wf, config, session) - config.pipeline_setup["pipeline_name"] = f"longitudinal_{orig_pipe_name}" + rpool = ResourcePool(cfg=config, data_paths=session) + wf = rpool.wf rpool.ingress_output_dir() select_node_name = f"select_{unique_id}" @@ -651,15 
+641,13 @@ def anat_longitudinal_wf(subject_id, sub_list, config): except KeyError: input_creds_path = None session["creds_path"] = input_creds_path - wf = initialize_nipype_wf(config, session) - rpool = initiate_rpool(wf, config, session) - + rpool = ResourcePool(cfg=config, data_paths=session) pipeline_blocks = [ warp_longitudinal_T1w_to_template, warp_longitudinal_seg_to_T1w, ] - wf = connect_pipeline(wf, config, rpool, pipeline_blocks) + wf = connect_pipeline(rpool.wf, config, rpool, pipeline_blocks) rpool.gather_pipes(wf, config) diff --git a/CPAC/pipeline/cpac_pipeline.py b/CPAC/pipeline/cpac_pipeline.py index f0baaa323c..9b5ed67141 100644 --- a/CPAC/pipeline/cpac_pipeline.py +++ b/CPAC/pipeline/cpac_pipeline.py @@ -128,9 +128,8 @@ ) # pylint: disable=wrong-import-order -from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.check_outputs import check_outputs -from CPAC.pipeline.engine import initiate_rpool, NodeBlock +from CPAC.pipeline.engine import NodeBlock, ResourcePool from CPAC.pipeline.nipype_pipeline_engine.plugins import ( LegacyMultiProcPlugin, MultiProcPlugin, @@ -856,24 +855,6 @@ def remove_workdir(wdpath: str) -> None: FMLOGGER.warning("Could not remove working directory %s", wdpath) -def initialize_nipype_wf(cfg, sub_data_dct, name=""): - """Initialize a new nipype workflow.""" - if name: - name = f"_{name}" - - workflow_name = ( - f'cpac{name}_{sub_data_dct["subject_id"]}_{sub_data_dct["unique_id"]}' - ) - wf = pe.Workflow(name=workflow_name) - wf.base_dir = cfg.pipeline_setup["working_directory"]["path"] - wf.config["execution"] = { - "hash_method": "timestamp", - "crashdump_dir": os.path.abspath(cfg.pipeline_setup["log_directory"]["path"]), - } - - return wf - - def load_cpac_pipe_config(pipe_config): """Load in pipeline config file.""" config_file = os.path.realpath(pipe_config) @@ -1074,7 +1055,6 @@ def build_T1w_registration_stack(rpool, cfg, pipeline_blocks=None): warp_wholeheadT1_to_template, warp_T1mask_to_template, ] - if not rpool.check_rpool("desc-restore-brain_T1w"): reg_blocks.append(correct_restore_brain_intensity_abcd) @@ -1176,7 +1156,6 @@ def connect_pipeline(wf, cfg, rpool, pipeline_blocks): WFLOGGER.info( "Connecting pipeline blocks:\n%s", list_blocks(pipeline_blocks, indent=1) ) - previous_nb = None for block in pipeline_blocks: try: @@ -1221,9 +1200,6 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None): """Build a C-PAC workflow for a single subject.""" from CPAC.utils.datasource import gather_extraction_maps - # Workflow setup - wf = initialize_nipype_wf(cfg, sub_dict, name=pipeline_name) - # Extract credentials path if it exists try: creds_path = sub_dict["creds_path"] @@ -1247,8 +1223,7 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None): # PREPROCESSING # """"""""""""""""""""""""""""""""""""""""""""""""""" - rpool = initiate_rpool(wf, cfg, sub_dict) - + rpool = ResourcePool(cfg=cfg, data_paths=sub_dict, pipeline_name=pipeline_name) pipeline_blocks = build_anat_preproc_stack(rpool, cfg) # Anatomical to T1 template registration @@ -1615,7 +1590,7 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None): # Connect the entire pipeline! 
try: - wf = connect_pipeline(wf, cfg, rpool, pipeline_blocks) + wf = connect_pipeline(rpool.wf, cfg, rpool, pipeline_blocks) except LookupError as lookup_error: missing_key = None errorstrings = [arg for arg in lookup_error.args[0].split("\n") if arg.strip()] diff --git a/CPAC/pipeline/engine/__init__.py b/CPAC/pipeline/engine/__init__.py index 1350e2bb36..dc1d077656 100644 --- a/CPAC/pipeline/engine/__init__.py +++ b/CPAC/pipeline/engine/__init__.py @@ -21,10 +21,9 @@ run_node_blocks, wrap_block, ) -from .resource import initiate_rpool, NodeData, ResourcePool +from .resource import NodeData, ResourcePool __all__ = [ - "initiate_rpool", "NodeBlock", "NodeData", "ResourcePool", diff --git a/CPAC/pipeline/engine/engine.py b/CPAC/pipeline/engine/engine.py index e6280ace5f..4187476bf7 100644 --- a/CPAC/pipeline/engine/engine.py +++ b/CPAC/pipeline/engine/engine.py @@ -527,7 +527,7 @@ def wrap_block(node_blocks, interface, wf, cfg, strat_pool, pipe_num, opt): def run_node_blocks(blocks, data_paths, cfg=None): from CPAC.pipeline.engine import NodeBlock - from CPAC.pipeline.engine.resource import initiate_rpool + from CPAC.pipeline.engine.resource import ResourcePool if not cfg: cfg = { @@ -540,7 +540,7 @@ def run_node_blocks(blocks, data_paths, cfg=None): # TODO: WE HAVE TO PARSE OVER UNIQUE ID'S!!! wf = pe.Workflow(name="node_blocks") - rpool = initiate_rpool(wf, cfg, data_paths) + rpool = ResourcePool(wf=wf, cfg=cfg, data_paths=data_paths) wf.base_dir = cfg.pipeline_setup["working_directory"]["path"] wf.config["execution"] = { "hash_method": "timestamp", diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 5fc9add3db..72a3036fbf 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -17,12 +17,14 @@ """Resources and ResourcePools for C-PAC.""" import ast +from collections.abc import KeysView import copy from itertools import chain import os from pathlib import Path import re -from typing import Optional +from types import NoneType +from typing import Any, Optional import warnings from nipype.interfaces import utility as util @@ -39,7 +41,7 @@ from CPAC.registration.registration import transform_derivative from CPAC.resources.templates.lookup_table import lookup_identifier from CPAC.utils.bids_utils import res_in_filename -from CPAC.utils.configuration import Configuration +from CPAC.utils.configuration.configuration import Configuration, EmptyConfiguration from CPAC.utils.datasource import ( calc_delta_te_and_asym_ratio, check_for_s3, @@ -71,6 +73,78 @@ EXTS = [".nii", ".gz", ".mat", ".1D", ".txt", ".csv", ".rms", ".tsv"] +class DataPaths: + """Store subject-session specific data paths.""" + + def __init__(self, *, data_paths: Optional[dict] = None, part_id: str = "") -> None: + """Initialize a ``DataPaths`` instance.""" + if not data_paths: + data_paths = {} + if part_id and "part_id" in data_paths and part_id != data_paths["part_id"]: + WFLOGGER.warning( + "both 'part_id' (%s) and data_paths['part_id'] (%s) provided. 
" + "Using '%s'.", + part_id, + data_paths["part_id"], + part_id, + ) + anat: dict[str, str] | str = data_paths.get("anat", {}) + if isinstance(anat, str): + anat = {"T1": anat} + self.anat: dict[str, str] = anat + self.creds_path: Optional[str] = data_paths.get("creds_path") + self.fmap: Optional[dict] = data_paths.get("fmap") + self.func: dict[str, dict[str, str | dict]] = data_paths.get("func", {}) + self.part_id: str = data_paths.get("subject_id", "") + self.site_id: str = data_paths.get("site_id", "") + self.ses_id: str = data_paths.get("unique_id", "") + self.unique_id: str = "_".join([self.part_id, self.ses_id]) + self.derivatives_dir: Optional[str] = data_paths.get("derivatives_dir") + + def __repr__(self) -> str: + """Return reproducible string representation of ``DataPaths`` instance.""" + return f"DataPaths(data_paths={self.as_dict()})" + + def __str__(self) -> str: + """Return string representation of a ``DataPaths`` instance.""" + return f"" + + def as_dict(self) -> dict: + """Return ``data_paths`` dictionary. + + data_paths format:: + + {"anat": {"T1w": "{T1w path}", "T2w": "{T2w path}"}, + "creds_path": {None OR path to credentials CSV}, + "func": { + "{scan ID}": { + "scan": "{path to BOLD}", + "scan_parameters": {scan parameter dictionary}, + } + }, + "site_id": "site-ID", + "subject_id": "sub-01", + "unique_id": "ses-1", + "derivatives_dir": "{derivatives_dir path}",} + """ + return { + k: v + for k, v in { + key: getattr(self, key) + for key in [ + "anat", + "creds_path", + "func", + "site_id", + "subject_id", + "unique_id", + "derivatives_dir", + ] + }.items() + if v + } + + def generate_prov_string(prov: list[str]) -> tuple[str, str]: """Generate a string from a SINGLE RESOURCE'S dictionary of MULTIPLE PRECEDING RESOURCES (or single, if just one). 
@@ -253,30 +327,26 @@ class ResourcePool: def __init__( self, - rpool: Optional[dict] = None, name: str = "", cfg: Optional[Configuration] = None, pipe_list: Optional[list] = None, *, - creds_path: Optional[str] = None, - data_paths: Optional[dict] = None, - part_id: Optional[str] = None, - ses_id: Optional[str] = None, - unique_id: Optional[str] = None, + data_paths: Optional[DataPaths | dict] = None, + pipeline_name: str = "", wf: Optional[pe.Workflow] = None, - **kwargs, ): """Initialize a ResourcePool.""" - self.creds_path = creds_path + if isinstance(data_paths, dict): + data_paths = DataPaths(data_paths=data_paths) + elif not data_paths: + data_paths = DataPaths() self.data_paths = data_paths - self.part_id = part_id - self.ses_id = ses_id - self.unique_id = unique_id - self._init_wf = wf - if not rpool: - self.rpool = {} - else: - self.rpool = rpool + # pass-through for convenient access + self.creds_path = self.data_paths.creds_path + self.part_id = self.data_paths.part_id + self.ses_id = self.data_paths.ses_id + self.unique_id = self.data_paths.unique_id + self.rpool = {} if not pipe_list: self.pipe_list = [] @@ -288,36 +358,67 @@ def __init__( if cfg: self.cfg = cfg - self.logdir = cfg.pipeline_setup["log_directory"]["path"] + else: + self.cfg = EmptyConfiguration() - self.num_cpus = cfg.pipeline_setup["system_config"][ - "max_cores_per_participant" + self.logdir = self._config_lookup(["pipeline_setup", "log_directory", "path"]) + self.num_cpus = self._config_lookup( + ["pipeline_setup", "system_config", "max_cores_per_participant"] + ) + self.num_ants_cores = self._config_lookup( + ["pipeline_setup", "system_config", "num_ants_threads"] + ) + + self.ants_interp = self._config_lookup( + [ + "registration_workflows", + "functional_registration", + "func_registration_to_template", + "ANTs_pipelines", + "interpolation", + ] + ) + self.fsl_interp = self._config_lookup( + [ + "registration_workflows", + "functional_registration", + "func_registration_to_template", + "FNIRT_pipelines", + "interpolation", ] - self.num_ants_cores = cfg.pipeline_setup["system_config"][ - "num_ants_threads" + ) + self.func_reg = self._config_lookup( + [ + "registration_workflows", + "functional_registration", + "func_registration_to_template", + "run", ] + ) - self.ants_interp = cfg.registration_workflows["functional_registration"][ - "func_registration_to_template" - ]["ANTs_pipelines"]["interpolation"] - self.fsl_interp = cfg.registration_workflows["functional_registration"][ - "func_registration_to_template" - ]["FNIRT_pipelines"]["interpolation"] - - self.func_reg = cfg.registration_workflows["functional_registration"][ - "func_registration_to_template" - ]["run"] + self.run_smoothing = "smoothed" in self._config_lookup( + ["post_processing", "spatial_smoothing", "output"], list + ) + self.smoothing_bool = self._config_lookup( + ["post_processing", "spatial_smoothing", "run"] + ) + self.run_zscoring = "z-scored" in self._config_lookup( + ["post_processing", "z-scoring", "output"], list + ) + self.zscoring_bool = self._config_lookup( + ["post_processing", "z-scoring", "run"] + ) + self.fwhm = self._config_lookup( + ["post_processing", "spatial_smoothing", "fwhm"] + ) + self.smooth_opts = self._config_lookup( + ["post_processing", "spatial_smoothing", "smoothing_method"] + ) - self.run_smoothing = ( - "smoothed" in cfg.post_processing["spatial_smoothing"]["output"] - ) - self.smoothing_bool = cfg.post_processing["spatial_smoothing"]["run"] - self.run_zscoring = "z-scored" in 
cfg.post_processing["z-scoring"]["output"] - self.zscoring_bool = cfg.post_processing["z-scoring"]["run"] - self.fwhm = cfg.post_processing["spatial_smoothing"]["fwhm"] - self.smooth_opts = cfg.post_processing["spatial_smoothing"][ - "smoothing_method" - ] + if wf: + self.wf = wf + else: + self.initialize_nipype_wf(pipeline_name) self.xfm = [ "alff", @@ -333,6 +434,21 @@ def __init__( "desc-zstd_reho", "desc-sm-zstd_reho", ] + ingress_derivatives = False + try: + if self.data_paths.derivatives_dir and self._config_lookup( + ["pipeline_setup", "outdir_ingress", "run"], bool + ): + ingress_derivatives = True + except (AttributeError, KeyError, TypeError): + pass + if ingress_derivatives: + self.ingress_output_dir() + else: + self.ingress_raw_anat_data() + if data_paths.func: + self.ingress_raw_func_data() + self.ingress_pipeconfig_paths() def __repr__(self) -> str: """Return reproducible ResourcePool string.""" @@ -349,6 +465,27 @@ def __str__(self) -> str: return f"ResourcePool({self.name}): {list(self.rpool)}" return f"ResourcePool: {list(self.rpool)}" + def initialize_nipype_wf(self, name: str = "") -> None: + """Initialize a new nipype workflow.""" + if name: + name = f"_{name}" + workflow_name = f"cpac{name}_{self.unique_id}" + self.wf = pe.Workflow(name=workflow_name) + self.wf.base_dir = self.cfg.pipeline_setup["working_directory"]["path"] + self.wf.config["execution"] = { + "hash_method": "timestamp", + "crashdump_dir": os.path.abspath( + self.cfg.pipeline_setup["log_directory"]["path"] + ), + } + + def _config_lookup(self, keylist, fallback_type: type = NoneType) -> Any: + """Lookup a config key, return None if not found.""" + try: + return self.cfg[keylist] + except (AttributeError, KeyError): + return fallback_type() + def back_propogate_template_name( self, resource_idx: str, json_info: dict, id_string: "pe.Node" ) -> None: @@ -369,7 +506,7 @@ def back_propogate_template_name( if "template" in resource_idx and self.check_rpool("derivatives-dir"): if self.check_rpool("template"): node, out = self.get_data("template") - self._init_wf.connect(node, out, id_string, "template_desc") + self.wf.connect(node, out, id_string, "template_desc") elif "Template" in json_info: id_string.inputs.template_desc = json_info["Template"] elif ( @@ -536,16 +673,12 @@ def set_pool_info(self, info_dct): def get_entire_rpool(self): return self.rpool - def get_resources(self): + def keys(self) -> KeysView: + """Return rpool's keys.""" return self.rpool.keys() - def copy_rpool(self): - return ResourcePool( - rpool=copy.deepcopy(self.get_entire_rpool()), - name=self.name, - cfg=self.cfg, - pipe_list=copy.deepcopy(self.pipe_list), - ) + def get_resources(self): + return self.rpool.keys() @staticmethod def get_raw_label(resource: str) -> str: @@ -863,10 +996,9 @@ def flatten_prov(self, prov): return flat_prov return None - def get_strats(self, resources, debug=False): + def get_strats(self, resources, debug=False) -> dict[str | tuple, "StratPool"]: # TODO: NOTE: NOT COMPATIBLE WITH SUB-RPOOL/STRAT_POOLS # TODO: (and it doesn't have to be) - import itertools linked_resources = [] @@ -952,7 +1084,7 @@ def get_strats(self, resources, debug=False): # we now currently have "strats", the combined permutations of all the strategies, as a list of tuples, each tuple combining one version of input each, being one of the permutations. # OF ALL THE DIFFERENT INPUTS. and they are tagged by their fetched inputs with {name}:{strat}. 
# so, each tuple has ONE STRAT FOR EACH INPUT, so if there are three inputs, each tuple will have 3 items. - new_strats = {} + new_strats: dict[str | tuple, StratPool] = {} # get rid of duplicates - TODO: refactor .product strat_str_list = [] @@ -1055,7 +1187,7 @@ def get_strats(self, resources, debug=False): # make the merged strat label from the multiple inputs # strat_list is actually the merged CpacProvenance lists pipe_idx = str(strat_list) - new_strats[pipe_idx] = ResourcePool() + new_strats[pipe_idx] = StratPool() # new_strats is A DICTIONARY OF RESOURCEPOOL OBJECTS! # placing JSON info at one level higher only for copy convenience new_strats[pipe_idx].rpool["json"] = {} @@ -1098,7 +1230,7 @@ def get_strats(self, resources, debug=False): resource, pipe_idx = generate_prov_string(cpac_prov) resource_strat_dct = self.rpool[resource][pipe_idx] # remember, `resource_strat_dct` is the dct of 'data' and 'json'. - new_strats[pipe_idx] = ResourcePool( + new_strats[pipe_idx] = StratPool( rpool={resource: resource_strat_dct} ) # <----- again, new_strats is A DICTIONARY OF RESOURCEPOOL OBJECTS! # placing JSON info at one level higher only for copy convenience @@ -1429,9 +1561,9 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): # TODO: other stuff like acq- etc. for pipe_idx in self.rpool[resource]: - unique_id = self.get_name() - part_id = unique_id.split("_")[0] - ses_id = unique_id.split("_")[1] + unique_id = self.unique_id + part_id = self.part_id + ses_id = self.ses_id if "ses-" not in ses_id: ses_id = f"ses-{ses_id}" @@ -1819,7 +1951,7 @@ def ingress_freesurfer(self) -> None: def ingress_output_dir(self) -> None: """Ingress an output directory into a ResourcePool.""" - dir_path = self.data_paths["derivatives_dir"] + dir_path = self.data_paths.derivatives_dir WFLOGGER.info("\nPulling outputs from %s.\n", dir_path) @@ -1971,11 +2103,11 @@ def ingress_func_metadata( blip = False fmap_rp_list = [] fmap_TE_list = [] - if "fmap" in self.data_paths: + if self.data_paths.fmap: second = False - for orig_key in self.data_paths["fmap"]: + for orig_key in self.data_paths.fmap: gather_fmap = create_fmap_datasource( - self.data_paths["fmap"], f"fmap_gather_{orig_key}_{self.part_id}" + self.data_paths.fmap, f"fmap_gather_{orig_key}_{self.part_id}" ) gather_fmap.inputs.inputnode.set( subject=self.part_id, @@ -2023,7 +2155,7 @@ def ingress_func_metadata( name=f"{key}_get_metadata{name_suffix}", ) - self._init_wf.connect( + self.wf.connect( gather_fmap, "outputspec.scan_params", get_fmap_metadata, @@ -2140,13 +2272,13 @@ def ingress_func_metadata( node, out_file = self.get(fmap_file)[ f"['{fmap_file}:fmap_TE_ingress']" ]["data"] - self._init_wf.connect( + self.wf.connect( node, out_file, gather_echoes, f"echotime_{idx}" ) except KeyError: pass - self._init_wf.connect( + self.wf.connect( gather_echoes, "echotime_list", calc_delta_ratio, "echo_times" ) @@ -2185,7 +2317,7 @@ def ingress_func_metadata( ) node, out = self.get("scan")["['scan:func_ingress']"]["data"] - self._init_wf.connect(node, out, scan_params, "scan") + self.wf.connect(node, out, scan_params, "scan") # Workaround for extracting metadata with ingress if self.check_rpool("derivatives-dir"): @@ -2198,10 +2330,10 @@ def ingress_func_metadata( ), name="selectrest_json", ) - selectrest_json.inputs.rest_dict = self.data_paths + selectrest_json.inputs.rest_dict = self.data_paths.as_dict() selectrest_json.inputs.resource = "scan_parameters" - self._init_wf.connect(node, out, selectrest_json, "scan") - self._init_wf.connect( 
+ self.wf.connect(node, out, selectrest_json, "scan") + self.wf.connect( selectrest_json, "file_path", scan_params, "data_config_scan_params" ) @@ -2210,7 +2342,7 @@ def ingress_func_metadata( node, out = self.get("scan-params")["['scan-params:scan_params_ingress']"][ "data" ] - self._init_wf.connect(node, out, scan_params, "data_config_scan_params") + self.wf.connect(node, out, scan_params, "data_config_scan_params") self.set_data("TR", scan_params, "tr", {}, "", "func_metadata_ingress") self.set_data( @@ -2242,9 +2374,7 @@ def ingress_func_metadata( node, out_file = self.get("effectiveEchoSpacing")[ "['effectiveEchoSpacing:func_metadata_ingress']" ]["data"] - self._init_wf.connect( - node, out_file, calc_delta_ratio, "effective_echo_spacing" - ) + self.wf.connect(node, out_file, calc_delta_ratio, "effective_echo_spacing") self.set_data( "deltaTE", calc_delta_ratio, "deltaTE", {}, "", "deltaTE_ingress" ) @@ -2372,7 +2502,7 @@ def ingress_pipeconfig_paths(self): def ingress_raw_func_data(self): """Ingress raw functional data.""" - func_paths_dct = self.data_paths["func"] + func_paths_dct = self.data_paths.func func_wf = self.create_func_datasource( func_paths_dct, f"func_ingress_{self.part_id}_{self.ses_id}" @@ -2411,7 +2541,7 @@ def ingress_raw_func_data(self): ] if local_func_scans: # pylint: disable=protected-access - self._init_wf._local_func_scans = local_func_scans + self.wf._local_func_scans = local_func_scans if self.cfg.pipeline_setup["Debugging"]["verbose"]: verbose_logger = getLogger("CPAC.engine") verbose_logger.debug("local_func_scans: %s", local_func_scans) @@ -2464,7 +2594,7 @@ def func_outdir_ingress(self, func_dict: dict, key: str, func_paths: dict) -> No ) iterables.inputs.mask_paths = func_paths[mask_paths_key] iterables.inputs.ts_paths = func_paths[ts_paths_key] - self._init_wf.connect(ingress, "outputspec.scan", iterables, "scan") + self.wf.connect(ingress, "outputspec.scan", iterables, "scan") for key in func_paths: if key in (mask_paths_key, ts_paths_key): @@ -2474,13 +2604,9 @@ def func_outdir_ingress(self, func_dict: dict, key: str, func_paths: dict) -> No creds_path=self.creds_path, dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], ) - self._init_wf.connect( - iterables, "out_scan", ingress_func, "inputnode.scan" - ) + self.wf.connect(iterables, "out_scan", ingress_func, "inputnode.scan") if key == mask_paths_key: - self._init_wf.connect( - iterables, "mask", ingress_func, "inputnode.data" - ) + self.wf.connect(iterables, "mask", ingress_func, "inputnode.data") self.set_data( key, ingress_func, @@ -2490,7 +2616,7 @@ def func_outdir_ingress(self, func_dict: dict, key: str, func_paths: dict) -> No f"outdir_{key}_ingress", ) elif key == ts_paths_key: - self._init_wf.connect( + self.wf.connect( iterables, "confounds", ingress_func, "inputnode.data" ) self.set_data( @@ -2504,19 +2630,15 @@ def func_outdir_ingress(self, func_dict: dict, key: str, func_paths: dict) -> No def ingress_raw_anat_data(self) -> None: """Ingress raw anatomical data.""" - if "anat" not in self.data_paths: + if not self.data_paths.anat: WFLOGGER.warning("No anatomical data present.") return - anat_flow = create_anat_datasource( - f"anat_T1w_gather_{self.part_id}_{self.ses_id}" - ) + anat_flow = create_anat_datasource(f"anat_T1w_gather_{self.unique_id}") anat = {} - if isinstance(self.data_paths["anat"], str): - anat["T1"] = self.data_paths["anat"] - elif "T1w" in self.data_paths["anat"]: - anat["T1"] = self.data_paths["anat"]["T1w"] + if "T1w" in self.data_paths.anat: + anat["T1"] = 
self.data_paths.anat["T1w"] if "T1" in anat: anat_flow.inputs.inputnode.set( @@ -2528,13 +2650,13 @@ def ingress_raw_anat_data(self) -> None: ) self.set_data("T1w", anat_flow, "outputspec.anat", {}, "", "anat_ingress") - if "T2w" in self.data_paths["anat"]: + if "T2w" in self.data_paths.anat: anat_flow_T2 = create_anat_datasource( f"anat_T2w_gather_{self.part_id}_{self.ses_id}" ) anat_flow_T2.inputs.inputnode.set( subject=self.part_id, - anat=self.data_paths["anat"]["T2w"], + anat=self.data_paths.anat["T2w"], creds_path=self.creds_path, dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], img_type="anat", @@ -2547,91 +2669,19 @@ def ingress_raw_anat_data(self) -> None: self.ingress_freesurfer() -def initiate_rpool( - wf: pe.Workflow, - cfg: Configuration, - data_paths: Optional[dict] = None, - part_id: Optional[str] = None, -) -> ResourcePool: - """ - Initialize a new ResourcePool. - - data_paths format:: - - {'anat': { - 'T1w': '{T1w path}', - 'T2w': '{T2w path}' - }, - 'creds_path': {None OR path to credentials CSV}, - 'func': { - '{scan ID}': - { - 'scan': '{path to BOLD}', - 'scan_parameters': {scan parameter dictionary} - } - }, - 'site_id': 'site-ID', - 'subject_id': 'sub-01', - 'unique_id': 'ses-1', - 'derivatives_dir': '{derivatives_dir path}'} - """ - # TODO: refactor further, integrate with the ingress_data functionality - # TODO: used for BIDS-Derivatives (below), and possible refactoring of - # TODO: the raw data config to use 'T1w' label instead of 'anat' etc. - - kwargs = {"cfg": cfg, "wf": wf} - if data_paths: - part_id: str = data_paths["subject_id"] - ses_id: str = data_paths["unique_id"] - if "creds_path" not in data_paths: - creds_path = None - else: - creds_path: Optional[Path | str] = data_paths["creds_path"] - unique_id: str = f"{part_id}_{ses_id}" - kwargs.update( - { - "part_id": part_id, - "ses_id": ses_id, - "creds_path": creds_path, - "data_paths": data_paths, - } - ) - elif part_id: - unique_id = part_id - creds_path = None - kwargs.update({"part_id": part_id, "creds_path": creds_path}) - else: - unique_id = "" - kwargs.update({"unique_id": unique_id}) - - rpool = ResourcePool(name=unique_id, **kwargs) - - if data_paths: - # ingress outdir - try: - if ( - data_paths["derivatives_dir"] - and cfg.pipeline_setup["outdir_ingress"]["run"] - ): - rpool.ingress_output_dir() - except (AttributeError, KeyError): - rpool.ingress_raw_anat_data() - if "func" in data_paths: - rpool.ingress_raw_func_data() - - # grab any file paths from the pipeline config YAML - rpool.ingress_pipeconfig_paths() - - # output files with 4 different scans - - return rpool._init_wf, rpool - - class StratPool(ResourcePool): - """All resources for a strategy.""" + """A pool of ResourcePools keyed by strategy.""" - def __init__(self): - """Initialize a ResourcePool.""" + def __init__(self, rpool: Optional[dict[ResourcePool]] = None) -> None: + """Initialize a StratPool.""" + if not rpool: + self.rpool = {} + else: + self.rpool = rpool def append_name(self, name): self.name.append(name) + + def get_strats(self, resources, debug) -> None: + """ResourcePool method that is not valid for a StratPool.""" + raise NotImplementedError diff --git a/CPAC/pipeline/test/test_engine.py b/CPAC/pipeline/test/test_engine.py index 46df0a2dec..8193fc744d 100644 --- a/CPAC/pipeline/test/test_engine.py +++ b/CPAC/pipeline/test/test_engine.py @@ -24,12 +24,8 @@ build_anat_preproc_stack, build_workflow, connect_pipeline, - initialize_nipype_wf, -) -from CPAC.pipeline.engine import ( - initiate_rpool, - 
ResourcePool, ) +from CPAC.pipeline.engine import ResourcePool from CPAC.utils.bids_utils import create_cpac_data_config from CPAC.utils.configuration import Configuration, Preconfiguration @@ -53,14 +49,8 @@ def test_ingress_func_raw_data( ) -> None: """Test :py:method:~`CPAC.pipeline.engine.resource.ResourcePool.ingress_raw_func_data`.""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) - wf = initialize_nipype_wf(cfg, sub_data_dct) - part_id = sub_data_dct["subject_id"] - ses_id = sub_data_dct["unique_id"] - unique_id = f"{part_id}_{ses_id}" - rpool = ResourcePool(name=unique_id, cfg=cfg, data_paths=sub_data_dct, wf=wf) - if "func" in sub_data_dct: - rpool.ingress_raw_func_data() - rpool.gather_pipes(wf, cfg, all=True) + rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) + rpool.gather_pipes(rpool.wf, cfg, all=True) @pytest.mark.parametrize("preconfig", ["default"]) @@ -69,21 +59,12 @@ def test_ingress_anat_raw_data( ) -> None: """Test :py:method:~`CPAC.pipeline.engine.resource.ResourcePool.ingress_raw_anat_data`.""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) - wf = initialize_nipype_wf(cfg, sub_data_dct) - part_id = sub_data_dct["subject_id"] - ses_id = sub_data_dct["unique_id"] - unique_id = f"{part_id}_{ses_id}" rpool = ResourcePool( - name=unique_id, cfg=cfg, data_paths=sub_data_dct, - unique_id=unique_id, - part_id=part_id, - ses_id=ses_id, - wf=wf, ) rpool.ingress_raw_anat_data() - rpool.gather_pipes(wf, cfg, all=True) + rpool.gather_pipes(rpool.wf, cfg, all=True) @pytest.mark.parametrize("preconfig", ["default"]) @@ -92,20 +73,11 @@ def test_ingress_pipeconfig_data( ) -> None: """Test :py:method:~`CPAC.pipeline.engine.resource.ResourcePool.ingress_pipeconfig_paths`.""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) - wf = initialize_nipype_wf(cfg, sub_data_dct) - part_id = sub_data_dct["subject_id"] - ses_id = sub_data_dct["unique_id"] - unique_id = f"{part_id}_{ses_id}" rpool = ResourcePool( - name=unique_id, cfg=cfg, data_paths=sub_data_dct, - part_id=part_id, - ses_id=ses_id, - unique_id=unique_id, ) - rpool.ingress_pipeconfig_paths() - rpool.gather_pipes(wf, cfg, all=True) + rpool.gather_pipes(rpool.wf, cfg, all=True) @pytest.mark.parametrize("preconfig", ["anat-only"]) @@ -115,10 +87,9 @@ def test_build_anat_preproc_stack( """Test :py:func:~`CPAC.pipeline.cpac_pipeline.build_anat_preproc_stack`.""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) - wf = initialize_nipype_wf(cfg, sub_data_dct) - rpool = initiate_rpool(wf, cfg, sub_data_dct) + rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) pipeline_blocks = build_anat_preproc_stack(rpool, cfg) - wf = connect_pipeline(wf, cfg, rpool, pipeline_blocks) + wf = connect_pipeline(rpool.wf, cfg, rpool, pipeline_blocks) rpool.gather_pipes(wf, cfg) @@ -126,7 +97,6 @@ def test_build_anat_preproc_stack( def test_build_workflow(bids_examples: Path, preconfig: str, tmp_path: Path) -> None: """Test :py:func:~`CPAC.pipeline.cpac_pipeline.build_workflow`.""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) - wf = initialize_nipype_wf(cfg, sub_data_dct) - rpool = initiate_rpool(wf, cfg, sub_data_dct) + rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) wf = build_workflow(sub_data_dct["subject_id"], sub_data_dct, cfg) rpool.gather_pipes(wf, cfg) diff --git a/CPAC/utils/configuration/configuration.py b/CPAC/utils/configuration/configuration.py index 8444cce105..bcac06df3a 100644 --- 
a/CPAC/utils/configuration/configuration.py +++ b/CPAC/utils/configuration/configuration.py @@ -622,6 +622,13 @@ def key_type_error(self, key): ) +class EmptyConfiguration(Configuration): + """A Configuration with all methods and no values.""" + + def __init__(self) -> None: + """Initialize an empty configuration.""" + + def check_pname(p_name: str, pipe_config: Configuration) -> str: """Check / set `p_name`, the str representation of a pipeline for use in filetrees. From f1f77050de27d788b74ef67912eb50ff1ab9fafb Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 10 Jul 2024 17:06:06 -0400 Subject: [PATCH 23/93] :recycle: Finish moving `create_func_datasource` from func to method --- CPAC/func_preproc/func_ingress.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CPAC/func_preproc/func_ingress.py b/CPAC/func_preproc/func_ingress.py index 4b995a18ba..56e472997c 100644 --- a/CPAC/func_preproc/func_ingress.py +++ b/CPAC/func_preproc/func_ingress.py @@ -16,7 +16,6 @@ # License along with C-PAC. If not, see . """Ingress functional data for preprocessing.""" -from CPAC.utils.datasource import create_func_datasource from CPAC.utils.strategy import Strategy @@ -41,7 +40,9 @@ def connect_func_ingress( else: workflow_name = f"func_gather_{unique_id}_{num_strat}" - func_wf = create_func_datasource(func_paths_dict, workflow_name) + func_wf = strat._resource_pool.create_func_datasource( + func_paths_dict, workflow_name + ) func_wf.inputs.inputnode.set( subject=subject_id, From 515b79167da3120b3c1d1e0ebf8f3a23f0d83890 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 11 Jul 2024 10:20:55 -0400 Subject: [PATCH 24/93] =?UTF-8?q?:recycle:=20EmptyConfiguration=20?= =?UTF-8?q?=E2=86=92=20Preconfiguration('blank')?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CPAC/pipeline/engine/resource.py | 4 ++-- CPAC/utils/configuration/configuration.py | 7 ------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 72a3036fbf..76065fb28e 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -41,7 +41,7 @@ from CPAC.registration.registration import transform_derivative from CPAC.resources.templates.lookup_table import lookup_identifier from CPAC.utils.bids_utils import res_in_filename -from CPAC.utils.configuration.configuration import Configuration, EmptyConfiguration +from CPAC.utils.configuration.configuration import Configuration, Preconfiguration from CPAC.utils.datasource import ( calc_delta_te_and_asym_ratio, check_for_s3, @@ -359,7 +359,7 @@ def __init__( if cfg: self.cfg = cfg else: - self.cfg = EmptyConfiguration() + self.cfg = Preconfiguration("blank") self.logdir = self._config_lookup(["pipeline_setup", "log_directory", "path"]) self.num_cpus = self._config_lookup( diff --git a/CPAC/utils/configuration/configuration.py b/CPAC/utils/configuration/configuration.py index bcac06df3a..8444cce105 100644 --- a/CPAC/utils/configuration/configuration.py +++ b/CPAC/utils/configuration/configuration.py @@ -622,13 +622,6 @@ def key_type_error(self, key): ) -class EmptyConfiguration(Configuration): - """A Configuration with all methods and no values.""" - - def __init__(self) -> None: - """Initialize an empty configuration.""" - - def check_pname(p_name: str, pipe_config: Configuration) -> str: """Check / set `p_name`, the str representation of a pipeline for use in filetrees. 
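Editor's sketch between patches (hypothetical helper, not part of any commit above or below): the change in PATCH 24 means code that needs a Configuration but receives none can fall back to the packaged "blank" preconfig instead of the removed EmptyConfiguration stub. The helper name resolve_config is illustrative only; Configuration and Preconfiguration are the classes imported in the diff itself, and the fallback mirrors what ResourcePool.__init__ does after this patch.

    from typing import Optional

    from CPAC.utils.configuration.configuration import Configuration, Preconfiguration

    def resolve_config(cfg: Optional[Configuration] = None) -> Configuration:
        """Return the given Configuration, or fall back to the packaged 'blank' preconfig."""
        # Same fallback ResourcePool.__init__ uses once EmptyConfiguration is gone.
        return cfg if cfg is not None else Preconfiguration("blank")
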
From 3c2220d81e3206f23e0149eef6a4e970482bb6f2 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 11 Jul 2024 10:23:18 -0400 Subject: [PATCH 25/93] :pencil2: Import Path for signature --- CPAC/utils/datasource.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CPAC/utils/datasource.py b/CPAC/utils/datasource.py index 2633b56b69..8eba26bf21 100644 --- a/CPAC/utils/datasource.py +++ b/CPAC/utils/datasource.py @@ -29,6 +29,7 @@ from CPAC.resources.templates.lookup_table import format_identifier, lookup_identifier from CPAC.utils import function from CPAC.utils.bids_utils import bids_remove_entity +from CPAC.utils.interfaces.function import Function from CPAC.utils.monitoring import FMLOGGER @@ -63,6 +64,7 @@ def bidsier_prefix(unique_id): return "_".join(components) +@Function.sig_imports(["from pathlib import Path"]) def get_rest(scan: str, rest_dict: dict, resource: str = "scan") -> Path | str: """Return the path of the chosen resource in the functional file dictionary. From 8a24441ff2cfd860a4c68d6e6c542ccbdd2cb1e5 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 11 Jul 2024 10:28:45 -0400 Subject: [PATCH 26/93] :recycle: Pass `part_id` through to `DataPaths` --- CPAC/pipeline/engine/resource.py | 3 ++- CPAC/resources/tests/test_templates.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 76065fb28e..d50c5cb6ba 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -332,6 +332,7 @@ def __init__( pipe_list: Optional[list] = None, *, data_paths: Optional[DataPaths | dict] = None, + part_id: Optional[str] = None, pipeline_name: str = "", wf: Optional[pe.Workflow] = None, ): @@ -339,7 +340,7 @@ def __init__( if isinstance(data_paths, dict): data_paths = DataPaths(data_paths=data_paths) elif not data_paths: - data_paths = DataPaths() + data_paths = DataPaths(part_id=part_id) self.data_paths = data_paths # pass-through for convenient access self.creds_path = self.data_paths.creds_path diff --git a/CPAC/resources/tests/test_templates.py b/CPAC/resources/tests/test_templates.py index d9f5fa9f3c..09d753a7c0 100644 --- a/CPAC/resources/tests/test_templates.py +++ b/CPAC/resources/tests/test_templates.py @@ -29,7 +29,7 @@ @pytest.mark.parametrize("pipeline", ALL_PIPELINE_CONFIGS) def test_packaged_path_exists(pipeline): """Check that all local templates are included in at least one resolution.""" - rpool = ResourcePool(cfg=Preconfiguration(pipeline), unique_id="pytest") + rpool = ResourcePool(cfg=Preconfiguration(pipeline), part_id="pytest") rpool.ingress_pipeconfig_paths() for resource in rpool.rpool.values(): node = next(iter(resource.values())).get("data")[0] From 87fb1c012160a2eb0ea9a7fb0097c9b7ab4301c4 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 11 Jul 2024 10:37:20 -0400 Subject: [PATCH 27/93] :white_check_mark: Skip NHP configs if no torch installed --- CPAC/resources/tests/test_templates.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/CPAC/resources/tests/test_templates.py b/CPAC/resources/tests/test_templates.py index 09d753a7c0..66e8c27262 100644 --- a/CPAC/resources/tests/test_templates.py +++ b/CPAC/resources/tests/test_templates.py @@ -16,6 +16,7 @@ # License along with C-PAC. If not, see . 
"""Tests for packaged templates.""" +from importlib.util import find_spec import os import pytest @@ -26,7 +27,20 @@ from CPAC.utils.datasource import get_highest_local_res -@pytest.mark.parametrize("pipeline", ALL_PIPELINE_CONFIGS) +@pytest.mark.parametrize( + "pipeline", + [ + pytest.param( + config, + marks=pytest.mark.skipif( + not find_spec("torch"), reason="torch required for NHP configs." + ), + ) + if config in ["monkey", "nhp-macaque"] + else config + for config in ALL_PIPELINE_CONFIGS + ], +) def test_packaged_path_exists(pipeline): """Check that all local templates are included in at least one resolution.""" rpool = ResourcePool(cfg=Preconfiguration(pipeline), part_id="pytest") From 3af140714d4dcde7045524a52ecd8f663bf0aca3 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 11 Jul 2024 10:48:11 -0400 Subject: [PATCH 28/93] :white_check_mark: Use abspath for BIDS-examples --- .ruff.toml | 1 + CPAC/conftest.py | 2 +- dev/circleci_data/conftest.py | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.ruff.toml b/.ruff.toml index d690751b02..059117f23b 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -12,6 +12,7 @@ external = ["T20"] # Don't autoremove 'noqa` comments for these rules "CPAC/func_preproc/func_preproc.py" = ["E402"] "CPAC/utils/sklearn.py" = ["RUF003"] "CPAC/utils/utils.py" = ["T201"] # until `repickle` is removed +"dev/circleci_data/conftest.py" = ["F401"] "setup.py" = ["D1"] [lint.flake8-import-conventions.extend-aliases] diff --git a/CPAC/conftest.py b/CPAC/conftest.py index c252a8b74f..b8f3512624 100644 --- a/CPAC/conftest.py +++ b/CPAC/conftest.py @@ -25,7 +25,7 @@ def bids_examples(cache) -> Path: """Get cached example BIDS directories.""" example_dir = cache.makedir("bids-examples") - bids_dir = Path(example_dir / "bids-examples") + bids_dir = Path(example_dir / "bids-examples").absolute() if not bids_dir.exists(): from git import Repo diff --git a/dev/circleci_data/conftest.py b/dev/circleci_data/conftest.py index 0b39d51b82..4d67fdac05 100644 --- a/dev/circleci_data/conftest.py +++ b/dev/circleci_data/conftest.py @@ -15,3 +15,5 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . 
"""Pytest configuration for CircleCI-specific tests.""" + +from CPAC.conftest import bids_examples From 98c8bb80b7c5f6ee736dcb8347e12ae451962bb2 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 11 Jul 2024 14:13:24 -0400 Subject: [PATCH 29/93] :construction: :recycle: Continue updating calls to `ResourcePool` methods --- CPAC/func_preproc/func_ingress.py | 6 +- CPAC/pipeline/engine/__init__.py | 4 +- CPAC/pipeline/engine/resource.py | 865 +++++++++++++++++------------- 3 files changed, 497 insertions(+), 378 deletions(-) diff --git a/CPAC/func_preproc/func_ingress.py b/CPAC/func_preproc/func_ingress.py index 56e472997c..2105503a19 100644 --- a/CPAC/func_preproc/func_ingress.py +++ b/CPAC/func_preproc/func_ingress.py @@ -58,8 +58,6 @@ def connect_func_ingress( } ) - (workflow, diff, blip, fmap_rp_list) = strat.rpool.ingress_func_metadata( - workflow, sub_dict - ) + diff, blip, fmap_rp_list = strat.rpool.ingress_func_metadata() - return (workflow, diff, blip, fmap_rp_list) + return strat.rpool.wf, diff, blip, fmap_rp_list diff --git a/CPAC/pipeline/engine/__init__.py b/CPAC/pipeline/engine/__init__.py index dc1d077656..975df2e26e 100644 --- a/CPAC/pipeline/engine/__init__.py +++ b/CPAC/pipeline/engine/__init__.py @@ -21,12 +21,12 @@ run_node_blocks, wrap_block, ) -from .resource import NodeData, ResourcePool +from .resource import ResourcePool, StratPool __all__ = [ "NodeBlock", - "NodeData", "ResourcePool", + "StratPool", "run_node_blocks", "wrap_block", ] diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index d50c5cb6ba..6baa3d093f 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -24,7 +24,7 @@ from pathlib import Path import re from types import NoneType -from typing import Any, Optional +from typing import Any, Literal, NamedTuple, Optional, overload import warnings from nipype.interfaces import utility as util @@ -71,6 +71,7 @@ ) EXTS = [".nii", ".gz", ".mat", ".1D", ".txt", ".csv", ".rms", ".tsv"] +STRAT_DICT = dict[str, dict[str | tuple, "Resource"]] class DataPaths: @@ -145,7 +146,7 @@ def as_dict(self) -> dict: } -def generate_prov_string(prov: list[str]) -> tuple[str, str]: +def generate_prov_string(prov: list[str] | str | tuple) -> tuple[str, str]: """Generate a string from a SINGLE RESOURCE'S dictionary of MULTIPLE PRECEDING RESOURCES (or single, if just one). NOTE: this DOES NOT merge multiple resources!!! (i.e. for merging-strat pipe_idx generation). @@ -275,54 +276,50 @@ def strip_template(data_label: str) -> tuple[str, dict[str, str]]: return data_label, json -class NodeData: - r"""Attribute access for ResourcePool.get_data outputs. - - Class to hold outputs of CPAC.pipeline.engine.ResourcePool().get_data(), so one can - do ``node_data = strat_pool.node_data(resource)`` and have ``node_data.node`` and - ``node_data.out`` instead of doing ``node, out = strat_pool.get_data(resource)`` - and needing two variables (``node`` and ``out``) to store that information. - - Also includes ``variant`` attribute providing the resource's self-keyed value - within its ``CpacVariant`` dictionary. - - Examples - -------- - >>> rp = ResourcePool() - >>> rp.node_data(None) - NotImplemented (NotImplemented) - - >>> rp.set_data('test', - ... pe.Node(Function(input_names=[]), 'test'), - ... 'b', [], 0, 'test') - >>> rp.node_data('test') - test (b) - >>> rp.node_data('test').out - 'b' - - >>> try: - ... rp.node_data('b') - ... except LookupError as lookup_error: - ... 
print(str(lookup_error).strip().split('\n')[0].strip()) - [!] C-PAC says: None of the listed resources are in the resource pool: - """ - - # pylint: disable=too-few-public-methods - def __init__(self, strat_pool=None, resource=None, **kwargs): - self.node = NotImplemented - self.out = NotImplemented - if strat_pool is not None and resource is not None: - self.node, self.out = strat_pool.get_data(resource, **kwargs) +class ResourceData(NamedTuple): + """Attribute and tuple access for ResourceData.""" - def __repr__(self): # noqa: D105 - return f'{getattr(self.node, "name", str(self.node))} ({self.out})' + node: pe.Node + """Resource Node.""" + out: str + """Output key.""" class Resource: """A single Resource and its methods.""" + def __init__(self, data: tuple[pe.Node, str], json: dict | list) -> None: + """Initialize a Resource.""" + self.data = ResourceData(*data) + """Tuple of source Node and output key.""" + self.json = json + """Metadata.""" + self._keys = {"data", "json"} + """Dictionary-style subscriptable keys.""" + + def keys(self) -> list[str]: + """Return list of subscriptable keys.""" + return list(self._keys) + + def __getitem__(self, name: str | tuple[str]) -> tuple[pe.Node, str | tuple[str]]: + """Provide legacy dict-style get access.""" + if name in self.keys(): + return getattr(self, name) + msg = f"Key '{name}' not set in {self}." + raise KeyError(msg) + + def __setitem__(self, name: str | tuple[str], value: Any) -> None: + """Provide legacy dict-style set access.""" + setattr(self, name, value) + if name not in self.keys(): + self._keys.add(name) + + def __str__(self) -> str: + """Return string representation of Resource.""" + return f"{self.data[0]}" -class ResourcePool: + +class _Pool: """All Resources.""" def __init__( @@ -846,16 +843,30 @@ def set_data( self.rpool[resource][new_pipe_idx]["data"] = (node, output) self.rpool[resource][new_pipe_idx]["json"] = json_info - def get( + @overload + def _pool_get( + self: "ResourcePool", + resource: list[str] | str, + pipe_idx: Optional[str], + report_fetched: bool, + optional: bool, + ) -> Optional[dict[dict]] | tuple[Optional[dict[dict]], Optional[str]]: ... + @overload + def _pool_get( + self: "StratPool", + resource: list[str] | str, + pipe_idx: Optional[str], + report_fetched: bool, + optional: bool, + ) -> Optional[Resource] | tuple[Optional[Resource], Optional[str]]: ... + def _pool_get( self, resource: list[str] | str, - pipe_idx: Optional[str] = None, - report_fetched: Optional[bool] = False, - optional: Optional[bool] = False, - ) -> tuple[Optional[dict], Optional[str]] | Optional[dict]: - # NOTE!!! - # if this is the main rpool, this will return a dictionary of strats, and inside those, are dictionaries like {'data': (node, out), 'json': info} - # BUT, if this is a sub rpool (i.e. a strat_pool), this will return a one-level dictionary of {'data': (node, out), 'json': info} WITHOUT THE LEVEL OF STRAT KEYS ABOVE IT + pipe_idx: Optional[str], + report_fetched: bool, + optional: bool, + ): + """Return a dictionary of strats or a single Resource.""" if not isinstance(resource, list): resource = [resource] # if a list of potential inputs are given, pick the first one found @@ -887,8 +898,36 @@ def get( ) raise LookupError(msg) + @overload + def get_data( + self, + resource: str, + pipe_idx: Optional[list | str | tuple] = None, + report_fetched: Literal[True] = True, + quick_single: bool = False, + ) -> tuple[dict, str]: ... 
+ @overload + def get_data( + self, + resource: str, + pipe_idx: Optional[list | str | tuple] = None, + report_fetched: Literal[False] = False, + quick_single: bool = False, + ) -> dict: ... + @overload def get_data( - self, resource, pipe_idx=None, report_fetched=False, quick_single=False + self, + resource: str, + pipe_idx: Optional[list | str | tuple] = None, + report_fetched: bool = False, + quick_single: bool = False, + ) -> tuple[dict, str] | dict: ... + def get_data( + self, + resource: str, + pipe_idx: Optional[list | str | tuple] = None, + report_fetched: bool = False, + quick_single: bool = False, ): if report_fetched: if pipe_idx: @@ -997,257 +1036,6 @@ def flatten_prov(self, prov): return flat_prov return None - def get_strats(self, resources, debug=False) -> dict[str | tuple, "StratPool"]: - # TODO: NOTE: NOT COMPATIBLE WITH SUB-RPOOL/STRAT_POOLS - # TODO: (and it doesn't have to be) - import itertools - - linked_resources = [] - resource_list = [] - if debug: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("\nresources: %s", resources) - for resource in resources: - # grab the linked-input tuples - if isinstance(resource, tuple): - linked = [] - for label in list(resource): - rp_dct, fetched_resource = self.get( - label, report_fetched=True, optional=True - ) - if not rp_dct: - continue - linked.append(fetched_resource) - resource_list += linked - if len(linked) < 2: # noqa: PLR2004 - continue - linked_resources.append(linked) - else: - resource_list.append(resource) - - total_pool = [] - variant_pool = {} - len_inputs = len(resource_list) - if debug: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("linked_resources: %s", linked_resources) - verbose_logger.debug("resource_list: %s", resource_list) - for resource in resource_list: - ( - rp_dct, # <---- rp_dct has the strats/pipe_idxs as the keys on first level, then 'data' and 'json' on each strat level underneath - fetched_resource, - ) = self.get( - resource, - report_fetched=True, - optional=True, # oh, and we make the resource fetching in get_strats optional so we can have optional inputs, but they won't be optional in the node block unless we want them to be - ) - if not rp_dct: - len_inputs -= 1 - continue - sub_pool = [] - if debug: - verbose_logger.debug("len(rp_dct): %s\n", len(rp_dct)) - for strat in rp_dct.keys(): - json_info = self.get_json(fetched_resource, strat) - cpac_prov = json_info["CpacProvenance"] - sub_pool.append(cpac_prov) - if fetched_resource not in variant_pool: - variant_pool[fetched_resource] = [] - if "CpacVariant" in json_info: - for key, val in json_info["CpacVariant"].items(): - if val not in variant_pool[fetched_resource]: - variant_pool[fetched_resource] += val - variant_pool[fetched_resource].append(f"NO-{val[0]}") - - if debug: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("%s sub_pool: %s\n", resource, sub_pool) - total_pool.append(sub_pool) - - if not total_pool: - raise LookupError( - "\n\n[!] 
C-PAC says: None of the listed " - "resources in the node block being connected " - "exist in the resource pool.\n\nResources:\n" - "%s\n\n" % resource_list - ) - - # TODO: right now total_pool is: - # TODO: [[[T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-preproc_T1w:acpc_alignment], [T1w:anat_ingress,desc-preproc_T1w:anatomical_init]], - # TODO: [[T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-preproc_T1w:acpc_alignment, desc-brain_mask:brain_mask_afni], [T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-brain_mask:brain_mask_afni]]] - - # TODO: and the code below thinks total_pool is a list of lists, like [[pipe_idx, pipe_idx], [pipe_idx, pipe_idx, pipe_idx], etc.] - # TODO: and the actual resource is encoded in the tag: of the last item, every time! - # keying the strategies to the resources, inverting it - if len_inputs > 1: - strats = itertools.product(*total_pool) - - # we now currently have "strats", the combined permutations of all the strategies, as a list of tuples, each tuple combining one version of input each, being one of the permutations. - # OF ALL THE DIFFERENT INPUTS. and they are tagged by their fetched inputs with {name}:{strat}. - # so, each tuple has ONE STRAT FOR EACH INPUT, so if there are three inputs, each tuple will have 3 items. - new_strats: dict[str | tuple, StratPool] = {} - - # get rid of duplicates - TODO: refactor .product - strat_str_list = [] - strat_list_list = [] - for strat_tuple in strats: - strat_list = list(copy.deepcopy(strat_tuple)) - strat_str = str(strat_list) - if strat_str not in strat_str_list: - strat_str_list.append(strat_str) - strat_list_list.append(strat_list) - - if debug: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("len(strat_list_list): %s\n", len(strat_list_list)) - for strat_list in strat_list_list: - json_dct = {} - for strat in strat_list: - # strat is a prov list for a single resource/input - strat_resource, strat_idx = generate_prov_string(strat) - strat_json = self.get_json(strat_resource, strat=strat_idx) - json_dct[strat_resource] = strat_json - - drop = False - if linked_resources: - for linked in linked_resources: # <--- 'linked' is each tuple - if drop: - break - for xlabel in linked: - if drop: - break - xjson = copy.deepcopy(json_dct[xlabel]) - for ylabel in linked: - if xlabel == ylabel: - continue - yjson = copy.deepcopy(json_dct[ylabel]) - - if "CpacVariant" not in xjson: - xjson["CpacVariant"] = {} - if "CpacVariant" not in yjson: - yjson["CpacVariant"] = {} - - current_strat = [] - for key, val in xjson["CpacVariant"].items(): - if isinstance(val, list): - current_strat.append(val[0]) - else: - current_strat.append(val) - current_spread = list(set(variant_pool[xlabel])) - for spread_label in current_spread: - if "NO-" in spread_label: - continue - if spread_label not in current_strat: - current_strat.append(f"NO-{spread_label}") - - other_strat = [] - for key, val in yjson["CpacVariant"].items(): - if isinstance(val, list): - other_strat.append(val[0]) - else: - other_strat.append(val) - other_spread = list(set(variant_pool[ylabel])) - for spread_label in other_spread: - if "NO-" in spread_label: - continue - if spread_label not in other_strat: - other_strat.append(f"NO-{spread_label}") - - for variant in current_spread: - in_current_strat = False - in_other_strat = False - in_other_spread = False - - if variant is None: - in_current_strat = True - if None in other_spread: - in_other_strat = True - if variant in current_strat: - in_current_strat = True - if 
variant in other_strat: - in_other_strat = True - if variant in other_spread: - in_other_spread = True - - if not in_other_strat: - if in_other_spread: - if in_current_strat: - drop = True - break - - if in_other_strat: - if in_other_spread: - if not in_current_strat: - drop = True - break - if drop: - break - if drop: - continue - - # make the merged strat label from the multiple inputs - # strat_list is actually the merged CpacProvenance lists - pipe_idx = str(strat_list) - new_strats[pipe_idx] = StratPool() - # new_strats is A DICTIONARY OF RESOURCEPOOL OBJECTS! - # placing JSON info at one level higher only for copy convenience - new_strats[pipe_idx].rpool["json"] = {} - new_strats[pipe_idx].rpool["json"]["subjson"] = {} - new_strats[pipe_idx].rpool["json"]["CpacProvenance"] = strat_list - - # now just invert resource:strat to strat:resource for each resource:strat - for cpac_prov in strat_list: - resource, strat = generate_prov_string(cpac_prov) - resource_strat_dct = self.rpool[resource][strat] - # remember, `resource_strat_dct` is the dct of 'data' and 'json'. - new_strats[pipe_idx].rpool[resource] = resource_strat_dct - # `new_strats` is A DICTIONARY OF RESOURCEPOOL OBJECTS! each one is a new slice of the resource pool combined together. - self.pipe_list.append(pipe_idx) - if "CpacVariant" in resource_strat_dct["json"]: - if "CpacVariant" not in new_strats[pipe_idx].rpool["json"]: - new_strats[pipe_idx].rpool["json"]["CpacVariant"] = {} - for younger_resource, variant_list in resource_strat_dct[ - "json" - ]["CpacVariant"].items(): - if ( - younger_resource - not in new_strats[pipe_idx].rpool["json"]["CpacVariant"] - ): - new_strats[pipe_idx].rpool["json"]["CpacVariant"][ - younger_resource - ] = variant_list - # preserve each input's JSON info also - data_type = resource.split("_")[-1] - if data_type not in new_strats[pipe_idx].rpool["json"]["subjson"]: - new_strats[pipe_idx].rpool["json"]["subjson"][data_type] = {} - new_strats[pipe_idx].rpool["json"]["subjson"][data_type].update( - copy.deepcopy(resource_strat_dct["json"]) - ) - else: - new_strats = {} - for resource_strat_list in total_pool: - # total_pool will have only one list of strats, for the one input - for cpac_prov in resource_strat_list: # <------- cpac_prov here doesn't need to be modified, because it's not merging with other inputs - resource, pipe_idx = generate_prov_string(cpac_prov) - resource_strat_dct = self.rpool[resource][pipe_idx] - # remember, `resource_strat_dct` is the dct of 'data' and 'json'. - new_strats[pipe_idx] = StratPool( - rpool={resource: resource_strat_dct} - ) # <----- again, new_strats is A DICTIONARY OF RESOURCEPOOL OBJECTS! 
- # placing JSON info at one level higher only for copy convenience - new_strats[pipe_idx].rpool["json"] = resource_strat_dct["json"] - # TODO: WARNING- THIS IS A LEVEL HIGHER THAN THE ORIGINAL 'JSON' FOR EASE OF ACCESS IN CONNECT_BLOCK WITH THE .GET(JSON) - new_strats[pipe_idx].rpool["json"]["subjson"] = {} - new_strats[pipe_idx].rpool["json"]["CpacProvenance"] = cpac_prov - # preserve each input's JSON info also - data_type = resource.split("_")[-1] - if data_type not in new_strats[pipe_idx].rpool["json"]["subjson"]: - new_strats[pipe_idx].rpool["json"]["subjson"][data_type] = {} - new_strats[pipe_idx].rpool["json"]["subjson"][data_type].update( - copy.deepcopy(resource_strat_dct["json"]) - ) - return new_strats - def derivative_xfm(self, wf, label, connection, json_info, pipe_idx, pipe_x): if label in self.xfm: json_info = dict(json_info) @@ -1776,80 +1564,372 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): nii_name = pe.Node(Rename(), name=f"nii_{resource_idx}_{pipe_x}") nii_name.inputs.keep_ext = True - if resource in Outputs.ciftis: - nii_name.inputs.keep_ext = False - id_string.inputs.extension = Outputs.ciftis[resource] - else: - nii_name.inputs.keep_ext = True + if resource in Outputs.ciftis: + nii_name.inputs.keep_ext = False + id_string.inputs.extension = Outputs.ciftis[resource] + else: + nii_name.inputs.keep_ext = True + + if resource in Outputs.giftis: + nii_name.inputs.keep_ext = False + id_string.inputs.extension = f"{Outputs.giftis[resource]}.gii" + + else: + nii_name.inputs.keep_ext = True + + wf.connect(id_string, "out_filename", nii_name, "format_string") + + node, out = self.rpool[resource][pipe_idx]["data"] + try: + wf.connect(node, out, nii_name, "in_file") + except OSError as os_error: + WFLOGGER.warning(os_error) + continue + + write_json_imports = ["import os", "import json"] + write_json = pe.Node( + Function( + input_names=["json_data", "filename"], + output_names=["json_file"], + function=write_output_json, + imports=write_json_imports, + ), + name=f"json_{resource_idx}_{pipe_x}", + ) + write_json.inputs.json_data = json_info + + wf.connect(id_string, "out_filename", write_json, "filename") + ds = pe.Node(DataSink(), name=f"sinker_{resource_idx}_{pipe_x}") + ds.inputs.parameterization = False + ds.inputs.base_directory = out_dct["out_dir"] + ds.inputs.encrypt_bucket_keys = cfg.pipeline_setup["Amazon-AWS"][ + "s3_encryption" + ] + ds.inputs.container = out_dct["container"] + + if cfg.pipeline_setup["Amazon-AWS"]["aws_output_bucket_credentials"]: + ds.inputs.creds_path = cfg.pipeline_setup["Amazon-AWS"][ + "aws_output_bucket_credentials" + ] + expected_outputs += ( + out_dct["subdir"], + create_id_string( + self.cfg, + unique_id, + resource_idx, + template_desc=id_string.inputs.template_desc, + atlas_id=atlas_id, + subdir=out_dct["subdir"], + ), + ) + wf.connect(nii_name, "out_file", ds, f'{out_dct["subdir"]}.@data') + wf.connect(write_json, "json_file", ds, f'{out_dct["subdir"]}.@json') + outputs_logger.info(expected_outputs) + + def node_data(self, resource: str | tuple[str], **kwargs) -> ResourceData: + """Create ResourceData objects.""" + return ResourceData(self, resource, **kwargs) + + +class ResourcePool(_Pool): + """A pool of Resources.""" + + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[str] = None, + report_fetched: Literal[False] = False, + optional: bool = False, + ) -> Optional[STRAT_DICT]: ... 
+ @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[str], + report_fetched: Literal[True], + optional: bool = False, + ) -> tuple[Optional[STRAT_DICT], Optional[str]]: ... + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[str] = None, + *, + report_fetched: Literal[True], + optional: bool = False, + ) -> tuple[Optional[STRAT_DICT], Optional[str]]: ... + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[str] = None, + report_fetched: bool = False, + optional: bool = False, + ) -> Optional[STRAT_DICT] | tuple[STRAT_DICT, Optional[str]]: ... + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[str] = None, + report_fetched: bool = False, + optional: bool = False, + ): + """Return a dictionary of strats. + + Inside those, are dictionaries like ``{'data': (node, out), 'json': info}``. + """ + self._pool_get(resource, pipe_idx, report_fetched, optional) + + def get_strats(self, resources, debug=False) -> dict[str | tuple, "StratPool"]: + # TODO: NOTE: NOT COMPATIBLE WITH SUB-RPOOL/STRAT_POOLS + # TODO: (and it doesn't have to be) + import itertools + + linked_resources = [] + resource_list = [] + if debug: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("\nresources: %s", resources) + for resource in resources: + # grab the linked-input tuples + if isinstance(resource, tuple): + linked = [] + for label in list(resource): + rp_dct, fetched_resource = self.get( + label, report_fetched=True, optional=True + ) + if not rp_dct: + continue + linked.append(fetched_resource) + resource_list += linked + if len(linked) < 2: # noqa: PLR2004 + continue + linked_resources.append(linked) + else: + resource_list.append(resource) + + total_pool = [] + variant_pool = {} + len_inputs = len(resource_list) + if debug: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("linked_resources: %s", linked_resources) + verbose_logger.debug("resource_list: %s", resource_list) + for resource in resource_list: + ( + rp_dct, # <---- rp_dct has the strats/pipe_idxs as the keys on first level, then 'data' and 'json' on each strat level underneath + fetched_resource, + ) = self.get( + resource, + report_fetched=True, + optional=True, # oh, and we make the resource fetching in get_strats optional so we can have optional inputs, but they won't be optional in the node block unless we want them to be + ) + if not rp_dct: + len_inputs -= 1 + continue + sub_pool = [] + if debug: + verbose_logger.debug("len(rp_dct): %s\n", len(rp_dct)) + for strat in rp_dct.keys(): + json_info = self.get_json(fetched_resource, strat) + cpac_prov = json_info["CpacProvenance"] + sub_pool.append(cpac_prov) + if fetched_resource not in variant_pool: + variant_pool[fetched_resource] = [] + if "CpacVariant" in json_info: + for key, val in json_info["CpacVariant"].items(): + if val not in variant_pool[fetched_resource]: + variant_pool[fetched_resource] += val + variant_pool[fetched_resource].append(f"NO-{val[0]}") + + if debug: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("%s sub_pool: %s\n", resource, sub_pool) + total_pool.append(sub_pool) + + if not total_pool: + raise LookupError( + "\n\n[!] 
C-PAC says: None of the listed " + "resources in the node block being connected " + "exist in the resource pool.\n\nResources:\n" + "%s\n\n" % resource_list + ) - if resource in Outputs.giftis: - nii_name.inputs.keep_ext = False - id_string.inputs.extension = f"{Outputs.giftis[resource]}.gii" + # TODO: right now total_pool is: + # TODO: [[[T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-preproc_T1w:acpc_alignment], [T1w:anat_ingress,desc-preproc_T1w:anatomical_init]], + # TODO: [[T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-preproc_T1w:acpc_alignment, desc-brain_mask:brain_mask_afni], [T1w:anat_ingress, desc-preproc_T1w:anatomical_init, desc-brain_mask:brain_mask_afni]]] - else: - nii_name.inputs.keep_ext = True + # TODO: and the code below thinks total_pool is a list of lists, like [[pipe_idx, pipe_idx], [pipe_idx, pipe_idx, pipe_idx], etc.] + # TODO: and the actual resource is encoded in the tag: of the last item, every time! + # keying the strategies to the resources, inverting it + if len_inputs > 1: + strats = itertools.product(*total_pool) - wf.connect(id_string, "out_filename", nii_name, "format_string") + # we now currently have "strats", the combined permutations of all the strategies, as a list of tuples, each tuple combining one version of input each, being one of the permutations. + # OF ALL THE DIFFERENT INPUTS. and they are tagged by their fetched inputs with {name}:{strat}. + # so, each tuple has ONE STRAT FOR EACH INPUT, so if there are three inputs, each tuple will have 3 items. + new_strats: dict[str | tuple, StratPool] = {} - node, out = self.rpool[resource][pipe_idx]["data"] - try: - wf.connect(node, out, nii_name, "in_file") - except OSError as os_error: - WFLOGGER.warning(os_error) - continue + # get rid of duplicates - TODO: refactor .product + strat_str_list = [] + strat_list_list = [] + for strat_tuple in strats: + strat_list = list(copy.deepcopy(strat_tuple)) + strat_str = str(strat_list) + if strat_str not in strat_str_list: + strat_str_list.append(strat_str) + strat_list_list.append(strat_list) - write_json_imports = ["import os", "import json"] - write_json = pe.Node( - Function( - input_names=["json_data", "filename"], - output_names=["json_file"], - function=write_output_json, - imports=write_json_imports, - ), - name=f"json_{resource_idx}_{pipe_x}", - ) - write_json.inputs.json_data = json_info + if debug: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("len(strat_list_list): %s\n", len(strat_list_list)) + for strat_list in strat_list_list: + json_dct = {} + for strat in strat_list: + # strat is a prov list for a single resource/input + strat_resource, strat_idx = generate_prov_string(strat) + strat_json = self.get_json(strat_resource, strat=strat_idx) + json_dct[strat_resource] = strat_json - wf.connect(id_string, "out_filename", write_json, "filename") - ds = pe.Node(DataSink(), name=f"sinker_{resource_idx}_{pipe_x}") - ds.inputs.parameterization = False - ds.inputs.base_directory = out_dct["out_dir"] - ds.inputs.encrypt_bucket_keys = cfg.pipeline_setup["Amazon-AWS"][ - "s3_encryption" - ] - ds.inputs.container = out_dct["container"] + drop = False + if linked_resources: + for linked in linked_resources: # <--- 'linked' is each tuple + if drop: + break + for xlabel in linked: + if drop: + break + xjson = copy.deepcopy(json_dct[xlabel]) + for ylabel in linked: + if xlabel == ylabel: + continue + yjson = copy.deepcopy(json_dct[ylabel]) - if cfg.pipeline_setup["Amazon-AWS"]["aws_output_bucket_credentials"]: - 
ds.inputs.creds_path = cfg.pipeline_setup["Amazon-AWS"][ - "aws_output_bucket_credentials" - ] - expected_outputs += ( - out_dct["subdir"], - create_id_string( - self.cfg, - unique_id, - resource_idx, - template_desc=id_string.inputs.template_desc, - atlas_id=atlas_id, - subdir=out_dct["subdir"], - ), - ) - wf.connect(nii_name, "out_file", ds, f'{out_dct["subdir"]}.@data') - wf.connect(write_json, "json_file", ds, f'{out_dct["subdir"]}.@json') - outputs_logger.info(expected_outputs) + if "CpacVariant" not in xjson: + xjson["CpacVariant"] = {} + if "CpacVariant" not in yjson: + yjson["CpacVariant"] = {} - def node_data(self, resource, **kwargs): - """Create NodeData objects. + current_strat = [] + for key, val in xjson["CpacVariant"].items(): + if isinstance(val, list): + current_strat.append(val[0]) + else: + current_strat.append(val) + current_spread = list(set(variant_pool[xlabel])) + for spread_label in current_spread: + if "NO-" in spread_label: + continue + if spread_label not in current_strat: + current_strat.append(f"NO-{spread_label}") - Parameters - ---------- - resource : str + other_strat = [] + for key, val in yjson["CpacVariant"].items(): + if isinstance(val, list): + other_strat.append(val[0]) + else: + other_strat.append(val) + other_spread = list(set(variant_pool[ylabel])) + for spread_label in other_spread: + if "NO-" in spread_label: + continue + if spread_label not in other_strat: + other_strat.append(f"NO-{spread_label}") - Returns - ------- - NodeData - """ - return NodeData(self, resource, **kwargs) + for variant in current_spread: + in_current_strat = False + in_other_strat = False + in_other_spread = False + + if variant is None: + in_current_strat = True + if None in other_spread: + in_other_strat = True + if variant in current_strat: + in_current_strat = True + if variant in other_strat: + in_other_strat = True + if variant in other_spread: + in_other_spread = True + + if not in_other_strat: + if in_other_spread: + if in_current_strat: + drop = True + break + + if in_other_strat: + if in_other_spread: + if not in_current_strat: + drop = True + break + if drop: + break + if drop: + continue + + # make the merged strat label from the multiple inputs + # strat_list is actually the merged CpacProvenance lists + pipe_idx = str(strat_list) + new_strats[pipe_idx] = StratPool() + # new_strats is A DICTIONARY OF RESOURCEPOOL OBJECTS! + # placing JSON info at one level higher only for copy convenience + new_strats[pipe_idx].rpool["json"] = {} + new_strats[pipe_idx].rpool["json"]["subjson"] = {} + new_strats[pipe_idx].rpool["json"]["CpacProvenance"] = strat_list + + # now just invert resource:strat to strat:resource for each resource:strat + for cpac_prov in strat_list: + resource, strat = generate_prov_string(cpac_prov) + resource_strat_dct = self.rpool[resource][strat] + # remember, `resource_strat_dct` is the dct of 'data' and 'json'. + new_strats[pipe_idx].rpool[resource] = resource_strat_dct + # `new_strats` is A DICTIONARY OF RESOURCEPOOL OBJECTS! each one is a new slice of the resource pool combined together. 
+ self.pipe_list.append(pipe_idx) + if "CpacVariant" in resource_strat_dct["json"]: + if "CpacVariant" not in new_strats[pipe_idx].rpool["json"]: + new_strats[pipe_idx].rpool["json"]["CpacVariant"] = {} + for younger_resource, variant_list in resource_strat_dct[ + "json" + ]["CpacVariant"].items(): + if ( + younger_resource + not in new_strats[pipe_idx].rpool["json"]["CpacVariant"] + ): + new_strats[pipe_idx].rpool["json"]["CpacVariant"][ + younger_resource + ] = variant_list + # preserve each input's JSON info also + data_type = resource.split("_")[-1] + if data_type not in new_strats[pipe_idx].rpool["json"]["subjson"]: + new_strats[pipe_idx].rpool["json"]["subjson"][data_type] = {} + new_strats[pipe_idx].rpool["json"]["subjson"][data_type].update( + copy.deepcopy(resource_strat_dct["json"]) + ) + else: + new_strats = {} + for resource_strat_list in total_pool: + # total_pool will have only one list of strats, for the one input + for cpac_prov in resource_strat_list: # <------- cpac_prov here doesn't need to be modified, because it's not merging with other inputs + resource, pipe_idx = generate_prov_string(cpac_prov) + resource_strat_dct = self.rpool[resource][pipe_idx] + # remember, `resource_strat_dct` is the dct of 'data' and 'json'. + new_strats[pipe_idx] = StratPool( + rpool={resource: resource_strat_dct} + ) # <----- again, new_strats is A DICTIONARY OF RESOURCEPOOL OBJECTS! + # placing JSON info at one level higher only for copy convenience + new_strats[pipe_idx].rpool["json"] = resource_strat_dct["json"] + # TODO: WARNING- THIS IS A LEVEL HIGHER THAN THE ORIGINAL 'JSON' FOR EASE OF ACCESS IN CONNECT_BLOCK WITH THE .GET(JSON) + new_strats[pipe_idx].rpool["json"]["subjson"] = {} + new_strats[pipe_idx].rpool["json"]["CpacProvenance"] = cpac_prov + # preserve each input's JSON info also + data_type = resource.split("_")[-1] + if data_type not in new_strats[pipe_idx].rpool["json"]["subjson"]: + new_strats[pipe_idx].rpool["json"]["subjson"][data_type] = {} + new_strats[pipe_idx].rpool["json"]["subjson"][data_type].update( + copy.deepcopy(resource_strat_dct["json"]) + ) + return new_strats def ingress_freesurfer(self) -> None: """Ingress FreeSurfer data.""" @@ -2670,10 +2750,12 @@ def ingress_raw_anat_data(self) -> None: self.ingress_freesurfer() -class StratPool(ResourcePool): +class StratPool(_Pool): """A pool of ResourcePools keyed by strategy.""" - def __init__(self, rpool: Optional[dict[ResourcePool]] = None) -> None: + def __init__( + self, rpool: Optional[dict[str | list | tuple, ResourcePool]] = None + ) -> None: """Initialize a StratPool.""" if not rpool: self.rpool = {} @@ -2683,6 +2765,45 @@ def __init__(self, rpool: Optional[dict[ResourcePool]] = None) -> None: def append_name(self, name): self.name.append(name) - def get_strats(self, resources, debug) -> None: - """ResourcePool method that is not valid for a StratPool.""" - raise NotImplementedError + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[str] = None, + report_fetched: Literal[False] = False, + optional: bool = False, + ) -> Optional[Resource]: ... + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[str], + report_fetched: Literal[True], + optional: bool = False, + ) -> tuple[Optional[Resource], Optional[str]]: ... + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[str], + *, + report_fetched: Literal[True], + optional: bool = False, + ) -> tuple[Optional[Resource], Optional[str]]: ... 
+ @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[str] = None, + report_fetched: bool = False, + optional: bool = False, + ) -> Optional[Resource] | tuple[Optional[Resource], Optional[str]]: ... + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[str] = None, + report_fetched: bool = False, + optional: bool = False, + ): + """Return a Resource.""" + self._pool_get(resource, pipe_idx, report_fetched, optional) From 4d9934de40f502b30f38593b6b0faf0cbc61d0a3 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 11 Jul 2024 17:37:13 -0400 Subject: [PATCH 30/93] :construction: WIP :recycle: Fix `StratPool.__init__` --- CPAC/pipeline/engine/resource.py | 280 ++++++++++++++++--------------- 1 file changed, 142 insertions(+), 138 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 6baa3d093f..681e5ef75c 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -322,132 +322,11 @@ def __str__(self) -> str: class _Pool: """All Resources.""" - def __init__( - self, - name: str = "", - cfg: Optional[Configuration] = None, - pipe_list: Optional[list] = None, - *, - data_paths: Optional[DataPaths | dict] = None, - part_id: Optional[str] = None, - pipeline_name: str = "", - wf: Optional[pe.Workflow] = None, - ): - """Initialize a ResourcePool.""" - if isinstance(data_paths, dict): - data_paths = DataPaths(data_paths=data_paths) - elif not data_paths: - data_paths = DataPaths(part_id=part_id) - self.data_paths = data_paths - # pass-through for convenient access - self.creds_path = self.data_paths.creds_path - self.part_id = self.data_paths.part_id - self.ses_id = self.data_paths.ses_id - self.unique_id = self.data_paths.unique_id - self.rpool = {} - - if not pipe_list: - self.pipe_list = [] - else: - self.pipe_list = pipe_list - + def __init__(self, name: str = "") -> None: + """Initialize a ResourcePool or StratPool.""" self.name = name self.info = {} - if cfg: - self.cfg = cfg - else: - self.cfg = Preconfiguration("blank") - - self.logdir = self._config_lookup(["pipeline_setup", "log_directory", "path"]) - self.num_cpus = self._config_lookup( - ["pipeline_setup", "system_config", "max_cores_per_participant"] - ) - self.num_ants_cores = self._config_lookup( - ["pipeline_setup", "system_config", "num_ants_threads"] - ) - - self.ants_interp = self._config_lookup( - [ - "registration_workflows", - "functional_registration", - "func_registration_to_template", - "ANTs_pipelines", - "interpolation", - ] - ) - self.fsl_interp = self._config_lookup( - [ - "registration_workflows", - "functional_registration", - "func_registration_to_template", - "FNIRT_pipelines", - "interpolation", - ] - ) - self.func_reg = self._config_lookup( - [ - "registration_workflows", - "functional_registration", - "func_registration_to_template", - "run", - ] - ) - - self.run_smoothing = "smoothed" in self._config_lookup( - ["post_processing", "spatial_smoothing", "output"], list - ) - self.smoothing_bool = self._config_lookup( - ["post_processing", "spatial_smoothing", "run"] - ) - self.run_zscoring = "z-scored" in self._config_lookup( - ["post_processing", "z-scoring", "output"], list - ) - self.zscoring_bool = self._config_lookup( - ["post_processing", "z-scoring", "run"] - ) - self.fwhm = self._config_lookup( - ["post_processing", "spatial_smoothing", "fwhm"] - ) - self.smooth_opts = self._config_lookup( - ["post_processing", "spatial_smoothing", "smoothing_method"] - ) - - if wf: - self.wf = wf - else: - 
self.initialize_nipype_wf(pipeline_name) - - self.xfm = [ - "alff", - "desc-sm_alff", - "desc-zstd_alff", - "desc-sm-zstd_alff", - "falff", - "desc-sm_falff", - "desc-zstd_falff", - "desc-sm-zstd_falff", - "reho", - "desc-sm_reho", - "desc-zstd_reho", - "desc-sm-zstd_reho", - ] - ingress_derivatives = False - try: - if self.data_paths.derivatives_dir and self._config_lookup( - ["pipeline_setup", "outdir_ingress", "run"], bool - ): - ingress_derivatives = True - except (AttributeError, KeyError, TypeError): - pass - if ingress_derivatives: - self.ingress_output_dir() - else: - self.ingress_raw_anat_data() - if data_paths.func: - self.ingress_raw_func_data() - self.ingress_pipeconfig_paths() - def __repr__(self) -> str: """Return reproducible ResourcePool string.""" params = [ @@ -455,7 +334,7 @@ def __repr__(self) -> str: for param in ["rpool", "name", "cfg", "pipe_list"] if getattr(self, param, None) ] - return f'ResourcePool({", ".join(params)})' + return f'{self.__class__.__name__}({", ".join(params)})' def __str__(self) -> str: """Return string representation of ResourcePool.""" @@ -675,9 +554,6 @@ def keys(self) -> KeysView: """Return rpool's keys.""" return self.rpool.keys() - def get_resources(self): - return self.rpool.keys() - @staticmethod def get_raw_label(resource: str) -> str: """Remove ``desc-*`` label.""" @@ -811,7 +687,7 @@ def set_data( } json_info["CpacProvenance"] = new_prov_list - if resource not in self.rpool.keys(): + if resource not in self.keys(): self.rpool[resource] = {} elif not fork: # <--- in the event of multiple strategies/options, this will run for every option; just keep in mind search = False @@ -871,7 +747,7 @@ def _pool_get( resource = [resource] # if a list of potential inputs are given, pick the first one found for label in resource: - if label in self.rpool.keys(): + if label in self.keys(): _found = self.rpool[label] if pipe_idx: _found = _found[pipe_idx] @@ -1162,7 +1038,7 @@ def post_process(self, wf, label, connection, json_info, pipe_idx, pipe_x, outs) # or lfcd [binarized or weighted] mask = "template-specification-file" elif "space-template" in label: - if "space-template_res-derivative_desc-bold_mask" in self.rpool.keys(): + if "space-template_res-derivative_desc-bold_mask" in self.keys(): mask = "space-template_res-derivative_desc-bold_mask" else: mask = "space-template_desc-bold_mask" @@ -1316,7 +1192,7 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): # substring_excl.append(['bold']) excl += Outputs.debugging - for resource in self.rpool.keys(): + for resource in self.keys(): if resource not in Outputs.any: continue @@ -1377,7 +1253,7 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): # TODO: have to link the pipe_idx's here. and call up 'desc-preproc_T1w' from a Sources in a json and replace. here. # TODO: can do the pipeline_description.json variants here too! 
- for resource in self.rpool.keys(): + for resource in self.keys(): if resource not in Outputs.any: continue @@ -1628,12 +1504,136 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): def node_data(self, resource: str | tuple[str], **kwargs) -> ResourceData: """Create ResourceData objects.""" - return ResourceData(self, resource, **kwargs) + return ResourceData(*self.get_data(resource, **kwargs)) class ResourcePool(_Pool): """A pool of Resources.""" + def __init__( + self, + name: str = "", + cfg: Optional[Configuration] = None, + pipe_list: Optional[list] = None, + *, + data_paths: Optional[DataPaths | dict] = None, + part_id: Optional[str] = None, + pipeline_name: str = "", + wf: Optional[pe.Workflow] = None, + ) -> None: + """Initialize a ResourcePool.""" + super().__init__(name=name) + if isinstance(data_paths, dict): + data_paths = DataPaths(data_paths=data_paths) + elif not data_paths: + data_paths = DataPaths(part_id=part_id) + self.data_paths = data_paths + # pass-through for convenient access + self.creds_path = self.data_paths.creds_path + self.part_id = self.data_paths.part_id + self.ses_id = self.data_paths.ses_id + self.unique_id = self.data_paths.unique_id + self.rpool = {} + + if not pipe_list: + self.pipe_list = [] + else: + self.pipe_list = pipe_list + + if cfg: + self.cfg = cfg + else: + self.cfg = Preconfiguration("blank") + + self.logdir = self._config_lookup(["pipeline_setup", "log_directory", "path"]) + self.num_cpus = self._config_lookup( + ["pipeline_setup", "system_config", "max_cores_per_participant"] + ) + self.num_ants_cores = self._config_lookup( + ["pipeline_setup", "system_config", "num_ants_threads"] + ) + + self.ants_interp = self._config_lookup( + [ + "registration_workflows", + "functional_registration", + "func_registration_to_template", + "ANTs_pipelines", + "interpolation", + ] + ) + self.fsl_interp = self._config_lookup( + [ + "registration_workflows", + "functional_registration", + "func_registration_to_template", + "FNIRT_pipelines", + "interpolation", + ] + ) + self.func_reg = self._config_lookup( + [ + "registration_workflows", + "functional_registration", + "func_registration_to_template", + "run", + ] + ) + + self.run_smoothing = "smoothed" in self._config_lookup( + ["post_processing", "spatial_smoothing", "output"], list + ) + self.smoothing_bool = self._config_lookup( + ["post_processing", "spatial_smoothing", "run"] + ) + self.run_zscoring = "z-scored" in self._config_lookup( + ["post_processing", "z-scoring", "output"], list + ) + self.zscoring_bool = self._config_lookup( + ["post_processing", "z-scoring", "run"] + ) + self.fwhm = self._config_lookup( + ["post_processing", "spatial_smoothing", "fwhm"] + ) + self.smooth_opts = self._config_lookup( + ["post_processing", "spatial_smoothing", "smoothing_method"] + ) + + if wf: + self.wf = wf + else: + self.initialize_nipype_wf(pipeline_name) + + self.xfm = [ + "alff", + "desc-sm_alff", + "desc-zstd_alff", + "desc-sm-zstd_alff", + "falff", + "desc-sm_falff", + "desc-zstd_falff", + "desc-sm-zstd_falff", + "reho", + "desc-sm_reho", + "desc-zstd_reho", + "desc-sm-zstd_reho", + ] + ingress_derivatives = False + try: + if self.data_paths.derivatives_dir and self._config_lookup( + ["pipeline_setup", "outdir_ingress", "run"], bool + ): + ingress_derivatives = True + except (AttributeError, KeyError, TypeError): + pass + if ingress_derivatives: + self.ingress_output_dir() + else: + self.ingress_raw_anat_data() + if data_paths.func: + self.ingress_raw_func_data() + 
self.ingress_pipeconfig_paths() + @overload def get( self, @@ -1678,7 +1678,7 @@ def get( Inside those, are dictionaries like ``{'data': (node, out), 'json': info}``. """ - self._pool_get(resource, pipe_idx, report_fetched, optional) + return self._pool_get(resource, pipe_idx, report_fetched, optional) def get_strats(self, resources, debug=False) -> dict[str | tuple, "StratPool"]: # TODO: NOTE: NOT COMPATIBLE WITH SUB-RPOOL/STRAT_POOLS @@ -1871,7 +1871,7 @@ def get_strats(self, resources, debug=False) -> dict[str | tuple, "StratPool"]: # make the merged strat label from the multiple inputs # strat_list is actually the merged CpacProvenance lists pipe_idx = str(strat_list) - new_strats[pipe_idx] = StratPool() + new_strats[pipe_idx] = StratPool(name=pipe_idx) # new_strats is A DICTIONARY OF RESOURCEPOOL OBJECTS! # placing JSON info at one level higher only for copy convenience new_strats[pipe_idx].rpool["json"] = {} @@ -1915,7 +1915,7 @@ def get_strats(self, resources, debug=False) -> dict[str | tuple, "StratPool"]: resource_strat_dct = self.rpool[resource][pipe_idx] # remember, `resource_strat_dct` is the dct of 'data' and 'json'. new_strats[pipe_idx] = StratPool( - rpool={resource: resource_strat_dct} + rpool={resource: resource_strat_dct}, name=pipe_idx ) # <----- again, new_strats is A DICTIONARY OF RESOURCEPOOL OBJECTS! # placing JSON info at one level higher only for copy convenience new_strats[pipe_idx].rpool["json"] = resource_strat_dct["json"] @@ -2754,9 +2754,13 @@ class StratPool(_Pool): """A pool of ResourcePools keyed by strategy.""" def __init__( - self, rpool: Optional[dict[str | list | tuple, ResourcePool]] = None + self, + rpool: Optional[dict[str | list | tuple, ResourcePool]] = None, + *, + name: str = "", ) -> None: """Initialize a StratPool.""" + super().__init__(name=name) if not rpool: self.rpool = {} else: @@ -2806,4 +2810,4 @@ def get( optional: bool = False, ): """Return a Resource.""" - self._pool_get(resource, pipe_idx, report_fetched, optional) + return self._pool_get(resource, pipe_idx, report_fetched, optional) From 1cc39143b0bc7cb2ce189c73a8227cad29e8ca06 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Fri, 12 Jul 2024 15:37:58 -0400 Subject: [PATCH 31/93] :art: :technologist: Clarify typing --- CPAC/pipeline/engine/resource.py | 944 +++++++++++++++++-------------- CPAC/utils/typing.py | 20 + 2 files changed, 525 insertions(+), 439 deletions(-) create mode 100644 CPAC/utils/typing.py diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 681e5ef75c..c6dcdca260 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -27,8 +27,8 @@ from typing import Any, Literal, NamedTuple, Optional, overload import warnings -from nipype.interfaces import utility as util -from nipype.interfaces.utility import Rename +from nipype.interfaces import utility as util # type: ignore [import-untyped] +from nipype.interfaces.utility import Rename # type: ignore [import-untyped] from CPAC.image_utils.spatial_smoothing import spatial_smoothing from CPAC.image_utils.statistical_transforms import ( @@ -61,6 +61,7 @@ WFLOGGER, ) from CPAC.utils.outputs import Outputs +from CPAC.utils.typing import LIST_OF_LIST_OF_STR, PIPE_IDX from CPAC.utils.utils import ( check_prov_for_regtool, create_id_string, @@ -71,13 +72,16 @@ ) EXTS = [".nii", ".gz", ".mat", ".1D", ".txt", ".csv", ".rms", ".tsv"] -STRAT_DICT = dict[str, dict[str | tuple, "Resource"]] +POOL_DICT = dict[str | tuple, "STRAT_DICT"] +STRAT_DICT = dict[str | tuple, dict | 
"Resource"] class DataPaths: """Store subject-session specific data paths.""" - def __init__(self, *, data_paths: Optional[dict] = None, part_id: str = "") -> None: + def __init__( + self, *, data_paths: Optional[dict] = None, part_id: Optional[str] = "" + ) -> None: """Initialize a ``DataPaths`` instance.""" if not data_paths: data_paths = {} @@ -146,102 +150,6 @@ def as_dict(self) -> dict: } -def generate_prov_string(prov: list[str] | str | tuple) -> tuple[str, str]: - """Generate a string from a SINGLE RESOURCE'S dictionary of MULTIPLE PRECEDING RESOURCES (or single, if just one). - - NOTE: this DOES NOT merge multiple resources!!! (i.e. for merging-strat pipe_idx generation). - """ - if not isinstance(prov, list): - msg = ( - "\n[!] Developer info: the CpacProvenance " - f"entry for {prov} has to be a list.\n" - ) - raise TypeError(msg) - last_entry = get_last_prov_entry(prov) - resource = last_entry.split(":")[0] - return (resource, str(prov)) - - -def json_outdir_ingress( - filepath: Path | str, data_label: str, json: dict -) -> tuple[dict, tuple[str, str], str, str]: - """Ingress sidecars from a BIDS derivatives directory.""" - desc_val = None - for tag in data_label.split("_"): - if "desc-" in tag: - desc_val = tag - break - jsonpath = str(filepath) - for ext in EXTS: - jsonpath = jsonpath.replace(ext, "") - jsonpath = f"{jsonpath}.json" - - if not os.path.exists(jsonpath): - WFLOGGER.info( - "\n\n[!] No JSON found for file %s.\nCreating %s..\n\n", - filepath, - jsonpath, - ) - json_info = { - "Description": "This data was generated elsewhere and " - "supplied by the user into this C-PAC run's " - "output directory. This JSON file was " - "automatically generated by C-PAC because a " - "JSON file was not supplied with the data." - } - json_info = {**json_info, **json} - write_output_json(json_info, jsonpath) - else: - json_info = read_json(jsonpath) - json_info = {**json_info, **json} - if "CpacProvenance" in json_info: - if desc_val: - # it's a C-PAC output, let's check for pipe_idx/strat integer - # suffixes in the desc- entries. - only_desc = str(desc_val) - - if only_desc[-1].isdigit(): - for _strat_idx in range(0, 3): - # let's stop at 3, please don't run >999 strategies okay? - if only_desc[-1].isdigit(): - only_desc = only_desc[:-1] - - if only_desc[-1] == "-": - only_desc = only_desc.rstrip("-") - else: - msg = ( - "\n[!] Something went wrong with either " - "reading in the output directory or when " - "it was written out previously.\n\nGive " - "this to your friendly local C-PAC " - f"developer:\n\n{data_label!s}\n" - ) - raise IOError(msg) - - # remove the integer at the end of the desc-* variant, we will - # get the unique pipe_idx from the CpacProvenance below - data_label = data_label.replace(desc_val, only_desc) - - # preserve cpac provenance/pipe_idx - pipe_idx = generate_prov_string(json_info["CpacProvenance"]) - node_name = "" - - else: - json_info["CpacProvenance"] = [f"{data_label}:Non-C-PAC Origin: {filepath}"] - if "Description" not in json_info: - json_info["Description"] = ( - "This data was generated elsewhere and " - "supplied by the user into this C-PAC run's " - "output directory. This JSON file was " - "automatically generated by C-PAC because a " - "JSON file was not supplied with the data." 
- ) - pipe_idx = generate_prov_string(json_info["CpacProvenance"]) - node_name = f"{data_label}_ingress" - - return json_info, pipe_idx, node_name, data_label - - @Function.sig_imports(["from typing import Optional"]) def set_iterables( scan: str, @@ -249,6 +157,10 @@ def set_iterables( ts_paths: Optional[list[str]] = None, ) -> tuple[str, str, str]: """Match scan with filepath to get filepath.""" + if mask_paths is None: + mask_paths = [] + if ts_paths is None: + ts_paths = [] mask_path = [path for path in mask_paths if scan in path] ts_path = [path for path in ts_paths if scan in path] @@ -301,14 +213,14 @@ def keys(self) -> list[str]: """Return list of subscriptable keys.""" return list(self._keys) - def __getitem__(self, name: str | tuple[str]) -> tuple[pe.Node, str | tuple[str]]: + def __getitem__(self, name: str) -> tuple[pe.Node, str | tuple[str]]: """Provide legacy dict-style get access.""" if name in self.keys(): return getattr(self, name) msg = f"Key '{name}' not set in {self}." raise KeyError(msg) - def __setitem__(self, name: str | tuple[str], value: Any) -> None: + def __setitem__(self, name: str, value: Any) -> None: """Provide legacy dict-style set access.""" setattr(self, name, value) if name not in self.keys(): @@ -324,8 +236,31 @@ class _Pool: def __init__(self, name: str = "") -> None: """Initialize a ResourcePool or StratPool.""" + self.ants_interp: str + self.cfg: Configuration + self.creds_paths: Optional[str] + self.data_paths: DataPaths + self.fsl_interp: str + self.func_reg: bool + self.fwhm: list[int] + self.info: dict = {} + self.logdir: Optional[str] self.name = name - self.info = {} + self.num_ants_cores: int + self.num_cpus = int + self.part_id: str + self.pipe_list: list + self.ses_id: str + self.smoothing_bool: bool + self.smooth_opts: list[str] + self.regressors: dict | list + self.rpool: dict + self.run_smoothing: bool + self.run_zscoring: bool + self.unique_id: str + self.zscoring_bool: bool + self.wf: pe.Workflow + self._regressor_dct: dict def __repr__(self) -> str: """Return reproducible ResourcePool string.""" @@ -348,14 +283,92 @@ def initialize_nipype_wf(self, name: str = "") -> None: name = f"_{name}" workflow_name = f"cpac{name}_{self.unique_id}" self.wf = pe.Workflow(name=workflow_name) - self.wf.base_dir = self.cfg.pipeline_setup["working_directory"]["path"] + self.wf.base_dir = self.cfg.pipeline_setup["working_directory"]["path"] # type: ignore[attr-defined] self.wf.config["execution"] = { "hash_method": "timestamp", "crashdump_dir": os.path.abspath( - self.cfg.pipeline_setup["log_directory"]["path"] + self.cfg.pipeline_setup["log_directory"]["path"] # type: ignore[attr-defined] ), } + def json_outdir_ingress( + self, filepath: Path | str, data_label: str, json: dict + ) -> tuple[dict, tuple[str, str], str, str]: + """Ingress sidecars from a BIDS derivatives directory.""" + desc_val = None + for tag in data_label.split("_"): + if "desc-" in tag: + desc_val = tag + break + jsonpath = str(filepath) + for ext in EXTS: + jsonpath = jsonpath.replace(ext, "") + jsonpath = f"{jsonpath}.json" + + if not os.path.exists(jsonpath): + WFLOGGER.info( + "\n\n[!] No JSON found for file %s.\nCreating %s..\n\n", + filepath, + jsonpath, + ) + json_info = { + "Description": "This data was generated elsewhere and " + "supplied by the user into this C-PAC run's " + "output directory. This JSON file was " + "automatically generated by C-PAC because a " + "JSON file was not supplied with the data." 
+ } + json_info = {**json_info, **json} + write_output_json(json_info, jsonpath) + else: + json_info = read_json(jsonpath) + json_info = {**json_info, **json} + if "CpacProvenance" in json_info: + if desc_val: + # it's a C-PAC output, let's check for pipe_idx/strat integer + # suffixes in the desc- entries. + only_desc = str(desc_val) + + if only_desc[-1].isdigit(): + for _strat_idx in range(0, 3): + # let's stop at 3, please don't run >999 strategies okay? + if only_desc[-1].isdigit(): + only_desc = only_desc[:-1] + + if only_desc[-1] == "-": + only_desc = only_desc.rstrip("-") + else: + msg = ( + "\n[!] Something went wrong with either " + "reading in the output directory or when " + "it was written out previously.\n\nGive " + "this to your friendly local C-PAC " + f"developer:\n\n{data_label!s}\n" + ) + raise IOError(msg) + + # remove the integer at the end of the desc-* variant, we will + # get the unique pipe_idx from the CpacProvenance below + data_label = data_label.replace(desc_val, only_desc) + + # preserve cpac provenance/pipe_idx + pipe_idx = self.generate_prov_string(json_info["CpacProvenance"]) + node_name = "" + else: + json_info["CpacProvenance"] = [f"{data_label}:Non-C-PAC Origin: {filepath}"] # type: ignore [assignment] + if "Description" not in json_info: + json_info["Description"] = ( + "This data was generated elsewhere and " + "supplied by the user into this C-PAC run's " + "output directory. This JSON file was " + "automatically generated by C-PAC because a " + "JSON file was not supplied with the data." + ) + pipe_idx = self.generate_prov_string(json_info["CpacProvenance"]) + node_name = f"{data_label}_ingress" + + return json_info, pipe_idx, node_name, data_label + def _config_lookup(self, keylist, fallback_type: type = NoneType) -> Any: """Lookup a config key, return None if not found.""" try: @@ -363,50 +376,21 @@ def _config_lookup(self, keylist, fallback_type: type = NoneType) -> Any: except (AttributeError, KeyError): return fallback_type() - def back_propogate_template_name( - self, resource_idx: str, json_info: dict, id_string: "pe.Node" - ) -> None: - """Find and apply the template name from a resource's provenance. - - Parameters - ---------- - resource_idx : str - - json_info : dict - - id_string : pe.Node + @staticmethod + def generate_prov_string(prov: LIST_OF_LIST_OF_STR | tuple) -> tuple[str, str]: + """Generate a string from a SINGLE RESOURCE'S dictionary of MULTIPLE PRECEDING RESOURCES (or single, if just one). - Returns - ------- - None + NOTE: this DOES NOT merge multiple resources!!! (i.e. for merging-strat pipe_idx generation). 
""" - if "template" in resource_idx and self.check_rpool("derivatives-dir"): - if self.check_rpool("template"): - node, out = self.get_data("template") - self.wf.connect(node, out, id_string, "template_desc") - elif "Template" in json_info: - id_string.inputs.template_desc = json_info["Template"] - elif ( - "template" in resource_idx and len(json_info.get("CpacProvenance", [])) > 1 - ): - for resource in source_set(json_info["CpacProvenance"]): - source, value = resource.split(":", 1) - if value.startswith("template_") and source != "FSL-AFNI-bold-ref": - # 'FSL-AFNI-bold-ref' is currently allowed to be in - # a different space, so don't use it as the space for - # descendents - try: - anscestor_json = next(iter(self.rpool.get(source).items()))[ - 1 - ].get("json", {}) - if "Description" in anscestor_json: - id_string.inputs.template_desc = anscestor_json[ - "Description" - ] - return - except (IndexError, KeyError): - pass - return + if not isinstance(prov, list): + msg = ( + "\n[!] Developer info: the CpacProvenance " + f"entry for {prov} has to be a list.\n" + ) + raise TypeError(msg) + last_entry = get_last_prov_entry(prov) + resource = last_entry.split(":")[0] + return (resource, str(prov)) def get_name(self): return self.name @@ -601,15 +585,20 @@ def get_json_info(self, resource, pipe_idx, key): return self.rpool[resource][pipe_idx][key] @staticmethod - def get_resource_from_prov(prov): - # each resource (i.e. "desc-cleaned_bold" AKA nuisance-regressed BOLD - # data) has its own provenance list. the name of the resource, and - # the node that produced it, is always the last item in the provenance - # list, with the two separated by a colon : + def get_resource_from_prov(prov: LIST_OF_LIST_OF_STR) -> Optional[str]: + """Return the last item in the provenance list. + + Each resource (i.e. "desc-cleaned_bold" AKA nuisance-regressed BOLD + data) has its own provenance list. 
the name of the resource, and + the node that produced it, is always the last item in the provenance + list, with the two separated by a colon : + """ if not len(prov): return None if isinstance(prov[-1], list): - return prov[-1][-1].split(":")[0] + last_item_in_list = prov[-1][-1] + assert isinstance(last_item_in_list, str) + return last_item_in_list.split(":")[0] if isinstance(prov[-1], str): return prov[-1].split(":")[0] return None @@ -640,30 +629,31 @@ def regressor_dct(self, cfg) -> dict: if strat_name in self.regressors: self._regressor_dct = self.regressors[strat_name] return self._regressor_dct - self.regressor_dct = _nr["ingress_regressors"]["Regressors"] - return self.regressor_dct + self._regressor_dct = _nr["ingress_regressors"]["Regressors"] + return self._regressor_dct prov = self.get_cpac_provenance("desc-confounds_timeseries") strat_name_components = prov[-1].split("_") for _ in list(range(prov[-1].count("_"))): reg_name = "_".join(strat_name_components[-_:]) - if reg_name in self.regressors: + if isinstance(self.regressors, dict) and reg_name in self.regressors: self._regressor_dct = self.regressors[reg_name] return self._regressor_dct raise key_error def set_data( self, - resource, - node, - output, - json_info, - pipe_idx, - node_name, - fork=False, - inject=False, - ): + resource: str, + node: pe.Node | pe.Workflow, + output: str, + json_info: dict, + pipe_idx: PIPE_IDX, + node_name: str, + fork: bool = False, + inject: bool = False, + ) -> None: + """Plug a Resource into a _Pool.""" json_info = json_info.copy() - cpac_prov = [] + cpac_prov: LIST_OF_LIST_OF_STR = [] if "CpacProvenance" in json_info: cpac_prov = json_info["CpacProvenance"] current_prov_list = list(cpac_prov) @@ -671,7 +661,7 @@ def set_data( if not inject: new_prov_list.append(f"{resource}:{node_name}") try: - _res, new_pipe_idx = generate_prov_string(new_prov_list) + _resource, new_pipe_idx = self.generate_prov_string(new_prov_list) except IndexError: msg = ( f"\n\nThe set_data() call for {resource} has no " @@ -693,7 +683,7 @@ def set_data( search = False if self.get_resource_from_prov(current_prov_list) == resource: # CHANGING PIPE_IDX, BE CAREFUL DOWNSTREAM IN THIS FUNCTION - pipe_idx = generate_prov_string(current_prov_list)[1] + pipe_idx = self.generate_prov_string(current_prov_list)[1] if pipe_idx not in self.rpool[resource].keys(): search = True else: @@ -703,7 +693,7 @@ def set_data( if self.get_resource_from_prov(idx) == resource: if isinstance(idx, list): # CHANGING PIPE_IDX, BE CAREFUL DOWNSTREAM IN THIS FUNCTION - pipe_idx = generate_prov_string(idx)[1] + pipe_idx = self.generate_prov_string(idx)[1] elif isinstance(idx, str): pipe_idx = idx break @@ -712,35 +702,37 @@ def set_data( # remove old keys so we don't end up with a new strat for every new node unit (unless we fork) del self.rpool[resource][pipe_idx] if new_pipe_idx not in self.rpool[resource]: - self.rpool[resource][new_pipe_idx] = {} + self.rpool[resource][new_pipe_idx] = Resource( + data=ResourceData(node, output), json=json_info + ) if new_pipe_idx not in self.pipe_list: self.pipe_list.append(new_pipe_idx) - self.rpool[resource][new_pipe_idx]["data"] = (node, output) - self.rpool[resource][new_pipe_idx]["json"] = json_info - - @overload - def _pool_get( - self: "ResourcePool", - resource: list[str] | str, - pipe_idx: Optional[str], - report_fetched: bool, - optional: bool, - ) -> Optional[dict[dict]] | tuple[Optional[dict[dict]], Optional[str]]: ... 
- @overload - def _pool_get( - self: "StratPool", - resource: list[str] | str, - pipe_idx: Optional[str], - report_fetched: bool, - optional: bool, - ) -> Optional[Resource] | tuple[Optional[Resource], Optional[str]]: ... - def _pool_get( + # @overload + # def get( + # self: "ResourcePool", + # resource: list[str] | str, + # pipe_idx: Optional[PIPE_IDX], + # report_fetched: bool, + # optional: bool, + # ) -> Optional[dict[dict]] | tuple[Optional[dict[dict]], Optional[str]]: ... + # @overload + # def get( + # self: "StratPool", + # resource: list[str] | str, + # pipe_idx: Optional[PIPE_IDX], + # report_fetched: bool, + # optional: bool, + # ) -> Optional[Resource] | tuple[Optional[Resource], Optional[str]]: ... + def get( self, resource: list[str] | str, - pipe_idx: Optional[str], + pipe_idx: Optional[PIPE_IDX], report_fetched: bool, optional: bool, + ) -> ( + Optional[Resource | STRAT_DICT] + | tuple[Optional[Resource | STRAT_DICT], Optional[str]] ): """Return a dictionary of strats or a single Resource.""" if not isinstance(resource, list): @@ -774,52 +766,6 @@ def _pool_get( ) raise LookupError(msg) - @overload - def get_data( - self, - resource: str, - pipe_idx: Optional[list | str | tuple] = None, - report_fetched: Literal[True] = True, - quick_single: bool = False, - ) -> tuple[dict, str]: ... - @overload - def get_data( - self, - resource: str, - pipe_idx: Optional[list | str | tuple] = None, - report_fetched: Literal[False] = False, - quick_single: bool = False, - ) -> dict: ... - @overload - def get_data( - self, - resource: str, - pipe_idx: Optional[list | str | tuple] = None, - report_fetched: bool = False, - quick_single: bool = False, - ) -> tuple[dict, str] | dict: ... - def get_data( - self, - resource: str, - pipe_idx: Optional[list | str | tuple] = None, - report_fetched: bool = False, - quick_single: bool = False, - ): - if report_fetched: - if pipe_idx: - connect, fetched = self.get( - resource, pipe_idx=pipe_idx, report_fetched=report_fetched - ) - return (connect["data"], fetched) - connect, fetched = self.get(resource, report_fetched=report_fetched) - return (connect["data"], fetched) - if pipe_idx: - return self.get(resource, pipe_idx=pipe_idx)["data"] - if quick_single or len(self.get(resource)) == 1: - for _key, val in self.get(resource).items(): - return val["data"] - return self.get(resource)["data"] - def copy_resource(self, resource, new_name): try: self.rpool[new_name] = self.rpool[resource] @@ -923,7 +869,7 @@ def derivative_xfm(self, wf, label, connection, json_info, pipe_idx, pipe_x): if isinstance(entry, list): if entry[-1].split(":")[0] == xfm_label: xfm_prov = entry - xfm_idx = generate_prov_string(xfm_prov)[1] + xfm_idx = self.generate_prov_string(xfm_prov)[1] break # but if the resource doesn't have the bold-to-template transform @@ -966,7 +912,7 @@ def derivative_xfm(self, wf, label, connection, json_info, pipe_idx, pipe_x): ) new_prov = json_info["CpacProvenance"] + xfm_prov json_info["CpacProvenance"] = new_prov - new_pipe_idx = generate_prov_string(new_prov) + new_pipe_idx = self.generate_prov_string(new_prov) self.set_data( label, xfm, @@ -1050,7 +996,7 @@ def post_process(self, wf, label, connection, json_info, pipe_idx, pipe_x, outs) if isinstance(entry, list): if entry[-1].split(":")[0] == mask: mask_prov = entry - mask_idx = generate_prov_string(mask_prov)[1] + mask_idx = self.generate_prov_string(mask_prov)[1] break if self.smoothing_bool: @@ -1171,6 +1117,171 @@ def post_process(self, wf, label, connection, json_info, pipe_idx, pipe_x, outs) 
return (wf, post_labels) + def node_data(self, resource: str | tuple[str], **kwargs) -> ResourceData: + """Create ResourceData objects.""" + return ResourceData(*self.get_data(resource, **kwargs)) # type: ignore[attr-defined] + + +class ResourcePool(_Pool): + """A pool of Resources.""" + + def __init__( + self, + name: str = "", + cfg: Optional[Configuration] = None, + pipe_list: Optional[list] = None, + *, + data_paths: Optional[DataPaths | dict] = None, + part_id: Optional[str] = None, + pipeline_name: str = "", + wf: Optional[pe.Workflow] = None, + ) -> None: + """Initialize a ResourcePool.""" + super().__init__(name=name) + if isinstance(data_paths, dict): + data_paths = DataPaths(data_paths=data_paths) + elif not data_paths: + data_paths = DataPaths(part_id=part_id) + self.data_paths = data_paths + # pass-through for convenient access + self.creds_path = self.data_paths.creds_path + self.part_id = self.data_paths.part_id + self.ses_id = self.data_paths.ses_id + self.unique_id = self.data_paths.unique_id + self.rpool: POOL_DICT = {} + + if not pipe_list: + self.pipe_list = [] + else: + self.pipe_list = pipe_list + + if cfg: + self.cfg = cfg + else: + self.cfg = Preconfiguration("blank") + + self.logdir = self._config_lookup(["pipeline_setup", "log_directory", "path"]) + self.num_cpus = self._config_lookup( + ["pipeline_setup", "system_config", "max_cores_per_participant"] + ) + self.num_ants_cores = self._config_lookup( + ["pipeline_setup", "system_config", "num_ants_threads"] + ) + + self.ants_interp = self._config_lookup( + [ + "registration_workflows", + "functional_registration", + "func_registration_to_template", + "ANTs_pipelines", + "interpolation", + ] + ) + self.fsl_interp = self._config_lookup( + [ + "registration_workflows", + "functional_registration", + "func_registration_to_template", + "FNIRT_pipelines", + "interpolation", + ] + ) + self.func_reg = self._config_lookup( + [ + "registration_workflows", + "functional_registration", + "func_registration_to_template", + "run", + ] + ) + + self.run_smoothing = "smoothed" in self._config_lookup( + ["post_processing", "spatial_smoothing", "output"], list + ) + self.smoothing_bool = self._config_lookup( + ["post_processing", "spatial_smoothing", "run"] + ) + self.run_zscoring = "z-scored" in self._config_lookup( + ["post_processing", "z-scoring", "output"], list + ) + self.zscoring_bool = self._config_lookup( + ["post_processing", "z-scoring", "run"] + ) + self.fwhm = self._config_lookup( + ["post_processing", "spatial_smoothing", "fwhm"] + ) + self.smooth_opts = self._config_lookup( + ["post_processing", "spatial_smoothing", "smoothing_method"] + ) + + if wf: + self.wf = wf + else: + self.initialize_nipype_wf(pipeline_name) + + self.xfm = [ + "alff", + "desc-sm_alff", + "desc-zstd_alff", + "desc-sm-zstd_alff", + "falff", + "desc-sm_falff", + "desc-zstd_falff", + "desc-sm-zstd_falff", + "reho", + "desc-sm_reho", + "desc-zstd_reho", + "desc-sm-zstd_reho", + ] + ingress_derivatives = False + try: + if self.data_paths.derivatives_dir and self._config_lookup( + ["pipeline_setup", "outdir_ingress", "run"], bool + ): + ingress_derivatives = True + except (AttributeError, KeyError, TypeError): + pass + if ingress_derivatives: + self.ingress_output_dir() + else: + self.ingress_raw_anat_data() + if data_paths.func: + self.ingress_raw_func_data() + self.ingress_pipeconfig_paths() + + def back_propogate_template_name( + self, resource_idx: str, json_info: dict, id_string: "pe.Node" + ) -> None: + """Find and apply the template name from a 
resource's provenance.""" + if "template" in resource_idx and self.check_rpool("derivatives-dir"): + if self.check_rpool("template"): + node, out = self.get_data("template") + self.wf.connect(node, out, id_string, "template_desc") + elif "Template" in json_info: + id_string.inputs.template_desc = json_info["Template"] + elif ( + "template" in resource_idx and len(json_info.get("CpacProvenance", [])) > 1 + ): + for resource in source_set(json_info["CpacProvenance"]): + source, value = resource.split(":", 1) + if value.startswith("template_") and source != "FSL-AFNI-bold-ref": + # 'FSL-AFNI-bold-ref' is currently allowed to be in + # a different space, so don't use it as the space for + # descendents + try: + ancestors = self.rpool.get(source) + assert ancestors is not None + ancestor: dict = next(iter(ancestors.items()))[1] + anscestor_json: dict = ancestor.get("json", {}) + if "Description" in anscestor_json: + id_string.inputs.template_desc = anscestor_json[ + "Description" + ] + return + except (IndexError, KeyError): + pass + return + def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): excl = [] substring_excl = [] @@ -1502,143 +1613,11 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): wf.connect(write_json, "json_file", ds, f'{out_dct["subdir"]}.@json') outputs_logger.info(expected_outputs) - def node_data(self, resource: str | tuple[str], **kwargs) -> ResourceData: - """Create ResourceData objects.""" - return ResourceData(*self.get_data(resource, **kwargs)) - - -class ResourcePool(_Pool): - """A pool of Resources.""" - - def __init__( - self, - name: str = "", - cfg: Optional[Configuration] = None, - pipe_list: Optional[list] = None, - *, - data_paths: Optional[DataPaths | dict] = None, - part_id: Optional[str] = None, - pipeline_name: str = "", - wf: Optional[pe.Workflow] = None, - ) -> None: - """Initialize a ResourcePool.""" - super().__init__(name=name) - if isinstance(data_paths, dict): - data_paths = DataPaths(data_paths=data_paths) - elif not data_paths: - data_paths = DataPaths(part_id=part_id) - self.data_paths = data_paths - # pass-through for convenient access - self.creds_path = self.data_paths.creds_path - self.part_id = self.data_paths.part_id - self.ses_id = self.data_paths.ses_id - self.unique_id = self.data_paths.unique_id - self.rpool = {} - - if not pipe_list: - self.pipe_list = [] - else: - self.pipe_list = pipe_list - - if cfg: - self.cfg = cfg - else: - self.cfg = Preconfiguration("blank") - - self.logdir = self._config_lookup(["pipeline_setup", "log_directory", "path"]) - self.num_cpus = self._config_lookup( - ["pipeline_setup", "system_config", "max_cores_per_participant"] - ) - self.num_ants_cores = self._config_lookup( - ["pipeline_setup", "system_config", "num_ants_threads"] - ) - - self.ants_interp = self._config_lookup( - [ - "registration_workflows", - "functional_registration", - "func_registration_to_template", - "ANTs_pipelines", - "interpolation", - ] - ) - self.fsl_interp = self._config_lookup( - [ - "registration_workflows", - "functional_registration", - "func_registration_to_template", - "FNIRT_pipelines", - "interpolation", - ] - ) - self.func_reg = self._config_lookup( - [ - "registration_workflows", - "functional_registration", - "func_registration_to_template", - "run", - ] - ) - - self.run_smoothing = "smoothed" in self._config_lookup( - ["post_processing", "spatial_smoothing", "output"], list - ) - self.smoothing_bool = self._config_lookup( - ["post_processing", "spatial_smoothing", "run"] - 
) - self.run_zscoring = "z-scored" in self._config_lookup( - ["post_processing", "z-scoring", "output"], list - ) - self.zscoring_bool = self._config_lookup( - ["post_processing", "z-scoring", "run"] - ) - self.fwhm = self._config_lookup( - ["post_processing", "spatial_smoothing", "fwhm"] - ) - self.smooth_opts = self._config_lookup( - ["post_processing", "spatial_smoothing", "smoothing_method"] - ) - - if wf: - self.wf = wf - else: - self.initialize_nipype_wf(pipeline_name) - - self.xfm = [ - "alff", - "desc-sm_alff", - "desc-zstd_alff", - "desc-sm-zstd_alff", - "falff", - "desc-sm_falff", - "desc-zstd_falff", - "desc-sm-zstd_falff", - "reho", - "desc-sm_reho", - "desc-zstd_reho", - "desc-sm-zstd_reho", - ] - ingress_derivatives = False - try: - if self.data_paths.derivatives_dir and self._config_lookup( - ["pipeline_setup", "outdir_ingress", "run"], bool - ): - ingress_derivatives = True - except (AttributeError, KeyError, TypeError): - pass - if ingress_derivatives: - self.ingress_output_dir() - else: - self.ingress_raw_anat_data() - if data_paths.func: - self.ingress_raw_func_data() - self.ingress_pipeconfig_paths() - @overload def get( self, resource: list[str] | str, - pipe_idx: Optional[str] = None, + pipe_idx: None = None, report_fetched: Literal[False] = False, optional: bool = False, ) -> Optional[STRAT_DICT]: ... @@ -1646,15 +1625,15 @@ def get( def get( self, resource: list[str] | str, - pipe_idx: Optional[str], - report_fetched: Literal[True], + pipe_idx: PIPE_IDX, + report_fetched: Literal[False] = False, optional: bool = False, - ) -> tuple[Optional[STRAT_DICT], Optional[str]]: ... + ) -> Optional[Resource]: ... @overload def get( self, resource: list[str] | str, - pipe_idx: Optional[str] = None, + pipe_idx: None = None, *, report_fetched: Literal[True], optional: bool = False, @@ -1663,14 +1642,25 @@ def get( def get( self, resource: list[str] | str, - pipe_idx: Optional[str] = None, + pipe_idx: PIPE_IDX, + report_fetched: Literal[True], + optional: Literal[False], + ) -> tuple[Optional[Resource], Optional[str]]: ... + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX] = None, report_fetched: bool = False, optional: bool = False, - ) -> Optional[STRAT_DICT] | tuple[STRAT_DICT, Optional[str]]: ... + ) -> ( + Optional[Resource | STRAT_DICT] + | tuple[Optional[Resource | STRAT_DICT], Optional[str]] + ): ... def get( self, resource: list[str] | str, - pipe_idx: Optional[str] = None, + pipe_idx: Optional[PIPE_IDX] = None, report_fetched: bool = False, optional: bool = False, ): @@ -1678,7 +1668,60 @@ def get( Inside those, are dictionaries like ``{'data': (node, out), 'json': info}``. """ - return self._pool_get(resource, pipe_idx, report_fetched, optional) + return super().get(resource, pipe_idx, report_fetched, optional) + + @overload + def get_data( + self, + resource: list[str] | str, + pipe_idx: None = None, + report_fetched: bool = False, + quick_single: bool = False, + ) -> ResourceData: ... + @overload + def get_data( + self, + resource: list[str] | str, + pipe_idx: PIPE_IDX, + report_fetched: Literal[True], + quick_single: Literal[False] = False, + ) -> tuple[ResourceData, str]: ... + @overload + def get_data( + self, + resource: list[str] | str, + pipe_idx: PIPE_IDX, + report_fetched: Literal[False] = False, + quick_single: bool = False, + ) -> ResourceData: ... 
+ @overload + def get_data( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX], + report_fetched: bool, + quick_single: Literal[True], + ) -> ResourceData: ... + def get_data( + self, + resource, + pipe_idx, + report_fetched, + quick_single, + ): + """Get ResourceData from ResourcePool.""" + _resource = self.get(resource, pipe_idx=pipe_idx, report_fetched=report_fetched) + if report_fetched: + if pipe_idx: + connect, fetched = _resource + assert isinstance(connect, Resource) and isinstance(fetched, str) + return connect.data, fetched + if quick_single or len(_resource) == 1: + assert isinstance(_resource, dict) + for value in _resource.values(): + return value.data + assert isinstance(_resource, Resource) + return _resource.data def get_strats(self, resources, debug=False) -> dict[str | tuple, "StratPool"]: # TODO: NOTE: NOT COMPATIBLE WITH SUB-RPOOL/STRAT_POOLS @@ -1709,7 +1752,7 @@ def get_strats(self, resources, debug=False) -> dict[str | tuple, "StratPool"]: resource_list.append(resource) total_pool = [] - variant_pool = {} + variant_pool: dict = {} len_inputs = len(resource_list) if debug: verbose_logger = getLogger("CPAC.engine") @@ -1727,6 +1770,7 @@ def get_strats(self, resources, debug=False) -> dict[str | tuple, "StratPool"]: if not rp_dct: len_inputs -= 1 continue + assert isinstance(rp_dct, dict) sub_pool = [] if debug: verbose_logger.debug("len(rp_dct): %s\n", len(rp_dct)) @@ -1787,7 +1831,7 @@ def get_strats(self, resources, debug=False) -> dict[str | tuple, "StratPool"]: json_dct = {} for strat in strat_list: # strat is a prov list for a single resource/input - strat_resource, strat_idx = generate_prov_string(strat) + strat_resource, strat_idx = self.generate_prov_string(strat) strat_json = self.get_json(strat_resource, strat=strat_idx) json_dct[strat_resource] = strat_json @@ -1797,11 +1841,11 @@ def get_strats(self, resources, debug=False) -> dict[str | tuple, "StratPool"]: if drop: break for xlabel in linked: - if drop: + if drop or xlabel is None: break xjson = copy.deepcopy(json_dct[xlabel]) for ylabel in linked: - if xlabel == ylabel: + if xlabel == ylabel or ylabel is None: continue yjson = copy.deepcopy(json_dct[ylabel]) @@ -1880,18 +1924,18 @@ def get_strats(self, resources, debug=False) -> dict[str | tuple, "StratPool"]: # now just invert resource:strat to strat:resource for each resource:strat for cpac_prov in strat_list: - resource, strat = generate_prov_string(cpac_prov) + resource, strat = self.generate_prov_string(cpac_prov) resource_strat_dct = self.rpool[resource][strat] - # remember, `resource_strat_dct` is the dct of 'data' and 'json'. + # remember, `resource_strat_dct` is a Resource. new_strats[pipe_idx].rpool[resource] = resource_strat_dct # `new_strats` is A DICTIONARY OF RESOURCEPOOL OBJECTS! each one is a new slice of the resource pool combined together. 
self.pipe_list.append(pipe_idx) if "CpacVariant" in resource_strat_dct["json"]: if "CpacVariant" not in new_strats[pipe_idx].rpool["json"]: new_strats[pipe_idx].rpool["json"]["CpacVariant"] = {} - for younger_resource, variant_list in resource_strat_dct[ - "json" - ]["CpacVariant"].items(): + _variant = new_strats[pipe_idx].rpool["json"]["CpacVariant"] + assert isinstance(_variant, dict) + for younger_resource, variant_list in _variant.items(): if ( younger_resource not in new_strats[pipe_idx].rpool["json"]["CpacVariant"] @@ -1911,9 +1955,9 @@ def get_strats(self, resources, debug=False) -> dict[str | tuple, "StratPool"]: for resource_strat_list in total_pool: # total_pool will have only one list of strats, for the one input for cpac_prov in resource_strat_list: # <------- cpac_prov here doesn't need to be modified, because it's not merging with other inputs - resource, pipe_idx = generate_prov_string(cpac_prov) + resource, pipe_idx = self.generate_prov_string(cpac_prov) resource_strat_dct = self.rpool[resource][pipe_idx] - # remember, `resource_strat_dct` is the dct of 'data' and 'json'. + # remember, `resource_strat_dct` is a Resource. new_strats[pipe_idx] = StratPool( rpool={resource: resource_strat_dct}, name=pipe_idx ) # <----- again, new_strats is A DICTIONARY OF RESOURCEPOOL OBJECTS! @@ -1926,6 +1970,7 @@ def get_strats(self, resources, debug=False) -> dict[str | tuple, "StratPool"]: data_type = resource.split("_")[-1] if data_type not in new_strats[pipe_idx].rpool["json"]["subjson"]: new_strats[pipe_idx].rpool["json"]["subjson"][data_type] = {} + _json = new_strats[pipe_idx].rpool["json"] new_strats[pipe_idx].rpool["json"]["subjson"][data_type].update( copy.deepcopy(resource_strat_dct["json"]) ) @@ -1935,7 +1980,8 @@ def ingress_freesurfer(self) -> None: """Ingress FreeSurfer data.""" try: fs_path = os.path.join( - self.cfg.pipeline_setup["freesurfer_dir"], self.part_id + self.cfg.pipeline_setup["freesurfer_dir"], # type: ignore[attr-defined] + self.part_id, ) except KeyError: WFLOGGER.warning("No FreeSurfer data present.") @@ -1945,19 +1991,21 @@ def ingress_freesurfer(self) -> None: if not os.path.exists(fs_path): if "sub" in self.part_id: fs_path = os.path.join( - self.cfg.pipeline_setup["freesurfer_dir"], + self.cfg.pipeline_setup["freesurfer_dir"], # type: ignore[attr-defined] self.part_id.replace("sub-", ""), ) else: fs_path = os.path.join( - self.cfg.pipeline_setup["freesurfer_dir"], ("sub-" + self.part_id) + self.cfg.pipeline_setup["freesurfer_dir"], # type: ignore[attr-defined] + ("sub-" + self.part_id), ) # patch for flo-specific data if not os.path.exists(fs_path): subj_ses = f"{self.part_id}-{self.ses_id}" fs_path = os.path.join( - self.cfg.pipeline_setup["freesurfer_dir"], subj_ses + self.cfg.pipeline_setup["freesurfer_dir"], # type: ignore[attr-defined] + subj_ses, ) if not os.path.exists(fs_path): WFLOGGER.info( @@ -1974,7 +2022,7 @@ def ingress_freesurfer(self) -> None: unique_id=self.unique_id, data=fs_path, creds_path=self.creds_path, - dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] ) self.set_data( "freesurfer-subject-dir", @@ -2018,7 +2066,7 @@ def ingress_freesurfer(self) -> None: unique_id=self.unique_id, data=fullpath, creds_path=self.creds_path, - dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] ) self.set_data( key, fs_ingress, "outputspec.data", {}, 
"", f"fs_{key}_ingress" @@ -2033,16 +2081,16 @@ def ingress_freesurfer(self) -> None: def ingress_output_dir(self) -> None: """Ingress an output directory into a ResourcePool.""" dir_path = self.data_paths.derivatives_dir - + assert dir_path is not None WFLOGGER.info("\nPulling outputs from %s.\n", dir_path) anat = os.path.join(dir_path, "anat") func = os.path.join(dir_path, "func") - outdir_anat = [] - outdir_func = [] - func_paths = {} - func_dict = {} + outdir_anat: list[str] = [] + outdir_func: list[str] = [] + func_paths: dict = {} + func_dict: dict = {} func_key = "" for subdir in [anat, func]: @@ -2061,7 +2109,7 @@ def ingress_output_dir(self) -> None: unique_id=self.unique_id, data=dir_path, creds_path=self.creds_path, - dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] ) self.set_data( "derivatives-dir", @@ -2072,8 +2120,8 @@ def ingress_output_dir(self) -> None: "outdir_config_ingress", ) - for subdir in [outdir_anat, outdir_func]: - for filepath in subdir: + for subdirs in [outdir_anat, outdir_func]: + for filepath in subdirs: filename = str(filepath) for ext in EXTS: filename = filename.split("/")[-1].replace(ext, "") @@ -2096,14 +2144,14 @@ def ingress_output_dir(self) -> None: data_label = data_label.replace(f"{tag}_", "") data_label, json = strip_template(data_label) - json_info, pipe_idx, node_name, data_label = json_outdir_ingress( + json_info, pipe_idx, node_name, data_label = self.json_outdir_ingress( filepath, data_label, json ) if ( "template" in data_label and not json_info["Template"] - == self.cfg.pipeline_setup["outdir_ingress"]["Template"] + == self.cfg.pipeline_setup["outdir_ingress"]["Template"] # type: ignore[attr-defined] ): continue # Rename confounds to avoid confusion in nuisance regression @@ -2133,7 +2181,7 @@ def ingress_output_dir(self) -> None: unique_id=self.unique_id, data=filepath, creds_path=self.creds_path, - dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] ) self.set_data( data_label, @@ -2141,8 +2189,7 @@ def ingress_output_dir(self) -> None: "outputspec.data", json_info, pipe_idx, - node_name, - f"outdir_{data_label}_ingress", + node_name=f"outdir_{data_label}_ingress", inject=True, ) else: @@ -2167,7 +2214,7 @@ def ingress_output_dir(self) -> None: func_paths, ) - if self.cfg.surface_analysis["freesurfer"]["ingress_reconall"]: + if self.cfg.surface_analysis["freesurfer"]["ingress_reconall"]: # type: ignore[attr-defined] self.ingress_freesurfer() def ingress_func_metadata( @@ -2193,7 +2240,7 @@ def ingress_func_metadata( gather_fmap.inputs.inputnode.set( subject=self.part_id, creds_path=self.creds_path, - dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] ) gather_fmap.inputs.inputnode.scan = orig_key @@ -2350,9 +2397,9 @@ def ingress_func_metadata( for idx, fmap_file in enumerate(fmap_TE_list, start=1): try: - node, out_file = self.get(fmap_file)[ - f"['{fmap_file}:fmap_TE_ingress']" - ]["data"] + node, out_file = self.get_data( + fmap_file, f"['{fmap_file}:fmap_TE_ingress']" + ) self.wf.connect( node, out_file, gather_echoes, f"echotime_{idx}" ) @@ -2393,11 +2440,11 @@ def ingress_func_metadata( ) scan_params.inputs.subject_id = self.part_id scan_params.inputs.set( - 
pipeconfig_start_indx=self.cfg.functional_preproc["truncation"]["start_tr"], - pipeconfig_stop_indx=self.cfg.functional_preproc["truncation"]["stop_tr"], + pipeconfig_start_indx=self.cfg.functional_preproc["truncation"]["start_tr"], # type: ignore[attr-defined] + pipeconfig_stop_indx=self.cfg.functional_preproc["truncation"]["stop_tr"], # type: ignore[attr-defined] ) - node, out = self.get("scan")["['scan:func_ingress']"]["data"] + node, out = self.get_data("scan", "['scan:func_ingress']") self.wf.connect(node, out, scan_params, "scan") # Workaround for extracting metadata with ingress @@ -2420,9 +2467,9 @@ def ingress_func_metadata( else: # wire in the scan parameter workflow - node, out = self.get("scan-params")["['scan-params:scan_params_ingress']"][ - "data" - ] + node, out = self.get_data( + "scan-params", "['scan-params:scan_params_ingress']" + ) self.wf.connect(node, out, scan_params, "data_config_scan_params") self.set_data("TR", scan_params, "tr", {}, "", "func_metadata_ingress") @@ -2452,9 +2499,9 @@ def ingress_func_metadata( "", "func_metadata_ingress", ) - node, out_file = self.get("effectiveEchoSpacing")[ - "['effectiveEchoSpacing:func_metadata_ingress']" - ]["data"] + node, out_file = self.get_data( + "effectiveEchoSpacing", "['effectiveEchoSpacing:func_metadata_ingress']" + ) self.wf.connect(node, out_file, calc_delta_ratio, "effective_echo_spacing") self.set_data( "deltaTE", calc_delta_ratio, "deltaTE", {}, "", "deltaTE_ingress" @@ -2639,7 +2686,7 @@ def func_outdir_ingress(self, func_dict: dict, key: str, func_paths: dict) -> No ingress.inputs.inputnode.set( subject=self.unique_id, creds_path=self.creds_path, - dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] ) self.set_data("subject", ingress, "outputspec.subject", {}, "", "func_ingress") ingress.get_node("inputnode").iterables = ("scan", list(func_dict.keys())) @@ -2683,7 +2730,7 @@ def func_outdir_ingress(self, func_dict: dict, key: str, func_paths: dict) -> No ingress_func.inputs.inputnode.set( unique_id=self.unique_id, creds_path=self.creds_path, - dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] ) self.wf.connect(iterables, "out_scan", ingress_func, "inputnode.scan") if key == mask_paths_key: @@ -2726,7 +2773,7 @@ def ingress_raw_anat_data(self) -> None: subject=self.part_id, anat=anat["T1"], creds_path=self.creds_path, - dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] img_type="anat", ) self.set_data("T1w", anat_flow, "outputspec.anat", {}, "", "anat_ingress") @@ -2739,14 +2786,14 @@ def ingress_raw_anat_data(self) -> None: subject=self.part_id, anat=self.data_paths.anat["T2w"], creds_path=self.creds_path, - dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], + dl_dir=self.cfg.pipeline_setup["working_directory"]["path"], # type: ignore[attr-defined] img_type="anat", ) self.set_data( "T2w", anat_flow_T2, "outputspec.anat", {}, "", "anat_ingress" ) - if self.cfg.surface_analysis["freesurfer"]["ingress_reconall"]: + if self.cfg.surface_analysis["freesurfer"]["ingress_reconall"]: # type: ignore[attr-defined] self.ingress_freesurfer() @@ -2755,16 +2802,16 @@ class StratPool(_Pool): def __init__( self, - rpool: Optional[dict[str | list | tuple, ResourcePool]] = None, + rpool: Optional[dict] = None, *, 
name: str = "", ) -> None: """Initialize a StratPool.""" super().__init__(name=name) if not rpool: - self.rpool = {} + self.rpool = STRAT_DICT({}) else: - self.rpool = rpool + self.rpool = STRAT_DICT(rpool) def append_name(self, name): self.name.append(name) @@ -2773,7 +2820,7 @@ def append_name(self, name): def get( self, resource: list[str] | str, - pipe_idx: Optional[str] = None, + pipe_idx: Optional[PIPE_IDX] = None, report_fetched: Literal[False] = False, optional: bool = False, ) -> Optional[Resource]: ... @@ -2781,7 +2828,7 @@ def get( def get( self, resource: list[str] | str, - pipe_idx: Optional[str], + pipe_idx: Optional[PIPE_IDX], report_fetched: Literal[True], optional: bool = False, ) -> tuple[Optional[Resource], Optional[str]]: ... @@ -2789,7 +2836,7 @@ def get( def get( self, resource: list[str] | str, - pipe_idx: Optional[str], + pipe_idx: Optional[PIPE_IDX] = None, *, report_fetched: Literal[True], optional: bool = False, @@ -2798,16 +2845,35 @@ def get( def get( self, resource: list[str] | str, - pipe_idx: Optional[str] = None, + pipe_idx: Optional[PIPE_IDX] = None, report_fetched: bool = False, optional: bool = False, ) -> Optional[Resource] | tuple[Optional[Resource], Optional[str]]: ... def get( self, resource: list[str] | str, - pipe_idx: Optional[str] = None, + pipe_idx: Optional[PIPE_IDX] = None, report_fetched: bool = False, optional: bool = False, ): """Return a Resource.""" - return self._pool_get(resource, pipe_idx, report_fetched, optional) + return super().get(resource, pipe_idx, report_fetched, optional) + + @overload + def get_data( + self, resource: list[str] | str, report_fetched: Literal[True] + ) -> tuple[ResourceData, str]: ... + @overload + def get_data( + self, resource: list[str] | str, report_fetched: Literal[False] = False + ) -> ResourceData: ... + def get_data(self, resource, report_fetched): + """Get ResourceData from a StratPool.""" + _resource = self.get(resource, report_fetched=report_fetched) + if report_fetched: + assert isinstance(_resource, tuple) + connect, fetched = _resource + assert isinstance(connect, Resource) and isinstance(fetched, str) + return connect.data, fetched + assert isinstance(_resource, Resource) + return _resource.data diff --git a/CPAC/utils/typing.py b/CPAC/utils/typing.py new file mode 100644 index 0000000000..e93a851745 --- /dev/null +++ b/CPAC/utils/typing.py @@ -0,0 +1,20 @@ +# Copyright (C) 2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Type aliases for C-PAC.""" + +LIST_OF_LIST_OF_STR = str | list["LIST_OF_LIST_OF_STR"] +PIPE_IDX = list["PIPE_IDX"] | str | tuple["PIPE_IDX", ...] 
From 199bd60bad8e926d56333c673f6ffe56dae3cf83 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Fri, 12 Jul 2024 16:06:49 -0400 Subject: [PATCH 32/93] =?UTF-8?q?:white=5Fcheck=5Fmark:=20`pytest.Cache.ma?= =?UTF-8?q?kedir`=20=E2=86=92=20`pytest.Cache.mkdir`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CPAC/conftest.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/CPAC/conftest.py b/CPAC/conftest.py index b8f3512624..3bff185fba 100644 --- a/CPAC/conftest.py +++ b/CPAC/conftest.py @@ -22,10 +22,9 @@ @pytest.fixture -def bids_examples(cache) -> Path: +def bids_examples(cache: pytest.Cache) -> Path: """Get cached example BIDS directories.""" - example_dir = cache.makedir("bids-examples") - bids_dir = Path(example_dir / "bids-examples").absolute() + bids_dir = cache.mkdir("bids-examples").absolute() if not bids_dir.exists(): from git import Repo From ec32f34c50c477836b4e5e6d1a20b3442dc261f1 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Fri, 12 Jul 2024 20:52:33 -0400 Subject: [PATCH 33/93] :pencil2: Pipe inside quotation marks --- CPAC/pipeline/engine/resource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index c6dcdca260..a2fd0f9aa6 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -73,7 +73,7 @@ EXTS = [".nii", ".gz", ".mat", ".1D", ".txt", ".csv", ".rms", ".tsv"] POOL_DICT = dict[str | tuple, "STRAT_DICT"] -STRAT_DICT = dict[str | tuple, dict | "Resource"] +STRAT_DICT = dict[str | tuple, "dict | Resource"] class DataPaths: From b1f16ed20264cf6918a9a1b99251f306575827d8 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Fri, 12 Jul 2024 22:02:47 -0400 Subject: [PATCH 34/93] :art: Type `ResourcePool.get_strats` --- CPAC/alff/alff.py | 2 +- CPAC/anat_preproc/anat_preproc.py | 2 +- .../distortion_correction.py | 2 +- CPAC/func_preproc/func_motion.py | 2 +- CPAC/func_preproc/func_preproc.py | 4 +- .../longitudinal_workflow.py | 2 +- CPAC/network_centrality/pipeline.py | 2 +- CPAC/nuisance/nuisance.py | 2 +- CPAC/pipeline/engine/engine.py | 14 +++-- CPAC/pipeline/{ => engine}/nodeblock.py | 10 ++- CPAC/pipeline/engine/resource.py | 61 +++++++++---------- CPAC/qc/pipeline.py | 2 +- CPAC/qc/xcp.py | 2 +- CPAC/registration/registration.py | 2 +- CPAC/reho/reho.py | 2 +- CPAC/sca/sca.py | 2 +- CPAC/seg_preproc/seg_preproc.py | 2 +- CPAC/surface/surf_preproc.py | 2 +- CPAC/timeseries/timeseries_analysis.py | 2 +- CPAC/utils/tests/test_utils.py | 9 +-- CPAC/utils/typing.py | 8 ++- CPAC/vmhc/vmhc.py | 2 +- 22 files changed, 74 insertions(+), 64 deletions(-) rename CPAC/pipeline/{ => engine}/nodeblock.py (96%) diff --git a/CPAC/alff/alff.py b/CPAC/alff/alff.py index 4fe03cb2cc..dad32c7bcf 100644 --- a/CPAC/alff/alff.py +++ b/CPAC/alff/alff.py @@ -7,7 +7,7 @@ from CPAC.alff.utils import get_opt_string from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.registration.registration import apply_transform from CPAC.utils.utils import check_prov_for_regtool diff --git a/CPAC/anat_preproc/anat_preproc.py b/CPAC/anat_preproc/anat_preproc.py index 8e24b54b81..56f9f74678 100644 --- a/CPAC/anat_preproc/anat_preproc.py +++ b/CPAC/anat_preproc/anat_preproc.py @@ -35,7 +35,7 @@ wb_command, ) from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from 
CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.utils.interfaces.fsl import Merge as fslMerge diff --git a/CPAC/distortion_correction/distortion_correction.py b/CPAC/distortion_correction/distortion_correction.py index 79b8400bb1..df23874266 100644 --- a/CPAC/distortion_correction/distortion_correction.py +++ b/CPAC/distortion_correction/distortion_correction.py @@ -32,7 +32,7 @@ run_fsl_topup, ) from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.utils import function from CPAC.utils.datasource import match_epi_fmaps from CPAC.utils.interfaces.function import Function diff --git a/CPAC/func_preproc/func_motion.py b/CPAC/func_preproc/func_motion.py index 21fdb86a50..10e73be2d0 100644 --- a/CPAC/func_preproc/func_motion.py +++ b/CPAC/func_preproc/func_motion.py @@ -31,7 +31,7 @@ motion_power_statistics, ) from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.pipeline.schema import valid_options from CPAC.utils.interfaces.function import Function from CPAC.utils.utils import check_prov_for_motion_tool diff --git a/CPAC/func_preproc/func_preproc.py b/CPAC/func_preproc/func_preproc.py index 4d0fe73c9e..4196d6541a 100644 --- a/CPAC/func_preproc/func_preproc.py +++ b/CPAC/func_preproc/func_preproc.py @@ -22,7 +22,7 @@ from CPAC.func_preproc.utils import nullify from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.utils.interfaces.ants import ( AI, # niworkflows PrintHeader, @@ -992,7 +992,7 @@ def bold_mask_fsl_afni(wf, cfg, strat_pool, pipe_num, opt=None): # and this function has been changed. 
# CHANGES: - # * Converted from a plain function to a CPAC.pipeline.nodeblock.NodeBlockFunction + # * Converted from a plain function to a CPAC.pipeline.engine.nodeblock.NodeBlockFunction # * Removed Registration version check # * Hardcoded Registration parameters instead of loading epi_atlasbased_brainmask.json # * Uses C-PAC's ``FSL-AFNI-brain-probseg`` template in place of ``templateflow.api.get("MNI152NLin2009cAsym", resolution=1, label="brain", suffix="probseg")`` diff --git a/CPAC/longitudinal_pipeline/longitudinal_workflow.py b/CPAC/longitudinal_pipeline/longitudinal_workflow.py index aacbea6b8d..1d4e62a8f1 100644 --- a/CPAC/longitudinal_pipeline/longitudinal_workflow.py +++ b/CPAC/longitudinal_pipeline/longitudinal_workflow.py @@ -31,7 +31,7 @@ connect_pipeline, ) from CPAC.pipeline.engine import ResourcePool -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.registration import ( create_fsl_flirt_linear_reg, create_fsl_fnirt_nonlinear_reg, diff --git a/CPAC/network_centrality/pipeline.py b/CPAC/network_centrality/pipeline.py index e486f8eff0..407489fd9f 100644 --- a/CPAC/network_centrality/pipeline.py +++ b/CPAC/network_centrality/pipeline.py @@ -19,7 +19,7 @@ from CPAC.network_centrality.network_centrality import create_centrality_wf from CPAC.network_centrality.utils import check_centrality_params, create_merge_node from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.pipeline.schema import valid_options diff --git a/CPAC/nuisance/nuisance.py b/CPAC/nuisance/nuisance.py index c547ff6b01..943d5a3db8 100644 --- a/CPAC/nuisance/nuisance.py +++ b/CPAC/nuisance/nuisance.py @@ -38,7 +38,7 @@ ) from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.engine import ResourcePool -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.registration.registration import ( apply_transform, warp_timeseries_to_EPItemplate, diff --git a/CPAC/pipeline/engine/engine.py b/CPAC/pipeline/engine/engine.py index 4187476bf7..16ffed02cf 100644 --- a/CPAC/pipeline/engine/engine.py +++ b/CPAC/pipeline/engine/engine.py @@ -23,10 +23,12 @@ import logging import os -from nipype import config +from nipype import config # type: ignore [import-untyped] from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import NodeBlockFunction +from CPAC.pipeline.engine.nodeblock import NODEBLOCK_INPUTS, NodeBlockFunction +from CPAC.pipeline.engine.resource import ResourcePool +from CPAC.utils.configuration.configuration import Configuration from CPAC.utils.monitoring import ( getLogger, LOGTAIL, @@ -134,8 +136,8 @@ def grab_tiered_dct(self, cfg, key_list): raise KeyError(msg) from ke return cfg_dct - def connect_block(self, wf, cfg, rpool): - debug = cfg.pipeline_setup["Debugging"]["verbose"] + def connect_block(self, wf: pe.Workflow, cfg: Configuration, rpool: ResourcePool): + debug = bool(cfg.pipeline_setup["Debugging"]["verbose"]) # type: ignore [attr-defined] all_opts = [] for name, block_dct in self.node_blocks.items(): opts = [] @@ -210,7 +212,7 @@ def connect_block(self, wf, cfg, rpool): config = self.check_null(block_dct["config"]) option_key = self.check_null(block_dct["option_key"]) option_val = self.check_null(block_dct["option_val"]) - inputs = self.check_null(block_dct["inputs"]) + inputs: NODEBLOCK_INPUTS = 
self.check_null(block_dct["inputs"]) outputs = self.check_null(block_dct["outputs"]) block_function = block_dct["block_function"] @@ -329,7 +331,7 @@ def connect_block(self, wf, cfg, rpool): verbose_logger.debug("\n=======================") verbose_logger.debug("Node name: %s", node_name) prov_dct = rpool.get_resource_strats_from_prov( - ast.literal_eval(pipe_idx) + ast.literal_eval(str(pipe_idx)) ) for key, val in prov_dct.items(): verbose_logger.debug("-------------------") diff --git a/CPAC/pipeline/nodeblock.py b/CPAC/pipeline/engine/nodeblock.py similarity index 96% rename from CPAC/pipeline/nodeblock.py rename to CPAC/pipeline/engine/nodeblock.py index 53b9db1330..13a857cb55 100644 --- a/CPAC/pipeline/nodeblock.py +++ b/CPAC/pipeline/engine/nodeblock.py @@ -18,6 +18,8 @@ from typing import Any, Callable, Optional +NODEBLOCK_INPUTS = list[str | list | tuple] + class NodeBlockFunction: """Store a reference to the nodeblock function and all of its meta-data.""" @@ -30,7 +32,7 @@ def __init__( switch: Optional[list[str] | list[list[str]]] = None, option_key: Optional[str | list[str]] = None, option_val: Optional[str | list[str]] = None, - inputs: Optional[list[str | list | tuple]] = None, + inputs: Optional[NODEBLOCK_INPUTS] = None, outputs: Optional[list[str] | dict[str, Any]] = None, ) -> None: self.func = func @@ -55,7 +57,9 @@ def __init__( """ self.option_val: Optional[str | list[str]] = option_val """Indicates values for which this NodeBlock should be active.""" - self.inputs: Optional[list[str | list | tuple]] = inputs + if inputs is None: + inputs = [] + self.inputs: list[str | list | tuple] = inputs """ResourcePool keys indicating resources needed for the NodeBlock's functionality.""" self.outputs: Optional[list[str] | dict[str, Any]] = outputs """ @@ -138,7 +142,7 @@ def nodeblock( switch: Optional[list[str] | list[list[str]]] = None, option_key: Optional[str | list[str]] = None, option_val: Optional[str | list[str]] = None, - inputs: Optional[list[str | list | tuple]] = None, + inputs: Optional[NODEBLOCK_INPUTS] = None, outputs: Optional[list[str] | dict[str, Any]] = None, ): """ diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index a2fd0f9aa6..1810192c00 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -37,6 +37,7 @@ ) from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.check_outputs import ExpectedOutputs +from CPAC.pipeline.engine.nodeblock import NODEBLOCK_INPUTS from CPAC.pipeline.utils import MOVEMENT_FILTER_KEYS, name_fork, source_set from CPAC.registration.registration import transform_derivative from CPAC.resources.templates.lookup_table import lookup_identifier @@ -708,30 +709,14 @@ def set_data( if new_pipe_idx not in self.pipe_list: self.pipe_list.append(new_pipe_idx) - # @overload - # def get( - # self: "ResourcePool", - # resource: list[str] | str, - # pipe_idx: Optional[PIPE_IDX], - # report_fetched: bool, - # optional: bool, - # ) -> Optional[dict[dict]] | tuple[Optional[dict[dict]], Optional[str]]: ... - # @overload - # def get( - # self: "StratPool", - # resource: list[str] | str, - # pipe_idx: Optional[PIPE_IDX], - # report_fetched: bool, - # optional: bool, - # ) -> Optional[Resource] | tuple[Optional[Resource], Optional[str]]: ... 
def get( self, - resource: list[str] | str, + resource: LIST_OF_LIST_OF_STR | str | list[str], pipe_idx: Optional[PIPE_IDX], report_fetched: bool, optional: bool, ) -> ( - Optional[Resource | STRAT_DICT] + Optional[Resource | STRAT_DICT | dict] | tuple[Optional[Resource | STRAT_DICT], Optional[str]] ): """Return a dictionary of strats or a single Resource.""" @@ -1271,8 +1256,10 @@ def back_propogate_template_name( try: ancestors = self.rpool.get(source) assert ancestors is not None - ancestor: dict = next(iter(ancestors.items()))[1] - anscestor_json: dict = ancestor.get("json", {}) + ancestor = next(iter(ancestors.items()))[1] + assert not isinstance(ancestor, dict) + anscestor_json = ancestor.json + assert isinstance(anscestor_json, dict) if "Description" in anscestor_json: id_string.inputs.template_desc = anscestor_json[ "Description" @@ -1616,7 +1603,7 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): @overload def get( self, - resource: list[str] | str, + resource: LIST_OF_LIST_OF_STR, pipe_idx: None = None, report_fetched: Literal[False] = False, optional: bool = False, @@ -1624,7 +1611,7 @@ def get( @overload def get( self, - resource: list[str] | str, + resource: LIST_OF_LIST_OF_STR, pipe_idx: PIPE_IDX, report_fetched: Literal[False] = False, optional: bool = False, @@ -1632,7 +1619,7 @@ def get( @overload def get( self, - resource: list[str] | str, + resource: LIST_OF_LIST_OF_STR, pipe_idx: None = None, *, report_fetched: Literal[True], @@ -1641,7 +1628,7 @@ def get( @overload def get( self, - resource: list[str] | str, + resource: LIST_OF_LIST_OF_STR, pipe_idx: PIPE_IDX, report_fetched: Literal[True], optional: Literal[False], @@ -1649,7 +1636,7 @@ def get( @overload def get( self, - resource: list[str] | str, + resource: LIST_OF_LIST_OF_STR, pipe_idx: Optional[PIPE_IDX] = None, report_fetched: bool = False, optional: bool = False, @@ -1659,7 +1646,7 @@ def get( ): ... def get( self, - resource: list[str] | str, + resource: LIST_OF_LIST_OF_STR, pipe_idx: Optional[PIPE_IDX] = None, report_fetched: bool = False, optional: bool = False, @@ -1723,26 +1710,30 @@ def get_data( assert isinstance(_resource, Resource) return _resource.data - def get_strats(self, resources, debug=False) -> dict[str | tuple, "StratPool"]: + def get_strats( + self, resources: NODEBLOCK_INPUTS, debug: bool = False + ) -> dict[str | tuple, "StratPool"]: + """Get a dictionary of StratPools.""" # TODO: NOTE: NOT COMPATIBLE WITH SUB-RPOOL/STRAT_POOLS # TODO: (and it doesn't have to be) import itertools linked_resources = [] - resource_list = [] + resource_list: list[str | list[str]] = [] if debug: verbose_logger = getLogger("CPAC.engine") verbose_logger.debug("\nresources: %s", resources) for resource in resources: # grab the linked-input tuples if isinstance(resource, tuple): - linked = [] + linked: list[str] = [] for label in list(resource): rp_dct, fetched_resource = self.get( label, report_fetched=True, optional=True ) if not rp_dct: continue + assert fetched_resource is not None linked.append(fetched_resource) resource_list += linked if len(linked) < 2: # noqa: PLR2004 @@ -2816,6 +2807,14 @@ def __init__( def append_name(self, name): self.name.append(name) + @overload + def get( + self, + resource: Literal["json"], + pipe_idx: None = None, + report_fetched: Literal[False] = False, + optional: Literal[False] = False, + ) -> dict: ... 
@overload def get( self, @@ -2823,7 +2822,7 @@ def get( pipe_idx: Optional[PIPE_IDX] = None, report_fetched: Literal[False] = False, optional: bool = False, - ) -> Optional[Resource]: ... + ) -> Optional[Resource] | dict: ... @overload def get( self, @@ -2848,7 +2847,7 @@ def get( pipe_idx: Optional[PIPE_IDX] = None, report_fetched: bool = False, optional: bool = False, - ) -> Optional[Resource] | tuple[Optional[Resource], Optional[str]]: ... + ) -> Optional[Resource] | tuple[Optional[Resource], Optional[str]] | dict: ... def get( self, resource: list[str] | str, diff --git a/CPAC/qc/pipeline.py b/CPAC/qc/pipeline.py index 15d6b35e09..fd39ed5193 100644 --- a/CPAC/qc/pipeline.py +++ b/CPAC/qc/pipeline.py @@ -1,7 +1,7 @@ import pkg_resources as p from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.qc.qc import ( afni_Edge3, create_montage, diff --git a/CPAC/qc/xcp.py b/CPAC/qc/xcp.py index 95cb870430..43ddc03381 100644 --- a/CPAC/qc/xcp.py +++ b/CPAC/qc/xcp.py @@ -73,7 +73,7 @@ ImageTo1D, ) from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.qc.qcmetrics import regisQ from CPAC.utils.interfaces.function import Function diff --git a/CPAC/registration/registration.py b/CPAC/registration/registration.py index b6cc9892ea..8f1bf980ac 100644 --- a/CPAC/registration/registration.py +++ b/CPAC/registration/registration.py @@ -26,7 +26,7 @@ from CPAC.anat_preproc.lesion_preproc import create_lesion_preproc from CPAC.func_preproc.utils import chunk_ts, split_ts_chunks from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.registration.utils import ( change_itk_transform_type, check_transforms, diff --git a/CPAC/reho/reho.py b/CPAC/reho/reho.py index 80e6599d10..43f2ba5f47 100644 --- a/CPAC/reho/reho.py +++ b/CPAC/reho/reho.py @@ -2,7 +2,7 @@ import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.reho.utils import * diff --git a/CPAC/sca/sca.py b/CPAC/sca/sca.py index 8e714dbd5f..db48c7f04b 100644 --- a/CPAC/sca/sca.py +++ b/CPAC/sca/sca.py @@ -18,7 +18,7 @@ from nipype.interfaces.afni import preprocess from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.sca.utils import * from CPAC.timeseries.timeseries_analysis import ( get_roi_timeseries, diff --git a/CPAC/seg_preproc/seg_preproc.py b/CPAC/seg_preproc/seg_preproc.py index a66990f1e6..d2e9a98f6d 100644 --- a/CPAC/seg_preproc/seg_preproc.py +++ b/CPAC/seg_preproc/seg_preproc.py @@ -3,7 +3,7 @@ from CPAC.anat_preproc.utils import mri_convert from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.registration.registration import apply_transform from CPAC.registration.utils import check_transforms, generate_inverse_transform_flags from CPAC.seg_preproc.utils import ( diff --git a/CPAC/surface/surf_preproc.py b/CPAC/surface/surf_preproc.py index 2229e24b5a..f826d4b564 100644 --- a/CPAC/surface/surf_preproc.py +++ b/CPAC/surface/surf_preproc.py @@ -3,7 
+3,7 @@ import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.surface.PostFreeSurfer.surf_reho import run_surf_reho diff --git a/CPAC/timeseries/timeseries_analysis.py b/CPAC/timeseries/timeseries_analysis.py index 14547bc79b..8f8355a3d2 100644 --- a/CPAC/timeseries/timeseries_analysis.py +++ b/CPAC/timeseries/timeseries_analysis.py @@ -23,7 +23,7 @@ get_connectome_method, ) from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.utils.datasource import ( create_roi_mask_dataflow, create_spatial_map_dataflow, diff --git a/CPAC/utils/tests/test_utils.py b/CPAC/utils/tests/test_utils.py index f9a4cb73e4..68befae270 100644 --- a/CPAC/utils/tests/test_utils.py +++ b/CPAC/utils/tests/test_utils.py @@ -6,7 +6,7 @@ import pytest from CPAC.func_preproc import get_motion_ref -from CPAC.pipeline.nodeblock import NodeBlockFunction +from CPAC.pipeline.engine.nodeblock import NodeBlockFunction from CPAC.utils.configuration import Configuration from CPAC.utils.monitoring.custom_logging import log_subprocess from CPAC.utils.utils import ( @@ -56,9 +56,10 @@ def _installation_check(command: str, flag: str) -> None: def test_check_config_resources(): """Test check_config_resources function.""" - with mock.patch.object(multiprocessing, "cpu_count", return_value=2), pytest.raises( - SystemError - ) as system_error: + with ( + mock.patch.object(multiprocessing, "cpu_count", return_value=2), + pytest.raises(SystemError) as system_error, + ): check_config_resources( Configuration( {"pipeline_setup": {"system_config": {"max_cores_per_participant": 10}}} diff --git a/CPAC/utils/typing.py b/CPAC/utils/typing.py index e93a851745..79197dd314 100644 --- a/CPAC/utils/typing.py +++ b/CPAC/utils/typing.py @@ -16,5 +16,9 @@ # License along with C-PAC. If not, see . """Type aliases for C-PAC.""" -LIST_OF_LIST_OF_STR = str | list["LIST_OF_LIST_OF_STR"] -PIPE_IDX = list["PIPE_IDX"] | str | tuple["PIPE_IDX", ...] +from typing import ForwardRef + +LIST_OF_LIST_OF_STR = str | list[ForwardRef("LIST_OF_LIST_OF_STR")] +# _PIPE_IDX = list[ForwardRef("PIPE_IDX")] | str | tuple[ForwardRef("PIPE_IDX"), ...] 
+# PIPE_IDX = TypeVar("PIPE_IDX", bound=_PIPE_IDX) +PIPE_IDX = list[str | tuple] | str | tuple diff --git a/CPAC/vmhc/vmhc.py b/CPAC/vmhc/vmhc.py index 3c547a8e2f..e09f156dfb 100644 --- a/CPAC/vmhc/vmhc.py +++ b/CPAC/vmhc/vmhc.py @@ -3,7 +3,7 @@ from CPAC.image_utils import spatial_smoothing from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.nodeblock import nodeblock +from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.registration.registration import apply_transform from CPAC.utils.utils import check_prov_for_regtool from CPAC.vmhc import * From 02d1a145335efe16c8048d31dfea0ea5911ab44a Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Fri, 12 Jul 2024 22:06:37 -0400 Subject: [PATCH 35/93] :white_check_mark: Remove dir instead of file --- CPAC/utils/tests/test_utils.py | 4 ++-- dev/circleci_data/test_external_utils.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CPAC/utils/tests/test_utils.py b/CPAC/utils/tests/test_utils.py index 68befae270..c972e712f9 100644 --- a/CPAC/utils/tests/test_utils.py +++ b/CPAC/utils/tests/test_utils.py @@ -30,8 +30,7 @@ def _installation_check(command: str, flag: str) -> None: - """Test that command is installed by running specified version or - help flag. + """Test that command is installed by running specified version or help flag. Parameters ---------- @@ -97,6 +96,7 @@ def test_NodeBlock_option_SSOT(): # pylint: disable=invalid-name def test_system_deps(): """Test system dependencies. + Raises an exception if dependencies are not met. """ check_system_deps(*([True] * 4)) diff --git a/dev/circleci_data/test_external_utils.py b/dev/circleci_data/test_external_utils.py index e747767032..5682f75d66 100644 --- a/dev/circleci_data/test_external_utils.py +++ b/dev/circleci_data/test_external_utils.py @@ -19,6 +19,7 @@ from logging import INFO import os from pathlib import Path +from shutil import rmtree import sys import click @@ -110,7 +111,7 @@ def test_new_settings_template(bids_examples, caplog, cli_runner): participant_yaml = DATA_DIR / "data_config_ds051.yml" group_yaml = DATA_DIR / "group_analysis_participants_ds051.txt" - os.remove(str(example_dir)) + rmtree(str(example_dir)) assert result.exit_code == 0 assert "\n".join(caplog.messages).startswith( "\nGenerating data configuration file.." From 1b1146282c45ce11601384c69097056f419e2bcf Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Fri, 12 Jul 2024 22:14:15 -0400 Subject: [PATCH 36/93] :technologist: Add `__repr__` method to `Resource` --- CPAC/pipeline/engine/resource.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 1810192c00..4a4687f230 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -221,6 +221,16 @@ def __getitem__(self, name: str) -> tuple[pe.Node, str | tuple[str]]: msg = f"Key '{name}' not set in {self}." 
raise KeyError(msg) + def __repr__(self) -> str: + """Return reproducible string for Resource.""" + positional = f"Resource(data={self.data}, json={self.json}" + kw = ", ".join( + f"{key}={getattr(self, key)}" + for key in self.keys() + if key not in ["data", "json"] + ) + return f"{positional}{kw})" + def __setitem__(self, name: str, value: Any) -> None: """Provide legacy dict-style set access.""" setattr(self, name, value) From 9ba084ecdfe3d6d1e48c0a8a3f575f3e8a8a8887 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Jul 2024 18:35:58 +0000 Subject: [PATCH 37/93] :arrow_up: Bump zipp from 3.16.0 to 3.19.1 Bumps [zipp](https://github.com/jaraco/zipp) from 3.16.0 to 3.19.1. - [Release notes](https://github.com/jaraco/zipp/releases) - [Changelog](https://github.com/jaraco/zipp/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/zipp/compare/v3.16.0...v3.19.1) --- updated-dependencies: - dependency-name: zipp dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e6060c7f38..185f432729 100644 --- a/requirements.txt +++ b/requirements.txt @@ -48,4 +48,4 @@ pip==23.3 setuptools<60.0 urllib3==1.26.19 wheel==0.40.0 -zipp==3.16.0 +zipp==3.19.1 From bb6cbae1441c8f7372754e4eb25d86cf6239c14d Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 15 Jul 2024 10:01:42 -0400 Subject: [PATCH 38/93] :bug: Fix circular import --- CPAC/pipeline/engine/resource.py | 6 ++++-- CPAC/pipeline/utils.py | 7 +++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 4a4687f230..e57e723343 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -38,7 +38,7 @@ from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.check_outputs import ExpectedOutputs from CPAC.pipeline.engine.nodeblock import NODEBLOCK_INPUTS -from CPAC.pipeline.utils import MOVEMENT_FILTER_KEYS, name_fork, source_set +from CPAC.pipeline.utils import name_fork, source_set from CPAC.registration.registration import transform_derivative from CPAC.resources.templates.lookup_table import lookup_identifier from CPAC.utils.bids_utils import res_in_filename @@ -1280,6 +1280,8 @@ def back_propogate_template_name( return def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): + from CPAC.func_preproc.func_motion import motion_estimate_filter + excl = [] substring_excl = [] outputs_logger = getLogger(f"{self.part_id}_expectedOutputs") @@ -1397,7 +1399,7 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): key for json_info in all_jsons for key in json_info.get("CpacVariant", {}).keys() - if key not in (*MOVEMENT_FILTER_KEYS, "regressors") + if key not in (*motion_estimate_filter.outputs, "regressors") } if "bold" in unlabelled: all_bolds = list( diff --git a/CPAC/pipeline/utils.py b/CPAC/pipeline/utils.py index 39acb6429f..7b1dbaffff 100644 --- a/CPAC/pipeline/utils.py +++ b/CPAC/pipeline/utils.py @@ -18,11 +18,8 @@ from itertools import chain -from CPAC.func_preproc.func_motion import motion_estimate_filter from CPAC.utils.bids_utils import insert_entity -MOVEMENT_FILTER_KEYS = motion_estimate_filter.outputs - def name_fork(resource_idx, cfg, json_info, out_dct): """Create and insert entities for forkpoints. 
@@ -43,6 +40,8 @@ def name_fork(resource_idx, cfg, json_info, out_dct): out_dct : dict """ + from CPAC.func_preproc.func_motion import motion_estimate_filter + if cfg.switch_is_on( [ "functional_preproc", @@ -54,7 +53,7 @@ def name_fork(resource_idx, cfg, json_info, out_dct): filt_value = None _motion_variant = { _key: json_info["CpacVariant"][_key] - for _key in MOVEMENT_FILTER_KEYS + for _key in motion_estimate_filter.outputs if _key in json_info.get("CpacVariant", {}) } if "unfiltered-" in resource_idx: From 1058c940c85df6ff41fd998e40a8a0282ccb717c Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 15 Jul 2024 11:55:46 -0400 Subject: [PATCH 39/93] :art: Define `_Pool.__contains__` and `Resource.__contains__` --- CPAC/pipeline/engine/resource.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index e57e723343..ceff964211 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -214,6 +214,10 @@ def keys(self) -> list[str]: """Return list of subscriptable keys.""" return list(self._keys) + def __contains__(self, item: Any) -> bool: + """Return True if item in self.keys(), False otherwise.""" + return item in self.keys() + def __getitem__(self, name: str) -> tuple[pe.Node, str | tuple[str]]: """Provide legacy dict-style get access.""" if name in self.keys(): @@ -549,6 +553,10 @@ def keys(self) -> KeysView: """Return rpool's keys.""" return self.rpool.keys() + def __contains__(self, key) -> bool: + """Return True if key in Pool, False otherwise.""" + return key in self.keys() + @staticmethod def get_raw_label(resource: str) -> str: """Remove ``desc-*`` label.""" @@ -1704,9 +1712,9 @@ def get_data( def get_data( self, resource, - pipe_idx, - report_fetched, - quick_single, + pipe_idx=None, + report_fetched=False, + quick_single=False, ): """Get ResourceData from ResourcePool.""" _resource = self.get(resource, pipe_idx=pipe_idx, report_fetched=report_fetched) @@ -2878,7 +2886,7 @@ def get_data( def get_data( self, resource: list[str] | str, report_fetched: Literal[False] = False ) -> ResourceData: ... 
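# Illustrative note (not from these diffs): the __contains__ methods added
# just above keep dict-style membership tests such as `"json" in
# strat_resource` working on Resource and pool objects. A minimal standalone
# sketch of that protocol, using a hypothetical class:
class _ContainsDemo:
    def keys(self) -> list[str]:
        return ["data", "json"]

    def __contains__(self, item) -> bool:
        return item in self.keys()

assert "json" in _ContainsDemo()
assert "missing" not in _ContainsDemo()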
- def get_data(self, resource, report_fetched): + def get_data(self, resource, report_fetched=False): """Get ResourceData from a StratPool.""" _resource = self.get(resource, report_fetched=report_fetched) if report_fetched: From aaa37a90bd992567ee3466ec27691107f8c2149a Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 15 Jul 2024 13:14:09 -0400 Subject: [PATCH 40/93] :recycle: Move `StratDict().rpool[json]` to `StratDict().json` --- CPAC/pipeline/engine/engine.py | 2 +- CPAC/pipeline/engine/resource.py | 198 ++++++++++++++++++++----------- 2 files changed, 128 insertions(+), 72 deletions(-) diff --git a/CPAC/pipeline/engine/engine.py b/CPAC/pipeline/engine/engine.py index 16ffed02cf..9c9540e18e 100644 --- a/CPAC/pipeline/engine/engine.py +++ b/CPAC/pipeline/engine/engine.py @@ -349,7 +349,7 @@ def connect_block(self, wf: pe.Workflow, cfg: Configuration, rpool: ResourcePool for label, connection in outs.items(): self.check_output(outputs, label, name) - new_json_info = copy.deepcopy(strat_pool.get("json")) + new_json_info = copy.deepcopy(strat_pool.json) # transfer over data-specific json info # for example, if the input data json is _bold and the output is also _bold diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index ceff964211..82d79e7d28 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -74,7 +74,7 @@ EXTS = [".nii", ".gz", ".mat", ".1D", ".txt", ".csv", ".rms", ".tsv"] POOL_DICT = dict[str | tuple, "STRAT_DICT"] -STRAT_DICT = dict[str | tuple, "dict | Resource"] +STRAT_DICT = dict[str | tuple, "Resource"] class DataPaths: @@ -201,7 +201,7 @@ class ResourceData(NamedTuple): class Resource: """A single Resource and its methods.""" - def __init__(self, data: tuple[pe.Node, str], json: dict | list) -> None: + def __init__(self, data: tuple[pe.Node, str], json: dict) -> None: """Initialize a Resource.""" self.data = ResourceData(*data) """Tuple of source Node and output key.""" @@ -218,7 +218,7 @@ def __contains__(self, item: Any) -> bool: """Return True if item in self.keys(), False otherwise.""" return item in self.keys() - def __getitem__(self, name: str) -> tuple[pe.Node, str | tuple[str]]: + def __getitem__(self, name: str) -> Any: """Provide legacy dict-style get access.""" if name in self.keys(): return getattr(self, name) @@ -784,18 +784,18 @@ def get_pipe_idxs(self, resource): return self.rpool[resource].keys() def get_json(self, resource, strat=None): - # NOTE: resource_strat_dct has to be entered properly by the developer + # NOTE: strat_resource has to be entered properly by the developer # it has to either be rpool[resource][strat] or strat_pool[resource] if strat: - resource_strat_dct = self.rpool[resource][strat] + strat_resource = self.rpool[resource][strat] else: # for strat_pools mainly, where there is no 'strat' key level - resource_strat_dct = self.rpool[resource] + strat_resource = self.rpool[resource] # TODO: the below hits the exception if you use get_cpac_provenance on # TODO: the main rpool (i.e. if strat=None) - if "json" in resource_strat_dct: - strat_json = resource_strat_dct["json"] + if "json" in strat_resource: + strat_json = strat_resource["json"] else: msg = ( "\n[!] 
Developer info: the JSON " @@ -806,7 +806,7 @@ def get_json(self, resource, strat=None): return strat_json def get_cpac_provenance(self, resource, strat=None): - # NOTE: resource_strat_dct has to be entered properly by the developer + # NOTE: strat_resource has to be entered properly by the developer # it has to either be rpool[resource][strat] or strat_pool[resource] if isinstance(resource, list): for _resource in resource: @@ -834,19 +834,19 @@ def get_resource_strats_from_prov(prov): # led to that one specific output: # {rpool entry}: {that entry's provenance} # {rpool entry}: {that entry's provenance} - resource_strat_dct = {} + strat_resource = {} if isinstance(prov, str): resource = prov.split(":")[0] - resource_strat_dct[resource] = prov + strat_resource[resource] = prov else: for spot, entry in enumerate(prov): if isinstance(entry, list): resource = entry[-1].split(":")[0] - resource_strat_dct[resource] = entry + strat_resource[resource] = entry elif isinstance(entry, str): resource = entry.split(":")[0] - resource_strat_dct[resource] = entry - return resource_strat_dct + strat_resource[resource] = entry + return strat_resource def flatten_prov(self, prov): if isinstance(prov, str): @@ -1274,10 +1274,7 @@ def back_propogate_template_name( try: ancestors = self.rpool.get(source) assert ancestors is not None - ancestor = next(iter(ancestors.items()))[1] - assert not isinstance(ancestor, dict) - anscestor_json = ancestor.json - assert isinstance(anscestor_json, dict) + anscestor_json = next(iter(ancestors.items()))[1].json if "Description" in anscestor_json: id_string.inputs.template_desc = anscestor_json[ "Description" @@ -1626,7 +1623,8 @@ def get( resource: LIST_OF_LIST_OF_STR, pipe_idx: None = None, report_fetched: Literal[False] = False, - optional: bool = False, + *, + optional: Literal[True], ) -> Optional[STRAT_DICT]: ... @overload def get( @@ -1634,7 +1632,8 @@ def get( resource: LIST_OF_LIST_OF_STR, pipe_idx: PIPE_IDX, report_fetched: Literal[False] = False, - optional: bool = False, + *, + optional: Literal[True], ) -> Optional[Resource]: ... @overload def get( @@ -1643,7 +1642,7 @@ def get( pipe_idx: None = None, *, report_fetched: Literal[True], - optional: bool = False, + optional: Literal[True], ) -> tuple[Optional[STRAT_DICT], Optional[str]]: ... @overload def get( @@ -1651,9 +1650,42 @@ def get( resource: LIST_OF_LIST_OF_STR, pipe_idx: PIPE_IDX, report_fetched: Literal[True], - optional: Literal[False], + optional: Literal[True], ) -> tuple[Optional[Resource], Optional[str]]: ... @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: None = None, + report_fetched: Literal[False] = False, + optional: Literal[False] = False, + ) -> STRAT_DICT: ... + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: PIPE_IDX, + report_fetched: Literal[False] = False, + optional: Literal[False] = False, + ) -> Resource: ... + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: None = None, + *, + report_fetched: Literal[True], + optional: bool = False, + ) -> tuple[Optional[STRAT_DICT], Optional[str]]: ... + @overload + def get( + self, + resource: LIST_OF_LIST_OF_STR, + pipe_idx: PIPE_IDX, + report_fetched: Literal[True], + optional: Literal[False] = False, + ) -> tuple[Resource, str]: ... 
+ @overload def get( self, resource: LIST_OF_LIST_OF_STR, @@ -1842,9 +1874,9 @@ def get_strats( json_dct = {} for strat in strat_list: # strat is a prov list for a single resource/input - strat_resource, strat_idx = self.generate_prov_string(strat) - strat_json = self.get_json(strat_resource, strat=strat_idx) - json_dct[strat_resource] = strat_json + prov_resource, strat_idx = self.generate_prov_string(strat) + strat_json = self.get_json(prov_resource, strat=strat_idx) + json_dct[prov_resource] = strat_json drop = False if linked_resources: @@ -1927,39 +1959,36 @@ def get_strats( # strat_list is actually the merged CpacProvenance lists pipe_idx = str(strat_list) new_strats[pipe_idx] = StratPool(name=pipe_idx) - # new_strats is A DICTIONARY OF RESOURCEPOOL OBJECTS! - # placing JSON info at one level higher only for copy convenience - new_strats[pipe_idx].rpool["json"] = {} - new_strats[pipe_idx].rpool["json"]["subjson"] = {} - new_strats[pipe_idx].rpool["json"]["CpacProvenance"] = strat_list + # new_strats is A DICTIONARY OF StratPool OBJECTS! + new_strats[pipe_idx].json = {"CpacProvenance": {strat_list}} # now just invert resource:strat to strat:resource for each resource:strat for cpac_prov in strat_list: resource, strat = self.generate_prov_string(cpac_prov) - resource_strat_dct = self.rpool[resource][strat] - # remember, `resource_strat_dct` is a Resource. - new_strats[pipe_idx].rpool[resource] = resource_strat_dct + strat_resource = self.rpool[resource][strat] + # remember, `strat_resource` is a Resource. + new_strats[pipe_idx].rpool[resource] = strat_resource # `new_strats` is A DICTIONARY OF RESOURCEPOOL OBJECTS! each one is a new slice of the resource pool combined together. self.pipe_list.append(pipe_idx) - if "CpacVariant" in resource_strat_dct["json"]: - if "CpacVariant" not in new_strats[pipe_idx].rpool["json"]: - new_strats[pipe_idx].rpool["json"]["CpacVariant"] = {} - _variant = new_strats[pipe_idx].rpool["json"]["CpacVariant"] + if "CpacVariant" in strat_resource["json"]: + if "CpacVariant" not in new_strats[pipe_idx].json: + new_strats[pipe_idx].json["CpacVariant"] = {} + _variant = new_strats[pipe_idx].json["CpacVariant"] assert isinstance(_variant, dict) for younger_resource, variant_list in _variant.items(): if ( younger_resource - not in new_strats[pipe_idx].rpool["json"]["CpacVariant"] + not in new_strats[pipe_idx].json["CpacVariant"] ): - new_strats[pipe_idx].rpool["json"]["CpacVariant"][ + new_strats[pipe_idx].json["CpacVariant"][ younger_resource ] = variant_list # preserve each input's JSON info also data_type = resource.split("_")[-1] - if data_type not in new_strats[pipe_idx].rpool["json"]["subjson"]: - new_strats[pipe_idx].rpool["json"]["subjson"][data_type] = {} - new_strats[pipe_idx].rpool["json"]["subjson"][data_type].update( - copy.deepcopy(resource_strat_dct["json"]) + if data_type not in new_strats[pipe_idx].json["subjson"]: + new_strats[pipe_idx].json["subjson"][data_type] = {} + new_strats[pipe_idx].json["subjson"][data_type].update( + copy.deepcopy(strat_resource["json"]) ) else: new_strats = {} @@ -1967,23 +1996,20 @@ def get_strats( # total_pool will have only one list of strats, for the one input for cpac_prov in resource_strat_list: # <------- cpac_prov here doesn't need to be modified, because it's not merging with other inputs resource, pipe_idx = self.generate_prov_string(cpac_prov) - resource_strat_dct = self.rpool[resource][pipe_idx] - # remember, `resource_strat_dct` is a Resource. 
+ strat_resource = self.rpool[resource][pipe_idx] + # remember, `strat_resource` is a Resource. new_strats[pipe_idx] = StratPool( - rpool={resource: resource_strat_dct}, name=pipe_idx - ) # <----- again, new_strats is A DICTIONARY OF RESOURCEPOOL OBJECTS! - # placing JSON info at one level higher only for copy convenience - new_strats[pipe_idx].rpool["json"] = resource_strat_dct["json"] - # TODO: WARNING- THIS IS A LEVEL HIGHER THAN THE ORIGINAL 'JSON' FOR EASE OF ACCESS IN CONNECT_BLOCK WITH THE .GET(JSON) - new_strats[pipe_idx].rpool["json"]["subjson"] = {} - new_strats[pipe_idx].rpool["json"]["CpacProvenance"] = cpac_prov + rpool={resource: strat_resource}, name=pipe_idx + ) # <----- again, new_strats is A DICTIONARY OF StratPool OBJECTS! + new_strats[pipe_idx].json = strat_resource.json + new_strats[pipe_idx].json["subjson"] = {} + new_strats[pipe_idx].json["CpacProvenance"] = cpac_prov # preserve each input's JSON info also data_type = resource.split("_")[-1] - if data_type not in new_strats[pipe_idx].rpool["json"]["subjson"]: - new_strats[pipe_idx].rpool["json"]["subjson"][data_type] = {} - _json = new_strats[pipe_idx].rpool["json"] - new_strats[pipe_idx].rpool["json"]["subjson"][data_type].update( - copy.deepcopy(resource_strat_dct["json"]) + if data_type not in new_strats[pipe_idx].json["subjson"]: + new_strats[pipe_idx].json["subjson"][data_type] = {} + new_strats[pipe_idx].json["subjson"][data_type].update( + copy.deepcopy(strat_resource["json"]) ) return new_strats @@ -2823,33 +2849,27 @@ def __init__( self.rpool = STRAT_DICT({}) else: self.rpool = STRAT_DICT(rpool) + self._json: dict[str, dict] = {"subjson": {}} def append_name(self, name): self.name.append(name) - @overload - def get( - self, - resource: Literal["json"], - pipe_idx: None = None, - report_fetched: Literal[False] = False, - optional: Literal[False] = False, - ) -> dict: ... @overload def get( self, resource: list[str] | str, pipe_idx: Optional[PIPE_IDX] = None, report_fetched: Literal[False] = False, - optional: bool = False, - ) -> Optional[Resource] | dict: ... + *, + optional: Literal[True], + ) -> Optional[Resource]: ... @overload def get( self, resource: list[str] | str, pipe_idx: Optional[PIPE_IDX], report_fetched: Literal[True], - optional: bool = False, + optional: Literal[True], ) -> tuple[Optional[Resource], Optional[str]]: ... @overload def get( @@ -2858,8 +2878,34 @@ def get( pipe_idx: Optional[PIPE_IDX] = None, *, report_fetched: Literal[True], - optional: bool = False, - ) -> tuple[Optional[Resource], Optional[str]]: ... + optional: Literal[False], + ) -> tuple[Resource, str]: ... + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX] = None, + report_fetched: bool = False, + *, + optional: Literal[True], + ) -> Optional[Resource] | tuple[Optional[Resource], Optional[str]]: ... + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX] = None, + report_fetched: Literal[False] = False, + optional: Literal[False] = False, + ) -> Resource: ... + @overload + def get( + self, + resource: list[str] | str, + pipe_idx: Optional[PIPE_IDX] = None, + *, + report_fetched: Literal[True], + optional: Literal[False] = False, + ) -> tuple[Resource, str]: ... @overload def get( self, @@ -2867,7 +2913,7 @@ def get( pipe_idx: Optional[PIPE_IDX] = None, report_fetched: bool = False, optional: bool = False, - ) -> Optional[Resource] | tuple[Optional[Resource], Optional[str]] | dict: ... 
+ ) -> Optional[Resource] | tuple[Optional[Resource], Optional[str]]: ... def get( self, resource: list[str] | str, @@ -2896,3 +2942,13 @@ def get_data(self, resource, report_fetched=False): return connect.data, fetched assert isinstance(_resource, Resource) return _resource.data + + @property + def json(self) -> dict: + """Return strategy-specific JSON.""" + return self._json + + @json.setter + def json(self, strategy_json=dict) -> None: + """Update strategy-specific JSON.""" + self._json.update(strategy_json) From 4280fc63f67a7904da76ad7ccec56ad4e48018bb Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 15 Jul 2024 13:47:15 -0400 Subject: [PATCH 41/93] :recycle: Dedupe loop through `self.node_blocks.items()` --- CPAC/pipeline/engine/engine.py | 70 +++++++++++----------------------- 1 file changed, 23 insertions(+), 47 deletions(-) diff --git a/CPAC/pipeline/engine/engine.py b/CPAC/pipeline/engine/engine.py index 9c9540e18e..e71e2cefe1 100644 --- a/CPAC/pipeline/engine/engine.py +++ b/CPAC/pipeline/engine/engine.py @@ -138,12 +138,32 @@ def grab_tiered_dct(self, cfg, key_list): def connect_block(self, wf: pe.Workflow, cfg: Configuration, rpool: ResourcePool): debug = bool(cfg.pipeline_setup["Debugging"]["verbose"]) # type: ignore [attr-defined] - all_opts = [] + all_opts: list[str] = [] + + sidecar_additions = { + "CpacConfigHash": hashlib.sha1( + json.dumps(cfg.dict(), sort_keys=True).encode("utf-8") + ).hexdigest(), + "CpacConfig": cfg.dict(), + } + + if cfg["pipeline_setup"]["output_directory"].get("user_defined"): + sidecar_additions["UserDefined"] = cfg["pipeline_setup"][ + "output_directory" + ]["user_defined"] + for name, block_dct in self.node_blocks.items(): - opts = [] + # iterates over either the single node block in the sequence, or a list of node blocks within the list of node blocks, i.e. for option forking. + switch = self.check_null(block_dct["switch"]) config = self.check_null(block_dct["config"]) option_key = self.check_null(block_dct["option_key"]) option_val = self.check_null(block_dct["option_val"]) + inputs: NODEBLOCK_INPUTS = self.check_null(block_dct["inputs"]) + outputs = self.check_null(block_dct["outputs"]) + + block_function = block_dct["block_function"] + + opts = [] if option_key and option_val: if not isinstance(option_key, list): option_key = [option_key] @@ -190,51 +210,6 @@ def connect_block(self, wf: pe.Workflow, cfg: Configuration, rpool: ResourcePool option_val = option_config[-1] if option_val in self.grab_tiered_dct(cfg, key_list[:-1]): opts.append(option_val) - else: # AND, if there are multiple option-val's (in a list) in the docstring, it gets iterated below in 'for opt in option' etc. AND THAT'S WHEN YOU HAVE TO DELINEATE WITHIN THE NODE BLOCK CODE!!! - opts = [None] - all_opts += opts - - sidecar_additions = { - "CpacConfigHash": hashlib.sha1( - json.dumps(cfg.dict(), sort_keys=True).encode("utf-8") - ).hexdigest(), - "CpacConfig": cfg.dict(), - } - - if cfg["pipeline_setup"]["output_directory"].get("user_defined"): - sidecar_additions["UserDefined"] = cfg["pipeline_setup"][ - "output_directory" - ]["user_defined"] - - for name, block_dct in self.node_blocks.items(): - # iterates over either the single node block in the sequence, or a list of node blocks within the list of node blocks, i.e. for option forking. 
- switch = self.check_null(block_dct["switch"]) - config = self.check_null(block_dct["config"]) - option_key = self.check_null(block_dct["option_key"]) - option_val = self.check_null(block_dct["option_val"]) - inputs: NODEBLOCK_INPUTS = self.check_null(block_dct["inputs"]) - outputs = self.check_null(block_dct["outputs"]) - - block_function = block_dct["block_function"] - - opts = [] - if option_key and option_val: - if not isinstance(option_key, list): - option_key = [option_key] - if not isinstance(option_val, list): - option_val = [option_val] - if config: - key_list = config + option_key - else: - key_list = option_key - if "USER-DEFINED" in option_val: - # load custom config data into each 'opt' - opts = self.grab_tiered_dct(cfg, key_list) - else: - for option in option_val: - if option in self.grab_tiered_dct(cfg, key_list): - # goes over the option_vals in the node block docstring, and checks if the user's pipeline config included it in the forking list - opts.append(option) else: # AND, if there are multiple option-val's (in a list) in the docstring, it gets iterated below in 'for opt in option' etc. AND THAT'S WHEN YOU HAVE TO DELINEATE WITHIN THE NODE BLOCK CODE!!! opts = [None] # THIS ALSO MEANS the multiple option-val's in docstring node blocks can be entered once in the entire node-block sequence, not in a list of multiples @@ -243,6 +218,7 @@ def connect_block(self, wf: pe.Workflow, cfg: Configuration, rpool: ResourcePool # block functions - opts will be empty for non-selected # options, and would waste the get_strats effort below continue + all_opts += opts if not switch: switch = [True] From c22db11a19b0f7e8888a7401d824bcaf86b6a6b2 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 15 Jul 2024 14:02:58 -0400 Subject: [PATCH 42/93] :bug: Remove extra curly braces --- CPAC/pipeline/engine/resource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 82d79e7d28..1b021dcd16 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -1960,7 +1960,7 @@ def get_strats( pipe_idx = str(strat_list) new_strats[pipe_idx] = StratPool(name=pipe_idx) # new_strats is A DICTIONARY OF StratPool OBJECTS! 
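# Illustration of why the extra braces removed below were a bug (the values
# here are made up): `{strat_list}` is a *set* literal, so the mapping would
# hold a one-element set -- or raise TypeError outright when the provenance
# is an (unhashable) list -- instead of holding the provenance list itself.
strat_list = ["T1w:anat_ingress", "desc-brain_mask:brain_mask_afni"]
try:
    bad = {"CpacProvenance": {strat_list}}  # set literal around a list
except TypeError as error:
    print(error)  # unhashable type: 'list'
good = {"CpacProvenance": strat_list}  # what the corrected line stores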
- new_strats[pipe_idx].json = {"CpacProvenance": {strat_list}} + new_strats[pipe_idx].json = {"CpacProvenance": strat_list} # now just invert resource:strat to strat:resource for each resource:strat for cpac_prov in strat_list: From 90d96b1b2f6de269e6c1eace8a8b273c02d8e597 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 15 Jul 2024 14:10:55 -0400 Subject: [PATCH 43/93] :white_check_mark: Update test for new `Resource` class --- CPAC/resources/tests/test_templates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CPAC/resources/tests/test_templates.py b/CPAC/resources/tests/test_templates.py index 66e8c27262..0c70370f7f 100644 --- a/CPAC/resources/tests/test_templates.py +++ b/CPAC/resources/tests/test_templates.py @@ -46,7 +46,7 @@ def test_packaged_path_exists(pipeline): rpool = ResourcePool(cfg=Preconfiguration(pipeline), part_id="pytest") rpool.ingress_pipeconfig_paths() for resource in rpool.rpool.values(): - node = next(iter(resource.values())).get("data")[0] + node = next(iter(resource.values())).data[0] if hasattr(node.inputs, "template") and not node.inputs.template.startswith( "s3:" ): From e25479fd1a3fed2aa34e011b2cd06bb083998a77 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 15 Jul 2024 14:19:59 -0400 Subject: [PATCH 44/93] :bug: Fix auto-`quick_single` --- CPAC/pipeline/engine/resource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 1b021dcd16..c4e849c146 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -1755,7 +1755,7 @@ def get_data( connect, fetched = _resource assert isinstance(connect, Resource) and isinstance(fetched, str) return connect.data, fetched - if quick_single or len(_resource) == 1: + if quick_single or len(resource) == 1: assert isinstance(_resource, dict) for value in _resource.values(): return value.data From faf8ab9587f969fe4ffb0bd958b629a2ca7b78fe Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 15 Jul 2024 14:44:58 -0400 Subject: [PATCH 45/93] :white_check_mark: Check if `bids-examples` is empty before moving on --- CPAC/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CPAC/conftest.py b/CPAC/conftest.py index 3bff185fba..52113ebd40 100644 --- a/CPAC/conftest.py +++ b/CPAC/conftest.py @@ -25,7 +25,7 @@ def bids_examples(cache: pytest.Cache) -> Path: """Get cached example BIDS directories.""" bids_dir = cache.mkdir("bids-examples").absolute() - if not bids_dir.exists(): + if not (bids_dir.exists() and list(bids_dir.iterdir())): from git import Repo Repo.clone_from("https://github.com/bids-standard/bids-examples.git", bids_dir) From 5ea19ef0285994a3e303ce4903911994a67f64b3 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 15 Jul 2024 15:33:11 -0400 Subject: [PATCH 46/93] :twisted_rightwards_arrow: Merge `develop` into `engine/(th)r(e)esources` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Squashed commit of the following: commit a40c44931df0ca753db61f5321159f282dd77f7c Merge: 6b6f7f447 9ba084ecd Author: Jon Clucas Date: Mon Jul 15 13:16:14 2024 -0400 ⬆️ Bump zipp from 3.16.0 to 3.19.1 (#2132) commit 9ba084ecdfe3d6d1e48c0a8a3f575f3e8a8a8887 Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue Jul 9 18:35:58 2024 +0000 :arrow_up: Bump zipp from 3.16.0 to 3.19.1 Bumps [zipp](https://github.com/jaraco/zipp) from 3.16.0 to 3.19.1. 
- [Release notes](https://github.com/jaraco/zipp/releases) - [Changelog](https://github.com/jaraco/zipp/blob/main/NEWS.rst) - [Commits](https://github.com/jaraco/zipp/compare/v3.16.0...v3.19.1) --- updated-dependencies: - dependency-name: zipp dependency-type: direct:production ... Signed-off-by: dependabot[bot] commit 6b6f7f447bdd3d0acb67b02f04417c10a2e870bb Merge: 5ce5d379f 979b0a9c7 Author: Jon Clucas Date: Fri Jul 12 13:27:18 2024 -0400 🐛 Fix `get_scan_params` (#2129) commit 979b0a9c715c8398b55134ca6d46db707800084c Author: Jon Clucas Date: Tue Jul 9 09:39:03 2024 -0400 :pencil2: Fix f-string missing `f` commit af65a2e850b1c3ed0438bf4284325ce88c44129f Author: Jon Clucas Date: Mon Jul 8 17:01:06 2024 -0400 :bug: Fix circular import commit f1943771af4bf1061f87dd79288fb464992e1b94 Author: Jon Clucas Date: Mon Jul 8 16:07:15 2024 -0400 :recycle: Exclusively use custom `Function` Nodes + :rotating_light: Lint commit c7819d1aefbb9ebd1b352c71b2f92cb980e907e1 Author: Jon Clucas Date: Mon Jul 8 14:16:53 2024 -0400 :art: Remove unnecessary initializations commit b013cccb49c05e4297a0b088727ebb41e99e8988 Author: Jon Clucas Date: Mon Jul 8 13:54:48 2024 -0400 :package: Init `Function` commit b19907a8aa8ca909db66de81c3b3b575b17a078a Author: Jon Clucas Date: Mon Jul 8 13:40:19 2024 -0400 :bug: Use C-PAC Function node commit 7d6f0eeb0a09894798ae7b0578ec1ca0aba0c9a6 Author: Jon Clucas Date: Mon Jul 8 12:59:12 2024 -0400 :pencil2: Fix TR capitalization commit 3ebb9f4d8b790bb2eb8c9514be63c8642cd2f42b Author: Jon Clucas Date: Mon Jul 8 12:54:48 2024 -0400 :white_check_mark: Add tests for `fetch` refactor [rebuild base-lite] [rebuild base-standard] [run reg-suite] commit 6a5b7230209455f69bbbf05404e9a01a1eca6351 Author: Jon Clucas Date: Mon Jul 8 12:01:02 2024 -0400 :recycle: DRY params, sub, scan commit c5c39b08c38c6b50ebd6c2a4d5791022e199ad41 Author: Jon Clucas Date: Mon Jul 8 10:49:11 2024 -0400 :bug: Tell Nipype to import typehint type [run reg-suite] commit 52aa366f69523a02e2b609af83d18483351db3b0 Author: Jon Clucas Date: Mon Jul 8 10:05:09 2024 -0400 :recycle: DRY `fetch_and_convert`|`fetch`|`check`|`check2` [run reg-suite] commit ddf21035bd3bc86cb5eeac525122802217a1f908 Author: Jon Clucas Date: Sat Jul 6 00:27:06 2024 -0400 :construction: WIP :bug: Fix `get_scan_params` [run reg-suite] commit 17257e3fdc885dd14517991116db3a0901644ad3 Author: Jon Clucas Date: Fri Jul 5 14:26:27 2024 -0400 :recycle: Dedupe function node imports commit 74c09505a62b00dc2bb30ff425cca38ce9c5afa9 Author: Jon Clucas Date: Fri Jul 5 09:56:33 2024 -0400 :bug: Import local variables in `get_scan_params` [run reg-suite] commit e64309c108fa2c15395228592599fb77c3d8634e Author: Jon Clucas Date: Wed Jul 3 18:15:43 2024 -0400 :bug: Fix import (probably merge error) [run reg-suite] --- .ruff.toml | 1 + CPAC/alff/alff.py | 18 +- CPAC/alff/utils.py | 3 + CPAC/anat_preproc/anat_preproc.py | 30 +- CPAC/anat_preproc/lesion_preproc.py | 28 +- CPAC/anat_preproc/utils.py | 98 ++-- .../distortion_correction.py | 16 +- CPAC/distortion_correction/utils.py | 29 +- CPAC/easy_thresh/easy_thresh.py | 47 +- CPAC/func_preproc/func_motion.py | 2 +- CPAC/func_preproc/func_preproc.py | 5 +- CPAC/group_analysis/group_analysis.py | 33 +- .../longitudinal_preproc.py | 4 +- CPAC/median_angle/median_angle.py | 35 +- CPAC/nuisance/nuisance.py | 13 +- CPAC/nuisance/utils/utils.py | 2 +- CPAC/pipeline/engine/engine.py | 3 +- CPAC/randomise/randomise.py | 10 +- CPAC/registration/output_func_to_standard.py | 6 +- CPAC/registration/registration.py | 57 +-- 
CPAC/reho/reho.py | 19 +- CPAC/sca/sca.py | 20 +- CPAC/scrubbing/scrubbing.py | 49 +- CPAC/seg_preproc/seg_preproc.py | 38 +- CPAC/surface/surf_preproc.py | 39 +- CPAC/timeseries/timeseries_analysis.py | 8 +- CPAC/utils/interfaces/__init__.py | 20 + CPAC/utils/interfaces/function/seg_preproc.py | 23 +- CPAC/utils/tests/old_functions.py | 67 +++ CPAC/utils/tests/test_datasource.py | 20 +- CPAC/utils/tests/test_utils.py | 104 +++- CPAC/utils/utils.py | 457 ++++++++++-------- requirements.txt | 2 +- 33 files changed, 830 insertions(+), 476 deletions(-) create mode 100644 CPAC/utils/tests/old_functions.py diff --git a/.ruff.toml b/.ruff.toml index 059117f23b..590d3baa47 100644 --- a/.ruff.toml +++ b/.ruff.toml @@ -11,6 +11,7 @@ external = ["T20"] # Don't autoremove 'noqa` comments for these rules [lint.per-file-ignores] "CPAC/func_preproc/func_preproc.py" = ["E402"] "CPAC/utils/sklearn.py" = ["RUF003"] +"CPAC/utils/tests/old_functions.py" = ["C", "D", "E", "EM", "PLW", "RET"] "CPAC/utils/utils.py" = ["T201"] # until `repickle` is removed "dev/circleci_data/conftest.py" = ["F401"] "setup.py" = ["D1"] diff --git a/CPAC/alff/alff.py b/CPAC/alff/alff.py index dad32c7bcf..e26342ffb5 100644 --- a/CPAC/alff/alff.py +++ b/CPAC/alff/alff.py @@ -1,5 +1,20 @@ # -*- coding: utf-8 -*- +# Copyright (C) 2012-2024 C-PAC Developers +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import os from nipype.interfaces.afni import preprocess @@ -9,6 +24,7 @@ from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.registration.registration import apply_transform +from CPAC.utils.interfaces import Function from CPAC.utils.utils import check_prov_for_regtool @@ -177,7 +193,7 @@ def create_alff(wf_name="alff_workflow"): wf.connect(input_node, "rest_res", bandpass, "in_file") get_option_string = pe.Node( - util.Function( + Function( input_names=["mask"], output_names=["option_string"], function=get_opt_string, diff --git a/CPAC/alff/utils.py b/CPAC/alff/utils.py index f89e0c8ca4..d7532373bf 100644 --- a/CPAC/alff/utils.py +++ b/CPAC/alff/utils.py @@ -3,7 +3,10 @@ from pathlib import Path +from CPAC.utils.interfaces.function import Function + +@Function.sig_imports(["from pathlib import Path"]) def get_opt_string(mask: Path | str) -> str: """ Return option string for 3dTstat. diff --git a/CPAC/anat_preproc/anat_preproc.py b/CPAC/anat_preproc/anat_preproc.py index 56f9f74678..5a6acd286e 100644 --- a/CPAC/anat_preproc/anat_preproc.py +++ b/CPAC/anat_preproc/anat_preproc.py @@ -15,7 +15,6 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . 
-# from copy import deepcopy import os from nipype.interfaces import afni, ants, freesurfer, fsl @@ -36,6 +35,7 @@ ) from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.engine.nodeblock import nodeblock +from CPAC.utils.interfaces import Function from CPAC.utils.interfaces.fsl import Merge as fslMerge @@ -138,7 +138,7 @@ def acpc_alignment( aff_to_rig_imports = ["import os", "from numpy import *"] aff_to_rig = pe.Node( - util.Function( + Function( input_names=["in_xfm", "out_name"], output_names=["out_mat"], function=fsl_aff_to_rigid, @@ -319,7 +319,7 @@ def T1wmulT2w_brain_norm_s_string(sigma, in_file): return "-s %f -div %s" % (sigma, in_file) T1wmulT2w_brain_norm_s_string = pe.Node( - util.Function( + Function( input_names=["sigma", "in_file"], output_names=["out_str"], function=T1wmulT2w_brain_norm_s_string, @@ -378,7 +378,7 @@ def form_lower_string(mean, std): return "-thr %s -bin -ero -mul 255" % (lower) form_lower_string = pe.Node( - util.Function( + Function( input_names=["mean", "std"], output_names=["out_str"], function=form_lower_string, @@ -444,7 +444,7 @@ def file_to_a_list(infile_1, infile_2): return [infile_1, infile_2] file_to_a_list = pe.Node( - util.Function( + Function( input_names=["infile_1", "infile_2"], output_names=["out_list"], function=file_to_a_list, @@ -544,7 +544,7 @@ def afni_brain_connector(wf, cfg, strat_pool, pipe_num, opt): ) skullstrip_args = pe.Node( - util.Function( + Function( input_names=[ "spat_norm", "spat_norm_dxyz", @@ -762,7 +762,7 @@ def fsl_brain_connector(wf, cfg, strat_pool, pipe_num, opt): anat_robustfov.inputs.output_type = "NIFTI_GZ" anat_pad_RobustFOV_cropped = pe.Node( - util.Function( + Function( input_names=["cropped_image_path", "target_image_path"], output_names=["padded_image_path"], function=pad, @@ -902,7 +902,7 @@ def unet_brain_connector(wf, cfg, strat_pool, pipe_num, opt): from CPAC.unet.function import predict_volumes unet_mask = pe.Node( - util.Function( + Function( input_names=["model_path", "cimg_in"], output_names=["out_path"], function=predict_volumes, @@ -1083,7 +1083,7 @@ def freesurfer_brain_connector(wf, cfg, strat_pool, pipe_num, opt): # convert brain mask file from .mgz to .nii.gz fs_brain_mask_to_nifti = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=mri_convert ), name=f"fs_brainmask_to_nifti_{pipe_num}", @@ -1119,7 +1119,7 @@ def freesurfer_abcd_brain_connector(wf, cfg, strat_pool, pipe_num, opt): Ref: https://github.com/DCAN-Labs/DCAN-HCP/blob/7927754/PostFreeSurfer/PostFreeSurferPipeline.sh#L151-L156 """ wmparc_to_nifti = pe.Node( - util.Function( + Function( input_names=["in_file", "reslice_like", "args"], output_names=["out_file"], function=mri_convert, @@ -1130,7 +1130,7 @@ def freesurfer_abcd_brain_connector(wf, cfg, strat_pool, pipe_num, opt): # Register wmparc file if ingressing FreeSurfer data if strat_pool.check_rpool("pipeline-fs_xfm"): wmparc_to_native = pe.Node( - util.Function( + Function( input_names=["source_file", "target_file", "xfm", "out_file"], output_names=["transformed_file"], function=normalize_wmparc, @@ -1168,7 +1168,7 @@ def freesurfer_abcd_brain_connector(wf, cfg, strat_pool, pipe_num, opt): wf.connect(wmparc_to_nifti, "out_file", binary_mask, "in_file") wb_command_fill_holes = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=wb_command ), name=f"wb_command_fill_holes_{pipe_num}", @@ -1206,7 +1206,7 @@ def freesurfer_fsl_brain_connector(wf, cfg, strat_pool, 
pipe_num, opt): # mri_convert -it mgz ${SUBJECTS_DIR}/${subject}/mri/brainmask.mgz -ot nii brainmask.nii.gz convert_fs_brainmask_to_nifti = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=mri_convert ), name=f"convert_fs_brainmask_to_nifti_{node_id}", @@ -1217,7 +1217,7 @@ def freesurfer_fsl_brain_connector(wf, cfg, strat_pool, pipe_num, opt): # mri_convert -it mgz ${SUBJECTS_DIR}/${subject}/mri/T1.mgz -ot nii T1.nii.gz convert_fs_T1_to_nifti = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=mri_convert ), name=f"convert_fs_T1_to_nifti_{node_id}", @@ -2888,7 +2888,7 @@ def freesurfer_abcd_preproc(wf, cfg, strat_pool, pipe_num, opt=None): # fslmaths "$T1wImageFile"_1mm.nii.gz -div $Mean -mul 150 -abs "$T1wImageFile"_1mm.nii.gz normalize_head = pe.Node( - util.Function( + Function( input_names=["in_file", "number", "out_file_suffix"], output_names=["out_file"], function=fslmaths_command, diff --git a/CPAC/anat_preproc/lesion_preproc.py b/CPAC/anat_preproc/lesion_preproc.py index 2ef58c3d2a..07871ae32d 100644 --- a/CPAC/anat_preproc/lesion_preproc.py +++ b/CPAC/anat_preproc/lesion_preproc.py @@ -1,13 +1,30 @@ # -*- coding: utf-8 -*- +# Copyright (C) 2019-2023 C-PAC Developers +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . from nipype.interfaces import afni import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function def inverse_lesion(lesion_path): - """ + """Replace non-zeroes with zeroes and zeroes with ones. + Check if the image contains more zeros than non-zeros, if so, replaces non-zeros by zeros and zeros by ones. @@ -38,13 +55,12 @@ def inverse_lesion(lesion_path): nii = nu.inverse_nifti_values(image=lesion_path) nib.save(nii, lesion_out) return lesion_out - else: - return lesion_out + return lesion_out def create_lesion_preproc(wf_name="lesion_preproc"): - """ - The main purpose of this workflow is to process lesions masks. + """Process lesions masks. + Lesion mask file is deobliqued and reoriented in the same way as the T1 in the anat_preproc function. 
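# Rough standalone sketch (not from these diffs) of the check described in
# the `inverse_lesion` docstring above: if a lesion mask has more zero than
# non-zero voxels, swap zeros and non-zeros. The real function delegates to
# C-PAC nifti helpers (imported as `nu`); this version and its output
# filename are illustrative only.
import os

import nibabel as nib
import numpy as np


def invert_if_mostly_zero(lesion_path: str) -> str:
    img = nib.load(lesion_path)
    data = np.asanyarray(img.dataobj)
    zeros = data.size - np.count_nonzero(data)
    if np.count_nonzero(data) < zeros:
        inverted = (data == 0).astype(data.dtype)
        out_path = os.path.join(os.getcwd(), "lesion_inverted.nii.gz")
        nib.save(nib.Nifti1Image(inverted, img.affine, img.header), out_path)
        return out_path
    return lesion_path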
@@ -95,7 +111,7 @@ def create_lesion_preproc(wf_name="lesion_preproc"): lesion_deoblique.inputs.deoblique = True lesion_inverted = pe.Node( - interface=util.Function( + interface=Function( input_names=["lesion_path"], output_names=["lesion_out"], function=inverse_lesion, diff --git a/CPAC/anat_preproc/utils.py b/CPAC/anat_preproc/utils.py index b3246fc41a..39904bbb66 100644 --- a/CPAC/anat_preproc/utils.py +++ b/CPAC/anat_preproc/utils.py @@ -1,73 +1,34 @@ # -*- coding: utf-8 -*- -from numpy import zeros -from nibabel import load as nib_load, Nifti1Image -import nipype.interfaces.utility as util - -from CPAC.pipeline import nipype_pipeline_engine as pe - - -def get_shape(nifti_image): - return nib_load(nifti_image).shape - - -def pad(cropped_image_path, target_image_path): - """ - Pad a cropped image to match the dimensions of a target image along the z-axis, - while keeping padded image aligned with target_image. - - Parameters - ---------- - - cropped_image_path (str): The file path to the cropped image (NIfTI format). - - target_image_path (str): The file path to the target image (NIfTI format). - - Returns - ------- - - str: The file path to the saved padded image (NIfTI format). +# Copyright (C) 2018-2023 C-PAC Developers - The function loads cropped and target iamges, calculates the z-dimension shift required for alignment such - that the mask generated from padded image will work correctly on the target image. The result padded image is - saved as an NIfTI file in the working directory/node and file path is returned as output. +# This file is part of C-PAC. - Note: The function assumes that the input images are in NIfTI format and have compatible dimensions. The cropped - and target image should only differ in z-axis dimension. - """ - from os import getcwd, path - from typing import Optional +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. - from numpy import asanyarray, ndarray, zeros_like - from nibabel import load, Nifti1Image, save +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. - cropped_image: Optional[ndarray] = asanyarray(load(cropped_image_path).dataobj) - target_image: Optional[ndarray] = asanyarray(load(target_image_path).dataobj) +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
+import os - # Taking 1 slice to calculate the z dimension shift from top - center_row: int = target_image.shape[0] // 2 - center_column: int = target_image.shape[1] // 2 - z_slice_cropped_image: Optional[ndarray] = cropped_image[ - center_row, center_column, : - ] - z_slice_target_image: Optional[ndarray] = target_image[center_row, center_column, :] - - for z_shift in range(len(z_slice_target_image) - len(z_slice_cropped_image) + 1): - if ( - z_slice_target_image[z_shift : z_shift + len(z_slice_cropped_image)] - == z_slice_cropped_image - ).all(): - break +from numpy import * +from nibabel import load as nib_load +from nipype.interfaces.base import CommandLineInputSpec, File, TraitedSpec +import nipype.interfaces.utility as util +from nipype.interfaces.workbench.base import WBCommand - padded_image_matrix: Optional[ndarray] = zeros_like(target_image) - padded_image_matrix[:, :, z_shift : cropped_image.shape[2] + z_shift] = ( - cropped_image - ) - padded_image_path: str = path.join(getcwd(), "padded_image_T1w.nii.gz") - cropped_image = load(cropped_image_path) - save( - Nifti1Image(padded_image_matrix, affine=cropped_image.affine), padded_image_path - ) - return padded_image_path +from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function def get_shape(nifti_image): + """Return the shape of a NIfTI image.""" return nib_load(nifti_image).shape @@ -286,7 +247,7 @@ def split_hemi(multi_file): def split_hemi_interface() -> util.Function: """Return a function interface for split_hemi.""" - return util.Function( + return Function( input_names=["multi_file"], output_names=["lh", "rh"], function=split_hemi ) @@ -587,12 +548,9 @@ def normalize_wmparc(source_file, target_file, xfm, out_file): return os.path.join(os.getcwd(), out_file) -"""This module provides interfaces for workbench -volume-remove-islands commands""" -from nipype.interfaces.base import CommandLineInputSpec, File, TraitedSpec -from nipype.interfaces.workbench.base import WBCommand - - class VolumeRemoveIslandsInputSpec(CommandLineInputSpec): + """InputSpec for workbench -volume-remove-islands commands.""" + in_file = File( exists=True, mandatory=True, @@ -610,14 +568,14 @@ class VolumeRemoveIslandsInputSpec(CommandLineInputSpec): class VolumeRemoveIslandsOutputSpec(TraitedSpec): + """OutputSpec for workbench -volume-remove-islands commands.""" + out_file = File(exists=True, desc="the output ROI volume") class VolumeRemoveIslands(WBCommand): - """ - workbench - -volume-remove-islands - REMOVE ISLANDS FROM AN ROI VOLUME + """Remove islandes from an ROI volume. + wb_command -volume-remove-islands - the input ROI volume - output - the output ROI volume. 
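As context for the `util.Function` to `Function` swaps running through these diffs, below is a minimal sketch of how a plain Python function becomes a Nipype node. It uses stock `nipype.interfaces.utility.Function`, a made-up helper, and a hypothetical file path; the diffs treat C-PAC's own `Function` wrapper as a drop-in replacement for the stock interface.

from nipype.interfaces.utility import Function
from nipype.pipeline import engine as pe


def mask_arg_demo(mask):
    """Toy stand-in for helpers like get_opt_string."""
    return f"-mask {mask}"


demo_node = pe.Node(
    Function(
        input_names=["mask"],
        output_names=["option_string"],
        function=mask_arg_demo,
    ),
    name="demo_get_option_string",
)
demo_node.inputs.mask = "sub-01_desc-brain_mask.nii.gz"  # hypothetical path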
diff --git a/CPAC/distortion_correction/distortion_correction.py b/CPAC/distortion_correction/distortion_correction.py index df23874266..0b0454e7d3 100644 --- a/CPAC/distortion_correction/distortion_correction.py +++ b/CPAC/distortion_correction/distortion_correction.py @@ -131,7 +131,7 @@ def distcor_phasediff_fsl_fugue(wf, cfg, strat_pool, pipe_num, opt=None): == "AFNI" ): skullstrip_args = pe.Node( - util.Function( + Function( input_names=["shrink_fac"], output_names=["expr"], function=create_afni_arg, @@ -165,7 +165,7 @@ def distcor_phasediff_fsl_fugue(wf, cfg, strat_pool, pipe_num, opt=None): == "BET" ): bet = pe.Node( - interface=fsl.BET(), name="distcor_phasediff_bet_skullstrip_{pipe_num}" + interface=fsl.BET(), name=f"distcor_phasediff_bet_skullstrip_{pipe_num}" ) bet.inputs.output_type = "NIFTI_GZ" bet.inputs.frac = cfg.functional_preproc["distortion_correction"]["PhaseDiff"][ @@ -667,7 +667,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): "import sys", ] phase_encoding = pe.Node( - util.Function( + Function( input_names=[ "unwarp_dir", "phase_one", @@ -710,7 +710,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): topup_imports = ["import os", "import subprocess"] run_topup = pe.Node( - util.Function( + Function( input_names=["merged_file", "acqparams"], output_names=[ "out_fieldcoef", @@ -732,7 +732,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): wf.connect(phase_encoding, "acq_params", run_topup, "acqparams") choose_phase = pe.Node( - util.Function( + Function( input_names=["phase_imgs", "unwarp_dir"], output_names=["out_phase_image", "vnum"], function=choose_phase_image, @@ -746,7 +746,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): wf.connect(node, out, choose_phase, "unwarp_dir") vnum_base = pe.Node( - util.Function( + Function( input_names=[ "vnum", "motion_mat_list", @@ -797,7 +797,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): name = "PhaseTwo_aw" vnum_base_two = pe.Node( - util.Function( + Function( input_names=[ "vnum", "motion_mat_list", @@ -840,7 +840,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): name = "PhaseOne_aw" vnum_base_one = pe.Node( - util.Function( + Function( input_names=[ "vnum", "motion_mat_list", diff --git a/CPAC/distortion_correction/utils.py b/CPAC/distortion_correction/utils.py index 2b78dbfa4d..b76acba074 100644 --- a/CPAC/distortion_correction/utils.py +++ b/CPAC/distortion_correction/utils.py @@ -1,3 +1,19 @@ +# Copyright (C) 2021-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
import os import subprocess import sys @@ -12,6 +28,8 @@ import nipype.interfaces.utility as util from nipype.pipeline import engine as pe +from CPAC.utils.interfaces import Function + def run_HCP_gradient_unwarp(phase_vol, input_coeffs): import os @@ -49,7 +67,7 @@ def run_convertwarp(cw_trilinear, cw_fullWarp_abs): f"--warp1={cw_fullWarp_abs}", "--relout", f"--out={out_file}", - f"--j={jac_out}", + f"--j={out_jac}", ] subprocess.check_output(cmd) @@ -64,7 +82,7 @@ def gradient_distortion_correction(wf, inp_image, name): grad_unwarp_imports = ["import os", "import subprocess"] grad_unwarp = pe.Node( - util.Function( + Function( input_names=["phase_vol", "input_coeffs"], output_names=["trilinear", "abs_fullWarp"], function=run_HCP_gradient_unwarp, @@ -78,7 +96,7 @@ def gradient_distortion_correction(wf, inp_image, name): convertwarp_imports = ["import os", "import subprocess"] convert_warp = pe.Node( - util.Function( + Function( input_names=["cw_trilinear", "cw_fullWarp_abs"], output_names=["out_file_cw", "out_jac_cw"], function=run_convertwarp, @@ -248,8 +266,9 @@ def phase_encode( def z_pad(name="z_pad"): - """Pad in Z by one slice if odd so that topup does not complain - (slice consists of zeros that will be dilated by following step). + """Pad in Z by one slice if odd so that topup does not complain. + + (Slice consists of zeros that will be dilated by following step). """ wf = pe.Workflow(name=name) diff --git a/CPAC/easy_thresh/easy_thresh.py b/CPAC/easy_thresh/easy_thresh.py index d514d51c54..20918c08a9 100644 --- a/CPAC/easy_thresh/easy_thresh.py +++ b/CPAC/easy_thresh/easy_thresh.py @@ -1,3 +1,19 @@ +# Copyright (C) 2012-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import os import re import subprocess @@ -7,12 +23,11 @@ import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function def easy_thresh(wf_name): - """ - Workflow for carrying out cluster-based thresholding - and colour activation overlaying. + """Carry out cluster-based thresholding and colour activation overlaying. 
Parameters ---------- @@ -213,7 +228,7 @@ def easy_thresh(wf_name): # or qform/sform info) from one image to another geo_imports = ["import subprocess"] copy_geometry = pe.MapNode( - util.Function( + Function( input_names=["infile_a", "infile_b"], output_names=["out_file"], function=copy_geom, @@ -246,7 +261,7 @@ def easy_thresh(wf_name): cluster_imports = ["import os", "import re", "import subprocess"] cluster = pe.MapNode( - util.Function( + Function( input_names=[ "in_file", "volume", @@ -271,7 +286,7 @@ def easy_thresh(wf_name): # create tuple of z_threshold and max intensity value of threshold file create_tuple = pe.MapNode( - util.Function( + Function( input_names=["infile_a", "infile_b"], output_names=["out_file"], function=get_tuple, @@ -299,7 +314,7 @@ def easy_thresh(wf_name): # as FSLDIR,MNI and voxel size get_bg_imports = ["import os", "import nibabel as nib"] get_backgroundimage = pe.MapNode( - util.Function( + Function( input_names=["in_file", "file_parameters"], output_names=["out_file"], function=get_standard_background_img, @@ -312,7 +327,7 @@ def easy_thresh(wf_name): # function node to get the standard fsl brain image # outputs single file get_backgroundimage2 = pe.Node( - util.Function( + Function( input_names=["in_file", "file_parameters"], output_names=["out_file"], function=get_standard_background_img, @@ -412,10 +427,9 @@ def call_cluster(in_file, volume, dlh, threshold, pthreshold, parameters): def copy_geom(infile_a, infile_b): - """ - Method to call fsl fslcpgeom command to copy - certain parts of the header information (image dimensions, - voxel dimensions, voxel dimensions units string, image + """Call fsl fslcpgeom command to copy certain parts of the header information. + + Copy (image dimensions, voxel dimensions, voxel dimensions units string, image orientation/origin or qform/sform info) from one image to another. Parameters @@ -449,9 +463,7 @@ def copy_geom(infile_a, infile_b): def get_standard_background_img(in_file, file_parameters): - """ - Method to get the standard brain image from FSL - standard data directory. + """Get the standard brain image from FSL standard data directory. Parameters ---------- @@ -487,10 +499,7 @@ def get_standard_background_img(in_file, file_parameters): def get_tuple(infile_a, infile_b): - """ - Simple method to return tuple of z_threhsold - maximum intensity values of Zstatistic image - for input to the overlay. + """Return tuple of z_threhsold maximum intensity values of Zstatistic image for input to the overlay. 
Parameters ---------- diff --git a/CPAC/func_preproc/func_motion.py b/CPAC/func_preproc/func_motion.py index 10e73be2d0..7077a5fc31 100644 --- a/CPAC/func_preproc/func_motion.py +++ b/CPAC/func_preproc/func_motion.py @@ -423,7 +423,7 @@ def get_motion_ref(wf, cfg, strat_pool, pipe_num, opt=None): elif opt == "fmriprep_reference": func_get_RPI = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=estimate_reference_image, diff --git a/CPAC/func_preproc/func_preproc.py b/CPAC/func_preproc/func_preproc.py index 4196d6541a..69b856509a 100644 --- a/CPAC/func_preproc/func_preproc.py +++ b/CPAC/func_preproc/func_preproc.py @@ -23,6 +23,7 @@ from CPAC.func_preproc.utils import nullify from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.engine.nodeblock import nodeblock +from CPAC.utils.interfaces import Function from CPAC.utils.interfaces.ants import ( AI, # niworkflows PrintHeader, @@ -343,7 +344,7 @@ def create_wf_edit_func(wf_name="edit_func"): # allocate a node to check that the requested edits are # reasonable given the data func_get_idx = pe.Node( - util.Function( + Function( input_names=["in_files", "stop_idx", "start_idx"], output_names=["stopidx", "startidx"], function=get_idx, @@ -877,7 +878,7 @@ def form_thr_string(thr): return "-thr %s" % (threshold_z) form_thr_string = pe.Node( - util.Function( + Function( input_names=["thr"], output_names=["out_str"], function=form_thr_string, diff --git a/CPAC/group_analysis/group_analysis.py b/CPAC/group_analysis/group_analysis.py index d3e78c4698..6da81ff37e 100644 --- a/CPAC/group_analysis/group_analysis.py +++ b/CPAC/group_analysis/group_analysis.py @@ -1,14 +1,29 @@ +# Copyright (C) 2012-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . from nipype.interfaces import fsl import nipype.interfaces.utility as util from CPAC.easy_thresh import easy_thresh from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function def get_operation(in_file): - """ - Method to create operation string - for fslmaths. + """Create operation string for fslmaths. Parameters ---------- @@ -39,7 +54,9 @@ def get_operation(in_file): def label_zstat_files(zstat_list, con_file): - """Take in the z-stat file outputs of FSL FLAME and rename them after the + """Rename z-stat file outputs from FSL FLAME using contrast labels. + + Take in the z-stat file outputs of FSL FLAME and rename them after the contrast labels of the contrasts provided. """ cons = [] @@ -64,9 +81,7 @@ def label_zstat_files(zstat_list, con_file): def create_fsl_flame_wf(ftest=False, wf_name="groupAnalysis"): - """ - FSL `FEAT `_ - BASED Group Analysis. + """Run FSL `FEAT `_ BASED Group Analysis. 
Parameters ---------- @@ -313,7 +328,7 @@ def create_fsl_flame_wf(ftest=False, wf_name="groupAnalysis"): # easier interpretation label_zstat_imports = ["import os"] label_zstat = pe.Node( - util.Function( + Function( input_names=["zstat_list", "con_file"], output_names=["new_zstat_list"], function=label_zstat_files, @@ -341,7 +356,7 @@ def create_fsl_flame_wf(ftest=False, wf_name="groupAnalysis"): # function node to get the operation string for fslmaths command get_opstring = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=get_operation ), name="get_opstring", diff --git a/CPAC/longitudinal_pipeline/longitudinal_preproc.py b/CPAC/longitudinal_pipeline/longitudinal_preproc.py index dfead14d59..9fbe31c6b5 100644 --- a/CPAC/longitudinal_pipeline/longitudinal_preproc.py +++ b/CPAC/longitudinal_pipeline/longitudinal_preproc.py @@ -24,9 +24,9 @@ import numpy as np import nibabel as nib from nipype.interfaces import fsl -import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function from CPAC.utils.monitoring import IFLOGGER from CPAC.utils.nifti_utils import nifti_image_input @@ -617,7 +617,7 @@ def subject_specific_template( ] if method == "flirt": template_gen_node = pe.Node( - util.Function( + Function( input_names=[ "input_brain_list", "input_skull_list", diff --git a/CPAC/median_angle/median_angle.py b/CPAC/median_angle/median_angle.py index 1433df8ac8..de4fd683cb 100644 --- a/CPAC/median_angle/median_angle.py +++ b/CPAC/median_angle/median_angle.py @@ -1,12 +1,29 @@ +# Copyright (C) 2012-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function def median_angle_correct(target_angle_deg, realigned_file): - """ - Performs median angle correction on fMRI data. Median angle correction algorithm - based on [1]_. + """Perform median angle correction on fMRI data. + + Median angle correction algorithm based on [1]_. Parameters ---------- @@ -89,8 +106,7 @@ def writeToFile(data, nii, fname): def calc_median_angle_params(subject): - """ - Calculates median angle parameters of a subject. + """Calculate median angle parameters of a subject. Parameters ---------- @@ -133,8 +149,7 @@ def calc_median_angle_params(subject): def calc_target_angle(mean_bolds, median_angles): """ - Calculates a target angle based on median angle parameters of - the group. + Calculate a target angle based on median angle parameters of the group. 
Parameters ---------- @@ -229,7 +244,7 @@ def create_median_angle_correction(name="median_angle_correction"): ) mac = pe.Node( - util.Function( + Function( input_names=["target_angle_deg", "realigned_file"], output_names=["corrected_file", "angles_file"], function=median_angle_correct, @@ -305,7 +320,7 @@ def create_target_angle(name="target_angle"): ) cmap = pe.MapNode( - util.Function( + Function( input_names=["subject"], output_names=["mean_bold", "median_angle"], function=calc_median_angle_params, @@ -315,7 +330,7 @@ def create_target_angle(name="target_angle"): ) cta = pe.Node( - util.Function( + Function( input_names=["mean_bolds", "median_angles"], output_names=["target_angle"], function=calc_target_angle, diff --git a/CPAC/nuisance/nuisance.py b/CPAC/nuisance/nuisance.py index 943d5a3db8..b47fc0886f 100644 --- a/CPAC/nuisance/nuisance.py +++ b/CPAC/nuisance/nuisance.py @@ -125,7 +125,7 @@ def form_mask_erosion_prop(erosion_prop): ] eroded_mask = pe.Node( - util.Function( + Function( input_names=[ "roi_mask", "skullstrip_mask", @@ -156,7 +156,7 @@ def form_mask_erosion_prop(erosion_prop): wf.connect(eroded_mask, "output_roi_mask", outputspec, "eroded_mask") if segmentmap: erosion_segmentmap = pe.Node( - util.Function( + Function( input_names=["roi_mask", "erosion_mm", "erosion_prop"], output_names=["eroded_roi_mask"], function=erosion, @@ -1357,7 +1357,7 @@ def create_regressor_workflow( ] cosfilter_node = pe.Node( - util.Function( + Function( input_names=["input_image_path", "timestep"], output_names=["cosfiltered_img"], function=cosine_filter, @@ -1374,7 +1374,7 @@ def create_regressor_workflow( "input_image_path", ) tr_string2float_node = pe.Node( - util.Function( + Function( input_names=["tr"], output_names=["tr_float"], function=TR_string_to_float, @@ -1887,7 +1887,7 @@ def filtering_bold_and_regressors( bandpass_ts.inputs.outputtype = "NIFTI_GZ" tr_string2float_node = pe.Node( - util.Function( + Function( input_names=["tr"], output_names=["tr_float"], function=TR_string_to_float, @@ -2418,7 +2418,8 @@ def nuisance_regressors_generation( opt: dict, space: Literal["T1w", "bold"], ) -> tuple[Workflow, dict]: - """ + """Generate nuisance regressors. 
+ Parameters ---------- wf : ~nipype.pipeline.engine.workflows.Workflow diff --git a/CPAC/nuisance/utils/utils.py b/CPAC/nuisance/utils/utils.py index 92499523a8..db6667dcb3 100644 --- a/CPAC/nuisance/utils/utils.py +++ b/CPAC/nuisance/utils/utils.py @@ -499,7 +499,7 @@ def generate_summarize_tissue_mask_ventricles_masking( # generate inverse transform flags, which depends on the number of transforms inverse_transform_flags = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["inverse_transform_flags"], function=generate_inverse_transform_flags, diff --git a/CPAC/pipeline/engine/engine.py b/CPAC/pipeline/engine/engine.py index e71e2cefe1..2ccdf08e58 100644 --- a/CPAC/pipeline/engine/engine.py +++ b/CPAC/pipeline/engine/engine.py @@ -20,10 +20,9 @@ import copy import hashlib import json -import logging import os -from nipype import config # type: ignore [import-untyped] +from nipype import config, logging # type: ignore [import-untyped] from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.engine.nodeblock import NODEBLOCK_INPUTS, NodeBlockFunction diff --git a/CPAC/randomise/randomise.py b/CPAC/randomise/randomise.py index 8c2351c9f0..b3144685aa 100644 --- a/CPAC/randomise/randomise.py +++ b/CPAC/randomise/randomise.py @@ -15,6 +15,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function from CPAC.utils.monitoring import IFLOGGER @@ -53,7 +54,6 @@ def prep_randomise_workflow( ): from nipype.interfaces import fsl import nipype.interfaces.io as nio - import nipype.interfaces.utility as util wf = pe.Workflow(name="randomise_workflow") wf.base_dir = c.work_dir @@ -74,7 +74,7 @@ def prep_randomise_workflow( randomise.inputs.fcon = fts_file select_tcorrp_files = pe.Node( - util.Function( + Function( input_names=["input_list"], output_names=["out_file"], function=select ), name="select_t_corrp", @@ -83,7 +83,7 @@ def prep_randomise_workflow( wf.connect(randomise, "t_corrected_p_files", select_tcorrp_files, "input_list") select_tstat_files = pe.Node( - util.Function( + Function( input_names=["input_list"], output_names=["out_file"], function=select ), name="select_t_stat", @@ -147,6 +147,10 @@ def run(group_config_path): import os from CPAC.pipeline.cpac_group_runner import load_config_yml + from CPAC.pipeline.cpac_randomise_pipeline import ( + randomise_merged_file, + randomise_merged_mask, + ) group_config_obj = load_config_yml(group_config_path) pipeline_output_folder = group_config_obj.pipeline_dir diff --git a/CPAC/registration/output_func_to_standard.py b/CPAC/registration/output_func_to_standard.py index 6cf172f76d..bafea7d8d0 100644 --- a/CPAC/registration/output_func_to_standard.py +++ b/CPAC/registration/output_func_to_standard.py @@ -374,7 +374,7 @@ def ants_apply_warps_func_mni( itk_imports = ["import os"] change_transform = pe.Node( - util.Function( + Function( input_names=["input_affine_file"], output_names=["updated_affine_file"], function=change_itk_transform_type, @@ -534,7 +534,7 @@ def ants_apply_warps_func_mni( # check transform list (if missing any init/rig/affine) and exclude Nonetype check_transform = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["checked_transform_list", "list_length"], function=check_transforms, @@ -546,7 +546,7 @@ def ants_apply_warps_func_mni( # generate inverse transform flags, which depends on the number of 
transforms inverse_transform_flags = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["inverse_transform_flags"], function=generate_inverse_transform_flags, diff --git a/CPAC/registration/registration.py b/CPAC/registration/registration.py index 8f1bf980ac..af2112f77f 100644 --- a/CPAC/registration/registration.py +++ b/CPAC/registration/registration.py @@ -39,6 +39,7 @@ seperate_warps_list, single_ants_xfm_to_list, ) +from CPAC.utils.interfaces import Function from CPAC.utils.interfaces.fsl import Merge as fslMerge from CPAC.utils.utils import check_prov_for_motion_tool, check_prov_for_regtool @@ -104,7 +105,7 @@ def apply_transform( wf.connect(inputNode, "reference", apply_warp, "reference_image") interp_string = pe.Node( - util.Function( + Function( input_names=["interpolation", "reg_tool"], output_names=["interpolation"], function=interpolation_string, @@ -118,7 +119,7 @@ def apply_transform( wf.connect(interp_string, "interpolation", apply_warp, "interpolation") ants_xfm_list = pe.Node( - util.Function( + Function( input_names=["transform"], output_names=["transform_list"], function=single_ants_xfm_to_list, @@ -135,7 +136,7 @@ def apply_transform( if int(num_cpus) > 1 and time_series: chunk_imports = ["import nibabel as nib"] chunk = pe.Node( - util.Function( + Function( input_names=["func_file", "n_chunks", "chunk_size"], output_names=["TR_ranges"], function=chunk_ts, @@ -154,7 +155,7 @@ def apply_transform( split_imports = ["import os", "import subprocess"] split = pe.Node( - util.Function( + Function( input_names=["func_file", "tr_ranges"], output_names=["split_funcs"], function=split_ts_chunks, @@ -196,7 +197,7 @@ def apply_transform( ) interp_string = pe.Node( - util.Function( + Function( input_names=["interpolation", "reg_tool"], output_names=["interpolation"], function=interpolation_string, @@ -222,7 +223,7 @@ def apply_transform( if int(num_cpus) > 1 and time_series: chunk_imports = ["import nibabel as nib"] chunk = pe.Node( - util.Function( + Function( input_names=["func_file", "n_chunks", "chunk_size"], output_names=["TR_ranges"], function=chunk_ts, @@ -241,7 +242,7 @@ def apply_transform( split_imports = ["import os", "import subprocess"] split = pe.Node( - util.Function( + Function( input_names=["func_file", "tr_ranges"], output_names=["split_funcs"], function=split_ts_chunks, @@ -761,7 +762,7 @@ def create_register_func_to_anat( if phase_diff_distcor: conv_pedir = pe.Node( - interface=util.Function( + interface=Function( input_names=["pedir", "convert"], output_names=["pedir"], function=convert_pedir, @@ -1067,7 +1068,7 @@ def bbreg_args(bbreg_target): if phase_diff_distcor: conv_pedir = pe.Node( - interface=util.Function( + interface=Function( input_names=["pedir", "convert"], output_names=["pedir"], function=convert_pedir, @@ -1276,7 +1277,7 @@ def create_wf_calculate_ants_warp( """ reg_imports = ["import os", "import subprocess"] calculate_ants_warp = pe.Node( - interface=util.Function( + interface=Function( input_names=[ "moving_brain", "reference_brain", @@ -1302,7 +1303,7 @@ def create_wf_calculate_ants_warp( calculate_ants_warp.interface.num_threads = num_threads select_forward_initial = pe.Node( - util.Function( + Function( input_names=["warp_list", "selection"], output_names=["selected_warp"], function=seperate_warps_list, @@ -1313,7 +1314,7 @@ def create_wf_calculate_ants_warp( select_forward_initial.inputs.selection = "Initial" select_forward_rigid = pe.Node( - util.Function( + Function( input_names=["warp_list", "selection"], 
output_names=["selected_warp"], function=seperate_warps_list, @@ -1324,7 +1325,7 @@ def create_wf_calculate_ants_warp( select_forward_rigid.inputs.selection = "Rigid" select_forward_affine = pe.Node( - util.Function( + Function( input_names=["warp_list", "selection"], output_names=["selected_warp"], function=seperate_warps_list, @@ -1335,7 +1336,7 @@ def create_wf_calculate_ants_warp( select_forward_affine.inputs.selection = "Affine" select_forward_warp = pe.Node( - util.Function( + Function( input_names=["warp_list", "selection"], output_names=["selected_warp"], function=seperate_warps_list, @@ -1346,7 +1347,7 @@ def create_wf_calculate_ants_warp( select_forward_warp.inputs.selection = "Warp" select_inverse_warp = pe.Node( - util.Function( + Function( input_names=["warp_list", "selection"], output_names=["selected_warp"], function=seperate_warps_list, @@ -1788,7 +1789,7 @@ def ANTs_registration_connector( # check transform list to exclude Nonetype (missing) init/rig/affine check_transform = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["checked_transform_list", "list_length"], function=check_transforms, @@ -1851,7 +1852,7 @@ def ANTs_registration_connector( # check transform list to exclude Nonetype (missing) init/rig/affine check_invlinear_transform = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["checked_transform_list", "list_length"], function=check_transforms, @@ -1873,7 +1874,7 @@ def ANTs_registration_connector( # generate inverse transform flags, which depends on the # number of transforms inverse_transform_flags = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["inverse_transform_flags"], function=generate_inverse_transform_flags, @@ -1935,7 +1936,7 @@ def ANTs_registration_connector( # check transform list to exclude Nonetype (missing) init/rig/affine check_all_transform = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["checked_transform_list", "list_length"], function=check_transforms, @@ -2004,7 +2005,7 @@ def ANTs_registration_connector( # check transform list to exclude Nonetype (missing) init/rig/affine check_all_inv_transform = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["checked_transform_list", "list_length"], function=check_transforms, @@ -2026,7 +2027,7 @@ def ANTs_registration_connector( # generate inverse transform flags, which depends on the # number of transforms inverse_all_transform_flags = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["inverse_transform_flags"], function=generate_inverse_transform_flags, @@ -2122,7 +2123,7 @@ def bold_to_T1template_xfm_connector( itk_imports = ["import os"] change_transform = pe.Node( - util.Function( + Function( input_names=["input_affine_file"], output_names=["updated_affine_file"], function=change_itk_transform_type, @@ -2964,7 +2965,7 @@ def overwrite_transform_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None # c4d -mcs ${WD}/xfms/ANTs_CombinedWarp.nii.gz -oo ${WD}/xfms/e1.nii.gz ${WD}/xfms/e2.nii.gz ${WD}/xfms/e3.nii.gz # -mcs: -multicomponent-split, -oo: -output-multiple split_combined_warp = pe.Node( - util.Function( + Function( input_names=["input_name", "output_name"], output_names=["output1", "output2", "output3"], function=run_c4d, @@ -2982,7 +2983,7 @@ def overwrite_transform_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None # c4d -mcs ${WD}/xfms/ANTs_CombinedInvWarp.nii.gz -oo 
${WD}/xfms/e1inv.nii.gz ${WD}/xfms/e2inv.nii.gz ${WD}/xfms/e3inv.nii.gz split_combined_inv_warp = pe.Node( - util.Function( + Function( input_names=["input_name", "output_name"], output_names=["output1", "output2", "output3"], function=run_c4d, @@ -3678,7 +3679,7 @@ def apply_phasediff_to_timeseries_separately(wf, cfg, strat_pool, pipe_num, opt= wf.connect(warp_fmap, "out_file", mask_fmap, "in_file") conv_pedir = pe.Node( - interface=util.Function( + interface=Function( input_names=["pedir", "convert"], output_names=["pedir"], function=convert_pedir, @@ -4819,7 +4820,7 @@ def single_step_resample_timeseries_to_T1template( reg_tool = check_prov_for_regtool(xfm_prov) bbr2itk = pe.Node( - util.Function( + Function( input_names=["reference_file", "source_file", "transform_file"], output_names=["itk_transform"], function=run_c3d, @@ -4860,7 +4861,7 @@ def single_step_resample_timeseries_to_T1template( ### Loop starts! ### motionxfm2itk = pe.MapNode( - util.Function( + Function( input_names=["reference_file", "source_file", "transform_file"], output_names=["itk_transform"], function=run_c3d, @@ -4881,7 +4882,7 @@ def single_step_resample_timeseries_to_T1template( wf.connect(node, out, motionxfm2itk, "transform_file") elif motion_correct_tool == "3dvolreg": convert_transform = pe.Node( - util.Function( + Function( input_names=["one_d_filename"], output_names=["transform_directory"], function=one_d_to_mat, diff --git a/CPAC/reho/reho.py b/CPAC/reho/reho.py index 43f2ba5f47..b19ad9ecc7 100644 --- a/CPAC/reho/reho.py +++ b/CPAC/reho/reho.py @@ -1,9 +1,26 @@ # coding: utf-8 +# Copyright (C) 2012-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.reho.utils import * +from CPAC.utils.interfaces import Function def create_reho(wf_name): @@ -99,7 +116,7 @@ def create_reho(wf_name): "from CPAC.reho.utils import f_kendall", ] raw_reho_map = pe.Node( - util.Function( + Function( input_names=["in_file", "mask_file", "cluster_size"], output_names=["out_file"], function=compute_reho, diff --git a/CPAC/sca/sca.py b/CPAC/sca/sca.py index db48c7f04b..bf855d578a 100644 --- a/CPAC/sca/sca.py +++ b/CPAC/sca/sca.py @@ -30,11 +30,15 @@ create_spatial_map_dataflow, resample_func_roi, ) +from CPAC.utils.interfaces import Function def create_sca(name_sca="sca"): """ - Map of the correlations of the Region of Interest(Seed in native or MNI space) with the rest of brain voxels. + Create map of the correlations of the Region of Interest with the rest of brain voxels. + + (Seed in native or MNI space) + The map is normalized to contain Z-scores, mapped in standard space and treated with spatial smoothing. 
Parameters @@ -150,8 +154,8 @@ def create_sca(name_sca="sca"): def create_temporal_reg(wflow_name="temporal_reg", which="SR"): - r""" - Temporal multiple regression workflow + r"""Create temporal multiple regression workflow. + Provides a spatial map of parameter estimates corresponding to each provided timeseries in a timeseries.txt file as regressors. @@ -280,9 +284,7 @@ def create_temporal_reg(wflow_name="temporal_reg", which="SR"): ) check_timeseries = pe.Node( - util.Function( - input_names=["in_file"], output_names=["out_file"], function=check_ts - ), + Function(input_names=["in_file"], output_names=["out_file"], function=check_ts), name="check_timeseries", ) @@ -325,7 +327,7 @@ def create_temporal_reg(wflow_name="temporal_reg", which="SR"): map_roi_imports = ['import os', 'import numpy as np'] # get roi order and send to output node for raw outputs - get_roi_order = pe.Node(util.Function(input_names=['maps', + get_roi_order = pe.Node(Function(input_names=['maps', 'timeseries'], output_names=['labels', 'maps'], @@ -350,7 +352,7 @@ def create_temporal_reg(wflow_name="temporal_reg", which="SR"): outputNode, 'temp_reg_map_files') # get roi order and send to output node for z-stat outputs - get_roi_order_zstat = pe.Node(util.Function(input_names=['maps', + get_roi_order_zstat = pe.Node(Function(input_names=['maps', 'timeseries'], output_names=['labels', 'maps'], @@ -396,7 +398,7 @@ def SCA_AVG(wf, cfg, strat_pool, pipe_num, opt=None): # same workflow, except to run TSE and send it to the resource # pool so that it will not get sent to SCA resample_functional_roi_for_sca = pe.Node( - util.Function( + Function( input_names=["in_func", "in_roi", "realignment", "identity_matrix"], output_names=["out_func", "out_roi"], function=resample_func_roi, diff --git a/CPAC/scrubbing/scrubbing.py b/CPAC/scrubbing/scrubbing.py index ed85ef1024..e08b816edc 100644 --- a/CPAC/scrubbing/scrubbing.py +++ b/CPAC/scrubbing/scrubbing.py @@ -1,13 +1,29 @@ +# Copyright (C) 2012-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe +from CPAC.utils.interfaces import Function def create_scrubbing_preproc(wf_name="scrubbing"): - """ - This workflow essentially takes the list of offending timepoints that are to be removed - and removes it from the motion corrected input image. Also, it removes the information - of discarded time points from the movement parameters file obtained during motion correction. + """Take the list of offending timepoints that are to be removed and remove it from the motion corrected input image. + + Also remove the information of discarded time points from the movement parameters file obtained during motion correction. 
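# A rough sketch of the volume-selection step this scrubbing workflow relies
# on: 3dcalc is invoked through a Function node (rather than the Nipype AFNI
# interface) so the input can carry an explicit volume selector such as
# ``input.nii.gz[2,3,4,..98]``.  The helper below is a simplified stand-in for
# that string construction, assuming a frames-in 1D file that lists the
# indices of volumes to keep; the real ``get_indx``/``scrub_image`` functions
# may differ in detail.
def build_scrub_selector(scrub_input, frames_in_1D_file):
    """Append an AFNI-style volume selector listing the retained frames."""
    with open(frames_in_1D_file) as one_d:
        kept = [token for token in one_d.read().split() if token.strip()]
    return f"{scrub_input}[{','.join(kept)}]"


# e.g. build_scrub_selector("rest_mc.nii.gz", "frames_in.1D")
# -> "rest_mc.nii.gz[0,1,2,5,6,...]"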
Parameters ---------- @@ -94,7 +110,7 @@ def create_scrubbing_preproc(wf_name="scrubbing"): ) craft_scrub_input = pe.Node( - util.Function( + Function( input_names=["scrub_input", "frames_in_1D_file"], output_names=["scrub_input_string"], function=get_indx, @@ -103,7 +119,7 @@ def create_scrubbing_preproc(wf_name="scrubbing"): ) scrubbed_movement_parameters = pe.Node( - util.Function( + Function( input_names=["infile_a", "infile_b"], output_names=["out_file"], function=get_mov_parameters, @@ -120,7 +136,7 @@ def create_scrubbing_preproc(wf_name="scrubbing"): # scrubbed_preprocessed.inputs.outputtype = 'NIFTI_GZ' scrubbed_preprocessed = pe.Node( - util.Function( + Function( input_names=["scrub_input"], output_names=["scrubbed_image"], function=scrub_image, @@ -152,9 +168,8 @@ def create_scrubbing_preproc(wf_name="scrubbing"): def get_mov_parameters(infile_a, infile_b): - """ - Method to get the new movement parameters - file after removing the offending time frames + """Get the new movement parameters file after removing the offending time frames. + (i.e., those exceeding FD 0.5mm/0.2mm threshold). Parameters @@ -192,7 +207,7 @@ def get_mov_parameters(infile_a, infile_b): raise Exception(msg) f = open(out_file, "a") - for l in l1: + for l in l1: # noqa: E741 data = l2[int(l.strip())] f.write(data) f.close() @@ -200,9 +215,7 @@ def get_mov_parameters(infile_a, infile_b): def get_indx(scrub_input, frames_in_1D_file): - """ - Method to get the list of time - frames that are to be included. + """Get the list of time frames that are to be included. Parameters ---------- @@ -230,10 +243,10 @@ def get_indx(scrub_input, frames_in_1D_file): def scrub_image(scrub_input): - """ - Method to run 3dcalc in order to scrub the image. This is used instead of - the Nipype interface for 3dcalc because functionality is needed for - specifying an input file with specifically-selected volumes. For example: + """Run 3dcalc in order to scrub the image. + + This is used instead of the Nipype interface for 3dcalc because functionality is + needed for specifying an input file with specifically-selected volumes. For example: input.nii.gz[2,3,4,..98], etc. Parameters diff --git a/CPAC/seg_preproc/seg_preproc.py b/CPAC/seg_preproc/seg_preproc.py index d2e9a98f6d..1fe3f4045f 100644 --- a/CPAC/seg_preproc/seg_preproc.py +++ b/CPAC/seg_preproc/seg_preproc.py @@ -1,3 +1,19 @@ +# Copyright (C) 2012-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . from nipype.interfaces import ants, freesurfer, fsl, utility as util from nipype.interfaces.utility import Function @@ -23,10 +39,10 @@ def process_segment_map(wf_name, use_priors, use_custom_threshold, reg_tool): - """This is a sub workflow used inside segmentation workflow to process - probability maps obtained in segmentation. 
Steps include overlapping - of the prior tissue with probability maps, thresholding and binarizing - it and creating a mask that is used in further analysis. + """Create a sub workflow used inside segmentation workflow to process probability maps obtained in segmentation. + + Steps include overlapping of the prior tissue with probability maps, thresholding + and binarizing it and creating a mask that is used in further analysis. Parameters ---------- @@ -274,7 +290,7 @@ def tissue_mask_template_to_t1(wf_name, use_ants): # check transform list to exclude Nonetype (missing) init/rig/affine check_transform = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["checked_transform_list", "list_length"], function=check_transforms, @@ -289,7 +305,7 @@ def tissue_mask_template_to_t1(wf_name, use_ants): # generate inverse transform flags, which depends on the # number of transforms inverse_transform_flags = pe.Node( - util.Function( + Function( input_names=["transform_list"], output_names=["inverse_transform_flags"], function=generate_inverse_transform_flags, @@ -356,9 +372,7 @@ def tissue_mask_template_to_t1(wf_name, use_ants): def create_seg_preproc_antsJointLabel_method(wf_name="seg_preproc_templated_based"): - """ - Generate the subject's cerebral spinal fluids, - white matter and gray matter mask based on provided template, if selected to do so. + """Generate the subject's cerebral spinal fluids, white matter and gray matter mask based on provided template, if selected to do so. Parameters ---------- @@ -417,7 +431,7 @@ def create_seg_preproc_antsJointLabel_method(wf_name="seg_preproc_templated_base ) seg_preproc_antsJointLabel = pe.Node( - util.Function( + Function( input_names=[ "anatomical_brain", "anatomical_brain_mask", @@ -700,7 +714,7 @@ def tissue_seg_fsl_fast(wf, cfg, strat_pool, pipe_num, opt=None): ) get_csf = pe.Node( - util.Function( + Function( input_names=["probability_maps"], output_names=["filename"], function=pick_wm_prob_0, @@ -945,7 +959,7 @@ def tissue_seg_freesurfer(wf, cfg, strat_pool, pipe_num, opt=None): wf.connect(node, out, fs_aseg_to_native, "target_file") fs_aseg_to_nifti = pe.Node( - util.Function( + Function( input_names=["in_file"], output_names=["out_file"], function=mri_convert ), name=f"fs_aseg_to_nifti_{pipe_num}", diff --git a/CPAC/surface/surf_preproc.py b/CPAC/surface/surf_preproc.py index f826d4b564..7959688f86 100644 --- a/CPAC/surface/surf_preproc.py +++ b/CPAC/surface/surf_preproc.py @@ -1,10 +1,25 @@ -import os +# Copyright (C) 2021-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. -import nipype.interfaces.utility as util +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . 
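# The ``tissue_mask_template_to_t1`` hunk above filters the ANTs transform
# list before ApplyTransforms and then derives matching inverse-transform
# flags.  A hedged sketch of what those two helper nodes plausibly do, written
# only from their names, inputs, and outputs as wired here; the real helpers
# imported by these modules may differ.
def check_transforms_sketch(transform_list):
    """Drop missing (None) init/rigid/affine entries; return list and length."""
    checked_transform_list = [xfm for xfm in transform_list if xfm is not None]
    return checked_transform_list, len(checked_transform_list)


def generate_inverse_transform_flags_sketch(transform_list):
    """One boolean per remaining transform, telling ANTs which to invert.

    Marking every transform for inversion is a simplification; the real flag
    logic may treat linear and nonlinear transforms differently.
    """
    return [True] * len(transform_list)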
+import os from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.engine.nodeblock import nodeblock from CPAC.surface.PostFreeSurfer.surf_reho import run_surf_reho +from CPAC.utils.interfaces import Function def run_surface( @@ -1026,7 +1041,7 @@ def run_surface( ) def surface_postproc(wf, cfg, strat_pool, pipe_num, opt=None): surf = pe.Node( - util.Function( + Function( input_names=[ "post_freesurfer_folder", "freesurfer_folder", @@ -1369,7 +1384,7 @@ def surface_postproc(wf, cfg, strat_pool, pipe_num, opt=None): ) def surface_falff(wf, cfg, strat_pool, pipe_num, opt): falff = pe.Node( - util.Function( + Function( input_names=["subject", "dtseries"], output_names=["surf_falff"], function=run_surf_falff, @@ -1394,7 +1409,7 @@ def surface_falff(wf, cfg, strat_pool, pipe_num, opt): ) def surface_alff(wf, cfg, strat_pool, pipe_num, opt): alff = pe.Node( - util.Function( + Function( input_names=["subject", "dtseries"], output_names=["surf_alff"], function=run_surf_alff, @@ -1427,7 +1442,7 @@ def surface_alff(wf, cfg, strat_pool, pipe_num, opt): ) def surface_reho(wf, cfg, strat_pool, pipe_num, opt): L_cortex_file = pe.Node( - util.Function( + Function( input_names=["subject", "dtseries", "structure", "cortex_filename"], output_names=["L_cortex_file"], function=run_get_cortex, @@ -1442,7 +1457,7 @@ def surface_reho(wf, cfg, strat_pool, pipe_num, opt): wf.connect(node, out, L_cortex_file, "dtseries") R_cortex_file = pe.Node( - util.Function( + Function( input_names=["subject", "dtseries", "structure", "cortex_filename"], output_names=["R_cortex_file"], function=run_get_cortex, @@ -1456,7 +1471,7 @@ def surface_reho(wf, cfg, strat_pool, pipe_num, opt): wf.connect(node, out, R_cortex_file, "dtseries") mean_timeseries = pe.Node( - util.Function( + Function( input_names=["subject", "dtseries"], output_names=["mean_timeseries"], function=run_mean_timeseries, @@ -1468,7 +1483,7 @@ def surface_reho(wf, cfg, strat_pool, pipe_num, opt): wf.connect(node, out, mean_timeseries, "dtseries") L_reho = pe.Node( - util.Function( + Function( input_names=[ "subject", "dtseries", @@ -1499,7 +1514,7 @@ def surface_reho(wf, cfg, strat_pool, pipe_num, opt): wf.connect(node, out, L_reho, "dtseries") R_reho = pe.Node( - util.Function( + Function( input_names=[ "subject", "dtseries", @@ -1545,7 +1560,7 @@ def surface_reho(wf, cfg, strat_pool, pipe_num, opt): ) def surface_connectivity_matrix(wf, cfg, strat_pool, pipe_num, opt): connectivity_parcellation = pe.Node( - util.Function( + Function( input_names=["subject", "dtseries", "surf_atlaslabel"], output_names=["parcellation_file"], function=run_ciftiparcellate, @@ -1561,7 +1576,7 @@ def surface_connectivity_matrix(wf, cfg, strat_pool, pipe_num, opt): ]["surface_parcellation_template"] correlation_matrix = pe.Node( - util.Function( + Function( input_names=["subject", "ptseries"], output_names=["correlation_matrix"], function=run_cifticorrelation, diff --git a/CPAC/timeseries/timeseries_analysis.py b/CPAC/timeseries/timeseries_analysis.py index 8f8355a3d2..18b1a4851a 100644 --- a/CPAC/timeseries/timeseries_analysis.py +++ b/CPAC/timeseries/timeseries_analysis.py @@ -15,7 +15,6 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . 
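# ``surface_connectivity_matrix`` above chains two Function nodes:
# ``run_ciftiparcellate`` (dtseries + atlas label -> ptseries) feeding
# ``run_cifticorrelation`` (ptseries -> parcellated connectivity matrix).
# A hedged sketch of the Connectome Workbench calls those wrappers presumably
# make; output naming and options in the real C-PAC functions may differ.
import os
import subprocess


def parcellate_then_correlate(dtseries, surf_atlaslabel, prefix="example"):
    """Parcellate a dense timeseries, then correlate the parcel timeseries."""
    ptseries = os.path.abspath(f"{prefix}.ptseries.nii")
    pconn = os.path.abspath(f"{prefix}.pconn.nii")
    subprocess.check_output(
        ["wb_command", "-cifti-parcellate", dtseries, surf_atlaslabel, "COLUMN", ptseries]
    )
    subprocess.check_output(["wb_command", "-cifti-correlation", ptseries, pconn])
    return pconn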
from nipype.interfaces import afni, fsl, utility as util -from nipype.interfaces.utility import Function from CPAC.connectome.connectivity_matrix import ( create_connectome_afni, @@ -29,6 +28,7 @@ create_spatial_map_dataflow, resample_func_roi, ) +from CPAC.utils.interfaces import Function from CPAC.utils.monitoring import FMLOGGER @@ -86,7 +86,7 @@ def get_voxel_timeseries(wf_name: str = "voxel_timeseries") -> pe.Workflow: ) timeseries_voxel = pe.Node( - util.Function( + Function( input_names=["data_file", "template"], output_names=["oneD_file"], function=gen_voxel_timeseries, @@ -241,7 +241,7 @@ def get_roi_timeseries(wf_name: str = "roi_timeseries") -> pe.Workflow: clean_csv_imports = ["import os"] clean_csv = pe.Node( - util.Function( + Function( input_names=["roi_csv"], output_names=["roi_array", "edited_roi_csv"], function=clean_roi_csv, @@ -382,7 +382,7 @@ def get_vertices_timeseries(wf_name="vertices_timeseries"): ) timeseries_surface = pe.Node( - util.Function( + Function( input_names=["rh_surface_file", "lh_surface_file"], output_names=["out_file"], function=gen_vertices_timeseries, diff --git a/CPAC/utils/interfaces/__init__.py b/CPAC/utils/interfaces/__init__.py index 126bb1c22b..6716a562f5 100644 --- a/CPAC/utils/interfaces/__init__.py +++ b/CPAC/utils/interfaces/__init__.py @@ -1,7 +1,27 @@ +# Copyright (C) 2010-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Custom interfaces for C-PAC.""" + from . import brickstat, datasink, function, pc +from .function import Function __all__ = [ "function", + "Function", "pc", "brickstat", "datasink", diff --git a/CPAC/utils/interfaces/function/seg_preproc.py b/CPAC/utils/interfaces/function/seg_preproc.py index d220781f48..f3cb06840b 100644 --- a/CPAC/utils/interfaces/function/seg_preproc.py +++ b/CPAC/utils/interfaces/function/seg_preproc.py @@ -1,11 +1,26 @@ +# Copyright (C) 2022-2023 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . """Function interfaces for seg_preproc.""" -from nipype.interfaces import utility as util +from CPAC.utils.interfaces.function.function import Function def pick_tissue_from_labels_file_interface(input_names=None): - """Function to create a Function interface for - CPAC.seg_preproc.utils.pick_tissue_from_labels_file. 
+ """Create a Function interface for ~CPAC.seg_preproc.utils.pick_tissue_from_labels_file. Parameters ---------- @@ -20,7 +35,7 @@ def pick_tissue_from_labels_file_interface(input_names=None): if input_names is None: input_names = ["multiatlas_Labels", "csf_label", "gm_label", "wm_label"] - return util.Function( + return Function( input_names=input_names, output_names=["csf_mask", "gm_mask", "wm_mask"], function=pick_tissue_from_labels_file, diff --git a/CPAC/utils/tests/old_functions.py b/CPAC/utils/tests/old_functions.py new file mode 100644 index 0000000000..80171db77b --- /dev/null +++ b/CPAC/utils/tests/old_functions.py @@ -0,0 +1,67 @@ +# Copyright (C) 2012-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. + +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . +"""Functions from before refactoring.""" + + +def check(params_dct, subject_id, scan_id, val_to_check, throw_exception): + """https://github.com/FCP-INDI/C-PAC/blob/96db8b0b65ab1d5f55fb3b895855af34d72c17e4/CPAC/utils/utils.py#L630-L653""" + if val_to_check not in params_dct: + if throw_exception: + raise Exception( + f"Missing Value for {val_to_check} for participant " f"{subject_id}" + ) + return None + if isinstance(params_dct[val_to_check], dict): + ret_val = params_dct[val_to_check][scan_id] + else: + ret_val = params_dct[val_to_check] + if ret_val == "None": + if throw_exception: + raise Exception( + f"'None' Parameter Value for {val_to_check} for participant " + f"{subject_id}" + ) + else: + ret_val = None + if ret_val == "" and throw_exception: + raise Exception( + f"Missing Value for {val_to_check} for participant " f"{subject_id}" + ) + return ret_val + + +def check2(val): + """https://github.com/FCP-INDI/C-PAC/blob/96db8b0b65ab1d5f55fb3b895855af34d72c17e4/CPAC/utils/utils.py#L745-L746""" + return val if val == None or val == "" or isinstance(val, str) else int(val) + + +def try_fetch_parameter(scan_parameters, subject, scan, keys): + """https://github.com/FCP-INDI/C-PAC/blob/96db8b0b65ab1d5f55fb3b895855af34d72c17e4/CPAC/utils/utils.py#L679-L703""" + scan_parameters = dict((k.lower(), v) for k, v in scan_parameters.items()) + for key in keys: + key = key.lower() + if key not in scan_parameters: + continue + if isinstance(scan_parameters[key], dict): + value = scan_parameters[key][scan] + else: + value = scan_parameters[key] + if value == "None": + return None + if value is not None: + return value + return None diff --git a/CPAC/utils/tests/test_datasource.py b/CPAC/utils/tests/test_datasource.py index 9842310bb1..be7c2255c2 100644 --- a/CPAC/utils/tests/test_datasource.py +++ b/CPAC/utils/tests/test_datasource.py @@ -1,10 +1,26 @@ +# Copyright (C) 2019-2024 C-PAC Developers + +# This file is part of C-PAC. + +# C-PAC is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. 
+ +# C-PAC is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +# License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with C-PAC. If not, see . import json import pytest -import nipype.interfaces.utility as util from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.utils.datasource import match_epi_fmaps +from CPAC.utils.interfaces import Function from CPAC.utils.test_resources import setup_test_wf @@ -48,7 +64,7 @@ def test_match_epi_fmaps(): } match_fmaps = pe.Node( - util.Function( + Function( input_names=["fmap_dct", "bold_pedir"], output_names=["opposite_pe_epi", "same_pe_epi"], function=match_epi_fmaps, diff --git a/CPAC/utils/tests/test_utils.py b/CPAC/utils/tests/test_utils.py index c972e712f9..4d8f18dabe 100644 --- a/CPAC/utils/tests/test_utils.py +++ b/CPAC/utils/tests/test_utils.py @@ -3,29 +3,47 @@ import multiprocessing from unittest import mock +from _pytest.logging import LogCaptureFixture import pytest from CPAC.func_preproc import get_motion_ref from CPAC.pipeline.engine.nodeblock import NodeBlockFunction from CPAC.utils.configuration import Configuration from CPAC.utils.monitoring.custom_logging import log_subprocess +from CPAC.utils.tests import old_functions from CPAC.utils.utils import ( check_config_resources, check_system_deps, - try_fetch_parameter, + ScanParameters, ) -scan_params_bids = { - "RepetitionTime": 2.0, - "ScanOptions": "FS", - "SliceAcquisitionOrder": "Interleaved Ascending", -} -scan_params_cpac = { - "tr": 2.5, - "acquisition": "seq+z", - "reference": "24", - "first_tr": "", - "last_tr": "", +SCAN_PARAMS = { + "BIDS": { + "params": { + "RepetitionTime": 2.0, + "ScanOptions": "FS", + "SliceAcquisitionOrder": "Interleaved Ascending", + }, + "expected_TR": 2.0, + }, + "C-PAC": { + "params": { + "tr": 2.5, + "acquisition": "seq+z", + "reference": "24", + "first_TR": 1, + "last_TR": "", + }, + "expected_TR": 2.5, + }, + "nested": { + "params": { + "TR": {"scan": 3}, + "first_TR": {"scan": 0}, + "last_TR": {"scan": 450}, + }, + "expected_TR": 3, + }, } @@ -69,12 +87,62 @@ def test_check_config_resources(): assert "threads available (2)" in error_string -def test_function(): - TR = try_fetch_parameter(scan_params_bids, "0001", "scan", ["TR", "RepetitionTime"]) - assert TR == 2.0 - - TR = try_fetch_parameter(scan_params_cpac, "0001", "scan", ["TR", "RepetitionTime"]) - assert TR == 2.5 +@pytest.mark.parametrize("scan_params", ["BIDS", "C-PAC", "nested"]) +@pytest.mark.parametrize("convert_to", [int, float, str]) +def test_fetch_and_convert( + caplog: LogCaptureFixture, scan_params: str, convert_to: type +) -> None: + """Test functionality to fetch and convert scan parameters.""" + params = ScanParameters(SCAN_PARAMS[scan_params]["params"], "subject", "scan") + TR = params.fetch_and_convert( + keys=["TR", "RepetitionTime"], + convert_to=convert_to, + ) + if TR and "RepetitionTime" in params.params: + old_TR = convert_to( + old_functions.check( + params.params, params.subject, params.scan, "RepetitionTime", False + ) + ) + assert TR == old_TR + try: + old_TR = convert_to( + old_functions.try_fetch_parameter( + params.params, params.subject, params.scan, ["TR", "RepetitionTime"] + ) + ) + except TypeError: + old_TR = None + assert ( + (TR == convert_to(SCAN_PARAMS[scan_params]["expected_TR"])) + and isinstance(TR, convert_to) + and TR == 
old_TR + ) + if scan_params == "C-PAC": + assert "Using case-insenitive match: 'TR' ≅ 'tr'." in caplog.text + else: + assert "Using case-insenitive match: 'TR' ≅ 'tr'." not in caplog.text + not_TR = params.fetch_and_convert( + keys=["NotTR", "NotRepetitionTime"], + convert_to=convert_to, + ) + assert not_TR is None + if "first_TR" in params.params: + first_tr = params.fetch_and_convert(["first_TR"], int, 1, False) + old_first_tr = old_functions.check( + params.params, params.subject, params.scan, "first_TR", False + ) + if old_first_tr: + old_first_tr = old_functions.check2(old_first_tr) + assert first_tr == old_first_tr + if "last_TR" in params.params: + last_tr = params.fetch_and_convert(["last_TR"], int, "", False) + old_last_tr = old_functions.check( + params.params, params.subject, params.scan, "last_TR", False + ) + if old_last_tr: + old_last_tr = old_functions.check2(old_last_tr) + assert last_tr == old_last_tr @pytest.mark.parametrize("executable", ["Xvfb"]) diff --git a/CPAC/utils/utils.py b/CPAC/utils/utils.py index 7d90593cc7..99051e9368 100644 --- a/CPAC/utils/utils.py +++ b/CPAC/utils/utils.py @@ -25,7 +25,7 @@ import numbers import os import pickle -from typing import Any +from typing import Any, Literal, Optional, overload import numpy as np from voluptuous.error import Invalid @@ -33,6 +33,7 @@ from CPAC.utils.configuration import Configuration from CPAC.utils.docs import deprecated +from CPAC.utils.interfaces.function import Function from CPAC.utils.monitoring import FMLOGGER, WFLOGGER CONFIGS_DIR = os.path.abspath( @@ -46,6 +47,7 @@ os.path.join(CONFIGS_DIR, "1.7-1.8-deprecations.yml"), "r", encoding="utf-8" ) as _f: NESTED_CONFIG_DEPRECATIONS = yaml.safe_load(_f) +PE_DIRECTION = Literal["i", "i-", "j", "j-", "k", "k-", ""] VALID_PATTERNS = [ "alt+z", "altplus", @@ -130,7 +132,7 @@ def get_flag_wf(wf_name="get_flag"): input_node = pe.Node(util.IdentityInterface(fields=["in_flag"]), name="inputspec") get_flag = pe.Node( - util.Function(input_names=["in_flag"], function=_get_flag), name="get_flag" + Function(input_names=["in_flag"], function=_get_flag), name="get_flag" ) wf.connect(input_node, "in_flag", get_flag, "in_flag") @@ -320,7 +322,7 @@ def get_zscore(map_node=False, wf_name="z_score"): ) op_string = pe.MapNode( - util.Function( + Function( input_names=["mean", "std_dev"], output_names=["op_string"], function=get_operand_string, @@ -343,7 +345,7 @@ def get_zscore(map_node=False, wf_name="z_score"): ) op_string = pe.Node( - util.Function( + Function( input_names=["mean", "std_dev"], output_names=["op_string"], function=get_operand_string, @@ -398,7 +400,7 @@ def get_fisher_zscore(input_name, map_node=False, wf_name="fisher_z_score"): if map_node: # node to separate out fisher_z_score = pe.MapNode( - util.Function( + Function( input_names=["correlation_file", "timeseries_one_d", "input_name"], output_names=["out_file"], function=compute_fisher_z_score, @@ -408,7 +410,7 @@ def get_fisher_zscore(input_name, map_node=False, wf_name="fisher_z_score"): ) else: fisher_z_score = pe.Node( - util.Function( + Function( input_names=["correlation_file", "timeseries_one_d", "input_name"], output_names=["out_file"], function=compute_fisher_z_score, @@ -471,18 +473,172 @@ def compute_fisher_z_score(correlation_file, timeseries_one_d, input_name): return out_file -def fetch_and_convert( - scan_parameters: dict, scan: str, keys: list[str], convert_to: type, fallback: Any -) -> Any: - """Fetch a parameter from a scan parameters dictionary and convert it to a given type. 
+class ScanParameters: + """A dictionary of scan parameters and access methods.""" - Catch TypeError exceptions and return a fallback value in those cases. - """ - try: - value = convert_to(scan_parameters, None, scan, keys) - except TypeError: - value = fallback - return value + def __init__(self, scan_parameters: str | dict, subject_id: str, scan: str): + """Initialize ScanParameters dict and metadata.""" + self.subject = subject_id + self.scan = scan + if ".json" in scan_parameters: + if not os.path.exists(scan_parameters): + err = ( + "\n[!] WARNING: Scan parameters JSON file listed in your data" + f" configuration file does not exist:\n{scan_parameters}" + ) + raise FileNotFoundError(err) + with open(scan_parameters, "r") as f: + self.params: dict = json.load(f) + elif isinstance(scan_parameters, dict): + self.params = scan_parameters + else: + err = ( + "\n\n[!] Could not read the format of the scan parameters " + "information included in the data configuration file for " + f"the participant {self.subject}.\n\n" + ) + raise OSError(err) + + def check(self, val_to_check: str, throw_exception: bool): + """Check that a value is populated for a given key in a parameters dictionary.""" + if val_to_check not in self.params: + if throw_exception: + msg = f"Missing Value for {val_to_check} for participant {self.subject}" + raise ValueError(msg) + return None + + if isinstance(self.params[val_to_check], dict): + ret_val = self.params[val_to_check][self.scan] + else: + ret_val = self.params[val_to_check] + + if ret_val == "None": + if throw_exception: + msg = ( + f"'None' parameter value for {val_to_check} for" + f" participant {self.subject}." + ) + raise ValueError(msg) + ret_val = None + + if ret_val == "" and throw_exception: + msg = f"Missing value for {val_to_check} for participant {self.subject}." + raise ValueError(msg) + + return ret_val + + @overload + def fetch( + self, + keys: Optional[list[str]] = None, + *, + match_case: Literal[False], + throw_exception: bool, + ) -> Any: ... + @overload + def fetch( + self, + keys: Optional[list[str]] = None, + *, + match_case: Literal[True], + throw_exception: bool, + ) -> tuple[Any, tuple[str, str]]: ... + def fetch(self, keys, *, match_case=False, throw_exception=True): + """Fetch the first found parameter from a scan params dictionary. + + Returns + ------- + value + The value of the parameter. + + keys, optional + The matched keys (only if ``match_case is True``) + + throw_exception + Raise an exception if value is ``""`` or ``None``? + """ + if match_case: + keys = {key.lower(): key for key in keys} + scan_param_keys = {key.lower(): key for key in self.params.keys()} + scan_parameters = {key.lower(): value for key, value in self.params.items()} + else: + scan_parameters = self.params + for key in keys: + if key in scan_parameters: + if match_case: + return self.check(key, throw_exception), ( + keys[key], + scan_param_keys[key], + ) + return self.check(key, throw_exception) + msg = f"None of {keys} found in {list(scan_parameters.keys())}." + raise KeyError(msg) + + def fetch_and_convert( + self, + keys: list[str], + convert_to: Optional[type] = None, + fallback: Optional[Any] = None, + warn_typeerror: bool = True, + throw_exception: bool = False, + ) -> Any: + """Fetch a parameter from a scan params dictionary and convert it to a given type. + + Catch TypeError exceptions and return a fallback value in those cases. 
+
+        Parameters
+        ----------
+        keys
+            if multiple keys provided, the value corresponding to the first found will be
+            returned
+
+        convert_to
+            the type to return if possible
+
+        fallback
+            a value to return if the keys are not found in ``scan_parameters``
+
+        warn_typeerror
+            log a warning if value cannot be converted to ``convert_to`` type?
+
+        throw_exception
+            raise an error for empty string or NoneTypes?
+
+        Returns
+        -------
+        value
+            The gathered parameter coerced to the specified type, if possible.
+            ``fallback`` otherwise.
+        """
+        value: Any = fallback
+        fallback_message = f"Falling back to {fallback} ({type(fallback)})."
+
+        try:
+            raw_value = self.fetch(keys, throw_exception=throw_exception)
+        except KeyError:
+            try:
+                raw_value, matched_keys = self.fetch(
+                    keys, match_case=True, throw_exception=throw_exception
+                )
+            except KeyError:
+                WFLOGGER.warning(
+                    f"None of {keys} found in {list(self.params.keys())}. "
+                    f"{fallback_message}"
+                )
+                return fallback
+            WFLOGGER.warning(
+                f"No exact match found. Using case-insensitive match: '{matched_keys[0]}'"
+                f" ≅ '{matched_keys[1]}'."
+            )
+        if convert_to:
+            try:
+                value = convert_to(raw_value)
+            except (TypeError, ValueError):
+                if warn_typeerror:
+                    WFLOGGER.warning(
+                        f"Could not convert {raw_value} to {convert_to}. {fallback_message}"
+                    )
+        return value
 
 
 def get_operand_string(mean, std_dev):
@@ -570,35 +726,6 @@ def correlation(matrix1, matrix2, match_rows=False, z_scored=False, symmetric=Fa
     return r
 
 
-def check(params_dct, subject_id, scan_id, val_to_check, throw_exception):
-    """Check that a value is populated for a given key in a parameters dictionary."""
-    if val_to_check not in params_dct:
-        if throw_exception:
-            msg = f"Missing Value for {val_to_check} for participant {subject_id}"
-            raise ValueError(msg)
-        return None
-
-    if isinstance(params_dct[val_to_check], dict):
-        ret_val = params_dct[val_to_check][scan_id]
-    else:
-        ret_val = params_dct[val_to_check]
-
-    if ret_val == "None":
-        if throw_exception:
-            msg = (
-                f"'None' Parameter Value for {val_to_check} for"
-                f" participant {subject_id}"
-            )
-            raise ValueError(msg)
-        ret_val = None
-
-    if ret_val == "" and throw_exception:
-        msg = f"Missing Value for {val_to_check} for participant {subject_id}"
-        raise ValueError(msg)
-
-    return ret_val
-
-
 def check_random_state(seed):
     """
     Turn seed into a np.random.RandomState instance.
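A minimal sketch of how the ``ScanParameters`` accessors introduced above are
expected to compose; the dictionary and scan labels below are illustrative
assumptions mirroring the C-PAC-style test fixture, not part of the change
itself:

    from CPAC.utils.utils import ScanParameters

    # C-PAC-style sidecars use lowercase keys, so "TR" is resolved by the
    # case-insensitive retry rather than an exact match.
    params = ScanParameters(
        {"tr": 2.5, "first_tr": "", "last_tr": ""},
        subject_id="sub-01",
        scan="task-rest",
    )
    tr = params.fetch_and_convert(["RepetitionTime", "TR"], float, "")  # 2.5
    first_tr = params.fetch_and_convert(["first_TR"], int, 1, False)  # "" -> fallback 1
    missing = params.fetch_and_convert(["NotTR", "NotRepetitionTime"], float)  # None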
@@ -624,172 +751,94 @@ def check_random_state(seed): ) -def try_fetch_parameter(scan_parameters, subject, scan, keys): - """Try to fetch a parameter from a scan parameters dictionary.""" - scan_parameters = {k.lower(): v for k, v in scan_parameters.items()} - - for _key in keys: - key = _key.lower() - - if key not in scan_parameters: - continue - - if isinstance(scan_parameters[key], dict): - value = scan_parameters[key][scan] - else: - value = scan_parameters[key] - - # Explicit none value - if value == "None": - return None - - if value is not None: - return value - return None - - +@Function.sig_imports( + [ + "import json", + "import os", + "from typing import Literal, Optional", + "from CPAC.utils.utils import ScanParameters, PE_DIRECTION, VALID_PATTERNS", + ] +) def get_scan_params( - subject_id, - scan, - pipeconfig_start_indx, - pipeconfig_stop_indx, - data_config_scan_params=None, -): + subject_id: str, + scan: str, + pipeconfig_start_indx: Optional[int | str], + pipeconfig_stop_indx: Optional[int | str], + data_config_scan_params: Optional[dict | str] = None, +) -> tuple[ + Optional[str], + Optional[str], + Optional[int], + Optional[int], + Optional[int], + Optional[int], + PE_DIRECTION, + Optional[float], +]: """Extract slice timing correction parameters and scan parameters. Parameters ---------- - subject_id : str + subject_id subject id - scan : str + scan scan id - pipeconfig_start_indx : int + pipeconfig_start_indx starting volume index as provided in the pipeline config yaml file - pipeconfig_stop_indx : int + pipeconfig_stop_indx ending volume index as provided in the pipeline config yaml file - data_config_scan_params : str - file path to scan parameter JSON file listed in data config yaml file + data_config_scan_params + file path to scan parameter JSON file listed in data config yaml file or loaded + paramater dictionary Returns ------- - TR : a string + tr TR value - pattern : a string + tpattern slice aquisition pattern string or file path - ref_slice : an integer - reference slice which is used to allign all other slices - first_tr : an integer - starting TR or starting volume index - last_tr : an integer - ending TR or ending volume index - pe_direction : str - effective_echo_spacing : float + ref_slice + index of reference slice which is used to allign all other slices + first_tr + index of starting TR or starting volume index + last_tr + index of ending TR or ending volume index + pe_direction + https://bids-specification.readthedocs.io/en/stable/glossary.html#phaseencodingdirection-metadata + effective_echo_spacing + https://bids-specification.readthedocs.io/en/stable/glossary.html#effectiveechospacing-metadata """ - import json - import os - import warnings - - def check2(val): - return val if val is None or val == "" or isinstance(val, str) else int(val) - - # initialize vars to empty - TR = pattern = ref_slice = first_tr = last_tr = pe_direction = "" - unit = "s" - effective_echo_spacing = template = None + unit: Literal["ms", "s"] = "s" if isinstance(pipeconfig_stop_indx, str): if "End" in pipeconfig_stop_indx or "end" in pipeconfig_stop_indx: pipeconfig_stop_indx = None - if data_config_scan_params: - if ".json" in data_config_scan_params: - if not os.path.exists(data_config_scan_params): - err = ( - "\n[!] 
WARNING: Scan parameters JSON file listed in your data" - f" configuration file does not exist:\n{data_config_scan_params}" - ) - raise FileNotFoundError(err) - - with open(data_config_scan_params, "r") as f: - params_dct = json.load(f) - - # get details from the configuration - # if this is a JSON file, the key values are the BIDS format - # standard - # TODO: better handling of errant key values!!! - if "RepetitionTime" in params_dct.keys(): - TR = float(check(params_dct, subject_id, scan, "RepetitionTime", False)) - if "SliceTiming" in params_dct.keys(): - pattern = str(check(params_dct, subject_id, scan, "SliceTiming", False)) - elif "SliceAcquisitionOrder" in params_dct.keys(): - pattern = str( - check(params_dct, subject_id, scan, "SliceAcquisitionOrder", False) - ) - if "PhaseEncodingDirection" in params_dct.keys(): - pe_direction = str( - check(params_dct, subject_id, scan, "PhaseEncodingDirection", False) - ) - try: - "EffectiveEchoSpacing" in params_dct.keys() - effective_echo_spacing = float( - check(params_dct, subject_id, scan, "EffectiveEchoSpacing", False) - ) - except TypeError: - pass - - elif len(data_config_scan_params) > 0 and isinstance( - data_config_scan_params, dict - ): - params_dct = data_config_scan_params - - # TODO: better handling of errant key values!!! - # TODO: use schema validator to deal with it - # get details from the configuration - TR = fetch_and_convert( - params_dct, scan, ["TR", "RepetitionTime"], float, None - ) - template = fetch_and_convert( - params_dct, scan, ["Template", "template"], str, None - ) - - pattern = str( - try_fetch_parameter( - params_dct, - subject_id, - scan, - ["acquisition", "SliceTiming", "SliceAcquisitionOrder"], - ) - ) - - ref_slice = check(params_dct, subject_id, scan, "reference", False) - ref_slice = int(ref_slice) if ref_slice else ref_slice - - first_tr = check(params_dct, subject_id, scan, "first_TR", False) - first_tr = check2(first_tr) if first_tr else first_tr - - last_tr = check(params_dct, subject_id, scan, "last_TR", False) - last_tr = check2(last_tr) if last_tr else last_tr - - pe_direction = check( - params_dct, subject_id, scan, "PhaseEncodingDirection", False - ) - effective_echo_spacing = fetch_and_convert( - params_dct, - scan, - ["EffectiveEchoSpacing"], - float, - effective_echo_spacing, - ) - - else: - err = ( - "\n\n[!] Could not read the format of the scan parameters " - "information included in the data configuration file for " - f"the participant {subject_id}.\n\n" - ) - raise OSError(err) - first_tr = pipeconfig_start_indx if first_tr == "" or first_tr is None else first_tr - last_tr = pipeconfig_stop_indx if last_tr == "" or last_tr is None else last_tr - pattern = None if "None" in pattern or "none" in pattern else pattern + params = ScanParameters(data_config_scan_params, subject_id, scan) + # TODO: better handling of errant key values!!! 
+ # TODO: use schema validator to deal with it + # get details from the configuration + tr: float | Literal[""] = params.fetch_and_convert( + ["RepetitionTime", "TR"], float, "" + ) + template: Optional[str] = params.fetch_and_convert(["Template", "template"], str) + pattern: Optional[str] = params.fetch_and_convert( + ["acquisition", "SliceTiming", "SliceAcquisitionOrder"], + str, + None, + ) + ref_slice: Optional[int | str] = params.fetch_and_convert(["reference"], int, None) + first_tr: Optional[int | str] = params.fetch_and_convert( + ["first_TR"], int, pipeconfig_start_indx, False + ) + last_tr: Optional[int | str] = params.fetch_and_convert( + ["last_TR"], int, pipeconfig_stop_indx, False + ) + pe_direction: PE_DIRECTION = params.fetch_and_convert( + ["PhaseEncodingDirection"], str, "" + ) + effective_echo_spacing: Optional[float] = params.fetch_and_convert( + ["EffectiveEchoSpacing"], float + ) """ if not pattern: @@ -865,26 +914,26 @@ def check2(val): slice_timings.sort() max_slice_offset = slice_timings[-1] - # checking if the unit of TR and slice timing match or not - # if slice timing in ms convert TR to ms as well - if TR and max_slice_offset > TR: - warnings.warn( + # checking if the unit of tr and slice timing match or not + # if slice timing in ms convert tr to ms as well + if tr and max_slice_offset > tr: + WFLOGGER.warning( "TR is in seconds and slice timings are in " "milliseconds. Converting TR into milliseconds" ) - TR = TR * 1000 - WFLOGGER.info("New TR value %s ms", TR) + tr = tr * 1000 + WFLOGGER.info("New tr value %s ms", tr) unit = "ms" - elif TR and TR > 10: # noqa: PLR2004 + elif tr and tr > 10: # noqa: PLR2004 # check to see, if TR is in milliseconds, convert it into seconds - warnings.warn("TR is in milliseconds, Converting it into seconds") - TR = TR / 1000.0 - WFLOGGER.info("New TR value %s s", TR) + WFLOGGER.warning("TR is in milliseconds, Converting it into seconds") + tr = tr / 1000.0 + WFLOGGER.info("New TR value %s s", tr) unit = "s" # swap back in - tr = f"{TR!s}{unit}" if TR else "" + tr = f"{tr!s}{unit}" if tr else "" tpattern = pattern start_indx = first_tr stop_indx = last_tr diff --git a/requirements.txt b/requirements.txt index e6060c7f38..185f432729 100644 --- a/requirements.txt +++ b/requirements.txt @@ -48,4 +48,4 @@ pip==23.3 setuptools<60.0 urllib3==1.26.19 wheel==0.40.0 -zipp==3.16.0 +zipp==3.19.1 From abb4809be8243c1d56eb00cc255a3bdc44047603 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 15 Jul 2024 16:13:39 -0400 Subject: [PATCH 47/93] :art: Type `connect_pipeline` --- .../tests/test_preproc_connections.py | 2 +- CPAC/pipeline/cpac_pipeline.py | 26 +++++++++++++------ CPAC/pipeline/engine/__init__.py | 2 ++ CPAC/pipeline/engine/engine.py | 18 +++++++++---- CPAC/pipeline/engine/nodeblock.py | 8 +++--- 5 files changed, 38 insertions(+), 18 deletions(-) diff --git a/CPAC/func_preproc/tests/test_preproc_connections.py b/CPAC/func_preproc/tests/test_preproc_connections.py index f58380a7fd..d7a4466ff4 100644 --- a/CPAC/func_preproc/tests/test_preproc_connections.py +++ b/CPAC/func_preproc/tests/test_preproc_connections.py @@ -81,7 +81,7 @@ "from-template_to-T1w_mode-image_desc-linear_xfm", ] -NUM_TESTS = 48 # number of parameterizations to run for many-parameter tests +NUM_TESTS = 12 # number of parameterizations to run for many-parameter tests def _filter_assertion_message( diff --git a/CPAC/pipeline/cpac_pipeline.py b/CPAC/pipeline/cpac_pipeline.py index 9b5ed67141..b3192f11ea 100644 --- a/CPAC/pipeline/cpac_pipeline.py +++ 
b/CPAC/pipeline/cpac_pipeline.py @@ -25,12 +25,16 @@ import sys import time from time import strftime +from typing import Any, Optional import yaml -import nipype +import nipype # type: ignore [import-untyped] from nipype import config, logging -from flowdump import save_workflow_json, WorkflowJSONMeta -from indi_aws import aws_utils, fetch_creds +from flowdump import ( # type: ignore [import-untyped] + save_workflow_json, + WorkflowJSONMeta, +) +from indi_aws import aws_utils, fetch_creds # type: ignore [import-untyped] import CPAC from CPAC.alff.alff import alff_falff, alff_falff_space_template @@ -126,10 +130,11 @@ ingress_regressors, nuisance_regression_template, ) +from CPAC.pipeline import nipype_pipeline_engine as pe # pylint: disable=wrong-import-order from CPAC.pipeline.check_outputs import check_outputs -from CPAC.pipeline.engine import NodeBlock, ResourcePool +from CPAC.pipeline.engine import NodeBlock, PIPELINE_BLOCKS, ResourcePool from CPAC.pipeline.nipype_pipeline_engine.plugins import ( LegacyMultiProcPlugin, MultiProcPlugin, @@ -421,7 +426,7 @@ def run_workflow( license_notice=CPAC.license_notice.replace("\n", "\n "), ), ) - subject_info = {} + subject_info: dict[str, Any] = {} subject_info["subject_id"] = subject_id subject_info["start_time"] = pipeline_start_time @@ -1151,12 +1156,17 @@ def list_blocks(pipeline_blocks, indent=None): return blockstring -def connect_pipeline(wf, cfg, rpool, pipeline_blocks): +def connect_pipeline( + wf: pe.Workflow, + cfg: Configuration, + rpool: ResourcePool, + pipeline_blocks: PIPELINE_BLOCKS, +) -> pe.Workflow: """Connect the pipeline blocks to the workflow.""" WFLOGGER.info( "Connecting pipeline blocks:\n%s", list_blocks(pipeline_blocks, indent=1) ) - previous_nb = None + previous_nb: Optional[NodeBlock] = None for block in pipeline_blocks: try: nb = NodeBlock(block, debug=cfg["pipeline_setup", "Debugging", "verbose"]) @@ -1186,7 +1196,7 @@ def connect_pipeline(wf, cfg, rpool, pipeline_blocks): f"'{node_block_names}' " f"to workflow '{wf}' {previous_nb_str} {e.args[0]}", ) - if cfg.pipeline_setup["Debugging"]["verbose"]: + if cfg.pipeline_setup["Debugging"]["verbose"]: # type: ignore [attr-defined] verbose_logger = getLogger("CPAC.engine") verbose_logger.debug(e.args[0]) verbose_logger.debug(rpool) diff --git a/CPAC/pipeline/engine/__init__.py b/CPAC/pipeline/engine/__init__.py index 975df2e26e..a63f117563 100644 --- a/CPAC/pipeline/engine/__init__.py +++ b/CPAC/pipeline/engine/__init__.py @@ -18,6 +18,7 @@ from .engine import ( NodeBlock, + PIPELINE_BLOCKS, run_node_blocks, wrap_block, ) @@ -25,6 +26,7 @@ __all__ = [ "NodeBlock", + "PIPELINE_BLOCKS", "ResourcePool", "StratPool", "run_node_blocks", diff --git a/CPAC/pipeline/engine/engine.py b/CPAC/pipeline/engine/engine.py index 2ccdf08e58..f9bd8c857f 100644 --- a/CPAC/pipeline/engine/engine.py +++ b/CPAC/pipeline/engine/engine.py @@ -21,6 +21,7 @@ import hashlib import json import os +from typing import Any, Optional from nipype import config, logging # type: ignore [import-untyped] @@ -35,13 +36,20 @@ WFLOGGER, ) +PIPELINE_BLOCKS = list[NodeBlockFunction | "PIPELINE_BLOCKS"] + class NodeBlock: - def __init__(self, node_block_functions, debug=False): + def __init__( + self, + node_block_functions: NodeBlockFunction | PIPELINE_BLOCKS, + debug: bool = False, + ) -> None: + """Create a ``NodeBlock`` from a list of py:class:`~CPAC.pipeline.engine.nodeblock.NodeBlockFunction`s.""" if not isinstance(node_block_functions, list): node_block_functions = [node_block_functions] - 
self.node_blocks = {} + self.node_blocks: dict[str, Any] = {} for node_block_function in node_block_functions: # <---- sets up the NodeBlock object in case you gave it a list of node blocks instead of a single one - for option forking. self.input_interface = [] @@ -54,7 +62,7 @@ def __init__(self, node_block_functions, debug=False): if not isinstance(node_block_function, NodeBlockFunction): # If the object is a plain function `__name__` will be more useful than `str()` obj_str = ( - node_block_function.__name__ + node_block_function.__name__ # type: ignore [attr-defined] if hasattr(node_block_function, "__name__") else str(node_block_function) ) @@ -85,11 +93,11 @@ def __init__(self, node_block_functions, debug=False): self.node_blocks[name]["block_function"] = node_block_function # TODO: fix/replace below - self.outputs = {} + self.outputs: dict[str, Optional[str]] = {} for out in node_block_function.outputs: self.outputs[out] = None - self.options = ["base"] + self.options: list[str] | dict[str, Any] = ["base"] if node_block_function.outputs is not None: self.options = node_block_function.outputs diff --git a/CPAC/pipeline/engine/nodeblock.py b/CPAC/pipeline/engine/nodeblock.py index 13a857cb55..f9090f2453 100644 --- a/CPAC/pipeline/engine/nodeblock.py +++ b/CPAC/pipeline/engine/nodeblock.py @@ -27,7 +27,7 @@ class NodeBlockFunction: def __init__( self, func: Callable, - name: Optional[str] = None, + name: str, config: Optional[list[str]] = None, switch: Optional[list[str] | list[list[str]]] = None, option_key: Optional[str | list[str]] = None, @@ -37,7 +37,7 @@ def __init__( ) -> None: self.func = func """Nodeblock function reference.""" - self.name: Optional[str] = name + self.name: str = name """Used in the graph and logging to identify the NodeBlock and its component nodes.""" self.config: Optional[list[str]] = config """ @@ -61,7 +61,7 @@ def __init__( inputs = [] self.inputs: list[str | list | tuple] = inputs """ResourcePool keys indicating resources needed for the NodeBlock's functionality.""" - self.outputs: Optional[list[str] | dict[str, Any]] = outputs + self.outputs: list[str] | dict[str, Any] = outputs if outputs else [] """ ResourcePool keys indicating resources generated or updated by the NodeBlock, optionally including metadata for the outputs' respective sidecars. @@ -153,7 +153,7 @@ def nodeblock( Parameters ---------- name - Used in the graph and logging to identify the NodeBlock and its component nodes. + Used in the graph and logging to identify the NodeBlock and its component nodes. Function's ``.__name__`` is used if ``name`` is not provided. config Indicates the nested keys in a C-PAC pipeline configuration should configure a NodeBlock built from this function. 
If config is set to ``None``, then all other configuration-related entities must be specified from the From 6207348fb5382dda0fd800ade1ed5b771aedc92e Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Mon, 15 Jul 2024 17:41:51 -0400 Subject: [PATCH 48/93] :zap: Replace some `deepcopy` calls --- .../tests/test_preproc_connections.py | 2 +- CPAC/pipeline/engine/engine.py | 8 +- CPAC/pipeline/engine/resource.py | 77 +++++++++++-------- 3 files changed, 47 insertions(+), 40 deletions(-) diff --git a/CPAC/func_preproc/tests/test_preproc_connections.py b/CPAC/func_preproc/tests/test_preproc_connections.py index d7a4466ff4..f58380a7fd 100644 --- a/CPAC/func_preproc/tests/test_preproc_connections.py +++ b/CPAC/func_preproc/tests/test_preproc_connections.py @@ -81,7 +81,7 @@ "from-template_to-T1w_mode-image_desc-linear_xfm", ] -NUM_TESTS = 12 # number of parameterizations to run for many-parameter tests +NUM_TESTS = 48 # number of parameterizations to run for many-parameter tests def _filter_assertion_message( diff --git a/CPAC/pipeline/engine/engine.py b/CPAC/pipeline/engine/engine.py index f9bd8c857f..a3b6c609b6 100644 --- a/CPAC/pipeline/engine/engine.py +++ b/CPAC/pipeline/engine/engine.py @@ -17,7 +17,6 @@ """C-PAC pipeline engine.""" import ast -import copy import hashlib import json import os @@ -36,7 +35,7 @@ WFLOGGER, ) -PIPELINE_BLOCKS = list[NodeBlockFunction | "PIPELINE_BLOCKS"] +PIPELINE_BLOCKS = list["NodeBlockFunction | PIPELINE_BLOCKS"] class NodeBlock: @@ -55,7 +54,7 @@ def __init__( self.input_interface = [] if isinstance(node_block_function, tuple): self.input_interface = node_block_function[1] - node_block_function = node_block_function[0] + node_block_function = node_block_function[0] # noqa: PLW2901 if not isinstance(self.input_interface, list): self.input_interface = [self.input_interface] @@ -332,7 +331,7 @@ def connect_block(self, wf: pe.Workflow, cfg: Configuration, rpool: ResourcePool for label, connection in outs.items(): self.check_output(outputs, label, name) - new_json_info = copy.deepcopy(strat_pool.json) + new_json_info = strat_pool.json # transfer over data-specific json info # for example, if the input data json is _bold and the output is also _bold @@ -486,7 +485,6 @@ def wrap_block(node_blocks, interface, wf, cfg, strat_pool, pipe_num, opt): """ for block in node_blocks: - # new_pool = copy.deepcopy(strat_pool) for in_resource, val in interface.items(): if isinstance(val, tuple): strat_pool.set_data( diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index c4e849c146..74b1d1808d 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -18,8 +18,9 @@ import ast from collections.abc import KeysView -import copy +from copy import deepcopy from itertools import chain +import json import os from pathlib import Path import re @@ -59,6 +60,7 @@ from CPAC.utils.interfaces.function import Function from CPAC.utils.monitoring import ( getLogger, + UTLOGGER, WFLOGGER, ) from CPAC.utils.outputs import Outputs @@ -205,7 +207,7 @@ def __init__(self, data: tuple[pe.Node, str], json: dict) -> None: """Initialize a Resource.""" self.data = ResourceData(*data) """Tuple of source Node and output key.""" - self.json = json + self._json = json """Metadata.""" self._keys = {"data", "json"} """Dictionary-style subscriptable keys.""" @@ -245,6 +247,20 @@ def __str__(self) -> str: """Return string representation of Resource.""" return f"{self.data[0]}" + def get_json(self) -> dict[str | tuple, Any]: + """Return a deep copy of 
Resource JSON.""" + UTLOGGER.debug( + "%s is a deep copy of the attached JSON. Assign it to a variable before modifying or the changes will be ephemeral.", + self.__class__.__name__, + ) + return json.loads(json.dumps(self._json)) + + def set_json(self, value=dict) -> None: + """Update Resource JSON.""" + self._json.update(value) + + json = property(get_json, set_json, doc=get_json.__doc__) + class _Pool: """All Resources.""" @@ -664,7 +680,7 @@ def set_data( resource: str, node: pe.Node | pe.Workflow, output: str, - json_info: dict, + json_info: dict[str | tuple, Any], pipe_idx: PIPE_IDX, node_name: str, fork: bool = False, @@ -1861,7 +1877,7 @@ def get_strats( strat_str_list = [] strat_list_list = [] for strat_tuple in strats: - strat_list = list(copy.deepcopy(strat_tuple)) + strat_list = list(deepcopy(strat_tuple)) strat_str = str(strat_list) if strat_str not in strat_str_list: strat_str_list.append(strat_str) @@ -1886,11 +1902,11 @@ def get_strats( for xlabel in linked: if drop or xlabel is None: break - xjson = copy.deepcopy(json_dct[xlabel]) + xjson = deepcopy(json_dct[xlabel]) for ylabel in linked: if xlabel == ylabel or ylabel is None: continue - yjson = copy.deepcopy(json_dct[ylabel]) + yjson = deepcopy(json_dct[ylabel]) if "CpacVariant" not in xjson: xjson["CpacVariant"] = {} @@ -1971,25 +1987,20 @@ def get_strats( # `new_strats` is A DICTIONARY OF RESOURCEPOOL OBJECTS! each one is a new slice of the resource pool combined together. self.pipe_list.append(pipe_idx) if "CpacVariant" in strat_resource["json"]: - if "CpacVariant" not in new_strats[pipe_idx].json: - new_strats[pipe_idx].json["CpacVariant"] = {} - _variant = new_strats[pipe_idx].json["CpacVariant"] - assert isinstance(_variant, dict) - for younger_resource, variant_list in _variant.items(): + if "CpacVariant" not in new_strats[pipe_idx]._json: + new_strats[pipe_idx]._json["CpacVariant"] = {} + for younger_resource, variant_list in ( + new_strats[pipe_idx]._json["CpacVariant"].items() + ): if ( younger_resource - not in new_strats[pipe_idx].json["CpacVariant"] + not in new_strats[pipe_idx]._json["CpacVariant"] ): - new_strats[pipe_idx].json["CpacVariant"][ + new_strats[pipe_idx]._json["CpacVariant"][ younger_resource ] = variant_list # preserve each input's JSON info also - data_type = resource.split("_")[-1] - if data_type not in new_strats[pipe_idx].json["subjson"]: - new_strats[pipe_idx].json["subjson"][data_type] = {} - new_strats[pipe_idx].json["subjson"][data_type].update( - copy.deepcopy(strat_resource["json"]) - ) + new_strats[pipe_idx].preserve_json_info(resource, strat_resource) else: new_strats = {} for resource_strat_list in total_pool: @@ -2005,12 +2016,7 @@ def get_strats( new_strats[pipe_idx].json["subjson"] = {} new_strats[pipe_idx].json["CpacProvenance"] = cpac_prov # preserve each input's JSON info also - data_type = resource.split("_")[-1] - if data_type not in new_strats[pipe_idx].json["subjson"]: - new_strats[pipe_idx].json["subjson"][data_type] = {} - new_strats[pipe_idx].json["subjson"][data_type].update( - copy.deepcopy(strat_resource["json"]) - ) + new_strats[pipe_idx].preserve_json_info(resource, strat_resource) return new_strats def ingress_freesurfer(self) -> None: @@ -2943,12 +2949,15 @@ def get_data(self, resource, report_fetched=False): assert isinstance(_resource, Resource) return _resource.data - @property - def json(self) -> dict: - """Return strategy-specific JSON.""" - return self._json - - @json.setter - def json(self, strategy_json=dict) -> None: - """Update strategy-specific 
JSON.""" - self._json.update(strategy_json) + json = property( + fget=Resource.get_json, + fset=Resource.set_json, + doc="""Return a deep copy of strategy-specific JSON.""", + ) + + def preserve_json_info(self, resource: str, strat_resource: Resource) -> None: + """Preserve JSON info when updating a StratPool.""" + data_type = resource.split("_")[-1] + if data_type not in self._json["subjson"]: + self._json["subjson"][data_type] = {} + self._json["subjson"][data_type].update(strat_resource.json) From 588df00291752b2902aa0f976065076c90e73842 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Tue, 16 Jul 2024 15:36:24 -0400 Subject: [PATCH 49/93] :recycle: Typehint `StratPool.append_name` --- CPAC/pipeline/engine/resource.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 74b1d1808d..c6c8871557 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -265,7 +265,7 @@ def set_json(self, value=dict) -> None: class _Pool: """All Resources.""" - def __init__(self, name: str = "") -> None: + def __init__(self) -> None: """Initialize a ResourcePool or StratPool.""" self.ants_interp: str self.cfg: Configuration @@ -276,7 +276,7 @@ def __init__(self, name: str = "") -> None: self.fwhm: list[int] self.info: dict = {} self.logdir: Optional[str] - self.name = name + self.name: list[str] | str self.num_ants_cores: int self.num_cpus = int self.part_id: str @@ -423,8 +423,9 @@ def generate_prov_string(prov: LIST_OF_LIST_OF_STR | tuple) -> tuple[str, str]: resource = last_entry.split(":")[0] return (resource, str(prov)) - def get_name(self): - return self.name + def get_name(self) -> str: + """Return stringified name.""" + return str(self.name) def check_rpool(self, resource): if not isinstance(resource, list): @@ -1156,7 +1157,8 @@ def __init__( wf: Optional[pe.Workflow] = None, ) -> None: """Initialize a ResourcePool.""" - super().__init__(name=name) + self.name = name + super().__init__() if isinstance(data_paths, dict): data_paths = DataPaths(data_paths=data_paths) elif not data_paths: @@ -1914,7 +1916,7 @@ def get_strats( yjson["CpacVariant"] = {} current_strat = [] - for key, val in xjson["CpacVariant"].items(): + for val in xjson["CpacVariant"].values(): if isinstance(val, list): current_strat.append(val[0]) else: @@ -1927,7 +1929,7 @@ def get_strats( current_strat.append(f"NO-{spread_label}") other_strat = [] - for key, val in yjson["CpacVariant"].items(): + for val in yjson["CpacVariant"].values(): if isinstance(val, list): other_strat.append(val[0]) else: @@ -2847,17 +2849,21 @@ def __init__( self, rpool: Optional[dict] = None, *, - name: str = "", + name: str | list[str] = "", ) -> None: """Initialize a StratPool.""" - super().__init__(name=name) + super().__init__() if not rpool: self.rpool = STRAT_DICT({}) else: self.rpool = STRAT_DICT(rpool) self._json: dict[str, dict] = {"subjson": {}} + if not isinstance(name, list): + name = [name] + self.name: list[str] = name - def append_name(self, name): + def append_name(self, name: str) -> None: + """Append a name to the StratPool.""" self.name.append(name) @overload From fc2714a12c53f578067e2a1bc11a7ee1d2d6f74e Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Tue, 16 Jul 2024 16:16:58 -0400 Subject: [PATCH 50/93] :recycle: Clarify `ResourcePool.gather_pipes` --- CPAC/pipeline/engine/resource.py | 213 +++++++++++++----------------- CPAC/pipeline/test/test_engine.py | 6 +- CPAC/utils/utils.py | 1 + 3 files 
changed, 95 insertions(+), 125 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index c6c8871557..0a4088a294 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -26,7 +26,6 @@ import re from types import NoneType from typing import Any, Literal, NamedTuple, Optional, overload -import warnings from nipype.interfaces import utility as util # type: ignore [import-untyped] from nipype.interfaces.utility import Rename # type: ignore [import-untyped] @@ -1302,53 +1301,55 @@ def back_propogate_template_name( pass return - def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): - from CPAC.func_preproc.func_motion import motion_estimate_filter - - excl = [] - substring_excl = [] + def gather_pipes( # noqa: PLR0915 + self, + wf: pe.Workflow, + cfg: Configuration, + all_types: bool = False, + add_excl: Optional[list[str]] = None, + ) -> None: + """Gather pipes including naming, postproc, and expected outputs.""" + excl: list[str] = [] + # substring_excl: list[str] = [] outputs_logger = getLogger(f"{self.part_id}_expectedOutputs") expected_outputs = ExpectedOutputs() if add_excl: excl += add_excl - if "nonsmoothed" not in cfg.post_processing["spatial_smoothing"]["output"]: + if "nonsmoothed" not in cfg.post_processing["spatial_smoothing"]["output"]: # type: ignore [attr-defined] excl += Outputs.native_nonsmooth excl += Outputs.template_nonsmooth - if "raw" not in cfg.post_processing["z-scoring"]["output"]: + if "raw" not in cfg.post_processing["z-scoring"]["output"]: # type: ignore [attr-defined] excl += Outputs.native_raw excl += Outputs.template_raw - if not cfg.pipeline_setup["output_directory"]["write_debugging_outputs"]: + if not cfg.pipeline_setup["output_directory"]["write_debugging_outputs"]: # type: ignore [attr-defined] # substring_excl.append(['bold']) excl += Outputs.debugging for resource in self.keys(): - if resource not in Outputs.any: - continue - - if resource in excl: + if resource in excl or resource not in Outputs.any: continue - drop = False - for substring_list in substring_excl: - bool_list = [] - for substring in substring_list: - if substring in resource: - bool_list.append(True) - else: - bool_list.append(False) - for item in bool_list: - if not item: - break - else: - drop = True - if drop: - break - if drop: - continue + # drop = False + # for substring_list in substring_excl: + # bool_list = [] + # for substring in substring_list: + # if substring in resource: + # bool_list.append(True) + # else: + # bool_list.append(False) + # for item in bool_list: + # if not item: + # break + # else: + # drop = True + # if drop: + # break + # if drop: + # continue subdir = "other" if resource in Outputs.anat: @@ -1366,8 +1367,8 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): if "ses-" not in ses_id: ses_id = f"ses-{ses_id}" - out_dir = cfg.pipeline_setup["output_directory"]["path"] - pipe_name = cfg.pipeline_setup["pipeline_name"] + out_dir = cfg.pipeline_setup["output_directory"]["path"] # type: ignore [attr-defined] + pipe_name = cfg.pipeline_setup["pipeline_name"] # type: ignore [attr-defined] container = os.path.join(f"pipeline_{pipe_name}", part_id, ses_id) filename = f"{unique_id}_{res_in_filename(self.cfg, resource)}" @@ -1386,80 +1387,10 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): # TODO: have to link the pipe_idx's here. and call up 'desc-preproc_T1w' from a Sources in a json and replace. here. 
# TODO: can do the pipeline_description.json variants here too! - for resource in self.keys(): - if resource not in Outputs.any: - continue - - if resource in excl: - continue - - drop = False - for substring_list in substring_excl: - bool_list = [] - for substring in substring_list: - if substring in resource: - bool_list.append(True) - else: - bool_list.append(False) - for item in bool_list: - if not item: - break - else: - drop = True - if drop: - break - if drop: - continue - - num_variant = 0 + num_variant: Optional[int | str] = 0 if len(self.rpool[resource]) == 1: num_variant = "" - all_jsons = [ - self.rpool[resource][pipe_idx]["json"] - for pipe_idx in self.rpool[resource] - ] - unlabelled = { - key - for json_info in all_jsons - for key in json_info.get("CpacVariant", {}).keys() - if key not in (*motion_estimate_filter.outputs, "regressors") - } - if "bold" in unlabelled: - all_bolds = list( - chain.from_iterable( - json_info["CpacVariant"]["bold"] - for json_info in all_jsons - if "CpacVariant" in json_info - and "bold" in json_info["CpacVariant"] - ) - ) - # not any(not) because all is overloaded as a parameter here - if not any( - not re.match( - r"apply_(phasediff|blip)_to_timeseries_separately_.*", _bold - ) - for _bold in all_bolds - ): - # this fork point should only result in 0 or 1 forks - unlabelled.remove("bold") - del all_bolds - all_forks = { - key: set( - chain.from_iterable( - json_info["CpacVariant"][key] - for json_info in all_jsons - if "CpacVariant" in json_info - and key in json_info["CpacVariant"] - ) - ) - for key in unlabelled - } - # del all_jsons - for key, forks in all_forks.items(): - if len(forks) < 2: # noqa: PLR2004 - # no int suffix needed if only one fork - unlabelled.remove(key) - # del all_forks + unlabelled = self._get_unlabelled(resource) for pipe_idx in self.rpool[resource]: pipe_x = self.get_pipe_number(pipe_idx) json_info = self.rpool[resource][pipe_idx]["json"] @@ -1467,6 +1398,7 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): try: if unlabelled: + assert isinstance(num_variant, int) num_variant += 1 except TypeError: pass @@ -1476,7 +1408,7 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): except KeyError: pass - if out_dct["subdir"] == "other" and not all: + if out_dct["subdir"] == "other" and not all_types: continue unique_id = out_dct["unique_id"] @@ -1534,7 +1466,7 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): # grab the FWHM if smoothed for tag in resource.split("_"): if "desc-" in tag and "-sm" in tag: - fwhm_idx = pipe_idx.replace(f"{resource}:", "fwhm:") + fwhm_idx = str(pipe_idx).replace(f"{resource}:", "fwhm:") try: node, out = self.rpool["fwhm"][fwhm_idx]["data"] wf.connect(node, out, id_string, "fwhm") @@ -1548,7 +1480,7 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): atlas_id = None if not resource.endswith("desc-confounds_timeseries"): if resource.split("_")[-1] in atlas_suffixes: - atlas_idx = pipe_idx.replace(resource, "atlas_name") + atlas_idx = str(pipe_idx).replace(resource, "atlas_name") # need the single quote and the colon inside the double # quotes - it's the encoded pipe_idx # atlas_idx = new_idx.replace(f"'{temp_rsc}:", @@ -1562,13 +1494,8 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): atlas_id = tag.replace("atlas-", "") id_string.inputs.atlas_id = atlas_id else: - warnings.warn( - str( - LookupError( - "\n[!] 
No atlas ID found for " - f"{out_dct['filename']}.\n" - ) - ) + WFLOGGER.warning( + "\n[!] No atlas ID found for %s.\n", out_dct["filename"] ) nii_name = pe.Node(Rename(), name=f"nii_{resource_idx}_{pipe_x}") nii_name.inputs.keep_ext = True @@ -1595,13 +1522,11 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): WFLOGGER.warning(os_error) continue - write_json_imports = ["import os", "import json"] write_json = pe.Node( Function( input_names=["json_data", "filename"], output_names=["json_file"], function=write_output_json, - imports=write_json_imports, ), name=f"json_{resource_idx}_{pipe_x}", ) @@ -1611,13 +1536,13 @@ def gather_pipes(self, wf, cfg, all=False, add_incl=None, add_excl=None): ds = pe.Node(DataSink(), name=f"sinker_{resource_idx}_{pipe_x}") ds.inputs.parameterization = False ds.inputs.base_directory = out_dct["out_dir"] - ds.inputs.encrypt_bucket_keys = cfg.pipeline_setup["Amazon-AWS"][ + ds.inputs.encrypt_bucket_keys = cfg.pipeline_setup["Amazon-AWS"][ # type: ignore[attr-defined] "s3_encryption" ] ds.inputs.container = out_dct["container"] - if cfg.pipeline_setup["Amazon-AWS"]["aws_output_bucket_credentials"]: - ds.inputs.creds_path = cfg.pipeline_setup["Amazon-AWS"][ + if cfg.pipeline_setup["Amazon-AWS"]["aws_output_bucket_credentials"]: # type: ignore[attr-defined] + ds.inputs.creds_path = cfg.pipeline_setup["Amazon-AWS"][ # type: ignore[attr-defined] "aws_output_bucket_credentials" ] expected_outputs += ( @@ -2117,9 +2042,7 @@ def ingress_freesurfer(self) -> None: key, fs_ingress, "outputspec.data", {}, "", f"fs_{key}_ingress" ) else: - warnings.warn( - str(LookupError(f"\n[!] Path does not exist for {fullpath}.\n")) - ) + WFLOGGER.warning("\n[!] Path does not exist for %s.\n", fullpath) return @@ -2841,6 +2764,52 @@ def ingress_raw_anat_data(self) -> None: if self.cfg.surface_analysis["freesurfer"]["ingress_reconall"]: # type: ignore[attr-defined] self.ingress_freesurfer() + def _get_unlabelled(self, resource: str) -> set[str]: + """Get unlabelled resources (that need integer suffixes to differentiate).""" + from CPAC.func_preproc.func_motion import motion_estimate_filter + + all_jsons = [ + self.rpool[resource][pipe_idx]._json for pipe_idx in self.rpool[resource] + ] + unlabelled = { + key + for json_info in all_jsons + for key in json_info.get("CpacVariant", {}).keys() + if key not in (*motion_estimate_filter.outputs, "regressors") + } + if "bold" in unlabelled: + all_bolds = list( + chain.from_iterable( + json_info["CpacVariant"]["bold"] + for json_info in all_jsons + if "CpacVariant" in json_info and "bold" in json_info["CpacVariant"] + ) + ) + if all( + re.match(r"apply_(phasediff|blip)_to_timeseries_separately_.*", _bold) + for _bold in all_bolds + ): + # this fork point should only result in 0 or 1 forks + unlabelled.remove("bold") + del all_bolds + all_forks = { + key: set( + chain.from_iterable( + json_info["CpacVariant"][key] + for json_info in all_jsons + if "CpacVariant" in json_info and key in json_info["CpacVariant"] + ) + ) + for key in unlabelled + } + del all_jsons + for key, forks in all_forks.items(): + if len(forks) < 2: # noqa: PLR2004 + # no int suffix needed if only one fork + unlabelled.remove(key) + del all_forks + return unlabelled + class StratPool(_Pool): """A pool of ResourcePools keyed by strategy.""" diff --git a/CPAC/pipeline/test/test_engine.py b/CPAC/pipeline/test/test_engine.py index 8193fc744d..c3e27b3500 100644 --- a/CPAC/pipeline/test/test_engine.py +++ b/CPAC/pipeline/test/test_engine.py @@ -50,7 +50,7 @@ 
def test_ingress_func_raw_data(
     """Test :py:method:~`CPAC.pipeline.engine.resource.ResourcePool.ingress_raw_func_data`."""
     cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path)
     rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct)
-    rpool.gather_pipes(rpool.wf, cfg, all=True)
+    rpool.gather_pipes(rpool.wf, cfg, all_types=True)
 
 
 @pytest.mark.parametrize("preconfig", ["default"])
@@ -64,7 +64,7 @@ def test_ingress_anat_raw_data(
         data_paths=sub_data_dct,
     )
     rpool.ingress_raw_anat_data()
-    rpool.gather_pipes(rpool.wf, cfg, all=True)
+    rpool.gather_pipes(rpool.wf, cfg, all_types=True)
 
 
 @pytest.mark.parametrize("preconfig", ["default"])
@@ -77,7 +77,7 @@ def test_ingress_pipeconfig_data(
         cfg=cfg,
         data_paths=sub_data_dct,
     )
-    rpool.gather_pipes(rpool.wf, cfg, all=True)
+    rpool.gather_pipes(rpool.wf, cfg, all_types=True)
 
 
 @pytest.mark.parametrize("preconfig", ["anat-only"])
diff --git a/CPAC/utils/utils.py b/CPAC/utils/utils.py
index 99051e9368..8e179411ae 100644
--- a/CPAC/utils/utils.py
+++ b/CPAC/utils/utils.py
@@ -224,6 +224,7 @@ def create_id_string(
     return combine_multiple_entity_instances(res_in_filename(cfg, out_filename))
 
 
+@Function.sig_imports(["import os", "import json"])
 def write_output_json(json_data, filename, indent=3, basedir=None):
     """Write a dictionary to a JSON file."""
     if not basedir:

From d30496cb3b494114b50c21323fdf51a711e27d3f Mon Sep 17 00:00:00 2001
From: Jon Clucas
Date: Wed, 17 Jul 2024 13:28:36 -0400
Subject: [PATCH 51/93] :recycle: Move `connect_pipeline` from standalone function to `ResourcePool` method

---
 .../tests/test_preproc_connections.py        |   5 +-
 .../longitudinal_workflow.py                 |   7 +-
 CPAC/pipeline/cpac_pipeline.py               | 100 +---
 CPAC/pipeline/engine/__init__.py             |   4 +-
 CPAC/pipeline/engine/engine.py               | 430 +-----
 CPAC/pipeline/engine/nodeblock.py            | 189 +++++++-
 CPAC/pipeline/engine/resource.py             | 384 +++++++++++++++-
 CPAC/pipeline/test/test_engine.py            |   3 +-
 8 files changed, 561 insertions(+), 561 deletions(-)

diff --git a/CPAC/func_preproc/tests/test_preproc_connections.py b/CPAC/func_preproc/tests/test_preproc_connections.py
index f58380a7fd..9b7da2ed4c 100644
--- a/CPAC/func_preproc/tests/test_preproc_connections.py
+++ b/CPAC/func_preproc/tests/test_preproc_connections.py
@@ -36,7 +36,6 @@
 )
 from CPAC.func_preproc.func_preproc import func_normalize
 from CPAC.nuisance.nuisance import choose_nuisance_blocks
-from CPAC.pipeline.cpac_pipeline import connect_pipeline
 from CPAC.pipeline.engine import ResourcePool
 from CPAC.pipeline.nipype_pipeline_engine import Workflow
 from CPAC.registration.registration import (
@@ -81,7 +80,7 @@
     "from-template_to-T1w_mode-image_desc-linear_xfm",
 ]
 
-NUM_TESTS = 48  # number of parameterizations to run for many-parameter tests
+NUM_TESTS = 8  # number of parameterizations to run for many-parameter tests
 
 
 def _filter_assertion_message(
@@ -268,7 +267,7 @@ def test_motion_filter_connections(
     if not rpool.check_rpool("desc-cleaned_bold"):
         pipeline_blocks += choose_nuisance_blocks(c, generate_only)
     wf = Workflow(re.sub(r"[\[\]\-\:\_ \'\",]", "", str(rpool)))
-    connect_pipeline(wf, c, rpool, pipeline_blocks)
+    rpool.connect_pipeline(wf, c, pipeline_blocks)
     # Check that filtering is happening as expected
     filter_switch_key = [
         "functional_preproc",
diff --git a/CPAC/longitudinal_pipeline/longitudinal_workflow.py b/CPAC/longitudinal_pipeline/longitudinal_workflow.py
index 1d4e62a8f1..962d444a4e 100644
--- a/CPAC/longitudinal_pipeline/longitudinal_workflow.py
+++ b/CPAC/longitudinal_pipeline/longitudinal_workflow.py
@@ -28,7 
+28,6 @@ build_anat_preproc_stack, build_segmentation_stack, build_T1w_registration_stack, - connect_pipeline, ) from CPAC.pipeline.engine import ResourcePool from CPAC.pipeline.engine.nodeblock import nodeblock @@ -434,7 +433,7 @@ def anat_longitudinal_wf(subject_id, sub_list, config): pipeline_name="anat_longitudinal_pre-preproc", ) pipeline_blocks = build_anat_preproc_stack(rpool, config) - workflow = connect_pipeline(rpool.wf, config, rpool, pipeline_blocks) + workflow = rpool.connect_pipeline(rpool.wf, config, pipeline_blocks) session_wfs[unique_id] = rpool @@ -542,7 +541,7 @@ def anat_longitudinal_wf(subject_id, sub_list, config): pipeline_blocks = build_segmentation_stack(rpool, config, pipeline_blocks) - wf = connect_pipeline(rpool.wf, config, rpool, pipeline_blocks) + wf = rpool.connect_pipeline(rpool.wf, config, pipeline_blocks) excl = [ "space-longitudinal_desc-brain_T1w", @@ -647,7 +646,7 @@ def anat_longitudinal_wf(subject_id, sub_list, config): warp_longitudinal_seg_to_T1w, ] - wf = connect_pipeline(rpool.wf, config, rpool, pipeline_blocks) + wf = rpool.connect_pipeline(rpool.wf, config, pipeline_blocks) rpool.gather_pipes(wf, config) diff --git a/CPAC/pipeline/cpac_pipeline.py b/CPAC/pipeline/cpac_pipeline.py index b3192f11ea..8b932fc833 100644 --- a/CPAC/pipeline/cpac_pipeline.py +++ b/CPAC/pipeline/cpac_pipeline.py @@ -25,7 +25,7 @@ import sys import time from time import strftime -from typing import Any, Optional +from typing import Any import yaml import nipype # type: ignore [import-untyped] @@ -130,11 +130,10 @@ ingress_regressors, nuisance_regression_template, ) -from CPAC.pipeline import nipype_pipeline_engine as pe # pylint: disable=wrong-import-order from CPAC.pipeline.check_outputs import check_outputs -from CPAC.pipeline.engine import NodeBlock, PIPELINE_BLOCKS, ResourcePool +from CPAC.pipeline.engine import ResourcePool from CPAC.pipeline.nipype_pipeline_engine.plugins import ( LegacyMultiProcPlugin, MultiProcPlugin, @@ -201,12 +200,9 @@ from CPAC.utils.docs import version_report from CPAC.utils.monitoring import ( FMLOGGER, - getLogger, log_nodes_cb, log_nodes_initial, - LOGTAIL, set_up_logger, - WARNING_FREESURFER_OFF_WITH_DATA, WFLOGGER, ) from CPAC.utils.monitoring.draw_gantt_chart import resource_report @@ -1116,96 +1112,6 @@ def build_segmentation_stack(rpool, cfg, pipeline_blocks=None): return pipeline_blocks -def list_blocks(pipeline_blocks, indent=None): - """List node blocks line by line. 
- - Parameters - ---------- - pipeline_blocks : list or tuple - - indent : int or None - number of spaces after a tab indent - - Returns - ------- - str - """ - blockstring = yaml.dump( - [ - getattr( - block, - "__name__", - getattr( - block, - "name", - yaml.safe_load(list_blocks(list(block))) - if isinstance(block, (tuple, list, set)) - else str(block), - ), - ) - for block in pipeline_blocks - ] - ) - if isinstance(indent, int): - blockstring = "\n".join( - [ - "\t" + " " * indent + line.replace("- - ", "- ") - for line in blockstring.split("\n") - ] - ) - return blockstring - - -def connect_pipeline( - wf: pe.Workflow, - cfg: Configuration, - rpool: ResourcePool, - pipeline_blocks: PIPELINE_BLOCKS, -) -> pe.Workflow: - """Connect the pipeline blocks to the workflow.""" - WFLOGGER.info( - "Connecting pipeline blocks:\n%s", list_blocks(pipeline_blocks, indent=1) - ) - previous_nb: Optional[NodeBlock] = None - for block in pipeline_blocks: - try: - nb = NodeBlock(block, debug=cfg["pipeline_setup", "Debugging", "verbose"]) - wf = nb.connect_block(wf, cfg, rpool) - except LookupError as e: - if nb.name == "freesurfer_postproc": - WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA) - LOGTAIL["warnings"].append(WARNING_FREESURFER_OFF_WITH_DATA) - continue - previous_nb_str = ( - (f"after node block '{previous_nb.get_name()}':") - if previous_nb - else "at beginning:" - ) - # Alert user to block that raises error - if isinstance(block, list): - node_block_names = str([NodeBlock(b).get_name() for b in block]) - e.args = ( - f"When trying to connect one of the node blocks " - f"{node_block_names} " - f"to workflow '{wf}' {previous_nb_str} {e.args[0]}", - ) - else: - node_block_names = NodeBlock(block).get_name() - e.args = ( - f"When trying to connect node block " - f"'{node_block_names}' " - f"to workflow '{wf}' {previous_nb_str} {e.args[0]}", - ) - if cfg.pipeline_setup["Debugging"]["verbose"]: # type: ignore [attr-defined] - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug(e.args[0]) - verbose_logger.debug(rpool) - raise - previous_nb = nb - - return wf - - def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None): """Build a C-PAC workflow for a single subject.""" from CPAC.utils.datasource import gather_extraction_maps @@ -1600,7 +1506,7 @@ def build_workflow(subject_id, sub_dict, cfg, pipeline_name=None): # Connect the entire pipeline! try: - wf = connect_pipeline(rpool.wf, cfg, rpool, pipeline_blocks) + wf = rpool.connect_pipeline(rpool.wf, cfg, pipeline_blocks) except LookupError as lookup_error: missing_key = None errorstrings = [arg for arg in lookup_error.args[0].split("\n") if arg.strip()] diff --git a/CPAC/pipeline/engine/__init__.py b/CPAC/pipeline/engine/__init__.py index a63f117563..342e452435 100644 --- a/CPAC/pipeline/engine/__init__.py +++ b/CPAC/pipeline/engine/__init__.py @@ -17,16 +17,14 @@ """C-PAC engine.""" from .engine import ( - NodeBlock, - PIPELINE_BLOCKS, run_node_blocks, wrap_block, ) +from .nodeblock import NodeBlock from .resource import ResourcePool, StratPool __all__ = [ "NodeBlock", - "PIPELINE_BLOCKS", "ResourcePool", "StratPool", "run_node_blocks", diff --git a/CPAC/pipeline/engine/engine.py b/CPAC/pipeline/engine/engine.py index a3b6c609b6..04350b2809 100644 --- a/CPAC/pipeline/engine/engine.py +++ b/CPAC/pipeline/engine/engine.py @@ -16,430 +16,10 @@ # License along with C-PAC. If not, see . 
"""C-PAC pipeline engine.""" -import ast -import hashlib -import json import os -from typing import Any, Optional - -from nipype import config, logging # type: ignore [import-untyped] from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.engine.nodeblock import NODEBLOCK_INPUTS, NodeBlockFunction -from CPAC.pipeline.engine.resource import ResourcePool -from CPAC.utils.configuration.configuration import Configuration -from CPAC.utils.monitoring import ( - getLogger, - LOGTAIL, - WARNING_FREESURFER_OFF_WITH_DATA, - WFLOGGER, -) - -PIPELINE_BLOCKS = list["NodeBlockFunction | PIPELINE_BLOCKS"] - - -class NodeBlock: - def __init__( - self, - node_block_functions: NodeBlockFunction | PIPELINE_BLOCKS, - debug: bool = False, - ) -> None: - """Create a ``NodeBlock`` from a list of py:class:`~CPAC.pipeline.engine.nodeblock.NodeBlockFunction`s.""" - if not isinstance(node_block_functions, list): - node_block_functions = [node_block_functions] - - self.node_blocks: dict[str, Any] = {} - - for node_block_function in node_block_functions: # <---- sets up the NodeBlock object in case you gave it a list of node blocks instead of a single one - for option forking. - self.input_interface = [] - if isinstance(node_block_function, tuple): - self.input_interface = node_block_function[1] - node_block_function = node_block_function[0] # noqa: PLW2901 - if not isinstance(self.input_interface, list): - self.input_interface = [self.input_interface] - - if not isinstance(node_block_function, NodeBlockFunction): - # If the object is a plain function `__name__` will be more useful than `str()` - obj_str = ( - node_block_function.__name__ # type: ignore [attr-defined] - if hasattr(node_block_function, "__name__") - else str(node_block_function) - ) - msg = f'Object is not a nodeblock: "{obj_str}"' - raise TypeError(msg) - - name = node_block_function.name - self.name = name - self.node_blocks[name] = {} - - if self.input_interface: - for interface in self.input_interface: - for orig_input in node_block_function.inputs: - if isinstance(orig_input, tuple): - list_tup = list(orig_input) - if interface[0] in list_tup: - list_tup.remove(interface[0]) - list_tup.append(interface[1]) - node_block_function.inputs.remove(orig_input) - node_block_function.inputs.append(tuple(list_tup)) - elif orig_input == interface[0]: - node_block_function.inputs.remove(interface[0]) - node_block_function.inputs.append(interface[1]) - - for key, val in node_block_function.legacy_nodeblock_dict().items(): - self.node_blocks[name][key] = val - - self.node_blocks[name]["block_function"] = node_block_function - - # TODO: fix/replace below - self.outputs: dict[str, Optional[str]] = {} - for out in node_block_function.outputs: - self.outputs[out] = None - - self.options: list[str] | dict[str, Any] = ["base"] - if node_block_function.outputs is not None: - self.options = node_block_function.outputs - - WFLOGGER.info("Connecting %s...", name) - if debug: - config.update_config({"logging": {"workflow_level": "DEBUG"}}) - logging.update_logging(config) - WFLOGGER.debug( - '"inputs": %s\n\t "outputs": %s%s', - node_block_function.inputs, - list(self.outputs.keys()), - f'\n\t"options": {self.options}' - if self.options != ["base"] - else "", - ) - config.update_config({"logging": {"workflow_level": "INFO"}}) - logging.update_logging(config) - - def get_name(self): - return self.name - - def check_null(self, val): - if isinstance(val, str): - val = None if val.lower() == "none" else val - return val - - def check_output(self, outputs, 
label, name): - if label not in outputs: - msg = ( - f'\n[!] Output name "{label}" in the block ' - "function does not match the outputs list " - f'{outputs} in Node Block "{name}"\n' - ) - raise NameError(msg) - - def grab_tiered_dct(self, cfg, key_list): - cfg_dct = cfg.dict() - for key in key_list: - try: - cfg_dct = cfg_dct.get(key, {}) - except KeyError as ke: - msg = "[!] The config provided to the node block is not valid" - raise KeyError(msg) from ke - return cfg_dct - - def connect_block(self, wf: pe.Workflow, cfg: Configuration, rpool: ResourcePool): - debug = bool(cfg.pipeline_setup["Debugging"]["verbose"]) # type: ignore [attr-defined] - all_opts: list[str] = [] - - sidecar_additions = { - "CpacConfigHash": hashlib.sha1( - json.dumps(cfg.dict(), sort_keys=True).encode("utf-8") - ).hexdigest(), - "CpacConfig": cfg.dict(), - } - - if cfg["pipeline_setup"]["output_directory"].get("user_defined"): - sidecar_additions["UserDefined"] = cfg["pipeline_setup"][ - "output_directory" - ]["user_defined"] - - for name, block_dct in self.node_blocks.items(): - # iterates over either the single node block in the sequence, or a list of node blocks within the list of node blocks, i.e. for option forking. - switch = self.check_null(block_dct["switch"]) - config = self.check_null(block_dct["config"]) - option_key = self.check_null(block_dct["option_key"]) - option_val = self.check_null(block_dct["option_val"]) - inputs: NODEBLOCK_INPUTS = self.check_null(block_dct["inputs"]) - outputs = self.check_null(block_dct["outputs"]) - - block_function = block_dct["block_function"] - - opts = [] - if option_key and option_val: - if not isinstance(option_key, list): - option_key = [option_key] - if not isinstance(option_val, list): - option_val = [option_val] - if config: - key_list = config + option_key - else: - key_list = option_key - if "USER-DEFINED" in option_val: - # load custom config data into each 'opt' - opts = self.grab_tiered_dct(cfg, key_list) - else: - for option in option_val: - try: - if option in self.grab_tiered_dct(cfg, key_list): - # goes over the option_vals in the node block docstring, and checks if the user's pipeline config included it in the forking list - opts.append(option) - except AttributeError as err: - msg = f"{err}\nNode Block: {name}" - raise Exception(msg) - - if opts is None: - opts = [opts] - - elif option_key and not option_val: - # enables multiple config forking entries - if not isinstance(option_key[0], list): - msg = ( - f"[!] The option_key field ({option_key}) " - f"for {name} exists but there is no " - "option_val.\n\nIf you are trying to " - "populate multiple option keys, the " - "option_val field must contain a list of " - "a list.\n" - ) - raise ValueError(msg) - for option_config in option_key: - # option_config is a list of pipe config levels down to the option - if config: - key_list = config + option_config - else: - key_list = option_config - option_val = option_config[-1] - if option_val in self.grab_tiered_dct(cfg, key_list[:-1]): - opts.append(option_val) - else: # AND, if there are multiple option-val's (in a list) in the docstring, it gets iterated below in 'for opt in option' etc. AND THAT'S WHEN YOU HAVE TO DELINEATE WITHIN THE NODE BLOCK CODE!!! 
- opts = [None] - # THIS ALSO MEANS the multiple option-val's in docstring node blocks can be entered once in the entire node-block sequence, not in a list of multiples - if not opts: - # for node blocks where the options are split into different - # block functions - opts will be empty for non-selected - # options, and would waste the get_strats effort below - continue - all_opts += opts - - if not switch: - switch = [True] - else: - if config: - try: - key_list = config + switch - except TypeError as te: - msg = ( - "\n\n[!] Developer info: Docstring error " - f"for {name}, make sure the 'config' or " - "'switch' fields are lists.\n\n" - ) - raise TypeError(msg) from te - switch = self.grab_tiered_dct(cfg, key_list) - elif isinstance(switch[0], list): - # we have multiple switches, which is designed to only work if - # config is set to "None" - switch_list = [] - for key_list in switch: - val = self.grab_tiered_dct(cfg, key_list) - if isinstance(val, list): - # fork switches - if True in val: - switch_list.append(True) - if False in val: - switch_list.append(False) - else: - switch_list.append(val) - if False in switch_list: - switch = [False] - else: - switch = [True] - else: - # if config is set to "None" - key_list = switch - switch = self.grab_tiered_dct(cfg, key_list) - if not isinstance(switch, list): - switch = [switch] - if True in switch: - for ( - pipe_idx, - strat_pool, # strat_pool is a ResourcePool like {'desc-preproc_T1w': { 'json': info, 'data': (node, out) }, 'desc-brain_mask': etc.} - ) in rpool.get_strats(inputs, debug).items(): - # keep in mind rpool.get_strats(inputs) = {pipe_idx1: {'desc-preproc_T1w': etc.}, pipe_idx2: {..} } - fork = False in switch - for opt in opts: # it's a dictionary of ResourcePools called strat_pools, except those sub-ResourcePools only have one level! no pipe_idx strat keys. - # remember, you can get 'data' or 'json' from strat_pool with member functions - # strat_pool has all of the JSON information of all the inputs! - # so when we set_data below for the TOP-LEVEL MAIN RPOOL (not the strat_pool), we can generate new merged JSON information for each output. - # particularly, our custom 'CpacProvenance' field. 
- node_name = name - pipe_x = rpool.get_pipe_number(pipe_idx) - - replaced_inputs = [] - for interface in self.input_interface: - if isinstance(interface[1], list): - for input_name in interface[1]: - if strat_pool.check_rpool(input_name): - break - else: - input_name = interface[1] - strat_pool.copy_resource(input_name, interface[0]) - replaced_inputs.append(interface[0]) - try: - wf, outs = block_function(wf, cfg, strat_pool, pipe_x, opt) - except IOError as e: # duplicate node - WFLOGGER.warning(e) - continue - - if not outs: - if block_function.__name__ == "freesurfer_postproc": - WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA) - LOGTAIL["warnings"].append( - WARNING_FREESURFER_OFF_WITH_DATA - ) - continue - - if opt and len(option_val) > 1: - node_name = f"{node_name}_{opt}" - elif opt and "USER-DEFINED" in option_val: - node_name = f'{node_name}_{opt["Name"]}' - - if debug: - verbose_logger = getLogger("CPAC.engine") - verbose_logger.debug("\n=======================") - verbose_logger.debug("Node name: %s", node_name) - prov_dct = rpool.get_resource_strats_from_prov( - ast.literal_eval(str(pipe_idx)) - ) - for key, val in prov_dct.items(): - verbose_logger.debug("-------------------") - verbose_logger.debug("Input - %s:", key) - sub_prov_dct = rpool.get_resource_strats_from_prov(val) - for sub_key, sub_val in sub_prov_dct.items(): - sub_sub_dct = rpool.get_resource_strats_from_prov( - sub_val - ) - verbose_logger.debug(" sub-input - %s:", sub_key) - verbose_logger.debug(" prov = %s", sub_val) - verbose_logger.debug( - " sub_sub_inputs = %s", sub_sub_dct.keys() - ) - - for label, connection in outs.items(): - self.check_output(outputs, label, name) - new_json_info = strat_pool.json - - # transfer over data-specific json info - # for example, if the input data json is _bold and the output is also _bold - data_type = label.split("_")[-1] - if data_type in new_json_info["subjson"]: - if ( - "SkullStripped" - in new_json_info["subjson"][data_type] - ): - new_json_info["SkullStripped"] = new_json_info[ - "subjson" - ][data_type]["SkullStripped"] - - # determine sources for the outputs, i.e. 
all input data into the node block - new_json_info["Sources"] = [ - x - for x in strat_pool.get_entire_rpool() - if x != "json" and x not in replaced_inputs - ] - - if isinstance(outputs, dict): - new_json_info.update(outputs[label]) - if "Description" not in outputs[label]: - # don't propagate old Description - try: - del new_json_info["Description"] - except KeyError: - pass - if "Template" in outputs[label]: - template_key = outputs[label]["Template"] - if template_key in new_json_info["Sources"]: - # only if the pipeline config template key is entered as the 'Template' field - # otherwise, skip this and take in the literal 'Template' string - try: - new_json_info["Template"] = new_json_info[ - "subjson" - ][template_key]["Description"] - except KeyError: - pass - try: - new_json_info["Resolution"] = new_json_info[ - "subjson" - ][template_key]["Resolution"] - except KeyError: - pass - else: - # don't propagate old Description - try: - del new_json_info["Description"] - except KeyError: - pass - - if "Description" in new_json_info: - new_json_info["Description"] = " ".join( - new_json_info["Description"].split() - ) - - for sidecar_key, sidecar_value in sidecar_additions.items(): - if sidecar_key not in new_json_info: - new_json_info[sidecar_key] = sidecar_value - - try: - del new_json_info["subjson"] - except KeyError: - pass - - if fork or len(opts) > 1 or len(all_opts) > 1: - if "CpacVariant" not in new_json_info: - new_json_info["CpacVariant"] = {} - raw_label = rpool.get_raw_label(label) - if raw_label not in new_json_info["CpacVariant"]: - new_json_info["CpacVariant"][raw_label] = [] - new_json_info["CpacVariant"][raw_label].append( - node_name - ) - - rpool.set_data( - label, - connection[0], - connection[1], - new_json_info, - pipe_idx, - node_name, - fork, - ) - - wf, post_labels = rpool.post_process( - wf, - label, - connection, - new_json_info, - pipe_idx, - pipe_x, - outs, - ) - - if rpool.func_reg: - for postlabel in post_labels: - connection = (postlabel[1], postlabel[2]) - wf = rpool.derivative_xfm( - wf, - postlabel[0], - connection, - new_json_info, - pipe_idx, - pipe_x, - ) - return wf +from CPAC.utils.monitoring import WFLOGGER def wrap_block(node_blocks, interface, wf, cfg, strat_pool, pipe_num, opt): @@ -509,7 +89,7 @@ def wrap_block(node_blocks, interface, wf, cfg, strat_pool, pipe_num, opt): def run_node_blocks(blocks, data_paths, cfg=None): - from CPAC.pipeline.engine import NodeBlock + from CPAC.pipeline.engine.nodeblock import NodeBlock from CPAC.pipeline.engine.resource import ResourcePool if not cfg: @@ -541,9 +121,9 @@ def run_node_blocks(blocks, data_paths, cfg=None): run_blocks += blocks[1] for block in run_blocks: - wf = NodeBlock( - block, debug=cfg["pipeline_setup", "Debugging", "verbose"] - ).connect_block(wf, cfg, rpool) + wf = rpool.connect_block( + wf, NodeBlock(block, debug=cfg["pipeline_setup", "Debugging", "verbose"]) + ) rpool.gather_pipes(wf, cfg) wf.run() diff --git a/CPAC/pipeline/engine/nodeblock.py b/CPAC/pipeline/engine/nodeblock.py index f9090f2453..eba4f5617f 100644 --- a/CPAC/pipeline/engine/nodeblock.py +++ b/CPAC/pipeline/engine/nodeblock.py @@ -16,9 +16,22 @@ # License along with C-PAC. If not, see . 
"""Class and decorator for NodeBlock functions.""" -from typing import Any, Callable, Optional +from typing import Any, Callable, Optional, TYPE_CHECKING + +import yaml +from nipype import config, logging # type: ignore [import-untyped] +from nipype.pipeline.engine import Workflow # type: ignore[import-untyped] + +from CPAC.utils.configuration.configuration import Configuration +from CPAC.utils.monitoring import ( + WFLOGGER, +) + +if TYPE_CHECKING: + from CPAC.pipeline.engine.resource import Resource, StratPool NODEBLOCK_INPUTS = list[str | list | tuple] +PIPELINE_BLOCKS = list["NodeBlockFunction | PIPELINE_BLOCKS"] class NodeBlockFunction: @@ -81,27 +94,17 @@ def __init__( ] ).rstrip() - # all node block functions have this signature - def __call__(self, wf, cfg, strat_pool, pipe_num, opt=None): - """ - - Parameters - ---------- - wf : ~nipype.pipeline.engine.workflows.Workflow - - cfg : ~CPAC.utils.configuration.Configuration - - strat_pool - - pipe_num : int - - opt : str, optional - - Returns - ------- - wf : ~nipype.pipeline.engine.workflows.Workflow + def __call__( + self, + wf: Workflow, + cfg: Configuration, + strat_pool: "StratPool", + pipe_num: Optional[int | str], + opt: Optional[str] = None, + ) -> tuple[Workflow, dict[str, "Resource"]]: + """Call a NodeBlockFunction. - out : dict + All node block functions have the same signature. """ return self.func(wf, cfg, strat_pool, pipe_num, opt) @@ -136,6 +139,150 @@ def __str__(self) -> str: return f"NodeBlockFunction({self.name})" +class NodeBlock: + """A worflow subgraph composed of :py:class:`NodeBlockFunction`s.""" + + def __init__( + self, + node_block_functions: NodeBlockFunction | PIPELINE_BLOCKS, + debug: bool = False, + ) -> None: + """Create a ``NodeBlock`` from a list of py:class:`~CPAC.pipeline.engine.nodeblock.NodeBlockFunction`s.""" + if not isinstance(node_block_functions, list): + node_block_functions = [node_block_functions] + + self.node_blocks: dict[str, Any] = {} + + for node_block_function in node_block_functions: # <---- sets up the NodeBlock object in case you gave it a list of node blocks instead of a single one - for option forking. 
+ self.input_interface = [] + if isinstance(node_block_function, tuple): + self.input_interface = node_block_function[1] + node_block_function = node_block_function[0] # noqa: PLW2901 + if not isinstance(self.input_interface, list): + self.input_interface = [self.input_interface] + + if not isinstance(node_block_function, NodeBlockFunction): + # If the object is a plain function `__name__` will be more useful than `str()` + obj_str = ( + node_block_function.__name__ # type: ignore [attr-defined] + if hasattr(node_block_function, "__name__") + else str(node_block_function) + ) + msg = f'Object is not a nodeblock: "{obj_str}"' + raise TypeError(msg) + + name = node_block_function.name + self.name = name + self.node_blocks[name] = {} + + if self.input_interface: + for interface in self.input_interface: + for orig_input in node_block_function.inputs: + if isinstance(orig_input, tuple): + list_tup = list(orig_input) + if interface[0] in list_tup: + list_tup.remove(interface[0]) + list_tup.append(interface[1]) + node_block_function.inputs.remove(orig_input) + node_block_function.inputs.append(tuple(list_tup)) + elif orig_input == interface[0]: + node_block_function.inputs.remove(interface[0]) + node_block_function.inputs.append(interface[1]) + + for key, val in node_block_function.legacy_nodeblock_dict().items(): + self.node_blocks[name][key] = val + + self.node_blocks[name]["block_function"] = node_block_function + + # TODO: fix/replace below + self.outputs: dict[str, Optional[str]] = {} + for out in node_block_function.outputs: + self.outputs[out] = None + + self.options: list[str] | dict[str, Any] = ["base"] + if node_block_function.outputs is not None: + self.options = node_block_function.outputs + + WFLOGGER.info("Connecting %s...", name) + if debug: + config.update_config({"logging": {"workflow_level": "DEBUG"}}) + logging.update_logging(config) + WFLOGGER.debug( + '"inputs": %s\n\t "outputs": %s%s', + node_block_function.inputs, + list(self.outputs.keys()), + f'\n\t"options": {self.options}' + if self.options != ["base"] + else "", + ) + config.update_config({"logging": {"workflow_level": "INFO"}}) + logging.update_logging(config) + + def get_name(self): + return self.name + + def check_null(self, val): + if isinstance(val, str): + val = None if val.lower() == "none" else val + return val + + def check_output(self, outputs, label, name): + if label not in outputs: + msg = ( + f'\n[!] Output name "{label}" in the block ' + "function does not match the outputs list " + f'{outputs} in Node Block "{name}"\n' + ) + raise NameError(msg) + + def grab_tiered_dct(self, cfg, key_list): + cfg_dct = cfg.dict() + for key in key_list: + try: + cfg_dct = cfg_dct.get(key, {}) + except KeyError as ke: + msg = "[!] The config provided to the node block is not valid" + raise KeyError(msg) from ke + return cfg_dct + + @staticmethod + def list_blocks( + pipeline_blocks: PIPELINE_BLOCKS, indent: Optional[int] = None + ) -> str: + """List node blocks line by line. 
+ + Parameters + ---------- + pipeline_blocks: list of + + indent: number of spaces after a tab indent + """ + blockstring = yaml.dump( + [ + getattr( + block, + "__name__", + getattr( + block, + "name", + yaml.safe_load(NodeBlock.list_blocks(list(block))) + if isinstance(block, (tuple, list, set)) + else str(block), + ), + ) + for block in pipeline_blocks + ] + ) + if isinstance(indent, int): + blockstring = "\n".join( + [ + "\t" + " " * indent + line.replace("- - ", "- ") + for line in blockstring.split("\n") + ] + ) + return blockstring + + def nodeblock( name: Optional[str] = None, config: Optional[list[str]] = None, diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 0a4088a294..8f1e4a7515 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -19,6 +19,7 @@ import ast from collections.abc import KeysView from copy import deepcopy +import hashlib from itertools import chain import json import os @@ -37,7 +38,7 @@ ) from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.check_outputs import ExpectedOutputs -from CPAC.pipeline.engine.nodeblock import NODEBLOCK_INPUTS +from CPAC.pipeline.engine.nodeblock import NodeBlock from CPAC.pipeline.utils import name_fork, source_set from CPAC.registration.registration import transform_derivative from CPAC.resources.templates.lookup_table import lookup_identifier @@ -59,7 +60,9 @@ from CPAC.utils.interfaces.function import Function from CPAC.utils.monitoring import ( getLogger, + LOGTAIL, UTLOGGER, + WARNING_FREESURFER_OFF_WITH_DATA, WFLOGGER, ) from CPAC.utils.outputs import Outputs @@ -426,7 +429,8 @@ def get_name(self) -> str: """Return stringified name.""" return str(self.name) - def check_rpool(self, resource): + def check_rpool(self, resource: list[str] | str) -> bool: + """Check if a resource is present in the _Pool.""" if not isinstance(resource, list): resource = [resource] for name in resource: @@ -821,7 +825,9 @@ def get_json(self, resource, strat=None): raise Exception(msg) return strat_json - def get_cpac_provenance(self, resource, strat=None): + def get_cpac_provenance( + self, resource: list[str] | str, strat: Optional[str | list | tuple] = None + ) -> list: # NOTE: strat_resource has to be entered properly by the developer # it has to either be rpool[resource][strat] or strat_pool[resource] if isinstance(resource, list): @@ -1144,6 +1150,12 @@ def node_data(self, resource: str | tuple[str], **kwargs) -> ResourceData: class ResourcePool(_Pool): """A pool of Resources.""" + from CPAC.pipeline.engine.nodeblock import ( + NODEBLOCK_INPUTS, + NodeBlockFunction, + PIPELINE_BLOCKS, + ) + def __init__( self, name: str = "", @@ -1705,7 +1717,7 @@ def get_data( assert isinstance(_resource, Resource) return _resource.data - def get_strats( + def get_strats( # noqa: PLR0912,PLR0915 self, resources: NODEBLOCK_INPUTS, debug: bool = False ) -> dict[str | tuple, "StratPool"]: """Get a dictionary of StratPools.""" @@ -1829,11 +1841,11 @@ def get_strats( for xlabel in linked: if drop or xlabel is None: break - xjson = deepcopy(json_dct[xlabel]) + xjson = json.loads(json.dumps(json_dct[xlabel])) for ylabel in linked: if xlabel == ylabel or ylabel is None: continue - yjson = deepcopy(json_dct[ylabel]) + yjson = json.loads(json.dumps(json_dct[ylabel])) if "CpacVariant" not in xjson: xjson["CpacVariant"] = {} @@ -2764,6 +2776,366 @@ def ingress_raw_anat_data(self) -> None: if self.cfg.surface_analysis["freesurfer"]["ingress_reconall"]: # type: ignore[attr-defined] 
self.ingress_freesurfer() + def connect_block(self, wf: pe.Workflow, block: NodeBlock) -> pe.Workflow: # noqa: PLR0912,PLR0915 + """Connect a NodeBlock via the ResourcePool.""" + from CPAC.pipeline.engine.nodeblock import NODEBLOCK_INPUTS + + debug = bool(self.cfg.pipeline_setup["Debugging"]["verbose"]) # type: ignore [attr-defined] + all_opts: list[str] = [] + + sidecar_additions = { + "CpacConfigHash": hashlib.sha1( + json.dumps(self.cfg.dict(), sort_keys=True).encode("utf-8") + ).hexdigest(), + "CpacConfig": self.cfg.dict(), + } + + if self.cfg["pipeline_setup"]["output_directory"].get("user_defined"): + sidecar_additions["UserDefined"] = self.cfg["pipeline_setup"][ + "output_directory" + ]["user_defined"] + + for name, block_dct in block.node_blocks.items(): + # iterates over either the single node block in the sequence, or a list of node blocks within the list of node blocks, i.e. for option forking. + switch = block.check_null(block_dct["switch"]) + config = block.check_null(block_dct["config"]) + option_key = block.check_null(block_dct["option_key"]) + option_val = block.check_null(block_dct["option_val"]) + inputs: NODEBLOCK_INPUTS = block.check_null(block_dct["inputs"]) + outputs = block.check_null(block_dct["outputs"]) + + block_function = block_dct["block_function"] + + opts = [] + if option_key and option_val: + if not isinstance(option_key, list): + option_key = [option_key] + if not isinstance(option_val, list): + option_val = [option_val] + if config: + key_list = config + option_key + else: + key_list = option_key + if "USER-DEFINED" in option_val: + # load custom config data into each 'opt' + opts = block.grab_tiered_dct(self.cfg, key_list) + else: + for option in option_val: + try: + if option in block.grab_tiered_dct(self.cfg, key_list): + # goes over the option_vals in the node block docstring, and checks if the user's pipeline config included it in the forking list + opts.append(option) + except AttributeError as err: + msg = f"{err}\nNode Block: {name}" + raise Exception(msg) + + if opts is None: + opts = [opts] + + elif option_key and not option_val: + # enables multiple config forking entries + if not isinstance(option_key[0], list): + msg = ( + f"[!] The option_key field ({option_key}) " + f"for {name} exists but there is no " + "option_val.\n\nIf you are trying to " + "populate multiple option keys, the " + "option_val field must contain a list of " + "a list.\n" + ) + raise ValueError(msg) + for option_config in option_key: + # option_config is a list of pipe config levels down to the option + if config: + key_list = config + option_config + else: + key_list = option_config + option_val = option_config[-1] + if option_val in block.grab_tiered_dct(self.cfg, key_list[:-1]): + opts.append(option_val) + else: # AND, if there are multiple option-val's (in a list) in the docstring, it gets iterated below in 'for opt in option' etc. AND THAT'S WHEN YOU HAVE TO DELINEATE WITHIN THE NODE BLOCK CODE!!! + opts = [None] + # THIS ALSO MEANS the multiple option-val's in docstring node blocks can be entered once in the entire node-block sequence, not in a list of multiples + if not opts: + # for node blocks where the options are split into different + # block functions - opts will be empty for non-selected + # options, and would waste the get_strats effort below + continue + all_opts += opts + + if not switch: + switch = [True] + else: + if config: + try: + key_list = config + switch + except TypeError as te: + msg = ( + "\n\n[!] 
Developer info: Docstring error " + f"for {name}, make sure the 'config' or " + "'switch' fields are lists.\n\n" + ) + raise TypeError(msg) from te + switch = block.grab_tiered_dct(self.cfg, key_list) + elif isinstance(switch[0], list): + # we have multiple switches, which is designed to only work if + # config is set to "None" + switch_list = [] + for key_list in switch: + val = block.grab_tiered_dct(self.cfg, key_list) + if isinstance(val, list): + # fork switches + if True in val: + switch_list.append(True) + if False in val: + switch_list.append(False) + else: + switch_list.append(val) + if False in switch_list: + switch = [False] + else: + switch = [True] + else: + # if config is set to "None" + key_list = switch + switch = block.grab_tiered_dct(self.cfg, key_list) + if not isinstance(switch, list): + switch = [switch] + if True in switch: + for ( + pipe_idx, + strat_pool, # strat_pool is a ResourcePool like {'desc-preproc_T1w': { 'json': info, 'data': (node, out) }, 'desc-brain_mask': etc.} + ) in self.get_strats(inputs, debug).items(): + # keep in mind rpool.get_strats(inputs) = {pipe_idx1: {'desc-preproc_T1w': etc.}, pipe_idx2: {..} } + fork = False in switch + for opt in opts: # it's a dictionary of ResourcePools called strat_pools, except those sub-ResourcePools only have one level! no pipe_idx strat keys. + # remember, you can get 'data' or 'json' from strat_pool with member functions + # strat_pool has all of the JSON information of all the inputs! + # so when we set_data below for the TOP-LEVEL MAIN RPOOL (not the strat_pool), we can generate new merged JSON information for each output. + # particularly, our custom 'CpacProvenance' field. + node_name = name + pipe_x = self.get_pipe_number(pipe_idx) + + replaced_inputs = [] + for interface in block.input_interface: + if isinstance(interface[1], list): + for input_name in interface[1]: + if strat_pool.check_rpool(input_name): + break + else: + input_name = interface[1] + strat_pool.copy_resource(input_name, interface[0]) + replaced_inputs.append(interface[0]) + try: + wf, outs = block_function( + wf, self.cfg, strat_pool, pipe_x, opt + ) + except IOError as e: # duplicate node + WFLOGGER.warning(e) + continue + + if not outs: + if block_function.__name__ == "freesurfer_postproc": + WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA) + LOGTAIL["warnings"].append( + WARNING_FREESURFER_OFF_WITH_DATA + ) + continue + + if opt and len(option_val) > 1: + node_name = f"{node_name}_{opt}" + elif opt and "USER-DEFINED" in option_val: + node_name = f'{node_name}_{opt["Name"]}' + + if debug: + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug("\n=======================") + verbose_logger.debug("Node name: %s", node_name) + prov_dct = self.get_resource_strats_from_prov( + ast.literal_eval(str(pipe_idx)) + ) + for key, val in prov_dct.items(): + verbose_logger.debug("-------------------") + verbose_logger.debug("Input - %s:", key) + sub_prov_dct = self.get_resource_strats_from_prov(val) + for sub_key, sub_val in sub_prov_dct.items(): + sub_sub_dct = self.get_resource_strats_from_prov( + sub_val + ) + verbose_logger.debug(" sub-input - %s:", sub_key) + verbose_logger.debug(" prov = %s", sub_val) + verbose_logger.debug( + " sub_sub_inputs = %s", sub_sub_dct.keys() + ) + + for label, connection in outs.items(): + block.check_output(outputs, label, name) + new_json_info = strat_pool.json + + # transfer over data-specific json info + # for example, if the input data json is _bold and the output is also _bold + data_type = 
label.split("_")[-1] + if data_type in new_json_info["subjson"]: + if ( + "SkullStripped" + in new_json_info["subjson"][data_type] + ): + new_json_info["SkullStripped"] = new_json_info[ + "subjson" + ][data_type]["SkullStripped"] + + # determine sources for the outputs, i.e. all input data into the node block + new_json_info["Sources"] = [ + x + for x in strat_pool.get_entire_rpool() + if x != "json" and x not in replaced_inputs + ] + + if isinstance(outputs, dict): + new_json_info.update(outputs[label]) + if "Description" not in outputs[label]: + # don't propagate old Description + try: + del new_json_info["Description"] + except KeyError: + pass + if "Template" in outputs[label]: + template_key = outputs[label]["Template"] + if template_key in new_json_info["Sources"]: + # only if the pipeline config template key is entered as the 'Template' field + # otherwise, skip this and take in the literal 'Template' string + try: + new_json_info["Template"] = new_json_info[ + "subjson" + ][template_key]["Description"] + except KeyError: + pass + try: + new_json_info["Resolution"] = new_json_info[ + "subjson" + ][template_key]["Resolution"] + except KeyError: + pass + else: + # don't propagate old Description + try: + del new_json_info["Description"] + except KeyError: + pass + + if "Description" in new_json_info: + new_json_info["Description"] = " ".join( + new_json_info["Description"].split() + ) + + for sidecar_key, sidecar_value in sidecar_additions.items(): + if sidecar_key not in new_json_info: + new_json_info[sidecar_key] = sidecar_value + + try: + del new_json_info["subjson"] + except KeyError: + pass + + if fork or len(opts) > 1 or len(all_opts) > 1: + if "CpacVariant" not in new_json_info: + new_json_info["CpacVariant"] = {} + raw_label = self.get_raw_label(label) + if raw_label not in new_json_info["CpacVariant"]: + new_json_info["CpacVariant"][raw_label] = [] + new_json_info["CpacVariant"][raw_label].append( + node_name + ) + + self.set_data( + label, + connection[0], + connection[1], + new_json_info, + pipe_idx, + node_name, + fork, + ) + + wf, post_labels = self.post_process( + wf, + label, + connection, + new_json_info, + pipe_idx, + pipe_x, + outs, + ) + + if self.func_reg: + for postlabel in post_labels: + connection = (postlabel[1], postlabel[2]) # noqa: PLW2901 + wf = self.derivative_xfm( + wf, + postlabel[0], + connection, + new_json_info, + pipe_idx, + pipe_x, + ) + return wf + + def connect_pipeline( + self, + wf: pe.Workflow, + cfg: Configuration, + pipeline_blocks: PIPELINE_BLOCKS, + ) -> pe.Workflow: + """Connect the pipeline blocks to the workflow.""" + from CPAC.pipeline.engine.nodeblock import NodeBlockFunction, PIPELINE_BLOCKS + + WFLOGGER.info( + "Connecting pipeline blocks:\n%s", + NodeBlock.list_blocks(pipeline_blocks, indent=1), + ) + previous_nb: Optional[NodeBlockFunction | PIPELINE_BLOCKS] = None + for block in pipeline_blocks: + try: + wf = self.connect_block( + wf, + NodeBlock( + block, debug=cfg["pipeline_setup", "Debugging", "verbose"] + ), + ) + except LookupError as e: + if getattr(block, "name", "") == "freesurfer_postproc": + WFLOGGER.warning(WARNING_FREESURFER_OFF_WITH_DATA) + LOGTAIL["warnings"].append(WARNING_FREESURFER_OFF_WITH_DATA) + continue + previous_nb_str = ( + (f"after node block '{previous_nb.name}':") + if isinstance(previous_nb, NodeBlockFunction) + else "at beginning:" + ) + # Alert user to block that raises error + if isinstance(block, list): + node_block_names = str([NodeBlock(b).get_name() for b in block]) + e.args = ( + f"When trying 
to connect one of the node blocks " + f"{node_block_names} " + f"to workflow '{wf}' {previous_nb_str} {e.args[0]}", + ) + else: + node_block_names = NodeBlock(block).get_name() + e.args = ( + f"When trying to connect node block " + f"'{node_block_names}' " + f"to workflow '{wf}' {previous_nb_str} {e.args[0]}", + ) + if cfg.pipeline_setup["Debugging"]["verbose"]: # type: ignore [attr-defined] + verbose_logger = getLogger("CPAC.engine") + verbose_logger.debug(e.args[0]) + verbose_logger.debug(self) + raise + previous_nb = block + + return wf + def _get_unlabelled(self, resource: str) -> set[str]: """Get unlabelled resources (that need integer suffixes to differentiate).""" from CPAC.func_preproc.func_motion import motion_estimate_filter diff --git a/CPAC/pipeline/test/test_engine.py b/CPAC/pipeline/test/test_engine.py index c3e27b3500..4bbdd07f57 100644 --- a/CPAC/pipeline/test/test_engine.py +++ b/CPAC/pipeline/test/test_engine.py @@ -23,7 +23,6 @@ from CPAC.pipeline.cpac_pipeline import ( build_anat_preproc_stack, build_workflow, - connect_pipeline, ) from CPAC.pipeline.engine import ResourcePool from CPAC.utils.bids_utils import create_cpac_data_config @@ -89,7 +88,7 @@ def test_build_anat_preproc_stack( rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) pipeline_blocks = build_anat_preproc_stack(rpool, cfg) - wf = connect_pipeline(rpool.wf, cfg, rpool, pipeline_blocks) + wf = rpool.connect_pipeline(rpool.wf, cfg, pipeline_blocks) rpool.gather_pipes(wf, cfg) From a9a3c489cdea5b1078a78cf1fc5da204c12bd55e Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 13:41:27 -0400 Subject: [PATCH 52/93] :coffin: Remove `_Pool.node_data` method --- CPAC/distortion_correction/distortion_correction.py | 2 +- CPAC/func_preproc/func_motion.py | 2 +- CPAC/pipeline/engine/resource.py | 6 +----- CPAC/qc/xcp.py | 12 ++++++------ CPAC/registration/registration.py | 6 +++--- 5 files changed, 12 insertions(+), 16 deletions(-) diff --git a/CPAC/distortion_correction/distortion_correction.py b/CPAC/distortion_correction/distortion_correction.py index 0b0454e7d3..7b714aaace 100644 --- a/CPAC/distortion_correction/distortion_correction.py +++ b/CPAC/distortion_correction/distortion_correction.py @@ -764,7 +764,7 @@ def distcor_blip_fsl_topup(wf, cfg, strat_pool, pipe_num, opt=None): wf.connect(run_topup, "out_jacs", vnum_base, "jac_matrix_list") wf.connect(run_topup, "out_warps", vnum_base, "warp_field_list") - mean_bold = strat_pool.node_data("sbref") + mean_bold = strat_pool.get_data("sbref") flirt = pe.Node(interface=fsl.FLIRT(), name="flirt") flirt.inputs.dof = 6 diff --git a/CPAC/func_preproc/func_motion.py b/CPAC/func_preproc/func_motion.py index 7077a5fc31..dfec8ab91c 100644 --- a/CPAC/func_preproc/func_motion.py +++ b/CPAC/func_preproc/func_motion.py @@ -830,7 +830,7 @@ def motion_estimate_filter(wf, cfg, strat_pool, pipe_num, opt=None): notch.inputs.lowpass_cutoff = opt.get("lowpass_cutoff") notch.inputs.filter_order = opt.get("filter_order") - movement_parameters = strat_pool.node_data("desc-movementParameters_motion") + movement_parameters = strat_pool.get_data("desc-movementParameters_motion") wf.connect( movement_parameters.node, movement_parameters.out, notch, "motion_params" ) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 8f1e4a7515..b3202cd1e9 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -1140,11 +1140,7 @@ def post_process(self, wf, label, connection, json_info, pipe_idx, pipe_x, outs) fork=True, ) - 
return (wf, post_labels) - - def node_data(self, resource: str | tuple[str], **kwargs) -> ResourceData: - """Create ResourceData objects.""" - return ResourceData(*self.get_data(resource, **kwargs)) # type: ignore[attr-defined] + return wf, post_labels class ResourcePool(_Pool): diff --git a/CPAC/qc/xcp.py b/CPAC/qc/xcp.py index 43ddc03381..7fd59071bf 100644 --- a/CPAC/qc/xcp.py +++ b/CPAC/qc/xcp.py @@ -111,7 +111,7 @@ def _connect_motion(wf, nodes, strat_pool, qc_file, pipe_num): """ # pylint: disable=invalid-name, too-many-arguments try: - nodes = {**nodes, "censor-indices": strat_pool.node_data("censor-indices")} + nodes = {**nodes, "censor-indices": strat_pool.get_data("censor-indices")} wf.connect( nodes["censor-indices"].node, nodes["censor-indices"].out, @@ -501,7 +501,7 @@ def qc_xcp(wf, cfg, strat_pool, pipe_num, opt=None): ) qc_file.inputs.desc = "preproc" qc_file.inputs.regressors = ( - strat_pool.node_data("regressors") + strat_pool.get_data("regressors") .node.name.split("regressors_")[-1][::-1] .split("_", 1)[-1][::-1] ) @@ -511,7 +511,7 @@ def qc_xcp(wf, cfg, strat_pool, pipe_num, opt=None): op_string="-bin ", ) nodes = { - key: strat_pool.node_data(key) + key: strat_pool.get_data(key) for key in [ "bold", "desc-preproc_bold", @@ -526,13 +526,13 @@ def qc_xcp(wf, cfg, strat_pool, pipe_num, opt=None): ] if strat_pool.check_rpool(key) } - nodes["bold2template_mask"] = strat_pool.node_data( + nodes["bold2template_mask"] = strat_pool.get_data( ["space-template_desc-bold_mask", "space-EPItemplate_desc-bold_mask"] ) - nodes["template_mask"] = strat_pool.node_data( + nodes["template_mask"] = strat_pool.get_data( ["T1w-brain-template-mask", "EPI-template-mask"] ) - nodes["template"] = strat_pool.node_data( + nodes["template"] = strat_pool.get_data( ["T1w-brain-template-funcreg", "EPI-brain-template-funcreg"] ) resample_bold_mask_to_template = pe.Node( diff --git a/CPAC/registration/registration.py b/CPAC/registration/registration.py index af2112f77f..9db3fcfd4b 100644 --- a/CPAC/registration/registration.py +++ b/CPAC/registration/registration.py @@ -2616,7 +2616,7 @@ def register_ANTs_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None): node, out = connect wf.connect(node, out, ants_rc, "inputspec.input_brain") - t1w_brain_template = strat_pool.node_data("T1w-brain-template") + t1w_brain_template = strat_pool.get_data("T1w-brain-template") wf.connect( t1w_brain_template.node, t1w_brain_template.out, @@ -2635,10 +2635,10 @@ def register_ANTs_anat_to_template(wf, cfg, strat_pool, pipe_num, opt=None): ) wf.connect(node, out, ants_rc, "inputspec.input_head") - t1w_template = strat_pool.node_data("T1w-template") + t1w_template = strat_pool.get_data("T1w-template") wf.connect(t1w_template.node, t1w_template.out, ants_rc, "inputspec.reference_head") - brain_mask = strat_pool.node_data( + brain_mask = strat_pool.get_data( [ "space-T1w_desc-brain_mask", "space-longitudinal_desc-brain_mask", From 3613f8cdb9799222185aabb97e2f4f796ad8bd4d Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 14:16:16 -0400 Subject: [PATCH 53/93] :construction_worker: Livelog pytest --- .circleci/main.yml | 2 +- dev/circleci_data/test_in_image.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/main.yml b/.circleci/main.yml index c1cb4bc391..f071ea00ee 100644 --- a/.circleci/main.yml +++ b/.circleci/main.yml @@ -172,7 +172,7 @@ commands: name: Testing Singularity installation command: | pip install -r dev/circleci_data/requirements.txt - coverage run -m pytest 
--junitxml=test-results/junit.xml --continue-on-collection-errors dev/circleci_data/test_install.py + coverage run -m pytest --capture=no --junitxml=test-results/junit.xml --continue-on-collection-errors dev/circleci_data/test_install.py jobs: combine-coverage: diff --git a/dev/circleci_data/test_in_image.sh b/dev/circleci_data/test_in_image.sh index b62de84994..9420d7c1ab 100755 --- a/dev/circleci_data/test_in_image.sh +++ b/dev/circleci_data/test_in_image.sh @@ -4,7 +4,7 @@ export PATH=$PATH:/home/$(whoami)/.local/bin pip install -r /code/dev/circleci_data/requirements.txt # run test with coverage as module -python -m coverage run --include */CPAC/*,*/run.py,*/dev/docker_data/* -m pytest --ignore-glob=*test_install.py --junitxml=test-results/junit.xml --doctest-modules dev/circleci_data /code/CPAC +python -m coverage run --include */CPAC/*,*/run.py,*/dev/docker_data/* -m pytest --ignore-glob=*test_install.py --capture=no --junitxml=test-results/junit.xml --doctest-modules dev/circleci_data /code/CPAC echo "$?" > test-results/exitcode From 4bf5f00af8a57af92f29e1b54b55f61d9d29d2d1 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 14:20:36 -0400 Subject: [PATCH 54/93] :recycle: Move `post_process` method back into `ResourcePool` --- CPAC/pipeline/engine/nodeblock.py | 4 +- CPAC/pipeline/engine/resource.py | 304 ++++++++++++++++-------------- 2 files changed, 160 insertions(+), 148 deletions(-) diff --git a/CPAC/pipeline/engine/nodeblock.py b/CPAC/pipeline/engine/nodeblock.py index eba4f5617f..4326cc3c20 100644 --- a/CPAC/pipeline/engine/nodeblock.py +++ b/CPAC/pipeline/engine/nodeblock.py @@ -28,7 +28,7 @@ ) if TYPE_CHECKING: - from CPAC.pipeline.engine.resource import Resource, StratPool + from CPAC.pipeline.engine.resource import ResourceData, StratPool NODEBLOCK_INPUTS = list[str | list | tuple] PIPELINE_BLOCKS = list["NodeBlockFunction | PIPELINE_BLOCKS"] @@ -101,7 +101,7 @@ def __call__( strat_pool: "StratPool", pipe_num: Optional[int | str], opt: Optional[str] = None, - ) -> tuple[Workflow, dict[str, "Resource"]]: + ) -> tuple[Workflow, dict[str, "ResourceData"]]: """Call a NodeBlockFunction. All node block functions have the same signature. 
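
As an illustrative aside (not part of this diff): every node block function is produced by the `@nodeblock` decorator and follows the shared signature typed above — it receives the workflow, the pipeline `Configuration`, a `StratPool`, a pipe number, and an optional `opt`, and returns the workflow plus a dict mapping each output label to a `(node, output)` connection. A minimal sketch, with the decorator arguments, resource labels, and smoothing node chosen purely for illustration:

    from CPAC.pipeline import nipype_pipeline_engine as pe
    from CPAC.pipeline.engine.nodeblock import nodeblock
    from nipype.interfaces import fsl


    @nodeblock(
        name="example_smooth",            # hypothetical block name
        config=["functional_preproc"],    # hypothetical config path for the switch
        switch=["run"],
        inputs=["desc-preproc_bold"],
        outputs=["desc-sm_bold"],
    )
    def example_smooth(wf, cfg, strat_pool, pipe_num, opt=None):
        """Sketch of the shared node block signature."""
        # fetch the (node, output) connection for this strategy's input resource
        node, out = strat_pool.get_data("desc-preproc_bold")
        smooth = pe.Node(fsl.Smooth(fwhm=6), name=f"example_smooth_{pipe_num}")
        wf.connect(node, out, smooth, "in_file")
        # the returned dict is what connect_block writes back into the ResourcePool
        return wf, {"desc-sm_bold": (smooth, "smoothed_file")}

`connect_block` (added to `ResourcePool` in the next diff) calls such a function once per strategy returned by `get_strats`, with `opt` drawn from the block's forkable options.
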
diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index b3202cd1e9..5b30459bcf 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -38,7 +38,7 @@ ) from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.check_outputs import ExpectedOutputs -from CPAC.pipeline.engine.nodeblock import NodeBlock +from CPAC.pipeline.engine.nodeblock import NodeBlock, NodeBlockFunction from CPAC.pipeline.utils import name_fork, source_set from CPAC.registration.registration import transform_derivative from CPAC.resources.templates.lookup_table import lookup_identifier @@ -999,149 +999,6 @@ def filter_name(self, cfg: Configuration) -> str: return sidecar["CpacVariant"][key][0][::-1].split("_", 1)[0][::-1] return "none" - def post_process(self, wf, label, connection, json_info, pipe_idx, pipe_x, outs): - input_type = "func_derivative" - - post_labels = [(label, connection[0], connection[1])] - - if re.match(r"(.*_)?[ed]c[bw]$", label) or re.match(r"(.*_)?lfcd[bw]$", label): - # suffix: [eigenvector or degree] centrality [binarized or weighted] - # or lfcd [binarized or weighted] - mask = "template-specification-file" - elif "space-template" in label: - if "space-template_res-derivative_desc-bold_mask" in self.keys(): - mask = "space-template_res-derivative_desc-bold_mask" - else: - mask = "space-template_desc-bold_mask" - else: - mask = "space-bold_desc-brain_mask" - - mask_idx = None - for entry in json_info["CpacProvenance"]: - if isinstance(entry, list): - if entry[-1].split(":")[0] == mask: - mask_prov = entry - mask_idx = self.generate_prov_string(mask_prov)[1] - break - - if self.smoothing_bool: - if label in Outputs.to_smooth: - for smooth_opt in self.smooth_opts: - sm = spatial_smoothing( - f"{label}_smooth_{smooth_opt}_{pipe_x}", - self.fwhm, - input_type, - smooth_opt, - ) - wf.connect(connection[0], connection[1], sm, "inputspec.in_file") - node, out = self.get_data( - mask, pipe_idx=mask_idx, quick_single=mask_idx is None - ) - wf.connect(node, out, sm, "inputspec.mask") - - if "desc-" not in label: - if "space-" in label: - for tag in label.split("_"): - if "space-" in tag: - smlabel = label.replace(tag, f"{tag}_desc-sm") - break - else: - smlabel = f"desc-sm_{label}" - else: - for tag in label.split("_"): - if "desc-" in tag: - newtag = f"{tag}-sm" - smlabel = label.replace(tag, newtag) - break - - post_labels.append((smlabel, sm, "outputspec.out_file")) - - self.set_data( - smlabel, - sm, - "outputspec.out_file", - json_info, - pipe_idx, - f"spatial_smoothing_{smooth_opt}", - fork=True, - ) - self.set_data( - "fwhm", - sm, - "outputspec.fwhm", - json_info, - pipe_idx, - f"spatial_smoothing_{smooth_opt}", - fork=True, - ) - - if self.zscoring_bool: - for label_con_tpl in post_labels: - label = label_con_tpl[0] - connection = (label_con_tpl[1], label_con_tpl[2]) - if label in Outputs.to_zstd: - zstd = z_score_standardize(f"{label}_zstd_{pipe_x}", input_type) - - wf.connect(connection[0], connection[1], zstd, "inputspec.in_file") - - node, out = self.get_data(mask, pipe_idx=mask_idx) - wf.connect(node, out, zstd, "inputspec.mask") - - if "desc-" not in label: - if "space-template" in label: - new_label = label.replace( - "space-template", "space-template_desc-zstd" - ) - else: - new_label = f"desc-zstd_{label}" - else: - for tag in label.split("_"): - if "desc-" in tag: - newtag = f"{tag}-zstd" - new_label = label.replace(tag, newtag) - break - - post_labels.append((new_label, zstd, "outputspec.out_file")) - - 
self.set_data( - new_label, - zstd, - "outputspec.out_file", - json_info, - pipe_idx, - "zscore_standardize", - fork=True, - ) - - elif label in Outputs.to_fisherz: - zstd = fisher_z_score_standardize( - f"{label}_zstd_{pipe_x}", label, input_type - ) - - wf.connect( - connection[0], connection[1], zstd, "inputspec.correlation_file" - ) - - # if the output is 'space-template_desc-MeanSCA_correlations', we want 'desc-MeanSCA_timeseries' - oned = label.replace("correlations", "timeseries") - - node, out = outs[oned] - wf.connect(node, out, zstd, "inputspec.timeseries_oned") - - post_labels.append((new_label, zstd, "outputspec.out_file")) - - self.set_data( - new_label, - zstd, - "outputspec.out_file", - json_info, - pipe_idx, - "fisher_zscore_standardize", - fork=True, - ) - - return wf, post_labels - class ResourcePool(_Pool): """A pool of Resources.""" @@ -2800,7 +2657,7 @@ def connect_block(self, wf: pe.Workflow, block: NodeBlock) -> pe.Workflow: # no inputs: NODEBLOCK_INPUTS = block.check_null(block_dct["inputs"]) outputs = block.check_null(block_dct["outputs"]) - block_function = block_dct["block_function"] + block_function: NodeBlockFunction = block_dct["block_function"] opts = [] if option_key and option_val: @@ -3065,7 +2922,9 @@ def connect_block(self, wf: pe.Workflow, block: NodeBlock) -> pe.Workflow: # no if self.func_reg: for postlabel in post_labels: - connection = (postlabel[1], postlabel[2]) # noqa: PLW2901 + connection = ResourceData( # noqa: PLW2901 + postlabel[1], postlabel[2] + ) wf = self.derivative_xfm( wf, postlabel[0], @@ -3132,6 +2991,159 @@ def connect_pipeline( return wf + def post_process( + self, + wf: pe.Workflow, + label: str, + connection: ResourceData | tuple[pe.Node, str], + json_info: dict, + pipe_idx: str | tuple, + pipe_x: int, + outs: dict[str, ResourceData], + ) -> tuple[pe.Workflow, list[tuple[str, pe.Node | pe.Workflow, str]]]: + """Connect smoothing and z-scoring, if configured.""" + input_type = "func_derivative" + + post_labels = [(label, connection[0], connection[1])] + + if re.match(r"(.*_)?[ed]c[bw]$", label) or re.match(r"(.*_)?lfcd[bw]$", label): + # suffix: [eigenvector or degree] centrality [binarized or weighted] + # or lfcd [binarized or weighted] + mask = "template-specification-file" + elif "space-template" in label: + if "space-template_res-derivative_desc-bold_mask" in self.keys(): + mask = "space-template_res-derivative_desc-bold_mask" + else: + mask = "space-template_desc-bold_mask" + else: + mask = "space-bold_desc-brain_mask" + + mask_idx = None + for entry in json_info["CpacProvenance"]: + if isinstance(entry, list): + if entry[-1].split(":")[0] == mask: + mask_prov = entry + mask_idx = self.generate_prov_string(mask_prov)[1] + break + + if self.smoothing_bool: + if label in Outputs.to_smooth: + for smooth_opt in self.smooth_opts: + sm = spatial_smoothing( + f"{label}_smooth_{smooth_opt}_{pipe_x}", + self.fwhm, + input_type, + smooth_opt, + ) + wf.connect(connection[0], connection[1], sm, "inputspec.in_file") + node, out = self.get_data( + mask, pipe_idx=mask_idx, quick_single=mask_idx is None + ) + wf.connect(node, out, sm, "inputspec.mask") + + if "desc-" not in label: + if "space-" in label: + for tag in label.split("_"): + if "space-" in tag: + smlabel = label.replace(tag, f"{tag}_desc-sm") + break + else: + smlabel = f"desc-sm_{label}" + else: + for tag in label.split("_"): + if "desc-" in tag: + newtag = f"{tag}-sm" + smlabel = label.replace(tag, newtag) + break + + post_labels.append((smlabel, sm, "outputspec.out_file")) + + 
self.set_data( + smlabel, + sm, + "outputspec.out_file", + json_info, + pipe_idx, + f"spatial_smoothing_{smooth_opt}", + fork=True, + ) + self.set_data( + "fwhm", + sm, + "outputspec.fwhm", + json_info, + pipe_idx, + f"spatial_smoothing_{smooth_opt}", + fork=True, + ) + + if self.zscoring_bool: + for label_con_tpl in post_labels: + label = label_con_tpl[0] + connection = (label_con_tpl[1], label_con_tpl[2]) + if label in Outputs.to_zstd: + zstd = z_score_standardize(f"{label}_zstd_{pipe_x}", input_type) + + wf.connect(connection[0], connection[1], zstd, "inputspec.in_file") + + node, out = self.get_data(mask, pipe_idx=mask_idx) + wf.connect(node, out, zstd, "inputspec.mask") + + if "desc-" not in label: + if "space-template" in label: + new_label = label.replace( + "space-template", "space-template_desc-zstd" + ) + else: + new_label = f"desc-zstd_{label}" + else: + for tag in label.split("_"): + if "desc-" in tag: + newtag = f"{tag}-zstd" + new_label = label.replace(tag, newtag) + break + + post_labels.append((new_label, zstd, "outputspec.out_file")) + + self.set_data( + new_label, + zstd, + "outputspec.out_file", + json_info, + pipe_idx, + "zscore_standardize", + fork=True, + ) + + elif label in Outputs.to_fisherz: + zstd = fisher_z_score_standardize( + f"{label}_zstd_{pipe_x}", label, input_type + ) + + wf.connect( + connection[0], connection[1], zstd, "inputspec.correlation_file" + ) + + # if the output is 'space-template_desc-MeanSCA_correlations', we want 'desc-MeanSCA_timeseries' + oned = label.replace("correlations", "timeseries") + + node, out = outs[oned] + wf.connect(node, out, zstd, "inputspec.timeseries_oned") + + post_labels.append((new_label, zstd, "outputspec.out_file")) + + self.set_data( + new_label, + zstd, + "outputspec.out_file", + json_info, + pipe_idx, + "fisher_zscore_standardize", + fork=True, + ) + + return wf, post_labels + def _get_unlabelled(self, resource: str) -> set[str]: """Get unlabelled resources (that need integer suffixes to differentiate).""" from CPAC.func_preproc.func_motion import motion_estimate_filter From b0b94c955a296daafba9c2463a824c495d51e6ce Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 14:23:36 -0400 Subject: [PATCH 55/93] :recycle: Move `filter_name` method into `StratPool` --- CPAC/pipeline/engine/resource.py | 64 ++++++++++++++++---------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 5b30459bcf..d6fbcca824 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -967,38 +967,6 @@ def filtered_movement(self) -> bool: # not a strat_pool or no movement parameters in strat_pool return False - def filter_name(self, cfg: Configuration) -> str: - """ - Return the name of the filter for this strategy. - - In a strat_pool with filtered movement parameters. 
- """ - motion_filters = cfg[ - "functional_preproc", - "motion_estimates_and_correction", - "motion_estimate_filter", - "filters", - ] - if len(motion_filters) == 1 and cfg.switch_is_on( - [ - "functional_preproc", - "motion_estimates_and_correction", - "motion_estimate_filter", - "run", - ], - exclusive=True, - ): - return motion_filters[0]["Name"] - try: - key = "motion" - sidecar = self.get_json("desc-movementParameters_motion") - except KeyError: - sidecar = None - if sidecar is not None and "CpacVariant" in sidecar: - if sidecar["CpacVariant"][key]: - return sidecar["CpacVariant"][key][0][::-1].split("_", 1)[0][::-1] - return "none" - class ResourcePool(_Pool): """A pool of Resources.""" @@ -3310,6 +3278,38 @@ def get_data(self, resource, report_fetched=False): doc="""Return a deep copy of strategy-specific JSON.""", ) + def filter_name(self, cfg: Configuration) -> str: + """ + Return the name of the filter for this strategy. + + In a strat_pool with filtered movement parameters. + """ + motion_filters = cfg[ + "functional_preproc", + "motion_estimates_and_correction", + "motion_estimate_filter", + "filters", + ] + if len(motion_filters) == 1 and cfg.switch_is_on( + [ + "functional_preproc", + "motion_estimates_and_correction", + "motion_estimate_filter", + "run", + ], + exclusive=True, + ): + return motion_filters[0]["Name"] + try: + key = "motion" + sidecar = self.get_json("desc-movementParameters_motion") + except KeyError: + sidecar = None + if sidecar is not None and "CpacVariant" in sidecar: + if sidecar["CpacVariant"][key]: + return sidecar["CpacVariant"][key][0][::-1].split("_", 1)[0][::-1] + return "none" + def preserve_json_info(self, resource: str, strat_resource: Resource) -> None: """Preserve JSON info when updating a StratPool.""" data_type = resource.split("_")[-1] From 29b481d81fec07d5e0f9b7e1a7d4f4db825bae0f Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 14:25:12 -0400 Subject: [PATCH 56/93] :recycle: Move `filtered_movement` property into `StratPool` --- CPAC/pipeline/engine/resource.py | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index d6fbcca824..2094932b26 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -950,23 +950,6 @@ def derivative_xfm(self, wf, label, connection, json_info, pipe_idx, pipe_x): return wf - @property - def filtered_movement(self) -> bool: - """ - Check if the movement parameters have been filtered in this strat_pool. 
- - Returns - ------- - bool - """ - try: - return "motion_estimate_filter" in str( - self.get_cpac_provenance("desc-movementParameters_motion") - ) - except KeyError: - # not a strat_pool or no movement parameters in strat_pool - return False - class ResourcePool(_Pool): """A pool of Resources.""" @@ -3316,3 +3299,14 @@ def preserve_json_info(self, resource: str, strat_resource: Resource) -> None: if data_type not in self._json["subjson"]: self._json["subjson"][data_type] = {} self._json["subjson"][data_type].update(strat_resource.json) + + @property + def filtered_movement(self) -> bool: + """Check if the movement parameters have been filtered in this StratPool.""" + try: + return "motion_estimate_filter" in str( + self.get_cpac_provenance("desc-movementParameters_motion") + ) + except KeyError: + # not a strat_pool or no movement parameters in strat_pool + return False From 48e8b90e287db9128589987c049170553f3599b4 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 14:43:34 -0400 Subject: [PATCH 57/93] :recycle: Move `derivative_xfm` back into `ResourcePool` --- CPAC/pipeline/engine/resource.py | 151 +++++++++++++++++-------------- 1 file changed, 83 insertions(+), 68 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 2094932b26..767ceee82b 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -263,6 +263,11 @@ def set_json(self, value=dict) -> None: json = property(get_json, set_json, doc=get_json.__doc__) + @property + def cpac_provenance(self) -> list: + """Get CpacProvenance of a Resource.""" + return self.json["CpacProvenance"] + class _Pool: """All Resources.""" @@ -883,73 +888,6 @@ def flatten_prov(self, prov): return flat_prov return None - def derivative_xfm(self, wf, label, connection, json_info, pipe_idx, pipe_x): - if label in self.xfm: - json_info = dict(json_info) - - # get the bold-to-template transform from the current strat_pool info - xfm_idx = None - xfm_label = "from-bold_to-template_mode-image_xfm" - for entry in json_info["CpacProvenance"]: - if isinstance(entry, list): - if entry[-1].split(":")[0] == xfm_label: - xfm_prov = entry - xfm_idx = self.generate_prov_string(xfm_prov)[1] - break - - # but if the resource doesn't have the bold-to-template transform - # in its provenance/strategy, find the appropriate one for this - # current pipe_idx/strat - if not xfm_idx: - xfm_info = [] - for pipe_idx, entry in self.get(xfm_label).items(): - xfm_info.append((pipe_idx, entry["json"]["CpacProvenance"])) - else: - xfm_info = [(xfm_idx, xfm_prov)] - - for num, xfm_entry in enumerate(xfm_info): - xfm_idx, xfm_prov = xfm_entry - reg_tool = check_prov_for_regtool(xfm_prov) - - xfm = transform_derivative( - f"{label}_xfm_{pipe_x}_{num}", - label, - reg_tool, - self.num_cpus, - self.num_ants_cores, - ants_interp=self.ants_interp, - fsl_interp=self.fsl_interp, - opt=None, - ) - wf.connect(connection[0], connection[1], xfm, "inputspec.in_file") - - node, out = self.get_data("T1w-brain-template-deriv", quick_single=True) - wf.connect(node, out, xfm, "inputspec.reference") - - node, out = self.get_data( - "from-bold_to-template_mode-image_xfm", pipe_idx=xfm_idx - ) - wf.connect(node, out, xfm, "inputspec.transform") - - label = f"space-template_{label}" - json_info["Template"] = self.get_json_info( - "T1w-brain-template-deriv", None, "Description" - ) - new_prov = json_info["CpacProvenance"] + xfm_prov - json_info["CpacProvenance"] = new_prov - new_pipe_idx = 
self.generate_prov_string(new_prov) - self.set_data( - label, - xfm, - "outputspec.out_file", - json_info, - new_pipe_idx, - f"{label}_xfm_{num}", - fork=True, - ) - - return wf - class ResourcePool(_Pool): """A pool of Resources.""" @@ -2942,11 +2880,88 @@ def connect_pipeline( return wf + def derivative_xfm( + self, + wf: pe.Workflow, + label: str, + connection: ResourceData | tuple[pe.Node | pe.Workflow, str], + json_info: dict, + pipe_idx: str | tuple, + pipe_x: int, + ) -> pe.Workflow: + """Find the appropriate bold-to-template transform for given ``pipe_idx``.""" + if label in self.xfm: + json_info = dict(json_info) + + # get the bold-to-template transform from the current strat_pool info + xfm_idx: Optional[str | tuple] = None + xfm_label = "from-bold_to-template_mode-image_xfm" + for entry in json_info["CpacProvenance"]: + if isinstance(entry, list): + if entry[-1].split(":")[0] == xfm_label: + xfm_prov = entry + xfm_idx = self.generate_prov_string(xfm_prov)[1] + break + + # but if the resource doesn't have the bold-to-template transform + # in its provenance/strategy, find the appropriate one for this + # current pipe_idx/strat + xfm_info: list[tuple[str | tuple, list]] + if not xfm_idx: + xfm_info = [] + for pipe_idx, entry in self.get(xfm_label).items(): + xfm_info.append((pipe_idx, entry.cpac_provenance)) + else: + xfm_info = [(xfm_idx, xfm_prov)] + + for num, xfm_entry in enumerate(xfm_info): + xfm_idx, xfm_prov = xfm_entry + reg_tool = check_prov_for_regtool(xfm_prov) + + xfm = transform_derivative( + f"{label}_xfm_{pipe_x}_{num}", + label, + reg_tool, + self.num_cpus, + self.num_ants_cores, + ants_interp=self.ants_interp, + fsl_interp=self.fsl_interp, + opt=None, + ) + wf.connect(connection[0], connection[1], xfm, "inputspec.in_file") + + node, out = self.get_data("T1w-brain-template-deriv", quick_single=True) + wf.connect(node, out, xfm, "inputspec.reference") + + node, out = self.get_data( + "from-bold_to-template_mode-image_xfm", pipe_idx=xfm_idx + ) + wf.connect(node, out, xfm, "inputspec.transform") + + label = f"space-template_{label}" + json_info["Template"] = self.get_json_info( + "T1w-brain-template-deriv", None, "Description" + ) + new_prov = json_info["CpacProvenance"] + xfm_prov + json_info["CpacProvenance"] = new_prov + new_pipe_idx = self.generate_prov_string(new_prov) + self.set_data( + label, + xfm, + "outputspec.out_file", + json_info, + new_pipe_idx, + f"{label}_xfm_{num}", + fork=True, + ) + + return wf + def post_process( self, wf: pe.Workflow, label: str, - connection: ResourceData | tuple[pe.Node, str], + connection: ResourceData | tuple[pe.Node | pe.Workflow, str], json_info: dict, pipe_idx: str | tuple, pipe_x: int, From 74629acc965547f0ae65efd2a091b508ab70f641 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 14:44:18 -0400 Subject: [PATCH 58/93] :coffin: Remove unused `flatten_prov` method --- CPAC/pipeline/engine/resource.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 767ceee82b..cdb87d74d9 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -875,19 +875,6 @@ def get_resource_strats_from_prov(prov): strat_resource[resource] = entry return strat_resource - def flatten_prov(self, prov): - if isinstance(prov, str): - return [prov] - if isinstance(prov, list): - flat_prov = [] - for entry in prov: - if isinstance(entry, list): - flat_prov += self.flatten_prov(entry) - else: - flat_prov.append(entry) - return 
flat_prov - return None - class ResourcePool(_Pool): """A pool of Resources.""" From 748b98efd2c503c6cb7f6ab2dbca3f45fc1b7cd3 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 14:48:56 -0400 Subject: [PATCH 59/93] :recycle: Move `get_resource_strats_from_prov` back into `ResourcePool` --- CPAC/pipeline/engine/resource.py | 45 +++++++++++++++++--------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index cdb87d74d9..6e7adb7f16 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -854,27 +854,6 @@ def generate_prov_list(prov_str): raise TypeError(msg) return ast.literal_eval(prov_str) - @staticmethod - def get_resource_strats_from_prov(prov): - # if you provide the provenance of a resource pool output, this will - # return a dictionary of all the preceding resource pool entries that - # led to that one specific output: - # {rpool entry}: {that entry's provenance} - # {rpool entry}: {that entry's provenance} - strat_resource = {} - if isinstance(prov, str): - resource = prov.split(":")[0] - strat_resource[resource] = prov - else: - for spot, entry in enumerate(prov): - if isinstance(entry, list): - resource = entry[-1].split(":")[0] - strat_resource[resource] = entry - elif isinstance(entry, str): - resource = entry.split(":")[0] - strat_resource[resource] = entry - return strat_resource - class ResourcePool(_Pool): """A pool of Resources.""" @@ -3097,6 +3076,30 @@ def post_process( return wf, post_labels + @staticmethod + def get_resource_strats_from_prov(prov: list | str) -> dict[str, list | str]: + """Return all entries that led to this provenance. + + If you provide the provenance of a resource pool output, this will + return a dictionary of all the preceding resource pool entries that + led to that one specific output: + {rpool entry}: {that entry's provenance} + {rpool entry}: {that entry's provenance} + """ + strat_resource: dict[str, list | str] = {} + if isinstance(prov, str): + resource = prov.split(":")[0] + strat_resource[resource] = prov + else: + for entry in prov: + if isinstance(entry, list): + resource = entry[-1].split(":")[0] + strat_resource[resource] = entry + elif isinstance(entry, str): + resource = entry.split(":")[0] + strat_resource[resource] = entry + return strat_resource + def _get_unlabelled(self, resource: str) -> set[str]: """Get unlabelled resources (that need integer suffixes to differentiate).""" from CPAC.func_preproc.func_motion import motion_estimate_filter From 52c38bffb1e36dafff6476aa6d6ac20600fbb64c Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 14:49:30 -0400 Subject: [PATCH 60/93] :coffin: Remove unused `generate_prov_list` method --- CPAC/pipeline/engine/resource.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 6e7adb7f16..fd75928f84 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -844,16 +844,6 @@ def get_cpac_provenance( json_data = self.get_json(resource, strat) return json_data["CpacProvenance"] - @staticmethod - def generate_prov_list(prov_str): - if not isinstance(prov_str, str): - msg = ( - "\n[!] 
Developer info: the CpacProvenance " - f"entry for {prov_str!s} has to be a string.\n" - ) - raise TypeError(msg) - return ast.literal_eval(prov_str) - class ResourcePool(_Pool): """A pool of Resources.""" From f2423a250d7769281d26db67f5618ce4ae1a56d9 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 15:15:37 -0400 Subject: [PATCH 61/93] :recycle: Move `get_cpac_provenance` and `regressor_dct` into `StratPool` --- CPAC/nuisance/nuisance.py | 2 +- CPAC/pipeline/engine/resource.py | 111 ++++++++++++++++--------------- 2 files changed, 57 insertions(+), 56 deletions(-) diff --git a/CPAC/nuisance/nuisance.py b/CPAC/nuisance/nuisance.py index b47fc0886f..967647f429 100644 --- a/CPAC/nuisance/nuisance.py +++ b/CPAC/nuisance/nuisance.py @@ -2681,7 +2681,7 @@ def nuisance_regression(wf, cfg, strat_pool, pipe_num, opt, space, res=None): outputs : dict """ - opt = strat_pool.regressor_dct(cfg) + opt = strat_pool.regressor_dct bandpass = "Bandpass" in opt bandpass_before = ( bandpass diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index fd75928f84..a3ff5235da 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -298,7 +298,6 @@ def __init__(self) -> None: self.unique_id: str self.zscoring_bool: bool self.wf: pe.Workflow - self._regressor_dct: dict def __repr__(self) -> str: """Return reproducible ResourcePool string.""" @@ -647,43 +646,6 @@ def get_resource_from_prov(prov: LIST_OF_LIST_OF_STR) -> Optional[str]: return prov[-1].split(":")[0] return None - def regressor_dct(self, cfg) -> dict: - """Return the regressor dictionary for the current strategy if one exists. - - Raises KeyError otherwise. - """ - # pylint: disable=attribute-defined-outside-init - if hasattr(self, "_regressor_dct"): # memoized - # pylint: disable=access-member-before-definition - return self._regressor_dct - key_error = KeyError( - "[!] No regressors in resource pool. \n\n" - "Try turning on create_regressors or " - "ingress_regressors." 
- ) - _nr = cfg["nuisance_corrections", "2-nuisance_regression"] - if not hasattr(self, "timeseries"): - if _nr["Regressors"]: - self.regressors = {reg["Name"]: reg for reg in _nr["Regressors"]} - else: - self.regressors = [] - if self.check_rpool("parsed_regressors"): # ingressed regressor - # name regressor workflow without regressor_prov - strat_name = _nr["ingress_regressors"]["Regressors"]["Name"] - if strat_name in self.regressors: - self._regressor_dct = self.regressors[strat_name] - return self._regressor_dct - self._regressor_dct = _nr["ingress_regressors"]["Regressors"] - return self._regressor_dct - prov = self.get_cpac_provenance("desc-confounds_timeseries") - strat_name_components = prov[-1].split("_") - for _ in list(range(prov[-1].count("_"))): - reg_name = "_".join(strat_name_components[-_:]) - if isinstance(self.regressors, dict) and reg_name in self.regressors: - self._regressor_dct = self.regressors[reg_name] - return self._regressor_dct - raise key_error - def set_data( self, resource: str, @@ -830,20 +792,6 @@ def get_json(self, resource, strat=None): raise Exception(msg) return strat_json - def get_cpac_provenance( - self, resource: list[str] | str, strat: Optional[str | list | tuple] = None - ) -> list: - # NOTE: strat_resource has to be entered properly by the developer - # it has to either be rpool[resource][strat] or strat_pool[resource] - if isinstance(resource, list): - for _resource in resource: - try: - return self.get_cpac_provenance(_resource, strat) - except KeyError: - continue - json_data = self.get_json(resource, strat) - return json_data["CpacProvenance"] - class ResourcePool(_Pool): """A pool of Resources.""" @@ -1611,7 +1559,7 @@ def get_strats( # noqa: PLR0912,PLR0915 # make the merged strat label from the multiple inputs # strat_list is actually the merged CpacProvenance lists pipe_idx = str(strat_list) - new_strats[pipe_idx] = StratPool(name=pipe_idx) + new_strats[pipe_idx] = StratPool(name=pipe_idx, cfg=self.cfg) # new_strats is A DICTIONARY OF StratPool OBJECTS! new_strats[pipe_idx].json = {"CpacProvenance": strat_list} @@ -1647,7 +1595,7 @@ def get_strats( # noqa: PLR0912,PLR0915 strat_resource = self.rpool[resource][pipe_idx] # remember, `strat_resource` is a Resource. new_strats[pipe_idx] = StratPool( - rpool={resource: strat_resource}, name=pipe_idx + rpool={resource: strat_resource}, name=pipe_idx, cfg=self.cfg ) # <----- again, new_strats is A DICTIONARY OF StratPool OBJECTS! 
new_strats[pipe_idx].json = strat_resource.json new_strats[pipe_idx].json["subjson"] = {} @@ -3142,8 +3090,9 @@ class StratPool(_Pool): def __init__( self, - rpool: Optional[dict] = None, + cfg: Configuration, *, + rpool: Optional[dict] = None, name: str | list[str] = "", ) -> None: """Initialize a StratPool.""" @@ -3153,9 +3102,11 @@ def __init__( else: self.rpool = STRAT_DICT(rpool) self._json: dict[str, dict] = {"subjson": {}} + self.cfg = cfg if not isinstance(name, list): name = [name] self.name: list[str] = name + self._regressor_dct: dict def append_name(self, name: str) -> None: """Append a name to the StratPool.""" @@ -3256,6 +3207,18 @@ def get_data(self, resource, report_fetched=False): doc="""Return a deep copy of strategy-specific JSON.""", ) + def get_cpac_provenance(self, resource: list[str] | str) -> list: + """Get CpacProvenance for a given Resource.""" + # NOTE: strat_resource has to be entered properly by the developer + # it has to either be rpool[resource][strat] or strat_pool[resource] + if isinstance(resource, list): + for _resource in resource: + try: + return self.get_cpac_provenance(_resource) + except KeyError: + continue + return self.get(resource).cpac_provenance + def filter_name(self, cfg: Configuration) -> str: """ Return the name of the filter for this strategy. @@ -3295,6 +3258,44 @@ def preserve_json_info(self, resource: str, strat_resource: Resource) -> None: self._json["subjson"][data_type] = {} self._json["subjson"][data_type].update(strat_resource.json) + @property + def regressor_dct(self) -> dict: + """Return the regressor dictionary for the current strategy if one exists. + + Raises KeyError otherwise. + """ + # pylint: disable=attribute-defined-outside-init + if hasattr(self, "_regressor_dct"): # memoized + # pylint: disable=access-member-before-definition + return self._regressor_dct + key_error = KeyError( + "[!] No regressors in resource pool. \n\n" + "Try turning on create_regressors or " + "ingress_regressors." 
+ ) + _nr = self.cfg["nuisance_corrections", "2-nuisance_regression"] + if not hasattr(self, "timeseries"): + if _nr["Regressors"]: + self.regressors = {reg["Name"]: reg for reg in _nr["Regressors"]} + else: + self.regressors = [] + if self.check_rpool("parsed_regressors"): # ingressed regressor + # name regressor workflow without regressor_prov + strat_name = _nr["ingress_regressors"]["Regressors"]["Name"] + if strat_name in self.regressors: + self._regressor_dct = self.regressors[strat_name] + return self._regressor_dct + self._regressor_dct = _nr["ingress_regressors"]["Regressors"] + return self._regressor_dct + prov = self.get_cpac_provenance("desc-confounds_timeseries") + strat_name_components = prov[-1].split("_") + for _ in list(range(prov[-1].count("_"))): + reg_name = "_".join(strat_name_components[-_:]) + if isinstance(self.regressors, dict) and reg_name in self.regressors: + self._regressor_dct = self.regressors[reg_name] + return self._regressor_dct + raise key_error + @property def filtered_movement(self) -> bool: """Check if the movement parameters have been filtered in this StratPool.""" From 0f9099cd24e7d6c0ab087deaf4bf4f9dc61c3e4e Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 15:25:03 -0400 Subject: [PATCH 62/93] :recycle: Split `get_json` across `ResourcePool` and `StratPool` --- CPAC/pipeline/engine/resource.py | 34 ++++++++++---------------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index a3ff5235da..f68e5e6d76 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -770,28 +770,6 @@ def update_resource(self, resource, new_name): def get_pipe_idxs(self, resource): return self.rpool[resource].keys() - def get_json(self, resource, strat=None): - # NOTE: strat_resource has to be entered properly by the developer - # it has to either be rpool[resource][strat] or strat_pool[resource] - if strat: - strat_resource = self.rpool[resource][strat] - else: - # for strat_pools mainly, where there is no 'strat' key level - strat_resource = self.rpool[resource] - - # TODO: the below hits the exception if you use get_cpac_provenance on - # TODO: the main rpool (i.e. if strat=None) - if "json" in strat_resource: - strat_json = strat_resource["json"] - else: - msg = ( - "\n[!] 
Developer info: the JSON " - f"information for {resource} and {strat} " - f"is incomplete.\n" - ) - raise Exception(msg) - return strat_json - class ResourcePool(_Pool): """A pool of Resources.""" @@ -1363,6 +1341,10 @@ def get_data( assert isinstance(_resource, Resource) return _resource.data + def get_json(self, resource: str, strat: str | tuple) -> dict: + """Get JSON metadata from a Resource in a strategy.""" + return self.get(resource, pipe_idx=strat).json + def get_strats( # noqa: PLR0912,PLR0915 self, resources: NODEBLOCK_INPUTS, debug: bool = False ) -> dict[str | tuple, "StratPool"]: @@ -1414,7 +1396,7 @@ def get_strats( # noqa: PLR0912,PLR0915 if not rp_dct: len_inputs -= 1 continue - assert isinstance(rp_dct, dict) + assert isinstance(rp_dct, dict) and fetched_resource is not None sub_pool = [] if debug: verbose_logger.debug("len(rp_dct): %s\n", len(rp_dct)) @@ -3201,10 +3183,14 @@ def get_data(self, resource, report_fetched=False): assert isinstance(_resource, Resource) return _resource.data + def get_json(self, resource: str) -> dict: + """Get JSON metadata from a Resource in a StratPool.""" + return self.get(resource).json + json = property( fget=Resource.get_json, fset=Resource.set_json, - doc="""Return a deep copy of strategy-specific JSON.""", + doc="""Return a deep copy of full-StratPool-strategy-specific JSON.""", ) def get_cpac_provenance(self, resource: list[str] | str) -> list: From 6fb1dc160cd2873bd82e5b0b754d224a38a1d278 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 15:25:33 -0400 Subject: [PATCH 63/93] :coffin: Remove unused `get_pipe_idxs` method --- CPAC/pipeline/engine/resource.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index f68e5e6d76..68a0f187d2 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -767,9 +767,6 @@ def update_resource(self, resource, new_name): # move over any new pipe_idx's self.rpool[new_name].update(self.rpool[resource]) - def get_pipe_idxs(self, resource): - return self.rpool[resource].keys() - class ResourcePool(_Pool): """A pool of Resources.""" From a004ab6c7f450e69552096a91495cd5bae6d30aa Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 15:25:59 -0400 Subject: [PATCH 64/93] :coffin: Remove unused `update_resource` method --- CPAC/pipeline/engine/resource.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 68a0f187d2..fa06a22bf1 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -763,10 +763,6 @@ def copy_resource(self, resource, new_name): msg = f"[!] {resource} not in the resource pool." 
raise Exception(msg) - def update_resource(self, resource, new_name): - # move over any new pipe_idx's - self.rpool[new_name].update(self.rpool[resource]) - class ResourcePool(_Pool): """A pool of Resources.""" From 69cb6033b11ab4ecd08360265b31e45cb79c2fe4 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 15:28:21 -0400 Subject: [PATCH 65/93] :recycle: Move `copy_resource` method into `StratPool` --- CPAC/pipeline/engine/resource.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index fa06a22bf1..f5b60e7a68 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -756,13 +756,6 @@ def get( ) raise LookupError(msg) - def copy_resource(self, resource, new_name): - try: - self.rpool[new_name] = self.rpool[resource] - except KeyError: - msg = f"[!] {resource} not in the resource pool." - raise Exception(msg) - class ResourcePool(_Pool): """A pool of Resources.""" @@ -3198,6 +3191,14 @@ def get_cpac_provenance(self, resource: list[str] | str) -> list: continue return self.get(resource).cpac_provenance + def copy_resource(self, resource: str, new_name: str): + """Copy a resource within a StratPool.""" + try: + self.rpool[new_name] = self.rpool[resource] + except KeyError: + msg = f"[!] {resource} not in the resource pool." + raise Exception(msg) + def filter_name(self, cfg: Configuration) -> str: """ Return the name of the filter for this strategy. From 0a0b5a0408f75208919f5352c625886c43b79a59 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 15:37:39 -0400 Subject: [PATCH 66/93] :recycle: Move `get_json_info` back into `ResourcePool` --- CPAC/pipeline/engine/resource.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index f5b60e7a68..1c6acda03a 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -300,7 +300,7 @@ def __init__(self) -> None: self.wf: pe.Workflow def __repr__(self) -> str: - """Return reproducible ResourcePool string.""" + """Return reproducible _Pool string.""" params = [ f"{param}={getattr(self, param)}" for param in ["rpool", "name", "cfg", "pipe_list"] @@ -309,10 +309,10 @@ def __repr__(self) -> str: return f'{self.__class__.__name__}({", ".join(params)})' def __str__(self) -> str: - """Return string representation of ResourcePool.""" + """Return string representation of a _Pool.""" if self.name: - return f"ResourcePool({self.name}): {list(self.rpool)}" - return f"ResourcePool: {list(self.rpool)}" + return f"{self.__class__.__name__}({self.name}): {list(self.rpool)}" + return f"{self.__class__.__name__}: {list(self.rpool)}" def initialize_nipype_wf(self, name: str = "") -> None: """Initialize a new nipype workflow.""" @@ -620,13 +620,6 @@ def set_json_info(self, resource, pipe_idx, key, val): self.rpool[resource][pipe_idx]["json"] = {} self.rpool[resource][pipe_idx]["json"][key] = val - def get_json_info(self, resource, pipe_idx, key): - # TODO: key checks - if not pipe_idx: - for pipe_idx, val in self.rpool[resource].items(): - return val["json"][key] - return self.rpool[resource][pipe_idx][key] - @staticmethod def get_resource_from_prov(prov: LIST_OF_LIST_OF_STR) -> Optional[str]: """Return the last item in the provenance list. 
@@ -1331,6 +1324,15 @@ def get_json(self, resource: str, strat: str | tuple) -> dict: """Get JSON metadata from a Resource in a strategy.""" return self.get(resource, pipe_idx=strat).json + def get_json_info(self, resource: str, key: str) -> Any: + """Get a metadata value from a matching from any strategy.""" + # TODO: key checks + for val in self.rpool[resource].values(): + if key in val.json: + return val.json[key] + msg = f"{key} not found in any strategy for {resource} in {self}." + raise KeyError(msg) + def get_strats( # noqa: PLR0912,PLR0915 self, resources: NODEBLOCK_INPUTS, debug: bool = False ) -> dict[str | tuple, "StratPool"]: @@ -2812,7 +2814,7 @@ def derivative_xfm( label = f"space-template_{label}" json_info["Template"] = self.get_json_info( - "T1w-brain-template-deriv", None, "Description" + "T1w-brain-template-deriv", "Description" ) new_prov = json_info["CpacProvenance"] + xfm_prov json_info["CpacProvenance"] = new_prov From 839c7cd77ec4525c628064d8f56d311c857ae069 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 15:38:26 -0400 Subject: [PATCH 67/93] :coffin: Remove unused `set_json_info` method --- CPAC/pipeline/engine/resource.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 1c6acda03a..ac035ff18b 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -607,19 +607,6 @@ def get_strat_info(self, prov, label=None, logdir=None): strat_info, f"{label}_strat_info", indent=4, basedir=logdir ) - def set_json_info(self, resource, pipe_idx, key, val): - # TODO: actually should probably be able to inititialize resource/pipe_idx - if pipe_idx not in self.rpool[resource]: - msg = ( - "\n[!] DEV: The pipeline/strat ID does not exist " - f"in the resource pool.\nResource: {resource}" - f"Pipe idx: {pipe_idx}\nKey: {key}\nVal: {val}\n" - ) - raise Exception(msg) - if "json" not in self.rpool[resource][pipe_idx]: - self.rpool[resource][pipe_idx]["json"] = {} - self.rpool[resource][pipe_idx]["json"][key] = val - @staticmethod def get_resource_from_prov(prov: LIST_OF_LIST_OF_STR) -> Optional[str]: """Return the last item in the provenance list. From b8ca36cd18d6f929c23dfe4ab2369b271f7d5a4b Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 15:38:49 -0400 Subject: [PATCH 68/93] :coffin: Remove unused `get_strat_info` method --- CPAC/pipeline/engine/resource.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index ac035ff18b..bf55810007 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -590,23 +590,6 @@ def get_raw_label(resource: str) -> str: break return resource - def get_strat_info(self, prov, label=None, logdir=None): - strat_info = {} - for entry in prov: - if isinstance(entry, list): - strat_info[entry[-1].split(":")[0]] = entry - elif isinstance(entry, str): - strat_info[entry.split(":")[0]] = entry.split(":")[1] - if label: - if not logdir: - logdir = self.logdir - WFLOGGER.info( - "\n\nPrinting out strategy info for %s in %s\n", label, logdir - ) - write_output_json( - strat_info, f"{label}_strat_info", indent=4, basedir=logdir - ) - @staticmethod def get_resource_from_prov(prov: LIST_OF_LIST_OF_STR) -> Optional[str]: """Return the last item in the provenance list. 
From 2fc7244daa9e6aa632b6b11724e950ddcdfb1c2f Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 15:39:26 -0400 Subject: [PATCH 69/93] :recycle: Move `get_raw_label` back into `ResourcePool` --- CPAC/pipeline/engine/resource.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index bf55810007..00cf82581a 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -581,15 +581,6 @@ def __contains__(self, key) -> bool: """Return True if key in Pool, False otherwise.""" return key in self.keys() - @staticmethod - def get_raw_label(resource: str) -> str: - """Remove ``desc-*`` label.""" - for tag in resource.split("_"): - if "desc-" in tag: - resource = resource.replace(f"{tag}_", "") - break - return resource - @staticmethod def get_resource_from_prov(prov: LIST_OF_LIST_OF_STR) -> Optional[str]: """Return the last item in the provenance list. @@ -1303,6 +1294,15 @@ def get_json_info(self, resource: str, key: str) -> Any: msg = f"{key} not found in any strategy for {resource} in {self}." raise KeyError(msg) + @staticmethod + def get_raw_label(resource: str) -> str: + """Remove ``desc-*`` label.""" + for tag in resource.split("_"): + if "desc-" in tag: + resource = resource.replace(f"{tag}_", "") + break + return resource + def get_strats( # noqa: PLR0912,PLR0915 self, resources: NODEBLOCK_INPUTS, debug: bool = False ) -> dict[str | tuple, "StratPool"]: From 14c3e32ca7814275977914bcf32e9f27d80d38bc Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 15:44:44 -0400 Subject: [PATCH 70/93] :coffin: Remove unused `get_entire_rpool` method --- CPAC/pipeline/engine/resource.py | 5 +---- CPAC/utils/strategy.py | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 00cf82581a..7f6eb4877a 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -570,9 +570,6 @@ def get_pool_info(self): def set_pool_info(self, info_dct): self.info.update(info_dct) - def get_entire_rpool(self): - return self.rpool - def keys(self) -> KeysView: """Return rpool's keys.""" return self.rpool.keys() @@ -2573,7 +2570,7 @@ def connect_block(self, wf: pe.Workflow, block: NodeBlock) -> pe.Workflow: # no # determine sources for the outputs, i.e. 
all input data into the node block new_json_info["Sources"] = [ x - for x in strat_pool.get_entire_rpool() + for x in strat_pool.rpool if x != "json" and x not in replaced_inputs ] diff --git a/CPAC/utils/strategy.py b/CPAC/utils/strategy.py index 67f4de5770..a0520fdf6b 100644 --- a/CPAC/utils/strategy.py +++ b/CPAC/utils/strategy.py @@ -57,7 +57,7 @@ def get_node_from_resource_pool(self, resource_key): @property def resource_pool(self): """Strategy's ResourcePool dict.""" - return self._resource_pool.get_entire_rpool() + return self._resource_pool.rpool @property def rpool(self): From 77ffc16b9ca7517e6c398b1b53eb49ed6b3bf7c8 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 16:05:08 -0400 Subject: [PATCH 71/93] :coffin: Remove unused `wrap_block` function --- .../distortion_correction.py | 9 --- CPAC/pipeline/engine/__init__.py | 2 - CPAC/pipeline/engine/engine.py | 66 ------------------- CPAC/pipeline/engine/resource.py | 6 -- 4 files changed, 83 deletions(-) diff --git a/CPAC/distortion_correction/distortion_correction.py b/CPAC/distortion_correction/distortion_correction.py index 7b714aaace..5f0728b628 100644 --- a/CPAC/distortion_correction/distortion_correction.py +++ b/CPAC/distortion_correction/distortion_correction.py @@ -438,11 +438,6 @@ def distcor_blip_afni_qwarp(wf, cfg, strat_pool, pipe_num, opt=None): node, out = strat_pool.get_data("pe-direction") wf.connect(node, out, match_epi_fmaps_node, "bold_pedir") - # interface = {'bold': (match_epi_fmaps_node, 'opposite_pe_epi'), - # 'desc-brain_bold': 'opposite_pe_epi_brain'} - # wf, strat_pool = wrap_block([bold_mask_afni, bold_masking], - # interface, wf, cfg, strat_pool, pipe_num, opt) - func_get_brain_mask = pe.Node( interface=preprocess.Automask(), name=f"afni_mask_opposite_pe_{pipe_num}" ) @@ -530,10 +525,6 @@ def distcor_blip_afni_qwarp(wf, cfg, strat_pool, pipe_num, opt=None): wf.connect(node, out, undistort_func_mean, "reference_image") wf.connect(convert_afni_warp, "ants_warp", undistort_func_mean, "transforms") - # interface = {'desc-preproc_bold': (undistort_func_mean, 'output_image')} - # wf, strat_pool = wrap_block([bold_mask_afni], - # interface, wf, cfg, strat_pool, pipe_num, opt) - remask = pe.Node( interface=preprocess.Automask(), name=f"afni_remask_boldmask_{pipe_num}" ) diff --git a/CPAC/pipeline/engine/__init__.py b/CPAC/pipeline/engine/__init__.py index 342e452435..7642676910 100644 --- a/CPAC/pipeline/engine/__init__.py +++ b/CPAC/pipeline/engine/__init__.py @@ -18,7 +18,6 @@ from .engine import ( run_node_blocks, - wrap_block, ) from .nodeblock import NodeBlock from .resource import ResourcePool, StratPool @@ -28,5 +27,4 @@ "ResourcePool", "StratPool", "run_node_blocks", - "wrap_block", ] diff --git a/CPAC/pipeline/engine/engine.py b/CPAC/pipeline/engine/engine.py index 04350b2809..a50ae9a277 100644 --- a/CPAC/pipeline/engine/engine.py +++ b/CPAC/pipeline/engine/engine.py @@ -22,72 +22,6 @@ from CPAC.utils.monitoring import WFLOGGER -def wrap_block(node_blocks, interface, wf, cfg, strat_pool, pipe_num, opt): - """Wrap a list of node block functions to use within other node blocks. - - Example usage: - - # This calls the 'bold_mask_afni' and 'bold_masking' node blocks to - # skull-strip an EPI field map, without having to invoke the NodeBlock - # connection system. 
- - # The interface dictionary tells wrap_block to set the EPI field map - # in the parent node block's throw-away strat_pool as 'bold', so that - # the 'bold_mask_afni' and 'bold_masking' node blocks will see that as - # the 'bold' input. - - # It also tells wrap_block to set the 'desc-brain_bold' output of - # the 'bold_masking' node block to 'opposite_pe_epi_brain' (what it - # actually is) in the parent node block's strat_pool, which gets - # returned. - - # Note 'bold' and 'desc-brain_bold' (all on the left side) are the - # labels that 'bold_mask_afni' and 'bold_masking' understand/expect - # through their interfaces and docstrings. - - # The right-hand side (the values of the 'interface' dictionary) are - # what 'make sense' within the current parent node block - in this - # case, the distortion correction node block dealing with field maps. - - interface = {'bold': (match_epi_fmaps_node, 'opposite_pe_epi'), - 'desc-brain_bold': 'opposite_pe_epi_brain'} - wf, strat_pool = wrap_block([bold_mask_afni, bold_masking], - interface, wf, cfg, strat_pool, - pipe_num, opt) - - ...further downstream in the parent node block: - - node, out = strat_pool.get_data('opposite_pe_epi_brain') - - # The above line will connect the output of the 'bold_masking' node - # block (which is the skull-stripped version of 'opposite_pe_epi') to - # the next node. - - """ - for block in node_blocks: - for in_resource, val in interface.items(): - if isinstance(val, tuple): - strat_pool.set_data( - in_resource, val[0], val[1], {}, "", "", fork=True - ) # - if "sub_num" not in strat_pool.get_pool_info(): - strat_pool.set_pool_info({"sub_num": 0}) - sub_num = strat_pool.get_pool_info()["sub_num"] - - wf, outputs = block(wf, cfg, strat_pool, f"{pipe_num}-{sub_num}", opt) # - for out, val in outputs.items(): - if out in interface and isinstance(interface[out], str): - strat_pool.set_data( - interface[out], outputs[out][0], outputs[out][1], {}, "", "" - ) - else: - strat_pool.set_data(out, outputs[out][0], outputs[out][1], {}, "", "") - sub_num += 1 - strat_pool.set_pool_info({"sub_num": sub_num}) - - return (wf, strat_pool) - - def run_node_blocks(blocks, data_paths, cfg=None): from CPAC.pipeline.engine.nodeblock import NodeBlock from CPAC.pipeline.engine.resource import ResourcePool diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 7f6eb4877a..50e18f5935 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -564,12 +564,6 @@ def create_func_datasource( def get_pipe_number(self, pipe_idx): return self.pipe_list.index(pipe_idx) - def get_pool_info(self): - return self.info - - def set_pool_info(self, info_dct): - self.info.update(info_dct) - def keys(self) -> KeysView: """Return rpool's keys.""" return self.rpool.keys() From ef402e593c7564df5c6178e34e89478394600b05 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 16:11:07 -0400 Subject: [PATCH 72/93] :recycle: Move `_get_pipe_number` back into `ResourcePool` --- CPAC/pipeline/engine/resource.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 50e18f5935..ccbea72d42 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -561,9 +561,6 @@ def create_func_datasource( return wf - def get_pipe_number(self, pipe_idx): - return self.pipe_list.index(pipe_idx) - def keys(self) -> KeysView: """Return rpool's keys.""" return self.rpool.keys() @@ -959,7 +956,7 
@@ def gather_pipes( # noqa: PLR0915 num_variant = "" unlabelled = self._get_unlabelled(resource) for pipe_idx in self.rpool[resource]: - pipe_x = self.get_pipe_number(pipe_idx) + pipe_x = self._get_pipe_number(pipe_idx) json_info = self.rpool[resource][pipe_idx]["json"] out_dct = self.rpool[resource][pipe_idx]["out"] @@ -1727,7 +1724,7 @@ def ingress_output_dir(self) -> None: data_label = data_label.replace("brain_mask", "bold_mask") try: - pipe_x = self.get_pipe_number(pipe_idx) + pipe_x = self._get_pipe_number(pipe_idx) except ValueError: pipe_x = len(self.pipe_list) if filepath in outdir_anat: @@ -2491,7 +2488,7 @@ def connect_block(self, wf: pe.Workflow, block: NodeBlock) -> pe.Workflow: # no # so when we set_data below for the TOP-LEVEL MAIN RPOOL (not the strat_pool), we can generate new merged JSON information for each output. # particularly, our custom 'CpacProvenance' field. node_name = name - pipe_x = self.get_pipe_number(pipe_idx) + pipe_x = self._get_pipe_number(pipe_idx) replaced_inputs = [] for interface in block.input_interface: @@ -2969,6 +2966,10 @@ def get_resource_strats_from_prov(prov: list | str) -> dict[str, list | str]: strat_resource[resource] = entry return strat_resource + def _get_pipe_number(self, pipe_idx: str | tuple) -> int: + """Return the index of a strategy in ``self.pipe_list``.""" + return self.pipe_list.index(pipe_idx) + def _get_unlabelled(self, resource: str) -> set[str]: """Get unlabelled resources (that need integer suffixes to differentiate).""" from CPAC.func_preproc.func_motion import motion_estimate_filter From c9449407fce80e764b199f39c33267c0a3478f41 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 16:13:05 -0400 Subject: [PATCH 73/93] :recycle: Move `create_func_datasource` method into `ResourcePool` --- CPAC/pipeline/engine/resource.py | 238 +++++++++++++++---------------- 1 file changed, 119 insertions(+), 119 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index ccbea72d42..dae9befcce 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -442,125 +442,6 @@ def check_rpool(self, resource: list[str] | str) -> bool: return True return False - def create_func_datasource( - self, rest_dict: dict, wf_name="func_datasource" - ) -> pe.Workflow: - """Create a workflow to gather timeseries data. - - Return the functional timeseries-related file paths for each series/scan from the - dictionary of functional files described in the data configuration (sublist) YAML - file. - - Scan input (from inputnode) is an iterable. 
- """ - wf = pe.Workflow(name=wf_name) - - inputnode = pe.Node( - util.IdentityInterface( - fields=["subject", "scan", "creds_path", "dl_dir"], - mandatory_inputs=True, - ), - name="inputnode", - ) - - outputnode = pe.Node( - util.IdentityInterface( - fields=[ - "subject", - "rest", - "scan", - "scan_params", - "phase_diff", - "magnitude", - ] - ), - name="outputspec", - ) - - # have this here for now because of the big change in the data - # configuration format - # (Not necessary with ingress - format does not comply) - if not self.check_rpool("derivatives-dir"): - check_scan = pe.Node( - Function( - input_names=["func_scan_dct", "scan"], - output_names=[], - function=check_func_scan, - as_module=True, - ), - name="check_func_scan", - ) - - check_scan.inputs.func_scan_dct = rest_dict - wf.connect(inputnode, "scan", check_scan, "scan") - - # get the functional scan itself - selectrest = pe.Node( - Function( - input_names=["scan", "rest_dict", "resource"], - output_names=["file_path"], - function=get_rest, - as_module=True, - ), - name="selectrest", - ) - selectrest.inputs.rest_dict = rest_dict - selectrest.inputs.resource = "scan" - wf.connect(inputnode, "scan", selectrest, "scan") - - # check to see if it's on an Amazon AWS S3 bucket, and download it, if it - # is - otherwise, just return the local file path - check_s3_node = pe.Node( - Function( - input_names=["file_path", "creds_path", "dl_dir", "img_type"], - output_names=["local_path"], - function=check_for_s3, - as_module=True, - ), - name="check_for_s3", - ) - - wf.connect(selectrest, "file_path", check_s3_node, "file_path") - wf.connect(inputnode, "creds_path", check_s3_node, "creds_path") - wf.connect(inputnode, "dl_dir", check_s3_node, "dl_dir") - check_s3_node.inputs.img_type = "func" - - wf.connect(inputnode, "subject", outputnode, "subject") - wf.connect(check_s3_node, "local_path", outputnode, "rest") - wf.connect(inputnode, "scan", outputnode, "scan") - - # scan parameters CSV - select_scan_params = pe.Node( - Function( - input_names=["scan", "rest_dict", "resource"], - output_names=["file_path"], - function=get_rest, - as_module=True, - ), - name="select_scan_params", - ) - select_scan_params.inputs.rest_dict = rest_dict - select_scan_params.inputs.resource = "scan_parameters" - wf.connect(inputnode, "scan", select_scan_params, "scan") - - # if the scan parameters file is on AWS S3, download it - s3_scan_params = pe.Node( - Function( - input_names=["file_path", "creds_path", "dl_dir", "img_type"], - output_names=["local_path"], - function=check_for_s3, - as_module=True, - ), - name="s3_scan_params", - ) - - wf.connect(select_scan_params, "file_path", s3_scan_params, "file_path") - wf.connect(inputnode, "creds_path", s3_scan_params, "creds_path") - wf.connect(inputnode, "dl_dir", s3_scan_params, "dl_dir") - wf.connect(s3_scan_params, "local_path", outputnode, "scan_params") - - return wf - def keys(self) -> KeysView: """Return rpool's keys.""" return self.rpool.keys() @@ -2182,6 +2063,125 @@ def ingress_pipeconfig_paths(self): f"{key}_config_ingress", ) + def create_func_datasource( + self, rest_dict: dict, wf_name="func_datasource" + ) -> pe.Workflow: + """Create a workflow to gather timeseries data. + + Return the functional timeseries-related file paths for each series/scan from the + dictionary of functional files described in the data configuration (sublist) YAML + file. + + Scan input (from inputnode) is an iterable. 
+ """ + wf = pe.Workflow(name=wf_name) + + inputnode = pe.Node( + util.IdentityInterface( + fields=["subject", "scan", "creds_path", "dl_dir"], + mandatory_inputs=True, + ), + name="inputnode", + ) + + outputnode = pe.Node( + util.IdentityInterface( + fields=[ + "subject", + "rest", + "scan", + "scan_params", + "phase_diff", + "magnitude", + ] + ), + name="outputspec", + ) + + # have this here for now because of the big change in the data + # configuration format + # (Not necessary with ingress - format does not comply) + if not self.check_rpool("derivatives-dir"): + check_scan = pe.Node( + Function( + input_names=["func_scan_dct", "scan"], + output_names=[], + function=check_func_scan, + as_module=True, + ), + name="check_func_scan", + ) + + check_scan.inputs.func_scan_dct = rest_dict + wf.connect(inputnode, "scan", check_scan, "scan") + + # get the functional scan itself + selectrest = pe.Node( + Function( + input_names=["scan", "rest_dict", "resource"], + output_names=["file_path"], + function=get_rest, + as_module=True, + ), + name="selectrest", + ) + selectrest.inputs.rest_dict = rest_dict + selectrest.inputs.resource = "scan" + wf.connect(inputnode, "scan", selectrest, "scan") + + # check to see if it's on an Amazon AWS S3 bucket, and download it, if it + # is - otherwise, just return the local file path + check_s3_node = pe.Node( + Function( + input_names=["file_path", "creds_path", "dl_dir", "img_type"], + output_names=["local_path"], + function=check_for_s3, + as_module=True, + ), + name="check_for_s3", + ) + + wf.connect(selectrest, "file_path", check_s3_node, "file_path") + wf.connect(inputnode, "creds_path", check_s3_node, "creds_path") + wf.connect(inputnode, "dl_dir", check_s3_node, "dl_dir") + check_s3_node.inputs.img_type = "func" + + wf.connect(inputnode, "subject", outputnode, "subject") + wf.connect(check_s3_node, "local_path", outputnode, "rest") + wf.connect(inputnode, "scan", outputnode, "scan") + + # scan parameters CSV + select_scan_params = pe.Node( + Function( + input_names=["scan", "rest_dict", "resource"], + output_names=["file_path"], + function=get_rest, + as_module=True, + ), + name="select_scan_params", + ) + select_scan_params.inputs.rest_dict = rest_dict + select_scan_params.inputs.resource = "scan_parameters" + wf.connect(inputnode, "scan", select_scan_params, "scan") + + # if the scan parameters file is on AWS S3, download it + s3_scan_params = pe.Node( + Function( + input_names=["file_path", "creds_path", "dl_dir", "img_type"], + output_names=["local_path"], + function=check_for_s3, + as_module=True, + ), + name="s3_scan_params", + ) + + wf.connect(select_scan_params, "file_path", s3_scan_params, "file_path") + wf.connect(inputnode, "creds_path", s3_scan_params, "creds_path") + wf.connect(inputnode, "dl_dir", s3_scan_params, "dl_dir") + wf.connect(s3_scan_params, "local_path", outputnode, "scan_params") + + return wf + def ingress_raw_func_data(self): """Ingress raw functional data.""" func_paths_dct = self.data_paths.func From cf852734ab0ccc01a4c389a69375884cfac70745 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 16:16:48 -0400 Subject: [PATCH 74/93] :coffin: Remove unused `get_name` method --- CPAC/pipeline/cpac_pipeline.py | 2 +- CPAC/pipeline/engine/nodeblock.py | 3 --- CPAC/pipeline/engine/resource.py | 8 ++------ CPAC/utils/strategy.py | 3 --- 4 files changed, 3 insertions(+), 13 deletions(-) diff --git a/CPAC/pipeline/cpac_pipeline.py b/CPAC/pipeline/cpac_pipeline.py index 8b932fc833..4e92fe7f45 100644 --- 
a/CPAC/pipeline/cpac_pipeline.py +++ b/CPAC/pipeline/cpac_pipeline.py @@ -560,7 +560,7 @@ def run_workflow( # for strat_no, strat in enumerate(strat_list): # strat_label = 'strat_%d' % strat_no - # subject_info[strat_label] = strat.get_name() + # subject_info[strat_label] = strat.name # subject_info['resource_pool'].append(strat.get_resource_pool()) subject_info["status"] = "Running" diff --git a/CPAC/pipeline/engine/nodeblock.py b/CPAC/pipeline/engine/nodeblock.py index 4326cc3c20..8a31981653 100644 --- a/CPAC/pipeline/engine/nodeblock.py +++ b/CPAC/pipeline/engine/nodeblock.py @@ -218,9 +218,6 @@ def __init__( config.update_config({"logging": {"workflow_level": "INFO"}}) logging.update_logging(config) - def get_name(self): - return self.name - def check_null(self, val): if isinstance(val, str): val = None if val.lower() == "none" else val diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index dae9befcce..cb3166a3cd 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -429,10 +429,6 @@ def generate_prov_string(prov: LIST_OF_LIST_OF_STR | tuple) -> tuple[str, str]: resource = last_entry.split(":")[0] return (resource, str(prov)) - def get_name(self) -> str: - """Return stringified name.""" - return str(self.name) - def check_rpool(self, resource: list[str] | str) -> bool: """Check if a resource is present in the _Pool.""" if not isinstance(resource, list): @@ -2690,14 +2686,14 @@ def connect_pipeline( ) # Alert user to block that raises error if isinstance(block, list): - node_block_names = str([NodeBlock(b).get_name() for b in block]) + node_block_names = str([NodeBlock(b).name for b in block]) e.args = ( f"When trying to connect one of the node blocks " f"{node_block_names} " f"to workflow '{wf}' {previous_nb_str} {e.args[0]}", ) else: - node_block_names = NodeBlock(block).get_name() + node_block_names = NodeBlock(block).name e.args = ( f"When trying to connect node block " f"'{node_block_names}' " diff --git a/CPAC/utils/strategy.py b/CPAC/utils/strategy.py index a0520fdf6b..7ce1966198 100644 --- a/CPAC/utils/strategy.py +++ b/CPAC/utils/strategy.py @@ -29,9 +29,6 @@ def __init__(self): def append_name(self, name): self.name.append(name) - def get_name(self): - return self.name - def set_leaf_properties(self, node, out_file): self.leaf_node = node self.leaf_out_file = out_file From 84274b98f994fdf31b41a6db1538d65c40ccd8ef Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 16:20:19 -0400 Subject: [PATCH 75/93] :recycle: Move `_config_lookup` method into `ResourcePool` --- CPAC/pipeline/engine/resource.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index cb3166a3cd..f0b291644d 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -406,13 +406,6 @@ def json_outdir_ingress( return json_info, pipe_idx, node_name, data_label - def _config_lookup(self, keylist, fallback_type: type = NoneType) -> Any: - """Lookup a config key, return None if not found.""" - try: - return self.cfg[keylist] - except (AttributeError, KeyError): - return fallback_type() - @staticmethod def generate_prov_string(prov: LIST_OF_LIST_OF_STR | tuple) -> tuple[str, str]: """Generate a string from a SINGLE RESOURCE'S dictionary of MULTIPLE PRECEDING RESOURCES (or single, if just one). 
@@ -2962,6 +2955,15 @@ def get_resource_strats_from_prov(prov: list | str) -> dict[str, list | str]: strat_resource[resource] = entry return strat_resource + def _config_lookup( + self, keylist: str | list[str], fallback_type: type = NoneType + ) -> Any: + """Lookup a config key, return None if not found.""" + try: + return self.cfg[keylist] + except (AttributeError, KeyError): + return fallback_type() + def _get_pipe_number(self, pipe_idx: str | tuple) -> int: """Return the index of a strategy in ``self.pipe_list``.""" return self.pipe_list.index(pipe_idx) From 8389d5b39d4794a7922617afe7d0bb0ab8b63436 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 16:21:51 -0400 Subject: [PATCH 76/93] :recycle: Move `json_outdir_ingress` method into `ResourcePool` --- CPAC/pipeline/engine/resource.py | 156 +++++++++++++++---------------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index f0b291644d..cb92ddf822 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -328,84 +328,6 @@ def initialize_nipype_wf(self, name: str = "") -> None: ), } - def json_outdir_ingress( - self, filepath: Path | str, data_label: str, json: dict - ) -> tuple[dict, tuple[str, str], str, str]: - """Ingress sidecars from a BIDS derivatives directory.""" - desc_val = None - for tag in data_label.split("_"): - if "desc-" in tag: - desc_val = tag - break - jsonpath = str(filepath) - for ext in EXTS: - jsonpath = jsonpath.replace(ext, "") - jsonpath = f"{jsonpath}.json" - - if not os.path.exists(jsonpath): - WFLOGGER.info( - "\n\n[!] No JSON found for file %s.\nCreating %s..\n\n", - filepath, - jsonpath, - ) - json_info = { - "Description": "This data was generated elsewhere and " - "supplied by the user into this C-PAC run's " - "output directory. This JSON file was " - "automatically generated by C-PAC because a " - "JSON file was not supplied with the data." - } - json_info = {**json_info, **json} - write_output_json(json_info, jsonpath) - else: - json_info = read_json(jsonpath) - json_info = {**json_info, **json} - if "CpacProvenance" in json_info: - if desc_val: - # it's a C-PAC output, let's check for pipe_idx/strat integer - # suffixes in the desc- entries. - only_desc = str(desc_val) - - if only_desc[-1].isdigit(): - for _strat_idx in range(0, 3): - # let's stop at 3, please don't run >999 strategies okay? - if only_desc[-1].isdigit(): - only_desc = only_desc[:-1] - - if only_desc[-1] == "-": - only_desc = only_desc.rstrip("-") - else: - msg = ( - "\n[!] Something went wrong with either " - "reading in the output directory or when " - "it was written out previously.\n\nGive " - "this to your friendly local C-PAC " - f"developer:\n\n{data_label!s}\n" - ) - raise IOError(msg) - - # remove the integer at the end of the desc-* variant, we will - # get the unique pipe_idx from the CpacProvenance below - data_label = data_label.replace(desc_val, only_desc) - - # preserve cpac provenance/pipe_idx - pipe_idx = self.generate_prov_string(json_info["CpacProvenance"]) - node_name = "" - else: - json_info["CpacProvenance"] = [f"{data_label}:Non-C-PAC Origin: {filepath}"] # type: ignore [assignment] - if "Description" not in json_info: - json_info["Description"] = ( - "This data was generated elsewhere and " - "supplied by the user into this C-PAC run's " - "output directory. This JSON file was " - "automatically generated by C-PAC because a " - "JSON file was not supplied with the data." 
- ) - pipe_idx = self.generate_prov_string(json_info["CpacProvenance"]) - node_name = f"{data_label}_ingress" - - return json_info, pipe_idx, node_name, data_label - @staticmethod def generate_prov_string(prov: LIST_OF_LIST_OF_STR | tuple) -> tuple[str, str]: """Generate a string from a SINGLE RESOURCE'S dictionary of MULTIPLE PRECEDING RESOURCES (or single, if just one). @@ -2299,6 +2221,84 @@ def func_outdir_ingress(self, func_dict: dict, key: str, func_paths: dict) -> No f"outdir_{key}_ingress", ) + def json_outdir_ingress( + self, filepath: Path | str, data_label: str, json: dict + ) -> tuple[dict, tuple[str, str], str, str]: + """Ingress sidecars from a BIDS derivatives directory.""" + desc_val = None + for tag in data_label.split("_"): + if "desc-" in tag: + desc_val = tag + break + jsonpath = str(filepath) + for ext in EXTS: + jsonpath = jsonpath.replace(ext, "") + jsonpath = f"{jsonpath}.json" + + if not os.path.exists(jsonpath): + WFLOGGER.info( + "\n\n[!] No JSON found for file %s.\nCreating %s..\n\n", + filepath, + jsonpath, + ) + json_info = { + "Description": "This data was generated elsewhere and " + "supplied by the user into this C-PAC run's " + "output directory. This JSON file was " + "automatically generated by C-PAC because a " + "JSON file was not supplied with the data." + } + json_info = {**json_info, **json} + write_output_json(json_info, jsonpath) + else: + json_info = read_json(jsonpath) + json_info = {**json_info, **json} + if "CpacProvenance" in json_info: + if desc_val: + # it's a C-PAC output, let's check for pipe_idx/strat integer + # suffixes in the desc- entries. + only_desc = str(desc_val) + + if only_desc[-1].isdigit(): + for _strat_idx in range(0, 3): + # let's stop at 3, please don't run >999 strategies okay? + if only_desc[-1].isdigit(): + only_desc = only_desc[:-1] + + if only_desc[-1] == "-": + only_desc = only_desc.rstrip("-") + else: + msg = ( + "\n[!] Something went wrong with either " + "reading in the output directory or when " + "it was written out previously.\n\nGive " + "this to your friendly local C-PAC " + f"developer:\n\n{data_label!s}\n" + ) + raise IOError(msg) + + # remove the integer at the end of the desc-* variant, we will + # get the unique pipe_idx from the CpacProvenance below + data_label = data_label.replace(desc_val, only_desc) + + # preserve cpac provenance/pipe_idx + pipe_idx = self.generate_prov_string(json_info["CpacProvenance"]) + node_name = "" + else: + json_info["CpacProvenance"] = [f"{data_label}:Non-C-PAC Origin: {filepath}"] # type: ignore [assignment] + if "Description" not in json_info: + json_info["Description"] = ( + "This data was generated elsewhere and " + "supplied by the user into this C-PAC run's " + "output directory. This JSON file was " + "automatically generated by C-PAC because a " + "JSON file was not supplied with the data." 
+ ) + pipe_idx = self.generate_prov_string(json_info["CpacProvenance"]) + node_name = f"{data_label}_ingress" + + return json_info, pipe_idx, node_name, data_label + def ingress_raw_anat_data(self) -> None: """Ingress raw anatomical data.""" if not self.data_paths.anat: From 2e97e510cf965d9cdd86e3b369fd4f4663fa6991 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 16:23:21 -0400 Subject: [PATCH 77/93] :recycle: Move `initialize_nipype_wf` method into `ResourcePool` --- CPAC/pipeline/engine/resource.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index cb92ddf822..585c451e6d 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -314,20 +314,6 @@ def __str__(self) -> str: return f"{self.__class__.__name__}({self.name}): {list(self.rpool)}" return f"{self.__class__.__name__}: {list(self.rpool)}" - def initialize_nipype_wf(self, name: str = "") -> None: - """Initialize a new nipype workflow.""" - if name: - name = f"_{name}" - workflow_name = f"cpac{name}_{self.unique_id}" - self.wf = pe.Workflow(name=workflow_name) - self.wf.base_dir = self.cfg.pipeline_setup["working_directory"]["path"] # type: ignore[attr-defined] - self.wf.config["execution"] = { - "hash_method": "timestamp", - "crashdump_dir": os.path.abspath( - self.cfg.pipeline_setup["log_directory"]["path"] # type: ignore[attr-defined] - ), - } - @staticmethod def generate_prov_string(prov: LIST_OF_LIST_OF_STR | tuple) -> tuple[str, str]: """Generate a string from a SINGLE RESOURCE'S dictionary of MULTIPLE PRECEDING RESOURCES (or single, if just one). @@ -1324,6 +1310,20 @@ def get_strats( # noqa: PLR0912,PLR0915 new_strats[pipe_idx].preserve_json_info(resource, strat_resource) return new_strats + def initialize_nipype_wf(self, name: str = "") -> None: + """Initialize a new nipype workflow.""" + if name: + name = f"_{name}" + workflow_name = f"cpac{name}_{self.unique_id}" + self.wf = pe.Workflow(name=workflow_name) + self.wf.base_dir = self.cfg.pipeline_setup["working_directory"]["path"] # type: ignore[attr-defined] + self.wf.config["execution"] = { + "hash_method": "timestamp", + "crashdump_dir": os.path.abspath( + self.cfg.pipeline_setup["log_directory"]["path"] # type: ignore[attr-defined] + ), + } + def ingress_freesurfer(self) -> None: """Ingress FreeSurfer data.""" try: From a85522235fbbb020f803c12b553b88b00ce903a0 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 16:24:39 -0400 Subject: [PATCH 78/93] :coffin: Remove unused function `run_node_blocks` --- CPAC/pipeline/engine/__init__.py | 4 -- CPAC/pipeline/engine/engine.py | 63 -------------------------------- 2 files changed, 67 deletions(-) delete mode 100644 CPAC/pipeline/engine/engine.py diff --git a/CPAC/pipeline/engine/__init__.py b/CPAC/pipeline/engine/__init__.py index 7642676910..534c9f7450 100644 --- a/CPAC/pipeline/engine/__init__.py +++ b/CPAC/pipeline/engine/__init__.py @@ -16,9 +16,6 @@ # License along with C-PAC. If not, see . 
"""C-PAC engine.""" -from .engine import ( - run_node_blocks, -) from .nodeblock import NodeBlock from .resource import ResourcePool, StratPool @@ -26,5 +23,4 @@ "NodeBlock", "ResourcePool", "StratPool", - "run_node_blocks", ] diff --git a/CPAC/pipeline/engine/engine.py b/CPAC/pipeline/engine/engine.py deleted file mode 100644 index a50ae9a277..0000000000 --- a/CPAC/pipeline/engine/engine.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright (C) 2021-2024 C-PAC Developers - -# This file is part of C-PAC. - -# C-PAC is free software: you can redistribute it and/or modify it under -# the terms of the GNU Lesser General Public License as published by the -# Free Software Foundation, either version 3 of the License, or (at your -# option) any later version. - -# C-PAC is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. - -# You should have received a copy of the GNU Lesser General Public -# License along with C-PAC. If not, see . -"""C-PAC pipeline engine.""" - -import os - -from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.utils.monitoring import WFLOGGER - - -def run_node_blocks(blocks, data_paths, cfg=None): - from CPAC.pipeline.engine.nodeblock import NodeBlock - from CPAC.pipeline.engine.resource import ResourcePool - - if not cfg: - cfg = { - "pipeline_setup": { - "working_directory": {"path": os.getcwd()}, - "log_directory": {"path": os.getcwd()}, - } - } - - # TODO: WE HAVE TO PARSE OVER UNIQUE ID'S!!! - - wf = pe.Workflow(name="node_blocks") - rpool = ResourcePool(wf=wf, cfg=cfg, data_paths=data_paths) - wf.base_dir = cfg.pipeline_setup["working_directory"]["path"] - wf.config["execution"] = { - "hash_method": "timestamp", - "crashdump_dir": cfg.pipeline_setup["log_directory"]["path"], - } - - run_blocks = [] - if rpool.check_rpool("desc-preproc_T1w"): - WFLOGGER.info("Preprocessed T1w found, skipping anatomical preprocessing.") - else: - run_blocks += blocks[0] - if rpool.check_rpool("desc-preproc_bold"): - WFLOGGER.info("Preprocessed BOLD found, skipping functional preprocessing.") - else: - run_blocks += blocks[1] - - for block in run_blocks: - wf = rpool.connect_block( - wf, NodeBlock(block, debug=cfg["pipeline_setup", "Debugging", "verbose"]) - ) - rpool.gather_pipes(wf, cfg) - - wf.run() From 0d848b9cc79183e75fc8930fd7f332c7ccf13b5a Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 16:44:34 -0400 Subject: [PATCH 79/93] :recycle: Replace calls to `grab_tiered_dct` with direct config lookup --- CPAC/pipeline/engine/nodeblock.py | 44 ++++++++++--------------------- CPAC/pipeline/engine/resource.py | 12 ++++----- 2 files changed, 20 insertions(+), 36 deletions(-) diff --git a/CPAC/pipeline/engine/nodeblock.py b/CPAC/pipeline/engine/nodeblock.py index 8a31981653..e6ac45439e 100644 --- a/CPAC/pipeline/engine/nodeblock.py +++ b/CPAC/pipeline/engine/nodeblock.py @@ -232,16 +232,6 @@ def check_output(self, outputs, label, name): ) raise NameError(msg) - def grab_tiered_dct(self, cfg, key_list): - cfg_dct = cfg.dict() - for key in key_list: - try: - cfg_dct = cfg_dct.get(key, {}) - except KeyError as ke: - msg = "[!] 
The config provided to the node block is not valid" - raise KeyError(msg) from ke - return cfg_dct - @staticmethod def list_blocks( pipeline_blocks: PIPELINE_BLOCKS, indent: Optional[int] = None @@ -250,7 +240,7 @@ def list_blocks( Parameters ---------- - pipeline_blocks: list of + pipeline_blocks: list of :py:class:`NodeBlockFunction`s indent: number of spaces after a tab indent """ @@ -296,25 +286,19 @@ def nodeblock( Parameters ---------- - name - Used in the graph and logging to identify the NodeBlock and its component nodes. Function's ``.__name__`` is used if ``name`` is not provided. - config - Indicates the nested keys in a C-PAC pipeline configuration should configure a NodeBlock built from this - function. If config is set to ``None``, then all other configuration-related entities must be specified from the - root of the configuration. - switch - Indicates any keys that should evaluate to True for this NodeBlock to be active. A list of lists of strings - indicates multiple switches that must all be True to run, and is currently only an option if config is set to - ``None``. - option_key - Indicates the nested keys (starting at the nested key indicated by config) that should configure this NodeBlock. - option_val - Indicates values for which this NodeBlock should be active. - inputs - ResourcePool keys indicating files needed for the NodeBlock's functionality. - outputs - ResourcePool keys indicating files generated or updated by the NodeBlock, optionally including metadata - for the outputs' respective sidecars. + name: Used in the graph and logging to identify the NodeBlock and its component nodes. Function's ``.__name__`` is used if ``name`` is not provided. + + config: Indicates the nested keys in a C-PAC pipeline configuration should configure a NodeBlock built from this function. If config is set to ``None``, then all other configuration-related entities must be specified from the root of the configuration. + + switch: Indicates any keys that should evaluate to True for this NodeBlock to be active. A list of lists of strings indicates multiple switches that must all be True to run, and is currently only an option if config is set to ``None``. + + option_key: Indicates the nested keys (starting at the nested key indicated by config) that should configure this NodeBlock. + + option_val: Indicates values for which this NodeBlock should be active. + + inputs: ResourcePool keys indicating files needed for the NodeBlock's functionality. + + outputs: ResourcePool keys indicating files generated or updated by the NodeBlock, optionally including metadata for the outputs' respective sidecars. 
""" return lambda func: NodeBlockFunction( func, diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 585c451e6d..742e0aeef7 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -2381,11 +2381,11 @@ def connect_block(self, wf: pe.Workflow, block: NodeBlock) -> pe.Workflow: # no key_list = option_key if "USER-DEFINED" in option_val: # load custom config data into each 'opt' - opts = block.grab_tiered_dct(self.cfg, key_list) + opts = self.cfg[key_list] else: for option in option_val: try: - if option in block.grab_tiered_dct(self.cfg, key_list): + if option in self.cfg[key_list]: # goes over the option_vals in the node block docstring, and checks if the user's pipeline config included it in the forking list opts.append(option) except AttributeError as err: @@ -2414,7 +2414,7 @@ def connect_block(self, wf: pe.Workflow, block: NodeBlock) -> pe.Workflow: # no else: key_list = option_config option_val = option_config[-1] - if option_val in block.grab_tiered_dct(self.cfg, key_list[:-1]): + if option_val in self.cfg[key_list[:-1]]: opts.append(option_val) else: # AND, if there are multiple option-val's (in a list) in the docstring, it gets iterated below in 'for opt in option' etc. AND THAT'S WHEN YOU HAVE TO DELINEATE WITHIN THE NODE BLOCK CODE!!! opts = [None] @@ -2439,13 +2439,13 @@ def connect_block(self, wf: pe.Workflow, block: NodeBlock) -> pe.Workflow: # no "'switch' fields are lists.\n\n" ) raise TypeError(msg) from te - switch = block.grab_tiered_dct(self.cfg, key_list) + switch = self.cfg[key_list] elif isinstance(switch[0], list): # we have multiple switches, which is designed to only work if # config is set to "None" switch_list = [] for key_list in switch: - val = block.grab_tiered_dct(self.cfg, key_list) + val = self.cfg[key_list] if isinstance(val, list): # fork switches if True in val: @@ -2461,7 +2461,7 @@ def connect_block(self, wf: pe.Workflow, block: NodeBlock) -> pe.Workflow: # no else: # if config is set to "None" key_list = switch - switch = block.grab_tiered_dct(self.cfg, key_list) + switch = self.cfg[key_list] if not isinstance(switch, list): switch = [switch] if True in switch: From 757145566ac483af734a37291bb0412b8a3f2222 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 16:54:50 -0400 Subject: [PATCH 80/93] :recycle: Move `_check_null` from method to private function --- CPAC/pipeline/engine/nodeblock.py | 16 +++++++--------- CPAC/pipeline/engine/resource.py | 28 +++++++++++++++++++--------- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/CPAC/pipeline/engine/nodeblock.py b/CPAC/pipeline/engine/nodeblock.py index e6ac45439e..5db1c6c4cd 100644 --- a/CPAC/pipeline/engine/nodeblock.py +++ b/CPAC/pipeline/engine/nodeblock.py @@ -31,6 +31,7 @@ from CPAC.pipeline.engine.resource import ResourceData, StratPool NODEBLOCK_INPUTS = list[str | list | tuple] +NODEBLOCK_OUTPUTS = list[str] | dict[str, Any] PIPELINE_BLOCKS = list["NodeBlockFunction | PIPELINE_BLOCKS"] @@ -46,7 +47,7 @@ def __init__( option_key: Optional[str | list[str]] = None, option_val: Optional[str | list[str]] = None, inputs: Optional[NODEBLOCK_INPUTS] = None, - outputs: Optional[list[str] | dict[str, Any]] = None, + outputs: Optional[NODEBLOCK_OUTPUTS] = None, ) -> None: self.func = func """Nodeblock function reference.""" @@ -70,9 +71,7 @@ def __init__( """ self.option_val: Optional[str | list[str]] = option_val """Indicates values for which this NodeBlock should be active.""" - if inputs is None: - inputs = [] 
- self.inputs: list[str | list | tuple] = inputs + self.inputs: list[str | list | tuple] = inputs if inputs else [] """ResourcePool keys indicating resources needed for the NodeBlock's functionality.""" self.outputs: list[str] | dict[str, Any] = outputs if outputs else [] """ @@ -218,12 +217,11 @@ def __init__( config.update_config({"logging": {"workflow_level": "INFO"}}) logging.update_logging(config) - def check_null(self, val): - if isinstance(val, str): - val = None if val.lower() == "none" else val - return val + def check_output(self, outputs: NODEBLOCK_OUTPUTS, label: str, name: str) -> None: + """Check if a label is listed in a NodeBlock's ``outputs``. - def check_output(self, outputs, label, name): + Raises ``NameError`` if a mismatch is found. + """ if label not in outputs: msg = ( f'\n[!] Output name "{label}" in the block ' diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 742e0aeef7..6d46e14237 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -38,7 +38,12 @@ ) from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.check_outputs import ExpectedOutputs -from CPAC.pipeline.engine.nodeblock import NodeBlock, NodeBlockFunction +from CPAC.pipeline.engine.nodeblock import ( + NodeBlock, + NODEBLOCK_INPUTS, + NODEBLOCK_OUTPUTS, + NodeBlockFunction, +) from CPAC.pipeline.utils import name_fork, source_set from CPAC.registration.registration import transform_derivative from CPAC.resources.templates.lookup_table import lookup_identifier @@ -2341,8 +2346,6 @@ def ingress_raw_anat_data(self) -> None: def connect_block(self, wf: pe.Workflow, block: NodeBlock) -> pe.Workflow: # noqa: PLR0912,PLR0915 """Connect a NodeBlock via the ResourcePool.""" - from CPAC.pipeline.engine.nodeblock import NODEBLOCK_INPUTS - debug = bool(self.cfg.pipeline_setup["Debugging"]["verbose"]) # type: ignore [attr-defined] all_opts: list[str] = [] @@ -2360,12 +2363,12 @@ def connect_block(self, wf: pe.Workflow, block: NodeBlock) -> pe.Workflow: # no for name, block_dct in block.node_blocks.items(): # iterates over either the single node block in the sequence, or a list of node blocks within the list of node blocks, i.e. for option forking. 
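A minimal sketch of the behavior preserved by this move (illustrative calls, not part of the diff):

    # behavior preserved from the removed `check_null` method shown above
    from CPAC.pipeline.engine.resource import _check_null

    _check_null("None")   # -> None (case-insensitive match on the string "none")
    _check_null("fsl")    # -> "fsl" (other strings pass through unchanged)
    _check_null(["run"])  # -> ["run"] (non-string values are returned as-is)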
- switch = block.check_null(block_dct["switch"]) - config = block.check_null(block_dct["config"]) - option_key = block.check_null(block_dct["option_key"]) - option_val = block.check_null(block_dct["option_val"]) - inputs: NODEBLOCK_INPUTS = block.check_null(block_dct["inputs"]) - outputs = block.check_null(block_dct["outputs"]) + switch = _check_null(block_dct["switch"]) + config = _check_null(block_dct["config"]) + option_key = _check_null(block_dct["option_key"]) + option_val = _check_null(block_dct["option_val"]) + inputs: NODEBLOCK_INPUTS = _check_null(block_dct["inputs"]) + outputs: NODEBLOCK_OUTPUTS = _check_null(block_dct["outputs"]) block_function: NodeBlockFunction = block_dct["block_function"] @@ -3248,3 +3251,10 @@ def filtered_movement(self) -> bool: except KeyError: # not a strat_pool or no movement parameters in strat_pool return False + + +def _check_null(val: Any) -> Any: + """Return ``None`` if ``val`` == "none" (case-insensitive).""" + if isinstance(val, str): + val = None if val.lower() == "none" else val + return val From 4a05442c89ca1a59083370eb6c16bb8e99cd975d Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Wed, 17 Jul 2024 17:04:43 -0400 Subject: [PATCH 81/93] :construction_worker: Pre-clone `bids-examples` --- .circleci/main.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.circleci/main.yml b/.circleci/main.yml index f071ea00ee..91add3529f 100644 --- a/.circleci/main.yml +++ b/.circleci/main.yml @@ -68,7 +68,9 @@ commands: steps: - run: name: Getting Sample BIDS Data - command: git clone https://github.com/bids-standard/bids-examples.git + command: | + mkdir -p /home/circleci/project/dev/circleci_data/.pytest_cache/d/bids-examples + git clone https://github.com/bids-standard/bids-examples.git /home/circleci/project/dev/circleci_data/.pytest_cache/d/bids-examples get-singularity: parameters: version: From dd0985fd3774b79e8d96cc77c46cb1576a2a7115 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 18 Jul 2024 09:24:29 -0400 Subject: [PATCH 82/93] =?UTF-8?q?:pencil2:=20Fix=20typo=20(~~"tpyes"~~=20?= =?UTF-8?q?=E2=86=92=20"types")?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CPAC/pipeline/test/test_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CPAC/pipeline/test/test_engine.py b/CPAC/pipeline/test/test_engine.py index 4bbdd07f57..01283042fb 100644 --- a/CPAC/pipeline/test/test_engine.py +++ b/CPAC/pipeline/test/test_engine.py @@ -49,7 +49,7 @@ def test_ingress_func_raw_data( """Test :py:method:~`CPAC.pipeline.engine.resource.ResourcePool.ingress_raw_func_data`.""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) - rpool.gather_pipes(rpool.wf, cfg, all_tpyes=True) + rpool.gather_pipes(rpool.wf, cfg, all_types=True) @pytest.mark.parametrize("preconfig", ["default"]) From 7b04cc88e238ddf4bda165e5e6b7cc6a3376f1fd Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 18 Jul 2024 09:42:39 -0400 Subject: [PATCH 83/93] :white_check_mark: Unlink symlink instead of rmtree --- dev/circleci_data/test_external_utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dev/circleci_data/test_external_utils.py b/dev/circleci_data/test_external_utils.py index 5682f75d66..d4892fee3b 100644 --- a/dev/circleci_data/test_external_utils.py +++ b/dev/circleci_data/test_external_utils.py @@ -19,7 +19,6 @@ from logging import INFO import os from pathlib import Path -from shutil import rmtree 
import sys import click @@ -111,7 +110,12 @@ def test_new_settings_template(bids_examples, caplog, cli_runner): participant_yaml = DATA_DIR / "data_config_ds051.yml" group_yaml = DATA_DIR / "group_analysis_participants_ds051.txt" - rmtree(str(example_dir)) + if example_dir.is_symlink() or example_dir.is_file(): + example_dir.unlink() + else: + from shutil import rmtree + + rmtree(example_dir) assert result.exit_code == 0 assert "\n".join(caplog.messages).startswith( "\nGenerating data configuration file.." From 75e38e069579c877eb3fc00ca9820e1c42e0bc7c Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 18 Jul 2024 10:57:21 -0400 Subject: [PATCH 84/93] :memo: Update CHANGELOG re: #2131 --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index df8f40a666..a8bb98da0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Moved `pygraphviz` from requirements to `graphviz` optional dependencies group. +- Split `ResourcePool` into three classes: `Resource`, `ResourcePool`, and `StratPool`. ### Fixed From f5dd824cdc5d545cded78ca1b08397562ead006e Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 18 Jul 2024 12:41:45 -0400 Subject: [PATCH 85/93] :art: Standardize docstring format across changes. --- CPAC/pipeline/engine/nodeblock.py | 115 +++++++++++++++++---------- CPAC/pipeline/engine/resource.py | 126 ++++++++++++++++-------------- CPAC/pipeline/schema.py | 53 +++---------- CPAC/pipeline/test/test_engine.py | 14 ++-- CPAC/pipeline/utils.py | 30 ++----- 5 files changed, 162 insertions(+), 176 deletions(-) diff --git a/CPAC/pipeline/engine/nodeblock.py b/CPAC/pipeline/engine/nodeblock.py index 5db1c6c4cd..e1b1437d3c 100644 --- a/CPAC/pipeline/engine/nodeblock.py +++ b/CPAC/pipeline/engine/nodeblock.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . -"""Class and decorator for NodeBlock functions.""" +"""Classes and decorator for :py:class:`NodeBlock`\u200bs and :py:class:`NodeBlockFunction`\u200bs.""" from typing import Any, Callable, Optional, TYPE_CHECKING @@ -50,32 +50,37 @@ def __init__( outputs: Optional[NODEBLOCK_OUTPUTS] = None, ) -> None: self.func = func - """Nodeblock function reference.""" + """`Nodeblock` function reference.""" self.name: str = name - """Used in the graph and logging to identify the NodeBlock and its component nodes.""" + """Used in the graph and logging to identify the :py:class:`NodeBlock` and its component :py:class:`~nipype.pipeline.engine.Node`\u200bs.""" self.config: Optional[list[str]] = config """ - Indicates the nested keys in a C-PAC pipeline configuration should configure a NodeBlock built from this - function. If config is set to ``None``, then all other configuration-related entities must be specified from the + Indicates the nested keys in a C-PAC pipeline :py:class:`Configuration` + should configure a `NodeBlock` built from this function. If `config` is set to + `None`, then all other configuration-related entities must be specified from the root of the configuration. """ self.switch: Optional[list[str] | list[list[str]]] = switch """ - Indicates any keys that should evaluate to True for this NodeBlock to be active. A list of lists of strings - indicates multiple switches that must all be True to run, and is currently only an option if config is set to - ``None``. 
+ Indicates any keys that should evaluate to `True` for this :py:class:`NodeBlock` + to be active. A list of lists of strings indicates multiple `switch`\u200bes + that must all be `True` to run, and is currently only an option if `config` is + set to `None`. """ self.option_key: Optional[str | list[str]] = option_key """ - Indicates the nested keys (starting at the nested key indicated by config) that should configure this NodeBlock. + Indicates the nested keys (starting at the nested key indicated by `config`) + that should configure this :py:class:`NodeBlock`. """ self.option_val: Optional[str | list[str]] = option_val - """Indicates values for which this NodeBlock should be active.""" + """Indicates values for which this :py:class:`NodeBlock` should be active.""" self.inputs: list[str | list | tuple] = inputs if inputs else [] - """ResourcePool keys indicating resources needed for the NodeBlock's functionality.""" + """:py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating + resources needed for the :py:class:`NodeBlock`\u200b's functionality.""" self.outputs: list[str] | dict[str, Any] = outputs if outputs else [] """ - ResourcePool keys indicating resources generated or updated by the NodeBlock, optionally including metadata + :py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating + resources generated or updated by the `NodeBlock`, optionally including metadata for the outputs' respective sidecars. """ @@ -101,14 +106,14 @@ def __call__( pipe_num: Optional[int | str], opt: Optional[str] = None, ) -> tuple[Workflow, dict[str, "ResourceData"]]: - """Call a NodeBlockFunction. + """Call a `NodeBlockFunction`. - All node block functions have the same signature. + All `NodeBlockFunction`\u200bs have the same signature. """ return self.func(wf, cfg, strat_pool, pipe_num, opt) def legacy_nodeblock_dict(self): - """Return nodeblock metadata as a dictionary. + """Return :py:class:`NodeBlock` metadata as a dictionary. Helper for compatibility reasons. """ @@ -123,7 +128,7 @@ def legacy_nodeblock_dict(self): } def __repr__(self) -> str: - """Return reproducible string representation of a NodeBlockFunction.""" + """Return reproducible string representation of a `NodeBlockFunction`.""" return ( f"NodeBlockFunction({self.func.__module__}." f'{self.func.__name__}, "{self.name}", ' @@ -134,19 +139,19 @@ def __repr__(self) -> str: ) def __str__(self) -> str: - """Return string representation of a NodeBlockFunction.""" + """Return string representation of a `NodeBlockFunction`.""" return f"NodeBlockFunction({self.name})" class NodeBlock: - """A worflow subgraph composed of :py:class:`NodeBlockFunction`s.""" + """A :py:class:`Workflow` subgraph composed of :py:class:`NodeBlockFunction`\u200bs.""" def __init__( self, node_block_functions: NodeBlockFunction | PIPELINE_BLOCKS, debug: bool = False, ) -> None: - """Create a ``NodeBlock`` from a list of py:class:`~CPAC.pipeline.engine.nodeblock.NodeBlockFunction`s.""" + """Create a `NodeBlock` from a list of py:class:`NodeBlockFunction`\u200bs.""" if not isinstance(node_block_functions, list): node_block_functions = [node_block_functions] @@ -218,9 +223,12 @@ def __init__( logging.update_logging(config) def check_output(self, outputs: NODEBLOCK_OUTPUTS, label: str, name: str) -> None: - """Check if a label is listed in a NodeBlock's ``outputs``. + """Check if a label is listed in a `NodeBlock`\u200b's `outputs`. - Raises ``NameError`` if a mismatch is found. + Raises + ------ + NameError + If a mismatch is found. 
""" if label not in outputs: msg = ( @@ -234,13 +242,20 @@ def check_output(self, outputs: NODEBLOCK_OUTPUTS, label: str, name: str) -> Non def list_blocks( pipeline_blocks: PIPELINE_BLOCKS, indent: Optional[int] = None ) -> str: - """List node blocks line by line. + """List :py:class:`NodeBlockFunction`\u200bs line by line. Parameters ---------- - pipeline_blocks: list of :py:class:`NodeBlockFunction`s + pipeline_blocks + list of :py:class:`NodeBlockFunction`\u200bs - indent: number of spaces after a tab indent + indent + number of spaces after a tab indent + + Returns + ------- + str + formatted list of :py:class:`NodeBlockFunction`\u200bs """ blockstring = yaml.dump( [ @@ -277,26 +292,46 @@ def nodeblock( inputs: Optional[NODEBLOCK_INPUTS] = None, outputs: Optional[list[str] | dict[str, Any]] = None, ): - """ - Define a node block. + """Define a :py:class:`NodeBlockFunction`\u200b. - Connections to the pipeline configuration and to other node blocks. + Connections to the pipeline :py:class:`Configuration` and to other :py:class:`NodeBlockFunction`\u200bs. Parameters ---------- - name: Used in the graph and logging to identify the NodeBlock and its component nodes. Function's ``.__name__`` is used if ``name`` is not provided. - - config: Indicates the nested keys in a C-PAC pipeline configuration should configure a NodeBlock built from this function. If config is set to ``None``, then all other configuration-related entities must be specified from the root of the configuration. - - switch: Indicates any keys that should evaluate to True for this NodeBlock to be active. A list of lists of strings indicates multiple switches that must all be True to run, and is currently only an option if config is set to ``None``. - - option_key: Indicates the nested keys (starting at the nested key indicated by config) that should configure this NodeBlock. - - option_val: Indicates values for which this NodeBlock should be active. - - inputs: ResourcePool keys indicating files needed for the NodeBlock's functionality. - - outputs: ResourcePool keys indicating files generated or updated by the NodeBlock, optionally including metadata for the outputs' respective sidecars. + name + Used in the graph and logging to identify the :py:class:`NodeBlock` and its + component :py:class:`~nipype.pipeline.engine.Node`\u200bs. + The :py:class:`NodeBlockFunction`\u200b's `.__name__` is used if `name` is not + provided. + + config + Indicates the nested keys in a C-PAC pipeline :py:class:`Configuration` should + configure a :py:class:`NodeBlock` built from this + :py:class:`NodeBlockFunction`\u200b. If `config` is set to `None`, then all other + :py:class:`Configuration`\u200b-related entities must be specified from the root + of the :py:class:`Configuration`\u200b. + + switch + Indicates any keys that should evaluate to `True` for this :py:class:`NodeBlock` + to be active. A list of lists of strings indicates multiple switches that must + all be `True` to run, and is currently only an option if config is set to + `None`. + + option_key + Indicates the nested keys (starting at the nested key indicated by `config`) + that should configure this :py:class:`NodeBlock`\u200b. + + option_val + Indicates values for which this :py:class:`NodeBlock` should be active. + + inputs + ResourcePool keys indicating files needed for the :py:class:`NodeBlock`\u200b's + functionality. 
+ + outputs + :py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating files + generated or updated by the :py:class:`NodeBlock`, optionally including metadata + for the outputs' respective sidecars. """ return lambda func: NodeBlockFunction( func, diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 6d46e14237..3e669444f2 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . -"""Resources and ResourcePools for C-PAC.""" +""":py:class:`Resource`\u200bs and :py:class:`ResourcePool`\u200bs for C-PAC.""" import ast from collections.abc import KeysView @@ -30,13 +30,13 @@ from nipype.interfaces import utility as util # type: ignore [import-untyped] from nipype.interfaces.utility import Rename # type: ignore [import-untyped] +from nipype.pipeline import engine as pe from CPAC.image_utils.spatial_smoothing import spatial_smoothing from CPAC.image_utils.statistical_transforms import ( fisher_z_score_standardize, z_score_standardize, ) -from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.check_outputs import ExpectedOutputs from CPAC.pipeline.engine.nodeblock import ( NodeBlock, @@ -92,7 +92,7 @@ class DataPaths: def __init__( self, *, data_paths: Optional[dict] = None, part_id: Optional[str] = "" ) -> None: - """Initialize a ``DataPaths`` instance.""" + """Initialize a `DataPaths` instance.""" if not data_paths: data_paths = {} if part_id and "part_id" in data_paths and part_id != data_paths["part_id"]: @@ -117,17 +117,17 @@ def __init__( self.derivatives_dir: Optional[str] = data_paths.get("derivatives_dir") def __repr__(self) -> str: - """Return reproducible string representation of ``DataPaths`` instance.""" + """Return reproducible string representation of `DataPaths` instance.""" return f"DataPaths(data_paths={self.as_dict()})" def __str__(self) -> str: - """Return string representation of a ``DataPaths`` instance.""" + """Return string representation of a `DataPaths` instance.""" return f"" def as_dict(self) -> dict: - """Return ``data_paths`` dictionary. + """Return a `data_paths` dictionary. 
- data_paths format:: + `data_paths` format:: {"anat": {"T1w": "{T1w path}", "T2w": "{T2w path}"}, "creds_path": {None OR path to credentials CSV}, @@ -178,7 +178,7 @@ def set_iterables( def strip_template(data_label: str) -> tuple[str, dict[str, str]]: - """Strip a template name from a data label to use as a Resource key.""" + """Strip a template name from a data label to use as a :py:class:`Resource` key.""" json = {} # rename to template for prefix in ["space-", "from-", "to-"]: @@ -199,21 +199,21 @@ def strip_template(data_label: str) -> tuple[str, dict[str, str]]: class ResourceData(NamedTuple): - """Attribute and tuple access for ResourceData.""" + """Attribute and tuple access for `ResourceData`.""" node: pe.Node - """Resource Node.""" + """Resource :py:class:`~pe.Node`.""" out: str """Output key.""" class Resource: - """A single Resource and its methods.""" + """A single `Resource` and its methods.""" def __init__(self, data: tuple[pe.Node, str], json: dict) -> None: - """Initialize a Resource.""" + """Initialize a `Resource`.""" self.data = ResourceData(*data) - """Tuple of source Node and output key.""" + """Tuple of source :py:class:`~pe.Node` and output key.""" self._json = json """Metadata.""" self._keys = {"data", "json"} @@ -224,7 +224,7 @@ def keys(self) -> list[str]: return list(self._keys) def __contains__(self, item: Any) -> bool: - """Return True if item in self.keys(), False otherwise.""" + """Return `True` if `item` in `self.keys()`, `False` otherwise.""" return item in self.keys() def __getitem__(self, name: str) -> Any: @@ -235,7 +235,7 @@ def __getitem__(self, name: str) -> Any: raise KeyError(msg) def __repr__(self) -> str: - """Return reproducible string for Resource.""" + """Return reproducible string for `Resource`.""" positional = f"Resource(data={self.data}, json={self.json}" kw = ", ".join( f"{key}={getattr(self, key)}" @@ -245,17 +245,17 @@ def __repr__(self) -> str: return f"{positional}{kw})" def __setitem__(self, name: str, value: Any) -> None: - """Provide legacy dict-style set access.""" + """Provide legacy dict-style set access for `Resource`.""" setattr(self, name, value) if name not in self.keys(): self._keys.add(name) def __str__(self) -> str: - """Return string representation of Resource.""" + """Return string representation of `Resource`.""" return f"{self.data[0]}" def get_json(self) -> dict[str | tuple, Any]: - """Return a deep copy of Resource JSON.""" + """Return a deep copy of `Resource` JSON.""" UTLOGGER.debug( "%s is a deep copy of the attached JSON. 
Assign it to a variable before modifying or the changes will be ephemeral.", self.__class__.__name__, @@ -263,14 +263,14 @@ def get_json(self) -> dict[str | tuple, Any]: return json.loads(json.dumps(self._json)) def set_json(self, value=dict) -> None: - """Update Resource JSON.""" + """Update `Resource` JSON.""" self._json.update(value) json = property(get_json, set_json, doc=get_json.__doc__) @property def cpac_provenance(self) -> list: - """Get CpacProvenance of a Resource.""" + """Get "CpacProvenance" of a `Resource`.""" return self.json["CpacProvenance"] @@ -278,7 +278,7 @@ class _Pool: """All Resources.""" def __init__(self) -> None: - """Initialize a ResourcePool or StratPool.""" + """Initialize a :py:class:`ResourcePool` or :py:class:`StratPool`\u200b.""" self.ants_interp: str self.cfg: Configuration self.creds_paths: Optional[str] @@ -305,7 +305,7 @@ def __init__(self) -> None: self.wf: pe.Workflow def __repr__(self) -> str: - """Return reproducible _Pool string.""" + """Return reproducible `_Pool` string.""" params = [ f"{param}={getattr(self, param)}" for param in ["rpool", "name", "cfg", "pipe_list"] @@ -314,7 +314,7 @@ def __repr__(self) -> str: return f'{self.__class__.__name__}({", ".join(params)})' def __str__(self) -> str: - """Return string representation of a _Pool.""" + """Return string representation of a `_Pool`.""" if self.name: return f"{self.__class__.__name__}({self.name}): {list(self.rpool)}" return f"{self.__class__.__name__}: {list(self.rpool)}" @@ -336,7 +336,7 @@ def generate_prov_string(prov: LIST_OF_LIST_OF_STR | tuple) -> tuple[str, str]: return (resource, str(prov)) def check_rpool(self, resource: list[str] | str) -> bool: - """Check if a resource is present in the _Pool.""" + """Check if a `resource` is present in the `_Pool`.""" if not isinstance(resource, list): resource = [resource] for name in resource: @@ -345,11 +345,11 @@ def check_rpool(self, resource: list[str] | str) -> bool: return False def keys(self) -> KeysView: - """Return rpool's keys.""" + """Return `rpool`'s keys.""" return self.rpool.keys() def __contains__(self, key) -> bool: - """Return True if key in Pool, False otherwise.""" + """Return `True` if key in `_Pool`, `False` otherwise.""" return key in self.keys() @staticmethod @@ -359,7 +359,7 @@ def get_resource_from_prov(prov: LIST_OF_LIST_OF_STR) -> Optional[str]: Each resource (i.e. "desc-cleaned_bold" AKA nuisance-regressed BOLD data) has its own provenance list. 
the name of the resource, and the node that produced it, is always the last item in the provenance - list, with the two separated by a colon : + list, with the two separated by a colon (`:`) """ if not len(prov): return None @@ -382,7 +382,7 @@ def set_data( fork: bool = False, inject: bool = False, ) -> None: - """Plug a Resource into a _Pool.""" + """Plug a :py:class:`Resource` into a `_Pool`.""" json_info = json_info.copy() cpac_prov: LIST_OF_LIST_OF_STR = [] if "CpacProvenance" in json_info: @@ -449,7 +449,7 @@ def get( Optional[Resource | STRAT_DICT | dict] | tuple[Optional[Resource | STRAT_DICT], Optional[str]] ): - """Return a dictionary of strats or a single Resource.""" + """Return a dictionary of strats or a single :py:class:`Resource`\u200b.""" if not isinstance(resource, list): resource = [resource] # if a list of potential inputs are given, pick the first one found @@ -483,7 +483,7 @@ def get( class ResourcePool(_Pool): - """A pool of Resources.""" + """A pool of :py:class:`Resource`\u200bs.""" from CPAC.pipeline.engine.nodeblock import ( NODEBLOCK_INPUTS, @@ -502,7 +502,7 @@ def __init__( pipeline_name: str = "", wf: Optional[pe.Workflow] = None, ) -> None: - """Initialize a ResourcePool.""" + """Initialize a `ResourcePool`.""" self.name = name super().__init__() if isinstance(data_paths, dict): @@ -617,9 +617,9 @@ def __init__( self.ingress_pipeconfig_paths() def back_propogate_template_name( - self, resource_idx: str, json_info: dict, id_string: "pe.Node" + self, resource_idx: str, json_info: dict, id_string: pe.Node ) -> None: - """Find and apply the template name from a resource's provenance.""" + """Find and apply the template name from a :py:class:`Resource`\u200b's provenance.""" if "template" in resource_idx and self.check_rpool("derivatives-dir"): if self.check_rpool("template"): node, out = self.get_data("template") @@ -1038,7 +1038,7 @@ def get_data( report_fetched=False, quick_single=False, ): - """Get ResourceData from ResourcePool.""" + """Get :py:class:`ResourceData` from `ResourcePool`.""" _resource = self.get(resource, pipe_idx=pipe_idx, report_fetched=report_fetched) if report_fetched: if pipe_idx: @@ -1053,7 +1053,7 @@ def get_data( return _resource.data def get_json(self, resource: str, strat: str | tuple) -> dict: - """Get JSON metadata from a Resource in a strategy.""" + """Get JSON metadata from a :py:class:`Resource` in a strategy.""" return self.get(resource, pipe_idx=strat).json def get_json_info(self, resource: str, key: str) -> Any: @@ -1077,7 +1077,7 @@ def get_raw_label(resource: str) -> str: def get_strats( # noqa: PLR0912,PLR0915 self, resources: NODEBLOCK_INPUTS, debug: bool = False ) -> dict[str | tuple, "StratPool"]: - """Get a dictionary of StratPools.""" + """Get a dictionary of :py:class:`StratPool`\u200bs.""" # TODO: NOTE: NOT COMPATIBLE WITH SUB-RPOOL/STRAT_POOLS # TODO: (and it doesn't have to be) import itertools @@ -1316,7 +1316,7 @@ def get_strats( # noqa: PLR0912,PLR0915 return new_strats def initialize_nipype_wf(self, name: str = "") -> None: - """Initialize a new nipype workflow.""" + """Initialize a new nipype :py:class:`~pe.Workflow`\u200b.""" if name: name = f"_{name}" workflow_name = f"cpac{name}_{self.unique_id}" @@ -1430,7 +1430,7 @@ def ingress_freesurfer(self) -> None: return def ingress_output_dir(self) -> None: - """Ingress an output directory into a ResourcePool.""" + """Ingress an output directory into a `ResourcePool`.""" dir_path = self.data_paths.derivatives_dir assert dir_path is not None WFLOGGER.info("\nPulling 
outputs from %s.\n", dir_path) @@ -1982,7 +1982,7 @@ def ingress_pipeconfig_paths(self): def create_func_datasource( self, rest_dict: dict, wf_name="func_datasource" ) -> pe.Workflow: - """Create a workflow to gather timeseries data. + """Create a :py:class:`~pe.Workflow` to gather timeseries data. Return the functional timeseries-related file paths for each series/scan from the dictionary of functional files described in the data configuration (sublist) YAML @@ -2345,7 +2345,7 @@ def ingress_raw_anat_data(self) -> None: self.ingress_freesurfer() def connect_block(self, wf: pe.Workflow, block: NodeBlock) -> pe.Workflow: # noqa: PLR0912,PLR0915 - """Connect a NodeBlock via the ResourcePool.""" + """Connect a :py:class:`NodeBlock` via the `ResourcePool`.""" debug = bool(self.cfg.pipeline_setup["Debugging"]["verbose"]) # type: ignore [attr-defined] all_opts: list[str] = [] @@ -2938,9 +2938,9 @@ def post_process( def get_resource_strats_from_prov(prov: list | str) -> dict[str, list | str]: """Return all entries that led to this provenance. - If you provide the provenance of a resource pool output, this will - return a dictionary of all the preceding resource pool entries that - led to that one specific output: + If you provide the provenance of a `ResourcePool` output, this will + return a dictionary of all the preceding `ResourcePool` entries that + led to that one specific output:: {rpool entry}: {that entry's provenance} {rpool entry}: {that entry's provenance} """ @@ -2961,18 +2961,21 @@ def get_resource_strats_from_prov(prov: list | str) -> dict[str, list | str]: def _config_lookup( self, keylist: str | list[str], fallback_type: type = NoneType ) -> Any: - """Lookup a config key, return None if not found.""" + """Lookup a :py:class:`Configuration` key, return `None` if not found.""" try: return self.cfg[keylist] except (AttributeError, KeyError): return fallback_type() def _get_pipe_number(self, pipe_idx: str | tuple) -> int: - """Return the index of a strategy in ``self.pipe_list``.""" + """Return the index of a strategy in `self.pipe_list`.""" return self.pipe_list.index(pipe_idx) def _get_unlabelled(self, resource: str) -> set[str]: - """Get unlabelled resources (that need integer suffixes to differentiate).""" + """Get unlabelled :py:class:`Resource`\u200bs. + + These :py:class:`Resource`\u200bs need integer suffixes to differentiate. + """ from CPAC.func_preproc.func_motion import motion_estimate_filter all_jsons = [ @@ -3019,7 +3022,7 @@ def _get_unlabelled(self, resource: str) -> set[str]: class StratPool(_Pool): - """A pool of ResourcePools keyed by strategy.""" + """A pool of :py:class:`ResourcePool`s keyed by strategy.""" def __init__( self, @@ -3028,7 +3031,7 @@ def __init__( rpool: Optional[dict] = None, name: str | list[str] = "", ) -> None: - """Initialize a StratPool.""" + """Initialize a `StratPool`.""" super().__init__() if not rpool: self.rpool = STRAT_DICT({}) @@ -3042,7 +3045,7 @@ def __init__( self._regressor_dct: dict def append_name(self, name: str) -> None: - """Append a name to the StratPool.""" + """Append a name to the `StratPool`.""" self.name.append(name) @overload @@ -3112,7 +3115,7 @@ def get( report_fetched: bool = False, optional: bool = False, ): - """Return a Resource.""" + """Return a :py:class:`Resource`\u200b.""" return super().get(resource, pipe_idx, report_fetched, optional) @overload @@ -3124,7 +3127,7 @@ def get_data( self, resource: list[str] | str, report_fetched: Literal[False] = False ) -> ResourceData: ... 
def get_data(self, resource, report_fetched=False): - """Get ResourceData from a StratPool.""" + """Get :py:class:`ResourceData` from a `StratPool`.""" _resource = self.get(resource, report_fetched=report_fetched) if report_fetched: assert isinstance(_resource, tuple) @@ -3135,17 +3138,17 @@ def get_data(self, resource, report_fetched=False): return _resource.data def get_json(self, resource: str) -> dict: - """Get JSON metadata from a Resource in a StratPool.""" + """Get JSON metadata from a :py:class:`Resource` in a `StratPool`.""" return self.get(resource).json json = property( fget=Resource.get_json, fset=Resource.set_json, - doc="""Return a deep copy of full-StratPool-strategy-specific JSON.""", + doc="""Return a deep copy of full-`StratPool`-strategy-specific JSON.""", ) def get_cpac_provenance(self, resource: list[str] | str) -> list: - """Get CpacProvenance for a given Resource.""" + """Get "CpacProvenance" for a given :py:class:`Resource`\u200b.""" # NOTE: strat_resource has to be entered properly by the developer # it has to either be rpool[resource][strat] or strat_pool[resource] if isinstance(resource, list): @@ -3157,7 +3160,7 @@ def get_cpac_provenance(self, resource: list[str] | str) -> list: return self.get(resource).cpac_provenance def copy_resource(self, resource: str, new_name: str): - """Copy a resource within a StratPool.""" + """Copy a :py:class:`Resource` within a `StratPool`.""" try: self.rpool[new_name] = self.rpool[resource] except KeyError: @@ -3168,7 +3171,7 @@ def filter_name(self, cfg: Configuration) -> str: """ Return the name of the filter for this strategy. - In a strat_pool with filtered movement parameters. + In a `StratPool` with filtered movement parameters. """ motion_filters = cfg[ "functional_preproc", @@ -3197,7 +3200,7 @@ def filter_name(self, cfg: Configuration) -> str: return "none" def preserve_json_info(self, resource: str, strat_resource: Resource) -> None: - """Preserve JSON info when updating a StratPool.""" + """Preserve JSON info when updating a `StratPool`.""" data_type = resource.split("_")[-1] if data_type not in self._json["subjson"]: self._json["subjson"][data_type] = {} @@ -3207,7 +3210,10 @@ def preserve_json_info(self, resource: str, strat_resource: Resource) -> None: def regressor_dct(self) -> dict: """Return the regressor dictionary for the current strategy if one exists. - Raises KeyError otherwise. + Raises + ------ + KeyError + If regressor dictionary does not exist in current strategy. 
""" # pylint: disable=attribute-defined-outside-init if hasattr(self, "_regressor_dct"): # memoized @@ -3243,7 +3249,7 @@ def regressor_dct(self) -> dict: @property def filtered_movement(self) -> bool: - """Check if the movement parameters have been filtered in this StratPool.""" + """Check if the movement parameters have been filtered in this `StratPool`.""" try: return "motion_estimate_filter" in str( self.get_cpac_provenance("desc-movementParameters_motion") @@ -3254,7 +3260,7 @@ def filtered_movement(self) -> bool: def _check_null(val: Any) -> Any: - """Return ``None`` if ``val`` == "none" (case-insensitive).""" + """Return `None` if `val` == "none" (case-insensitive).""" if isinstance(val, str): val = None if val.lower() == "none" else val return val diff --git a/CPAC/pipeline/schema.py b/CPAC/pipeline/schema.py index 915cb47045..8f9e2ffc58 100644 --- a/CPAC/pipeline/schema.py +++ b/CPAC/pipeline/schema.py @@ -63,18 +63,12 @@ Number = Any(float, int, All(str, Match(SCIENTIFIC_NOTATION_STR_REGEX))) -def str_to_bool1_1(x): # pylint: disable=invalid-name - """Convert strings to Booleans for YAML1.1 syntax. +def str_to_bool1_1(x: Any) -> bool: # pylint: disable=invalid-name + """Convert strings to Booleans for YAML1.1 syntax[1]_. - Ref https://yaml.org/type/bool.html - - Parameters + References ---------- - x : any - - Returns - ------- - bool + .. [1] 2005-01-18. Oren Ben-Kiki, Clark Evans & Brian Ingerson. `"Boolean Language-Independent Type for YAML™ Version 1.1" [Working Draft] `_. Copyright © 2001-2005 Oren Ben-Kiki, Clark Evans, Brian Ingerson. """ if isinstance(x, str): try: @@ -316,19 +310,9 @@ def str_to_bool1_1(x): # pylint: disable=invalid-name ) -def name_motion_filter(mfilter, mfilters=None): +def name_motion_filter(mfilter: dict, mfilters: Optional[list] = None) -> str: """Given a motion filter, create a short string for the filename. - Parameters - ---------- - mfilter : dict - - mfliters : list or None - - Returns - ------- - str - Examples -------- >>> name_motion_filter({'filter_type': 'notch', 'filter_order': 2, @@ -385,19 +369,8 @@ def name_motion_filter(mfilter, mfilters=None): return name -def permutation_message(key, options): - """Give a human-readable error message for keys that accept permutation values. - - Parameters - ---------- - key: str - - options: list or set - - Returns - ------- - msg: str - """ +def permutation_message(key: str, options: list | set) -> str: + """Give a human-readable error message for keys that accept permutation values.""" return f""" \'{key}\' takes a dictionary with paths to region-of-interest (ROI) @@ -412,7 +385,7 @@ def permutation_message(key, options): """ -def sanitize(filename): +def sanitize(filename: str) -> str: """Sanitize a filename and replace whitespaces with underscores.""" return re.sub(r"\s+", "_", sanitize_filename(filename)) @@ -1253,20 +1226,12 @@ def sanitize(filename): ) -def schema(config_dict): +def schema(config_dict: dict) -> dict: """Validate a participant-analysis pipeline configuration. Validate against the latest validation schema by first applying backwards- compatibility patches, then applying Voluptuous validation, then handling complex configuration interaction checks before returning validated config_dict. 
- - Parameters - ---------- - config_dict : dict - - Returns - ------- - dict """ from CPAC.utils.utils import _changes_1_8_0_to_1_8_1 diff --git a/CPAC/pipeline/test/test_engine.py b/CPAC/pipeline/test/test_engine.py index 01283042fb..e23741d2e8 100644 --- a/CPAC/pipeline/test/test_engine.py +++ b/CPAC/pipeline/test/test_engine.py @@ -32,21 +32,21 @@ def _set_up_test( bids_examples: Path, preconfig: str, tmp_path: Path ) -> tuple[Configuration, dict]: - """Set up ``cfg`` and ``sub_data`` for engine tests.""" + """Set up `cfg` and `sub_data` for engine tests.""" bids_dir = str(bids_examples / "ds051") sub_data = create_cpac_data_config(bids_dir, skip_bids_validator=True)[0] cfg = Preconfiguration(preconfig) cfg.pipeline_setup["output_directory"]["path"] = str(tmp_path / "out") cfg.pipeline_setup["working_directory"]["path"] = str(tmp_path / "work") cfg.pipeline_setup["log_directory"]["path"] = str(tmp_path / "logs") - return (cfg, sub_data) + return cfg, sub_data @pytest.mark.parametrize("preconfig", ["default"]) def test_ingress_func_raw_data( bids_examples: Path, preconfig: str, tmp_path: Path ) -> None: - """Test :py:method:~`CPAC.pipeline.engine.resource.ResourcePool.ingress_raw_func_data`.""" + """Test :py:method:`ResourcePool.ingress_raw_func_data`\u200b.""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) rpool.gather_pipes(rpool.wf, cfg, all_types=True) @@ -56,7 +56,7 @@ def test_ingress_func_raw_data( def test_ingress_anat_raw_data( bids_examples: Path, preconfig: str, tmp_path: Path ) -> None: - """Test :py:method:~`CPAC.pipeline.engine.resource.ResourcePool.ingress_raw_anat_data`.""" + """Test :py:method:`ResourcePool.ingress_raw_anat_data`\u200b.""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) rpool = ResourcePool( cfg=cfg, @@ -70,7 +70,7 @@ def test_ingress_anat_raw_data( def test_ingress_pipeconfig_data( bids_examples: Path, preconfig: str, tmp_path: Path ) -> None: - """Test :py:method:~`CPAC.pipeline.engine.resource.ResourcePool.ingress_pipeconfig_paths`.""" + """Test :py:method:`ResourcePool.ingress_pipeconfig_paths`\u200b.""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) rpool = ResourcePool( cfg=cfg, @@ -83,7 +83,7 @@ def test_ingress_pipeconfig_data( def test_build_anat_preproc_stack( bids_examples: Path, preconfig: str, tmp_path: Path ) -> None: - """Test :py:func:~`CPAC.pipeline.cpac_pipeline.build_anat_preproc_stack`.""" + """Test :py:func:`~build_anat_preproc_stack`\u200b.""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) @@ -94,7 +94,7 @@ def test_build_anat_preproc_stack( @pytest.mark.parametrize("preconfig", ["default"]) def test_build_workflow(bids_examples: Path, preconfig: str, tmp_path: Path) -> None: - """Test :py:func:~`CPAC.pipeline.cpac_pipeline.build_workflow`.""" + """Test :py:func:`~build_workflow`\u200b.""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) wf = build_workflow(sub_data_dct["subject_id"], sub_data_dct, cfg) diff --git a/CPAC/pipeline/utils.py b/CPAC/pipeline/utils.py index 7b1dbaffff..6f6953fef2 100644 --- a/CPAC/pipeline/utils.py +++ b/CPAC/pipeline/utils.py @@ -19,27 +19,13 @@ from itertools import chain from CPAC.utils.bids_utils import insert_entity +from CPAC.utils.configuration.configuration import Configuration -def name_fork(resource_idx, cfg, json_info, 
out_dct): - """Create and insert entities for forkpoints. - - Parameters - ---------- - resource_idx : str - - cfg : CPAC.utils.configuration.Configuration - - json_info : dict - - out_dct : dict - - Returns - ------- - resource_idx : str - - out_dct : dict - """ +def name_fork( + resource_idx: str, cfg: Configuration, json_info: dict, out_dct: dict +) -> tuple[str, dict]: + """Create and insert entities for forkpoints.""" from CPAC.func_preproc.func_motion import motion_estimate_filter if cfg.switch_is_on( @@ -104,12 +90,6 @@ def present_outputs(outputs: dict, keys: list) -> dict: NodeBlocks that differ only by configuration options and relevant output keys. - Parameters - ---------- - outputs : dict - - keys : list of str - Returns ------- dict From 6c96667dc3261d5c0c1ef4e7118ee99ebe567258 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 18 Jul 2024 12:59:32 -0400 Subject: [PATCH 86/93] :bug: Fix conflicting class name --- CPAC/pipeline/schema.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CPAC/pipeline/schema.py b/CPAC/pipeline/schema.py index 8f9e2ffc58..cf13a4ecc8 100644 --- a/CPAC/pipeline/schema.py +++ b/CPAC/pipeline/schema.py @@ -21,6 +21,7 @@ from itertools import chain, permutations import re from subprocess import CalledProcessError +from typing import Optional as TypeOptional import numpy as np from pathvalidate import sanitize_filename @@ -310,7 +311,7 @@ def str_to_bool1_1(x: Any) -> bool: # pylint: disable=invalid-name ) -def name_motion_filter(mfilter: dict, mfilters: Optional[list] = None) -> str: +def name_motion_filter(mfilter: dict, mfilters: TypeOptional[list] = None) -> str: """Given a motion filter, create a short string for the filename. Examples From 763af4e8f980bd5079347b6764435e96fe817fa1 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 18 Jul 2024 14:30:35 -0400 Subject: [PATCH 87/93] :art: More docstring updates for ResourcePool refactor --- CPAC/nuisance/nuisance.py | 30 +------- CPAC/pipeline/engine/nodeblock.py | 86 +++++++++++----------- CPAC/pipeline/engine/resource.py | 48 ++++++------ CPAC/pipeline/schema.py | 8 +- CPAC/pipeline/test/test_engine.py | 10 +-- CPAC/qc/xcp.py | 22 +++--- CPAC/registration/registration.py | 26 ++++--- CPAC/utils/interfaces/function/function.py | 29 ++++---- CPAC/utils/strategy.py | 2 +- 9 files changed, 122 insertions(+), 139 deletions(-) diff --git a/CPAC/nuisance/nuisance.py b/CPAC/nuisance/nuisance.py index 967647f429..871d18d232 100644 --- a/CPAC/nuisance/nuisance.py +++ b/CPAC/nuisance/nuisance.py @@ -37,8 +37,8 @@ TR_string_to_float, ) from CPAC.pipeline import nipype_pipeline_engine as pe -from CPAC.pipeline.engine import ResourcePool from CPAC.pipeline.engine.nodeblock import nodeblock +from CPAC.pipeline.engine.resource import StratPool from CPAC.registration.registration import ( apply_transform, warp_timeseries_to_EPItemplate, @@ -2413,34 +2413,12 @@ def nuisance_regressors_generation_T1w(wf, cfg, strat_pool, pipe_num, opt=None): def nuisance_regressors_generation( wf: Workflow, cfg: Configuration, - strat_pool: ResourcePool, + strat_pool: StratPool, pipe_num: int, opt: dict, space: Literal["T1w", "bold"], ) -> tuple[Workflow, dict]: - """Generate nuisance regressors. 
- - Parameters - ---------- - wf : ~nipype.pipeline.engine.workflows.Workflow - - cfg : ~CPAC.utils.configuration.Configuration - - strat_pool : ~CPAC.pipeline.engine.ResourcePool - - pipe_num : int - - opt : dict - - space : str - T1w or bold - - Returns - ------- - wf : nipype.pipeline.engine.workflows.Workflow - - outputs : dict - """ + """Generate nuisance regressors.""" prefixes = [f"space-{space}_"] * 2 reg_tool = None if space == "T1w": @@ -2664,7 +2642,7 @@ def nuisance_regressors_generation( return (wf, outputs) -def nuisance_regression(wf, cfg, strat_pool, pipe_num, opt, space, res=None): +def nuisance_regression(wf, cfg, strat_pool: StratPool, pipe_num, opt, space, res=None): """Nuisance regression in native (BOLD) or template space. Parameters diff --git a/CPAC/pipeline/engine/nodeblock.py b/CPAC/pipeline/engine/nodeblock.py index e1b1437d3c..fc6fe59b76 100644 --- a/CPAC/pipeline/engine/nodeblock.py +++ b/CPAC/pipeline/engine/nodeblock.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . -"""Classes and decorator for :py:class:`NodeBlock`\u200bs and :py:class:`NodeBlockFunction`\u200bs.""" +"""Classes and decorator for :`NodeBlock`s and :`NodeBlockFunction`s.""" from typing import Any, Callable, Optional, TYPE_CHECKING @@ -36,7 +36,7 @@ class NodeBlockFunction: - """Store a reference to the nodeblock function and all of its meta-data.""" + """Store a reference to the nodeblock function and all of its metadata.""" def __init__( self, @@ -50,22 +50,24 @@ def __init__( outputs: Optional[NODEBLOCK_OUTPUTS] = None, ) -> None: self.func = func - """`Nodeblock` function reference.""" + """:py:class:`Nodeblock` function reference.""" self.name: str = name - """Used in the graph and logging to identify the :py:class:`NodeBlock` and its component :py:class:`~nipype.pipeline.engine.Node`\u200bs.""" + """Used in the graph and logging to identify the :py:class:`NodeBlock` and its + component :py:class:`~nipype.pipeline.engine.Node` s.""" self.config: Optional[list[str]] = config """ - Indicates the nested keys in a C-PAC pipeline :py:class:`Configuration` - should configure a `NodeBlock` built from this function. If `config` is set to - `None`, then all other configuration-related entities must be specified from the - root of the configuration. + Indicates the nested keys in a C-PAC pipeline + :py:class:`~CPAC.utils.configuration.Configuration` should configure a + :py:class:`NodeBlock` built from this function. If `config` is set to ``None``, + then all other :py:class:`~CPAC.utils.configuration.Configuration` -related + entities must be specified from the root of the :py:class:`~CPAC.utils.configuration.Configuration` . """ self.switch: Optional[list[str] | list[list[str]]] = switch """ - Indicates any keys that should evaluate to `True` for this :py:class:`NodeBlock` - to be active. A list of lists of strings indicates multiple `switch`\u200bes - that must all be `True` to run, and is currently only an option if `config` is - set to `None`. + Indicates any keys that should evaluate to ``True`` for this :py:class:`NodeBlock` + to be active. A list of lists of strings indicates multiple `switch` es + that must all be ``True`` to run, and is currently only an option if `config` is + set to ``None``. 
""" self.option_key: Optional[str | list[str]] = option_key """ @@ -76,12 +78,12 @@ def __init__( """Indicates values for which this :py:class:`NodeBlock` should be active.""" self.inputs: list[str | list | tuple] = inputs if inputs else [] """:py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating - resources needed for the :py:class:`NodeBlock`\u200b's functionality.""" + resources needed for the :py:class:`NodeBlock`'s functionality.""" self.outputs: list[str] | dict[str, Any] = outputs if outputs else [] """ :py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating - resources generated or updated by the `NodeBlock`, optionally including metadata - for the outputs' respective sidecars. + resources generated or updated by the :py:class:`NodeBlock`, optionally + including metadata for the outputs' respective sidecars. """ # Forward function attributes similar to functools.update_wrapper: @@ -106,9 +108,9 @@ def __call__( pipe_num: Optional[int | str], opt: Optional[str] = None, ) -> tuple[Workflow, dict[str, "ResourceData"]]: - """Call a `NodeBlockFunction`. + """Call a :py:class:`NodeBlockFunction`. - All `NodeBlockFunction`\u200bs have the same signature. + All :py:class:`NodeBlockFunction` s have the same signature. """ return self.func(wf, cfg, strat_pool, pipe_num, opt) @@ -128,7 +130,7 @@ def legacy_nodeblock_dict(self): } def __repr__(self) -> str: - """Return reproducible string representation of a `NodeBlockFunction`.""" + """Return reproducible string representation of a :py:class:`NodeBlockFunction`.""" return ( f"NodeBlockFunction({self.func.__module__}." f'{self.func.__name__}, "{self.name}", ' @@ -139,19 +141,19 @@ def __repr__(self) -> str: ) def __str__(self) -> str: - """Return string representation of a `NodeBlockFunction`.""" + """Return string representation of a :py:class:`NodeBlockFunction`.""" return f"NodeBlockFunction({self.name})" class NodeBlock: - """A :py:class:`Workflow` subgraph composed of :py:class:`NodeBlockFunction`\u200bs.""" + """A :py:class:`~nipype.pipeline.engine.Workflow` subgraph composed of :py:class:`NodeBlockFunction`s.""" def __init__( self, node_block_functions: NodeBlockFunction | PIPELINE_BLOCKS, debug: bool = False, ) -> None: - """Create a `NodeBlock` from a list of py:class:`NodeBlockFunction`\u200bs.""" + """Create a :py:class:`NodeBlock` from a list of :py:class:`NodeBlockFunction` s.""" if not isinstance(node_block_functions, list): node_block_functions = [node_block_functions] @@ -223,7 +225,7 @@ def __init__( logging.update_logging(config) def check_output(self, outputs: NODEBLOCK_OUTPUTS, label: str, name: str) -> None: - """Check if a label is listed in a `NodeBlock`\u200b's `outputs`. + """Check if a label is listed in a :py:class:`NodeBlock` 's `outputs`. Raises ------ @@ -242,12 +244,12 @@ def check_output(self, outputs: NODEBLOCK_OUTPUTS, label: str, name: str) -> Non def list_blocks( pipeline_blocks: PIPELINE_BLOCKS, indent: Optional[int] = None ) -> str: - """List :py:class:`NodeBlockFunction`\u200bs line by line. + """List :py:class:`NodeBlockFunction` s line by line. 
Parameters ---------- pipeline_blocks - list of :py:class:`NodeBlockFunction`\u200bs + list of :py:class:`NodeBlockFunction` s indent number of spaces after a tab indent @@ -255,7 +257,7 @@ def list_blocks( Returns ------- str - formatted list of :py:class:`NodeBlockFunction`\u200bs + formatted list of :py:class:`NodeBlockFunction` s """ blockstring = yaml.dump( [ @@ -292,46 +294,48 @@ def nodeblock( inputs: Optional[NODEBLOCK_INPUTS] = None, outputs: Optional[list[str] | dict[str, Any]] = None, ): - """Define a :py:class:`NodeBlockFunction`\u200b. + """Define a :py:class:`NodeBlockFunction` . - Connections to the pipeline :py:class:`Configuration` and to other :py:class:`NodeBlockFunction`\u200bs. + Connections to the pipeline :py:class:`~CPAC.utils.configuration.Configuration` and to other :py:class:`NodeBlockFunction` s. Parameters ---------- name Used in the graph and logging to identify the :py:class:`NodeBlock` and its - component :py:class:`~nipype.pipeline.engine.Node`\u200bs. - The :py:class:`NodeBlockFunction`\u200b's `.__name__` is used if `name` is not + component :py:class:`~nipype.pipeline.engine.Node` s. + The :py:class:`NodeBlockFunction`'s `.__name__` is used if `name` is not provided. config - Indicates the nested keys in a C-PAC pipeline :py:class:`Configuration` should - configure a :py:class:`NodeBlock` built from this - :py:class:`NodeBlockFunction`\u200b. If `config` is set to `None`, then all other - :py:class:`Configuration`\u200b-related entities must be specified from the root - of the :py:class:`Configuration`\u200b. + Indicates the nested keys in a C-PAC pipeline + :py:class:`~CPAC.pipeline.configuration.Configuration` should configure a + :py:class:`NodeBlock` built from this :py:class:`NodeBlockFunction`. If `config` + is set to ``None``, then all other + :py:class:`~CPAC.pipeline.configuration.Configuration` -related entities + must be specified from the root of the + :py:class:`~CPAC.pipeline.configuration.Configuration` . switch - Indicates any keys that should evaluate to `True` for this :py:class:`NodeBlock` - to be active. A list of lists of strings indicates multiple switches that must - all be `True` to run, and is currently only an option if config is set to - `None`. + Indicates any keys that should evaluate to ``True`` for this + :py:class:`NodeBlock` to be active. A list of lists of strings indicates + multiple switches that must all be ``True`` to run, and is currently only an + option if config is set to ``None``. option_key Indicates the nested keys (starting at the nested key indicated by `config`) - that should configure this :py:class:`NodeBlock`\u200b. + that should configure this :py:class:`NodeBlock`. option_val Indicates values for which this :py:class:`NodeBlock` should be active. inputs - ResourcePool keys indicating files needed for the :py:class:`NodeBlock`\u200b's + :py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating files needed for the :py:class:`NodeBlock` 's functionality. outputs :py:class:`~CPAC.pipeline.engine.resource.ResourcePool` keys indicating files generated or updated by the :py:class:`NodeBlock`, optionally including metadata - for the outputs' respective sidecars. + for the `outputs` ' respective sidecars. 
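A hypothetical usage sketch of the decorator documented above (the configuration keys, resource names, and function body are illustrative, not taken from this diff):

    from CPAC.pipeline.engine.nodeblock import nodeblock

    @nodeblock(
        name="example_smooth",
        config=["functional_preproc", "smoothing"],
        switch=["run"],
        option_key="method",
        option_val=["fsl", "afni"],
        inputs=["desc-preproc_bold"],
        outputs=["desc-smoothed_bold"],
    )
    def example_smooth(wf, cfg, strat_pool, pipe_num, opt=None):
        # fetch the needed input resource from the current strategy
        node, out = strat_pool.get_data("desc-preproc_bold")
        # ...build and connect Nodes for the selected `opt` here...
        return wf, {"desc-smoothed_bold": (node, out)}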
""" return lambda func: NodeBlockFunction( func, diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 3e669444f2..5499dc44bc 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . -""":py:class:`Resource`\u200bs and :py:class:`ResourcePool`\u200bs for C-PAC.""" +""":py:class:`Resource` s and :py:class:`ResourcePool` s for C-PAC.""" import ast from collections.abc import KeysView @@ -202,7 +202,7 @@ class ResourceData(NamedTuple): """Attribute and tuple access for `ResourceData`.""" node: pe.Node - """Resource :py:class:`~pe.Node`.""" + """Resource :py:class:`~nipype.pipeline.engine.Node`.""" out: str """Output key.""" @@ -213,8 +213,8 @@ class Resource: def __init__(self, data: tuple[pe.Node, str], json: dict) -> None: """Initialize a `Resource`.""" self.data = ResourceData(*data) - """Tuple of source :py:class:`~pe.Node` and output key.""" - self._json = json + """Tuple of source :py:class:`~nipype.pipeline.engine.Node` and output key.""" + self._json: dict = json """Metadata.""" self._keys = {"data", "json"} """Dictionary-style subscriptable keys.""" @@ -224,7 +224,7 @@ def keys(self) -> list[str]: return list(self._keys) def __contains__(self, item: Any) -> bool: - """Return `True` if `item` in `self.keys()`, `False` otherwise.""" + """Return ``True`` if `item` in :py:method:`~Resource.keys()`, ``False`` otherwise.""" return item in self.keys() def __getitem__(self, name: str) -> Any: @@ -278,7 +278,7 @@ class _Pool: """All Resources.""" def __init__(self) -> None: - """Initialize a :py:class:`ResourcePool` or :py:class:`StratPool`\u200b.""" + """Initialize a :py:class:`ResourcePool` or :py:class:`StratPool` .""" self.ants_interp: str self.cfg: Configuration self.creds_paths: Optional[str] @@ -349,7 +349,7 @@ def keys(self) -> KeysView: return self.rpool.keys() def __contains__(self, key) -> bool: - """Return `True` if key in `_Pool`, `False` otherwise.""" + """Return ``True`` if key in `_Pool`, ``False`` otherwise.""" return key in self.keys() @staticmethod @@ -449,7 +449,7 @@ def get( Optional[Resource | STRAT_DICT | dict] | tuple[Optional[Resource | STRAT_DICT], Optional[str]] ): - """Return a dictionary of strats or a single :py:class:`Resource`\u200b.""" + """Return a dictionary of strats or a single :py:class:`Resource` .""" if not isinstance(resource, list): resource = [resource] # if a list of potential inputs are given, pick the first one found @@ -483,7 +483,7 @@ def get( class ResourcePool(_Pool): - """A pool of :py:class:`Resource`\u200bs.""" + """A pool of :py:class:`Resource` s.""" from CPAC.pipeline.engine.nodeblock import ( NODEBLOCK_INPUTS, @@ -619,7 +619,7 @@ def __init__( def back_propogate_template_name( self, resource_idx: str, json_info: dict, id_string: pe.Node ) -> None: - """Find and apply the template name from a :py:class:`Resource`\u200b's provenance.""" + """Find and apply the template name from a :py:class:`Resource` 's provenance.""" if "template" in resource_idx and self.check_rpool("derivatives-dir"): if self.check_rpool("template"): node, out = self.get_data("template") @@ -995,7 +995,7 @@ def get( ): """Return a dictionary of strats. - Inside those, are dictionaries like ``{'data': (node, out), 'json': info}``. + Inside those are dictionaries like ``{'data': (node, out), 'json': info}``. 
""" return super().get(resource, pipe_idx, report_fetched, optional) @@ -1077,7 +1077,7 @@ def get_raw_label(resource: str) -> str: def get_strats( # noqa: PLR0912,PLR0915 self, resources: NODEBLOCK_INPUTS, debug: bool = False ) -> dict[str | tuple, "StratPool"]: - """Get a dictionary of :py:class:`StratPool`\u200bs.""" + """Get a dictionary of :py:class:`StratPool` s.""" # TODO: NOTE: NOT COMPATIBLE WITH SUB-RPOOL/STRAT_POOLS # TODO: (and it doesn't have to be) import itertools @@ -1316,7 +1316,7 @@ def get_strats( # noqa: PLR0912,PLR0915 return new_strats def initialize_nipype_wf(self, name: str = "") -> None: - """Initialize a new nipype :py:class:`~pe.Workflow`\u200b.""" + """Initialize a new nipype :py:class:`~nipype.pipeline.engine.Workflow` .""" if name: name = f"_{name}" workflow_name = f"cpac{name}_{self.unique_id}" @@ -1982,7 +1982,7 @@ def ingress_pipeconfig_paths(self): def create_func_datasource( self, rest_dict: dict, wf_name="func_datasource" ) -> pe.Workflow: - """Create a :py:class:`~pe.Workflow` to gather timeseries data. + """Create a :py:class:`~nipype.pipeline.engine.Workflow` to gather timeseries data. Return the functional timeseries-related file paths for each series/scan from the dictionary of functional files described in the data configuration (sublist) YAML @@ -2345,7 +2345,7 @@ def ingress_raw_anat_data(self) -> None: self.ingress_freesurfer() def connect_block(self, wf: pe.Workflow, block: NodeBlock) -> pe.Workflow: # noqa: PLR0912,PLR0915 - """Connect a :py:class:`NodeBlock` via the `ResourcePool`.""" + """Connect a :py:class:`~CPAC.pipeline.engine.nodeblock.NodeBlock` via the `ResourcePool`.""" debug = bool(self.cfg.pipeline_setup["Debugging"]["verbose"]) # type: ignore [attr-defined] all_opts: list[str] = [] @@ -2713,7 +2713,7 @@ def derivative_xfm( pipe_idx: str | tuple, pipe_x: int, ) -> pe.Workflow: - """Find the appropriate bold-to-template transform for given ``pipe_idx``.""" + """Find the appropriate bold-to-template transform for given `pipe_idx`.""" if label in self.xfm: json_info = dict(json_info) @@ -2961,7 +2961,7 @@ def get_resource_strats_from_prov(prov: list | str) -> dict[str, list | str]: def _config_lookup( self, keylist: str | list[str], fallback_type: type = NoneType ) -> Any: - """Lookup a :py:class:`Configuration` key, return `None` if not found.""" + """Lookup a :py:class:`~CPAC.utils.configuration.Configuration` key, return ``None`` if not found.""" try: return self.cfg[keylist] except (AttributeError, KeyError): @@ -2972,9 +2972,9 @@ def _get_pipe_number(self, pipe_idx: str | tuple) -> int: return self.pipe_list.index(pipe_idx) def _get_unlabelled(self, resource: str) -> set[str]: - """Get unlabelled :py:class:`Resource`\u200bs. + """Get unlabelled :py:class:`Resource` s. - These :py:class:`Resource`\u200bs need integer suffixes to differentiate. + These :py:class:`Resource` s need integer suffixes to differentiate. 
""" from CPAC.func_preproc.func_motion import motion_estimate_filter @@ -3022,7 +3022,7 @@ def _get_unlabelled(self, resource: str) -> set[str]: class StratPool(_Pool): - """A pool of :py:class:`ResourcePool`s keyed by strategy.""" + """A pool of :py:class:`ResourcePool` s keyed by strategy.""" def __init__( self, @@ -3042,7 +3042,7 @@ def __init__( if not isinstance(name, list): name = [name] self.name: list[str] = name - self._regressor_dct: dict + self._regressor_dct: dict = {} def append_name(self, name: str) -> None: """Append a name to the `StratPool`.""" @@ -3115,7 +3115,7 @@ def get( report_fetched: bool = False, optional: bool = False, ): - """Return a :py:class:`Resource`\u200b.""" + """Return a :py:class:`Resource` .""" return super().get(resource, pipe_idx, report_fetched, optional) @overload @@ -3148,7 +3148,7 @@ def get_json(self, resource: str) -> dict: ) def get_cpac_provenance(self, resource: list[str] | str) -> list: - """Get "CpacProvenance" for a given :py:class:`Resource`\u200b.""" + """Get "CpacProvenance" for a given :py:class:`Resource` .""" # NOTE: strat_resource has to be entered properly by the developer # it has to either be rpool[resource][strat] or strat_pool[resource] if isinstance(resource, list): @@ -3260,7 +3260,7 @@ def filtered_movement(self) -> bool: def _check_null(val: Any) -> Any: - """Return `None` if `val` == "none" (case-insensitive).""" + """Return ``None`` if `val` == "none" (case-insensitive).""" if isinstance(val, str): val = None if val.lower() == "none" else val return val diff --git a/CPAC/pipeline/schema.py b/CPAC/pipeline/schema.py index cf13a4ecc8..6dc11326d5 100644 --- a/CPAC/pipeline/schema.py +++ b/CPAC/pipeline/schema.py @@ -21,7 +21,7 @@ from itertools import chain, permutations import re from subprocess import CalledProcessError -from typing import Optional as TypeOptional +from typing import Any as TypeAny, Optional as TypeOptional import numpy as np from pathvalidate import sanitize_filename @@ -64,8 +64,8 @@ Number = Any(float, int, All(str, Match(SCIENTIFIC_NOTATION_STR_REGEX))) -def str_to_bool1_1(x: Any) -> bool: # pylint: disable=invalid-name - """Convert strings to Booleans for YAML1.1 syntax[1]_. +def str_to_bool1_1(x: TypeAny) -> bool: # pylint: disable=invalid-name + """Convert strings to Booleans for YAML1.1 syntax [1]_. References ---------- @@ -1232,7 +1232,7 @@ def schema(config_dict: dict) -> dict: Validate against the latest validation schema by first applying backwards- compatibility patches, then applying Voluptuous validation, then handling complex - configuration interaction checks before returning validated config_dict. + configuration interaction checks before returning validated `config_dict`. 
""" from CPAC.utils.utils import _changes_1_8_0_to_1_8_1 diff --git a/CPAC/pipeline/test/test_engine.py b/CPAC/pipeline/test/test_engine.py index e23741d2e8..f35b0599d6 100644 --- a/CPAC/pipeline/test/test_engine.py +++ b/CPAC/pipeline/test/test_engine.py @@ -46,7 +46,7 @@ def _set_up_test( def test_ingress_func_raw_data( bids_examples: Path, preconfig: str, tmp_path: Path ) -> None: - """Test :py:method:`ResourcePool.ingress_raw_func_data`\u200b.""" + """Test :py:method:`ResourcePool.ingress_raw_func_data` .""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) rpool.gather_pipes(rpool.wf, cfg, all_types=True) @@ -56,7 +56,7 @@ def test_ingress_func_raw_data( def test_ingress_anat_raw_data( bids_examples: Path, preconfig: str, tmp_path: Path ) -> None: - """Test :py:method:`ResourcePool.ingress_raw_anat_data`\u200b.""" + """Test :py:method:`ResourcePool.ingress_raw_anat_data` .""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) rpool = ResourcePool( cfg=cfg, @@ -70,7 +70,7 @@ def test_ingress_anat_raw_data( def test_ingress_pipeconfig_data( bids_examples: Path, preconfig: str, tmp_path: Path ) -> None: - """Test :py:method:`ResourcePool.ingress_pipeconfig_paths`\u200b.""" + """Test :py:method:`ResourcePool.ingress_pipeconfig_paths` .""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) rpool = ResourcePool( cfg=cfg, @@ -83,7 +83,7 @@ def test_ingress_pipeconfig_data( def test_build_anat_preproc_stack( bids_examples: Path, preconfig: str, tmp_path: Path ) -> None: - """Test :py:func:`~build_anat_preproc_stack`\u200b.""" + """Test :py:func:`~build_anat_preproc_stack` .""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) @@ -94,7 +94,7 @@ def test_build_anat_preproc_stack( @pytest.mark.parametrize("preconfig", ["default"]) def test_build_workflow(bids_examples: Path, preconfig: str, tmp_path: Path) -> None: - """Test :py:func:`~build_workflow`\u200b.""" + """Test :py:func:`~build_workflow` .""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) wf = build_workflow(sub_data_dct["subject_id"], sub_data_dct, cfg) diff --git a/CPAC/qc/xcp.py b/CPAC/qc/xcp.py index 7fd59071bf..61bb008a0e 100644 --- a/CPAC/qc/xcp.py +++ b/CPAC/qc/xcp.py @@ -67,6 +67,7 @@ import pandas as pd import nibabel as nib from nipype.interfaces import afni, fsl +from nipype.pipeline.engine import Node, Workflow from CPAC.generate_motion_statistics.generate_motion_statistics import ( DVARS_strip_t0, @@ -74,6 +75,7 @@ ) from CPAC.pipeline import nipype_pipeline_engine as pe from CPAC.pipeline.engine.nodeblock import nodeblock +from CPAC.pipeline.engine.resource import StratPool from CPAC.qc.qcmetrics import regisQ from CPAC.utils.interfaces.function import Function @@ -85,29 +87,25 @@ ] -def _connect_motion(wf, nodes, strat_pool, qc_file, pipe_num): +def _connect_motion( + wf: Workflow, nodes: dict, strat_pool: StratPool, qc_file: Node, pipe_num: int +) -> Workflow: """ Connect the motion metrics to the workflow. Parameters ---------- - wf : nipype.pipeline.engine.Workflow + wf The workflow to connect the motion metrics to. - nodes : dict + nodes Dictionary of nodes already collected from the strategy pool. - strat_pool : CPAC.pipeline.engine.ResourcePool + strat_pool The current strategy pool. 
- qc_file : nipype.pipeline.engine.Node - A function node with the function ``generate_xcp_qc``. - - pipe_num : int - - Returns - ------- - wf : nipype.pipeline.engine.Workflow + qc_file + A function node with the function :py:func:`generate_xcp_qc` . """ # pylint: disable=invalid-name, too-many-arguments try: diff --git a/CPAC/registration/registration.py b/CPAC/registration/registration.py index 9db3fcfd4b..3673b267cf 100644 --- a/CPAC/registration/registration.py +++ b/CPAC/registration/registration.py @@ -17,7 +17,7 @@ # pylint: disable=too-many-lines,ungrouped-imports,wrong-import-order """Workflows for registration.""" -from typing import Optional +from typing import Optional, TYPE_CHECKING from voluptuous import RequiredFieldInvalid from nipype.interfaces import afni, ants, c3, fsl, utility as util @@ -39,10 +39,14 @@ seperate_warps_list, single_ants_xfm_to_list, ) +from CPAC.utils.configuration.configuration import Configuration from CPAC.utils.interfaces import Function from CPAC.utils.interfaces.fsl import Merge as fslMerge from CPAC.utils.utils import check_prov_for_motion_tool, check_prov_for_regtool +if TYPE_CHECKING: + from CPAC.pipeline.engine.resource import StratPool + def apply_transform( wf_name, @@ -5416,8 +5420,8 @@ def warp_tissuemask_to_template(wf, cfg, strat_pool, pipe_num, xfm, template_spa def warp_resource_to_template( wf: pe.Workflow, - cfg, - strat_pool, + cfg: Configuration, + strat_pool: "StratPool", pipe_num: int, input_resource: list[str] | str, xfm: str, @@ -5428,24 +5432,24 @@ def warp_resource_to_template( Parameters ---------- - wf : pe.Workflow + wf - cfg : CPAC.utils.configuration.Configuration + cfg - strat_pool : CPAC.pipeline.engine.ResourcePool + strat_pool - pipe_num : int + pipe_num - input_resource : str or list + input_resource key for the resource to warp to template - xfm : str + xfm key for the transform to apply - reference : str, optional + reference key for reference if not using f'{template_space}-template' - time_series : boolean, optional + time_series resource to transform is 4D? Returns diff --git a/CPAC/utils/interfaces/function/function.py b/CPAC/utils/interfaces/function/function.py index 34d01373d5..2df6741717 100644 --- a/CPAC/utils/interfaces/function/function.py +++ b/CPAC/utils/interfaces/function/function.py @@ -156,28 +156,28 @@ class Function(NipypeFunction): def __init__( self, - input_names=None, - output_names="out", - function=None, - imports=None, - as_module=False, + input_names: Optional[str | list[str]] = None, + output_names: Optional[str | list[str]] = "out", + function: Optional[Callable] = None, + imports: Optional[list[str]] = None, + as_module: bool = False, **inputs, ): - """Initialize a :py:func`~CPAC.utils.interfaces.function.Function` interface. + """Initialize a :py:func:`~CPAC.utils.interfaces.function.Function` interface. Parameters ---------- - input_names : single str or list or None + input_names names corresponding to function inputs if ``None``, derive input names from function argument names - output_names : single str or list + output_names names corresponding to function outputs (default: 'out'). if list of length > 1, has to match the number of outputs - function : callable + function callable python object. 
must be able to execute in an - isolated namespace (possibly in concert with the ``imports`` + isolated namespace (possibly in concert with the `imports` parameter) - imports : list of strings + imports list of import statements that allow the function to execute in an otherwise empty namespace. If these collide with imports defined via the :py:meth:`Function.sig_imports` @@ -244,12 +244,11 @@ def sig_imports(imports: list[str]) -> Callable: Parameters ---------- - imports : list of str + imports import statements to import the function in an otherwise empty namespace. If these collide with imports defined via the - :py:meth:`Function.__init__` initialization method, the - imports given as a parameter here will be overridden by - those from the initializer. + :py:meth:`Function.__init__` method, the imports given as a parameter here + will be overridden by those from the initializer. Returns ------- diff --git a/CPAC/utils/strategy.py b/CPAC/utils/strategy.py index 7ce1966198..42d6848e9c 100644 --- a/CPAC/utils/strategy.py +++ b/CPAC/utils/strategy.py @@ -21,7 +21,7 @@ class Strategy: def __init__(self): - self._resource_pool = ResourcePool({}) + self._resource_pool = ResourcePool() self.leaf_node = None self.leaf_out_file = None self.name = [] From 61eeef43c383331cd6363fa088e6d45ceed6774d Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 18 Jul 2024 20:08:08 -0400 Subject: [PATCH 88/93] :bug: Fix memoization --- CPAC/pipeline/engine/resource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 5499dc44bc..0cc005e59b 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -3216,7 +3216,7 @@ def regressor_dct(self) -> dict: If regressor dictionary does not exist in current strategy. """ # pylint: disable=attribute-defined-outside-init - if hasattr(self, "_regressor_dct"): # memoized + if hasattr(self, "_regressor_dct") and self._regressor_dct: # memoized # pylint: disable=access-member-before-definition return self._regressor_dct key_error = KeyError( From 2402a2ca721fd01e33100e4c3ef6571dde3e9dc8 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 18 Jul 2024 20:22:13 -0400 Subject: [PATCH 89/93] :pencil2: A little more docstring cleanup --- CPAC/pipeline/engine/nodeblock.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CPAC/pipeline/engine/nodeblock.py b/CPAC/pipeline/engine/nodeblock.py index fc6fe59b76..e68bfbf0d2 100644 --- a/CPAC/pipeline/engine/nodeblock.py +++ b/CPAC/pipeline/engine/nodeblock.py @@ -14,7 +14,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with C-PAC. If not, see . -"""Classes and decorator for :`NodeBlock`s and :`NodeBlockFunction`s.""" +"""Classes and decorator for :py:class:`NodeBlock` s and :py:class:`NodeBlockFunction` s.""" from typing import Any, Callable, Optional, TYPE_CHECKING @@ -146,7 +146,7 @@ def __str__(self) -> str: class NodeBlock: - """A :py:class:`~nipype.pipeline.engine.Workflow` subgraph composed of :py:class:`NodeBlockFunction`s.""" + """A :py:class:`~nipype.pipeline.engine.Workflow` subgraph composed of :py:class:`NodeBlockFunction` s.""" def __init__( self, From 8d8094131ca709b3a09e7a201c03deb5e6a8c0b9 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Thu, 18 Jul 2024 21:01:35 -0400 Subject: [PATCH 90/93] :art: Qualify refs to documented functions. 
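A hedged usage sketch of the sig_imports decorator documented above; the decorated function, its body, and the import list are illustrative only:

    @Function.sig_imports(["import os", "import numpy as np"])
    def count_voxels(in_file):
        """Toy function-node body that runs in an otherwise empty namespace."""
        import nibabel as nib  # local imports also work; sig_imports covers np
        return int(np.prod(nib.load(in_file).shape))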
--- CPAC/pipeline/engine/resource.py | 2 +- CPAC/pipeline/test/test_engine.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index 0cc005e59b..fb397a525d 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -224,7 +224,7 @@ def keys(self) -> list[str]: return list(self._keys) def __contains__(self, item: Any) -> bool: - """Return ``True`` if `item` in :py:method:`~Resource.keys()`, ``False`` otherwise.""" + """Return ``True`` if `item` in :py:meth:`Resource.keys()`, ``False`` otherwise.""" return item in self.keys() def __getitem__(self, name: str) -> Any: diff --git a/CPAC/pipeline/test/test_engine.py b/CPAC/pipeline/test/test_engine.py index f35b0599d6..07e0e6e5a4 100644 --- a/CPAC/pipeline/test/test_engine.py +++ b/CPAC/pipeline/test/test_engine.py @@ -46,7 +46,7 @@ def _set_up_test( def test_ingress_func_raw_data( bids_examples: Path, preconfig: str, tmp_path: Path ) -> None: - """Test :py:method:`ResourcePool.ingress_raw_func_data` .""" + """Test :py:meth:`~CPAC.pipeline.engine.resource.ResourcePool.ingress_raw_func_data` .""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) rpool.gather_pipes(rpool.wf, cfg, all_types=True) @@ -56,7 +56,7 @@ def test_ingress_func_raw_data( def test_ingress_anat_raw_data( bids_examples: Path, preconfig: str, tmp_path: Path ) -> None: - """Test :py:method:`ResourcePool.ingress_raw_anat_data` .""" + """Test :py:meth:`~CPAC.pipeline.engine.resource.ResourcePool.ingress_raw_anat_data` .""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) rpool = ResourcePool( cfg=cfg, @@ -70,7 +70,7 @@ def test_ingress_anat_raw_data( def test_ingress_pipeconfig_data( bids_examples: Path, preconfig: str, tmp_path: Path ) -> None: - """Test :py:method:`ResourcePool.ingress_pipeconfig_paths` .""" + """Test :py:meth:`~CPAC.pipeline.engine.resource.ResourcePool.ingress_pipeconfig_paths` .""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) rpool = ResourcePool( cfg=cfg, @@ -83,7 +83,7 @@ def test_ingress_pipeconfig_data( def test_build_anat_preproc_stack( bids_examples: Path, preconfig: str, tmp_path: Path ) -> None: - """Test :py:func:`~build_anat_preproc_stack` .""" + """Test :py:func:`~CPAC.pipeline.cpac_pipeline.build_anat_preproc_stack` .""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) @@ -94,7 +94,7 @@ def test_build_anat_preproc_stack( @pytest.mark.parametrize("preconfig", ["default"]) def test_build_workflow(bids_examples: Path, preconfig: str, tmp_path: Path) -> None: - """Test :py:func:`~build_workflow` .""" + """Test :py:func:`~CPAC.pipeline.cpac_pipeline.build_workflow` .""" cfg, sub_data_dct = _set_up_test(bids_examples, preconfig, tmp_path) rpool = ResourcePool(cfg=cfg, data_paths=sub_data_dct) wf = build_workflow(sub_data_dct["subject_id"], sub_data_dct, cfg) From 836d100324ff1e91b4cd9cf6eaac17e891471698 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Fri, 19 Jul 2024 00:50:16 -0400 Subject: [PATCH 91/93] :art: Remove duplicate imports --- CPAC/pipeline/engine/resource.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/CPAC/pipeline/engine/resource.py b/CPAC/pipeline/engine/resource.py index fb397a525d..988d4bdc04 100644 --- a/CPAC/pipeline/engine/resource.py +++ b/CPAC/pipeline/engine/resource.py @@ -30,7 +30,7 @@ 
from nipype.interfaces import utility as util # type: ignore [import-untyped] from nipype.interfaces.utility import Rename # type: ignore [import-untyped] -from nipype.pipeline import engine as pe +from nipype.pipeline import engine as pe # type: ignore [import-untyped] from CPAC.image_utils.spatial_smoothing import spatial_smoothing from CPAC.image_utils.statistical_transforms import ( @@ -43,6 +43,7 @@ NODEBLOCK_INPUTS, NODEBLOCK_OUTPUTS, NodeBlockFunction, + PIPELINE_BLOCKS, ) from CPAC.pipeline.utils import name_fork, source_set from CPAC.registration.registration import transform_derivative @@ -485,12 +486,6 @@ def get( class ResourcePool(_Pool): """A pool of :py:class:`Resource` s.""" - from CPAC.pipeline.engine.nodeblock import ( - NODEBLOCK_INPUTS, - NodeBlockFunction, - PIPELINE_BLOCKS, - ) - def __init__( self, name: str = "", From 0a5310884894d0bed5666ef0149e156986d90d55 Mon Sep 17 00:00:00 2001 From: Jon Clucas Date: Fri, 19 Jul 2024 01:16:36 -0400 Subject: [PATCH 92/93] :goal_net: Catch and release no-regressors --- CPAC/nuisance/nuisance.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/CPAC/nuisance/nuisance.py b/CPAC/nuisance/nuisance.py index 871d18d232..04807755b7 100644 --- a/CPAC/nuisance/nuisance.py +++ b/CPAC/nuisance/nuisance.py @@ -2363,7 +2363,7 @@ def erode_mask_WM(wf, cfg, strat_pool, pipe_num, opt=None): outputs=["desc-confounds_timeseries", "censor-indices"], ) def nuisance_regressors_generation_EPItemplate(wf, cfg, strat_pool, pipe_num, opt=None): - return nuisance_regressors_generation(wf, cfg, strat_pool, pipe_num, opt, "bold") + return nuisance_regressors_generation(wf, cfg, strat_pool, pipe_num, "bold") @nodeblock( @@ -2407,7 +2407,7 @@ def nuisance_regressors_generation_EPItemplate(wf, cfg, strat_pool, pipe_num, op outputs=["desc-confounds_timeseries", "censor-indices"], ) def nuisance_regressors_generation_T1w(wf, cfg, strat_pool, pipe_num, opt=None): - return nuisance_regressors_generation(wf, cfg, strat_pool, pipe_num, opt, "T1w") + return nuisance_regressors_generation(wf, cfg, strat_pool, pipe_num, "T1w") def nuisance_regressors_generation( @@ -2415,10 +2415,14 @@ def nuisance_regressors_generation( cfg: Configuration, strat_pool: StratPool, pipe_num: int, - opt: dict, space: Literal["T1w", "bold"], ) -> tuple[Workflow, dict]: """Generate nuisance regressors.""" + try: + opt = strat_pool.regressor_dct + except LookupError: + # no regressors to generate + return wf, {} prefixes = [f"space-{space}_"] * 2 reg_tool = None if space == "T1w": @@ -2659,7 +2663,11 @@ def nuisance_regression(wf, cfg, strat_pool: StratPool, pipe_num, opt, space, re outputs : dict """ - opt = strat_pool.regressor_dct + try: + opt = strat_pool.regressor_dct + except LookupError: + # no regressors + return wf, {} bandpass = "Bandpass" in opt bandpass_before = ( bandpass From 5a6fa79202cd2b8b0f2ed9f50175124a18bbb235 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Jul 2024 18:22:47 +0000 Subject: [PATCH 93/93] :arrow_up: Bump setuptools from 68.0.0 to 70.0.0 Bumps [setuptools](https://github.com/pypa/setuptools) from 68.0.0 to 70.0.0. - [Release notes](https://github.com/pypa/setuptools/releases) - [Changelog](https://github.com/pypa/setuptools/blob/main/NEWS.rst) - [Commits](https://github.com/pypa/setuptools/compare/v68.0.0...v70.0.0) --- updated-dependencies: - dependency-name: setuptools dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 185f432729..58afacfa6d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,7 +27,7 @@ prov==2.0.0 psutil==5.9.5 PyBASC==0.6.1 pybids==0.15.6 -PyPEER @ https://github.com/shnizzedy/PyPEER/archive/6965d2b2bea0fef824e885fec33a8e0e6bd50a97.zip +PyPEER @ git+https://git@github.com/ChildMindInstitute/PyPEER.git@6965d2b2bea0fef824e885fec33a8e0e6bd50a97 python-dateutil==2.8.2 PyYAML==6.0 requests==2.32.0 @@ -45,7 +45,7 @@ h5py==3.8.0 importlib-metadata==6.8.0 lxml==4.9.2 pip==23.3 -setuptools<60.0 -urllib3==1.26.19 +setuptools==70.0.0 +urllib3==1.26.18 wheel==0.40.0 zipp==3.19.1
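As an illustrative sanity check (an assumption, not part of the patch), an environment built from the updated requirements.txt should resolve the setuptools pin shown in the diff above:

    import setuptools
    # 70.0.0 is the version pinned in requirements.txt above.
    assert setuptools.__version__ == "70.0.0", setuptools.__version__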