From 5dc0d025a5865e2c93e756f8895b847ab4136a8c Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Thu, 23 Sep 2021 09:51:48 +0200 Subject: [PATCH 01/24] Init traceability feature --- clinica/engine/provenance.py | 215 ++++++++++++++++++ clinica/engine/provenance_utils.py | 116 ++++++++++ clinica/pipelines/engine.py | 2 + .../spatial_svm_pipeline.py | 2 +- .../statistics_volume_correction_pipeline.py | 2 +- clinica/utils/input_files.py | 80 ++++--- clinica/utils/inputs.py | 35 ++- 7 files changed, 402 insertions(+), 50 deletions(-) create mode 100644 clinica/engine/provenance.py create mode 100644 clinica/engine/provenance_utils.py diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py new file mode 100644 index 000000000..0db9d17ff --- /dev/null +++ b/clinica/engine/provenance.py @@ -0,0 +1,215 @@ +import json +import functools +from os import read + +from pathlib import Path +from typing import Optional + + +def provenance(func): + from .provenance_utils import get_files_list + + @functools.wraps(func) + def run_wrapper(self, **kwargs): + ret = [] + pipeline_fullname = self.fullname + in_files_paths = get_files_list(self, pipeline_fullname, dict_field="input_to") + + prov_context = get_context(files_paths=in_files_paths) + prov_command = get_command(self, in_files_paths) + + if validate_command(prov_context, prov_command): + ret = func(self) + else: + raise Exception( + "The pipeline selected is incompatible with the input files provenance" + ) + out_files_paths = get_files_list( + self, pipeline_fullname, dict_field="output_from" + ) + register_prov(prov_command, out_files_paths) + + return ret + + return run_wrapper + + +def register_prov(prov_command: dict, out_files: list) -> bool: + + # TODO: iterate over out_files and create a provenance file for each + for file in out_files: + write_prov_file(prov_command, file) + print("Provenance registered succesfully") + return True + + +def get_context(files_paths: str) -> dict: + """ + Return a dictionary with the provenance info related to the files in the files_paths + """ + from clinica.engine.provenance_utils import read_prov, get_associated_prov + + prov_data = {"Entity": [], "Agent": [], "Activity": []} + for path in files_paths: + prov_record = read_prov(get_associated_prov(path)) + if prov_record: + prov_data = append_prov_dict(prov_data, prov_record) + + return prov_data + + +def get_command(self, input_files_paths: list) -> dict: + """ + Read the user command and save information in a dict + """ + import sys + + new_entities = [] + new_agent = get_agent() + for path in input_files_paths: + new_entities.append(get_entity(path)) + new_activity = get_activity(self, new_agent["@id"], new_entities) + + return { + "Agent": [new_agent], + "Activity": [new_activity], + "Entity": new_entities, + } + + +def write_prov_file(prov_command, files_paths): + """ + Write the dictionary data to the file_path + """ + from clinica.engine.provenance_utils import read_prov, get_associated_prov + + for file_path in files_paths: + prov_path = get_associated_prov(file_path) + + if prov_path.exists(): + # append the pipeline provenance information to the old provenance file + prov_main = read_prov(prov_path) + prov_main = append_prov_dict(prov_main, prov_command) + else: + print("help") + # create new provenance file with pipeline information + return "" + + +def append_prov_dict(prov_main: dict, prov_new: dict) -> dict: + """ + Append a specific prov data to the global prov dict + """ + + for k in prov_new.keys(): + for el in prov_new[k]: 
+            if prov_main[k] and el not in prov_main[k]:
+                prov_main[k].append(el)
+    return prov_main
+
+
+def get_agent() -> dict:
+    import clinica
+    from .provenance_utils import get_agent_id
+
+    agent_version = clinica.__version__
+    agent_label = clinica.__name__
+    agent_id = get_agent_id(agent_label + agent_version)
+
+    new_agent = {"@id": agent_id, "label": agent_label, "version": agent_version}
+
+    return new_agent
+
+
+def get_activity(self, agent_id: str, entities: list) -> dict:
+    """
+    Add the current command to the list of activities
+    """
+    import sys
+    from .provenance_utils import get_activity_id
+
+    activity_parameters = self.parameters
+    activity_label = self.fullname
+    activity_id = get_activity_id(self.fullname)
+    activity_command = (sys.argv[1:],)
+    activity_agent = agent_id
+    activity_used_files = [e["@id"] for e in entities]
+
+    new_activity = {
+        "@id": activity_id,
+        "label": activity_label,
+        "command": activity_command,
+        "parameters": activity_parameters,
+        "wasAssociatedWith": activity_agent,
+        "used": activity_used_files,
+    }
+
+    return new_activity
+
+
+def get_entity(img_path: str) -> dict:
+    """
+    Add the current file to the list of entities
+    """
+    from clinica.engine.provenance_utils import get_entity_id
+    from clinica.engine.provenance_utils import get_last_activity
+    from pathlib import Path
+
+    entity_id = get_entity_id(img_path)
+    entity_label = Path(img_path).name
+    entity_path = img_path
+    entity_source = get_last_activity(img_path)
+
+    new_entity = {
+        "@id": entity_id,
+        "label": entity_label,
+        "atLocation": entity_path,
+        "wasGeneratedBy": entity_source,
+    }
+
+    return new_entity
+
+
+def create_prov_file(command, path):
+    """
+    Create new provenance file based on command
+    """
+    # TODO: create a json-ld object next to the file and add it to the active prov object
+    return
+
+
+def validate_command(prov_context: dict, prov_command: dict) -> bool:
+    """
+    Check that the command is valid for the data being run
+    """
+    flag = True
+    new_activity_id = prov_command["Activity"][0]["@id"]
+    new_agent_id = prov_command["Agent"][0]["@id"]
+
+    for entity in prov_context["Entity"]:
+        old_activity_id = entity["wasGeneratedBy"]
+        if old_activity_id:
+            ptr_activity = next(
+                item
+                for item in prov_context["Activity"]
+                if item["@id"] == old_activity_id
+            )
+            old_agent_id = ptr_activity["wasAssociatedWith"]
+            flag = flag and is_valid(
+                {(old_agent_id, old_activity_id): (new_agent_id, new_activity_id)}
+            )
+    return flag
+
+
+def is_valid(command: dict) -> bool:
+    valid_list = [
+        {
+            ("clin:clinica0.5.0", "clin:adni2Bids"): (
+                "clin:clinica0.5.0",
+                "clin:t1-linear",
+            )
+        }
+    ]
+    if command in valid_list:
+        return True
+    return False
diff --git a/clinica/engine/provenance_utils.py b/clinica/engine/provenance_utils.py
new file mode 100644
index 000000000..998e68496
--- /dev/null
+++ b/clinica/engine/provenance_utils.py
@@ -0,0 +1,116 @@
+from typing import Union, Optional
+from pathlib import Path
+
+
+def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> list:
+    """
+    Calls clinica_file_reader with the appropriate extensions
+    """
+    from clinica.utils.inputs import clinica_file_reader
+    import clinica.utils.input_files as cif
+
+    dict_field_options = ["input_to", "output_from"]
+    if dict_field not in dict_field_options:
+        raise ValueError(f"dict_field must be one of {dict_field_options}")
+
+    # retrieve all the data dictionaries from the input_files module
+    files_dicts = {
+        k: v
+        for k, v in vars(cif).items()
+        if isinstance(v, dict)
+        and dict_field in v.keys()
+        and 
pipeline_fullname in v[dict_field] + } + # TODO: check if bids or caps as output + ret_files = [] + for elem in files_dicts: + ref_dir = ( + self.bids_directory if dict_field == "input_to" else self.caps_directory + ) + current_file = clinica_file_reader( + self.subjects, + self.sessions, + ref_dir, + files_dicts[elem], + raise_exception=False, + ) + if current_file: + ret_files.extend(current_file) + + return ret_files + + +def is_entity_tracked(prov_context: dict, entity_id: str) -> bool: + flag_exists = next( + (True for item in prov_context["Entity"] if item["@id"] == entity_id), + False, + ) + return flag_exists + + +def is_agent_tracked(prov_context: dict, agent_id: str) -> bool: + flag_exists = next( + (True for item in prov_context["Agent"] if item["@id"] == agent_id), + False, + ) + return flag_exists + + +def is_activity_tracked(prov_context: dict, activity_id: str) -> bool: + flag_exists = next( + (True for item in prov_context["Activity"] if item["@id"] == activity_id), + False, + ) + return flag_exists + + +def get_entity_id(file_path: str) -> str: + from pathlib import Path + + entity_id = Path(file_path).with_suffix("").name + return entity_id + + +def get_activity_id(pipeline_name: str) -> str: + return "clin:" + pipeline_name + + +def get_agent_id(agent_name: str) -> str: + return "clin:" + agent_name + + +def get_last_activity(file_path: str) -> Optional[list]: + + """ + Return the last activity executed on the file + """ + + prov_record = read_prov(get_associated_prov(file_path)) + if prov_record and prov_record["Activity"]: + last_activity = prov_record["Activity"][-1]["@id"] + return last_activity + return None + + +def get_associated_prov(file_path: str) -> Path: + + file_path = Path(file_path) + while file_path.suffix != "": + file_path = file_path.with_suffix("") + + associated_jsonld = file_path.with_suffix(".jsonld") + return associated_jsonld + + +def read_prov(prov_path: Path) -> Optional[dict]: + """ + Check if the given file is a valid provenance json-ld + """ + import json + + # TODO: check that the provenance file associations and uses exists + if prov_path.exists(): + with open(prov_path, "r") as fp: + json_ld_data = json.load(fp) + return json_ld_data + return None diff --git a/clinica/pipelines/engine.py b/clinica/pipelines/engine.py index ec72d6f02..68e3562f7 100644 --- a/clinica/pipelines/engine.py +++ b/clinica/pipelines/engine.py @@ -7,6 +7,7 @@ import click from nipype.pipeline.engine import Workflow +import clinica.engine.provenance as prov def postset(attribute, value): @@ -234,6 +235,7 @@ def build(self): self.build_output_node() return self + @prov.provenance def run(self, plugin=None, plugin_args=None, update_hash=False, bypass_check=False): """Executes the Pipeline. 
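
To make the intended mechanics concrete before the call-site changes below: `get_associated_prov` strips every suffix from a data file's path and appends `.jsonld`, and `write_prov_file` is meant to serialize the `Agent`/`Activity`/`Entity` lists built by `get_agent`, `get_activity`, and `get_entity`. A minimal sketch of one such sidecar, with purely illustrative subject, paths, and IDs (not output produced by running this patch):

```python
# Illustrative sidecar for bids/sub-01/ses-M00/anat/sub-01_ses-M00_T1w.nii.gz;
# get_associated_prov would place it at .../anat/sub-01_ses-M00_T1w.jsonld
example_sidecar = {
    "Agent": [
        # get_agent_id(agent_label + agent_version) -> "clin:clinica0.5.0"
        {"@id": "clin:clinica0.5.0", "label": "clinica", "version": "0.5.0"}
    ],
    "Activity": [
        {
            "@id": "clin:t1-linear",
            "label": "t1-linear",
            "command": [["run", "t1-linear", "bids/", "caps/"]],  # (sys.argv[1:],)
            "parameters": {},
            "wasAssociatedWith": "clin:clinica0.5.0",
            "used": ["sub-01_ses-M00_T1w.nii"],  # @ids of the input entities
        }
    ],
    "Entity": [
        {
            # get_entity_id strips one suffix: Path(...).with_suffix("").name
            "@id": "sub-01_ses-M00_T1w.nii",
            "label": "sub-01_ses-M00_T1w.nii.gz",
            "atLocation": "bids/sub-01/ses-M00/anat/sub-01_ses-M00_T1w.nii.gz",
            "wasGeneratedBy": None,  # raw BIDS input: no prior recorded activity
        }
    ],
}
```
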
diff --git a/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py b/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py index 79d4c9e17..e630bcbda 100644 --- a/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py +++ b/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py @@ -93,7 +93,7 @@ def build_input_node(self): "*_T1w_segm-graymatter_space-Ixi549Space_modulated-on_probability.nii.gz", ), "description": "graymatter tissue segmented in T1w MRI in Ixi549 space", - "needed_pipeline": "t1-volume-tissue-segmentation", + "output_from": "t1-volume-tissue-segmentation", } elif self.parameters["orig_input_data"] == "pet-volume": if not ( diff --git a/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py b/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py index b9afd5e63..3f8dffa94 100644 --- a/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py +++ b/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py @@ -39,7 +39,7 @@ def build_input_node(self): { "pattern": self.parameters["t_map"] + "*", "description": "statistics t map", - "needed_pipeline": "statistics-volume", + "output_from": "statistics-volume", }, ) diff --git a/clinica/utils/input_files.py b/clinica/utils/input_files.py index 380e2f4c9..93ce26edb 100644 --- a/clinica/utils/input_files.py +++ b/clinica/utils/input_files.py @@ -7,150 +7,154 @@ # BIDS -T1W_NII = {"pattern": "sub-*_ses-*_t1w.nii*", "description": "T1w MRI"} +T1W_NII = { + "pattern": "sub-*_ses-*_t1w.nii*", + "description": "T1w MRI", + "input_to": ["t1-linear"], +} # T1-FreeSurfer T1_FS_WM = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/wm.seg.mgz", "description": "segmentation of white matter (mri/wm.seg.mgz).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_BRAIN = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/brain.mgz", "description": " extracted brain from T1w MRI (mri/brain.mgz).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_ORIG_NU = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/orig_nu.mgz", "description": "intensity normalized volume generated after correction for" " non-uniformity in FreeSurfer (mri/orig_nu.mgz).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_LONG_ORIG_NU = { "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/mri/orig_nu.mgz", "description": "intensity normalized volume generated after correction for non-uniformity in FreeSurfer (orig_nu.mgz) in longitudinal", - "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal", + "output_from": "t1-freesurfer and t1-freesurfer longitudinal", } T1_FS_WM_SURF_R = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/surf/rh.white", "description": "right white matter/gray matter border surface (rh.white).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_LONG_SURF_R = { "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/surf/rh.white", "description": "right white matter/gray matter border surface (rh.white) generated with t1-freesurfer-longitudinal.", - "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal", + "output_from": "t1-freesurfer and t1-freesurfer longitudinal", } T1_FS_LONG_SURF_L = { "pattern": 
"t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/surf/lh.white", "description": "left white matter/gray matter border surface (lh.white) generated with t1-freesurfer-longitudinal.", - "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal", + "output_from": "t1-freesurfer and t1-freesurfer longitudinal", } T1_FS_WM_SURF_L = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/surf/lh.white", "description": "left white matter/gray matter border surface (lh.white).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_DESTRIEUX = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/aparc.a2009s+aseg.mgz", "description": "Destrieux-based segmentation (mri/aparc.a2009s+aseg.mgz).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_DESTRIEUX_PARC_L = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/label/lh.aparc.a2009s.annot", "description": "left hemisphere surface-based Destrieux parcellation (label/lh.aparc.a2009s.annot).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_LONG_DESTRIEUX_PARC_L = { "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/label/lh.aparc.a2009s.annot", "description": "left hemisphere surface-based Destrieux parcellation (label/lh.aparc.a2009s.annot) generated with t1-freesurfer-longitudinal.", - "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal", + "output_from": "t1-freesurfer and t1-freesurfer longitudinal", } T1_FS_LONG_DESTRIEUX_PARC_R = { "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/label/rh.aparc.a2009s.annot", "description": "right hemisphere surface-based Destrieux parcellation (label/rh.aparc.a2009s.annot) generated with t1-freesurfer-longitudinal.", - "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal", + "output_from": "t1-freesurfer and t1-freesurfer longitudinal", } T1_FS_DESTRIEUX_PARC_R = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/label/rh.aparc.a2009s.annot", "description": "right hemisphere surface-based Destrieux parcellation (label/rh.aparc.a2009s.annot).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_DESIKAN = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/aparc+aseg.mgz", "description": "Desikan-based segmentation (mri/aparc.a2009s+aseg.mgz).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_DESIKAN_PARC_L = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/label/lh.aparc.annot", "description": "left hemisphere surface-based Desikan parcellation (label/lh.aparc.annot).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_DESIKAN_PARC_R = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/label/rh.aparc.annot", "description": "right hemisphere surface-based Desikan parcellation (label/rh.aparc.annot).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } # T1-FreeSurfer-Template T1_FS_T_DESTRIEUX = { "pattern": "freesurfer_unbiased_template/sub-*_long-*/mri/aparc.a2009s+aseg.mgz", "description": "Destrieux-based segmentation (mri/aparc.a2009s+aseg.mgz) from unbiased template.", - "needed_pipeline": "t1-freesurfer-longitudinal or t1-freesurfer-template", + "output_from": "t1-freesurfer-longitudinal or t1-freesurfer-template", } # T1-FreeSurfer-Longitudinal-Correction T1_FS_LONG_DESIKAN_PARC_L = { "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/label/lh.aparc.annot", 
"description": "left hemisphere surface-based Desikan parcellation (label/lh.aparc.annot) generated with t1-freesurfer-longitudinal.", - "needed_pipeline": "t1-freesurfer and t1-freesurfer-longitudinal", + "output_from": "t1-freesurfer and t1-freesurfer-longitudinal", } T1_FS_LONG_DESIKAN_PARC_R = { "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/label/rh.aparc.annot", "description": "right hemisphere surface-based Desikan parcellation (label/rh.aparc.annot) generated with t1-freesurfer-longitudinal.", - "needed_pipeline": "t1-freesurfer and t1-freesurfer-longitudinal", + "output_from": "t1-freesurfer and t1-freesurfer-longitudinal", } T1W_LINEAR = { "pattern": "*space-MNI152NLin2009cSym_res-1x1x1_T1w.nii.gz", "description": "T1w image registered in MNI152NLin2009cSym space using t1-linear pipeline", - "needed_pipeline": "t1-linear", + "output_from": "t1-linear", } T1W_LINEAR_CROPPED = { "pattern": "*space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz", "description": "T1W Image registered using t1-linear and cropped " "(matrix size 169×208×179, 1 mm isotropic voxels)", - "needed_pipeline": "t1-linear", + "output_from": "t1-linear", } T1W_EXTENSIVE = { "pattern": "*space-Ixi549Space_desc-SkullStripped_T1w.nii.gz", "description": "T1w image skull-stripped registered in Ixi549Space space using clinicaDL preprocessing pipeline", - "needed_pipeline": "t1-extensive", + "output_from": "t1-extensive", } T1W_TO_MNI_TRANSFORM = { "pattern": "*space-MNI152NLin2009cSym_res-1x1x1_affine.mat", "description": "Transformation matrix from T1W image to MNI space using t1-linear pipeline", - "needed_pipeline": "t1-linear", + "output_from": "t1-linear", } # T1-Volume @@ -170,7 +174,7 @@ def t1_volume_native_tpm(tissue_number): f"*_*_T1w_segm-{INDEX_TISSUE_MAP[tissue_number]}_probability.nii*", ), "description": f"Tissue probability map {INDEX_TISSUE_MAP[tissue_number]} in native space", - "needed_pipeline": "t1-volume-tissue-segmentation", + "output_from": "t1-volume-tissue-segmentation", } return information @@ -189,7 +193,7 @@ def t1_volume_dartel_input_tissue(tissue_number): f"*_*_T1w_segm-{INDEX_TISSUE_MAP[tissue_number]}_dartelinput.nii*", ), "description": f"Dartel input for tissue probability map {INDEX_TISSUE_MAP[tissue_number]} from T1w MRI", - "needed_pipeline": "t1-volume-tissue-segmentation", + "output_from": "t1-volume-tissue-segmentation", } return information @@ -217,7 +221,7 @@ def t1_volume_native_tpm_in_mni(tissue_number, modulation): f"Tissue probability map {INDEX_TISSUE_MAP[tissue_number]} based on " f"native MRI in MNI space (Ixi549) {description_modulation} modulation." ), - "needed_pipeline": "t1-volume-tissue-segmentation", + "output_from": "t1-volume-tissue-segmentation", } return information @@ -245,7 +249,7 @@ def t1_volume_template_tpm_in_mni(group_label, tissue_number, modulation): f"Tissue probability map {INDEX_TISSUE_MAP[tissue_number]} based " f"on {group_label} template in MNI space (Ixi549) {description_modulation} modulation." 
        ),
-        "needed_pipeline": "t1-volume",
+        "output_from": "t1-volume",
     }
 
     return information
 
 
@@ -262,7 +266,7 @@ def t1_volume_deformation_to_template(group_label):
             f"sub-*_ses-*_T1w_target-{group_label}_transformation-forward_deformation.nii*",
         ),
         "description": f"Deformation from native space to group template {group_label} space.",
-        "needed_pipeline": "t1-volume-create-dartel",
+        "output_from": "t1-volume-create-dartel",
     }
 
     return information
 
 
@@ -277,7 +281,7 @@ def t1_volume_i_th_iteration_group_template(group_label, i):
             f"group-{group_label}_iteration-{i}_template.nii*",
         ),
         "description": f"Iteration #{i} of Dartel template {group_label}",
-        "needed_pipeline": "t1-volume or t1-volume-create-dartel",
+        "output_from": "t1-volume or t1-volume-create-dartel",
     }
 
     return information
 
 
@@ -290,7 +294,7 @@ def t1_volume_final_group_template(group_label):
             f"group-{group_label}", "t1", f"group-{group_label}_template.nii*"
         ),
         "description": f"T1w template file of group {group_label}",
-        "needed_pipeline": "t1-volume or t1-volume-create-dartel",
+        "output_from": "t1-volume or t1-volume-create-dartel",
     }
 
     return information
 
 
@@ -327,25 +331,25 @@ def t1_volume_final_group_template(group_label):
 DWI_PREPROC_NII = {
     "pattern": "dwi/preprocessing/sub-*_ses-*_dwi_space-*_preproc.nii*",
     "description": "preprocessed DWI",
-    "needed_pipeline": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
+    "output_from": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
 }
 
 DWI_PREPROC_BRAINMASK = {
     "pattern": "dwi/preprocessing/sub-*_ses-*_dwi_space-*_brainmask.nii*",
     "description": "b0 brainmask",
-    "needed_pipeline": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
+    "output_from": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
 }
 
 DWI_PREPROC_BVEC = {
     "pattern": "dwi/preprocessing/sub-*_ses-*_dwi_space-*_preproc.bvec",
     "description": "preprocessed bvec",
-    "needed_pipeline": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
+    "output_from": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
 }
 
 DWI_PREPROC_BVAL = {
     "pattern": "dwi/preprocessing/*_dwi_space-*_preproc.bval",
     "description": "preprocessed bval",
-    "needed_pipeline": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
+    "output_from": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
 }
 
 """ PET """
@@ -411,7 +415,7 @@ def pet_volume_normalized_suvr_pet(
             f"{mask_description} SUVR map (using {suvr_reference_region} region) of {acq_label}-PET "
             f"{pvc_description} and {fwhm_description} in Ixi549Space space based on {group_label} DARTEL template"
         ),
-        "needed_pipeline": "pet-volume",
+        "output_from": "pet-volume",
     }
 
     return information
@@ -433,7 +437,7 @@ def pet_linear_nii(acq_label, suvr_reference_region, uncropped_image):
             f"*_acq-{acq_label}_pet_space-MNI152NLin2009cSym{description}_res-1x1x1_suvr-{suvr_reference_region}_pet.nii.gz",
         ),
         "description": "",
-        "needed_pipeline": "pet-linear",
+        "output_from": "pet-linear",
     }
 
     return information
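
Summarizing the schema change in this file: `output_from` (a string) names the pipeline that produced a file in the CAPS tree, while `input_to` (a list, so far set only on `T1W_NII`) names the pipelines that consume a BIDS file. The two shapes side by side, copied from the definitions above:

```python
# The two dictionary shapes the provenance lookup in get_files_list matches on.
T1W_NII = {
    "pattern": "sub-*_ses-*_t1w.nii*",
    "description": "T1w MRI",
    "input_to": ["t1-linear"],  # pipelines that read this file (BIDS side)
}

T1W_LINEAR = {
    "pattern": "*space-MNI152NLin2009cSym_res-1x1x1_T1w.nii.gz",
    "description": "T1w image registered in MNI152NLin2009cSym space using t1-linear pipeline",
    "output_from": "t1-linear",  # pipeline that wrote this file (CAPS side)
}
```
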
diff --git a/clinica/utils/inputs.py b/clinica/utils/inputs.py
index 45241122d..0d870aba1 100644
--- a/clinica/utils/inputs.py
+++ b/clinica/utils/inputs.py
@@ -186,10 +186,10 @@ def clinica_file_reader(
         sessions: list of sessions (must be same size as subjects, and must correspond)
         input_directory: location of the bids or caps directory
         information: dictionary containing all the relevant information to look for the files. Dict must contain the
-            following keys : pattern, description. The optional key is: needed_pipeline
+            following keys: pattern, description. The optional key is: output_from
             pattern: define the pattern of the final file
             description: string to describe what the file is
-        needed_pipeline (optional): string describing the pipeline(s) needed to obtain the related
+        output_from (optional): string describing the pipeline(s) needed to obtain the related
             file
         raise_exception: if True (normal behavior), an exception is raised if errors happen. If not, we return the file
             list as it is
@@ -215,7 +215,7 @@ def clinica_file_reader(
                 caps_directory,
                 {'pattern': 'freesurfer_cross_sectional/sub-*_ses-*/mri/orig_nu.mgz',
                  'description': 'freesurfer file orig_nu.mgz',
-                 'needed_pipeline': 't1-freesurfer'})
+                 'output_from': 't1-freesurfer'})
         gives: ['/caps/subjects/sub-ADNI011S4105/ses-M00/t1/freesurfer_cross_sectional/sub-ADNI011S4105_ses-M00/mri/orig_nu.mgz']
 
         - You have a partial name of the file:
@@ -236,7 +236,7 @@ def clinica_file_reader(
                 caps,
                 {'pattern': 'rh.white',
                  'description': 'right hemisphere of outer cortical surface.',
-                 'needed_pipeline': 't1-freesurfer'})
+                 'output_from': 't1-freesurfer'})
         the following error will arise:
         * More than 1 file found::
             /caps/subjects/sub-ADNI011S4105/ses-M00/t1/freesurfer_cross_sectional/fsaverage/surf/rh.white
@@ -266,9 +266,9 @@ def clinica_file_reader(
         elem in information.keys() for elem in ["pattern", "description"]
     ), "'information' must contain the keys 'pattern' and 'description'"
     assert all(
-        elem in ["pattern", "description", "needed_pipeline"]
+        elem in ["pattern", "description", "output_from", "input_to"]
         for elem in information.keys()
-    ), "'information' can only contain the keys 'pattern', 'description' and 'needed_pipeline'"
+    ), "'information' can only contain the keys 'pattern', 'description', 'output_from' and 'input_to'"
 
     pattern = information["pattern"]
     is_bids = determine_caps_or_bids(input_directory)
@@ -330,6 +330,18 @@ def clinica_file_reader(
     for msg in error_encountered:
         error_message += msg
     if len(error_encountered) > 0 and raise_exception is True:
+        error_message = (
+            f"Clinica encountered {len(error_encountered)} "
+            f"problem(s) while getting {information['description']}:\n"
+        )
+        if "output_from" in information.keys():
+            if information["output_from"]:
+                error_message += (
+                    "Please note that the following clinica pipeline(s) must "
+                    f"have run to obtain these files: {information['output_from']}\n"
+                )
+        for msg in error_encountered:
+            error_message += msg
         if is_bids:
             raise ClinicaBIDSError(error_message)
         else:
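
For reference, a call that passes the tightened key validation; the subjects, sessions, and CAPS path are illustrative, and the information dict reuses the docstring's own orig_nu.mgz example:

```python
from clinica.utils.inputs import clinica_file_reader

# Illustrative: an information dict now carries "output_from" (or "input_to")
# instead of the old "needed_pipeline" key.
files = clinica_file_reader(
    ["sub-ADNI011S4105"],  # subjects (illustrative)
    ["ses-M00"],           # sessions, same length as subjects
    "/path/to/caps",       # CAPS directory (illustrative)
    {
        "pattern": "freesurfer_cross_sectional/sub-*_ses-*/mri/orig_nu.mgz",
        "description": "freesurfer file orig_nu.mgz",
        "output_from": "t1-freesurfer",
    },
)
```
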
@@ -397,10 +409,10 @@ def clinica_group_reader(caps_directory, information, raise_exception=True):
     Args:
         caps_directory: input caps directory
         information: dictionary containing all the relevant information to look for the files. Dict must contain the
-            following keys : pattern, description, needed_pipeline
+            following keys: pattern, description, output_from
             pattern: define the pattern of the final file
             description: string to describe what the file is
-        needed_pipeline (optional): string describing the pipeline needed to obtain the file beforehand
+        output_from (optional): string describing the pipeline needed to obtain the file beforehand
         raise_exception: if True (normal behavior), an exception is raised if errors happen. If not, we return the file
             list as it is
@@ -418,9 +430,8 @@ def clinica_group_reader(caps_directory, information, raise_exception=True):
         information, dict
     ), "A dict must be provided for the argument 'dict'"
     assert all(
-        elem in information.keys()
-        for elem in ["pattern", "description", "needed_pipeline"]
-    ), "'information' must contain the keys 'pattern', 'description', 'needed_pipeline'"
+        elem in information.keys() for elem in ["pattern", "description", "output_from"]
+    ), "'information' must contain the keys 'pattern', 'description', 'output_from'"
 
     pattern = information["pattern"]
     # Some check on the formatting on the data
@@ -446,7 +457,7 @@ def clinica_group_reader(caps_directory, information, raise_exception=True):
         error_string += (
             f"\n\tCAPS directory: {caps_directory}\n"
             "Please note that the following clinica pipeline(s) must have run to obtain these files: "
-            f"{information['needed_pipeline']}\n"
+            f"{information['output_from']}\n"
         )
         raise ClinicaCAPSError(error_string)
     return current_glob_found[0]

From b1bc38e4ed0e222f224e64d33eaaab316b4ad052 Mon Sep 17 00:00:00 2001
From: "omar.elrifai"
Date: Thu, 23 Sep 2021 13:04:04 +0200
Subject: [PATCH 02/24] Add function to create new provenance files

---
 clinica/engine/provenance.py | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py
index 0db9d17ff..f310cede6 100644
--- a/clinica/engine/provenance.py
+++ b/clinica/engine/provenance.py
@@ -19,7 +19,8 @@ def run_wrapper(self, **kwargs):
         prov_command = get_command(self, in_files_paths)
 
         if validate_command(prov_context, prov_command):
-            ret = func(self)
+            # ret = func(self)
+            print("The pipeline successfully executed.")
         else:
             raise Exception(
                 "The pipeline selected is incompatible with the input files provenance"
@@ -77,22 +78,21 @@ def get_command(self, input_files_paths: list) -> dict:
     }
 
 
-def write_prov_file(prov_command, files_paths):
+def write_prov_file(prov_command, file_path):
     """
     Write the dictionary data to the file_path
     """
     from clinica.engine.provenance_utils import read_prov, get_associated_prov
 
-    for file_path in files_paths:
-        prov_path = get_associated_prov(file_path)
+    prov_path = get_associated_prov(file_path)
 
-        if prov_path.exists():
-            # append the pipeline provenance information to the old provenance file
-            prov_main = read_prov(prov_path)
-            prov_main = append_prov_dict(prov_main, prov_command)
-        else:
-            print("help")
-            # create new provenance file with pipeline information
+    if prov_path.exists():
+        # append the pipeline provenance information to the old provenance file
+        prov_main = read_prov(prov_path)
+        prov_main = append_prov_dict(prov_main, prov_command)
+    else:
+        create_prov_file(prov_command, prov_path)
+        # create new provenance file with pipeline information
     return ""
@@ -103,7 +103,7 @@ def append_prov_dict(prov_main: dict, prov_new: dict) -> dict:
 
     for k in prov_new.keys():
         for el in prov_new[k]:
-            if prov_main[k] and el not in prov_main[k]:
+            if k in prov_main.keys() and el not in prov_main[k]:
                 prov_main[k].append(el)
     return prov_main
@@ -170,11 +170,15 @@ def get_entity(img_path: str) -> dict:
     return new_entity
 
 
-def create_prov_file(command, path):
+def create_prov_file(prov_command, prov_path):
     """
     Create new provenance file based on command
     """
-    # TODO: create a json-ld object next to the file and add it to the active prov object
+    import json
+
+    with open(prov_path, "w") as fp:
+        json.dump(prov_command, fp, indent=4)
+
     return
 

From 
9c0ee0d49f45cbfcf35ebb4c205775bc0a5e7ba0 Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Mon, 3 Jan 2022 10:53:40 +0100 Subject: [PATCH 03/24] Update clinica_file_reader call --- clinica/engine/provenance_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clinica/engine/provenance_utils.py b/clinica/engine/provenance_utils.py index 998e68496..6c209c629 100644 --- a/clinica/engine/provenance_utils.py +++ b/clinica/engine/provenance_utils.py @@ -27,7 +27,7 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> list: ref_dir = ( self.bids_directory if dict_field == "input_to" else self.caps_directory ) - current_file = clinica_file_reader( + current_file, _ = clinica_file_reader( self.subjects, self.sessions, ref_dir, From aa0cb6fe46209f93443d58d83d588de694fcaa6b Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Tue, 4 Jan 2022 17:55:39 +0100 Subject: [PATCH 04/24] Add data model --- clinica/engine/provenance_utils.py | 116 ----------------------------- 1 file changed, 116 deletions(-) delete mode 100644 clinica/engine/provenance_utils.py diff --git a/clinica/engine/provenance_utils.py b/clinica/engine/provenance_utils.py deleted file mode 100644 index 6c209c629..000000000 --- a/clinica/engine/provenance_utils.py +++ /dev/null @@ -1,116 +0,0 @@ -from typing import Union, Optional -from pathlib import Path - - -def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> list: - """ - Calls clinica_file_reader with the appropriate extentions - """ - from clinica.utils.inputs import clinica_file_reader - import clinica.utils.input_files as cif - - dict_field_options = ["input_to", "output_from"] - if dict_field not in dict_field_options: - raise (f"dict_field must be one of {dict_field_options}") - - # retrieve all the data dictionaries from the input_files module - files_dicts = { - k: v - for k, v in vars(cif).items() - if isinstance(v, dict) - and dict_field in v.keys() - and pipeline_fullname in v[dict_field] - } - # TODO: check if bids or caps as output - ret_files = [] - for elem in files_dicts: - ref_dir = ( - self.bids_directory if dict_field == "input_to" else self.caps_directory - ) - current_file, _ = clinica_file_reader( - self.subjects, - self.sessions, - ref_dir, - files_dicts[elem], - raise_exception=False, - ) - if current_file: - ret_files.extend(current_file) - - return ret_files - - -def is_entity_tracked(prov_context: dict, entity_id: str) -> bool: - flag_exists = next( - (True for item in prov_context["Entity"] if item["@id"] == entity_id), - False, - ) - return flag_exists - - -def is_agent_tracked(prov_context: dict, agent_id: str) -> bool: - flag_exists = next( - (True for item in prov_context["Agent"] if item["@id"] == agent_id), - False, - ) - return flag_exists - - -def is_activity_tracked(prov_context: dict, activity_id: str) -> bool: - flag_exists = next( - (True for item in prov_context["Activity"] if item["@id"] == activity_id), - False, - ) - return flag_exists - - -def get_entity_id(file_path: str) -> str: - from pathlib import Path - - entity_id = Path(file_path).with_suffix("").name - return entity_id - - -def get_activity_id(pipeline_name: str) -> str: - return "clin:" + pipeline_name - - -def get_agent_id(agent_name: str) -> str: - return "clin:" + agent_name - - -def get_last_activity(file_path: str) -> Optional[list]: - - """ - Return the last activity executed on the file - """ - - prov_record = read_prov(get_associated_prov(file_path)) - if prov_record and prov_record["Activity"]: - 
last_activity = prov_record["Activity"][-1]["@id"] - return last_activity - return None - - -def get_associated_prov(file_path: str) -> Path: - - file_path = Path(file_path) - while file_path.suffix != "": - file_path = file_path.with_suffix("") - - associated_jsonld = file_path.with_suffix(".jsonld") - return associated_jsonld - - -def read_prov(prov_path: Path) -> Optional[dict]: - """ - Check if the given file is a valid provenance json-ld - """ - import json - - # TODO: check that the provenance file associations and uses exists - if prov_path.exists(): - with open(prov_path, "r") as fp: - json_ld_data = json.load(fp) - return json_ld_data - return None From 6b28410e8108b448c87d7b51f7575686a959acaf Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Tue, 4 Jan 2022 18:00:53 +0100 Subject: [PATCH 05/24] rename files --- clinica/engine/prov_model.py | 96 +++++++++++++++++++++++++++++ clinica/engine/prov_utils.py | 116 +++++++++++++++++++++++++++++++++++ 2 files changed, 212 insertions(+) create mode 100644 clinica/engine/prov_model.py create mode 100644 clinica/engine/prov_utils.py diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py new file mode 100644 index 000000000..45e7efe59 --- /dev/null +++ b/clinica/engine/prov_model.py @@ -0,0 +1,96 @@ +from attr import define, field +import attr +import typing +from typing import Union, List +from abc import ABC, abstractmethod + + +# Define PROV abstract concepts + + +@define +class Identifier: + id: int + + +class ProvElement(ABC): + @property + @classmethod + @abstractmethod + def id(cls): + """id is required for ProvElements""" + return NotImplementedError + + @property + def attributes(cls): + """attributes are optional""" + return NotImplementedError + + +class ProvRelation(ABC): + + id: Identifier + src: ProvElement + dest: ProvElement + + +# Define PROV Types + + +@define +class ProvEntity(ProvElement): + """Provenance Entity element""" + + id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) + attributes: set + + +@define +class ProvActivity(ProvElement): + """Provenance Activity element""" + + id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) + attributes: set + + +@define +class ProvAgent(ProvElement): + """Provenance Agent element""" + + id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) + attributes: set + + +# Define PROV Relations + + +@define +class ProvGeneration(ProvRelation): + id: Identifier = field( + default=None, + validator=attr.validators.optional(attr.validators.instance_of(Identifier)), + ) + + src: ProvActivity = field( + default=None, + validator=attr.validators.optional(attr.validators.instance_of(ProvActivity)), + ) + dest: ProvEntity = field( + default=None, + validator=attr.validators.optional(attr.validators.instance_of(ProvEntity)), + ) + + # entity: an identifier (e) for a created entity; + # activity: an OPTIONAL identifier (a) for the activity that creates the entity; + # time: an OPTIONAL "generation time" (t), the time at which the entity was completely created; + # attributes: an OPTIONALa + + +@define +class ProvUsage(ProvRelation): + pass + + +@define +class ProvAssociation(ProvRelation): + pass diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py new file mode 100644 index 000000000..6c209c629 --- /dev/null +++ b/clinica/engine/prov_utils.py @@ -0,0 +1,116 @@ +from typing import Union, Optional +from pathlib import Path + + +def get_files_list(self, pipeline_fullname: str, 
dict_field="input_to") -> list:
+    """
+    Calls clinica_file_reader with the appropriate extensions
+    """
+    from clinica.utils.inputs import clinica_file_reader
+    import clinica.utils.input_files as cif
+
+    dict_field_options = ["input_to", "output_from"]
+    if dict_field not in dict_field_options:
+        raise ValueError(f"dict_field must be one of {dict_field_options}")
+
+    # retrieve all the data dictionaries from the input_files module
+    files_dicts = {
+        k: v
+        for k, v in vars(cif).items()
+        if isinstance(v, dict)
+        and dict_field in v.keys()
+        and pipeline_fullname in v[dict_field]
+    }
+    # TODO: check if bids or caps as output
+    ret_files = []
+    for elem in files_dicts:
+        ref_dir = (
+            self.bids_directory if dict_field == "input_to" else self.caps_directory
+        )
+        current_file, _ = clinica_file_reader(
+            self.subjects,
+            self.sessions,
+            ref_dir,
+            files_dicts[elem],
+            raise_exception=False,
+        )
+        if current_file:
+            ret_files.extend(current_file)
+
+    return ret_files
+
+
+def is_entity_tracked(prov_context: dict, entity_id: str) -> bool:
+    flag_exists = next(
+        (True for item in prov_context["Entity"] if item["@id"] == entity_id),
+        False,
+    )
+    return flag_exists
+
+
+def is_agent_tracked(prov_context: dict, agent_id: str) -> bool:
+    flag_exists = next(
+        (True for item in prov_context["Agent"] if item["@id"] == agent_id),
+        False,
+    )
+    return flag_exists
+
+
+def is_activity_tracked(prov_context: dict, activity_id: str) -> bool:
+    flag_exists = next(
+        (True for item in prov_context["Activity"] if item["@id"] == activity_id),
+        False,
+    )
+    return flag_exists
+
+
+def get_entity_id(file_path: str) -> str:
+    from pathlib import Path
+
+    entity_id = Path(file_path).with_suffix("").name
+    return entity_id
+
+
+def get_activity_id(pipeline_name: str) -> str:
+    return "clin:" + pipeline_name
+
+
+def get_agent_id(agent_name: str) -> str:
+    return "clin:" + agent_name
+
+
+def get_last_activity(file_path: str) -> Optional[list]:
+
+    """
+    Return the last activity executed on the file
+    """
+
+    prov_record = read_prov(get_associated_prov(file_path))
+    if prov_record and prov_record["Activity"]:
+        last_activity = prov_record["Activity"][-1]["@id"]
+        return last_activity
+    return None
+
+
+def get_associated_prov(file_path: str) -> Path:
+
+    file_path = Path(file_path)
+    while file_path.suffix != "":
+        file_path = file_path.with_suffix("")
+
+    associated_jsonld = file_path.with_suffix(".jsonld")
+    return associated_jsonld
+
+
+def read_prov(prov_path: Path) -> Optional[dict]:
+    """
+    Check if the given file is a valid provenance json-ld
+    """
+    import json
+
+    # TODO: check that the provenance file associations and uses exists
+    if prov_path.exists():
+        with open(prov_path, "r") as fp:
+            json_ld_data = json.load(fp)
+        return json_ld_data
+    return None

From 8fdf9e74bc14f046c0b749d44d3d45c4eb72a7fb Mon Sep 17 00:00:00 2001
From: "omar.elrifai"
Date: Tue, 11 Jan 2022 11:47:30 +0100
Subject: [PATCH 06/24] Update prov with Data Model

---
 clinica/engine/prov_model.py |  27 +++++-
 clinica/engine/prov_utils.py |  76 ++++++++++------
 clinica/engine/provenance.py | 177 ++++++++++++++++++++---------------
 3 files changed, 172 insertions(+), 108 deletions(-)

diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py
index 45e7efe59..74ef6143f 100644
--- a/clinica/engine/prov_model.py
+++ b/clinica/engine/prov_model.py
@@ -42,7 +42,7 @@ class ProvEntity(ProvElement):
     """Provenance Entity element"""
 
     id: Identifier = field(validator=[attr.validators.instance_of(Identifier)])
-    attributes: set
+    attributes: dict
@define @@ -50,7 +50,7 @@ class ProvActivity(ProvElement): """Provenance Activity element""" id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) - attributes: set + attributes: dict @define @@ -58,7 +58,7 @@ class ProvAgent(ProvElement): """Provenance Agent element""" id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) - attributes: set + attributes: dict # Define PROV Relations @@ -94,3 +94,24 @@ class ProvUsage(ProvRelation): @define class ProvAssociation(ProvRelation): pass + + +@define +class ProvEntry: + """ + A prov entry in triple form + """ + + subject: ProvElement + predicate: ProvRelation + object: ProvElement + + +@define +class ProvRecord: + """ + A provenance document containting a PROV context and a list of entries + """ + + context: dict + entries: list[ProvEntry] diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py index 6c209c629..8289405c5 100644 --- a/clinica/engine/prov_utils.py +++ b/clinica/engine/prov_utils.py @@ -1,10 +1,17 @@ -from typing import Union, Optional +from typing import Optional from pathlib import Path +from .prov_model import * -def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> list: + +def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> list[Path]: """ - Calls clinica_file_reader with the appropriate extentions + params: + pipeline_fullname: the current running pipeline name + dict_field: variable to specify if fetching inputs or outputs to the pipeline + + return: + list of 'Path's to the files used in the pipeline """ from clinica.utils.inputs import clinica_file_reader import clinica.utils.input_files as cif @@ -13,7 +20,8 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> list: if dict_field not in dict_field_options: raise (f"dict_field must be one of {dict_field_options}") - # retrieve all the data dictionaries from the input_files module + # Retrieve all the data dict from the input_files module + files_dicts = { k: v for k, v in vars(cif).items() @@ -22,6 +30,7 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> list: and pipeline_fullname in v[dict_field] } # TODO: check if bids or caps as output + ret_files = [] for elem in files_dicts: ref_dir = ( @@ -35,7 +44,7 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> list: raise_exception=False, ) if current_file: - ret_files.extend(current_file) + ret_files.extend(Path(current_file)) return ret_files @@ -64,53 +73,62 @@ def is_activity_tracked(prov_context: dict, activity_id: str) -> bool: return flag_exists -def get_entity_id(file_path: str) -> str: - from pathlib import Path - - entity_id = Path(file_path).with_suffix("").name - return entity_id +def get_entity_id(path_file: Path) -> str: + id = Identifier + id.id = path_file.with_suffix("").name + return id -def get_activity_id(pipeline_name: str) -> str: - return "clin:" + pipeline_name +def get_activity_id(pipeline_name: str) -> Identifier: + id = Identifier + id.id = "clin:" + pipeline_name + return id -def get_agent_id(agent_name: str) -> str: - return "clin:" + agent_name +def get_agent_id(agent_current: ProvAgent) -> Identifier: + id = Identifier + id.id = "clin:" + agent_current.attributes["label"] + return id -def get_last_activity(file_path: str) -> Optional[list]: +def get_last_activity(path_entity: Path) -> Optional[ProvActivity]: """ Return the last activity executed on the file """ - prov_record = 
read_prov(get_associated_prov(file_path)) - if prov_record and prov_record["Activity"]: - last_activity = prov_record["Activity"][-1]["@id"] + prov_record = read_prov_jsonld(get_path_prov(path_entity)) + if prov_record and prov_record.entries: + last_activity = prov_record.entries[-1]["@id"] return last_activity return None -def get_associated_prov(file_path: str) -> Path: +def get_path_prov(path_entity: Path) -> Path: + """ + return: Path of the provenance file associated with an entity + """ - file_path = Path(file_path) - while file_path.suffix != "": - file_path = file_path.with_suffix("") + while path_entity.suffix != "": + path_entity = path_entity.with_suffix("") - associated_jsonld = file_path.with_suffix(".jsonld") - return associated_jsonld + path_prov = path_entity.with_suffix(".jsonld") + return path_prov -def read_prov(prov_path: Path) -> Optional[dict]: +def read_prov_jsonld(path_prov: Path) -> Optional[ProvRecord]: """ - Check if the given file is a valid provenance json-ld + return: ProvRecord in a specific location stored in jsonld format """ import json + prov_record = ProvRecord() + # TODO: check that the provenance file associations and uses exists - if prov_path.exists(): - with open(prov_path, "r") as fp: + if path_prov.exists(): + with open(path_prov, "r") as fp: json_ld_data = json.load(fp) - return json_ld_data + prov_record.records = json_ld_data["records"] + return prov_record + return None diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index f310cede6..94fe4b984 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py @@ -5,98 +5,129 @@ from pathlib import Path from typing import Optional +from clinica.engine.prov_utils import read_prov_jsonld + +from .prov_model import * + def provenance(func): - from .provenance_utils import get_files_list + from .prov_utils import get_files_list @functools.wraps(func) def run_wrapper(self, **kwargs): ret = [] pipeline_fullname = self.fullname - in_files_paths = get_files_list(self, pipeline_fullname, dict_field="input_to") + paths_input_files = get_files_list( + self, pipeline_fullname, dict_field="input_to" + ) - prov_context = get_context(files_paths=in_files_paths) - prov_command = get_command(self, in_files_paths) + record_history = get_history(paths_files=paths_input_files) + entries_current = get_command(self, paths_input_files) - if validate_command(prov_context, prov_command): + if validate_command(record_history, entries_current): # ret = func(self) print("The pipeline succesfully executed.") else: raise Exception( "The pipeline selected is incompatible with the input files provenance" ) - out_files_paths = get_files_list( + paths_out_files = get_files_list( self, pipeline_fullname, dict_field="output_from" ) - register_prov(prov_command, out_files_paths) + register_prov(entries_current, paths_out_files) return ret return run_wrapper -def register_prov(prov_command: dict, out_files: list) -> bool: +def register_prov(entries_current: list[ProvEntry], out_files: Path) -> None: # TODO: iterate over out_files and create a provenance file for each + for file in out_files: - write_prov_file(prov_command, file) + write_prov_file(entries_current, file) print("Provenance registered succesfully") return True -def get_context(files_paths: str) -> dict: +def get_history(paths_files: list[Path]) -> ProvRecord: """ - Return a dictionary with the provenance info related to the files in the files_paths + return: + a ProvRecord for the associated files in path_files """ - from 
clinica.engine.provenance_utils import read_prov, get_associated_prov
+    from .prov_utils import read_prov_jsonld, get_path_prov
 
-    prov_data = {"Entity": [], "Agent": [], "Activity": []}
-    for path in files_paths:
-        prov_record = read_prov(get_associated_prov(path))
-        if prov_record:
-            prov_data = append_prov_dict(prov_data, prov_record)
+    prov_record = ProvRecord()
+
+    for path in paths_files:
+        prov_record_tmp = read_prov_jsonld(get_path_prov(path))
+        if prov_record_tmp:
+            prov_record.entries.extend(prov_record_tmp.entries)
 
-    return prov_data
+    return prov_record
 
 
-def get_command(self, input_files_paths: list) -> dict:
+def get_command(self, paths_inputs: list[Path]) -> ProvEntry:
     """
-    Read the user command and save information in a dict
+    params:
+    paths_inputs: list of input entries paths
+    return:
+    list of ProvEntry associated with the launched pipeline
     """
     import sys
 
-    new_entities = []
-    new_agent = get_agent()
-    for path in input_files_paths:
-        new_entities.append(get_entity(path))
-    new_activity = get_activity(self, new_agent["@id"], new_entities)
+    entries_command = []
+
+    new_agent = get_agent()
+
+    new_entities = []
+
+    for path in paths_inputs:
+        entity_curr = get_entity(path)
+        new_entities.append(entity_curr)
+
+    new_activity = get_activity(self, new_agent.id, new_entities)
 
-    return {
-        "Agent": [new_agent],
-        "Activity": [new_activity],
-        "Entity": new_entities,
-    }
+    entry_curr = ProvEntry(
+        subject=new_agent, predicate=ProvAssociation, object=new_activity
+    )
+
+    # TODO create several entries from this information
+
+    entries_command.append(entry_curr)
+
+    return entries_command
 
 
-def write_prov_file(prov_command, file_path):
+def write_prov_file(
+    list_prov_entries: list, path_entity: Path, overwrite=False
+) -> None:
     """
-    Write the dictionary data to the file_path
+    Append the current provenance info to the prov file. If it does not exist, create new
+
+    params:
+    list_prov_entries: list of ProvEntry
+    path_entity: path of the prov-associated element
     """
-    from clinica.engine.provenance_utils import read_prov, get_associated_prov
+    from .prov_utils import read_prov_jsonld, get_path_prov
 
-    prov_path = get_associated_prov(file_path)
+    prov_path = get_path_prov(path_entity)
 
     if prov_path.exists():
         # append the pipeline provenance information to the old provenance file
-        prov_main = read_prov(prov_path)
-        prov_main = append_prov_dict(prov_main, prov_command)
+        prov_record = read_prov_jsonld(prov_path)
+        prov_record.entries.extend(list_prov_entries)
     else:
-        create_prov_file(prov_command, prov_path)
+        create_prov_file(list_prov_entries, prov_path)
         # create new provenance file with pipeline information
-    return ""
+    return
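
To illustrate the triple form `get_command` now builds: each command becomes subject–predicate–object `ProvEntry` records collected into a `ProvRecord`. A minimal sketch, assuming the attrs defaults a later patch in this series adds so the elements construct without arguments (IDs and labels illustrative):

```python
# One provenance triple, mirroring get_command:
# "<clinica agent> wasAssociatedWith <pipeline activity>"
agent = ProvAgent()
agent.attributes["label"] = "clinica"
agent.attributes["version"] = "0.5.0"

activity = ProvActivity()
activity.attributes["label"] = "t1-linear"

# get_command stores the ProvAssociation relation class itself as the predicate
entry = ProvEntry(subject=agent, predicate=ProvAssociation, object=activity)
record = ProvRecord(context={}, entries=[entry])
```
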
 
 
-def append_prov_dict(prov_main: dict, prov_new: dict) -> dict:
+def extend_prov(prov_main: dict, prov_new: dict) -> dict:
     """
     Append a specific prov data to the global prov dict
     """
 
-def get_agent() -> dict:
+def get_agent() -> ProvAgent:
     import clinica
-    from .provenance_utils import get_agent_id
+    from .prov_utils import get_agent_id
 
-    agent_version = clinica.__version__
-    agent_label = clinica.__name__
-    agent_id = get_agent_id(agent_label + agent_version)
+    new_agent = ProvAgent()
 
-    new_agent = {"@id": agent_id, "label": agent_label, "version": agent_version}
+    new_agent.attributes["version"] = clinica.__version__
+    new_agent.attributes["label"] = clinica.__name__
+    new_agent.id = get_agent_id(new_agent)
 
     return new_agent
 
 
-def get_activity(self, agent_id: str, entities: list) -> dict:
+def get_activity(
+    self, agent_id: Identifier, entities: List[ProvEntity]
+) -> ProvActivity:
     """
-    Add the current command to the list of activities
+    return
+    ProvActivity from related entities and associated agent
     """
     import sys
-    from .provenance_utils import get_activity_id
+    from .prov_utils import get_activity_id
 
-    activity_parameters = self.parameters
-    activity_label = self.fullname
-    activity_id = get_activity_id(self.fullname)
-    activity_command = (sys.argv[1:],)
-    activity_agent = agent_id
-    activity_used_files = [e["@id"] for e in entities]
-
-    new_activity = {
-        "@id": activity_id,
-        "label": activity_label,
-        "command": activity_command,
-        "parameters": activity_parameters,
-        "wasAssociatedWith": activity_agent,
-        "used": activity_used_files,
-    }
+    new_activity = ProvActivity()
+
+    new_activity.attributes["parameters"] = self.parameters
+    new_activity.attributes["label"] = self.fullname
+    new_activity.id = get_activity_id(self.fullname)
+    new_activity.attributes["command"] = (sys.argv[1:],)
+
+    # TODO include related agent and entity to the activity
+    # activity_agent = agent_id
+    # activity_used_files = [e["@id"] for e in entities]
 
     return new_activity
 
 
-def get_entity(img_path: str) -> dict:
+def get_entity(path_curr: Path) -> ProvEntity:
     """
-    Add the current file to the list of entities
+    return an Entity object from the file in path_curr
     """
-    from clinica.engine.provenance_utils import get_entity_id
-    from clinica.engine.provenance_utils import get_last_activity
-    from pathlib import Path
 
-    entity_id = get_entity_id(img_path)
-    entity_label = Path(img_path).name
-    entity_path = img_path
-    entity_source = get_last_activity(img_path)
+    from clinica.engine.prov_utils import get_entity_id
+
+    new_entity = ProvEntity()
+
+    new_entity.id = get_entity_id(path_curr)
+    new_entity.attributes["label"] = 
path_curr.name + new_entity.attributes["path"] = path_curr - new_entity = { - "@id": entity_id, - "label": entity_label, - "atLocation": entity_path, - "wasGeneratedBy": entity_source, - } + # TODO: implement function to return the latest associated activity + # new_entity.attributes["wasGeneratedBy"] = get_last_activity(path_curr) return new_entity From 47f9ce591e9b694e9c69397ac3275aad901d3928 Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Tue, 18 Jan 2022 11:33:52 +0100 Subject: [PATCH 07/24] Fix typing with list --- clinica/engine/prov_model.py | 2 +- clinica/engine/prov_utils.py | 4 ++-- clinica/engine/provenance.py | 11 +++++------ 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py index 74ef6143f..048aed522 100644 --- a/clinica/engine/prov_model.py +++ b/clinica/engine/prov_model.py @@ -114,4 +114,4 @@ class ProvRecord: """ context: dict - entries: list[ProvEntry] + entries: List[ProvEntry] diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py index 8289405c5..9d74ab394 100644 --- a/clinica/engine/prov_utils.py +++ b/clinica/engine/prov_utils.py @@ -1,10 +1,10 @@ -from typing import Optional +from typing import Optional, List from pathlib import Path from .prov_model import * -def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> list[Path]: +def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> List[Path]: """ params: pipeline_fullname: the current running pipeline name diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index 94fe4b984..c97edbc80 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py @@ -1,9 +1,8 @@ -import json import functools from os import read from pathlib import Path -from typing import Optional +from typing import Optional, List from clinica.engine.prov_utils import read_prov_jsonld @@ -41,7 +40,7 @@ def run_wrapper(self, **kwargs): return run_wrapper -def register_prov(entries_current: list[ProvEntry], out_files: Path) -> None: +def register_prov(entries_current: List[ProvEntry], out_files: Path) -> None: # TODO: iterate over out_files and create a provenance file for each @@ -51,7 +50,7 @@ def register_prov(entries_current: list[ProvEntry], out_files: Path) -> None: return True -def get_history(paths_files: list[Path]) -> ProvRecord: +def get_history(paths_files: List[Path]) -> ProvRecord: """ return: a ProvRecord for the associated files in path_files @@ -69,7 +68,7 @@ def get_history(paths_files: list[Path]) -> ProvRecord: return prov_record -def get_command(self, paths_inputs: list[Path]) -> ProvEntry: +def get_command(self, paths_inputs: List[Path]) -> ProvEntry: """ params: paths_inputs: list of input entries paths @@ -153,7 +152,7 @@ def get_agent() -> ProvAgent: def get_activity( - self, agent_id: Identifier, entities: list[ProvEntity] + self, agent_id: Identifier, entities: List[ProvEntity] ) -> ProvActivity: """ return From a3ef086acfc8a990652a5f1f73f7fe4f7d25d0db Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Fri, 21 Jan 2022 09:54:26 +0100 Subject: [PATCH 08/24] Fix various issues --- clinica/engine/prov_model.py | 57 ++++++++++++++++++++++++++++-------- clinica/engine/prov_utils.py | 37 +++++++++++++---------- clinica/engine/provenance.py | 15 ++++++---- 3 files changed, 75 insertions(+), 34 deletions(-) diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py index 048aed522..db3144ffd 100644 --- a/clinica/engine/prov_model.py +++ 
b/clinica/engine/prov_model.py @@ -8,9 +8,19 @@ # Define PROV abstract concepts +@define +class ProvContext: + label: str + link: str + + @define class Identifier: - id: int + seed: int = field() + label: str = field( + default=None, + validator=attr.validators.optional(attr.validators.instance_of(str)), + ) class ProvElement(ABC): @@ -41,24 +51,42 @@ class ProvRelation(ABC): class ProvEntity(ProvElement): """Provenance Entity element""" - id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) - attributes: dict + id: Identifier = field( + init=False, validator=[attr.validators.instance_of(Identifier)] + ) + attributes: dict = field(default={}) + + def __attrs_post_init__(self): + self.id = Identifier(seed=0) @define class ProvActivity(ProvElement): """Provenance Activity element""" - id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) - attributes: dict + id: Identifier = field( + init=False, validator=[attr.validators.instance_of(Identifier)] + ) + attributes: dict = field(default={}) + + def __attrs_post_init__(self): + self.id = Identifier(seed=0) @define class ProvAgent(ProvElement): """Provenance Agent element""" - id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) - attributes: dict + id: Identifier = field( + init=False, validator=[attr.validators.instance_of(Identifier)] + ) + attributes: dict = field( + default={}, + validator=attr.validators.optional(attr.validators.instance_of(dict)), + ) + + def __attrs_post_init__(self): + self.id = Identifier(seed=0) # Define PROV Relations @@ -67,19 +95,24 @@ class ProvAgent(ProvElement): @define class ProvGeneration(ProvRelation): id: Identifier = field( - default=None, + init=False, validator=attr.validators.optional(attr.validators.instance_of(Identifier)), ) src: ProvActivity = field( - default=None, + init=False, validator=attr.validators.optional(attr.validators.instance_of(ProvActivity)), ) dest: ProvEntity = field( - default=None, + init=False, validator=attr.validators.optional(attr.validators.instance_of(ProvEntity)), ) + def __attrs_post_init__(self): + self.id = Identifier(seed=0) + self.src = ProvActivity() + self.dest = ProvEntity() + # entity: an identifier (e) for a created entity; # activity: an OPTIONAL identifier (a) for the activity that creates the entity; # time: an OPTIONAL "generation time" (t), the time at which the entity was completely created; @@ -113,5 +146,5 @@ class ProvRecord: A provenance document containting a PROV context and a list of entries """ - context: dict - entries: List[ProvEntry] + context: dict = field(default={}) + entries: List[ProvEntry] = field(default=[]) diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py index 9d74ab394..7b24204ff 100644 --- a/clinica/engine/prov_utils.py +++ b/clinica/engine/prov_utils.py @@ -10,8 +10,7 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> List[ pipeline_fullname: the current running pipeline name dict_field: variable to specify if fetching inputs or outputs to the pipeline - return: - list of 'Path's to the files used in the pipeline + return list of 'Path's to the files used in the pipeline """ from clinica.utils.inputs import clinica_file_reader import clinica.utils.input_files as cif @@ -44,7 +43,7 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> List[ raise_exception=False, ) if current_file: - ret_files.extend(Path(current_file)) + ret_files.extend([Path(x) for x in current_file]) return ret_files @@ -74,27 
+73,27 @@ def is_activity_tracked(prov_context: dict, activity_id: str) -> bool: def get_entity_id(path_file: Path) -> str: - id = Identifier - id.id = path_file.with_suffix("").name + id = Identifier(seed=0) + id.label = path_file.with_suffix("").name return id def get_activity_id(pipeline_name: str) -> Identifier: - id = Identifier - id.id = "clin:" + pipeline_name + id = Identifier(seed=0) + id.label = "clin:" + pipeline_name return id def get_agent_id(agent_current: ProvAgent) -> Identifier: - id = Identifier - id.id = "clin:" + agent_current.attributes["label"] + id = Identifier(seed=0) + id.label = "clin:" + agent_current.attributes["label"] return id def get_last_activity(path_entity: Path) -> Optional[ProvActivity]: """ - Return the last activity executed on the file + return the last activity executed on the file """ prov_record = read_prov_jsonld(get_path_prov(path_entity)) @@ -120,15 +119,21 @@ def read_prov_jsonld(path_prov: Path) -> Optional[ProvRecord]: """ return: ProvRecord in a specific location stored in jsonld format """ - import json - prov_record = ProvRecord() - - # TODO: check that the provenance file associations and uses exists if path_prov.exists(): with open(path_prov, "r") as fp: - json_ld_data = json.load(fp) - prov_record.records = json_ld_data["records"] + + prov_record = deserialize_jsonld(fp) return prov_record return None + + +def deserialize_jsonld(fp_jsonld) -> List[ProvEntry]: + """ + params: + + return list of ProvEntry objects from jsonld dictionary data + """ + + return [] diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index c97edbc80..874733d78 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py @@ -58,11 +58,11 @@ def get_history(paths_files: List[Path]) -> ProvRecord: from .prov_utils import read_prov_jsonld, get_path_prov - prov_record = ProvRecord + prov_record = ProvRecord({}, []) for path in paths_files: prov_record_tmp = read_prov_jsonld(get_path_prov(path)) - if prov_record: + if prov_record_tmp: prov_record.entries.extend(prov_record_tmp.entries) return prov_record @@ -87,11 +87,11 @@ def get_command(self, paths_inputs: List[Path]) -> ProvEntry: entity_curr = get_entity(path) new_entities.append(entity_curr) - new_activity = get_activity(self, new_agent["@id"], new_entities) + new_activity = get_activity(self, new_agent.id, new_entities) entry_curr = ProvEntry entry_curr.subject = new_agent - entry_curr.predicate = ProvAssociation + entry_curr.predicate = ProvAssociation() entry_curr.object = new_activity # TODO create several entries from this information @@ -161,7 +161,7 @@ def get_activity( import sys from .prov_utils import get_activity_id - new_activity = ProvActivity + new_activity = ProvActivity() new_activity.attributes["parameters"] = self.parameters new_activity.attributes["label"] = self.fullname @@ -206,11 +206,14 @@ def create_prov_file(prov_command, prov_path): return -def validate_command(prov_context: dict, prov_command: dict) -> bool: +def validate_command( + prov_context: ProvRecord, prov_command: List[Optional[ProvEntry]] +) -> bool: """ Check the command is valid on the data being run """ flag = True + prov_subject = prov_command[0].subject new_activity_id = prov_command["Activity"][0]["@id"] new_agent_id = prov_command["Agent"][0]["@id"] From bd4636bdaa0c61db3ee2397c9dff22b395f3a22a Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Thu, 27 Jan 2022 15:00:14 +0100 Subject: [PATCH 09/24] Rename prov extraction functions --- clinica/engine/provenance.py | 16 
+++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index 874733d78..853f54c75 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py @@ -20,11 +20,11 @@ def run_wrapper(self, **kwargs): self, pipeline_fullname, dict_field="input_to" ) - record_history = get_history(paths_files=paths_input_files) - entries_current = get_command(self, paths_input_files) + prov_record = get_prov_record(paths_files=paths_input_files) + prov_entry = get_pipeline_entry(self, paths_input_files) - if validate_command(record_history, entries_current): - # ret = func(self) + if validate_command(prov_record, prov_entry): + ret = func(self) print("The pipeline succesfully executed.") else: raise Exception( @@ -33,7 +33,7 @@ def run_wrapper(self, **kwargs): paths_out_files = get_files_list( self, pipeline_fullname, dict_field="output_from" ) - register_prov(entries_current, paths_out_files) + register_prov(prov_entry, paths_out_files) return ret @@ -50,7 +50,7 @@ def register_prov(entries_current: List[ProvEntry], out_files: Path) -> None: return True -def get_history(paths_files: List[Path]) -> ProvRecord: +def get_prov_record(paths_files: List[Path]) -> ProvRecord: """ return: a ProvRecord for the associated files in path_files @@ -68,7 +68,7 @@ def get_history(paths_files: List[Path]) -> ProvRecord: return prov_record -def get_command(self, paths_inputs: List[Path]) -> ProvEntry: +def get_pipeline_entry(self, paths_inputs: List[Path]) -> ProvEntry: """ params: paths_inputs: list of input entries paths @@ -94,8 +94,6 @@ def get_command(self, paths_inputs: List[Path]) -> ProvEntry: entry_curr.predicate = ProvAssociation() entry_curr.object = new_activity - # TODO create several entries from this information - entries_command.append(entry_curr) return entries_command From 5dc84751b47346f9e875fa014889623a5884242e Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Mon, 31 Jan 2022 09:11:47 +0100 Subject: [PATCH 10/24] Deserialize json-ld --- clinica/engine/prov_model.py | 36 +++++++------------- clinica/engine/prov_utils.py | 64 ++++++++++++++++++++++++++++-------- clinica/engine/provenance.py | 53 +++++++++-------------------- 3 files changed, 78 insertions(+), 75 deletions(-) diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py index db3144ffd..a462c16fc 100644 --- a/clinica/engine/prov_model.py +++ b/clinica/engine/prov_model.py @@ -10,15 +10,18 @@ @define class ProvContext: - label: str - link: str + _namespaces: list + + +@define +class Namespace: + id: str + uri: str @define class Identifier: - seed: int = field() label: str = field( - default=None, validator=attr.validators.optional(attr.validators.instance_of(str)), ) @@ -51,43 +54,28 @@ class ProvRelation(ABC): class ProvEntity(ProvElement): """Provenance Entity element""" - id: Identifier = field( - init=False, validator=[attr.validators.instance_of(Identifier)] - ) + id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) attributes: dict = field(default={}) - def __attrs_post_init__(self): - self.id = Identifier(seed=0) - @define class ProvActivity(ProvElement): """Provenance Activity element""" - id: Identifier = field( - init=False, validator=[attr.validators.instance_of(Identifier)] - ) + id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) attributes: dict = field(default={}) - def __attrs_post_init__(self): - self.id = Identifier(seed=0) - @define class ProvAgent(ProvElement): 
"""Provenance Agent element""" - id: Identifier = field( - init=False, validator=[attr.validators.instance_of(Identifier)] - ) + id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) attributes: dict = field( default={}, validator=attr.validators.optional(attr.validators.instance_of(dict)), ) - def __attrs_post_init__(self): - self.id = Identifier(seed=0) - # Define PROV Relations @@ -109,7 +97,7 @@ class ProvGeneration(ProvRelation): ) def __attrs_post_init__(self): - self.id = Identifier(seed=0) + self.id = Identifier(label="") self.src = ProvActivity() self.dest = ProvEntity() @@ -146,5 +134,5 @@ class ProvRecord: A provenance document containting a PROV context and a list of entries """ - context: dict = field(default={}) + context: ProvContext = field() entries: List[ProvEntry] = field(default=[]) diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py index 7b24204ff..f32f3bafd 100644 --- a/clinica/engine/prov_utils.py +++ b/clinica/engine/prov_utils.py @@ -73,20 +73,17 @@ def is_activity_tracked(prov_context: dict, activity_id: str) -> bool: def get_entity_id(path_file: Path) -> str: - id = Identifier(seed=0) - id.label = path_file.with_suffix("").name + id = Identifier(label=path_file.with_suffix("").name) return id def get_activity_id(pipeline_name: str) -> Identifier: - id = Identifier(seed=0) - id.label = "clin:" + pipeline_name + id = Identifier(label="clin:" + pipeline_name) return id -def get_agent_id(agent_current: ProvAgent) -> Identifier: - id = Identifier(seed=0) - id.label = "clin:" + agent_current.attributes["label"] +def get_agent_id() -> Identifier: + id = Identifier(label="RRID:Clinica") return id @@ -121,19 +118,60 @@ def read_prov_jsonld(path_prov: Path) -> Optional[ProvRecord]: """ if path_prov.exists(): - with open(path_prov, "r") as fp: - - prov_record = deserialize_jsonld(fp) - return prov_record + elements, prov_record = deserialize_jsonld(path_prov) + return prov_record return None -def deserialize_jsonld(fp_jsonld) -> List[ProvEntry]: +def deserialize_jsonld(path_prov) -> ProvRecord: """ params: return list of ProvEntry objects from jsonld dictionary data """ - return [] + import rdflib + + g = rdflib.Graph(identifier="prov_graph_records") + g.parse(path_prov, format="json-ld") + + elements = {} + entries = [] + + # fetch context: + context = ProvContext([]) + for lbl, link in g.namespace_manager.namespaces(): + namespace = Namespace(lbl, link.n3()) + context._namespaces.append(namespace) + + for s, p, o in g: + + if str(p) == "http://www.w3.org/ns/prov#Activity": + id = Identifier(label=g.namespace_manager.qname(o)) + elements[id.label] = ProvActivity(id) + + elif str(p) == "http://www.w3.org/ns/prov#Agent": + id = Identifier(label=g.namespace_manager.qname(o)) + elements[id.label] = ProvAgent(id) + + elif str(p) == "http://www.w3.org/ns/prov#Entity": + id = Identifier(label=g.namespace_manager.qname(o)) + elements[id.label] = ProvEntity(id) + + for s, p, o in g: + if type(s) != rdflib.term.BNode: + attr = g.namespace_manager.qname(p).split(":")[1] + + subj = elements[g.namespace_manager.qname(s)] + subj.attributes[attr] = str(o) + + curr_entry = ProvEntry( + subject=g.namespace_manager.qname(s), predicate=attr, object=o + ) + + entries.append(curr_entry) + + prov_rec = ProvRecord(context=context, entries=entries) + + return elements, prov_rec diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index 853f54c75..7c36272c9 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py 
@@ -40,7 +40,7 @@ def run_wrapper(self, **kwargs):
     return run_wrapper


-def register_prov(entries_current: List[ProvEntry], out_files: Path) -> None:
+def register_prov(entries_current: ProvRecord, out_files: Path) -> None:

     # TODO: iterate over out_files and create a provenance file for each

@@ -87,16 +87,14 @@ def get_pipeline_entry(self, paths_inputs: List[Path]) -> ProvEntry:
         entity_curr = get_entity(path)
         new_entities.append(entity_curr)

-    new_activity = get_activity(self, new_agent.id, new_entities)
+    new_activity = get_activity(self, new_agent, new_entities)

     entry_curr = ProvEntry
     entry_curr.subject = new_agent
     entry_curr.predicate = ProvAssociation()
     entry_curr.object = new_activity

-    entries_command.append(entry_curr)
-
-    return entries_command
+    return entry_curr


 def write_prov_file(
@@ -140,18 +138,15 @@ def get_agent() -> ProvAgent:
     import clinica
     from .prov_utils import get_agent_id

-    new_agent = ProvAgent()
+    new_agent = ProvAgent(id=get_agent_id())

     new_agent.attributes["version"] = clinica.__version__
     new_agent.attributes["label"] = clinica.__name__
-    new_agent.id = get_agent_id(new_agent)

     return new_agent


-def get_activity(
-    self, agent_id: Identifier, entities: List[ProvEntity]
-) -> ProvActivity:
+def get_activity(self, agent: Identifier, entities: List[ProvEntity]) -> ProvActivity:
     """
     return
         ProvActivity from related entities and associated agent
@@ -159,16 +154,13 @@ def get_activity(
     import sys
     from .prov_utils import get_activity_id

-    new_activity = ProvActivity()
+    new_activity = ProvActivity(id=get_activity_id(self.fullname))

     new_activity.attributes["parameters"] = self.parameters
     new_activity.attributes["label"] = self.fullname
-    new_activity.id = get_activity_id(self.fullname)
     new_activity.attributes["command"] = (sys.argv[1:],)
-
-    # TODO include related agent and entity to the activity
-    # activity_agent = agent_id
-    # activity_used_files = [e["@id"] for e in entities]
+    new_activity.attributes["used"] = [x.id for x in entities]
+    new_activity.attributes["wasAssociatedWith"] = agent.id

     return new_activity

@@ -180,9 +172,7 @@ def get_entity(path_curr: Path) -> ProvEntity:
     """

-    from clinica.engine.prov_utils import get_entity_id
-
-    new_entity = ProvEntity()
-
-    new_entity.id = get_entity_id(path_curr)
+    new_entity = ProvEntity(id=get_entity_id(path_curr))
     new_entity.attributes["label"] = path_curr.name
     new_entity.attributes["path"] = path_curr

@@ -204,29 +194,16 @@ def create_prov_file(prov_command, prov_path):
     return


-def validate_command(
-    prov_context: ProvRecord, prov_command: List[Optional[ProvEntry]]
-) -> bool:
+def validate_command(prov_record: ProvRecord, prov_entry: ProvEntry) -> bool:
     """
     Check the command is valid on the data being run
     """
     flag = True
-    prov_subject = prov_command[0].subject
-    new_activity_id = prov_command["Activity"][0]["@id"]
-    new_agent_id = prov_command["Agent"][0]["@id"]
-
-    for entity in prov_context["Entity"]:
-        old_activity_id = entity["wasGeneratedBy"]
-        if old_activity_id:
-            ptr_activity = next(
-                item
-                for item in prov_context["Activity"]
-                if item["@id"] == old_activity_id
-            )
-            old_agent_id = ptr_activity["wasAssociatedWith"]
-            flag and is_valid(
-                {(old_agent_id, old_activity_id): (new_agent_id, new_activity_id)}
-            )
+
+    for entry in prov_record.entries:
+        # TODO: check that the record entries are compatible with the current entry
+        flag = True
+
     return flag
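A note on the deserialization introduced in the patch above: rdflib flattens the JSON-LD document into (subject, predicate, object) triples, and the prefixes declared in @context end up on the graph's namespace manager, which is what the qname() calls rely on to compact URIs back to labels. Below is a minimal, self-contained sketch of that round trip, assuming rdflib >= 6.0 (older versions need the separate rdflib-jsonld plugin); the inline document and its clin: identifiers are made up for illustration and are not a real Clinica record.

import rdflib

# Hypothetical JSON-LD provenance document: one activity associated with
# one agent. The clin: namespace and identifiers are illustrative only.
doc = """
{
    "@context": {
        "clin": "https://example.org/clinica#",
        "prov": "http://www.w3.org/ns/prov#"
    },
    "@id": "clin:t1-linear",
    "@type": "prov:Activity",
    "prov:wasAssociatedWith": {"@id": "clin:clinica", "@type": "prov:Agent"}
}
"""

g = rdflib.Graph()
g.parse(data=doc, format="json-ld")

# The @context prefixes are now graph namespaces, so qname() can compact
# full URIs such as http://www.w3.org/ns/prov#Activity to prov:Activity.
for s, p, o in g:
    print(g.namespace_manager.qname(s), g.namespace_manager.qname(p), o)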
From 8f384c5dda25ebde550061fdb4a3171e0aa9807b Mon Sep 17 00:00:00 2001
From: "omar.elrifai"
Date: Wed, 2 Feb 2022 18:05:39 +0100
Subject: [PATCH 11/24] Update record data model and serialization/deserialization

---
 clinica/engine/prov_model.py | 29 +++++++++++++++++++++++++++--
 clinica/engine/prov_utils.py | 15 +++++++--------
 clinica/engine/provenance.py | 29 ++++++++++++++---------------
 3 files changed, 48 insertions(+), 25 deletions(-)

diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py
index a462c16fc..155e5b140 100644
--- a/clinica/engine/prov_model.py
+++ b/clinica/engine/prov_model.py
@@ -1,9 +1,12 @@
+from xml.dom.minidom import Element
 from attr import define, field
 import attr
-import typing
+import cattr
 from typing import Union, List
 from abc import ABC, abstractmethod

+from matplotlib.style import context
+
 # Define PROV abstract concepts

@@ -39,6 +42,10 @@ def attributes(cls):
         """attributes are optional"""
         return NotImplementedError

+    @classmethod
+    def get_type(cls):
+        return type(cls).__name__
+

 class ProvRelation(ABC):

@@ -135,4 +142,22 @@ class ProvRecord:
     """

     context: ProvContext = field()
-    entries: List[ProvEntry] = field(default=[])
+    elements: List[ProvElement] = field(default=[])
+
+    def __getitem__(self, idx):
+        for element in self.elements:
+            if element.id == idx:
+                return element
+
+    def to_json(self):
+        json_dict = {}
+        json_dict["prov:Agent"] = [
+            cattr.unstructure(x) for x in self.elements if isinstance(x, ProvAgent)
+        ]
+        json_dict["prov:Activity"] = [
+            cattr.unstructure(x) for x in self.elements if isinstance(x, ProvActivity)
+        ]
+        json_dict["prov:Entity"] = [
+            cattr.unstructure(x) for x in self.elements if isinstance(x, ProvEntity)
+        ]
+        return json_dict
diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py
index f32f3bafd..322d1fc45 100644
--- a/clinica/engine/prov_utils.py
+++ b/clinica/engine/prov_utils.py
@@ -118,7 +118,7 @@ def read_prov_jsonld(path_prov: Path) -> Optional[ProvRecord]:
     """

     if path_prov.exists():
-        elements, prov_record = deserialize_jsonld(path_prov)
+        prov_record = deserialize_jsonld(path_prov)
         return prov_record

     return None
@@ -137,7 +137,6 @@ def deserialize_jsonld(path_prov) -> ProvRecord:
     g.parse(path_prov, format="json-ld")

     elements = {}
-    entries = []

     # fetch context:
     context = ProvContext([])
@@ -166,12 +165,12 @@ def deserialize_jsonld(path_prov) -> ProvRecord:
             subj = elements[g.namespace_manager.qname(s)]
             subj.attributes[attr] = str(o)

-            curr_entry = ProvEntry(
-                subject=g.namespace_manager.qname(s), predicate=attr, object=o
-            )
+            # curr_entry = ProvEntry(
+            #     subject=g.namespace_manager.qname(s), predicate=attr, object=o
+            # )

-            entries.append(curr_entry)
+            # entries.append(curr_entry)

-    prov_rec = ProvRecord(context=context, entries=entries)
+    prov_rec = ProvRecord(context=context, elements=list(elements.values()))

-    return elements, prov_rec
+    return prov_rec
diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py
index 7c36272c9..632e157b6 100644
--- a/clinica/engine/provenance.py
+++ b/clinica/engine/provenance.py
@@ -4,6 +4,8 @@
 from pathlib import Path
 from typing import Optional, List

+from torch import ne
+
 from clinica.engine.prov_utils import read_prov_jsonld

 from .prov_model import *
@@ -21,10 +23,10 @@ def run_wrapper(self, **kwargs):
         )

         prov_record = get_prov_record(paths_files=paths_input_files)
-        prov_entry = get_pipeline_entry(self, paths_input_files)
+        prov_entry = get_pipeline_record(self, paths_input_files)

         if validate_command(prov_record, prov_entry):
-            ret = func(self)
+            # ret = func(self)
             print("The pipeline succesfully executed.")
         else:
             raise Exception(
@@ -63,38 +65,35 @@ def get_prov_record(paths_files: 
List[Path]) -> ProvRecord: for path in paths_files: prov_record_tmp = read_prov_jsonld(get_path_prov(path)) if prov_record_tmp: - prov_record.entries.extend(prov_record_tmp.entries) + # TODO extend context as well + prov_record.elements.extend(prov_record_tmp.elements) return prov_record -def get_pipeline_entry(self, paths_inputs: List[Path]) -> ProvEntry: +def get_pipeline_record(self, paths_inputs: List[Path]) -> ProvRecord: """ params: paths_inputs: list of input entries paths return: - ProvEntry associated with the launched pipeline + ProvRecord associated with the launched pipeline """ import sys - entries_command = [] - + elements = [] new_agent = get_agent() - + elements.append(new_agent) new_entities = [] for path in paths_inputs: entity_curr = get_entity(path) new_entities.append(entity_curr) + elements.extend(new_entities) new_activity = get_activity(self, new_agent, new_entities) + elements.append(new_activity) - entry_curr = ProvEntry - entry_curr.subject = new_agent - entry_curr.predicate = ProvAssociation() - entry_curr.object = new_activity - - return entry_curr + return ProvRecord(context={}, elements=elements) def write_prov_file( @@ -200,7 +199,7 @@ def validate_command(prov_record: ProvRecord, prov_entry: ProvEntry) -> bool: """ flag = True - for entry in prov_record.entries: + for el in prov_record.elements: # TODO: check that the record entries are compatible with the current entry flag = True From d32e06fc4178d2824b7debbe32df0b79bfde3b05 Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Thu, 3 Feb 2022 16:30:27 +0100 Subject: [PATCH 12/24] Clean up unused code --- clinica/engine/prov_model.py | 8 ++--- clinica/engine/prov_utils.py | 58 +++++++++++-------------------- clinica/engine/provenance.py | 67 +++++++++++++----------------------- 3 files changed, 49 insertions(+), 84 deletions(-) diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py index 155e5b140..609a6136d 100644 --- a/clinica/engine/prov_model.py +++ b/clinica/engine/prov_model.py @@ -62,7 +62,7 @@ class ProvEntity(ProvElement): """Provenance Entity element""" id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) - attributes: dict = field(default={}) + attributes: dict = field(default=attr.Factory(dict)) @define @@ -70,7 +70,7 @@ class ProvActivity(ProvElement): """Provenance Activity element""" id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) - attributes: dict = field(default={}) + attributes: dict = field(default=attr.Factory(dict)) @define @@ -79,7 +79,7 @@ class ProvAgent(ProvElement): id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) attributes: dict = field( - default={}, + default=attr.Factory(dict), validator=attr.validators.optional(attr.validators.instance_of(dict)), ) @@ -149,7 +149,7 @@ def __getitem__(self, idx): if element.id == idx: return element - def to_json(self): + def json(self): json_dict = {} json_dict["prov:Agent"] = [ cattr.unstructure(x) for x in self.elements if isinstance(x, ProvAgent) diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py index 322d1fc45..652fd44b2 100644 --- a/clinica/engine/prov_utils.py +++ b/clinica/engine/prov_utils.py @@ -48,41 +48,17 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> List[ return ret_files -def is_entity_tracked(prov_context: dict, entity_id: str) -> bool: - flag_exists = next( - (True for item in prov_context["Entity"] if item["@id"] == entity_id), - False, - ) - return flag_exists - - -def 
is_agent_tracked(prov_context: dict, agent_id: str) -> bool: - flag_exists = next( - (True for item in prov_context["Agent"] if item["@id"] == agent_id), - False, - ) - return flag_exists - - -def is_activity_tracked(prov_context: dict, activity_id: str) -> bool: - flag_exists = next( - (True for item in prov_context["Activity"] if item["@id"] == activity_id), - False, - ) - return flag_exists - - -def get_entity_id(path_file: Path) -> str: +def generate_entity_id(path_file: Path) -> Identifier: id = Identifier(label=path_file.with_suffix("").name) return id -def get_activity_id(pipeline_name: str) -> Identifier: +def generate_activity_id(pipeline_name: str) -> Identifier: id = Identifier(label="clin:" + pipeline_name) return id -def get_agent_id() -> Identifier: +def generate_agent_id() -> Identifier: id = Identifier(label="RRID:Clinica") return id @@ -94,9 +70,12 @@ def get_last_activity(path_entity: Path) -> Optional[ProvActivity]: """ prov_record = read_prov_jsonld(get_path_prov(path_entity)) - if prov_record and prov_record.entries: - last_activity = prov_record.entries[-1]["@id"] - return last_activity + if prov_record and prov_record.elements: + # TODO: filter activities by date + last_activity = [ + x for x in prov_record.elements if isinstance(x, ProvActivity) + ][-1] + return last_activity.id.label return None @@ -112,6 +91,17 @@ def get_path_prov(path_entity: Path) -> Path: return path_prov +def create_prov_file(prov_command, prov_path): + """ + Create new provenance file based on command + """ + import json + + with open(prov_path, "w") as fp: + json.dump(prov_command.json(), fp, indent=4) + return + + def read_prov_jsonld(path_prov: Path) -> Optional[ProvRecord]: """ return: ProvRecord in a specific location stored in jsonld format @@ -128,7 +118,7 @@ def deserialize_jsonld(path_prov) -> ProvRecord: """ params: - return list of ProvEntry objects from jsonld dictionary data + return ProvRecord object from jsonld dictionary data """ import rdflib @@ -165,12 +155,6 @@ def deserialize_jsonld(path_prov) -> ProvRecord: subj = elements[g.namespace_manager.qname(s)] subj.attributes[attr] = str(o) - # curr_entry = ProvEntry( - # subject=g.namespace_manager.qname(s), predicate=attr, object=o - # ) - - # entries.append(curr_entry) - prov_rec = ProvRecord(context=context, elements=list(elements.values())) return prov_rec diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index 632e157b6..78c664ac7 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py @@ -2,11 +2,9 @@ from os import read from pathlib import Path -from typing import Optional, List +from typing import List -from torch import ne - -from clinica.engine.prov_utils import read_prov_jsonld +from clinica.engine.prov_utils import create_prov_file from .prov_model import * @@ -22,10 +20,10 @@ def run_wrapper(self, **kwargs): self, pipeline_fullname, dict_field="input_to" ) - prov_record = get_prov_record(paths_files=paths_input_files) - prov_entry = get_pipeline_record(self, paths_input_files) + prov_history = get_history_record(paths_files=paths_input_files) + prov_current = get_pipeline_record(self, paths_input_files) - if validate_command(prov_record, prov_entry): + if validate_command(prov_history, prov_current): # ret = func(self) print("The pipeline succesfully executed.") else: @@ -35,7 +33,7 @@ def run_wrapper(self, **kwargs): paths_out_files = get_files_list( self, pipeline_fullname, dict_field="output_from" ) - register_prov(prov_entry, paths_out_files) + 
register_prov(prov_current, paths_out_files) return ret @@ -52,7 +50,7 @@ def register_prov(entries_current: ProvRecord, out_files: Path) -> None: return True -def get_prov_record(paths_files: List[Path]) -> ProvRecord: +def get_history_record(paths_files: List[Path]) -> ProvRecord: """ return: a ProvRecord for the associated files in path_files @@ -97,10 +95,10 @@ def get_pipeline_record(self, paths_inputs: List[Path]) -> ProvRecord: def write_prov_file( - list_prov_entries: list, path_entity: Path, overwrite=False + list_prov_entries: ProvRecord, path_entity: Path, overwrite=False ) -> None: """ - Append the current provenance info to the prov file. If it does not exist, create new + Create provenance file with current pipeline information params: prov_entries: list of ProvEntry @@ -111,13 +109,8 @@ def write_prov_file( prov_path = get_path_prov(path_entity) - if prov_path.exists(): - # append the pipeline provenance information to the old provenance file - prov_record = read_prov_jsonld(prov_path) - prov_record.extend(list_prov_entries) - else: - create_prov_file(list_prov_entries, prov_path) - # create new provenance file with pipeline information + create_prov_file(list_prov_entries, prov_path) + return @@ -135,9 +128,9 @@ def extend_prov(prov_main: dict, prov_new: dict) -> dict: def get_agent() -> ProvAgent: import clinica - from .prov_utils import get_agent_id + from .prov_utils import generate_agent_id - new_agent = ProvAgent(id=get_agent_id()) + new_agent = ProvAgent(id=generate_agent_id()) new_agent.attributes["version"] = clinica.__version__ new_agent.attributes["label"] = clinica.__name__ @@ -151,9 +144,9 @@ def get_activity(self, agent: Identifier, entities: List[ProvEntity]) -> ProvAct ProvActivity from related entities and associated agent """ import sys - from .prov_utils import get_activity_id + from .prov_utils import generate_activity_id - new_activity = ProvActivity(id=get_activity_id(self.fullname)) + new_activity = ProvActivity(id=generate_activity_id(self.fullname)) new_activity.attributes["parameters"] = self.parameters new_activity.attributes["label"] = self.fullname @@ -169,40 +162,28 @@ def get_entity(path_curr: Path) -> ProvEntity: return an Entity object from the file in path_curr """ - from clinica.engine.prov_utils import get_entity_id + from clinica.engine.prov_utils import generate_entity_id, get_last_activity - new_entity = ProvEntity(id=get_entity_id(path_curr)) + new_entity = ProvEntity(id=generate_entity_id(path_curr)) new_entity.attributes["label"] = path_curr.name - new_entity.attributes["path"] = path_curr + new_entity.attributes["path"] = str(path_curr) # TODO: implement function to return the latest associated activity - # new_entity.attributes["wasGeneratedBy"] = get_last_activity(path_curr) + new_entity.attributes["wasGeneratedBy"] = get_last_activity(path_curr) return new_entity -def create_prov_file(prov_command, prov_path): - """ - Create new provenance file based on command - """ - import json - - with open(prov_path, "w") as fp: - json.dump(prov_command, fp, indent=4) - - return - - -def validate_command(prov_record: ProvRecord, prov_entry: ProvEntry) -> bool: +def validate_command(prov_history: ProvRecord, prov_current: ProvRecord) -> bool: """ Check the command is valid on the data being run """ flag = True - for el in prov_record.elements: - # TODO: check that the record entries are compatible with the current entry - flag = True - + for a in prov_history.elements: + for b in prov_current.elements: + # TODO: check that the record entries 
are compatible with the current entry + flag = True return flag From 126ce0b733d104ac2dc99e918804c159636836ad Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Thu, 3 Feb 2022 17:22:53 +0100 Subject: [PATCH 13/24] fix conflict in rebase --- clinica/utils/input_files.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/clinica/utils/input_files.py b/clinica/utils/input_files.py index 93ce26edb..99190dbcd 100644 --- a/clinica/utils/input_files.py +++ b/clinica/utils/input_files.py @@ -437,10 +437,6 @@ def pet_linear_nii(acq_label, suvr_reference_region, uncropped_image): f"*_acq-{acq_label}_pet_space-MNI152NLin2009cSym{description}_res-1x1x1_suvr-{suvr_reference_region}_pet.nii.gz", ), "description": "", -<<<<<<< HEAD - "needed_pipeline": "pet-linear", -======= "output_from": "pet-linear", ->>>>>>> de9d4d8b (Init traceability feature) } return information From 672c28c75e39d2cdb0b1e56d3fcda2b90cd45eeb Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Mon, 7 Feb 2022 14:36:00 +0100 Subject: [PATCH 14/24] Update prov jsonld representation --- clinica/engine/prov_model.py | 71 ++++++++++-------------------------- clinica/engine/prov_utils.py | 3 +- clinica/engine/provenance.py | 12 +++--- 3 files changed, 28 insertions(+), 58 deletions(-) diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py index 609a6136d..457ac725b 100644 --- a/clinica/engine/prov_model.py +++ b/clinica/engine/prov_model.py @@ -28,12 +28,15 @@ class Identifier: validator=attr.validators.optional(attr.validators.instance_of(str)), ) + def __repr__(self): + return "%s" % self.label + class ProvElement(ABC): @property @classmethod @abstractmethod - def id(cls): + def uid(cls): """id is required for ProvElements""" return NotImplementedError @@ -61,67 +64,33 @@ class ProvRelation(ABC): class ProvEntity(ProvElement): """Provenance Entity element""" - id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) + uid: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) attributes: dict = field(default=attr.Factory(dict)) + def unstrct(self): + return {"id": str(self.uid), **self.attributes} + @define class ProvActivity(ProvElement): """Provenance Activity element""" - id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) + uid: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) attributes: dict = field(default=attr.Factory(dict)) + def unstrct(self): + return {"id": str(self.uid), **self.attributes} + @define class ProvAgent(ProvElement): """Provenance Agent element""" - id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) - attributes: dict = field( - default=attr.Factory(dict), - validator=attr.validators.optional(attr.validators.instance_of(dict)), - ) - - -# Define PROV Relations - - -@define -class ProvGeneration(ProvRelation): - id: Identifier = field( - init=False, - validator=attr.validators.optional(attr.validators.instance_of(Identifier)), - ) - - src: ProvActivity = field( - init=False, - validator=attr.validators.optional(attr.validators.instance_of(ProvActivity)), - ) - dest: ProvEntity = field( - init=False, - validator=attr.validators.optional(attr.validators.instance_of(ProvEntity)), - ) - - def __attrs_post_init__(self): - self.id = Identifier(label="") - self.src = ProvActivity() - self.dest = ProvEntity() - - # entity: an identifier (e) for a created entity; - # activity: an OPTIONAL identifier (a) for the activity that creates the entity; - # time: an OPTIONAL "generation time" (t), the 
time at which the entity was completely created; - # attributes: an OPTIONALa - - -@define -class ProvUsage(ProvRelation): - pass - + uid: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) + attributes: dict = field(default=attr.Factory(dict)) -@define -class ProvAssociation(ProvRelation): - pass + def unstrct(self): + return {"id": str(self.uid), **self.attributes} @define @@ -146,18 +115,18 @@ class ProvRecord: def __getitem__(self, idx): for element in self.elements: - if element.id == idx: + if element.uid == idx: return element def json(self): json_dict = {} json_dict["prov:Agent"] = [ - cattr.unstructure(x) for x in self.elements if isinstance(x, ProvAgent) + x.unstrct() for x in self.elements if isinstance(x, ProvAgent) ] json_dict["prov:Activity"] = [ - cattr.unstructure(x) for x in self.elements if isinstance(x, ProvActivity) + x.unstrct() for x in self.elements if isinstance(x, ProvActivity) ] json_dict["prov:Entity"] = [ - cattr.unstructure(x) for x in self.elements if isinstance(x, ProvEntity) + x.unstrct() for x in self.elements if isinstance(x, ProvEntity) ] return json_dict diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py index 652fd44b2..332c14fda 100644 --- a/clinica/engine/prov_utils.py +++ b/clinica/engine/prov_utils.py @@ -75,7 +75,7 @@ def get_last_activity(path_entity: Path) -> Optional[ProvActivity]: last_activity = [ x for x in prov_record.elements if isinstance(x, ProvActivity) ][-1] - return last_activity.id.label + return str(last_activity.uid) return None @@ -99,6 +99,7 @@ def create_prov_file(prov_command, prov_path): with open(prov_path, "w") as fp: json.dump(prov_command.json(), fp, indent=4) + return diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index 78c664ac7..b5705ad11 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py @@ -105,7 +105,7 @@ def write_prov_file( entity_path: path of the prov-associated element """ - from .prov_utils import read_prov_jsonld, get_path_prov + from .prov_utils import get_path_prov prov_path = get_path_prov(path_entity) @@ -130,7 +130,7 @@ def get_agent() -> ProvAgent: import clinica from .prov_utils import generate_agent_id - new_agent = ProvAgent(id=generate_agent_id()) + new_agent = ProvAgent(uid=generate_agent_id()) new_agent.attributes["version"] = clinica.__version__ new_agent.attributes["label"] = clinica.__name__ @@ -146,13 +146,13 @@ def get_activity(self, agent: Identifier, entities: List[ProvEntity]) -> ProvAct import sys from .prov_utils import generate_activity_id - new_activity = ProvActivity(id=generate_activity_id(self.fullname)) + new_activity = ProvActivity(uid=generate_activity_id(self.fullname)) new_activity.attributes["parameters"] = self.parameters new_activity.attributes["label"] = self.fullname new_activity.attributes["command"] = (sys.argv[1:],) - new_activity.attributes["used"] = [x.id for x in entities] - new_activity.attributes["wasAssociatedWith"] = agent.id + new_activity.attributes["used"] = [str(x.uid) for x in entities] + new_activity.attributes["wasAssociatedWith"] = str(agent.uid) return new_activity @@ -164,7 +164,7 @@ def get_entity(path_curr: Path) -> ProvEntity: from clinica.engine.prov_utils import generate_entity_id, get_last_activity - new_entity = ProvEntity(id=generate_entity_id(path_curr)) + new_entity = ProvEntity(uid=generate_entity_id(path_curr)) new_entity.attributes["label"] = path_curr.name new_entity.attributes["path"] = str(path_curr) From 39a2064319f3f383bb251e974b29e3df4dfc1376 
Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Tue, 8 Feb 2022 11:36:33 +0100 Subject: [PATCH 15/24] manually lint code --- clinica/engine/prov_model.py | 9 ++++----- clinica/engine/prov_utils.py | 4 ++-- clinica/engine/provenance.py | 13 +++++++------ clinica/pipelines/engine.py | 1 + 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py index 457ac725b..1bc1e30bc 100644 --- a/clinica/engine/prov_model.py +++ b/clinica/engine/prov_model.py @@ -1,13 +1,12 @@ +from abc import ABC, abstractmethod +from typing import List, Union from xml.dom.minidom import Element -from attr import define, field + import attr import cattr -from typing import Union, List -from abc import ABC, abstractmethod - +from attr import define, field from matplotlib.style import context - # Define PROV abstract concepts diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py index 332c14fda..fde16cea3 100644 --- a/clinica/engine/prov_utils.py +++ b/clinica/engine/prov_utils.py @@ -1,5 +1,5 @@ -from typing import Optional, List from pathlib import Path +from typing import List, Optional from .prov_model import * @@ -12,8 +12,8 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> List[ return list of 'Path's to the files used in the pipeline """ - from clinica.utils.inputs import clinica_file_reader import clinica.utils.input_files as cif + from clinica.utils.inputs import clinica_file_reader dict_field_options = ["input_to", "output_from"] if dict_field not in dict_field_options: diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index b5705ad11..3bb70932e 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py @@ -1,6 +1,5 @@ import functools from os import read - from pathlib import Path from typing import List @@ -56,7 +55,7 @@ def get_history_record(paths_files: List[Path]) -> ProvRecord: a ProvRecord for the associated files in path_files """ - from .prov_utils import read_prov_jsonld, get_path_prov + from .prov_utils import get_path_prov, read_prov_jsonld prov_record = ProvRecord({}, []) @@ -127,13 +126,14 @@ def extend_prov(prov_main: dict, prov_new: dict) -> dict: def get_agent() -> ProvAgent: - import clinica + from clinica import __name__, __version__ + from .prov_utils import generate_agent_id new_agent = ProvAgent(uid=generate_agent_id()) - new_agent.attributes["version"] = clinica.__version__ - new_agent.attributes["label"] = clinica.__name__ + new_agent.attributes["version"] = __version__ + new_agent.attributes["label"] = __name__ return new_agent @@ -144,13 +144,14 @@ def get_activity(self, agent: Identifier, entities: List[ProvEntity]) -> ProvAct ProvActivity from related entities and associated agent """ import sys + from .prov_utils import generate_activity_id new_activity = ProvActivity(uid=generate_activity_id(self.fullname)) new_activity.attributes["parameters"] = self.parameters new_activity.attributes["label"] = self.fullname - new_activity.attributes["command"] = (sys.argv[1:],) + new_activity.attributes["command"] = sys.argv[1:] new_activity.attributes["used"] = [str(x.uid) for x in entities] new_activity.attributes["wasAssociatedWith"] = str(agent.uid) diff --git a/clinica/pipelines/engine.py b/clinica/pipelines/engine.py index 68e3562f7..da7d35fae 100644 --- a/clinica/pipelines/engine.py +++ b/clinica/pipelines/engine.py @@ -7,6 +7,7 @@ import click from nipype.pipeline.engine import Workflow + import clinica.engine.provenance as prov 
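One detail of the model worth spelling out, since the earlier clean-up commit switched the element attributes from field(default={}) to field(default=attr.Factory(dict)): a literal default is evaluated once and risks being shared, the same trap as a mutable function default, whereas attr.Factory(dict) calls dict() freshly per instance. A minimal sketch of the difference follows; add_item and ProvElementSketch are made-up names, not part of the code above.

import attr
from attr import define, field


def add_item(item, bucket={}):
    # Classic pitfall: the literal {} is created once, at definition time,
    # so every call without an explicit bucket mutates the same dict.
    bucket[item] = True
    return bucket


print(add_item("a"))  # {'a': True}
print(add_item("b"))  # {'a': True, 'b': True} -- state leaked between calls


@define
class ProvElementSketch:
    # attr.Factory(dict) runs dict() once per instance instead, so no two
    # elements ever share an attributes dict.
    attributes: dict = field(default=attr.Factory(dict))


a, b = ProvElementSketch(), ProvElementSketch()
a.attributes["label"] = "t1w"
print(b.attributes)  # {} -- b got its own, untouched dict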
From 46b98924c84dcc90fe662196c2a751a4bffa6be5 Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Tue, 15 Feb 2022 11:23:43 +0100 Subject: [PATCH 16/24] Start extending workflow to other pipelines --- clinica/engine/prov_utils.py | 44 ++++++++++++++++++++++++++++-------- clinica/engine/provenance.py | 7 +++--- clinica/utils/input_files.py | 1 + 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py index fde16cea3..ed4e0fc13 100644 --- a/clinica/engine/prov_utils.py +++ b/clinica/engine/prov_utils.py @@ -1,10 +1,14 @@ from pathlib import Path from typing import List, Optional +from clinica.utils.input_files import pet_linear_nii + from .prov_model import * -def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> List[Path]: +def get_files_list( + self, pipeline_fullname: str, dict_field="input_to", pipeline_args={} +) -> List[Path]: """ params: pipeline_fullname: the current running pipeline name @@ -13,21 +17,31 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> List[ return list of 'Path's to the files used in the pipeline """ import clinica.utils.input_files as cif + from clinica.utils.input_files import pet_linear_nii from clinica.utils.inputs import clinica_file_reader + funcs = {"pet-linear": pet_linear_nii} + dict_field_options = ["input_to", "output_from"] if dict_field not in dict_field_options: raise (f"dict_field must be one of {dict_field_options}") # Retrieve all the data dict from the input_files module - files_dicts = { - k: v - for k, v in vars(cif).items() - if isinstance(v, dict) - and dict_field in v.keys() - and pipeline_fullname in v[dict_field] - } + if pipeline_fullname in funcs and dict_field == "output_from": + files_dicts = { + "PET": funcs[pipeline_fullname]( + **clean_arguments(pipeline_args, funcs[pipeline_fullname]) + ) + } + else: + files_dicts = { + k: v + for k, v in vars(cif).items() + if isinstance(v, dict) + and dict_field in v.keys() + and pipeline_fullname in v[dict_field] + } # TODO: check if bids or caps as output ret_files = [] @@ -40,7 +54,7 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> List[ self.sessions, ref_dir, files_dicts[elem], - raise_exception=False, + raise_exception=True, ) if current_file: ret_files.extend([Path(x) for x in current_file]) @@ -136,7 +150,6 @@ def deserialize_jsonld(path_prov) -> ProvRecord: context._namespaces.append(namespace) for s, p, o in g: - if str(p) == "http://www.w3.org/ns/prov#Activity": id = Identifier(label=g.namespace_manager.qname(o)) elements[id.label] = ProvActivity(id) @@ -159,3 +172,14 @@ def deserialize_jsonld(path_prov) -> ProvRecord: prov_rec = ProvRecord(context=context, elements=list(elements.values())) return prov_rec + + +def clean_arguments(pipeline_args, file_func): + import inspect + + argspec = inspect.getargspec(file_func) + if not argspec.keywords: + for key in pipeline_args.copy().keys(): + if key not in argspec.args: + del pipeline_args[key] + return pipeline_args diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index 3bb70932e..c747c5f72 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py @@ -14,23 +14,24 @@ def provenance(func): @functools.wraps(func) def run_wrapper(self, **kwargs): ret = [] + pipeline_args = self.parameters pipeline_fullname = self.fullname paths_input_files = get_files_list( - self, pipeline_fullname, dict_field="input_to" + self, pipeline_fullname, "input_to", pipeline_args 
) prov_history = get_history_record(paths_files=paths_input_files) prov_current = get_pipeline_record(self, paths_input_files) if validate_command(prov_history, prov_current): - # ret = func(self) + ret = func(self) print("The pipeline succesfully executed.") else: raise Exception( "The pipeline selected is incompatible with the input files provenance" ) paths_out_files = get_files_list( - self, pipeline_fullname, dict_field="output_from" + self, pipeline_fullname, "output_from", pipeline_args ) register_prov(prov_current, paths_out_files) diff --git a/clinica/utils/input_files.py b/clinica/utils/input_files.py index 99190dbcd..37f852abb 100644 --- a/clinica/utils/input_files.py +++ b/clinica/utils/input_files.py @@ -136,6 +136,7 @@ "pattern": "*space-MNI152NLin2009cSym_res-1x1x1_T1w.nii.gz", "description": "T1w image registered in MNI152NLin2009cSym space using t1-linear pipeline", "output_from": "t1-linear", + "input_to": "pet-linear", } T1W_LINEAR_CROPPED = { From f2c5300f211efe9d4562cc915ca34cf0d5437ed6 Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Wed, 23 Feb 2022 16:51:16 +0100 Subject: [PATCH 17/24] Connect provenance through nipype nodes --- clinica/engine/__init__.py | 3 + clinica/engine/prov_utils.py | 152 ++++++++++++-------- clinica/engine/provenance.py | 263 ++++++++++++++++------------------- clinica/pipelines/engine.py | 6 +- 4 files changed, 221 insertions(+), 203 deletions(-) diff --git a/clinica/engine/__init__.py b/clinica/engine/__init__.py index 5396b9d94..c9c7623f4 100644 --- a/clinica/engine/__init__.py +++ b/clinica/engine/__init__.py @@ -1 +1,4 @@ from .cmdparser import CmdParser +from nipype import config + +config.enable_debug_mode() diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py index ed4e0fc13..1428b48c7 100644 --- a/clinica/engine/prov_utils.py +++ b/clinica/engine/prov_utils.py @@ -1,65 +1,59 @@ from pathlib import Path from typing import List, Optional +from clinica.engine.prov_model import * -from clinica.utils.input_files import pet_linear_nii -from .prov_model import * +def mint_agent() -> ProvAgent: + """ + return + ProvAgent associated with running version of the software + """ + from clinica import __name__, __version__ + from clinica.engine.prov_utils import generate_agent_id + + new_agent = ProvAgent(uid=generate_agent_id()) + + new_agent.attributes["version"] = __version__ + new_agent.attributes["label"] = __name__ + + return new_agent -def get_files_list( - self, pipeline_fullname: str, dict_field="input_to", pipeline_args={} -) -> List[Path]: +def mint_activity(agent: Identifier, entities: List[ProvEntity]) -> ProvActivity: """ - params: - pipeline_fullname: the current running pipeline name - dict_field: variable to specify if fetching inputs or outputs to the pipeline + return + ProvActivity from related entities and associated agent + """ + import sys + + from clinica.engine.prov_utils import generate_activity_id + + new_activity = ProvActivity(uid=generate_activity_id("testfullname")) - return list of 'Path's to the files used in the pipeline + new_activity.attributes["parameters"] = "testparameters" + new_activity.attributes["label"] = "testfullname" + new_activity.attributes["command"] = sys.argv[1:] + new_activity.attributes["used"] = [str(x.uid) for x in entities] + new_activity.attributes["wasAssociatedWith"] = str(agent.uid) + + return new_activity + + +def mint_entity(path_curr: Path) -> ProvEntity: + """ + return an Entity object from the file in path_curr """ - import clinica.utils.input_files as cif - 
from clinica.utils.input_files import pet_linear_nii - from clinica.utils.inputs import clinica_file_reader - funcs = {"pet-linear": pet_linear_nii} + from clinica.engine.prov_utils import generate_entity_id, get_last_activity - dict_field_options = ["input_to", "output_from"] - if dict_field not in dict_field_options: - raise (f"dict_field must be one of {dict_field_options}") + new_entity = ProvEntity(uid=generate_entity_id(path_curr)) + new_entity.attributes["label"] = path_curr.name + new_entity.attributes["path"] = str(path_curr) - # Retrieve all the data dict from the input_files module + # TODO: implement function to return the latest associated activity + new_entity.attributes["wasGeneratedBy"] = get_last_activity(path_curr) - if pipeline_fullname in funcs and dict_field == "output_from": - files_dicts = { - "PET": funcs[pipeline_fullname]( - **clean_arguments(pipeline_args, funcs[pipeline_fullname]) - ) - } - else: - files_dicts = { - k: v - for k, v in vars(cif).items() - if isinstance(v, dict) - and dict_field in v.keys() - and pipeline_fullname in v[dict_field] - } - # TODO: check if bids or caps as output - - ret_files = [] - for elem in files_dicts: - ref_dir = ( - self.bids_directory if dict_field == "input_to" else self.caps_directory - ) - current_file, _ = clinica_file_reader( - self.subjects, - self.sessions, - ref_dir, - files_dicts[elem], - raise_exception=True, - ) - if current_file: - ret_files.extend([Path(x) for x in current_file]) - - return ret_files + return new_entity def generate_entity_id(path_file: Path) -> Identifier: @@ -97,12 +91,13 @@ def get_path_prov(path_entity: Path) -> Path: """ return: Path of the provenance file associated with an entity """ - - while path_entity.suffix != "": - path_entity = path_entity.with_suffix("") - - path_prov = path_entity.with_suffix(".jsonld") - return path_prov + if path_entity.is_file(): + while path_entity.suffix != "": + path_entity = path_entity.with_suffix("") + path_prov = path_entity.with_suffix(".jsonld") + return path_prov + else: + return None def create_prov_file(prov_command, prov_path): @@ -122,7 +117,7 @@ def read_prov_jsonld(path_prov: Path) -> Optional[ProvRecord]: return: ProvRecord in a specific location stored in jsonld format """ - if path_prov.exists(): + if path_prov and path_prov.exists(): prov_record = deserialize_jsonld(path_prov) return prov_record @@ -183,3 +178,48 @@ def clean_arguments(pipeline_args, file_func): if key not in argspec.args: del pipeline_args[key] return pipeline_args + + +def validate_command(prov_history: ProvRecord, prov_current: ProvRecord) -> bool: + """ + Check the command is valid on the data being run + """ + flag = True + + for a in prov_history.elements: + for b in prov_current.elements: + # TODO: check that the record entries are compatible with the current entry + flag = True + return flag + + +def is_valid(command: dict) -> bool: + valid_list = [ + { + ("clin:clinica0.5.0", "clin:adni2Bids"): ( + "clin:clinica0.5.0", + "clin:t1-linear", + ) + } + ] + if command in valid_list: + return True + return False + + +def write_prov_file( + list_prov_entries: ProvRecord, path_entity: Path, overwrite=False +) -> None: + """ + Create provenance file with current pipeline information + + params: + prov_entries: list of ProvEntry + entity_path: path of the prov-associated element + """ + + prov_path = get_path_prov(path_entity) + + create_prov_file(list_prov_entries, prov_path) + + return diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index 
c747c5f72..619c08db6 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py @@ -1,66 +1,111 @@ import functools + from os import read from pathlib import Path from typing import List -from clinica.engine.prov_utils import create_prov_file - -from .prov_model import * - def provenance(func): - from .prov_utils import get_files_list - @functools.wraps(func) def run_wrapper(self, **kwargs): - ret = [] + ret = func(self) + pipeline_args = self.parameters pipeline_fullname = self.fullname - paths_input_files = get_files_list( - self, pipeline_fullname, "input_to", pipeline_args - ) - - prov_history = get_history_record(paths_files=paths_input_files) - prov_current = get_pipeline_record(self, paths_input_files) - - if validate_command(prov_history, prov_current): - ret = func(self) - print("The pipeline succesfully executed.") - else: - raise Exception( - "The pipeline selected is incompatible with the input files provenance" - ) - paths_out_files = get_files_list( - self, pipeline_fullname, "output_from", pipeline_args - ) - register_prov(prov_current, paths_out_files) + + create_node_read(self) + create_node_update(self, pipeline_args, pipeline_fullname) + create_node_log(self) + + connect_nodes(self) return ret return run_wrapper -def register_prov(entries_current: ProvRecord, out_files: Path) -> None: +def connect_nodes(self): + # fmt: off - # TODO: iterate over out_files and create a provenance file for each + #self.output_node.outputs.get()[self.get_output_fields()[0]] + + self.connect( + [ + (self.input_node, self.prov_input_node, [("t1w", "input_files")]), + (self.input_node, self.prov_update_node, [("t1w", "input_files")]), + (self.prov_input_node, self.prov_update_node, [("prov_in_record", "prov_in_record")]), + (self.prov_update_node, self.prov_log_node,[("prov_upd_record", "prov_log_record")]), + (self.output_node, self.prov_log_node, [(self.get_output_fields()[0], "out_file")]), + ] + ) + return True + # fmt: on + + +def create_node_read(self): + import nipype.pipeline.engine as npe + import nipype.interfaces.utility as nutil + + self.prov_input_node = npe.Node( + nutil.Function( + input_names=["input_files"], + output_names=["prov_in_record"], + function=read_prov, + ), + name="ReadProvRecord", + ) + + +def create_node_update(self, parameters, fullname): + import nipype.pipeline.engine as npe + import nipype.interfaces.utility as nutil + + self.prov_update_node = npe.Node( + nutil.Function( + input_names=["input_files", "prov_in_record", "parameters", "fullname"], + output_names=["prov_upd_record"], + function=update_prov, + ), + name="UpdateRecord", + ) - for file in out_files: - write_prov_file(entries_current, file) - print("Provenance registered succesfully") return True -def get_history_record(paths_files: List[Path]) -> ProvRecord: +def create_node_log(self): + import nipype.pipeline.engine as npe + import nipype.interfaces.utility as nutil + + self.prov_log_node = npe.Node( + nutil.Function( + input_names=["prov_log_record", "out_file", "out_dir"], + output_names=["output_record"], + function=log_prov, + ), + name="LogProv", + ) + + self.prov_log_node.inputs.out_dir = self.caps_directory + return + + +def read_prov(input_files): """ return: a ProvRecord for the associated files in path_files """ - - from .prov_utils import get_path_prov, read_prov_jsonld + from clinica.engine.prov_utils import get_path_prov, read_prov_jsonld + from clinica.engine.prov_model import ProvRecord + from pathlib import Path prov_record = ProvRecord({}, []) + if 
isinstance(input_files, list):
+        paths_files = [Path(x) for x in input_files]
+    elif isinstance(input_files, str):
+        paths_files = [Path(input_files)]

     for path in paths_files:
+        print("in read_prov, path for input:", path)
         prov_record_tmp = read_prov_jsonld(get_path_prov(path))
         if prov_record_tmp:
             # TODO extend context as well
             prov_record.elements.extend(prov_record_tmp.elements)

     return prov_record


-def get_pipeline_record(self, paths_inputs: List[Path]) -> ProvRecord:
+def update_prov(input_files, prov_in_record):
     """
     params:
-    paths_inputs: list of input entries paths
+    input_files: list of input entries

     return:
         ProvRecord associated with the launched pipeline
     """
-    import sys
+    from clinica.engine.prov_utils import (
+        mint_activity,
+        mint_agent,
+        mint_entity,
+        validate_command,
+    )
+    from pathlib import Path
+    from clinica.engine.prov_model import ProvRecord

     elements = []
-    new_agent = get_agent()
+    new_agent = mint_agent()
     elements.append(new_agent)
     new_entities = []
-    for path in paths_inputs:
-        entity_curr = get_entity(path)
+    if isinstance(input_files, list):
+        paths_files = [Path(x) for x in input_files]
+    elif isinstance(input_files, str):
+        paths_files = [Path(input_files)]
+
+    for path in paths_files:
+        entity_curr = mint_entity(path)
         new_entities.append(entity_curr)
     elements.extend(new_entities)

-    new_activity = get_activity(self, new_agent, new_entities)
+    new_activity = mint_activity(new_agent, new_entities)
     elements.append(new_activity)

-    return ProvRecord(context={}, elements=elements)
+    prov_current = ProvRecord(context={}, elements=elements)
+    if not validate_command(prov_in_record, prov_current):
+        raise Exception("Invalid command")
+    return prov_current


-def write_prov_file(
-    list_prov_entries: ProvRecord, path_entity: Path, overwrite=False
-) -> None:
-    """
-    Create provenance file with current pipeline information
-
-    params:
-    prov_entries: list of ProvEntry
-    entity_path: path of the prov-associated element
-    """
-    from .prov_utils import get_path_prov
-
-    prov_path = get_path_prov(path_entity)
-
-    create_prov_file(list_prov_entries, prov_path)
-
-    return
-
-
-def extend_prov(prov_main: dict, prov_new: dict) -> dict:
-    """
-    Append a specific prov data to the global prov dict
-    """
-
-    for k in prov_new.keys():
-        for el in prov_new[k]:
-            if k in prov_main.keys() and el not in prov_main[k]:
-                prov_main[k].append(el)
-    return prov_main
-
-
-def get_agent() -> ProvAgent:
-    from clinica import __name__, __version__
-
-    from .prov_utils import generate_agent_id
-
-    new_agent = ProvAgent(uid=generate_agent_id())
-
-    new_agent.attributes["version"] = __version__
-    new_agent.attributes["label"] = __name__
-
-    return new_agent
-
-
-def get_activity(self, agent: Identifier, entities: List[ProvEntity]) -> ProvActivity:
-    """
-    return
-        ProvActivity from related entities and associated agent
-    """
-    import sys
-
-    from .prov_utils import generate_activity_id
-
-    new_activity = ProvActivity(uid=generate_activity_id(self.fullname))
-
-    new_activity.attributes["parameters"] = self.parameters
-
new_activity.attributes["label"] = self.fullname - new_activity.attributes["command"] = sys.argv[1:] - new_activity.attributes["used"] = [str(x.uid) for x in entities] - new_activity.attributes["wasAssociatedWith"] = str(agent.uid) - - return new_activity - - -def get_entity(path_curr: Path) -> ProvEntity: - """ - return an Entity object from the file in path_curr - """ - - from clinica.engine.prov_utils import generate_entity_id, get_last_activity - - new_entity = ProvEntity(uid=generate_entity_id(path_curr)) - new_entity.attributes["label"] = path_curr.name - new_entity.attributes["path"] = str(path_curr) - - # TODO: implement function to return the latest associated activity - new_entity.attributes["wasGeneratedBy"] = get_last_activity(path_curr) - - return new_entity - - -def validate_command(prov_history: ProvRecord, prov_current: ProvRecord) -> bool: - """ - Check the command is valid on the data being run - """ - flag = True - - for a in prov_history.elements: - for b in prov_current.elements: - # TODO: check that the record entries are compatible with the current entry - flag = True - return flag - - -def is_valid(command: dict) -> bool: - valid_list = [ - { - ("clin:clinica0.5.0", "clin:adni2Bids"): ( - "clin:clinica0.5.0", - "clin:t1-linear", - ) - } - ] - if command in valid_list: - return True - return False + print("out_files_path:", out_files_paths) + print("in log prov, prov_record", prov_log_record) + for path_file in out_files_paths: + write_prov_file(prov_log_record, path_file) + print("Provenance registered succesfully") + return True diff --git a/clinica/pipelines/engine.py b/clinica/pipelines/engine.py index da7d35fae..4b8e63cd6 100644 --- a/clinica/pipelines/engine.py +++ b/clinica/pipelines/engine.py @@ -213,6 +213,7 @@ def has_output_connections(self): return False @postset("is_built", True) + @prov.provenance def build(self): """Builds the core, input and output nodes of the Pipeline. @@ -230,13 +231,12 @@ def build(self): self.check_dependencies() self.check_pipeline_parameters() if not self.has_input_connections(): - self.build_input_node() + self.input_files = self.build_input_node() self.build_core_nodes() if not self.has_output_connections(): - self.build_output_node() + self.output_files = self.build_output_node() return self - @prov.provenance def run(self, plugin=None, plugin_args=None, update_hash=False, bypass_check=False): """Executes the Pipeline. 
From fdf8ed72658afc021e3a45331a7e6ae3969c678c Mon Sep 17 00:00:00 2001
From: "omar.elrifai"
Date: Tue, 1 Mar 2022 18:19:57 +0100
Subject: [PATCH 18/24] Add context to provenance

---
 clinica/engine/prov_model.py | 19 +++++++++++++------
 clinica/engine/prov_utils.py |  7 ++++++-
 clinica/engine/provenance.py | 34 +++++++++++++++++-----------------
 3 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py
index 1bc1e30bc..6efeebd4c 100644
--- a/clinica/engine/prov_model.py
+++ b/clinica/engine/prov_model.py
@@ -66,7 +66,7 @@ class ProvEntity(ProvElement):
     uid: Identifier = field(validator=[attr.validators.instance_of(Identifier)])
     attributes: dict = field(default=attr.Factory(dict))

-    def unstrct(self):
+    def unstruct(self):
         return {"id": str(self.uid), **self.attributes}

@@ -77,7 +77,7 @@ class ProvActivity(ProvElement):
     uid: Identifier = field(validator=[attr.validators.instance_of(Identifier)])
     attributes: dict = field(default=attr.Factory(dict))

-    def unstrct(self):
+    def unstruct(self):
         return {"id": str(self.uid), **self.attributes}

@@ -88,7 +88,7 @@ class ProvAgent(ProvElement):
     uid: Identifier = field(validator=[attr.validators.instance_of(Identifier)])
     attributes: dict = field(default=attr.Factory(dict))

-    def unstrct(self):
+    def unstruct(self):
         return {"id": str(self.uid), **self.attributes}

@@ -118,14 +118,21 @@ def __getitem__(self, idx):
             return element

     def json(self):
+        json_dict = {}
+
+        context_keys = [x.id for x in self.context._namespaces]
+        context_vals = [y.uri for y in self.context._namespaces]
+
+        json_dict["@context"] = dict(zip(context_keys, context_vals))
+
         json_dict["prov:Agent"] = [
-            x.unstrct() for x in self.elements if isinstance(x, ProvAgent)
+            x.unstruct() for x in self.elements if isinstance(x, ProvAgent)
         ]
         json_dict["prov:Activity"] = [
-            x.unstrct() for x in self.elements if isinstance(x, ProvActivity)
+            x.unstruct() for x in self.elements if isinstance(x, ProvActivity)
         ]
         json_dict["prov:Entity"] = [
-            x.unstrct() for x in self.elements if isinstance(x, ProvEntity)
+            x.unstruct() for x in self.elements if isinstance(x, ProvEntity)
         ]

         return json_dict
diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py
index 1428b48c7..2b8b822df 100644
--- a/clinica/engine/prov_utils.py
+++ b/clinica/engine/prov_utils.py
@@ -1,3 +1,4 @@
+import json
 from pathlib import Path
 from typing import List, Optional
 from clinica.engine.prov_model import *
@@ -134,13 +135,17 @@ def deserialize_jsonld(path_prov) -> ProvRecord:
     import rdflib

     g = rdflib.Graph(identifier="prov_graph_records")
+    built_in_namepsaces = list(g.namespace_manager.namespaces())
     g.parse(path_prov, format="json-ld")
+    json_namespaces = list(g.namespace_manager.namespaces())
+    json_namespaces = list(set(json_namespaces) - set(built_in_namepsaces))

     elements = {}

     # fetch context:
     context = ProvContext([])
-    for lbl, link in g.namespace_manager.namespaces():
+
+    for lbl, link in json_namespaces:
         namespace = Namespace(lbl, link.n3())
         context._namespaces.append(namespace)
diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py
index 619c08db6..415d79e2d 100644
--- a/clinica/engine/provenance.py
+++ b/clinica/engine/provenance.py
@@ -4,6 +4,8 @@
 from pathlib import Path
 from typing import List

+from clinica.engine.prov_model import ProvContext
+

 def provenance(func):
     @functools.wraps(func)
@@ -14,7 +16,7 @@ def run_wrapper(self, **kwargs):
         pipeline_fullname = self.fullname

         create_node_read(self)
-        create_node_update(self, pipeline_args, pipeline_fullname)
+        create_node_update(self)
         create_node_log(self)

         connect_nodes(self)
@@ -27,17 +29,22 @@ def run_wrapper(self, **kwargs):
 def connect_nodes(self):
     # fmt: off

-    #self.output_node.outputs.get()[self.get_output_fields()[0]]
+    try:
+        output_field = self.get_output_fields()[0]
+        self.connect([(self.output_node, self.prov_log_node, [(self.get_output_fields()[0], "out_file")])])
+    except Exception:
+        self.connect([(self.output_node, self.prov_log_node, [("", "out_file")])])

     self.connect(
         [
             (self.input_node, self.prov_input_node, [("t1w", "input_files")]),
             (self.input_node, self.prov_update_node, [("t1w", "input_files")]),
             (self.prov_input_node, self.prov_update_node, [("prov_in_record", "prov_in_record")]),
-            (self.prov_update_node, self.prov_log_node,[("prov_upd_record", "prov_log_record")]),
-            (self.output_node, self.prov_log_node, [(self.get_output_fields()[0], "out_file")]),
+            (self.prov_update_node, self.prov_log_node,[("prov_upd_record", "prov_log_record")]),
         ]
     )
+
+    return True
     # fmt: on
@@ -56,13 +63,13 @@ def create_node_read(self):
     )


-def create_node_update(self, parameters, fullname):
+def create_node_update(self):
     import nipype.pipeline.engine as npe
     import nipype.interfaces.utility as nutil

     self.prov_update_node = npe.Node(
         nutil.Function(
-            input_names=["input_files", "prov_in_record", "parameters", "fullname"],
+            input_names=["input_files", "prov_in_record"],
             output_names=["prov_upd_record"],
             function=update_prov,
         ),
@@ -95,22 +102,20 @@ def read_prov(input_files):
         a ProvRecord for the associated files in path_files
     """
     from clinica.engine.prov_utils import get_path_prov, read_prov_jsonld
-    from clinica.engine.prov_model import ProvRecord
+    from clinica.engine.prov_model import ProvRecord, ProvContext
     from pathlib import Path

-    prov_record = ProvRecord({}, [])
+    prov_record = ProvRecord(ProvContext([]), [])
     if isinstance(input_files, list):
         paths_files = [Path(x) for x in input_files]
     elif isinstance(input_files, str):
         paths_files = [Path(input_files)]
     for path in paths_files:
-        print("in read_prov, path for input:", path)
         prov_record_tmp = read_prov_jsonld(get_path_prov(path))
         if prov_record_tmp:
-            # TODO extend context as well
+            prov_record.context = prov_record_tmp.context
             prov_record.elements.extend(prov_record_tmp.elements)
-
     return prov_record
@@ -148,7 +153,7 @@ def update_prov(input_files, prov_in_record):
     new_activity = mint_activity(new_agent, new_entities)
     elements.append(new_activity)

-    prov_current = ProvRecord(context={}, elements=elements)
+    prov_current = ProvRecord(prov_in_record.context, elements=elements)
     if not validate_command(prov_in_record, prov_current):
         raise ("Invalid commmand")
@@ -167,11 +172,6 @@ def log_prov(prov_log_record, out_file, out_dir):
     elif isinstance(out_file, str):
         out_files_paths = list(Path(out_dir).rglob(out_file))

-    print("the file searched:", out_file)
-    print("the folder searched:", out_dir)
-
-    print("out_files_path:", out_files_paths)
-    print("in log prov, prov_record", prov_log_record)
     for path_file in out_files_paths:
         write_prov_file(prov_log_record, path_file)
     print("Provenance registered succesfully")

From 203d84d27bd6fa3b266a5e57b490eef5046c0a46 Mon Sep 17 00:00:00 2001
From: "omar.elrifai"
Date: Wed, 2 Mar 2022 15:27:34 +0100
Subject: [PATCH 19/24] specify error type in try catch

---
 clinica/engine/provenance.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py
index 415d79e2d..8b8a38522 100644
--- a/clinica/engine/provenance.py
+++ b/clinica/engine/provenance.py
@@ -18,7 +18,6 @@ def run_wrapper(self, **kwargs):
         create_node_read(self)
         create_node_update(self)
         create_node_log(self)
-
         connect_nodes(self)

         return ret
@@ -31,8 +30,8 @@ def connect_nodes(self):

     try:
         output_field = self.get_output_fields()[0]
-        self.connect([(self.output_node, self.prov_log_node, [(self.get_output_fields()[0], "out_file")])])
-    except Exception:
+        self.connect([(self.output_node, self.prov_log_node, [(output_field, "out_file")])])
+    except IndexError:
         self.connect([(self.output_node, self.prov_log_node, [("", "out_file")])])

     self.connect(
@@ -43,8 +42,6 @@ def connect_nodes(self):
             (self.prov_update_node, self.prov_log_node,[("prov_upd_record", "prov_log_record")]),
         ]
     )
-
-    return True
     # fmt: on

From cb31dd1505e345fb02d8cb22d7ea8819156800d4 Mon Sep 17 00:00:00 2001
From: "omar.elrifai"
Date: Wed, 2 Mar 2022 15:33:01 +0100
Subject: [PATCH 20/24] Revert changes in clinica/utils/ after architecture
 change

---
 .../spatial_svm_pipeline.py                   |  2 +-
 .../statistics_volume_correction_pipeline.py  |  2 +-
 clinica/utils/input_files.py                  | 79 +++++++++----------
 clinica/utils/inputs.py                       | 35 +++------
 4 files changed, 51 insertions(+), 67 deletions(-)

diff --git a/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py b/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py
index e630bcbda..79d4c9e17 100644
--- a/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py
+++ b/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py
@@ -93,7 +93,7 @@ def build_input_node(self):
                     "*_T1w_segm-graymatter_space-Ixi549Space_modulated-on_probability.nii.gz",
                 ),
                 "description": "graymatter tissue segmented in T1w MRI in Ixi549 space",
-                "output_from": "t1-volume-tissue-segmentation",
+                "needed_pipeline": "t1-volume-tissue-segmentation",
             }
         elif self.parameters["orig_input_data"] == "pet-volume":
             if not (
diff --git a/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py b/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py
index 3f8dffa94..b9afd5e63 100644
--- a/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py
+++ b/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py
@@ -39,7 +39,7 @@ def build_input_node(self):
             {
                 "pattern": self.parameters["t_map"] + "*",
                 "description": "statistics t map",
-                "output_from": "statistics-volume",
+                "needed_pipeline": "statistics-volume",
             },
         )
diff --git a/clinica/utils/input_files.py b/clinica/utils/input_files.py
index 37f852abb..380e2f4c9 100644
--- a/clinica/utils/input_files.py
+++ b/clinica/utils/input_files.py
@@ -7,155 +7,150 @@

 # BIDS

-T1W_NII = {
-    "pattern": "sub-*_ses-*_t1w.nii*",
-    "description": "T1w MRI",
-    "input_to": ["t1-linear"],
-}
+T1W_NII = {"pattern": "sub-*_ses-*_t1w.nii*", "description": "T1w MRI"}

 # T1-FreeSurfer
 T1_FS_WM = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/wm.seg.mgz",
     "description": "segmentation of white matter (mri/wm.seg.mgz).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_BRAIN = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/brain.mgz",
     "description": " extracted brain from T1w MRI (mri/brain.mgz).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_ORIG_NU = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/orig_nu.mgz",
     "description": "intensity normalized volume generated after correction for"
     " non-uniformity in FreeSurfer (mri/orig_nu.mgz).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_LONG_ORIG_NU = {
     "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/mri/orig_nu.mgz",
     "description": "intensity normalized volume generated after correction for non-uniformity in FreeSurfer (orig_nu.mgz) in longitudinal",
-    "output_from": "t1-freesurfer and t1-freesurfer longitudinal",
+    "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal",
 }

 T1_FS_WM_SURF_R = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/surf/rh.white",
     "description": "right white matter/gray matter border surface (rh.white).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_LONG_SURF_R = {
     "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/surf/rh.white",
     "description": "right white matter/gray matter border surface (rh.white) generated with t1-freesurfer-longitudinal.",
-    "output_from": "t1-freesurfer and t1-freesurfer longitudinal",
+    "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal",
 }

 T1_FS_LONG_SURF_L = {
     "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/surf/lh.white",
     "description": "left white matter/gray matter border surface (lh.white) generated with t1-freesurfer-longitudinal.",
-    "output_from": "t1-freesurfer and t1-freesurfer longitudinal",
+    "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal",
 }

 T1_FS_WM_SURF_L = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/surf/lh.white",
     "description": "left white matter/gray matter border surface (lh.white).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_DESTRIEUX = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/aparc.a2009s+aseg.mgz",
     "description": "Destrieux-based segmentation (mri/aparc.a2009s+aseg.mgz).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_DESTRIEUX_PARC_L = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/label/lh.aparc.a2009s.annot",
     "description": "left hemisphere surface-based Destrieux parcellation (label/lh.aparc.a2009s.annot).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_LONG_DESTRIEUX_PARC_L = {
     "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/label/lh.aparc.a2009s.annot",
     "description": "left hemisphere surface-based Destrieux parcellation (label/lh.aparc.a2009s.annot) generated with t1-freesurfer-longitudinal.",
-    "output_from": "t1-freesurfer and t1-freesurfer longitudinal",
+    "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal",
 }

 T1_FS_LONG_DESTRIEUX_PARC_R = {
     "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/label/rh.aparc.a2009s.annot",
     "description": "right hemisphere surface-based Destrieux parcellation (label/rh.aparc.a2009s.annot) generated with t1-freesurfer-longitudinal.",
-    "output_from": "t1-freesurfer and t1-freesurfer longitudinal",
+    "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal",
 }

 T1_FS_DESTRIEUX_PARC_R = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/label/rh.aparc.a2009s.annot",
     "description": "right hemisphere surface-based Destrieux parcellation (label/rh.aparc.a2009s.annot).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_DESIKAN = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/aparc+aseg.mgz",
     "description": "Desikan-based segmentation (mri/aparc.a2009s+aseg.mgz).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_DESIKAN_PARC_L = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/label/lh.aparc.annot",
     "description": "left hemisphere surface-based Desikan parcellation (label/lh.aparc.annot).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_DESIKAN_PARC_R = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/label/rh.aparc.annot",
     "description": "right hemisphere surface-based Desikan parcellation (label/rh.aparc.annot).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 # T1-FreeSurfer-Template
 T1_FS_T_DESTRIEUX = {
     "pattern": "freesurfer_unbiased_template/sub-*_long-*/mri/aparc.a2009s+aseg.mgz",
     "description": "Destrieux-based segmentation (mri/aparc.a2009s+aseg.mgz) from unbiased template.",
-    "output_from": "t1-freesurfer-longitudinal or t1-freesurfer-template",
+    "needed_pipeline": "t1-freesurfer-longitudinal or t1-freesurfer-template",
 }

 # T1-FreeSurfer-Longitudinal-Correction
 T1_FS_LONG_DESIKAN_PARC_L = {
     "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/label/lh.aparc.annot",
     "description": "left hemisphere surface-based Desikan parcellation (label/lh.aparc.annot) generated with t1-freesurfer-longitudinal.",
-    "output_from": "t1-freesurfer and t1-freesurfer-longitudinal",
+    "needed_pipeline": "t1-freesurfer and t1-freesurfer-longitudinal",
 }

 T1_FS_LONG_DESIKAN_PARC_R = {
     "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/label/rh.aparc.annot",
     "description": "right hemisphere surface-based Desikan parcellation (label/rh.aparc.annot) generated with t1-freesurfer-longitudinal.",
-    "output_from": "t1-freesurfer and t1-freesurfer-longitudinal",
+    "needed_pipeline": "t1-freesurfer and t1-freesurfer-longitudinal",
 }

 T1W_LINEAR = {
     "pattern": "*space-MNI152NLin2009cSym_res-1x1x1_T1w.nii.gz",
     "description": "T1w image registered in MNI152NLin2009cSym space using t1-linear pipeline",
-    "output_from": "t1-linear",
-    "input_to": "pet-linear",
+    "needed_pipeline": "t1-linear",
 }

 T1W_LINEAR_CROPPED = {
     "pattern": "*space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz",
     "description": "T1W Image registered using t1-linear and cropped "
     "(matrix size 169×208×179, 1 mm isotropic voxels)",
-    "output_from": "t1-linear",
+    "needed_pipeline": "t1-linear",
 }

 T1W_EXTENSIVE = {
     "pattern": "*space-Ixi549Space_desc-SkullStripped_T1w.nii.gz",
     "description": "T1w image skull-stripped registered in Ixi549Space space using clinicaDL preprocessing pipeline",
-    "output_from": "t1-extensive",
+    "needed_pipeline": "t1-extensive",
 }

 T1W_TO_MNI_TRANSFORM = {
     "pattern": "*space-MNI152NLin2009cSym_res-1x1x1_affine.mat",
     "description": "Transformation matrix from T1W image to MNI space using t1-linear pipeline",
-    "output_from": "t1-linear",
+    "needed_pipeline": "t1-linear",
 }

 # T1-Volume
@@ -175,7 +170,7 @@ def t1_volume_native_tpm(tissue_number):
             f"*_*_T1w_segm-{INDEX_TISSUE_MAP[tissue_number]}_probability.nii*",
         ),
         "description": f"Tissue probability map {INDEX_TISSUE_MAP[tissue_number]} in native space",
-        "output_from": "t1-volume-tissue-segmentation",
+        "needed_pipeline": "t1-volume-tissue-segmentation",
     }
     return information
@@ -194,7 +189,7 @@ def t1_volume_dartel_input_tissue(tissue_number):
             f"*_*_T1w_segm-{INDEX_TISSUE_MAP[tissue_number]}_dartelinput.nii*",
         ),
         "description": f"Dartel input for tissue probability map {INDEX_TISSUE_MAP[tissue_number]} from T1w MRI",
-        "output_from": "t1-volume-tissue-segmentation",
+        "needed_pipeline": "t1-volume-tissue-segmentation",
     }
     return information
@@ -222,7 +217,7 @@ def t1_volume_native_tpm_in_mni(tissue_number, modulation):
             f"Tissue probability map {INDEX_TISSUE_MAP[tissue_number]} based on "
             f"native MRI in MNI space (Ixi549) {description_modulation} modulation."
         ),
-        "output_from": "t1-volume-tissue-segmentation",
+        "needed_pipeline": "t1-volume-tissue-segmentation",
     }
     return information
@@ -250,7 +245,7 @@ def t1_volume_template_tpm_in_mni(group_label, tissue_number, modulation):
             f"Tissue probability map {INDEX_TISSUE_MAP[tissue_number]} based "
             f"on {group_label} template in MNI space (Ixi549) {description_modulation} modulation."
         ),
-        "output_from": "t1-volume",
+        "needed_pipeline": "t1-volume",
     }
     return information
@@ -267,7 +262,7 @@ def t1_volume_deformation_to_template(group_label):
             f"sub-*_ses-*_T1w_target-{group_label}_transformation-forward_deformation.nii*",
         ),
         "description": f"Deformation from native space to group template {group_label} space.",
-        "output_from": "t1-volume-create-dartel",
+        "needed_pipeline": "t1-volume-create-dartel",
     }
     return information
@@ -282,7 +277,7 @@ def t1_volume_i_th_iteration_group_template(group_label, i):
             f"group-{group_label}_iteration-{i}_template.nii*",
         ),
         "description": f"Iteration #{i} of Dartel template {group_label}",
-        "output_from": "t1-volume or t1-volume-create-dartel",
+        "needed_pipeline": "t1-volume or t1-volume-create-dartel",
     }
     return information
@@ -295,7 +290,7 @@ def t1_volume_final_group_template(group_label):
             f"group-{group_label}", "t1", f"group-{group_label}_template.nii*"
         ),
         "description": f"T1w template file of group {group_label}",
-        "output_from": "t1-volume or t1-volume-create-dartel",
+        "needed_pipeline": "t1-volume or t1-volume-create-dartel",
     }
     return information
@@ -332,25 +327,25 @@ def t1_volume_final_group_template(group_label):
 DWI_PREPROC_NII = {
     "pattern": "dwi/preprocessing/sub-*_ses-*_dwi_space-*_preproc.nii*",
     "description": "preprocessed DWI",
-    "output_from": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
+    "needed_pipeline": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
 }

 DWI_PREPROC_BRAINMASK = {
     "pattern": "dwi/preprocessing/sub-*_ses-*_dwi_space-*_brainmask.nii*",
     "description": "b0 brainmask",
-    "output_from": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
+    "needed_pipeline": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
 }

 DWI_PREPROC_BVEC = {
     "pattern": "dwi/preprocessing/sub-*_ses-*_dwi_space-*_preproc.bvec",
     "description": "preprocessed bvec",
-    "output_from": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
+    "needed_pipeline": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
 }

 DWI_PREPROC_BVAL = {
     "pattern": "dwi/preprocessing/*_dwi_space-*_preproc.bval",
     "description": "preprocessed bval",
-    "output_from": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
+    "needed_pipeline": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
 }

 """ PET """
@@ -416,7 +411,7 @@ def pet_volume_normalized_suvr_pet(
         f"{mask_description} SUVR map (using {suvr_reference_region} region) of {acq_label}-PET "
         f"{pvc_description} and {fwhm_description} in Ixi549Space space based on {group_label} DARTEL template"
     ),
-        "output_from": "pet-volume",
+        "needed_pipeline": "pet-volume",
    }
    return information
@@ -438,6 +433,6 @@ def pet_linear_nii(acq_label, suvr_reference_region, uncropped_image):
            f"*_acq-{acq_label}_pet_space-MNI152NLin2009cSym{description}_res-1x1x1_suvr-{suvr_reference_region}_pet.nii.gz",
        ),
        "description": "",
-       "output_from": "pet-linear",
+       "needed_pipeline": "pet-linear",
    }
    return information
diff --git a/clinica/utils/inputs.py b/clinica/utils/inputs.py
index 0d870aba1..45241122d 100644
--- a/clinica/utils/inputs.py
+++ b/clinica/utils/inputs.py
@@ -186,10 +186,10 @@ def clinica_file_reader(
        sessions: list of sessions (must be same size as subjects, and must correspond )
        input_directory: location of the bids or caps directory
        information: dictionary containing all the relevant information to look for the files. Dict must contains the
-                    following keys : pattern, description. The optional key is: output_from
+                    following keys : pattern, description. The optional key is: needed_pipeline
                        pattern: define the pattern of the final file
                        description: string to describe what the file is
-                       output_from (optional): string describing the pipeline(s) needed to obtain the related
+                       needed_pipeline (optional): string describing the pipeline(s) needed to obtain the related
                           file
        raise_exception: if True (normal behavior), an exception is raised if errors happen. If not, we return the file
                       list as it is
@@ -215,7 +215,7 @@
                        caps_directory,
                        {'pattern': 'freesurfer_cross_sectional/sub-*_ses-*/mri/orig_nu.mgz',
                         'description': 'freesurfer file orig_nu.mgz',
-                        'output_from': 't1-freesurfer'})
+                        'needed_pipeline': 't1-freesurfer'})
                gives: ['/caps/subjects/sub-ADNI011S4105/ses-M00/t1/freesurfer_cross_sectional/sub-ADNI011S4105_ses-M00/mri/orig_nu.mgz']

            - You have a partial name of the file:
@@ -236,7 +236,7 @@
                    caps,
                    {'pattern': 'rh.white',
                     'description': 'right hemisphere of outter cortical surface.',
-                    'output_from': 't1-freesurfer'})
+                    'needed_pipeline': 't1-freesurfer'})
                the following error will arise:
                * More than 1 file found::
                        /caps/subjects/sub-ADNI011S4105/ses-M00/t1/freesurfer_cross_sectional/fsaverage/surf/rh.white
@@ -266,9 +266,9 @@ def clinica_file_reader(
        elem in information.keys() for elem in ["pattern", "description"]
    ), "'information' must contain the keys 'pattern' and 'description'"
    assert all(
-        elem in ["pattern", "description", "output_from", "input_to"]
+        elem in ["pattern", "description", "needed_pipeline"]
        for elem in information.keys()
-    ), "'information' can only contain the keys 'pattern', 'description', 'output_from' and 'input_to'"
+    ), "'information' can only contain the keys 'pattern', 'description' and 'needed_pipeline'"

    pattern = information["pattern"]
    is_bids = determine_caps_or_bids(input_directory)
@@ -330,18 +330,6 @@ def clinica_file_reader(
    for msg in error_encountered:
        error_message += msg
    if len(error_encountered) > 0 and raise_exception is True:
-        error_message = (
-            f"Clinica encountered {len(error_encountered)} "
-            f"problem(s) while getting {information['description']}:\n"
-        )
-        if "output_from" in information.keys():
-            if information["output_from"]:
-                error_message += (
-                    "Please note that the following clinica pipeline(s) must "
-                    f"have run to obtain these files: {information['output_from']}\n"
-                )
-        for msg in error_encountered:
-            error_message += msg
        if is_bids:
            raise ClinicaBIDSError(error_message)
        else:
@@ -409,10 +397,10 @@ def clinica_group_reader(caps_directory, information, raise_exception=True):
    Args:
        caps_directory: input caps directory
        information: dictionary containing all the relevant information to look for the files. Dict must contains the
Dict must contains the - following keys : pattern, description, output_from + following keys : pattern, description, needed_pipeline pattern: define the pattern of the final file description: string to describe what the file is - output_from (optional): string describing the pipeline needed to obtain the file beforehand + needed_pipeline (optional): string describing the pipeline needed to obtain the file beforehand raise_exception: if True (normal behavior), an exception is raised if errors happen. If not, we return the file list as it is @@ -430,8 +418,9 @@ def clinica_group_reader(caps_directory, information, raise_exception=True): information, dict ), "A dict must be provided for the argument 'dict'" assert all( - elem in information.keys() for elem in ["pattern", "description", "output_from"] - ), "'information' must contain the keys 'pattern', 'description', 'output_from'" + elem in information.keys() + for elem in ["pattern", "description", "needed_pipeline"] + ), "'information' must contain the keys 'pattern', 'description', 'needed_pipeline'" pattern = information["pattern"] # Some check on the formatting on the data @@ -457,7 +446,7 @@ def clinica_group_reader(caps_directory, information, raise_exception=True): error_string += ( f"\n\tCAPS directory: {caps_directory}\n" "Please note that the following clinica pipeline(s) must have run to obtain these files: " - f"{information['output_from']}\n" + f"{information['needed_pipeline']}\n" ) raise ClinicaCAPSError(error_string) return current_glob_found[0] From 468721174a48d88458ab2a05ad2fea941d274cb7 Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Wed, 2 Mar 2022 17:53:53 +0100 Subject: [PATCH 21/24] Homogenize the funcs get_input_fields() get_output_fields() --- clinica/pipelines/pet_surface/pet_surface_pipeline.py | 2 +- .../pipelines/statistics_surface/statistics_surface_pipeline.py | 2 +- .../statistics_volume_correction_pipeline.py | 2 +- .../t1_volume_parcellation/t1_volume_parcellation_pipeline.py | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/clinica/pipelines/pet_surface/pet_surface_pipeline.py b/clinica/pipelines/pet_surface/pet_surface_pipeline.py index 9fe61d2a3..da77b98b4 100644 --- a/clinica/pipelines/pet_surface/pet_surface_pipeline.py +++ b/clinica/pipelines/pet_surface/pet_surface_pipeline.py @@ -41,7 +41,7 @@ def get_input_fields(self): def get_output_fields(self): """Specify the list of possible outputs of this pipeline.""" - return [] + return [""] def build_input_node(self): """Build and connect an input node to the pipeline.""" diff --git a/clinica/pipelines/statistics_surface/statistics_surface_pipeline.py b/clinica/pipelines/statistics_surface/statistics_surface_pipeline.py index 8a79cb9da..790b12020 100644 --- a/clinica/pipelines/statistics_surface/statistics_surface_pipeline.py +++ b/clinica/pipelines/statistics_surface/statistics_surface_pipeline.py @@ -74,7 +74,7 @@ def get_input_fields(self): Returns: A list of (string) input fields name. """ - return [] + return [""] def get_output_fields(self): """Specify the list of possible outputs of this pipeline. 
diff --git a/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py b/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py
index b9afd5e63..3ad08980e 100644
--- a/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py
+++ b/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py
@@ -25,7 +25,7 @@ def get_output_fields(self):
         Returns:
             A list of (string) output fields name.
         """
-        return []
+        return [""]

     def build_input_node(self):
         """Build and connect an input node to the pipeline."""
diff --git a/clinica/pipelines/t1_volume_parcellation/t1_volume_parcellation_pipeline.py b/clinica/pipelines/t1_volume_parcellation/t1_volume_parcellation_pipeline.py
index 0ff7193f6..32f847b31 100644
--- a/clinica/pipelines/t1_volume_parcellation/t1_volume_parcellation_pipeline.py
+++ b/clinica/pipelines/t1_volume_parcellation/t1_volume_parcellation_pipeline.py
@@ -36,6 +36,7 @@ def get_output_fields(self):
         Returns:
             A list of (string) output fields name.
         """
+        return [""]

     def build_input_node(self):
         """Build and connect an input node to the pipeline."""

From ddc82c824b92219836a7efa7ec2239a2a959e9b0 Mon Sep 17 00:00:00 2001
From: "omar.elrifai"
Date: Thu, 3 Mar 2022 14:49:09 +0100
Subject: [PATCH 22/24] Remove unused imports

---
 clinica/engine/prov_model.py |  5 +----
 clinica/engine/prov_utils.py | 12 ++++++++++--
 clinica/engine/provenance.py | 28 +++++++++++-----------------
 3 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py
index 6efeebd4c..eb178d44d 100644
--- a/clinica/engine/prov_model.py
+++ b/clinica/engine/prov_model.py
@@ -1,11 +1,8 @@
 from abc import ABC, abstractmethod
-from typing import List, Union
-from xml.dom.minidom import Element
+from typing import List

 import attr
-import cattr
 from attr import define, field
-from matplotlib.style import context

 # Define PROV abstract concepts

diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py
index 2b8b822df..47945faea 100644
--- a/clinica/engine/prov_utils.py
+++ b/clinica/engine/prov_utils.py
@@ -1,7 +1,15 @@
-import json
 from pathlib import Path
 from typing import List, Optional
-from clinica.engine.prov_model import *
+
+from clinica.engine.prov_model import (
+    Identifier,
+    Namespace,
+    ProvActivity,
+    ProvAgent,
+    ProvContext,
+    ProvEntity,
+    ProvRecord,
+)


 def mint_agent() -> ProvAgent:
diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py
index 8b8a38522..922d95306 100644
--- a/clinica/engine/provenance.py
+++ b/clinica/engine/provenance.py
@@ -1,20 +1,11 @@
 import functools

-from os import read
-from pathlib import Path
-from typing import List
-
-from clinica.engine.prov_model import ProvContext
-

 def provenance(func):
     @functools.wraps(func)
     def run_wrapper(self, **kwargs):
         ret = func(self)

-        pipeline_args = self.parameters
-        pipeline_fullname = self.fullname
-
         create_node_read(self)
         create_node_update(self)
         create_node_log(self)
@@ -47,8 +38,8 @@ def connect_nodes(self):


 def create_node_read(self):
-    import nipype.pipeline.engine as npe
     import nipype.interfaces.utility as nutil
+    import nipype.pipeline.engine as npe

     self.prov_input_node = npe.Node(
         nutil.Function(
@@ -61,8 +52,8 @@ def create_node_read(self):


 def create_node_update(self):
-    import nipype.pipeline.engine as npe
     import nipype.interfaces.utility as nutil
+    import nipype.pipeline.engine as npe

     self.prov_update_node = npe.Node(
        nutil.Function(
@@ -77,8 +68,8 @@ def create_node_update(self):


 def create_node_log(self):
-    import nipype.pipeline.engine as npe
     import nipype.interfaces.utility as nutil
+    import nipype.pipeline.engine as npe

     self.prov_log_node = npe.Node(
         nutil.Function(
@@ -98,10 +89,11 @@ def read_prov(input_files):
     return:
         a ProvRecord for the associated files in path_files
     """
-    from clinica.engine.prov_utils import get_path_prov, read_prov_jsonld
-    from clinica.engine.prov_model import ProvRecord, ProvContext
     from pathlib import Path

+    from clinica.engine.prov_model import ProvContext, ProvRecord
+    from clinica.engine.prov_utils import get_path_prov, read_prov_jsonld
+
     prov_record = ProvRecord(ProvContext([]), [])
     if isinstance(input_files, list):
         paths_files = [Path(x) for x in input_files]
@@ -123,14 +115,15 @@ def update_prov(input_files, prov_in_record):
     return:
         ProvRecord associated with the launched pipeline
     """
+    from pathlib import Path
+
+    from clinica.engine.prov_model import ProvRecord
     from clinica.engine.prov_utils import (
         mint_activity,
         mint_agent,
         mint_entity,
         validate_command,
     )
-    from pathlib import Path
-    from clinica.engine.prov_model import ProvRecord

     elements = []
     new_agent = mint_agent()
@@ -158,9 +151,10 @@ def update_prov(input_files, prov_in_record):


 def log_prov(prov_log_record, out_file, out_dir):
-    from clinica.engine.prov_utils import write_prov_file
     from pathlib import Path

+    from clinica.engine.prov_utils import write_prov_file
+
     out_file = out_file + "*"
     out_files_paths = []

From 54b3ffe1f42ea8d35b2da12a0c1ba908684ba8a1 Mon Sep 17 00:00:00 2001
From: "omar.elrifai"
Date: Thu, 3 Mar 2022 14:59:05 +0100
Subject: [PATCH 23/24] Lint __init__ file in engine

---
 clinica/engine/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clinica/engine/__init__.py b/clinica/engine/__init__.py
index c9c7623f4..66d9fb818 100644
--- a/clinica/engine/__init__.py
+++ b/clinica/engine/__init__.py
@@ -1,4 +1,5 @@
-from .cmdparser import CmdParser
 from nipype import config

+from .cmdparser import CmdParser
+
 config.enable_debug_mode()

From 07204cc0b47a7c1a3f75042585b96d489d8fd918 Mon Sep 17 00:00:00 2001
From: "omar.elrifai"
Date: Mon, 14 Mar 2022 16:37:14 +0100
Subject: [PATCH 24/24] Update fields returned in pet-linear for prov
 compatibility

---
 clinica/engine/provenance.py                        | 5 +++--
 clinica/pipelines/pet_linear/pet_linear_pipeline.py | 6 +++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py
index 922d95306..7559dae72 100644
--- a/clinica/engine/provenance.py
+++ b/clinica/engine/provenance.py
@@ -155,13 +155,14 @@ def log_prov(prov_log_record, out_file, out_dir):

     from clinica.engine.prov_utils import write_prov_file

-    out_file = out_file + "*"
     out_files_paths = []
+
     if isinstance(out_file, list):
         for x in out_file:
             out_files_paths.extend(list(Path(out_dir).rglob(x)))
     elif isinstance(out_file, str):
-        out_files_paths = list(Path(out_dir).rglob(out_file))
+
+        out_files_paths = list(Path(out_dir).rglob(Path(out_file).name))

     for path_file in out_files_paths:
         write_prov_file(prov_log_record, path_file)
     print("Provenance registered succesfully")
diff --git a/clinica/pipelines/pet_linear/pet_linear_pipeline.py b/clinica/pipelines/pet_linear/pet_linear_pipeline.py
index dd828dda2..448f08d32 100644
--- a/clinica/pipelines/pet_linear/pet_linear_pipeline.py
+++ b/clinica/pipelines/pet_linear/pet_linear_pipeline.py
@@ -43,9 +43,9 @@ def get_output_fields(self):
             A list of (string) output fields name.
         """
         return [
-            "registered_pet",
-            "transform_mat",
-            "registered_pet_in_t1w",
+            "suvr_pet",
+            "affine_mat",
+            "PETinT1w",
         ]  # Fill here the list

     def build_input_node(self):