From 5dc0d025a5865e2c93e756f8895b847ab4136a8c Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Thu, 23 Sep 2021 09:51:48 +0200 Subject: [PATCH 01/24] Init traceability feature --- clinica/engine/provenance.py | 215 ++++++++++++++++++ clinica/engine/provenance_utils.py | 116 ++++++++++ clinica/pipelines/engine.py | 2 + .../spatial_svm_pipeline.py | 2 +- .../statistics_volume_correction_pipeline.py | 2 +- clinica/utils/input_files.py | 80 ++++--- clinica/utils/inputs.py | 35 ++- 7 files changed, 402 insertions(+), 50 deletions(-) create mode 100644 clinica/engine/provenance.py create mode 100644 clinica/engine/provenance_utils.py diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py new file mode 100644 index 000000000..0db9d17ff --- /dev/null +++ b/clinica/engine/provenance.py @@ -0,0 +1,215 @@ +import json +import functools +from os import read + +from pathlib import Path +from typing import Optional + + +def provenance(func): + from .provenance_utils import get_files_list + + @functools.wraps(func) + def run_wrapper(self, **kwargs): + ret = [] + pipeline_fullname = self.fullname + in_files_paths = get_files_list(self, pipeline_fullname, dict_field="input_to") + + prov_context = get_context(files_paths=in_files_paths) + prov_command = get_command(self, in_files_paths) + + if validate_command(prov_context, prov_command): + ret = func(self) + else: + raise Exception( + "The pipeline selected is incompatible with the input files provenance" + ) + out_files_paths = get_files_list( + self, pipeline_fullname, dict_field="output_from" + ) + register_prov(prov_command, out_files_paths) + + return ret + + return run_wrapper + + +def register_prov(prov_command: dict, out_files: list) -> bool: + + # TODO: iterate over out_files and create a provenance file for each + for file in out_files: + write_prov_file(prov_command, file) + print("Provenance registered succesfully") + return True + + +def get_context(files_paths: str) -> dict: + """ + Return a dictionary with the provenance info related to the files in the files_paths + """ + from clinica.engine.provenance_utils import read_prov, get_associated_prov + + prov_data = {"Entity": [], "Agent": [], "Activity": []} + for path in files_paths: + prov_record = read_prov(get_associated_prov(path)) + if prov_record: + prov_data = append_prov_dict(prov_data, prov_record) + + return prov_data + + +def get_command(self, input_files_paths: list) -> dict: + """ + Read the user command and save information in a dict + """ + import sys + + new_entities = [] + new_agent = get_agent() + for path in input_files_paths: + new_entities.append(get_entity(path)) + new_activity = get_activity(self, new_agent["@id"], new_entities) + + return { + "Agent": [new_agent], + "Activity": [new_activity], + "Entity": new_entities, + } + + +def write_prov_file(prov_command, files_paths): + """ + Write the dictionary data to the file_path + """ + from clinica.engine.provenance_utils import read_prov, get_associated_prov + + for file_path in files_paths: + prov_path = get_associated_prov(file_path) + + if prov_path.exists(): + # append the pipeline provenance information to the old provenance file + prov_main = read_prov(prov_path) + prov_main = append_prov_dict(prov_main, prov_command) + else: + print("help") + # create new provenance file with pipeline information + return "" + + +def append_prov_dict(prov_main: dict, prov_new: dict) -> dict: + """ + Append a specific prov data to the global prov dict + """ + + for k in prov_new.keys(): + for el in prov_new[k]: 
+            if prov_main[k] and el not in prov_main[k]:
+                prov_main[k].append(el)
+    return prov_main
+
+
+def get_agent() -> dict:
+    import clinica
+    from .provenance_utils import get_agent_id
+
+    agent_version = clinica.__version__
+    agent_label = clinica.__name__
+    agent_id = get_agent_id(agent_label + agent_version)
+
+    new_agent = {"@id": agent_id, "label": agent_label, "version": agent_version}
+
+    return new_agent
+
+
+def get_activity(self, agent_id: str, entities: list) -> dict:
+    """
+    Add the current command to the list of activities
+    """
+    import sys
+    from .provenance_utils import get_activity_id
+
+    activity_parameters = self.parameters
+    activity_label = self.fullname
+    activity_id = get_activity_id(self.fullname)
+    activity_command = (sys.argv[1:],)
+    activity_agent = agent_id
+    activity_used_files = [e["@id"] for e in entities]
+
+    new_activity = {
+        "@id": activity_id,
+        "label": activity_label,
+        "command": activity_command,
+        "parameters": activity_parameters,
+        "wasAssociatedWith": activity_agent,
+        "used": activity_used_files,
+    }
+
+    return new_activity
+
+
+def get_entity(img_path: str) -> dict:
+    """
+    Add the current file to the list of entities
+    """
+    from clinica.engine.provenance_utils import get_entity_id
+    from clinica.engine.provenance_utils import get_last_activity
+    from pathlib import Path
+
+    entity_id = get_entity_id(img_path)
+    entity_label = Path(img_path).name
+    entity_path = img_path
+    entity_source = get_last_activity(img_path)
+
+    new_entity = {
+        "@id": entity_id,
+        "label": entity_label,
+        "atLocation": entity_path,
+        "wasGeneratedBy": entity_source,
+    }
+
+    return new_entity
+
+
+def create_prov_file(command, path):
+    """
+    Create new provenance file based on command
+    """
+    # TODO: create a json-ld object next to the file and add it to the active prov object
+    return
+
+
+def validate_command(prov_context: dict, prov_command: dict) -> bool:
+    """
+    Check that the command is valid for the data being run
+    """
+    flag = True
+    new_activity_id = prov_command["Activity"][0]["@id"]
+    new_agent_id = prov_command["Agent"][0]["@id"]
+
+    for entity in prov_context["Entity"]:
+        old_activity_id = entity["wasGeneratedBy"]
+        if old_activity_id:
+            ptr_activity = next(
+                item
+                for item in prov_context["Activity"]
+                if item["@id"] == old_activity_id
+            )
+            old_agent_id = ptr_activity["wasAssociatedWith"]
+            flag = flag and is_valid(
+                {(old_agent_id, old_activity_id): (new_agent_id, new_activity_id)}
+            )
+    return flag
+
+
+def is_valid(command: dict) -> bool:
+    valid_list = [
+        {
+            ("clin:clinica0.5.0", "clin:adni2Bids"): (
+                "clin:clinica0.5.0",
+                "clin:t1-linear",
+            )
+        }
+    ]
+    if command in valid_list:
+        return True
+    return False
diff --git a/clinica/engine/provenance_utils.py b/clinica/engine/provenance_utils.py
new file mode 100644
index 000000000..998e68496
--- /dev/null
+++ b/clinica/engine/provenance_utils.py
@@ -0,0 +1,116 @@
+from typing import Union, Optional
+from pathlib import Path
+
+
+def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> list:
+    """
+    Calls clinica_file_reader with the appropriate extensions
+    """
+    from clinica.utils.inputs import clinica_file_reader
+    import clinica.utils.input_files as cif
+
+    dict_field_options = ["input_to", "output_from"]
+    if dict_field not in dict_field_options:
+        raise ValueError(f"dict_field must be one of {dict_field_options}")
+
+    # retrieve all the data dictionaries from the input_files module
+    files_dicts = {
+        k: v
+        for k, v in vars(cif).items()
+        if isinstance(v, dict)
+        and dict_field in v.keys()
+        and 
pipeline_fullname in v[dict_field] + } + # TODO: check if bids or caps as output + ret_files = [] + for elem in files_dicts: + ref_dir = ( + self.bids_directory if dict_field == "input_to" else self.caps_directory + ) + current_file = clinica_file_reader( + self.subjects, + self.sessions, + ref_dir, + files_dicts[elem], + raise_exception=False, + ) + if current_file: + ret_files.extend(current_file) + + return ret_files + + +def is_entity_tracked(prov_context: dict, entity_id: str) -> bool: + flag_exists = next( + (True for item in prov_context["Entity"] if item["@id"] == entity_id), + False, + ) + return flag_exists + + +def is_agent_tracked(prov_context: dict, agent_id: str) -> bool: + flag_exists = next( + (True for item in prov_context["Agent"] if item["@id"] == agent_id), + False, + ) + return flag_exists + + +def is_activity_tracked(prov_context: dict, activity_id: str) -> bool: + flag_exists = next( + (True for item in prov_context["Activity"] if item["@id"] == activity_id), + False, + ) + return flag_exists + + +def get_entity_id(file_path: str) -> str: + from pathlib import Path + + entity_id = Path(file_path).with_suffix("").name + return entity_id + + +def get_activity_id(pipeline_name: str) -> str: + return "clin:" + pipeline_name + + +def get_agent_id(agent_name: str) -> str: + return "clin:" + agent_name + + +def get_last_activity(file_path: str) -> Optional[list]: + + """ + Return the last activity executed on the file + """ + + prov_record = read_prov(get_associated_prov(file_path)) + if prov_record and prov_record["Activity"]: + last_activity = prov_record["Activity"][-1]["@id"] + return last_activity + return None + + +def get_associated_prov(file_path: str) -> Path: + + file_path = Path(file_path) + while file_path.suffix != "": + file_path = file_path.with_suffix("") + + associated_jsonld = file_path.with_suffix(".jsonld") + return associated_jsonld + + +def read_prov(prov_path: Path) -> Optional[dict]: + """ + Check if the given file is a valid provenance json-ld + """ + import json + + # TODO: check that the provenance file associations and uses exists + if prov_path.exists(): + with open(prov_path, "r") as fp: + json_ld_data = json.load(fp) + return json_ld_data + return None diff --git a/clinica/pipelines/engine.py b/clinica/pipelines/engine.py index ec72d6f02..68e3562f7 100644 --- a/clinica/pipelines/engine.py +++ b/clinica/pipelines/engine.py @@ -7,6 +7,7 @@ import click from nipype.pipeline.engine import Workflow +import clinica.engine.provenance as prov def postset(attribute, value): @@ -234,6 +235,7 @@ def build(self): self.build_output_node() return self + @prov.provenance def run(self, plugin=None, plugin_args=None, update_hash=False, bypass_check=False): """Executes the Pipeline. 
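
To make the intended mechanics concrete before the call-site changes below: `get_associated_prov` strips every suffix from a data file's path and appends `.jsonld`, and `write_prov_file` is meant to serialize the `Agent`/`Activity`/`Entity` lists built by `get_agent`, `get_activity`, and `get_entity`. A minimal sketch of one such sidecar, with purely illustrative subject, paths, and IDs (not output produced by running this patch):

```python
# Illustrative sidecar for bids/sub-01/ses-M00/anat/sub-01_ses-M00_T1w.nii.gz;
# get_associated_prov would place it at .../anat/sub-01_ses-M00_T1w.jsonld
example_sidecar = {
    "Agent": [
        # get_agent_id(agent_label + agent_version) -> "clin:clinica0.5.0"
        {"@id": "clin:clinica0.5.0", "label": "clinica", "version": "0.5.0"}
    ],
    "Activity": [
        {
            "@id": "clin:t1-linear",
            "label": "t1-linear",
            "command": [["run", "t1-linear", "bids/", "caps/"]],  # (sys.argv[1:],)
            "parameters": {},
            "wasAssociatedWith": "clin:clinica0.5.0",
            "used": ["sub-01_ses-M00_T1w.nii"],  # @ids of the input entities
        }
    ],
    "Entity": [
        {
            # get_entity_id strips one suffix: Path(...).with_suffix("").name
            "@id": "sub-01_ses-M00_T1w.nii",
            "label": "sub-01_ses-M00_T1w.nii.gz",
            "atLocation": "bids/sub-01/ses-M00/anat/sub-01_ses-M00_T1w.nii.gz",
            "wasGeneratedBy": None,  # raw BIDS input: no prior recorded activity
        }
    ],
}
```
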
diff --git a/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py b/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py index 79d4c9e17..e630bcbda 100644 --- a/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py +++ b/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py @@ -93,7 +93,7 @@ def build_input_node(self): "*_T1w_segm-graymatter_space-Ixi549Space_modulated-on_probability.nii.gz", ), "description": "graymatter tissue segmented in T1w MRI in Ixi549 space", - "needed_pipeline": "t1-volume-tissue-segmentation", + "output_from": "t1-volume-tissue-segmentation", } elif self.parameters["orig_input_data"] == "pet-volume": if not ( diff --git a/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py b/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py index b9afd5e63..3f8dffa94 100644 --- a/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py +++ b/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py @@ -39,7 +39,7 @@ def build_input_node(self): { "pattern": self.parameters["t_map"] + "*", "description": "statistics t map", - "needed_pipeline": "statistics-volume", + "output_from": "statistics-volume", }, ) diff --git a/clinica/utils/input_files.py b/clinica/utils/input_files.py index 380e2f4c9..93ce26edb 100644 --- a/clinica/utils/input_files.py +++ b/clinica/utils/input_files.py @@ -7,150 +7,154 @@ # BIDS -T1W_NII = {"pattern": "sub-*_ses-*_t1w.nii*", "description": "T1w MRI"} +T1W_NII = { + "pattern": "sub-*_ses-*_t1w.nii*", + "description": "T1w MRI", + "input_to": ["t1-linear"], +} # T1-FreeSurfer T1_FS_WM = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/wm.seg.mgz", "description": "segmentation of white matter (mri/wm.seg.mgz).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_BRAIN = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/brain.mgz", "description": " extracted brain from T1w MRI (mri/brain.mgz).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_ORIG_NU = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/orig_nu.mgz", "description": "intensity normalized volume generated after correction for" " non-uniformity in FreeSurfer (mri/orig_nu.mgz).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_LONG_ORIG_NU = { "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/mri/orig_nu.mgz", "description": "intensity normalized volume generated after correction for non-uniformity in FreeSurfer (orig_nu.mgz) in longitudinal", - "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal", + "output_from": "t1-freesurfer and t1-freesurfer longitudinal", } T1_FS_WM_SURF_R = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/surf/rh.white", "description": "right white matter/gray matter border surface (rh.white).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_LONG_SURF_R = { "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/surf/rh.white", "description": "right white matter/gray matter border surface (rh.white) generated with t1-freesurfer-longitudinal.", - "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal", + "output_from": "t1-freesurfer and t1-freesurfer longitudinal", } T1_FS_LONG_SURF_L = { "pattern": 
"t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/surf/lh.white", "description": "left white matter/gray matter border surface (lh.white) generated with t1-freesurfer-longitudinal.", - "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal", + "output_from": "t1-freesurfer and t1-freesurfer longitudinal", } T1_FS_WM_SURF_L = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/surf/lh.white", "description": "left white matter/gray matter border surface (lh.white).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_DESTRIEUX = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/aparc.a2009s+aseg.mgz", "description": "Destrieux-based segmentation (mri/aparc.a2009s+aseg.mgz).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_DESTRIEUX_PARC_L = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/label/lh.aparc.a2009s.annot", "description": "left hemisphere surface-based Destrieux parcellation (label/lh.aparc.a2009s.annot).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_LONG_DESTRIEUX_PARC_L = { "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/label/lh.aparc.a2009s.annot", "description": "left hemisphere surface-based Destrieux parcellation (label/lh.aparc.a2009s.annot) generated with t1-freesurfer-longitudinal.", - "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal", + "output_from": "t1-freesurfer and t1-freesurfer longitudinal", } T1_FS_LONG_DESTRIEUX_PARC_R = { "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/label/rh.aparc.a2009s.annot", "description": "right hemisphere surface-based Destrieux parcellation (label/rh.aparc.a2009s.annot) generated with t1-freesurfer-longitudinal.", - "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal", + "output_from": "t1-freesurfer and t1-freesurfer longitudinal", } T1_FS_DESTRIEUX_PARC_R = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/label/rh.aparc.a2009s.annot", "description": "right hemisphere surface-based Destrieux parcellation (label/rh.aparc.a2009s.annot).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_DESIKAN = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/aparc+aseg.mgz", "description": "Desikan-based segmentation (mri/aparc.a2009s+aseg.mgz).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_DESIKAN_PARC_L = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/label/lh.aparc.annot", "description": "left hemisphere surface-based Desikan parcellation (label/lh.aparc.annot).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } T1_FS_DESIKAN_PARC_R = { "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/label/rh.aparc.annot", "description": "right hemisphere surface-based Desikan parcellation (label/rh.aparc.annot).", - "needed_pipeline": "t1-freesurfer", + "output_from": "t1-freesurfer", } # T1-FreeSurfer-Template T1_FS_T_DESTRIEUX = { "pattern": "freesurfer_unbiased_template/sub-*_long-*/mri/aparc.a2009s+aseg.mgz", "description": "Destrieux-based segmentation (mri/aparc.a2009s+aseg.mgz) from unbiased template.", - "needed_pipeline": "t1-freesurfer-longitudinal or t1-freesurfer-template", + "output_from": "t1-freesurfer-longitudinal or t1-freesurfer-template", } # T1-FreeSurfer-Longitudinal-Correction T1_FS_LONG_DESIKAN_PARC_L = { "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/label/lh.aparc.annot", 
"description": "left hemisphere surface-based Desikan parcellation (label/lh.aparc.annot) generated with t1-freesurfer-longitudinal.", - "needed_pipeline": "t1-freesurfer and t1-freesurfer-longitudinal", + "output_from": "t1-freesurfer and t1-freesurfer-longitudinal", } T1_FS_LONG_DESIKAN_PARC_R = { "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/label/rh.aparc.annot", "description": "right hemisphere surface-based Desikan parcellation (label/rh.aparc.annot) generated with t1-freesurfer-longitudinal.", - "needed_pipeline": "t1-freesurfer and t1-freesurfer-longitudinal", + "output_from": "t1-freesurfer and t1-freesurfer-longitudinal", } T1W_LINEAR = { "pattern": "*space-MNI152NLin2009cSym_res-1x1x1_T1w.nii.gz", "description": "T1w image registered in MNI152NLin2009cSym space using t1-linear pipeline", - "needed_pipeline": "t1-linear", + "output_from": "t1-linear", } T1W_LINEAR_CROPPED = { "pattern": "*space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz", "description": "T1W Image registered using t1-linear and cropped " "(matrix size 169×208×179, 1 mm isotropic voxels)", - "needed_pipeline": "t1-linear", + "output_from": "t1-linear", } T1W_EXTENSIVE = { "pattern": "*space-Ixi549Space_desc-SkullStripped_T1w.nii.gz", "description": "T1w image skull-stripped registered in Ixi549Space space using clinicaDL preprocessing pipeline", - "needed_pipeline": "t1-extensive", + "output_from": "t1-extensive", } T1W_TO_MNI_TRANSFORM = { "pattern": "*space-MNI152NLin2009cSym_res-1x1x1_affine.mat", "description": "Transformation matrix from T1W image to MNI space using t1-linear pipeline", - "needed_pipeline": "t1-linear", + "output_from": "t1-linear", } # T1-Volume @@ -170,7 +174,7 @@ def t1_volume_native_tpm(tissue_number): f"*_*_T1w_segm-{INDEX_TISSUE_MAP[tissue_number]}_probability.nii*", ), "description": f"Tissue probability map {INDEX_TISSUE_MAP[tissue_number]} in native space", - "needed_pipeline": "t1-volume-tissue-segmentation", + "output_from": "t1-volume-tissue-segmentation", } return information @@ -189,7 +193,7 @@ def t1_volume_dartel_input_tissue(tissue_number): f"*_*_T1w_segm-{INDEX_TISSUE_MAP[tissue_number]}_dartelinput.nii*", ), "description": f"Dartel input for tissue probability map {INDEX_TISSUE_MAP[tissue_number]} from T1w MRI", - "needed_pipeline": "t1-volume-tissue-segmentation", + "output_from": "t1-volume-tissue-segmentation", } return information @@ -217,7 +221,7 @@ def t1_volume_native_tpm_in_mni(tissue_number, modulation): f"Tissue probability map {INDEX_TISSUE_MAP[tissue_number]} based on " f"native MRI in MNI space (Ixi549) {description_modulation} modulation." ), - "needed_pipeline": "t1-volume-tissue-segmentation", + "output_from": "t1-volume-tissue-segmentation", } return information @@ -245,7 +249,7 @@ def t1_volume_template_tpm_in_mni(group_label, tissue_number, modulation): f"Tissue probability map {INDEX_TISSUE_MAP[tissue_number]} based " f"on {group_label} template in MNI space (Ixi549) {description_modulation} modulation." 
        ),
-        "needed_pipeline": "t1-volume",
+        "output_from": "t1-volume",
     }
 
     return information
 
 
@@ -262,7 +266,7 @@ def t1_volume_deformation_to_template(group_label):
             f"sub-*_ses-*_T1w_target-{group_label}_transformation-forward_deformation.nii*",
         ),
         "description": f"Deformation from native space to group template {group_label} space.",
-        "needed_pipeline": "t1-volume-create-dartel",
+        "output_from": "t1-volume-create-dartel",
     }
 
     return information
 
 
@@ -277,7 +281,7 @@ def t1_volume_i_th_iteration_group_template(group_label, i):
             f"group-{group_label}_iteration-{i}_template.nii*",
         ),
         "description": f"Iteration #{i} of Dartel template {group_label}",
-        "needed_pipeline": "t1-volume or t1-volume-create-dartel",
+        "output_from": "t1-volume or t1-volume-create-dartel",
     }
 
     return information
 
 
@@ -290,7 +294,7 @@ def t1_volume_final_group_template(group_label):
             f"group-{group_label}", "t1", f"group-{group_label}_template.nii*"
         ),
         "description": f"T1w template file of group {group_label}",
-        "needed_pipeline": "t1-volume or t1-volume-create-dartel",
+        "output_from": "t1-volume or t1-volume-create-dartel",
     }
 
     return information
 
 
@@ -327,25 +331,25 @@ def t1_volume_final_group_template(group_label):
 DWI_PREPROC_NII = {
     "pattern": "dwi/preprocessing/sub-*_ses-*_dwi_space-*_preproc.nii*",
     "description": "preprocessed DWI",
-    "needed_pipeline": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
+    "output_from": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
 }
 
 DWI_PREPROC_BRAINMASK = {
     "pattern": "dwi/preprocessing/sub-*_ses-*_dwi_space-*_brainmask.nii*",
     "description": "b0 brainmask",
-    "needed_pipeline": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
+    "output_from": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
 }
 
 DWI_PREPROC_BVEC = {
     "pattern": "dwi/preprocessing/sub-*_ses-*_dwi_space-*_preproc.bvec",
     "description": "preprocessed bvec",
-    "needed_pipeline": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
+    "output_from": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
 }
 
 DWI_PREPROC_BVAL = {
     "pattern": "dwi/preprocessing/*_dwi_space-*_preproc.bval",
     "description": "preprocessed bval",
-    "needed_pipeline": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
+    "output_from": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
 }
 
 """ PET """
@@ -411,7 +415,7 @@ def pet_volume_normalized_suvr_pet(
             f"{mask_description} SUVR map (using {suvr_reference_region} region) of {acq_label}-PET "
             f"{pvc_description} and {fwhm_description} in Ixi549Space space based on {group_label} DARTEL template"
         ),
-        "needed_pipeline": "pet-volume",
+        "output_from": "pet-volume",
     }
 
     return information
@@ -433,7 +437,7 @@ def pet_linear_nii(acq_label, suvr_reference_region, uncropped_image):
             f"*_acq-{acq_label}_pet_space-MNI152NLin2009cSym{description}_res-1x1x1_suvr-{suvr_reference_region}_pet.nii.gz",
         ),
         "description": "",
-        "needed_pipeline": "pet-linear",
+        "output_from": "pet-linear",
     }
 
     return information
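
Summarizing the schema change in this file: `output_from` (a string) names the pipeline that produced a file in the CAPS tree, while `input_to` (a list, so far set only on `T1W_NII`) names the pipelines that consume a BIDS file. The two shapes side by side, copied from the definitions above:

```python
# The two dictionary shapes the provenance lookup in get_files_list matches on.
T1W_NII = {
    "pattern": "sub-*_ses-*_t1w.nii*",
    "description": "T1w MRI",
    "input_to": ["t1-linear"],  # pipelines that read this file (BIDS side)
}

T1W_LINEAR = {
    "pattern": "*space-MNI152NLin2009cSym_res-1x1x1_T1w.nii.gz",
    "description": "T1w image registered in MNI152NLin2009cSym space using t1-linear pipeline",
    "output_from": "t1-linear",  # pipeline that wrote this file (CAPS side)
}
```
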
diff --git a/clinica/utils/inputs.py b/clinica/utils/inputs.py
index 45241122d..0d870aba1 100644
--- a/clinica/utils/inputs.py
+++ b/clinica/utils/inputs.py
@@ -186,10 +186,10 @@ def clinica_file_reader(
         sessions: list of sessions (must be same size as subjects, and must correspond)
         input_directory: location of the bids or caps directory
         information: dictionary containing all the relevant information to look for the files. Dict must contain the
-            following keys : pattern, description. The optional key is: needed_pipeline
+            following keys: pattern, description. The optional key is: output_from
             pattern: define the pattern of the final file
             description: string to describe what the file is
-        needed_pipeline (optional): string describing the pipeline(s) needed to obtain the related
+        output_from (optional): string describing the pipeline(s) needed to obtain the related
             file
         raise_exception: if True (normal behavior), an exception is raised if errors happen. If not, we return the file
             list as it is
@@ -215,7 +215,7 @@ def clinica_file_reader(
                 caps_directory,
                 {'pattern': 'freesurfer_cross_sectional/sub-*_ses-*/mri/orig_nu.mgz',
                  'description': 'freesurfer file orig_nu.mgz',
-                 'needed_pipeline': 't1-freesurfer'})
+                 'output_from': 't1-freesurfer'})
         gives: ['/caps/subjects/sub-ADNI011S4105/ses-M00/t1/freesurfer_cross_sectional/sub-ADNI011S4105_ses-M00/mri/orig_nu.mgz']
 
         - You have a partial name of the file:
@@ -236,7 +236,7 @@ def clinica_file_reader(
                 caps,
                 {'pattern': 'rh.white',
                  'description': 'right hemisphere of outer cortical surface.',
-                 'needed_pipeline': 't1-freesurfer'})
+                 'output_from': 't1-freesurfer'})
         the following error will arise:
         * More than 1 file found::
             /caps/subjects/sub-ADNI011S4105/ses-M00/t1/freesurfer_cross_sectional/fsaverage/surf/rh.white
@@ -266,9 +266,9 @@ def clinica_file_reader(
         elem in information.keys() for elem in ["pattern", "description"]
     ), "'information' must contain the keys 'pattern' and 'description'"
     assert all(
-        elem in ["pattern", "description", "needed_pipeline"]
+        elem in ["pattern", "description", "output_from", "input_to"]
         for elem in information.keys()
-    ), "'information' can only contain the keys 'pattern', 'description' and 'needed_pipeline'"
+    ), "'information' can only contain the keys 'pattern', 'description', 'output_from' and 'input_to'"
 
     pattern = information["pattern"]
     is_bids = determine_caps_or_bids(input_directory)
@@ -330,6 +330,18 @@ def clinica_file_reader(
     for msg in error_encountered:
         error_message += msg
     if len(error_encountered) > 0 and raise_exception is True:
+        error_message = (
+            f"Clinica encountered {len(error_encountered)} "
+            f"problem(s) while getting {information['description']}:\n"
+        )
+        if "output_from" in information.keys():
+            if information["output_from"]:
+                error_message += (
+                    "Please note that the following clinica pipeline(s) must "
+                    f"have run to obtain these files: {information['output_from']}\n"
+                )
+        for msg in error_encountered:
+            error_message += msg
         if is_bids:
             raise ClinicaBIDSError(error_message)
         else:
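
For reference, a call that passes the tightened key validation; the subjects, sessions, and CAPS path are illustrative, and the information dict reuses the docstring's own orig_nu.mgz example:

```python
from clinica.utils.inputs import clinica_file_reader

# Illustrative: an information dict now carries "output_from" (or "input_to")
# instead of the old "needed_pipeline" key.
files = clinica_file_reader(
    ["sub-ADNI011S4105"],  # subjects (illustrative)
    ["ses-M00"],           # sessions, same length as subjects
    "/path/to/caps",       # CAPS directory (illustrative)
    {
        "pattern": "freesurfer_cross_sectional/sub-*_ses-*/mri/orig_nu.mgz",
        "description": "freesurfer file orig_nu.mgz",
        "output_from": "t1-freesurfer",
    },
)
```
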
@@ -397,10 +409,10 @@ def clinica_group_reader(caps_directory, information, raise_exception=True):
     Args:
         caps_directory: input caps directory
         information: dictionary containing all the relevant information to look for the files. Dict must contain the
-            following keys : pattern, description, needed_pipeline
+            following keys: pattern, description, output_from
             pattern: define the pattern of the final file
             description: string to describe what the file is
-        needed_pipeline (optional): string describing the pipeline needed to obtain the file beforehand
+        output_from (optional): string describing the pipeline needed to obtain the file beforehand
         raise_exception: if True (normal behavior), an exception is raised if errors happen. If not, we return the file
             list as it is
@@ -418,9 +430,8 @@ def clinica_group_reader(caps_directory, information, raise_exception=True):
         information, dict
     ), "A dict must be provided for the argument 'dict'"
     assert all(
-        elem in information.keys()
-        for elem in ["pattern", "description", "needed_pipeline"]
-    ), "'information' must contain the keys 'pattern', 'description', 'needed_pipeline'"
+        elem in information.keys() for elem in ["pattern", "description", "output_from"]
+    ), "'information' must contain the keys 'pattern', 'description', 'output_from'"
 
     pattern = information["pattern"]
     # Some check on the formatting on the data
@@ -446,7 +457,7 @@ def clinica_group_reader(caps_directory, information, raise_exception=True):
         error_string += (
             f"\n\tCAPS directory: {caps_directory}\n"
             "Please note that the following clinica pipeline(s) must have run to obtain these files: "
-            f"{information['needed_pipeline']}\n"
+            f"{information['output_from']}\n"
         )
         raise ClinicaCAPSError(error_string)
     return current_glob_found[0]

From b1bc38e4ed0e222f224e64d33eaaab316b4ad052 Mon Sep 17 00:00:00 2001
From: "omar.elrifai"
Date: Thu, 23 Sep 2021 13:04:04 +0200
Subject: [PATCH 02/24] Add function to create new provenance files

---
 clinica/engine/provenance.py | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py
index 0db9d17ff..f310cede6 100644
--- a/clinica/engine/provenance.py
+++ b/clinica/engine/provenance.py
@@ -19,7 +19,8 @@ def run_wrapper(self, **kwargs):
         prov_command = get_command(self, in_files_paths)
 
         if validate_command(prov_context, prov_command):
-            ret = func(self)
+            # ret = func(self)
+            print("The pipeline successfully executed.")
         else:
             raise Exception(
                 "The pipeline selected is incompatible with the input files provenance"
@@ -77,22 +78,21 @@ def get_command(self, input_files_paths: list) -> dict:
     }
 
 
-def write_prov_file(prov_command, files_paths):
+def write_prov_file(prov_command, file_path):
     """
     Write the dictionary data to the file_path
     """
     from clinica.engine.provenance_utils import read_prov, get_associated_prov
 
-    for file_path in files_paths:
-        prov_path = get_associated_prov(file_path)
+    prov_path = get_associated_prov(file_path)
 
-        if prov_path.exists():
-            # append the pipeline provenance information to the old provenance file
-            prov_main = read_prov(prov_path)
-            prov_main = append_prov_dict(prov_main, prov_command)
-        else:
-            print("help")
-            # create new provenance file with pipeline information
+    if prov_path.exists():
+        # append the pipeline provenance information to the old provenance file
+        prov_main = read_prov(prov_path)
+        prov_main = append_prov_dict(prov_main, prov_command)
+    else:
+        create_prov_file(prov_command, prov_path)
+        # create new provenance file with pipeline information
     return ""
@@ -103,7 +103,7 @@ def append_prov_dict(prov_main: dict, prov_new: dict) -> dict:
 
     for k in prov_new.keys():
         for el in prov_new[k]:
-            if prov_main[k] and el not in prov_main[k]:
+            if k in prov_main.keys() and el not in prov_main[k]:
                 prov_main[k].append(el)
     return prov_main
@@ -170,11 +170,15 @@ def get_entity(img_path: str) -> dict:
     return new_entity
 
 
-def create_prov_file(command, path):
+def create_prov_file(prov_command, prov_path):
     """
     Create new provenance file based on command
     """
-    # TODO: create a json-ld object next to the file and add it to the active prov object
+    import json
+
+    with open(prov_path, "w") as fp:
+        json.dump(prov_command, fp, indent=4)
+
     return
 

From 
9c0ee0d49f45cbfcf35ebb4c205775bc0a5e7ba0 Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Mon, 3 Jan 2022 10:53:40 +0100 Subject: [PATCH 03/24] Update clinica_file_reader call --- clinica/engine/provenance_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clinica/engine/provenance_utils.py b/clinica/engine/provenance_utils.py index 998e68496..6c209c629 100644 --- a/clinica/engine/provenance_utils.py +++ b/clinica/engine/provenance_utils.py @@ -27,7 +27,7 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> list: ref_dir = ( self.bids_directory if dict_field == "input_to" else self.caps_directory ) - current_file = clinica_file_reader( + current_file, _ = clinica_file_reader( self.subjects, self.sessions, ref_dir, From aa0cb6fe46209f93443d58d83d588de694fcaa6b Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Tue, 4 Jan 2022 17:55:39 +0100 Subject: [PATCH 04/24] Add data model --- clinica/engine/provenance_utils.py | 116 ----------------------------- 1 file changed, 116 deletions(-) delete mode 100644 clinica/engine/provenance_utils.py diff --git a/clinica/engine/provenance_utils.py b/clinica/engine/provenance_utils.py deleted file mode 100644 index 6c209c629..000000000 --- a/clinica/engine/provenance_utils.py +++ /dev/null @@ -1,116 +0,0 @@ -from typing import Union, Optional -from pathlib import Path - - -def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> list: - """ - Calls clinica_file_reader with the appropriate extentions - """ - from clinica.utils.inputs import clinica_file_reader - import clinica.utils.input_files as cif - - dict_field_options = ["input_to", "output_from"] - if dict_field not in dict_field_options: - raise (f"dict_field must be one of {dict_field_options}") - - # retrieve all the data dictionaries from the input_files module - files_dicts = { - k: v - for k, v in vars(cif).items() - if isinstance(v, dict) - and dict_field in v.keys() - and pipeline_fullname in v[dict_field] - } - # TODO: check if bids or caps as output - ret_files = [] - for elem in files_dicts: - ref_dir = ( - self.bids_directory if dict_field == "input_to" else self.caps_directory - ) - current_file, _ = clinica_file_reader( - self.subjects, - self.sessions, - ref_dir, - files_dicts[elem], - raise_exception=False, - ) - if current_file: - ret_files.extend(current_file) - - return ret_files - - -def is_entity_tracked(prov_context: dict, entity_id: str) -> bool: - flag_exists = next( - (True for item in prov_context["Entity"] if item["@id"] == entity_id), - False, - ) - return flag_exists - - -def is_agent_tracked(prov_context: dict, agent_id: str) -> bool: - flag_exists = next( - (True for item in prov_context["Agent"] if item["@id"] == agent_id), - False, - ) - return flag_exists - - -def is_activity_tracked(prov_context: dict, activity_id: str) -> bool: - flag_exists = next( - (True for item in prov_context["Activity"] if item["@id"] == activity_id), - False, - ) - return flag_exists - - -def get_entity_id(file_path: str) -> str: - from pathlib import Path - - entity_id = Path(file_path).with_suffix("").name - return entity_id - - -def get_activity_id(pipeline_name: str) -> str: - return "clin:" + pipeline_name - - -def get_agent_id(agent_name: str) -> str: - return "clin:" + agent_name - - -def get_last_activity(file_path: str) -> Optional[list]: - - """ - Return the last activity executed on the file - """ - - prov_record = read_prov(get_associated_prov(file_path)) - if prov_record and prov_record["Activity"]: - 
last_activity = prov_record["Activity"][-1]["@id"] - return last_activity - return None - - -def get_associated_prov(file_path: str) -> Path: - - file_path = Path(file_path) - while file_path.suffix != "": - file_path = file_path.with_suffix("") - - associated_jsonld = file_path.with_suffix(".jsonld") - return associated_jsonld - - -def read_prov(prov_path: Path) -> Optional[dict]: - """ - Check if the given file is a valid provenance json-ld - """ - import json - - # TODO: check that the provenance file associations and uses exists - if prov_path.exists(): - with open(prov_path, "r") as fp: - json_ld_data = json.load(fp) - return json_ld_data - return None From 6b28410e8108b448c87d7b51f7575686a959acaf Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Tue, 4 Jan 2022 18:00:53 +0100 Subject: [PATCH 05/24] rename files --- clinica/engine/prov_model.py | 96 +++++++++++++++++++++++++++++ clinica/engine/prov_utils.py | 116 +++++++++++++++++++++++++++++++++++ 2 files changed, 212 insertions(+) create mode 100644 clinica/engine/prov_model.py create mode 100644 clinica/engine/prov_utils.py diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py new file mode 100644 index 000000000..45e7efe59 --- /dev/null +++ b/clinica/engine/prov_model.py @@ -0,0 +1,96 @@ +from attr import define, field +import attr +import typing +from typing import Union, List +from abc import ABC, abstractmethod + + +# Define PROV abstract concepts + + +@define +class Identifier: + id: int + + +class ProvElement(ABC): + @property + @classmethod + @abstractmethod + def id(cls): + """id is required for ProvElements""" + return NotImplementedError + + @property + def attributes(cls): + """attributes are optional""" + return NotImplementedError + + +class ProvRelation(ABC): + + id: Identifier + src: ProvElement + dest: ProvElement + + +# Define PROV Types + + +@define +class ProvEntity(ProvElement): + """Provenance Entity element""" + + id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) + attributes: set + + +@define +class ProvActivity(ProvElement): + """Provenance Activity element""" + + id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) + attributes: set + + +@define +class ProvAgent(ProvElement): + """Provenance Agent element""" + + id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) + attributes: set + + +# Define PROV Relations + + +@define +class ProvGeneration(ProvRelation): + id: Identifier = field( + default=None, + validator=attr.validators.optional(attr.validators.instance_of(Identifier)), + ) + + src: ProvActivity = field( + default=None, + validator=attr.validators.optional(attr.validators.instance_of(ProvActivity)), + ) + dest: ProvEntity = field( + default=None, + validator=attr.validators.optional(attr.validators.instance_of(ProvEntity)), + ) + + # entity: an identifier (e) for a created entity; + # activity: an OPTIONAL identifier (a) for the activity that creates the entity; + # time: an OPTIONAL "generation time" (t), the time at which the entity was completely created; + # attributes: an OPTIONALa + + +@define +class ProvUsage(ProvRelation): + pass + + +@define +class ProvAssociation(ProvRelation): + pass diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py new file mode 100644 index 000000000..6c209c629 --- /dev/null +++ b/clinica/engine/prov_utils.py @@ -0,0 +1,116 @@ +from typing import Union, Optional +from pathlib import Path + + +def get_files_list(self, pipeline_fullname: str, 
dict_field="input_to") -> list:
+    """
+    Calls clinica_file_reader with the appropriate extensions
+    """
+    from clinica.utils.inputs import clinica_file_reader
+    import clinica.utils.input_files as cif
+
+    dict_field_options = ["input_to", "output_from"]
+    if dict_field not in dict_field_options:
+        raise ValueError(f"dict_field must be one of {dict_field_options}")
+
+    # retrieve all the data dictionaries from the input_files module
+    files_dicts = {
+        k: v
+        for k, v in vars(cif).items()
+        if isinstance(v, dict)
+        and dict_field in v.keys()
+        and pipeline_fullname in v[dict_field]
+    }
+    # TODO: check if bids or caps as output
+    ret_files = []
+    for elem in files_dicts:
+        ref_dir = (
+            self.bids_directory if dict_field == "input_to" else self.caps_directory
+        )
+        current_file, _ = clinica_file_reader(
+            self.subjects,
+            self.sessions,
+            ref_dir,
+            files_dicts[elem],
+            raise_exception=False,
+        )
+        if current_file:
+            ret_files.extend(current_file)
+
+    return ret_files
+
+
+def is_entity_tracked(prov_context: dict, entity_id: str) -> bool:
+    flag_exists = next(
+        (True for item in prov_context["Entity"] if item["@id"] == entity_id),
+        False,
+    )
+    return flag_exists
+
+
+def is_agent_tracked(prov_context: dict, agent_id: str) -> bool:
+    flag_exists = next(
+        (True for item in prov_context["Agent"] if item["@id"] == agent_id),
+        False,
+    )
+    return flag_exists
+
+
+def is_activity_tracked(prov_context: dict, activity_id: str) -> bool:
+    flag_exists = next(
+        (True for item in prov_context["Activity"] if item["@id"] == activity_id),
+        False,
+    )
+    return flag_exists
+
+
+def get_entity_id(file_path: str) -> str:
+    from pathlib import Path
+
+    entity_id = Path(file_path).with_suffix("").name
+    return entity_id
+
+
+def get_activity_id(pipeline_name: str) -> str:
+    return "clin:" + pipeline_name
+
+
+def get_agent_id(agent_name: str) -> str:
+    return "clin:" + agent_name
+
+
+def get_last_activity(file_path: str) -> Optional[list]:
+
+    """
+    Return the last activity executed on the file
+    """
+
+    prov_record = read_prov(get_associated_prov(file_path))
+    if prov_record and prov_record["Activity"]:
+        last_activity = prov_record["Activity"][-1]["@id"]
+        return last_activity
+    return None
+
+
+def get_associated_prov(file_path: str) -> Path:
+
+    file_path = Path(file_path)
+    while file_path.suffix != "":
+        file_path = file_path.with_suffix("")
+
+    associated_jsonld = file_path.with_suffix(".jsonld")
+    return associated_jsonld
+
+
+def read_prov(prov_path: Path) -> Optional[dict]:
+    """
+    Check if the given file is a valid provenance json-ld
+    """
+    import json
+
+    # TODO: check that the provenance file associations and uses exists
+    if prov_path.exists():
+        with open(prov_path, "r") as fp:
+            json_ld_data = json.load(fp)
+        return json_ld_data
+    return None

From 8fdf9e74bc14f046c0b749d44d3d45c4eb72a7fb Mon Sep 17 00:00:00 2001
From: "omar.elrifai"
Date: Tue, 11 Jan 2022 11:47:30 +0100
Subject: [PATCH 06/24] Update prov with Data Model

---
 clinica/engine/prov_model.py |  27 +++++-
 clinica/engine/prov_utils.py |  76 ++++++++++------
 clinica/engine/provenance.py | 177 ++++++++++++++++++++---------------
 3 files changed, 172 insertions(+), 108 deletions(-)

diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py
index 45e7efe59..74ef6143f 100644
--- a/clinica/engine/prov_model.py
+++ b/clinica/engine/prov_model.py
@@ -42,7 +42,7 @@ class ProvEntity(ProvElement):
     """Provenance Entity element"""
 
     id: Identifier = field(validator=[attr.validators.instance_of(Identifier)])
-    attributes: set
+    attributes: dict
@define @@ -50,7 +50,7 @@ class ProvActivity(ProvElement): """Provenance Activity element""" id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) - attributes: set + attributes: dict @define @@ -58,7 +58,7 @@ class ProvAgent(ProvElement): """Provenance Agent element""" id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) - attributes: set + attributes: dict # Define PROV Relations @@ -94,3 +94,24 @@ class ProvUsage(ProvRelation): @define class ProvAssociation(ProvRelation): pass + + +@define +class ProvEntry: + """ + A prov entry in triple form + """ + + subject: ProvElement + predicate: ProvRelation + object: ProvElement + + +@define +class ProvRecord: + """ + A provenance document containting a PROV context and a list of entries + """ + + context: dict + entries: list[ProvEntry] diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py index 6c209c629..8289405c5 100644 --- a/clinica/engine/prov_utils.py +++ b/clinica/engine/prov_utils.py @@ -1,10 +1,17 @@ -from typing import Union, Optional +from typing import Optional from pathlib import Path +from .prov_model import * -def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> list: + +def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> list[Path]: """ - Calls clinica_file_reader with the appropriate extentions + params: + pipeline_fullname: the current running pipeline name + dict_field: variable to specify if fetching inputs or outputs to the pipeline + + return: + list of 'Path's to the files used in the pipeline """ from clinica.utils.inputs import clinica_file_reader import clinica.utils.input_files as cif @@ -13,7 +20,8 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> list: if dict_field not in dict_field_options: raise (f"dict_field must be one of {dict_field_options}") - # retrieve all the data dictionaries from the input_files module + # Retrieve all the data dict from the input_files module + files_dicts = { k: v for k, v in vars(cif).items() @@ -22,6 +30,7 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> list: and pipeline_fullname in v[dict_field] } # TODO: check if bids or caps as output + ret_files = [] for elem in files_dicts: ref_dir = ( @@ -35,7 +44,7 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> list: raise_exception=False, ) if current_file: - ret_files.extend(current_file) + ret_files.extend(Path(current_file)) return ret_files @@ -64,53 +73,62 @@ def is_activity_tracked(prov_context: dict, activity_id: str) -> bool: return flag_exists -def get_entity_id(file_path: str) -> str: - from pathlib import Path - - entity_id = Path(file_path).with_suffix("").name - return entity_id +def get_entity_id(path_file: Path) -> str: + id = Identifier + id.id = path_file.with_suffix("").name + return id -def get_activity_id(pipeline_name: str) -> str: - return "clin:" + pipeline_name +def get_activity_id(pipeline_name: str) -> Identifier: + id = Identifier + id.id = "clin:" + pipeline_name + return id -def get_agent_id(agent_name: str) -> str: - return "clin:" + agent_name +def get_agent_id(agent_current: ProvAgent) -> Identifier: + id = Identifier + id.id = "clin:" + agent_current.attributes["label"] + return id -def get_last_activity(file_path: str) -> Optional[list]: +def get_last_activity(path_entity: Path) -> Optional[ProvActivity]: """ Return the last activity executed on the file """ - prov_record = 
read_prov(get_associated_prov(file_path)) - if prov_record and prov_record["Activity"]: - last_activity = prov_record["Activity"][-1]["@id"] + prov_record = read_prov_jsonld(get_path_prov(path_entity)) + if prov_record and prov_record.entries: + last_activity = prov_record.entries[-1]["@id"] return last_activity return None -def get_associated_prov(file_path: str) -> Path: +def get_path_prov(path_entity: Path) -> Path: + """ + return: Path of the provenance file associated with an entity + """ - file_path = Path(file_path) - while file_path.suffix != "": - file_path = file_path.with_suffix("") + while path_entity.suffix != "": + path_entity = path_entity.with_suffix("") - associated_jsonld = file_path.with_suffix(".jsonld") - return associated_jsonld + path_prov = path_entity.with_suffix(".jsonld") + return path_prov -def read_prov(prov_path: Path) -> Optional[dict]: +def read_prov_jsonld(path_prov: Path) -> Optional[ProvRecord]: """ - Check if the given file is a valid provenance json-ld + return: ProvRecord in a specific location stored in jsonld format """ import json + prov_record = ProvRecord() + # TODO: check that the provenance file associations and uses exists - if prov_path.exists(): - with open(prov_path, "r") as fp: + if path_prov.exists(): + with open(path_prov, "r") as fp: json_ld_data = json.load(fp) - return json_ld_data + prov_record.records = json_ld_data["records"] + return prov_record + return None diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index f310cede6..94fe4b984 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py @@ -5,98 +5,129 @@ from pathlib import Path from typing import Optional +from clinica.engine.prov_utils import read_prov_jsonld + +from .prov_model import * + def provenance(func): - from .provenance_utils import get_files_list + from .prov_utils import get_files_list @functools.wraps(func) def run_wrapper(self, **kwargs): ret = [] pipeline_fullname = self.fullname - in_files_paths = get_files_list(self, pipeline_fullname, dict_field="input_to") + paths_input_files = get_files_list( + self, pipeline_fullname, dict_field="input_to" + ) - prov_context = get_context(files_paths=in_files_paths) - prov_command = get_command(self, in_files_paths) + record_history = get_history(paths_files=paths_input_files) + entries_current = get_command(self, paths_input_files) - if validate_command(prov_context, prov_command): + if validate_command(record_history, entries_current): # ret = func(self) print("The pipeline succesfully executed.") else: raise Exception( "The pipeline selected is incompatible with the input files provenance" ) - out_files_paths = get_files_list( + paths_out_files = get_files_list( self, pipeline_fullname, dict_field="output_from" ) - register_prov(prov_command, out_files_paths) + register_prov(entries_current, paths_out_files) return ret return run_wrapper -def register_prov(prov_command: dict, out_files: list) -> bool: +def register_prov(entries_current: list[ProvEntry], out_files: Path) -> None: # TODO: iterate over out_files and create a provenance file for each + for file in out_files: - write_prov_file(prov_command, file) + write_prov_file(entries_current, file) print("Provenance registered succesfully") return True -def get_context(files_paths: str) -> dict: +def get_history(paths_files: list[Path]) -> ProvRecord: """ - Return a dictionary with the provenance info related to the files in the files_paths + return: + a ProvRecord for the associated files in path_files """ - from 
clinica.engine.provenance_utils import read_prov, get_associated_prov
+    from .prov_utils import read_prov_jsonld, get_path_prov
 
-    prov_data = {"Entity": [], "Agent": [], "Activity": []}
-    for path in files_paths:
-        prov_record = read_prov(get_associated_prov(path))
-        if prov_record:
-            prov_data = append_prov_dict(prov_data, prov_record)
+    prov_record = ProvRecord()
+
+    for path in paths_files:
+        prov_record_tmp = read_prov_jsonld(get_path_prov(path))
+        if prov_record_tmp:
+            prov_record.entries.extend(prov_record_tmp.entries)
 
-    return prov_data
+    return prov_record
 
 
-def get_command(self, input_files_paths: list) -> dict:
+def get_command(self, paths_inputs: list[Path]) -> ProvEntry:
     """
-    Read the user command and save information in a dict
+    params:
+    paths_inputs: list of input entries paths
+    return:
+    list of ProvEntry associated with the launched pipeline
     """
     import sys
 
-    new_entities = []
-    new_agent = get_agent()
-    for path in input_files_paths:
-        new_entities.append(get_entity(path))
-    new_activity = get_activity(self, new_agent["@id"], new_entities)
+    entries_command = []
+
+    new_agent = get_agent()
+
+    new_entities = []
+
+    for path in paths_inputs:
+        entity_curr = get_entity(path)
+        new_entities.append(entity_curr)
+
+    new_activity = get_activity(self, new_agent.id, new_entities)
 
-    return {
-        "Agent": [new_agent],
-        "Activity": [new_activity],
-        "Entity": new_entities,
-    }
+    entry_curr = ProvEntry(
+        subject=new_agent, predicate=ProvAssociation, object=new_activity
+    )
+
+    # TODO create several entries from this information
+
+    entries_command.append(entry_curr)
+
+    return entries_command
 
 
-def write_prov_file(prov_command, file_path):
+def write_prov_file(
+    list_prov_entries: list, path_entity: Path, overwrite=False
+) -> None:
     """
-    Write the dictionary data to the file_path
+    Append the current provenance info to the prov file. If it does not exist, create new
+
+    params:
+    list_prov_entries: list of ProvEntry
+    path_entity: path of the prov-associated element
     """
-    from clinica.engine.provenance_utils import read_prov, get_associated_prov
+    from .prov_utils import read_prov_jsonld, get_path_prov
 
-    prov_path = get_associated_prov(file_path)
+    prov_path = get_path_prov(path_entity)
 
     if prov_path.exists():
         # append the pipeline provenance information to the old provenance file
-        prov_main = read_prov(prov_path)
-        prov_main = append_prov_dict(prov_main, prov_command)
+        prov_record = read_prov_jsonld(prov_path)
+        prov_record.entries.extend(list_prov_entries)
     else:
-        create_prov_file(prov_command, prov_path)
+        create_prov_file(list_prov_entries, prov_path)
         # create new provenance file with pipeline information
-    return ""
+    return
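
To illustrate the triple form `get_command` now builds: each command becomes subject–predicate–object `ProvEntry` records collected into a `ProvRecord`. A minimal sketch, assuming the attrs defaults a later patch in this series adds so the elements construct without arguments (IDs and labels illustrative):

```python
# One provenance triple, mirroring get_command:
# "<clinica agent> wasAssociatedWith <pipeline activity>"
agent = ProvAgent()
agent.attributes["label"] = "clinica"
agent.attributes["version"] = "0.5.0"

activity = ProvActivity()
activity.attributes["label"] = "t1-linear"

# get_command stores the ProvAssociation relation class itself as the predicate
entry = ProvEntry(subject=agent, predicate=ProvAssociation, object=activity)
record = ProvRecord(context={}, entries=[entry])
```
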
 
 
-def append_prov_dict(prov_main: dict, prov_new: dict) -> dict:
+def extend_prov(prov_main: dict, prov_new: dict) -> dict:
     """
     Append a specific prov data to the global prov dict
     """
 
-def get_agent() -> dict:
+def get_agent() -> ProvAgent:
     import clinica
-    from .provenance_utils import get_agent_id
+    from .prov_utils import get_agent_id
 
-    agent_version = clinica.__version__
-    agent_label = clinica.__name__
-    agent_id = get_agent_id(agent_label + agent_version)
+    new_agent = ProvAgent()
 
-    new_agent = {"@id": agent_id, "label": agent_label, "version": agent_version}
+    new_agent.attributes["version"] = clinica.__version__
+    new_agent.attributes["label"] = clinica.__name__
+    new_agent.id = get_agent_id(new_agent)
 
     return new_agent
 
 
-def get_activity(self, agent_id: str, entities: list) -> dict:
+def get_activity(
+    self, agent_id: Identifier, entities: List[ProvEntity]
+) -> ProvActivity:
     """
-    Add the current command to the list of activities
+    return
+    ProvActivity from related entities and associated agent
     """
     import sys
-    from .provenance_utils import get_activity_id
+    from .prov_utils import get_activity_id
 
-    activity_parameters = self.parameters
-    activity_label = self.fullname
-    activity_id = get_activity_id(self.fullname)
-    activity_command = (sys.argv[1:],)
-    activity_agent = agent_id
-    activity_used_files = [e["@id"] for e in entities]
-
-    new_activity = {
-        "@id": activity_id,
-        "label": activity_label,
-        "command": activity_command,
-        "parameters": activity_parameters,
-        "wasAssociatedWith": activity_agent,
-        "used": activity_used_files,
-    }
+    new_activity = ProvActivity()
+
+    new_activity.attributes["parameters"] = self.parameters
+    new_activity.attributes["label"] = self.fullname
+    new_activity.id = get_activity_id(self.fullname)
+    new_activity.attributes["command"] = (sys.argv[1:],)
+
+    # TODO include related agent and entity to the activity
+    # activity_agent = agent_id
+    # activity_used_files = [e["@id"] for e in entities]
 
     return new_activity
 
 
-def get_entity(img_path: str) -> dict:
+def get_entity(path_curr: Path) -> ProvEntity:
     """
-    Add the current file to the list of entities
+    return an Entity object from the file in path_curr
     """
-    from clinica.engine.provenance_utils import get_entity_id
-    from clinica.engine.provenance_utils import get_last_activity
-    from pathlib import Path
 
-    entity_id = get_entity_id(img_path)
-    entity_label = Path(img_path).name
-    entity_path = img_path
-    entity_source = get_last_activity(img_path)
+    from clinica.engine.prov_utils import get_entity_id
+
+    new_entity = ProvEntity()
+
+    new_entity.id = get_entity_id(path_curr)
+    new_entity.attributes["label"] = 
path_curr.name + new_entity.attributes["path"] = path_curr - new_entity = { - "@id": entity_id, - "label": entity_label, - "atLocation": entity_path, - "wasGeneratedBy": entity_source, - } + # TODO: implement function to return the latest associated activity + # new_entity.attributes["wasGeneratedBy"] = get_last_activity(path_curr) return new_entity From 47f9ce591e9b694e9c69397ac3275aad901d3928 Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Tue, 18 Jan 2022 11:33:52 +0100 Subject: [PATCH 07/24] Fix typing with list --- clinica/engine/prov_model.py | 2 +- clinica/engine/prov_utils.py | 4 ++-- clinica/engine/provenance.py | 11 +++++------ 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py index 74ef6143f..048aed522 100644 --- a/clinica/engine/prov_model.py +++ b/clinica/engine/prov_model.py @@ -114,4 +114,4 @@ class ProvRecord: """ context: dict - entries: list[ProvEntry] + entries: List[ProvEntry] diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py index 8289405c5..9d74ab394 100644 --- a/clinica/engine/prov_utils.py +++ b/clinica/engine/prov_utils.py @@ -1,10 +1,10 @@ -from typing import Optional +from typing import Optional, List from pathlib import Path from .prov_model import * -def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> list[Path]: +def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> List[Path]: """ params: pipeline_fullname: the current running pipeline name diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index 94fe4b984..c97edbc80 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py @@ -1,9 +1,8 @@ -import json import functools from os import read from pathlib import Path -from typing import Optional +from typing import Optional, List from clinica.engine.prov_utils import read_prov_jsonld @@ -41,7 +40,7 @@ def run_wrapper(self, **kwargs): return run_wrapper -def register_prov(entries_current: list[ProvEntry], out_files: Path) -> None: +def register_prov(entries_current: List[ProvEntry], out_files: Path) -> None: # TODO: iterate over out_files and create a provenance file for each @@ -51,7 +50,7 @@ def register_prov(entries_current: list[ProvEntry], out_files: Path) -> None: return True -def get_history(paths_files: list[Path]) -> ProvRecord: +def get_history(paths_files: List[Path]) -> ProvRecord: """ return: a ProvRecord for the associated files in path_files @@ -69,7 +68,7 @@ def get_history(paths_files: list[Path]) -> ProvRecord: return prov_record -def get_command(self, paths_inputs: list[Path]) -> ProvEntry: +def get_command(self, paths_inputs: List[Path]) -> ProvEntry: """ params: paths_inputs: list of input entries paths @@ -153,7 +152,7 @@ def get_agent() -> ProvAgent: def get_activity( - self, agent_id: Identifier, entities: list[ProvEntity] + self, agent_id: Identifier, entities: List[ProvEntity] ) -> ProvActivity: """ return From a3ef086acfc8a990652a5f1f73f7fe4f7d25d0db Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Fri, 21 Jan 2022 09:54:26 +0100 Subject: [PATCH 08/24] Fix various issues --- clinica/engine/prov_model.py | 57 ++++++++++++++++++++++++++++-------- clinica/engine/prov_utils.py | 37 +++++++++++++---------- clinica/engine/provenance.py | 15 ++++++---- 3 files changed, 75 insertions(+), 34 deletions(-) diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py index 048aed522..db3144ffd 100644 --- a/clinica/engine/prov_model.py +++ 
b/clinica/engine/prov_model.py @@ -8,9 +8,19 @@ # Define PROV abstract concepts +@define +class ProvContext: + label: str + link: str + + @define class Identifier: - id: int + seed: int = field() + label: str = field( + default=None, + validator=attr.validators.optional(attr.validators.instance_of(str)), + ) class ProvElement(ABC): @@ -41,24 +51,42 @@ class ProvRelation(ABC): class ProvEntity(ProvElement): """Provenance Entity element""" - id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) - attributes: dict + id: Identifier = field( + init=False, validator=[attr.validators.instance_of(Identifier)] + ) + attributes: dict = field(default={}) + + def __attrs_post_init__(self): + self.id = Identifier(seed=0) @define class ProvActivity(ProvElement): """Provenance Activity element""" - id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) - attributes: dict + id: Identifier = field( + init=False, validator=[attr.validators.instance_of(Identifier)] + ) + attributes: dict = field(default={}) + + def __attrs_post_init__(self): + self.id = Identifier(seed=0) @define class ProvAgent(ProvElement): """Provenance Agent element""" - id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) - attributes: dict + id: Identifier = field( + init=False, validator=[attr.validators.instance_of(Identifier)] + ) + attributes: dict = field( + default={}, + validator=attr.validators.optional(attr.validators.instance_of(dict)), + ) + + def __attrs_post_init__(self): + self.id = Identifier(seed=0) # Define PROV Relations @@ -67,19 +95,24 @@ class ProvAgent(ProvElement): @define class ProvGeneration(ProvRelation): id: Identifier = field( - default=None, + init=False, validator=attr.validators.optional(attr.validators.instance_of(Identifier)), ) src: ProvActivity = field( - default=None, + init=False, validator=attr.validators.optional(attr.validators.instance_of(ProvActivity)), ) dest: ProvEntity = field( - default=None, + init=False, validator=attr.validators.optional(attr.validators.instance_of(ProvEntity)), ) + def __attrs_post_init__(self): + self.id = Identifier(seed=0) + self.src = ProvActivity() + self.dest = ProvEntity() + # entity: an identifier (e) for a created entity; # activity: an OPTIONAL identifier (a) for the activity that creates the entity; # time: an OPTIONAL "generation time" (t), the time at which the entity was completely created; @@ -113,5 +146,5 @@ class ProvRecord: A provenance document containting a PROV context and a list of entries """ - context: dict - entries: List[ProvEntry] + context: dict = field(default={}) + entries: List[ProvEntry] = field(default=[]) diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py index 9d74ab394..7b24204ff 100644 --- a/clinica/engine/prov_utils.py +++ b/clinica/engine/prov_utils.py @@ -10,8 +10,7 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> List[ pipeline_fullname: the current running pipeline name dict_field: variable to specify if fetching inputs or outputs to the pipeline - return: - list of 'Path's to the files used in the pipeline + return list of 'Path's to the files used in the pipeline """ from clinica.utils.inputs import clinica_file_reader import clinica.utils.input_files as cif @@ -44,7 +43,7 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> List[ raise_exception=False, ) if current_file: - ret_files.extend(Path(current_file)) + ret_files.extend([Path(x) for x in current_file]) return ret_files @@ -74,27 
+73,27 @@ def is_activity_tracked(prov_context: dict, activity_id: str) -> bool: def get_entity_id(path_file: Path) -> str: - id = Identifier - id.id = path_file.with_suffix("").name + id = Identifier(seed=0) + id.label = path_file.with_suffix("").name return id def get_activity_id(pipeline_name: str) -> Identifier: - id = Identifier - id.id = "clin:" + pipeline_name + id = Identifier(seed=0) + id.label = "clin:" + pipeline_name return id def get_agent_id(agent_current: ProvAgent) -> Identifier: - id = Identifier - id.id = "clin:" + agent_current.attributes["label"] + id = Identifier(seed=0) + id.label = "clin:" + agent_current.attributes["label"] return id def get_last_activity(path_entity: Path) -> Optional[ProvActivity]: """ - Return the last activity executed on the file + return the last activity executed on the file """ prov_record = read_prov_jsonld(get_path_prov(path_entity)) @@ -120,15 +119,21 @@ def read_prov_jsonld(path_prov: Path) -> Optional[ProvRecord]: """ return: ProvRecord in a specific location stored in jsonld format """ - import json - prov_record = ProvRecord() - - # TODO: check that the provenance file associations and uses exists if path_prov.exists(): with open(path_prov, "r") as fp: - json_ld_data = json.load(fp) - prov_record.records = json_ld_data["records"] + + prov_record = deserialize_jsonld(fp) return prov_record return None + + +def deserialize_jsonld(fp_jsonld) -> List[ProvEntry]: + """ + params: + + return list of ProvEntry objects from jsonld dictionary data + """ + + return [] diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index c97edbc80..874733d78 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py @@ -58,11 +58,11 @@ def get_history(paths_files: List[Path]) -> ProvRecord: from .prov_utils import read_prov_jsonld, get_path_prov - prov_record = ProvRecord + prov_record = ProvRecord({}, []) for path in paths_files: prov_record_tmp = read_prov_jsonld(get_path_prov(path)) - if prov_record: + if prov_record_tmp: prov_record.entries.extend(prov_record_tmp.entries) return prov_record @@ -87,11 +87,11 @@ def get_command(self, paths_inputs: List[Path]) -> ProvEntry: entity_curr = get_entity(path) new_entities.append(entity_curr) - new_activity = get_activity(self, new_agent["@id"], new_entities) + new_activity = get_activity(self, new_agent.id, new_entities) entry_curr = ProvEntry entry_curr.subject = new_agent - entry_curr.predicate = ProvAssociation + entry_curr.predicate = ProvAssociation() entry_curr.object = new_activity # TODO create several entries from this information @@ -161,7 +161,7 @@ def get_activity( import sys from .prov_utils import get_activity_id - new_activity = ProvActivity + new_activity = ProvActivity() new_activity.attributes["parameters"] = self.parameters new_activity.attributes["label"] = self.fullname @@ -206,11 +206,14 @@ def create_prov_file(prov_command, prov_path): return -def validate_command(prov_context: dict, prov_command: dict) -> bool: +def validate_command( + prov_context: ProvRecord, prov_command: List[Optional[ProvEntry]] +) -> bool: """ Check the command is valid on the data being run """ flag = True + prov_subject = prov_command[0].subject new_activity_id = prov_command["Activity"][0]["@id"] new_agent_id = prov_command["Agent"][0]["@id"] From bd4636bdaa0c61db3ee2397c9dff22b395f3a22a Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Thu, 27 Jan 2022 15:00:14 +0100 Subject: [PATCH 09/24] Rename prov extraction functions --- clinica/engine/provenance.py | 16 
+++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index 874733d78..853f54c75 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py @@ -20,11 +20,11 @@ def run_wrapper(self, **kwargs): self, pipeline_fullname, dict_field="input_to" ) - record_history = get_history(paths_files=paths_input_files) - entries_current = get_command(self, paths_input_files) + prov_record = get_prov_record(paths_files=paths_input_files) + prov_entry = get_pipeline_entry(self, paths_input_files) - if validate_command(record_history, entries_current): - # ret = func(self) + if validate_command(prov_record, prov_entry): + ret = func(self) print("The pipeline succesfully executed.") else: raise Exception( @@ -33,7 +33,7 @@ def run_wrapper(self, **kwargs): paths_out_files = get_files_list( self, pipeline_fullname, dict_field="output_from" ) - register_prov(entries_current, paths_out_files) + register_prov(prov_entry, paths_out_files) return ret @@ -50,7 +50,7 @@ def register_prov(entries_current: List[ProvEntry], out_files: Path) -> None: return True -def get_history(paths_files: List[Path]) -> ProvRecord: +def get_prov_record(paths_files: List[Path]) -> ProvRecord: """ return: a ProvRecord for the associated files in path_files @@ -68,7 +68,7 @@ def get_history(paths_files: List[Path]) -> ProvRecord: return prov_record -def get_command(self, paths_inputs: List[Path]) -> ProvEntry: +def get_pipeline_entry(self, paths_inputs: List[Path]) -> ProvEntry: """ params: paths_inputs: list of input entries paths @@ -94,8 +94,6 @@ def get_command(self, paths_inputs: List[Path]) -> ProvEntry: entry_curr.predicate = ProvAssociation() entry_curr.object = new_activity - # TODO create several entries from this information - entries_command.append(entry_curr) return entries_command From 5dc84751b47346f9e875fa014889623a5884242e Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Mon, 31 Jan 2022 09:11:47 +0100 Subject: [PATCH 10/24] Deserialize json-ld --- clinica/engine/prov_model.py | 36 +++++++------------- clinica/engine/prov_utils.py | 64 ++++++++++++++++++++++++++++-------- clinica/engine/provenance.py | 53 +++++++++-------------------- 3 files changed, 78 insertions(+), 75 deletions(-) diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py index db3144ffd..a462c16fc 100644 --- a/clinica/engine/prov_model.py +++ b/clinica/engine/prov_model.py @@ -10,15 +10,18 @@ @define class ProvContext: - label: str - link: str + _namespaces: list + + +@define +class Namespace: + id: str + uri: str @define class Identifier: - seed: int = field() label: str = field( - default=None, validator=attr.validators.optional(attr.validators.instance_of(str)), ) @@ -51,43 +54,28 @@ class ProvRelation(ABC): class ProvEntity(ProvElement): """Provenance Entity element""" - id: Identifier = field( - init=False, validator=[attr.validators.instance_of(Identifier)] - ) + id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) attributes: dict = field(default={}) - def __attrs_post_init__(self): - self.id = Identifier(seed=0) - @define class ProvActivity(ProvElement): """Provenance Activity element""" - id: Identifier = field( - init=False, validator=[attr.validators.instance_of(Identifier)] - ) + id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) attributes: dict = field(default={}) - def __attrs_post_init__(self): - self.id = Identifier(seed=0) - @define class ProvAgent(ProvElement): 
"""Provenance Agent element""" - id: Identifier = field( - init=False, validator=[attr.validators.instance_of(Identifier)] - ) + id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) attributes: dict = field( default={}, validator=attr.validators.optional(attr.validators.instance_of(dict)), ) - def __attrs_post_init__(self): - self.id = Identifier(seed=0) - # Define PROV Relations @@ -109,7 +97,7 @@ class ProvGeneration(ProvRelation): ) def __attrs_post_init__(self): - self.id = Identifier(seed=0) + self.id = Identifier(label="") self.src = ProvActivity() self.dest = ProvEntity() @@ -146,5 +134,5 @@ class ProvRecord: A provenance document containting a PROV context and a list of entries """ - context: dict = field(default={}) + context: ProvContext = field() entries: List[ProvEntry] = field(default=[]) diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py index 7b24204ff..f32f3bafd 100644 --- a/clinica/engine/prov_utils.py +++ b/clinica/engine/prov_utils.py @@ -73,20 +73,17 @@ def is_activity_tracked(prov_context: dict, activity_id: str) -> bool: def get_entity_id(path_file: Path) -> str: - id = Identifier(seed=0) - id.label = path_file.with_suffix("").name + id = Identifier(label=path_file.with_suffix("").name) return id def get_activity_id(pipeline_name: str) -> Identifier: - id = Identifier(seed=0) - id.label = "clin:" + pipeline_name + id = Identifier(label="clin:" + pipeline_name) return id -def get_agent_id(agent_current: ProvAgent) -> Identifier: - id = Identifier(seed=0) - id.label = "clin:" + agent_current.attributes["label"] +def get_agent_id() -> Identifier: + id = Identifier(label="RRID:Clinica") return id @@ -121,19 +118,60 @@ def read_prov_jsonld(path_prov: Path) -> Optional[ProvRecord]: """ if path_prov.exists(): - with open(path_prov, "r") as fp: - - prov_record = deserialize_jsonld(fp) - return prov_record + elements, prov_record = deserialize_jsonld(path_prov) + return prov_record return None -def deserialize_jsonld(fp_jsonld) -> List[ProvEntry]: +def deserialize_jsonld(path_prov) -> ProvRecord: """ params: return list of ProvEntry objects from jsonld dictionary data """ - return [] + import rdflib + + g = rdflib.Graph(identifier="prov_graph_records") + g.parse(path_prov, format="json-ld") + + elements = {} + entries = [] + + # fetch context: + context = ProvContext([]) + for lbl, link in g.namespace_manager.namespaces(): + namespace = Namespace(lbl, link.n3()) + context._namespaces.append(namespace) + + for s, p, o in g: + + if str(p) == "http://www.w3.org/ns/prov#Activity": + id = Identifier(label=g.namespace_manager.qname(o)) + elements[id.label] = ProvActivity(id) + + elif str(p) == "http://www.w3.org/ns/prov#Agent": + id = Identifier(label=g.namespace_manager.qname(o)) + elements[id.label] = ProvAgent(id) + + elif str(p) == "http://www.w3.org/ns/prov#Entity": + id = Identifier(label=g.namespace_manager.qname(o)) + elements[id.label] = ProvEntity(id) + + for s, p, o in g: + if type(s) != rdflib.term.BNode: + attr = g.namespace_manager.qname(p).split(":")[1] + + subj = elements[g.namespace_manager.qname(s)] + subj.attributes[attr] = str(o) + + curr_entry = ProvEntry( + subject=g.namespace_manager.qname(s), predicate=attr, object=o + ) + + entries.append(curr_entry) + + prov_rec = ProvRecord(context=context, entries=entries) + + return elements, prov_rec diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index 853f54c75..7c36272c9 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py 
@@ -40,7 +40,7 @@ def run_wrapper(self, **kwargs):
     return run_wrapper


-def register_prov(entries_current: List[ProvEntry], out_files: Path) -> None:
+def register_prov(entries_current: ProvRecord, out_files: Path) -> None:

     # TODO: iterate over out_files and create a provenance file for each

@@ -87,16 +87,14 @@ def get_pipeline_entry(self, paths_inputs: List[Path]) -> ProvEntry:
         entity_curr = get_entity(path)
         new_entities.append(entity_curr)

-    new_activity = get_activity(self, new_agent.id, new_entities)
+    new_activity = get_activity(self, new_agent, new_entities)

     entry_curr = ProvEntry
     entry_curr.subject = new_agent
     entry_curr.predicate = ProvAssociation()
     entry_curr.object = new_activity

-    entries_command.append(entry_curr)
-
-    return entries_command
+    return entry_curr


 def write_prov_file(
@@ -140,18 +138,15 @@ def get_agent() -> ProvAgent:
     import clinica
     from .prov_utils import get_agent_id

-    new_agent = ProvAgent()
+    new_agent = ProvAgent(id=get_agent_id())

     new_agent.attributes["version"] = clinica.__version__
     new_agent.attributes["label"] = clinica.__name__
-    new_agent.id = get_agent_id(new_agent)

     return new_agent


-def get_activity(
-    self, agent_id: Identifier, entities: List[ProvEntity]
-) -> ProvActivity:
+def get_activity(self, agent: Identifier, entities: List[ProvEntity]) -> ProvActivity:
     """
     return
         ProvActivity from related entities and associated agent
@@ -159,16 +154,13 @@ def get_activity(
     import sys
     from .prov_utils import get_activity_id

-    new_activity = ProvActivity()
+    new_activity = ProvActivity(id=get_activity_id(self.fullname))

     new_activity.attributes["parameters"] = self.parameters
     new_activity.attributes["label"] = self.fullname
-    new_activity.id = get_activity_id(self.fullname)
     new_activity.attributes["command"] = (sys.argv[1:],)
-
-    # TODO include related agent and entity to the activity
-    # activity_agent = agent_id
-    # activity_used_files = [e["@id"] for e in entities]
+    new_activity.attributes["used"] = [x.id for x in entities]
+    new_activity.attributes["wasAssociatedWith"] = agent.id

     return new_activity

@@ -180,9 +172,7 @@ def get_entity(path_curr: Path) -> ProvEntity:
     """

-    from clinica.engine.prov_utils import get_entity_id
-
-    new_entity = ProvEntity()
-
-    new_entity.id = get_entity_id(path_curr)
+    new_entity = ProvEntity(id=get_entity_id(path_curr))
     new_entity.attributes["label"] = path_curr.name
     new_entity.attributes["path"] = path_curr

@@ -204,29 +194,16 @@ def create_prov_file(prov_command, prov_path):
     return


-def validate_command(
-    prov_context: ProvRecord, prov_command: List[Optional[ProvEntry]]
-) -> bool:
+def validate_command(prov_record: ProvRecord, prov_entry: ProvEntry) -> bool:
     """
     Check the command is valid on the data being run
     """
     flag = True
-    prov_subject = prov_command[0].subject
-    new_activity_id = prov_command["Activity"][0]["@id"]
-    new_agent_id = prov_command["Agent"][0]["@id"]
-
-    for entity in prov_context["Entity"]:
-        old_activity_id = entity["wasGeneratedBy"]
-        if old_activity_id:
-            ptr_activity = next(
-                item
-                for item in prov_context["Activity"]
-                if item["@id"] == old_activity_id
-            )
-            old_agent_id = ptr_activity["wasAssociatedWith"]
-            flag and is_valid(
-                {(old_agent_id, old_activity_id): (new_agent_id, new_activity_id)}
-            )
+
+    for entry in prov_record.entries:
+        # TODO: check that the record entries are compatible with the current entry
+        flag = True
+
     return flag
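A note on the deserialization introduced in the patch above: rdflib flattens the JSON-LD document into (subject, predicate, object) triples, and the prefixes declared in @context end up on the graph's namespace manager, which is what the qname() calls rely on to compact URIs back to labels. Below is a minimal, self-contained sketch of that round trip, assuming rdflib >= 6.0 (older versions need the separate rdflib-jsonld plugin); the inline document and its clin: identifiers are made up for illustration and are not a real Clinica record.

import rdflib

# Hypothetical JSON-LD provenance document: one activity associated with
# one agent. The clin: namespace and identifiers are illustrative only.
doc = """
{
    "@context": {
        "clin": "https://example.org/clinica#",
        "prov": "http://www.w3.org/ns/prov#"
    },
    "@id": "clin:t1-linear",
    "@type": "prov:Activity",
    "prov:wasAssociatedWith": {"@id": "clin:clinica", "@type": "prov:Agent"}
}
"""

g = rdflib.Graph()
g.parse(data=doc, format="json-ld")

# The @context prefixes are now graph namespaces, so qname() can compact
# full URIs such as http://www.w3.org/ns/prov#Activity to prov:Activity.
for s, p, o in g:
    print(g.namespace_manager.qname(s), g.namespace_manager.qname(p), o)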
From 8f384c5dda25ebde550061fdb4a3171e0aa9807b Mon Sep 17 00:00:00 2001
From: "omar.elrifai"
Date: Wed, 2 Feb 2022 18:05:39 +0100
Subject: [PATCH 11/24] Update record data model and serialization/deserialization

---
 clinica/engine/prov_model.py | 29 +++++++++++++++++++++++++++--
 clinica/engine/prov_utils.py | 15 +++++++--------
 clinica/engine/provenance.py | 29 ++++++++++++++---------------
 3 files changed, 48 insertions(+), 25 deletions(-)

diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py
index a462c16fc..155e5b140 100644
--- a/clinica/engine/prov_model.py
+++ b/clinica/engine/prov_model.py
@@ -1,9 +1,12 @@
+from xml.dom.minidom import Element
 from attr import define, field
 import attr
-import typing
+import cattr
 from typing import Union, List
 from abc import ABC, abstractmethod

+from matplotlib.style import context
+
 # Define PROV abstract concepts

@@ -39,6 +42,10 @@ def attributes(cls):
         """attributes are optional"""
         return NotImplementedError

+    @classmethod
+    def get_type(cls):
+        return type(cls).__name__
+

 class ProvRelation(ABC):

@@ -135,4 +142,22 @@ class ProvRecord:
     """

     context: ProvContext = field()
-    entries: List[ProvEntry] = field(default=[])
+    elements: List[ProvElement] = field(default=[])
+
+    def __getitem__(self, idx):
+        for element in self.elements:
+            if element.id == idx:
+                return element
+
+    def to_json(self):
+        json_dict = {}
+        json_dict["prov:Agent"] = [
+            cattr.unstructure(x) for x in self.elements if isinstance(x, ProvAgent)
+        ]
+        json_dict["prov:Activity"] = [
+            cattr.unstructure(x) for x in self.elements if isinstance(x, ProvActivity)
+        ]
+        json_dict["prov:Entity"] = [
+            cattr.unstructure(x) for x in self.elements if isinstance(x, ProvEntity)
+        ]
+        return json_dict
diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py
index f32f3bafd..322d1fc45 100644
--- a/clinica/engine/prov_utils.py
+++ b/clinica/engine/prov_utils.py
@@ -118,7 +118,7 @@ def read_prov_jsonld(path_prov: Path) -> Optional[ProvRecord]:
     """

     if path_prov.exists():
-        elements, prov_record = deserialize_jsonld(path_prov)
+        prov_record = deserialize_jsonld(path_prov)
         return prov_record

     return None
@@ -137,7 +137,6 @@ def deserialize_jsonld(path_prov) -> ProvRecord:
     g.parse(path_prov, format="json-ld")

     elements = {}
-    entries = []

     # fetch context:
     context = ProvContext([])
@@ -166,12 +165,12 @@ def deserialize_jsonld(path_prov) -> ProvRecord:
             subj = elements[g.namespace_manager.qname(s)]
             subj.attributes[attr] = str(o)

-            curr_entry = ProvEntry(
-                subject=g.namespace_manager.qname(s), predicate=attr, object=o
-            )
+            # curr_entry = ProvEntry(
+            #     subject=g.namespace_manager.qname(s), predicate=attr, object=o
+            # )

-            entries.append(curr_entry)
+            # entries.append(curr_entry)

-    prov_rec = ProvRecord(context=context, entries=entries)
+    prov_rec = ProvRecord(context=context, elements=list(elements.values()))

-    return elements, prov_rec
+    return prov_rec
diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py
index 7c36272c9..632e157b6 100644
--- a/clinica/engine/provenance.py
+++ b/clinica/engine/provenance.py
@@ -4,6 +4,8 @@
 from pathlib import Path
 from typing import Optional, List

+from torch import ne
+
 from clinica.engine.prov_utils import read_prov_jsonld

 from .prov_model import *
@@ -21,10 +23,10 @@ def run_wrapper(self, **kwargs):
         )

         prov_record = get_prov_record(paths_files=paths_input_files)
-        prov_entry = get_pipeline_entry(self, paths_input_files)
+        prov_entry = get_pipeline_record(self, paths_input_files)

         if validate_command(prov_record, prov_entry):
-            ret = func(self)
+            # ret = func(self)
             print("The pipeline succesfully executed.")
         else:
             raise Exception(
@@ -63,38 +65,35 @@ def get_prov_record(paths_files: 
List[Path]) -> ProvRecord: for path in paths_files: prov_record_tmp = read_prov_jsonld(get_path_prov(path)) if prov_record_tmp: - prov_record.entries.extend(prov_record_tmp.entries) + # TODO extend context as well + prov_record.elements.extend(prov_record_tmp.elements) return prov_record -def get_pipeline_entry(self, paths_inputs: List[Path]) -> ProvEntry: +def get_pipeline_record(self, paths_inputs: List[Path]) -> ProvRecord: """ params: paths_inputs: list of input entries paths return: - ProvEntry associated with the launched pipeline + ProvRecord associated with the launched pipeline """ import sys - entries_command = [] - + elements = [] new_agent = get_agent() - + elements.append(new_agent) new_entities = [] for path in paths_inputs: entity_curr = get_entity(path) new_entities.append(entity_curr) + elements.extend(new_entities) new_activity = get_activity(self, new_agent, new_entities) + elements.append(new_activity) - entry_curr = ProvEntry - entry_curr.subject = new_agent - entry_curr.predicate = ProvAssociation() - entry_curr.object = new_activity - - return entry_curr + return ProvRecord(context={}, elements=elements) def write_prov_file( @@ -200,7 +199,7 @@ def validate_command(prov_record: ProvRecord, prov_entry: ProvEntry) -> bool: """ flag = True - for entry in prov_record.entries: + for el in prov_record.elements: # TODO: check that the record entries are compatible with the current entry flag = True From d32e06fc4178d2824b7debbe32df0b79bfde3b05 Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Thu, 3 Feb 2022 16:30:27 +0100 Subject: [PATCH 12/24] Clean up unused code --- clinica/engine/prov_model.py | 8 ++--- clinica/engine/prov_utils.py | 58 +++++++++++-------------------- clinica/engine/provenance.py | 67 +++++++++++++----------------------- 3 files changed, 49 insertions(+), 84 deletions(-) diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py index 155e5b140..609a6136d 100644 --- a/clinica/engine/prov_model.py +++ b/clinica/engine/prov_model.py @@ -62,7 +62,7 @@ class ProvEntity(ProvElement): """Provenance Entity element""" id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) - attributes: dict = field(default={}) + attributes: dict = field(default=attr.Factory(dict)) @define @@ -70,7 +70,7 @@ class ProvActivity(ProvElement): """Provenance Activity element""" id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) - attributes: dict = field(default={}) + attributes: dict = field(default=attr.Factory(dict)) @define @@ -79,7 +79,7 @@ class ProvAgent(ProvElement): id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) attributes: dict = field( - default={}, + default=attr.Factory(dict), validator=attr.validators.optional(attr.validators.instance_of(dict)), ) @@ -149,7 +149,7 @@ def __getitem__(self, idx): if element.id == idx: return element - def to_json(self): + def json(self): json_dict = {} json_dict["prov:Agent"] = [ cattr.unstructure(x) for x in self.elements if isinstance(x, ProvAgent) diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py index 322d1fc45..652fd44b2 100644 --- a/clinica/engine/prov_utils.py +++ b/clinica/engine/prov_utils.py @@ -48,41 +48,17 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> List[ return ret_files -def is_entity_tracked(prov_context: dict, entity_id: str) -> bool: - flag_exists = next( - (True for item in prov_context["Entity"] if item["@id"] == entity_id), - False, - ) - return flag_exists - - -def 
is_agent_tracked(prov_context: dict, agent_id: str) -> bool: - flag_exists = next( - (True for item in prov_context["Agent"] if item["@id"] == agent_id), - False, - ) - return flag_exists - - -def is_activity_tracked(prov_context: dict, activity_id: str) -> bool: - flag_exists = next( - (True for item in prov_context["Activity"] if item["@id"] == activity_id), - False, - ) - return flag_exists - - -def get_entity_id(path_file: Path) -> str: +def generate_entity_id(path_file: Path) -> Identifier: id = Identifier(label=path_file.with_suffix("").name) return id -def get_activity_id(pipeline_name: str) -> Identifier: +def generate_activity_id(pipeline_name: str) -> Identifier: id = Identifier(label="clin:" + pipeline_name) return id -def get_agent_id() -> Identifier: +def generate_agent_id() -> Identifier: id = Identifier(label="RRID:Clinica") return id @@ -94,9 +70,12 @@ def get_last_activity(path_entity: Path) -> Optional[ProvActivity]: """ prov_record = read_prov_jsonld(get_path_prov(path_entity)) - if prov_record and prov_record.entries: - last_activity = prov_record.entries[-1]["@id"] - return last_activity + if prov_record and prov_record.elements: + # TODO: filter activities by date + last_activity = [ + x for x in prov_record.elements if isinstance(x, ProvActivity) + ][-1] + return last_activity.id.label return None @@ -112,6 +91,17 @@ def get_path_prov(path_entity: Path) -> Path: return path_prov +def create_prov_file(prov_command, prov_path): + """ + Create new provenance file based on command + """ + import json + + with open(prov_path, "w") as fp: + json.dump(prov_command.json(), fp, indent=4) + return + + def read_prov_jsonld(path_prov: Path) -> Optional[ProvRecord]: """ return: ProvRecord in a specific location stored in jsonld format @@ -128,7 +118,7 @@ def deserialize_jsonld(path_prov) -> ProvRecord: """ params: - return list of ProvEntry objects from jsonld dictionary data + return ProvRecord object from jsonld dictionary data """ import rdflib @@ -165,12 +155,6 @@ def deserialize_jsonld(path_prov) -> ProvRecord: subj = elements[g.namespace_manager.qname(s)] subj.attributes[attr] = str(o) - # curr_entry = ProvEntry( - # subject=g.namespace_manager.qname(s), predicate=attr, object=o - # ) - - # entries.append(curr_entry) - prov_rec = ProvRecord(context=context, elements=list(elements.values())) return prov_rec diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index 632e157b6..78c664ac7 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py @@ -2,11 +2,9 @@ from os import read from pathlib import Path -from typing import Optional, List +from typing import List -from torch import ne - -from clinica.engine.prov_utils import read_prov_jsonld +from clinica.engine.prov_utils import create_prov_file from .prov_model import * @@ -22,10 +20,10 @@ def run_wrapper(self, **kwargs): self, pipeline_fullname, dict_field="input_to" ) - prov_record = get_prov_record(paths_files=paths_input_files) - prov_entry = get_pipeline_record(self, paths_input_files) + prov_history = get_history_record(paths_files=paths_input_files) + prov_current = get_pipeline_record(self, paths_input_files) - if validate_command(prov_record, prov_entry): + if validate_command(prov_history, prov_current): # ret = func(self) print("The pipeline succesfully executed.") else: @@ -35,7 +33,7 @@ def run_wrapper(self, **kwargs): paths_out_files = get_files_list( self, pipeline_fullname, dict_field="output_from" ) - register_prov(prov_entry, paths_out_files) + 
register_prov(prov_current, paths_out_files) return ret @@ -52,7 +50,7 @@ def register_prov(entries_current: ProvRecord, out_files: Path) -> None: return True -def get_prov_record(paths_files: List[Path]) -> ProvRecord: +def get_history_record(paths_files: List[Path]) -> ProvRecord: """ return: a ProvRecord for the associated files in path_files @@ -97,10 +95,10 @@ def get_pipeline_record(self, paths_inputs: List[Path]) -> ProvRecord: def write_prov_file( - list_prov_entries: list, path_entity: Path, overwrite=False + list_prov_entries: ProvRecord, path_entity: Path, overwrite=False ) -> None: """ - Append the current provenance info to the prov file. If it does not exist, create new + Create provenance file with current pipeline information params: prov_entries: list of ProvEntry @@ -111,13 +109,8 @@ def write_prov_file( prov_path = get_path_prov(path_entity) - if prov_path.exists(): - # append the pipeline provenance information to the old provenance file - prov_record = read_prov_jsonld(prov_path) - prov_record.extend(list_prov_entries) - else: - create_prov_file(list_prov_entries, prov_path) - # create new provenance file with pipeline information + create_prov_file(list_prov_entries, prov_path) + return @@ -135,9 +128,9 @@ def extend_prov(prov_main: dict, prov_new: dict) -> dict: def get_agent() -> ProvAgent: import clinica - from .prov_utils import get_agent_id + from .prov_utils import generate_agent_id - new_agent = ProvAgent(id=get_agent_id()) + new_agent = ProvAgent(id=generate_agent_id()) new_agent.attributes["version"] = clinica.__version__ new_agent.attributes["label"] = clinica.__name__ @@ -151,9 +144,9 @@ def get_activity(self, agent: Identifier, entities: List[ProvEntity]) -> ProvAct ProvActivity from related entities and associated agent """ import sys - from .prov_utils import get_activity_id + from .prov_utils import generate_activity_id - new_activity = ProvActivity(id=get_activity_id(self.fullname)) + new_activity = ProvActivity(id=generate_activity_id(self.fullname)) new_activity.attributes["parameters"] = self.parameters new_activity.attributes["label"] = self.fullname @@ -169,40 +162,28 @@ def get_entity(path_curr: Path) -> ProvEntity: return an Entity object from the file in path_curr """ - from clinica.engine.prov_utils import get_entity_id + from clinica.engine.prov_utils import generate_entity_id, get_last_activity - new_entity = ProvEntity(id=get_entity_id(path_curr)) + new_entity = ProvEntity(id=generate_entity_id(path_curr)) new_entity.attributes["label"] = path_curr.name - new_entity.attributes["path"] = path_curr + new_entity.attributes["path"] = str(path_curr) # TODO: implement function to return the latest associated activity - # new_entity.attributes["wasGeneratedBy"] = get_last_activity(path_curr) + new_entity.attributes["wasGeneratedBy"] = get_last_activity(path_curr) return new_entity -def create_prov_file(prov_command, prov_path): - """ - Create new provenance file based on command - """ - import json - - with open(prov_path, "w") as fp: - json.dump(prov_command, fp, indent=4) - - return - - -def validate_command(prov_record: ProvRecord, prov_entry: ProvEntry) -> bool: +def validate_command(prov_history: ProvRecord, prov_current: ProvRecord) -> bool: """ Check the command is valid on the data being run """ flag = True - for el in prov_record.elements: - # TODO: check that the record entries are compatible with the current entry - flag = True - + for a in prov_history.elements: + for b in prov_current.elements: + # TODO: check that the record entries 
are compatible with the current entry + flag = True return flag From 126ce0b733d104ac2dc99e918804c159636836ad Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Thu, 3 Feb 2022 17:22:53 +0100 Subject: [PATCH 13/24] fix conflict in rebase --- clinica/utils/input_files.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/clinica/utils/input_files.py b/clinica/utils/input_files.py index 93ce26edb..99190dbcd 100644 --- a/clinica/utils/input_files.py +++ b/clinica/utils/input_files.py @@ -437,10 +437,6 @@ def pet_linear_nii(acq_label, suvr_reference_region, uncropped_image): f"*_acq-{acq_label}_pet_space-MNI152NLin2009cSym{description}_res-1x1x1_suvr-{suvr_reference_region}_pet.nii.gz", ), "description": "", -<<<<<<< HEAD - "needed_pipeline": "pet-linear", -======= "output_from": "pet-linear", ->>>>>>> de9d4d8b (Init traceability feature) } return information From 672c28c75e39d2cdb0b1e56d3fcda2b90cd45eeb Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Mon, 7 Feb 2022 14:36:00 +0100 Subject: [PATCH 14/24] Update prov jsonld representation --- clinica/engine/prov_model.py | 71 ++++++++++-------------------------- clinica/engine/prov_utils.py | 3 +- clinica/engine/provenance.py | 12 +++--- 3 files changed, 28 insertions(+), 58 deletions(-) diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py index 609a6136d..457ac725b 100644 --- a/clinica/engine/prov_model.py +++ b/clinica/engine/prov_model.py @@ -28,12 +28,15 @@ class Identifier: validator=attr.validators.optional(attr.validators.instance_of(str)), ) + def __repr__(self): + return "%s" % self.label + class ProvElement(ABC): @property @classmethod @abstractmethod - def id(cls): + def uid(cls): """id is required for ProvElements""" return NotImplementedError @@ -61,67 +64,33 @@ class ProvRelation(ABC): class ProvEntity(ProvElement): """Provenance Entity element""" - id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) + uid: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) attributes: dict = field(default=attr.Factory(dict)) + def unstrct(self): + return {"id": str(self.uid), **self.attributes} + @define class ProvActivity(ProvElement): """Provenance Activity element""" - id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) + uid: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) attributes: dict = field(default=attr.Factory(dict)) + def unstrct(self): + return {"id": str(self.uid), **self.attributes} + @define class ProvAgent(ProvElement): """Provenance Agent element""" - id: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) - attributes: dict = field( - default=attr.Factory(dict), - validator=attr.validators.optional(attr.validators.instance_of(dict)), - ) - - -# Define PROV Relations - - -@define -class ProvGeneration(ProvRelation): - id: Identifier = field( - init=False, - validator=attr.validators.optional(attr.validators.instance_of(Identifier)), - ) - - src: ProvActivity = field( - init=False, - validator=attr.validators.optional(attr.validators.instance_of(ProvActivity)), - ) - dest: ProvEntity = field( - init=False, - validator=attr.validators.optional(attr.validators.instance_of(ProvEntity)), - ) - - def __attrs_post_init__(self): - self.id = Identifier(label="") - self.src = ProvActivity() - self.dest = ProvEntity() - - # entity: an identifier (e) for a created entity; - # activity: an OPTIONAL identifier (a) for the activity that creates the entity; - # time: an OPTIONAL "generation time" (t), the 
time at which the entity was completely created; - # attributes: an OPTIONALa - - -@define -class ProvUsage(ProvRelation): - pass - + uid: Identifier = field(validator=[attr.validators.instance_of(Identifier)]) + attributes: dict = field(default=attr.Factory(dict)) -@define -class ProvAssociation(ProvRelation): - pass + def unstrct(self): + return {"id": str(self.uid), **self.attributes} @define @@ -146,18 +115,18 @@ class ProvRecord: def __getitem__(self, idx): for element in self.elements: - if element.id == idx: + if element.uid == idx: return element def json(self): json_dict = {} json_dict["prov:Agent"] = [ - cattr.unstructure(x) for x in self.elements if isinstance(x, ProvAgent) + x.unstrct() for x in self.elements if isinstance(x, ProvAgent) ] json_dict["prov:Activity"] = [ - cattr.unstructure(x) for x in self.elements if isinstance(x, ProvActivity) + x.unstrct() for x in self.elements if isinstance(x, ProvActivity) ] json_dict["prov:Entity"] = [ - cattr.unstructure(x) for x in self.elements if isinstance(x, ProvEntity) + x.unstrct() for x in self.elements if isinstance(x, ProvEntity) ] return json_dict diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py index 652fd44b2..332c14fda 100644 --- a/clinica/engine/prov_utils.py +++ b/clinica/engine/prov_utils.py @@ -75,7 +75,7 @@ def get_last_activity(path_entity: Path) -> Optional[ProvActivity]: last_activity = [ x for x in prov_record.elements if isinstance(x, ProvActivity) ][-1] - return last_activity.id.label + return str(last_activity.uid) return None @@ -99,6 +99,7 @@ def create_prov_file(prov_command, prov_path): with open(prov_path, "w") as fp: json.dump(prov_command.json(), fp, indent=4) + return diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index 78c664ac7..b5705ad11 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py @@ -105,7 +105,7 @@ def write_prov_file( entity_path: path of the prov-associated element """ - from .prov_utils import read_prov_jsonld, get_path_prov + from .prov_utils import get_path_prov prov_path = get_path_prov(path_entity) @@ -130,7 +130,7 @@ def get_agent() -> ProvAgent: import clinica from .prov_utils import generate_agent_id - new_agent = ProvAgent(id=generate_agent_id()) + new_agent = ProvAgent(uid=generate_agent_id()) new_agent.attributes["version"] = clinica.__version__ new_agent.attributes["label"] = clinica.__name__ @@ -146,13 +146,13 @@ def get_activity(self, agent: Identifier, entities: List[ProvEntity]) -> ProvAct import sys from .prov_utils import generate_activity_id - new_activity = ProvActivity(id=generate_activity_id(self.fullname)) + new_activity = ProvActivity(uid=generate_activity_id(self.fullname)) new_activity.attributes["parameters"] = self.parameters new_activity.attributes["label"] = self.fullname new_activity.attributes["command"] = (sys.argv[1:],) - new_activity.attributes["used"] = [x.id for x in entities] - new_activity.attributes["wasAssociatedWith"] = agent.id + new_activity.attributes["used"] = [str(x.uid) for x in entities] + new_activity.attributes["wasAssociatedWith"] = str(agent.uid) return new_activity @@ -164,7 +164,7 @@ def get_entity(path_curr: Path) -> ProvEntity: from clinica.engine.prov_utils import generate_entity_id, get_last_activity - new_entity = ProvEntity(id=generate_entity_id(path_curr)) + new_entity = ProvEntity(uid=generate_entity_id(path_curr)) new_entity.attributes["label"] = path_curr.name new_entity.attributes["path"] = str(path_curr) From 39a2064319f3f383bb251e974b29e3df4dfc1376 
Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Tue, 8 Feb 2022 11:36:33 +0100 Subject: [PATCH 15/24] manually lint code --- clinica/engine/prov_model.py | 9 ++++----- clinica/engine/prov_utils.py | 4 ++-- clinica/engine/provenance.py | 13 +++++++------ clinica/pipelines/engine.py | 1 + 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py index 457ac725b..1bc1e30bc 100644 --- a/clinica/engine/prov_model.py +++ b/clinica/engine/prov_model.py @@ -1,13 +1,12 @@ +from abc import ABC, abstractmethod +from typing import List, Union from xml.dom.minidom import Element -from attr import define, field + import attr import cattr -from typing import Union, List -from abc import ABC, abstractmethod - +from attr import define, field from matplotlib.style import context - # Define PROV abstract concepts diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py index 332c14fda..fde16cea3 100644 --- a/clinica/engine/prov_utils.py +++ b/clinica/engine/prov_utils.py @@ -1,5 +1,5 @@ -from typing import Optional, List from pathlib import Path +from typing import List, Optional from .prov_model import * @@ -12,8 +12,8 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> List[ return list of 'Path's to the files used in the pipeline """ - from clinica.utils.inputs import clinica_file_reader import clinica.utils.input_files as cif + from clinica.utils.inputs import clinica_file_reader dict_field_options = ["input_to", "output_from"] if dict_field not in dict_field_options: diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index b5705ad11..3bb70932e 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py @@ -1,6 +1,5 @@ import functools from os import read - from pathlib import Path from typing import List @@ -56,7 +55,7 @@ def get_history_record(paths_files: List[Path]) -> ProvRecord: a ProvRecord for the associated files in path_files """ - from .prov_utils import read_prov_jsonld, get_path_prov + from .prov_utils import get_path_prov, read_prov_jsonld prov_record = ProvRecord({}, []) @@ -127,13 +126,14 @@ def extend_prov(prov_main: dict, prov_new: dict) -> dict: def get_agent() -> ProvAgent: - import clinica + from clinica import __name__, __version__ + from .prov_utils import generate_agent_id new_agent = ProvAgent(uid=generate_agent_id()) - new_agent.attributes["version"] = clinica.__version__ - new_agent.attributes["label"] = clinica.__name__ + new_agent.attributes["version"] = __version__ + new_agent.attributes["label"] = __name__ return new_agent @@ -144,13 +144,14 @@ def get_activity(self, agent: Identifier, entities: List[ProvEntity]) -> ProvAct ProvActivity from related entities and associated agent """ import sys + from .prov_utils import generate_activity_id new_activity = ProvActivity(uid=generate_activity_id(self.fullname)) new_activity.attributes["parameters"] = self.parameters new_activity.attributes["label"] = self.fullname - new_activity.attributes["command"] = (sys.argv[1:],) + new_activity.attributes["command"] = sys.argv[1:] new_activity.attributes["used"] = [str(x.uid) for x in entities] new_activity.attributes["wasAssociatedWith"] = str(agent.uid) diff --git a/clinica/pipelines/engine.py b/clinica/pipelines/engine.py index 68e3562f7..da7d35fae 100644 --- a/clinica/pipelines/engine.py +++ b/clinica/pipelines/engine.py @@ -7,6 +7,7 @@ import click from nipype.pipeline.engine import Workflow + import clinica.engine.provenance as prov 
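One detail of the model worth spelling out, since the earlier clean-up commit switched the element attributes from field(default={}) to field(default=attr.Factory(dict)): a literal default is evaluated once and risks being shared, the same trap as a mutable function default, whereas attr.Factory(dict) calls dict() freshly per instance. A minimal sketch of the difference follows; add_item and ProvElementSketch are made-up names, not part of the code above.

import attr
from attr import define, field


def add_item(item, bucket={}):
    # Classic pitfall: the literal {} is created once, at definition time,
    # so every call without an explicit bucket mutates the same dict.
    bucket[item] = True
    return bucket


print(add_item("a"))  # {'a': True}
print(add_item("b"))  # {'a': True, 'b': True} -- state leaked between calls


@define
class ProvElementSketch:
    # attr.Factory(dict) runs dict() once per instance instead, so no two
    # elements ever share an attributes dict.
    attributes: dict = field(default=attr.Factory(dict))


a, b = ProvElementSketch(), ProvElementSketch()
a.attributes["label"] = "t1w"
print(b.attributes)  # {} -- b got its own, untouched dict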
From 46b98924c84dcc90fe662196c2a751a4bffa6be5 Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Tue, 15 Feb 2022 11:23:43 +0100 Subject: [PATCH 16/24] Start extending workflow to other pipelines --- clinica/engine/prov_utils.py | 44 ++++++++++++++++++++++++++++-------- clinica/engine/provenance.py | 7 +++--- clinica/utils/input_files.py | 1 + 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py index fde16cea3..ed4e0fc13 100644 --- a/clinica/engine/prov_utils.py +++ b/clinica/engine/prov_utils.py @@ -1,10 +1,14 @@ from pathlib import Path from typing import List, Optional +from clinica.utils.input_files import pet_linear_nii + from .prov_model import * -def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> List[Path]: +def get_files_list( + self, pipeline_fullname: str, dict_field="input_to", pipeline_args={} +) -> List[Path]: """ params: pipeline_fullname: the current running pipeline name @@ -13,21 +17,31 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> List[ return list of 'Path's to the files used in the pipeline """ import clinica.utils.input_files as cif + from clinica.utils.input_files import pet_linear_nii from clinica.utils.inputs import clinica_file_reader + funcs = {"pet-linear": pet_linear_nii} + dict_field_options = ["input_to", "output_from"] if dict_field not in dict_field_options: raise (f"dict_field must be one of {dict_field_options}") # Retrieve all the data dict from the input_files module - files_dicts = { - k: v - for k, v in vars(cif).items() - if isinstance(v, dict) - and dict_field in v.keys() - and pipeline_fullname in v[dict_field] - } + if pipeline_fullname in funcs and dict_field == "output_from": + files_dicts = { + "PET": funcs[pipeline_fullname]( + **clean_arguments(pipeline_args, funcs[pipeline_fullname]) + ) + } + else: + files_dicts = { + k: v + for k, v in vars(cif).items() + if isinstance(v, dict) + and dict_field in v.keys() + and pipeline_fullname in v[dict_field] + } # TODO: check if bids or caps as output ret_files = [] @@ -40,7 +54,7 @@ def get_files_list(self, pipeline_fullname: str, dict_field="input_to") -> List[ self.sessions, ref_dir, files_dicts[elem], - raise_exception=False, + raise_exception=True, ) if current_file: ret_files.extend([Path(x) for x in current_file]) @@ -136,7 +150,6 @@ def deserialize_jsonld(path_prov) -> ProvRecord: context._namespaces.append(namespace) for s, p, o in g: - if str(p) == "http://www.w3.org/ns/prov#Activity": id = Identifier(label=g.namespace_manager.qname(o)) elements[id.label] = ProvActivity(id) @@ -159,3 +172,14 @@ def deserialize_jsonld(path_prov) -> ProvRecord: prov_rec = ProvRecord(context=context, elements=list(elements.values())) return prov_rec + + +def clean_arguments(pipeline_args, file_func): + import inspect + + argspec = inspect.getargspec(file_func) + if not argspec.keywords: + for key in pipeline_args.copy().keys(): + if key not in argspec.args: + del pipeline_args[key] + return pipeline_args diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index 3bb70932e..c747c5f72 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py @@ -14,23 +14,24 @@ def provenance(func): @functools.wraps(func) def run_wrapper(self, **kwargs): ret = [] + pipeline_args = self.parameters pipeline_fullname = self.fullname paths_input_files = get_files_list( - self, pipeline_fullname, dict_field="input_to" + self, pipeline_fullname, "input_to", pipeline_args 
) prov_history = get_history_record(paths_files=paths_input_files) prov_current = get_pipeline_record(self, paths_input_files) if validate_command(prov_history, prov_current): - # ret = func(self) + ret = func(self) print("The pipeline succesfully executed.") else: raise Exception( "The pipeline selected is incompatible with the input files provenance" ) paths_out_files = get_files_list( - self, pipeline_fullname, dict_field="output_from" + self, pipeline_fullname, "output_from", pipeline_args ) register_prov(prov_current, paths_out_files) diff --git a/clinica/utils/input_files.py b/clinica/utils/input_files.py index 99190dbcd..37f852abb 100644 --- a/clinica/utils/input_files.py +++ b/clinica/utils/input_files.py @@ -136,6 +136,7 @@ "pattern": "*space-MNI152NLin2009cSym_res-1x1x1_T1w.nii.gz", "description": "T1w image registered in MNI152NLin2009cSym space using t1-linear pipeline", "output_from": "t1-linear", + "input_to": "pet-linear", } T1W_LINEAR_CROPPED = { From f2c5300f211efe9d4562cc915ca34cf0d5437ed6 Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Wed, 23 Feb 2022 16:51:16 +0100 Subject: [PATCH 17/24] Connect provenance through nipype nodes --- clinica/engine/__init__.py | 3 + clinica/engine/prov_utils.py | 152 ++++++++++++-------- clinica/engine/provenance.py | 263 ++++++++++++++++------------------- clinica/pipelines/engine.py | 6 +- 4 files changed, 221 insertions(+), 203 deletions(-) diff --git a/clinica/engine/__init__.py b/clinica/engine/__init__.py index 5396b9d94..c9c7623f4 100644 --- a/clinica/engine/__init__.py +++ b/clinica/engine/__init__.py @@ -1 +1,4 @@ from .cmdparser import CmdParser +from nipype import config + +config.enable_debug_mode() diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py index ed4e0fc13..1428b48c7 100644 --- a/clinica/engine/prov_utils.py +++ b/clinica/engine/prov_utils.py @@ -1,65 +1,59 @@ from pathlib import Path from typing import List, Optional +from clinica.engine.prov_model import * -from clinica.utils.input_files import pet_linear_nii -from .prov_model import * +def mint_agent() -> ProvAgent: + """ + return + ProvAgent associated with running version of the software + """ + from clinica import __name__, __version__ + from clinica.engine.prov_utils import generate_agent_id + + new_agent = ProvAgent(uid=generate_agent_id()) + + new_agent.attributes["version"] = __version__ + new_agent.attributes["label"] = __name__ + + return new_agent -def get_files_list( - self, pipeline_fullname: str, dict_field="input_to", pipeline_args={} -) -> List[Path]: +def mint_activity(agent: Identifier, entities: List[ProvEntity]) -> ProvActivity: """ - params: - pipeline_fullname: the current running pipeline name - dict_field: variable to specify if fetching inputs or outputs to the pipeline + return + ProvActivity from related entities and associated agent + """ + import sys + + from clinica.engine.prov_utils import generate_activity_id + + new_activity = ProvActivity(uid=generate_activity_id("testfullname")) - return list of 'Path's to the files used in the pipeline + new_activity.attributes["parameters"] = "testparameters" + new_activity.attributes["label"] = "testfullname" + new_activity.attributes["command"] = sys.argv[1:] + new_activity.attributes["used"] = [str(x.uid) for x in entities] + new_activity.attributes["wasAssociatedWith"] = str(agent.uid) + + return new_activity + + +def mint_entity(path_curr: Path) -> ProvEntity: + """ + return an Entity object from the file in path_curr """ - import clinica.utils.input_files as cif - 
from clinica.utils.input_files import pet_linear_nii - from clinica.utils.inputs import clinica_file_reader - funcs = {"pet-linear": pet_linear_nii} + from clinica.engine.prov_utils import generate_entity_id, get_last_activity - dict_field_options = ["input_to", "output_from"] - if dict_field not in dict_field_options: - raise (f"dict_field must be one of {dict_field_options}") + new_entity = ProvEntity(uid=generate_entity_id(path_curr)) + new_entity.attributes["label"] = path_curr.name + new_entity.attributes["path"] = str(path_curr) - # Retrieve all the data dict from the input_files module + # TODO: implement function to return the latest associated activity + new_entity.attributes["wasGeneratedBy"] = get_last_activity(path_curr) - if pipeline_fullname in funcs and dict_field == "output_from": - files_dicts = { - "PET": funcs[pipeline_fullname]( - **clean_arguments(pipeline_args, funcs[pipeline_fullname]) - ) - } - else: - files_dicts = { - k: v - for k, v in vars(cif).items() - if isinstance(v, dict) - and dict_field in v.keys() - and pipeline_fullname in v[dict_field] - } - # TODO: check if bids or caps as output - - ret_files = [] - for elem in files_dicts: - ref_dir = ( - self.bids_directory if dict_field == "input_to" else self.caps_directory - ) - current_file, _ = clinica_file_reader( - self.subjects, - self.sessions, - ref_dir, - files_dicts[elem], - raise_exception=True, - ) - if current_file: - ret_files.extend([Path(x) for x in current_file]) - - return ret_files + return new_entity def generate_entity_id(path_file: Path) -> Identifier: @@ -97,12 +91,13 @@ def get_path_prov(path_entity: Path) -> Path: """ return: Path of the provenance file associated with an entity """ - - while path_entity.suffix != "": - path_entity = path_entity.with_suffix("") - - path_prov = path_entity.with_suffix(".jsonld") - return path_prov + if path_entity.is_file(): + while path_entity.suffix != "": + path_entity = path_entity.with_suffix("") + path_prov = path_entity.with_suffix(".jsonld") + return path_prov + else: + return None def create_prov_file(prov_command, prov_path): @@ -122,7 +117,7 @@ def read_prov_jsonld(path_prov: Path) -> Optional[ProvRecord]: return: ProvRecord in a specific location stored in jsonld format """ - if path_prov.exists(): + if path_prov and path_prov.exists(): prov_record = deserialize_jsonld(path_prov) return prov_record @@ -183,3 +178,48 @@ def clean_arguments(pipeline_args, file_func): if key not in argspec.args: del pipeline_args[key] return pipeline_args + + +def validate_command(prov_history: ProvRecord, prov_current: ProvRecord) -> bool: + """ + Check the command is valid on the data being run + """ + flag = True + + for a in prov_history.elements: + for b in prov_current.elements: + # TODO: check that the record entries are compatible with the current entry + flag = True + return flag + + +def is_valid(command: dict) -> bool: + valid_list = [ + { + ("clin:clinica0.5.0", "clin:adni2Bids"): ( + "clin:clinica0.5.0", + "clin:t1-linear", + ) + } + ] + if command in valid_list: + return True + return False + + +def write_prov_file( + list_prov_entries: ProvRecord, path_entity: Path, overwrite=False +) -> None: + """ + Create provenance file with current pipeline information + + params: + prov_entries: list of ProvEntry + entity_path: path of the prov-associated element + """ + + prov_path = get_path_prov(path_entity) + + create_prov_file(list_prov_entries, prov_path) + + return diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py index 
c747c5f72..619c08db6 100644 --- a/clinica/engine/provenance.py +++ b/clinica/engine/provenance.py @@ -1,66 +1,111 @@ import functools + from os import read from pathlib import Path from typing import List -from clinica.engine.prov_utils import create_prov_file - -from .prov_model import * - def provenance(func): - from .prov_utils import get_files_list - @functools.wraps(func) def run_wrapper(self, **kwargs): - ret = [] + ret = func(self) + pipeline_args = self.parameters pipeline_fullname = self.fullname - paths_input_files = get_files_list( - self, pipeline_fullname, "input_to", pipeline_args - ) - - prov_history = get_history_record(paths_files=paths_input_files) - prov_current = get_pipeline_record(self, paths_input_files) - - if validate_command(prov_history, prov_current): - ret = func(self) - print("The pipeline succesfully executed.") - else: - raise Exception( - "The pipeline selected is incompatible with the input files provenance" - ) - paths_out_files = get_files_list( - self, pipeline_fullname, "output_from", pipeline_args - ) - register_prov(prov_current, paths_out_files) + + create_node_read(self) + create_node_update(self, pipeline_args, pipeline_fullname) + create_node_log(self) + + connect_nodes(self) return ret return run_wrapper -def register_prov(entries_current: ProvRecord, out_files: Path) -> None: +def connect_nodes(self): + # fmt: off - # TODO: iterate over out_files and create a provenance file for each + #self.output_node.outputs.get()[self.get_output_fields()[0]] + + self.connect( + [ + (self.input_node, self.prov_input_node, [("t1w", "input_files")]), + (self.input_node, self.prov_update_node, [("t1w", "input_files")]), + (self.prov_input_node, self.prov_update_node, [("prov_in_record", "prov_in_record")]), + (self.prov_update_node, self.prov_log_node,[("prov_upd_record", "prov_log_record")]), + (self.output_node, self.prov_log_node, [(self.get_output_fields()[0], "out_file")]), + ] + ) + return True + # fmt: on + + +def create_node_read(self): + import nipype.pipeline.engine as npe + import nipype.interfaces.utility as nutil + + self.prov_input_node = npe.Node( + nutil.Function( + input_names=["input_files"], + output_names=["prov_in_record"], + function=read_prov, + ), + name="ReadProvRecord", + ) + + +def create_node_update(self, parameters, fullname): + import nipype.pipeline.engine as npe + import nipype.interfaces.utility as nutil + + self.prov_update_node = npe.Node( + nutil.Function( + input_names=["input_files", "prov_in_record", "parameters", "fullname"], + output_names=["prov_upd_record"], + function=update_prov, + ), + name="UpdateRecord", + ) - for file in out_files: - write_prov_file(entries_current, file) - print("Provenance registered succesfully") return True -def get_history_record(paths_files: List[Path]) -> ProvRecord: +def create_node_log(self): + import nipype.pipeline.engine as npe + import nipype.interfaces.utility as nutil + + self.prov_log_node = npe.Node( + nutil.Function( + input_names=["prov_log_record", "out_file", "out_dir"], + output_names=["output_record"], + function=log_prov, + ), + name="LogProv", + ) + + self.prov_log_node.inputs.out_dir = self.caps_directory + return + + +def read_prov(input_files): """ return: a ProvRecord for the associated files in path_files """ - - from .prov_utils import get_path_prov, read_prov_jsonld + from clinica.engine.prov_utils import get_path_prov, read_prov_jsonld + from clinica.engine.prov_model import ProvRecord + from pathlib import Path prov_record = ProvRecord({}, []) + if 
isinstance(input_files, list):
+        paths_files = [Path(x) for x in input_files]
+    elif isinstance(input_files, str):
+        paths_files = [Path(input_files)]

     for path in paths_files:
+        print("in read_prov, path for input:", path)
         prov_record_tmp = read_prov_jsonld(get_path_prov(path))
         if prov_record_tmp:
             # TODO extend context as well
             prov_record.elements.extend(prov_record_tmp.elements)

     return prov_record


-def get_pipeline_record(self, paths_inputs: List[Path]) -> ProvRecord:
+def update_prov(input_files, prov_in_record):
     """
     params:
-    paths_inputs: list of input entries paths
+    input_files: list of input entries

     return:
         ProvRecord associated with the launched pipeline
     """
-    import sys
+    from clinica.engine.prov_utils import (
+        mint_activity,
+        mint_agent,
+        mint_entity,
+        validate_command,
+    )
+    from pathlib import Path
+    from clinica.engine.prov_model import ProvRecord

     elements = []
-    new_agent = get_agent()
+    new_agent = mint_agent()
     elements.append(new_agent)
     new_entities = []
-    for path in paths_inputs:
-        entity_curr = get_entity(path)
+    if isinstance(input_files, list):
+        paths_files = [Path(x) for x in input_files]
+    elif isinstance(input_files, str):
+        paths_files = [Path(input_files)]
+
+    for path in paths_files:
+        entity_curr = mint_entity(path)
         new_entities.append(entity_curr)
     elements.extend(new_entities)

-    new_activity = get_activity(self, new_agent, new_entities)
+    new_activity = mint_activity(new_agent, new_entities)
     elements.append(new_activity)

-    return ProvRecord(context={}, elements=elements)
+    prov_current = ProvRecord(context={}, elements=elements)
+    if not validate_command(prov_in_record, prov_current):
+        raise Exception("Invalid command")
+    return prov_current


-def write_prov_file(
-    list_prov_entries: ProvRecord, path_entity: Path, overwrite=False
-) -> None:
-    """
-    Create provenance file with current pipeline information
-
-    params:
-    prov_entries: list of ProvEntry
-    entity_path: path of the prov-associated element
-    """
-    from .prov_utils import get_path_prov
-
-    prov_path = get_path_prov(path_entity)
-
-    create_prov_file(list_prov_entries, prov_path)
-
-    return
-
-
-def extend_prov(prov_main: dict, prov_new: dict) -> dict:
-    """
-    Append a specific prov data to the global prov dict
-    """
-
-    for k in prov_new.keys():
-        for el in prov_new[k]:
-            if k in prov_main.keys() and el not in prov_main[k]:
-                prov_main[k].append(el)
-    return prov_main
-
-
-def get_agent() -> ProvAgent:
-    from clinica import __name__, __version__
-
-    from .prov_utils import generate_agent_id
-
-    new_agent = ProvAgent(uid=generate_agent_id())
-
-    new_agent.attributes["version"] = __version__
-    new_agent.attributes["label"] = __name__
-
-    return new_agent
-
-
-def get_activity(self, agent: Identifier, entities: List[ProvEntity]) -> ProvActivity:
-    """
-    return
-        ProvActivity from related entities and associated agent
-    """
-    import sys
-
-    from .prov_utils import generate_activity_id
-
-    new_activity = ProvActivity(uid=generate_activity_id(self.fullname))
-
-    new_activity.attributes["parameters"] = self.parameters
-
new_activity.attributes["label"] = self.fullname - new_activity.attributes["command"] = sys.argv[1:] - new_activity.attributes["used"] = [str(x.uid) for x in entities] - new_activity.attributes["wasAssociatedWith"] = str(agent.uid) - - return new_activity - - -def get_entity(path_curr: Path) -> ProvEntity: - """ - return an Entity object from the file in path_curr - """ - - from clinica.engine.prov_utils import generate_entity_id, get_last_activity - - new_entity = ProvEntity(uid=generate_entity_id(path_curr)) - new_entity.attributes["label"] = path_curr.name - new_entity.attributes["path"] = str(path_curr) - - # TODO: implement function to return the latest associated activity - new_entity.attributes["wasGeneratedBy"] = get_last_activity(path_curr) - - return new_entity - - -def validate_command(prov_history: ProvRecord, prov_current: ProvRecord) -> bool: - """ - Check the command is valid on the data being run - """ - flag = True - - for a in prov_history.elements: - for b in prov_current.elements: - # TODO: check that the record entries are compatible with the current entry - flag = True - return flag - - -def is_valid(command: dict) -> bool: - valid_list = [ - { - ("clin:clinica0.5.0", "clin:adni2Bids"): ( - "clin:clinica0.5.0", - "clin:t1-linear", - ) - } - ] - if command in valid_list: - return True - return False + print("out_files_path:", out_files_paths) + print("in log prov, prov_record", prov_log_record) + for path_file in out_files_paths: + write_prov_file(prov_log_record, path_file) + print("Provenance registered succesfully") + return True diff --git a/clinica/pipelines/engine.py b/clinica/pipelines/engine.py index da7d35fae..4b8e63cd6 100644 --- a/clinica/pipelines/engine.py +++ b/clinica/pipelines/engine.py @@ -213,6 +213,7 @@ def has_output_connections(self): return False @postset("is_built", True) + @prov.provenance def build(self): """Builds the core, input and output nodes of the Pipeline. @@ -230,13 +231,12 @@ def build(self): self.check_dependencies() self.check_pipeline_parameters() if not self.has_input_connections(): - self.build_input_node() + self.input_files = self.build_input_node() self.build_core_nodes() if not self.has_output_connections(): - self.build_output_node() + self.output_files = self.build_output_node() return self - @prov.provenance def run(self, plugin=None, plugin_args=None, update_hash=False, bypass_check=False): """Executes the Pipeline. 
From fdf8ed72658afc021e3a45331a7e6ae3969c678c Mon Sep 17 00:00:00 2001
From: "omar.elrifai"
Date: Tue, 1 Mar 2022 18:19:57 +0100
Subject: [PATCH 18/24] Add context to provenance

---
 clinica/engine/prov_model.py | 19 +++++++++++++------
 clinica/engine/prov_utils.py |  7 ++++++-
 clinica/engine/provenance.py | 34 +++++++++++++++++-----------------
 3 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py
index 1bc1e30bc..6efeebd4c 100644
--- a/clinica/engine/prov_model.py
+++ b/clinica/engine/prov_model.py
@@ -66,7 +66,7 @@ class ProvEntity(ProvElement):
     uid: Identifier = field(validator=[attr.validators.instance_of(Identifier)])
     attributes: dict = field(default=attr.Factory(dict))

-    def unstrct(self):
+    def unstruct(self):
         return {"id": str(self.uid), **self.attributes}

@@ -77,7 +77,7 @@ class ProvActivity(ProvElement):
     uid: Identifier = field(validator=[attr.validators.instance_of(Identifier)])
     attributes: dict = field(default=attr.Factory(dict))

-    def unstrct(self):
+    def unstruct(self):
         return {"id": str(self.uid), **self.attributes}

@@ -88,7 +88,7 @@ class ProvAgent(ProvElement):
     uid: Identifier = field(validator=[attr.validators.instance_of(Identifier)])
     attributes: dict = field(default=attr.Factory(dict))

-    def unstrct(self):
+    def unstruct(self):
         return {"id": str(self.uid), **self.attributes}

@@ -118,14 +118,21 @@ def __getitem__(self, idx):
             return element

     def json(self):
+        json_dict = {}
+
+        context_keys = [x.id for x in self.context._namespaces]
+        context_vals = [y.uri for y in self.context._namespaces]
+
+        json_dict["@context"] = dict(zip(context_keys, context_vals))
+
         json_dict["prov:Agent"] = [
-            x.unstrct() for x in self.elements if isinstance(x, ProvAgent)
+            x.unstruct() for x in self.elements if isinstance(x, ProvAgent)
         ]
         json_dict["prov:Activity"] = [
-            x.unstrct() for x in self.elements if isinstance(x, ProvActivity)
+            x.unstruct() for x in self.elements if isinstance(x, ProvActivity)
         ]
         json_dict["prov:Entity"] = [
-            x.unstrct() for x in self.elements if isinstance(x, ProvEntity)
+            x.unstruct() for x in self.elements if isinstance(x, ProvEntity)
         ]

         return json_dict
diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py
index 1428b48c7..2b8b822df 100644
--- a/clinica/engine/prov_utils.py
+++ b/clinica/engine/prov_utils.py
@@ -1,3 +1,4 @@
+import json
 from pathlib import Path
 from typing import List, Optional
 from clinica.engine.prov_model import *
@@ -134,13 +135,17 @@ def deserialize_jsonld(path_prov) -> ProvRecord:
     import rdflib

     g = rdflib.Graph(identifier="prov_graph_records")
+    built_in_namepsaces = list(g.namespace_manager.namespaces())
     g.parse(path_prov, format="json-ld")
+    json_namespaces = list(g.namespace_manager.namespaces())
+    json_namespaces = list(set(json_namespaces) - set(built_in_namepsaces))

     elements = {}

     # fetch context:
     context = ProvContext([])
-    for lbl, link in g.namespace_manager.namespaces():
+
+    for lbl, link in json_namespaces:
         namespace = Namespace(lbl, link.n3())
         context._namespaces.append(namespace)
diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py
index 619c08db6..415d79e2d 100644
--- a/clinica/engine/provenance.py
+++ b/clinica/engine/provenance.py
@@ -4,6 +4,8 @@
 from pathlib import Path
 from typing import List

+from clinica.engine.prov_model import ProvContext
+

 def provenance(func):
     @functools.wraps(func)
@@ -14,7 +16,7 @@ def run_wrapper(self, **kwargs):
         pipeline_fullname = self.fullname

         create_node_read(self)
-        create_node_update(self, pipeline_args, pipeline_fullname)
+        create_node_update(self)
         create_node_log(self)

         connect_nodes(self)
@@ -27,17 +29,22 @@ def run_wrapper(self, **kwargs):
 def connect_nodes(self):
     # fmt: off

-    #self.output_node.outputs.get()[self.get_output_fields()[0]]
+    try:
+        output_field = self.get_output_fields()[0]
+        self.connect([(self.output_node, self.prov_log_node, [(self.get_output_fields()[0], "out_file")])])
+    except Exception:
+        self.connect([(self.output_node, self.prov_log_node, [("", "out_file")])])

     self.connect(
         [
             (self.input_node, self.prov_input_node, [("t1w", "input_files")]),
             (self.input_node, self.prov_update_node, [("t1w", "input_files")]),
             (self.prov_input_node, self.prov_update_node, [("prov_in_record", "prov_in_record")]),
-            (self.prov_update_node, self.prov_log_node,[("prov_upd_record", "prov_log_record")]),
-            (self.output_node, self.prov_log_node, [(self.get_output_fields()[0], "out_file")]),
+            (self.prov_update_node, self.prov_log_node,[("prov_upd_record", "prov_log_record")]),
         ]
     )
+
+    return True
     # fmt: on
@@ -56,13 +63,13 @@ def create_node_read(self):
     )


-def create_node_update(self, parameters, fullname):
+def create_node_update(self):
     import nipype.pipeline.engine as npe
     import nipype.interfaces.utility as nutil

     self.prov_update_node = npe.Node(
         nutil.Function(
-            input_names=["input_files", "prov_in_record", "parameters", "fullname"],
+            input_names=["input_files", "prov_in_record"],
             output_names=["prov_upd_record"],
             function=update_prov,
         ),
@@ -95,22 +102,20 @@ def read_prov(input_files):
         a ProvRecord for the associated files in path_files
     """
     from clinica.engine.prov_utils import get_path_prov, read_prov_jsonld
-    from clinica.engine.prov_model import ProvRecord
+    from clinica.engine.prov_model import ProvRecord, ProvContext
     from pathlib import Path

-    prov_record = ProvRecord({}, [])
+    prov_record = ProvRecord(ProvContext([]), [])
     if isinstance(input_files, list):
         paths_files = [Path(x) for x in input_files]
     elif isinstance(input_files, str):
         paths_files = [Path(input_files)]
     for path in paths_files:
-        print("in read_prov, path for input:", path)
         prov_record_tmp = read_prov_jsonld(get_path_prov(path))
         if prov_record_tmp:
-            # TODO extend context as well
+            prov_record.context = prov_record_tmp.context
             prov_record.elements.extend(prov_record_tmp.elements)
-
     return prov_record
@@ -148,7 +153,7 @@ def update_prov(input_files, prov_in_record):
     new_activity = mint_activity(new_agent, new_entities)
     elements.append(new_activity)

-    prov_current = ProvRecord(context={}, elements=elements)
+    prov_current = ProvRecord(prov_in_record.context, elements=elements)
     if not validate_command(prov_in_record, prov_current):
         raise ("Invalid commmand")
@@ -167,11 +172,6 @@ def log_prov(prov_log_record, out_file, out_dir):
     elif isinstance(out_file, str):
         out_files_paths = list(Path(out_dir).rglob(out_file))

-    print("the file searched:", out_file)
-    print("the folder searched:", out_dir)
-
-    print("out_files_path:", out_files_paths)
-    print("in log prov, prov_record", prov_log_record)
     for path_file in out_files_paths:
         write_prov_file(prov_log_record, path_file)
     print("Provenance registered succesfully")

From 203d84d27bd6fa3b266a5e57b490eef5046c0a46 Mon Sep 17 00:00:00 2001
From: "omar.elrifai"
Date: Wed, 2 Mar 2022 15:27:34 +0100
Subject: [PATCH 19/24] specify error type in try catch

---
 clinica/engine/provenance.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py
index 415d79e2d..8b8a38522 100644
--- a/clinica/engine/provenance.py
+++ b/clinica/engine/provenance.py
@@ -18,7 +18,6 @@ def run_wrapper(self, **kwargs):
         create_node_read(self)
         create_node_update(self)
         create_node_log(self)
-
         connect_nodes(self)

         return ret
@@ -31,8 +30,8 @@ def connect_nodes(self):

     try:
         output_field = self.get_output_fields()[0]
-        self.connect([(self.output_node, self.prov_log_node, [(self.get_output_fields()[0], "out_file")])])
-    except Exception:
+        self.connect([(self.output_node, self.prov_log_node, [(output_field, "out_file")])])
+    except IndexError:
         self.connect([(self.output_node, self.prov_log_node, [("", "out_file")])])

     self.connect(
@@ -43,8 +42,6 @@ def connect_nodes(self):
             (self.prov_update_node, self.prov_log_node,[("prov_upd_record", "prov_log_record")]),
         ]
     )
-
-    return True
     # fmt: on

From cb31dd1505e345fb02d8cb22d7ea8819156800d4 Mon Sep 17 00:00:00 2001
From: "omar.elrifai"
Date: Wed, 2 Mar 2022 15:33:01 +0100
Subject: [PATCH 20/24] Revert changes in clinica/utils/ after architecture
 change

---
 .../spatial_svm_pipeline.py                   |  2 +-
 .../statistics_volume_correction_pipeline.py  |  2 +-
 clinica/utils/input_files.py                  | 79 +++++++++----------
 clinica/utils/inputs.py                       | 35 +++------
 4 files changed, 51 insertions(+), 67 deletions(-)

diff --git a/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py b/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py
index e630bcbda..79d4c9e17 100644
--- a/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py
+++ b/clinica/pipelines/machine_learning_spatial_svm/spatial_svm_pipeline.py
@@ -93,7 +93,7 @@ def build_input_node(self):
                     "*_T1w_segm-graymatter_space-Ixi549Space_modulated-on_probability.nii.gz",
                 ),
                 "description": "graymatter tissue segmented in T1w MRI in Ixi549 space",
-                "output_from": "t1-volume-tissue-segmentation",
+                "needed_pipeline": "t1-volume-tissue-segmentation",
             }
         elif self.parameters["orig_input_data"] == "pet-volume":
             if not (
diff --git a/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py b/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py
index 3f8dffa94..b9afd5e63 100644
--- a/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py
+++ b/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py
@@ -39,7 +39,7 @@ def build_input_node(self):
             {
                 "pattern": self.parameters["t_map"] + "*",
                 "description": "statistics t map",
-                "output_from": "statistics-volume",
+                "needed_pipeline": "statistics-volume",
             },
         )
diff --git a/clinica/utils/input_files.py b/clinica/utils/input_files.py
index 37f852abb..380e2f4c9 100644
--- a/clinica/utils/input_files.py
+++ b/clinica/utils/input_files.py
@@ -7,155 +7,150 @@

 # BIDS

-T1W_NII = {
-    "pattern": "sub-*_ses-*_t1w.nii*",
-    "description": "T1w MRI",
-    "input_to": ["t1-linear"],
-}
+T1W_NII = {"pattern": "sub-*_ses-*_t1w.nii*", "description": "T1w MRI"}

 # T1-FreeSurfer
 T1_FS_WM = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/wm.seg.mgz",
     "description": "segmentation of white matter (mri/wm.seg.mgz).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_BRAIN = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/brain.mgz",
     "description": " extracted brain from T1w MRI (mri/brain.mgz).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_ORIG_NU = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/orig_nu.mgz",
     "description": "intensity normalized volume generated after correction for"
     " non-uniformity in FreeSurfer (mri/orig_nu.mgz).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_LONG_ORIG_NU = {
     "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/mri/orig_nu.mgz",
     "description": "intensity normalized volume generated after correction for non-uniformity in FreeSurfer (orig_nu.mgz) in longitudinal",
-    "output_from": "t1-freesurfer and t1-freesurfer longitudinal",
+    "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal",
 }

 T1_FS_WM_SURF_R = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/surf/rh.white",
     "description": "right white matter/gray matter border surface (rh.white).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_LONG_SURF_R = {
     "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/surf/rh.white",
     "description": "right white matter/gray matter border surface (rh.white) generated with t1-freesurfer-longitudinal.",
-    "output_from": "t1-freesurfer and t1-freesurfer longitudinal",
+    "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal",
 }

 T1_FS_LONG_SURF_L = {
     "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/surf/lh.white",
     "description": "left white matter/gray matter border surface (lh.white) generated with t1-freesurfer-longitudinal.",
-    "output_from": "t1-freesurfer and t1-freesurfer longitudinal",
+    "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal",
 }

 T1_FS_WM_SURF_L = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/surf/lh.white",
     "description": "left white matter/gray matter border surface (lh.white).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_DESTRIEUX = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/aparc.a2009s+aseg.mgz",
     "description": "Destrieux-based segmentation (mri/aparc.a2009s+aseg.mgz).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_DESTRIEUX_PARC_L = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/label/lh.aparc.a2009s.annot",
     "description": "left hemisphere surface-based Destrieux parcellation (label/lh.aparc.a2009s.annot).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_LONG_DESTRIEUX_PARC_L = {
     "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/label/lh.aparc.a2009s.annot",
     "description": "left hemisphere surface-based Destrieux parcellation (label/lh.aparc.a2009s.annot) generated with t1-freesurfer-longitudinal.",
-    "output_from": "t1-freesurfer and t1-freesurfer longitudinal",
+    "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal",
 }

 T1_FS_LONG_DESTRIEUX_PARC_R = {
     "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/label/rh.aparc.a2009s.annot",
     "description": "right hemisphere surface-based Destrieux parcellation (label/rh.aparc.a2009s.annot) generated with t1-freesurfer-longitudinal.",
-    "output_from": "t1-freesurfer and t1-freesurfer longitudinal",
+    "needed_pipeline": "t1-freesurfer and t1-freesurfer longitudinal",
 }

 T1_FS_DESTRIEUX_PARC_R = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/label/rh.aparc.a2009s.annot",
     "description": "right hemisphere surface-based Destrieux parcellation (label/rh.aparc.a2009s.annot).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_DESIKAN = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/mri/aparc+aseg.mgz",
     "description": "Desikan-based segmentation (mri/aparc.a2009s+aseg.mgz).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_DESIKAN_PARC_L = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/label/lh.aparc.annot",
     "description": "left hemisphere surface-based Desikan parcellation (label/lh.aparc.annot).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 T1_FS_DESIKAN_PARC_R = {
     "pattern": "t1/freesurfer_cross_sectional/sub-*_ses-*/label/rh.aparc.annot",
     "description": "right hemisphere surface-based Desikan parcellation (label/rh.aparc.annot).",
-    "output_from": "t1-freesurfer",
+    "needed_pipeline": "t1-freesurfer",
 }

 # T1-FreeSurfer-Template
 T1_FS_T_DESTRIEUX = {
     "pattern": "freesurfer_unbiased_template/sub-*_long-*/mri/aparc.a2009s+aseg.mgz",
     "description": "Destrieux-based segmentation (mri/aparc.a2009s+aseg.mgz) from unbiased template.",
-    "output_from": "t1-freesurfer-longitudinal or t1-freesurfer-template",
+    "needed_pipeline": "t1-freesurfer-longitudinal or t1-freesurfer-template",
 }

 # T1-FreeSurfer-Longitudinal-Correction
 T1_FS_LONG_DESIKAN_PARC_L = {
     "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/label/lh.aparc.annot",
     "description": "left hemisphere surface-based Desikan parcellation (label/lh.aparc.annot) generated with t1-freesurfer-longitudinal.",
-    "output_from": "t1-freesurfer and t1-freesurfer-longitudinal",
+    "needed_pipeline": "t1-freesurfer and t1-freesurfer-longitudinal",
 }

 T1_FS_LONG_DESIKAN_PARC_R = {
     "pattern": "t1/long-*/freesurfer_longitudinal/sub-*_ses-*.long.sub-*_*/label/rh.aparc.annot",
     "description": "right hemisphere surface-based Desikan parcellation (label/rh.aparc.annot) generated with t1-freesurfer-longitudinal.",
-    "output_from": "t1-freesurfer and t1-freesurfer-longitudinal",
+    "needed_pipeline": "t1-freesurfer and t1-freesurfer-longitudinal",
 }

 T1W_LINEAR = {
     "pattern": "*space-MNI152NLin2009cSym_res-1x1x1_T1w.nii.gz",
     "description": "T1w image registered in MNI152NLin2009cSym space using t1-linear pipeline",
-    "output_from": "t1-linear",
-    "input_to": "pet-linear",
+    "needed_pipeline": "t1-linear",
 }

 T1W_LINEAR_CROPPED = {
     "pattern": "*space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz",
     "description": "T1W Image registered using t1-linear and cropped "
     "(matrix size 169×208×179, 1 mm isotropic voxels)",
-    "output_from": "t1-linear",
+    "needed_pipeline": "t1-linear",
 }

 T1W_EXTENSIVE = {
     "pattern": "*space-Ixi549Space_desc-SkullStripped_T1w.nii.gz",
     "description": "T1w image skull-stripped registered in Ixi549Space space using clinicaDL preprocessing pipeline",
-    "output_from": "t1-extensive",
+    "needed_pipeline": "t1-extensive",
 }

 T1W_TO_MNI_TRANSFORM = {
     "pattern": "*space-MNI152NLin2009cSym_res-1x1x1_affine.mat",
     "description": "Transformation matrix from T1W image to MNI space using t1-linear pipeline",
-    "output_from": "t1-linear",
+    "needed_pipeline": "t1-linear",
 }

 # T1-Volume
@@ -175,7 +170,7 @@ def t1_volume_native_tpm(tissue_number):
             f"*_*_T1w_segm-{INDEX_TISSUE_MAP[tissue_number]}_probability.nii*",
         ),
         "description": f"Tissue probability map {INDEX_TISSUE_MAP[tissue_number]} in native space",
-        "output_from": "t1-volume-tissue-segmentation",
+        "needed_pipeline": "t1-volume-tissue-segmentation",
     }
     return information
@@ -194,7 +189,7 @@ def t1_volume_dartel_input_tissue(tissue_number):
             f"*_*_T1w_segm-{INDEX_TISSUE_MAP[tissue_number]}_dartelinput.nii*",
         ),
         "description": f"Dartel input for tissue probability map {INDEX_TISSUE_MAP[tissue_number]} from T1w MRI",
-        "output_from": "t1-volume-tissue-segmentation",
+        "needed_pipeline": "t1-volume-tissue-segmentation",
     }
     return information
@@ -222,7 +217,7 @@ def t1_volume_native_tpm_in_mni(tissue_number, modulation):
             f"Tissue probability map {INDEX_TISSUE_MAP[tissue_number]} based on "
             f"native MRI in MNI space (Ixi549) {description_modulation} modulation."
         ),
-        "output_from": "t1-volume-tissue-segmentation",
+        "needed_pipeline": "t1-volume-tissue-segmentation",
     }
     return information
@@ -250,7 +245,7 @@ def t1_volume_template_tpm_in_mni(group_label, tissue_number, modulation):
             f"Tissue probability map {INDEX_TISSUE_MAP[tissue_number]} based "
             f"on {group_label} template in MNI space (Ixi549) {description_modulation} modulation."
         ),
-        "output_from": "t1-volume",
+        "needed_pipeline": "t1-volume",
     }
     return information
@@ -267,7 +262,7 @@ def t1_volume_deformation_to_template(group_label):
             f"sub-*_ses-*_T1w_target-{group_label}_transformation-forward_deformation.nii*",
         ),
         "description": f"Deformation from native space to group template {group_label} space.",
-        "output_from": "t1-volume-create-dartel",
+        "needed_pipeline": "t1-volume-create-dartel",
     }
     return information
@@ -282,7 +277,7 @@ def t1_volume_i_th_iteration_group_template(group_label, i):
             f"group-{group_label}_iteration-{i}_template.nii*",
         ),
         "description": f"Iteration #{i} of Dartel template {group_label}",
-        "output_from": "t1-volume or t1-volume-create-dartel",
+        "needed_pipeline": "t1-volume or t1-volume-create-dartel",
     }
     return information
@@ -295,7 +290,7 @@ def t1_volume_final_group_template(group_label):
             f"group-{group_label}", "t1", f"group-{group_label}_template.nii*"
         ),
         "description": f"T1w template file of group {group_label}",
-        "output_from": "t1-volume or t1-volume-create-dartel",
+        "needed_pipeline": "t1-volume or t1-volume-create-dartel",
     }
     return information
@@ -332,25 +327,25 @@ def t1_volume_final_group_template(group_label):
 DWI_PREPROC_NII = {
     "pattern": "dwi/preprocessing/sub-*_ses-*_dwi_space-*_preproc.nii*",
     "description": "preprocessed DWI",
-    "output_from": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
+    "needed_pipeline": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
 }

 DWI_PREPROC_BRAINMASK = {
     "pattern": "dwi/preprocessing/sub-*_ses-*_dwi_space-*_brainmask.nii*",
     "description": "b0 brainmask",
-    "output_from": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
+    "needed_pipeline": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
 }

 DWI_PREPROC_BVEC = {
     "pattern": "dwi/preprocessing/sub-*_ses-*_dwi_space-*_preproc.bvec",
     "description": "preprocessed bvec",
-    "output_from": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
+    "needed_pipeline": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
 }

 DWI_PREPROC_BVAL = {
     "pattern": "dwi/preprocessing/*_dwi_space-*_preproc.bval",
     "description": "preprocessed bval",
-    "output_from": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
+    "needed_pipeline": "dwi-preprocessing-using-t1 or dwi-preprocessing-using-fieldmap",
 }

 """ PET """
@@ -416,7 +411,7 @@ def pet_volume_normalized_suvr_pet(
         f"{mask_description} SUVR map (using {suvr_reference_region} region) of {acq_label}-PET "
         f"{pvc_description} and {fwhm_description} in Ixi549Space space based on {group_label} DARTEL template"
     ),
-        "output_from": "pet-volume",
+        "needed_pipeline": "pet-volume",
    }
    return information
@@ -438,6 +433,6 @@ def pet_linear_nii(acq_label, suvr_reference_region, uncropped_image):
            f"*_acq-{acq_label}_pet_space-MNI152NLin2009cSym{description}_res-1x1x1_suvr-{suvr_reference_region}_pet.nii.gz",
        ),
        "description": "",
-       "output_from": "pet-linear",
+       "needed_pipeline": "pet-linear",
    }
    return information
diff --git a/clinica/utils/inputs.py b/clinica/utils/inputs.py
index 0d870aba1..45241122d 100644
--- a/clinica/utils/inputs.py
+++ b/clinica/utils/inputs.py
@@ -186,10 +186,10 @@ def clinica_file_reader(
        sessions: list of sessions (must be same size as subjects, and must correspond )
        input_directory: location of the bids or caps directory
        information: dictionary containing all the relevant information to look for the files. Dict must contains the
-                    following keys : pattern, description. The optional key is: output_from
+                    following keys : pattern, description. The optional key is: needed_pipeline
                        pattern: define the pattern of the final file
                        description: string to describe what the file is
-                       output_from (optional): string describing the pipeline(s) needed to obtain the related
+                       needed_pipeline (optional): string describing the pipeline(s) needed to obtain the related
                           file
        raise_exception: if True (normal behavior), an exception is raised if errors happen. If not, we return the file
                       list as it is
@@ -215,7 +215,7 @@
                        caps_directory,
                        {'pattern': 'freesurfer_cross_sectional/sub-*_ses-*/mri/orig_nu.mgz',
                         'description': 'freesurfer file orig_nu.mgz',
-                        'output_from': 't1-freesurfer'})
+                        'needed_pipeline': 't1-freesurfer'})
                gives: ['/caps/subjects/sub-ADNI011S4105/ses-M00/t1/freesurfer_cross_sectional/sub-ADNI011S4105_ses-M00/mri/orig_nu.mgz']

            - You have a partial name of the file:
@@ -236,7 +236,7 @@
                    caps,
                    {'pattern': 'rh.white',
                     'description': 'right hemisphere of outter cortical surface.',
-                    'output_from': 't1-freesurfer'})
+                    'needed_pipeline': 't1-freesurfer'})
                the following error will arise:
                * More than 1 file found::
                        /caps/subjects/sub-ADNI011S4105/ses-M00/t1/freesurfer_cross_sectional/fsaverage/surf/rh.white
@@ -266,9 +266,9 @@ def clinica_file_reader(
        elem in information.keys() for elem in ["pattern", "description"]
    ), "'information' must contain the keys 'pattern' and 'description'"
    assert all(
-        elem in ["pattern", "description", "output_from", "input_to"]
+        elem in ["pattern", "description", "needed_pipeline"]
        for elem in information.keys()
-    ), "'information' can only contain the keys 'pattern', 'description', 'output_from' and 'input_to'"
+    ), "'information' can only contain the keys 'pattern', 'description' and 'needed_pipeline'"

    pattern = information["pattern"]
    is_bids = determine_caps_or_bids(input_directory)
@@ -330,18 +330,6 @@ def clinica_file_reader(
    for msg in error_encountered:
        error_message += msg
    if len(error_encountered) > 0 and raise_exception is True:
-        error_message = (
-            f"Clinica encountered {len(error_encountered)} "
-            f"problem(s) while getting {information['description']}:\n"
-        )
-        if "output_from" in information.keys():
-            if information["output_from"]:
-                error_message += (
-                    "Please note that the following clinica pipeline(s) must "
-                    f"have run to obtain these files: {information['output_from']}\n"
-                )
-        for msg in error_encountered:
-            error_message += msg
        if is_bids:
            raise ClinicaBIDSError(error_message)
        else:
@@ -409,10 +397,10 @@ def clinica_group_reader(caps_directory, information, raise_exception=True):
    Args:
        caps_directory: input caps directory
        information: dictionary containing all the relevant information to look for the files. Dict must contains the
Dict must contains the - following keys : pattern, description, output_from + following keys : pattern, description, needed_pipeline pattern: define the pattern of the final file description: string to describe what the file is - output_from (optional): string describing the pipeline needed to obtain the file beforehand + needed_pipeline (optional): string describing the pipeline needed to obtain the file beforehand raise_exception: if True (normal behavior), an exception is raised if errors happen. If not, we return the file list as it is @@ -430,8 +418,9 @@ def clinica_group_reader(caps_directory, information, raise_exception=True): information, dict ), "A dict must be provided for the argument 'dict'" assert all( - elem in information.keys() for elem in ["pattern", "description", "output_from"] - ), "'information' must contain the keys 'pattern', 'description', 'output_from'" + elem in information.keys() + for elem in ["pattern", "description", "needed_pipeline"] + ), "'information' must contain the keys 'pattern', 'description', 'needed_pipeline'" pattern = information["pattern"] # Some check on the formatting on the data @@ -457,7 +446,7 @@ def clinica_group_reader(caps_directory, information, raise_exception=True): error_string += ( f"\n\tCAPS directory: {caps_directory}\n" "Please note that the following clinica pipeline(s) must have run to obtain these files: " - f"{information['output_from']}\n" + f"{information['needed_pipeline']}\n" ) raise ClinicaCAPSError(error_string) return current_glob_found[0] From 468721174a48d88458ab2a05ad2fea941d274cb7 Mon Sep 17 00:00:00 2001 From: "omar.elrifai" Date: Wed, 2 Mar 2022 17:53:53 +0100 Subject: [PATCH 21/24] Homogenize the funcs get_input_fields() get_output_fields() --- clinica/pipelines/pet_surface/pet_surface_pipeline.py | 2 +- .../pipelines/statistics_surface/statistics_surface_pipeline.py | 2 +- .../statistics_volume_correction_pipeline.py | 2 +- .../t1_volume_parcellation/t1_volume_parcellation_pipeline.py | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/clinica/pipelines/pet_surface/pet_surface_pipeline.py b/clinica/pipelines/pet_surface/pet_surface_pipeline.py index 9fe61d2a3..da77b98b4 100644 --- a/clinica/pipelines/pet_surface/pet_surface_pipeline.py +++ b/clinica/pipelines/pet_surface/pet_surface_pipeline.py @@ -41,7 +41,7 @@ def get_input_fields(self): def get_output_fields(self): """Specify the list of possible outputs of this pipeline.""" - return [] + return [""] def build_input_node(self): """Build and connect an input node to the pipeline.""" diff --git a/clinica/pipelines/statistics_surface/statistics_surface_pipeline.py b/clinica/pipelines/statistics_surface/statistics_surface_pipeline.py index 8a79cb9da..790b12020 100644 --- a/clinica/pipelines/statistics_surface/statistics_surface_pipeline.py +++ b/clinica/pipelines/statistics_surface/statistics_surface_pipeline.py @@ -74,7 +74,7 @@ def get_input_fields(self): Returns: A list of (string) input fields name. """ - return [] + return [""] def get_output_fields(self): """Specify the list of possible outputs of this pipeline. 
diff --git a/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py b/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py
index b9afd5e63..3ad08980e 100644
--- a/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py
+++ b/clinica/pipelines/statistics_volume_correction/statistics_volume_correction_pipeline.py
@@ -25,7 +25,7 @@ def get_output_fields(self):
         Returns:
             A list of (string) output fields name.
         """
-        return []
+        return [""]

     def build_input_node(self):
         """Build and connect an input node to the pipeline."""
diff --git a/clinica/pipelines/t1_volume_parcellation/t1_volume_parcellation_pipeline.py b/clinica/pipelines/t1_volume_parcellation/t1_volume_parcellation_pipeline.py
index 0ff7193f6..32f847b31 100644
--- a/clinica/pipelines/t1_volume_parcellation/t1_volume_parcellation_pipeline.py
+++ b/clinica/pipelines/t1_volume_parcellation/t1_volume_parcellation_pipeline.py
@@ -36,6 +36,7 @@ def get_output_fields(self):
         Returns:
             A list of (string) output fields name.
         """
+        return [""]

     def build_input_node(self):
         """Build and connect an input node to the pipeline."""

From ddc82c824b92219836a7efa7ec2239a2a959e9b0 Mon Sep 17 00:00:00 2001
From: "omar.elrifai"
Date: Thu, 3 Mar 2022 14:49:09 +0100
Subject: [PATCH 22/24] Remove unused imports

---
 clinica/engine/prov_model.py |  5 +----
 clinica/engine/prov_utils.py | 12 ++++++++++--
 clinica/engine/provenance.py | 28 +++++++++++-----------------
 3 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/clinica/engine/prov_model.py b/clinica/engine/prov_model.py
index 6efeebd4c..eb178d44d 100644
--- a/clinica/engine/prov_model.py
+++ b/clinica/engine/prov_model.py
@@ -1,11 +1,8 @@
 from abc import ABC, abstractmethod
-from typing import List, Union
-from xml.dom.minidom import Element
+from typing import List

 import attr
-import cattr
 from attr import define, field
-from matplotlib.style import context

 # Define PROV abstract concepts

diff --git a/clinica/engine/prov_utils.py b/clinica/engine/prov_utils.py
index 2b8b822df..47945faea 100644
--- a/clinica/engine/prov_utils.py
+++ b/clinica/engine/prov_utils.py
@@ -1,7 +1,15 @@
-import json
 from pathlib import Path
 from typing import List, Optional
-from clinica.engine.prov_model import *
+
+from clinica.engine.prov_model import (
+    Identifier,
+    Namespace,
+    ProvActivity,
+    ProvAgent,
+    ProvContext,
+    ProvEntity,
+    ProvRecord,
+)


 def mint_agent() -> ProvAgent:
diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py
index 8b8a38522..922d95306 100644
--- a/clinica/engine/provenance.py
+++ b/clinica/engine/provenance.py
@@ -1,20 +1,11 @@
 import functools

-from os import read
-from pathlib import Path
-from typing import List
-
-from clinica.engine.prov_model import ProvContext
-

 def provenance(func):
     @functools.wraps(func)
     def run_wrapper(self, **kwargs):
         ret = func(self)

-        pipeline_args = self.parameters
-        pipeline_fullname = self.fullname
-
         create_node_read(self)
         create_node_update(self)
         create_node_log(self)
@@ -47,8 +38,8 @@ def connect_nodes(self):


 def create_node_read(self):
-    import nipype.pipeline.engine as npe
     import nipype.interfaces.utility as nutil
+    import nipype.pipeline.engine as npe

     self.prov_input_node = npe.Node(
         nutil.Function(
@@ -61,8 +52,8 @@ def create_node_read(self):


 def create_node_update(self):
-    import nipype.pipeline.engine as npe
     import nipype.interfaces.utility as nutil
+    import nipype.pipeline.engine as npe

     self.prov_update_node = npe.Node(
        nutil.Function(
@@ -77,8 +68,8 @@ def create_node_update(self):


 def create_node_log(self):
-    import nipype.pipeline.engine as npe
     import nipype.interfaces.utility as nutil
+    import nipype.pipeline.engine as npe

     self.prov_log_node = npe.Node(
         nutil.Function(
@@ -98,10 +89,11 @@ def read_prov(input_files):
     return:
         a ProvRecord for the associated files in path_files
     """
-    from clinica.engine.prov_utils import get_path_prov, read_prov_jsonld
-    from clinica.engine.prov_model import ProvRecord, ProvContext
     from pathlib import Path

+    from clinica.engine.prov_model import ProvContext, ProvRecord
+    from clinica.engine.prov_utils import get_path_prov, read_prov_jsonld
+
     prov_record = ProvRecord(ProvContext([]), [])
     if isinstance(input_files, list):
         paths_files = [Path(x) for x in input_files]
@@ -123,14 +115,15 @@ def update_prov(input_files, prov_in_record):
     return:
         ProvRecord associated with the launched pipeline
     """
+    from pathlib import Path
+
+    from clinica.engine.prov_model import ProvRecord
     from clinica.engine.prov_utils import (
         mint_activity,
         mint_agent,
         mint_entity,
         validate_command,
     )
-    from pathlib import Path
-    from clinica.engine.prov_model import ProvRecord

     elements = []
     new_agent = mint_agent()
@@ -158,9 +151,10 @@ def update_prov(input_files, prov_in_record):


 def log_prov(prov_log_record, out_file, out_dir):
-    from clinica.engine.prov_utils import write_prov_file
     from pathlib import Path

+    from clinica.engine.prov_utils import write_prov_file
+
     out_file = out_file + "*"
     out_files_paths = []

From 54b3ffe1f42ea8d35b2da12a0c1ba908684ba8a1 Mon Sep 17 00:00:00 2001
From: "omar.elrifai"
Date: Thu, 3 Mar 2022 14:59:05 +0100
Subject: [PATCH 23/24] Lint __init__ file in engine

---
 clinica/engine/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clinica/engine/__init__.py b/clinica/engine/__init__.py
index c9c7623f4..66d9fb818 100644
--- a/clinica/engine/__init__.py
+++ b/clinica/engine/__init__.py
@@ -1,4 +1,5 @@
-from .cmdparser import CmdParser
 from nipype import config

+from .cmdparser import CmdParser
+
 config.enable_debug_mode()

From 07204cc0b47a7c1a3f75042585b96d489d8fd918 Mon Sep 17 00:00:00 2001
From: "omar.elrifai"
Date: Mon, 14 Mar 2022 16:37:14 +0100
Subject: [PATCH 24/24] Update fields returned in pet-linear for prov
 compatibility

---
 clinica/engine/provenance.py                        | 5 +++--
 clinica/pipelines/pet_linear/pet_linear_pipeline.py | 6 +++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/clinica/engine/provenance.py b/clinica/engine/provenance.py
index 922d95306..7559dae72 100644
--- a/clinica/engine/provenance.py
+++ b/clinica/engine/provenance.py
@@ -155,13 +155,14 @@ def log_prov(prov_log_record, out_file, out_dir):

     from clinica.engine.prov_utils import write_prov_file

-    out_file = out_file + "*"
     out_files_paths = []
+
     if isinstance(out_file, list):
         for x in out_file:
             out_files_paths.extend(list(Path(out_dir).rglob(x)))
     elif isinstance(out_file, str):
-        out_files_paths = list(Path(out_dir).rglob(out_file))
+
+        out_files_paths = list(Path(out_dir).rglob(Path(out_file).name))

     for path_file in out_files_paths:
         write_prov_file(prov_log_record, path_file)
     print("Provenance registered succesfully")
diff --git a/clinica/pipelines/pet_linear/pet_linear_pipeline.py b/clinica/pipelines/pet_linear/pet_linear_pipeline.py
index dd828dda2..448f08d32 100644
--- a/clinica/pipelines/pet_linear/pet_linear_pipeline.py
+++ b/clinica/pipelines/pet_linear/pet_linear_pipeline.py
@@ -43,9 +43,9 @@ def get_output_fields(self):
             A list of (string) output fields name.
         """
         return [
-            "registered_pet",
-            "transform_mat",
-            "registered_pet_in_t1w",
+            "suvr_pet",
+            "affine_mat",
+            "PETinT1w",
         ]  # Fill here the list

     def build_input_node(self):