From f9c37d1b183fc3e3e2cf7607942245a828e130ac Mon Sep 17 00:00:00 2001
From: bpinsard
Date: Wed, 21 Feb 2024 10:17:39 -0500
Subject: [PATCH] filter mark_sensitive based on save output

Collect the paths of the ds.save() results that carry an annex key and
pass them to mark_sensitive() as an explicit subset, replacing the
previous lookup of the files of the last commit through
`git show --name-only`.

mark_sensitive(files=None) applies no filtering; an empty list means
that nothing is marked. Save results without a "key" field are skipped
instead of raising KeyError.

---
 heudiconv/external/dlad.py            | 38 +++++++++++++--------------
 heudiconv/external/tests/test_dlad.py |  8 +++---
 2 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/heudiconv/external/dlad.py b/heudiconv/external/dlad.py
index 5e1b305a..f0f30ec3 100644
--- a/heudiconv/external/dlad.py
+++ b/heudiconv/external/dlad.py
@@ -146,23 +146,27 @@ def add_to_datalad(
             message="Added gitattributes to place all .heudiconv content"
             " under annex",
         )
-    ds.save(
+    save_res = ds.save(
         ".",
         recursive=True
         # not in effect! ?
         # annex_add_opts=['--include-dotfiles']
     )
+    annexed_files = [sr['path'] for sr in save_res if sr.get('key')]
 
     # Provide metadata for sensitive information
-    last_commit = "HEAD"
-    mark_sensitive(ds, "sourcedata", last_commit)
-    mark_sensitive(ds, "*_scans.tsv", last_commit)  # top level
-    mark_sensitive(ds, "*/*_scans.tsv", last_commit)  # within subj
-    mark_sensitive(ds, "*/*/*_scans.tsv", last_commit)  # within sess/subj
-    mark_sensitive(ds, "*/anat", last_commit)  # within subj
-    mark_sensitive(ds, "*/*/anat", last_commit)  # within ses/subj
+    sensitive_patterns = [
+        "sourcedata",
+        "*_scans.tsv",  # top level
+        "*/*_scans.tsv",  # within subj
+        "*/*/*_scans.tsv",  # within sess/subj
+        "*/anat",  # within subj
+        "*/*/anat",  # within ses/subj
+    ]
+    for sp in sensitive_patterns:
+        mark_sensitive(ds, sp, annexed_files)
     if dsh_path:
-        mark_sensitive(ds, ".heudiconv", last_commit)  # entire .heudiconv!
+        mark_sensitive(ds, ".heudiconv")  # entire .heudiconv!
     superds.save(path=ds.path, message=msg, recursive=True)
 
     assert not ds.repo.dirty
@@ -178,7 +182,7 @@ def add_to_datalad(
     """
 
 
-def mark_sensitive(ds: Dataset, path_glob: str, commit: str = None) -> None:
+def mark_sensitive(ds: Dataset, path_glob: str, files: list[str] = None) -> None:
     """
 
     Parameters
@@ -186,22 +190,16 @@ def mark_sensitive(ds: Dataset, path_glob: str, commit: str = None) -> None:
     ds : Dataset to operate on
     path_glob : str
       glob of the paths within dataset to work on
-    commit : str
-      commit which files to mark
+    files : list[str], optional
+      subset of files to mark; None (default) disables the filtering
 
     Returns
     -------
     None
     """
     paths = glob(op.join(ds.path, path_glob))
-    if commit:
-        paths_in_commit = [
-            op.join(ds.path, nf)
-            for nf in ds.repo.call_git(
-                ["show", "--name-only", commit, "--format=oneline"]
-            ).split("\n")[1:]
-        ]
-        paths = [p for p in paths if p in paths_in_commit]
+    if files is not None:
+        paths = [p for p in paths if p in files]
     if not paths:
         return
     lgr.debug("Marking %d files with distribution-restrictions field", len(paths))
diff --git a/heudiconv/external/tests/test_dlad.py b/heudiconv/external/tests/test_dlad.py
index 5900311a..ad335015 100644
--- a/heudiconv/external/tests/test_dlad.py
+++ b/heudiconv/external/tests/test_dlad.py
@@ -29,8 +29,7 @@ def test_mark_sensitive(tmp_path: Path) -> None:
     assert not all_meta.pop("g1", None)  # nothing or empty record
     assert all_meta == {"f1": target_rec, "f2": target_rec, "g2": target_rec}
 
-
-def test_mark_sensitive_last_commit(tmp_path: Path) -> None:
+def test_mark_sensitive_subset(tmp_path: Path) -> None:
     ds = dl.Dataset(tmp_path).create(force=True)
     create_tree(
         str(tmp_path),
@@ -42,9 +41,10 @@ def test_mark_sensitive_last_commit(tmp_path: Path) -> None:
         },
     )
     ds.save(".")
-    mark_sensitive(ds, "f*", "HEAD")
+    mark_sensitive(ds, "f*", [str(tmp_path/'f1')])
     all_meta = dict(ds.repo.get_metadata("."))
     target_rec = {"distribution-restrictions": ["sensitive"]}
     # g2 since the same content
     assert not all_meta.pop("g1", None)  # nothing or empty record
-    assert all_meta == {"f1": target_rec, "f2": target_rec, "g2": target_rec}
+    assert not all_meta.pop("f2", None)  # nothing or empty record
+    assert all_meta == {"f1": target_rec, "g2": target_rec}