diff --git a/element_interface/dandi.py b/element_interface/dandi.py
index 01553f4..ef9fd13 100644
--- a/element_interface/dandi.py
+++ b/element_interface/dandi.py
@@ -1,7 +1,6 @@
 import os
 import subprocess
 
-from dandi.download import download
 from dandi.upload import upload
 
 
@@ -13,6 +12,8 @@ def upload_to_dandi(
     api_key: str = None,
     sync: bool = False,
     existing: str = "refresh",
+    validation: str = "require",
+    shell: bool = True,  # without this param, subprocess interprets first arg as file/dir
 ):
     """Upload NWB files to DANDI Archive
 
@@ -27,6 +28,16 @@
         sync (str, optional): If True, delete all files in archive that are not
             present in the local directory.
         existing (str, optional): see full description from `dandi upload --help`
+        validation (str, optional): [require|skip|ignore] see full description from `dandi upload --help`
+        shell (bool, optional): passed to each `subprocess.run` call that invokes the DANDI CLI
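+
+        Example (illustrative only; the dandiset ID, paths, and API key are placeholders):
+            >>> upload_to_dandi(
+            ...     data_directory="/data/nwb",
+            ...     dandiset_id="000123",
+            ...     staging=True,
+            ...     api_key="<your-dandi-api-key>",
+            ... )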
     """
 
     working_directory = working_directory or os.path.curdir
@@ -38,29 +49,46 @@
         working_directory, str(dandiset_id)
     )  # enforce str
 
-    dandiset_url = f"https://gui-staging.dandiarchive.org/#/dandiset/{dandiset_id}" if staging else f"https://dandiarchive.org/dandiset/{dandiset_id}/draft"
-
-    subprocess.run(
-        ["dandi", "download", "--download", "dandiset.yaml", "-o", working_directory, dandiset_url],
-        shell=True,
+    dandiset_url = (
+        f"https://gui-staging.dandiarchive.org/#/dandiset/{dandiset_id}"
+        if staging
+        else f"https://dandiarchive.org/dandiset/{dandiset_id}/draft"
     )
 
     subprocess.run(
-        ["dandi", "organize", "-d", dandiset_directory, data_directory, "-f", "dry"],
-        shell=True,  # without this param, subprocess interprets first arg as file/dir
+        [
+            "dandi",
+            "download",
+            "--download",
+            "dandiset.yaml",
+            "-o",
+            working_directory,
+            dandiset_url,
+        ],
+        shell=shell,
     )
 
     subprocess.run(
-        ["dandi", "organize", "-d", dandiset_directory, data_directory], shell=True
+        [
+            "dandi",
+            "organize",
+            "-d",
+            dandiset_directory,
+            data_directory,
+            "--required-field",
+            "subject_id",
+            "--required-field",
+            "session_id",
+        ],
+        shell=shell,
    )
 
-    subprocess.run(
-        ["dandi", "validate", dandiset_directory], shell=True
-    )
+    subprocess.run(["dandi", "validate", dandiset_directory], shell=shell)
 
     upload(
         paths=[dandiset_directory],
         dandi_instance="dandi-staging" if staging else "dandi",
         existing=existing,
         sync=sync,
+        validation=validation,
     )
diff --git a/element_interface/prairie_view_loader.py b/element_interface/prairie_view_loader.py
index 841e87a..ee306cd 100644
--- a/element_interface/prairie_view_loader.py
+++ b/element_interface/prairie_view_loader.py
@@ -1,65 +1,100 @@
 import pathlib
+from pathlib import Path
 import xml.etree.ElementTree as ET
 from datetime import datetime
-
 import numpy as np
 
 
-def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
-    """Extract metadata for scans generated by Prairie View acquisition software.
+class PrairieViewMeta:
 
-    The Prairie View software generates one `.ome.tif` imaging file per frame
-    acquired. The metadata for all frames is contained in one .xml file. This
-    function locates the .xml file and generates a dictionary necessary to
-    populate the DataJoint `ScanInfo` and `Field` tables. Prairie View works
-    with resonance scanners with a single field. Prairie View does not support
-    bidirectional x and y scanning. ROI information is not contained in the
-    `.xml` file. All images generated using Prairie View have square dimensions(e.g. 512x512).
+    def __init__(self, prairieview_dir: str):
+        """Initialize PrairieViewMeta loader class
 
-    Args:
-        ome_tif_filepath: An absolute path to the .ome.tif image file.
+        Args:
+            prairieview_dir (str): absolute path to the directory containing the PrairieView dataset
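+
+        Example (illustrative; the directory path and index values are placeholders):
+            >>> pv_meta = PrairieViewMeta("/data/session01/prairieview")
+            >>> pv_meta.meta["num_planes"]
+            >>> pv_meta.get_prairieview_files(plane_idx=0, channel=1)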
+        """
+        # ---- Locate and verify the PrairieView metadata .xml file ----
+        # May return multiple xml files. Only need one that contains scan metadata.
+        self.prairieview_dir = Path(prairieview_dir)
 
-    Raises:
-        FileNotFoundError: No .xml file containing information about the acquired scan
-            was found at path in parent directory at `ome_tif_filepath`.
+        for file in self.prairieview_dir.glob("*.xml"):
+            xml_tree = ET.parse(file)
+            xml_root = xml_tree.getroot()
+            if xml_root.find(".//Sequence") is not None:
+                self.xml_file = file
+                self._xml_root = xml_root
+                break
+        else:
+            raise FileNotFoundError(
+                f"No PrairieView metadata .xml file found at {prairieview_dir}"
+            )
 
-    Returns:
-        metainfo: A dict mapping keys to corresponding metadata values fetched from the
-            .xml file.
-    """
+        self._meta = None
 
-    # May return multiple xml files. Only need one that contains scan metadata.
-    xml_files_list = pathlib.Path(ome_tif_filepath).parent.glob("*.xml")
+    @property
+    def meta(self):
+        if self._meta is None:
+            self._meta = _extract_prairieview_metadata(self.xml_file)
+        return self._meta
 
-    for file in xml_files_list:
-        xml_tree = ET.parse(file)
-        xml_file = xml_tree.getroot()
-        if xml_file.find(".//Sequence"):
-            break
-    else:
-        raise FileNotFoundError(
-            f"No PrarieView metadata .xml file found at {pathlib.Path(ome_tif_filepath).parent}"
-        )
+    def get_prairieview_files(self, plane_idx=None, channel=None):
+        """Return the .ome.tif filenames for a given plane and channel.
+
+        `plane_idx` and `channel` may be omitted only when the scan has a
+        single plane and/or a single channel.
+        """
+        if plane_idx is None:
+            if self.meta['num_planes'] > 1:
+                raise ValueError(f"Please specify 'plane_idx' - Plane indices: {self.meta['plane_indices']}")
+            else:
+                plane_idx = self.meta['plane_indices'][0]
+        else:
+            assert plane_idx in self.meta['plane_indices'], f"Invalid 'plane_idx' - Plane indices: {self.meta['plane_indices']}"
+
+        if channel is None:
+            if self.meta['num_channels'] > 1:
+                raise ValueError(f"Please specify 'channel' - Channels: {self.meta['channels']}")
+            else:
+                channel = self.meta['channels'][0]
+        else:
+            assert channel in self.meta['channels'], f"Invalid 'channel' - Channels: {self.meta['channels']}"
+
+        frames = self._xml_root.findall(f".//Sequence/Frame/[@index='{plane_idx}']/File/[@channel='{channel}']")
+        return [f.attrib['filename'] for f in frames]
+
+
+def _extract_prairieview_metadata(xml_filepath: str):
+    xml_filepath = Path(xml_filepath)
+    if not xml_filepath.exists():
+        raise FileNotFoundError(f"{xml_filepath} does not exist")
+    xml_tree = ET.parse(xml_filepath)
+    xml_root = xml_tree.getroot()
 
     bidirectional_scan = False  # Does not support bidirectional
     roi = 0
     n_fields = 1  # Always contains 1 field
-    recording_start_time = xml_file.find(".//Sequence/[@cycle='1']").attrib.get("time")
+    recording_start_time = xml_root.find(".//Sequence/[@cycle='1']").attrib.get("time")
 
     # Get all channels and find unique values
     channel_list = [
         int(channel.attrib.get("channel"))
-        for channel in xml_file.iterfind(".//Sequence/Frame/File/[@channel]")
+        for channel in xml_root.iterfind(".//Sequence/Frame/File/[@channel]")
     ]
-    n_channels = len(set(channel_list))
-    n_frames = len(xml_file.findall(".//Sequence/Frame"))
+    channels = set(channel_list)
+    n_channels = len(channels)
+    n_frames = len(xml_root.findall(".//Sequence/Frame"))
 
     framerate = 1 / float(
-        xml_file.findall('.//PVStateValue/[@key="framePeriod"]')[0].attrib.get("value")
+        xml_root.findall('.//PVStateValue/[@key="framePeriod"]')[0].attrib.get("value")
     )  # rate = 1/framePeriod
 
     usec_per_line = (
         float(
-            xml_file.findall(".//PVStateValue/[@key='scanLinePeriod']")[0].attrib.get(
+            xml_root.findall(".//PVStateValue/[@key='scanLinePeriod']")[0].attrib.get(
                 "value"
             )
         )
@@ -67,15 +102,15 @@ def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
     )  # Convert from seconds to microseconds
 
     scan_datetime = datetime.strptime(
-        xml_file.attrib.get("date"), "%m/%d/%Y %I:%M:%S %p"
+        xml_root.attrib.get("date"), "%m/%d/%Y %I:%M:%S %p"
     )
 
     total_scan_duration = float(
-        xml_file.findall(".//Sequence/Frame")[-1].attrib.get("relativeTime")
+        xml_root.findall(".//Sequence/Frame")[-1].attrib.get("relativeTime")
     )
 
     pixel_height = int(
-        xml_file.findall(".//PVStateValue/[@key='pixelsPerLine']")[0].attrib.get(
+        xml_root.findall(".//PVStateValue/[@key='pixelsPerLine']")[0].attrib.get(
             "value"
         )
     )
@@ -83,7 +118,7 @@ def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
     pixel_width = pixel_height
 
     um_per_pixel = float(
-        xml_file.find(
+        xml_root.find(
             ".//PVStateValue/[@key='micronsPerPixel']/IndexedValue/[@index='XAxis']"
         ).attrib.get("value")
     )
@@ -92,43 +127,45 @@ def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
     # x and y coordinate values for the center of the field
     x_field = float(
-        xml_file.find(
+        xml_root.find(
             ".//PVStateValue/[@key='currentScanCenter']/IndexedValue/[@index='XAxis']"
         ).attrib.get("value")
     )
     y_field = float(
-        xml_file.find(
+        xml_root.find(
             ".//PVStateValue/[@key='currentScanCenter']/IndexedValue/[@index='YAxis']"
         ).attrib.get("value")
     )
+
     if (
-        xml_file.find(
+        xml_root.find(
             ".//Sequence/[@cycle='1']/Frame/PVStateShard/PVStateValue/[@key='positionCurrent']/SubindexedValues/[@index='ZAxis']"
         )
        is None
     ):
         z_fields = np.float64(
-            xml_file.find(
+            xml_root.find(
                 ".//PVStateValue/[@key='positionCurrent']/SubindexedValues/[@index='ZAxis']/SubindexedValue"
             ).attrib.get("value")
         )
         n_depths = 1
+        plane_indices = {0}
         assert z_fields.size == n_depths
         bidirection_z = False
-
     else:
         bidirection_z = (
-            xml_file.find(".//Sequence").attrib.get("bidirectionalZ") == "True"
+            xml_root.find(".//Sequence").attrib.get("bidirectionalZ") == "True"
         )
 
         # One "Frame" per depth in the .xml file. Gets number of frames in first sequence
         planes = [
             int(plane.attrib.get("index"))
-            for plane in xml_file.findall(".//Sequence/[@cycle='1']/Frame")
+            for plane in xml_root.findall(".//Sequence/[@cycle='1']/Frame")
         ]
-        n_depths = len(set(planes))
+        plane_indices = set(planes)
+        n_depths = len(plane_indices)
 
-        z_controllers = xml_file.findall(
+        z_controllers = xml_root.findall(
             ".//Sequence/[@cycle='1']/Frame/[@index='1']/PVStateShard/PVStateValue/[@key='positionCurrent']/SubindexedValues/[@index='ZAxis']/SubindexedValue"
         )
@@ -137,13 +174,13 @@ def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
         # must change depths.
         if len(z_controllers) > 1:
             z_repeats = []
-            for controller in xml_file.findall(
+            for controller in xml_root.findall(
                 ".//Sequence/[@cycle='1']/Frame/[@index='1']/PVStateShard/PVStateValue/[@key='positionCurrent']/SubindexedValues/[@index='ZAxis']/"
             ):
                 z_repeats.append(
                     [
                         float(z.attrib.get("value"))
-                        for z in xml_file.findall(
+                        for z in xml_root.findall(
                             ".//Sequence/[@cycle='1']/Frame/PVStateShard/PVStateValue/[@key='positionCurrent']/SubindexedValues/[@index='ZAxis']/SubindexedValue/[@subindex='{0}']".format(
                                 controller.attrib.get("subindex")
                             )
@@ -163,7 +200,7 @@ def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
         else:
             z_fields = [
                 z.attrib.get("value")
-                for z in xml_file.findall(
+                for z in xml_root.findall(
                     ".//Sequence/[@cycle='1']/Frame/PVStateShard/PVStateValue/[@key='positionCurrent']/SubindexedValues/[@index='ZAxis']/SubindexedValue/[@subindex='0']"
                 )
             ]
@@ -195,6 +232,47 @@ def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
         fieldY=y_field,
         fieldZ=z_fields,
         recording_time=recording_start_time,
+        channels=list(channels),
+        plane_indices=list(plane_indices),
     )
 
     return metainfo
+
+
+def get_prairieview_metadata(ome_tif_filepath: str) -> dict:
+    """Extract metadata for scans generated by Prairie View acquisition software.
+
+    The Prairie View software generates one `.ome.tif` imaging file per frame
+    acquired. The metadata for all frames is contained in one .xml file. This
+    function locates the .xml file and generates a dictionary necessary to
+    populate the DataJoint `ScanInfo` and `Field` tables. Prairie View works
+    with resonance scanners with a single field. Prairie View does not support
+    bidirectional x and y scanning. ROI information is not contained in the
+    `.xml` file. All images generated using Prairie View have square dimensions (e.g. 512x512).
+
+    Args:
+        ome_tif_filepath: An absolute path to the .ome.tif image file.
+
+    Raises:
+        FileNotFoundError: No .xml file containing information about the acquired scan
+            was found in the parent directory of `ome_tif_filepath`.
+
+    Returns:
+        metainfo: A dict mapping keys to corresponding metadata values fetched from the
+            .xml file.
+    """
+
+    # May return multiple xml files. Only need one that contains scan metadata.
+    xml_files_list = pathlib.Path(ome_tif_filepath).parent.glob("*.xml")
+
+    for file in xml_files_list:
+        xml_tree = ET.parse(file)
+        xml_file = xml_tree.getroot()
+        if xml_file.find(".//Sequence") is not None:
+            break
+    else:
+        raise FileNotFoundError(
+            f"No PrairieView metadata .xml file found at {pathlib.Path(ome_tif_filepath).parent}"
+        )
+
+    return _extract_prairieview_metadata(file)
diff --git a/element_interface/utils.py b/element_interface/utils.py
index 14d4eee..c3832f4 100644
--- a/element_interface/utils.py
+++ b/element_interface/utils.py
@@ -5,7 +5,9 @@ import pathlib
 import sys
 import uuid
 
-
+import json
+import pickle
+from datetime import datetime
 from datajoint.utils import to_camel_case
 
 logger = logging.getLogger("datajoint")
@@ -187,3 +189,79 @@ def __exit__(self, *args):
         logger.setLevel(self.prev_log_level)
         sys.stdout.close()
         sys.stdout = self._original_stdout
+
+
+def memoized_result(parameters: dict, output_directory: str):
+    """
+    A decorator factory that caches a function's results based on its input parameters and the state of the output directory.
+    If the function is called again with the same parameters and the files in the output directory are unchanged,
+    the cached results are returned; otherwise, the function is executed and the new results are cached along with metadata.
+
+    Conditions for robust usage:
+    - "output_directory" must store only the files generated by this function call, not be shared with other functions/processes
+    - "parameters" must fully capture the arguments that make the decorated function call unique
+
+    Args:
+        parameters: parameters that uniquely identify the function call
+        output_directory: directory location for the output files
+
+    Returns: a decorator that memoizes the decorated function's results and output files
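+
+    Example (a minimal sketch; `run_analysis` and all values are hypothetical):
+        >>> @memoized_result(
+        ...     parameters={"method": "suite2p", "threshold": 0.5},
+        ...     output_directory="/tmp/analysis_output",
+        ... )
+        ... def run_analysis():
+        ...     ...  # compute, write files into output_directory, return results
+        >>> results = run_analysis()  # identical re-run returns the cached results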
+    """
+
+    def decorator(func):
+        def wrapped(*args, **kwargs):
+            output_dir = _to_Path(output_directory)
+            input_hash = dict_to_uuid(parameters)
+            input_hash_fp = output_dir / f".{input_hash}.json"
+            # check if results already exist (from previous identical run)
+            output_dir_files_hash = dict_to_uuid(
+                {
+                    f.relative_to(output_dir).as_posix(): f.stat().st_size
+                    for f in output_dir.rglob("*")
+                    if f.name != f".{input_hash}.json"
+                }
+            )
+            if input_hash_fp.exists():
+                with open(input_hash_fp, "r") as f:
+                    meta = json.load(f)
+                if str(output_dir_files_hash) == meta["output_dir_files_hash"]:
+                    logger.info(f"Existing results found, skip '{func.__name__}'")
+                    with open(output_dir / f".{input_hash}_results.pickle", "rb") as f:
+                        results = pickle.load(f)
+                    return results
+            # no results - trigger the run
+            logger.info(f"No existing results found, calling '{func.__name__}'")
+            start_time = datetime.utcnow()
+            results = func(*args, **kwargs)
+
+            with open(output_dir / f".{input_hash}_results.pickle", "wb") as f:
+                pickle.dump(results, f, protocol=pickle.HIGHEST_PROTOCOL)
+
+            meta = {
+                "output_dir_files_hash": dict_to_uuid(
+                    {
+                        f.relative_to(output_dir).as_posix(): f.stat().st_size
+                        for f in output_dir.rglob("*")
+                        if f.name != f".{input_hash}.json"
+                    }
+                ),
+                "start_time": start_time,
+                "completion_time": datetime.utcnow(),
+            }
+            with open(input_hash_fp, "w") as f:
+                json.dump(meta, f, default=str)
+
+            return results
+
+        return wrapped
+
+    return decorator
diff --git a/requirements.txt b/requirements.txt
index 75d95e8..b18b774 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,2 @@
-dandi
+dandi>=0.56.0
 numpy