From 1328668fc50a10f75f07aacf1bd519386e992d9a Mon Sep 17 00:00:00 2001
From: Ben Dichter
Date: Tue, 11 Feb 2025 11:46:30 -0600
Subject: [PATCH] Add streaming example for NWB files from DANDI using Zarr

---
 docs/gallery/advanced_io/plot_zarr_io.py | 37 ++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/docs/gallery/advanced_io/plot_zarr_io.py b/docs/gallery/advanced_io/plot_zarr_io.py
index b61fe4a03..59a412c3d 100644
--- a/docs/gallery/advanced_io/plot_zarr_io.py
+++ b/docs/gallery/advanced_io/plot_zarr_io.py
@@ -93,6 +93,43 @@
 with NWBZarrIO(path=absolute_path, mode="r") as io:
     read_nwbfile = io.read()
 
+#######################################################################################
+# Streaming from DANDI
+# --------------------
+# One of the advantages of Zarr is its ability to efficiently stream data from cloud storage.
+# Here's how to stream NWB files stored in Zarr format from the DANDI archive:
+
+from dandi.dandiapi import DandiAPIClient
+
+# Find the first Zarr-backed NWB asset of a dandiset and resolve its S3 URL
+with DandiAPIClient() as client:
+    dandiset = client.get_dandiset("000001", "draft")  # Replace with your dandiset ID
+    zarr_assets = (a for a in dandiset.get_assets() if a.path.endswith(".nwb.zarr"))
+    asset = next(zarr_assets, None)
+    if asset is None:
+        raise ValueError("No Zarr-backed NWB asset found in the dandiset")
+    s3_url = asset.get_content_url(follow_redirects=1, strip_query=True)
+
+# NWBZarrIO accepts the URL of a remote Zarr store directly, so no separate Zarr store
+# or S3 filesystem object needs to be created to read a publicly accessible asset.
+with NWBZarrIO(path=s3_url, mode="r") as io:
+    nwbfile = io.read()
+
+    # 'neural_data' is a placeholder; use the name of an acquisition object in your file.
+    # Keep a handle to the dataset instead of loading the whole array into memory.
+    if "neural_data" in nwbfile.acquisition:
+        data = nwbfile.acquisition["neural_data"].data
+
+        # Example: slicing only loads the requested timepoints into memory
+        first_samples = data[:1000]
+
+        # Process data in chunks for memory efficiency
+        chunk_size = 1000
+        for start in range(0, len(data), chunk_size):
+            chunk = data[start:start + chunk_size]
+            # Process your chunk here
+            chunk_mean = chunk.mean()
+
 #######################################################################################
 # .. note::
 #     For more information, see the :hdmf-zarr:`hdmf-zarr documentation<>`.