From 7f4fef0488edd5fbb8c98b9b664434ef563e9b96 Mon Sep 17 00:00:00 2001 From: Sam Bray Date: Fri, 22 Dec 2023 10:19:17 -0800 Subject: [PATCH 1/4] Add detail to sharing notebook --- notebooks/02_Data_Sync.ipynb | 835 ++++++++++++++++++++++++--- notebooks/py_scripts/02_Data_Sync.py | 120 +++- 2 files changed, 858 insertions(+), 97 deletions(-) diff --git a/notebooks/02_Data_Sync.ipynb b/notebooks/02_Data_Sync.ipynb index defae02b0..ac65faa29 100644 --- a/notebooks/02_Data_Sync.ipynb +++ b/notebooks/02_Data_Sync.ipynb @@ -53,20 +53,21 @@ " inserts, see\n", " [these additional tutorials](https://github.com/datajoint/datajoint-tutorials)\n", "\n", - "Let's start by importing the `spyglass` package.\n" + "Let's start by importing the `spyglass` package and testing that your environent\n", + " is properly congigured for kachery sharing\n" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "[2023-09-28 09:39:48,974][INFO]: Connecting root@localhost:3307\n", - "[2023-09-28 09:39:49,050][INFO]: Connected root@localhost:3307\n" + "[2023-12-22 08:22:32,189][INFO]: Connecting sambray@lmf-db.cin.ucsf.edu:3306\n", + "[2023-12-22 08:22:32,244][INFO]: Connected sambray@lmf-db.cin.ucsf.edu:3306\n" ] } ], @@ -86,7 +87,16 @@ "\n", "import warnings\n", "\n", - "warnings.filterwarnings(\"ignore\")" + "warnings.filterwarnings(\"ignore\")\n", + "\n", + "\n", + "env_vars = os.environ\n", + "# check that base dir is defined\n", + "assert (\n", + " \"SPYGLASS_BASE_DIR\" in env_vars\n", + "), \"SPYGLASS_BASE_DIR not set. Please set in your .bashrc or .bash_profile\"\n", + "# check that analysis dir is correctly defined relative to base dir\n", + "assert dj.config[\"stores\"][\"analysis\"][\"location\"]" ] }, { @@ -154,7 +164,7 @@ "1. Try to load from the local file system/store. \n", "2. 
If unavailable, check if it is in the relevant sharing table (i.e., \n", " `NwbKachery` or `AnalysisNWBKachery`).\n", - "3. If present, attempt to download from the associated Kachery Resource.\n", + "3. If present, attempt to download from the associated Kachery Resource to the user's spyglass analysis directory.\n", "\n", "_Note:_ large file downloads may take a long time, so downloading raw data is\n", "not supported. We suggest direct transfer with\n", @@ -180,7 +190,8 @@ "3. `franklab.public`: Public file sharing (not yet active)\n", "\n", "Setting your zone can either be done as as an environment variable or an item \n", - "in a DataJoint config.\n", + "in a DataJoint config. Spyglass will automatically handle setting the appropriate zone when downloading\n", + "database files through kachery\n", "\n", "- Environment variable:\n", "\n", @@ -195,7 +206,7 @@ " \"custom\": {\n", " \"kachery_zone\": \"franklab.default\",\n", " \"kachery_dirs\": {\n", - " \"cloud\": \"/your/base/path/.kachery_cloud\"\n", + " \"cloud\": \"/your/base/path/.kachery-cloud\"\n", " }\n", " }\n", " ```" @@ -205,7 +216,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Host Setup" + "## Host Setup\n", + "\n", + "- If you are a team member who will be sharing data through a pre-existing database and zone, please skip to `Sharing Data`\n", + "\n", + "- If you are on a client machine and need to access files shared with you, please skip to `Accessing Shared Data`" ] }, { @@ -250,11 +265,11 @@ "suggest using the same name for the zone and resource.\n", "\n", "_Note:_ For each zone, you need to run the local daemon that listens for\n", - "requests from that zone. An example of the bash script we use is\n", + "requests from that zone and uploads data to the bucket for client download when requested. 
An example of the bash script we use is\n", "\n", "```bash\n", " export KACHERY_ZONE=franklab.collaborators\n", - " export KACHERY_CLOUD_DIR=/stelmo/nwb/.kachery_cloud\n", + " export KACHERY_CLOUD_DIR=/stelmo/nwb/.kachery-cloud\n", " cd /stelmo/nwb/franklab_collaborators_resource\n", " npx kachery-resource@latest share\n", "```" @@ -272,13 +287,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We'll add zones/resources to the Spyglass database. First, we'll check existing\n", - "Zones." + "Once you have a hosted zone running, we need to add it's information to the Spyglass database. \n", + "This will allow spyglass to manage linking files from our analysis tables to kachery.\n", + "First, we'll check existing Zones." ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -355,20 +371,29 @@ "

lab_name

\n", " \n", " \n", - " \n", + " franklab.collaborators\n", + "franklab collaborator zone\n", + "/stelmo/nwb/.kachery-cloud\n", + "https://kachery-resource-proxy.herokuapp.com\n", + "Loren Frankfranklab.default\n", + "internal franklab kachery zone\n", + "/stelmo/nwb/.kachery-cloud\n", + "https://kachery-resource-proxy.herokuapp.com\n", + "Loren Frank \n", " \n", " \n", - "

Total: 0

\n", + "

Total: 2

\n", " " ], "text/plain": [ - "*kachery_zone_ description kachery_cloud_ kachery_proxy lab_name \n", - "+------------+ +------------+ +------------+ +------------+ +----------+\n", - "\n", - " (Total: 0)" + "*kachery_zone_ description kachery_cloud_ kachery_proxy lab_name \n", + "+------------+ +------------+ +------------+ +------------+ +------------+\n", + "franklab.colla franklab colla /stelmo/nwb/.k https://kacher Loren Frank \n", + "franklab.defau internal frank /stelmo/nwb/.k https://kacher Loren Frank \n", + " (Total: 2)" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -378,15 +403,71 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "Check existing file list:" + "To add a new hosted Zone, we need to prepare an entry for the `KacheryZone` table. \n", + "Note that the `kacherycloud_dir` key should be the path for the server daemon _hosting_ the zone,\n", + " and is not required to be present on the client machine:" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "zone_name = config.get(\"KACHERY_ZONE\")\n", + "cloud_dir = config.get(\"KACHERY_CLOUD_DIR\")\n", + "\n", + "zone_key = {\n", + " \"kachery_zone_name\": zone_name,\n", + " \"description\": \" \".join(zone_name.split(\".\")) + \" zone\",\n", + " \"kachery_cloud_dir\": cloud_dir,\n", + " \"kachery_proxy\": \"https://kachery-resource-proxy.herokuapp.com\",\n", + " \"lab_name\": sgc.Lab.fetch(\"lab_name\", limit=1)[0],\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use caution when inserting into an active database, as it could interfere with\n", + "ongoing work." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "sgs.KacheryZone().insert1(zone_key)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sharing Data" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once the zone exists, we can add `AnalysisNWB` files we want to share with members of the zone.\n", + "\n", + "The `AnalysisNwbFileKachery` table links analysis files made within other spyglass tables with a `uri` \n", + "used by kachery. We can view files already made available through kachery here:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -457,20 +538,56 @@ "

analysis_file_uri

\n", " the uri of the file\n", " \n", - " \n", + " franklab.collaborators\n", + "Banner20220224_18NJSA2B42.nwb\n", + "sha1://562b488936e5288eb89e7c480ae5c10b31c9cf2ffranklab.collaborators\n", + "Frodo20230810_0F936W4B9Z.nwb\n", + "sha1://b38d2b0fc1e9cde91cc239e1a0b50e3211b976fcfranklab.collaborators\n", + "Frodo20230810_2MJ374GSJX.nwb\n", + "sha1://ca9c238b83fd8539658a5100a9770a459a539771franklab.collaborators\n", + "Frodo20230810_4L35OWMGHQ.nwb\n", + "sha1://a8452cf8cf6e596b44569eb9189612d2dcd4c7d6franklab.collaborators\n", + "Frodo20230810_63PWL1N0VS.nwb\n", + "sha1://ca9c238b83fd8539658a5100a9770a459a539771franklab.collaborators\n", + "Frodo20230810_7LYW2MK0C9.nwb\n", + "sha1://ca9c238b83fd8539658a5100a9770a459a539771franklab.collaborators\n", + "Frodo20230810_998JNA1VBF.nwb\n", + "sha1://aa0e06028d52f5195cf24d61922ace233d8da783franklab.collaborators\n", + "Frodo20230810_CFKWZTGXX0.nwb\n", + "sha1://ca9c238b83fd8539658a5100a9770a459a539771franklab.collaborators\n", + "Frodo20230810_GMCOCDSJ54.nwb\n", + "sha1://2889b68d7aa2b30561e62be519c19759facad2d3franklab.collaborators\n", + "Frodo20230810_I25NQSZQ5O.nwb\n", + "sha1://973ea71d97aef91e050117bf860ea2ed83950b10franklab.collaborators\n", + "Frodo20230810_JS06HC1RLC.nwb\n", + "sha1://088a345c5eadfa3adea021de3f158aa86a527d4efranklab.collaborators\n", + "Frodo20230810_KEEEEBDUNE.nwb\n", + "sha1://4aa3199011b1405e745bbe96b62b825cd93bdacd \n", " \n", - " \n", - "

Total: 0

\n", + "

...

\n", + "

Total: 298

\n", " " ], "text/plain": [ "*kachery_zone_ *analysis_file analysis_file_\n", "+------------+ +------------+ +------------+\n", - "\n", - " (Total: 0)" + "franklab.colla Banner20220224 sha1://562b488\n", + "franklab.colla Frodo20230810_ sha1://b38d2b0\n", + "franklab.colla Frodo20230810_ sha1://ca9c238\n", + "franklab.colla Frodo20230810_ sha1://a8452cf\n", + "franklab.colla Frodo20230810_ sha1://ca9c238\n", + "franklab.colla Frodo20230810_ sha1://ca9c238\n", + "franklab.colla Frodo20230810_ sha1://aa0e060\n", + "franklab.colla Frodo20230810_ sha1://ca9c238\n", + "franklab.colla Frodo20230810_ sha1://2889b68\n", + "franklab.colla Frodo20230810_ sha1://973ea71\n", + "franklab.colla Frodo20230810_ sha1://088a345\n", + "franklab.colla Frodo20230810_ sha1://4aa3199\n", + " ...\n", + " (Total: 298)" ] }, - "execution_count": 4, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -480,62 +597,12 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "Prepare an entry for the `KacheryZone` table:" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [], - "source": [ - "zone_name = config.get(\"KACHERY_ZONE\")\n", - "cloud_dir = config.get(\"KACHERY_CLOUD_DIR\")\n", + "We can share additional results by populating new entries in this table.\n", "\n", - "zone_key = {\n", - " \"kachery_zone_name\": zone_name,\n", - " \"description\": \" \".join(zone_name.split(\".\")) + \" zone\",\n", - " \"kachery_cloud_dir\": cloud_dir,\n", - " \"kachery_proxy\": \"https://kachery-resource-proxy.herokuapp.com\",\n", - " \"lab_name\": sgc.Lab.fetch(\"lab_name\", limit=1)[0],\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Use caution when inserting into an active database, as it could interfere with\n", - "ongoing work." 
- ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "sgs.KacheryZone().insert1(zone_key)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data Setup" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Once the zone exists, we can add `AnalysisNWB` files we want to share by adding\n", - "entries to the `AnalysisNwbfileKacherySelection` table.\n", + "To do so we first add these entries to the `AnalysisNwbfileKacherySelection` table.\n", "\n", "_Note:_ This step depends on having previously run an analysis on the example \n", "file." @@ -584,6 +651,42 @@ "sgs.AnalysisNwbfileKachery.populate()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, we can share data based on it's source table in the database using the helper function `share_data_to_kachery()` \n", + "\n", + "This will take a list of tables and add all associated analysis files for entries corresponding with a passed restriction. 
\n", + "Here, we are sharing LFP and position data for the Session \"minirec20230622_.nwb\"" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from spyglass.sharing import share_data_to_kachery\n", + "from spyglass.lfp.v1 import LFPV1\n", + "from spyglass.position.v1 import TrodesPosV1\n", + "\n", + "tables = [LFPV1, TrodesPosV1]\n", + "restriction = {\"nwb_file_name\": \"minirec20230622_.nwb\"}\n", + "share_data_to_kachery(\n", + " table_list=tables,\n", + " restriction=restriction,\n", + " zone_name=\"franklab.collaborators\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Managing access" + ] + }, { "attachments": {}, "cell_type": "markdown", @@ -619,6 +722,576 @@ "```" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Accessing Shared Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you are a collaborator accessing datasets, you first need to be given access to the zone by a collaborator admin (see above).\n", + "\n", + "If you know the uri for the dataset you are accessing you can test this process below (example is for members of `franklab.collaborators`)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import kachery_cloud as kcl\n", + "\n", + "path = \"/path/to/save/file/to/test\"\n", + "zone_name = \"franklab.collaborators\"\n", + "uri = \"sha1://ceac0c1995580dfdda98d6aa45b7dda72d63afe4\"\n", + "\n", + "os.environ[\"KACHERY_ZONE\"] = zone_name\n", + "kcl.load_file(uri=uri, dest=path, verbose=True)\n", + "assert os.path.exists(path), f\"File not downloaded to {path}\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In normal use, spyglass will manage setting the zone and uri when accessing files.\n", + "In general, the easiest way to access data values will be through the `fetch1_dataframe()`\n", + 
"function part of many of the spyglass tables. In brief this will check for the appropriate\n", + "nwb analysis file in your local directory, and if not found, attempt to download it from the appropriate kachery zone.\n", + "It will then parse the relevant information from that nwb file into a pandas dataframe. \n", + "\n", + "We will look at an example with data from the `LFPV1` table:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

nwb_file_name

\n", + " name of the NWB file\n", + "
\n", + "

lfp_electrode_group_name

\n", + " the name of this group of electrodes\n", + "
\n", + "

target_interval_list_name

\n", + " descriptive name of this interval list\n", + "
\n", + "

filter_name

\n", + " descriptive name of this filter\n", + "
\n", + "

filter_sampling_rate

\n", + " sampling rate for this filter\n", + "
\n", + "

analysis_file_name

\n", + " name of the file\n", + "
\n", + "

interval_list_name

\n", + " descriptive name of this interval list\n", + "
\n", + "

lfp_object_id

\n", + " the NWB object ID for loading this object from the file\n", + "
\n", + "

lfp_sampling_rate

\n", + " the sampling rate, in HZ\n", + "
Winnie20220713_.nwbtetrode_sample_Winniepos 0 valid timesLFP 0-400 Hz30000Winnie20220713_C52XDICU6D.nwblfp_tetrode_sample_Winnie_pos 0 valid times_valid timesa89c590f-290b-4f9c-a568-b9ae67eee96d1000.0
\n", + " \n", + "

Total: 1

\n", + " " + ], + "text/plain": [ + "*nwb_file_name *lfp_electrode *target_interv *filter_name *filter_sampli analysis_file_ interval_list_ lfp_object_id lfp_sampling_r\n", + "+------------+ +------------+ +------------+ +------------+ +------------+ +------------+ +------------+ +------------+ +------------+\n", + "Winnie20220713 tetrode_sample pos 0 valid ti LFP 0-400 Hz 30000 Winnie20220713 lfp_tetrode_sa a89c590f-290b- 1000.0 \n", + " (Total: 1)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from spyglass.lfp.v1 import LFPV1\n", + "\n", + "# Here is the data we are going to access\n", + "LFPV1 & {\n", + " \"nwb_file_name\": \"Winnie20220713_.nwb\",\n", + " \"target_interval_list_name\": \"pos 0 valid times\",\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can access the data using `fetch1_dataframe()`" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...18192021222324252627
time
1.657741e+09-90-65-104-89-31-68-27-26-32-92...-91-99-87-117-123-85-73-74-6213
1.657741e+09-202-145-227-220-57-130-84-68-30-191...-168-199-176-250-238-172-158-140-12754
1.657741e+09-218-150-224-216-84-154-84-93-29-206...-125-153-158-219-206-137-132-129-12069
1.657741e+09-226-151-240-230-97-144-71-95-38-236...-105-136-149-183-210-111-83-129-92116
1.657741e+09-235-154-250-231-54-91-81-89-30-247...-85-107-116-140-190-68-28-114-36193
..................................................................
1.657742e+09-3-27-629-227-442-16725-15...-83-217-61-248-196-63-111-211-52166
1.657742e+0944194482-175-40713956238...3-11232-177-12322-5-14754285
1.657742e+09946392129-121-341611328888...62-28104-99-538261-62125347
1.657742e+09142107135179-106-37088178120148...11348199-447145108-13213453
1.657742e+091088495130-82-2815213473105...9746169-162211894-3175348
\n", + "

901529 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 ... 18 19 \\\n", + "time ... \n", + "1.657741e+09 -90 -65 -104 -89 -31 -68 -27 -26 -32 -92 ... -91 -99 \n", + "1.657741e+09 -202 -145 -227 -220 -57 -130 -84 -68 -30 -191 ... -168 -199 \n", + "1.657741e+09 -218 -150 -224 -216 -84 -154 -84 -93 -29 -206 ... -125 -153 \n", + "1.657741e+09 -226 -151 -240 -230 -97 -144 -71 -95 -38 -236 ... -105 -136 \n", + "1.657741e+09 -235 -154 -250 -231 -54 -91 -81 -89 -30 -247 ... -85 -107 \n", + "... ... ... ... ... ... ... .. ... ... ... ... ... ... \n", + "1.657742e+09 -3 -27 -6 29 -227 -442 -1 67 25 -15 ... -83 -217 \n", + "1.657742e+09 44 19 44 82 -175 -407 13 95 62 38 ... 3 -112 \n", + "1.657742e+09 94 63 92 129 -121 -341 61 132 88 88 ... 62 -28 \n", + "1.657742e+09 142 107 135 179 -106 -370 88 178 120 148 ... 113 48 \n", + "1.657742e+09 108 84 95 130 -82 -281 52 134 73 105 ... 97 46 \n", + "\n", + " 20 21 22 23 24 25 26 27 \n", + "time \n", + "1.657741e+09 -87 -117 -123 -85 -73 -74 -62 13 \n", + "1.657741e+09 -176 -250 -238 -172 -158 -140 -127 54 \n", + "1.657741e+09 -158 -219 -206 -137 -132 -129 -120 69 \n", + "1.657741e+09 -149 -183 -210 -111 -83 -129 -92 116 \n", + "1.657741e+09 -116 -140 -190 -68 -28 -114 -36 193 \n", + "... ... ... ... ... ... ... ... ... 
\n", + "1.657742e+09 -61 -248 -196 -63 -111 -211 -52 166 \n", + "1.657742e+09 32 -177 -123 22 -5 -147 54 285 \n", + "1.657742e+09 104 -99 -53 82 61 -62 125 347 \n", + "1.657742e+09 199 -44 7 145 108 -13 213 453 \n", + "1.657742e+09 169 -16 22 118 94 -3 175 348 \n", + "\n", + "[901529 rows x 28 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(\n", + " LFPV1\n", + " & {\n", + " \"nwb_file_name\": \"Winnie20220713_.nwb\",\n", + " \"target_interval_list_name\": \"pos 0 valid times\",\n", + " }\n", + ").fetch1_dataframe()" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/notebooks/py_scripts/02_Data_Sync.py b/notebooks/py_scripts/02_Data_Sync.py index 06473e567..ee18ffafd 100644 --- a/notebooks/py_scripts/02_Data_Sync.py +++ b/notebooks/py_scripts/02_Data_Sync.py @@ -45,7 +45,8 @@ # inserts, see # [these additional tutorials](https://github.com/datajoint/datajoint-tutorials) # -# Let's start by importing the `spyglass` package. +# Let's start by importing the `spyglass` package and testing that your environent +# is properly congigured for kachery sharing # # + @@ -65,6 +66,15 @@ import warnings warnings.filterwarnings("ignore") + + +env_vars = os.environ +# check that base dir is defined +assert ( + "SPYGLASS_BASE_DIR" in env_vars +), "SPYGLASS_BASE_DIR not set. Please set in your .bashrc or .bash_profile" +# check that analysis dir is correctly defined relative to base dir +assert dj.config["stores"]["analysis"]["location"] # - # For example analysis files, run the code hidden below. @@ -115,7 +125,7 @@ # 1. Try to load from the local file system/store. # 2. If unavailable, check if it is in the relevant sharing table (i.e., # `NwbKachery` or `AnalysisNWBKachery`). -# 3. If present, attempt to download from the associated Kachery Resource. +# 3. If present, attempt to download from the associated Kachery Resource to the user's spyglass analysis directory. 
# # _Note:_ large file downloads may take a long time, so downloading raw data is # not supported. We suggest direct transfer with @@ -133,7 +143,8 @@ # 3. `franklab.public`: Public file sharing (not yet active) # # Setting your zone can either be done as as an environment variable or an item -# in a DataJoint config. +# in a DataJoint config. Spyglass will automatically handle setting the appropriate zone when downloading +# database files through kachery # # - Environment variable: # @@ -148,12 +159,16 @@ # "custom": { # "kachery_zone": "franklab.default", # "kachery_dirs": { -# "cloud": "/your/base/path/.kachery_cloud" +# "cloud": "/your/base/path/.kachery-cloud" # } # } # ``` # ## Host Setup +# +# - If you are a team member who will be sharing data through a pre-existing database and zone, please skip to `Sharing Data` +# +# - If you are on a client machine and need to access files shared with you, please skip to `Accessing Shared Data` # ### Zones # @@ -177,11 +192,11 @@ # suggest using the same name for the zone and resource. # # _Note:_ For each zone, you need to run the local daemon that listens for -# requests from that zone. An example of the bash script we use is +# requests from that zone and uploads data to the bucket for client download when requested. An example of the bash script we use is # # ```bash # export KACHERY_ZONE=franklab.collaborators -# export KACHERY_CLOUD_DIR=/stelmo/nwb/.kachery_cloud +# export KACHERY_CLOUD_DIR=/stelmo/nwb/.kachery-cloud # cd /stelmo/nwb/franklab_collaborators_resource # npx kachery-resource@latest share # ``` @@ -189,16 +204,15 @@ # ## Database Setup # -# We'll add zones/resources to the Spyglass database. First, we'll check existing -# Zones. +# Once you have a hosted zone running, we need to add it's information to the Spyglass database. +# This will allow spyglass to manage linking files from our analysis tables to kachery. +# First, we'll check existing Zones. 
sgs.KacheryZone() -# Check existing file list: - -sgs.AnalysisNwbfileKachery() - -# Prepare an entry for the `KacheryZone` table: +# To add a new hosted Zone, we need to prepare an entry for the `KacheryZone` table. +# Note that the `kacherycloud_dir` key should be the path for the server daemon _hosting_ the zone, +# and is not required to be present on the client machine: # + zone_name = config.get("KACHERY_ZONE") @@ -218,10 +232,18 @@ sgs.KacheryZone().insert1(zone_key) -# ## Data Setup +# ## Sharing Data -# Once the zone exists, we can add `AnalysisNWB` files we want to share by adding -# entries to the `AnalysisNwbfileKacherySelection` table. +# Once the zone exists, we can add `AnalysisNWB` files we want to share with members of the zone. +# +# The `AnalysisNwbFileKachery` table links analysis files made within other spyglass tables with a `uri` +# used by kachery. We can view files already made available through kachery here: + +sgs.AnalysisNwbfileKachery() + +# We can share additional results by populating new entries in this table. +# +# To do so we first add these entries to the `AnalysisNwbfileKacherySelection` table. # # _Note:_ This step depends on having previously run an analysis on the example # file. @@ -247,6 +269,28 @@ sgs.AnalysisNwbfileKachery.populate() +# Alternatively, we can share data based on it's source table in the database using the helper function `share_data_to_kachery()` +# +# This will take a list of tables and add all associated analysis files for entries corresponding with a passed restriction. 
+# Here, we are sharing LFP and position data for the Session "minirec20230622_.nwb" + +# + +from spyglass.sharing import share_data_to_kachery +from spyglass.lfp.v1 import LFPV1 +from spyglass.position.v1 import TrodesPosV1 + +tables = [LFPV1, TrodesPosV1] +restriction = {"nwb_file_name": "minirec20230622_.nwb"} +share_data_to_kachery( + table_list=tables, + restriction=restriction, + zone_name="franklab.collaborators", +) + +# - + +# ## Managing access + # + [markdown] jupyter={"outputs_hidden": true} # If all of that worked, # @@ -272,6 +316,50 @@ # ``` # - +# ## Accessing Shared Data + +# If you are a collaborator accessing datasets, you first need to be given access to the zone by a collaborator admin (see above). +# +# If you know the uri for the dataset you are accessing you can test this process below (example is for members of `franklab.collaborators`) + +# + +import kachery_cloud as kcl + +path = "/path/to/save/file/to/test" +zone_name = "franklab.collaborators" +uri = "sha1://ceac0c1995580dfdda98d6aa45b7dda72d63afe4" + +os.environ["KACHERY_ZONE"] = zone_name +kcl.load_file(uri=uri, dest=path, verbose=True) +assert os.path.exists(path), f"File not downloaded to {path}" +# - + +# In normal use, spyglass will manage setting the zone and uri when accessing files. +# In general, the easiest way to access data values will be through the `fetch1_dataframe()` +# function part of many of the spyglass tables. In brief this will check for the appropriate +# nwb analysis file in your local directory, and if not found, attempt to download it from the appropriate kachery zone. +# It will then parse the relevant information from that nwb file into a pandas dataframe. 
+# +# We will look at an example with data from the `LFPV1` table: + +from spyglass.lfp.v1 import LFPV1 + +# Here is the data we are going to access +LFPV1 & { + "nwb_file_name": "Winnie20220713_.nwb", + "target_interval_list_name": "pos 0 valid times", +} + +# We can access the data using `fetch1_dataframe()` + +( + LFPV1 + & { + "nwb_file_name": "Winnie20220713_.nwb", + "target_interval_list_name": "pos 0 valid times", + } +).fetch1_dataframe() + # # Up Next # In the [next notebook](./03_Merge_Tables.ipynb), we'll explore the details of a From 86ab651fa8e9313165a7b888f1d5526f73edd517 Mon Sep 17 00:00:00 2001 From: Sam Bray Date: Fri, 22 Dec 2023 10:36:55 -0800 Subject: [PATCH 2/4] Fix spelling --- notebooks/02_Data_Sync.ipynb | 4 ++-- notebooks/py_scripts/02_Data_Sync.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/notebooks/02_Data_Sync.ipynb b/notebooks/02_Data_Sync.ipynb index ac65faa29..4a3f20423 100644 --- a/notebooks/02_Data_Sync.ipynb +++ b/notebooks/02_Data_Sync.ipynb @@ -53,8 +53,8 @@ " inserts, see\n", " [these additional tutorials](https://github.com/datajoint/datajoint-tutorials)\n", "\n", - "Let's start by importing the `spyglass` package and testing that your environent\n", - " is properly congigured for kachery sharing\n" + "Let's start by importing the `spyglass` package and testing that your environment\n", + " is properly configured for kachery sharing\n" ] }, { diff --git a/notebooks/py_scripts/02_Data_Sync.py b/notebooks/py_scripts/02_Data_Sync.py index ee18ffafd..60297729f 100644 --- a/notebooks/py_scripts/02_Data_Sync.py +++ b/notebooks/py_scripts/02_Data_Sync.py @@ -45,8 +45,8 @@ # inserts, see # [these additional tutorials](https://github.com/datajoint/datajoint-tutorials) # -# Let's start by importing the `spyglass` package and testing that your environent -# is properly congigured for kachery sharing +# Let's start by importing the `spyglass` package and testing that your environment +# is properly configured 
for kachery sharing # # + @@ -286,7 +286,6 @@ restriction=restriction, zone_name="franklab.collaborators", ) - # - # ## Managing access @@ -342,6 +341,7 @@ # # We will look at an example with data from the `LFPV1` table: +# + from spyglass.lfp.v1 import LFPV1 # Here is the data we are going to access @@ -349,6 +349,7 @@ "nwb_file_name": "Winnie20220713_.nwb", "target_interval_list_name": "pos 0 valid times", } +# - # We can access the data using `fetch1_dataframe()` From 9c54b5751c7fa43e36c0a6248755d6eb2c7fc707 Mon Sep 17 00:00:00 2001 From: Sam Bray Date: Wed, 27 Dec 2023 09:19:27 -0800 Subject: [PATCH 3/4] minor sharing notebook edits --- notebooks/02_Data_Sync.ipynb | 34 +++++++++++++--------------- notebooks/py_scripts/02_Data_Sync.py | 25 +++++++++----------- 2 files changed, 27 insertions(+), 32 deletions(-) diff --git a/notebooks/02_Data_Sync.ipynb b/notebooks/02_Data_Sync.ipynb index 4a3f20423..74f01a46b 100644 --- a/notebooks/02_Data_Sync.ipynb +++ b/notebooks/02_Data_Sync.ipynb @@ -54,12 +54,14 @@ " [these additional tutorials](https://github.com/datajoint/datajoint-tutorials)\n", "\n", "Let's start by importing the `spyglass` package and testing that your environment\n", - " is properly configured for kachery sharing\n" + " is properly configured for kachery sharing\n", + "\n", + "If you haven't already done so, be sure to set up your Spyglass base directory and Kachery sharing directory with [Setup](./00_Setup.ipynb)" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -87,16 +89,7 @@ "\n", "import warnings\n", "\n", - "warnings.filterwarnings(\"ignore\")\n", - "\n", - "\n", - "env_vars = os.environ\n", - "# check that base dir is defined\n", - "assert (\n", - " \"SPYGLASS_BASE_DIR\" in env_vars\n", - "), \"SPYGLASS_BASE_DIR not set. 
Please set in your .bashrc or .bash_profile\"\n", - "# check that analysis dir is correctly defined relative to base dir\n", - "assert dj.config[\"stores\"][\"analysis\"][\"location\"]" + "warnings.filterwarnings(\"ignore\")" ] }, { @@ -197,7 +190,7 @@ "\n", " ```bash\n", " export KACHERY_ZONE=franklab.default\n", - " export KACHERY_CLOUD_DIR=/stelmo/nwb/.kachery_cloud\n", + " export KACHERY_CLOUD_DIR=/stelmo/nwb/.kachery-cloud\n", " ```\n", "\n", "- DataJoint Config:\n", @@ -218,9 +211,9 @@ "source": [ "## Host Setup\n", "\n", - "- If you are a team member who will be sharing data through a pre-existing database and zone, please skip to `Sharing Data`\n", + "- If you are a member of a team with a pre-existing database and zone who will be sharing data, please skip to `Sharing Data`\n", "\n", - "- If you are on a client machine and need to access files shared with you, please skip to `Accessing Shared Data`" + "- If you are a collaborator outside your team's network and need to access files shared with you, please skip to `Accessing Shared Data`" ] }, { @@ -272,7 +265,12 @@ " export KACHERY_CLOUD_DIR=/stelmo/nwb/.kachery-cloud\n", " cd /stelmo/nwb/franklab_collaborators_resource\n", " npx kachery-resource@latest share\n", - "```" + "```\n", + "\n", + "For convenience, we recommend saving this code as a bash script which can be executed by the local daemon. For franklab members, these scripts can be found in the directory `/home/loren/bin/`:\n", + "\n", + "- run_restart_kachery_collab.sh\n", + "- run_restart_kachery_default.sh" ] }, { @@ -287,7 +285,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Once you have a hosted zone running, we need to add it's information to the Spyglass database. \n", + "Once you have a hosted zone running, we need to add its information to the Spyglass database. \n", "This will allow spyglass to manage linking files from our analysis tables to kachery.\n", "First, we'll check existing Zones."
] @@ -655,7 +653,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Alternatively, we can share data based on it's source table in the database using the helper function `share_data_to_kachery()` \n", + "Alternatively, we can share data based on its source table in the database using the helper function `share_data_to_kachery()` \n", "\n", "This will take a list of tables and add all associated analysis files for entries corresponding with a passed restriction. \n", "Here, we are sharing LFP and position data for the Session \"minirec20230622_.nwb\"" diff --git a/notebooks/py_scripts/02_Data_Sync.py b/notebooks/py_scripts/02_Data_Sync.py index 60297729f..a56c951ef 100644 --- a/notebooks/py_scripts/02_Data_Sync.py +++ b/notebooks/py_scripts/02_Data_Sync.py @@ -48,6 +48,7 @@ # Let's start by importing the `spyglass` package and testing that your environment # is properly configured for kachery sharing # +# If you haven't already done so, be sure to set up your Spyglass base directory and Kachery sharing directory with [Setup](./00_Setup.ipynb) # + import os @@ -66,15 +67,6 @@ import warnings warnings.filterwarnings("ignore") - - -env_vars = os.environ -# check that base dir is defined -assert ( - "SPYGLASS_BASE_DIR" in env_vars -), "SPYGLASS_BASE_DIR not set. Please set in your .bashrc or .bash_profile" -# check that analysis dir is correctly defined relative to base dir -assert dj.config["stores"]["analysis"]["location"] # - # For example analysis files, run the code hidden below.
@@ -150,7 +142,7 @@ # # ```bash # export KACHERY_ZONE=franklab.default -# export KACHERY_CLOUD_DIR=/stelmo/nwb/.kachery_cloud +# export KACHERY_CLOUD_DIR=/stelmo/nwb/.kachery-cloud # ``` # # - DataJoint Config: @@ -166,9 +158,9 @@ # ## Host Setup # -# - If you are a team member who will be sharing data through a pre-existing database and zone, please skip to `Sharing Data` +# - If you are a member of a team with a pre-existing database and zone who will be sharing data, please skip to `Sharing Data` # -# - If you are on a client machine and need to access files shared with you, please skip to `Accessing Shared Data` +# - If you are a collaborator outside your team's network and need to access files shared with you, please skip to `Accessing Shared Data` # ### Zones # @@ -200,11 +192,16 @@ # cd /stelmo/nwb/franklab_collaborators_resource # npx kachery-resource@latest share # ``` +# +# For convenience, we recommend saving this code as a bash script which can be executed by the local daemon. For franklab members, these scripts can be found in the directory `/home/loren/bin/`: +# +# - run_restart_kachery_collab.sh +# - run_restart_kachery_default.sh # ## Database Setup # -# Once you have a hosted zone running, we need to add it's information to the Spyglass database. +# Once you have a hosted zone running, we need to add its information to the Spyglass database. # This will allow spyglass to manage linking files from our analysis tables to kachery. # First, we'll check existing Zones. @@ -269,7 +266,7 @@ sgs.AnalysisNwbfileKachery.populate() -# Alternatively, we can share data based on it's source table in the database using the helper function `share_data_to_kachery()` +# Alternatively, we can share data based on its source table in the database using the helper function `share_data_to_kachery()` # # This will take a list of tables and add all associated analysis files for entries corresponding with a passed restriction.
# Here, we are sharing LFP and position data for the Session "minirec20230622_.nwb" From 357cb1b22d8c19b9e11bbb1273fb0e979fa97aa9 Mon Sep 17 00:00:00 2001 From: Sam Bray Date: Wed, 27 Dec 2023 10:19:16 -0800 Subject: [PATCH 4/4] change kachery dir in example dj config --- dj_local_conf_example.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dj_local_conf_example.json b/dj_local_conf_example.json index 63efcae68..437d77577 100644 --- a/dj_local_conf_example.json +++ b/dj_local_conf_example.json @@ -32,7 +32,7 @@ "base": "/your/path/like/stelmo/nwb/" }, "kachery_dirs": { - "cloud": "/your/path/.kachery_cloud" + "cloud": "/your/path/.kachery-cloud" }, "dlc_dirs": { "base": "/your/path/like/nimbus/deeplabcut/"