No-credential data download
CBroz1 committed Nov 7, 2024
1 parent 0c5a903 commit 8f780fb
Showing 4 changed files with 46 additions and 81 deletions.
31 changes: 17 additions & 14 deletions .github/workflows/test-conda.yml
@@ -7,6 +7,8 @@ on:
- '!documentation'
schedule: # once a day at midnight UTC
- cron: '0 0 * * *'
+  pull_request_review:
+    types: [submitted]
workflow_dispatch: # Manually trigger with 'Run workflow' button

concurrency: # Replace Cancel Workflow Action
@@ -15,15 +17,16 @@ concurrency: # Replace Cancel Workflow Action

jobs:
run-tests:
+    if: | # If not PR OR is approved PR.
+      github.event_name != 'pull_request_review'
+      || github.event.review.state == 'approved'
runs-on: ubuntu-latest
defaults:
run:
shell: bash -l {0}
env:
OS: ubuntu-latest
PYTHON: '3.9'
-      UCSF_BOX_TOKEN: ${{ secrets.UCSF_BOX_TOKEN }} # for download and testing
-      UCSF_BOX_USER: ${{ secrets.UCSF_BOX_USER }}
services:
mysql:
image: datajoint/mysql:8.0
@@ -57,23 +60,23 @@ jobs:
pip install --quiet .[test]
- name: Download data
env:
-        BASEURL: ftps://ftp.box.com/trodes_to_nwb_test_data/
-        NWBFILE: minirec20230622.nwb # Relative to Base URL
-        VID_ONE: 20230622_sample_01_a1/20230622_sample_01_a1.1.h264
-        VID_TWO: 20230622_sample_02_a1/20230622_sample_02_a1.1.h264
+        BASEURL: https://ucsf.box.com/shared/static/
+        NWB_URL: k3sgql6z475oia848q1rgms4zdh4rkjn.nwb
+        VID1URL: ykep8ek4ogad20wz4p0vuyuqfo60cv3w.h264
+        VID2URL: d2jjk0y565ru75xqojio3hymmehzr5he.h264
+        NWBFILE: minirec20230622.nwb
+        VID_ONE: 20230622_minirec_01_s1.1.h264
+        VID_TWO: 20230622_minirec_02_s2.1.h264
RAW_DIR: /home/runner/work/spyglass/spyglass/tests/_data/raw/
VID_DIR: /home/runner/work/spyglass/spyglass/tests/_data/video/
run: |
mkdir -p $RAW_DIR $VID_DIR
-          wget_opts() { # Declare func with download options
-            wget \
-              --recursive --no-verbose --no-host-directories --no-directories \
-              --user "$UCSF_BOX_USER" --password "$UCSF_BOX_TOKEN" \
-              -P "$1" "$BASEURL""$2"
+          curl_opts() { # Declare func with download options
+            curl -L --output "$1""$2" "$BASEURL""$3"
}
-          wget_opts $RAW_DIR $NWBFILE
-          wget_opts $VID_DIR $VID_ONE
-          wget_opts $VID_DIR $VID_TWO
+          curl_opts $RAW_DIR $NWBFILE $NWB_URL
+          curl_opts $VID_DIR $VID_ONE $VID1URL
+          curl_opts $VID_DIR $VID_TWO $VID2URL
- name: Run tests
run: |
pytest --no-docker --no-dlc
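
For reference, the new download step works because Box "shared/static" links are public: each file is addressed by an opaque hash and requires no login, only redirect-following (the `-L` in the curl calls above). A minimal Python sketch of the same no-credential pattern, with the URL hash and file name taken from the diff above; the `fetch` helper is hypothetical, not part of the repo:

from pathlib import Path
from urllib.request import urlretrieve

BASE_URL = "https://ucsf.box.com/shared/static/"  # public, no credentials

def fetch(dest_dir: str, target_name: str, url_hash: str) -> Path:
    """Download one shared-static file; urllib follows Box's redirect."""
    out_dir = Path(dest_dir).expanduser()
    out_dir.mkdir(parents=True, exist_ok=True)
    dest = out_dir / target_name
    if not dest.exists():  # skip files already downloaded
        urlretrieve(BASE_URL + url_hash, str(dest))
    return dest

fetch("tests/_data/raw", "minirec20230622.nwb",
      "k3sgql6z475oia848q1rgms4zdh4rkjn.nwb")

This mirrors the workflow's curl calls: fetch the hash-addressed URL, follow the redirect to the download host, and write to an explicit target name.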
6 changes: 0 additions & 6 deletions tests/README.md
@@ -2,12 +2,6 @@

## Environment

-To allow pytest helpers to automatically download requisite data, you'll need to
-set credentials for Box. Consider adding these to a private `.env` file.
-
-- `UCSF_BOX_USER`: UCSF email address
-- `UCSF_BOX_TOKEN`: Token generated from UCSF Box account
-
To facilitate headless testing of various Qt-based tools as well as Tensorflow,
`pyproject.toml` includes some environment variables associated with the
display. These are...
1 change: 0 additions & 1 deletion tests/conftest.py
@@ -108,7 +108,6 @@ def pytest_configure(config):
)

DOWNLOADS = DataDownloader(
-    nwb_file_name=TEST_FILE,
base_dir=BASE_DIR,
verbose=VERBOSE,
download_dlc=not NO_DLC,
89 changes: 29 additions & 60 deletions tests/data_downloader.py
@@ -1,5 +1,4 @@
from functools import cached_property
-from os import environ as os_environ
from pathlib import Path
from shutil import copy as shutil_copy
from subprocess import DEVNULL, Popen
@@ -9,135 +8,105 @@

from datajoint import logger as dj_logger

-UCSF_BOX_USER = os_environ.get("UCSF_BOX_USER")
-UCSF_BOX_TOKEN = os_environ.get("UCSF_BOX_TOKEN")
-BASE_URL = "ftps://ftp.box.com/trodes_to_nwb_test_data/"
+BASE_URL = "https://ucsf.box.com/shared/static/"

NON_DLC = 3 # First N items below are not for DeepLabCut
FILE_PATHS = [
{
"relative_dir": "raw",
"target_name": "minirec20230622.nwb",
"url": BASE_URL + "minirec20230622.nwb",
"url": BASE_URL + "k3sgql6z475oia848q1rgms4zdh4rkjn.nwb",
},
{
"relative_dir": "video",
"target_name": "20230622_minirec_01_s1.1.h264",
"url": BASE_URL + "20230622_sample_01_a1/20230622_sample_01_a1.1.h264",
"url": BASE_URL + "ykep8ek4ogad20wz4p0vuyuqfo60cv3w.h264",
},
{
"relative_dir": "video",
"target_name": "20230622_minirec_02_s2.1.h264",
"url": BASE_URL + "20230622_sample_02_a1/20230622_sample_02_a1.1.h264",
"url": BASE_URL + "d2jjk0y565ru75xqojio3hymmehzr5he.h264",
},
{
"relative_dir": "deeplabcut",
"target_name": "CollectedData_sc_eb.csv",
"url": BASE_URL + "minirec_dlc_items/CollectedData_sc_eb.csv",
"url": BASE_URL + "3nzqdfty51vrga7470rn2vayrtoor3ot.csv",
},
{
"relative_dir": "deeplabcut",
"target_name": "CollectedData_sc_eb.h5",
"url": BASE_URL + "minirec_dlc_items/CollectedData_sc_eb.h5",
"url": BASE_URL + "sx30rqljppeisi4jdyu53y51na0q9rff.h5",
},
{
"relative_dir": "deeplabcut",
"target_name": "img000.png",
"url": BASE_URL + "minirec_dlc_items/img000.png",
"url": BASE_URL + "wrvgncfbpjuzfhopkfaizzs069tb1ruu.png",
},
{
"relative_dir": "deeplabcut",
"target_name": "img001.png",
"url": BASE_URL + "minirec_dlc_items/img001.png",
"url": BASE_URL + "czbkxeinemat7jj7j0877pcosfqo9psh.png",
},
]


class DataDownloader:
def __init__(
self,
-        nwb_file_name,
file_paths=FILE_PATHS,
base_dir=".",
download_dlc=True,
verbose=True,
):
-        if not all([UCSF_BOX_USER, UCSF_BOX_TOKEN]):
-            raise ValueError(
-                "Missing os.environ credentials: UCSF_BOX_USER, UCSF_BOX_TOKEN."
-            )
-        if nwb_file_name != file_paths[0]["target_name"]:
-            raise ValueError(
-                f"Please adjust data_downloader.py to match: {nwb_file_name}"
-            )
-
-        self.cmd = [
-            "wget",
-            "--recursive",
-            "--no-host-directories",
-            "--no-directories",
-            "--user",
-            UCSF_BOX_USER,
-            "--password",
-            UCSF_BOX_TOKEN,
-            "-P", # Then need relative path, then url
-        ]

-        self.verbose = verbose
-        if not verbose:
-            self.cmd.insert(self.cmd.index("--recursive") + 1, "--no-verbose")
-            self.cmd_kwargs = dict(stdout=DEVNULL, stderr=DEVNULL)
-        else:
+        if verbose:
            self.cmd_kwargs = dict(stdout=stdout, stderr=stderr)
+        else:
+            self.cmd_kwargs = dict(stdout=DEVNULL, stderr=DEVNULL)

-        self.base_dir = Path(base_dir).resolve()
+        self.verbose = verbose
+        self.base_dir = Path(base_dir).expanduser().resolve()
self.download_dlc = download_dlc
self.file_paths = file_paths if download_dlc else file_paths[:NON_DLC]
self.base_dir.mkdir(exist_ok=True)

# Start downloads
_ = self.file_downloads

-    def rename_files(self):
-        """Redundant, but allows rerun later in startup process of conftest."""
-        for path in self.file_paths:
-            target, url = path["target_name"], path["url"]
-            target_dir = self.base_dir / path["relative_dir"]
-            orig = target_dir / url.split("/")[-1]
-            dest = target_dir / target
-
-            if orig.exists():
-                orig.rename(dest)

@cached_property # Only make list of processes once
def file_downloads(self) -> Dict[str, Union[Popen, None]]:
"""{File: POpen/None} for each file. If exists/finished, None."""
ret = dict()
-        self.rename_files()
for path in self.file_paths:
-            target, url = path["target_name"], path["url"]
target_dir = self.base_dir / path["relative_dir"]
target_dir.mkdir(exist_ok=True, parents=True)

+            target = path["target_name"]
dest = target_dir / target
-            cmd = (
-                ["echo", f"Already have {target}"]
-                if dest.exists()
-                else self.cmd + [target_dir, url]
-            )

+            if dest.exists():
+                cmd = ["echo", f"Already have {target}"]
+            else:
+                cmd = ["curl", "-L", "--output", str(dest), f"{path['url']}"]

+            print(f"cmd: {cmd}")

ret[target] = Popen(cmd, **self.cmd_kwargs)

return ret

def wait_for(self, target: str):
"""Wait for target to finish downloading."""
status = self.file_downloads.get(target).poll()

limit = 10
while status is None and limit > 0:
-            time_sleep(5) # Some
+            time_sleep(5)
limit -= 1
status = self.file_downloads.get(target).poll()
-        if status != 0:

+        if status != 0:  # Error downloading
raise ValueError(f"Error downloading: {target}")
-        if limit < 1:
+        if limit < 1:  # Reached attempt limit
raise TimeoutError(f"Timeout downloading: {target}")

def move_dlc_items(self, dest_dir: Path):
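Taken together, the reworked class needs no environment setup at all: downloads start in parallel at construction, one curl subprocess per file. A usage sketch under stated assumptions (the import path and base_dir value are hypothetical; the file name comes from FILE_PATHS above):

from data_downloader import DataDownloader  # import path may differ in conftest

downloads = DataDownloader(
    base_dir="tests/_data",  # hypothetical; conftest passes its own BASE_DIR
    download_dlc=False,      # restrict to the first NON_DLC (3) files
    verbose=True,
)

# Construction already launched the curl subprocesses via `file_downloads`;
# block until the one file a given test needs has landed:
downloads.wait_for("minirec20230622.nwb")

Because file_downloads is a cached_property, the Popen handles are created once and only polled thereafter, so wait_for can be called repeatedly without triggering re-downloads.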
