No-credential data download
CBroz1 committed Nov 7, 2024
1 parent 0c5a903 commit 8f780fb
Showing 4 changed files with 46 additions and 81 deletions.
31 changes: 17 additions & 14 deletions .github/workflows/test-conda.yml
@@ -7,6 +7,8 @@ on:
- '!documentation'
schedule: # once a day at midnight UTC
- cron: '0 0 * * *'
+  pull_request_review:
+    types: [submitted]
workflow_dispatch: # Manually trigger with 'Run workflow' button

concurrency: # Replace Cancel Workflow Action
@@ -15,15 +17,16 @@ concurrency: # Replace Cancel Workflow Action

jobs:
run-tests:
+    if: | # If not PR OR is approved PR.
+      github.event_name != 'pull_request_review'
+      || github.event.review.state == 'approved'
runs-on: ubuntu-latest
defaults:
run:
shell: bash -l {0}
env:
OS: ubuntu-latest
PYTHON: '3.9'
-      UCSF_BOX_TOKEN: ${{ secrets.UCSF_BOX_TOKEN }} # for download and testing
-      UCSF_BOX_USER: ${{ secrets.UCSF_BOX_USER }}
services:
mysql:
image: datajoint/mysql:8.0
@@ -57,23 +60,23 @@ jobs:
pip install --quiet .[test]
- name: Download data
env:
-        BASEURL: ftps://ftp.box.com/trodes_to_nwb_test_data/
-        NWBFILE: minirec20230622.nwb # Relative to Base URL
-        VID_ONE: 20230622_sample_01_a1/20230622_sample_01_a1.1.h264
-        VID_TWO: 20230622_sample_02_a1/20230622_sample_02_a1.1.h264
+        BASEURL: https://ucsf.box.com/shared/static/
+        NWB_URL: k3sgql6z475oia848q1rgms4zdh4rkjn.nwb
+        VID1URL: ykep8ek4ogad20wz4p0vuyuqfo60cv3w.h264
+        VID2URL: d2jjk0y565ru75xqojio3hymmehzr5he.h264
+        NWBFILE: minirec20230622.nwb
+        VID_ONE: 20230622_minirec_01_s1.1.h264
+        VID_TWO: 20230622_minirec_02_s2.1.h264
RAW_DIR: /home/runner/work/spyglass/spyglass/tests/_data/raw/
VID_DIR: /home/runner/work/spyglass/spyglass/tests/_data/video/
run: |
mkdir -p $RAW_DIR $VID_DIR
-          wget_opts() { # Declare func with download options
-            wget \
-              --recursive --no-verbose --no-host-directories --no-directories \
-              --user "$UCSF_BOX_USER" --password "$UCSF_BOX_TOKEN" \
-              -P "$1" "$BASEURL""$2"
+          curl_opts() { # Declare func with download options
+            curl -L --output "$1""$2" "$BASEURL""$3"
}
-          wget_opts $RAW_DIR $NWBFILE
-          wget_opts $VID_DIR $VID_ONE
-          wget_opts $VID_DIR $VID_TWO
+          curl_opts $RAW_DIR $NWBFILE $NWB_URL
+          curl_opts $VID_DIR $VID_ONE $VID1URL
+          curl_opts $VID_DIR $VID_TWO $VID2URL
- name: Run tests
run: |
pytest --no-docker --no-dlc
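
For reference, the new download step works because Box "shared/static" links are public: each file is addressed by an opaque hash and requires no login, only redirect-following (the `-L` in the curl calls above). A minimal Python sketch of the same no-credential pattern, with the URL hash and file name taken from the diff above; the `fetch` helper is hypothetical, not part of the repo:

from pathlib import Path
from urllib.request import urlretrieve

BASE_URL = "https://ucsf.box.com/shared/static/"  # public, no credentials

def fetch(dest_dir: str, target_name: str, url_hash: str) -> Path:
    """Download one shared-static file; urllib follows Box's redirect."""
    out_dir = Path(dest_dir).expanduser()
    out_dir.mkdir(parents=True, exist_ok=True)
    dest = out_dir / target_name
    if not dest.exists():  # skip files already downloaded
        urlretrieve(BASE_URL + url_hash, str(dest))
    return dest

fetch("tests/_data/raw", "minirec20230622.nwb",
      "k3sgql6z475oia848q1rgms4zdh4rkjn.nwb")

This mirrors the workflow's curl calls: fetch the hash-addressed URL, follow the redirect to the download host, and write to an explicit target name.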
6 changes: 0 additions & 6 deletions tests/README.md
@@ -2,12 +2,6 @@

## Environment

-To allow pytest helpers to automatically download requisite data, you'll need to
-set credentials for Box. Consider adding these to a private `.env` file.
-
-- `UCSF_BOX_USER`: UCSF email address
-- `UCSF_BOX_TOKEN`: Token generated from UCSF Box account
-
To facilitate headless testing of various Qt-based tools as well as Tensorflow,
`pyproject.toml` includes some environment variables associated with the
display. These are...
1 change: 0 additions & 1 deletion tests/conftest.py
@@ -108,7 +108,6 @@ def pytest_configure(config):
)

DOWNLOADS = DataDownloader(
-    nwb_file_name=TEST_FILE,
base_dir=BASE_DIR,
verbose=VERBOSE,
download_dlc=not NO_DLC,
89 changes: 29 additions & 60 deletions tests/data_downloader.py
@@ -1,5 +1,4 @@
from functools import cached_property
-from os import environ as os_environ
from pathlib import Path
from shutil import copy as shutil_copy
from subprocess import DEVNULL, Popen
@@ -9,135 +8,105 @@

from datajoint import logger as dj_logger

-UCSF_BOX_USER = os_environ.get("UCSF_BOX_USER")
-UCSF_BOX_TOKEN = os_environ.get("UCSF_BOX_TOKEN")
-BASE_URL = "ftps://ftp.box.com/trodes_to_nwb_test_data/"
+BASE_URL = "https://ucsf.box.com/shared/static/"

NON_DLC = 3 # First N items below are not for DeepLabCut
FILE_PATHS = [
{
"relative_dir": "raw",
"target_name": "minirec20230622.nwb",
"url": BASE_URL + "minirec20230622.nwb",
"url": BASE_URL + "k3sgql6z475oia848q1rgms4zdh4rkjn.nwb",
},
{
"relative_dir": "video",
"target_name": "20230622_minirec_01_s1.1.h264",
"url": BASE_URL + "20230622_sample_01_a1/20230622_sample_01_a1.1.h264",
"url": BASE_URL + "ykep8ek4ogad20wz4p0vuyuqfo60cv3w.h264",
},
{
"relative_dir": "video",
"target_name": "20230622_minirec_02_s2.1.h264",
"url": BASE_URL + "20230622_sample_02_a1/20230622_sample_02_a1.1.h264",
"url": BASE_URL + "d2jjk0y565ru75xqojio3hymmehzr5he.h264",
},
{
"relative_dir": "deeplabcut",
"target_name": "CollectedData_sc_eb.csv",
"url": BASE_URL + "minirec_dlc_items/CollectedData_sc_eb.csv",
"url": BASE_URL + "3nzqdfty51vrga7470rn2vayrtoor3ot.csv",
},
{
"relative_dir": "deeplabcut",
"target_name": "CollectedData_sc_eb.h5",
"url": BASE_URL + "minirec_dlc_items/CollectedData_sc_eb.h5",
"url": BASE_URL + "sx30rqljppeisi4jdyu53y51na0q9rff.h5",
},
{
"relative_dir": "deeplabcut",
"target_name": "img000.png",
"url": BASE_URL + "minirec_dlc_items/img000.png",
"url": BASE_URL + "wrvgncfbpjuzfhopkfaizzs069tb1ruu.png",
},
{
"relative_dir": "deeplabcut",
"target_name": "img001.png",
"url": BASE_URL + "minirec_dlc_items/img001.png",
"url": BASE_URL + "czbkxeinemat7jj7j0877pcosfqo9psh.png",
},
]


class DataDownloader:
def __init__(
self,
-        nwb_file_name,
file_paths=FILE_PATHS,
base_dir=".",
download_dlc=True,
verbose=True,
):
-        if not all([UCSF_BOX_USER, UCSF_BOX_TOKEN]):
-            raise ValueError(
-                "Missing os.environ credentials: UCSF_BOX_USER, UCSF_BOX_TOKEN."
-            )
-        if nwb_file_name != file_paths[0]["target_name"]:
-            raise ValueError(
-                f"Please adjust data_downloader.py to match: {nwb_file_name}"
-            )
-
-        self.cmd = [
-            "wget",
-            "--recursive",
-            "--no-host-directories",
-            "--no-directories",
-            "--user",
-            UCSF_BOX_USER,
-            "--password",
-            UCSF_BOX_TOKEN,
-            "-P", # Then need relative path, then url
-        ]

-        self.verbose = verbose
-        if not verbose:
-            self.cmd.insert(self.cmd.index("--recursive") + 1, "--no-verbose")
-            self.cmd_kwargs = dict(stdout=DEVNULL, stderr=DEVNULL)
-        else:
+        if verbose:
            self.cmd_kwargs = dict(stdout=stdout, stderr=stderr)
+        else:
+            self.cmd_kwargs = dict(stdout=DEVNULL, stderr=DEVNULL)

-        self.base_dir = Path(base_dir).resolve()
+        self.verbose = verbose
+        self.base_dir = Path(base_dir).expanduser().resolve()
self.download_dlc = download_dlc
self.file_paths = file_paths if download_dlc else file_paths[:NON_DLC]
self.base_dir.mkdir(exist_ok=True)

# Start downloads
_ = self.file_downloads

-    def rename_files(self):
-        """Redundant, but allows rerun later in startup process of conftest."""
-        for path in self.file_paths:
-            target, url = path["target_name"], path["url"]
-            target_dir = self.base_dir / path["relative_dir"]
-            orig = target_dir / url.split("/")[-1]
-            dest = target_dir / target
-
-            if orig.exists():
-                orig.rename(dest)

@cached_property # Only make list of processes once
def file_downloads(self) -> Dict[str, Union[Popen, None]]:
"""{File: POpen/None} for each file. If exists/finished, None."""
ret = dict()
-        self.rename_files()
for path in self.file_paths:
-            target, url = path["target_name"], path["url"]
target_dir = self.base_dir / path["relative_dir"]
target_dir.mkdir(exist_ok=True, parents=True)

+            target = path["target_name"]
dest = target_dir / target
-            cmd = (
-                ["echo", f"Already have {target}"]
-                if dest.exists()
-                else self.cmd + [target_dir, url]
-            )

+            if dest.exists():
+                cmd = ["echo", f"Already have {target}"]
+            else:
+                cmd = ["curl", "-L", "--output", str(dest), f"{path['url']}"]

+            print(f"cmd: {cmd}")

ret[target] = Popen(cmd, **self.cmd_kwargs)

return ret

def wait_for(self, target: str):
"""Wait for target to finish downloading."""
status = self.file_downloads.get(target).poll()

limit = 10
while status is None and limit > 0:
-            time_sleep(5) # Some
+            time_sleep(5)
limit -= 1
status = self.file_downloads.get(target).poll()
-        if status != 0:

+        if status != 0:  # Error downloading
raise ValueError(f"Error downloading: {target}")
-        if limit < 1:
+        if limit < 1:  # Reached attempt limit
raise TimeoutError(f"Timeout downloading: {target}")

def move_dlc_items(self, dest_dir: Path):
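Taken together, the reworked class needs no environment setup at all: downloads start in parallel at construction, one curl subprocess per file. A usage sketch under stated assumptions (the import path and base_dir value are hypothetical; the file name comes from FILE_PATHS above):

from data_downloader import DataDownloader  # import path may differ in conftest

downloads = DataDownloader(
    base_dir="tests/_data",  # hypothetical; conftest passes its own BASE_DIR
    download_dlc=False,      # restrict to the first NON_DLC (3) files
    verbose=True,
)

# Construction already launched the curl subprocesses via `file_downloads`;
# block until the one file a given test needs has landed:
downloads.wait_for("minirec20230622.nwb")

Because file_downloads is a cached_property, the Popen handles are created once and only polled thereafter, so wait_for can be called repeatedly without triggering re-downloads.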
