diff --git a/.github/workflows/libtiledb-ci.yml b/.github/workflows/libtiledb-ci.yml index 9bbfa562fc..87f7b118fe 100644 --- a/.github/workflows/libtiledb-ci.yml +++ b/.github/workflows/libtiledb-ci.yml @@ -1,15 +1,18 @@ name: libTileDB-SOMA CodeCov on: - pull_request: - paths-ignore: - - "apis/python/**" - - "apis/r/**" - - ".pre-commit-config.yaml" - push: - branches: - - main - - 'release-*' +# XXX TEMP +# pull_request: +# paths-ignore: +# - "apis/python/**" +# - "apis/r/**" +# - ".pre-commit-config.yaml" +# # XXX TEMP +# - ".github/workflows/python-remote-storage.yml" +# push: +# branches: +# - main +# - 'release-*' workflow_dispatch: jobs: diff --git a/.github/workflows/libtiledbsoma-asan-ci.yml b/.github/workflows/libtiledbsoma-asan-ci.yml index 9b08b6fbbe..45e1271024 100644 --- a/.github/workflows/libtiledbsoma-asan-ci.yml +++ b/.github/workflows/libtiledbsoma-asan-ci.yml @@ -1,15 +1,18 @@ name: libtiledbsoma ASAN on: - pull_request: - paths-ignore: - - "apis/python/**" - - "apis/r/**" - - ".pre-commit-config.yaml" - push: - branches: - - main - - 'release-*' +# XXX TEMP +# pull_request: +# paths-ignore: +# - "apis/python/**" +# - "apis/r/**" +# - ".pre-commit-config.yaml" +# # XXX TEMP +# - ".github/workflows/python-remote-storage.yml" +# push: +# branches: +# - main +# - 'release-*' workflow_dispatch: jobs: diff --git a/.github/workflows/python-ci-minimal.yml b/.github/workflows/python-ci-minimal.yml index 7168102607..81d214e777 100644 --- a/.github/workflows/python-ci-minimal.yml +++ b/.github/workflows/python-ci-minimal.yml @@ -8,18 +8,19 @@ name: TileDB-SOMA Python CI (Minimal) # To test the full matrix on a working branch, invoke ./python-ci-full.yml from # https://github.com/single-cell-data/TileDB-SOMA/actions/workflows/python-ci-full.yml on: - pull_request: - branches: - - main - - 'release-*' - paths: - - '**' - - '!**.md' - - '!apis/r/**' - - '!docs/**' - - '!.github/**' - - '.github/workflows/python-ci-minimal.yml' - - 
'.github/workflows/python-ci-single.yml' + # XXX TEMP +# pull_request: +# branches: +# - main +# - 'release-*' +# paths: +# - '**' +# - '!**.md' +# - '!apis/r/**' +# - '!docs/**' +# - '!.github/**' +# - '.github/workflows/python-ci-minimal.yml' +# - '.github/workflows/python-ci-single.yml' workflow_dispatch: jobs: diff --git a/.github/workflows/python-dependency-variation.yml b/.github/workflows/python-dependency-variation.yml index db6417013d..3f2c654d1f 100644 --- a/.github/workflows/python-dependency-variation.yml +++ b/.github/workflows/python-dependency-variation.yml @@ -1,15 +1,16 @@ name: TileDB-SOMA Python CI with varying dependencies on: - push: - branches: - - main - - 'release-*' - pull_request: - paths-ignore: - - '**.md' - - 'apis/r/**' - - 'docs/**' +# XXX TEMP +# push: +# branches: +# - main +# - 'release-*' +# pull_request: +# paths-ignore: +# - '**.md' +# - 'apis/r/**' +# - 'docs/**' workflow_dispatch: jobs: diff --git a/.github/workflows/python-remote-storage.yml b/.github/workflows/python-remote-storage.yml new file mode 100644 index 0000000000..8c5b8e8664 --- /dev/null +++ b/.github/workflows/python-remote-storage.yml @@ -0,0 +1,104 @@ +name: TileDB-SOMA Python CI (remote storage) + +on: + workflow_dispatch: + # + # Not for regular use: + # TEMP + pull_request: + # + # TODO: a nightly cron + +env: + # Don't name this "TILEDB_REST_TOKEN" since that will map into a core + # env/config override, as if config key "rest.token" had been set. One of the + # purposes of this CI is to run tests where all config is passed via context + # arguments and none via environment variables, in order to flush out + # callsites within the code which aren't passing context as they should. 
+  TILEDB_REST_UNITTEST_TOKEN: ${{ secrets.TILEDB_REST_UNITTEST_TOKEN }} + +jobs: + ci: + strategy: + fail-fast: false + matrix: + include: + # This could be run on MacOS too, but, we have enough OS redundancy, + # and MacOS-runner availability is a more tightly constrained resource + # in GitHub Actions as of 2025-02-06. + - name: linux + os: ubuntu-24.04 + # TODO: also on 3.12. But 3.9 is higher-pri, until we drop support + # for it. (Note our main CI tests across a broader set of Python + # versions.) + python_version: 3.9 + cc: gcc-13 + cxx: g++-13 + + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v4 + + - name: Show matrix OS + run: echo "matrix.os:" ${{ matrix.os }} + + - name: Linux CPU info + if: ${{ matrix.os == 'ubuntu-24.04' }} + run: cat /proc/cpuinfo + + - name: MacOS CPU info + if: ${{ matrix.os == 'macOS-latest' }} + run: sysctl -a | grep cpu + + - name: Select XCode version + if: startsWith(matrix.os, 'macos') + uses: maxim-lobanov/setup-xcode@v1 + with: + xcode-version: '15.4' + + - name: Set up Python ${{ matrix.python_version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python_version }} + cache: pip + cache-dependency-path: ./apis/python/setup.py + + - name: Show XCode version + run: clang --version + + - name: Check out TileDB-SOMA + uses: actions/checkout@v4 + with: + fetch-depth: 0 # ensure we get all tags to inform package version determination + + - name: Log pip dependencies + run: pip list + + - name: Install tiledbsoma + # If you're iterating on the testing framework itself, install from PyPI to avoid the + # time-consuming build. If you're trying to evaluate the code modified by a PR, + # install from source.
+ # + # From PyPI: + # run: pip install tiledbsoma==1.15.7 + # + # From source: + run: pip -v install -e apis/python[all] -C "--build-option=--no-tiledb-deprecated" + # + env: + CC: ${{ matrix.cc }} + CXX: ${{ matrix.cxx }} + + - name: Install dependencies + run: pip install --prefer-binary pytest typeguard tiledb.cloud + + - name: Show package versions + run: python scripts/show-versions.py + + - name: Show tiledb.cloud version + run: python -c 'import tiledb.cloud; print(tiledb.cloud.version.version)' + + - name: Run pytests for Python + shell: bash + run: python -m pytest apis/python/remote_tests -v --durations=20 --maxfail=50 diff --git a/.github/workflows/r-python-interop-testing.yml b/.github/workflows/r-python-interop-testing.yml index 2687f8af7b..b4c2f2110b 100644 --- a/.github/workflows/r-python-interop-testing.yml +++ b/.github/workflows/r-python-interop-testing.yml @@ -1,17 +1,21 @@ name: TileDB-SOMA R-Python interop testing on: - pull_request: - # TODO: leave this enabled for pre-merge signal for now. At some point we may want to go back to - # only having this signal post-merge. - #paths: - # - "apis/python/**" - # - "apis/r/**" - # - "apis/system/**" - push: - branches: - - main - - "release-*" +# XXX TEMP +# pull_request: +# paths-ignore: +# # XXX TEMP +# - ".github/workflows/python-remote-storage.yml" +# # TODO: leave this enabled for pre-merge signal for now. At some point we may want to go back to +# # only having this signal post-merge. +# #paths: +# # - "apis/python/**" +# # - "apis/r/**" +# # - "apis/system/**" +# push: +# branches: +# - main +# - "release-*" workflow_dispatch: jobs: diff --git a/apis/python/remote_tests/README.md b/apis/python/remote_tests/README.md new file mode 100644 index 0000000000..aadb54c043 --- /dev/null +++ b/apis/python/remote_tests/README.md @@ -0,0 +1,62 @@ +This is a WIP. + +These tests may be later moved into `apis/python/tests/remote`. 
+ +## TO DO + +* File a task for allowing `.` in names at register +* Info for provenance `.h5ad` +* Script info for populating versions +* Make small stack; provenance +* Append mode: Monday/Tuesday is fine +* UDFs: + * remote obs schema + * collection-mapper test +* Show, upgrade, resize + +## Maybe + +* any direct-to-s3 tests? + +## Test-data setup + +``` +export TILEDB_REST_TOKEN="..." # Get the token for the SaaS `unittest` user +export TILEDB_REST_PAYER_NAMESPACE=unittest +``` + +Source data you can find in the sandbox account `unittest` space: + +``` +s3://tiledb-unittest/soma-prod-test-data/h5ad/pbmc3k_unprocessed.h5ad +s3://tiledb-unittest/soma-prod-test-data/h5ad/pbmc3k_processed.h5ad +``` + +In a local working directory, stage the above `.h5ad` files. +Then use `tiledbsoma.io.from_h5ad` with the following sources and data: + +``` + +ingestor.py tiledb://unittest/s3://tiledb-unittest/soma-prod-test-data/1.15.7/pbmc3k_unprocessed_1.15.7 /var/s/a/pbmc3k_unprocessed.h5ad + +ubuntu@segge[prod][][u]$ aws s3 cp ./pbmc3k_unprocessed_1_12.3 s3://tiledb-unittest/soma-prod-test-data/1.12.3/pbmc3k_unprocessed_1_12.3 + +ubuntu@segge[prod][][u]$ aws s3 sync --delete ./pbmc3k_unprocessed_1.12.3 s3://tiledb-unittest/soma-prod-test-data/1.12.3/pbmc3k_unprocessed_1.12.3 + +drwxr-xr-x 6 ubuntu ubuntu 4096 Feb 6 18:14 pbmc3k_unprocessed_1.7.3 +drwxr-xr-x 6 ubuntu ubuntu 4096 Feb 6 18:14 pbmc3k_unprocessed_1.12.3 +drwxr-xr-x 6 ubuntu ubuntu 4096 Feb 6 18:12 pbmc3k_unprocessed_1.14.5 +drwxr-xr-x 6 ubuntu ubuntu 4096 Feb 6 18:12 pbmc3k_unprocessed_1.15.0 +drwxr-xr-x 6 ubuntu ubuntu 4096 Feb 6 18:13 pbmc3k_unprocessed_1.15.7 + +registrations: +s3://tiledb-unittest/soma-prod-test-data/1.7.3/pbmc3k_unprocessed_1.7.3 +s3://tiledb-unittest/soma-prod-test-data/1.12.3/pbmc3k_unprocessed_1.12.3 +s3://tiledb-unittest/soma-prod-test-data/1.14.5/pbmc3k_unprocessed_1.14.5 +s3://tiledb-unittest/soma-prod-test-data/1.15.0/pbmc3k_unprocessed_1.15.0 +s3://tiledb-unittest/soma-prod-test-data/1.15.7/pbmc3k_unprocessed_1.15.7 + + 
+s3://tiledb-unittest/soma-prod-test-data/h5ad/pbmc3k_unprocessed.h5ad +s3://tiledb-unittest/soma-prod-test-data/h5ad/pbmc3k_processed.h5ad +``` diff --git a/apis/python/remote_tests/__init__.py b/apis/python/remote_tests/__init__.py new file mode 100644 index 0000000000..eeedbf74bc --- /dev/null +++ b/apis/python/remote_tests/__init__.py @@ -0,0 +1,3 @@ +from typeguard import install_import_hook + +install_import_hook("tiledbsoma") diff --git a/apis/python/remote_tests/conftest.py b/apis/python/remote_tests/conftest.py new file mode 100644 index 0000000000..2e9fd9a5d4 --- /dev/null +++ b/apis/python/remote_tests/conftest.py @@ -0,0 +1,47 @@ +import os + +import pytest + +import tiledbsoma +import tiledb.cloud + + +@pytest.fixture +def conftest_token(): + env_name = "TILEDB_REST_UNITTEST_TOKEN" + token = os.getenv(env_name) + if token is None: + raise Exception(f'Environment variable "{env_name}" is not set') + return token + + +@pytest.fixture +def conftest_tiledb_cloud_login(conftest_token): + print("conftest_tiledb_cloud_login") + tiledb.cloud.login(token=conftest_token) + return None + + +@pytest.fixture +def conftest_user_profile(conftest_tiledb_cloud_login): + return tiledb.cloud.user_profile() + + +@pytest.fixture +def conftest_namespace(conftest_user_profile): + return conftest_user_profile.username + + +@pytest.fixture +def conftest_default_s3_path(conftest_user_profile): + return conftest_user_profile.default_s3_path + + +@pytest.fixture +def conftest_context(conftest_token, conftest_namespace): + return tiledbsoma.SOMATileDBContext( + tiledb_config={ + "rest.token": conftest_token, + "rest.payer_namespace": conftest_namespace, + } + ) diff --git a/apis/python/remote_tests/test_01_setup.py b/apis/python/remote_tests/test_01_setup.py new file mode 100644 index 0000000000..a8ded6125d --- /dev/null +++ b/apis/python/remote_tests/test_01_setup.py @@ -0,0 +1,77 @@ +# These are test that need to run first to check basic functionality, before we go on to test 
other, +# more complex things. +from __future__ import annotations + +import os +import sys + +import pytest +import scanpy + +import tiledbsoma +import tiledbsoma.io +import tiledb.cloud + +from .util import util_make_uri, util_tear_down_uri + +# Nominally this is the 'unittest' SaaS user. What we require is: +# * The user can _read_ data in the 'unittest' namespace. +# * For data _written_, the namespace and default_s3_path are taken from the cloud profile. +if os.getenv("TILEDB_REST_UNITTEST_TOKEN") is None: + pytest.skip( + reason="$TILEDB_REST_UNITTEST_TOKEN is not set", allow_module_level=True + ) + + +def test_skipping_correctly(): + assert os.getenv("TILEDB_REST_UNITTEST_TOKEN") is not None + + +def test_basic_read(conftest_context): + uri = "tiledb://unittest/pbmc3k_unprocessed_1.15.7" + assert tiledbsoma.Experiment.exists(uri, context=conftest_context) + with tiledbsoma.Experiment.open(uri, context=conftest_context) as exp: + assert exp.obs.count == 2700 + assert "RNA" in exp.ms + assert exp.ms["RNA"].var.count == 13714 + + +def test_basic_write(conftest_context, conftest_namespace, conftest_default_s3_path): + (creation_uri, readback_uri) = util_make_uri( + "soma-prod-ephemeral-data", + "ephemeral_basic_write", + conftest_namespace, + conftest_default_s3_path, + ) + + adata = scanpy.datasets.pbmc3k() + + tiledbsoma.io.from_anndata( + creation_uri, + adata, + measurement_name="RNA", + context=conftest_context, + ) + + with tiledbsoma.Experiment.open(readback_uri, context=conftest_context) as exp: + assert exp.obs.count == 2700 + assert "RNA" in exp.ms + assert exp.ms["RNA"].var.count == 32738 + + util_tear_down_uri(readback_uri) + + +@pytest.mark.skipif( + (sys.version_info.major, sys.version_info.minor) != (3, 9), + reason="As of 2025-02-05 UDFs require Python 3.9", +) +def test_remote_version(conftest_tiledb_cloud_login): + + def remote_version(): + import tiledbsoma + + return {"tiledbsoma": tiledbsoma.__version__} + + output = 
tiledb.cloud.udf.exec(remote_version) + assert "tiledbsoma" in output + assert output["tiledbsoma"].startswith("1.") diff --git a/apis/python/remote_tests/test_02_analysis.py b/apis/python/remote_tests/test_02_analysis.py new file mode 100644 index 0000000000..36a2a57ac6 --- /dev/null +++ b/apis/python/remote_tests/test_02_analysis.py @@ -0,0 +1,129 @@ +# These are test that need to run first to check basic functionality, before we go on to test other, +# more complex things. +from __future__ import annotations + +import os + +import pandas as pd +import pytest +import scanpy as sc + +import tiledbsoma +import tiledbsoma.io +import tiledbsoma.logging + +from .util import util_make_uri, util_tear_down_uri + +# Nominally this is the 'unittest' SaaS user. What we require is: +# +# * The user can _read_ data in the 'unittest' namespace. +# * For data _written_, the namespace and default_s3_path are taken from the +# cloud profile. +# +# For CI, this environment variable is a GitHub Actions secret, propagated in +# the CI YAML. 
+if os.getenv("TILEDB_REST_UNITTEST_TOKEN") is None: + pytest.skip( + reason="$TILEDB_REST_UNITTEST_TOKEN is not set", allow_module_level=True + ) + + +def test_write_with_updates( + conftest_context, conftest_namespace, conftest_default_s3_path +): + (creation_uri, readback_uri) = util_make_uri( + "soma-prod-ephemeral-data", + "ephemeral_analysis", + conftest_namespace, + conftest_default_s3_path, + ) + + adata = sc.datasets.pbmc3k() + + tiledbsoma.logging.info() + tiledbsoma.io.from_anndata( + creation_uri, + adata, + measurement_name="RNA", + context=conftest_context, + ) + + with tiledbsoma.Experiment.open(readback_uri, context=conftest_context) as exp: + assert "RNA" in exp.ms + + assert exp.metadata.get("dataset_type") == "soma" + assert exp.metadata.get("soma_object_type") == "SOMAExperiment" + assert exp.obs.metadata.get("soma_object_type") == "SOMADataFrame" + assert exp.ms["RNA"].var.metadata.get("soma_object_type") == "SOMADataFrame" + assert "data" in exp.ms["RNA"].X + assert ( + exp.ms["RNA"].X["data"].metadata.get("soma_object_type") + == "SOMASparseNDArray" + ) + + assert exp.obs.count == adata.obs.shape[0] + assert exp.ms["RNA"].var.count == adata.var.shape[0] + + obs_arrow = exp.obs.read().concat() + obs_pandas = obs_arrow.to_pandas() + assert obs_pandas.shape[0] == adata.obs.shape[0] + + # Here we augment that with some on-the-fly computed data. This imitates a common customer workflow. 
+ # Add a categorical column + parity = [["even", "odd"][e % 2] for e in range(len(adata.obs))] + adata.obs["parity"] = pd.Categorical(parity) + with tiledbsoma.Experiment.open(creation_uri, "w", context=conftest_context) as exp: + tiledbsoma.io.update_obs(exp, adata.obs, context=conftest_context) + + with tiledbsoma.Experiment.open(readback_uri, context=conftest_context) as exp: + obs_arrow = exp.obs.read().concat() + obs_pandas = obs_arrow.to_pandas() + assert obs_pandas.shape[0] == adata.obs.shape[0] + + sc.pp.normalize_total(adata, inplace=True) + sc.pp.log1p(adata, copy=False) + + with tiledbsoma.open(creation_uri, "w", context=conftest_context) as exp: + tiledbsoma.io.add_X_layer( + exp, + measurement_name="RNA", + X_layer_name="logcounts", + X_layer_data=adata.X, + context=conftest_context, + ) + + with tiledbsoma.open(readback_uri, "w", context=conftest_context) as exp: + assert sorted(list(exp.ms["RNA"].X.keys())) == ["data", "logcounts"] + + # Add dimensional-reduction results + sc.pp.highly_variable_genes(adata, inplace=True) + adata = adata[:, adata.var.highly_variable] + sc.pp.scale(adata) + sc.tl.pca(adata, use_highly_variable=True, n_comps=5) + + with tiledbsoma.open(creation_uri, "w", context=conftest_context) as exp: + tiledbsoma.io.add_matrix_to_collection( + exp=exp, + measurement_name="RNA", + collection_name="obsm", + matrix_name="logcounts_pca", + matrix_data=adata.obsm["X_pca"], + context=conftest_context, + ) + + with tiledbsoma.open(readback_uri, "w", context=conftest_context) as exp: + assert sorted(list(exp.ms["RNA"].obsm.keys())) == ["logcounts_pca"] + + with tiledbsoma.open(creation_uri, "w", context=conftest_context) as exp: + tiledbsoma.io.add_matrix_to_collection( + exp=exp, + measurement_name="RNA", + collection_name="varm", + matrix_name="logcounts_pcs", + matrix_data=adata.varm["PCs"], + context=conftest_context, + ) + with tiledbsoma.open(exp.uri, context=conftest_context) as exp: + assert sorted(list(exp.ms["RNA"].varm.keys())) 
== ["logcounts_pcs"] + + util_tear_down_uri(readback_uri) diff --git a/apis/python/remote_tests/test_03_versions.py b/apis/python/remote_tests/test_03_versions.py new file mode 100644 index 0000000000..bb4fc11f2b --- /dev/null +++ b/apis/python/remote_tests/test_03_versions.py @@ -0,0 +1,122 @@ +# These are test that need to run first to check basic functionality, before we go on to test other, +# more complex things. +from __future__ import annotations + +import os + +import pytest + +import tiledbsoma +import tiledbsoma.io + +from .util import util_pbmc3k_unprocessed_versions + +# Nominally this is the 'unittest' SaaS user. What we require is: +# * The user can _read_ data in the 'unittest' namespace. +# * For data _written_, the namespace and default_s3_path are taken from the cloud profile. +if os.getenv("TILEDB_REST_UNITTEST_TOKEN") is None: + pytest.skip( + reason="$TILEDB_REST_UNITTEST_TOKEN is not set", allow_module_level=True + ) + + +@pytest.mark.parametrize( + "uri_and_info", + util_pbmc3k_unprocessed_versions(), +) +def test_basic_readback(conftest_context, uri_and_info): + uri, info = uri_and_info + with tiledbsoma.Experiment.open(uri, context=conftest_context) as exp: + + md = dict(exp.metadata) + assert md["dataset_type"] == "soma" + assert md["soma_object_type"] == "SOMAExperiment" + + md = dict(exp.obs.metadata) + assert md["soma_object_type"] == "SOMADataFrame" + + md = dict(exp.ms["RNA"].var.metadata) + assert md["soma_object_type"] == "SOMADataFrame" + + md = dict(exp.ms["RNA"].X["data"].metadata) + assert md["soma_object_type"] == "SOMASparseNDArray" + + obs_table = exp.obs.read().concat() + assert len(obs_table) == 2700 + obs_df = obs_table.to_pandas() + assert obs_df.shape == (2700, 6) + + var_table = exp.ms["RNA"].var.read().concat() + assert len(var_table) == 13714 + var_df = var_table.to_pandas() + assert var_df.shape == (13714, 2) + + X_coo = exp.ms["RNA"].X["data"].read().coos().concat() + if info["shape"] == "old": + assert X_coo.shape 
== (2147483646, 2147483646) + else: + assert X_coo.shape == (2700, 13714) + + # Implicitly checking for no throw + adata = tiledbsoma.io.to_anndata(exp, "RNA") + + assert adata.obs.shape == (2700, 4) + assert adata.var.shape == (13714, 0) + assert adata.X.shape == (2700, 13714) + + +@pytest.mark.parametrize( + "uri_and_info", + util_pbmc3k_unprocessed_versions(), +) +def test_dataframe_queries(conftest_context, uri_and_info): + uri, info = uri_and_info + with tiledbsoma.Experiment.open(uri, context=conftest_context) as exp: + + qobs = ( + exp.obs.read( + coords=[slice(0, 99)], + value_filter="nFeature_RNA > 1000", + column_names=["soma_joinid", "obs_id", "nFeature_RNA"], + ) + .concat() + .to_pandas() + ) + assert qobs.shape == (22, 3) + + qvar = ( + exp.ms["RNA"] + .var.read( + value_filter="var_id in ['ANXA1', 'IFI44', 'IFI44L', 'OAS1']", + ) + .concat() + .to_pandas() + ) + assert qvar.shape == (4, 2) + + +@pytest.mark.parametrize( + "uri_and_info", + util_pbmc3k_unprocessed_versions(), +) +def test_experiment_queries(conftest_context, uri_and_info): + uri, info = uri_and_info + with tiledbsoma.Experiment.open(uri, context=conftest_context) as exp: + + query = tiledbsoma.ExperimentAxisQuery( + experiment=exp, + measurement_name="RNA", + obs_query=tiledbsoma.AxisQuery( + value_filter="nFeature_RNA > 1000", + ), + var_query=tiledbsoma.AxisQuery( + value_filter="var_id in ['ANXA1', 'IFI44', 'IFI44L', 'OAS1']", + ), + ) + + assert (query.n_obs, query.n_vars) == (530, 4) + + +# tiledbsoma.io.show_experiment_shapes +# tiledbsoma.io.upgrade_experiment_shapes +# tiledbsoma.io.resize_experiment diff --git a/apis/python/remote_tests/test_04_todo.py b/apis/python/remote_tests/test_04_todo.py new file mode 100644 index 0000000000..712eb4b9db --- /dev/null +++ b/apis/python/remote_tests/test_04_todo.py @@ -0,0 +1,60 @@ +# +#``` +## ================================================================ +### UDFs +#def remote_obs_schema(exp_uri): +# import tiledbsoma +# exp = 
tiledbsoma.Experiment.open(exp_uri) +# return exp.obs.schema +#import tiledb.cloud +#import tiledb.cloud.udf +#tiledb.cloud.udf.exec( +# remote_obs_schema, +# soma_pbmc3k_uri, +#) +# +#def remote_query(exp_uri): +# import tiledbsoma +# exp = tiledbsoma.Experiment.open(exp_uri) +# +# query = tiledbsoma.ExperimentAxisQuery( +# experiment=exp, +# measurement_name="RNA", +# obs_query=tiledbsoma.AxisQuery( +# value_filter="n_genes_by_counts > 1000", +# ), +# var_query=tiledbsoma.AxisQuery( +# value_filter="n_cells_by_counts > 100", +# ), +# ) +# +# return (query.n_obs, query.n_vars) +#tiledb.cloud.udf.exec( +# remote_query, soma_pbmc3k_uri, +#) +# +## ================================================================ +## Collection-mapper test +#from tiledb.cloud.taskgraphs import client_executor as executor +#soco_uri = 'tiledb://TileDB-Inc/stack-small-soco-staging' +#res = tiledb.cloud.udf.exec( +# 'TileDB-Inc/soma_experiment_collection_mapper', +# soco_uri=soco_uri, +# measurement_name="RNA", +# X_layer_name="data", +# # callback = lambda x: x.obs.shape, +# # callback = lambda x: x, +# callback = lambda adata: [adata.obs.shape, adata.var.shape, adata.X.shape], +# # callback = lambda adata: adata.var, +# args_dict={}, +# reducer = lambda x: x, +# obs_attrs = ['obs_id', 'cell_type', 'is_primary_data'], +# var_attrs = ['var_id', 'means'], +#) +#dag = executor.LocalExecutor(res, namespace = "TileDB-Inc") +#dag.visualize() +##%%time +#dag.execute() +#dag.wait() +#dag.node("output").result() +#``` diff --git a/apis/python/remote_tests/util.py b/apis/python/remote_tests/util.py new file mode 100644 index 0000000000..835e361a42 --- /dev/null +++ b/apis/python/remote_tests/util.py @@ -0,0 +1,62 @@ +import datetime +import os +import pathlib +import shutil +from typing import Tuple + +import tiledb.cloud + +# For cloud: +# * Create with timestamp +# * Delete on teardown +# For local: +# * Create without timestamp +# o Only remove the URI from a _previous_ run (if any) +# * Do 
not delete on teardown -- so developers can look at the data + + +def util_make_uri( + dirname: str, + basename: str, + namespace: str, + default_s3_path: str, +) -> Tuple[str, str]: + if os.getenv("TILEDB_SOMA_CLOUD_TEST_LOCAL_PATHS") is None: + + # The default_s3_path contains the "s3://..." prefix and a trailing slash + # Note that double slashes can cause group-creation failures so we need + # to carefully strip them out. + bucket = (default_s3_path).rstrip("/") + stamp = datetime.datetime.today().strftime("%Y%m%d-%H%M%S") + creation_uri = f"tiledb://{namespace}/{bucket}/{dirname}/{basename}_{stamp}" + readback_uri = f"tiledb://{namespace}/{basename}_{stamp}" + return (creation_uri, readback_uri) + + else: + uri = f"/tmp/tiledbsoma-cloud-test/{dirname}/{basename}" + if os.path.exists(uri): + shutil.rmtree(uri) + pathlib.Path(os.path.dirname(uri)).mkdir(parents=True, exist_ok=True) + print() + print("USING LOCAL URI", uri) + print() + return (uri, uri) + + +def util_tear_down_uri(uri): + # This assumes tiledb.cloud.login has already been called at util_make_uri. + if uri.startswith("tiledb://"): + tiledb.cloud.groups.delete(uri=uri, recursive=True) + # Delete local URIs only on _next_ run, so devs can inspect + + +def util_pbmc3k_unprocessed_versions(): + # New shape as in https://github.com/single-cell-data/TileDB-SOMA/issues/2407 + # which was released with tiledbsoma 1.15.0. 
+ return [ + ["tiledb://unittest/pbmc3k_unprocessed_1_7_3", {"shape": "old"}], + ["tiledb://unittest/pbmc3k_unprocessed_1_12_3", {"shape": "old"}], + ["tiledb://unittest/pbmc3k_unprocessed_1_14_5", {"shape": "old"}], + ["tiledb://unittest/pbmc3k_unprocessed_1_15_0", {"shape": "new"}], + ["tiledb://unittest/pbmc3k_unprocessed_1.15.7", {"shape": "new"}], + ] diff --git a/apis/python/src/tiledbsoma/io/ingest.py b/apis/python/src/tiledbsoma/io/ingest.py index eede5e1fbc..1c246b9d5e 100644 --- a/apis/python/src/tiledbsoma/io/ingest.py +++ b/apis/python/src/tiledbsoma/io/ingest.py @@ -1865,6 +1865,7 @@ def add_X_layer( X_layer_data: Union[Matrix, h5py.Dataset], ingest_mode: IngestMode = "write", use_relative_uri: bool | None = None, + context: SOMATileDBContext | None = None, ) -> None: """This is useful for adding X data, for example from `Scanpy `_'s ``scanpy.pp.normalize_total``, @@ -1884,6 +1885,7 @@ def add_X_layer( matrix_data=X_layer_data, ingest_mode=ingest_mode, use_relative_uri=use_relative_uri, + context=context, )