Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enrich data source / asset association #584

Merged
merged 38 commits into from
Jan 23, 2024
Merged
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
98dc358
Add columns to Asset <-> DataSource Assoc. Tests fail.
danielballan Oct 15, 2023
a4bd9af
Tweak construction
danielballan Oct 15, 2023
5fd277c
WIP
danielballan Oct 18, 2023
1fd1f98
Mostly works. Parameter names need adjustment.
danielballan Oct 18, 2023
cc49944
Add dedicated filepath-based constructors.
danielballan Oct 18, 2023
4df4af9
Sort assets by (parameter, num).
danielballan Oct 18, 2023
40571c8
Test registering TIFF seq out of alphanumeric order.
danielballan Oct 19, 2023
88b5671
Test HDF5 virtual dataset.
danielballan Oct 19, 2023
b49fd3c
copyedit docstring
danielballan Oct 24, 2023
d4b0de1
Copy edit docstring
danielballan Oct 24, 2023
63d4ca6
Test for parameter, num uniqueness constraints.
danielballan Nov 7, 2023
ebb187a
Create trigger. Caught one case but not the other.
danielballan Nov 7, 2023
cd1ae55
Trigger works for all cases, on SQLite.
danielballan Nov 10, 2023
34b5d16
Tests pass on PostgreSQL also
danielballan Nov 21, 2023
b78f7a1
Adapters expect URIs
danielballan Jan 20, 2024
2636720
Catalog and writing tests pass.
danielballan Jan 20, 2024
6abc1a8
Add docstring with examples
danielballan Jan 20, 2024
2ae5c06
Directory walker tests pass
danielballan Jan 20, 2024
cff9c68
Revert confused change
danielballan Jan 20, 2024
5dfff65
TIFF tests pass
danielballan Jan 20, 2024
5f7bcf6
Zarr handles init (no structure) and access (structure).
danielballan Jan 20, 2024
c470aaa
Update XDI to use URI.
danielballan Jan 20, 2024
3246992
Change some custom mimetypes and refactor default creation.
danielballan Jan 21, 2024
27d9cfa
WIP: Write migration
danielballan Jan 21, 2024
c73492f
Generate TIFF sequences in example data.
danielballan Jan 21, 2024
e1e2486
fixes
danielballan Jan 21, 2024
89bb6ed
Migration is tested on SQLite
danielballan Jan 21, 2024
cd6ee56
Fix usage
danielballan Jan 21, 2024
cbde2b7
Make PG trigger conditional same as SQLite
danielballan Jan 21, 2024
d58f8c6
Add missing constraint for PG.
danielballan Jan 22, 2024
af229a0
Include SQLite test data in CI.
danielballan Jan 22, 2024
db96001
Run database migrations against example data in CI.
danielballan Jan 22, 2024
588eb28
Target correct database for upgrade
danielballan Jan 22, 2024
9f2a2a3
Rename SQL function
danielballan Jan 22, 2024
ac0f8f2
Explain overly clever test.
danielballan Jan 23, 2024
3852c26
Update docstring for API change.
danielballan Jan 23, 2024
d8997e8
Give trigger better name.
danielballan Jan 23, 2024
2ddf620
Comment on handling of HDF5 virtual data sets.
danielballan Jan 23, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,25 @@ jobs:
shell: bash -l {0}
run: source continuous_integration/scripts/start_LDAP.sh

- name: Download SQLite example data.
shell: bash -l {0}
run: source continuous_integration/scripts/download_sqlite_data.sh

- name: Start PostgreSQL service in container.
shell: bash -l {0}
run: source continuous_integration/scripts/start_postgres.sh


- name: Ensure example data is migrated to current catalog database schema.
# The example data is expected to be kept up to date to the latest Tiled
# release, but this CI run may include some unreleased schema changes,
# so we run a migration here.
shell: bash -l {0}
run: |
set -vxeuo pipefail
tiled catalog upgrade-database sqlite+aiosqlite:///tiled_test_db_sqlite.db
tiled catalog upgrade-database postgresql+asyncpg://postgres:secret@localhost:5432/tiled-example-data

- name: Test with pytest
shell: bash -l {0}
run: |
Expand Down
130 changes: 122 additions & 8 deletions tiled/_tests/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import pandas.testing
import pytest
import pytest_asyncio
import sqlalchemy.dialects.postgresql.asyncpg
import sqlalchemy.exc
import tifffile
import xarray

Expand All @@ -17,12 +19,13 @@
from ..catalog import in_memory
from ..catalog.adapter import WouldDeleteData
from ..catalog.explain import record_explanations
from ..catalog.register import create_node_safe
from ..catalog.utils import ensure_uri
from ..client import Context, from_context
from ..client.xarray import write_xarray_dataset
from ..queries import Eq, Key
from ..server.app import build_app, build_app_from_config
from ..server.schemas import Asset, DataSource
from ..server.schemas import Asset, DataSource, Management
from ..structures.core import StructureFamily
from .utils import enter_password

Expand Down Expand Up @@ -197,9 +200,10 @@ async def test_metadata_index_is_used(example_data_adapter):
@pytest.mark.asyncio
async def test_write_array_external(a, tmpdir):
arr = numpy.ones((5, 3))
filepath = tmpdir / "file.tiff"
tifffile.imwrite(str(filepath), arr)
ad = TiffAdapter(str(filepath))
filepath = str(tmpdir / "file.tiff")
data_uri = ensure_uri(filepath)
tifffile.imwrite(filepath, arr)
ad = TiffAdapter(data_uri)
structure = asdict(ad.structure())
await a.create_node(
key="x",
Expand All @@ -211,7 +215,14 @@ async def test_write_array_external(a, tmpdir):
structure=structure,
parameters={},
management="external",
assets=[Asset(data_uri=str(ensure_uri(filepath)), is_directory=False)],
assets=[
Asset(
parameter="data_uri",
num=None,
data_uri=str(data_uri),
is_directory=False,
)
],
)
],
)
Expand All @@ -222,9 +233,10 @@ async def test_write_array_external(a, tmpdir):
@pytest.mark.asyncio
async def test_write_dataframe_external_direct(a, tmpdir):
df = pandas.DataFrame(numpy.ones((5, 3)), columns=list("abc"))
filepath = tmpdir / "file.csv"
filepath = str(tmpdir / "file.csv")
data_uri = ensure_uri(filepath)
df.to_csv(filepath, index=False)
dfa = read_csv(filepath)
dfa = read_csv(data_uri)
structure = asdict(dfa.structure())
await a.create_node(
key="x",
Expand All @@ -236,7 +248,14 @@ async def test_write_dataframe_external_direct(a, tmpdir):
structure=structure,
parameters={},
management="external",
assets=[Asset(data_uri=str(ensure_uri(filepath)), is_directory=False)],
assets=[
Asset(
parameter="data_uri",
num=None,
data_uri=data_uri,
is_directory=False,
)
],
)
],
)
Expand Down Expand Up @@ -411,3 +430,98 @@ async def test_access_control(tmpdir):
public_client["outer_z"]["inner"].read()
with pytest.raises(KeyError):
public_client["outer_x"]


@pytest.mark.parametrize(
    "assets",
    [
        # Case "null-then-int": for the same parameter, a NULL num followed
        # by an integer num. Mixing NULL and non-NULL num within one
        # parameter must be rejected.
        [
            Asset(
                data_uri="file://localhost/test1",
                is_directory=False,
                parameter="filepath",
                num=None,
            ),
            Asset(
                data_uri="file://localhost/test2",
                is_directory=False,
                parameter="filepath",
                num=1,
            ),
        ],
        # Case "int-then-null": the same violation with the insertion order
        # reversed, to check the constraint is order-independent.
        [
            Asset(
                data_uri="file://localhost/test1",
                is_directory=False,
                parameter="filepath",
                num=1,
            ),
            Asset(
                data_uri="file://localhost/test2",
                is_directory=False,
                parameter="filepath",
                num=None,
            ),
        ],
        # Case "duplicate-null": two assets with num=None for the same
        # parameter (only one un-numbered asset per parameter is allowed).
        [
            Asset(
                data_uri="file://localhost/test1",
                is_directory=False,
                parameter="filepath",
                num=None,
            ),
            Asset(
                data_uri="file://localhost/test2",
                is_directory=False,
                parameter="filepath",
                num=None,
            ),
        ],
        # Case "duplicate-int": two assets with the same integer num for the
        # same parameter (num must be unique within a parameter).
        [
            Asset(
                data_uri="file://localhost/test1",
                is_directory=False,
                parameter="filepath",
                num=1,
            ),
            Asset(
                data_uri="file://localhost/test2",
                is_directory=False,
                parameter="filepath",
                num=1,
            ),
        ],
    ],
    ids=[
        "null-then-int",
        "int-then-null",
        "duplicate-null",
        "duplicate-int",
    ],
)
@pytest.mark.asyncio
async def test_constraints_on_parameter_and_num(a, assets):
    "Test constraints enforced by database on 'parameter' and 'num'."
    # Any simple adapter works here; the content is irrelevant — only the
    # asset rows attached to the data source matter for this test.
    arr_adapter = ArrayAdapter.from_array([1, 2, 3])
    # The violation surfaces differently per backend: SQLite raises
    # IntegrityError; PostgreSQL raises DBAPIError (presumably from the
    # trigger-based enforcement — confirm against the catalog schema).
    with pytest.raises(
        (
            sqlalchemy.exc.IntegrityError,  # SQLite
            sqlalchemy.exc.DBAPIError,  # PostgreSQL
        )
    ):
        # create_node_safe is expected to propagate the database error when
        # inserting the conflicting Asset rows.
        await create_node_safe(
            a,
            key="test",
            structure_family=arr_adapter.structure_family,
            metadata=dict(arr_adapter.metadata()),
            specs=arr_adapter.specs,
            data_sources=[
                DataSource(
                    mimetype="application/x-test",
                    structure=asdict(arr_adapter.structure()),
                    parameters={},
                    management=Management.external,
                    assets=assets,
                )
            ],
        )
Loading