From c7edd9dd8c216c44865b35a90dbebec8d4ad9946 Mon Sep 17 00:00:00 2001 From: Dan Allan Date: Wed, 29 Nov 2023 09:58:15 -0500 Subject: [PATCH 1/6] Deprecate /node/full/{path} routes --- README.md | 6 +- docs/source/explanations/compression.md | 4 +- .../explanations/specialized-formats.md | 14 +-- docs/source/reference/http-api-overview.md | 19 +-- docs/source/tutorials/plotly-integration.md | 4 +- tiled/client/dataframe.py | 6 +- tiled/server/core.py | 6 +- tiled/server/router.py | 108 +++++++++++++++++- 8 files changed, 135 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 6323f3f77..286f99b4b 100644 --- a/README.md +++ b/README.md @@ -139,13 +139,13 @@ data in whole or in efficiently-chunked parts in the format of your choice: ``` # Download tabular data as CSV -http://localhost:8000/api/v1/node/full/long_table?format=csv +http://localhost:8000/api/v1/table/full/long_table?format=csv # or XLSX (Excel) -http://localhost:8000/api/v1/node/full/long_table?format=xslx +http://localhost:8000/api/v1/table/full/long_table?format=xlsx # and subselect columns. -http://localhost:8000/api/v1/node/full/long_table?format=xslx&field=A&field=B +http://localhost:8000/api/v1/table/full/long_table?format=xlsx&column=A&column=B # View or download (2D) array data as PNG http://localhost:8000/api/v1/array/full/medium_image?format=png diff --git a/docs/source/explanations/compression.md b/docs/source/explanations/compression.md index 12e115c58..2603861bb 100644 --- a/docs/source/explanations/compression.md +++ b/docs/source/explanations/compression.md @@ -118,8 +118,8 @@ the client lists it as one that it supports. Here, the client lists `zstd` and `gzip`. 
``` -$ http -p Hh :8000/node/full/C accept-encoding:zstd,gzip -GET /node/full/C HTTP/1.1 +$ http -p Hh :8000/table/full/C accept-encoding:zstd,gzip +GET /table/full/C HTTP/1.1 Accept: */* Connection: keep-alive Host: localhost:8000 diff --git a/docs/source/explanations/specialized-formats.md b/docs/source/explanations/specialized-formats.md index 7bf494e47..c3d736dbe 100644 --- a/docs/source/explanations/specialized-formats.md +++ b/docs/source/explanations/specialized-formats.md @@ -113,7 +113,7 @@ tiled catalog register catalog.db \ As is, we can access the data as CSV, for example. ``` -$ curl -H 'Accept: text/csv' 'http://localhost:8000/api/v1/node/full/example' +$ curl -H 'Accept: text/csv' 'http://localhost:8000/api/v1/table/full/example' ,energy,i0,itrans,mutrans 0,8779.0,149013.7,550643.089065,-1.3070486 1,8789.0,144864.7,531876.119084,-1.3006104 @@ -135,20 +135,20 @@ There are three equivalent ways to request a format, more formally called a "med 1. Use the standard [HTTP `Accept` Header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept). ``` - $ curl -H 'Accept: text/csv' 'http://localhost:8000/api/v1/node/full/example' + $ curl -H 'Accept: text/csv' 'http://localhost:8000/api/v1/table/full/example' ``` 2. Place the media type in a `format` query parameter. ``` - $ curl 'http://localhost:8000/api/v1/node/full/example?format=text/csv' + $ curl 'http://localhost:8000/api/v1/table/full/example?format=text/csv' ``` 3. Provide just a file extension. This is user friendly for people who do not know or care what a "media type" is. The server looks up `csv` in a registry mapping file extensions to media types. 
``` - $ curl 'http://localhost:8000/api/v1/node/full/example?format=csv' + $ curl 'http://localhost:8000/api/v1/table/full/example?format=csv' ``` ``` @@ -273,7 +273,7 @@ tiled serve config --public config.yml we can request the content as XDI in any of these ways: ``` -$ curl -H 'Accept: application/x-xdi' 'http://localhost:8000/api/v1/node/full/example.xdi' -$ curl 'http://localhost:8000/api/v1/node/full/example?format=application/x-xdi' -$ curl 'http://localhost:8000/api/v1/node/full/example?format=xdi' +$ curl -H 'Accept: application/x-xdi' 'http://localhost:8000/api/v1/table/full/example.xdi' +$ curl 'http://localhost:8000/api/v1/table/full/example?format=application/x-xdi' +$ curl 'http://localhost:8000/api/v1/table/full/example?format=xdi' ``` diff --git a/docs/source/reference/http-api-overview.md b/docs/source/reference/http-api-overview.md index 657ecb0a4..3f15cd832 100644 --- a/docs/source/reference/http-api-overview.md +++ b/docs/source/reference/http-api-overview.md @@ -20,15 +20,16 @@ entries. The ``GET /api/v1/metadata/{path}`` route provides the metadata about one node. The ``GET /api/v1/search/{path}`` route provides paginated access to the children of -a given node, with optional filtering (search). The ``GET /api/v1/node/full/{path}`` route -provides all the metadata and data below a given node. - -Specialized data access routes ``GET /api/v1/array/block/{path}``, ``GET /api/v1/array/full/{path}``, -and ``GET /api/v1/table/partition/{path}`` provide options for slicing and sub-selection -specific to arrays and table. Generic clients, like a web browser, -should use the "full" routes, which send the entire (sliced) result in one -response. More sophisticated clients with some knowledge of Tiled may use the -other routes, which enable parallel chunk-based access. +a given node, with optional filtering (search). The responses contain links to +the data, in various forms. 
+ +For example, data access routes ``GET /api/v1/array/block/{path}``, +``GET /api/v1/array/full/{path}``, and ``GET /api/v1/table/partition/{path}`` +provide options for slicing and sub-selection specific to arrays and table. +Generic clients, like a web browser, should use the "full" routes, which send +the entire (sliced) result in one response. More sophisticated clients with +some knowledge of Tiled may use the other routes, which enable parallel +chunk-based access. The root route, `GET /api/v1/` provides general information about the server and the formats and authentication providers it supports. diff --git a/docs/source/tutorials/plotly-integration.md b/docs/source/tutorials/plotly-integration.md index 081fcc1f4..4741c0edb 100644 --- a/docs/source/tutorials/plotly-integration.md +++ b/docs/source/tutorials/plotly-integration.md @@ -20,11 +20,11 @@ data visualization tool. 5. Use the "Import" menu to import data by URL. Enter a URL such as ``` - http://localhost:8000/api/v1/node/full/short_table?format=text/csv + http://localhost:8000/api/v1/table/full/short_table?format=text/csv ``` or, to load only certain columns, ``` - http://localhost:8000/api/v1/node/full/short_table?format=text/csv&field=A&field=B + http://localhost:8000/api/v1/table/full/short_table?format=text/csv&column=A&column=B ``` diff --git a/tiled/client/dataframe.py b/tiled/client/dataframe.py index affc3b349..3d30daeff 100644 --- a/tiled/client/dataframe.py +++ b/tiled/client/dataframe.py @@ -98,8 +98,8 @@ def _get_partition(self, partition, columns): params = {"partition": partition} if columns: # Note: The singular/plural inconsistency here is due to the fact that - # ["A", "B"] will be encoded in the URL as field=A&field=B - params["field"] = columns + # ["A", "B"] will be encoded in the URL as column=A&column=B + params["column"] = columns content = handle_error( self.context.http_client.get( self.item["links"]["partition"], @@ -222,7 +222,7 @@ def export(self, filepath, columns=None, *, 
format=None): """ params = {} if columns is not None: - params["field"] = columns + params["column"] = columns return export_util( filepath, format, diff --git a/tiled/server/core.py b/tiled/server/core.py index b6e2b7ca1..c1a0ff954 100644 --- a/tiled/server/core.py +++ b/tiled/server/core.py @@ -495,7 +495,7 @@ async def construct_resource( d["links"] = { "self": f"{base_url}/metadata/{path_str}", "search": f"{base_url}/search/{path_str}", - "full": f"{base_url}/node/full/{path_str}", + "full": f"{base_url}/container/full/{path_str}", } resource = schemas.Resource[ @@ -722,8 +722,8 @@ class WrongTypeForRoute(Exception): FULL_LINKS = { StructureFamily.array: {"full": "{base_url}/array/full/{path}"}, StructureFamily.awkward: {"full": "{base_url}/awkward/full/{path}"}, - StructureFamily.container: {"full": "{base_url}/node/full/{path}"}, - StructureFamily.table: {"full": "{base_url}/node/full/{path}"}, + StructureFamily.container: {"full": "{base_url}/container/full/{path}"}, + StructureFamily.table: {"full": "{base_url}/table/full/{path}"}, StructureFamily.sparse: {"full": "{base_url}/array/full/{path}"}, } diff --git a/tiled/server/router.py b/tiled/server/router.py index c8c59eee4..403f2b90d 100644 --- a/tiled/server/router.py +++ b/tiled/server/router.py @@ -540,10 +540,111 @@ async def table_partition( raise HTTPException(status_code=406, detail=err.args[0]) +@router.get( + "/table/full/{path:path}", + response_model=schemas.Response, + name="full 'container' or 'table'", +) +async def table_full( + request: Request, + entry=SecureEntry(scopes=["read:data"]), + principal: str = Depends(get_current_principal), + column: Optional[List[str]] = Query(None, min_length=1), + format: Optional[str] = None, + filename: Optional[str] = None, + serialization_registry=Depends(get_serialization_registry), + settings: BaseSettings = Depends(get_settings), +): + """ + Fetch the data below the given node. 
+ """ + try: + with record_timing(request.state.metrics, "read"): + data = await ensure_awaitable(entry.read, column) + except KeyError as err: + (key,) = err.args + raise HTTPException(status_code=400, detail=f"No such field {key}.") + if data.memory_usage().sum() > settings.response_bytesize_limit: + raise HTTPException( + status_code=400, + detail=( + f"Response would exceed {settings.response_bytesize_limit}. " + "Select a subset of the columns to " + "request a smaller chunks." + ), + ) + try: + with record_timing(request.state.metrics, "pack"): + return await construct_data_response( + entry.structure_family, + serialization_registry, + data, + entry.metadata(), + request, + format, + specs=getattr(entry, "specs", []), + expires=getattr(entry, "content_stale_at", None), + filename=filename, + filter_for_access=None, + ) + except UnsupportedMediaTypes as err: + raise HTTPException(status_code=406, detail=err.args[0]) + + +@router.get( + "/container/full/{path:path}", + response_model=schemas.Response, + name="full 'container' or 'table'", +) +async def container_full( + request: Request, + entry=SecureEntry(scopes=["read:data"]), + principal: str = Depends(get_current_principal), + field: Optional[List[str]] = Query(None, min_length=1), + format: Optional[str] = None, + filename: Optional[str] = None, + serialization_registry=Depends(get_serialization_registry), + settings: BaseSettings = Depends(get_settings), +): + """ + Fetch the data below the given node. + """ + try: + with record_timing(request.state.metrics, "read"): + data = await ensure_awaitable(entry.read, field) + except KeyError as err: + (key,) = err.args + raise HTTPException(status_code=400, detail=f"No such field {key}.") + curried_filter = partial( + filter_for_access, + principal=principal, + scopes=["read:data"], + metrics=request.state.metrics, + ) + # TODO Walk node to determine size before handing off to serializer. 
+ try: + with record_timing(request.state.metrics, "pack"): + return await construct_data_response( + entry.structure_family, + serialization_registry, + data, + entry.metadata(), + request, + format, + specs=getattr(entry, "specs", []), + expires=getattr(entry, "content_stale_at", None), + filename=filename, + filter_for_access=curried_filter, + ) + except UnsupportedMediaTypes as err: + raise HTTPException(status_code=406, detail=err.args[0]) + + @router.get( "/node/full/{path:path}", response_model=schemas.Response, name="full 'container' or 'table'", + deprecated=True, ) async def node_full( request: Request, @@ -856,9 +957,9 @@ async def post_metadata( links[ "partition" ] = f"{base_url}/table/partition/{path_str}?partition={{index}}" - links["full"] = f"{base_url}/node/full/{path_str}" + links["full"] = f"{base_url}/table/full/{path_str}" elif body.structure_family == StructureFamily.container: - links["full"] = f"{base_url}/node/full/{path_str}" + links["full"] = f"{base_url}/container/full/{path_str}" links["search"] = f"{base_url}/search/{path_str}" elif body.structure_family == StructureFamily.awkward: links["buffers"] = f"{base_url}/awkward/buffers/{path_str}" @@ -946,7 +1047,8 @@ async def put_array_block( return json_or_msgpack(request, None) -@router.put("/node/full/{path:path}") +@router.put("/table/full/{path:path}") +@router.put("/node/full/{path:path}", deprecated=True) async def put_node_full( request: Request, entry=SecureEntry(scopes=["write:data"]), From b0ff67b2b90ac16883733ab1e2d5ecee7972ba4c Mon Sep 17 00:00:00 2001 From: Dan Allan Date: Wed, 29 Nov 2023 13:21:41 -0500 Subject: [PATCH 2/6] copy edits Co-authored-by: Padraic Shafer <76011594+padraic-shafer@users.noreply.github.com> --- docs/source/reference/http-api-overview.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/source/reference/http-api-overview.md b/docs/source/reference/http-api-overview.md index 3f15cd832..21b466592 100644 --- 
a/docs/source/reference/http-api-overview.md +++ b/docs/source/reference/http-api-overview.md @@ -25,12 +25,15 @@ the data, in various forms. For example, data access routes ``GET /api/v1/array/block/{path}``, ``GET /api/v1/array/full/{path}``, and ``GET /api/v1/table/partition/{path}`` -provide options for slicing and sub-selection specific to arrays and table. +provide options for slicing and sub-selection specific to arrays and tables. Generic clients, like a web browser, should use the "full" routes, which send the entire (sliced) result in one response. More sophisticated clients with some knowledge of Tiled may use the other routes, which enable parallel chunk-based access. +The ``GET /api/v1/container/full/{path}`` route + provides all the metadata and data below a given directory. This route also works for other container-like data structures. + The root route, `GET /api/v1/` provides general information about the server and the formats and authentication providers it supports. From 5359f7a200d170b605ee56cd3fbf86c7a29d380a Mon Sep 17 00:00:00 2001 From: Dan Allan Date: Wed, 29 Nov 2023 13:24:35 -0500 Subject: [PATCH 3/6] Remove unused dep Co-authored-by: Padraic Shafer <76011594+padraic-shafer@users.noreply.github.com> --- tiled/server/router.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tiled/server/router.py b/tiled/server/router.py index 403f2b90d..0976270d5 100644 --- a/tiled/server/router.py +++ b/tiled/server/router.py @@ -604,7 +604,6 @@ async def container_full( format: Optional[str] = None, filename: Optional[str] = None, serialization_registry=Depends(get_serialization_registry), - settings: BaseSettings = Depends(get_settings), ): """ Fetch the data below the given node. 
From b908608ec972dc28923f198df6840e581b64c95c Mon Sep 17 00:00:00 2001 From: Dan Allan Date: Wed, 29 Nov 2023 13:25:47 -0500 Subject: [PATCH 4/6] Fixup separated routes Co-authored-by: Padraic Shafer <76011594+padraic-shafer@users.noreply.github.com> --- tiled/server/router.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tiled/server/router.py b/tiled/server/router.py index 0976270d5..4023f5661 100644 --- a/tiled/server/router.py +++ b/tiled/server/router.py @@ -594,7 +594,7 @@ async def table_full( @router.get( "/container/full/{path:path}", response_model=schemas.Response, - name="full 'container' or 'table'", + name="full 'container' metadata and data", ) async def container_full( request: Request, @@ -606,8 +606,13 @@ async def container_full( serialization_registry=Depends(get_serialization_registry), ): """ - Fetch the data below the given node. + Fetch the data for the given container. """ + if entry.structure_family != StructureFamily.container: + raise HTTPException( + status_code=404, + detail=f"Cannot read {entry.structure_family} structure with /container/full route.", + ) try: with record_timing(request.state.metrics, "read"): data = await ensure_awaitable(entry.read, field) From 73902c2df5a4fcba6974424e552fb8c6d6366d3c Mon Sep 17 00:00:00 2001 From: Dan Allan Date: Wed, 29 Nov 2023 13:27:03 -0500 Subject: [PATCH 5/6] Fix up separated routes (2) Co-authored-by: Padraic Shafer <76011594+padraic-shafer@users.noreply.github.com> --- tiled/server/router.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tiled/server/router.py b/tiled/server/router.py index 4023f5661..c861971b3 100644 --- a/tiled/server/router.py +++ b/tiled/server/router.py @@ -543,7 +543,7 @@ async def table_partition( @router.get( "/table/full/{path:path}", response_model=schemas.Response, - name="full 'container' or 'table'", + name="full 'table' data", ) async def table_full( request: Request, @@ -556,8 +556,13 @@ async def 
table_full( settings: BaseSettings = Depends(get_settings), ): """ - Fetch the data below the given node. + Fetch the data for the given table. """ + if entry.structure_family != StructureFamily.table: + raise HTTPException( + status_code=404, + detail=f"Cannot read {entry.structure_family} structure with /table/full route.", + ) try: with record_timing(request.state.metrics, "read"): data = await ensure_awaitable(entry.read, column) From d1dd37c4b2d8e76fa7e657b17ba42e0338d9df8c Mon Sep 17 00:00:00 2001 From: Dan Allan Date: Wed, 29 Nov 2023 13:28:57 -0500 Subject: [PATCH 6/6] Drop unused param --- tiled/server/router.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tiled/server/router.py b/tiled/server/router.py index c861971b3..2ce2d48df 100644 --- a/tiled/server/router.py +++ b/tiled/server/router.py @@ -548,7 +548,6 @@ async def table_partition( async def table_full( request: Request, entry=SecureEntry(scopes=["read:data"]), - principal: str = Depends(get_current_principal), column: Optional[List[str]] = Query(None, min_length=1), format: Optional[str] = None, filename: Optional[str] = None,