From 4cd06c5b191b0582e7adf816ac0855008a4d585c Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Fri, 3 Mar 2023 05:23:14 +0000 Subject: [PATCH 1/6] parse nci filepath to thredds url --- cubedash/_utils.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/cubedash/_utils.py b/cubedash/_utils.py index 5ceba6e8f..f79a122cb 100644 --- a/cubedash/_utils.py +++ b/cubedash/_utils.py @@ -155,15 +155,16 @@ def as_external_url( >>> # Converts s3 to http >>> as_external_url('s3://some-data/L2/S2A_OPER_MSI_ARD__A030100_T56LNQ_N02.09/ARD-METADATA.yaml', "ap-southeast-2") 'https://some-data.s3.ap-southeast-2.amazonaws.com/L2/S2A_OPER_MSI_ARD__A030100_T56LNQ_N02.09/ARD-METADATA.yaml' - >>> # Other URLs are left as-is - >>> unconvertable_url = 'file:///g/data/xu18/ga_ls8c_ard_3-1-0_095073_2019-03-22_final.odc-metadata.yaml' - >>> unconvertable_url == as_external_url(unconvertable_url) - True + >>> # Converts NCI filepaths to THREDDS location + >>> as_external_url('file:///g/data/xu18/ga_ls8c_ard_3-1-0_095073_2019-03-22_final.odc-metadata.yaml') + 'https://dapds00.nci.org.au/thredds/fileServer/xu18/ga_ls8c_ard_3-1-0_095073_2019-03-22_final.odc-metadata.yaml' + >>> # Leaves other urls as-is >>> as_external_url('some/relative/path.txt') 'some/relative/path.txt' >>> # if base uri was none, we may want to return the s3 location instead of the metadata yaml """ parsed = urlparse(url) + print(parsed) if s3_region and parsed.scheme == "s3": # get buckets for which link should be to data location instead of s3 link @@ -179,6 +180,12 @@ def as_external_url( return f"https://{parsed.netloc}.s3.{s3_region}.amazonaws.com{parsed.path}" + if parsed.scheme == "file": + path = parsed.path.replace("/g/data/", "") + if path.find("/ga") != -1: + path = path.replace("/ga/", "/") + return f"https://dapds00.nci.org.au/thredds/fileServer/{path}" + return url From b38e11f71adf2aa7fb2091d3d3eaa9c7c6589aba Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Mon, 6 Mar 2023 00:53:35 +0000 Subject: [PATCH 2/6] fix resolution logic, make optional based on config --- cubedash/_utils.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/cubedash/_utils.py b/cubedash/_utils.py index f79a122cb..6b08a8f3d 100644 --- a/cubedash/_utils.py +++ b/cubedash/_utils.py @@ -44,6 +44,11 @@ from sqlalchemy.engine import Engine from werkzeug.datastructures import MultiDict +from . import _model + +# Should we resolve NCI local filesystem paths to the corresponding THREDDS location? +RESOLVE_NCI = _model.app.config.get("NCI_LOCAL_TO_THREDDS", False) + _TARGET_CRS = "EPSG:4326" DEFAULT_PLATFORM_END_DATE = { @@ -134,7 +139,9 @@ def get_dataset_file_offsets(dataset: Dataset) -> Dict[str, str]: return uri_list -def as_resolved_remote_url(location: str, offset: str) -> str: +def as_resolved_remote_url( + location: str, offset: str, thredds: bool = RESOLVE_NCI +) -> str: """ Convert a dataset location and file offset to a full remote URL. """ @@ -142,11 +149,12 @@ def as_resolved_remote_url(location: str, offset: str) -> str: urljoin(location, offset), (flask.current_app.config.get("CUBEDASH_DATA_S3_REGION", "ap-southeast-2")), location is None, + thredds, ) def as_external_url( - url: str, s3_region: str = None, is_base: bool = False + url: str, s3_region: str = None, is_base: bool = False, to_thredds: bool = False ) -> Optional[str]: """ Convert a URL to an externally-visible one. @@ -164,7 +172,6 @@ def as_external_url( >>> # if base uri was none, we may want to return the s3 location instead of the metadata yaml """ parsed = urlparse(url) - print(parsed) if s3_region and parsed.scheme == "s3": # get buckets for which link should be to data location instead of s3 link @@ -180,11 +187,11 @@ def as_external_url( return f"https://{parsed.netloc}.s3.{s3_region}.amazonaws.com{parsed.path}" - if parsed.scheme == "file": + if parsed.scheme == "file" and to_thredds: path = parsed.path.replace("/g/data/", "") - if path.find("/ga") != -1: + if path.find("/ga/") != -1: path = path.replace("/ga/", "/") - return f"https://dapds00.nci.org.au/thredds/fileServer/{path}" + return f"https://dapds00.nci.org.au/thredds/fileServer/{path}" return url From 292d13281d5fad683290b7dc3c80316c7e75ac16 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Mon, 6 Mar 2023 01:04:03 +0000 Subject: [PATCH 3/6] fix config get --- cubedash/_utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cubedash/_utils.py b/cubedash/_utils.py index 6b08a8f3d..3376528d6 100644 --- a/cubedash/_utils.py +++ b/cubedash/_utils.py @@ -44,10 +44,8 @@ from sqlalchemy.engine import Engine from werkzeug.datastructures import MultiDict -from . import _model - # Should we resolve NCI local filesystem paths to the corresponding THREDDS location? -RESOLVE_NCI = _model.app.config.get("NCI_LOCAL_TO_THREDDS", False) +RESOLVE_NCI = flask.current_app.config.get("NCI_LOCAL_TO_THREDDS", False) _TARGET_CRS = "EPSG:4326" From ad72118d5d3dbfb14c46528fa4fc046acc39ca7c Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Mon, 6 Mar 2023 01:25:56 +0000 Subject: [PATCH 4/6] fix config get --- cubedash/_utils.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/cubedash/_utils.py b/cubedash/_utils.py index 3376528d6..e1ea44273 100644 --- a/cubedash/_utils.py +++ b/cubedash/_utils.py @@ -138,7 +138,7 @@ def get_dataset_file_offsets(dataset: Dataset) -> Dict[str, str]: def as_resolved_remote_url( - location: str, offset: str, thredds: bool = RESOLVE_NCI + location: str, offset: str, thredds: Optional[bool] = None ) -> str: """ Convert a dataset location and file offset to a full remote URL. @@ -152,7 +152,10 @@ def as_resolved_remote_url( def as_external_url( - url: str, s3_region: str = None, is_base: bool = False, to_thredds: bool = False + url: str, + s3_region: str = None, + is_base: bool = False, + to_thredds: Optional[bool] = None, ) -> Optional[str]: """ Convert a URL to an externally-visible one. @@ -185,7 +188,12 @@ def as_external_url( return f"https://{parsed.netloc}.s3.{s3_region}.amazonaws.com{parsed.path}" - if parsed.scheme == "file" and to_thredds: + resolve_nci = ( + to_thredds + if to_thredds is not None + else flask.current_app.config.get("NCI_LOCAL_TO_THREDDS", False) + ) + if parsed.scheme == "file" and resolve_nci: path = parsed.path.replace("/g/data/", "") if path.find("/ga/") != -1: path = path.replace("/ga/", "/") From 980d49e19c90bff8730ab1ef50e15f94f4580e04 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Mon, 6 Mar 2023 02:03:40 +0000 Subject: [PATCH 5/6] fix config get --- cubedash/_utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/cubedash/_utils.py b/cubedash/_utils.py index e1ea44273..4d0f1a958 100644 --- a/cubedash/_utils.py +++ b/cubedash/_utils.py @@ -44,9 +44,6 @@ from sqlalchemy.engine import Engine from werkzeug.datastructures import MultiDict -# Should we resolve NCI local filesystem paths to the corresponding THREDDS location? -RESOLVE_NCI = flask.current_app.config.get("NCI_LOCAL_TO_THREDDS", False) - _TARGET_CRS = "EPSG:4326" DEFAULT_PLATFORM_END_DATE = { From 0e89df3c7e1dc9d181c0469d49742e7d059fba79 Mon Sep 17 00:00:00 2001 From: Ariana Barzinpour Date: Thu, 4 May 2023 03:56:07 +0000 Subject: [PATCH 6/6] fix broken test --- integration_tests/test_stac.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/integration_tests/test_stac.py b/integration_tests/test_stac.py index 2cc4f86a8..6fdf901e7 100644 --- a/integration_tests/test_stac.py +++ b/integration_tests/test_stac.py @@ -525,12 +525,7 @@ def expect_404(url: str, message_contains: str = None): "/collections/ls7_nbar_scene/items" "?datetime=2000-01-01/2000-01-01&bbox=-48.206,-14.195,-45.067,-12.272", "/stac/collections/ls7_nbar_scene/items" - + ( - "?datetime=2000-01-01/2000-01-01&bbox=-48.206,-14.195,-45.067,-12.272" - # Flask will auto-escape parameters - # .replace(",", "%2C") - .replace("/", "%2F") - ), + "?datetime=2000-01-01/2000-01-01&bbox=-48.206,-14.195,-45.067,-12.272", ), ( "/collections/ls7_nbar_scene/items/0c5b625e-5432-4911-9f7d-f6b894e27f3c",