crim-ca · OlivGruwe · Jun 7, 2023 · Jun 7, 2023 · Jun 12, 2023 · Jun 12, 2023
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -12,6 +12,13 @@ Changes
 
 Changes:
 --------
+- Add support for various GeoTIFF formats, allowing flexible handling and representation of GeoTIFFs in outputs
+  (fixes `#100 <https://github.com/crim-ca/weaver/issues/100>`_).
+- Add support for ``GET /results/{id}`` and ``GET /outputs/{id}`` routes to enable direct access to individual
+  job result items by ID. This enhancement supports alternate representations based on the ``Accept`` header:
+  if an alternate format (e.g., YAML for a JSON source) is requested, it is automatically generated and returned.
+  ``Link`` headers listing all possible output formats are also returned, allowing retrieval via query parameters,
+  e.g., ``output?f=application/x-yaml`` (fixes `#18 <https://github.com/crim-ca/weaver/issues/18>`_).
 - Add support of *OGC API - Processes - Part 4: Job Management* endpoints for `Job` creation and execution
   (fixes `#716 <https://github.com/crim-ca/weaver/issues/716>`_).
 - Add `CLI` operations ``update_job``, ``trigger_job`` and ``inputs`` corresponding to the required `Job` operations

diff --git a/Makefile b/Makefile
@@ -17,6 +17,7 @@ DOCKER_REPO ?= pavics/weaver
 # guess OS (Linux, Darwin,...)
 OS_NAME := $(shell uname -s 2>/dev/null || echo "unknown")
 CPU_ARCH := $(shell uname -m 2>/dev/null || uname -p 2>/dev/null || echo "unknown")
+SUDO ?=
 
 # conda
 CONDA_CMD      ?= __EMPTY__
@@ -228,10 +229,10 @@ conda-env-export:		## export the conda environment
 install: install-all    ## alias for 'install-all' target
 
 .PHONY: install-run
-install-run: conda-install install-sys install-pkg install-raw 	## install requirements and application to run locally
+install-run: conda-install install-sys install-pkg install-raw install-dev install-transform ## install requirements and application to run locally
 
 .PHONY: install-all
-install-all: conda-install install-sys install-pkg install-pip install-dev  ## install application with all dependencies
+install-all: conda-install install-sys install-pkg install-pip install-dev install-transform ## install application with all dependencies
 
 .PHONY: install-doc
 install-doc: install-pip	## install documentation dependencies
@@ -274,7 +275,7 @@ install-raw:	## install without any requirements or dependencies (suppose everyt
 install-npm:	## install npm package manager and dependencies if they cannot be found
 	@[ -f "$(shell which npm)" ] || ( \
 		echo "Binary package manager npm not found. Attempting to install it."; \
-		apt-get install npm \
+		$(SUDO) apt-get install npm \
 	)
 
 .PHONY: install-npm-stylelint
@@ -291,6 +292,16 @@ install-npm-remarklint: install-npm		## install remark-lint dependency for 'chec
 		npm install --save-dev \
 	)
 
+.PHONY: install-transform
+install-transform: install-cairo-dependencies       ## install system dependencies required by the Transformer (aggregates 'install-cairo-dependencies')
+
+.PHONY: install-cairo-dependencies
+install-cairo-dependencies:   ## install the Pango/Cairo shared library required by the Transformer (skipped if already present)
+	@ldconfig -p 2>/dev/null | grep -q "libpangocairo-1.0" || ( \
+		echo "Shared library libpangocairo-1.0 not found. Attempting to install it."; \
+		$(SUDO) apt-get install -y libpangocairo-1.0-0 \
+	)
+
 .PHONY: install-dev-npm
 install-dev-npm: install-npm install-npm-remarklint install-npm-remarklint  ## install all npm development dependencies
 

diff --git a/docker/Dockerfile-base b/docker/Dockerfile-base
@@ -23,6 +23,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
         g++ \
         git \
         nodejs \
+        libpangocairo-1.0-0 \
     && pip install --no-cache-dir --upgrade -r requirements-sys.txt \
     && pip install --no-cache-dir -r requirements.txt \
     && pip install --no-cache-dir -e ${APP_DIR} \

diff --git a/requirements.txt b/requirements.txt
@@ -11,6 +11,7 @@ boto3-stubs[s3]
 # https://github.com/celery/billiard/issues/313
 billiard>2; sys_platform != "win32"  # avoid issue with use_2to3
 billiard>3.2,<3.4; sys_platform == "win32"
+cairosvg
 # pymongo>=4 breaks for some kombu combinations corresponding to pinned Celery
 # - https://github.com/crim-ca/weaver/issues/386
 # - https://github.com/celery/kombu/pull/1536
@@ -50,6 +51,7 @@ duration
 esgf-compute-api @ git+https://github.com/ESGF/[email protected]
 # invalid 'zarr' requirement in 'geotiff' dependencies required by 'pywps' fail to install
 # (https://github.com/KipCrossing/geotiff/pull/59)
+fpdf
 geotiff>=0.2.8
 # gunicorn >20 breaks some config.ini loading parameters (paste)
 # use pserve to continue supporting config.ini with paste settings
@@ -58,6 +60,7 @@ gunicorn>=22
 # even more reduced dependency constraints (https://github.com/vinitkumar/json2xml/pull/195)
 json2xml==4.1.0
 jsonschema>=3.0.1
+
 # FIXME: kombu for pymongo>=4 not yet released as 5.3.0 (only pre-releases available)
 # - https://github.com/crim-ca/weaver/issues/386
 # - https://github.com/celery/kombu/pull/1536
@@ -68,13 +71,16 @@ mako
 # force use of later mistune (https://github.com/common-workflow-language/schema_salad/pull/619#issuecomment-1346025607)
 # employed by cwltool -> schema-salad -> mistune
 #mistune>=2.0.3,<2.1
+multipagetiff
 mypy_boto3_s3
 numpy>=1.22.2,<2; python_version < "3.10"
 numpy>=1.22.2; python_version >= "3.10"
 # esgf-compute-api (cwt) needs oauthlib but doesn't add it in their requirements
 oauthlib
 owslib==0.29.3
+pandas
 PasteDeploy>=3.1.0; python_version >= "3.12"
+Pillow
 pint
 psutil
 # notes: https://github.com/geopython/pygeofilter
@@ -102,9 +108,11 @@ pystac
 pystac_client
 python-box
 python-dateutil
+python-magic
 pytz
 pywps==4.6.0
 pyyaml>=5.2
+rasterio
 rdflib>=5  # pyup: ignore
 requests>=2.32.2
 requests_file

diff --git a/tests/functional/test_cli.py b/tests/functional/test_cli.py
@@ -450,7 +450,10 @@ def test_describe(self):
         for out_fmt in output_formats:
             out_fmt.pop("$schema", None)
             out_fmt.pop("$id", None)
-        assert output_formats == [{"default": True, "mediaType": ContentType.TEXT_PLAIN}]
+        assert output_formats == [{"default": True, "mediaType": ContentType.TEXT_PLAIN},
+                                  {"mediaType": ContentType.TEXT_HTML},
+                                  {"mediaType": ContentType.APP_PDF}
+                                  ]
         assert "undefined" not in result.message, "CLI should not have confused process description as response detail."
         assert result.body["description"] == (
             "Dummy process that simply echo's back the input message for testing purposes."

diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py
@@ -567,6 +567,14 @@ def test_deploy_process_io_no_format_default(self):
         expect_outputs["file"]["formats"][0]["default"] = False
         expect_outputs["file"]["formats"][1]["default"] = True
         expect_outputs["file"]["formats"][2]["default"] = False
+        # Alternate type added automatically in offering.
+        alternative_formats = [
+            {"mediaType": ContentType.IMAGE_GIF},
+            {"mediaType": ContentType.IMAGE_TIFF},
+            {"mediaType": ContentType.IMAGE_SVG_XML},
+            {"mediaType": ContentType.APP_PDF}
+        ]
+        expect_outputs["file"]["formats"].extend(alternative_formats)
         expect_outputs["file"]["schema"] = {
             "oneOf": [
                 {"type": "string", "format": "binary",
@@ -1508,14 +1516,20 @@ def test_deploy_merge_complex_io_with_multiple_formats_and_defaults(self):
                 # assert "default" not in format_spec
 
         assert proc["outputs"][0]["id"] == "single_value_single_format"
-        assert len(proc["outputs"][0]["formats"]) == 1
+        assert len(proc["outputs"][0]["formats"]) == 4  # Alternative format added in process
         assert proc["outputs"][0]["formats"][0]["mediaType"] == ContentType.APP_JSON
         assert proc["outputs"][0]["formats"][0]["default"] is True
+        assert proc["outputs"][0]["formats"][1]["mediaType"] == ContentType.TEXT_CSV
+        assert proc["outputs"][0]["formats"][2]["mediaType"] == ContentType.APP_XML
+        assert proc["outputs"][0]["formats"][3]["mediaType"] == ContentType.APP_YAML
         assert proc["outputs"][1]["id"] == "single_value_multi_format"
-        assert len(proc["outputs"][1]["formats"]) == 3
+        assert len(proc["outputs"][1]["formats"]) == 6  # Alternative format added in process
         assert proc["outputs"][1]["formats"][0]["mediaType"] == ContentType.APP_JSON
         assert proc["outputs"][1]["formats"][1]["mediaType"] == ContentType.TEXT_PLAIN
         assert proc["outputs"][1]["formats"][2]["mediaType"] == ContentType.APP_NETCDF
+        assert proc["outputs"][1]["formats"][3]["mediaType"] == ContentType.TEXT_CSV
+        assert proc["outputs"][1]["formats"][4]["mediaType"] == ContentType.APP_XML
+        assert proc["outputs"][1]["formats"][5]["mediaType"] == ContentType.APP_YAML
         assert proc["outputs"][1]["formats"][0]["default"] is True   # mandatory
         assert proc["outputs"][1]["formats"][1].get("default", False) is False  # omission is allowed
         assert proc["outputs"][1]["formats"][2].get("default", False) is False  # omission is allowed
@@ -3042,10 +3056,12 @@ def test_deploy_merge_complex_io_from_package(self):
         assert "minOccurs" not in proc["outputs"][0]
         assert "maxOccurs" not in proc["outputs"][0]
         assert isinstance(proc["outputs"][0]["formats"], list)
-        assert len(proc["outputs"][0]["formats"]) == 1
+        assert len(proc["outputs"][0]["formats"]) == 3
         assert isinstance(proc["outputs"][0]["formats"][0], dict)
         assert proc["outputs"][0]["formats"][0]["mediaType"] == ContentType.TEXT_PLAIN
         assert proc["outputs"][0]["formats"][0]["default"] is True
+        assert proc["outputs"][0]["formats"][1]["mediaType"] == ContentType.TEXT_HTML
+        assert proc["outputs"][0]["formats"][2]["mediaType"] == ContentType.APP_PDF
         expect = KNOWN_PROCESS_DESCRIPTION_FIELDS
         fields = set(proc.keys()) - expect
         assert len(fields) == 0, f"Unexpected fields found:\n  Unknown: {fields}\n  Expected: {expect}"
@@ -3145,15 +3161,23 @@ def test_deploy_merge_complex_io_from_package_and_offering(self):
         assert isinstance(proc["outputs"], list)
         assert len(proc["outputs"]) == 2
         assert proc["outputs"][0]["id"] == "complex_output_only_cwl_minimal"
-        assert len(proc["outputs"][0]["formats"]) == 1, \
-            "Default format should be added to process definition when omitted from both CWL and WPS"
+        assert len(proc["outputs"][0]["formats"]) == 3, (
+            "Default format and alternate formats should be added "
+            "to process definition when omitted from both CWL and WPS"
+        )
         assert proc["outputs"][0]["formats"][0]["mediaType"] == ContentType.TEXT_PLAIN
         assert proc["outputs"][0]["formats"][0]["default"] is True
+        assert proc["outputs"][0]["formats"][1]["mediaType"] == ContentType.TEXT_HTML
+        assert proc["outputs"][0]["formats"][2]["mediaType"] == ContentType.APP_PDF
         assert proc["outputs"][1]["id"] == "complex_output_both_cwl_and_wps"
-        assert len(proc["outputs"][1]["formats"]) == 1, \
-            "Default format should be added to process definition when omitted from both CWL and WPS"
+        assert len(proc["outputs"][1]["formats"]) == 3, (
+            "Default format and alternate formats should be added "
+            "to process definition when omitted from both CWL and WPS"
+        )
         assert proc["outputs"][1]["formats"][0]["mediaType"] == ContentType.TEXT_PLAIN
         assert proc["outputs"][1]["formats"][0]["default"] is True
+        assert proc["outputs"][1]["formats"][1]["mediaType"] == ContentType.TEXT_HTML
+        assert proc["outputs"][1]["formats"][2]["mediaType"] == ContentType.APP_PDF
         assert proc["outputs"][1]["title"] == "Additional detail only within WPS output", \
             "Additional details defined only in WPS matching CWL I/O by ID should be preserved"
 
@@ -3271,9 +3295,11 @@ def test_deploy_literal_and_complex_io_from_wps_xml_reference(self):
         assert proc["outputs"][1]["description"] == "Collected logs during process run."
         assert "minOccurs" not in proc["outputs"][1]
         assert "maxOccurs" not in proc["outputs"][1]
-        assert len(proc["outputs"][1]["formats"]) == 1
+        assert len(proc["outputs"][1]["formats"]) == 3
         assert proc["outputs"][1]["formats"][0]["default"] is True
         assert proc["outputs"][1]["formats"][0]["mediaType"] == ContentType.TEXT_PLAIN
+        assert proc["outputs"][1]["formats"][1]["mediaType"] == ContentType.TEXT_HTML
+        assert proc["outputs"][1]["formats"][2]["mediaType"] == ContentType.APP_PDF
 
     def test_deploy_enum_array_and_multi_format_inputs_from_wps_xml_reference(self):
         body = {
@@ -4118,8 +4144,9 @@ def test_execute_single_output_response_raw_reference_literal(self):
         assert results.content_type is None
         assert results.headers["Content-Location"] == results_href
         assert ("Link", output_data_link) in results.headerlist
+        rel_pattern = re.compile(r"rel=\"?([^\"]+)\"?")
         assert not any(
-            any(out_id in link[-1] for out_id in ["output_json", "output_text"])
+            any(out_id in rel_pattern.search(link[1]).group(1) for out_id in ["output_json", "output_text"])
             for link in results.headerlist if link[0] == "Link"
         ), "Filtered outputs should not be found in results response links."
         outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT})
@@ -4345,9 +4372,7 @@ def test_execute_single_output_multipart_accept_link(self):
             },
         }
 
-    # FIXME: implement (https://github.com/crim-ca/weaver/pull/548)
     @pytest.mark.oap_part1
-    @pytest.mark.xfail(reason="not implemented")
     def test_execute_single_output_multipart_accept_alt_format(self):
         """
         Validate the returned contents combining an ``Accept`` header as ``multipart`` and a ``format`` in ``outputs``.
@@ -4402,22 +4427,24 @@ def test_execute_single_output_multipart_accept_alt_format(self):
         output_json_as_yaml = yaml.safe_dump({"data": "test"})
         results_body = self.fix_result_multipart_indent(f"""
             --{boundary}
+            Content-Disposition: attachment; name="output_json"; filename="result.yml"
             Content-Type: {ContentType.APP_YAML}
+            Content-Location: {out_url}/{job_id}/output_json/result.yml
             Content-ID: <output_json@{job_id}>
-            Content-Length: 12
+            Content-Length: 11
 
             {output_json_as_yaml}
             --{boundary}--
         """)
         results_text = self.remove_result_multipart_variable(results.text)
         assert results.content_type.startswith(ContentType.MULTIPART_MIXED)
-        assert results_text == results_body
+        for line1, line2 in zip(results_text.splitlines(), results_body.splitlines()):
+            assert line1 == line2
         outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT})
         assert outputs.content_type.startswith(ContentType.APP_JSON)
         assert outputs.json["outputs"] == {
-            "output_data": "test",
             "output_json": {
-                "href": f"{out_url}/{job_id}/output_json/output.yml",
+                "href": f"{out_url}/{job_id}/output_json/result.yml",
                 "type": ContentType.APP_YAML,
             },
         }
@@ -4426,11 +4453,9 @@ def test_execute_single_output_multipart_accept_alt_format(self):
         result_json = self.app.get(f"/jobs/{job_id}/results/output_json", headers=self.json_headers)
         assert result_json.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}"
         assert result_json.content_type == ContentType.APP_JSON
-        assert result_json.text == "{\"data\":\"test\"}"
+        assert result_json.text == "{\"data\": \"test\"}"
 
-    # FIXME: implement (https://github.com/crim-ca/weaver/pull/548)
     @pytest.mark.oap_part1
-    @pytest.mark.xfail(reason="not implemented")
     def test_execute_single_output_response_document_alt_format_yaml(self):
         proc = "EchoResultsTester"
         p_id = self.fully_qualified_test_name(proc)
@@ -4479,32 +4504,34 @@ def test_execute_single_output_response_document_alt_format_yaml(self):
         output_json_as_yaml = yaml.safe_dump({"data": "test"})
         results_body = self.fix_result_multipart_indent(f"""
             --{boundary}
+            Content-Disposition: attachment; name="output_json"; filename="result.yml"
             Content-Type: {ContentType.APP_YAML}
+            Content-Location: {out_url}/{job_id}/output_json/result.yml
             Content-ID: <output_json@{job_id}>
-            Content-Length: 12
+            Content-Length: 11
 
             {output_json_as_yaml}
             --{boundary}--
         """)
         results_text = self.remove_result_multipart_variable(results.text)
         assert results.content_type.startswith(ContentType.MULTIPART_MIXED)
-        assert results_text == results_body
+        for line1, line2 in zip(results_text.splitlines(), results_body.splitlines()):
+            assert line1 == line2
+
         outputs = self.app.get(f"/jobs/{job_id}/outputs", params={"schema": JobInputsOutputsSchema.OGC_STRICT})
         assert outputs.content_type.startswith(ContentType.APP_JSON)
         assert outputs.json["outputs"] == {
-            "output_data": "test",
             "output_json": {
-                "href": f"{out_url}/{job_id}/output_json/output.yml",
+                "href": f"{out_url}/{job_id}/output_json/result.yml",
                 "type": ContentType.APP_YAML,
             },
         }
 
-        # FIXME: implement (https://github.com/crim-ca/weaver/pull/548)
         # validate the results can be obtained with the "real" representation
         result_json = self.app.get(f"/jobs/{job_id}/results/output_json", headers=self.json_headers)
         assert result_json.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.text}"
         assert result_json.content_type == ContentType.APP_JSON
-        assert result_json.text == "{\"data\":\"test\"}"
+        assert result_json.text == "{\"data\": \"test\"}"
 
     @pytest.mark.oap_part1
     def test_execute_single_output_response_document_alt_format_json_raw_literal(self):
@@ -4571,12 +4598,11 @@ def test_execute_single_output_response_document_alt_format_json_raw_literal(sel
             },
         }
 
-        # FIXME: add check of direct request of output (https://github.com/crim-ca/weaver/pull/548)
         # validate the results can be obtained with the "real" representation
-        # result_json = self.app.get(f"/jobs/{job_id}/results/output_json", headers=self.json_headers)
-        # assert result_json.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}"
-        # assert result_json.content_type == ContentType.APP_JSON
-        # assert result_json.json == {"data": "test"}
+        result_json = self.app.get(f"/jobs/{job_id}/results/output_json", headers=self.json_headers)
+        assert result_json.status_code == 200, f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}"
+        assert result_json.content_type == ContentType.APP_JSON
+        assert result_json.json == {"data": "test"}
 
     @pytest.mark.oap_part1
     def test_execute_single_output_response_document_default_format_json_special(self):

diff --git a/tests/functional/test_wps_provider.py b/tests/functional/test_wps_provider.py
@@ -150,9 +150,11 @@ def test_register_describe_execute_ncdump(self, mock_responses):
         assert "outputs" in body and len(body["outputs"]) == 1
         assert "output" in body["outputs"]
         assert "formats" in body["outputs"]["output"]
-        assert len(body["outputs"]["output"]["formats"]) == 1
+        assert len(body["outputs"]["output"]["formats"]) == 3
         assert body["outputs"]["output"]["formats"][0]["default"] is True
         assert body["outputs"]["output"]["formats"][0]["mediaType"] == ContentType.TEXT_PLAIN
+        assert body["outputs"]["output"]["formats"][1]["mediaType"] == ContentType.TEXT_HTML
+        assert body["outputs"]["output"]["formats"][2]["mediaType"] == ContentType.APP_PDF
         assert "literalDataDomains" not in body["outputs"]["output"]
 
         assert body["processDescriptionURL"] == proc_desc_url