Skip to content

Commit

Permalink
VRT processed dataset: unscale source raster values (#11623)
Browse files Browse the repository at this point in the history
  • Loading branch information
dbaston authored Jan 10, 2025
1 parent 06c046d commit ee04fce
Show file tree
Hide file tree
Showing 6 changed files with 218 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<VRTDataset subClass="VRTProcessedDataset">
<Input>
<Input unscale="AUTO">
<SourceFilename relativeToVRT="1">../byte.tif</SourceFilename>
</Input>
<OutputBands count="FROM_LAST_STEP" dataType="FROM_LAST_STEP"/>
Expand Down
135 changes: 133 additions & 2 deletions autotest/gdrivers/vrtprocesseddataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,22 @@
# SPDX-License-Identifier: MIT
###############################################################################

import os

import gdaltest
import pytest

from osgeo import gdal

from .vrtderived import _validate

pytestmark = pytest.mark.skipif(
not gdaltest.vrt_has_open_support(),
reason="VRT driver open missing",
)

np = pytest.importorskip("numpy")
pytest.importorskip("osgeo.gdal_array")
gdal_array = pytest.importorskip("osgeo.gdal_array")

###############################################################################
# Test error cases in general VRTProcessedDataset XML structure
Expand Down Expand Up @@ -73,6 +77,16 @@ def test_vrtprocesseddataset_errors(tmp_vsimem):
src_ds.GetRasterBand(3).Fill(3)
src_ds.Close()

with pytest.raises(Exception, match="Invalid value of 'unscale'"):
gdal.Open(
f"""<VRTDataset subclass='VRTProcessedDataset'>
<Input unscale="maybe">
<SourceFilename>{src_filename}</SourceFilename>
</Input>
</VRTDataset>
"""
)

with pytest.raises(Exception, match="ProcessingSteps element missing"):
gdal.Open(
f"""<VRTDataset subclass='VRTProcessedDataset'>
Expand Down Expand Up @@ -1216,7 +1230,7 @@ def test_vrtprocesseddataset_serialize(tmp_vsimem):
vrt_filename = str(tmp_vsimem / "the.vrt")
content = f"""<VRTDataset subclass='VRTProcessedDataset'>
<VRTRasterBand subClass='VRTProcessedRasterBand' dataType='Byte'/>
<Input>
<Input unscale="true">
<SourceFilename>{src_filename}</SourceFilename>
</Input>
<ProcessingSteps>
Expand Down Expand Up @@ -1517,3 +1531,120 @@ def test_vrtprocesseddataset_RasterIO(tmp_vsimem):
assert ds.GetRasterBand(1).GetBlockSize() == [1, 1]
with pytest.raises(Exception):
ds.ReadAsArray()


###############################################################################
# Validate processed datasets according to xsd


@pytest.mark.parametrize(
"fname",
[
f
for f in os.listdir(os.path.join(os.path.dirname(__file__), "data/vrt"))
if f.startswith("processed")
],
)
def test_vrt_processeddataset_validate(fname):
with open(os.path.join("data/vrt", fname)) as f:
_validate(f.read())


###############################################################################
# Test reading input datasets with scale and offset


@pytest.mark.parametrize(
"input_scaled", (True, False), ids=lambda x: f"input scaled={x}"
)
@pytest.mark.parametrize("unscale", (True, False, "auto"), ids=lambda x: f"unscale={x}")
@pytest.mark.parametrize(
"dtype", (gdal.GDT_Int16, gdal.GDT_Float32), ids=gdal.GetDataTypeName
)
def test_vrtprocesseddataset_scaled_inputs(tmp_vsimem, input_scaled, dtype, unscale):

src_filename = tmp_vsimem / "src.tif"

nx = 2
ny = 3
nz = 2

if dtype == gdal.GDT_Float32:
nodata = float("nan")
else:
nodata = 99

np_type = gdal_array.GDALTypeCodeToNumericTypeCode(dtype)

data = np.arange(nx * ny * nz, dtype=np_type).reshape(nz, ny, nx)
data[:, 2, 1] = nodata

if input_scaled:
offsets = [i + 2 for i in range(nz)]
scales = [(i + 1) / 4 for i in range(nz)]
else:
offsets = [0 for i in range(nz)]
scales = [1 for i in range(nz)]

with gdal.GetDriverByName("GTiff").Create(
src_filename, nx, ny, nz, eType=dtype
) as src_ds:
src_ds.WriteArray(data)
for i in range(src_ds.RasterCount):
bnd = src_ds.GetRasterBand(i + 1)
bnd.SetOffset(offsets[i])
bnd.SetScale(scales[i])
bnd.SetNoDataValue(nodata)

ds = gdal.Open(
f"""
<VRTDataset subclass='VRTProcessedDataset'>
<Input unscale="{unscale}">
<SourceFilename>{src_filename}</SourceFilename>
</Input>
<ProcessingSteps>
<Step>
<Algorithm>BandAffineCombination</Algorithm>
<Argument name="coefficients_1">0,1,0</Argument>
<Argument name="coefficients_2">0,0,1</Argument>
</Step>
</ProcessingSteps>
</VRTDataset>"""
)

assert ds.RasterCount == nz

if unscale is True or (unscale == "auto" and input_scaled):
for i in range(ds.RasterCount):
bnd = ds.GetRasterBand(i + 1)
assert bnd.DataType == gdal.GDT_Float64
assert bnd.GetScale() in (None, 1)
assert bnd.GetOffset() in (None, 0)
else:
for i in range(ds.RasterCount):
bnd = ds.GetRasterBand(i + 1)
assert bnd.DataType == dtype
assert bnd.GetScale() == scales[i]
assert bnd.GetOffset() == offsets[i]
assert (
np.isnan(bnd.GetNoDataValue())
if np.isnan(nodata)
else bnd.GetNoDataValue() == nodata
)

result = np.ma.stack(
[ds.GetRasterBand(i + 1).ReadAsMaskedArray() for i in range(ds.RasterCount)]
)

if unscale:
expected = np.ma.masked_array(
np.stack([data[i, :, :] * scales[i] + offsets[i] for i in range(nz)]),
np.isnan(data) if np.isnan(nodata) else data == nodata,
)
else:
expected = np.ma.masked_array(
data, np.isnan(data) if np.isnan(nodata) else data == nodata
)

np.testing.assert_array_equal(result.mask, expected.mask)
np.testing.assert_array_equal(result[~result.mask], expected[~expected.mask])
2 changes: 1 addition & 1 deletion doc/source/drivers/raster/vrt_processed_dataset.rst
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ The following child elements of ``VRTDataset`` may be defined: ``SRS``, ``GeoTra

The ``VRTDataset`` root element must also have the 2 following child elements:

- ``Input``, which must have one and only one of the following ``SourceFilename`` or ``VRTDataset`` as child elements, to define the input dataset to which to apply the processing steps.
- ``Input``, which must have one and only one of the following ``SourceFilename`` or ``VRTDataset`` as child elements, to define the input dataset to which to apply the processing steps. Starting with GDAL 3.11, values from the input dataset will be automatically unscaled; this can be disabled by setting the ``unscale`` attribute of ``Input`` to ``false``.

- ``ProcessingSteps``, with at least one child ``Step`` element.

Expand Down
12 changes: 11 additions & 1 deletion frmts/vrt/data/gdalvrt.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,16 @@
<xs:element name="VRTDataset" type="VRTDatasetType"/>
</xs:choice>
</xs:sequence>
<xs:attribute name="unscale" type="xs:string"> <!-- added in GDAL 3.11 -->
<xs:annotation>
<xs:documentation>
YES, NO, or AUTO.
If not specified, AUTO is the default and will result in
unscaling all input bands to Float64 if any input band has
a defined scale/offset.
</xs:documentation>
</xs:annotation>
</xs:attribute>
</xs:complexType>

<xs:complexType name="OutputBandsType">
Expand Down Expand Up @@ -249,7 +259,7 @@
<xs:extension base="xs:string">
<xs:attribute name="name" type="xs:string" use="required">
<xs:annotation>
<xs:documentation>Allowed names are specific of each processing function</xs:documentation>
<xs:documentation>Allowed names are specific to each processing function</xs:documentation>
</xs:annotation>
</xs:attribute>
</xs:extension>
Expand Down
3 changes: 3 additions & 0 deletions frmts/vrt/vrtdataset.h
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,9 @@ class VRTProcessedDataset final : public VRTDataset
//! Directory of the VRT
std::string m_osVRTPath{};

//! Source of source dataset generated with GDALTranslate
std::unique_ptr<GDALDataset> m_poVRTSrcDS{};

//! Source dataset
std::unique_ptr<GDALDataset> m_poSrcDS{};

Expand Down
69 changes: 69 additions & 0 deletions frmts/vrt/vrtprocesseddataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "cpl_minixml.h"
#include "cpl_string.h"
#include "gdal_utils.h"
#include "vrtdataset.h"

#include <algorithm>
Expand Down Expand Up @@ -206,6 +207,27 @@ CPLErr VRTProcessedDataset::XMLInit(const CPLXMLNode *psTree,
return CE_None;
}

static bool HasScaleOffset(GDALDataset &oSrcDS)
{
for (int i = 1; i <= oSrcDS.GetRasterCount(); i++)
{
int pbSuccess;
GDALRasterBand &oBand = *oSrcDS.GetRasterBand(i);
double scale = oBand.GetScale(&pbSuccess);
if (pbSuccess && scale != 1)
{
return true;
}
double offset = oBand.GetOffset(&pbSuccess);
if (pbSuccess && offset != 0)
{
return true;
}
}

return false;
}

/** Instantiate object from XML tree */
CPLErr VRTProcessedDataset::Init(const CPLXMLNode *psTree,
const char *pszVRTPathIn,
Expand Down Expand Up @@ -260,6 +282,53 @@ CPLErr VRTProcessedDataset::Init(const CPLXMLNode *psTree,
if (!m_poSrcDS)
return CE_Failure;

const char *pszUnscale = CPLGetXMLValue(psInput, "unscale", "AUTO");
bool bUnscale = false;
if (EQUAL(pszUnscale, "AUTO"))
{
if (HasScaleOffset(*m_poSrcDS))
{
bUnscale = true;
}
}
else if (EQUAL(pszUnscale, "YES") || EQUAL(pszUnscale, "ON") ||
EQUAL(pszUnscale, "TRUE") || EQUAL(pszUnscale, "1"))
{
bUnscale = true;
}
else if (!(EQUAL(pszUnscale, "NO") || EQUAL(pszUnscale, "OFF") ||
EQUAL(pszUnscale, "FALSE") || EQUAL(pszUnscale, "0")))
{
CPLError(CE_Failure, CPLE_AppDefined, "Invalid value of 'unscale'");
return CE_Failure;
}

if (bUnscale)
{
CPLStringList oArgs;
oArgs.AddString("-unscale");
oArgs.AddString("-ot");
oArgs.AddString("Float64");
oArgs.AddString("-of");
oArgs.AddString("VRT");
oArgs.AddString("-a_nodata");
oArgs.AddString("nan");
auto *poArgs = GDALTranslateOptionsNew(oArgs.List(), nullptr);
int pbUsageError;
CPLAssert(poArgs);
m_poVRTSrcDS.reset(m_poSrcDS.release());
// https://trac.cppcheck.net/ticket/11325
// cppcheck-suppress accessMoved
m_poSrcDS.reset(GDALDataset::FromHandle(
GDALTranslate("", m_poVRTSrcDS.get(), poArgs, &pbUsageError)));
GDALTranslateOptionsFree(poArgs);

if (pbUsageError || !m_poSrcDS)
{
return CE_Failure;
}
}

if (nRasterXSize == 0 && nRasterYSize == 0)
{
nRasterXSize = m_poSrcDS->GetRasterXSize();
Expand Down

0 comments on commit ee04fce

Please sign in to comment.