diff --git a/ext/RastersArchGDALExt/RastersArchGDALExt.jl b/ext/RastersArchGDALExt/RastersArchGDALExt.jl index 55fb3a239..293013406 100644 --- a/ext/RastersArchGDALExt/RastersArchGDALExt.jl +++ b/ext/RastersArchGDALExt/RastersArchGDALExt.jl @@ -1,10 +1,7 @@ module RastersArchGDALExt -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Rasters, ArchGDAL -else - using ..Rasters, ..ArchGDAL -end +using Rasters +using ArchGDAL import DiskArrays, Extents, @@ -16,15 +13,17 @@ using DimensionalData, using Rasters.Lookups using Rasters.Dimensions -using Rasters: GDALsource, AbstractProjected, RasterStackOrArray, FileArray, NoKW, +using Rasters: GDALsource, AbstractProjected, AbstractRaster, AbstractRasterStack, + RasterStackOrArray, FileArray, NoKW, RES_KEYWORD, SIZE_KEYWORD, CRS_KEYWORD, FILENAME_KEYWORD, SUFFIX_KEYWORD, EXPERIMENTAL, GDAL_EMPTY_TRANSFORM, GDAL_TOPLEFT_X, GDAL_WE_RES, GDAL_ROT1, GDAL_TOPLEFT_Y, GDAL_ROT2, GDAL_NS_RES, _no_crs_error -import Rasters: reproject, resample, warp, nokw +import Rasters: reproject, resample, warp, cellsize, nokw, isnokw, isnokwornothing import LinearAlgebra: dot, cross +const AG = ArchGDAL const RA = Rasters const AG = ArchGDAL const DD = DimensionalData diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index 65b7ba5fb..1b187816b 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -34,59 +34,57 @@ const GDAL_VIRTUAL_FILESYSTEMS = "/vsi" .* ( "sparse", ) -# Array ######################################################################## - -function RA.FileArray{GDALsource}(ds::AG.RasterDataset{T}, filename; kw...) where {T} - eachchunk, haschunks = DA.eachchunk(ds), DA.haschunks(ds) - RA.FileArray{GDALsource,T,3}(filename, size(ds); eachchunk, haschunks, kw...) -end +# TODO more cases of return values here, like wrapped disk arrays RA.cleanreturn(A::AG.RasterDataset) = Array(A) RA.haslayers(::GDALsource) = false RA._sourcetrait(A::AG.RasterDataset) = GDALsource() -function Base.write( - filename::AbstractString, ::GDALsource, A::AbstractRaster{T}; +function Base.write(filename::AbstractString, ::GDALsource, A::AbstractRasterStack; kw...) + ext = splitext(filename)[2] + throw(ArgumentError("Cant write a RasterStack to $ext with gdal")) +end +function Base.write(filename::AbstractString, ::GDALsource, A::AbstractRaster{T}; force=false, verbose=true, + write=true, missingval=nokw, + scale=nokw, + offset=nokw, + coerce=nokw, + eltype=Missings.nonmissingtype(T), + f=identity, kw... ) where T RA.check_can_write(filename, force) - A1 = _maybe_correct_to_write(A, missingval) - _create_with_driver(filename, dims(A1), eltype(A1), Rasters.missingval(A1); _block_template=A1, kw...) do dataset - verbose && _maybe_warn_south_up(A, verbose, "Writing South-up. Use `reverse(myrast; dims=Y)` first to write conventional North-up") - open(A1; write=true) do O - AG.RasterDataset(dataset) .= parent(O) + write = f === identity ? write : true + A1 = _maybe_permute_to_gdal(A) + + # Missing values + missingval_pair = RA._write_missingval_pair(A, missingval; eltype, verbose) + + _create_with_driver(filename, dims(A1), eltype; + missingval=missingval_pair[1], _block_template=A1, scale, offset, verbose, kw... + ) do dataset + if write + mod = RA._mod(eltype, missingval_pair, scale, offset, coerce) + open(A1; write=true) do O + R = RA._maybe_modify(AG.RasterDataset(dataset), mod) + R .= parent(O) + if hasdim(A, Band()) + f(R) + else + f(view(R, :, :, 1)) + end + end end end return filename end -function RA.create(filename, ::GDALsource, T::Type, dims::DD.DimTuple; - missingval=nokw, - metadata=nokw, - name=nokw, - lazy=true, - verbose=true, - kw... -) - T = Missings.nonmissingtype(T) - missingval = ismissing(missingval) ? RA._writeable_missing(T) : missingval - _create_with_driver(filename, dims, T, missingval; kw...) do _ - verbose && _maybe_warn_south_up(dims, verbose, "Creating a South-up raster. Use `reverse(myrast; dims=Y)` first to write conventional North-up") - nothing - end - - return Raster(filename; source=GDALsource(), name, lazy, metadata, dropband=!hasdim(dims, Band)) -end - -function _maybe_warn_south_up(A, verbose, msg) - verbose && lookup(A, Y) isa AbstractSampled && order(A, Y) isa ForwardOrdered && @warn msg -end - function RA._open(f, ::GDALsource, filename::AbstractString; write=false, + mod=RA.NoMod(), kw... ) # Check the file actually exists because the GDAL error is unhelpful @@ -104,15 +102,13 @@ function RA._open(f, ::GDALsource, filename::AbstractString; end end end - if write - # Pass the OF_UPDATE flag to GDAL - AG.readraster(RA.cleanreturn ∘ f, filename; flags=AG.OF_UPDATE) - else - # Otherwise just read - AG.readraster(RA.cleanreturn ∘ f, filename) + flags = write ? AG.OF_UPDATE : AG.OF_READONLY + return AG.readraster(filename; flags) do A + RA.cleanreturn(f(RA._maybe_modify(A, mod))) end end -RA._open(f, ::GDALsource, ds::AG.RasterDataset; kw...) = RA.cleanreturn(f(ds)) +RA._open(f, ::GDALsource, A::AG.RasterDataset; mod=RA.NoMod(), kw...) = + RA.cleanreturn(f(RA._maybe_modify(A, mod))) # DimensionalData methods for ArchGDAL types ############################### @@ -206,20 +202,27 @@ function RA._dims(raster::AG.RasterDataset, crs=nokw, mappedcrs=nokw) end # TODO make metadata optional, its slow to get -function RA._metadata(raster::AG.RasterDataset, args...) - band = AG.getband(raster.ds, 1) +function RA._metadata(rds::AG.RasterDataset, args...) + band = AG.getband(rds.ds, 1) + metadata = RA._metadatadict(GDALsource()) # color = AG.getname(AG.getcolorinterp(band)) scale = AG.getscale(band) offset = AG.getoffset(band) # norvw = AG.noverview(band) units = AG.getunittype(band) - filelist = AG.filelist(raster) - metadata = RA._metadatadict(GDALsource(), "scale"=>scale, "offset"=>offset) - if units == "" + filepath = _getfilepath(rds) + # Set metadata if they are not default values + if scale != oneunit(scale) + metadata["scale"] = scale + end + if offset != zero(offset) + metadata["offset"] = offset + end + if units != "" metadata["units"] = units end - if length(filelist) > 0 - metadata["filepath"] = first(filelist) + if !isnothing(filepath) + metadata["filepath"] = filepath end return metadata end @@ -228,38 +231,26 @@ end # Create a Raster from a dataset RA.Raster(ds::AG.Dataset; kw...) = Raster(AG.RasterDataset(ds); kw...) -function RA.Raster(ds::AG.RasterDataset; - crs=crs(ds), - mappedcrs=nokw, - dims=RA._dims(ds, crs, mappedcrs), - refdims=(), - name=nokw, - metadata=RA._metadata(ds), - missingval=RA.missingval(ds), - lazy=false, - dropband=false -) - kw = (; refdims, name, metadata, missingval) - filelist = AG.filelist(ds) - raster = if lazy && length(filelist) > 0 - filename = first(filelist) - Raster(FileArray{GDALsource}(ds, filename), dims; kw...) +function RA.Raster(ds::AG.RasterDataset; lazy=false, kw...) + filepath = if lazy + fp = _getfilepath(ds) + isnothing(fp) ? "/vsimem" : fp else - Raster(Array(ds), dims; kw...) + "" end - return dropband ? RA._drop_single_band(raster, lazy) : raster + Raster(ds, filepath; kw...) end RA.missingval(ds::AG.Dataset, args...) = RA.missingval(AG.RasterDataset(ds)) -function RA.missingval(rasterds::AG.RasterDataset, args...) +function RA.missingval(rds::AG.RasterDataset, args...) # All bands have the same missingval in GDAL - band = AG.getband(rasterds.ds, 1) + band = AG.getband(rds.ds, 1) # GDAL will set this hasnodataval = Ref(Cint(0)) # Int64 and UInt64 need special casing in GDAL - nodataval = if eltype(rasterds) == Int64 + nodataval = if eltype(rds) == Int64 AG.GDAL.gdalgetrasternodatavalueasint64(band, hasnodataval) - elseif eltype(rasterds) == UInt64 + elseif eltype(rds) == UInt64 AG.GDAL.gdalgetrasternodatavalueasuint64(band, hasnodataval) else AG.GDAL.gdalgetrasternodatavalue(band, hasnodataval) @@ -273,7 +264,7 @@ function RA.missingval(rasterds::AG.RasterDataset, args...) end end -# GDAL always returns well known text +# GDAL always returns well known text crs function RA.crs(raster::AG.RasterDataset, args...) WellKnownText(GeoFormatTypes.CRS(), string(AG.getproj(raster.ds))) end @@ -286,14 +277,26 @@ function AG.Dataset(f::Function, A::AbstractRaster; kw...) f(rds.ds) end end -function AG.RasterDataset(f::Function, A::AbstractRaster; filename="", kw...) - A1 = _maybe_correct_to_write(A) - return _create_with_driver(filename, dims(A1), eltype(A1), missingval(A1); _block_template=A1, kw...) do dataset - rds = AG.RasterDataset(dataset) - open(A1) do a - rds .= parent(a) +function AG.RasterDataset(f::Function, A::AbstractRaster; + filename="", + scale=nokw, + offset=nokw, + coerce=nokw, + verbose=false, + eltype=Missings.nonmissingtype(eltype(A)), + missingval=Rasters.missingval(A), + kw... +) + return open(_maybe_permute_to_gdal(A)) do O + _create_with_driver(filename, dims(A), eltype; + _block_template=A, missingval, scale, offset, verbose, kw... + ) do dataset + rds = AG.RasterDataset(dataset) + mv = RA.missingval(rds) => RA.missingval(O) + mod = RA._mod(eltype, mv, scale, offset, coerce) + RA._maybe_modify(rds, mod) .= parent(O) + f(rds) end - f(rds) end end @@ -301,37 +304,30 @@ end # Sometimes GDAL stores the `missingval` in the wrong type, so fix it. _missingval_from_gdal(T::Type{<:AbstractFloat}, x::Real) = convert(T, x) -function _missingval_from_gdal(T::Type{<:Integer}, x::AbstractFloat) +function _missingval_from_gdal(T::Type{<:Integer}, x::AbstractFloat; verbose=true) if trunc(x) === x && x >= typemin(T) && x <= typemax(T) convert(T, x) else - @warn "Missing value $x can't be converted to array eltype $T. `missingval` set to `nothing`" + verbose && @warn "Missing value $x can't be converted to array eltype $T. `missingval` set to `nothing`" nothing end end -function _missingval_from_gdal(T::Type{<:Integer}, x::Integer) +function _missingval_from_gdal(T::Type{<:Integer}, x::Integer; verbose=true) if x >= typemin(T) && x <= typemax(T) convert(T, x) else - @warn "Missing value $x can't be converted to array eltype $T. `missingval` set to `nothing`" + verbose && @warn "Missing value $x can't be converted to array eltype $T. `missingval` set to `nothing`" nothing end end _missingval_from_gdal(T, x) = x -# Fix array and dimension configuration before writing with GDAL -_maybe_correct_to_write(A::AbstractDimArray, args...) = - _maybe_correct_to_write(lookup(A, X()), A, args...) -_maybe_correct_to_write(::Lookup, A::AbstractDimArray, args...) = A -function _maybe_correct_to_write( - lookup::Union{AbstractSampled,NoLookup}, A::AbstractDimArray, args... -) - RA._maybe_use_type_missingval(A, GDALsource(), args...) |> _maybe_permute_to_gdal +# Make sure driver is sensible +function _check_driver(::Nothing, driver) + isnokwornothing(driver) || isempty(driver) ? "MEM" : driver end - -_check_driver(filename::Nothing, driver) = "MEM" function _check_driver(filename::AbstractString, driver) - if isempty(driver) + if isnokwornothing(driver) || isempty(driver) if isempty(filename) driver = "MEM" else @@ -346,42 +342,62 @@ end # Handle creating a dataset with any driver, # applying the function `f` to the created dataset -function _create_with_driver(f, filename, dims::Tuple, T, missingval; - options=Dict{String,String}(), - driver="", - _block_template=nothing, +function _create_with_driver(f, filename, dims::Tuple, T; + verbose=true, + missingval=nokw, + options=nokw, + driver=nokw, chunks=nokw, + scale=nokw, + offset=nokw, + _block_template=nothing, kw... ) + # Allow but discourage south-up + verbose && _maybe_warn_south_up(dims, verbose, "Creating a South-up raster. You may wish to reverse the `Y` dimension to use conventional North-up") + options = isnokwornothing(options) ? Dict{String,String}() : options + + # Pairs should not get this far + @assert !(missingval isa Pair) + # If missingval is missing, generate one GDAL can write + missingval = ismissing(missingval) ? RA._writeable_missing(T; verbose) : missingval + # Make sure dimensions are valid for GDAL _gdal_validate(dims) + # Move x and y locus to start x, y = map(DD.dims(dims, (XDim, YDim))) do d maybeshiftlocus(Start(), RA.nolookup_to_sampled(d)) end + # Band handling - a 2d raster has 1 band newdims = hasdim(dims, Band()) ? (x, y, DD.dims(dims, Band)) : (x, y) nbands = hasdim(dims, Band) ? length(DD.dims(dims, Band())) : 1 + # Driver detection driver = _check_driver(filename, driver) - options_vec = _process_options(driver, options; _block_template, chunks) + options_vec = _process_options(driver, options; _block_template, chunks, verbose) gdaldriver = driver isa String ? AG.getdriver(driver) : driver + # Keywords and filenames for AG.create create_kw = (; width=length(x), height=length(y), nbands, dtype=T,) filename = isnothing(filename) ? "" : filename + # Not all drivers can be directly created, some need an intermediate step if AG.shortname(gdaldriver) in GDAL_DRIVERS_SUPPORTING_CREATE AG.create(filename; driver=gdaldriver, options=options_vec, create_kw...) do dataset - _set_dataset_properties!(dataset, newdims, missingval) + _set_dataset_properties!(dataset, newdims, missingval, scale, offset) f(dataset) end else # Create a tif and copy it to `filename`, as ArchGDAL.create # does not support direct creation of ASCII etc. rasters - tif_options_vec = _process_options("GTiff", Dict{String,String}(); chunks, _block_template) + tif_options_vec = _process_options("GTiff", Dict{String,String}(); + chunks, _block_template, verbose + ) tif_driver = AG.getdriver("GTiff") tif_name = tempname() * ".tif" AG.create(tif_name; driver=tif_driver, options=tif_options_vec, create_kw...) do dataset - _set_dataset_properties!(dataset, newdims, missingval) + _set_dataset_properties!(dataset, newdims, missingval, scale, offset) f(dataset) target_ds = AG.copy(dataset; filename=filename, driver=gdaldriver, options=options_vec) AG.destroy(target_ds) @@ -389,6 +405,7 @@ function _create_with_driver(f, filename, dims::Tuple, T, missingval; end end +# Makie sure gdal can actually write this raster @noinline function _gdal_validate(dims) all(hasdim(dims, (XDim, YDim))) || throw(ArgumentError("`Raster` must have both an `X` and `Y` to be converted to an ArchGDAL `Dataset`")) if length(dims) === 3 @@ -402,7 +419,8 @@ end # Convert a Dict of options to a Vector{String} for GDAL function _process_options(driver::String, options::Dict; chunks=nokw, - _block_template=nothing + _block_template=nothing, + verbose=true, ) options_str = Dict(string(k)=>string(v) for (k,v) in options) # Get the GDAL driver object @@ -429,7 +447,7 @@ function _process_options(driver::String, options::Dict; if (xchunksize % 16 == 0) && (ychunksize % 16 == 0) options_str["TILED"] = "YES" else - xchunksize == 1 || @warn "X and Y chunk size do not match. Columns are used and X size $xchunksize is ignored" + xchunksize == 1 || (verbose && @warn "X and Y chunk size do not match. Columns are used and X size $xchunksize is ignored") end # don't overwrite user specified values if !("BLOCKXSIZE" in keys(options_str)) @@ -443,7 +461,7 @@ function _process_options(driver::String, options::Dict; if xchunksize == ychunksize options_str["BLOCKSIZE"] = block_x else - @warn "Writing COG X and Y chunks do not match: $block_x, $block_y. Default of 512, 512 used." + verbose && @warn "Writing COG X and Y chunks do not match: $block_x, $block_y. Default of 512, 512 used." end end end @@ -489,9 +507,9 @@ end # Set the properties of an ArchGDAL Dataset to match # the dimensions and missingval of a Raster -_set_dataset_properties!(ds::AG.Dataset, A) = - _set_dataset_properties!(ds, dims(A), missingval(A)) -function _set_dataset_properties!(dataset::AG.Dataset, dims::Tuple, missingval) +_set_dataset_properties!(ds::AG.Dataset, A, scale, offset) = + _set_dataset_properties!(ds, dims(A), missingval(A), scale, offset) +function _set_dataset_properties!(dataset::AG.Dataset, dims::Tuple, missingval, scale, offset) # We cant write mixed Points/Intervals, so default to Intervals if mixed xy = DD.dims(dims, (X, Y)) if any(x -> x isa Intervals, map(sampling, xy)) && any(x -> x isa Points, map(sampling, xy)) @@ -520,12 +538,14 @@ function _set_dataset_properties!(dataset::AG.Dataset, dims::Tuple, missingval) gt = RA.dims2geotransform(x, y) AG.setgeotransform!(dataset, gt) - # Set the missing value/nodataval. This is a little complicated - # because gdal has separate method for 64 bit integers - if !isnothing(missingval) + if !RA.isnokwornothing(missingval) bands = hasdim(dims, Band) ? axes(DD.dims(dims, Band), 1) : 1 for i in bands rasterband = AG.getband(dataset, i) + (RA.isnokw(offset) || isnothing(offset)) || AG.setoffset!(rasterband, offset) + (RA.isnokw(scale) || isnothing(scale)) || AG.setscale!(rasterband, scale) + # Set the missing value/nodataval. This is a little complicated + # because gdal has separate method for 64 bit integers if missingval isa Int64 AG.GDAL.gdalsetrasternodatavalueasint64(rasterband, missingval) elseif missingval isa UInt64 @@ -575,7 +595,7 @@ _maybe_restore_from_gdal(A, dims::Tuple) = _maybe_reorder(permutedims(A, dims), _maybe_restore_from_gdal(A, dims::Union{Tuple{<:XDim,<:YDim,<:Band},Tuple{<:XDim,<:YDim}}) = _maybe_reorder(A, dims) -function _maybe_reorder(A, dims) +function _maybe_reorder(A, dims::Tuple) if all(map(l -> l isa AbstractSampled, lookup(dims, (XDim, YDim)))) && all(map(l -> l isa AbstractSampled, lookup(A, (XDim, YDim)))) reorder(A, dims) @@ -583,6 +603,14 @@ function _maybe_reorder(A, dims) A end end + +function _maybe_warn_south_up(A, verbose, msg) + if hasdim(A, Y()) + verbose && lookup(A, Y()) isa AbstractSampled && order(A, Y()) isa ForwardOrdered && @warn msg + end + return nothing +end + #= Geotranforms ######################################################################## See https://lists.osgeo.org/pipermail/gdal-dev/2011-July/029449.html @@ -625,6 +653,15 @@ RA.affine2geotransform(am) = error(USING_COORDINATETRANSFORMATIONS_MESSAGE) _isaligned(geotransform) = geotransform[GDAL_ROT1] == 0 && geotransform[GDAL_ROT2] == 0 +function _getfilepath(ds) + filelist = AG.filelist(ds) + if length(filelist) == 0 + return nothing + else + return first(filelist) + end +end + # precompilation # function _precompile(::Type{GDALsource}) # ccall(:jl_generating_output, Cint, ()) == 1 || return nothing @@ -647,3 +684,4 @@ _isaligned(geotransform) = geotransform[GDAL_ROT1] == 0 && geotransform[GDAL_ROT # end # _precompile(GRDsource) + diff --git a/ext/RastersArchGDALExt/resample.jl b/ext/RastersArchGDALExt/resample.jl index d5b067ed0..ead14b5d6 100644 --- a/ext/RastersArchGDALExt/resample.jl +++ b/ext/RastersArchGDALExt/resample.jl @@ -7,7 +7,12 @@ function resample(xs::Union{Tuple,NamedTuple}; to=first(xs), kw...) map(x -> resample(x; to, kw...), xs) end function resample(A::RasterStackOrArray; - to=nothing, res=nothing, crs=nothing, size=nothing, method=:near, kw... + to=nothing, + res=nothing, + crs=nothing, + size=nothing, + method=:near, + kw... ) (isnothing(size) || isnothing(res)) || _size_and_res_error() diff --git a/ext/RastersArchGDALExt/warp.jl b/ext/RastersArchGDALExt/warp.jl index 8ebfac98a..e8368e776 100644 --- a/ext/RastersArchGDALExt/warp.jl +++ b/ext/RastersArchGDALExt/warp.jl @@ -11,25 +11,40 @@ function warp(A::AbstractRaster, flags::Dict; filename=nothing, kw...) end end function warp(st::AbstractRasterStack, flags::Dict; filename=nothing, suffix=keys(st), kw...) - RA.mapargs((A, s) -> warp(A, flags; filename, suffix=s), st, suffix; kw...) + RA.mapargs((A, s) -> warp(A, flags; filename, suffix=s, kw...), st, suffix) end -function _warp(A::AbstractRaster, flags::Dict; filename=nothing, suffix="", kw...) +function _warp(A::AbstractRaster, flags::Dict; + filename=nothing, + suffix="", + missingval=Rasters.missingval(A), + name=Rasters.name(A), + kw... +) A1 = _set_gdalwarp_sampling(A) filename = RA._maybe_add_suffix(filename, suffix) flagvect = reduce([flags...]; init=String[]) do acc, (key, val) append!(acc, String[_asflag(key), _stringvect(val)...]) end + # TODO: detect if `A` already holds a lazy GDAL FileArray. + # If it does, we can just open it and use it directly. tempfile = isnothing(filename) ? nothing : tempname() * ".tif" warp_kw = isnothing(filename) || filename == "/vsimem/tmp" ? () : (; dest=filename) - out = AG.Dataset(A1; filename=tempfile, kw...) do dataset + mv_pair = RA._write_missingval_pair(A1, missingval; + verbose=false, eltype=eltype(A1), metadata=metadata(A), required=true + ) + # We really need a missingval for `warp`, as it may rotate and add missing values + out = AG.Dataset(A1; filename=tempfile, missingval=mv_pair[1], kw...) do dataset AG.gdalwarp([dataset], flagvect; warp_kw...) do warped # Read the raster lazily, dropping Band if there is none in `A` - raster = Raster(warped; lazy=true, dropband=!hasdim(A, Band()), name = name(A)) + raster = Raster(warped; + lazy=true, dropband=!hasdim(A, Band()), name, missingval=mv_pair + ) # Either read the MEM dataset to an Array, or keep a filename base raster lazy return isnothing(filename) ? read(raster) : raster end end + # And permute the dimensions back to what they were in A out1 = _maybe_restore_from_gdal(out, dims(A)) out2 = _reset_gdalwarp_sampling(out1, A) diff --git a/ext/RastersCoordinateTransformationsExt/RastersCoordinateTransformationsExt.jl b/ext/RastersCoordinateTransformationsExt/RastersCoordinateTransformationsExt.jl index 39d0ee745..1643f7a1e 100644 --- a/ext/RastersCoordinateTransformationsExt/RastersCoordinateTransformationsExt.jl +++ b/ext/RastersCoordinateTransformationsExt/RastersCoordinateTransformationsExt.jl @@ -1,11 +1,7 @@ module RastersCoordinateTransformationsExt -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Rasters, CoordinateTransformations -else - using ..Rasters, ..CoordinateTransformations -end - +using Rasters +using CoordinateTransformations using DimensionalData using Rasters.Lookups using Rasters.Dimensions @@ -16,7 +12,6 @@ const RA = Rasters const DD = DimensionalData const LA = Lookups - include("affineprojected.jl") end # module diff --git a/ext/RastersGRIBDatasetsExt/RastersGRIBDatasetsExt.jl b/ext/RastersGRIBDatasetsExt/RastersGRIBDatasetsExt.jl index 59c69048f..ca81de017 100644 --- a/ext/RastersGRIBDatasetsExt/RastersGRIBDatasetsExt.jl +++ b/ext/RastersGRIBDatasetsExt/RastersGRIBDatasetsExt.jl @@ -1,32 +1,12 @@ module RastersGRIBDatasetsExt -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Rasters, GRIBDatasets, CommonDataModel -else - using ..Rasters, ..GRIBDatasets, ..CommonDataModel -end - -import DiskArrays, - FillArrays, - Extents, - GeoInterface, - Missings +using Rasters +using GRIBDatasets -using Dates, - DimensionalData, - GeoFormatTypes - -using Rasters.Lookups -using Rasters.Dimensions using Rasters: GRIBsource -using CommonDataModel: AbstractDataset - const RA = Rasters -const DD = DimensionalData -const DA = DiskArrays -const GI = GeoInterface -const LA = Lookups +const GDS = GRIBDatasets include("gribdatasets_source.jl") diff --git a/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl b/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl index 25df03172..a43292693 100644 --- a/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl +++ b/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl @@ -1,7 +1,5 @@ -const GDS = GRIBDatasets - function RA.OpenStack(fs::RA.FileStack{GRIBsource,K}) where K - RA.OpenStack{GRIBsource,K}(GDS.GRIBDataset(RA.filename(fs))) + RA.OpenStack{GRIBsource,K}(GDS.GRIBDataset(RA.filename(fs)), fs.mods) end # In GRIBDatasets, the file is open for reading the values and closed afterwards. @@ -13,9 +11,11 @@ function RA._open(f, ::GRIBsource, filename::AbstractString; write=false, kw...) RA._open(f, GRIBsource(), ds; kw...) end -# Hack to get the inner DiskArrays chunks as they are not exposed at the top level -RA._get_eachchunk(var::GDS.Variable) = DiskArrays.eachchunk(var.values) -RA._get_haschunks(var::GDS.Variable) = DiskArrays.haschunks(var.values) - RA._sourcetrait(::GDS.Variable) = GRIBsource() RA._sourcetrait(::GDS.GRIBDataset) = GRIBsource() + +function RA.missingval(var::GDS.Variable{T}, args...) where T + mv = GDS.missing_value(var) + T1 = promote_type(typeof(mv), T) + return T1(mv) +end diff --git a/ext/RastersMakieExt/RastersMakieExt.jl b/ext/RastersMakieExt/RastersMakieExt.jl index 0c71302e7..4ddae96c3 100644 --- a/ext/RastersMakieExt/RastersMakieExt.jl +++ b/ext/RastersMakieExt/RastersMakieExt.jl @@ -1,10 +1,7 @@ module RastersMakieExt -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Makie, Rasters -else - using ..Makie, ..Rasters -end +using Makie +using Rasters using Rasters.DimensionalData using Rasters.Dimensions diff --git a/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl b/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl index c9d475ec6..8513b5c27 100644 --- a/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl +++ b/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl @@ -1,31 +1,23 @@ module RastersNCDatasetsExt -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Rasters, NCDatasets, CommonDataModel -else - using ..Rasters, ..NCDatasets, ..CommonDataModel -end - -import DiskArrays, - FillArrays, - Extents, - GeoInterface, - Missings +using Rasters +using NCDatasets +using CommonDataModel +using Dates +using DimensionalData -using Dates, - DimensionalData, - GeoFormatTypes +import Missings using Rasters.Lookups using Rasters.Dimensions -using Rasters: CDMsource, NCDsource, nokw, NoKW +using Rasters: CDMsource, NCDsource, NoKW, nokw, isnokw using CommonDataModel: AbstractDataset +const NCD = NCDatasets +const CDM = CommonDataModel const RA = Rasters const DD = DimensionalData -const DA = DiskArrays -const GI = GeoInterface const LA = Lookups include("ncdatasets_source.jl") diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index 193a00fe3..6296bb86f 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -1,6 +1,3 @@ -const NCD = NCDatasets - - const NCDAllowedType = Union{Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Float32,Float64,Char,String} function RA._check_allowed_type(::RA.NCDsource, eltyp) @@ -10,7 +7,8 @@ function RA._check_allowed_type(::RA.NCDsource, eltyp) """ )) end -function Base.write(filename::AbstractString, ::NCDsource, A::AbstractRaster; + +function Base.write(filename::AbstractString, source::NCDsource, A::AbstractRaster; append=false, force=false, kw... @@ -21,21 +19,21 @@ function Base.write(filename::AbstractString, ::NCDsource, A::AbstractRaster; RA.check_can_write(filename, force) "c" end - mode = !isfile(filename) || !append ? "c" : "a"; ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(A))) try - RA._writevar!(ds, A; kw...) + RA._writevar!(ds, source, A; kw...) finally close(ds) end return filename end -function Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStack; +function Base.write(filename::AbstractString, source::Source, s::AbstractRasterStack{K,T}; append=false, force=false, missingval=nokw, + f=identity, kw... -) +) where {Source<:NCDsource,K,T} mode = if append isfile(filename) ? "a" : "c" else @@ -43,22 +41,23 @@ function Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStac "c" end ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(s))) + missingval = RA._stack_nt(s, isnokw(missingval) ? Rasters.missingval(s) : missingval) try - if missingval isa NamedTuple - map(k -> RA._writevar!(ds, s[k]; missingval=missingval[k], kw...), keys(s)) - else - map(k -> RA._writevar!(ds, s[k]; missingval, kw...), keys(s)) + map(keys(s)) do k + RA._writevar!(ds, source, s[k]; missingval=missingval[k], kw...) end + f(RA.OpenStack{Source,K,T}(ds)) finally close(ds) end return filename end +Base.close(os::RA.OpenStack{NCDsource}) = NCD.close(RA.dataset(os)) + function RA.OpenStack(fs::RA.FileStack{NCDsource,K}) where K - RA.OpenStack{NCDsource,K}(NCD.Dataset(RA.filename(fs))) + RA.OpenStack{NCDsource,K}(NCD.Dataset(RA.filename(fs)), fs.mods) end -Base.close(os::RA.OpenStack{NCDsource}) = NCD.close(RA.dataset(os)) function RA._open(f, ::NCDsource, filename::AbstractString; write=false, kw...) isfile(filename) || RA._isurl(filename) || RA._filenotfound_error(filename) @@ -68,14 +67,38 @@ function RA._open(f, ::NCDsource, filename::AbstractString; write=false, kw...) end end - -# Hack to get the inner DiskArrays chunks as they are not exposed at the top level -RA._get_eachchunk(var::NCD.Variable) = DiskArrays.eachchunk(var) -RA._get_haschunks(var::NCD.Variable) = DiskArrays.haschunks(var) - RA._sourcetrait(::NCD.Dataset) = NCDsource() RA._sourcetrait(::NCD.Variable) = NCDsource() +@inline function RA.get_scale(metadata::Metadata{NCDsource}, scaled::Bool) + scale = scaled ? get(metadata, "scale_factor", nothing) : nothing + offset = scaled ? get(metadata, "add_offset", nothing) : nothing + return scale, offset +end + +RA.missingval(var::NCD.Variable, args...) = + RA.missingval(RA.Metadata{NCDsource}(CDM.attribs(var))) +RA.missingval(var::NCD.Variable, md::RA.Metadata{<:NCDsource}) = RA.missingval(md) + +function RA.missingval(md::RA.Metadata{NCDsource}) + # TODO: handle multiple missing values + fv = get(md, "_FillValue", nothing) + mv = get(md, "missing_value", nothing) + if isnothing(fv) + if mv isa Vector + length(mv) > 1 && @warn "'missing_value' $mv has multiple values. Currently we only uses the first." + return first(mv) + else + return mv + end + else + if !isnothing(mv) + fv == mv || @warn "Both '_FillValue' $fv and 'missing_value' $mv were found. Currently we only use the first." + end + return fv + end +end + # precompilation # const _NCDVar = NCDatasets.CFVariable{Union{Missing, Float32}, 3, NCDatasets.Variable{Float32, 3, NCDatasets.NCDataset}, NCDatasets.Attributes{NCDatasets.NCDataset{Nothing}}, NamedTuple{(:fillvalue, :scale_factor, :add_offset, :calendar, :time_origin, :time_factor), Tuple{Float32, Nothing, Nothing, Nothing, Nothing, Nothing}}} diff --git a/ext/RastersRasterDataSourcesExt/RastersRasterDataSourcesExt.jl b/ext/RastersRasterDataSourcesExt/RastersRasterDataSourcesExt.jl index 126916606..20beabb45 100644 --- a/ext/RastersRasterDataSourcesExt/RastersRasterDataSourcesExt.jl +++ b/ext/RastersRasterDataSourcesExt/RastersRasterDataSourcesExt.jl @@ -1,12 +1,7 @@ module RastersRasterDataSourcesExt -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Rasters, RasterDataSources -else - using ..Rasters, ..RasterDataSources -end +using Rasters, RasterDataSources -# using RasterDataSources: RasterDataSource using Rasters.Lookups using Rasters.Dimensions diff --git a/ext/RastersZarrDatasetsExt/RastersZarrDatasetsExt.jl b/ext/RastersZarrDatasetsExt/RastersZarrDatasetsExt.jl index 49f1647c9..bb44066aa 100644 --- a/ext/RastersZarrDatasetsExt/RastersZarrDatasetsExt.jl +++ b/ext/RastersZarrDatasetsExt/RastersZarrDatasetsExt.jl @@ -1,30 +1,12 @@ module RastersZarrDatasetsExt -using Rasters, CommonDataModel -using ZarrDatasets: ZarrDatasets as ZD - -import DiskArrays, - FillArrays, - Extents, - GeoInterface, - Missings - -using Dates, - DimensionalData, - GeoFormatTypes +using Rasters +using ZarrDatasets -using Rasters.Lookups -using Rasters.Dimensions +using ZarrDatasets: ZarrDatasets as ZD using Rasters: Zarrsource -using ZarrDatasets -using CommonDataModel: AbstractDataset, CommonDataModel as CDM - const RA = Rasters -const DD = DimensionalData -const DA = DiskArrays -const GI = GeoInterface -const LA = Lookups include("zarrdatasets_source.jl") diff --git a/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl b/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl index 5bf4f8f9e..ffe723686 100644 --- a/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl +++ b/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl @@ -1,20 +1,41 @@ function RA.OpenStack(fs::RA.FileStack{Zarrsource,K}) where K - RA.OpenStack{Zarrsource,K}(ZD.ZarrDataset(RA.filename(fs))) + RA.OpenStack{Zarrsource,K}(ZD.ZarrDataset(RA.filename(fs)), fs.mods) end # In ZarrDatasets, the file is open for reading the values and closed afterwards. Base.close(os::RA.OpenStack{Zarrsource}) = nothing function RA._open(f, ::Zarrsource, filename::AbstractString; write=false, kw...) - #isfile(filename) || RA._filenotfound_error(filename) ds = ZarrDatasets.ZarrDataset(filename) RA._open(f, Zarrsource(), ds; kw...) end - -# Hack to get the inner DiskArrays chunks as they are not exposed at the top level -RA._get_eachchunk(var::ZD.ZarrVariable) = DiskArrays.eachchunk(var.zarray) -RA._get_haschunks(var::ZD.ZarrVariable) = DiskArrays.haschunks(var.zarray) - RA._sourcetrait(::ZD.ZarrVariable) = Zarrsource() RA._sourcetrait(::ZD.ZarrDataset) = Zarrsource() + +RA.missingval(var::ZD.ZarrVariable, args...) = RA.missingval(RA.Metadata{Zarrsource}(CDM.attribs(var))) +RA.missingval(var::ZD.ZarrVariable, md::RA.Metadata{<:Zarrsource}) = RA.missingval(md) + +@inline function RA.get_scale(metadata::RA.Metadata{<: Zarrsource}, scaled::Bool) + scale = scaled ? get(metadata, "scale_factor", nothing) : nothing + offset = scaled ? get(metadata, "add_offset", nothing) : nothing + return scale, offset +end +# TODO: handle multiple missing values +function RA.missingval(md::RA.Metadata{<:Zarrsource}) + fv = get(md, "_FillValue", nothing) + mv = get(md, "missing_value", nothing) + if isnothing(fv) + if mv isa Vector + length(mv) > 1 && @warn "'missing_value' $mv has multiple values. Currently we only uses the first." + return first(mv) + else + return mv + end + else + if !isnothing(mv) + fv == mv || @warn "Both '_FillValue' $fv and 'missing_value' $mv were found. Currently we only use the first." + end + return fv + end +end diff --git a/src/Rasters.jl b/src/Rasters.jl index 94af83dfe..e660e7c86 100644 --- a/src/Rasters.jl +++ b/src/Rasters.jl @@ -101,6 +101,7 @@ include("methods/shared_docstrings.jl") include("lookup.jl") include("dimensions.jl") include("sources/sources.jl") +include("modifieddiskarray.jl") include("filearray.jl") include("filestack.jl") include("openstack.jl") diff --git a/src/array.jl b/src/array.jl index 92f5d52e7..4e75fb72e 100644 --- a/src/array.jl +++ b/src/array.jl @@ -10,14 +10,14 @@ wish to disable memory checks. This setting can be overridden with the `checkmem` keyword, where applicable. """ -function checkmem!(checkmem::Bool) +function checkmem!(checkmem::Bool) !checkmem || @warn "Setting `checkmem` to `false` globally may lead to out-of-memory errors or system crashes" CHECKMEM[] = checkmem return checkmem end const FLATTEN_SELECT = FileArray -const FLATTEN_IGNORE = Union{Dict,Set,Base.MultiplicativeInverses.SignedMultiplicativeInverse} +const FLATTEN_IGNORE = Union{Dict,Set,Base.MultiplicativeInverses.SignedMultiplicativeInverse,Array} """ AbstractRaster <: DimensionalData.AbstractDimArray @@ -42,7 +42,7 @@ abstract type AbstractRaster{T,N,D,A} <: AbstractDimArray{T,N,D,A} end Returns the value representing missing data in the dataset """ function missingval end -missingval(_) = missing +missingval(_) = nothing missingval(::AbstractArray{T}) where T = Missing <: T ? missing : nothing missingval(A::AbstractRaster) = A.missingval @@ -93,8 +93,7 @@ function DD.rebuild(A::AbstractRaster; end function DD.modify(f, A::AbstractRaster) - # Have to avoid calling `open` on CFDiskArray - newdata = if isdisk(A) && !(parent(A) isa CFDiskArray) + newdata = if isdisk(A) # TODO may have to avoid calling `open` on DiskArray open(A) do O f(parent(O)) end @@ -193,12 +192,12 @@ end Raster(A::AbstractDimArray; kw...) Raster(A::AbstractArray, dims; kw...) -A generic [`AbstractRaster`](@ref) for spatial/raster array data. It can hold -either memory-backed arrays or, if `lazy=true`, a [`FileArray`](@ref), -which stores the `String` path to an unopened file. +A generic [`AbstractRaster`](@ref) for spatial/raster array data. It can hold +either memory-backed arrays or, if `lazy=true`, a [`FileArray`](@ref), +which stores the `String` path to an unopened file. -If `lazy=true`, the file will only be opened lazily when it is indexed with `getindex` -or when `read(A)` is called. Broadcasting, taking a view, reversing, and most other +If `lazy=true`, the file will only be opened lazily when it is indexed with `getindex` +or when `read(A)` is called. Broadcasting, taking a view, reversing, and most other methods will _not_ load data from disk; they will be applied later, lazily. # Arguments @@ -207,28 +206,20 @@ methods will _not_ load data from disk; they will be applied later, lazily. # Keywords -- `name`: a `Symbol` name for the array, which will also retrieve the, alphabetically first, - named layer if `Raster` is used on a multi-layered file like a NetCDF. - If instead `RasterStack` is used to read the multi-layered file, by default, all variables - will be added to the stack. -$GROUP_KEYWORD -- `missingval`: value reprsenting missing data, normally detected from the file. Set manually - when you know the value is not specified or is incorrect. This will *not* change any - values in the raster, it simply assigns which value is treated as missing. To replace all of - the missing values in the raster, use [`replace_missing`](@ref). -- `metadata`: `Dict` or `Metadata` object for the array, or `NoMetadata()`. -$CONSTRUCTOR_CRS_KEYWORD -$CONSTRUCTOR_MAPPEDCRS_KEYWORD -- `refdims`: `Tuple of` position `Dimension`s the array was sliced from, defaulting to `()`. - Usually not needed. +$NAME_KEYWORD +$GROUP_KEYWORD +$MISSINGVAL_KEYWORD +$METADATA_KEYWORD +$CONSTRUCTOR_CRS_KEYWORD +$CONSTRUCTOR_MAPPEDCRS_KEYWORD +$REFDIMS_KEYWORD When a filepath `String` is used: $DROPBAND_KEYWORD $LAZY_KEYWORD -$REPLACE_MISSING_KEYWORD $SOURCE_KEYWORD -- `write`: defines the default `write` keyword value when calling `open` on the Raster. `false` by default. - Only makes sense to use when `lazy=true`. +$SCALED_KEYWORD +$RAW_KEYWORD When A is an `AbstractDimArray`: - `data`: can replace the data in an existing `AbstractRaster` @@ -248,6 +239,7 @@ struct Raster{T,N,D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Na,Me,Mi<:Union{T,Noth new{T,N,D,R,A,Na,Me,typeof(missingval1)}(data, dims, refdims, name, metadata, missingval1) end end +# Create a Raster from and AbstractArray and dims function Raster(A::AbstractArray{T,N}, dims::Tuple; refdims=(), name=Symbol(""), @@ -261,12 +253,15 @@ function Raster(A::AbstractArray{T,N}, dims::Tuple; A = isnokw(mappedcrs) ? A : setmappedcrs(A, mappedcrs) return A end +# Create a Raster from and AbstractVector and dims, +# reshaping the Vector to match the dimensions function Raster(A::AbstractArray{T,1}, dims::Tuple{<:Dimension,<:Dimension,Vararg}; kw... )::Raster{T,length(dims)} where T Raster(reshape(A, map(length, dims)), dims; kw...) end Raster(A::AbstractArray{<:Any,1}, dim::Dimension; kw...) = Raster(A, (dim,); kw...) +# Load a Raster from a table function Raster(table, dims::Tuple; name=nokw, kw... @@ -277,7 +272,9 @@ function Raster(table, dims::Tuple; A = reshape(cols[name], map(length, dims)) return Raster(A, dims; name, kw...) end +# Load a Raster from another AbstractArray with `dims` as keyword Raster(A::AbstractArray; dims, kw...) = Raster(A, dims; kw...)::Raster +# Load a Raster from another AbstractDimArray function Raster(A::AbstractDimArray; data=parent(A), dims=dims(A), @@ -289,20 +286,23 @@ function Raster(A::AbstractDimArray; )::Raster return Raster(data, dims; refdims, name, metadata, missingval, kw...) end +# Load a Raster from a string filename and predefined dimensions function Raster(filename::AbstractString, dims::Tuple{<:Dimension,<:Dimension,Vararg}; kw... )::Raster Raster(filename; dims, kw...) end -function Raster(filename::AbstractString; - source=nokw, +# Load a Raster from a string filename +function Raster(filename::AbstractString; + source=nokw, kw... ) source = _sourcetrait(filename, source) - _open(filename; source) do ds + _open(filename; source, mod=NoMod()) do ds Raster(ds, filename; source, kw...) end::Raster end +# Load a Raster from an opened Dataset function Raster(ds, filename::AbstractString; dims=nokw, refdims=(), @@ -313,74 +313,62 @@ function Raster(ds, filename::AbstractString; crs=nokw, mappedcrs=nokw, source=nokw, - replace_missing=false, - write=false, - lazy=false, - dropband=true, - checkmem=CHECKMEM[], + replace_missing=nokw, + coerce=convert, + scaled::Union{Bool,NoKW}=nokw, + verbose::Bool=true, + write::Bool=false, + lazy::Bool=false, + dropband::Bool=true, + checkmem::Bool=CHECKMEM[], + raw::Bool=false, + mod=nokw, + f=identity, )::Raster + _maybe_warn_replace_missing(replace_missing) + # `raw` option will ignore `scaled` and `missingval` + scaled, missingval = _raw_check(raw, scaled, missingval, verbose) + # TODO use a clearer name for this name1 = filekey(ds, name) + # Detect the source from filename source = _sourcetrait(filename, source) - data1, dims1, metadata1, missingval1 = _open(source, ds; name=name1, group) do var - metadata1 = isnokw(metadata) ? _metadata(var) : metadata - missingval1 = _fix_missingval(var, missingval) - rm = replace_missing && !isnothing(missingval1) - missingval2 = rm ? missing : missingval1 - data = if lazy - A = FileArray{typeof(source)}(var, filename; name=name1, group, write) - rm ? _replace_missing(A, missingval1) : A - else - checkmem && _checkobjmem(var) - x = Array(rm ? _replace_missing(var, missingval1) : var) - x isa AbstractArray ? x : fill(x) # Catch an NCDatasets bug - end - dims1 = isnokw(dims) ? _dims(var, crs, mappedcrs) : format(dims, data) - data, dims1, metadata1, missingval2 - end - name2 = name1 isa Union{NoKW,Nothing} ? Symbol("") : Symbol(name1) - raster = Raster(data1, dims1, refdims, name2, metadata1, missingval1) - return dropband ? _drop_single_band(raster, lazy) : raster -end - -_fix_missingval(::Type, ::Union{NoKW,Nothing}) = nothing -_fix_missingval(::AbstractArray, ::Nothing) = nothing -_fix_missingval(A::AbstractArray, ::NoKW) = _fix_missingval(A, Rasters.missingval(A)) -_fix_missingval(::AbstractArray{T}, missingval) where T = _fix_missingval(T, missingval) -function _fix_missingval(::Type{T}, missingval::M) where {T,M} - T1 = nonmissingtype(T) - if missingval isa T - missingval - elseif hasmethod(convert, Tuple{Type{T1},M}) && isreal(missingval) && - missingval <= typemax(T1) && missingval >= typemin(T1) - if T1 <: Integer && !isinteger(missingval) - nothing + # Open the dataset and variable specified by `name`, at `group` level if provided + # At this level we do not apply `mod`. + data_out, dims_out, metadata_out, missingval_out = _open(source, ds; name=name1, group, mod=NoMod()) do var + metadata_out = isnokw(metadata) ? _metadata(var) : metadata + missingval_out = _read_missingval_pair(var, metadata_out, missingval) + # Generate mod for scaling + mod = isnokw(mod) ? _mod(eltype(var), metadata_out, missingval_out; scaled, coerce) : mod + # Define or load the data array + data_out = if lazy + # Define a lay FileArray + FileArray{typeof(source)}(var, filename; + name=name1, group, mod, write + ) else - convert(T, missingval) + modvar = _maybe_modify(var, mod) + # Check the data will fit in memory + checkmem && _checkobjmem(modvar) + # Move the modified array to memory + Array(modvar) end - else - nothing + # Generate dims + dims_out = isnokw(dims) ? _dims(var, crs, mappedcrs) : format(dims, data_out) + # Return the data to the parent function + mv_outer = _outer_missingval(mod) + data_out, dims_out, metadata_out, mv_outer end -end - -function _replace_missing(A::AbstractArray{T}, missingval) where T - repmissing(x) = isequal(x, missingval) ? missing : x - return repmissing.(A) + # Use name or an empty Symbol + name_out = name1 isa Union{NoKW,Nothing} ? Symbol("") : Symbol(name1) + # Define the raster + raster = Raster(data_out, dims_out, refdims, name_out, metadata_out, missingval_out) + # Maybe drop a single band dimension + return _maybe_drop_single_band(raster, dropband, lazy) end filekey(ds, name) = name filekey(filename::String) = Symbol(splitext(basename(filename))[1]) -DD.dimconstructor(::Tuple{<:Dimension{<:AbstractProjected},Vararg{Dimension}}) = Raster - - -function _drop_single_band(raster, lazy::Bool) - if hasdim(raster, Band()) && size(raster, Band()) < 2 - if lazy - return view(raster, Band(1)) # TODO fix dropdims in DiskArrays - else - return dropdims(raster; dims=Band()) - end - else - return raster - end -end +# Add a `dimconstructor` method so `AbstractProjected` lookups create a Raster +# TODO this should be unwrapped to `DD.lookupconstructor` to avoid future ambiguities +DD.dimconstructor(::Tuple{<:Dimension{<:AbstractProjected},Vararg{Dimension}}) = Raster \ No newline at end of file diff --git a/src/create.jl b/src/create.jl index f0f66472c..25cdba8ae 100644 --- a/src/create.jl +++ b/src/create.jl @@ -1,41 +1,358 @@ +const TypeNamedTuple = NamedTuple{<:Any,<:Tuple{Vararg{Type}}} +""" + create([f!], [filename], template; kw...) -create(filename, A::AbstractRaster{T}; kw...) where T = create(filename, T, A; kw...) -function create(filename, T, A::AbstractRaster; - name=name(A), metadata=metadata(A), missingval=missingval(A), kw... +Create a new, uninitialised [`Raster`](@ref) or [`RasterStack`](@ref). + +If `filename` is a `String` it will be created on disk, and opened lazily. +If it is `nothing` or not passed, an in-memory `Raster` will be created. + +If type is a `Type` return value is a `Raster`. The `eltype` will usually be `T`, except +where `scale` and/or `offset` keywords are used or a `missingval` of a different type is specified, +in which case `T` will depend on the type promotion of `scale`, `offset` and `missingval` with `T`. +If `missingval` is a `Pair` of `on_disk_missingval => user_facing_missingval`, the user facing value +will effect `T`, not the internal on-disk value. + +If types is a `NamedTuple` of types, the result will be a `RasterStack`. In this case `fill` and +`missingval` can be single values (for all layers) or `NamedTuple` with the same names to specify per-layer. + +`f!` will be applied to the `Raster` or `RasterStack` while it is stil open after creation, +to avoid opening it twice. The return value of `f!` is disregarded but modifications +to the `Raster` or the `RasterStack` layers will be written to disk or changed in memory. + +## Arguments + +- `filename`: a String file path, which will create a file on disk and return it as + a lazy `Raster`, or `nothing` to create an in-memory `Raster`. +- `template`: a `Raster`, `Tuple` of `Dimension` or `Extents.Extent` to use as a template. + If an `Extent` is used, a `size` or `res` keyword must be passed. + If a `T` argument is not used, it is taken from the `template` eltype. +- `type`: the element type to use in the created array. A `NamedTuple` of types + will create a `RasterStack` + +## Keywords + +$NAME_KEYWORD +$REFDIMS_KEYWORD +$METADATA_KEYWORD +$WRITE_MISSINGVAL_KEYWORD +- `fill`: A value to fill the array with, before `scale` and `offset` are applied. + If there is no `fill`, raster values may remain undefined. They may be set to + `missingval` on disk, but this is not guaranteed. It us often more efficient to + use `fill` than to fill manually after `create`. +$SOURCE_KEYWORD +- `lazy`: A `Bool` specifying if to load data lazily from disk. For `create` + `lazy=true` is the default, as creating a disk-based file is normally associated + with it being larger than memory. +$CHUNKS_KEYWORD +$SCALE_KEYWORD +$OFFSET_KEYWORD +$COERCE_KEYWORD +$VERBOSE_KEYWORD +$RES_KEYWORD +$SIZE_KEYWORD +$CRS_KEYWORD +$CHUNKS_KEYWORD +- `reverse_y`: often we want to write `Y` dimensions in reverse. + When building dimensions from an `Extents.Extent` and `size` or `res` we can do this by + using `reverse_y=true`. Using a negative value in `res` will acheive the same result. + With a template `Raster` or a `Tuple` of `Dimension`, the existing order is used. + +## Example + +Here we create a UInt8 GeoTIFF and open it as a Raster, from -80 to 80 +lattitude, and 0 to 120 longitude, with a resolution of 0.25 degrees. + +We scale values from 0-1 over `UInt8` 0-200, and using `255`. +Values that don't convert exactly will error (we could use `coerce=trunc` to fix that). + +We use `UInt8(255)` as the `missingval` on disk, but mask it with `missing` in +the loaded `Raster`. + +We use standard lat/lon (EPSG:4326) as the crs, and force writing if the file exists. + +```julia +using Rasters, NCDatasets, ArchGDAL, Extents, Dates +using Rasters.Lookups +rast = Rasters.create("created.tif", UInt8, Extents.Extent(X=(0, 120), Y=(-80, 80), Band=(0, 12)); + res=(X=10.0, Y=10.0, Band=1), + # size=(X=100, Y=100, Band=12), + name=:myraster, + crs=EPSG(4326), + force=true, + fill=0x01, + sampling=(X=Intervals(Start()), Y=Intervals(Start()), Band=Intervals(Start())), +) do A + # While we have the newly created raster open, we can write to it + A[X=1:10, Y=1:10] .= 0xff +end + +read(rast) +``` + +We can also create a `RasterStack` by passing a `NamedTuple` of types: + +```julia +ext = Extents.Extent(X=(0, 120), Y=(-80, 80))#, Band=(1, 3)) +types = (a=UInt8, b=Int32, c=Float64) +rast = Rasters.create("created.nc", types, ext; + # res=(X=1.0, Y=1.0, Band=1), + size=(X=100, Y=100), + crs=EPSG(4326), + force=true, + # sampling=(X=Intervals(Start()), Y=Intervals(Start()), Band=Points()), +end + +RasterStack("created.nc") + +╭───────────────────────────────────────────╮ +│ 480×640 Raster{Union{Missing, Float64},2} │ +├───────────────────────────────────────────┴───────────────────────────────────────── dims ┐ + ↓ X Projected{Float64} LinRange{Float64}(0.0, 119.75, 480) ForwardOrdered Regular Points, + → Y Projected{Float64} LinRange{Float64}(79.75, -80.0, 640) ReverseOrdered Regular Points +├───────────────────────────────────────────────────────────────────────────────── metadata ┤ + Metadata{GDALsource} of Dict{String, Any} with 2 entries: + "filepath" => "created.tif" + "scale" => 0.005 +├─────────────────────────────────────────────────────────────────────────────────── raster ┤ + extent: Extent(X = (0.0, 119.75), Y = (-80.0, 79.75)) + missingval: missing + crs: GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199 +433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] + filename: nothing +└───────────────────────────────────────────────────────────────────────────────────────────┘ +``` +""" +# Create with a function that will be called to fill the raster +create(f::Function, args...; kw...) = create(args...; kw..., f) +# Create from Raster or RasterStack with no filename +create(A::Union{AbstractRaster,AbstractRasterStack}; kw...) = create(nothing, A; kw...) +# Create from type and Raster or RasterStack with no filename +create(T::Union{Type,TypeNamedTuple}, A::Union{Tuple,Extents.Extent,AbstractRaster,AbstractRasterStack}; kw...) = + create(nothing, T, A; kw...) +# Create from filename and Raster +function create(filename::Union{AbstractString,Nothing}, A::AbstractRaster{T}; + missingval=missingval(A), # Only take missingval here when types are not specified + kw... +) where T + create(filename, T, A; missingval, kw...) +end +# Create from filename and RasterStack +function create(filename::Union{AbstractString,Nothing}, st::AbstractRasterStack; + missingval=missingval(st), # Only take missingval here when types are not specified + kw... ) - create(filename, T, dims(A); parent=parent(A), name, metadata, missingval, kw...) -end -function create(filename::AbstractString, T::Type, dims::Tuple; - lazy=true, - parent=nothing, - suffix=nothing, - source::Source=_sourcetrait(filename), - missingval=nothing, kw... + create(filename, map(eltype, layers(st)), st; missingval, kw...) +end +# Create Raster from filename, type and a Raster, using its +# parent as the parent type so e.g. CuArray will propagate +create(filename::Union{AbstractString,Nothing}, T::Union{Type,TypeNamedTuple}, A::AbstractRaster; kw...) = + create(filename, T, dims(A); parent=parent(A), kw...) +# Create RasterStack from filename, NamedTuple type and dims +function create(filename::Union{AbstractString,Nothing}, T::NamedTuple{K1}, st::AbstractRasterStack{K2}; + metadata=metadata(st), + layerdims=nokw, + layermetadata=nokw, + kw... +) where {K1,K2} + if all(map(in(K2), K1)) + layerdims = isnokw(layerdims) ? DD.layerdims(st)[K1] : layerdims + layermetadata = isnokw(layermetadata) ? DD.layermetadata(st)[K1] : layermetadata + end + return create(filename, T, dims(st); + parent=first(parent(st)), metadata, missingval, layerdims, layermetadata, kw... + ) +end +# Create from filename, type and dims +function create(filename::AbstractString, T::Union{Type,NamedTuple}, dims::Tuple; + lazy=true, + parent=nokw, + suffix=nokw, + source::Source=_sourcetrait(filename), + missingval=nokw, + kw... ) filename = _maybe_add_suffix(filename, suffix) # This calls `create` in the /sources file for this `source` - create(filename, source, T, dims; lazy, missingval, kw...) + return create(filename, source, T, dims; lazy, missingval, kw...) end -function create(filename::Nothing, T::Type, dims::Tuple; - parent=nothing, - suffix=nothing, - missingval, +# Create from filename, type and extent with res or size keywords +function create(filename::Union{AbstractString,Nothing}, T::Union{Type,NamedTuple}, extent::Extents.Extent; + res=nokw, + size=nokw, + crs=nothing, + sampling=Points(), + reverse_y=nokw, kw... ) - T = isnothing(missingval) ? T : promote_type(T, typeof(missingval)) - data = isnothing(parent) ? Array{T}(undef, dims) : similar(parent, T, size(dims)) - Raster(data, dims; missingval, kw...) -end - -_maybe_add_suffix(filename::Nothing, suffix) = nothing -_maybe_add_suffix(filename::Nothing, suffix::Nothing) = nothing -_maybe_add_suffix(filename, suffix::Nothing) = filename -function _maybe_add_suffix(filename, suffix) - base, ext = splitext(filename) - if string(suffix) == "" - filename + dims = _extent2dims(extent; size, res, crs, sampling) + dims = if reverse_y isa Bool && reverse_y && hasdim(ds, Y()) + DD.setdims(ds, reverse(DD.dims(ds, Y()))) else - return string(base, "_", suffix, ext) + dims end + return create(filename, T, dims; kw...) end +# Create in-memory Raster from type and dims +function create(filename::Nothing, ::Type{T}, dims::Tuple; + missingval=nokw, + fill=nokw, + parent=nokw, + verbose=true, + f=identity, + # Not used but here for consistency + suffix=nokw, + force=false, + chunks=nokw, + driver=nokw, + options=nokw, + kw... +) where T + if verbose + isnokw(chunks) || _warn_keyword_not_used("chunks", chunks) + isnokw(driver) || _warn_keyword_not_used("driver", driver) + isnokw(options) || _warn_keyword_not_used("options", options) + end + # Split inner and outer missingval if needed + # For in-memory rasters we just ignore the inner value + mv_inner, mv_outer = missingval isa Pair ? missingval : (missingval, missingval) + # Get the element type from T and outer missingval + eltype = isnokwornothing(mv_outer) ? T : promote_type(T, typeof(mv_outer)) + # Create the array + data = if isnokwornothing(parent) + Array{eltype}(undef, dims) + else + similar(parent, eltype, size(dims)) + end + # Maybe fill the array + isnokwornothing(fill) || fill!(data, fill) + # Wrap as a Raster + rast = Raster(data, dims; missingval=mv_outer, kw...) + # Apply `f` before returning + f(rast) + return rast +end +# Create in-memory RasterStack from type and dims +function create(filename::Nothing, types::NamedTuple, dims::Tuple; + suffix=keys(types), + force=false, + chunks=nokw, + verbose=true, + driver=nokw, + options=nokw, + parent=nokw, + missingval=nokw, + fill=nokw, + layerdims=nokw, + layermetadata=nokw, + f=identity, + kw... +) + layerdims = isnokw(layerdims) ? map(_ -> basedims(dims), types) : layerdims + layermetadata = layermetadata isa NamedTuple ? layermetadata : map(_ -> layermetadata, types) + layerfill = fill isa NamedTuple ? fill : map(_ -> fill, types) + layermissingvals = missingval isa NamedTuple ? missingval : map(_ -> missingval, types) + layers = map(types, layermissingvals, layerfill, layerdims, layermetadata) do T, lmv, lfv, ld, lm + create(nothing, T, DD.dims(dims, ld); + parent, missingval=lmv, fill=lfv, metadata=lm, driver, options, + ) + end + st = RasterStack(layers; kw...) + f(st) + return st +end +# Create on-disk Raster from filename, source, type and dims +function create(filename::AbstractString, source::Source, ::Type{T}, dims::Tuple; + name=nokw, + missingval=nokw, + fill=nokw, + metadata=nokw, + chunks=nokw, + scale=nokw, + offset=nokw, + dropband=!hasdim(dims, Band()), + lazy=true, + verbose=true, + force=false, + coerce=nokw, + f=identity, + kw... +) where T + mv_inner, mv_outer = _missingval_pair(missingval) + eltype = Missings.nonmissingtype(T) + if isnokw(fill) || isnothing(fill) + write = false # Leave fill undefined + A = FillArrays.Zeros{eltype}(map(length, dims)) + else + fill isa eltype || throw(ArgumentError("fill must be of type $eltype, got $fill")) + write = true # Write fill to disk + A = FillArrays.Fill{eltype}(fill, map(length, dims)) + end + # Create layers of zero arrays + rast = Raster(A, dims; name, missingval=mv_inner) + Rasters.write(f, filename, source, rast; + eltype, chunks, metadata, scale, offset, missingval, verbose, force, coerce, write, kw... + ) do W + # write returns a variable, wrap it as a Raster + f(rebuild(rast; data=W)) + end + # Don't pass in `missingval`, read it again from disk in case it changed + r = Raster(filename; source, lazy, metadata, dropband, coerce, missingval=mv_outer) + return r +end +# Create on-disk RasterStack from filename, source, type and dims +function create(filename::AbstractString, source::Source, layertypes::NamedTuple, dims::Tuple; + lazy=true, + verbose=true, + force=false, + missingval=nokw, + fill=nokw, + metadata=nokw, + layerdims=nokw, + layermetadata=nokw, + chunks=nokw, + scale=nokw, + offset=nokw, + dropband=!hasdim(dims, Band), + coerce=nokw, + f=identity, + kw... +) + layerdims = if isnokwornothing(layerdims) + # Use the same dims for all layers by default + map(_ -> DD.basedims(dims), layertypes) + else + layerdims + end + mv_inner, mv_outer = _missingval_pair(missingval) + # Only write value to disk variables if fill is defined + write = !isnokwornothing(fill) + # Make sure fill is per-layer + fill_nt = fill isa NamedTuple ? fill : map(_ -> fill, layertypes) + # Define no-allocation layers with FillArrays + layers = map(layertypes, layerdims, fill_nt) do T, ld, fi + lks = lookup(dims, ld) + eltype = Missings.nonmissingtype(T) + size = map(length, lks) + if isnokwornothing(fi) + A = FillArrays.Zeros{eltype}(size) + else + A = FillArrays.Fill{eltype}(fi, size) + end + end + # Create layers of zero arrays + st1 = RasterStack(layers, dims; layerdims, layermetadata, missingval=mv_inner) + fn = Rasters.write(filename, st1; + chunks, metadata, scale, offset, missingval=mv_inner, verbose, force, coerce, write, kw... + ) do W + # write returns a variable, wrap it as a RasterStack + f(rebuild(st1; data=W)) + end + st2 = RasterStack(fn; source, lazy, metadata, layerdims, dropband, coerce, missingval=mv_outer) + return st2 +end + +_warn_keyword_not_used(label, obj) = @warn "`$label` of `$obj` found. But `chunks` are not used for in-memory rasters" +_missingval_pair(missingval::Pair) = missingval +_missingval_pair(missingval) = missingval => missingval \ No newline at end of file diff --git a/src/extensions.jl b/src/extensions.jl index 30f31fdd1..61d90a8e8 100644 --- a/src/extensions.jl +++ b/src/extensions.jl @@ -114,8 +114,12 @@ Run `using ArchGDAL` to make this method available. # Keywords +$MISSINGVAL_KEYWORD $FILENAME_KEYWORD $SUFFIX_KEYWORD +- `missingval`: the missing value to use during warping, will default to + `Rasters.missingval(A). Passing a pair will specify the missing value + to use after warping. Any additional keywords are passed to `ArchGDAL.Dataset`. @@ -125,7 +129,7 @@ This simply resamples the array with the `:tr` (output file resolution) and `:r` flags, giving us a pixelated version: ```jldoctest -using Rasters, RasterDataSources, Plots +using Rasters, ArchGDAL, RasterDataSources, Plots A = Raster(WorldClim{Climate}, :prec; month=1) a = plot(A) @@ -178,7 +182,7 @@ where each value in the raster encodes the area of the cell (in meters by defaul ## Example ```julia -using Rasters, ArchGDAL, Rasters.Lookups +using Rasters, Proj, Rasters.Lookups xdim = X(Projected(90.0:10.0:120; sampling=Intervals(Start()), crs=EPSG(4326))) ydim = Y(Projected(0.0:10.0:50; sampling=Intervals(Start()), crs=EPSG(4326))) myraster = rand(xdim, ydim) @@ -261,7 +265,7 @@ using Rasters, Rasters.Lookups, Proj, StatsBase xdim = X(Projected(90.0:10.0:120; sampling=Intervals(Start()), crs=EPSG(4326))) ydim = Y(Projected(0.0:10.0:50; sampling=Intervals(Start()), crs=EPSG(4326))) myraster = rand(xdim, ydim) -Rasters.sample(myraster, 5; weights = cellarea(myraster)) +Rasters.sample(myraster, 5; weights=cellarea(myraster)) # output diff --git a/src/filearray.jl b/src/filearray.jl index 500e2e151..a599731d1 100644 --- a/src/filearray.jl +++ b/src/filearray.jl @@ -1,4 +1,3 @@ - """ FileArray{S} <: DiskArrays.AbstractDiskArray @@ -6,38 +5,56 @@ Filearray is a DiskArrays.jl `AbstractDiskArray`. Instead of holding an open object, it just holds a filename string that is opened lazily when it needs to be read. """ -struct FileArray{S,T,N,Na,G,EC,HC} <: DiskArrays.AbstractDiskArray{T,N} +struct FileArray{S,T,N,Na,G,EC,HC,M<:AbstractModifications} <: DiskArrays.AbstractDiskArray{T,N} filename::String size::NTuple{N,Int} name::Na group::G eachchunk::EC haschunks::HC + mod::M write::Bool end function FileArray{S,T,N}( filename, - size, + size::NTuple{N,Int}, name::Na, - group::G=nothing, - eachchunk::EC=size, - haschunks::HC=DA.Unchunked(), - write=false -) where {S,T,N,Na,G,EC,HC} - FileArray{S,T,N,Na,G,EC,HC}(filename, size, name, group, eachchunk, haschunks, write) + group::G, + eachchunk::EC, + haschunks::HC, + mod::M, + write::Bool, +) where {S,T,N,Na,G,EC,HC,M} + FileArray{S,T,N,Na,G,EC,HC,M}( + String(filename), size, name, group, eachchunk, haschunks, mod, write + ) end -function FileArray{S,T,N}(filename::String, size::Tuple; - name=nokw, group=nokw, eachchunk=size, haschunks=DA.Unchunked(), write=false +function FileArray{S,T,N}(filename::AbstractString, size::Tuple; + name=nokw, + group=nokw, + eachchunk=size, + haschunks=DA.Unchunked(), + mod, + write=false ) where {S,T,N} name = isnokw(name) ? nothing : name group = isnokw(group) ? nothing : group - FileArray{S,T,N}(filename, size, name, group, eachchunk, haschunks, write) + FileArray{S,T,N}(filename, size, name, group, eachchunk, haschunks, mod, write) +end +function FileArray{S}( + var::AbstractArray{<:Any,N}, filename; mod, kw... +) where {S,N} + eachchunk = DA.eachchunk(var) + haschunks = DA.haschunks(var) + T = _mod_eltype(var, mod) + return FileArray{S,T,N}(filename, size(var); eachchunk, haschunks, mod, kw...) end # FileArray has S, T and N parameters not recoverable from fields ConstructionBase.constructorof(::Type{<:FileArray{S,T,N}}) where {S,T,N} = FileArray{S,T,N} filename(A::FileArray) = A.filename +mod(A::FileArray) = A.mod DD.name(A::FileArray) = A.name Base.size(A::FileArray) = A.size DA.eachchunk(A::FileArray) = A.eachchunk @@ -45,7 +62,7 @@ DA.haschunks(A::FileArray) = A.haschunks # Run function `f` on the result of _open for the file type function Base.open(f::Function, A::FileArray{S}; write=A.write, kw...) where S - _open(f, S(), filename(A); name=name(A), write, kw...) + _open(f, S(), filename(A); name=name(A), group=A.group, write, mod=mod(A), kw...) end function DA.readblock!(A::FileArray, dst, r::AbstractUnitRange...) @@ -88,9 +105,6 @@ DA.eachchunk(A::RasterDiskArray) = A.eachchunk DA.readblock!(A::RasterDiskArray, aout, r::AbstractUnitRange...) = aout .= parent(A)[r...] DA.writeblock!(A::RasterDiskArray, v, r::AbstractUnitRange...) = parent(A)[r...] .= v -# Already open, doesn't use `name` -_open(f, ::Source, A::RasterDiskArray; name=nokw, group=nokw) = f(A) - struct MissingDiskArray{T,N,V} <: DiskArrays.AbstractDiskArray{T,N} var::V end diff --git a/src/filestack.jl b/src/filestack.jl index 865864973..d943cc03e 100644 --- a/src/filestack.jl +++ b/src/filestack.jl @@ -9,24 +9,26 @@ typically netcdf or hdf5. `S` is a backend type like `NCDsource`, and `Na` is a tuple of `Symbol` keys. """ -struct FileStack{S,Na,T,SZ,G<:Union{AbstractString,Symbol,Nothing},EC,HC} +struct FileStack{S,Na,T,SZ,G<:Union{AbstractString,Symbol,Nothing},EC,HC,M} filename::String sizes::SZ group::G eachchunk::EC haschunks::HC + mods::M write::Bool end function FileStack{S,Na,T}( - filename::AbstractString, sizes::SZ, group::G, eachchunk::EC, haschunks::HC, write::Bool -) where {S,Na,T,SZ,G,EC,HC} - FileStack{S,Na,T,SZ,G,EC,HC}(String(filename), sizes, group, eachchunk, haschunks, write) + filename::AbstractString, sizes::SZ, group::G, eachchunk::EC, haschunks::HC, mods::M, write::Bool +) where {S,Na,T,SZ,G,EC,M,HC} + FileStack{S,Na,T,SZ,G,EC,HC,M}(String(filename), sizes, group, eachchunk, haschunks, mods, write) end # FileStack has `S,Na,T` parameters that are not recoverable from fields. ConstructionBase.constructorof(::Type{<:FileStack{S,Na,T}}) where {S,Na,T} = FileStack{S,Na,T} filename(fs::FileStack) = fs.filename +mods(fs::FileStack) = fs.mods DD.name(::FileStack{<:Any,Na}) where Na = Na DD.data_eltype(::FileStack{<:Any,<:Any,T}) where T = T @@ -43,8 +45,9 @@ function Base.getindex(fs::FileStack{S,Na,T}, name::Symbol) where {S,Na,T} size = fs.sizes[i] eachchunk = fs.eachchunk[i] haschunks = fs.haschunks[i] + mod = fs.mods[i] N = length(size) - return FileArray{S,_itype(T, i),N}(filename(fs), size, name, fs.group, eachchunk, haschunks, fs.write) + return FileArray{S,_itype(T, i),N}(filename(fs), size, name, fs.group, eachchunk, haschunks, mod, fs.write) end @inline _itype(::Type{<:NamedTuple{<:Any,T}}, i) where T = T.parameters[i] diff --git a/src/methods/burning/array_init.jl b/src/methods/burning/array_init.jl index 10ddd5937..a700a6454 100644 --- a/src/methods/burning/array_init.jl +++ b/src/methods/burning/array_init.jl @@ -1,5 +1,5 @@ -# Like `create` but without disk writes, mostly for Bool/Union{Missing,Boo}, +# Like `create` but without disk writes, mostly for Bool or Union{Missing,Bool}, # and uses `similar` where possible # TODO merge this with `create` somehow _init_bools(to; kw...) = _init_bools(to, BitArray; kw...) @@ -7,18 +7,30 @@ _init_bools(to, T::Type; kw...) = _init_bools(to, T, nothing; kw...) _init_bools(to::AbstractRasterSeries, T::Type, data; kw...) = _init_bools(first(to), T, data; kw...) _init_bools(to::AbstractRasterStack, T::Type, data; kw...) = _init_bools(first(to), T, data; kw...) _init_bools(to::AbstractRaster, T::Type, data; kw...) = _init_bools(to, dims(to), T, data; kw...) -_init_bools(to::Extents.Extent, T::Type, data; kw...) = _init_bools(to, _extent2dims(to; kw...), T, data; kw...) _init_bools(to::DimTuple, T::Type, data; kw...) = _init_bools(to, to, T, data; kw...) -function _init_bools(to::Nothing, T::Type, data; geometrycolumn=nothing,kw...) +function _init_bools(to::Nothing, T::Type, data; + geometrycolumn=nothing, + collapse=nokw, + res=nokw, + size=nokw, + kw... +) # Get the extent of the geometries - ext = _extent(data; geometrycolumn, kw...) + ext = _extent(data; geometrycolumn) isnothing(ext) && throw(ArgumentError("no recognised dimensions, extent or geometry")) + return _init_bools(ext, T, data; collapse, res, size, kw...) +end +function _init_bools(to::Extents.Extent, T::Type, data; + collapse=nokw, size=nokw, res=nokw, sampling=nokw, kw... +) # Convert the extent to dims (there must be `res` or `size` in `kw`) - dims = _extent2dims(ext; kw...) - return _init_bools(to, dims, T, data; kw...) + dims = _extent2dims(to; size, res, sampling, kw...) + _init_bools(to, dims, T, data; collapse, kw...) end -function _init_bools(to, dims::DimTuple, T::Type, data; collapse::Union{Bool,Nothing}=nothing, kw...) - if isnothing(data) || isnothing(collapse) || collapse +function _init_bools(to, dims::DimTuple, T::Type, data; + collapse::Union{Bool,Nothing,NoKW}=nokw, kw... +) + if isnothing(data) || isnokwornothing(collapse) || collapse _alloc_bools(to, dims, T; kw...) else n = if Base.IteratorSize(data) isa Base.HasShape @@ -31,13 +43,17 @@ function _init_bools(to, dims::DimTuple, T::Type, data; collapse::Union{Bool,Not end end -function _alloc_bools(to, dims::DimTuple, ::Type{BitArray}; missingval::Bool=false, metadata=NoMetadata(), kw...) +function _alloc_bools(to, dims::DimTuple, ::Type{BitArray}; + missingval::Bool=false, metadata=NoMetadata(), kw... +) # Use a BitArray vals = missingval == false ? falses(size(dims)) : trues(size(dims)) return Raster(vals, dims; missingval, metadata) end -function _alloc_bools(to, dims::DimTuple, ::Type{<:Array{T}}; missingval=false, metadata=NoMetadata(), kw...) where T - # Use an `Array` +function _alloc_bools(to, dims::DimTuple, ::Type{<:Array{T}}; + missingval=false, metadata=NoMetadata(), kw... +) where T + # Use an Array data = fill!(Raster{T}(undef, dims), missingval) return rebuild(data; missingval, metadata) end diff --git a/src/methods/classify.jl b/src/methods/classify.jl index be046ecb9..dbd501e4b 100644 --- a/src/methods/classify.jl +++ b/src/methods/classify.jl @@ -58,11 +58,9 @@ function classify(A::AbstractRaster, pairs::Union{Tuple,AbstractArray}; # We use `Val{T}` to force type stability through the closure valT = Val{T}() f(x) = _convert_val(valT, _classify(x, pairs, lower, upper, others, Rasters.missingval(A), missingval)) - A1 = create(filename, T, A; suffix, missingval) - open(A1; write=true) do O - broadcast!(f, O, A) + return create(filename, T, A; suffix, missingval) do C + broadcast!(f, C, A) end - return A1 end function classify(xs::AbstractRasterStack, pairs...; suffix=keys(xs), kw...) mapargs(xs, suffix) do x, s diff --git a/src/methods/crop_extend.jl b/src/methods/crop_extend.jl index aacb1920c..2026e8451 100644 --- a/src/methods/crop_extend.jl +++ b/src/methods/crop_extend.jl @@ -9,7 +9,7 @@ to match the size of the object `to`, or smallest of any dimensions that are sha # Keywords -- `to`: the object to crop to. This can be $OBJ_ARGUMENT +- `to`: the object to crop to. This can be $OBJ_ARGUMENT If no `to` keyword is passed, the smallest shared area of all `xs` is used. - `touches`: `true` or `false`. Whether to use `Touches` wraper on the object extent. @@ -157,12 +157,12 @@ function extend end function extend(l1::RasterStackOrArray, l2::RasterStackOrArray, ls::RasterStackOrArray...; kw...) extend((l1, l2, ls...); kw...) end -function extend(xs; to=nothing) +function extend(xs; to=nothing, kw...) if isnothing(to) to = _subsetbounds((min, max), xs) - map(l -> _extend_to(l, to), xs) + map(l -> _extend_to(l, to, kw...), xs) else - map(l -> extend(l; to), xs) + map(l -> extend(l; to, kw...), xs) end end extend(x::RasterStackOrArray; to=dims(x), kw...) = _extend_to(x, to; kw...) @@ -176,8 +176,12 @@ end _extend_to(x::RasterStackOrArray, to::Dimension; kw...) = _extend_to(x, (to,); kw...) function _extend_to(A::AbstractRaster, to::DimTuple; - filename=nothing, suffix=nothing, touches=false, - missingval=(isnothing(missingval(A)) ? missing : missingval(A)) + filename=nothing, + missingval=(isnothing(missingval(A)) ? nokw : missingval(A)), + fill=nokw, + touches=false, + verbose=true, + kw... ) others = otherdims(to, A) # Allow not specifying all dimensions @@ -193,35 +197,44 @@ function _extend_to(A::AbstractRaster, to::DimTuple; end others1 = otherdims(to, A) final_to = (set(dims(A), map(=>, dims(A, to), to)...)..., others1...) + + # If we are writing to disk swap missingval to something writeable + if ismissing(missingval) + missingval = _writeable_missing(filename, eltype(A); verbose) + end + # If no `fill` is passed use `missingval` or zero + if isnokw(fill) + fill = isnokwornothing(missingval) ? zero(Missings.nonmissingtype(eltype(A))) : missingval + end # Create a new extended array - newA = create(filename, eltype(A), final_to; - suffix, parent=parent(A), missingval, - name=name(A), metadata=metadata(A) - ) - # Input checks - map(dims(A, to), dims(newA, to)) do d1, d2 - if lookup(d1) isa Union{AbstractSampled,NoLookup} - b1, b2 = bounds(d1), bounds(d2) - b1[1] >= b2[1] || throw(ArgumentError("Lower bound of $(basetypeof(d1)) lookup of `$(b2[1])` are not larger than the original `$(b1[1])`")) - b1[2] <= b2[2] || throw(ArgumentError("Upper bound of $(basetypeof(d2)) lookup of `$(b2[2])` is not larger than the original `$(b1[2])`")) - elseif lookup(d1) isa Categorical - map(lookup(d1)) do x - x in d2 || throw(ArgumentError("category $x not in new dimension")) + return create(filename, eltype(A), final_to; + parent=parent(A), + missingval, + name=name(A), + metadata=metadata(A), + verbose, + fill, + kw... + ) do C + # Input checks + map(dims(A, to), dims(C, to)) do d1, d2 + if lookup(d1) isa Union{AbstractSampled,NoLookup} + b1, b2 = bounds(d1), bounds(d2) + b1[1] >= b2[1] || throw(ArgumentError("Lower bound of $(basetypeof(d1)) lookup of `$(b2[1])` are not larger than the original `$(b1[1])`")) + b1[2] <= b2[2] || throw(ArgumentError("Upper bound of $(basetypeof(d2)) lookup of `$(b2[2])` is not larger than the original `$(b1[2])`")) + elseif lookup(d1) isa Categorical + map(lookup(d1)) do x + x in d2 || throw(ArgumentError("category $x not in new dimension")) + end end end - end - # The missingval may have changed for disk-based arrays - if !isequal(missingval, Rasters.missingval(newA)) - A = replace_missing(A, Rasters.missingval(newA)) - end - open(newA; write=true) do O - # Fill it with missing/nodata values - O .= Rasters.missingval(O) - # Copy the original data to the new array + # The missingval may have changed for disk-based arrays + if !isequal(Rasters.missingval(A), Rasters.missingval(C)) + A = replace_missing(A, Rasters.missingval(C)) + end # Somehow this is slow from disk? - broadcast_dims!(identity, view(O, rangedims...), A) + broadcast_dims!(identity, view(C, rangedims...), A) end - return newA end function _extend_to(st::AbstractRasterStack, to::DimTuple; suffix=keys(st), kw...) mapargs((A, s) -> _extend_to(A, to; suffix=s, kw...), st, suffix) diff --git a/src/methods/fixed.png b/src/methods/fixed.png new file mode 100644 index 000000000..df0c21380 Binary files /dev/null and b/src/methods/fixed.png differ diff --git a/src/methods/fractional_resample.jl b/src/methods/fractional_resample.jl new file mode 100644 index 000000000..aa2da9ca0 --- /dev/null +++ b/src/methods/fractional_resample.jl @@ -0,0 +1,33 @@ +using DimensionalData + +fractional_resample(x, res; kw...) = fractional_resample(x; res, kw...) +fractional_resample(xs::RasterStackOrArray...; kw...) = fractional_resample(xs; kw...) +function fractional_resample(ser::AbstractRasterSeries, args...; kw...) + map(x -> resample(x, args...; kw...), ser) +end +function fractional_resample(A::AbstractRaster; + categories::NamedTuple, to=nothing, res=nothing, size=nothing +) + ds = _extent2dims(to; size, res, crs(A)) + shared_dims = commondims(A, ds) + isintervals(shared_dims) || throw(ArgumentError("Fractional resampling only supported for `Intervals`.")) + outputdims = setdims(dims(A), ds) + outputdata = Array{typeof(categories)}(undef, outputdims) + output = rebuild(A; data=outputdata, dims=outputdims, refdims=()) + map(DimIndices(otherdims(A, ds))) do D + slice = view(A, D...) + fractions = read_fractions(slice, categories, weights) + end +end + +function read_fractions(slice, categories, weights, indices::CartesianIndices) + vals = view(slice, indices) + map(categories) do c + sum(zip(vals, weights)) do (v, w) + _compare(c, v) * w + end / sum(weights) + end +end + +_compare(c::Interval, v) = v in c +_compare(c, v) = v == c diff --git a/src/methods/mask.jl b/src/methods/mask.jl index 1b19b44cb..1401a990c 100644 --- a/src/methods/mask.jl +++ b/src/methods/mask.jl @@ -4,6 +4,13 @@ const INVERT_KEYWORD = """ masked, and areas missing in `with` are masked. """ +const COLLAPSE_KEYWORD = """ +- `collapse`: if `true`, collapse all geometry masks into a single mask. Otherwise + return a Raster with an additional `geometry` dimension, so that each slice + along this axis is the mask of the `geometry` opbject of each row of the + table, feature in the feature collection, or just each geometry in the iterable. +""" + """ mask(A:AbstractRaster; with, missingval=missingval(A)) mask(x; with) @@ -82,12 +89,10 @@ function _mask(A::AbstractRaster, with::AbstractRaster; filename=nothing, suffix=nothing, missingval=_missingval_or_missing(A), kw... ) missingval = ismissing(missingval) ? missing : convert(eltype(A), missingval) - A1 = create(filename, A; suffix, missingval) - open(A1; write=true) do a + return create(filename, A; suffix, missingval) do C # The values array will be be written to A1 in `mask!` - mask!(a; with, missingval, values=A, kw...) + mask!(C; with, missingval, values=A, kw...) end - return A1 end function _mask(xs::AbstractRasterStack, with::AbstractRaster; suffix=keys(xs), kw...) mapargs((x, s) -> mask(x; with, suffix=s, kw...), xs, suffix) @@ -236,18 +241,9 @@ $GEOMETRYCOLUMN_KEYWORD $THREADED_KEYWORD $PROGRESS_KEYWORD -And specifically for `shape=:polygon`: - -- `boundary`: include pixels where the `:center` is inside the polygon, where - the line `:touches` the pixel, or that are completely `:inside` inside the polygon. - The default is `:center`. - For tabular data, feature collections and other iterables -- `collapse`: if `true`, collapse all geometry masks into a single mask. Otherwise - return a Raster with an additional `geometry` dimension, so that each slice - along this axis is the mask of the `geometry` opbject of each row of the - table, feature in the feature collection, or just each geometry in the iterable. +$COLLAPSE_KEYWORD # Example @@ -338,9 +334,10 @@ function boolmask!(dest::AbstractRaster, data; threaded=false, allocs=_burning_allocs(dest; threaded), geometrycolumn=nothing, + collapse=nokw, kw... ) - if hasdim(dest, :geometry) + if collapse === false && hasdim(dest, :geometry) geoms = _get_geometries(data, geometrycolumn) range = eachindex(geoms) _run(range, threaded, progress, "Burning each geometry to a BitArray slice...") do i @@ -351,16 +348,18 @@ function boolmask!(dest::AbstractRaster, data; burn_geometry!(slice, geom; kw..., fill=!invert, allocs=_get_alloc(allocs)) return nothing end - else + elseif isnokw(collapse) || collapse === true burn_geometry!(dest, data; kw..., allocs, lock, progress, threaded, geometrycolumn, fill=!invert) + else + throw(ArgumentError("`collapse` must be `false` or not passed if there is no `:geometry` dimension in `dest`")) end return dest end """ missingmask(obj::Raster; kw...) - missingmask(obj; [to, res, size, collapse]) - missingmask(obj::RasterStack; alllayers = true, kw...) + missingmask(obj; [to, res, size]) + missingmask(obj::RasterStack; alllayers=true, kw...) Create a mask array of `missing` and `true` values, from another `Raster`. `AbstractRasterStack` or `AbstractRasterSeries` are also accepted- @@ -376,6 +375,7 @@ The array returned from calling `missingmask` on a `AbstractRaster` is a - `obj`: $OBJ_ARGUMENT # Keywords + - `alllayers`: if `true` a mask is taken for all layers, otherwise only the first layer is used. Defaults to `true` $INVERT_KEYWORD $GEOM_KEYWORDS @@ -423,8 +423,8 @@ function missingmask!(dest::AbstractRaster, src::AbstractRaster; end end function missingmask!(dest::AbstractRaster, geom; kw...) - B = boolmask!(dest, geom; kw...) # boolmask! handles `invert` keyword here + B = boolmask!(dest, geom; kw...) dest .= _false_to_missing.(B) return dest end diff --git a/src/methods/mosaic.jl b/src/methods/mosaic.jl index 0c913d173..2e9a3f5df 100644 --- a/src/methods/mosaic.jl +++ b/src/methods/mosaic.jl @@ -65,22 +65,50 @@ function mosaic(f::Function, r1::RasterStackOrArray, rs::RasterStackOrArray...; mosaic(f, (r1, rs...); kw...) end mosaic(f::Function, regions; kw...) = _mosaic(f, first(regions), regions; kw...) -function _mosaic(f::Function, ::AbstractRaster, regions; - missingval=missingval(first(regions)), filename=nothing, suffix=nothing, kw... +function _mosaic(f::Function, A1::AbstractRaster, regions; + missingval=nokw, + filename=nothing, + suffix=nothing, + driver=nokw, + options=nokw, + force=false, + kw... ) - missingval = missingval isa Nothing ? missing : missingval - T = Base.promote_type(typeof(missingval), Base.promote_eltype(regions...)) + isnothing(missingval) && throw(ArgumentError("missingval cannot be `nothing` for `mosaic`")) + missingval = if isnokw(missingval) + mv = Rasters.missingval(first(regions)) + isnokwornothing(mv) ? missing : mv + else + missingval + end + missingval_pair = if !isnothing(filename) && (ismissing(missingval) || isnokwornothing(missingval)) + _type_missingval(eltype(A1)) => missing + elseif missingval isa Pair + missingval + else + missingval => missingval + end + T = Base.promote_type(typeof(last(missingval_pair)), Base.promote_eltype(regions...)) dims = _mosaic(Tuple(map(DD.dims, regions))) l1 = first(regions) - A = create(filename, T, dims; name=name(l1), missingval, metadata=metadata(l1)) - open(A; write=true) do a - _mosaic!(f, a, regions; missingval, kw...) + + return create(filename, T, dims; + name=name(l1), + fill=missingval_pair[1], + missingval=missingval_pair, + driver, + options, + force + ) do C + _mosaic!(f, C, regions; missingval, kw...) end - return A end function _mosaic(f::Function, ::AbstractRasterStack, regions; - filename=nothing, suffix=keys(first(regions)), kw... + filename=nothing, + suffix=keys(first(regions)), + kw... ) + # TODO make this write inside a single netcdf layers = map(suffix, map(values, regions)...) do s, A... mosaic(f, A...; filename, suffix=s, kw...) end @@ -139,10 +167,11 @@ mosaic!(f::Function, dest::RasterStackOrArray, regions::RasterStackOrArray...; k _mosaic!(f, dest, regions; kw...) function _mosaic!(f::Function, A::AbstractRaster{T}, regions::Union{Tuple,AbstractArray}; - missingval=missingval(A), atol=maybe_eps(T) + missingval=missingval(A), atol=nothing ) where T + isnokwornothing(missingval) && throw(ArgumentError("destination array must have a `missingval`")) _without_mapped_crs(A) do A1 - broadcast!(A1, DimKeys(A1; atol)) do ds + broadcast!(A1, DimSelectors(A1; atol)) do ds # Get all the regions that have this point ls = foldl(regions; init=()) do acc, l if DD.hasselection(l, ds) diff --git a/src/methods/rasterize.jl b/src/methods/rasterize.jl index b84b7c888..2d9e278ec 100644 --- a/src/methods/rasterize.jl +++ b/src/methods/rasterize.jl @@ -30,7 +30,7 @@ _reduce_init(::typeof(sum), ::Type{T}, missingval) where T = zero(nonmissingtype _reduce_init(::typeof(prod), ::Type{T}, missingval) where T = oneunit(nonmissingtype(T)) _reduce_init(::typeof(minimum), ::Type{T}, missingval) where T = typemax(nonmissingtype(T)) _reduce_init(::typeof(maximum), ::Type{T}, missingval) where T = typemin(nonmissingtype(T)) -_reduce_init(::typeof(last), ::Type{T}, missingval) where T = _maybe_nothing_to_missing(missingval) +_reduce_init(::typeof(last), ::Type{T}, missingval) where T = _maybe_to_missing(missingval) struct FillChooser{F,I,M} fill::F @@ -73,10 +73,13 @@ RasterCreator(to, data; kw...) = RasterCreator(_extent(to; kw...); kw...) function RasterCreator(to::Extents.Extent; res::Union{Nothing,Real,NTuple{<:Any,<:Real}}=nothing, - size::Union{Nothing,Int,NTuple{<:Any,Int}}=nothing, kw... + size::Union{Nothing,Int,NTuple{<:Any,Int}}=nothing, + crs=nokw, + mappedcrs=nokw, + kw... ) - to_as_dims = _extent2dims(to; size, res, kw...) - return RasterCreator(to_as_dims; kw...) + to_as_dims = _extent2dims(to; size, res, crs, mappedcrs) + return RasterCreator(to_as_dims; crs, mappedcrs, kw...) end @@ -477,12 +480,8 @@ function alloc_rasterize(f, r::RasterCreator; if prod(size(r.to)) == 0 throw(ArgumentError("Destination array is is empty, with size $(size(r.to))). Rasterization is not possible")) end - A = create(r.filename, eltype, r.to; name, missingval, metadata, suffix) - # TODO f should apply to the file when it is initially created - # instead of reopening but we need a `create(f, filename, ...)` method - open(A; write=true) do A - A .= Ref(missingval) - f(A) + A = create(r.filename, fill=missingval, eltype, r.to; name, missingval, metadata, suffix) do O + f(O) end return A end diff --git a/src/methods/replace_missing.jl b/src/methods/replace_missing.jl index 58c85b72c..7a820c6e2 100644 --- a/src/methods/replace_missing.jl +++ b/src/methods/replace_missing.jl @@ -23,7 +23,7 @@ missing """ replace_missing(x; missingval=missing, kw...) = replace_missing(x, missingval; kw...) function replace_missing(A::AbstractRaster{T}, missingval::MV; - filename=nothing, suffix=nothing + filename=nothing, kw... ) where {T,MV} MT = if ismissing(missingval) promote_type(T, Missing) @@ -37,13 +37,11 @@ function replace_missing(A::AbstractRaster{T}, missingval::MV; # But in both cases we make sure we return an array with the missingval # in the eltype, even if there are no missing values in the array. if !isnothing(filename) - A1 = create(filename, MT, dims(A); - parent=parent(A), suffix, missingval, name=name(A), metadata=metadata(A) - ) - open(A1; write=true) do O + return create(filename, MT, dims(A); + parent=parent(A), missingval, name=name(A), metadata=metadata(A), kw... + ) do O O .= repmissing.(A) end - return A1 else # We need to force T of Union{T,Missing} for DiskArrays broadcasts if isdisk(A) diff --git a/src/methods/shared_docstrings.jl b/src/methods/shared_docstrings.jl index 443808750..88bb5437d 100644 --- a/src/methods/shared_docstrings.jl +++ b/src/methods/shared_docstrings.jl @@ -1,5 +1,17 @@ # Share common docstrings here to keep things consistent +const NAME_KEYWORD = """ +- `name`: a `Symbol` name for a Raster, which will also retrieve the + a named layer if `Raster` is used on a multi-layered file like a NetCDF. +""" +const METADATA_KEYWORD = """ +- `metadata`: `Dict` or `Metadata` object for the array, or `NoMetadata()`. +""" +const REFDIMS_KEYWORD = """ +- `refdims`: `Tuple of` position `Dimension`s the array was sliced from, defaulting to `()`. + Usually not needed. +""" + const TO_KEYWORD = """ - `to`: a `Raster`, `RasterStack`, `Tuple` of `Dimension` or `Extents.Extent`. If no `to` object is provided the extent will be calculated from the geometries, @@ -115,13 +127,71 @@ const GROUP_KEYWORD = """ at any nested depth, i.e `group=:group1 => :group2 => :group3`. """ -const REPLACE_MISSING_KEYWORD = """ -- `replace_missing`: replace `missingval` with `missing`. This is done lazily if `lazy=true`. - Note that currently for NetCDF and GRIB files `replace_missing` is always true. - In future `replace_missing=false` will also work for these data sources. -""" - const CHECKMEMORY_KEYWORD = """ -- `checkmemory`: If `true` (the default), check if there is enough memory for the operation. +- `checkmemory`: if `true` (the default), check if there is enough memory for the operation. `false` will ignore memory needs. """ + +const SCALE_KEYWORD = """ +- `scale`: set `scale` for `x * scale + offset` transformations. +""" + +const OFFSET_KEYWORD = """ +- `offset`: set `offset` for `x * scale + offset` transformations. +""" + +const RAW_KEYWORD = """ +- `raw`: turn of all scaling and masking and load the raw values from disk. + `false` by default. If `true`, `scaled` will be set to `false` and `missingval` + will to the existing missing value in the file. A warning will be printed if + `scaled` or `missingval` are manually set to another value. +""" + +const SCALED_KEYWORD = """ +- `scaled`: apply scale and offset as `x * scale + offset` where + `scale` and/or `offset` are found in file metadata. `true` by default. + This is common where data has been convert to e.g. UInt8 to save disk space. + To ignore `scale` and `offset` metadata, use `scaled=false`. + Note 1: If `scale` and `offset` are `1.0` and `0.0` they will be ignored and the + original type will be used even when `scaled=true`. This is because these values + may be fallback defaults and we do not want to convert every `Real` array to larger + `Float64` values. + Note 2: `raw=true` will ignore `scaled` and `missingval` and return + the raw values. +""" + +const COERCE_KEYWORD = """ +- `coerce`: where `scale` and/or `offset` are present during `setindex!` to disk, + coerce values to the element type used on dist. `convert` is the default, + but `round`, `trunc` or or `ceil` or other functions with `f(::Type{T}, x)` + signature may be needed where the values are not exact. +""" + +const MISSINGVAL_KEYWORD = """ +- `missingval`: value representing missing data, normally detected from the file and + automatically converted to `missing`. Setting to an alternate value, such as `0` + or `NaN` may be desirable for improved perfomance. `nothing` specifies no missing value. + Using the same `missingval` the file already has removes the overhead of replacing it, + this can be done by passing the `missingval` function as `missingval`. + If the file has an incorrect value, we can manually define the transformation + as a pair like `correct_value => missing` or `correct_value => NaN`. + `correct_value => correct_value` will keep remove the overhead of changing it. + Note: When `raw=true` is set, `missingval` is not changed from the value specified + in the file. +""" + +const WRITE_MISSINGVAL_KEYWORD = """ +- `missingval`: set the missing value (i.e. FillValue / nodataval) of the written raster, + as Julia's `missing` cannot be stored. If not passed in, an appropriate `missingval` + will be detected from the objects `missingval`, its `metadata`, or a default will be + chosen base on the array element type(s). +""" + +const CHUNKS_KEYWORD = """ +- `chunks`: a `NTuple{N,Int}` specifying the chunk size for each dimension. + To specify only specific dimensions, a Tuple of `Dimension` wrapping `Int` + or a `NamedTuple` of `Int` can be used. Other dimensions will have a chunk + size of `1`. `true` can be used to mean: use the original + chunk size of the lazy `Raster` being written or X and Y of 256 by 256. + `false` means don't use chunks at all. +""" \ No newline at end of file diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index 1263a0acf..c08b62cc5 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -83,7 +83,7 @@ _zonal(f, x::Raster, of::Extents.Extent; skipmissing=true) = _maybe_skipmissing_call(f, crop(x; to=of, touches=true), skipmissing) function _zonal(f, x::RasterStack, ext::Extents.Extent; skipmissing=true) cropped = crop(x; to=ext, touches=true) - prod(size(cropped)) > 0 || return missing + length(cropped) > 0 || return missing return maplayers(cropped) do A _maybe_skipmissing_call(f, A, skipmissing) end @@ -99,7 +99,7 @@ function _zonal(f, x::AbstractRaster, ::GI.AbstractGeometryTrait, geom; skipmissing=true, kw... ) cropped = crop(x; to=geom, touches=true) - prod(size(cropped)) > 0 || return missing + length(cropped) > 0 || return missing masked = mask(cropped; with=geom, kw...) return _maybe_skipmissing_call(f, masked, skipmissing) end @@ -107,10 +107,10 @@ function _zonal(f, st::AbstractRasterStack, ::GI.AbstractGeometryTrait, geom; skipmissing=true, kw... ) cropped = crop(st; to=geom, touches=true) - prod(size(cropped)) > 0 || return map(_ -> missing, layerdims(st)) + length(cropped) > 0 || return map(_ -> missing, st) masked = mask(cropped; with=geom, kw...) return maplayers(masked) do A - prod(size(A)) > 0 || return missing + length(A) > 0 || return missing _maybe_skipmissing_call(f, A, skipmissing) end end diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl new file mode 100644 index 000000000..93bddd210 --- /dev/null +++ b/src/modifieddiskarray.jl @@ -0,0 +1,247 @@ +abstract type AbstractModifications end +struct NoMod{T,Mi} <: AbstractModifications + missingval::Mi +end +NoMod{T}(missingval::Mi) where {T,Mi} = NoMod{T,Mi}(missingval) +NoMod() = NoMod{Any}() +NoMod{T}() where T = NoMod{T}(nothing) +NoMod{T}(::NoKW) where T = NoMod{T}(nothing) + +Base.eltype(::NoMod{T}) where T = T +source_eltype(::NoMod{T}) where T = T + +struct Mod{T1,T2,Mi,S,O,F} <: AbstractModifications + missingval::Mi + scale::S + offset::O + coerce::F + function Mod{T}(missingval, scale, offset, coerce) where T + missingval = missingval isa Pair && missingval[1] === missingval[2] ? missingval[1] : missingval + if isnokw(coerce) || isnothing(coerce) + coerce = convert + end + vals = map(_nokw2nothing, (missingval, scale, offset)) + T1 = _resolve_mod_eltype(T, vals...) + new{T1,T,map(typeof, vals)...,typeof(coerce)}(vals..., coerce) + end +end + +Base.eltype(::Mod{T1}) where T1 = T1 +source_eltype(::Mod{<:Any,T2}) where T2 = T2 + +function _resolve_mod_eltype(::Type{T}, missingval, scale, offset) where T + omv = _outer_missingval(missingval) + T1 = isnothing(omv) ? T : promote_type(T, typeof(omv)) + T2 = isnothing(scale) ? T1 : promote_type(T1, typeof(scale)) + T3 = isnothing(offset) ? T2 : promote_type(T2, typeof(offset)) + return T3 +end + +missingval(m::Mod) = m.missingval +missingval(m::NoMod) = m.missingval + +_inner_missingval(m::Mod) = _inner_missingval(m.missingval) +_inner_missingval(mv) = mv +_inner_missingval(mv::Pair) = mv[1] + +_outer_missingval(m::AbstractModifications) = _outer_missingval(m.missingval) +_outer_missingval(mv::Pair) = mv[2] +_outer_missingval(mv) = mv + +struct ModifiedDiskArray{I,T,N,V,M} <: DiskArrays.AbstractDiskArray{T,N} + var::V + mod::M +end +function ModifiedDiskArray(v::V, m::M; invert=false) where {V<:AbstractArray{<:Any,N},M} where N + T = invert ? source_eltype(m) : eltype(m) + return ModifiedDiskArray{invert,T,N,V,M}(v, m) +end + +Base.parent(A::ModifiedDiskArray) = A.var +Base.size(A::ModifiedDiskArray, args...) = size(parent(A), args...) +filename(A::ModifiedDiskArray) = filename(parent(A)) +missingval(A::ModifiedDiskArray) = A.missingval +DiskArrays.haschunks(A::ModifiedDiskArray) = DiskArrays.haschunks(parent(A)) +DiskArrays.eachchunk(A::ModifiedDiskArray) = DiskArrays.eachchunk(parent(A)) + +function DiskArrays.readblock!( + A::ModifiedDiskArray{false,<:Any,0}, out_block, I::AbstractVector... +) + out_block[] = _applymod(parent(A)[I...][], A.mod) + return out_block +end +function DiskArrays.readblock!( + A::ModifiedDiskArray{true,T,<:Any,0}, out_block, I::AbstractVector... +) where T + out_block[] = _invertmod(Val{T}(), parent(A)[I...], A.mod) + return out_block +end +function DiskArrays.readblock!( + A::ModifiedDiskArray{false}, out_block, I::AbstractVector... +) + out_block .= _applymod.(parent(A)[I...], (A.mod,)) + return out_block +end +function DiskArrays.readblock!( + A::ModifiedDiskArray{true,T}, out_block, I::AbstractVector... +) where T + out_block .= _invertmod.((Val{T}(),), parent(A)[I...], (A.mod,)) + return out_block +end + +function DiskArrays.writeblock!( + A::ModifiedDiskArray{false,<:Any,0,<:AbstractArray{T}}, block, I::AbstractVector... +) where T + + parent(A)[I...] = _invertmod(Val{source_eltype(A.mod)}(), block[], A.mod) + return nothing +end +function DiskArrays.writeblock!( + A::ModifiedDiskArray{true,<:Any,0,<:AbstractArray{T}}, _block, I::AbstractVector... +) where T + parent(A)[I...] = _applymod(Val{eltype(A.mod)}(), block[], A.mod) + return nothing +end +function DiskArrays.writeblock!( + A::ModifiedDiskArray{<:Any,<:Any,<:Any,<:AbstractArray{T}}, block, I::AbstractVector... +) where T + parent(A)[I...] = _invertmod.((Val{source_eltype(A.mod)}(),), block, (A.mod,)) + return nothing +end +function DiskArrays.writeblock!( + A::ModifiedDiskArray{true,<:Any,<:Any,<:AbstractArray{T}}, _block, I::AbstractVector... +) where T + parent(A)[I...] = _applymod.((Val{eltype(A.mod)}(),), block, (A.mod,)) + return nothing +end + +Base.@assume_effects :foldable function _applymod(x, m::Mod) + if _ismissing(x, _inner_missingval(m)) + _outer_missingval(m) + else + _scaleoffset(x, m) + end +end +Base.@assume_effects :foldable _applymod(x, ::NoMod) = x + +_ismissing(x, mv) = isequal(x, mv) +_ismissing(_, ::Nothing) = false + +_scaleoffset(x, m::Mod) = _scaleoffset(x, m.scale, m.offset) +_scaleoffset(x, scale, offset) = x * scale + offset +_scaleoffset(x, ::Nothing, offset) = x + offset +_scaleoffset(x, scale, ::Nothing) = x * scale +_scaleoffset(x, ::Nothing, ::Nothing) = x + +Base.@assume_effects :foldable function _invertmod(::Val{T}, x, m::Mod) where T + tm = if !isnothing(m.missingval) && _ismissing(x, _outer_missingval(m)) + return _inner_missingval(m) + else + x + end + return _scaleoffset_inv(T, tm, m) +end +Base.@assume_effects :foldable _invertmod(v, x, m::NoMod) = x + +Base.@assume_effects :foldable _scaleoffset_inv(::Type{T}, x, m::Mod) where T = + _scaleoffset_inv(m.coerce, T, x, m)::T +Base.@assume_effects :foldable _scaleoffset_inv(coerce::Base.Callable, ::Type{T}, x, m::Mod) where T = + coerce(T, _scaleoffset_inv1(x, m.scale, m.offset))::T + +Base.@assume_effects :foldable _scaleoffset_inv1(x, scale, offset) = (x - offset) / scale +Base.@assume_effects :foldable _scaleoffset_inv1(x, scale, ::Nothing) = x / scale +Base.@assume_effects :foldable _scaleoffset_inv1(x, ::Nothing, offset) = x - offset +Base.@assume_effects :foldable _scaleoffset_inv1(x, ::Nothing, ::Nothing) = x + +function _stack_mods(eltypes::Vector, metadata::Vector, missingval::AbstractVector; + scaled::Bool, coerce +) + map(eltypes, metadata, missingval) do T, md, mv + scale, offset = get_scale(md, scaled) + _mod(T, mv, scale, offset, coerce) + end +end +function _stack_mods(eltypes::Vector, metadata::Vector, missingval::Pair; + scaled::Bool, coerce +) + map(eltypes, metadata) do T, md + scale, offset = get_scale(md, scaled) + _mod(T, missingval, scale, offset, coerce) + end +end + +function _mod(::Type{T}, metadata, missingval; scaled::Bool, coerce) where T + scale, offset = get_scale(metadata, scaled) + _mod(T, missingval, scale, offset, coerce) +end +function _mod(::Type{T}, missingval, scale, offset, coerce) where T + if (isnokwornothing(missingval) || !(missingval isa Pair && !(isnothing(last(missingval))))) && + isnokwornothing(scale) && isnokwornothing(offset) + return NoMod{T}(missingval) + else + return Mod{T}(missingval, scale, offset, coerce) + end +end + +@inline get_scale(metadata::NoKW, scaled::Bool) = nothing, nothing +@inline function get_scale(metadata, scaled::Bool) + scale = scaled ? get(metadata, "scale", nothing) : nothing + offset = scaled ? get(metadata, "offset", nothing) : nothing + return scale, offset +end + +_mod_eltype(::AbstractArray{T}, ::NoMod) where T = T +_mod_eltype(::AbstractArray, m::Mod{T}) where T = T + +_mod_inverse_eltype(::AbstractArray{T}, ::NoMod) where T = T +_mod_inverse_eltype(::AbstractArray{T}, m::Mod) where T = + Base.promote_op(_invertmod, typeof(m.coerce), T, typeof(m)) + +_maybe_modify(var, m::Mod; kw...) = ModifiedDiskArray(var, m; kw...) +_maybe_modify(var, ::NoMod; kw...) = var + +_write_missingval_pair(A, missingval::Pair; kw...) = missingval +function _write_missingval_pair(A, missingval; + verbose=true, eltype, metadata=metadata(A), required=false +)::Pair + source_mv = Rasters.missingval(A) + disk_mv = if isnothing(source_mv) || isnothing(missingval) + if required + source_mv = isnothing(source_mv) ? missing : source_mv + _writeable_missing(eltype; verbose) + else + nothing + end + elseif isnokw(missingval) || ismissing(missingval) + # See if there is a missing value in metadata + md_mv = Rasters.missingval(metadata) + if isnothing(md_mv) + _writeable_missing(eltype; verbose) + else + md_mv + end + else + missingval + end + + return disk_mv => source_mv +end + +function _read_missingval_pair(var, metadata, missingval) + if isnokw(missingval) + mv = Rasters.missingval(var, metadata) + isnothing(mv) ? nothing => nothing : mv => missing + elseif isnothing(missingval) + nothing => nothing + elseif missingval isa Pair + # Pair: inner and outer missing values are manually defined + missingval + elseif missingval === Rasters.missingval + # `missingval` func: detect missing value and keep it as-is + mv = Rasters.missingval(var, metadata) + mv => mv + else + # Otherwise: detect missing value and convert it to `missingval` + Rasters.missingval(var, metadata) => missingval + end +end \ No newline at end of file diff --git a/src/nokw.jl b/src/nokw.jl index 7a6a4743f..89e0828e1 100644 --- a/src/nokw.jl +++ b/src/nokw.jl @@ -5,3 +5,8 @@ struct NoKW end const nokw = NoKW() @inline isnokw(::NoKW) = true @inline isnokw(_) = false +@inline isnokwornothing(::Union{NoKW,Nothing}) = true +@inline isnokwornothing(_) = false + +_nokw2nothing(::NoKW) = nothing +_nokw2nothing(x) = x diff --git a/src/openstack.jl b/src/openstack.jl index 84c3a0f04..0f3bec92c 100644 --- a/src/openstack.jl +++ b/src/openstack.jl @@ -12,15 +12,21 @@ contained in a single file. `X` is a backend type like `NCDsource`, and `K` is a tuple of `Symbol` keys. """ -struct OpenStack{X,K,T,DS} +struct OpenStack{X,K,T,DS,M} dataset::DS + mods::M +end +function OpenStack{X,K,T}( + dataset::DS, mods::M=NoMod() +) where {X,K,T,DS,M} + OpenStack{X,K,T,DS,M}(dataset, mods) end -OpenStack{X,K,T}(dataset::DS) where {X,K,T,DS} = OpenStack{X,K,T,DS}(dataset) dataset(os::OpenStack) = os.dataset # OpenStack has `X` and `K` parameter that is not recoverable from fields. -ConstructionBase.constructorof(::Type{<:OpenStack{X,K,T}}) where {X,K,T} = OpenStack{X,K,T} +ConstructionBase.constructorof(::Type{<:OpenStack{X,K,T}}) where {X,K,T} = + OpenStack{X,K,T} DD.data_eltype(::OpenStack{<:Any,<:Any,T}) where T = T @@ -28,4 +34,11 @@ Base.keys(::OpenStack{<:Any,K}) where K = K # TODO test this, and does it make sense to return an iterator here? Base.values(os::OpenStack{<:Any,K}) where K = (os[k] for k in K) # Indexing OpenStack returns memory-backed Raster. -Base.getindex(os::OpenStack, key::Symbol) = dataset(os)[key] +function Base.getindex(os::OpenStack{<:Any,K}, key::Symbol) where K + mods = os.mods + if mods isa AbstractModifications + _maybe_modify(dataset(os)[key], mods) + else + _maybe_modify(dataset(os)[key], NamedTuple{K}(mods)[key]) + end +end diff --git a/src/show.jl b/src/show.jl index 05f61bbc9..500a3ea56 100644 --- a/src/show.jl +++ b/src/show.jl @@ -19,9 +19,8 @@ function print_geo(io, mime, A; blockwidth) DD.print_block_separator(io, "raster", blockwidth) printstyled(io, "\n extent: "; color=:light_black) show(io, mime, Extents.extent(A)) - println(io) if missingval(A) !== nothing - printstyled(io, " missingval: "; color=:light_black) + printstyled(io, "\n missingval: "; color=:light_black) show(io, mime, missingval(A)) end if crs(A) !== nothing diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 5e9084da5..88f288a97 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -1,5 +1,7 @@ const CDM = CommonDataModel +const UNNAMED_FILE_KEY = "unnamed" + const CDM_DIM_MAP = Dict( "lat" => Y, "latitude" => Y, @@ -31,128 +33,50 @@ const CDM_STANDARD_NAME_MAP = Dict( "time" => Ti, ) -const UNNAMED_CDM_FILE_KEY = "unnamed" - - - -# CFDiskArray ######################################################################## - -struct CFDiskArray{T,N,TV,TA,TSA} <: DiskArrays.AbstractDiskArray{T,N} - var::CDM.CFVariable{T,N,TV,TA,TSA} -end - -# Rasters methods -FileArray{source}(var::CFDiskArray, filename::AbstractString; kw...) where source = - FileArray{source}(parent(var), filename; kw...) - -cleanreturn(A::CFDiskArray) = Array(A) -missingval(A::CFDiskArray) = missingval(parent(A)) - -# DimensionalData methods -_dims(var::CFDiskArray, args...) = _dims(parent(var), args...) -_metadata(var::CFDiskArray, args...) = _metadata(parent(var), args...) - -# Base methods -Base.parent(A::CFDiskArray) = A.var - -Base.getindex(os::OpenStack{<:CDMsource}, name::Symbol) = CFDiskArray(dataset(os)[name]) - -# DiskArrays.jl methods -function DiskArrays.readblock!(A::CFDiskArray, aout, i::AbstractUnitRange...) - aout .= getindex(parent(A), i...) -end -function DiskArrays.writeblock!(A::CFDiskArray, data, i::AbstractUnitRange...) - setindex!(parent(A), data, i...) - return data -end - -# We have to dig down to find the chunks as they are not implemented -# in the CDM, but they are in their internal objects. -DiskArrays.eachchunk(var::CFDiskArray) = _get_eachchunk(var) -DiskArrays.haschunks(var::CFDiskArray) = _get_haschunks(var) - -_get_eachchunk(var::CFDiskArray) = _get_eachchunk(parent(var)) -_get_eachchunk(var::CDM.CFVariable) = _get_eachchunk(var.var) -_get_haschunks(var::CFDiskArray) = _get_haschunks(parent(var)) -_get_haschunks(var::CDM.CFVariable) = _get_haschunks(var.var) - -_sourcetrait(var::CFDiskArray) = _sourcetrait(parent(var)) _sourcetrait(var::CDM.CFVariable) = _sourcetrait(var.var) -# CommonDataModel.jl methods -for method in (:size, :name, :dimnames, :dataset, :attribnames) - @eval begin - CDM.$(method)(var::CFDiskArray) = CDM.$(method)(parent(var)) - end -end - -for method in (:attrib, :dim) - @eval begin - CDM.$(method)(var::CFDiskArray, name::CDM.SymbolOrString) = CDM.$(method)(parent(var), name) - end -end - # Rasters methods for CDM types ############################### -function FileArray{source}(var::AbstractVariable, filename::AbstractString; kw...) where source<:CDMsource - eachchunk = DA.eachchunk(var) - haschunks = DA.haschunks(var) - T = eltype(var) - N = ndims(var) - FileArray{source,T,N}(filename, size(var); eachchunk, haschunks, kw...) -end - -function FileStack{source}( - ds::AbstractDataset, filename::AbstractString; +function FileStack{source}(ds::AbstractDataset, filename::AbstractString; write::Bool=false, group=nokw, name::NTuple{N,Symbol}, - vars + mods, + vars, ) where {source<:CDMsource,N} - T = NamedTuple{name,Tuple{map(var -> Union{Missing,eltype(var)}, vars)...}} + T = NamedTuple{name,Tuple{map(_mod_eltype, vars, mods)...}} layersizes = map(size, vars) - eachchunk = map(_get_eachchunk, vars) - haschunks = map(_get_haschunks, vars) + eachchunk = map(DiskArrays.eachchunk, vars) + haschunks = map(DiskArrays.haschunks, vars) group = isnokw(group) ? nothing : group - return FileStack{source,name,T}(filename, layersizes, group, eachchunk, haschunks, write) -end - -function Base.open(f::Function, A::FileArray{source}; write=A.write, kw...) where source<:CDMsource - _open(source(), filename(A); name=name(A), group=A.group, write, kw...) do var - f(var) - end + return FileStack{source,name,T}(filename, layersizes, group, eachchunk, haschunks, mods, write) end -function _open(f, ::CDMsource, ds::AbstractDataset; name=nokw, group=nothing, kw...) +function _open(f, ::CDMsource, ds::AbstractDataset; + name=nokw, + group=nothing, + mod=NoMod(), + kw... +) g = _getgroup(ds, group) - x = isnokw(name) ? g : CFDiskArray(g[_firstname(g, name)]) - cleanreturn(f(x)) + x = if isnokw(name) + g + else + v = CDM.variable(g, string(_name_or_firstname(g, name))) + _maybe_modify(v, mod) + end + return cleanreturn(f(x)) end -_open(f, ::CDMsource, var::CFDiskArray; kw...) = cleanreturn(f(var)) +_open(f, ::CDMsource, var::AbstractArray; mod=NoMod(), kw...) = + cleanreturn(f(_maybe_modify(var, mod))) # This allows arbitrary group nesting _getgroup(ds, ::Union{Nothing,NoKW}) = ds _getgroup(ds, group::Union{Symbol,AbstractString}) = ds.group[String(group)] _getgroup(ds, group::Pair) = _getgroup(ds.group[String(group[1])], group[2]) -function create(filename, source::CDMsource, T::Type, dims::DimTuple; - name=nokw, - missingval=nokw, - metadata=nokw, - lazy=true, - verbose=true, - chunks=nokw, -) - # Create layers of zero arrays - A = FillArrays.Zeros{T}(map(length, dims)) - rast = Raster(A, dims; name, missingval, metadata) - write(filename, source, rast; chunks) - return Raster(filename; metadata, source, lazy) -end - -filekey(ds::AbstractDataset, name) = _firstname(ds, name) -missingval(var::AbstractDataset) = missing -missingval(var::AbstractVariable{T}) where T = missing isa T ? missing : nothing +filekey(ds::AbstractDataset, name::Union{String,Symbol}) = Symbol(name) +filekey(ds::AbstractDataset, name) = _name_or_firstname(ds, name) cleanreturn(A::AbstractVariable) = Array(A) haslayers(::CDMsource) = true defaultcrs(::CDMsource) = EPSG(4326) @@ -183,7 +107,7 @@ end function _layers(ds::AbstractDataset, ::NoKW=nokw, ::NoKW=nokw) nondim = _nondimnames(ds) grid_mapping = String[] - vars = map(k -> ds[k], nondim) + vars = map(k -> CDM.variable(ds, k), nondim) attrs = map(CDM.attribs, vars) for attr in attrs if haskey(attr, "grid_mapping") @@ -197,9 +121,10 @@ function _layers(ds::AbstractDataset, ::NoKW=nokw, ::NoKW=nokw) attrs=attrs[bitinds], ) end - -function _layers(ds::AbstractDataset, names, ::NoKW) - vars = map(k -> ds[k], names) +_layers(ds::AbstractDataset, names, ::NoKW) = + _layers(ds, collect(names), nokw) +function _layers(ds::AbstractDataset, names::Vector, ::NoKW) + vars = map(n -> CDM.variable(ds, n), names) attrs = map(CDM.attribs, vars) (; names, vars, attrs) end @@ -258,11 +183,11 @@ end # Utils ######################################################################## # TODO don't load all keys here with _layers -_firstname(ds::AbstractDataset, name) = Symbol(name) -function _firstname(ds::AbstractDataset, name::NoKW=nokw) +_name_or_firstname(ds::AbstractDataset, name) = Symbol(name) +function _name_or_firstname(ds::AbstractDataset, name::Union{Nothing,NoKW}=nokw) names = _nondimnames(ds) if length(names) > 0 - Symbol(first(names)) + return Symbol(first(names)) else throw(ArgumentError("No non-dimension layers found in dataset with keys: $(keys(ds))")) end @@ -475,7 +400,14 @@ function _parse_period(period_str::String) end end -_attribdict(md::Metadata{<:CDMsource}) = Dict{String,Any}(string(k) => v for (k, v) in md) +function _attribdict(md::Metadata{<:CDMsource}) + attrib = Dict{String,Any}() + for (k, v) in md + v isa Tuple && continue + attrib[string(k)] = v + end + return attrib +end _attribdict(md) = Dict{String,Any}() # Add axis and standard name attributes to dimension variables @@ -508,46 +440,74 @@ end _unuseddimerror(dimname) = error("Dataset contains unused dimension $dimname") - # Add a var array to a dataset before writing it. -function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; +function _writevar!(ds::AbstractDataset, source::CDMsource, A::AbstractRaster{T,N}; verbose=true, missingval=nokw, + metadata=nokw, chunks=nokw, chunksizes=_chunks_to_tuple(A, dims(A), chunks), + scale=nokw, + offset=nokw, + coerce=convert, + eltype=Missings.nonmissingtype(T), + write=true, + name=DD.name(A), + options=nokw, + driver=nokw, + f=identity, kw... ) where {T,N} - missingval = missingval isa NoKW ? Rasters.missingval(A) : missingval + _check_allowed_type(source, eltype) + write = f === identity ? write : true _def_dim_var!(ds, A) - attrib = _attribdict(metadata(A)) - # Set _FillValue - eltyp = Missings.nonmissingtype(T) - _check_allowed_type(_sourcetrait(ds), eltyp) - if ismissing(missingval) - fillval = if haskey(attrib, "_FillValue") && attrib["_FillValue"] isa eltyp - attrib["_FillValue"] - else - CDM.fillvalue(eltyp) - end - attrib["_FillValue"] = fillval - A = replace_missing(A, fillval) - elseif Rasters.missingval(A) isa T - attrib["_FillValue"] = missingval + metadata = if isnokw(metadata) + DD.metadata(A) + elseif isnothing(metadata) + NoMetadata() + else + metadata + end + + missingval_pair = _write_missingval_pair(A, missingval; eltype, verbose, metadata) + + attrib = _attribdict(metadata) + # Scale and offset + scale = if isnokw(scale) || isnothing(scale) + delete!(attrib, "scale_factor") + nothing + else + attrib["scale_factor"] = scale + end + offset = if isnokw(offset) || isnothing(offset) + delete!(attrib, "add_offset") + nothing else - verbose && !(missingval isa Nothing) && @warn "`missingval` $(missingval) is not the same type as your data $T." + attrib["add_offset"] = offset end - key = if string(DD.name(A)) == "" - UNNAMED_CDM_FILE_KEY + mod = _mod(eltype, missingval_pair, scale, offset, coerce) + + if !isnothing(missingval_pair[1]) + attrib["_FillValue"] = missingval_pair[1] + end + + key = if isnokw(name) || string(name) == "" + UNNAMED_FILE_KEY else - string(DD.name(A)) + string(name) end - dimnames = lowercase.(string.(map(name, dims(A)))) - var = CDM.defVar(ds, key, eltyp, dimnames; attrib=attrib, chunksizes, kw...) |> CFDiskArray + dimnames = lowercase.(string.(map(Rasters.name, dims(A)))) + var = CDM.defVar(ds, key, eltype, dimnames; attrib, chunksizes, kw...) - # Write with a DiskArrays.jl broadcast - var .= A + if write + m = _maybe_modify(var.var, mod) + # Write with a DiskArays.jl broadcast + m .= A + # Apply `f` while the variable is open + f(m) + end return nothing end diff --git a/src/sources/grd.jl b/src/sources/grd.jl index 1ebc46b8c..b7f7556ba 100644 --- a/src/sources/grd.jl +++ b/src/sources/grd.jl @@ -41,6 +41,16 @@ filename(grd::GRDdataset) = grd.filename filekey(grd::GRDdataset, name::NoKW) = get(attrib(grd), "layername", Symbol("")) filekey(A::RasterDiskArray{GRDsource}, name) = filekey(A.attrib, name) +# Already open, doesn't use `name` +function _open(f, ::GRDsource, A::RasterDiskArray{GRDsource}; + name=nokw, + group=nokw, + mod=NoMod(), + kw... +) + cleanreturn(f(_maybe_modify(A, mod))) +end + Base.eltype(::GRDdataset{T}) where T = T function Base.size(grd::GRDdataset) ncols = parse(Int, grd.attrib["ncols"]) @@ -59,7 +69,12 @@ DiskArrays.haschunks(::GRDdataset) = DiskArrays.Unchunked() function _dims(A::RasterDiskArray{GRDsource}, crs=nokw, mappedcrs=nokw) attrib = A.attrib.attrib - crs = crs isa NoKW ? ProjString(attrib["projection"]) : crs + crs = if crs isa NoKW + str = attrib["projection"] + str == "" ? nothing : ProjString(str) + else + crs + end mappedcrs = mappedcrs isa NoKW ? nothing : mappedcrs xsize, ysize, nbands = size(A) @@ -115,14 +130,18 @@ function _metadata(A::RasterDiskArray{GRDsource}, args...) metadata end -function missingval(A::RasterDiskArray{GRDsource,T}) where T - if haskey(A.attrib.attrib, "nodatavalue") - ndv = A.attrib.attrib["nodatavalue"] +function missingval(A::RasterDiskArray{GRDsource,T}, args...) where T + _grd_mv(T, A.attrib.attrib) +end + +function _grd_mv(::Type{T}, md; verbose=true) where T + if haskey(md, "nodatavalue") + ndv = md["nodatavalue"] ndv === "nothing" && return nothing try return parse(T, ndv) catch - @warn "nodatavalue $(ndv) is not convertible to data type $T. `missingval` set to `nothing`." + verbose && @warn "nodatavalue $(ndv) is not convertible to data type $T. `missingval` set to `nothing`." return nothing end else @@ -133,16 +152,6 @@ end _sizeof(A::GRDdataset{T}) where T = sizeof(T) * prod(size(A)) _sizeof(A::RasterDiskArray{GRDsource}) = _sizeof(A.attrib) - -# Array ######################################################################## - -function FileArray{GRDsource}(A::RasterDiskArray{<:Any,T}, filename=filename(A.attrib); kw...) where T - filename = first(splitext(filename)) - eachchunk = DiskArrays.eachchunk(A) - haschunks = DiskArrays.haschunks(A) - FileArray{GRDsource,T,3}(filename, size(A); eachchunk, haschunks, kw...) -end - # Base methods """ @@ -163,13 +172,31 @@ Returns the base of `filename` with a `.grd` extension. """ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; force=false, + verbose=true, + write=true, missingval=nokw, chunks=nokw, + scale=nokw, + offset=nokw, + coerce=nokw, + eltype=Missings.nonmissingtype(eltype(A)), + f=identity, kw... ) - chunks isa NoKW || @warn "specifying chunks not supported for .grd files" check_can_write(filename, force) - A = _maybe_use_type_missingval(A, GRDsource(), missingval) + write = f === identity ? write : true + haskey(REVGRD_DATATYPE_TRANSLATION, eltype) || throw(ArgumentError(""" + Element type $eltype cannot be written to grd file. Convert it to one of $(keys(REVGRD_DATATYPE_TRANSLATION)), + usually by broadcasting the desired type constructor over the `Raster`, e.g. `newrast = Float32.(rast)`")) + """ + )) + isnokwornothing(scale) && isnokwornothing(offset) || throw(ArgumentError("Cant write scale or offset to .grd files")) + chunks isa NoKW || @warn "specifying chunks not supported for .grd files" + # Missing values + missingval_pair = _write_missingval_pair(A, missingval; eltype, verbose) + + # Missing values + if hasdim(A, Band) correctedA = permutedims(A, (X, Y, Band)) |> a -> reorder(a, (X(GRD_X_ORDER), Y(GRD_Y_ORDER), Band(GRD_BAND_ORDER))) @@ -181,19 +208,40 @@ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; end # Remove extension filename = splitext(filename)[1] - minvalue = minimum(skipmissing(A)) - maxvalue = maximum(skipmissing(A)) - _write_grd(filename, eltype(A), dims(A), Rasters.missingval(A), minvalue, maxvalue, name(A)) - # Data: gri file - open(filename * ".gri", write=true) do IO - write(IO, parent(correctedA)) + # Data: write a raw gri file from the array + mod = _mod(eltype, missingval_pair, scale, offset, coerce) + gri_filename = filename * ".gri" + isfile(gri_filename) && rm(gri_filename) + _write_gri(gri_filename, Val{source_eltype(mod)}(), mod, parent(correctedA)) + _write_grd(filename, eltype, dims(A), missingval_pair[1], name(A)) + + if write + _mmapgrd(filename, source_eltype(mod), size(A); write=true) do M + f(rebuild(A, _maybe_modify(M, mod))) + end end return filename * ".grd" end -function _write_grd(filename, T, dims, missingval, minvalue, maxvalue, name) +function _write_gri(filename, v, ::NoMod, A::Array{T}) where T + open(filename; write=true, lock=false) do io + write(io, A) + end +end +function _write_gri(filename, v, mod, A::AbstractArray) + open(filename; write=true, lock=false) do io + # Avoid `Ref` allocations + ref = Ref{source_eltype(mod)}(_invertmod(v, first(A), mod)) + for x in A # We are modifying the source array so invert the modifications + ref[] = _invertmod(v, x, mod) + write(io, ref) + end + end +end + +function _write_grd(filename, T, dims, missingval, name) filename = splitext(filename)[1] x, y = map(DD.dims(dims, (X(), Y()))) do d @@ -204,12 +252,12 @@ function _write_grd(filename, T, dims, missingval, minvalue, maxvalue, name) ncols, nrows = length(x), length(y) xmin, xmax = bounds(x) ymin, ymax = bounds(y) - proj = convert(String, convert(ProjString, crs(x))) + proj = isnothing(crs(x)) ? "" : convert(String, convert(ProjString, crs(x))) datatype = REVGRD_DATATYPE_TRANSLATION[T] nodatavalue = missingval # Metadata: grd file - open(filename * ".grd"; write=true) do IO + open(filename * ".grd"; write=true, lock=false) do IO write(IO, """ [general] @@ -228,8 +276,6 @@ function _write_grd(filename, T, dims, missingval, minvalue, maxvalue, name) nodatavalue= $nodatavalue byteorder= little nbands= $nbands - minvalue= $minvalue - maxvalue= $maxvalue [description] layername= $name """ @@ -237,44 +283,31 @@ function _write_grd(filename, T, dims, missingval, minvalue, maxvalue, name) end end -function create(filename, ::GRDsource, T::Type, dims::DD.DimTuple; - name="layer", metadata=nothing, missingval=nothing, lazy=true, +# Rasters methods +function _open(f, ::GRDsource, filename::AbstractString; + mod=NoMod(), + write=false, + kw... ) - # Remove extension - basename = splitext(filename)[1] - minvalue = maxvalue = zero(T) - sze = map(length, DD.dims(dims, (XDim, YDim, Band))) - - # Metadata: grd file - _write_grd(basename, T, dims, missingval, minvalue, maxvalue, name) - - # Data: gri file - open(basename * ".gri", write=true) do IO - write(IO, FillArrays.Zeros(sze)) - end - return Raster(filename; source=GRDsource(), lazy) -end - -# AbstractRasterStack methods - -# Custom `open` because the data and metadata objects are separate -# Here we _mmapgrd instead of `_open` -function Base.open(f::Function, A::FileArray{GRDsource}, args...; write=A.write) - _mmapgrd(mm -> f(RasterDiskArray{GRDsource}(mm, A.eachchunk, A.haschunks)), A; write) -end - -function _open(f, ::GRDsource, filename::AbstractString; write=false, name=nokw, group=nokw) isfile(filename) || _filenotfound_error(filename) attr = GRDdataset(filename) _mmapgrd(attr; write) do mm A = RasterDiskArray{GRDsource}(mm, DA.eachchunk(attr), DA.haschunks(attr), attr) - f(A) + A1 = _maybe_modify(A, mod) + f(A1) end end _open(f, ::GRDsource, attrib::GRDdataset; kw...) = f(attrib) +function _open(f, ::GRDsource, A::RasterDiskArray; + mod=NoMod(), + kw... +) + cleanreturn(f(_maybe_modify(A, mod))) +end haslayers(::GRDsource) = false + # Utils ######################################################################## function _mmapgrd(f, x::Union{FileArray,GRDdataset}; kw...) diff --git a/src/stack.jl b/src/stack.jl index 796793e73..8fe81df14 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -68,15 +68,24 @@ function DD.layers(s::AbstractRasterStack{<:Any,<:Any,<:Any,<:OpenStack{<:Any,Ke end function DD.rebuild( - s::AbstractRasterStack, data, dims=dims(s), refdims=refdims(s), - layerdims=DD.layerdims(s), metadata=metadata(s), layermetadata=DD.layermetadata(s), + s::AbstractRasterStack, + data, + dims=dims(s), + refdims=refdims(s), + layerdims=DD.layerdims(s), + metadata=metadata(s), + layermetadata=DD.layermetadata(s), missingval=missingval(s), ) DD.basetypeof(s)(data, dims, refdims, layerdims, metadata, layermetadata, missingval) end function DD.rebuild(s::AbstractRasterStack; - data=parent(s), dims=dims(s), refdims=refdims(s), layerdims=DD.layerdims(s), - metadata=metadata(s), layermetadata=DD.layermetadata(s), + data=parent(s), + dims=dims(s), + refdims=refdims(s), + layerdims=DD.layerdims(s), + metadata=metadata(s), + layermetadata=DD.layermetadata(s), missingval=missingval(s), ) DD.basetypeof(s)( @@ -158,19 +167,21 @@ Load a file path or a `NamedTuple` of paths as a `RasterStack`, or convert argum - `name`: Used as stack layer names when a `Tuple`, `Vector` or splat of `Raster` is passed in. Has no effect when `NameTuple` is used - the `NamedTuple` keys are the layer names. -$GROUP_KEYWORD +$GROUP_KEYWORD - `metadata`: A `Dict` or `DimensionalData.Metadata` object. -- `missingval`: a single value for all layers or a `NamedTuple` of - missingval for each layer. `nothing` specifies no missing value. -$CONSTRUCTOR_CRS_KEYWORD -$CONSTRUCTOR_MAPPEDCRS_KEYWORD +$MISSINGVAL_KEYWORD + For `RasterStack` a `NamedTuple` can also be passed if layers + should have different `missingval`. +$CONSTRUCTOR_CRS_KEYWORD +$CONSTRUCTOR_MAPPEDCRS_KEYWORD - `refdims`: `Tuple` of `Dimension` that the stack was sliced from. For when one or multiple filepaths are used: $DROPBAND_KEYWORD $LAZY_KEYWORD -$REPLACE_MISSING_KEYWORD +$RAW_KEYWORD +$SCALED_KEYWORD $SOURCE_KEYWORD For when a single `Raster` is used: @@ -216,16 +227,22 @@ function RasterStack( missingval=nokw, kw... ) - # Handle values that must be be `NamedTuple` + K = keys(data) + # Handle values that musbe be `NamedTuple` layermetadata = if layermetadata isa NamedTuple layermetadata - elseif layermetadata isa Union{NoKW,Nothing,NoMetadata} - map(_ -> NoMetadata(), layerdims) + elseif layermetadata isa Union{Nothing,NoKW,NoMetadata} + NamedTuple{K}(map(_ -> NoMetadata(), K)) else throw(ArgumentError("$layermetadata is not a valid input for `layermetadata`. Try a `NamedTuple` of `Dict`, `MetaData` or `NoMetadata`")) end metadata = isnokw(metadata) ? NoMetadata() : metadata missingval = _maybe_collapse_missingval(missingval) + layerdims = if layerdims isa NamedTuple + layerdims + else + NamedTuple{K}(ntuple(i -> layerdims[i], Val{length(K)}())) + end st = RasterStack( data, dims, refdims, layerdims, metadata, layermetadata, missingval ) @@ -233,7 +250,7 @@ function RasterStack( end # Convert Tuple/Array of array to NamedTuples using name/key function RasterStack(data::Tuple{Vararg{AbstractArray}}, dims::Tuple; - name::Union{Tuple,AbstractArray,NamedTuple,Nothing}=nokw, + name::Union{Tuple,AbstractArray,NamedTuple,Nothing}=nokw, kw... ) isnokw(name) && throw(ArgumentError("Pass a Tuple, Array or NamedTuple of names to the `name` keyword")) @@ -246,7 +263,7 @@ function RasterStack(data::NamedTuple{<:Any,<:Tuple{Vararg{AbstractArray}}}, dim ) if isnokw(layerdims) # TODO: make this more sophisticated and match dimension length to axes? - # We don't worry about Raster keywords because these rasters will be deconstructed + # We don't worry about Raster keywords because these rasters will be deconstructed # again later, and `kw` will define the RasterStack keywords layers = map(data) do A Raster(A, dims[1:ndims(A)]) @@ -274,7 +291,7 @@ function RasterStack(layers::NamedTuple{K,<:Tuple{Vararg{AbstractDimArray}}}; refdims::Tuple=(), missingval=map(missingval, _layers), metadata=NoMetadata(), - layermetadata=map(DD.metadata, _layers), + layermetadata::NamedTuple{K}=map(DD.metadata, _layers), layerdims::NamedTuple{K}=map(DD.basedims, _layers), kw... ) where K @@ -297,7 +314,7 @@ function RasterStack(table, dims::Tuple; layers = map(name) do k col = Tables.getcolumn(table, k) reshape(col, map(length, dims)) - end |> NamedTuple{name} + end |> NamedTuple{cleankeys(name)} end return RasterStack(layers, dims; kw...) end @@ -332,8 +349,8 @@ function RasterStack(s::DD.AbstractDimStack; data=parent(s), dims::Union{Tuple,NoKW}=dims(s), refdims::Tuple=refdims(s), - layerdims=DD.layerdims(s), metadata=metadata(s), + layerdims=DD.layerdims(s), layermetadata=DD.layermetadata(s), missingval=missingval(s), kw... @@ -348,21 +365,33 @@ function RasterStack( name=map(filekey, filenames), kw... ) - RasterStack(NamedTuple{cleankeys(Tuple(name))}(filenames); kw...) + RasterStack(NamedTuple{cleankeys(name)}(filenames); kw...) end function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}}; source=nokw, - missingval=nokw, metadata=nokw, resize=nokw, layermetadata::Union{NoKW,NamedTuple{K}}=nokw, layerdims::Union{NoKW,NamedTuple{K}}=nokw, + missingval=nokw, + replace_missing=nokw, + scaled=nokw, + raw=false, + verbose=true, kw... ) where K - missingval = missingval isa NamedTuple ? missingval : map(_ -> missingval, filenames) - layermetadata = layermetadata isa NamedTuple ? layermetadata : map(_ -> layermetadata, filenames) - layers = map(keys(filenames), values(filenames), values(missingval), values(layermetadata)) do name, fn, mv, md - Raster(fn; source=_sourcetrait(fn, source), name, missingval=mv, metadata=md, kw...) + _maybe_warn_replace_missing(replace_missing) + scaled, missingval = _raw_check(raw, scaled, missingval, verbose) + + layermissingval = collect(_stack_nt(filenames, missingval)) + fn = collect(filenames) + layermetadata = layermetadata isa NamedTuple ? collect(layermetadata) : map(_ -> NoKW(), fn) + layerdims = layerdims isa NamedTuple ? collect(layerdims) : map(_ -> NoKW(), fn) + layers = map(K, fn, layermetadata, layerdims, layermissingval) do name, fn, md, d, mv + Raster(fn; + source=_sourcetrait(fn, source), + dims=d, name, metadata=md, missingval=mv, scaled, verbose, kw... + ) end return RasterStack(NamedTuple{K}(layers); resize, metadata) end @@ -370,12 +399,20 @@ end function RasterStack(filename::AbstractString; lazy::Bool=false, dropband::Bool=true, - replace_missing::Bool=false, + raw::Bool=false, source::Union{Symbol,Source,NoKW}=nokw, + missingval=nokw, name=nokw, - group=nokw, + group::Union{Symbol,AbstractString,NoKW}=nokw, + scaled::Union{Bool,NoKW}=nokw, + coerce=nokw, + verbose::Bool=true, + replace_missing=nokw, # deprecated kw... ) + _maybe_warn_replace_missing(replace_missing) + scaled, missingval = _raw_check(raw, scaled, missingval, verbose) + source = _sourcetrait(filename, source) st = if isdir(filename) && !(source isa Zarrsource) # Load as a whole directory @@ -383,19 +420,21 @@ function RasterStack(filename::AbstractString; length(filenames) > 0 || throw(ArgumentError("No files in directory $filename")) # Detect keys from names name = if isnokw(name) - all_shared = true stripped = lstrip.(x -> x in (" ", "_"), (x -> x[1:end]).(filenames)) Symbol.(replace.(first.(splitext.(stripped)), Ref(" " => "_"))) else name end - RasterStack(joinpath.(Ref(filename), filenames); lazy, replace_missing, dropband, group, kw...) + RasterStack(joinpath.(Ref(filename), filenames); + missingval, scaled, coerce, lazy, dropband, group, kw... + ) else # Load as a single file if haslayers(source) # With multiple named layers - l_st = _layer_stack(filename; source, name, lazy, group, replace_missing, kw...) - + l_st = _layer_stack(filename; + source, name, lazy, group, missingval, scaled, coerce, kw... + ) # Maybe split the stack into separate arrays to remove extra dims. if isnokw(name) l_st @@ -404,20 +443,15 @@ function RasterStack(filename::AbstractString; end else # With bands actings as layers - RasterStack(Raster(filename; source, lazy, replace_missing, dropband=false); kw...) + raster = Raster(filename; + source, lazy, missingval, scaled, coerce, dropband=false, + ) + RasterStack(raster; kw...) end end # Maybe drop the Band dimension - if dropband && hasdim(st, Band()) && size(st, Band()) == 1 - if lazy - return view(st, Band(1)) # TODO fix dropdims in DiskArrays - else - return dropdims(st; dims=Band()) - end - else - return st - end + return _maybe_drop_single_band(st, dropband, lazy) end # TODO test this properly @@ -478,12 +512,14 @@ function _layer_stack(filename; name=nokw, group=nokw, metadata=nokw, - layerdims=nokw, layermetadata=nokw, + layerdims=nokw, missingval=nokw, crs=nokw, mappedcrs=nokw, - replace_missing=false, + coerce=convert, + scaled=nokw, + checkmem=true, lazy=false, kw... ) @@ -493,29 +529,54 @@ function _layer_stack(filename; dimdict = _dimdict(ds, crs, mappedcrs) refdims = isnokw(refdims) || isnothing(refdims) ? () : refdims metadata = isnokw(metadata) ? _metadata(ds) : metadata - layerdims = isnokw(layerdims) ? _layerdims(ds; layers, dimdict) : layerdims - dims = _sort_by_layerdims(isnokw(dims) ? _dims(ds, dimdict) : dims, layerdims) - layermetadata = isnokw(layermetadata) ? _layermetadata(ds; layers) : layermetadata - missingval = isnokw(missingval) ? Rasters.missingval(ds) : missingval + layerdims_vec = isnokw(layerdims) ? _layerdims(ds; layers, dimdict) : layerdims + dims = _sort_by_layerdims(isnokw(dims) ? _dims(ds, dimdict) : dims, layerdims_vec) + layermetadata_vec = if isnokw(layermetadata) + _layermetadata(ds; layers) + else + layermetadata isa NamedTuple ? collect(layermetadata) : map(_ -> NoKW(), fn) + end name = Tuple(map(Symbol, layers.names)) + NT = NamedTuple{name} + layer_mvs = map(Rasters.missingval, layers.vars, layermetadata_vec) + missingval_vec = if isnokw(missingval) + layer_mvs .=> missing + elseif missingval isa NamedTuple + keys(missingval) == name || throw(ArgumentError("`missingval` names $(keys(missingval)) do not match layer names $name")) + layer_mvs .=> collect(missingval) + elseif missingval === Rasters.missingval + layer_mvs + else + layer_mvs .=> (missingval,) # Wrap in case its not iterable + end::Vector + eltype_vec = map(eltype, layers.vars) + mod_vec = _stack_mods(eltype_vec, layermetadata_vec, missingval_vec; scaled, coerce) data = if lazy - # TODO replace_missing is currently always true for - # CommonDataModel FileStack. We should change this. - FileStack{typeof(source)}(ds, filename; name, group, vars=Tuple(layers.vars)) + vars = ntuple(i -> layers.vars[i], length(name)) + mods = ntuple(i -> mod_vec[i], length(name)) + FileStack{typeof(source)}(ds, filename; name, group, mods, vars) else - map(layers.vars) do v - x = Array(replace_missing ? _replace_missing(v, missingval) : v) + map(layers.vars, layermetadata_vec, mod_vec) do var, md, mod + modvar = _maybe_modify(var, mod) + checkmem && _checkobjmem(modvar) + x = Array(modvar) x isa AbstractArray ? x : fill(x) # Catch an NCDatasets bug - end |> NamedTuple{name} - end - if replace_missing - missingval = missing + end |> NT end - data, (; dims, refdims, layerdims=NamedTuple{name}(layerdims), metadata, layermetadata=NamedTuple{name}(layermetadata), missingval) + mv_outer = NT(map(_outer_missingval, mod_vec)) + return data, (; + dims, + refdims, + layerdims=NT(layerdims_vec), + metadata, + layermetadata=NT(layermetadata_vec), + missingval=mv_outer, + ) end return RasterStack(data; field_kw..., kw...) end + # Try to sort the dimensions by layer dimension into a sensible # order that applies without permutation, preferencing the layers # with most dimensions, and those that come first. diff --git a/src/utils.jl b/src/utils.jl index 3de723b03..41c11f609 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,27 +1,29 @@ +# File paths, urls and strings + filter_ext(path, ext::AbstractString) = filter(fn -> splitext(fn)[2] == ext, readdir(path; join=true)) filter_ext(path, exts::Union{Tuple,AbstractArray}) = filter(fn -> splitext(fn)[2] in exts, readdir(path; join=true)) filter_ext(path, ext::Nothing) = readdir(path; join=true) -cleankeys(name) = (_cleankey(name),) -function cleankeys(keys::Union{NamedTuple,Tuple,AbstractArray}) - Tuple(map(_cleankey, keys, ntuple(i -> i, length(keys)))) -end - -function _cleankey(name::Union{Symbol,AbstractString,Name,NoName}, i=1) - if name in (NoName(), Symbol(""), Name(Symbol(""))) - Symbol("layer$i") +_maybe_add_suffix(filename::Nothing, suffix) = nothing +_maybe_add_suffix(filename::Nothing, suffix::Union{Nothing,NoKW}) = nothing +_maybe_add_suffix(filename, suffix::Union{Nothing,NoKW}) = filename +function _maybe_add_suffix(filename, suffix) + base, ext = splitext(filename) + if string(suffix) == "" + filename else - Symbol(name) + return string(base, "_", suffix, ext) end end -nolookup_to_sampled(A) = rebuild(A; dims=nolookup_to_sampled(dims(A))) -nolookup_to_sampled(dims::DimTuple) = map(nolookup_to_sampled, dims) -nolookup_to_sampled(d::Dimension) = - lookup(d) isa NoLookup ? set(d, Sampled(; sampling=Points())) : d +# Modified from IsURL.jl, many thanks to @zlatanvasovic +const WINDOWSREGEX = r"^[a-zA-Z]:[\\]" +const URLREGEX = r"^[a-zA-Z][a-zA-Z\d+\-.]*:" + +_isurl(str::AbstractString) = !occursin(WINDOWSREGEX, str) && occursin(URLREGEX, str) function _maybe_use_type_missingval(A::AbstractRaster{T}, source::Source, missingval=nokw) where T if ismissing(Rasters.missingval(A)) @@ -34,16 +36,17 @@ function _maybe_use_type_missingval(A::AbstractRaster{T}, source::Source, missin end end -# Create a standardised Metadata object of source T, containing a `Dict{String,Any}` -_metadatadict(s::Source, p1::Pair, pairs::Pair...) = - _metadatadict(s, (p1, pairs...)) -_metadatadict(::S) where S<:Source = Metadata{S}(Dict{String,Any}()) -function _metadatadict(::S, pairs) where S<:Source - dict = Dict{String,Any}() - for (k, v) in pairs - dict[String(k)] = v +cleankeys(name) = (_cleankey(name),) +function cleankeys(keys::Union{NamedTuple,Tuple,AbstractArray}) + Tuple(map(_cleankey, keys, ntuple(i -> i, length(keys)))) +end + +function _cleankey(name::Union{Symbol,AbstractString,Name,NoName}, i=1) + if name in (NoName(), Symbol(""), Name(Symbol(""))) + Symbol("layer$i") + else + Symbol(name) end - return Metadata{S}(dict) end # We often need to convert the locus and the lookup in the same step, @@ -74,129 +77,158 @@ end # _convert_by_lookup(::Type{Projected}, dim) = shiftlocus(Center(), convertlookup(Projected, dim)) -_unwrap(::Val{X}) where X = X -_unwrap(x) = x +# Missing values -_missingval_or_missing(x) = _maybe_nothing_to_missing(missingval(x)) +_missingval_or_missing(x) = _maybe_to_missing(missingval(x)) -_maybe_nothing_to_missing(::Nothing) = missing -_maybe_nothing_to_missing(missingval) = missingval +_maybe_to_missing(::Union{Nothing,NoKW}) = missing +_maybe_to_missing(missingval) = missingval -maybe_eps(dims::DimTuple; kw...) = map(maybe_eps, dims; kw...) -maybe_eps(dim::Dimension; kw...) = maybe_eps(eltype(dim); kw...) -maybe_eps(x; kw...) = maybe_eps(typeof(x); kw...) -maybe_eps(::Type; kw...) = nothing -maybe_eps(T::Type{<:AbstractFloat}; kw...) = _default_eps(T; kw...) - -# These are pretty random defaults, but seem to work -_default_eps(T::Type{<:Float32}; grow=true) = grow ? eps(T) : 100eps(T) -_default_eps(T::Type{<:Float64}; grow=true) = grow ? eps(T) : 1000eps(T) -_default_eps(T::Type{<:Integer}) = T(1) -_default_eps(::Type) = nothing - -_writeable_missing(filename::Nothing, T) = missing -_writeable_missing(filename::AbstractString, T) = _writeable_missing(T) -function _writeable_missing(T) - missingval = _type_missingval(Missings.nonmissingtype(T)) - @info "`missingval` set to $missingval" +_writeable_missing(filename::Nothing, T; kw...) = missing +_writeable_missing(filename::AbstractString, T; kw...) = _writeable_missing(T; kw...) +_writeable_missing(::Type{Missing}; verbose=true) = _writeable_missing(UInt8; verbose=true) +function _writeable_missing(T; verbose=true) + missingval = _type_missingval(T) + verbose && @info "`missingval` set to $missingval on disk" return missingval end -# Map filename suffix over a stack -function mapargs(f, st::AbstractRasterStack, args...) - layers = map(values(st), args...) do A, mappedargs... - f(A, mappedargs...) - end - return DD.rebuild_from_arrays(st, Tuple(layers)) -end - -_without_mapped_crs(f, x) = _without_mapped_crs(f, x, mappedcrs(x)) -_without_mapped_crs(f, x, ::Nothing) = f(x) -function _without_mapped_crs(f, dims::DimTuple, mappedcrs::GeoFormat) - dims1 = setmappedcrs(dims, nothing) - x = f(dims1) - return if x isa DimTuple - setmappedcrs(x, mappedcrs) - else - x - end -end -function _without_mapped_crs(f, A::AbstractRaster, mappedcrs::GeoFormat) - A = setmappedcrs(A, nothing) - x = f(A) - return if x isa AbstractRaster - setmappedcrs(x, mappedcrs) - else - x - end -end -function _without_mapped_crs(f, st::AbstractRasterStack, mappedcrs::GeoFormat) - st1 = maplayers(A -> setmappedcrs(A, nothing), st) - x = f(st1) - return if x isa AbstractRasterStack - setmappedcrs(x, mappedcrs) +_type_missingval(::Type{T}) where T = _type_missingval1(Missings.nonmissingtype(T)) + +_type_missingval1(::Type{T}) where T<:Number = typemin(T) +_type_missingval1(::Type{T}) where T<:Unsigned = typemax(T) +_type_missingval1(::Type{T}) where T<:AbstractString = T("") + +_fix_missingval(::Type, ::Union{NoKW,Nothing}) = nothing +_fix_missingval(::AbstractArray, ::Nothing) = nothing +_fix_missingval(A::AbstractArray, ::NoKW) = _fix_missingval(A, Rasters.missingval(A)) +_fix_missingval(::AbstractArray{T}, missingval) where T = _fix_missingval(T, missingval) +function _fix_missingval(::Type{T}, missingval::M) where {T,M} + T1 = nonmissingtype(T) + if missingval isa T + missingval + elseif hasmethod(convert, Tuple{Type{T1},M}) && isreal(missingval) && + missingval <= typemax(T1) && missingval >= typemin(T1) + if T1 <: Integer && !isinteger(missingval) + nothing + else + convert(T, missingval) + end else - x + nothing end end -function _extent2dims(to; size=nothing, res=nothing, crs=nothing, kw...) - _extent2dims(to, size, res, crs) -end -function _extent2dims(to::Extents.Extent, size::Nothing, res::Nothing, crs) - isnothing(res) && throw(ArgumentError("Pass either `size` or `res` keywords or a `Tuple` of `Dimension`s for `to`.")) -end -function _extent2dims(to::Extents.Extent, size, res, crs) - isnothing(res) || _size_and_res_error() -end -function _extent2dims(to::Extents.Extent{K}, size::Nothing, res::Real, crs) where K - tuple_res = ntuple(_ -> res, length(K)) - _extent2dims(to, size, tuple_res, crs) -end -function _extent2dims(to::Extents.Extent{K}, size::Nothing, res, crs) where K - ranges = map(values(to), res) do bounds, r - start, stop_closed = bounds - stop_open = stop_closed + maybe_eps(stop_closed; grow=false) - length = ceil(Int, (stop_open - start) / r) - range(; start, step=r, length) - end - return _extent2dims(to, ranges, crs) -end -function _extent2dims(to::Extents.Extent{K}, size, res::Nothing, crs) where K - if size isa Int - size = ntuple(_ -> size, length(K)) + +# Extents + +function _extent2dims(to::Extents.Extent; + size=nokw, res=nokw, crs=nokw, mappedcrs=nokw, sampling=nokw, kw... +) + sampling = _match_to_extent(to, isnokw(sampling) ? Intervals(Start()) : sampling) + _extent2dims(to, size, res; crs, mappedcrs, sampling, kw...) +end +_extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res::Union{Nothing,NoKW}; kw...) = + throw(ArgumentError("Pass either `size` or `res` keywords or a `Tuple` of `Dimension`s for `to`.")) +_extent2dims(to::Extents.Extent, size, res; kw...) = _size_and_res_error() +function _extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res; + sampling::Tuple, closed=true, kw... +) + res = _match_to_extent(to, res) + ranges = map(values(to), res, sampling) do (start, stop), step, samp + @assert step >= zero(step) "only positive `res` are supported, got $step" + if samp isa Intervals + if locus(samp) isa End + reverse(range(; start=stop+step, step=-step, stop=start+step)) + else + r = range(; start, step, stop) + if locus(samp) isa Start + r + else # Center + r .+ step / 2 + end + end + else + range(; start, step, stop) + end end - ranges = map(values(to), size) do bounds, length - start, stop_closed = bounds - stop_open = stop_closed + maybe_eps(stop_closed; grow=false) - step = (stop_open - start) / length - range(; start, step, length) + return _extent2dims(to, ranges; sampling, kw...) +end +function _extent2dims(to::Extents.Extent, size, res::Union{Nothing,NoKW}; + sampling::Tuple, crs, mappedcrs, closed=true, kw... +) + size = _match_to_extent(to, size) + ranges = map(values(to), size, sampling) do (start, stop), length, samp + r1 = range(; start, stop, length) + if samp isa Points + r1 + else + # We need to buffer extent for a closed interval input so that e.g. + # The raster will actually contain points of the extent, as raster + # pixels are closed/open intervals not closed/closed. + # Its hard to add a very small amount to any fp number and have + # it propagate through to the final extent. But this setup seems to work. + # We use the step or r1 to offset the end point of r, the buffer with 10 float + # steps, which seems to be enough. But it's arbitrary and count be revised. + nfloatsteps = 10 + step = (stop - start) / length + if locus(samp) isa End + start_open = if (closed && start isa AbstractFloat) + prevfloat(start + step, nfloatsteps) + else + start + step + end + reverse(range(; start=stop, stop=start_open, length)) + else + stop_open = if closed && stop isa AbstractFloat + nextfloat(stop - step, nfloatsteps) + else + stop - step + end + r = range(; start, stop=stop_open, length) + if locus(samp) isa Start + r + else # Center + r .+ (step / 2) + end + end + end end - return _extent2dims(to, ranges, crs) + return _extent2dims(to, ranges; sampling, crs, mappedcrs) end -function _extent2dims(to::Extents.Extent{K}, ranges, crs) where K +function _extent2dims(::Extents.Extent{K}, ranges; + crs, mappedcrs, sampling::Tuple, kw... +) where K + crs = isnokw(crs) ? nothing : crs + mappedcrs = isnokw(mappedcrs) ? nothing : mappedcrs emptydims = map(name2dim, K) - lookups = map(ranges) do range - Projected(range; - order=ForwardOrdered(), - sampling=Intervals(Start()), - span=Regular(step(range)), - crs, - ) + lookups = map(emptydims, ranges, sampling) do d, range, s + order = Lookups.orderof(range) + span = Regular(step(range)) + if d isa SpatialDim && !isnothing(crs) + Projected(range; sampling=s, order, span, crs, mappedcrs) + else + Sampled(range; sampling=s, order, span) + end end d = map(rebuild, emptydims, lookups) return d end -function _as_intervals(ds::Tuple) - # Rasterization only makes sense on Sampled Intervals - interval_dims = map(dims(ds, DEFAULT_POINT_ORDER)) do d - l = parent(d) - rebuild(d, rebuild(l; sampling=Intervals(locus(l)))) +function _match_to_extent(::Extents.Extent{K}, x) where K + if x isa DimTuple + map(val, dims(x, map(name2dim, K))) + elseif x isa NamedTuple + values(x[K]) + elseif x isa Tuple + x + else + map(_ -> x, K) end - return setdims(ds, interval_dims) end +# Geometries + # get geometries from what may be a table with a geometrycolumn or an interable of geometries # if it has no geometry column and does not iterate valid geometries, error informatively function _get_geometries(data, ::Nothing) @@ -238,7 +270,7 @@ function _get_geometries(data, geometrycolumn::NTuple{<:Any, <:Symbol}) ismissing(r) && return missing end return row - end + end return points end function _check_geometries(geoms) @@ -251,61 +283,27 @@ function _check_geometries(geoms) end # to distinguish between objects returned by _get_geometries and other objects struct IterableOfGeometries end -_warn_disk() = @warn "Disk-based objects may be very slow here. User `read` first." - -_filenotfound_error(filename) = throw(ArgumentError("file \"$filename\" not found")) - -_progress(args...; kw...) = ProgressMeter.Progress(args...; color=:blue, barlen=50, kw...) - -# Function barrier for splatted vector broadcast -@noinline _do_broadcast!(f, x, args...) = broadcast!(f, x, args...) - -_size_and_res_error() = throw(ArgumentError("Both `size` and `res` keywords are passed, but only one can be used")) - -_no_crs_error() = throw(ArgumentError("The provided object does not have a CRS. Use `setcrs` to set one.")) - -_type_missingval(::Type{T}) where T = typemin(T) -_type_missingval(::Type{T}) where T<:Unsigned = typemax(T) -# Modified from IsURL.jl, many thanks to @zlatanvasovic -const WINDOWSREGEX = r"^[a-zA-Z]:[\\]" -const URLREGEX = r"^[a-zA-Z][a-zA-Z\d+\-.]*:" -_isurl(str::AbstractString) = !occursin(WINDOWSREGEX, str) && occursin(URLREGEX, str) - -# Run `f` threaded or not, w -function _run(f, range::OrdinalRange, threaded::Bool, progress::Bool, desc::String) - p = progress ? _progress(length(range); desc) : nothing - if threaded - Threads.@threads :static for i in range - f(i) - isnothing(p) || ProgressMeter.next!(p) - end - else - for i in range - f(i) - isnothing(p) || ProgressMeter.next!(p) - end - end -end +# Chunking # NoKW means true -@inline function _chunks_to_tuple(template, dims, chunks::Bool) +@inline function _chunks_to_tuple(template, dimorder, chunks::Bool) if chunks == true if template isa AbstractArray && DA.haschunks(template) == DA.Chunked() # Get chunks from the template DA.max_chunksize(DA.eachchunk(template)) else # Use defaults - _chunks_to_tuple(template, dims, (X(512), Y(512))) + _chunks_to_tuple(template, dimorder, (X(512), Y(512))) end else nothing end end @inline function _chunks_to_tuple(template, dimorder, chunks::NTuple{N,Integer}) where N - n = length(dimorder) - if n < N + n = length(dimorder) + if n < N throw(ArgumentError("Length $n tuple needed for `chunks`, got $N")) elseif n > N (chunks..., ntuple(_ -> 1, Val{n-N}())...) @@ -330,24 +328,59 @@ end @inline _chunks_to_tuple(template, dimorder, chunks::Nothing) = nothing @inline _chunks_to_tuple(template, dims, chunks::NoKW) = nothing - _checkregular(A::AbstractRange) = true function _checkregular(A::AbstractArray) step = stepof(A) for i in eachindex(A)[2:end] if !(A[i] - A[i-1] ≈ step) - return false + return false end end return true end -function _checkobjmem(obj) +# Constructor helpers + +function _raw_check(raw, scaled, missingval, verbose) + if raw + # Scaled is false if raw is true + scaled isa Bool && scaled && verbose && @warn "`scaled=true` set to `false` because of `raw=true`" + # Only missingval of `nothing` has a meaning with `raw=true`, + # it turns off missingval completely. Other msissingval values are + # ignored and a warning is thrown unless verbose=false + if !isnokwornothing(missingval) + verbose && @warn "`missingval=$missingval` target value is not used because of `raw=true`" + end + return false, isnothing(missingval) ? nothing : Rasters.missingval + else + # Otherwise scaled is true and missingval is unchanged + scaled = isnokw(scaled) ? true : scaled + return scaled, missingval + end +end + +function _maybe_drop_single_band(x, dropband::Bool, lazy::Bool) + dropband || return x + if hasdim(x, Band()) && size(x, Band()) < 2 + if lazy + return view(x, Band(1)) # TODO fix dropdims in DiskArrays + else + return dropdims(x; dims=Band()) + end + else + return x + end +end + + +# Memory + +function _checkobjmem(obj) f = bytes -> """ - required memory $(bytes) is greater than system memory $(Sys.free_memory()). + required memory $(bytes) is greater than system memory $(Sys.free_memory()). Use `lazy=true` if you are loading dataset, and only call `read` on a subset after `view`. """ - _checkobjmem(f, obj) + _checkobjmem(f, obj) end _checkobjmem(f, obj) = _checkmem(f, _sizeof(obj)) @@ -360,13 +393,131 @@ _sizeof(s::AbstractRasterSeries) = function _no_memory_error(f, bytes) msg = f(bytes) * """ - If you beleive this is not correct, pass the keyword `checkmem=false` or set `Rasters.checkmem!(false)` + If you beleive this is not correct, pass the keyword `checkmem=false` or set `Rasters.checkmem!(false)` and try again. These options may crash your system if the file is actually larger than memory. """ return error(msg) end +# Lookups + +function _as_intervals(ds::Tuple) + # Rasterization only makes sense on Sampled Intervals + interval_dims = map(dims(ds, DEFAULT_POINT_ORDER)) do d + l = parent(d) + rebuild(d, rebuild(l; sampling=Intervals(locus(l)))) + end + return setdims(ds, interval_dims) +end + +nolookup_to_sampled(A) = rebuild(A; dims=nolookup_to_sampled(dims(A))) +nolookup_to_sampled(dims::DimTuple) = map(nolookup_to_sampled, dims) +nolookup_to_sampled(d::Dimension) = + lookup(d) isa NoLookup ? set(d, Sampled(; sampling=Points())) : d + + +# Metadata + +# Create a standardised Metadata object of source T, containing a `Dict{String,Any}` +_metadatadict(s::Source, p1::Pair, pairs::Pair...) = + _metadatadict(s, (p1, pairs...)) +_metadatadict(::S) where S<:Source = Metadata{S}(Dict{String,Any}()) +function _metadatadict(::S, pairs) where S<:Source + dict = Dict{String,Any}() + for (k, v) in pairs + dict[String(k)] = v + end + return Metadata{S}(dict) +end + + +# Other + +_progress(args...; kw...) = ProgressMeter.Progress(args...; color=:blue, barlen=50, kw...) + +# Function barrier for splatted vector broadcast +@noinline _do_broadcast!(f, x, args...) = broadcast!(f, x, args...) + +# Run `f` threaded or not, w +function _run(f, range::OrdinalRange, threaded::Bool, progress::Bool, desc::String) + p = progress ? _progress(length(range); desc) : nothing + if threaded + Threads.@threads :static for i in range + f(i) + isnothing(p) || ProgressMeter.next!(p) + end + else + for i in range + f(i) + isnothing(p) || ProgressMeter.next!(p) + end + end +end + +_unwrap(::Val{X}) where X = X +_unwrap(x) = x + +# Map filename suffix over a stack +function mapargs(f, st::AbstractRasterStack, args...; kw...) + layers = map(values(st), args...) do A, mappedargs... + f(A, mappedargs...; kw...) + end + return DD.rebuild_from_arrays(st, Tuple(layers)) +end + +_without_mapped_crs(f, x) = _without_mapped_crs(f, x, mappedcrs(x)) +_without_mapped_crs(f, x, ::Nothing) = f(x) +function _without_mapped_crs(f, dims::DimTuple, mappedcrs::GeoFormat) + dims1 = setmappedcrs(dims, nothing) + x = f(dims1) + if x isa DimTuple + x = setmappedcrs(x, mappedcrs) + end + return x +end +function _without_mapped_crs(f, A::AbstractRaster, mappedcrs::GeoFormat) + A = setmappedcrs(A, nothing) + x = f(A) + return if x isa AbstractRaster + setmappedcrs(x, mappedcrs) + else + x + end +end +function _without_mapped_crs(f, st::AbstractRasterStack, mappedcrs::GeoFormat) + st1 = maplayers(A -> setmappedcrs(A, nothing), st) + x = f(st1) + return if x isa AbstractRasterStack + setmappedcrs(x, mappedcrs) + else + x + end +end + + +# Warnings and erros + +_maybe_warn_replace_missing(replace_missing::NoKW) = nothing +function _maybe_warn_replace_missing(replace_missing) + @warn """ + `replace_missing` keyword no longer used. Rasters now automatically replaces `missingval` with `missing`. + Set `missingval=Rasters.missingval`, to keep the internal missing value, or to replace with some value besides + `missing` use e.g. `missingval=Rasters.missingval => NaN` for `NaN`. + """ +end + +@noinline _warn_disk() = @warn "Disk-based objects may be very slow here. User `read` first." + +_filenotfound_error(filename) = throw(ArgumentError("file \"$filename\" not found")) + +_size_and_res_error() = throw(ArgumentError("Both `size` and `res` keywords are passed, but only one can be used")) + +_no_crs_error() = throw(ArgumentError("The provided object does not have a CRS. Use `setcrs` to set one.")) + + +# Rows for extraction etc + # _rowtype returns the complete NamedTuple type for a point row # This code is entirely for types stability and performance. # It is used in extract and Rasters.sample diff --git a/src/write.jl b/src/write.jl index debc7e697..0ad84007b 100644 --- a/src/write.jl +++ b/src/write.jl @@ -1,19 +1,4 @@ -const CHUNKS_KEYWORD = """ -- `chunks`: a `NTuple{N,Int}` specifying the chunk size for each dimension. - To specify only specific dimensions, a Tuple of `Dimension` wrapping `Int` - or a `NamedTuple` of `Int` can be used. Other dimensions will have a chunk - size of `1`. `true` can be used to mean: use the original - chunk size of the lazy `Raster` being written or X and Y of 256 by 256. - `false` means don't use chunks at all. -""" - -const MISSINGVAL_KEYWORD = """ -- `missingval`: set the missing value (i.e. FillValue / nodataval) of the written raster, - as Julias `missing` cannot be stored. If not passed in, `missingval` will be detected - from metadata or a default will be chosen. -""" - const SOURCE_WRITE_DOCSTRING = """ Other keyword arguments are passed to the `write` method for the backend. @@ -33,12 +18,14 @@ Other keyword arguments are passed to the `write` method for the backend. ## GDAL Keywords $FORCE_KEYWORD -- `driver`: A GDAL driver name `String` or a GDAL driver retrieved via `ArchGDAL.getdriver(drivername)`. +- `driver`: A GDAL driver name `String` or a GDAL driver retrieved via `ArchGDAL.getdriver(drivername)`. By default `driver` is guessed from the filename extension. -- `options::Dict{String,String}`: A dictionary containing the dataset creation options passed to the driver. - For example: `Dict("COMPRESS" => "DEFLATE")`. -Valid `options` for each specific `driver` can be found at: https://gdal.org/drivers/raster/index.html +- `options::Dict{String,String}`: A dictionary containing the dataset creation options passed to the driver. + For example: `Dict("COMPRESS" => "DEFLATE")`. + +Valid `driver` names and the `options` for each can be found at: +[https://gdal.org/drivers/raster/index.html](https://gdal.org/drivers/raster/index.html) ## Source comments @@ -50,7 +37,7 @@ Returns the base of `filename` with a `.grd` extension. ### GDAL (tiff, and everything else) -Used if you `write` a `Raster` with a `filename` extension that no other backend can write. +Used if you `write` a `Raster` with a `filename` extension that no other backend can write. GDAL is the fallback, and writes a lot of file types, but is not guaranteed to work. """ @@ -64,18 +51,20 @@ file extension or using the `source` keyword. $CHUNKS_KEYWORD $FORCE_KEYWORD -$MISSINGVAL_KEYWORD +$WRITE_MISSINGVAL_KEYWORD $SOURCE_KEYWORD $SOURCE_WRITE_DOCSTRING Returns `filename`. """ -function Base.write( filename::AbstractString, A::AbstractRaster; - source=_sourcetrait(filename), +function Base.write(filename::AbstractString, A::AbstractRaster; + source=_sourcetrait(filename), + missingval=nokw, kw... ) - write(filename, _sourcetrait(source), A; kw...) + missingval = isnokw(missingval) ? Rasters.missingval(A) : missingval + write(filename, _sourcetrait(source), A; missingval, kw...) end Base.write(A::AbstractRaster; kw...) = write(filename(A), A; kw...) # Fallback @@ -83,7 +72,7 @@ function Base.write( filename::AbstractString, source::Source, A::Union{AbstractRaster,AbstractRasterStack}; kw... ) missing_package = SOURCE2PACKAGENAME[source] - error("Missing package extension for $source. Run `using $missing_package` before useing `write` for this file.") + error("Missing package extension for $source. Run `using $missing_package` before using `write` for this file extension.") end """ @@ -113,11 +102,13 @@ function Base.write(path::AbstractString, s::AbstractRasterStack; ext=nothing, source=_sourcetrait(path, ext), verbose=true, + missingval=nokw, kw... ) source = _sourcetrait(source) + missingval = _stack_missingvals(s, missingval) if haslayers(source) - write(path, source, s; kw...) + write(path, source, s; missingval, kw...) else # Otherwise write separate files for each layer if isnothing(ext) @@ -136,13 +127,38 @@ function Base.write(path::AbstractString, s::AbstractRasterStack; if verbose @warn string("Cannot write complete stacks to \"", ext, "\", writing layers as individual files") end - map(keys(s), suffix1) do key, suf + map(keys(s), suffix1, missingval) do key, suf, mv fn = string(base, suf, ext) - write(fn, source, s[key]; kw...) + write(fn, source, s[key]; missingval=mv, kw...) end |> NamedTuple{keys(s)} end end +_stack_missingvals(::RasterStack{<:Any,T}, x) where T = _stack_missingvals(T, x) +function _stack_missingvals(::Type{T}, missingval::NamedTuple{K}) where {K,T<:NamedTuple{K}} + map(_types(T), values(missingval)) do t, mv + ismissing(mv) ? _type_missingval(t) : mv + end |> NamedTuple{K} +end +_stack_missingvals(::Type{T}, missingval::NamedTuple{K1}) where {K1,T<:NamedTuple{K2}} where K2 = + throw(ArgumentError("stack keys $K1 do not match misssingval keys $K2")) +_stack_missingvals(::Type{T}, missingval::Missing) where T<:NamedTuple{K} where K = + NamedTuple{K}(map(_type_missingval, _types(T))) +_stack_missingvals(::Type{T}, missingval) where T = + _stack_nt(T, missingval) + +_stack_nt(::NamedTuple{K}, x) where K = NamedTuple{K}(map(_ -> x, K)) +_stack_nt(::RasterStack{<:Any,T}, x) where T = _stack_nt(T, x) +_stack_nt(::Type{T}, x::NamedTuple{K}) where {K,T<:NamedTuple{K}} = x +_stack_nt(::Type{T}, x::NamedTuple{K1}) where {K1,T<:NamedTuple{K2}} where K2 = + throw(ArgumentError("stack keys $K1 do not match misssingval keys $K2")) +_stack_nt(::Type{T}, x) where T<:NamedTuple{K} where K = + NamedTuple{K}(map(_ -> x, K)) + +@generated function _types(::Type{<:NamedTuple{K,T}}) where {K,T} + Tuple(T.parameters) +end + """ Base.write(filepath::AbstractString, s::AbstractRasterSeries; kw...) @@ -191,15 +207,18 @@ function Base.write(path::AbstractString, A::AbstractRasterSeries; written_paths end end +Base.write(f::Function, args...; kw...) = write(args...; f, kw...) # Trait for source data that has stack layers haslayers(T) = false # This is used in source `write` methods +check_can_write(filename::Union{Nothing,NoKW}, force) = true function check_can_write(filename, force) if !check_can_write(Bool, filename, force) throw(ArgumentError("filename already exists at $filename. use the keyword `force=true` to write anyway")) end return true end +check_can_write(::Type{Bool}, filename::Union{Nothing,NoKW}, force) = true check_can_write(::Type{Bool}, filename, force) = (force || (!isfile(filename) && !isdir(filename))) diff --git a/test/array.jl b/test/array.jl index e77167091..fbd52580d 100644 --- a/test/array.jl +++ b/test/array.jl @@ -1,6 +1,7 @@ using Rasters, Test, Dates, DiskArrays using Rasters.Lookups, Rasters.Dimensions using Rasters: isdisk, ismem, filename +using ArchGDAL data1 = cumsum(cumsum(ones(10, 11); dims=1); dims=2) data2 = 2cumsum(cumsum(ones(10, 11, 1); dims=1); dims=2) @@ -82,7 +83,7 @@ end @testset "collect and Array" begin - @test collect(ga1) isa Array + @test collect(ga1) isa Array{Float64,2} @test collect(ga1) == data1 @test Array(ga1) isa Array{Float64,2} @test Array(ga1) == data1 diff --git a/test/create.jl b/test/create.jl new file mode 100644 index 000000000..e76579c04 --- /dev/null +++ b/test/create.jl @@ -0,0 +1,220 @@ +using Rasters, Test, Dates, DiskArrays, Extents, ArchGDAL, NCDatasets +using Rasters.Lookups, Rasters.Dimensions +using Rasters: isdisk, ismem, filename + +@testset "create Raster" begin + rast = Rasters.create(Int32, Extents.Extent(X=(0, 10), Y=(0, 5)); + size=(1024, 1024), + crs=EPSG(4326), + chunks=(X=128, Y=128), + force=true, + name=:testname, + fill=Int32(2), + ) do A + A .*= 3 + end + @test all(rast .=== Int32(6)) + @test crs(rast) == EPSG(4326) + @test size(rast) == (1024, 1024) + @test Rasters.name(rast) == :testname + @test missingval(rast) === nothing + @test ispoints(rast) + + ext = Extents.Extent(X=(0, 10), Y=(0, 5), Ti=(DateTime(2001), DateTime(2002))) + rast = @test_nowarn Rasters.create(Float64, ext; + res=(X=0.2, Y=0.1, Ti=Month(1)), + crs=EPSG(4326), + force=true, + sampling=Intervals(Start()), + name=:testname, + missingval=missing, + reverse_y=false, + fill=2.0, + ) do A + A .*= 3 + end + @test all(rast .=== 6.0) + @test crs(rast) == EPSG(4326) + # We need closed/open extents to fix this + @test_broken extent(rast) == ext + @test_broken size(rast) == (50, 50, 12) + @test Rasters.name(rast) == :testname + @test missingval(rast) === missing + @test isintervals(rast) + @test map(step, lookup(rast)) == (0.2, 0.1, Month(1)) + + D = (Ti(DateTime(2000):Month(1):DateTime(2000, 12); sampling=Intervals(Start())), X(0.0:0.01:10.0), Y(0.0:0.01:10)) + rast = Rasters.create(Int32, D; fill=1, missingval=missing, crs=EPSG(4326), name=:testname) + map(length, Rasters.dims(rast)) + @test crs(rast) == EPSG(4326) + @test size(rast) == (12, 1001, 1001) + @test Rasters.name(rast) == :testname + @test missingval(rast) === missing + @test isintervals(rast, Ti) + @test ispoints(rast, (X, Y)) + @test map(step, lookup(rast)) == (Month(1), 0.01, 0.01) + @test all(x -> x === Int32(1), rast) + + rast1 = Rasters.create(rast) + @test dims(rast1) == dims(rast) + @test eltype(rast1) == eltype(rast) +end + +@testset "create RasterStack" begin + st = Rasters.create((a=Int32, b=Float64, c=Bool), Extents.Extent(X=(0, 10), Y=(0, 5)); + size=(X=1024, Y=1024), + sampling=(X=Points(), Y=Intervals()), + crs=EPSG(4326), + force=true, + verbose=false, + missingval=(a=Int32(-9999), b=Float64(-9999), c=false), + fill=(a=Int32(-9999), b=0, c=false), + ) do st + st.c .= true + end + @test crs(st) == EPSG(4326) + @test size(st) == (1024, 1024) + @test Rasters.name(st) == (:a, :b, :c) + @test eltype(st) === @NamedTuple{a::Int32,b::Float64, c::Bool} + @test missingval(st) === (a=Int32(-9999), b=-9999.0, c=false) + @test ispoints(st, X) + @test isintervals(st, Y) + @test all(x -> x === Int32(-9999), st.a) + @test all(x -> x === 0.0, st.b) + @test all(x -> x === true, st.c) + + st2 = Rasters.create((a=UInt8, b=Float32), st; + layerdims=(a=(X(), Y()), b=(Y(),)), + missingval=(a=UInt8(0), b=1.0f0) + ) + @test basedims(st2.a) == (X(), Y()) + @test basedims(st2.b) == (Y(),) + @test eltype(st2) === @NamedTuple{a::UInt8, b::Float32} + @test missingval(st2) === (a=UInt8(0), b=1.0f0) + + @testset "from template with new dims" begin + st1 = Rasters.create(st; + layerdims=(a=(X, Y), b=(Y,), c=(X,)), + ) + @test crs(st1) == EPSG(4326) + @test size(st1) == (1024, 1024) + @test Rasters.name(st1) == (:a, :b, :c) + @test missingval(st1) === (a=Int32(-9999), b=-9999.0, c=false) + @test ispoints(st1, X) + @test isintervals(st1, Y) + @test basedims(st1.a) == (X(), Y()) + @test basedims(st1.b) == (Y(),) + @test basedims(st1.c) == (X(),) + end + + @testset "from template with new layers" begin + st1 = Rasters.create((c=UInt8, d=Int16), st; + missingval=(c=0x00, d=Int16(1)), + ) + @test crs(st1) == EPSG(4326) + @test size(st1) == (1024, 1024) + @test Rasters.name(st1) == (:c, :d) + @test eltype(st1) == @NamedTuple{c::UInt8,d::Int16} + @test missingval(st1) === (c=0x00, d=Int16(1)) + end + + @testset "from template with new dims and layers" begin + st1 = Rasters.create((c=UInt8, d=Int16), st; + layerdims=(c=(X, Y), d=(Y,)), + missingval=(c=UInt8(0), d=Int16(1)), + ) + @test crs(st1) == EPSG(4326) + @test size(st1) == (1024, 1024) + @test Rasters.name(st1) == (:c, :d) + @test missingval(st1) === (c=0x00, d=Int16(1)) + end +end + +ext = ".nc" +for ext in (".nc", ".tif", ".grd") + @testset "create $ext" begin + fn = tempname() * ext + created = Rasters.create(fn, UInt8, (X(1:10), Y(1:10)); + missingval=0xff, + fill=0x01, + force=true + ) + @test all(Raster(fn) .=== 0x01) + @test missingval(created) === 0xff + + fn = tempname() * ext + if ext == ".grd" + created = Rasters.create(fn, Int16, (X(1:10), Y(1:10)); + missingval=typemax(Int16), + force=true, + ); + open(created; write=true) do O + O .= 2 + nothing + end + @test all(Raster(fn) .=== Int16(2)) + @test missingval(Raster(fn; missingval)) === typemax(Int16) + else + @time created = Rasters.create(fn, Int16, (X(1:10), Y(1:10)); + missingval=typemax(Int16), + scale=0.1, + offset=5.0, + fill=Int16(1), + force=true, + ) do C + C .*= 3 + end + @test all(Raster(fn) .=== 3.0) + @test all(Raster(fn; scaled=false) .== Int16(-20)) + @test missingval(Raster(fn; missingval, scaled=false)) === typemax(Int16) + end + end +end + +@testset "create .nc stack" begin + created = Rasters.create("created.nc", (a=UInt8, b=Float32), (X(1:10), Y(1:10)); + missingval=(a=0xff, b=typemax(Float32)), + fill=(a=0x01, b=1.0f0), + layerdims=(a=(X,), b=(X, Y)), + force=true, + ) + @test missingval(created) == (a=0xff, b=typemax(Float32)) + @test size(created.a) == (10,) + @test size(created.b) == (10, 10) + @test all(created.a .=== 0x01) + @test all(created.b .=== 1.0f0) + st = RasterStack("created.nc"; missingval) + @test missingval(st) == (a=0xff, b=typemax(Float32)) + + created = Rasters.create("created.nc", (a=UInt8, b=Float32), (X(1:10), Y(1:10)); + missingval=(a=0xff, b=typemax(Float32)) => missing, + fill=(a=0x01, b=1.0f0), + layerdims=(a=(X,), b=(X, Y)), + force=true, + ) + + @test missingval(created) === missing + @test size(created.a) == (10,) + @test size(created.b) == (10, 10) + @test all(created.a .=== 0x01) + @test all(created.b .=== 1.0f0) + st = RasterStack("created.nc"; missingval) + @test missingval(st) == (a=0xff, b=typemax(Float32)) + + @testset "with a function" begin + created = Rasters.create("created.nc", (a=UInt8, b=Float32), (X(1:10), Y(1:10)); + missingval=(a=0xff, b=typemax(Float32)), + fill=(a=0x01, b=1.0f0), + layerdims=(a=(X,), b=(X, Y)), + force=true, + ) do st + map(layers(st)) do A + A .*= 2 + end + end + @test all(read(created.a) .=== 0x02) + @test all(read(created.b) .=== 2.0f0) + end +end + + diff --git a/test/methods.jl b/test/methods.jl index 81823e524..c2e15124f 100644 --- a/test/methods.jl +++ b/test/methods.jl @@ -55,9 +55,10 @@ gaMi = replace_missing(ga) @test all(map(values(replace_missing(st, NaN32)), (a=[NaN32 7.0f0; 2.0f0 NaN32], b=[1.0 0.4; 2.0 NaN])) do x, y all(x .=== y) end) - dNaN = replace_missing(ga, NaN32; filename="test.tif") + testfile = tempname() * ".tif" + dNaN = replace_missing(ga, NaN32; filename=testfile) + read(dNaN) @test all(isequal.(dNaN, [NaN32 7.0f0; 2.0f0 NaN32])) - rm("test.tif") stNaN = replace_missing(st, NaN32; filename="teststack.tif") @test all(maplayers(stNaN[Band(1)], (a=[NaN32 7.0f0; 2.0f0 NaN32], b=[1.0 0.4; 2.0 NaN])) do x, y all(x .=== y) @@ -89,16 +90,51 @@ end @test all(boolmask(se2, alllayers=false) .=== [true true; true false]) @test dims(boolmask(ga)) === dims(ga) x = boolmask(polygon; res=1.0, boundary=:touches) - @test x == trues(X(Projected(-20:1.0:0.0; sampling=Intervals(Start()), crs=nothing)), Y(Projected(10.0:1.0:30.0; sampling=Intervals(Start()), crs=nothing))) + @test x == trues(X(Projected(-20:1.0:0.0; sampling=Intervals(Start()), crs=nothing)), + Y(Projected(10.0:1.0:30.0; sampling=Intervals(Start()), crs=nothing))) @test all(x .!= boolmask(polygon; res=1.0, invert=true, boundary=:touches)) @test parent(x) isa BitMatrix # With a :geometry axis - x = boolmask([polygon, polygon]; collapse=false, res=1.0, boundary=:touches) - @test all(x .!= boolmask([polygon, polygon]; collapse=false, res=1.0, invert=true, boundary=:touches)) - @test eltype(x) == Bool - @test size(x) == (21, 21, 2) - @test sum(x) == 882 - @test parent(x) isa BitArray{3} + x1 = boolmask([polygon, polygon]; collapse=false, res=1.0, boundary=:touches) + x2 = boolmask([polygon, polygon]; collapse=false, res=1.0, boundary=:touches, sampling=Intervals(Center())) + x3 = boolmask([polygon, polygon]; collapse=false, res=1.0, boundary=:touches, sampling=Intervals(End())) + @test extent(x1) == extent(x2) == extent(x3) == Extent(X = (-20.0, 1.0), Y = (10.0, 31.0), geometry = (1, 2)) + x4 = boolmask([polygon, polygon]; collapse=false, size=(21, 21), boundary=:touches) + x5 = boolmask([polygon, polygon]; collapse=false, size=(21, 21), boundary=:touches, sampling=Intervals(Center())) + x6 = boolmask([polygon, polygon]; collapse=false, size=(21, 21), boundary=:touches, sampling=Intervals(End())) + xs = (x1, x2, x3, x4, x5, x6) + @test all(x1 .!= boolmask([polygon, polygon]; collapse=false, res=1.0, invert=true, boundary=:touches)) + @test sampling(x1, X) isa Intervals{Start} + @test sampling(x2, X) isa Intervals{Center} + @test sampling(x3, X) isa Intervals{End} + @test sampling(x4, X) isa Intervals{Start} + @test sampling(x5, X) isa Intervals{Center} + @test sampling(x6, X) isa Intervals{End} + for x in xs + @test eltype(x1) == Bool + @test size(x) == (21, 21, 2) + @test sum(x) == 882 + @test parent(x) isa BitArray{3} + @test eltype(x) == Bool + end + @testset "size adds nextfloat" begin + s = boolmask([polygon, polygon]; collapse=false, size=(21, 21), boundary=:touches) + bounds(s, X)[1] == -20.0 + bounds(s, X)[2] > 0.0 + bounds(s, Y)[1] == 10.0 + bounds(s, Y)[2] > 30.0 + c = boolmask([polygon, polygon]; collapse=false, size=(21, 21), boundary=:touches, sampling=Intervals(Center())) + bounds(c, X)[1] == -20.0 + bounds(c, X)[2] > 0.0 + bounds(c, Y)[1] == 10.0 + bounds(c, Y)[2] > 30.0 + e = boolmask([polygon, polygon]; collapse=false, size=(21, 21), boundary=:touches, sampling=Intervals(End())) + bounds(e, X)[1] < -20.0 + bounds(e, X)[2] == 0.0 + bounds(e, Y)[1] < 10.0 + bounds(e, Y)[2] == 30.0 + end + x = boolmask([polygon, polygon]; collapse=true, res=1.0, boundary=:touches) @test all(x .!= boolmask([polygon, polygon]; collapse=true, res=1.0, invert=true, boundary=:touches)) @test size(x) == (21, 21) @@ -181,9 +217,9 @@ end ga4 = replace_missing(ga1; missingval=-9999) mask!(ga4; with=ga, invert=true) @test all(ga4 .=== [-9999 -9999; -9999 3]) - dmask = mask(ga3; with=ga, filename="mask.tif") + maskfile = tempname() * ".tif" + dmask = mask(ga3; with=ga, filename=maskfile) @test Rasters.isdisk(dmask) - rm("mask.tif") stmask = mask(replace_missing(st, NaN); with=ga, filename="mask.tif") @test Rasters.isdisk(stmask) rm("mask_a.tif") @@ -228,7 +264,7 @@ end end end -@testset "mask_replace_missing" begin +@testset "mask" begin # Floating point rasters a = Raster([1.0 0.0; 1.0 1.0], dims=(X, Y), missingval=0.0) b = Raster([1.0 1.0; 1.0 0.0], dims=(X, Y), missingval=0.0) @@ -357,7 +393,8 @@ end @test_throws ArgumentError classify(ga1, [1, 2, 3]) end -@testset "points" begin dimz = (X(9.0:1.0:10.0), Y(0.1:0.1:0.2)) +@testset "points" begin + dimz = (X(9.0:1.0:10.0), Y(0.1:0.1:0.2)) rast = Raster([1 2; 3 4], dimz; name=:test) rast2 = Raster([5 6; 7 8], dimz; name=:test2, missingval=5) rast_m = Raster([1 2; 3 missing], dimz; name=:test) @@ -590,7 +627,8 @@ end extended = extend(cropped, ga)[1] extended_r = extend(cropped_r; to=ga_r) extended1 = extend(extend(cropped; to=dims(ga, X)); to=dims(ga, Y)) - extended_d = extend(cropped; to=ga, filename="extended.tif") + filename = tempname() * ".tif" + extended_d = extend(cropped; to=ga, filename) @test map(identicalelements, maybe_layers.((extended, extended1, replace_missing(extended_d), ga))...) |> all @test map(identicalelements, maybe_layers.((extended_r, ga_r))...) |> all @test all(map(==, lookup(extended_d), lookup(extended))) @@ -665,7 +703,7 @@ end [missing 0.2 0.1; 1.2 1.1 0.3; 1.4 1.3 missing] - ) + ) # 3 dimensions A1 = Raster(ones(2, 2, 2), (X(2.0:-1.0:1.0), Y(5.0:1.0:6.0), Ti(DateTime(2001):Year(1):DateTime(2002)))) diff --git a/test/rasterize.jl b/test/rasterize.jl index 6d91f38f8..105f37ce6 100644 --- a/test/rasterize.jl +++ b/test/rasterize.jl @@ -277,8 +277,8 @@ end size=(250, 250), fill=UInt8(1), missingval=UInt8(0), ); # using Plots - # heatmap(parent(parent(rasters_raster))) - # heatmap(reverse(gdal_raster[:, :, 1]; dims=2)) + # Plots.heatmap(parent(parent(rasters_raster))) + # Plots.heatmap(reverse(gdal_raster[:, :, 1]; dims=2)) # Same results as GDAL @test sum(gdal_raster) == sum(rasters_raster) @test reverse(gdal_raster[:, :, 1]; dims=2) == rasters_raster @@ -289,11 +289,15 @@ end rasters_touches_raster = rasterize(last, shphandle.shapes; size=(250, 250), fill=UInt64(1), missingval=UInt64(0), boundary=:touches ) + # Plots.heatmap(reverse(gdal_touches_raster[:, :, 1]) + # Plots.heatmap(parent(rasters_touches_raster)) + # missingval(rasters_touches_raster) # Not quite the same answer as GDAL @test sum(gdal_touches_raster) == sum(rasters_touches_raster) @test reverse(gdal_touches_raster[:, :, 1], dims=2) == rasters_touches_raster # Test that its knwon to be off by 2: - @test count(reverse(gdal_touches_raster[:, :, 1], dims=2) .== rasters_touches_raster) == length(rasters_touches_raster) + @test count(reverse(gdal_touches_raster[:, :, 1], dims=2) .== rasters_touches_raster) == + length(rasters_touches_raster) # Two pixels differ in the angled line, top right # using Plots # Plots.heatmap(reverse(gdal_touches_raster[:, :, 1], dims=2)) @@ -396,12 +400,15 @@ end @test sum(skipmissing(r)) == (12 * 1 + 8 * 2 + 8 * 3 + 12 * 4) + (4 * 1.5 + 4 * 2.5 + 4 * 3.5) end - prod_r = rasterize(prod, polygons; res=5, fill=1:4, boundary=:center, filename="test.tif", threaded) + filename = tempname() * ".tif" + prod_r = rasterize(prod, polygons; res=5, fill=1:4, boundary=:center, filename, threaded) prod_r = rasterize(prod, polygons; res=5, fill=1:4, boundary=:center, threaded) @test sum(skipmissing(prod_r)) == (12 * 1 + 8 * 2 + 8 * 3 + 12 * 4) + (4 * 1 * 2 + 4 * 2 * 3 + 4 * 3 * 4) - prod_st = rasterize(prod, polygons; res=5, fill=(a=1:4, b=4:-1:1), missingval=missing, boundary=:center, threaded) + prod_st = rasterize(prod, polygons; + res=5, fill=(a=1:4, b=4:-1:1), missingval=missing, boundary=:center, threaded + ) @test_broken all(prod_st.a .=== rot180(parent(prod_st.b))) @test all(prod_r .=== prod_st.a) @@ -428,8 +435,8 @@ end # The outlines of these plots should exactly mactch, # with three values of 2 on the diagonal # using Plots - # Plots.plot(reduced_raster; clims=(0, 3)) - # Plots.plot!(polygons; opacity=0.3, fillcolor=:black) + # Plots.plot(reduced_raster_sum_touches; clims=(0, 3)) + # Plots.plot(polygons; opacity=0.3, fillcolor=:black) reduced_center = rasterize(sum, polygons; res=5, fill=1, boundary=:center, threaded) reduced_touches = rasterize(sum, polygons; res=5, fill=1, boundary=:touches, threaded) reduced_inside = rasterize(sum, polygons; res=5, fill=1, boundary=:inside, threaded) diff --git a/test/resample.jl b/test/resample.jl index 27067984e..d845f4b16 100644 --- a/test/resample.jl +++ b/test/resample.jl @@ -24,26 +24,8 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) end end - # Resample cea.tif using resample - cea = Raster(raster_path; missingval=0x00, name = :cea) - raster_output = resample(cea; res=output_res, crs=output_crs, method) - disk_output = resample(cea; res=output_res, crs=output_crs, method, filename="resample.tif") - - cea_permuted = permutedims(Raster(raster_path), (Y, X)) - permuted_output = resample(cea_permuted, output_res; crs=output_crs, method) - - # Compare ArchGDAL, resample and permuted resample - @test AG_output == - raster_output[Band(1)] == - disk_output[Band(1)] == - permutedims(permuted_output, (X, Y)) - @test abs(step(dims(raster_output, Y))) ≈ - abs(step(dims(raster_output, X))) ≈ - abs(step(dims(disk_output, X))) ≈ - abs(step(dims(permuted_output, X))) ≈ output_res - @test name(cea) == name(raster_output) - - rm("resample.tif") + cea = Raster(raster_path; missingval=0x00, name=:cea) + raster_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00) @testset "missingval propagates" begin @test missingval(resample(cea; res=output_res, crs=output_crs, method)) == 0x00 @@ -64,9 +46,7 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) resampled = resample(cea; method) @test crs(cea) == crs(resampled) @test cea == resampled - # There is some floating point error here after Rasters -> GDAL -> Rasterss... - # Should we correct it by detecting almost identical extent and using the original? - # @test_broken extent(cea) == extent(resampled) + @test extent(cea) == extent(resampled) end @testset "only `res` kw changes the array size predictably" begin @@ -133,7 +113,7 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) @testset "resample eltype propagates" begin r = Raster(rand(UInt8, X(1:10), Y(1:10))) r1 = resample(r; to=r) - @test eltype(r1) == UInt8 + @test eltype(r1) == Union{UInt8,Missing} end @testset "dimensions matcha after resampling with only `to`" begin @@ -153,6 +133,41 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) @test dims(resampled_3D, Z) == Z(1:2) end + mv = 0xff + for mv in (0xff, missing, Rasters.nokw) + # Resample cea.tif using resample + cea = Raster(raster_path; missingval=mv, name=:cea) + raster_output = resample(cea; + res=output_res, crs=output_crs, method, missingval=mv + ) + disk_output = resample(cea; + res=output_res, crs=output_crs, method, missingval=mv, filename="resample.tif" + ) + cea_permuted = permutedims(Raster(raster_path; + missingval=mv, name=:cea_permuted), (Y, X) + ) + permuted_output = resample(cea_permuted, output_res; + missingval=mv, crs=output_crs, method + ) + + AG_output1 = if mv === 0xff + AG_output + else + replace(AG_output, 0xff => missing) + end + # Compare ArchGDAL, resample and permuted resample + @test all(AG_output1 .=== parent(raster_output) .=== + read(disk_output) .=== + permutedims(permuted_output, (X, Y))) + @test abs(step(dims(raster_output, Y))) ≈ + abs(step(dims(raster_output, X))) ≈ + abs(step(dims(disk_output, X))) ≈ + abs(step(dims(permuted_output, X))) ≈ output_res + @test Rasters.name(cea) == Rasters.name(raster_output) + + rm("resample.tif") + end + @testset "resample to the same size or resolution leaves raster unchanged" begin res = 2 ys = range(; start=(90 - res / 2), step=-res, stop=(-90 + res / 2)) diff --git a/test/runtests.jl b/test/runtests.jl index e4337545c..052ba3137 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -14,6 +14,7 @@ end @time @safetestset "array" begin include("array.jl") end @time @safetestset "stack" begin include("stack.jl") end @time @safetestset "series" begin include("series.jl") end +@time @safetestset "create" begin include("create.jl") end @time @safetestset "utils" begin include("utils.jl") end @time @safetestset "set" begin include("set.jl") end @time @safetestset "aggregate" begin include("aggregate.jl") end @@ -26,22 +27,17 @@ end # CommondataModel sources @time @safetestset "commondatamodel" begin include("sources/commondatamodel.jl") end @time @safetestset "ncdatasets" begin include("sources/ncdatasets.jl") end -# @time @safetestset "zarr" begin include("sources/zarr.jl") end # TODO: FIXME +@time @safetestset "zarr" begin include("sources/zarr.jl") end if !Sys.iswindows() # GRIBDatasets doesn't work on Windows for now @time @safetestset "gribdatasets" begin include("sources/gribdatasets.jl") end + # GDAL Environment vars need to be set manually for windows, so skip for now + @time @safetestset "gdal" begin include("sources/gdal.jl") end + @time @safetestset "grd" begin include("sources/grd.jl") end end - # Only test SMAP locally for now, also RasterDataSources because CI downloads keep breaking if !haskey(ENV, "CI") @time @safetestset "rasterdatasources" begin include("sources/rasterdatasources.jl") end end - -if !Sys.iswindows() - # GDAL Environment vars need to be set manually for windows, so skip for now - @time @safetestset "gdal" begin include("sources/gdal.jl") end - @time @safetestset "grd" begin include("sources/grd.jl") end -end @time @safetestset "plot recipes" begin include("plotrecipes.jl") end - -@time @safetestset "resample" begin include("resample.jl") end +@time @safetestset "resample" begin include("resample.jl") end \ No newline at end of file diff --git a/test/sources/commondatamodel.jl b/test/sources/commondatamodel.jl index 6e2460092..0cda80779 100644 --- a/test/sources/commondatamodel.jl +++ b/test/sources/commondatamodel.jl @@ -15,7 +15,7 @@ import Rasters: ForwardOrdered, ReverseOrdered, Regular # test when reading a file ras = Raster(rand(X(f32_indices), Y(indices_one_third))) tempfile = tempname() * ".nc" - write(tempfile, ras) + write(tempfile, ras; force=true) ras_read = Raster(tempfile) steps = step.(dims(ras_read)) @test steps[1] == 0.05 diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index ddfb524e7..9d7731f08 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -16,7 +16,7 @@ gdalpath = maybedownload(url) @testset "lazyness" begin # Eager is the default - @test parent(gdalarray) isa Array + @test parent(gdalarray) isa Array # its a reshaped array now @test parent(lazyarray) isa DiskArrays.AbstractDiskArray @test parent(eagerarray) isa Array @testset "lazy broadcast" begin @@ -25,6 +25,26 @@ gdalpath = maybedownload(url) end end + @testset "cf" begin + # This file has no scale/offset so cf does nothing + @time cfarray = Raster(gdalpath) + @time cf_nomask_array = Raster(gdalpath; missingval=nothing) + @time rawarray = Raster(gdalpath; raw=true) + @time lazycfarray = Raster(gdalpath; lazy=true) + @time lazyrawarray = Raster(gdalpath; lazy=true, raw=true) + @test parent(cfarray) isa Matrix{UInt8} + @test parent(cf_nomask_array) isa Matrix{UInt8} + @test parent(rawarray) isa Matrix{UInt8} + open(lazycfarray) do A + @test parent(A) isa DiskArrays.SubDiskArray{UInt8} + @test parent(parent(A)) isa ArchGDAL.RasterDataset{UInt8} + end + open(lazyrawarray) do A + @test parent(A) isa DiskArrays.SubDiskArray{UInt8} + @test parent(parent(A)) isa ArchGDAL.RasterDataset{UInt8} + end + end + @testset "load from url" begin A = Raster("/vsicurl/" * url) B = Raster(url; source=:gdal) @@ -102,8 +122,8 @@ gdalpath = maybedownload(url) @test ndims(gdalarray) == 2 @test dims(gdalarray) isa Tuple{<:X,<:Y} @test lookup(refdims(gdalarray), Band) isa DimensionalData.Categorical; - # @test span(gdalarray, (Y, X)) == - # (Regular(-60.02213698319351), Regular(60.02213698319374)) + @test span(gdalarray, (Y, X)) == + (Regular(-60.02213698319351), Regular(60.02213698319374)) @test sampling(gdalarray, (Y, X)) == (Intervals(Start()), Intervals(Start())) # Bounds calculated in python using rasterio @@ -131,7 +151,7 @@ gdalpath = maybedownload(url) @testset "custom keywords" begin customgdalarray = Raster(gdalpath; name=:test, crs=EPSG(1000), mappedcrs=EPSG(4326), refdims=(Ti(),), - write=true, lazy=true, dropband=false, replace_missing=true, + write=true, lazy=true, dropband=false, ) @test name(customgdalarray) == :test @test refdims(customgdalarray) == (Ti(),) @@ -148,7 +168,7 @@ gdalpath = maybedownload(url) dimsgdalarray = Raster(gdalpath; dims=(Z(), X(), Y()), ) - @test dims(dimsgdalarray) isa Tuple{<:Z,X,Y} + @test dims(dimsgdalarray) isa Tuple{Z,X,Y} end @testset "indexing" begin @@ -167,7 +187,7 @@ gdalpath = maybedownload(url) @test gdalarray[Y(4.224e6..4.226e6), Band(1)] isa Raster end - @testset "methods" begin + @testset "methods" begin @testset "mean" begin @test all(mean(gdalarray; dims=Y) .=== mean(parent(gdalarray); dims=2)) end @@ -175,7 +195,7 @@ gdalpath = maybedownload(url) @testset "trim, crop, extend" begin a = read(replace_missing(gdalarray, zero(eltype(gdalarray)))) a[X(1:100)] .= missingval(a) - trimmed = trim(a) + trimmed = Rasters.trim(a) @test size(trimmed) == (414, 514) cropped = Rasters.crop(a; to=trimmed) @test size(cropped) == (414, 514) @@ -253,13 +273,19 @@ gdalpath = maybedownload(url) @time gdalarray = Raster(gdalpath; name=:test) A1 = gdalarray[X(1:300), Y(1:200)] A2 = gdalarray[X(57:500), Y(101:301)] - tempfile = tempname() * ".tif" - Afile = mosaic(first, A1, A2; missingval=0x00, atol=1e-8, filename=tempfile) - Amem = mosaic(first, A1, A2; missingval=0x00, atol=1e-8) + tempfile1 = tempname() * ".tif" + tempfile2 = tempname() * ".tif" + tempfile3 = tempname() * ".tif" + Afile = mosaic(first, A1, A2; missingval=0xff, atol=1e-8, filename=tempfile1) + Afile2 = mosaic(first, A1, A2; atol=1e-8, filename=tempfile2) + collect(Afile) + collect(Afile2) + @test missingval(Afile2) === missing + Amem = mosaic(first, A1, A2; missingval=0xff, atol=1e-8) Atest = gdalarray[X(1:500), Y(1:301)] - Atest[X(1:56), Y(201:301)] .= 0x00 - Atest[X(301:500), Y(1:100)] .= 0x00 - @test all(Atest .=== Amem .=== Afile) + Atest[X(1:56), Y(201:301)] .= 0xff + Atest[X(301:500), Y(1:100)] .= 0xff + @test all(Atest .=== Amem .=== Afile .=== replace_missing(Afile2, 0xff)) end end # methods @@ -269,8 +295,8 @@ gdalpath = maybedownload(url) @test size(geoA) == (50, 1) @test eltype(geoA) <: UInt8 @time geoA isa Raster{UInt8,1} - @test dims(geoA) isa Tuple{<:X,Y} - @test refdims(geoA) isa Tuple{<:Band} + @test dims(geoA) isa Tuple{X,Y} + @test refdims(geoA) isa Tuple{Band} @test metadata(geoA) == metadata(gdalarray) @test missingval(geoA) === nothing @test name(geoA) == :test @@ -281,9 +307,9 @@ gdalpath = maybedownload(url) @testset "2d asc" begin filename = tempname() * ".asc" - @time write(filename, gdalarray; force = true) + @time write(filename, gdalarray; force=true) saved1 = Raster(filename); - @test all(saved1 .== gdalarray) + @test all(parent(saved1 .== gdalarray)) # @test typeof(saved1) == typeof(geoA) @test val(dims(saved1, X)) ≈ val(dims(gdalarray, X)) @test val(dims(saved1, Y)) ≈ val(dims(gdalarray, Y)) @@ -391,15 +417,16 @@ gdalpath = maybedownload(url) end @testset "to grd" begin - write("testgrd.gri", gdalarray; force=true) - @test (@allocations write("testgrd.gri", gdalarray; force=true)) < 1e4 - grdarray = Raster("testgrd.gri") + fn = joinpath(tempdir(), tempname() * ".gri") + write(fn, gdalarray; force=true) + fn = joinpath(tempdir(), tempname() * ".gri") + @test (@allocations write(fn, gdalarray; force=true)) < 1e4 + grdarray = Raster(fn) @test crs(grdarray) == convert(ProjString, crs(gdalarray)) @test all(map((a, b) -> all(a .≈ b), bounds(grdarray), bounds(gdalarray))) @test index(grdarray, Y) ≈ index(gdalarray, Y) @test val(dims(grdarray, X)) ≈ val(dims(gdalarray, X)) @test grdarray == gdalarray - rm("testgrd.gri") end @testset "from Raster" begin @@ -439,10 +466,11 @@ gdalpath = maybedownload(url) end @testset "write missing" begin - A = read(replace_missing(gdalarray, missing)) + A = replace_missing(gdalarray, missing) filename = tempname() * ".tif" write(filename, A) - @test missingval(Raster(filename)) === typemax(UInt8) + @test missingval(Raster(filename)) === missing + @test missingval(Raster(filename; missingval)) === typemax(UInt8) rm(filename) end @@ -488,11 +516,11 @@ gdalpath = maybedownload(url) gdalarray[Y(1)] |> plot end - @testset "nodatavalue type matches the array type" begin + @testset "unmasked missingval type matches the array type" begin # Handle WorldClim/ucdavis unreliability A = nothing try - A = Raster(WorldClim{Climate}, :tavg; res="10m", month=1) + A = Raster(WorldClim{Climate}, :tavg; res="10m", month=1, missingval) catch end if !isnothing(A) @@ -681,8 +709,8 @@ end write(filename, gdalstack; force=true) base, ext = splitext(filename) filename_b = string(base, "_b", ext) - saved = read(Raster(filename_b)) - @test all(saved .== geoA) + saved = Raster(filename_b) + @test all(saved .=== geoA) end @testset "write multiple files with custom suffix" begin @@ -691,7 +719,7 @@ end base, ext = splitext(filename) filename_b = string(base, "_second", ext) saved = read(Raster(filename_b)) - @test all(saved .== geoA) + @test all(saved .=== geoA) end @testset "write netcdf" begin @@ -711,7 +739,6 @@ end gdalstack2 = RasterStack(filenames; lazy=true) @test DiskArrays.eachchunk(gdalstack2[:b])[1] == (1:128, 1:128) end - end @testset "show" begin @@ -746,21 +773,35 @@ end end ## Resample cea.tif using resample - raster_output = resample(gdalarray, output_res; crs=output_crs, method=resample_method) - disk_output = resample(gdalarray, output_res; crs=output_crs, method=resample_method, filename="resample.tif") - stack_output = resample(gdalstack, output_res; crs=output_crs, method=resample_method) - written_stack_output = resample(gdalstack, output_res; crs=output_crs, method=resample_method, filename="resample.tif") - series_output = resample(gdalser, output_res; crs=output_crs, method=resample_method) - + raster_output = resample(gdalarray, output_res; + crs=output_crs, method=resample_method, missingval=0xff=>0xff + ) + disk_output = resample(gdalarray, output_res; + crs=output_crs, method=resample_method, filename="resample.tif", missingval=0xff=>0xff + ) + stack_output = resample(gdalstack, output_res; + crs=output_crs, method=resample_method, missingval=0xff=>0xff + ) + written_stack_output = resample(gdalstack, output_res; + crs=output_crs, method=resample_method, filename="resample.tif", missingval=0xff=>0xff + ) + series_output = resample(gdalser, output_res; + crs=output_crs, method=resample_method, missingval=0xff=>0xff + ) extradim_raster = cat(gdalarray, gdalarray, gdalarray; dims=Z) - extradim_output = resample(extradim_raster, output_res; crs=output_crs, method=resample_method) + extradim_output = resample(extradim_raster, output_res; + crs=output_crs, method=resample_method, missingval=0xff=>0xff + ) permuted_raster = permutedims(gdalarray, (Y, X)) - permuted_output = resample(permuted_raster, output_res; crs=output_crs, method=resample_method) + permuted_output = resample(permuted_raster, output_res; + crs=output_crs, method=resample_method, missingval=0xff=>0xff + ) # Compare ArchGDAL, resample and permuted resample @test AG_output == - raster_output == disk_output == + raster_output == + disk_output == stack_output[:a] == written_stack_output[:a] == series_output[1] == @@ -867,20 +908,19 @@ end @testset "detect dimension from file name" begin tifser = RasterSeries([gdalpath, gdalpath], Ti([DateTime(2001), DateTime(2002)])) - mkpath("tifseries") - write("tifseries/test.tif", tifser; force=true) - @test isfile("tifseries/test_2001-01-01T00:00:00.tif") - @test isfile("tifseries/test_2002-01-01T00:00:00.tif") - ser1 = RasterSeries("tifseries", Ti(DateTime)) - ser2 = RasterSeries("tifseries", Ti(DateTime); lazy=true) - ser3 = RasterSeries("tifseries/test.tif", Ti(DateTime)) - ser4 = RasterSeries("tifseries", Ti(DateTime; order=ForwardOrdered()); ext=".tif") - ser5 = RasterSeries("tifseries/test", Ti(DateTime); ext=".tif") + path = mkpath(joinpath(tempdir(), tempname(), "tifseries")) + write(joinpath(path, "test.tif"), tifser; force=true) + @test isfile(joinpath(path, "test_2001-01-01T00:00:00.tif")) + @test isfile(joinpath(path, "test_2002-01-01T00:00:00.tif")) + ser1 = RasterSeries(path, Ti(DateTime)) + ser2 = RasterSeries(path, Ti(DateTime); lazy=true) + ser3 = RasterSeries(joinpath(path, "test.tif"), Ti(DateTime)) + ser4 = RasterSeries(path, Ti(DateTime; order=ForwardOrdered()); ext=".tif") + ser5 = RasterSeries(joinpath(path, "test"), Ti(DateTime); ext=".tif") @test dims(ser1) == dims(ser2) == dims(ser3) == dims(ser3) == dims(ser5) == dims(tifser) - @test_throws ErrorException RasterSeries("tifseries", Ti(Int)) - ser6 = RasterSeries("tifseries/test", Ti(DateTime; sampling=Intervals(Center())); ext=".tif") + @test_throws ErrorException RasterSeries(path, Ti(Int)) + ser6 = RasterSeries(joinpath(path, "test"), Ti(DateTime; sampling=Intervals(Center())); ext=".tif") @test sampling(ser6) == (Intervals(Center()),) - rm("tifseries"; recursive=true) end @testset "methods" begin @@ -897,7 +937,7 @@ end mv = zero(eltype(gdalser[1])) ser = read(replace_missing(gdalser, mv)) ser = map(A -> (view(A, X(1:100)) .= mv; A), ser) - trimmed = trim(ser) + trimmed = Rasters.trim(ser) @test size(trimmed[1]) == (414, 514) cropped = crop(ser; to=trimmed[1]) @test size(cropped[1]) == (414, 514) @@ -943,6 +983,7 @@ end @test crs(gdalarray) == wkt @test crs(gdalarray[Y(1)]) == wkt end + end diff --git a/test/sources/grd.jl b/test/sources/grd.jl index 26ae1ca03..ca3c72425 100644 --- a/test/sources/grd.jl +++ b/test/sources/grd.jl @@ -4,13 +4,13 @@ using DiskArrays import NCDatasets, ArchGDAL using Rasters: FileArray, GRDsource, GDALsource, metadata, trim -testpath = joinpath(dirname(pathof(Rasters)), "../test/") +testpath = joinpath(dirname(pathof(Rasters)), "..", "test") include(joinpath(testpath, "test_utils.jl")) const DD = DimensionalData maybedownload("https://raw.githubusercontent.com/rspatial/raster/master/inst/external/rlogo.grd", "rlogo.grd") maybedownload("https://github.com/rspatial/raster/raw/master/inst/external/rlogo.gri", "rlogo.gri") -stem = joinpath(testpath, "data/rlogo") +stem = joinpath(testpath, "data", "rlogo") @test isfile(stem * ".grd") @test isfile(stem * ".gri") grdpath = stem * ".gri" @@ -29,11 +29,16 @@ grdpath = stem * ".gri" @test parent(eagerarray) isa Array end - @testset "replace_missing keyword" begin - # Eager is the default - @time missingarray = Raster(grdpath; replace_missing=true) + @testset "missingval" begin + @time missingarray = Raster(grdpath) @test missingval(missingarray) === missing @test eltype(missingarray) === Union{Missing,Float32} + @time missingarray = Raster(grdpath; missingval) + @test missingval(missingarray) === -3.4f38 + @test eltype(missingarray) === Float32 + @time missingarray = Raster(grdpath; missingval=missingval => NaN32) + @test missingval(missingarray) === NaN32 + @test eltype(missingarray) === Float32 end @testset "open" begin @@ -60,7 +65,7 @@ grdpath = stem * ".gri" end @testset "array properties" begin - @test grdarray isa Raster{Float32,3} + @test grdarray isa Raster{Union{Missing,Float32},3} end @testset "dimensions" begin @@ -74,7 +79,7 @@ grdpath = stem * ".gri" @testset "other fields" begin proj = ProjString("+proj=merc +datum=WGS84") @test name(grdarray) == Symbol("red:green:blue") - @test missingval(grdarray) == -3.4f38 + @test missingval(grdarray) === missing @test metadata(grdarray) isa Metadata{GRDsource,Dict{String,Any}} @test label(grdarray) == "red:green:blue" @test units(grdarray) == nothing @@ -100,7 +105,7 @@ grdpath = stem * ".gri" @test mappedcrs(customgrdarray) == EPSG(4326) @test mappedcrs(dims(customgrdarray, Y)) == EPSG(4326) @test mappedcrs(dims(customgrdarray, X)) == EPSG(4326) - @test parent(customgrdarray) isa DiskArrays.BroadcastDiskArray + @test parent(customgrdarray) isa Rasters.FileArray @test eltype(customgrdarray) == Union{Float32,Missing} # Needs to be separate as it overrides crs/mappedcrs dimsgrdarray = Raster(grdpath; @@ -110,9 +115,9 @@ grdpath = stem * ".gri" end @testset "getindex" begin - @test grdarray[Band(1)] isa Raster{Float32,2} - @test grdarray[Y(1), Band(1)] isa Raster{Float32,1} - @test grdarray[X(1), Band(1)] isa Raster{Float32,1} + @test grdarray[Band(1)] isa Raster{Union{Missing,Float32},2} + @test grdarray[Y(1), Band(1)] isa Raster{Union{Missing,Float32},1} + @test grdarray[X(1), Band(1)] isa Raster{Union{Missing,Float32},1} @test grdarray[X(50), Y(30), Band(1)] == 115.0f0 @test grdarray[1, 1, 1] == 255.0f0 @test grdarray[Y(At(20.0; atol=1e10)), X(At(20; atol=1e10)), Band(3)] == 255.0f0 @@ -130,13 +135,13 @@ grdpath = stem * ".gri" @test size(cropped) == (81, 77, 3) kwcropped = crop(a; to=trimmed, dims=(X,)) @test size(kwcropped) == (81, size(a,Y), 3) - @test all(collect(cropped .== trimmed)) + @test all(collect(cropped .=== trimmed)) extended = extend(cropped; to=a); - @test all(collect(extended .== a)) + @test all(collect(extended .=== a)) end @testset "mask and mask! to disk" begin - msk = read(replace_missing(grdarray, missing)) + msk = replace_missing(grdarray, missing) msk[X(1:73), Y([1, 5, 77])] .= missingval(msk) @test !any(grdarray[X(1:73)] .=== missingval(msk)) masked = mask(grdarray; with=msk) @@ -151,8 +156,6 @@ grdpath = stem * ".gri" mask!(A; with=msk, missingval=missingval(A)) end @test all(Raster(tempgri)[X(1:73), Y([1, 5, 77])] .=== missingval(grdarray)) - rm(tempgrd) - rm(tempgri) end @testset "classify! to disk" begin @@ -176,62 +179,54 @@ grdpath = stem * ".gri" tn = tempname() tempgrd = tn * ".grd" tempgri = tn * ".gri" - cp(stem * ".grd", tempgrd) - cp(stem * ".gri", tempgri) - Afile = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1, filename=tempgrd) + Afile = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1, filename=tempgrd, force=true) Amem = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1) Atest = grdarray[X(1:80), Y(1:60)] Atest[X(1:26), Y(31:60)] .= 0.0f0 Atest[X(41:80), Y(1:24)] .= 0.0f0 @test size(Atest) == size(Afile) == size(Amem) @test all(Atest .=== Amem .== Afile) - end - - @testset "rasterize" begin - # A = read(grdarray) - # R = rasterize(A; to=A) - # @test all(A .=== R .== grdarray) - # B = rebuild(read(grdarray) .= 0x00; missingval=0x00) - # rasterize!(B, read(grdarray)) - # @test all(B .=== grdarray |> collect) + read(Atest .- Afile) end end @testset "selectors" begin geoA = grdarray[Y(Contains(3)), X(:), Band(1)] - @test geoA isa Raster{Float32,1} + @test geoA isa Raster{Union{Missing,Float32},1} @test grdarray[X(Contains(20)), Y(Contains(10)), Band(1)] isa Float32 end @testset "conversion to Raster" begin geoA = grdarray[X(1:50), Y(1:1), Band(1)] @test size(geoA) == (50, 1) - @test eltype(geoA) <: Float32 - @time geoA isa Raster{Float32,1} + @test eltype(geoA) <: Union{Missing,Float32} + @time geoA isa Raster{Union{Missing,Float32},1} @test dims(geoA) isa Tuple{<:X,Y} @test refdims(geoA) isa Tuple{<:Band} @test metadata(geoA) == metadata(grdarray) - @test missingval(geoA) == -3.4f38 + @test missingval(geoA) === missing @test name(geoA) == Symbol("red:green:blue") end @testset "write" begin @testset "2d" begin filename2 = tempname() * ".gri" - write(filename2, grdarray[Band(1)]; force = true) + write(filename2, grdarray[Band(1)]; force=true) saved = Raster(filename2) # 1 band is added again on save @test size(saved) == size(grdarray[Band(1)]) @test parent(saved) == parent(grdarray[Band(1)]) - write(filename2, grdarray; force = true) - @test (@allocations write(filename2, grdarray; force = true)) < 3e3 + filename2 = tempname() * ".gri" + write(filename2, grdarray; force=true, verbose=false) + filename2 = tempname() * ".gri" + @test (@allocations write(filename2, grdarray; force=true, verbose=false)) < 3e3 end @testset "3d with subset" begin geoA = grdarray[1:100, 1:50, 1:2] filename = tempname() * ".grd" - write(filename, GRDsource(), geoA; force = true) + write(filename, GRDsource(), geoA; force=true) saved = Raster(filename) @test size(saved) == size(geoA) @test refdims(saved) == () @@ -251,24 +246,25 @@ grdpath = stem * ".gri" @test all(parent(saved) .=== parent(geoA)) @test saved isa typeof(geoA) @test parent(saved) == parent(geoA) + filename = tempname() * ".grd" write(filename, GRDsource(), geoA; force = true) + filename = tempname() * ".grd" @test (@allocations write(filename, GRDsource(), geoA; force = true)) < 3e3 end @testset "to netcdf" begin filename2 = tempname() * ".nc" span(grdarray[Band(1)]) - write(filename2, grdarray[Band(1)]; force = true) + write(filename2, grdarray[Band(1)]; force=true) saved = Raster(filename2; crs=crs(grdarray)) @test size(saved) == size(grdarray[Band(1)]) - @test all(parent(replace_missing(saved, missingval(grdarray))) .≈ parent(grdarray[Band(1)])) + @test all(parent(saved) .≈ parent(grdarray[Band(1)])) @test index(saved, X) ≈ index(grdarray, X) .+ 0.5 @test index(saved, Y) ≈ index(grdarray, Y) .+ 0.5 @test bounds(saved, Y) == bounds(grdarray, Y) @test bounds(saved, X) == bounds(grdarray, X) write(filename2, grdarray; force = true) @test (@allocations write(filename2, grdarray; force = true)) < 3e3 - end @testset "to gdal" begin @@ -280,7 +276,7 @@ grdpath = stem * ".gri" # @test convert(ProjString, crs(gdalarray)) == convert(ProjString, EPSG(4326)) @test val(dims(gdalarray, X)) ≈ val(dims(grdarray, X)) @test val(dims(gdalarray, Y)) ≈ val(dims(grdarray, Y)) - @test Raster(gdalarray) ≈ permutedims(grdarray[Band(1)], [X(), Y()]) + @test gdalarray ≈ replace_missing(permutedims(grdarray[Band(1)], [X(), Y()]), typemin(Int32)) # 3 Bands gdalfilename2 = tempname() * ".tif" write(gdalfilename2, grdarray) @@ -293,8 +289,11 @@ grdpath = stem * ".gri" A = replace_missing(grdarray, missing) filename = tempname() * ".grd" write(filename, A) - @test missingval(Raster(filename)) === typemin(Float32) - rm(filename) + @test missingval(Raster(filename)) === missing + filename = tempname() * ".grd" + write(filename, A) + @test missingval(Raster(filename; missingval=nothing)) === nothing + @test missingval(Raster(filename)) === missing end end @@ -350,7 +349,7 @@ end @testset "child array properties" begin @test size(grdstack[:a]) == size(Raster(grdstack[:a])) == (101, 77, 3) - @test grdstack[:a] isa Raster{Float32,3} + @test grdstack[:a] isa Raster{Union{Missing,Float32},3} end # Stack Constructors @@ -417,7 +416,7 @@ end @testset "child array properties" begin @test size(grdstack[:Band_3]) == size(Raster(grdstack[:Band_3])) == (101, 77) - @test grdstack[:Band_1] isa Raster{Float32,2} + @test grdstack[:Band_1] isa Raster{Union{Missing,Float32},2} end # Stack Constructors @@ -463,7 +462,7 @@ end end @testset "Grd series" begin - grdpath2 = stem * "2" * ".gri" + grdpath2 = joinpath(tempdir(), tempname() * ".gri") write(grdpath2, 2 .* Raster(grdpath); force=true) Raster(grdpath) .* 2 == Raster(grdpath2) eager_grdseries = RasterSeries([grdpath, grdpath2], (Ti,); mappedcrs=EPSG(4326)) diff --git a/test/sources/gribdatasets.jl b/test/sources/gribdatasets.jl index 3779e68d7..255984b3d 100644 --- a/test/sources/gribdatasets.jl +++ b/test/sources/gribdatasets.jl @@ -25,6 +25,9 @@ gribexamples_dir = abspath(joinpath(dirname(pathof(GRIBDatasets)), "..", "test", era5 = joinpath(gribexamples_dir, "era5-levels-members.grib") +ds = GRIBDatasets.GRIBDataset(era5) +v = ds[:z] + @testset "Raster" begin @time gribarray = Raster(era5) @time lazyarray = Raster(era5; lazy=true) @@ -87,8 +90,8 @@ era5 = joinpath(gribexamples_dir, "era5-levels-members.grib") end @testset "cf attributes" begin - z = lazystack[:z] - @test metadata(z)["standard_name"] == "geopotential" + z = lazystack.z + @test metadata(z)["cfName"] == "geopotential" @test metadata(lazystack)["Conventions"] == "CF-1.7" x = dims(lazystack, :X) diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index 225fe7fcd..4c49ad607 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -3,7 +3,7 @@ using Rasters, DimensionalData, Test, Statistics, Dates, CFTime, Plots using Rasters.Lookups, Rasters.Dimensions using Rasters.DiskArrays import ArchGDAL, NCDatasets -using Rasters: FileArray, FileStack, NCDsource, crs, bounds, name, trim +using Rasters: FileArray, FileStack, NCDsource, crs, bounds, name, trim, metadata testdir = realpath(joinpath(dirname(pathof(Rasters)), "../test")) include(joinpath(testdir, "test_utils.jl")) @@ -31,22 +31,21 @@ stackkeys = ( ) @testset "grid mapping" begin - stack = RasterStack(joinpath(testdir, "data/grid_mapping_test.nc")) - @test metadata(stack.mask)["grid_mapping"] == Dict{String, Any}( - "straight_vertical_longitude_from_pole" => 0.0, - "false_easting" => 0.0, - "standard_parallel" => -71.0, - "inverse_flattening" => 298.27940504282, - "latitude_of_projection_origin" => -90.0, - "grid_mapping_name" => "polar_stereographic", - "semi_major_axis" => 6.378273e6, - "false_northing" => 0.0, + st = RasterStack(joinpath(testdir, "data/grid_mapping_test.nc")) + @test metadata(st.mask)["grid_mapping"] == Dict{String, Any}( + "straight_vertical_longitude_from_pole" => 0.0, + "false_easting" => 0.0, + "standard_parallel" => -71.0, + "inverse_flattening" => 298.27940504282, + "latitude_of_projection_origin" => -90.0, + "grid_mapping_name" => "polar_stereographic", + "semi_major_axis" => 6.378273e6, + "false_northing" => 0.0, ) end @testset "Raster" begin - @time ncarray = Raster(ncsingle) - + @time ncarray = Raster(ncsingle); @time lazyarray = Raster(ncsingle; lazy=true) @time eagerarray = Raster(ncsingle; lazy=false) @test_throws ArgumentError Raster("notafile.nc") @@ -59,6 +58,37 @@ end @time read(lazyarray); end + @testset "scaling and masking" begin + @time cfarray = Raster(ncsingle) + @time cfarray = Raster(ncsingle) + @time cf_nomask_array = Raster(ncsingle; missingval=nothing) + @time nocfarray = Raster(ncsingle; scaled=false) + @time nocf_nomask_array = Raster(ncsingle; scaled=false, missingval=nothing) + @time raw_array = Raster(ncsingle; raw=true) + @time lazycfarray = Raster(ncsingle; lazy=true, scaled=false) + @time lazynocfarray = Raster(ncsingle; lazy=true, scaled=false) + @time lazynocf_nomask_array = Raster(ncsingle; lazy=true, scaled=false, missingval=nothing) + @test missingval(cfarray) === missing + @test missingval(nocfarray) === missing + @test missingval(cf_nomask_array) === nothing + @test missingval(nocf_nomask_array) === nothing + @test missingval(raw_array) === 1.0f20 + @test all(skipmissing(cfarray) .=== skipmissing(nocfarray)) + @test parent(cfarray) isa Array{Union{Float32,Missing}} + @test parent(nocfarray) isa Array{Union{Float32,Missing}} + @test parent(nocf_nomask_array) isa Array{Float32} + @test parent(raw_array) isa Array{Float32} + open(lazycfarray) do A + @test parent(A) isa Rasters.ModifiedDiskArray{false,Union{Missing,Float32}} + end + open(lazynocfarray) do A + @test parent(A) isa Rasters.ModifiedDiskArray{false,Union{Missing,Float32}} + end + open(lazynocf_nomask_array) do A + @test parent(parent(A)) isa NCDatasets.Variable{Float32} + end + end + # @testset "from url" begin # # TODO we need a permanent url here that doesn't end in .nc # url = "http://apdrc.soest.hawaii.edu:80/dods/public_data/Reanalysis_Data/NCEP/NCEP2/daily/surface/mslp" @@ -85,12 +115,14 @@ end @testset "handle empty variables" begin st = RasterStack((empty=view(ncarray, 1, 1, 1), full=ncarray)) empty_test = tempname() * ".nc" - write(empty_test, st) + write(empty_test, st; force=true) + rast = Raster(empty_test) st = RasterStack(empty_test) - @test name(rast) == name(st[:empty]) == :empty - @test size(rast) == size(st[:empty]) == () - @test all(st[:full] .=== ncarray) + @test name(rast) == name(st.empty) == :empty + @test size(rast) == size(st.empty) == () + @test st.empty[] === missing + @test all(st.full .=== ncarray) st = RasterStack(empty_test; lazy=true) end @@ -178,11 +210,12 @@ end A1 = ncarray[X(1:80), Y(1:100)] A2 = ncarray[X(50:150), Y(90:150)] tempfile = tempname() * ".nc" - Afile = mosaic(first, read(A1), read(A2); missingval=missing, atol=1e-7, filename=tempfile) - Amem = mosaic(first, A1, A2; missingval=missing, atol=1e-7) + Afile = mosaic(first, read(A1), read(A2); atol=1e-7, filename=tempfile, force=true) + Amem = mosaic(first, A1, A2; atol=1e-7) Atest = ncarray[X(1:150), Y(1:150)] Atest[X(1:49), Y(101:150)] .= missing Atest[X(81:150), Y(1:89)] .= missing + read(Afile) @test all(Atest .=== Afile .=== Amem) end @testset "slice" begin @@ -239,16 +272,16 @@ end end @testset "write" begin - @testset "to netcdf" begin - filename = tempname() * ".nc" - write(filename, ncarray; force=true) - @test (@allocations write(filename, ncarray; force=true)) < 1e4 + @testset "to netcdf" begin + fn = tempname() * ".nc" + write(fn, ncarray; force=true); + @test (@allocations write(fn, ncarray; force=true)) < 1e4 @testset "CF attributes" begin - @test NCDatasets.Dataset(filename)[:x].attrib["axis"] == "X" - @test NCDatasets.Dataset(filename)[:x].attrib["bounds"] == "x_bnds" + @test NCDatasets.Dataset(fn)[:x].attrib["axis"] == "X" + @test NCDatasets.Dataset(fn)[:x].attrib["bounds"] == "x_bnds" # TODO better units and standard name handling end - saved = Raster(filename) + saved = Raster(fn) @test size(saved) == size(ncarray) @test refdims(saved) == refdims(ncarray) @test missingval(saved) === missingval(ncarray) @@ -256,7 +289,8 @@ end all(s .== g) end |> all @test metadata(saved) == metadata(ncarray) - @test_broken all(metadata(dims(saved))[2] == metadata.(dims(ncarray))[2]) + # Bounds variable names are renamed so metadata is different + @test_broken all(metadata(dims(saved))[1] == metadata(dims(ncarray))[1]) @test Rasters.name(saved) == Rasters.name(ncarray) @test all(lookup.(dims(saved)) .== lookup.(dims(ncarray))) @test all(order.(dims(saved)) .== order.(dims(ncarray))) @@ -313,8 +347,9 @@ end end @testset "non allowed values" begin - @test_throws ArgumentError write(filename, convert.(Union{Missing,Float16}, ncarray); force=true) + @test_throws ArgumentError write(fn, convert.(Union{Missing,Float16}, ncarray); force=true) end + end @testset "to gdal" begin @@ -322,7 +357,7 @@ end nccleaned = replace_missing(ncarray[Ti(1)], -9999.0) write(gdalfilename, nccleaned; force=true) @test (@allocations write(gdalfilename, nccleaned; force=true)) < 1e4 - gdalarray = Raster(gdalfilename) + gdalarray = Raster(gdalfilename; missingval=nothing) # gdalarray WKT is missing one AUTHORITY # @test_broken crs(gdalarray) == convert(WellKnownText, EPSG(4326)) # But the Proj representation is the same @@ -336,26 +371,27 @@ end @testset "to grd" begin nccleaned = replace_missing(ncarray[Ti(1)], -9999.0) - write("testgrd.gri", nccleaned; force=true) - @test (@allocations write("testgrd.gri", nccleaned; force=true)) < 1e4 - grdarray = Raster("testgrd.gri"); + fn = tempname() * ".gri" + write(fn, nccleaned; force=true) + fn = tempname() * ".gri" + @test (@allocations write(fn, nccleaned; force=true)) < 1e4 + grdarray = Raster(fn, missingval=nothing); @test crs(grdarray) == convert(ProjString, EPSG(4326)) @test bounds(grdarray) == bounds(nccleaned) @test index(grdarray, Y) ≈ reverse(index(nccleaned, Y)) .- 0.5 @test index(grdarray, X) ≈ index(nccleaned, X) .- 1.0 @test parent(reverse(grdarray; dims=Y)) ≈ parent(nccleaned) - rm("testgrd.gri") - rm("testgrd.grd") end @testset "write points" begin + filename = tempname() * ".nc" lon, lat = X(25:1:30), Y(25:1:30) ti = Ti(DateTime(2001):Month(1):DateTime(2002)) ras = Raster(rand(lon, lat, ti)) - write("point_rast.nc", ras; force=true) - saved = Raster("point_rast.nc") + write(filename, ras; force=true) + saved = Raster(filename) @test sampling(saved) == (Points(), Points(), Points()) - @test @allocations(write("point_rast.nc", ras; force=true)) < 10e3 + @test @allocations(write(filename, ras; force=true)) < 10e3 end end @@ -498,10 +534,10 @@ end @test parent(st) isa NamedTuple @test first(parent(st)) isa Array length(dims(st[:aclcac])) - filename = tempname() * ".nc" - write(filename, st; force=true) - @test (@allocations write(filename, st; force=true)) < 1e6 # writing a rasterseries/stack has no force keyword - saved = RasterStack(RasterStack(filename)) + fn = tempname() * ".nc" + write(fn, st; force=true) + @test (@allocations write(fn, st; force=true)) < 1e6 # writing a rasterseries/stack has no force keyword + saved = RasterStack(RasterStack(fn)) @test keys(saved) == keys(st) @test metadata(saved)["advection"] == "Lin & Rood" @test metadata(saved) == metadata(st) == metadata(ncstack) @@ -543,14 +579,15 @@ end rm("test_2.nc") end -# Groups -if !haskey(ENV, "CI") - path = joinpath(testdir, "data/SMAP_L4_SM_gph_20160101T223000_Vv4011_001.h5") - stack = RasterStack(path; group="Geophysical_Data") - lazy_stack = RasterStack(path; group="Geophysical_Data", lazy=true) - rast = Raster(path; name=:surface_temp, group="Geophysical_Data") - lazy_rast = Raster(path; name=:surface_temp, group="Geophysical_Data", lazy=true) - @test all(stack[:surface_temp] .=== read(lazy_stack[:surface_temp]) .=== rast .=== read(lazy_rast)) +h5path = joinpath(testdir, "data/SMAP_L4_SM_gph_20160101T223000_Vv4011_001.h5") +if !haskey(ENV, "CI") && isfile(h5path) + @testset "HDF5 with Groups" begin + stack = RasterStack(h5path; group="Geophysical_Data") + lazy_stack = RasterStack(path; group="Geophysical_Data", lazy=true) + rast = Raster(path; name=:surface_temp, group="Geophysical_Data") + lazy_rast = Raster(path; name=:surface_temp, group="Geophysical_Data", lazy=true) + @test all(stack[:surface_temp] .=== read(lazy_stack[:surface_temp]) .=== rast .=== read(lazy_rast)) + end end nothing diff --git a/test/sources/rasterdatasources.jl b/test/sources/rasterdatasources.jl index c673bceea..f1b9afdab 100644 --- a/test/sources/rasterdatasources.jl +++ b/test/sources/rasterdatasources.jl @@ -2,23 +2,24 @@ using Rasters, RasterDataSources, Test, Dates, ArchGDAL, NCDatasets, Proj # Too big to test on CI # if !haskey(ENV, "CI") -# @testset "load WorldClim Weather" begin -# # Weather time-series -# dates = (Date(2001), Date(2002)) -# ser = RasterSeries(WorldClim{Weather}, (:prec,); date=dates) -# ser[Date(2001, 1)][:prec] -# A = Raster(WorldClim{Weather}, :prec; date=DateTime(2001, 05), mappedcrs=EPSG(4326)) -# end + # @testset "load WorldClim Weather" begin + # # Weather time-series + # ser = RasterSeries(WorldClim{Weather}, (:prec,); + # date=(Date(2001), Date(2002)), missingval=NaN32 + # ) + # @test all(ser[At(Date(2001, 1))].prec .=== + # Raster(WorldClim{Weather}, :prec; date=DateTime(2001), missingval=NaN32)) + # end # end @testset "load WorldClim Climate" begin # Weather time-series - ser = RasterSeries(WorldClim{Climate}, :prec; res="10m", month=Jan:March, mappedcrs=EPSG(4326)) + ser = RasterSeries(WorldClim{Climate}, :prec; res="10m", month=Jan:March, mappedcrs=EPSG(4326), raw=true) # Select Australia, using regular lat/lon selectors A = ser[month=Jan] @test A isa Raster A[Y(Between(-10, -45)), X(Between(110, 160))] - st = RasterStack(WorldClim{Climate}, (:prec, :tmax); month=1) + st = RasterStack(WorldClim{Climate}, (:prec, :tmax); month=1, raw=true) @test st[:prec] == A @test missingval(st) == (prec=-32768, tmax=-3.4f38) @test st isa RasterStack{(:prec,:tmax),@NamedTuple{prec::Int16,tmax::Float32},2} @@ -28,15 +29,16 @@ end A = Raster(WorldClim{BioClim}, :Bio_1; mappedcrs=EPSG(4326)) A[Y(Between(-10, -45)), X(Between(110, 160))] @test A isa Raster + @test missingval(A) === missing st = RasterStack(WorldClim{BioClim}, (1, 2)) - st[:bio1] + @test all(st.bio1 .=== A) @test st isa RasterStack @test A isa Raster # Future Bioclim works st = RasterStack(WorldClim{Future{BioClim, CMIP6, GFDL_ESM4, SSP370}}, (1, 2); date = Date(2050), res = "10m", lazy=true, - missingval=Inf, + missingval=Inf32, crs=nothing, mappedcrs=EPSG(4326), ) @@ -53,19 +55,19 @@ end @test Rasters.name(A) == :bio1 st = RasterStack(CHELSA{BioClim}, (:bio1, :BIO2); lazy=true) @test keys(st) == (:bio1, :bio2) - @test A isa Raster + @test A isa Raster{Float64,2} @test st isa RasterStack - @test st[:bio2] isa Raster - # Allow forcing keywords - st = RasterStack(CHELSA{BioClim}, (1, 2); - lazy=true, - missingval= Int16(9999), - metadata=Rasters.NoMetadata(), - crs=nothing, - mappedcrs=EPSG(4326), - ) - @test missingval(st) === UInt16(9999) - @test missingval(st.bio1) === UInt16(9999) + @test st.bio2 isa Raster{Float64,2} + + A = Raster(CHELSA{BioClim}, 1; lazy=true, raw=true) + st = RasterStack(CHELSA{BioClim}, (:bio1, :BIO2); lazy=true, raw=true) + @test A isa Raster{UInt16,2} + @test st isa RasterStack{(:bio1, :bio2),@NamedTuple{bio1::UInt16, bio2::UInt16}} + @test st.bio2 isa Raster{UInt16,2} + + st = RasterStack(CHELSA{BioClim}, (1, 2); lazy=true) + + @test missingval(st) === missingval(st.bio1) === nothing @test metadata(st) == Rasters.NoMetadata() end @@ -98,7 +100,6 @@ end @test crs(st[:s0_pct]) == EPSG(4326) dates = DateTime(2019, 10, 19), DateTime(2021, 11, 20) s = RasterSeries(ALWB{Values,Day}, (:s0_pct, :ss_pct); date=dates, lazy=true) - s[1] @test A isa Raster @test st isa RasterStack @test s isa RasterSeries diff --git a/test/sources/zarr.jl b/test/sources/zarr.jl index 8075d0f94..c823136d8 100644 --- a/test/sources/zarr.jl +++ b/test/sources/zarr.jl @@ -1,4 +1,8 @@ using Rasters +using DimensionalData +using DimensionalData.Lookups +using DimensionalData.Dimensions +using Dates using ZarrDatasets using ZarrDatasets.Zarr using Rasters: FileArray, FileStack, Zarrsource, crs, bounds, name, trim @@ -6,11 +10,10 @@ using Rasters: FileArray, FileStack, Zarrsource, crs, bounds, name, trim path = "https://s3.bgc-jena.mpg.de:9000/esdl-esdc-v3.0.2/esdc-16d-2.5deg-46x72x1440-3.0.2.zarr" @testset "Zarr Raster open" begin - -zraster = Raster(path, name="air_temperature_2m") -lazyarray = Raster(path, lazy=true, name="air_temperature_2m") -eagerarray = Raster(path, lazy=false, name="air_temperature_2m") +zraster = Raster(path; name="air_temperature_2m") +lazyarray = Raster(path; lazy=true, name="air_temperature_2m") +eagerarray = Raster(path; lazy=false, name="air_temperature_2m") @test_throws ArgumentError Raster("notafile.zarr/") @testset "lazyness" begin @@ -19,14 +22,15 @@ eagerarray = Raster(path, lazy=false, name="air_temperature_2m") @test parent(lazyarray) isa FileArray @test parent(eagerarray) isa Array end + @testset "read" begin @time A = read(lazyarray); @test A isa Raster @test parent(A) isa Array A2 = copy(A) .= 0 - @time read!(ncarray, A2); + @time read!(zraster, A2); A3 = copy(A) .= 0 - @time read!(ncsingle, A3) + @time read!(zraster, A3) @test all(A .=== A2) @test all(A .=== A3) end @@ -37,24 +41,27 @@ end @test index(zraster,X) == collect(-178.75:2.5:178.75) # TODO the spatial bounds are strange, because the data is point data # We should find a dataset that has actual intervals + @test bounds(zraster) == ( (-178.75, 178.75), - (-88.75, 88,75), + (-88.75, 88.75), (DateTime("1979-01-09T00:00:00"), DateTime("2021-12-27T00:00:00")), ) end + @testset "dimensions" begin @test ndims(zraster) == 3 @test length.(dims(zraster)) == (144, 72, 989) @test dims(zraster) isa Tuple{<:X,<:Y,<:Ti} @test refdims(zraster) == () - @test val.(span(ncarray)) == (2.5, 2.5, (nothing, nothing)) - @test typeof(lookup(ncarray)) <: Tuple{<:Mapped,<:Mapped,<:Sampled} + @test val.(span(zraster)) == (2.5, 2.5, (nothing, nothing)) + @test typeof(lookup(zraster)) <: Tuple{<:Mapped,<:Mapped,<:Sampled} end + @testset "other fields" begin @test ismissing(missingval(zraster)) - @test metadata(r)["original_name"] == "t2m" - @test metadata(zraster) isa Metadata{<:Rasters.CDMsource, Dict{String, Any}} + @test metadata(zraster)["original_name"] == "t2m" + @test metadata(zraster) isa Rasters.Metadata{<:Rasters.CDMsource, Dict{String, Any}} @test name(zraster) == :air_temperature_2m end @@ -69,4 +76,66 @@ end @test zraster[Ti(At(DateTime(1979,1,9))), X(At(-178.75)), Y(At(-88.75))] == -28.866226f0 end +@testset "CF conventions" begin + path = tempname() * ".zarr" + global_attrs = Dict( + "Conventions" => "CF-1.7", + "_FillValue" => -999.0, + "units" => "K", + "project" => "GOES", + ) + zg = zgroup(path; attrs = global_attrs) + x_attrs = Dict( + "units" => "rad", + "add_offset" => 0, + "_ARRAY_DIMENSIONS" => ["x"], + "axis" => "X", + "long_name" => "GOES fixed grid projection x-coordinate", + "scale_factor" => 1, + "_Netcdf4Dimid" => 1, + "standard_name" => "projection_x_coordinate", + "NAME" => "x", + ) + xs = zcreate(Float64, zg, "x", 100; attrs = x_attrs) + xs .= LinRange(-5.434177815866809e6, 5.322929839864517e6, 100) + + y_attrs = Dict( + "units" => "rad", + "add_offset" => 5.258454335331338e6, + "_ARRAY_DIMENSIONS" => ["y"], + "axis" => "Y", + "long_name" => "GOES fixed grid projection y-coordinate", + "scale_factor" => -1.0625617981243342e7, + "_Netcdf4Dimid" => 1, + "standard_name" => "projection_y_coordinate", + "NAME" => "y", + ) + ys = zcreate(Float64, zg, "y", 100; attrs = y_attrs) + ys .= LinRange(0, 1, 100) + + val_attrs = Dict( + "_Netcdf4Dimid" => 0, + "scale_factor" => 2.0, + "add_offset" => 180.0, + "_FillValue" => -1, + "units" => "K", + "_ARRAY_DIMENSIONS" => ["y", "x"], + "grid_mapping" => "goes_imager_projection", + "valid_range" => [0, -6], + ) + vals = zcreate(Float64, zg, "values", 100, 100; attrs = val_attrs) + vals .= (data = rand(100, 100)) + vals[1, 1] = 1.0 + vals[end, end] = 0.0 + + ra = Raster(path) + + @test extrema(ra) == (180.0, 182.0) # test scale and offset + @test Rasters.GeoInterface.extent(ra) == Rasters.GeoInterface.Extent(X = (-5.434177815866809e6, 5.322929839864517e6), Y = (-5.367163645912004e6, 5.258454335331338e6)) + @test Rasters.isreverse(ra.dims[2]) + @test Rasters.isforward(ra.dims[1]) + @test extrema(ra.dims[1]) == extrema(xs) + @test extrema(ra.dims[2]) == reverse(extrema(ys)) .* y_attrs["scale_factor"] .+ y_attrs["add_offset"] +end + end diff --git a/test/warp.jl b/test/warp.jl index 760a1b620..a2e09dd27 100644 --- a/test/warp.jl +++ b/test/warp.jl @@ -9,17 +9,29 @@ gdalpath = maybedownload(url) # test that warp actually does *something* r = Raster(gdalpath) crs_ = crs(r).val - warped = warp(r, Dict(:t_srs => "EPSG:25832")) - @test warped isa Raster - @test size(warped) == (720,721) - # the crs is way off, the image is rotated - all four corners should be black - @test warped[1,1] == warped[1,end] == warped[end,1] == warped[end,end] == 0 - # now compute mean squared error of the back transformation - warped_back = warp(warped, Dict(:t_srs => crs_)) - # get rid of black border - ex = extrema(findall(>(0), warped_back)) - cropped = warped_back[ex[1]:ex[2]] - # subtracting UInts brings us into hell -> Int - diff_ = parent(Int.(cropped[2:end-1, 2:end-1])) .- parent(r) - @test sum(x->x^2, diff_) / prod(size(diff_)) < 600 + @testset "default missing" begin + warped = warp(r, Dict(:t_srs => "EPSG:25832")) + @test warped isa Raster + @test size(warped) == (720, 721) + # the crs is rotated so the image is rotated an all four corners should be missing + missingval(warped) === missing + @test warped[1, 1] === warped[1, end] === warped[end, 1] === warped[end, end] === missing === missingval(warped) + end + + @testset "custom missing" begin + warped = warp(r, Dict(:t_srs => "EPSG:25832"); missingval=0xff => 0xff) + @test warped isa Raster + @test size(warped) == (720, 721) + # the crs is rotated so the image is rotated an all four corners should be missing + missingval(warped) === 0xff + @test warped[1, 1] === warped[1, end] === warped[end, 1] === warped[end, end] === 0xff === missingval(warped) + # now compute mean squared error of the back transformation + res = map(step, lookup(r)) + warped_back = Rasters.trim(warp(warped, Dict(:t_srs => crs_); res, missingval=0xff)) + # subtracting UInts brings us into hell -> Int + # we also need to shrink the range because of some bleed during warp + diff_ = parent(warped_back[2:end-1, 2:end-1]) .- r + + @test sum(x -> x^2, diff_) / prod(size(diff_)) < 600 + end end