From e0661d50d4df210f4e54c2e544b4b47990043bb1 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 18 Mar 2024 20:55:30 +0100 Subject: [PATCH 01/13] update cdm and bugfix (#607) * update cdm and bugfix * fix AbstractDiskArray * bugfix ncdatasets * return ncarray... * more cdm updates --- ext/RastersArchGDALExt/RastersArchGDALExt.jl | 4 +- ext/RastersArchGDALExt/gdal_source.jl | 28 ++- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 93 +++++--- src/Rasters.jl | 7 +- src/array.jl | 125 +++++++---- src/methods/shared_docstrings.jl | 19 ++ src/series.jl | 4 +- src/sources/commondatamodel.jl | 59 ++--- src/sources/grd.jl | 22 +- src/stack.jl | 212 ++++++++++++------ src/utils.jl | 1 + src/write.jl | 71 ++++-- test/sources/ncdatasets.jl | 2 +- test/sources/rasterdatasources.jl | 15 +- 14 files changed, 435 insertions(+), 227 deletions(-) diff --git a/ext/RastersArchGDALExt/RastersArchGDALExt.jl b/ext/RastersArchGDALExt/RastersArchGDALExt.jl index 315cfb19e..48b419318 100644 --- a/ext/RastersArchGDALExt/RastersArchGDALExt.jl +++ b/ext/RastersArchGDALExt/RastersArchGDALExt.jl @@ -16,11 +16,11 @@ using DimensionalData, using Rasters.Lookups using Rasters.Dimensions -using Rasters: GDALsource, AbstractProjected, RasterStackOrArray, FileArray, +using Rasters: GDALsource, AbstractProjected, RasterStackOrArray, FileArray, NoKW, RES_KEYWORD, SIZE_KEYWORD, CRS_KEYWORD, FILENAME_KEYWORD, SUFFIX_KEYWORD, EXPERIMENTAL, GDAL_EMPTY_TRANSFORM, GDAL_TOPLEFT_X, GDAL_WE_RES, GDAL_ROT1, GDAL_TOPLEFT_Y, GDAL_ROT2, GDAL_NS_RES -import Rasters: reproject, resample, warp, cellsize +import Rasters: reproject, resample, warp, cellsize, nokw const RA = Rasters const DD = DimensionalData diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index d879f4c52..57e52eedb 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -36,9 +36,9 @@ const GDAL_VIRTUAL_FILESYSTEMS = "/vsi" .* ( # Array ######################################################################## -function RA.FileArray{GDALsource}(raster::AG.RasterDataset{T}, filename; kw...) where {T} - eachchunk, haschunks = DA.eachchunk(raster), DA.haschunks(raster) - RA.FileArray{GDALsource,T,3}(filename, size(raster); eachchunk, haschunks, kw...) +function RA.FileArray{GDALsource}(ds::AG.RasterDataset{T}, filename; kw...) where {T} + eachchunk, haschunks = DA.eachchunk(ds), DA.haschunks(ds) + RA.FileArray{GDALsource,T,3}(filename, size(ds); eachchunk, haschunks, kw...) end RA.cleanreturn(A::AG.RasterDataset) = Array(A) @@ -46,15 +46,24 @@ RA.haslayers(::GDALsource) = false RA._sourcetrait(A::AG.RasterDataset) = GDALsource() """ - Base.write(filename::AbstractString, ::GDALsource, A::AbstractRaster; force=false, kw...) + Base.write(filename::AbstractString, ::GDALsource, A::AbstractRaster; kw...) Write a `Raster` to file using GDAL. +This method is called automatically if you `write` a `Raster` +with a `filename` extension that no other backend can read. + +GDAL is the fallback, and reads a lot of file types, but is not guaranteed to work. + # Keywords -- `driver`: A GDAL driver name or a GDAL driver retrieved via `ArchGDAL.getdriver(drivername)`. Guessed from the filename extension by default. -- `options::Dict{String,String}`: A dictionary containing the dataset creation options passed to the driver. 
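The rewritten GDAL `write` docstring above describes the `force`, `driver` and `options` keywords. A minimal usage sketch of those keywords, assuming the ArchGDAL backend is loaded and using an arbitrary temporary output path:

```julia
using Rasters, ArchGDAL

# A small in-memory raster; GDAL guesses the GeoTIFF driver from the ".tif" extension.
rast = Raster(rand(Float32, 100, 100), (X(1:100), Y(1:100)); name=:example)

# `force=true` overwrites an existing file; `options` is passed to the GDAL driver.
write(joinpath(tempdir(), "example.tif"), rast;
    force=true,
    options=Dict("COMPRESS" => "DEFLATE"),
)
```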
For example: `Dict("COMPRESS"=>"DEFLATE")`\n - Valid options for the drivers can be looked up here: https://gdal.org/drivers/raster/index.html +$(RA.FORCE_KEYWORD) +- `driver`: A GDAL driver name or a GDAL driver retrieved via `ArchGDAL.getdriver(drivername)`. + Guessed from the filename extension by default. +- `options::Dict{String,String}`: A dictionary containing the dataset creation options passed to the driver. + For example: `Dict("COMPRESS" => "DEFLATE")`. + +Valid `options` for each specific `driver` can be looked up here: https://gdal.org/drivers/raster/index.html Returns `filename`. """ @@ -122,7 +131,7 @@ RA._open(f, ::GDALsource, ds::AG.RasterDataset; kw...) = RA.cleanreturn(f(ds)) # These methods are type piracy on DimensionalData/ArchGDAL and may have to move some day # We allow passing in crs and mappedcrs manually -function RA._dims(raster::AG.RasterDataset, crs=nothing, mappedcrs=nothing) +function RA._dims(raster::AG.RasterDataset, crs=nokw, mappedcrs=nokw) gt_dims = try AG.getgeotransform(raster) catch @@ -138,7 +147,8 @@ function RA._dims(raster::AG.RasterDataset, crs=nothing, mappedcrs=nothing) Band(Categorical(bandnames; order=Unordered())) end - crs = crs isa Nothing ? Rasters.crs(raster) : crs + crs = crs isa NoKW ? Rasters.crs(raster) : crs + mappedcrs = mappedcrs isa NoKW ? nothing : mappedcrs xy_metadata = metadata(raster) # Output Sampled index dims when the transformation is lat/lon alligned, diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index 17c855c1e..4d107d101 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -4,15 +4,44 @@ const UNNAMED_NCD_FILE_KEY = "unnamed" const NCDAllowedType = Union{Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Float32,Float64,Char,String} +const NCD_WRITE_KEYWORDS = """ +$(RA.FORCE_KEYWORD) +$(RA.VERBOSE_KEYWORD) +- `append`: If true, the variable of the current Raster will be appended + to `filename`, if it actually exists. + +The following keywords are passed to `NCDatasets.defVar`: + +- `chunksizes`: Vector of integers setting the chunk size. The total size of a + chunk must be less than 4 GiB. +- `deflatelevel`: Compression level: `0` (default) means no compression and `9` + means maximum compression. Each chunk will be compressed individually. +- `shuffle`: If `true`, the shuffle filter is activated which can improve the + compression ratio. +- `checksum`: The checksum method can be `:fletcher32` or `:nochecksum`, + the default. +- `typename`: The name of the NetCDF type required for vlen arrays + (https://web.archive.org/save/https://www.unidata.ucar.edu/software/netcdf/netcdf-4/newdocs/netcdf-c/nc_005fdef_005fvlen.html) +""" + """ Base.write(filename::AbstractString, ::NCDsource, A::AbstractRaster) Write an NCDarray to a NetCDF file using NCDatasets.jl +This method is called automatically if you `write` a `Raster` where `filename` +has the `.nc` extension. You do not need to invoke this method manually. + +## Keywords + +$NCD_WRITE_KEYWORDS + Returns `filename`. """ -function Base.write(filename::AbstractString, ::NCDsource, A::AbstractRaster; - append=false, force=false, verbose=true, kw... +function Base.write(filename::AbstractString, ::NCDsource, A::AbstractRaster; + append=false, + force=false, + kw... ) mode = if append isfile(filename) ? "a" : "c" @@ -35,37 +64,29 @@ end """ Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStack; kw...) 
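The NetCDF `write` methods above now document `force`, `append` and the keywords forwarded to `NCDatasets.defVar`. A brief sketch of how they combine, assuming NCDatasets is loaded; paths and variable names are arbitrary:

```julia
using Rasters, NCDatasets

path = joinpath(tempdir(), "climate.nc")
temp = Raster(rand(Float32, 50, 50), (X(1:50), Y(1:50)); name=:temperature)

# `force=true` replaces any existing file; `deflatelevel` and `shuffle`
# are forwarded to `NCDatasets.defVar` to compress each chunk.
write(path, temp; force=true, deflatelevel=4, shuffle=true)

# `append=true` adds another variable to the same file rather than recreating it.
pressure = Raster(rand(Float32, 50, 50), (X(1:50), Y(1:50)); name=:pressure)
write(path, pressure; append=true)
```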
-Write an NCDstack to a single netcdf file, using NCDatasets.jl. - -Currently `Metadata` is not handled for dimensions, and `Metadata` from other -[`AbstractRaster`](@ref) @types is ignored. +Write a `RasterStack` to a single netcdf file, using NCDatasets.jl. -# Keywords +## Keywords -Keywords are passed to `NCDatasets.defVar`. +$NCD_WRITE_KEYWORDS -- `append`: If true, the variable of the current Raster will be appended to - `filename`. Note that the variable of the current Raster should be not exist - before. If not, you need to set `append = false`. Rasters.jl can not - overwrite a previous existing variable. -- `fillvalue`: A value filled in the NetCDF file to indicate missing data. It - will be stored in the `_FillValue` attribute. -- `chunksizes`: Vector integers setting the chunk size. The total size of a - chunk must be less than 4 GiB. -- `deflatelevel`: Compression level: 0 (default) means no compression and 9 - means maximum compression. Each chunk will be compressed individually. -- `shuffle`: If true, the shuffle filter is activated which can improve the - compression ratio. -- `checksum`: The checksum method can be `:fletcher32` or `:nochecksum` - (checksumming is disabled, which is the default) - - `typename` (string): The name of the NetCDF type required for vlen arrays - (https://web.archive.org/save/https://www.unidata.ucar.edu/software/netcdf/netcdf-4/newdocs/netcdf-c/nc_005fdef_005fvlen.html) +Currently `Metadata` is not handled for individual dimensions, +and `Metadata` coming from other [`Source`](@ref) types is ignored. """ -function Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStack; append = false, kw...) - mode = !isfile(filename) || !append ? "c" : "a"; +function Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStack; + append=false, + force=false, + kw... +) + mode = if append + isfile(filename) ? "a" : "c" + else + RA.check_can_write(filename, force) + "c" + end ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(s))) try - map(key -> _writevar!(ds, s[key]), keys(s); kw...) + map(key -> _writevar!(ds, s[key]; kw...), keys(s)) finally close(ds) end @@ -86,13 +107,21 @@ function RA._open(f, ::NCDsource, filename::AbstractString; write=false, kw...) end # Add a var array to a dataset before writing it. -function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; kw...) where {T,N} +function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; + verbose=true, + missingval=missingval(A), + kw... +) where {T,N} _def_dim_var!(ds, A) attrib = RA._attribdict(metadata(A)) # Set _FillValue eltyp = Missings.nonmissingtype(T) - eltyp <: NCDAllowedType || throw(ArgumentError("$eltyp cannot be written to NetCDF, convert to one of $(Base.uniontypes(NCDAllowedType))")) - if ismissing(missingval(A)) + eltyp <: NCDAllowedType || throw(ArgumentError(""" + Element type $eltyp cannot be written to NetCDF. Convert it to one of $(Base.uniontypes(NCDAllowedType)), + usually by broadcasting the desired type constructor over the `Raster`, e.g. `newrast = Float32.(rast)`")) + """ + )) + if ismissing(missingval) fillval = if haskey(attrib, "_FillValue") && attrib["_FillValue"] isa eltyp attrib["_FillValue"] else @@ -101,9 +130,9 @@ function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; kw...) 
where {T attrib["_FillValue"] = fillval A = replace_missing(A, fillval) elseif missingval(A) isa T - attrib["_FillValue"] = missingval(A) + attrib["_FillValue"] = missingval else - missingval(A) isa Nothing || @warn "`missingval` $(missingval(A)) is not the same type as your data $T." + verbose && !(missingval isa Nothing) && @warn "`missingval` $(missingval) is not the same type as your data $T." end key = if string(DD.name(A)) == "" diff --git a/src/Rasters.jl b/src/Rasters.jl index daa7548ac..073ebe34a 100644 --- a/src/Rasters.jl +++ b/src/Rasters.jl @@ -89,12 +89,18 @@ const EXPERIMENTAL = """ const DEFAULT_POINT_ORDER = (X(), Y()) const DEFAULT_TABLE_DIM_KEYS = (:X, :Y) +# To use when `nothing` is a valid keyword value +struct NoKW end +const nokw = NoKW() + +include("methods/shared_docstrings.jl") include("lookup.jl") include("dimensions.jl") include("sources/sources.jl") include("filearray.jl") include("filestack.jl") include("openstack.jl") +include("sources/sources.jl") include("array.jl") include("stack.jl") include("series.jl") @@ -123,7 +129,6 @@ include("methods/burning/polygon.jl") include("methods/burning/extents.jl") include("methods/burning/utils.jl") -include("methods/shared_docstrings.jl") include("methods/mask.jl") include("methods/rasterize.jl") include("methods/aggregate.jl") diff --git a/src/array.jl b/src/array.jl index 30ce93375..30b145c4e 100644 --- a/src/array.jl +++ b/src/array.jl @@ -24,7 +24,8 @@ abstract type AbstractRaster{T,N,D,A} <: AbstractDimArray{T,N,D,A} end Returns the value representing missing data in the dataset """ function missingval end -missingval(x) = missing +missingval(_) = missing +missingval(::AbstractArray{T}) where T = Missing <: T ? missing : nothing missingval(A::AbstractRaster) = A.missingval # The filename might be buried somewhere in a DiskArray wrapper, so try to get it @@ -171,9 +172,9 @@ end """ Raster <: AbsractRaster - Raster(filepath::AbstractString, dims; kw...) - Raster(A::AbstractArray{T,N}, dims; kw...) - Raster(A::AbstractRaster; kw...) + Raster(filepath::String; kw...) + Raster(A::AbstractDimArray; kw...) + Raster(A::AbstractArray, dims; kw...) A generic [`AbstractRaster`](@ref) for spatial/raster array data. It may hold memory-backed arrays or [`FileArray`](@ref), that simply holds the `String` path @@ -181,35 +182,52 @@ to an unopened file. This will only be opened lazily when it is indexed with `ge or when `read(A)` is called. Broadcasting, taking a view, reversing and most other methods _do not_ load data from disk: they are applied later, lazily. +An `AbatractArray` for spatial/raster data. + +It may hold memory-backed arrays or, when `lazy=true` a [`FileArray`](@ref) +that simply holds the `String` path to an unopened file. + +WIth `lazy=true` the file will be opened lazily when it is indexed with `getindex` +or when `read(A)` is called. Broadcasting, taking a view, reversing and most other +methods _will not_ load data from disk: they are applied later, lazily. + +# Arguments + +- `dims`: `Tuple` of `Dimension`s needed when an `AbstractArray` is used. + # Keywords -- `dims`: `Tuple` of `Dimension`s for the array. -- `lazy`: A `Bool` specifying if to load the stack lazily from disk. `false` by default. -- `name`: `Symbol` name for the array, which will also retreive named layers if `Raster` - is used on a multi-layered file like a NetCDF. -- `missingval`: value reprsenting missing data, normally detected form the file. 
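The `NoKW` struct and `nokw` singleton added to `src/Rasters.jl` above exist so that `nothing` can stay a meaningful keyword value (for example `missingval=nothing`) while a separate sentinel marks "not passed". A stand-alone sketch of that pattern with hypothetical names, independent of the Rasters internals:

```julia
# Sentinel marking "the caller did not pass this keyword".
struct NotPassed end
const notpassed = NotPassed()

# `nothing` remains available as a real value meaning "no missing value".
function describe_missingval(data; missingval=notpassed)
    missingval === notpassed && return "detected from data"
    missingval === nothing && return "explicitly no missing value"
    return "user value: $missingval"
end

describe_missingval([1, 2, 3])                      # "detected from data"
describe_missingval([1, 2, 3]; missingval=nothing)  # "explicitly no missing value"
describe_missingval([1, 2, 3]; missingval=-9999)    # "user value: -9999"
```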
Set manually +- `name`: a `Symbol` name for the array, which will also retreive named layers if `Raster` + is used on a multi-layered file like a NetCDF. `name` becomes the layer name if the `Raster` + is combined into a `RasterStack`. +- `missingval`: value reprsenting missing data, normally detected from the file. Set manually when you know the value is not specified or is incorrect. This will *not* change any values in the raster, it simply assigns which value is treated as missing. To replace all of the missing values in the raster, use [`replace_missing`](@ref). -- `metadata`: `ArrayMetadata` object for the array, or `NoMetadata()`. +- `metadata`: `Dict` or `Metadata` object for the array, or `NoMetadata()`. - `crs`: the coordinate reference system of the objects `XDim`/`YDim` dimensions. Only set this if you know the detected crs is incrorrect, or it is not present in - the file. The `crs` is expected to be a GeoFormatTypes.jl `CRS` or `Mixed` `GeoFormat` type. + the file. The `crs` is expected to be a GeoFormatTypes.jl `CRS` or `Mixed` mode `GeoFormat` object, + like `EPSG(4326)`. - `mappedcrs`: the mapped coordinate reference system of the objects `XDim`/`YDim` dimensions. for `Mapped` lookups these are the actual values of the index. For `Projected` lookups this can be used to index in eg. `EPSG(4326)` lat/lon values, having it converted automatically. Only set this if the detected `mappedcrs` in incorrect, or the file does not have a `mappedcrs`, - e.g. a tiff. The `mappedcrs` is expected to be a GeoFormatTypes.jl `CRS` or `Mixed` `GeoFormat` type. -- `dropband`: drop single band dimensions. `true` by default. - -# Internal Keywords + e.g. a tiff. The `mappedcrs` is expected to be a GeoFormatTypes.jl `CRS` or `Mixed` mode `GeoFormat` type. +- `refdims`: `Tuple of` position `Dimension`s the array was sliced from, defaulting to `()`. + Usually not needed. -In some cases it is possible to set these keywords as well. +When a filepath `String` is used: +$DROPBAND_KEYWORD +$LAZY_KEYWORD +$SOURCE_KEYWORD +- `write`: defines the default `write` keyword value when calling `open` on the Raster. `false` by default. + Only makes sense to use when `lazy=true`. -- `data`: can replace the data in an `AbstractRaster` -- `refdims`: `Tuple of` position `Dimension`s the array was sliced from, defaulting to `()`. +When A is an `AbstractDimArray`: +- `data`: can replace the data in an existing `AbstractRaster` """ -struct Raster{T,N,D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Na,Me,Mi} <: AbstractRaster{T,N,D,A} +struct Raster{T,N,D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Na,Me,Mi<:Union{T,Nothing}} <: AbstractRaster{T,N,D,A} data::A dims::D refdims::R @@ -218,12 +236,16 @@ struct Raster{T,N,D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Na,Me,Mi} <: AbstractR missingval::Mi end function Raster(A::AbstractArray{T,N}, dims::Tuple; - refdims=(), name=Symbol(""), metadata=NoMetadata(), missingval=missing, - crs=nothing, mappedcrs=nothing + refdims=(), + name=Symbol(""), + metadata=NoMetadata(), + missingval=Missing <: T ? missing : nothing, + crs=nokw, + mappedcrs=nokw )::Raster{T,N} where {T,N} A = Raster(A, Dimensions.format(dims, A), refdims, name, metadata, missingval) - A = isnothing(crs) ? A : setcrs(A, crs) - A = isnothing(mappedcrs) ? A : setmappedcrs(A, mappedcrs) + A = crs isa NoKW ? A : setcrs(A, crs) + A = mappedcrs isa NoKW ? 
A : setmappedcrs(A, mappedcrs) return A end function Raster(A::AbstractArray{T,1}, dims::Tuple{<:Dimension,<:Dimension,Vararg}; @@ -231,54 +253,71 @@ function Raster(A::AbstractArray{T,1}, dims::Tuple{<:Dimension,<:Dimension,Varar )::Raster{T,length(dims)} where T Raster(reshape(A, map(length, dims)), dims; kw...) end -function Raster(table, dims::Tuple; name=first(_not_a_dimcol(table, dims)), kw...)::Raster +function Raster(table, dims::Tuple; + name=nokw, + kw... +) Tables.istable(table) || throw(ArgumentError("First argument to `Raster` is not a table or other known object: $table")) - isnothing(name) && throw(UndefKeywordError(:name)) + name = name isa NoKW ? first(_not_a_dimcol(table, dims)) : name cols = Tables.columns(table) A = reshape(cols[name], map(length, dims)) return Raster(A, dims; name, kw...) end Raster(A::AbstractArray; dims, kw...) = Raster(A, dims; kw...)::Raster function Raster(A::AbstractDimArray; - data=parent(A), dims=dims(A), refdims=refdims(A), - name=name(A), metadata=metadata(A), missingval=missingval(A), kw... + data=parent(A), + dims=dims(A), + refdims=refdims(A), + name=name(A), + metadata=metadata(A), + missingval=missingval(A), + kw... )::Raster return Raster(data, dims; refdims, name, metadata, missingval, kw...) end -function Raster(filename::AbstractString, dims::Tuple{<:Dimension,<:Dimension,Vararg}; kw...)::Raster +function Raster(filename::AbstractString, dims::Tuple{<:Dimension,<:Dimension,Vararg}; + kw... +)::Raster Raster(filename; dims, kw...) end -function Raster(filename::AbstractString; - name=nothing, key=name, source=nothing, kw... -)::Raster +function Raster(filename::AbstractString; source=nothing, kw...)::Raster source = _sourcetrait(filename, source) Base.invokelatest() do _open(filename; source) do ds - key = filekey(ds, key) - Raster(ds, filename, key; source, kw...) + Raster(ds, filename; source, kw...) end::Raster end::Raster end -function Raster(ds, filename::AbstractString, key=nothing; - crs=nothing, mappedcrs=nothing, dims=nothing, refdims=(), - name=Symbol(key isa Nothing ? "" : string(key)), - source=nothing, write=false, lazy=false, dropband=true, - metadata=_metadata(ds), missingval=missingval(ds) +function Raster(ds, filename::AbstractString; + crs=nokw, + mappedcrs=nokw, + dims=nokw, + refdims=(), + name=nokw, + metadata=nokw, + missingval=nokw, + source=nothing, + write=false, + lazy=false, + dropband=true, )::Raster + name1 = filekey(ds, name) source = _sourcetrait(filename, source) crs = defaultcrs(source, crs) mappedcrs = defaultmappedcrs(source, mappedcrs) - data, dims = _open(source, ds; key) do var - dims1 = dims isa Nothing ? _dims(var, crs, mappedcrs) : dims + data1, dims1, metadata1, missingval1 = _open(source, ds; key=name1) do var + dims1 = dims isa NoKW ? _dims(var, crs, mappedcrs) : dims + metadata1 = metadata isa NoKW ? _metadata(var) : metadata + missingval1 = missingval isa NoKW ? Rasters.missingval(var) : missingval data = if lazy - FileArray{typeof(source)}(var, filename; key, write) + FileArray{typeof(source)}(var, filename; key=name1, write) else _checkmem(var) Array(var) end - data, dims1 + data, dims1, metadata1, missingval1 end - raster = Raster(data, dims, refdims, name, metadata, missingval) + raster = Raster(data1, dims1, refdims, name1, metadata1, missingval1) return dropband ? 
_drop_single_band(raster, lazy) : raster end diff --git a/src/methods/shared_docstrings.jl b/src/methods/shared_docstrings.jl index bc8c1f912..0db255c1a 100644 --- a/src/methods/shared_docstrings.jl +++ b/src/methods/shared_docstrings.jl @@ -44,6 +44,9 @@ $CRS_KEYWORD $SHAPE_KEYWORDS """ +const LAZY_KEYWORD = """ +- `lazy`: A `Bool` specifying if to load data lazily from disk. `false` by default. +""" const FILENAME_KEYWORD = """ - `filename`: a filename to write to directly, useful for large files. """ @@ -57,3 +60,19 @@ const PROGRESS_KEYWORD = """ const VERBOSE_KEYWORD = """ - `vebose`: whether to print messages about potential problems. `true` by default. """ +const SOURCE_KEYWORD = """ +- `source`: Usually automatically detected from filepath extension. + To manually force, a `Symbol` can be passed `:gdal`, `:netcdf`, `:grd`, `:grib`. + The internal [`Rasters.Source`](@ref) objects, such as `Rasters.GDALsource()`, + `Rasters.GRIBsource()` or `Rasters.NCDsource()` can also be used. +""" +const EXT_KEYWORD = """ +- `ext`: filename extension such as ".tiff" or ".nc". + Used to specify specific files if only a directory path is used. +""" +const FORCE_KEYWORD = """ +- `force`: `false` by default. If `true` it force writing to a file destructively, even if it already exists. +""" +const DROPBAND_KEYWORD = """ +- `dropband`: drop single band dimensions when creating stacks from filenames. `true` by default. +""" diff --git a/src/series.jl b/src/series.jl index 4d9bb28a1..b6d0d8f4d 100644 --- a/src/series.jl +++ b/src/series.jl @@ -99,13 +99,11 @@ When loading a series from a Vector of `String` paths or a single `String` path: - `duplicate_first::Bool`: wether to duplicate the dimensions and metadata of the first file with all other files. This can save load time with a large series where dimensions are identical. `false` by default. -- `lazy`: load files lazily, `false` by default. +$LAZY_KEYWORD - `kw`: keywords passed to the child constructor [`Raster`](@ref) or [`RasterStack`](@ref). When loading a series from a single `String` path: -- `ext`: filename extension such as ".tiff" or ".nc". - Use to specify a subset of files if only a directory path is passed in. - `separator`: separator used to split lookup elements from the rest of a filename. '_' by default. diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 5b57ff8ab..e7edc387c 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -90,28 +90,7 @@ for method in (:attrib, :dim) end # Rasters methods for CDM types ############################### - -# This is usually called inside a closure and cleaned up in `cleanreturn` -function Raster(ds::AbstractDataset, filename::AbstractString, key::Nothing=nothing; - source=nothing, kw... -) - source = isnothing(source) ? _sourcetrait(filename) : _sourcetrait(source) - # Find the first valid variable - layers = _layers(ds) - for (key, var) in zip(layers.keys, layers.vars) - if ndims(var) > 0 - @info "No `name` or `key` keyword provided, using first valid layer with name `:$key`" - return Raster(CFDiskArray(var), filename, key; source, kw...) - end - end - throw(ArgumentError("dataset at $filename has no array variables")) -end -function Raster(ds::AbstractDataset, filename::AbstractString, key::Union{AbstractString,Symbol}; - source=nothing, kw... -) - return Raster(CFDiskArray(ds[key]), filename, key; source) -end - +# function FileArray{source}(var::AbstractVariable, filename::AbstractString; kw...) 
where source<:CDMsource eachchunk = DA.eachchunk(var) haschunks = DA.haschunks(var) @@ -137,12 +116,11 @@ function Base.open(f::Function, A::FileArray{source}; write=A.write, kw...) wher end end -function _open(f, ::CDMsource, ds::AbstractDataset; key=nothing, kw...) - x = key isa Nothing ? ds : CFDiskArray(ds[_firstkey(ds, key)]) +function _open(f, ::CDMsource, ds::AbstractDataset; key=nokw, kw...) + x = key isa NoKW ? ds : CFDiskArray(ds[_firstname(ds, key)]) cleanreturn(f(x)) end _open(f, ::CDMsource, var::CFDiskArray; kw...) = cleanreturn(f(var)) -# _open(f, ::CDMsource, var::CDM.CFVariable; kw...) = cleanreturn(f(CFDiskArray(var))) # TODO fix/test this for RasterStack function create(filename, source::CDMsource, T::Union{Type,Tuple}, dims::DimTuple; @@ -164,6 +142,7 @@ function create(filename, source::CDMsource, T::Union{Type,Tuple}, dims::DimTupl return Raster(filename; source=source, lazy) end +filekey(ds::AbstractDataset, key) = _firstname(ds, key) missingval(var::AbstractDataset) = missing missingval(var::AbstractVariable{T}) where T = missing isa T ? missing : nothing cleanreturn(A::AbstractVariable) = Array(A) @@ -171,23 +150,28 @@ haslayers(::CDMsource) = true defaultcrs(::CDMsource) = EPSG(4326) defaultmappedcrs(::CDMsource) = EPSG(4326) -function _layers(ds::AbstractDataset, ::Nothing=nothing) - dimkeys = CDM.dimnames(ds) - toremove = if "bnds" in dimkeys - dimkeys = setdiff(dimkeys, ("bnds",)) - boundskeys = String[] - for k in dimkeys +function _nondimnames(ds) + dimnames = CDM.dimnames(ds) + toremove = if "bnds" in dimnames + dimnames = setdiff(dimnames, ("bnds",)) + boundsnames = String[] + for k in dimnames var = ds[k] attr = CDM.attribs(var) if haskey(attr, "bounds") - push!(boundskeys, attr["bounds"]) + push!(boundsnames, attr["bounds"]) end end - union(dimkeys, boundskeys)::Vector{String} + union(dimnames, boundsnames)::Vector{String} else - dimkeys::Vector{String} + dimnames::Vector{String} end nondim = setdiff(keys(ds), toremove) + return nondim +end + +function _layers(ds::AbstractDataset, ::Nothing=nothing) + nondim = _nondimnames(ds) grid_mapping = String[] vars = map(k -> ds[k], nondim) attrs = map(CDM.attribs, vars) @@ -253,8 +237,9 @@ end # Utils ######################################################################## # TODO dont load all keys here with _layers -_firstkey(ds::AbstractDataset, key::Nothing=nothing) = Symbol(first(_layers(ds).keys)) -_firstkey(ds::AbstractDataset, key) = Symbol(key) +_firstname(ds::AbstractDataset, key) = Symbol(key) +_firstname(ds::AbstractDataset, key::NoKW=nokw) = + Symbol(first(_nondimnames(ds))) function _cdmdim(ds, dimname::Key, crs=nothing, mappedcrs=nothing) if haskey(ds, dimname) @@ -281,7 +266,7 @@ function _cdmfinddimlen(ds, dimname) return size(var)[findfirst(==(dimname), dimnames)] end end - return nothing + return nothsng end # Find the matching dimension constructor. 
If its an unknown name diff --git a/src/sources/grd.jl b/src/sources/grd.jl index f68a52ac5..191355d0d 100644 --- a/src/sources/grd.jl +++ b/src/sources/grd.jl @@ -38,7 +38,7 @@ end attrib(grd::GRDattrib) = grd.attrib filename(grd::GRDattrib) = grd.filename -filekey(grd::GRDattrib, key::Nothing) = get(attrib(grd), "layername", Symbol("")) +filekey(grd::GRDattrib, key::NoKW) = get(attrib(grd), "layername", Symbol("")) function _dims(grd::GRDattrib, crs=nothing, mappedcrs=nothing) attrib = grd.attrib @@ -142,15 +142,24 @@ end # Base methods """ - Base.write(filename::AbstractString, ::Type{GRDsource}, s::AbstractRaster; force=false) + Base.write(filename::AbstractString, ::Type{GRDsource}, s::AbstractRaster; kw...) -Write a `Raster` to a .grd file with a .gri header file. -The extension of `filename` will be ignored. +Write a `Raster` to a .grd file with a .gri header file. -Returns `filename`. +This method is called automatically if you `write` a `Raster` +with a `.grd` or `.gri` extension. + +## Keywords + +$FORCE_KEYWORD + +If this method is called directly the extension of `filename` will be ignored. + +Returns the base of `filename` with a `.grd` extension. """ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; - force=false, verbose=true, kw... + force=false, + kw... ) check_can_write(filename, force) A = _maybe_use_type_missingval(A, GRDsource()) @@ -221,7 +230,6 @@ function _write_grd(filename, T, dims, missingval, minvalue, maxvalue, name) end end - function create(filename, ::GRDsource, T::Type, dims::DD.DimTuple; name="layer", metadata=nothing, missingval=nothing, keys=(name,), lazy=true, ) diff --git a/src/stack.jl b/src/stack.jl index 6599e4e5b..b9fe833d4 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -77,7 +77,6 @@ function DD.rebuild_from_arrays( end function DD.rebuild_from_arrays( s::AbstractRasterStack, das::NamedTuple{<:Any,<:Tuple{Vararg{AbstractDimArray}}}; - data=map(parent, das), refdims=refdims(s), metadata=DD.metadata(s), dims=nothing, @@ -85,7 +84,8 @@ function DD.rebuild_from_arrays( layermetadata=map(DD.metadata, das), missingval=map(missingval, das), ) - if isnothing(dims) + data = map(parent, das) + if dims isa Nothing # invokelatest avoids compiling this for other paths Base.invokelatest() do dims = DD.combinedims(collect(das)) @@ -108,7 +108,9 @@ function Base.getindex(s::AbstractRasterStack, key::Symbol) data_ = parent(s)[key] dims_ = dims(s, DD.layerdims(s, key)) metadata = DD.layermetadata(s, key) - Raster(data_, dims_, refdims(s), key, metadata, missingval(s, key)) + mv = missingval(s, key) + mv = mv isa eltype(data_) ? mv : nothing + Raster(data_, dims_, refdims(s), key, metadata, mv) end # Key + Index @propagate_inbounds @inline function Base.getindex(s::AbstractRasterStack, key::Symbol, i1, I...) @@ -124,18 +126,18 @@ end RasterStack(data...; name, kw...) RasterStack(data::Union{Vector,Tuple}; name, kw...) RasterStack(data::NamedTuple; kw...)) - RasterStack(s::AbstractRasterStack; kw...) - RasterStack(s::AbstractRaster; layersfrom=Band, kw...) - RasterStack(filename::AbstractString; kw...) + RasterStack(data::RasterStack; kw...) + RasterStack(data::Raster; layersfrom=Band, kw...) + RasterStack(filepath::AbstractString; kw...) Load a file path or a `NamedTuple` of paths as a `RasterStack`, or convert arguments, a `Vector` or `NamedTuple` of `Raster`s to `RasterStack`. # Arguments -- `data`: A `NamedTuple` of [`Raster`](@ref)s, or a `Vector`, `Tuple` or splatted arguments - of [`Raster`](@ref). 
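The stack `getindex` methods above rebuild a single-layer `Raster`, keeping the stack-level `missingval` only when its type matches the layer eltype. A short usage sketch with arbitrary layer names:

```julia
using Rasters

xy = (X(1:20), Y(1:20))
st = RasterStack((temp=rand(Float32, 20, 20), rain=rand(Float32, 20, 20)), xy)

layer = st[:temp]                    # a Raster holding one layer
patch = st[:temp, X(1:5), Y(1:5)]    # layer selection combined with indexing
size(patch)                          # (5, 5)
```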
The latter options must pass a `name` keyword argument. -- `filename`: A file (such as netcdf or tif) to be loaded as a stack, or a directory path +- `data`: A `NamedTuple` of [`Raster`](@ref)s or `String`, or a `Vector`, `Tuple` or splatted + arguments of [`Raster`](@ref). The latter options must pass a `name` keyword argument. +- `filepath`: A file (such as netcdf or tif) to be loaded as a stack, or a directory path containing multiple files. # Keywords @@ -143,12 +145,30 @@ Load a file path or a `NamedTuple` of paths as a `RasterStack`, or convert argum - `name`: Used as stack layer names when a `Tuple`, `Vector` or splat of `Raster` is passed in. Has no effect when `NameTuple` is used - the `NamedTuple` keys are the layer names. - `metadata`: A `Dict` or `DimensionalData.Metadata` object. +- `missingval`: a single value for all layers or a `NamedTuple` of + missingval for each layer. `nothing` specifies no missing value. +- `crs`: the coordinate reference system of the objects `XDim`/`YDim` dimensions. + Only set this if you know the detected crs is incrorrect, or it is not present in + the file. The `crs` is expected to be a GeoFormatTypes.jl `CRS` or `Mixed` mode `GeoFormat` object, + like `EPSG(4326)`. +- `mappedcrs`: the mapped coordinate reference system of the objects `XDim`/`YDim` dimensions. + for `Mapped` lookups these are the actual values of the index. For `Projected` lookups + this can be used to index in eg. `EPSG(4326)` lat/lon values, having it converted automatically. + Only set this if the detected `mappedcrs` in incorrect, or the file does not have a `mappedcrs`, + e.g. a tiff. The `mappedcrs` is expected to be a GeoFormatTypes.jl `CRS` or `Mixed` mode `GeoFormat` type. - `refdims`: `Tuple` of `Dimension` that the stack was sliced from. + +For when one or multiple filepaths are used: + +$DROPBAND_KEYWORD +$LAZY_KEYWORD +$SOURCE_KEYWORD + +For when a single `Raster` is used: + - `layersfrom`: `Dimension` to source stack layers from if the file is not already multi-layered. `nothing` is default, so that a single `RasterStack(raster)` is a single layered stack. `RasterStack(raster; layersfrom=Band)` will use the bands as layers. -- `lazy`: A `Bool` specifying whether to load the stack lazily from disk. `false` by default. -- `dropband`: drop single band dimensions when creating stacks from filenames. `true` by default. ```julia files = (temp="temp.tif", pressure="pressure.tif", relhum="relhum.tif") @@ -168,12 +188,18 @@ end # Multi-file stack from strings function RasterStack( filenames::Union{AbstractArray{<:AbstractString},Tuple{<:AbstractString,Vararg}}; - name=map(filekey, filenames), keys=name, kw... + name=map(filekey, filenames), + kw... ) - RasterStack(NamedTuple{cleankeys(Tuple(keys))}(filenames); kw...) + RasterStack(NamedTuple{cleankeys(Tuple(name))}(filenames); kw...) end function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}}; - crs=nothing, mappedcrs=nothing, source=nothing, lazy=false, dropband=true, kw... + lazy=false, + dropband=true, + source=nothing, + crs=nokw, + mappedcrs=nokw, + kw... ) where K layers = map(keys(filenames), values(filenames)) do key, fn source = _sourcetrait(fn, source) @@ -192,19 +218,25 @@ function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}}; end md = _metadata(ds) mv = missingval(ds) - raster = Raster(data, dims; name=key, metadata=md, missingval=mv) + raster = Raster(data, dims; name=key, metadata=md, missingval=mv) return dropband ? 
_drop_single_band(raster, lazy) : raster end end RasterStack(NamedTuple{K}(layers); kw...) end -# Multi Raster stack from Tuple of AbstractArray -function RasterStack(data::Tuple{Vararg{<:AbstractArray}}, dims::Tuple; name=nothing, keys=name, kw...) +# Convert Tuple/Array of array to NamedTuples using name/key +function RasterStack(data::Tuple{Vararg{<:AbstractArray}}, dims::Tuple; + name::Union{Tuple,AbstractArray,NamedTuple,Nothing}=nothing, + kw... +) + isnothing(name) && throw(ArgumentError("pass a Tuple, Array or NamedTuple of names to `name`")) return RasterStack(NamedTuple{cleankeys(keys)}(data), dims; kw...) end # Multi Raster stack from NamedTuple of AbstractArray function RasterStack(data::NamedTuple{<:Any,<:Tuple{Vararg{<:AbstractArray}}}, dims::Tuple; kw...) # TODO: make this more sophisticated and match dimension length to axes? + # We dont worry about Raster keywords because these rasters will be deconstructed + # again later, and `kw` will define the RasterStack keywords layers = map(data) do A Raster(A, dims[1:ndims(A)]) end @@ -214,29 +246,44 @@ end RasterStack(layers::AbstractDimArray...; kw...) = RasterStack(layers; kw...) # Multi Raster stack from tuple with `keys` keyword function RasterStack(layers::Tuple{Vararg{<:AbstractRaster}}; - name=map(name, layers), keys=name, kw... + name=map(name, layers), + kw... ) - RasterStack(NamedTuple{cleankeys(keys)}(layers); kw...) + RasterStack(NamedTuple{cleankeys(name)}(layers); kw...) end -# Multi Raster stack from NamedTuple -function RasterStack(layers::NamedTuple{<:Any,<:Tuple{Vararg{<:AbstractRaster}}}; - resize=nothing, refdims=(), metadata=NoMetadata(), kw... -) - # resize if not matching sizes - resize can be `crop`, `resample` or `extend` - layers = resize isa Nothing ? layers : resize(layers) - # DD.comparedims(layers...) - dims=DD.combinedims(layers...) - data=map(parent, layers) - layerdims=map(DD.basedims, layers) - layermetadata=map(DD.metadata, layers) - missingval=map(Rasters.missingval, layers) +# Multi RasterStack from NamedTuple +# This method is called after most other RasterStack methods. +function RasterStack(layers::NamedTuple{K,<:Tuple{Vararg{<:AbstractRaster}}}; + resize::Union{Function,Nothing}=nothing, + _layers=resize isa Nothing ? layers : resize(layers), + dims::Tuple=DD.combinedims(_layers...), + refdims::Tuple=(), + metadata=NoMetadata(), + missingval=map(Rasters.missingval, _layers), + layermetadata=map(DD.metadata, _layers), + layerdims::NamedTuple{K}=map(DD.basedims, _layers), +) where K + # Handle values that musbe be `NamedTuple` + layermetadata = if layermetadata isa NamedTuple + layermetadata + elseif layermetadata isa Union{Nothing,NoMetadata} + map(_ -> NoMetadata(), layers) + else + throw(ArgumentError("$layermetadata is not a valid input for `layermetadata`. Try a `NamedTuple` of `Dict`, `MetaData` or `NoMetadata`")) + end + missingval = missingval isa NoKW ? nothing : missingval + data = map(parent, _layers) return RasterStack( data, dims, refdims, layerdims, metadata, layermetadata, missingval ) end # Stack from a String function RasterStack(filename::AbstractString; - source=nothing, name=nothing, keys=name, lazy=false, dropband=true, kw... + lazy::Bool=false, + dropband::Bool=true, + source::Union{Symbol,Source,Nothing}=nothing, + name=nothing, + kw... 
) source = _sourcetrait(filename, source) st = if isdir(filename) @@ -244,22 +291,22 @@ function RasterStack(filename::AbstractString; filenames = readdir(filename) length(filenames) > 0 || throw(ArgumentError("No files in directory $filename")) # Detect keys from names - keys = if isnothing(keys) + name = if isnothing(name) all_shared = true stripped = lstrip.(x -> x in (" ", "_"), (x -> x[1:end]).(filenames)) Symbol.(replace.(first.(splitext.(stripped)), Ref(" " => "_"))) else - keys + name end RasterStack(joinpath.(Ref(filename), filenames); lazy, kw...) else # Load as a single file if haslayers(source) # With multiple named layers - l_st = _layer_stack(filename; source, name, keys, lazy, kw...) + l_st = _layer_stack(filename; source, name, lazy, kw...) # Maybe split the stack into separate arrays to remove extra dims. - if !(keys isa Nothing) + if !(name isa Nothing) map(identity, l_st) else l_st @@ -283,22 +330,31 @@ function RasterStack(filename::AbstractString; end function _layer_stack(filename; - dims=nothing, refdims=(), metadata=nothing, crs=nothing, mappedcrs=nothing, - layerdims=nothing, layermetadata=nothing, missingval=nothing, - source=nothing, name=nothing, keys=name, resize=nothing, lazy=false, kw... + source=nokw, + dims=nokw, + refdims=(), + name=nokw, + metadata=nokw, + layerdims=nokw, + layermetadata=nokw, + missingval=nokw, + crs=nokw, + mappedcrs=nokw, + lazy=false, + kw... ) crs = defaultcrs(source, crs) mappedcrs = defaultmappedcrs(source, mappedcrs) data, field_kw = _open(filename; source) do ds - layers = _layers(ds, keys) + layers = _layers(ds, name) # Create a Dict of dimkey => Dimension to use in `dim` and `layerdims` dimdict = _dimdict(ds, crs, mappedcrs) refdims = refdims == () || refdims isa Nothing ? () : refdims - metadata = metadata isa Nothing ? _metadata(ds) : metadata - layerdims = layerdims isa Nothing ? _layerdims(ds; layers, dimdict) : layerdims - dims = _sort_by_layerdims(dims isa Nothing ? _dims(ds, dimdict) : dims, layerdims) - layermetadata = layermetadata isa Nothing ? _layermetadata(ds; layers) : layermetadata - missingval = missingval isa Nothing ? Rasters.missingval(ds) : missingval + metadata = metadata isa NoKW ? _metadata(ds) : metadata + layerdims = layerdims isa NoKW ? _layerdims(ds; layers, dimdict) : layerdims + dims = _sort_by_layerdims(dims isa NoKW ? _dims(ds, dimdict) : dims, layerdims) + layermetadata = layermetadata isa NoKW ? _layermetadata(ds; layers) : layermetadata + missingval = missingval isa NoKW ? Rasters.missingval(ds) : missingval tuplekeys = Tuple(map(Symbol, layers.keys)) data = if lazy FileStack{typeof(source)}(ds, filename; keys=tuplekeys, vars=Tuple(layers.vars)) @@ -355,57 +411,73 @@ end # Stack from a Raster function RasterStack(A::Raster; - layersfrom=nothing, name=nothing, keys=name, metadata=metadata(A), refdims=refdims(A), kw... + layersfrom=nothing, + name=nothing, + refdims::Tuple=refdims(A), + metadata=metadata(A), + kw... ) - keys = keys isa Union{AbstractString,Symbol,Name} ? (keys,) : keys + name = name isa Union{AbstractString,Symbol,Name} ? (name,) : name layers = if isnothing(layersfrom) - keys = if keys isa Nothing - keys = DD.name(A) in (NoName(), Symbol(""), Name(Symbol(""))) ? ("layer1",) : DD.name(A) + name = if name isa Nothing + name = DD.name(A) in (NoName(), Symbol(""), Name(Symbol(""))) ? ("layer1",) : DD.name(A) else - keys + name end - NamedTuple{cleankeys(keys)}((A,)) + NamedTuple{cleankeys(name)}((A,)) else - keys = keys isa Nothing ? 
_layerkeysfromdim(A, layersfrom) : keys + name = name isa Nothing ? _layerkeysfromdim(A, layersfrom) : name slices = slice(A, layersfrom) - NamedTuple{cleankeys(keys)}(Tuple(slices)) + NamedTuple{cleankeys(name)}(slices) end - return RasterStack(layers; refdims=refdims, metadata=metadata, kw...) + return RasterStack(layers; refdims, metadata, kw...) end # Stack from stack and dims args RasterStack(st::AbstractRasterStack, dims::Tuple; kw...) = RasterStack(st; dims, kw...) # Stack from table and dims args -function RasterStack(table, dims::Tuple; name=_not_a_dimcol(table, dims), keys=name, kw...) +function RasterStack(table, dims::Tuple; + name=_not_a_dimcol(table, dims), + kw... +) Tables.istable(table) || throw(ArgumentError("object $(typeof(table)) is not a valid input to `RasterStack`")) - # TODO use `name` everywhere, not keys - if keys isa Symbol - col = Tables.getcolumn(table, keys) - layers = NamedTuple{(keys,)}((reshape(col, map(length, dims)),)) + if name isa Symbol + col = Tables.getcolumn(table, name) + layers = NamedTuple{(name,)}((reshape(col, map(length, dims)),)) else - layers = map(keys) do k + layers = map(name) do k col = Tables.getcolumn(table, k) reshape(col, map(length, dims)) - end |> NamedTuple{keys} + end |> NamedTuple{name} end return RasterStack(layers, dims; kw...) end # Rebuild from internals function RasterStack( data::Union{FileStack,OpenStack,NamedTuple{<:Any,<:Tuple{Vararg{<:AbstractArray}}}}; - dims, refdims=(), layerdims, metadata=NoMetadata(), layermetadata, missingval) + dims::Tuple, + refdims::Tuple=(), + layerdims, + metadata=NoMetadata(), + layermetadata, + missingval, +) return RasterStack( data, dims, refdims, layerdims, metadata, layermetadata, missingval ) end # RasterStack from another stack -function RasterStack(s::AbstractDimStack; name=cleankeys(Base.keys(s)), keys=name, - data=NamedTuple{keys}(s[key] for key in keys), - dims=dims(s), refdims=refdims(s), layerdims=DD.layerdims(s), - metadata=metadata(s), layermetadata=DD.layermetadata(s), +function RasterStack(s::AbstractDimStack; + name::Tuple=Base.keys(s), + data::NamedTuple=NamedTuple{name}(s[n] for n in name), + dims::Tuple=dims(s), + refdims::Tuple=refdims(s), + layerdims=DD.layerdims(s), + metadata=metadata(s), + layermetadata=DD.layermetadata(s), missingval=missingval(s) ) st = RasterStack( - data, DD.dims(s), refdims, layerdims, metadata, layermetadata, missingval + data, dims, refdims, layerdims, metadata, layermetadata, missingval ) # TODO This is a bit of a hack, it should use `formatdims`. return set(st, dims...) 
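The constructors above also build stacks from a single `Raster`, a table, or another stack. A sketch of the `Raster` path, using the `layersfrom` keyword documented earlier in this docstring; layer data is random:

```julia
using Rasters

# Splitting a 3-band raster into one stack layer per band
bands = Raster(rand(Float32, 10, 10, 3), (X(1:10), Y(1:10), Band(1:3)); name=:refl)
st_bands = RasterStack(bands; layersfrom=Band)
keys(st_bands)   # one key per band

# Without `layersfrom`, an unnamed raster becomes a single layer (:layer1 by default)
st_single = RasterStack(Raster(rand(10, 10), (X(1:10), Y(1:10))))
keys(st_single)
```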
@@ -449,7 +521,7 @@ function _layerkeysfromdim(A, dim) hasdim(A, dim) || throw(ArgumentError("`layersrom` dim `$(dim2key(dim))` not found in `$(map(basetypeof, dims(A)))`")) vals = parent(lookup(A, dim)) l = length(vals) - if l > MAX_STACK_SIZE + if l > MAX_STACK_SIZE D = basetypeof(dim) options = map(basetypeof, dims(otherdims(A, dim), d -> length(d) <= MAX_STACK_SIZE)) toolong = "Lookup of `layersfrom=$D` is too long to use for stack layers: $l" @@ -473,8 +545,8 @@ Base.convert(::Type{RasterStack}, src::AbstractDimStack) = RasterStack(src) Raster(stack::RasterStack) = cat(values(stack)...; dims=Band([keys(stack)...])) defaultcrs(::Source, crs) = crs -defaultcrs(s::Source, ::Nothing) = defaultcrs(s) +defaultcrs(s::Source, ::NoKW) = defaultcrs(s) defaultcrs(x) = nothing defaultmappedcrs(::Source, crs) = crs -defaultmappedcrs(s::Source, ::Nothing) = defaultmappedcrs(s) +defaultmappedcrs(s::Source, ::NoKW) = defaultmappedcrs(s) defaultmappedcrs(::Source) = nothing diff --git a/src/utils.jl b/src/utils.jl index 0ba449b9a..07863e92e 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,3 +1,4 @@ + filter_ext(path, ext::AbstractString) = filter(fn -> splitext(fn)[2] == ext, readdir(path; join=true)) filter_ext(path, exts::Union{Tuple,AbstractArray}) = diff --git a/src/write.jl b/src/write.jl index 59cfd0a0c..277e9ff5e 100644 --- a/src/write.jl +++ b/src/write.jl @@ -1,16 +1,26 @@ + """ - Base.write(filename::AbstractString, A::AbstractRaster; kw...) + Base.write(filename::AbstractString, A::AbstractRaster; [source], kw...) + +Write an [`AbstractRaster`](@ref) to file, guessing the backend from the +file extension or using the `source` keyword. -Write an [`AbstractRaster`](@ref) to file, guessing the backend from the file extension. +# Keywords -Keyword arguments are passed to the `write` method for the backend. +$FORCE_KEYWORD +$SOURCE_KEYWORD + +Other keyword arguments are passed to the `write` method for the backend. """ -function Base.write( - filename::AbstractString, A::AbstractRaster; source=_sourcetrait(filename), kw... +function Base.write( filename::AbstractString, A::AbstractRaster; + source=_sourcetype(filename), + kw... ) + source = _sourcetype(source) write(filename, source, A; kw...) end Base.write(A::AbstractRaster; kw...) = write(filename(A), A; kw...) +# Fallback function Base.write( filename::AbstractString, source::Source, A::Union{AbstractRaster,AbstractRasterStack}; kw... ) @@ -19,21 +29,32 @@ function Base.write( end """ - Base.write(filename::AbstractString, s::AbstractRasterStack; suffix, kw...) + Base.write(filename::AbstractString, s::AbstractRasterStack; kw...) -Write any [`AbstractRasterStack`](@ref) to file, guessing the backend from the file extension. +Write any [`AbstractRasterStack`](@ref) to one or multiple files, +depending on the backend. Backend is guessed from the filename extension +or forced with the `source` keyword. ## Keywords -- `suffix`: suffix to append to file names. By default the layer key is used. +$EXT_KEYWORD +$FORCE_KEYWORD +$SOURCE_KEYWORD +$SUFFIX_KEYWORD +$VERBOSE_KEYWORD Other keyword arguments are passed to the `write` method for the backend. If the source can't be saved as a stack-like object, individual array layers will be saved. """ function Base.write(path::AbstractString, s::AbstractRasterStack; - suffix=nothing, ext=nothing, source=_sourcetrait(path, ext), verbose=true, kw... + suffix=nothing, + ext=nothing, + source=_sourcetype(path, ext), + verbose=true, + kw... 
) + source = _sourcetype(source) if haslayers(source) write(path, source, s; kw...) else @@ -52,10 +73,10 @@ function Base.write(path::AbstractString, s::AbstractRasterStack; suffix end if verbose - @warn string("Cannot write stacks to \"", ext, "\", writing layers as individual files") + @warn string("Cannot write complete stacks to \"", ext, "\", writing layers as individual files") end - map(keys(s), suffix1) do key, sfx - fn = string(base, sfx, ext) + map(keys(s)) do key + fn = string(base, suffix1, ext) write(fn, source, s[key]; kw...) end |> NamedTuple{keys(s)} end @@ -64,22 +85,31 @@ end """ Base.write(filepath::AbstractString, s::AbstractRasterSeries; kw...) -Write any [`AbstractRasterSeries`](@ref) to file, guessing the backend from the file extension. +Write any [`AbstractRasterSeries`](@ref) to multiple files, +guessing the backend from the file extension. The lookup values of the series will be appended to the filepath (before the extension), separated by underscores. ## Keywords -See other docs for `write`. All keywords are passed through to `Raster` and `RasterStack` methods. +$EXT_KEYWORD +$FORCE_KEYWORD +$SOURCE_KEYWORD +$VERBOSE_KEYWORD + +See specific docs for `write` for eg. gdal or netcdf keywords, or for `RasterStack`. +All keywords are passed through to these `Raster` and `RasterStack` methods. """ -function Base.write( - path::AbstractString, A::AbstractRasterSeries; - ext=nothing, source=_sourcetrait(path, ext), kw... +function Base.write(path::AbstractString, A::AbstractRasterSeries; + ext=nothing, + source=_sourcetype(path, ext), + verbose=true, + kw... ) - base, name_ext = splitext(path) - ext = isnothing(ext) ? name_ext : ext - verbose = true + source = _sourcetype(source) + base, path_ext = splitext(path) + ext = isnothing(ext) ? path_ext : ext map(A, DimPoints(A)) do raster, p lookupstring = join(map(string, p), "_") written_paths = if raster isa RasterStack && !haslayers(source) @@ -93,6 +123,7 @@ function Base.write( slice_filename = joinpath(dirpath, string(filename, ext)) write(slice_filename, raster; source, verbose, kw...) 
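The stack and series `write` methods above fall back to one file per layer whenever the backend has no multi-layer support. A sketch of both cases, assuming the NCDatasets and ArchGDAL backends are loaded; output paths are temporary and arbitrary:

```julia
using Rasters, NCDatasets, ArchGDAL

xy = (X(1:20), Y(1:20))
st = RasterStack((temp=rand(Float32, 20, 20), rain=rand(Float32, 20, 20)), xy)

# NetCDF stores all layers in a single file
write(joinpath(tempdir(), "stack.nc"), st; force=true)

# GeoTIFF has no layers, so each layer is written to its own file,
# with a per-layer suffix (the layer name by default) before the extension.
write(joinpath(tempdir(), "stack.tif"), st; force=true)
```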
end + # Don't print warnings for every slice of the series verbose = false written_paths end diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index 1ff2dde03..3cdf15521 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -368,7 +368,7 @@ end end @testset "Single file stack" begin - @time ncstack = RasterStack(ncmulti); + @time ncstack = RasterStack(ncmulti) @testset "lazyness" begin @time read(RasterStack(ncmulti)); diff --git a/test/sources/rasterdatasources.jl b/test/sources/rasterdatasources.jl index 6edb46c63..1f038a8c6 100644 --- a/test/sources/rasterdatasources.jl +++ b/test/sources/rasterdatasources.jl @@ -34,13 +34,24 @@ end end @testset "load CHELSA BioClim" begin - A = Raster(CHELSA{BioClim}, 1; mappedcrs=EPSG(4326)) + A = Raster(CHELSA{BioClim}, 1; lazy=true) @test Rasters.name(A) == :bio1 - st = RasterStack(CHELSA{BioClim}, (:bio1, :BIO2)) + st = RasterStack(CHELSA{BioClim}, (:bio1, :BIO2); lazy=true) @test keys(st) == (:bio1, :bio2) @test A isa Raster @test st isa RasterStack @test st[:bio2] isa Raster + # Allow forcing keywords + st = RasterStack(CHELSA{BioClim}, (1, 2); + lazy=true, + missingval=-9999, + metadata=Rasters.NoMetadata(), + crs=nothing, + mappedcrs=EPSG(4326), + ) + @test missingval(st) == -9999 + @test missingval(st.bio1) == -9999 + @test metadata(st) == Rasters.NoMetadata() end @testset "load EarthEnv HabitatHeterogeneity" begin From 4e91cd4ece0c29e093151237bfacdc19499ad246 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Fri, 29 Mar 2024 00:45:28 +0100 Subject: [PATCH 02/13] test raster keywords --- ext/RastersArchGDALExt/gdal_source.jl | 54 ++- .../RastersNCDatasetsExt.jl | 2 +- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 14 +- src/Rasters.jl | 1 - src/array.jl | 69 +++- src/filearray.jl | 30 +- src/filestack.jl | 29 +- src/lookup.jl | 28 +- src/methods/aggregate.jl | 2 +- src/methods/crop_extend.jl | 7 +- src/methods/mosaic.jl | 2 +- src/series.jl | 4 +- src/sources/commondatamodel.jl | 41 +- src/sources/grd.jl | 101 ++--- src/sources/sources.jl | 10 +- src/stack.jl | 351 +++++++++--------- src/utils.jl | 6 +- src/write.jl | 16 +- test/aggregate.jl | 6 +- test/array.jl | 16 +- test/methods.jl | 13 +- test/series.jl | 72 ++-- test/sources/gdal.jl | 26 +- test/sources/grd.jl | 48 ++- test/sources/ncdatasets.jl | 20 +- test/stack.jl | 132 +++---- 26 files changed, 622 insertions(+), 478 deletions(-) diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index 57e52eedb..88c0737b0 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -69,11 +69,14 @@ Returns `filename`. """ function Base.write( filename::AbstractString, ::GDALsource, A::AbstractRaster{T}; - force=false, verbose=true, kw... + force=false, + verbose=true, + missingval=nokw, + kw... ) where T RA.check_can_write(filename, force) - A1 = _maybe_correct_to_write(A) - _create_with_driver(filename, dims(A1), eltype(A1), missingval(A1); _block_template=A1, kw...) do dataset + A1 = _maybe_correct_to_write(A, missingval) + _create_with_driver(filename, dims(A1), eltype(A1), Rasters.missingval(A1); _block_template=A1, kw...) do dataset verbose && _maybe_warn_south_up(A, verbose, "Writing South-up. 
Use `reverse(x; dims=Y)` first to write conventional North-up") open(A1; write=true) do O AG.RasterDataset(dataset) .= parent(O) @@ -83,7 +86,12 @@ function Base.write( end function RA.create(filename, ::GDALsource, T::Type, dims::DD.DimTuple; - missingval=nothing, metadata=nothing, name=nothing, lazy=true, verbose=true, kw... + missingval=nokw, + metadata=nokw, + name=nokw, + lazy=true, + verbose=true, + kw... ) T = Missings.nonmissingtype(T) missingval = ismissing(missingval) ? RA._writeable_missing(T) : missingval @@ -92,14 +100,17 @@ function RA.create(filename, ::GDALsource, T::Type, dims::DD.DimTuple; nothing end - return Raster(filename; source=GDALsource(), name, lazy, dropband=!hasdim(dims, Band)) + return Raster(filename; source=GDALsource(), name, lazy, metadata, dropband=!hasdim(dims, Band)) end function _maybe_warn_south_up(A, verbose, msg) verbose && lookup(A, Y) isa AbstractSampled && order(A, Y) isa ForwardOrdered && @warn msg end -function RA._open(f, ::GDALsource, filename::AbstractString; write=false, kw...) +function RA._open(f, ::GDALsource, filename::AbstractString; + write=false, + kw... +) # Check the file actually exists because the GDAL error is unhelpful if !isfile(filename) # Allow gdal virtual file systems @@ -241,22 +252,22 @@ end RA.Raster(ds::AG.Dataset; kw...) = Raster(AG.RasterDataset(ds); kw...) function RA.Raster(ds::AG.RasterDataset; crs=crs(ds), - mappedcrs=nothing, + mappedcrs=nokw, dims=RA._dims(ds, crs, mappedcrs), refdims=(), - name=Symbol(""), + name=nokw, metadata=RA._metadata(ds), missingval=RA.missingval(ds), lazy=false, dropband=false ) - args = dims, refdims, name, metadata, missingval + kw = (; dims, refdims, name, metadata, missingval) filelist = AG.filelist(ds) raster = if lazy && length(filelist) > 0 filename = first(filelist) - A = Raster(FileArray{GDALsource}(ds, filename), args...) + Raster(FileArray{GDALsource}(ds, filename), dims; kw...) else - Raster(Array(ds), args...) + Raster(Array(ds), dims; kw...) end return dropband ? RA._drop_single_band(raster, lazy) : raster end @@ -331,10 +342,13 @@ end _missingval_from_gdal(T, x) = x # Fix array and dimension configuration before writing with GDAL -_maybe_correct_to_write(A) = _maybe_correct_to_write(lookup(A, X()), A) -_maybe_correct_to_write(lookup, A) = A -function _maybe_correct_to_write(lookup::Union{AbstractSampled,NoLookup}, A) - RA._maybe_use_type_missingval(A, GDALsource()) |> _maybe_permute_to_gdal +_maybe_correct_to_write(A::AbstractDimArray, args...) = + _maybe_correct_to_write(lookup(A, X()), A, args...) +_maybe_correct_to_write(::Lookup, A::AbstractDimArray, args...) = A +function _maybe_correct_to_write( + lookup::Union{AbstractSampled,NoLookup}, A::AbstractDimArray, args... +) + RA._maybe_use_type_missingval(A, GDALsource(), args...) |> _maybe_permute_to_gdal end _check_driver(filename::Nothing, driver) = "MEM" @@ -354,8 +368,12 @@ end # Handle creating a dataset with any driver, # applying the function `f` to the created dataset -function _create_with_driver(f, filename, dims, T, missingval; - options=Dict{String,String}(), driver="", _block_template=nothing, kw... +function _create_with_driver(f, filename, dims::Tuple, T, missingval; + options=Dict{String,String}(), + driver="", + _block_template=nothing, + chunks=nokw, + kw... ) _gdal_validate(dims) @@ -381,7 +399,7 @@ function _create_with_driver(f, filename, dims, T, missingval; else # Create a tif and copy it to `filename`, as ArchGDAL.create # does not support direct creation of ASCII etc. 
rasters - tif_options_vec = _process_options("GTiff", Dict{String,String}(); _block_template) + tif_options_vec = _process_options("GTiff", Dict{String,String}(); chunks, _block_template) tif_driver = AG.getdriver("GTiff") tif_name = tempname() * ".tif" AG.create(tif_name; driver=tif_driver, options=tif_options_vec, create_kw...) do dataset diff --git a/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl b/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl index 5c323974d..c9d475ec6 100644 --- a/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl +++ b/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl @@ -18,7 +18,7 @@ using Dates, using Rasters.Lookups using Rasters.Dimensions -using Rasters: CDMsource, NCDsource +using Rasters: CDMsource, NCDsource, nokw, NoKW using CommonDataModel: AbstractDataset diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index 4d107d101..4a14e0cbf 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -76,6 +76,7 @@ and `Metadata` coming from other [`Source`](@ref) types is ignored. function Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStack; append=false, force=false, + missingval=nokw, kw... ) mode = if append @@ -86,7 +87,11 @@ function Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStac end ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(s))) try - map(key -> _writevar!(ds, s[key]; kw...), keys(s)) + if missingval isa NamedTuple + map(k -> _writevar!(ds, s[k]; missinval=missingval[k], kw...), keys(s)) + else + map(k -> _writevar!(ds, s[k]; missingval, kw...), keys(s)) + end finally close(ds) end @@ -109,9 +114,10 @@ end # Add a var array to a dataset before writing it. function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; verbose=true, - missingval=missingval(A), + missingval=nokw, kw... ) where {T,N} + missingval = missingval isa NoKW ? Rasters.missingval(A) : missingval _def_dim_var!(ds, A) attrib = RA._attribdict(metadata(A)) # Set _FillValue @@ -120,7 +126,7 @@ function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; Element type $eltyp cannot be written to NetCDF. Convert it to one of $(Base.uniontypes(NCDAllowedType)), usually by broadcasting the desired type constructor over the `Raster`, e.g. `newrast = Float32.(rast)`")) """ - )) + )) if ismissing(missingval) fillval = if haskey(attrib, "_FillValue") && attrib["_FillValue"] isa eltyp attrib["_FillValue"] @@ -129,7 +135,7 @@ function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; end attrib["_FillValue"] = fillval A = replace_missing(A, fillval) - elseif missingval(A) isa T + elseif Rasters.missingval(A) isa T attrib["_FillValue"] = missingval else verbose && !(missingval isa Nothing) && @warn "`missingval` $(missingval) is not the same type as your data $T." 
diff --git a/src/Rasters.jl b/src/Rasters.jl index 073ebe34a..2e3293c09 100644 --- a/src/Rasters.jl +++ b/src/Rasters.jl @@ -100,7 +100,6 @@ include("sources/sources.jl") include("filearray.jl") include("filestack.jl") include("openstack.jl") -include("sources/sources.jl") include("array.jl") include("stack.jl") include("series.jl") diff --git a/src/array.jl b/src/array.jl index 30b145c4e..b999ff7be 100644 --- a/src/array.jl +++ b/src/array.jl @@ -38,7 +38,8 @@ function filename(A::AbstractRaster) filename(first(arrays)) end end -filename(A::AbstractArray) = nothing # Fallback +filename(::AbstractArray) = nothing # Fallback +filename(A::DiskArrays.AbstractDiskArray) = filename(parent(A)) cleanreturn(A::AbstractRaster) = rebuild(A, cleanreturn(parent(A))) cleanreturn(x) = x @@ -63,7 +64,8 @@ function DD.rebuild( A::AbstractRaster, data, dims::Tuple, refdims, name, metadata, missingval=missingval(A) ) - Raster(data, dims, refdims, name, metadata, missingval) + missingval1 = _fix_missingval(eltype(data), missingval) + Raster(data, dims, refdims, name, metadata, missingval1) end function DD.rebuild(A::AbstractRaster; data=parent(A), dims=dims(A), refdims=refdims(A), name=name(A), @@ -72,6 +74,11 @@ function DD.rebuild(A::AbstractRaster; rebuild(A, data, dims, refdims, name, metadata, missingval) end +function _fix_missingval(::Type{T}, x::X) where {T,X} + promote_type(T, X) === T ? convert(T, x) : nothing +end +_fix_missingval(::Type{T}, mv::M) where {T,M<:T} = mv + function DD.modify(f, A::AbstractRaster) # Have to avoid calling `open` on CFDiskArray newdata = if isdisk(A) && !(parent(A) isa CFDiskArray) @@ -182,10 +189,10 @@ to an unopened file. This will only be opened lazily when it is indexed with `ge or when `read(A)` is called. Broadcasting, taking a view, reversing and most other methods _do not_ load data from disk: they are applied later, lazily. -An `AbatractArray` for spatial/raster data. +An `AbatractArray` for spatial/raster data. -It may hold memory-backed arrays or, when `lazy=true` a [`FileArray`](@ref) -that simply holds the `String` path to an unopened file. +It may hold memory-backed arrays or, when `lazy=true` a [`FileArray`](@ref) +that simply holds the `String` path to an unopened file. WIth `lazy=true` the file will be opened lazily when it is indexed with `getindex` or when `read(A)` is called. Broadcasting, taking a view, reversing and most other @@ -207,7 +214,7 @@ methods _will not_ load data from disk: they are applied later, lazily. - `metadata`: `Dict` or `Metadata` object for the array, or `NoMetadata()`. - `crs`: the coordinate reference system of the objects `XDim`/`YDim` dimensions. Only set this if you know the detected crs is incrorrect, or it is not present in - the file. The `crs` is expected to be a GeoFormatTypes.jl `CRS` or `Mixed` mode `GeoFormat` object, + the file. The `crs` is expected to be a GeoFormatTypes.jl `CRS` or `Mixed` mode `GeoFormat` object, like `EPSG(4326)`. - `mappedcrs`: the mapped coordinate reference system of the objects `XDim`/`YDim` dimensions. for `Mapped` lookups these are the actual values of the index. For `Projected` lookups @@ -220,6 +227,7 @@ methods _will not_ load data from disk: they are applied later, lazily. When a filepath `String` is used: $DROPBAND_KEYWORD $LAZY_KEYWORD +- `replace_missing`: replace `missingval` with `missing`. This is done lazily if `lazy=true`. $SOURCE_KEYWORD - `write`: defines the default `write` keyword value when calling `open` on the Raster. `false` by default. 
Only makes sense to use when `lazy=true`. @@ -234,6 +242,13 @@ struct Raster{T,N,D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Na,Me,Mi<:Union{T,Noth name::Na metadata::Me missingval::Mi + function Raster( + data::A, dims::D, refdims::R, name::Na, metadata::Me, missingval::Mi + ) where {D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Na,Me,Mi} where {T,N} + DD.checkdims(data, dims) + missingval1 = Mi <: Union{T,Nothing} ? missingval : convert(T, missingval) + new{T,N,D,R,A,Na,Me,typeof(missingval1)}(data, dims, refdims, name, metadata, missingval1) + end end function Raster(A::AbstractArray{T,N}, dims::Tuple; refdims=(), @@ -275,7 +290,7 @@ function Raster(A::AbstractDimArray; )::Raster return Raster(data, dims; refdims, name, metadata, missingval, kw...) end -function Raster(filename::AbstractString, dims::Tuple{<:Dimension,<:Dimension,Vararg}; +function Raster(filename::AbstractString, dims::Tuple{<:Dimension,<:Dimension,Vararg}; kw... )::Raster Raster(filename; dims, kw...) @@ -289,38 +304,56 @@ function Raster(filename::AbstractString; source=nothing, kw...)::Raster end::Raster end function Raster(ds, filename::AbstractString; - crs=nokw, - mappedcrs=nokw, dims=nokw, refdims=(), name=nokw, metadata=nokw, missingval=nokw, + crs=nokw, + mappedcrs=nokw, source=nothing, + replace_missing=false, write=false, lazy=false, dropband=true, )::Raster name1 = filekey(ds, name) source = _sourcetrait(filename, source) - crs = defaultcrs(source, crs) - mappedcrs = defaultmappedcrs(source, mappedcrs) - data1, dims1, metadata1, missingval1 = _open(source, ds; key=name1) do var - dims1 = dims isa NoKW ? _dims(var, crs, mappedcrs) : dims + data1, dims1, metadata1, missingval1 = _open(source, ds; key=name1) do var metadata1 = metadata isa NoKW ? _metadata(var) : metadata - missingval1 = missingval isa NoKW ? Rasters.missingval(var) : missingval + missingval1 = _check_missingval(var, missingval) + replace_missing1 = replace_missing && !isnothing(missingval1) + missingval2 = replace_missing1 ? missing : missingval1 data = if lazy - FileArray{typeof(source)}(var, filename; key=name1, write) + A = FileArray{typeof(source)}(var, filename; key=name1, write) + replace_missing1 ? _replace_missing(A, missingval1) : A else _checkmem(var) - Array(var) + Array(replace_missing1 ? _replace_missing(var, missingval1) : var) end - data, dims1, metadata1, missingval1 + dims1 = dims isa NoKW ? _dims(var, crs, mappedcrs) : format(dims, data) + data, dims1, metadata1, missingval2 end - raster = Raster(data1, dims1, refdims, name1, metadata1, missingval1) + raster = Raster(data1, dims1, refdims, Symbol(name1), metadata1, missingval1) return dropband ? _drop_single_band(raster, lazy) : raster end +_check_missingval(A::AbstractArray, ::NoKW) = _check_missingval(A, Rasters.missingval(A)) +_check_missingval(A::AbstractArray, ::Nothing) = nothing +function _check_missingval(::AbstractArray{T}, missingval) where T + if !(missingval isa T) + @warn "missingval $missingval of type $(typeof(missingval)) does not match the raster eltype $T" + nothing + else + missingval + end +end + +function _replace_missing(A::AbstractArray{T}, missingval) where T + repmissing(x) = isequal(x, missingval) ? missing : x + return repmissing.(A) +end + filekey(ds, key) = key filekey(filename::String) = Symbol(splitext(basename(filename))[1]) diff --git a/src/filearray.jl b/src/filearray.jl index 9960b5435..8d5cf71ec 100644 --- a/src/filearray.jl +++ b/src/filearray.jl @@ -6,38 +6,38 @@ Filearray is a DiskArrays.jl `AbstractDiskArray`. 
Instead of holding an open object, it just holds a filename string that is opened lazily when it needs to be read. """ -struct FileArray{S,T,N,K,EC,HC} <: DiskArrays.AbstractDiskArray{T,N} +struct FileArray{S,T,N,Na,EC,HC} <: DiskArrays.AbstractDiskArray{T,N} filename::String size::NTuple{N,Int} - key::K + name::Na eachchunk::EC haschunks::HC write::Bool end function FileArray{S,T,N}( - filename, size, key::K, eachchunk::EC=size, + filename, size, name::Na, eachchunk::EC=size, haschunks::HC=DA.Unchunked(), write=false -) where {S,T,N,K,EC,HC} - FileArray{S,T,N,K,EC,HC}(filename, size, key, eachchunk, haschunks, write) +) where {S,T,N,Na,EC,HC} + FileArray{S,T,N,Na,EC,HC}(filename, size, name, eachchunk, haschunks, write) end function FileArray{S,T,N}(filename::String, size::Tuple; - key=nothing, eachchunk=size, haschunks=DA.Unchunked(), write=false + name=nothing, eachchunk=size, haschunks=DA.Unchunked(), write=false ) where {S,T,N} - FileArray{S,T,N}(filename, size, key, eachchunk, haschunks, write) + FileArray{S,T,N}(filename, size, name, eachchunk, haschunks, write) end # FileArray has S, T and N parameters not recoverable from fields ConstructionBase.constructorof(::Type{<:FileArray{S,T,N}}) where {S,T,N} = FileArray{S,T,N} filename(A::FileArray) = A.filename -key(A::FileArray) = A.key +DD.name(A::FileArray) = A.name Base.size(A::FileArray) = A.size DA.eachchunk(A::FileArray) = A.eachchunk DA.haschunks(A::FileArray) = A.haschunks # Run function `f` on the result of _open for the file type function Base.open(f::Function, A::FileArray{S}; write=A.write, kw...) where S - _open(f, S(), filename(A); key=key(A), write, kw...) + _open(f, S(), filename(A); name=name(A), write, kw...) end function DA.readblock!(A::FileArray, dst, r::AbstractUnitRange...) @@ -58,17 +58,18 @@ end A basic DiskArrays.jl wrapper for objects that don't have one defined yet. When we `open` a `FileArray` it is replaced with a `RasterDiskArray`. """ -struct RasterDiskArray{S,T,N,V,EC,HC} <: DiskArrays.AbstractDiskArray{T,N} +struct RasterDiskArray{S,T,N,V,EC,HC,A} <: DiskArrays.AbstractDiskArray{T,N} var::V eachchunk::EC haschunks::HC + attrib::A end function RasterDiskArray{S}( - var::V, eachchunk=DA.eachchunk(var), haschunks=DA.haschunks(var) -) where {S,V} + var::V, eachchunk::EC=DA.eachchunk(var), haschunks::HC=DA.haschunks(var), attrib::A=nothing +) where {S,V,EC,HC,A} T = eltype(var) N = ndims(var) - RasterDiskArray{S,T,N,V,typeof(eachchunk),typeof(haschunks)}(var, eachchunk, haschunks) + RasterDiskArray{S,T,N,V,EC,HC,A}(var, eachchunk, haschunks, attrib) end Base.parent(A::RasterDiskArray) = A.var @@ -79,6 +80,9 @@ DA.eachchunk(A::RasterDiskArray) = A.eachchunk DA.readblock!(A::RasterDiskArray, aout, r::AbstractUnitRange...) = aout .= parent(A)[r...] DA.writeblock!(A::RasterDiskArray, v, r::AbstractUnitRange...) = parent(A)[r...] .= v +# Already open, doesn't use `name` +_open(f, ::Source, A::RasterDiskArray; name=nothing) = f(A) + struct MissingDiskArray{T,N,V} <: DiskArrays.AbstractDiskArray{T,N} var::V end diff --git a/src/filestack.jl b/src/filestack.jl index 7262f694a..ca3c69bb3 100644 --- a/src/filestack.jl +++ b/src/filestack.jl @@ -1,15 +1,15 @@ """ - FileStack{S,K} + FileStack{S,Na} - FileStack{S,K}(filename, types, sizes, eachchunk, haschunks, write) + FileStack{S,Na}(filename, types, sizes, eachchunk, haschunks, write) A wrapper object that holds file pointer and size/chunking metadata for a multi-layered stack stored in a single file, typically netcdf or hdf5. 
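Indexing a `FileStack` with a layer name returns a lazy [`FileArray`](@ref) for that
variable, without re-opening the file. A sketch (the file name and layer name are
hypothetical):

    st = RasterStack("layers.nc"; lazy=true)  # parent(st) is a FileStack
    parent(st)[:temperature]                  # a FileArray for the :temperature variable
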
-`S` is a backend type like `NCDsource`, and `K` is a tuple of `Symbol` keys. +`S` is a backend type like `NCDsource`, and `Na` is a tuple of `Symbol` keys. """ -struct FileStack{S,K,F<:AbstractString,T,SZ,EC,HC} +struct FileStack{S,Na,F<:AbstractString,T,SZ,EC,HC} filename::F types::T sizes::SZ @@ -17,27 +17,28 @@ struct FileStack{S,K,F<:AbstractString,T,SZ,EC,HC} haschunks::HC write::Bool end -function FileStack{S,K}( +function FileStack{S,Na}( filename::F, types::T, sizes::SZ, eachchunk::EC, haschunks::HC, write::Bool - ) where {S,K,F,T,SZ,EC,HC} - FileStack{S,K,F,T,SZ,EC,HC}(filename, types, sizes, eachchunk, haschunks, write) + ) where {S,Na,F,T,SZ,EC,HC} + FileStack{S,Na,F,T,SZ,EC,HC}(filename, types, sizes, eachchunk, haschunks, write) end # FileStack has `S` parameter that is not recoverable from fields. -ConstructionBase.constructorof(::Type{<:FileStack{S,K}}) where {S,K} = FileStack{S,K} +ConstructionBase.constructorof(::Type{<:FileStack{S,Na}}) where {S,Na} = FileStack{S,Na} filename(fs::FileStack) = fs.filename -Base.keys(fs::FileStack{<:Any,K}) where K = K +DD.name(fs::FileStack{<:Any,Na}) where Na = Na +Base.keys(fs::FileStack{<:Any,Na}) where Na = Na Base.values(fs::FileStack{<:Any}) = (fs[k] for k in keys(fs)) # Indexing FileStack returns a FileArray, -# referencing a specific key in the same file. -function Base.getindex(fs::FileStack{S,K}, key::Symbol) where {S,K} - is = NamedTuple{K}(ntuple(identity, length(K))) - i = is[key] +# referencing a specific name in the same file. +function Base.getindex(fs::FileStack{S,Na}, name::Symbol) where {S,Na} + is = NamedTuple{Na}(ntuple(identity, length(Na))) + i = is[name] size = fs.sizes[i] eachchunk = fs.eachchunk[i] haschunks = fs.haschunks[i] T = fs.types[i] N = length(size) - A = FileArray{S,T,N}(filename(fs), size, key, eachchunk, haschunks, fs.write) + A = FileArray{S,T,N}(filename(fs), size, name, eachchunk, haschunks, fs.write) end diff --git a/src/lookup.jl b/src/lookup.jl index 388c21a17..b350bb801 100644 --- a/src/lookup.jl +++ b/src/lookup.jl @@ -54,7 +54,7 @@ The underlying `crs` will be detected by GDAL. If `mappedcrs` is not supplied (ie. `mappedcrs=nothing`), the base index will be shown on plots, and selectors will need to use whatever format it is in. """ -struct Projected{T,A<:AbstractVector{T},O<:Order,Sp<:Span,Sa<:Sampling,MD,PC,MC,D} <: AbstractProjected{T,O,Sp,Sa} +struct Projected{T,A<:AbstractVector{T},O<:Order,Sp<:Span,Sa<:Sampling,MD,PC<:Union{GeoFormat,Nothing},MC<:Union{GeoFormat,Nothing},D} <: AbstractProjected{T,O,Sp,Sa} data::A order::O span::Sp @@ -66,14 +66,22 @@ struct Projected{T,A<:AbstractVector{T},O<:Order,Sp<:Span,Sa<:Sampling,MD,PC,MC, end function Projected(data=AutoValues(); order=AutoOrder(), span=AutoSpan(), sampling=AutoSampling(), - metadata=NoMetadata(), crs, mappedcrs=nothing, dim=AutoDim() + metadata=NoMetadata(), + crs::Union{GeoFormat,Nothing}, + mappedcrs::Union{GeoFormat,Nothing}=nothing, + dim=AutoDim() ) Projected(data, order, span, sampling, metadata, crs, mappedcrs, dim) end function Projected(l::Sampled; order=order(l), span=span(l), sampling=sampling(l), - metadata=metadata(l), crs, mappedcrs=nothing, dim=AutoDim() + metadata=metadata(l), + crs::Union{GeoFormat,Nothing,NoKW}, + mappedcrs::Union{GeoFormat,NoKW,Nothing}=nokw, + dim=AutoDim() ) + crs = crs isa NoKW ? nothing : crs + mappedcrs = mappedcrs isa NoKW ? 
nothing : mappedcrs Projected(parent(l), order, span, sampling, metadata, crs, mappedcrs, dim) end @@ -110,7 +118,7 @@ Fields and behaviours are identical to [`Sampled`]($DDsampleddocs) with the addi The mapped dimension index will be used as for [`Sampled`]($DDsampleddocs), but to save in another format the underlying `crs` may be used to convert it. """ -struct Mapped{T,A<:AbstractVector{T},O<:Order,Sp<:Span,Sa<:Sampling,MD,PC,MC,D} <: AbstractProjected{T,O,Sp,Sa} +struct Mapped{T,A<:AbstractVector{T},O<:Order,Sp<:Span,Sa<:Sampling,MD,PC<:Union{GeoFormat,Nothing},MC<:Union{GeoFormat,Nothing},D<:Union{AutoDim,Dimension}} <: AbstractProjected{T,O,Sp,Sa} data::A order::O span::Sp @@ -122,13 +130,21 @@ struct Mapped{T,A<:AbstractVector{T},O<:Order,Sp<:Span,Sa<:Sampling,MD,PC,MC,D} end function Mapped(data=AutoValues(); order=AutoOrder(), span=AutoSpan(), sampling=AutoSampling(), - metadata=NoMetadata(), crs=nothing, mappedcrs, dim=AutoDim() + metadata=NoMetadata(), + crs::Union{GeoFormat,Nothing,NoKW}=nokw, + mappedcrs::Union{GeoFormat,Nothing,NoKW}, + dim=AutoDim() ) + crs = crs isa NoKW ? nothing : crs + mappedcrs = mappedcrs isa NoKW ? nothing : mappedcrs Mapped(data, order, span, sampling, metadata, crs, mappedcrs, dim) end function Mapped(l::Sampled; order=order(l), span=span(l), sampling=sampling(l), - metadata=metadata(l), crs=nothing, mappedcrs, dim=AutoDim() + metadata=metadata(l), + crs::Union{GeoFormat,Nothing}=nothng, + mappedcrs::Union{GeoFormat,Nothing}, + dim=AutoDim() ) Mapped(parent(l), order, span, sampling, metadata, crs, mappedcrs, dim) end diff --git a/src/methods/aggregate.jl b/src/methods/aggregate.jl index 77dea0793..c96dd29a2 100644 --- a/src/methods/aggregate.jl +++ b/src/methods/aggregate.jl @@ -303,7 +303,7 @@ function alloc_disag(method::Tuple, A::AbstractRaster, scale; # Dim aggregation determines the array size sze = map(length, dims_) T = ag_eltype(method, A) - mv = convert(T, missingval(A)) + mv = missingval(A) isa Nothing ? nothing : convert(T, missingval(A)) return create(filename, T, dims_; name=name(A), suffix, missingval=mv) end diff --git a/src/methods/crop_extend.jl b/src/methods/crop_extend.jl index 0ea5a777c..335d3322e 100644 --- a/src/methods/crop_extend.jl +++ b/src/methods/crop_extend.jl @@ -104,7 +104,7 @@ end function _crop_to(x, to::Extents.Extent; touches=false) # Take a view over the bounds _without_mapped_crs(x) do x1 - if touches + if touches view(x1, Touches(to)) else view(x1, to) @@ -174,7 +174,8 @@ end _extend_to(x::RasterStackOrArray, to::Dimension; kw...) = _extend_to(x, (to,); kw...) function _extend_to(A::AbstractRaster, to::DimTuple; - filename=nothing, suffix=nothing, touches=false, missingval=missingval(A) + filename=nothing, suffix=nothing, touches=false, + missingval=(isnothing(missingval(A)) ? 
missing : missingval(A)) ) others = otherdims(to, A) # Allow not specifying all dimensions @@ -202,7 +203,7 @@ function _extend_to(A::AbstractRaster, to::DimTuple; b1[1] >= b2[1] || throw(ArgumentError("Lower bound of $(basetypeof(d1)) lookup of `$(b2[1])` are not larger than the original `$(b1[1])`")) b1[2] <= b2[2] || throw(ArgumentError("Upper bound of $(basetypeof(d2)) lookup of `$(b2[2])` is not larger than the original `$(b1[2])`")) elseif lookup(d1) isa Categorical - map(lookup(d1)) do x + map(lookup(d1)) do x x in d2 || throw(ArgumentError("category $x not in new dimension")) end end diff --git a/src/methods/mosaic.jl b/src/methods/mosaic.jl index dd6680a43..58d9ab72c 100644 --- a/src/methods/mosaic.jl +++ b/src/methods/mosaic.jl @@ -68,7 +68,7 @@ mosaic(f::Function, regions; kw...) = _mosaic(f, first(regions), regions; kw...) function _mosaic(f::Function, ::AbstractRaster, regions; missingval=missingval(first(regions)), filename=nothing, suffix=nothing, kw... ) - missingval isa Nothing && throw(ArgumentError("Layers have no missingval, so pass a `missingval` keyword explicitly")) + missingval = missingval isa Nothing ? missing : missingval T = Base.promote_type(typeof(missingval), Base.promote_eltype(regions...)) dims = _mosaic(Tuple(map(DD.dims, regions))) l1 = first(regions) diff --git a/src/series.jl b/src/series.jl index b6d0d8f4d..9992b7e70 100644 --- a/src/series.jl +++ b/src/series.jl @@ -124,9 +124,9 @@ function RasterSeries(filenames::NamedTuple{K}, dims; kw...) where K RasterSeries(map((fns...) -> NamedTuple{K}(fns), values(filenames)...), dims; kw...) end function RasterSeries(filenames::AbstractArray{<:Union{AbstractString,NamedTuple}}, dims; - refdims=(), lazy=false, duplicate_first=false, child=nothing, resize=nothing, kw... + refdims=(), lazy=false, duplicate_first=false, child=nokw, resize=nokw, kw... ) - childtype = if isnothing(child) + childtype = if child isa NoKW eltype(filenames) <: NamedTuple ? RasterStack : Raster else child diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index e7edc387c..c6977a887 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -52,7 +52,7 @@ _metadata(var::CFDiskArray, args...) = _metadata(parent(var), args...) # Base methods Base.parent(A::CFDiskArray) = A.var -Base.getindex(os::OpenStack{<:CDMsource}, key::Symbol) = CFDiskArray(dataset(os)[key]) +Base.getindex(os::OpenStack{<:CDMsource}, name::Symbol) = CFDiskArray(dataset(os)[name]) # DiskArrays.jl methods function DiskArrays.readblock!(A::CFDiskArray, aout, i::AbstractUnitRange...) @@ -90,7 +90,7 @@ for method in (:attrib, :dim) end # Rasters methods for CDM types ############################### -# + function FileArray{source}(var::AbstractVariable, filename::AbstractString; kw...) where source<:CDMsource eachchunk = DA.eachchunk(var) haschunks = DA.haschunks(var) @@ -125,8 +125,7 @@ _open(f, ::CDMsource, var::CFDiskArray; kw...) = cleanreturn(f(var)) # TODO fix/test this for RasterStack function create(filename, source::CDMsource, T::Union{Type,Tuple}, dims::DimTuple; name=:layer1, - keys=(name,), - layerdims=map(_ -> dims, keys), + layerdims=map(_ -> dims, _astuple(name)), missingval=nothing, metadata=NoMetadata(), lazy=true, @@ -134,15 +133,15 @@ function create(filename, source::CDMsource, T::Union{Type,Tuple}, dims::DimTupl types = T isa Tuple ? T : Ref(T) missingval = T isa Tuple ? 
missingval : Ref(missingval) # Create layers of zero arrays - layers = map(layerdims, keys, types, missingval) do lds, key, t, mv + layers = map(layerdims, name, types, missingval) do lds, name, t, mv A = FillArrays.Zeros{t}(map(length, lds)) - Raster(A, dims=lds; name=key, missingval=mv) + Raster(A, dims=lds; name, missingval=mv) end write(filename, source, Raster(first(layers))) return Raster(filename; source=source, lazy) end -filekey(ds::AbstractDataset, key) = _firstname(ds, key) +filekey(ds::AbstractDataset, name) = _firstname(ds, name) missingval(var::AbstractDataset) = missing missingval(var::AbstractVariable{T}) where T = missing isa T ? missing : nothing cleanreturn(A::AbstractVariable) = Array(A) @@ -170,7 +169,7 @@ function _nondimnames(ds) return nondim end -function _layers(ds::AbstractDataset, ::Nothing=nothing) +function _layers(ds::AbstractDataset, ::NoKW=nokw) nondim = _nondimnames(ds) grid_mapping = String[] vars = map(k -> ds[k], nondim) @@ -182,18 +181,18 @@ function _layers(ds::AbstractDataset, ::Nothing=nothing) end bitinds = map(!in(grid_mapping), nondim) (; - keys=nondim[bitinds], + names=nondim[bitinds], vars=vars[bitinds], attrs=attrs[bitinds], ) end -function _layers(ds::AbstractDataset, keys) - vars = map(k -> ds[k], keys) +function _layers(ds::AbstractDataset, names) + vars = map(k -> ds[k], names) attrs = map(CDM.attribs, vars) - (; keys, vars, attrs) + (; names, vars, attrs) end -function _dims(var::AbstractVariable{<:Any,N}, crs=nothing, mappedcrs=nothing) where N +function _dims(var::AbstractVariable{<:Any,N}, crs=nokw, mappedcrs=nokw) where N dimnames = CDM.dimnames(var) ntuple(Val(N)) do i _cdmdim(CDM.dataset(var), dimnames[i], crs, mappedcrs) @@ -202,7 +201,7 @@ end _metadata(var::AbstractVariable; attr=CDM.attribs(var)) = _metadatadict(_sourcetrait(var), attr) -function _dimdict(ds::AbstractDataset, crs=nothing, mappedcrs=nothing) +function _dimdict(ds::AbstractDataset, crs=nokw, mappedcrs=nokw) dimdict = Dict{String,Dimension}() for dimname in CDM.dimnames(ds) dimdict[dimname] = _cdmdim(ds, dimname, crs, mappedcrs) @@ -237,11 +236,11 @@ end # Utils ######################################################################## # TODO dont load all keys here with _layers -_firstname(ds::AbstractDataset, key) = Symbol(key) -_firstname(ds::AbstractDataset, key::NoKW=nokw) = +_firstname(ds::AbstractDataset, name) = Symbol(name) +_firstname(ds::AbstractDataset, name::NoKW=nokw) = Symbol(first(_nondimnames(ds))) -function _cdmdim(ds, dimname::Key, crs=nothing, mappedcrs=nothing) +function _cdmdim(ds, dimname::Key, crs=nokw, mappedcrs=nokw) if haskey(ds, dimname) var = ds[dimname] D = _cdmdimtype(CDM.attribs(var), dimname) @@ -259,8 +258,8 @@ function _cdmdim(ds, dimname::Key, crs=nothing, mappedcrs=nothing) end function _cdmfinddimlen(ds, dimname) - for key in keys(ds) - var = ds[key] + for name in keys(ds) + var = ds[name] dimnames = CDM.dimnames(var) if dimname in dimnames return size(var)[findfirst(==(dimname), dimnames)] @@ -334,13 +333,13 @@ function _cdmlookup( ) # If the index is regularly spaced and there is no crs # then there is probably just one crs - the mappedcrs - crs = if crs isa Nothing && span isa Regular + crs = if crs isa NoKW && span isa Regular mappedcrs else crs end dim = DD.basetypeof(D)() - return Mapped(index, order, span, sampling, metadata, crs, mappedcrs, dim) + return Mapped(index; order, span, sampling, metadata, crs, mappedcrs, dim) end # Band dims have a Categorical lookup, with order function _cdmlookup(D::Type{<:Band}, index, 
order::Order, span, sampling, metadata, crs, mappedcrs) diff --git a/src/sources/grd.jl b/src/sources/grd.jl index 191355d0d..c4df3cda4 100644 --- a/src/sources/grd.jl +++ b/src/sources/grd.jl @@ -19,12 +19,12 @@ const REVGRD_DATATYPE_TRANSLATION = Dict{DataType, String}(v => k for (k,v) in GRD_DATATYPE_TRANSLATION) # GRD attributes wrapper. Only used during file load, for dispatch. -struct GRDattrib{T,F} +struct GRDdataset{T,F} filename::F attrib::Dict{String,String} write::Bool end -function GRDattrib(filename::AbstractString; write=false) +function GRDdataset(filename::AbstractString; write=false) filename = first(splitext(filename)) lines = readlines(filename * ".grd") entries = filter!(x -> !isempty(x) && !(x[1] == '['), lines) @@ -33,18 +33,36 @@ function GRDattrib(filename::AbstractString; write=false) pairs = map(c -> Pair(c[1], c[2]), captures) attrib = Dict(pairs...) T = GRD_DATATYPE_TRANSLATION[attrib["datatype"]] - GRDattrib{T,typeof(filename)}(filename, attrib, write) + GRDdataset{T,typeof(filename)}(filename, attrib, write) end -attrib(grd::GRDattrib) = grd.attrib -filename(grd::GRDattrib) = grd.filename -filekey(grd::GRDattrib, key::NoKW) = get(attrib(grd), "layername", Symbol("")) +attrib(grd::GRDdataset) = grd.attrib +filename(grd::GRDdataset) = grd.filename +filekey(grd::GRDdataset, key::NoKW) = get(attrib(grd), "layername", Symbol("")) +filekey(A::RasterDiskArray{GRDsource}, key) = filekey(A.attrib, key) -function _dims(grd::GRDattrib, crs=nothing, mappedcrs=nothing) - attrib = grd.attrib - crs = crs isa Nothing ? ProjString(attrib["projection"]) : crs +Base.eltype(::GRDdataset{T}) where T = T +function Base.size(grd::GRDdataset) + ncols = parse(Int, grd.attrib["ncols"]) + nrows = parse(Int, grd.attrib["nrows"]) + nbands = parse(Int, grd.attrib["nbands"]) + # The order is backwards to jula array order + ncols, nrows, nbands +end +Base.Array(grd::GRDdataset) = _mmapgrd(Array, grd) + +function DiskArrays.eachchunk(A::GRDdataset) + size_ = size(A) + DiskArrays.GridChunks(size_, size_) +end +DiskArrays.haschunks(::GRDdataset) = DiskArrays.Unchunked() - xsize, ysize, nbands = size(grd) +function _dims(A::RasterDiskArray{GRDsource}, crs=nokw, mappedcrs=nokw) + attrib = A.attrib.attrib + crs = crs isa NoKW ? ProjString(attrib["projection"]) : crs + mappedcrs = mappedcrs isa NoKW ? nothing : mappedcrs + + xsize, ysize, nbands = size(A) xbounds = parse.(Float64, (attrib["xmin"], attrib["xmax"])) ybounds = parse.(Float64, (attrib["ymin"], attrib["ymax"])) @@ -84,12 +102,12 @@ function _dims(grd::GRDattrib, crs=nothing, mappedcrs=nothing) return x, y, band end -_name(grd::GRDattrib) = Symbol(get(grd.attrib, "layername", "")) +_name(A::RasterDiskArray{GRDsource}) = Symbol(get(A.attrib.attrib, "layername", "")) -function _metadata(grd::GRDattrib, args...) +function _metadata(A::RasterDiskArray{GRDsource}, args...) metadata = _metadatadict(GRDsource()) for key in ("creator", "created", "history") - val = get(grd.attrib, key, "") + val = get(A.attrib.attrib, key, "") if val != "" metadata[key] = val end @@ -97,9 +115,9 @@ function _metadata(grd::GRDattrib, args...) 
metadata end -function missingval(grd::GRDattrib{T}) where T - if haskey(grd.attrib, "nodatavalue") - ndv = grd.attrib["nodatavalue"] +function missingval(A::RasterDiskArray{GRDsource,T}) where T + if haskey(A.attrib.attrib, "nodatavalue") + ndv = A.attrib.attrib["nodatavalue"] ndv === "nothing" && return nothing try return parse(T, ndv) @@ -112,31 +130,17 @@ function missingval(grd::GRDattrib{T}) where T end end -_sizeof(A::GRDattrib{T}) where T = sizeof(T) * prod(size(A)) - -Base.eltype(::GRDattrib{T}) where T = T - -function Base.size(grd::GRDattrib) - ncols = parse(Int, grd.attrib["ncols"]) - nrows = parse(Int, grd.attrib["nrows"]) - nbands = parse(Int, grd.attrib["nbands"]) - # The order is backwards to jula array order - ncols, nrows, nbands -end - -Base.Array(grd::GRDattrib) = _mmapgrd(Array, grd) +_sizeof(A::GRDdataset{T}) where T = sizeof(T) * prod(size(A)) +_sizeof(A::RasterDiskArray{GRDsource}) = _sizeof(A.attrib) # Array ######################################################################## -function FileArray{GRDsource}(grd::GRDattrib, filename=filename(grd); kw...) +function FileArray{GRDsource}(A::RasterDiskArray{<:Any,T}, filename=filename(A.attrib); kw...) where T filename = first(splitext(filename)) - size_ = size(grd) - eachchunk = DiskArrays.GridChunks(size_, size_) - haschunks = DiskArrays.Unchunked() - T = eltype(grd) - N = length(size_) - FileArray{GRDsource,T,N}(filename, size_; eachchunk, haschunks, kw...) + eachchunk = DiskArrays.eachchunk(A) + haschunks = DiskArrays.haschunks(A) + FileArray{GRDsource,T,3}(filename, size(A); eachchunk, haschunks, kw...) end # Base methods @@ -159,10 +163,11 @@ Returns the base of `filename` with a `.grd` extension. """ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; force=false, + missingval=nokw, kw... 
) check_can_write(filename, force) - A = _maybe_use_type_missingval(A, GRDsource()) + A = _maybe_use_type_missingval(A, GRDsource(), missingval) if hasdim(A, Band) correctedA = permutedims(A, (X, Y, Band)) |> a -> reorder(a, (X(GRD_X_ORDER), Y(GRD_Y_ORDER), Band(GRD_BAND_ORDER))) @@ -176,7 +181,7 @@ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; filename = splitext(filename)[1] minvalue = minimum(skipmissing(A)) maxvalue = maximum(skipmissing(A)) - _write_grd(filename, eltype(A), dims(A), missingval(A), minvalue, maxvalue, name(A)) + _write_grd(filename, eltype(A), dims(A), Rasters.missingval(A), minvalue, maxvalue, name(A)) # Data: gri file open(filename * ".gri", write=true) do IO @@ -231,7 +236,7 @@ function _write_grd(filename, T, dims, missingval, minvalue, maxvalue, name) end function create(filename, ::GRDsource, T::Type, dims::DD.DimTuple; - name="layer", metadata=nothing, missingval=nothing, keys=(name,), lazy=true, + name="layer", metadata=nothing, missingval=nothing, lazy=true, ) # Remove extension basename = splitext(filename)[1] @@ -252,21 +257,25 @@ end # Custom `open` because the data and metadata objects are separate # Here we _mmapgrd instead of `_open` -function Base.open(f::Function, A::FileArray{GRDsource}, key...; write=A.write) +function Base.open(f::Function, A::FileArray{GRDsource}, args...; write=A.write) _mmapgrd(mm -> f(RasterDiskArray{GRDsource}(mm, A.eachchunk, A.haschunks)), A; write) end -function _open(f, ::GRDsource, filename::AbstractString; key=nothing, write=false) +function _open(f, ::GRDsource, filename::AbstractString; write=false, key=nothing) isfile(filename) || _filenotfound_error(filename) - _open(f, GRDsource(), GRDattrib(filename; write)) + attr = GRDdataset(filename) + _mmapgrd(attr; write) do mm + A = RasterDiskArray{GRDsource}(mm, DA.eachchunk(attr), DA.haschunks(attr), attr) + f(A) + end end -_open(f, ::GRDsource, attrib::GRDattrib; kw...) = f(attrib) +_open(f, ::GRDsource, attrib::GRDdataset; kw...) = f(attrib) haslayers(::GRDsource) = false # Utils ######################################################################## -function _mmapgrd(f, x::Union{FileArray,GRDattrib}; kw...) +function _mmapgrd(f, x::Union{FileArray,GRDdataset}; kw...) _mmapgrd(f, filename(x), eltype(x), size(x); kw...) 
end function _mmapgrd(f, filename::AbstractString, T::Type, size::Tuple; write=false) @@ -286,8 +295,8 @@ end # T = UInt16 # for T in (Any, UInt8, UInt16, Int16, UInt32, Int32, Int64, Float32, Float64) -# precompile(GRDattrib, (String,)) -# DS = Rasters.GRDattrib{T,String} +# precompile(GRDdataset, (String,)) +# DS = Rasters.GRDdataset{T,String} # precompile(crs, (DS,)) # precompile(Rasters.FileArray, (DS, String)) # precompile(dims, (DS,)) diff --git a/src/sources/sources.jl b/src/sources/sources.jl index 31ceb69c0..5b0fe0295 100644 --- a/src/sources/sources.jl +++ b/src/sources/sources.jl @@ -1,13 +1,15 @@ # Source dispatch singletons abstract type Source end -abstract type CDMsource <: Source end -struct NCDsource <: CDMsource end -struct GRIBsource <: CDMsource end struct GRDsource <: Source end struct GDALsource <: Source end struct SMAPsource <: Source end +abstract type CDMsource <: Source end + +struct GRIBsource <: CDMsource end +struct NCDsource <: CDMsource end + # Deprecations const CDMfile = CDMsource const NCDfile = NCDsource @@ -61,7 +63,7 @@ end # Get the source backend for a file extension, falling back to GDALsource _sourcetrait(filename::AbstractString, s::Source) = s _sourcetrait(filename::AbstractString, s) = _sourcetrait(s) -_sourcetrait(filename::AbstractString, ::Nothing) = _sourcetrait(filename) +_sourcetrait(filename::AbstractString, ::Union{Nothing,NoKW}) = _sourcetrait(filename) _sourcetrait(filename::AbstractString) = get(EXT2SOURCE, splitext(filename)[2], GDALsource()) _sourcetrait(filenames::NamedTuple) = _sourcetrait(first(filenames)) _sourcetrait(filename, ext) = get(EXT2SOURCE, ext, GDALsource()) diff --git a/src/stack.jl b/src/stack.jl index b9fe833d4..5f219e0a0 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -26,10 +26,10 @@ subset without loading the whole array. `getindex` on an `AbstractRasterStack` with a key returns another stack with `getindex` applied to all the arrays in the stack. """ -abstract type AbstractRasterStack{L} <: AbstractDimStack{L} end +abstract type AbstractRasterStack{L} <: DD.AbstractDimStack{L} end missingval(stack::AbstractRasterStack) = getfield(stack, :missingval) -missingval(s::AbstractRasterStack, key::Symbol) = _singlemissingval(missingval(s), key) +missingval(s::AbstractRasterStack, name::Symbol) = _singlemissingval(missingval(s), name) filename(stack::AbstractRasterStack{<:NamedTuple}) = map(s -> filename(s), stack) filename(stack::AbstractRasterStack{<:Union{FileStack,OpenStack}}) = filename(parent(stack)) @@ -38,13 +38,13 @@ isdisk(st::AbstractRasterStack) = isdisk(layers(st, 1)) setcrs(x::AbstractRasterStack, crs) = set(x, setcrs(dims(x), crs)...) setmappedcrs(x::AbstractRasterStack, mappedcrs) = set(x, setmappedcrs(dims(x), mappedcrs)...) -_singlemissingval(mvs::NamedTuple, key) = mvs[key] -_singlemissingval(mv, key) = mv +_singlemissingval(mvs::NamedTuple, name) = mvs[name] +_singlemissingval(mv, name) = mv # DimensionalData methods ###################################################### # Always read a stack before loading it as a table. 
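# For example (an illustrative sketch; the file name is hypothetical):
#     st = RasterStack("layers.nc"; lazy=true)
#     tbl = DD.DimTable(st)  # materialises all layers via `read` before building columns
# so the table columns are plain in-memory vectors rather than disk-backed views.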
-DD.DimTable(stack::AbstractRasterStack) = invoke(DD.DimTable, Tuple{AbstractDimStack}, read(stack)) +DD.DimTable(stack::AbstractRasterStack) = invoke(DD.DimTable, Tuple{DD.AbstractDimStack}, read(stack)) function DD.layers(s::AbstractRasterStack{<:FileStack{<:Any,Keys}}) where Keys NamedTuple{Keys}(map(K -> s[K], Keys)) @@ -98,25 +98,21 @@ end # Base methods ################################################################# +DD.name(s::AbstractRasterStack) = keys(s) Base.names(s::AbstractRasterStack) = keys(s) Base.copy(stack::AbstractRasterStack) = map(copy, stack) #### Stack getindex #### # Different to DimensionalData as we construct a Raster -Base.getindex(s::AbstractRasterStack, key::AbstractString) = s[Symbol(key)] -function Base.getindex(s::AbstractRasterStack, key::Symbol) - data_ = parent(s)[key] - dims_ = dims(s, DD.layerdims(s, key)) - metadata = DD.layermetadata(s, key) - mv = missingval(s, key) +Base.getindex(s::AbstractRasterStack, name::AbstractString) = s[Symbol(name)] +function Base.getindex(s::AbstractRasterStack, name::Symbol) + data_ = parent(s)[name] + dims_ = dims(s, DD.layerdims(s, name)) + metadata = DD.layermetadata(s, name) + mv = missingval(s, name) mv = mv isa eltype(data_) ? mv : nothing - Raster(data_, dims_, refdims(s), key, metadata, mv) + Raster(data_, dims_, refdims(s), name, metadata, mv) end -# Key + Index -@propagate_inbounds @inline function Base.getindex(s::AbstractRasterStack, key::Symbol, i1, I...) - A = s[key][i1, I...] -end - # Concrete AbstrackRasterStack implementation ################################################# @@ -185,52 +181,36 @@ struct RasterStack{L<:Union{FileStack,OpenStack,NamedTuple},D<:Tuple,R<:Tuple,LD layermetadata::LM missingval::MV end -# Multi-file stack from strings +# Rebuild from internals function RasterStack( - filenames::Union{AbstractArray{<:AbstractString},Tuple{<:AbstractString,Vararg}}; - name=map(filekey, filenames), - kw... -) - RasterStack(NamedTuple{cleankeys(Tuple(name))}(filenames); kw...) -end -function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}}; - lazy=false, - dropband=true, - source=nothing, + data::Union{FileStack,OpenStack,NamedTuple{<:Any,<:Tuple{Vararg{<:AbstractArray}}}}; + dims::Tuple, + refdims::Tuple=(), + layerdims, + metadata=NoMetadata(), + layermetadata, + missingval, crs=nokw, mappedcrs=nokw, - kw... -) where K - layers = map(keys(filenames), values(filenames)) do key, fn - source = _sourcetrait(fn, source) - crs = defaultcrs(source, crs) - mappedcrs = defaultmappedcrs(source, mappedcrs) - _open(source, fn; key) do ds - dims = _dims(ds, crs, mappedcrs) - prod(map(length, dims)) - data = if lazy - FileArray{typeof(source)}(ds, fn; key) - else - _open(source, ds; key) do A - _checkmem(A) - Array(A) - end - end - md = _metadata(ds) - mv = missingval(ds) - raster = Raster(data, dims; name=key, metadata=md, missingval=mv) - return dropband ? _drop_single_band(raster, lazy) : raster - end - end - RasterStack(NamedTuple{K}(layers); kw...) + name=nokw, + resize=nokw, +) + st = RasterStack( + data, dims, refdims, layerdims, metadata, layermetadata, missingval + ) + dims = format(dims, CartesianIndices(st)) + dims = crs isa NoKW ? dims : setcrs(dims, crs) + dims = mappedcrs isa NoKW ? dims : setmappedcrs(dims, mappedcrs) + st = rebuild(st; dims) + return name isa NoKW ? 
st : st[Dimensions._astuple(name)] end # Convert Tuple/Array of array to NamedTuples using name/key function RasterStack(data::Tuple{Vararg{<:AbstractArray}}, dims::Tuple; name::Union{Tuple,AbstractArray,NamedTuple,Nothing}=nothing, kw... ) - isnothing(name) && throw(ArgumentError("pass a Tuple, Array or NamedTuple of names to `name`")) - return RasterStack(NamedTuple{cleankeys(keys)}(data), dims; kw...) + isnothing(name) && throw(ArgumentError("pass a Tuple, Array or NamedTuple of names to the `name` keyword")) + return RasterStack(NamedTuple{cleankeys(name)}(data), dims; kw...) end # Multi Raster stack from NamedTuple of AbstractArray function RasterStack(data::NamedTuple{<:Any,<:Tuple{Vararg{<:AbstractArray}}}, dims::Tuple; kw...) @@ -244,8 +224,8 @@ function RasterStack(data::NamedTuple{<:Any,<:Tuple{Vararg{<:AbstractArray}}}, d end # Multi Raster stack from AbstractDimArray splat RasterStack(layers::AbstractDimArray...; kw...) = RasterStack(layers; kw...) -# Multi Raster stack from tuple with `keys` keyword -function RasterStack(layers::Tuple{Vararg{<:AbstractRaster}}; +# Multi Raster stack from tuple with `name` keyword +function RasterStack(layers::Tuple{Vararg{<:AbstractDimArray}}; name=map(name, layers), kw... ) @@ -253,15 +233,16 @@ function RasterStack(layers::Tuple{Vararg{<:AbstractRaster}}; end # Multi RasterStack from NamedTuple # This method is called after most other RasterStack methods. -function RasterStack(layers::NamedTuple{K,<:Tuple{Vararg{<:AbstractRaster}}}; - resize::Union{Function,Nothing}=nothing, - _layers=resize isa Nothing ? layers : resize(layers), +function RasterStack(layers::NamedTuple{K,<:Tuple{Vararg{<:AbstractDimArray}}}; + resize::Union{Function,NoKW}=nokw, + _layers=resize isa NoKW ? layers : resize(layers), dims::Tuple=DD.combinedims(_layers...), refdims::Tuple=(), + missingval=map(missingval, _layers), metadata=NoMetadata(), - missingval=map(Rasters.missingval, _layers), layermetadata=map(DD.metadata, _layers), layerdims::NamedTuple{K}=map(DD.basedims, _layers), + kw... ) where K # Handle values that musbe be `NamedTuple` layermetadata = if layermetadata isa NamedTuple @@ -274,15 +255,111 @@ function RasterStack(layers::NamedTuple{K,<:Tuple{Vararg{<:AbstractRaster}}}; missingval = missingval isa NoKW ? nothing : missingval data = map(parent, _layers) return RasterStack( - data, dims, refdims, layerdims, metadata, layermetadata, missingval + data; dims, refdims, layerdims, metadata, layermetadata, missingval, kw... + ) +end +# Stack from table and dims args +function RasterStack(table, dims::Tuple; + name=_not_a_dimcol(table, dims), + kw... +) + Tables.istable(table) || throw(ArgumentError("object $(typeof(table)) is not a valid input to `RasterStack`")) + if name isa Symbol + col = Tables.getcolumn(table, name) + layers = NamedTuple{(name,)}((reshape(col, map(length, dims)),)) + else + layers = map(name) do k + col = Tables.getcolumn(table, k) + reshape(col, map(length, dims)) + end |> NamedTuple{name} + end + return RasterStack(layers, dims; kw...) +end +# Stack from a Raster +function RasterStack(A::AbstractDimArray; + layersfrom=nokw, + name=nokw, + refdims::Tuple=refdims(A), + metadata=metadata(A), + missingval=missingval(A), + kw... +) + name = name isa Union{AbstractString,Symbol,Name} ? (name,) : name + layers = if layersfrom isa NoKW + name = if name isa NoKW + name = DD.name(A) in (NoName(), Symbol(""), Name(Symbol(""))) ? ("layer1",) : DD.name(A) + else + name + end + NamedTuple{cleankeys(name)}((A,)) + else + name = name isa NoKW ? 
_layerkeysfromdim(A, layersfrom) : name + slices = slice(A, layersfrom) + NamedTuple{cleankeys(name)}(slices) + end + return RasterStack(layers; refdims, metadata, missingval, kw...) +end +# Stack from stack and dims args +RasterStack(st::DD.AbstractDimStack, dims::Tuple; kw...) = RasterStack(st; dims, kw...) +# RasterStack from another stack +function RasterStack(s::DD.AbstractDimStack; + data::NamedTuple=parent(s), + dims::Union{Tuple,NoKW}=dims(s), + refdims::Tuple=refdims(s), + layerdims=DD.layerdims(s), + metadata=metadata(s), + layermetadata=DD.layermetadata(s), + missingval=missingval(s), + kw... +) + return RasterStack( + data; dims, refdims, layerdims, metadata, layermetadata, missingval, kw... ) end +# Multi-file stack from strings +function RasterStack( + filenames::Union{AbstractArray{<:AbstractString},Tuple{<:AbstractString,Vararg}}; + name=map(filekey, filenames), + kw... +) + RasterStack(NamedTuple{cleankeys(Tuple(name))}(filenames); kw...) +end +function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}}; + lazy=false, + dropband=true, + source=nothing, + crs=nokw, + mappedcrs=nokw, + missingval=nokw, + kw... +) where K + layers = map(keys(filenames), values(filenames)) do key, fn + source = _sourcetrait(fn, source) + _open(source, fn; key) do ds + dims = _dims(ds, crs, mappedcrs) + prod(map(length, dims)) + data = if lazy + FileArray{typeof(source)}(ds, fn; key) + else + _open(source, ds; key) do A + _checkmem(A) + Array(A) + end + end + md = _metadata(ds) + missingval = missingval isa NoKW ? Rasters.missingval(ds) : missingval + raster = Raster(data, dims; name=key, metadata=md, missingval) + return dropband ? _drop_single_band(raster, lazy) : raster + end + end + RasterStack(NamedTuple{K}(layers); kw...) +end # Stack from a String function RasterStack(filename::AbstractString; lazy::Bool=false, dropband::Bool=true, - source::Union{Symbol,Source,Nothing}=nothing, - name=nothing, + source::Union{Symbol,Source,NoKW}=nokw, + name=nokw, kw... ) source = _sourcetrait(filename, source) @@ -291,7 +368,7 @@ function RasterStack(filename::AbstractString; filenames = readdir(filename) length(filenames) > 0 || throw(ArgumentError("No files in directory $filename")) # Detect keys from names - name = if isnothing(name) + name = if name isa NoKW all_shared = true stripped = lstrip.(x -> x in (" ", "_"), (x -> x[1:end]).(filenames)) Symbol.(replace.(first.(splitext.(stripped)), Ref(" " => "_"))) @@ -306,7 +383,7 @@ function RasterStack(filename::AbstractString; l_st = _layer_stack(filename; source, name, lazy, kw...) # Maybe split the stack into separate arrays to remove extra dims. - if !(name isa Nothing) + if !(name isa NoKW) map(identity, l_st) else l_st @@ -329,6 +406,40 @@ function RasterStack(filename::AbstractString; end end +function DD.modify(f, s::AbstractRasterStack{<:FileStack{<:Any,K}}) where K + open(s) do o + map(K) do k + Array(parent(ost)[k]) + end + end +end + +# Open a single file stack +function Base.open(f::Function, st::AbstractRasterStack{<:FileStack{<:Any,K}}; kw...) where K + ost = OpenStack(parent(st)) + layers = map(K) do k + ost[k] + end |> NamedTuple{K} + out = f(rebuild(st; data=layers)) + close(ost) + return out +end +# Open a multi-file stack or just apply f to a memory backed stack +function Base.open(f::Function, st::AbstractRasterStack{<:NamedTuple}; kw...) + isdisk(st) ? 
_open_layers(f, st) : f(st) +end + +# Open all layers through nested closures, applying `f` to the rebuilt open stack +_open_layers(f, st) = _open_layers(f, st, DD.layers(f), NamedTuple()) +function _open_layers(f, st, unopened::NamedTuple{K}, opened::NamedTuple) where K + open(first(unopened)) do open_layer + _open_layers(f, st, Base.tail(unopened), merge(opened, NamedTuple{(first(K))}(open_layer))) + end +end +function _open_layers(f, st, unopened::NamedTuple{()}, opened) + f(rebuild(st; data=opened)) +end + function _layer_stack(filename; source=nokw, dims=nokw, @@ -343,8 +454,6 @@ function _layer_stack(filename; lazy=false, kw... ) - crs = defaultcrs(source, crs) - mappedcrs = defaultmappedcrs(source, mappedcrs) data, field_kw = _open(filename; source) do ds layers = _layers(ds, name) # Create a Dict of dimkey => Dimension to use in `dim` and `layerdims` @@ -409,114 +518,6 @@ function _merge_dimorder(neworder, currentorder) return outorder end -# Stack from a Raster -function RasterStack(A::Raster; - layersfrom=nothing, - name=nothing, - refdims::Tuple=refdims(A), - metadata=metadata(A), - kw... -) - name = name isa Union{AbstractString,Symbol,Name} ? (name,) : name - layers = if isnothing(layersfrom) - name = if name isa Nothing - name = DD.name(A) in (NoName(), Symbol(""), Name(Symbol(""))) ? ("layer1",) : DD.name(A) - else - name - end - NamedTuple{cleankeys(name)}((A,)) - else - name = name isa Nothing ? _layerkeysfromdim(A, layersfrom) : name - slices = slice(A, layersfrom) - NamedTuple{cleankeys(name)}(slices) - end - return RasterStack(layers; refdims, metadata, kw...) -end -# Stack from stack and dims args -RasterStack(st::AbstractRasterStack, dims::Tuple; kw...) = RasterStack(st; dims, kw...) -# Stack from table and dims args -function RasterStack(table, dims::Tuple; - name=_not_a_dimcol(table, dims), - kw... -) - Tables.istable(table) || throw(ArgumentError("object $(typeof(table)) is not a valid input to `RasterStack`")) - if name isa Symbol - col = Tables.getcolumn(table, name) - layers = NamedTuple{(name,)}((reshape(col, map(length, dims)),)) - else - layers = map(name) do k - col = Tables.getcolumn(table, k) - reshape(col, map(length, dims)) - end |> NamedTuple{name} - end - return RasterStack(layers, dims; kw...) -end -# Rebuild from internals -function RasterStack( - data::Union{FileStack,OpenStack,NamedTuple{<:Any,<:Tuple{Vararg{<:AbstractArray}}}}; - dims::Tuple, - refdims::Tuple=(), - layerdims, - metadata=NoMetadata(), - layermetadata, - missingval, -) - return RasterStack( - data, dims, refdims, layerdims, metadata, layermetadata, missingval - ) -end -# RasterStack from another stack -function RasterStack(s::AbstractDimStack; - name::Tuple=Base.keys(s), - data::NamedTuple=NamedTuple{name}(s[n] for n in name), - dims::Tuple=dims(s), - refdims::Tuple=refdims(s), - layerdims=DD.layerdims(s), - metadata=metadata(s), - layermetadata=DD.layermetadata(s), - missingval=missingval(s) -) - st = RasterStack( - data, dims, refdims, layerdims, metadata, layermetadata, missingval - ) - # TODO This is a bit of a hack, it should use `formatdims`. - return set(st, dims...) -end - -function DD.modify(f, s::AbstractRasterStack{<:FileStack{<:Any,K}}) where K - open(s) do o - map(K) do k - Array(parent(ost)[k]) - end - end -end - -# Open a single file stack -function Base.open(f::Function, st::AbstractRasterStack{<:FileStack{<:Any,K}}; kw...) 
where K - ost = OpenStack(parent(st)) - layers = map(K) do k - ost[k] - end |> NamedTuple{K} - out = f(rebuild(st; data=layers)) - close(ost) - return out -end -# Open a multi-file stack or just apply f to a memory backed stack -function Base.open(f::Function, st::AbstractRasterStack{<:NamedTuple}; kw...) - isdisk(st) ? _open_layers(f, st) : f(st) -end - -# Open all layers through nested closures, applying `f` to the rebuilt open stack -_open_layers(f, st) = _open_layers(f, st, DD.layers(st), NamedTuple()) -function _open_layers(f, st, unopened::NamedTuple{K}, opened::NamedTuple) where K - open(first(unopened)) do open_layer - _open_layers(f, st, Base.tail(unopened), merge(opened, NamedTuple{(first(K),)}((open_layer,)))) - end -end -function _open_layers(f, st, unopened::NamedTuple{()}, opened::NamedTuple) - f(rebuild(st; data=opened)) -end - function _layerkeysfromdim(A, dim) hasdim(A, dim) || throw(ArgumentError("`layersrom` dim `$(dim2key(dim))` not found in `$(map(basetypeof, dims(A)))`")) vals = parent(lookup(A, dim)) diff --git a/src/utils.jl b/src/utils.jl index 07863e92e..aef3c819b 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -23,9 +23,9 @@ nolookup_to_sampled(dims::DimTuple) = map(nolookup_to_sampled, dims) nolookup_to_sampled(d::Dimension) = lookup(d) isa NoLookup ? set(d, Sampled(; sampling=Points())) : d -function _maybe_use_type_missingval(A::AbstractRaster{T}, source::Source) where T - if ismissing(missingval(A)) - newmissingval = _type_missingval(Missings.nonmissingtype(T)) +function _maybe_use_type_missingval(A::AbstractRaster{T}, source::Source, missingval=nokw) where T + if ismissing(Rasters.missingval(A)) + newmissingval = missingval isa NoKW ? _type_missingval(Missings.nonmissingtype(T)) : missingval A1 = replace_missing(A, newmissingval) @warn "`missing` cant be written with $(SOURCE2SYMBOL[source]), missinval for `$(eltype(A1))` of `$newmissingval` used instead" return A1 diff --git a/src/write.jl b/src/write.jl index 277e9ff5e..0da51082c 100644 --- a/src/write.jl +++ b/src/write.jl @@ -13,10 +13,10 @@ $SOURCE_KEYWORD Other keyword arguments are passed to the `write` method for the backend. """ function Base.write( filename::AbstractString, A::AbstractRaster; - source=_sourcetype(filename), + source=_sourcetrait(filename), kw... ) - source = _sourcetype(source) + source = _sourcetrait(source) write(filename, source, A; kw...) end Base.write(A::AbstractRaster; kw...) = write(filename(A), A; kw...) @@ -50,11 +50,11 @@ If the source can't be saved as a stack-like object, individual array layers wil function Base.write(path::AbstractString, s::AbstractRasterStack; suffix=nothing, ext=nothing, - source=_sourcetype(path, ext), + source=_sourcetrait(path, ext), verbose=true, kw... ) - source = _sourcetype(source) + source = _sourcetrait(source) if haslayers(source) write(path, source, s; kw...) else @@ -75,8 +75,8 @@ function Base.write(path::AbstractString, s::AbstractRasterStack; if verbose @warn string("Cannot write complete stacks to \"", ext, "\", writing layers as individual files") end - map(keys(s)) do key - fn = string(base, suffix1, ext) + map(keys(s), suffix1) do key, suf + fn = string(base, suf, ext) write(fn, source, s[key]; kw...) end |> NamedTuple{keys(s)} end @@ -103,11 +103,11 @@ All keywords are passed through to these `Raster` and `RasterStack` methods. """ function Base.write(path::AbstractString, A::AbstractRasterSeries; ext=nothing, - source=_sourcetype(path, ext), + source=_sourcetrait(path, ext), verbose=true, kw... 
) - source = _sourcetype(source) + source = _sourcetrait(source) base, path_ext = splitext(path) ext = isnothing(ext) ? path_ext : ext map(A, DimPoints(A)) do raster, p diff --git a/test/aggregate.jl b/test/aggregate.jl index 0c0d372f1..977de8852 100644 --- a/test/aggregate.jl +++ b/test/aggregate.jl @@ -1,6 +1,6 @@ using Rasters, Test, Dates, Statistics using Rasters.Lookups, Rasters.Dimensions -using Rasters: upsample, downsample +using Rasters: upsample, downsample, aggregate @testset "upsample" begin @test upsample(1, 2) == 1 @@ -32,8 +32,8 @@ array1 = Raster(data1, dimz) array2 = Raster(data2, dimz) array1a = Raster(data3, dimz) array2a = Raster(data4, dimz) -stack1 = RasterStack(array1, array2; keys=(:array1, :array2)) -stack2 = RasterStack(array1a, array2a; keys=(:array1, :array2)) +stack1 = RasterStack(array1, array2; name=(:array1, :array2)) +stack2 = RasterStack(array1a, array2a; name=(:array1, :array2)) dates = DateTime(2017):Year(1):DateTime(2018) series = RasterSeries([stack1, stack2], (Ti(dates),)) diff --git a/test/array.jl b/test/array.jl index 8118a0b42..ca1eecc8c 100644 --- a/test/array.jl +++ b/test/array.jl @@ -119,10 +119,12 @@ end @test length(collect(skipmissing(nraster))) == 3 # Keeps nothing (integer) # Confirm that missingvals are removed by value, even when types don't match - r = Raster(ones(Int16, 8, 8), (X,Y); missingval = Int16(-9999)) + r = Raster(ones(Int16, 8, 8), (X,Y); missingval=Int16(-9999)) + @test missingval(r) == Int16(-9999) r[1:4, 1:4] .= -9999 - r = float.(r) - @test !(missingval(r) in skipmissing(r)) + rf = Float64.(r) + @test missingval(rf) === -9999.0 + @test !(missingval(rf) in skipmissing(rf)) @test length(collect(skipmissing(r))) == 48 end @@ -140,12 +142,16 @@ end @test name(vra) == :from_vector end -@testset "with properties" begin +@testset "keywords" begin + md = Dict("test" => 1) rast = Raster(rand(X(10:0.1:20), Y(9:0.1:19)); - name=:test, missingval=9999.9, crs=EPSG(4326), mappedcrs=EPSG(3857), refdims=(Ti(DateTime(2000,1,1)),) + name=:test, missingval=9999.9, metadata=md, + refdims=(Ti(DateTime(2000,1,1)),), + crs=EPSG(4326), mappedcrs=EPSG(3857), ) @test name(rast) == :test @test missingval(rast) == 9999.9 + @test Rasters.metadata(rast) == md @test crs(rast) == EPSG(4326) @test mappedcrs(rast) == EPSG(3857) @test refdims(rast) == (Ti(DateTime(2000,1,1)),) diff --git a/test/methods.jl b/test/methods.jl index 578cc8eef..8df49eaf2 100644 --- a/test/methods.jl +++ b/test/methods.jl @@ -1,7 +1,7 @@ using Rasters, Test, ArchGDAL, ArchGDAL.GDAL, Dates, Statistics, DataFrames, Extents, Shapefile, GeometryBasics import GeoInterface using Rasters.Lookups, Rasters.Dimensions -using Rasters: bounds +using Rasters: bounds, trim include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) @@ -54,7 +54,6 @@ gaMi = replace_missing(ga) @test all(map(values(replace_missing(st, NaN32)), (a=[NaN32 7.0f0; 2.0f0 NaN32], b=[1.0 0.4; 2.0 NaN])) do x, y all(x .=== y) end) - ga dNaN = replace_missing(ga, NaN32; filename="test.tif") @test all(isequal.(dNaN, [NaN32 7.0f0; 2.0f0 NaN32])) rm("test.tif") @@ -255,7 +254,7 @@ end @test all(classify(ga1, 1=>99, 2=>88, 3=>77; others=0) .=== [missing 99; 88 77]) @test all(classify(ga1, 1=>99, 2=>88; others=0) .=== [missing 99; 88 0]) A2 = [1.0 2.5; 3.0 4.0] - ga2 = Raster(A2, (X, Y); missingval=missing) + ga2 = Raster(A2 , (X, Y)) @test classify(ga2, (2, 3)=>:x, >(3)=>:y) == [1.0 :x; 3.0 :y] @test classify(ga2, (>=(1), <(2))=>:x, >=(3)=>:y) == [:x 2.5; :y :y] classify!(ga2, (1, 2.5)=>0.0, >=(3)=>-1.0; 
lower=(>), upper=(<=)) @@ -267,12 +266,12 @@ end end @testset "points" begin dimz = (X(9.0:1.0:10.0), Y(0.1:0.1:0.2)) - rast = Raster([1 2; 3 4], dimz; name=:test, missingval=missing) + rast = Raster([1 2; 3 4], dimz; name=:test) rast2 = Raster([5 6; 7 8], dimz; name=:test2, missingval=5) - rast_m = Raster([1 2; 3 missing], dimz; name=:test, missingval=missing) + rast_m = Raster([1 2; 3 missing], dimz; name=:test) table = (geometry=[missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)], foo=zeros(4)) st = RasterStack(rast, rast2) - ga = Raster(A, (X(9.0:1.0:10.0), Y(0.1:0.1:0.2)); missingval=missing) + ga = Raster(A, (X(9.0:1.0:10.0), Y(0.1:0.1:0.2))) @test all(collect(points(ga; order=(Y, X))) .=== [missing (0.2, 9.0); (0.1, 10.0) missing]) @test all(collect(points(ga; order=(X, Y))) .=== [missing (9.0, 0.2); (10.0, 0.1) missing]) @test all(points(ga; order=(X, Y), ignore_missing=true) .=== @@ -287,7 +286,7 @@ createpoint(args...) = ArchGDAL.createpoint(args...) @testset "extract" begin dimz = (X(9.0:1.0:10.0), Y(0.1:0.1:0.2)) - rast = Raster([1 2; 3 4], dimz; name=:test, missingval=missing) + rast = Raster(Union{Int,Missing}[1 2; 3 4], dimz; name=:test, missingval=missing) rast2 = Raster([5 6; 7 8], dimz; name=:test2, missingval=5) rast_m = Raster([1 2; 3 missing], dimz; name=:test, missingval=missing) table = (geometry=[missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)], foo=zeros(4)) diff --git a/test/series.jl b/test/series.jl index 69beb01af..00634cec4 100644 --- a/test/series.jl +++ b/test/series.jl @@ -1,5 +1,6 @@ using Rasters, Test, Dates, DimensionalData using Rasters.Lookups, Rasters.Dimensions +using Rasters: metadata include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) # RasterSeries from Raster/RasterStack components @@ -10,25 +11,24 @@ data2 = 2 * data1 data3 = 3 * data1 data4 = 4 * data1 dimz = X([30, 40]), Y(-10.0:10.0:20.0) -ga1 = Raster(data1, dimz) -ga2 = Raster(data2, dimz) -ga1a = Raster(data3, dimz) -ga2a = Raster(data4, dimz) -stack1 = RasterStack(ga1, ga2; keys=(:ga1, :ga2)) -stack2 = RasterStack(ga1a, ga2a; keys=(:ga1, :ga2)) -dates =[DateTime(2017), DateTime(2018)] +r1 = Raster(data1, dimz) +r2 = Raster(data2, dimz) +r1a = Raster(data3, dimz) +r2a = Raster(data4, dimz) +stack1 = RasterStack(r1, r2; name=(:r1, :r2)) +stack2 = RasterStack(r1a, r2a; name=(:r1, :r2)) +dates = [DateTime(2017), DateTime(2018)] ser = RasterSeries([stack1, stack2], Ti(dates)) @test issorted(dates) @testset "getindex returns the currect types" begin @test ser[Ti(1)] isa RasterStack{<:NamedTuple} - @test ser[Ti(1)][:ga2] isa Raster{Int,2} - @test ser[Ti(1)][:ga2, 1, 1] isa Int - @test ser[Ti(1)][:ga2][1, 1] isa Int + @test ser[Ti(1)][:r2] isa Raster{Int,2} + @test ser[Ti(1)][:r2][1, 1] isa Int end @testset "map" begin - @test map(x -> x[1], ser) == Raster([(ga1=1, ga2=2), (ga1=3, ga2=4)], dims(ser)) + @test map(x -> x[1], ser) == Raster([(r1=1, r2=2), (r1=3, r2=4)], dims(ser)) @test map(x -> x, ser) == ser; end @@ -41,21 +41,19 @@ end end @testset "getindex returns the currect results" begin - @test ser[Ti(Near(DateTime(2017)))][:ga1][X(1), Y(3)] === 3 - @test ser[Ti(At(DateTime(2017)))][:ga1, X(1), Y(3)] === 3 - @test ser[Ti(At(DateTime(2018)))][:ga2][X(2), Y(4)] === 32 - @test ser[Ti(At(DateTime(2018)))][:ga2, X(2), Y(4)] === 32 - @test ser[Ti(1)][:ga1, X(1), Y(2)] == 2 - @test ser[Ti(1)][:ga2, X(2), Y(3:4)] == [14, 16] + @test ser[Ti(Near(DateTime(2017)))][:r1][X(1), Y(3)] === 3 + @test ser[Ti(At(DateTime(2018)))][:r2][X(2), Y(4)] === 32 + @test ser[Ti(1)][:r1][X(1), Y(2)] == 2 + 
@test ser[Ti(1)][:r2][X(2), Y(3:4)] == [14, 16] end @testset "getindex is type stable all the way down" begin - # @inferred ser[Ti(At(DateTime(2017)))][:ga1, X(1), Y(2)] - @inferred ser[Ti(1)][:ga1][X(1), Y(2)] - # @inferred ser[Ti(1)][:ga1, X(1), Y(2:4)] - # @inferred ser[Ti(1)][:ga1][X(1), Y(2:4)] - # @inferred ser[1][:ga1, X(1:2), Y(:)] - # @inferred ser[1][:ga1][X(1:2), Y(:)] + # @inferred ser[Ti(At(DateTime(2017)))][:r1, X(1), Y(2)] + @inferred ser[Ti(1)][:r1][X(1), Y(2)] + # @inferred ser[Ti(1)][:r1, X(1), Y(2:4)] + # @inferred ser[Ti(1)][:r1][X(1), Y(2:4)] + # @inferred ser[1][:r1, X(1:2), Y(:)] + # @inferred ser[1][:r1][X(1:2), Y(:)] end @testset "setindex!" begin @@ -75,21 +73,21 @@ end end @testset "slice, combine" begin - ga1 = Raster(ones(4, 5, 10), (X(), Y(), Ti(10:10:100))) .* reshape(1:10, (1, 1, 10)) - ga2 = ga1 .* 2 - ser = slice(ga1, Ti) + r1 = Raster(ones(4, 5, 10), (X(), Y(), Ti(10:10:100))) .* reshape(1:10, (1, 1, 10)) + r2 = r1 .* 2 + ser = slice(r1, Ti) @test size(ser) == (10,) combined = Rasters.combine(ser, Ti()) - @test combined == ga1 - @test dims(combined) == dims(ga1) - ser = slice(ga1, (X, Ti)) + @test combined == r1 + @test dims(combined) == dims(r1) + ser = slice(r1, (X, Ti)) @test size(ser) == (4, 10) - ser = slice(ga1, (X, Y, Ti)) + ser = slice(r1, (X, Y, Ti)) combined2 = Rasters.combine(ser, (X, Y, Ti)) - slice(ga1, Ti) - @test combined == ga1 == permutedims(combined2, (X, Y, Ti)) - @test dims(combined) == dims(ga1) == dims(permutedims(combined2, (X, Y, Ti))) - stack = RasterStack((ga1=ga1, ga2=ga2)) + slice(r1, Ti) + @test combined == r1 == permutedims(combined2, (X, Y, Ti)) + @test dims(combined) == dims(r1) == dims(permutedims(combined2, (X, Y, Ti))) + stack = RasterStack((r1=r1, r2=r2)) ser = slice(stack, Ti) @test size(ser) == (10,) combined = Rasters.combine(ser, Ti) @@ -101,7 +99,7 @@ end @testset "show" begin # 2d - ser2 = slice(ga1, (X, Y)) + ser2 = slice(r1, (X, Y)) sh = sprint(show, MIME("text/plain"), ser2) # Test but don't lock this down too much @test occursin("RasterSeries", sh) @@ -109,7 +107,7 @@ end @test occursin("X", sh) @test occursin("Y", sh) # 1d - ser1 = slice(ga1, X) + ser1 = slice(r1, X) sh = sprint(show, MIME("text/plain"), ser1) # Test but don't lock this down too much @test occursin("RasterSeries", sh) @@ -125,4 +123,4 @@ end first_dims = dims(first(series)) @test all(dims(r) == first_dims for r in series) end -end \ No newline at end of file +end diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index aef3a6344..0b83d020b 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -10,7 +10,7 @@ gdalpath = maybedownload(url) @testset "Raster" begin @test_throws ArgumentError Raster("notafile.tif") - @time gdalarray = Raster(gdalpath; name=:test); + @time gdalarray = Raster(gdalpath; name=:test) @time lazyarray = Raster(gdalpath; lazy=true); @time eagerarray = Raster(gdalpath; lazy=false); @@ -55,7 +55,6 @@ gdalpath = maybedownload(url) @test A == A2 == A3 end - @testset "custom filename" begin gdal_custom = replace(gdalpath, "tif" => "foo") cp(gdalpath, gdal_custom, force=true) @@ -124,6 +123,29 @@ gdalpath = maybedownload(url) @test mappedcrs(gdalarray[Y(1)]) === nothing end + @testset "custom keywords" begin + customgdalarray = Raster(gdalpath; + name=:test, crs=EPSG(1000), mappedcrs=EPSG(4326), refdims=(Ti(),), + write=true, lazy=true, dropband=false, replace_missing=true, + ) + @test name(customgdalarray) == :test + @test refdims(customgdalarray) == (Ti(),) + @test label(customgdalarray) == "test" + @test 
crs(customgdalarray) == EPSG(1000) + @test crs(dims(customgdalarray, Y)) == EPSG(1000) + @test crs(dims(customgdalarray, X)) == EPSG(1000) + @test mappedcrs(customgdalarray) == EPSG(4326) + @test mappedcrs(dims(customgdalarray, Y)) == EPSG(4326) + @test mappedcrs(dims(customgdalarray, X)) == EPSG(4326) + @test parent(customgdalarray) isa FileArray + @test eltype(customgdalarray) == UInt8 + # Needs to be separate as it overrides crs/mappedcrs + dimsgdalarray = Raster(gdalpath; + dims=(Z(), X(), Y()), + ) + @test dims(dimsgdalarray) isa Tuple{<:Z,X,Y} + end + @testset "indexing" begin @test gdalarray[Band(1)] isa Raster{UInt8,2} @test gdalarray[Y(1), Band(1)] isa Raster{UInt8,1} diff --git a/test/sources/grd.jl b/test/sources/grd.jl index b31e65e49..a497e1d17 100644 --- a/test/sources/grd.jl +++ b/test/sources/grd.jl @@ -1,7 +1,8 @@ using Rasters, Test, Statistics, Dates, Plots using Rasters.Lookups, Rasters.Dimensions +using DiskArrays import NCDatasets, ArchGDAL -using Rasters: FileArray, GRDsource, GDALsource, metadata +using Rasters: FileArray, GRDsource, GDALsource, metadata, trim testpath = joinpath(dirname(pathof(Rasters)), "../test/") include(joinpath(testpath, "test_utils.jl")) @@ -28,6 +29,13 @@ grdpath = stem * ".gri" @test parent(eagerarray) isa Array end + @testset "replace_missing keyword" begin + # Eager is the default + @time missingarray = Raster(grdpath; replace_missing=true) + @test missingval(missingarray) === missing + @test eltype(missingarray) === Union{Missing,Float32} + end + @testset "open" begin @test all(open(A -> A[Y=1], grdarray) .=== grdarray[:, 1, :]) tempfile = tempname() @@ -64,21 +72,41 @@ grdpath = stem * ".gri" end @testset "other fields" begin + proj = ProjString("+proj=merc +datum=WGS84") + @test name(grdarray) == Symbol("red:green:blue") @test missingval(grdarray) == -3.4f38 @test metadata(grdarray) isa Metadata{GRDsource,Dict{String,Any}} - @test name(grdarray) == Symbol("red:green:blue") @test label(grdarray) == "red:green:blue" @test units(grdarray) == nothing - customgrdarray = Raster(grdpath; name=:test, mappedcrs=EPSG(4326)); + @test crs(grdarray) == proj + @test mappedcrs(grdarray) == nothing + @test crs(dims(grdarray, Y)) == proj + @test crs(dims(grdarray, X)) == proj + @test mappedcrs(dims(grdarray, Y)) == nothing + @test mappedcrs(dims(grdarray, X)) == nothing + end + + @testset "custom keywords" begin + customgrdarray = Raster(grdpath; + name=:test, crs=EPSG(1000), mappedcrs=EPSG(4326), refdims=(Ti(),), + write=true, lazy=true, dropband=false, replace_missing=true, + ) @test name(customgrdarray) == :test + @test refdims(customgrdarray) == (Ti(),) @test label(customgrdarray) == "test" + @test crs(customgrdarray) == EPSG(1000) + @test crs(dims(customgrdarray, Y)) == EPSG(1000) + @test crs(dims(customgrdarray, X)) == EPSG(1000) + @test mappedcrs(customgrdarray) == EPSG(4326) @test mappedcrs(dims(customgrdarray, Y)) == EPSG(4326) @test mappedcrs(dims(customgrdarray, X)) == EPSG(4326) - @test mappedcrs(customgrdarray) == EPSG(4326) - proj = ProjString("+proj=merc +datum=WGS84") - @test crs(dims(customgrdarray, Y)) == proj - @test crs(dims(customgrdarray, X)) == proj - @test crs(customgrdarray) == proj + @test parent(customgrdarray) isa DiskArrays.BroadcastDiskArray + @test eltype(customgrdarray) == Union{Float32,Missing} + # Needs to be separate as it overrides crs/mappedcrs + dimsgrdarray = Raster(grdpath; + dims=(Z(), X(), Y()), + ) + @test dims(dimsgrdarray) isa Tuple{<:Z,X,Y} end @testset "getindex" begin @@ -320,7 +348,7 @@ end @testset 
"conversion to RasterStack" begin geostack = RasterStack(grdstack) @test Symbol.(Tuple(keys(grdstack))) == keys(geostack) - smallstack = RasterStack(grdstack; keys=(:a,)) + smallstack = RasterStack(grdstack; name=(:a,)) @test keys(smallstack) == (:a,) end @@ -387,7 +415,7 @@ end @testset "conversion to RasterStack" begin geostack = RasterStack(grdstack) @test Symbol.(Tuple(keys(grdstack))) == keys(geostack) - smallstack = RasterStack(grdstack; keys=(:Band_2,)) + smallstack = RasterStack(grdstack; name=(:Band_2,)) @test keys(smallstack) == (:Band_2,) end diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index 3cdf15521..f03c4a594 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -1,7 +1,8 @@ using Rasters, DimensionalData, Test, Statistics, Dates, CFTime, Plots + using Rasters.Lookups, Rasters.Dimensions import ArchGDAL, NCDatasets -using Rasters: FileArray, FileStack, NCDsource, crs, bounds, name +using Rasters: FileArray, FileStack, NCDsource, crs, bounds, name, trim testdir = realpath(joinpath(dirname(pathof(Rasters)), "../test")) include(joinpath(testdir, "test_utils.jl")) @@ -343,7 +344,8 @@ end end @testset "no missing value" begin - write("nomissing.nc", boolmask(ncarray) .* 1) + no_ext = tempname() * ".nc" + write(no_ext, boolmask(ncarray) .* 1) nomissing = Raster("nomissing.nc") @test missingval(nomissing) == nothing rm("nomissing.nc") @@ -432,17 +434,16 @@ end @test metadata(ncstack[:albedo])["long_name"] == "surface albedo" # Test some DimensionalData.jl tools work # Time dim should be reduced to length 1 by mean - @test axes(mean(ncstack[:albedo, Y(1:20)] , dims=Ti)) == + @test axes(mean(ncstack[:albedo][Y(1:20)] , dims=Ti)) == (Base.OneTo(192), Base.OneTo(20), Base.OneTo(1)) geoA = ncstack[:albedo][Ti(4:6), X(1), Y(2)] - @test geoA == ncstack[:albedo, Ti(4:6), X(1), Y(2)] @test size(geoA) == (3,) end @testset "custom filename" begin ncmulti_custom = replace(ncmulti, "nc" => "nc4") cp(ncmulti, ncmulti_custom, force=true) - @time ncstack_custom = RasterStack(ncmulti_custom, source=Rasters.NCDsource) + @time ncstack_custom = RasterStack(ncmulti_custom, source=Rasters.NCDsource()) @test ncstack_custom isa RasterStack @test map(read(ncstack_custom), read(ncstack)) do a, b all(a .=== b) @@ -464,8 +465,8 @@ end @test dims(ncmultistack[:tos]) isa Tuple{<:X,<:Y,<:Ti} @test ncmultistack[:tos] isa Raster{<:Any,3} @test ncmultistack[:tos][Ti(1)] isa Raster{<:Any,2} - @test ncmultistack[:tos, Y(1), Ti(1)] isa Raster{<:Any,1} - @test ncmultistack[:tos, 8, 30, 10] isa Float32 + @test ncmultistack[:tos][Y(1), Ti(1)] isa Raster{<:Any,1} + @test ncmultistack[:tos][8, 30, 10] isa Float32 end @testset "Subsetting keys" begin @@ -514,10 +515,11 @@ end @test geoseries isa RasterSeries{<:RasterStack} @test parent(geoseries) isa Vector{<:RasterStack} end - geoA = Raster(ncsingle; key=:tos) + geoA = Raster(ncsingle; name=:tos) @test all(read(ncseries[Ti(1)][:tos]) .=== read(geoA)) + using ProfileView, Profile - write("test.nc", ncseries) + @profview write("test.nc", ncseries) @test isfile("test_1.nc") @test isfile("test_2.nc") @test (@allocations write("test.nc", ncseries)) < 1e4 # writing a rasterseries/stack has no force keyword diff --git a/test/stack.jl b/test/stack.jl index f86af66d8..b4e95cadc 100644 --- a/test/stack.jl +++ b/test/stack.jl @@ -11,122 +11,122 @@ mval = -9999.0 meta = NoMetadata() # Formatting only occurs in shorthand constructors -ga1 = Raster(data1, dims1; refdims=refdimz, name=nme, metadata=meta, missingval=mval) -ga2 = Raster(data2, 
dims2) +raster1 = Raster(data1, dims1; refdims=refdimz, name=nme, metadata=meta, missingval=mval) +raster2 = Raster(data2, dims2) -@testset "stack constructors" begin +@testset "constructors and keywords" begin @test_throws ArgumentError RasterStack("notastack") + md = Dict("a" => 1) # Maybe too many ways to define a stack... - st1 = RasterStack((ga1, ga2); name=(:ga1, :ga2)) - st2 = RasterStack((ga1, ga2); keys=(:ga1, :ga2)) - st3 = RasterStack((data1, data2), dims2; keys=[:ga1, :ga2]) - st4 = RasterStack(st3) - st5 = RasterStack(st3, dims2) - st6 = RasterStack((ga1=data1, ga2=data2), dims2) - st7 = RasterStack((; ga1, ga2)) - @test st1 == st2 == st3 == st4 == st5 == st6 == st7 + kw = (; missingval=(r1=mval, r2=mval), refdims=(Ti(),), metadata=md, crs=EPSG(4326), mappedcrs=EPSG(3857), layermetadata=(r1=md, r2=md)) + st1 = RasterStack((raster1, raster2); name=(:r1, :r2), kw...) + st2 = RasterStack((data1, data2), dims2; name=[:r1, :r2], kw...) + st3 = RasterStack(st2; kw...) + st4 = RasterStack(st2, dims2; kw...) + st5 = RasterStack((r1=data1, r2=data2), dims2; kw...) + st6 = RasterStack((; r1=raster1, r2=raster2); kw...) + stacks = (st1, st2, st3, st4, st5, st6) + @test st1 == st2 == st3 == st4 == st5 == st6 + @test all(==((:r1, :r2)), map(name, stacks)) + @test all(==((r1=mval, r2=mval)), map(missingval, stacks)) + @test all(==((Ti(),)), map(refdims, stacks)) + @test all(==(md), map(metadata, stacks)) + @test all(==(EPSG(4326)), map(crs, stacks)) + @test all(==(EPSG(3857)), map(mappedcrs, stacks)) + @test all(==((r1=md, r2=md)), map(DimensionalData.layermetadata, stacks)) # The dimension differences are lost because the table # is tidy - every column is the same length table_st = RasterStack(DimTable(st3), dims2) - @test dims(table_st[:ga1]) isa Tuple{<:X,<:Y,<:Ti} + @test dims(table_st.r1) isa Tuple{<:X,<:Y,<:Ti} end -st = RasterStack((ga1, ga2); name=(:ga1, :ga2)) +st = RasterStack((raster1, raster2); name=(:r1, :r2)) @testset "stack layers" begin @test length(layers(st)) == 2 - @test first(layers(st)) == ga1 - @test last(layers(st)) == ga2 + @test first(layers(st)) == raster1 + @test last(layers(st)) == raster2 @test DimensionalData.layers(st) isa NamedTuple - @test st.ga1 == ga1 - @test st[:ga2] == ga2 - @test parent(st[:ga1]) == data1 - @test parent(st[:ga1]) isa Array{Float64,2} - @test keys(st) == (:ga1, :ga2) - @test haskey(st, :ga1) - @test names(st) == (:ga1, :ga2) - @test collect(values(st)) == [ga1, ga2] + @test st.r1 == raster1 + @test st[:r2] == raster2 + @test parent(st[:r1]) == data1 + @test parent(st[:r1]) isa Array{Float64,2} + @test keys(st) == (:r1, :r2) + @test haskey(st, :r1) + @test names(st) == (:r1, :r2) + @test collect(values(st)) == [raster1, raster2] end @testset "st fields " begin - @test DimensionalData.layerdims(st, :ga1) == DimensionalData.format(dims1, data1) + @test DimensionalData.layerdims(st, :r1) == DimensionalData.format(dims1, data1) @test metadata(st) == NoMetadata() - @test metadata(st, :ga1) == NoMetadata() + @test metadata(st, :r1) == NoMetadata() end @testset "indexing" begin # Indexing the st is the same as indexing its child array - a = st[:ga1][X(2:4), Y(5:6)] - @test a == st[:ga1, X(2:4), Y(5:6)] - - @inferred st[:ga1][X=2:4, Y=5:6] - # FIXME: This isn't inferred, the constants don't propagate like they - # do in the above call. Probably due to the anonymous wrapper function. 
- if VERSION < v"1.9.0-" - @test_broken @inferred st[:ga1, X(2:4), Y(5:6)] isa Raster - else - @test @inferred st[:ga1, X(2:4), Y(5:6)] isa Raster - end + a = st[:r1][X(2:4), Y(5:6)] + @inferred st[:r1][X=2:4, Y=5:6] # Getindex for a whole st of new Rasters a = st[X=2:4, Y=5:6] @test a isa RasterStack - @test a[:ga1] isa Raster - @test parent(a[:ga1]) isa Array - @test a[:ga1] == data1[2:4, 5:6] - @test a[:ga2] == data2[2:4, 5:6, 1:1] + @test a[:r1] isa Raster + @test parent(a[:r1]) isa Array + @test a[:r1] == data1[2:4, 5:6] + @test a[:r2] == data2[2:4, 5:6, 1:1] @testset "select new arrays for the whole st" begin s = st[Y=Between(-10, 10.0), Ti=At(DateTime(2019))] @test s isa RasterStack - @test s.ga1 isa Raster - @test parent(s[:ga1]) isa Array - @test s[:ga1] == data1[:, 5:7] - @test s[:ga2] == data2[:, 5:7, 1] - @test dims(s[:ga2]) == (X(Sampled(10.0:10.0:100.0, ForwardOrdered(), Regular(10.0), Points(), NoMetadata())), - Y(Sampled(-10.0:10.0:10.0, ForwardOrdered(), Regular(10.0), Points(), NoMetadata()))) - @test refdims(s[:ga2]) == + @test s.r1 isa Raster + @test parent(s[:r1]) isa Array + @test s[:r1] == data1[:, 5:7] + @test s[:r2] == data2[:, 5:7, 1] + @test dims(s[:r2]) == (X(Sampled(10.0:10.0:100.0, ForwardOrdered(), Regular(10.0), Points(), NoMetadata())), + Y(Sampled(-10.0:10.0:10.0, ForwardOrdered(), Regular(10.0), Points(), NoMetadata()))) + @test refdims(s[:r2]) == (Ti(Sampled([DateTime(2019)], ForwardOrdered(), Irregular((DateTime(2019), DateTime(2019))), Points(), NoMetadata())),) - @test ismissing(missingval(s, :ga2)) && ismissing(missingval(s[:ga2])) + @test isnothing(missingval(s, :r2)) && isnothing(missingval(s[:r2])) end @testset "select views of arrays for the whole st" begin sv = view(st, Y=Between(-4.0, 27.0), Ti=At(DateTime(2019))) @test sv isa RasterStack - @test sv.ga1 isa Raster - @test parent(sv.ga1) isa SubArray - @test sv[:ga1] == data1[:, 6:8] - @test sv[:ga2] == data2[:, 6:8, 1] - @test dims(sv.ga2) == (X(Sampled(10.0:10:100.0, ForwardOrdered(), Regular(10.0), Points(), NoMetadata())), + @test sv.r1 isa Raster + @test parent(sv.r1) isa SubArray + @test sv[:r1] == data1[:, 6:8] + @test sv[:r2] == data2[:, 6:8, 1] + @test dims(sv.r2) == (X(Sampled(10.0:10:100.0, ForwardOrdered(), Regular(10.0), Points(), NoMetadata())), Y(Sampled(0.0:10:20.0, ForwardOrdered(), Regular(10.0), Points(), NoMetadata()))) - @test refdims(sv[:ga2])[1] == + @test refdims(sv[:r2])[1] == Ti(Sampled(view([DateTime(2019)], 1:1), ForwardOrdered(), Irregular((DateTime(2019), DateTime(2019))), Points(), NoMetadata())) # Stack of view-based Rasters v = view(st, X(2:4), Y(5:6)) # @inferred view(st, X(2:4), Y(5:6)) @test v isa RasterStack - @test v[:ga1] isa Raster - @test parent(v[:ga1]) isa SubArray - @test v[:ga1] == view(data1, 2:4, 5:6) - @test v[:ga2] == view(data2, 2:4, 5:6, 1:1) + @test v[:r1] isa Raster + @test parent(v[:r1]) isa SubArray + @test v[:r1] == view(data1, 2:4, 5:6) + @test v[:r2] == view(data2, 2:4, 5:6, 1:1) end - end @testset "subset st with specific key(s)" begin - s1 = RasterStack(st; keys=(:ga2,)) - @test keys(s1) == (:ga2,) + s1 = RasterStack(st; name=(:r2,)) + s1 = RasterStack(st; name=(:r2,)) + @test keys(s1) == (:r2,) @test length(values(s1)) == 1 - s2 = RasterStack(st; keys=(:ga1, :ga2)) - @test keys(s2) == (:ga1, :ga2) + s2 = RasterStack(st; name=(:r1, :r2)) + @test keys(s2) == (:r1, :r2) @test length(values(s2)) == 2 end @testset "concatenate stacks" begin dims1b = X(110:10:200), Y(-50:10:50) dims2b = (dims1b..., Ti([DateTime(2019)])) - stack_a = 
RasterStack((l1=ga1, l2=ga2)) + stack_a = RasterStack((l1=raster1, l2=raster2)) stack_b = RasterStack((l1=Raster(data1 .+ 10, dims1b), l2=Raster(data2 .+ 20, dims2b))) catstack = cat(stack_a, stack_b; dims=X) @test size(catstack.l1) == (20, 11) @@ -149,8 +149,8 @@ end @testset "copy" begin cp = copy(st) - @test all(st[:ga1] .=== cp[:ga1]) - @test st[:ga1] !== cp[:ga1] + @test all(st[:r1] .=== cp[:r1]) + @test st[:r1] !== cp[:r1] end @testset "show" begin From 08cdcf1edc65a263e3194cff1cea8d092c06d3b6 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Fri, 12 Apr 2024 22:20:19 +0200 Subject: [PATCH 03/13] reorganise write docs --- ext/RastersArchGDALExt/gdal_source.jl | 24 +------ ext/RastersNCDatasetsExt/ncdatasets_source.jl | 47 +------------ src/sources/grd.jl | 2 + src/write.jl | 69 ++++++++++++++++--- 4 files changed, 65 insertions(+), 77 deletions(-) diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index 88c0737b0..c1a47e7ad 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -45,28 +45,6 @@ RA.cleanreturn(A::AG.RasterDataset) = Array(A) RA.haslayers(::GDALsource) = false RA._sourcetrait(A::AG.RasterDataset) = GDALsource() -""" - Base.write(filename::AbstractString, ::GDALsource, A::AbstractRaster; kw...) - -Write a `Raster` to file using GDAL. - -This method is called automatically if you `write` a `Raster` -with a `filename` extension that no other backend can read. - -GDAL is the fallback, and reads a lot of file types, but is not guaranteed to work. - -# Keywords - -$(RA.FORCE_KEYWORD) -- `driver`: A GDAL driver name or a GDAL driver retrieved via `ArchGDAL.getdriver(drivername)`. - Guessed from the filename extension by default. -- `options::Dict{String,String}`: A dictionary containing the dataset creation options passed to the driver. - For example: `Dict("COMPRESS" => "DEFLATE")`. - -Valid `options` for each specific `driver` can be looked up here: https://gdal.org/drivers/raster/index.html - -Returns `filename`. -""" function Base.write( filename::AbstractString, ::GDALsource, A::AbstractRaster{T}; force=false, @@ -372,7 +350,7 @@ function _create_with_driver(f, filename, dims::Tuple, T, missingval; options=Dict{String,String}(), driver="", _block_template=nothing, - chunks=nokw, + chunks=true, kw... ) _gdal_validate(dims) diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index 4a14e0cbf..5348f28c7 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -4,40 +4,10 @@ const UNNAMED_NCD_FILE_KEY = "unnamed" const NCDAllowedType = Union{Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Float32,Float64,Char,String} -const NCD_WRITE_KEYWORDS = """ -$(RA.FORCE_KEYWORD) -$(RA.VERBOSE_KEYWORD) -- `append`: If true, the variable of the current Raster will be appended - to `filename`, if it actually exists. - -The following keywords are passed to `NCDatasets.defVar`: - -- `chunksizes`: Vector of integers setting the chunk size. The total size of a - chunk must be less than 4 GiB. -- `deflatelevel`: Compression level: `0` (default) means no compression and `9` - means maximum compression. Each chunk will be compressed individually. -- `shuffle`: If `true`, the shuffle filter is activated which can improve the - compression ratio. -- `checksum`: The checksum method can be `:fletcher32` or `:nochecksum`, - the default. 
-- `typename`: The name of the NetCDF type required for vlen arrays - (https://web.archive.org/save/https://www.unidata.ucar.edu/software/netcdf/netcdf-4/newdocs/netcdf-c/nc_005fdef_005fvlen.html) -""" - -""" - Base.write(filename::AbstractString, ::NCDsource, A::AbstractRaster) - -Write an NCDarray to a NetCDF file using NCDatasets.jl - -This method is called automatically if you `write` a `Raster` where `filename` -has the `.nc` extension. You do not need to invoke this method manually. - ## Keywords $NCD_WRITE_KEYWORDS -Returns `filename`. -""" function Base.write(filename::AbstractString, ::NCDsource, A::AbstractRaster; append=false, force=false, @@ -58,21 +28,6 @@ function Base.write(filename::AbstractString, ::NCDsource, A::AbstractRaster; end return filename end - -# Stack ######################################################################## - -""" - Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStack; kw...) - -Write a `RasterStack` to a single netcdf file, using NCDatasets.jl. - -## Keywords - -$NCD_WRITE_KEYWORDS - -Currently `Metadata` is not handled for individual dimensions, -and `Metadata` coming from other [`Source`](@ref) types is ignored. -""" function Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStack; append=false, force=false, @@ -115,6 +70,8 @@ end function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; verbose=true, missingval=nokw, + chunks=nokw, + chunksizes=chunks, kw... ) where {T,N} missingval = missingval isa NoKW ? Rasters.missingval(A) : missingval diff --git a/src/sources/grd.jl b/src/sources/grd.jl index c4df3cda4..83574b97d 100644 --- a/src/sources/grd.jl +++ b/src/sources/grd.jl @@ -164,8 +164,10 @@ Returns the base of `filename` with a `.grd` extension. function Base.write(filename::String, ::GRDsource, A::AbstractRaster; force=false, missingval=nokw, + chunks=nokw, kw... ) + chunks isa NoKW || @warn "specifying chunks not supported for .grd files" check_can_write(filename, force) A = _maybe_use_type_missingval(A, GRDsource(), missingval) if hasdim(A, Band) diff --git a/src/write.jl b/src/write.jl index 0da51082c..1a2d7915f 100644 --- a/src/write.jl +++ b/src/write.jl @@ -1,3 +1,51 @@ +const CHUNS_KEYWORD = """ +- `chunks`: a `NTuple{N,Int}` specifying the chunk size for each dimension. + To specify only specific dimensions, a Tuple of `Dimension` wrapping `Int` + or a `NamedTuple` of `Int` can be used. Other dimensions will have a chunk + size of `1`. `true` can be used to mean: use the original + chunk size of the lazy `Raster` being written or X and Y of 256 by 256. + `false` means dont use chunks at all. +""" + +const SOURCE_WRITE_DOCSTRING = """ +Other keyword arguments are passed to the `write` method for the backend. + +## NetCDF keywords + +- `append`: If true, the variable of the current Raster will be appended + to `filename`, if it actually exists. +- `deflatelevel`: Compression level: `0` (default) means no compression and `9` + means maximum compression. Each chunk will be compressed individually. +- `shuffle`: If `true`, the shuffle filter is activated which can improve the + compression ratio. +- `checksum`: The checksum method can be `:fletcher32` or `:nochecksum`, + the default. 
+- `typename`: The name of the NetCDF type required for vlen arrays + (https://web.archive.org/save/https://www.unidata.ucar.edu/software/netcdf/netcdf-4/newdocs/netcdf-c/nc_005fdef_005fvlen.html) + +## GDAL Keywords + +$(RA.FORCE_KEYWORD) +- `driver`: A GDAL driver name `String` or a GDAL driver retrieved via `ArchGDAL.getdriver(drivername)`. + By default `driver` is guessed from the filename extension. +- `options::Dict{String,String}`: A dictionary containing the dataset creation options passed to the driver. + For example: `Dict("COMPRESS" => "DEFLATE")`. + +Valid `options` for each specific `driver` can be found at: https://gdal.org/drivers/raster/index.html + + +## Source comments + +### R grd/grid files + +Write a `Raster` to a .grd file with a .gri header file. +Returns the base of `filename` with a `.grd` extension. + +### GDAL (tiff, and everything else) + +Used if you `write` a `Raster` with a `filename` extension that no other backend can write. +GDAL is the fallback, and writes a lot of file types, but is not guaranteed to work. +""" """ Base.write(filename::AbstractString, A::AbstractRaster; [source], kw...) @@ -7,17 +55,19 @@ file extension or using the `source` keyword. # Keywords +$CHUNKS_KEYWORD $FORCE_KEYWORD $SOURCE_KEYWORD -Other keyword arguments are passed to the `write` method for the backend. +$SOURCE_WRITE_DOCSTRING + +Returns `filename`. """ function Base.write( filename::AbstractString, A::AbstractRaster; source=_sourcetrait(filename), kw... ) - source = _sourcetrait(source) - write(filename, source, A; kw...) + write(filename, _sourcetrait(source), A; kw...) end Base.write(A::AbstractRaster; kw...) = write(filename(A), A; kw...) # Fallback @@ -35,17 +85,18 @@ Write any [`AbstractRasterStack`](@ref) to one or multiple files, depending on the backend. Backend is guessed from the filename extension or forced with the `source` keyword. +If the source can't be saved as a stack-like object, individual array layers will be saved. + ## Keywords +$CHUNKS_KEYWORD $EXT_KEYWORD $FORCE_KEYWORD $SOURCE_KEYWORD $SUFFIX_KEYWORD $VERBOSE_KEYWORD -Other keyword arguments are passed to the `write` method for the backend. - -If the source can't be saved as a stack-like object, individual array layers will be saved. +$SOURCE_WRITE_DOCSTRING """ function Base.write(path::AbstractString, s::AbstractRasterStack; suffix=nothing, @@ -91,15 +142,15 @@ guessing the backend from the file extension. The lookup values of the series will be appended to the filepath (before the extension), separated by underscores. +All keywords are passed through to these `Raster` and `RasterStack` methods. + ## Keywords +$CHUNKS_KEYWORD $EXT_KEYWORD $FORCE_KEYWORD $SOURCE_KEYWORD $VERBOSE_KEYWORD - -See specific docs for `write` for eg. gdal or netcdf keywords, or for `RasterStack`. -All keywords are passed through to these `Raster` and `RasterStack` methods. 
""" function Base.write(path::AbstractString, A::AbstractRasterSeries; ext=nothing, From 33c802ec0fe8660139a7dcf5aff0fdc079deea1f Mon Sep 17 00:00:00 2001 From: rafaqz Date: Sat, 13 Apr 2024 21:24:01 +0200 Subject: [PATCH 04/13] standardise chunking with `chunks` keyword --- ext/RastersArchGDALExt/gdal_source.jl | 48 ++++++----- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 8 +- src/array.jl | 6 +- src/sources/commondatamodel.jl | 10 +-- src/stack.jl | 8 +- src/utils.jl | 41 ++++++++++ src/write.jl | 5 +- test/sources/gdal.jl | 11 +++ test/sources/ncdatasets.jl | 81 +++++++++++-------- test/utils.jl | 24 +++++- 10 files changed, 167 insertions(+), 75 deletions(-) diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index c1a47e7ad..1375992e2 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -2,6 +2,8 @@ const AG = ArchGDAL const GDAL_LOCUS = Start() +const GDAL_DIM_ORDER = (X(), Y(), Band()) + # drivers supporting the gdal Create() method to directly write to disk const GDAL_DRIVERS_SUPPORTING_CREATE = ("GTiff", "HDF4", "KEA", "netCDF", "PCIDSK", "Zarr", "MEM"#=...=#) @@ -350,7 +352,7 @@ function _create_with_driver(f, filename, dims::Tuple, T, missingval; options=Dict{String,String}(), driver="", _block_template=nothing, - chunks=true, + chunks=nokw, kw... ) _gdal_validate(dims) @@ -363,7 +365,7 @@ function _create_with_driver(f, filename, dims::Tuple, T, missingval; nbands = hasdim(dims, Band) ? length(DD.dims(dims, Band())) : 1 driver = _check_driver(filename, driver) - options_vec = _process_options(driver, options; _block_template) + options_vec = _process_options(driver, options; _block_template, chunks) gdaldriver = driver isa String ? AG.getdriver(driver) : driver create_kw = (; width=length(x), height=length(y), nbands, dtype=T,) @@ -400,7 +402,10 @@ end end # Convert a Dict of options to a Vector{String} for GDAL -function _process_options(driver::String, options::Dict; _block_template=nothing) +function _process_options(driver::String, options::Dict; + chunks=nokw, + _block_template=nothing +) options_str = Dict(string(k)=>string(v) for (k,v) in options) # Get the GDAL driver object gdaldriver = AG.getdriver(driver) @@ -413,19 +418,21 @@ function _process_options(driver::String, options::Dict; _block_template=nothing # the goal is to set write block sizes that correspond to eventually blocked reads # creation options are driver dependent - if !isnothing(_block_template) && DA.haschunks(_block_template) == DA.Chunked() - block_x, block_y = DA.max_chunksize(DA.eachchunk(_block_template)) - - # GDAL default is line-by-line compression without tiling. - # Here, tiling is enabled if the source chunk size is viable for GTiff, - # i.e. when the chunk size is divisible by 16. - if (block_x % 16 == 0) && (block_y % 16 == 0) - options_str["TILED"] = "YES" - end + chunk_pattern = RA._chunks_to_tuple(_block_template, (X(), Y(), Band()), chunks) + if !isnothing(chunk_pattern) + xchunksize, ychunksize = chunk_pattern - block_x, block_y = string.((block_x, block_y)) + block_x, block_y = string.((xchunksize, ychunksize)) if driver == "GTiff" + # GDAL default is line-by-line compression without tiling. + # Here, tiling is enabled if the source chunk size is viable for GTiff, + # i.e. when the chunk size is divisible by 16. + if (xchunksize % 16 == 0) && (ychunksize % 16 == 0) + options_str["TILED"] = "YES" + else + xchunksize == 1 || @warn "X and Y chunk size do not match. 
Columns are used and X size $xchunksize is ignored" + end # dont overwrite user specified values if !("BLOCKXSIZE" in keys(options_str)) options_str["BLOCKXSIZE"] = block_x @@ -435,10 +442,11 @@ function _process_options(driver::String, options::Dict; _block_template=nothing end elseif driver == "COG" if !("BLOCKSIZE" in keys(options_str)) - # cog only supports square blocks - # if the source already has square blocks, use them - # otherwise use the driver default - options_str["BLOCKSIZE"] = block_x == block_y ? block_x : "512" + if xchunksize == ychunksize + options_str["BLOCKSIZE"] = block_x + else + @warn "Writing COG X and Y chunks do not match: $block_x, $block_y. Default of 512, 512 used." + end end end end @@ -470,9 +478,9 @@ function _bandnames(rds::AG.RasterDataset, nbands=AG.nraster(rds)) end end -function _gdalmetadata(dataset::AG.Dataset, key) +function _gdalmetadata(dataset::AG.Dataset, name) meta = AG.metadata(dataset) - regex = Regex("$key=(.*)") + regex = Regex("$name=(.*)") i = findfirst(f -> occursin(regex, f), meta) if i isa Nothing return "" @@ -560,7 +568,7 @@ function _extensiondriver(filename::AbstractString) end # Permute dims unless they match the normal GDAL dimension order -_maybe_permute_to_gdal(A) = _maybe_permute_to_gdal(A, DD.dims(A, (X, Y, Band))) +_maybe_permute_to_gdal(A) = _maybe_permute_to_gdal(A, DD.dims(A, GDAL_DIM_ORDER)) _maybe_permute_to_gdal(A, dims::Tuple) = A _maybe_permute_to_gdal(A, dims::Tuple{<:XDim,<:YDim,<:Band}) = permutedims(A, dims) _maybe_permute_to_gdal(A, dims::Tuple{<:XDim,<:YDim}) = permutedims(A, dims) diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index 5348f28c7..59103707d 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -4,10 +4,6 @@ const UNNAMED_NCD_FILE_KEY = "unnamed" const NCDAllowedType = Union{Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Float32,Float64,Char,String} -## Keywords - -$NCD_WRITE_KEYWORDS - function Base.write(filename::AbstractString, ::NCDsource, A::AbstractRaster; append=false, force=false, @@ -71,7 +67,7 @@ function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; verbose=true, missingval=nokw, chunks=nokw, - chunksizes=chunks, + chunksizes=RA._chunks_to_tuple(A, dims(A), chunks), kw... ) where {T,N} missingval = missingval isa NoKW ? Rasters.missingval(A) : missingval @@ -105,7 +101,7 @@ function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; end dimnames = lowercase.(string.(map(RA.name, dims(A)))) - var = NCD.defVar(ds, key, eltyp, dimnames; attrib=attrib, kw...) |> RA.CFDiskArray + var = NCD.defVar(ds, key, eltyp, dimnames; attrib=attrib, chunksizes, kw...) |> RA.CFDiskArray # Write with a DiskArays.jl broadcast var .= A diff --git a/src/array.jl b/src/array.jl index b999ff7be..94e240f7f 100644 --- a/src/array.jl +++ b/src/array.jl @@ -319,13 +319,13 @@ function Raster(ds, filename::AbstractString; )::Raster name1 = filekey(ds, name) source = _sourcetrait(filename, source) - data1, dims1, metadata1, missingval1 = _open(source, ds; key=name1) do var + data1, dims1, metadata1, missingval1 = _open(source, ds; name=name1) do var metadata1 = metadata isa NoKW ? _metadata(var) : metadata missingval1 = _check_missingval(var, missingval) replace_missing1 = replace_missing && !isnothing(missingval1) missingval2 = replace_missing1 ? 
missing : missingval1 data = if lazy - A = FileArray{typeof(source)}(var, filename; key=name1, write) + A = FileArray{typeof(source)}(var, filename; name=name1, write) replace_missing1 ? _replace_missing(A, missingval1) : A else _checkmem(var) @@ -354,7 +354,7 @@ function _replace_missing(A::AbstractArray{T}, missingval) where T return repmissing.(A) end -filekey(ds, key) = key +filekey(ds, name) = name filekey(filename::String) = Symbol(splitext(basename(filename))[1]) DD.dimconstructor(::Tuple{<:Dimension{<:AbstractProjected},Vararg{<:Dimension}}) = Raster diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index c6977a887..344f9d18a 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -101,23 +101,23 @@ end function FileStack{source}( ds::AbstractDataset, filename::AbstractString; - write::Bool=false, keys::NTuple{N,Symbol}, vars + write::Bool=false, name::NTuple{N,Symbol}, vars ) where {source<:CDMsource,N} layertypes = map(var -> Union{Missing,eltype(var)}, vars) layersizes = map(size, vars) eachchunk = map(_get_eachchunk, vars) haschunks = map(_get_haschunks, vars) - return FileStack{source,keys}(filename, layertypes, layersizes, eachchunk, haschunks, write) + return FileStack{source,name}(filename, layertypes, layersizes, eachchunk, haschunks, write) end function Base.open(f::Function, A::FileArray{source}; write=A.write, kw...) where source<:CDMsource - _open(source(), filename(A); key=key(A), write, kw...) do var + _open(source(), filename(A); name=name(A), write, kw...) do var f(var) end end -function _open(f, ::CDMsource, ds::AbstractDataset; key=nokw, kw...) - x = key isa NoKW ? ds : CFDiskArray(ds[_firstname(ds, key)]) +function _open(f, ::CDMsource, ds::AbstractDataset; name=nokw, kw...) + x = name isa NoKW ? ds : CFDiskArray(ds[_firstname(ds, name)]) cleanreturn(f(x)) end _open(f, ::CDMsource, var::CFDiskArray; kw...) = cleanreturn(f(var)) diff --git a/src/stack.jl b/src/stack.jl index 5f219e0a0..a463ca2aa 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -464,13 +464,13 @@ function _layer_stack(filename; dims = _sort_by_layerdims(dims isa NoKW ? _dims(ds, dimdict) : dims, layerdims) layermetadata = layermetadata isa NoKW ? _layermetadata(ds; layers) : layermetadata missingval = missingval isa NoKW ? Rasters.missingval(ds) : missingval - tuplekeys = Tuple(map(Symbol, layers.keys)) + names = Tuple(map(Symbol, layers.names)) data = if lazy - FileStack{typeof(source)}(ds, filename; keys=tuplekeys, vars=Tuple(layers.vars)) + FileStack{typeof(source)}(ds, filename; name=names, vars=Tuple(layers.vars)) else - NamedTuple{tuplekeys}(map(Array, layers.vars)) + NamedTuple{names}(map(Array, layers.vars)) end - data, (; dims, refdims, layerdims=NamedTuple{tuplekeys}(layerdims), metadata, layermetadata=NamedTuple{tuplekeys}(layermetadata), missingval) + data, (; dims, refdims, layerdims=NamedTuple{names}(layerdims), metadata, layermetadata=NamedTuple{names}(layermetadata), missingval) end return RasterStack(data; field_kw..., kw...) 
end diff --git a/src/utils.jl b/src/utils.jl index aef3c819b..c640d2c68 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -221,3 +221,44 @@ function _run(f, range::OrdinalRange, threaded::Bool, progress::Bool, desc::Stri end end end + +# NoKW means true +@inline function _chunks_to_tuple(template, dims, chunks::Bool) + if chunks == true + if template isa AbstractArray && DA.haschunks(template) == DA.Chunked() + # Get chunks from the template + DA.max_chunksize(DA.eachchunk(template)) + else + # Use defaults + _chunks_to_tuple(template, dims, (X(512), Y(512))) + end + else + nothing + end +end +@inline function _chunks_to_tuple(template, dimorder, chunks::NTuple{N,Integer}) where N + n = length(dimorder) + if n < N + throw(ArgumentError("Length $n tuple needed for `chunks`, got $N")) + elseif n > N + (chunks..., ntuple(_ -> 1, Val{n-N}())...) + else + chunks + end +end +@inline function _chunks_to_tuple(template, dimorder, chunks::DimTuple) + size_one_chunk_axes = map(d -> rebuild(d, 1), otherdims(dimorder, chunks)) + alldims = (chunks..., size_one_chunk_axes...) + int_chunks = map(val, dims(alldims, dimorder)) + if !isnothing(template) + if !all(map(>=, size(template), int_chunks)) + @warn "Chunks $int_chunks larger than array size $(size(template)). Using defaults." + return nothing + end + end + return int_chunks +end +@inline _chunks_to_tuple(template, dimorder, chunks::NamedTuple) = + _chunks_to_tuple(template, dimorder, DD.kw2dims(chunks)) +@inline _chunks_to_tuple(template, dimorder, chunks::Nothing) = nothing +@inline _chunks_to_tuple(template, dims, chunks::NoKW) = nothing diff --git a/src/write.jl b/src/write.jl index 1a2d7915f..ebf7c4dcb 100644 --- a/src/write.jl +++ b/src/write.jl @@ -1,4 +1,5 @@ -const CHUNS_KEYWORD = """ + +const CHUNKS_KEYWORD = """ - `chunks`: a `NTuple{N,Int}` specifying the chunk size for each dimension. To specify only specific dimensions, a Tuple of `Dimension` wrapping `Int` or a `NamedTuple` of `Int` can be used. Other dimensions will have a chunk @@ -25,7 +26,7 @@ Other keyword arguments are passed to the `write` method for the backend. ## GDAL Keywords -$(RA.FORCE_KEYWORD) +$FORCE_KEYWORD - `driver`: A GDAL driver name `String` or a GDAL driver retrieved via `ArchGDAL.getdriver(drivername)`. By default `driver` is guessed from the filename extension. - `options::Dict{String,String}`: A dictionary containing the dataset creation options passed to the driver. 
diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index 0b83d020b..5c82bffb3 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -377,6 +377,17 @@ gdalpath = maybedownload(url) @test_throws ArgumentError write(filename_gtiff2, gdalarray; driver="GTiff", options=Dict("COMPRESS"=>"FOOBAR")) end + @testset "chunks" begin + filename = tempname() * ".tiff" + write(filename, gdalarray; chunks=(128, 128, 1)) + gdalarray2 = Raster(filename; lazy=true) + @test DiskArrays.eachchunk(gdalarray2)[1] == (1:128, 1:128) + filename = tempname() * ".tiff" + @test_warn "X and Y chunks do not match" write(filename, gdalarray; chunks=(128, 256, 1), driver="COG") + gdalarray2 = Raster(filename; lazy=true) + @test DiskArrays.eachchunk(gdalarray2)[1] == (1:512, 1:512) + end + @testset "resave current" begin filename = tempname() * ".rst" write(filename, gdalarray) diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index f03c4a594..03ff079dc 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -1,6 +1,7 @@ using Rasters, DimensionalData, Test, Statistics, Dates, CFTime, Plots using Rasters.Lookups, Rasters.Dimensions +using Rasters.DiskArrays import ArchGDAL, NCDatasets using Rasters: FileArray, FileStack, NCDsource, crs, bounds, name, trim testdir = realpath(joinpath(dirname(pathof(Rasters)), "../test")) @@ -51,11 +52,11 @@ end @test_throws ArgumentError Raster("notafile.nc") @testset "lazyness" begin - @time read(Raster(ncsingle)); # Eager is the default @test parent(ncarray) isa Array @test parent(lazyarray) isa FileArray @test parent(eagerarray) isa Array + @time read(lazyarray); end @testset "from url" begin @@ -237,18 +238,15 @@ end @testset "write" begin @testset "to netcdf" begin - # TODO save and load subset - geoA = read(ncarray) - @test size(geoA) == size(ncarray) filename = tempname() * ".nc" - write(filename, geoA; force = true) - @test (@allocations write(filename, geoA; force = true)) < 1e4 + write(filename, ncarray) + @test (@allocations write(filename, ncarray; force=true)) < 1e4 @testset "CF attributes" begin @test NCDatasets.Dataset(filename)[:x].attrib["axis"] == "X" @test NCDatasets.Dataset(filename)[:x].attrib["bounds"] == "x_bnds" # TODO better units and standard name handling end - saved = read(Raster(filename)) + saved = Raster(filename) @test size(saved) == size(geoA) @test refdims(saved) == refdims(geoA) @test missingval(saved) === missingval(geoA) @@ -270,38 +268,53 @@ end @test saved isa typeof(geoA) # TODO test crs - # test for nc `kw...` - geoA = read(ncarray) - write("tos.nc", geoA; force=true) # default `deflatelevel = 0` - @time write("tos_small.nc", geoA; deflatelevel=2, force = true) - @test filesize("tos_small.nc") * 1.5 < filesize("tos.nc") # compress ratio >= 1.5 - isfile("tos.nc") && rm("tos.nc") - isfile("tos_small.nc") && rm("tos_small.nc") - @test (@allocations write("tos_small.nc", geoA; deflatelevel=2, force = true)) < 1e4 - - # test for nc `append` - n = 100 - x = rand(n, n) - r1 = Raster(x, (X, Y); name = "v1") - r2 = Raster(x, (X, Y); name = "v2") - fn = "test.nc" - isfile(fn) && rm(fn) - write(fn, r1, append=false; force = true) - size1 = filesize(fn) - write(fn, r2; append=true) - size2 = filesize(fn) - @test size2 > size1*1.8 # two variable - isfile(fn) && rm(fn) - @test (@allocations begin - write(fn, r1, append=false, force = true) + @testset "chunks" begin + filename = tempname() * ".nc" + write(filename, ncarray; chunks=(64, 64)) + @test DiskArrays.eachchunk(Raster(filename; lazy=true))[1] == (1:64, 
1:64, 1:1) + filename = tempname() * ".nc" + write(filename, ncarray; chunks=(X=16, Y=10, Ti=8)) + @test DiskArrays.eachchunk(Raster(filename; lazy=true))[1] == (1:16, 1:10, 1:8) + filename = tempname() * ".nc" + @test_warn "larger than array size" write(filename, ncarray; chunks=(X=1000, Y=10, Ti=8)) + # No chunks + @test DiskArrays.haschunks(Raster(filename; lazy=true)) isa DiskArrays.Unchunked + @test DiskArrays.eachchunk(Raster(filename; lazy=true))[1] == map(Base.OneTo, size(ncarray)) + end + + @testset "deflatelevel" begin + write("tos.nc", ncarray; force=true) # default `deflatelevel = 0` + @time write("tos_small.nc", geoA; deflatelevel=2, force = true) + @test filesize("tos_small.nc") * 1.5 < filesize("tos.nc") # compress ratio >= 1.5 + @test (@allocations write("tos_small.nc", ncarray; deflatelevel=2, force=true)) < 1e4 + isfile("tos.nc") && rm("tos.nc") + isfile("tos_small.nc") && rm("tos_small.nc") + end + + @testset "append" begin + n = 100 + x = rand(n, n) + r1 = Raster(x, (X, Y); name = "v1") + r2 = Raster(x, (X, Y); name = "v2") + fn = "test.nc" + isfile(fn) && rm(fn) + write(fn, r1, append=false) + size1 = filesize(fn) write(fn, r2; append=true) - end) < 10e4 + size2 = filesize(fn) + @test size2 > size1*1.8 # two variable + isfile(fn) && rm(fn) + @test (@allocations begin + write(fn, r1, append=false, force=true) + write(fn, r2; append=true) + end) < 10e4 + end @testset "non allowed values" begin - # TODO return this test when the changes in NCDatasets.jl settle - # @test_throws ArgumentError write(filename, convert.(Union{Missing,Float16}, geoA)) + @test_throws ArgumentError write(filename, convert.(Union{Missing,Float16}, ncarray); force=true) end end + @testset "to gdal" begin gdalfilename = tempname() * ".tif" nccleaned = replace_missing(ncarray[Ti(1)], -9999.0) diff --git a/test/utils.jl b/test/utils.jl index f05a9b5ff..6af63fa2f 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -1,7 +1,29 @@ using Rasters, Test -using Rasters: cleankeys +using Rasters: cleankeys, _chunks_to_tuple +using Rasters.DiskArrays @testset "cleankeys" begin @test cleankeys(["a", "b", "c"]) == (:a, :b, :c) @test cleankeys(("a", "b", "c")) == (:a, :b, :c) end + +@testset "_chunks_to_tuple" begin + A = zeros(100, 100) + template = DiskArrays.RechunkedDiskArray(A, DiskArrays.GridChunks(A, (32, 16, 1))) + _chunks_to_tuple(template, (X(), Y(), Ti()), true) == (32, 16, 1) + @test _chunks_to_tuple(template, (X(), Y()), true) == (32, 16) + @test _chunks_to_tuple(template, (Y(), X()), true) == (32, 16) + @test _chunks_to_tuple(template, (Y(), X()), false) == nothing + + @test_throws ArgumentError _chunks_to_tuple(template, (Y(), X()), (16, 32, 1)) + @test_throws ArgumentError _chunks_to_tuple(template, (Y(), X()), (16,)) + @test _chunks_to_tuple(template, (Y(), X()), (16, 32)) == (16, 32) + + @test _chunks_to_tuple(template, (X(), Y()), (X=16, Y=32)) == (16, 32) + @test _chunks_to_tuple(template, (X(), Y()), (Y=16, X=32)) == (32, 16) + + @test _chunks_to_tuple(template, (X(), Y()), (Y(8), X(12))) == (12, 8) + @test _chunks_to_tuple(template, (X(), Y()), (Y(8), X(12), Ti(5))) == (12, 8) + @test _chunks_to_tuple(template, (X(), Y(), Ti()), (Y(8), X(12))) == (12, 8, 1) + @test _chunks_to_tuple(template, (Ti(), X(), Y()), (Y(8), X(12))) == (1, 12, 8) +end From dd4b696a1843eabe7ad9afed2c798f1d513fb619 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Sun, 14 Apr 2024 00:14:31 +0200 Subject: [PATCH 05/13] cleanup create --- src/create.jl | 10 ++++++++-- src/sources/commondatamodel.jl | 20 ++++++++------------ 2 
files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/create.jl b/src/create.jl index 75e69e527..f0f66472c 100644 --- a/src/create.jl +++ b/src/create.jl @@ -7,7 +7,10 @@ function create(filename, T, A::AbstractRaster; create(filename, T, dims(A); parent=parent(A), name, metadata, missingval, kw...) end function create(filename::AbstractString, T::Type, dims::Tuple; - lazy=true, parent=nothing, suffix=nothing, source::Source=_sourcetrait(filename), + lazy=true, + parent=nothing, + suffix=nothing, + source::Source=_sourcetrait(filename), missingval=nothing, kw... ) filename = _maybe_add_suffix(filename, suffix) @@ -15,7 +18,10 @@ function create(filename::AbstractString, T::Type, dims::Tuple; create(filename, source, T, dims; lazy, missingval, kw...) end function create(filename::Nothing, T::Type, dims::Tuple; - parent=nothing, suffix=nothing, missingval, kw... + parent=nothing, + suffix=nothing, + missingval, + kw... ) T = isnothing(missingval) ? T : promote_type(T, typeof(missingval)) data = isnothing(parent) ? Array{T}(undef, dims) : similar(parent, T, size(dims)) diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 344f9d18a..43f17d783 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -122,23 +122,19 @@ function _open(f, ::CDMsource, ds::AbstractDataset; name=nokw, kw...) end _open(f, ::CDMsource, var::CFDiskArray; kw...) = cleanreturn(f(var)) -# TODO fix/test this for RasterStack -function create(filename, source::CDMsource, T::Union{Type,Tuple}, dims::DimTuple; +function create(filename, source::CDMsource, T::Type, dims::DimTuple; name=:layer1, - layerdims=map(_ -> dims, _astuple(name)), - missingval=nothing, + missingval=nokw, metadata=NoMetadata(), lazy=true, + verbose=true, + chunks=nokw, ) - types = T isa Tuple ? T : Ref(T) - missingval = T isa Tuple ? missingval : Ref(missingval) # Create layers of zero arrays - layers = map(layerdims, name, types, missingval) do lds, name, t, mv - A = FillArrays.Zeros{t}(map(length, lds)) - Raster(A, dims=lds; name, missingval=mv) - end - write(filename, source, Raster(first(layers))) - return Raster(filename; source=source, lazy) + A = FillArrays.Zeros{T}(map(length, dims)) + rast = Raster(A, dims; name, missingval) + write(filename, source, rast; chunks) + return Raster(filename; source, lazy) end filekey(ds::AbstractDataset, name) = _firstname(ds, name) From 479757543be402bd864f5f256a80b1b174eb6eb9 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Sun, 14 Apr 2024 00:20:52 +0200 Subject: [PATCH 06/13] create tweaks --- src/sources/commondatamodel.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 43f17d783..79d0b28c2 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -123,18 +123,18 @@ end _open(f, ::CDMsource, var::CFDiskArray; kw...) 
= cleanreturn(f(var)) function create(filename, source::CDMsource, T::Type, dims::DimTuple; - name=:layer1, + name=nokw, missingval=nokw, - metadata=NoMetadata(), + metadata=nokw, lazy=true, verbose=true, chunks=nokw, ) # Create layers of zero arrays A = FillArrays.Zeros{T}(map(length, dims)) - rast = Raster(A, dims; name, missingval) + rast = Raster(A, dims; name, missingval, metadata) write(filename, source, rast; chunks) - return Raster(filename; source, lazy) + return Raster(filename; metadata, source, lazy) end filekey(ds::AbstractDataset, name) = _firstname(ds, name) From b97322c4694aa459ada5bd32117bc8e2ab9138b1 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Sun, 14 Apr 2024 10:00:15 +0200 Subject: [PATCH 07/13] fix chunk tests --- test/utils.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/test/utils.jl b/test/utils.jl index 6af63fa2f..12c003902 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -10,18 +10,23 @@ end @testset "_chunks_to_tuple" begin A = zeros(100, 100) template = DiskArrays.RechunkedDiskArray(A, DiskArrays.GridChunks(A, (32, 16, 1))) + # Bool chunk selection is generated from template _chunks_to_tuple(template, (X(), Y(), Ti()), true) == (32, 16, 1) @test _chunks_to_tuple(template, (X(), Y()), true) == (32, 16) @test _chunks_to_tuple(template, (Y(), X()), true) == (32, 16) @test _chunks_to_tuple(template, (Y(), X()), false) == nothing + # Extra dims error @test_throws ArgumentError _chunks_to_tuple(template, (Y(), X()), (16, 32, 1)) - @test_throws ArgumentError _chunks_to_tuple(template, (Y(), X()), (16,)) + # Missing dims filled with 1s + @test _chunks_to_tuple(template, (Y(), X()), (16,)) == (16, 1) + # Correct number works as is @test _chunks_to_tuple(template, (Y(), X()), (16, 32)) == (16, 32) + # Named chunks @test _chunks_to_tuple(template, (X(), Y()), (X=16, Y=32)) == (16, 32) @test _chunks_to_tuple(template, (X(), Y()), (Y=16, X=32)) == (32, 16) - + @test _chunks_to_tuple(template, (X(), Y()), (Y(8), X(12))) == (12, 8) @test _chunks_to_tuple(template, (X(), Y()), (Y(8), X(12), Ti(5))) == (12, 8) @test _chunks_to_tuple(template, (X(), Y(), Ti()), (Y(8), X(12))) == (12, 8, 1) From 229999179218ad33e145425f17e09ae72022dd1a Mon Sep 17 00:00:00 2001 From: rafaqz Date: Sun, 14 Apr 2024 10:13:59 +0200 Subject: [PATCH 08/13] doc tweaks --- src/write.jl | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/write.jl b/src/write.jl index ebf7c4dcb..833bea2b1 100644 --- a/src/write.jl +++ b/src/write.jl @@ -8,6 +8,12 @@ const CHUNKS_KEYWORD = """ `false` means dont use chunks at all. """ +const MISSINGVAL_KEYWORD = """ +- `missingval`: set the missing value (i.e. FillValue / nodataval) of the written raster, + as Julias `missing` cannot be stored. If not passed in, `missingval` will be detected + from metadata or a default will be chosen. +""" + const SOURCE_WRITE_DOCSTRING = """ Other keyword arguments are passed to the `write` method for the backend. @@ -58,6 +64,7 @@ file extension or using the `source` keyword. $CHUNKS_KEYWORD $FORCE_KEYWORD +$MISSINGVAL_KEYWORD $SOURCE_KEYWORD $SOURCE_WRITE_DOCSTRING @@ -93,6 +100,8 @@ If the source can't be saved as a stack-like object, individual array layers wil $CHUNKS_KEYWORD $EXT_KEYWORD $FORCE_KEYWORD +$MISSINGVAL_KEYWORD + For `RasterStack` this may be a `NamedTuple`, one for each layer. $SOURCE_KEYWORD $SUFFIX_KEYWORD $VERBOSE_KEYWORD @@ -150,6 +159,8 @@ All keywords are passed through to these `Raster` and `RasterStack` methods. 
$CHUNKS_KEYWORD $EXT_KEYWORD $FORCE_KEYWORD +$MISSINGVAL_KEYWORD + For series with `RasterStack` child objects, this may be a `NamedTuple`, one for each layer. $SOURCE_KEYWORD $VERBOSE_KEYWORD """ From 208303285339bcc9dbc53f52eb28fd96e44ff542 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Sun, 14 Apr 2024 10:16:32 +0200 Subject: [PATCH 09/13] remove broken dims keyword --- ext/RastersArchGDALExt/gdal_source.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index 1375992e2..6fee72f03 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -241,7 +241,7 @@ function RA.Raster(ds::AG.RasterDataset; lazy=false, dropband=false ) - kw = (; dims, refdims, name, metadata, missingval) + kw = (; refdims, name, metadata, missingval) filelist = AG.filelist(ds) raster = if lazy && length(filelist) > 0 filename = first(filelist) From 706b5ce2f1eb2108e3be34d1849329ae9f10c4c3 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Mon, 15 Apr 2024 07:25:38 +0200 Subject: [PATCH 10/13] add groups --- Project.toml | 5 +- ext/RastersHDF5Ext/RastersHDF5Ext.jl | 34 -- ext/RastersHDF5Ext/smap_source.jl | 210 ------------- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 12 +- .../constructors.jl | 26 +- src/array.jl | 24 +- src/filearray.jl | 32 +- src/filestack.jl | 13 +- src/methods/shared_docstrings.jl | 19 ++ src/series.jl | 14 +- src/show.jl | 20 +- src/sources/commondatamodel.jl | 60 +++- src/sources/grd.jl | 6 +- src/sources/sources.jl | 9 +- src/stack.jl | 51 +-- test/runtests.jl | 1 - test/sources/ncdatasets.jl | 95 +++--- test/sources/rasterdatasources.jl | 22 +- test/sources/smap.jl | 295 ------------------ 19 files changed, 242 insertions(+), 706 deletions(-) delete mode 100644 ext/RastersHDF5Ext/RastersHDF5Ext.jl delete mode 100644 ext/RastersHDF5Ext/smap_source.jl delete mode 100644 test/sources/smap.jl diff --git a/Project.toml b/Project.toml index b00978701..6de7d42f2 100644 --- a/Project.toml +++ b/Project.toml @@ -37,7 +37,6 @@ RasterDataSources = "3cb90ccd-e1b6-4867-9617-4276c8b2ca36" RastersArchGDALExt = "ArchGDAL" RastersCoordinateTransformationsExt = "CoordinateTransformations" RastersGRIBDatasetsExt = "GRIBDatasets" -RastersHDF5Ext = "HDF5" RastersMakieExt = "Makie" RastersNCDatasetsExt = "NCDatasets" RastersRasterDataSourcesExt = "RasterDataSources" @@ -61,7 +60,6 @@ GRIBDatasets = "0.2, 0.3" GeoFormatTypes = "0.4" GeometryBasics = "0.4" GeoInterface = "1" -HDF5 = "0.14, 0.15, 0.16" Makie = "0.19, 0.20" Missings = "0.4, 1" NCDatasets = "0.13, 0.14" @@ -85,7 +83,6 @@ CoordinateTransformations = "150eb455-5306-5404-9cee-2592286d6298" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" GeometryBasics = "5c1252a2-5f33-56bf-86c9-59e7332b4326" GRIBDatasets = "82be9cdb-ee19-4151-bdb3-b400788d9abc" -HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" @@ -96,4 +93,4 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Aqua", "ArchGDAL", "CFTime", "CoordinateTransformations", "DataFrames", "GeometryBasics", "GRIBDatasets", "HDF5", "NCDatasets", "Plots", "RasterDataSources", "SafeTestsets", "Shapefile", "Statistics", "Test"] +test = ["Aqua", "ArchGDAL", "CFTime", "CoordinateTransformations", "DataFrames", "GeometryBasics", "GRIBDatasets", "NCDatasets", "Plots", 
"RasterDataSources", "SafeTestsets", "Shapefile", "Statistics", "Test"] diff --git a/ext/RastersHDF5Ext/RastersHDF5Ext.jl b/ext/RastersHDF5Ext/RastersHDF5Ext.jl deleted file mode 100644 index e9f42c8d3..000000000 --- a/ext/RastersHDF5Ext/RastersHDF5Ext.jl +++ /dev/null @@ -1,34 +0,0 @@ -module RastersHDF5Ext - -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Rasters, HDF5 -else - using ..Rasters, ..HDF5 -end - -import DiskArrays, - Extents, - HDF5, - Missings - -using Dates, - DimensionalData, - GeoFormatTypes, - GeoInterface, - Rasters - -using Rasters.Lookups -using Rasters.Dimensions -using Rasters: SMAPsource - -export smapseries - -const RA = Rasters -const DD = DimensionalData -const DA = DiskArrays -const GI = GeoInterface -const LA = Lookups - -include("smap_source.jl") - -end diff --git a/ext/RastersHDF5Ext/smap_source.jl b/ext/RastersHDF5Ext/smap_source.jl deleted file mode 100644 index e025d6b04..000000000 --- a/ext/RastersHDF5Ext/smap_source.jl +++ /dev/null @@ -1,210 +0,0 @@ -const SMAPMISSING = -9999.0f0 -const SMAPGEODATA = "Geophysical_Data" -const SMAPCRS = ProjString("+proj=cea +lon_0=0 +lat_ts=30 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0") -const SMAPSIZE = (3856, 1624) -const SMAPDIMTYPES = (X(), Y()) - -# Dataset wrapper ############################################################### -# Becauase SMAP is just one of many HDF5 formats, -# we wrap it in SMAPhdf5 and SMAPvar wrappers - -struct SMAPhdf5{T} - ds::T -end - -RA.missingval(ds::SMAPhdf5) = SMAPMISSING -RA.layerkeys(ds::SMAPhdf5) = keys(ds) -RA.filekey(ds::SMAPhdf5, key::Nothing) = first(keys(ds)) - -function _dims(wrapper::SMAPhdf5, args...) - dataset = parent(wrapper) - proj = read(HDF5.attributes(HDF5.root(dataset)["EASE2_global_projection"]), "grid_mapping_name") - if proj == "lambert_cylindrical_equal_area" - # There are matrices for lookup but all rows/colums are identical. - # For performance and simplicity we just take a vector slice for each dim. - extent = HDF5.attributes(HDF5.root(dataset)["Metadata/Extent"]) - lonbounds = read(extent["westBoundLongitude"]), read(extent["eastBoundLongitude"]) - latbounds = read(extent["southBoundLatitude"]), read(extent["northBoundLatitude"]) - lonvec = HDF5.root(dataset)["cell_lon"][:, 1] - latvec = HDF5.root(dataset)["cell_lat"][1, :] - lonlookup = Mapped(lonvec; - order=ForwardOrdered(), - span=Irregular(lonbounds), - sampling=Intervals(Center()), - crs=SMAPCRS, - mappedcrs=EPSG(4326), - dim=X(), - ) - latlookup = Mapped(latvec; - order=ReverseOrdered(), - span=Irregular(latbounds), - sampling=Intervals(Center()), - crs=SMAPCRS, - mappedcrs=EPSG(4326), - dim=Y(), - ) - return X(lonlookup), Y(latlookup) - else - error("projection $proj not supported") - end -end - -# TODO actually add metadata to the dict -_metadata(wrapper::SMAPhdf5) = RA._metadatadict(SMAPsource()) - -function _layerdims(ds::SMAPhdf5; layers=nothing) - keys = map(Symbol, isnothing(layers) ? RA.cleankeys(RA.layerkeys(ds)) : layers.keys) - # All dims are the same - NamedTuple{Tuple(keys)}(map(_ -> SMAPDIMTYPES, keys)) -end - -function _layermetadata(ds::SMAPhdf5; layers=nothing) - keys = map(Symbol, isnothing(layers) ? 
RA.cleankeys(RA.layerkeys(ds)) : layers.keys) - md = _metadata(ds) - NamedTuple{keys}(map(_ -> md, keys)) -end - -Base.keys(ds::SMAPhdf5) = RA.cleankeys(keys(parent(ds)[SMAPGEODATA])) -Base.parent(wrapper::SMAPhdf5) = wrapper.ds -Base.getindex(wrapper::SMAPhdf5, key) = SMAPvar(wrapper.ds[_smappath(key)]) - -_smappath(key::RA.Key) = SMAPGEODATA * "/" * string(key) - -struct SMAPvar{DS} <: AbstractArray{Float32,2} - ds::DS -end - -Base.parent(wrapper::SMAPvar) = wrapper.ds -Base.eltype(wrapper::SMAPvar) = eltype(parent(wrapper)) -Base.size(wrapper::SMAPvar) = SMAPSIZE -Base.ndims(wrapper::SMAPvar) = length(SMAPSIZE) -Base.getindex(wrapper::SMAPvar, args...) = getindex(parent(wrapper), args...) -Base.setindex!(wrapper::SMAPvar, args...) = setindex!(parent(wrapper), args...) -Base.Array(wrapper::SMAPvar) = Array(parent(wrapper)) -Base.collect(wrapper::SMAPvar) = collect(parent(wrapper)) - -DA.eachchunk(var::SMAPvar) = DA.GridChunks(var, size(var)) -DA.haschunks(var::SMAPvar) = DA.Unchunked() - -# Raster ###################################################################### - -function RA.FileArray{SMAPsource}(ds::SMAPhdf5, filename::AbstractString; key, kw...) - RA.FileArray{SMAPsource}(ds[key], filename; key, kw...) -end -function RA.FileArray{SMAPsource}(var::SMAPvar, filename::AbstractString; key, kw...) - T = eltype(var) - N = ndims(var) - eachchunk = DA.eachchunk(var) - haschunks = DA.haschunks(var) - RA.FileArray{SMAPsource,T,N}(filename, SMAPSIZE; key, eachchunk, haschunks, kw...) -end - -function Base.open(f::Function, A::RA.FileArray{SMAPsource}; kw...) - RA._open(SMAPsource, RA.filename(A); key=RA.key(A), kw...) do var - f(RA.RasterDiskArray{SMAPsource}(var)) - end -end - -DA.writeblock!(A::RA.RasterDiskArray{SMAPsource}, v, r::AbstractUnitRange...) = A[r...] = v - -RA.haslayers(::Type{SMAPsource}) = true - -# Stack ######################################################################## - -function RA.FileStack{SMAPsource}(ds::SMAPhdf5, filename::AbstractString; write=false, keys) - keys = map(Symbol, keys isa Nothing ? RA.layerkeys(ds) : keys) |> Tuple - type_size_ec_hc = map(keys) do key - var = RA.RasterDiskArray{SMAPsource}(ds[key]) - eltype(var), size(var), DA.eachchunk(var), DA.haschunks(var) - end - layertypes = map(x->x[1], type_size_ec_hc) - layersizes = map(x->x[2], type_size_ec_hc) - eachchunk = map(x->x[3], type_size_ec_hc) - haschunks = map(x->x[4], type_size_ec_hc) - RA.FileStack{SMAPsource,keys}(filename, layertypes, layersizes, eachchunk, haschunks, write) -end -function RA.OpenStack(fs::RA.FileStack{SMAPsource,K}; kw...) where K - ds = HDF5.h5open(RA.filename(fs); kw...) - RA.OpenStack{SMAPsource,K}(SMAPhdf5(ds)) -end -Base.close(os::RA.OpenStack{SMAPsource}) = nothing # HDF5 handles this apparently? - -# Series ####################################################################### - -""" - smapseries(filenames::AbstractString; kw...) - smapseries(filenames::Vector{<:AbstractString}, dims=nothing; kw...) - -[`RasterSeries`](@ref) loader for SMAP files and whole folders of files, -organised along the time dimension. Returns a [`RasterSeries`](@ref). - -# Arguments - -- `filenames`: A `String` path to a directory of SMAP files, - or a vector of `String` paths to specific files. -- `dims`: `Tuple` containing `Ti` dimension for the series. - Automatically generated form `filenames` unless passed in. - -# Keywords - -- `kw`: Passed to `RasterSeries`. -""" -function RA.smapseries(dir::AbstractString; kw...) - RA.smapseries(joinpath.(dir, filter_ext(dir, ".h5")); kw...) 
-end -function RA.smapseries(filenames::Vector{<:AbstractString}, dims=nothing; kw...) - if dims isa Nothing - usedpaths = String[] - timeseries = [] - errors = [] - for filename in filenames - try - t = _smap_timefromfilename(filename) - push!(timeseries, t) - push!(usedpaths, filename) - catch e - push!(errors, e) - end - end - # Use the first files time dim as a template, but join vals into an array of times. - dims = (_smap_timedim(timeseries),) - else - usedpaths = filenames - end - # Show errors after all files load, or you can't see them: - if length(errors) > 0 - println("Some errors thrown during file load: ") - println.(errors) - end - RasterSeries(usedpaths, dims; child=RasterStack, duplicate_first=true, kw...) -end - - -# Utils ######################################################################## - -function RA._open(f, ::Type{SMAPsource}, filename::AbstractString; key=nothing, kw...) - isfile(filename) || RA._filenotfound_error(filename) - HDF5.h5open(filename; kw...) do ds - RA._open(f, SMAPsource, SMAPhdf5(ds); key, kw...) - end -end -function RA._open(f, ::Type{SMAPsource}, ds::SMAPhdf5; key=nothing, kw...) - RA.cleanreturn(f(key isa Nothing ? ds : ds[key])) -end - - -function _smap_timefromfilename(filename::String) - dateformat = DateFormat("yyyymmddTHHMMSS") - dateregex = r"SMAP_L4_SM_gph_(\d+T\d+)_" - datematch = match(dateregex, filename) - if datematch !== nothing - return DateTime(datematch.captures[1], dateformat) - else - error("Date/time not correctly formatted in path: $filename") - end -end - -_smap_timedim(t::DateTime) = _smap_timedim(t:Hour(3):t) -function _smap_timedim(times::AbstractVector) - Ti(Sampled(times, ForwardOrdered(), Regular(Hour(3)), Intervals(Start()), RA._metadatadict(SMAPsource()))) -end diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index 59103707d..8131d3049 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -111,9 +111,9 @@ end _def_dim_var!(ds::AbstractDataset, A) = map(d -> _def_dim_var!(ds, d), dims(A)) function _def_dim_var!(ds::AbstractDataset, dim::Dimension) - dimkey = lowercase(string(DD.name(dim))) - haskey(ds.dim, dimkey) && return nothing - NCD.defDim(ds, dimkey, length(dim)) + dimname = lowercase(string(DD.name(dim))) + haskey(ds.dim, dimname) && return nothing + NCD.defDim(ds, dimname, length(dim)) lookup(dim) isa NoLookup && return nothing # Shift index before conversion to Mapped @@ -127,11 +127,11 @@ function _def_dim_var!(ds::AbstractDataset, dim::Dimension) # Bounds variables if sampling(dim) isa Intervals bounds = Dimensions.dim2boundsmatrix(dim) - boundskey = get(metadata(dim), :bounds, string(dimkey, "_bnds")) + boundskey = get(metadata(dim), :bounds, string(dimname, "_bnds")) push!(attrib, "bounds" => boundskey) - NCD.defVar(ds, boundskey, bounds, ("bnds", dimkey)) + NCD.defVar(ds, boundskey, bounds, ("bnds", dimname)) end - NCD.defVar(ds, dimkey, Vector(index(dim)), (dimkey,); attrib=attrib) + NCD.defVar(ds, dimname, Vector(index(dim)), (dimname,); attrib=attrib) return nothing end diff --git a/ext/RastersRasterDataSourcesExt/constructors.jl b/ext/RastersRasterDataSourcesExt/constructors.jl index fd37bc9f1..24bafb87c 100644 --- a/ext/RastersRasterDataSourcesExt/constructors.jl +++ b/ext/RastersRasterDataSourcesExt/constructors.jl @@ -22,9 +22,9 @@ See the docs for for more specific details about data sources, layers and keyword arguments. 
""" function RA.Raster(T::Type{<:RDS.RasterDataSource}, layer; crs=_source_crs(T), kw...) - rds_kw, gd_kw = _filterkw(T, kw) + rds_kw, ra_kw = _filterkw(T, kw) filename = getraster(T, layer; rds_kw...) - Raster(filename; name=RDS.layerkeys(T, layer), crs, gd_kw...) + Raster(filename; name=RDS.layerkeys(T, layer), crs, ra_kw...) end """ @@ -53,9 +53,9 @@ for more specific details about data sources, layers and keyword arguments. RA.RasterStack(T::Type{<:RDS.RasterDataSource}; kw...) = RasterStack(T, RDS.layers(T); kw...) RA.RasterStack(T::Type{<:RDS.RasterDataSource}, layer::Symbol; kw...) = RasterStack(T, (layer,); kw...) function RA.RasterStack(T::Type{<:RDS.RasterDataSource}, layers::Tuple; crs=_source_crs(T), kw...) - rds_kw, gd_kw = _filterkw(T, kw) + rds_kw, ra_kw = _filterkw(T, kw) filenames = map(l -> RDS.getraster(T, l; rds_kw...), layers) - RasterStack(filenames; keys=RDS.layerkeys(T, layers), crs, gd_kw...) + RasterStack(filenames; name=RDS.layerkeys(T, layers), crs, ra_kw...) end """ @@ -84,7 +84,10 @@ for more specific details about data sources, layers and keyword arguments. RA.RasterSeries(T::Type{<:RDS.RasterDataSource}; kw...) = RasterSeries(T, RDS.layers(T); kw...) # DateTime time-series function RA.RasterSeries(T::Type{<:RDS.RasterDataSource}, layers; - resize=_mayberesize(T), crs=_source_crs(T), mappedcrs=nothing, kw... + resize=_mayberesize(T), + crs=_source_crs(T), + mappedcrs=RA.nokw, + kw... ) monthdim = if haskey(values(kw), :month) values(kw)[:month] isa AbstractArray Dim{:month}(values(kw)[:month]; lookup=Sampled(; sampling=Intervals(Start()))) @@ -108,21 +111,24 @@ function RA.RasterSeries(T::Type{<:RDS.RasterDataSource}, layers; throw(ArgumentError("A RasterSeries can only be constructed from a data source with `date` or `month` keywords that are AbstractArray or Tuple. For other sources, use RasterStack or Raster directly")) end - filenames = RDS.getraster(T, layers; kw...) + rds_kw, ra_kw = _filterkw(T, kw) + filenames = RDS.getraster(T, layers; rds_kw...) can_duplicate = RDS.has_constant_dims(T) && RDS.has_constant_metadata(T) if filenames isa AbstractVector{<:AbstractVector} - series = [RasterSeries(inner_fns, monthdim; resize, crs, mappedcrs, duplicate_first=can_duplicate) for inner_fns in filenames] + series = [RasterSeries(inner_fns, monthdim; resize, crs, mappedcrs, duplicate_first=can_duplicate, ra_kw...) for inner_fns in filenames] return RasterSeries(series, datedim) elseif filenames isa AbstractVector dim = isnothing(datedim) ? monthdim : datedim - return RasterSeries(filenames, dim; resize, crs, mappedcrs, duplicate_first=can_duplicate) + return RasterSeries(filenames, dim; resize, crs, mappedcrs, duplicate_first=can_duplicate, ra_kw...) + else + error("Returned filenames for `RasterSeries` must be a `Vector`. Got $filenames") end end -_mayberesize(T) = RDS.has_matching_layer_size(T) ? nothing : crop +_mayberesize(T) = RDS.has_matching_layer_size(T) ? RA.nokw : crop -_source_crs(T) = nothing +_source_crs(T) = RA.nokw _source_crs(T::Type{AWAP}) = crs=EPSG(4326) _source_crs(T::Type{ALWB}) = crs=EPSG(4326) diff --git a/src/array.jl b/src/array.jl index 94e240f7f..fffaf3f37 100644 --- a/src/array.jl +++ b/src/array.jl @@ -207,20 +207,14 @@ methods _will not_ load data from disk: they are applied later, lazily. - `name`: a `Symbol` name for the array, which will also retreive named layers if `Raster` is used on a multi-layered file like a NetCDF. `name` becomes the layer name if the `Raster` is combined into a `RasterStack`. 
+$GROUP_KEYWORD - `missingval`: value reprsenting missing data, normally detected from the file. Set manually when you know the value is not specified or is incorrect. This will *not* change any values in the raster, it simply assigns which value is treated as missing. To replace all of the missing values in the raster, use [`replace_missing`](@ref). - `metadata`: `Dict` or `Metadata` object for the array, or `NoMetadata()`. -- `crs`: the coordinate reference system of the objects `XDim`/`YDim` dimensions. - Only set this if you know the detected crs is incrorrect, or it is not present in - the file. The `crs` is expected to be a GeoFormatTypes.jl `CRS` or `Mixed` mode `GeoFormat` object, - like `EPSG(4326)`. -- `mappedcrs`: the mapped coordinate reference system of the objects `XDim`/`YDim` dimensions. - for `Mapped` lookups these are the actual values of the index. For `Projected` lookups - this can be used to index in eg. `EPSG(4326)` lat/lon values, having it converted automatically. - Only set this if the detected `mappedcrs` in incorrect, or the file does not have a `mappedcrs`, - e.g. a tiff. The `mappedcrs` is expected to be a GeoFormatTypes.jl `CRS` or `Mixed` mode `GeoFormat` type. +$CONSTRUCTOR_CRS_KEYWORD +$CONSTRUCTOR_MAPPEDCRS_KEYWORD - `refdims`: `Tuple of` position `Dimension`s the array was sliced from, defaulting to `()`. Usually not needed. @@ -295,7 +289,10 @@ function Raster(filename::AbstractString, dims::Tuple{<:Dimension,<:Dimension,Va )::Raster Raster(filename; dims, kw...) end -function Raster(filename::AbstractString; source=nothing, kw...)::Raster +function Raster(filename::AbstractString; + source=nokw, + kw... +)::Raster source = _sourcetrait(filename, source) Base.invokelatest() do _open(filename; source) do ds @@ -307,11 +304,12 @@ function Raster(ds, filename::AbstractString; dims=nokw, refdims=(), name=nokw, + group=nokw, metadata=nokw, missingval=nokw, crs=nokw, mappedcrs=nokw, - source=nothing, + source=nokw, replace_missing=false, write=false, lazy=false, @@ -319,13 +317,13 @@ function Raster(ds, filename::AbstractString; )::Raster name1 = filekey(ds, name) source = _sourcetrait(filename, source) - data1, dims1, metadata1, missingval1 = _open(source, ds; name=name1) do var + data1, dims1, metadata1, missingval1 = _open(source, ds; name=name1, group) do var metadata1 = metadata isa NoKW ? _metadata(var) : metadata missingval1 = _check_missingval(var, missingval) replace_missing1 = replace_missing && !isnothing(missingval1) missingval2 = replace_missing1 ? missing : missingval1 data = if lazy - A = FileArray{typeof(source)}(var, filename; name=name1, write) + A = FileArray{typeof(source)}(var, filename; name=name1, group, write) replace_missing1 ? _replace_missing(A, missingval1) : A else _checkmem(var) diff --git a/src/filearray.jl b/src/filearray.jl index 8d5cf71ec..157135464 100644 --- a/src/filearray.jl +++ b/src/filearray.jl @@ -3,27 +3,35 @@ FileArray{S} <: DiskArrays.AbstractDiskArray Filearray is a DiskArrays.jl `AbstractDiskArray`. Instead of holding -an open object, it just holds a filename string that is opened lazily +an open object, it just holds a filename string that is opened lazily when it needs to be read. 
""" -struct FileArray{S,T,N,Na,EC,HC} <: DiskArrays.AbstractDiskArray{T,N} +struct FileArray{S,T,N,Na,G,EC,HC} <: DiskArrays.AbstractDiskArray{T,N} filename::String size::NTuple{N,Int} name::Na + group::G eachchunk::EC haschunks::HC write::Bool end function FileArray{S,T,N}( - filename, size, name::Na, eachchunk::EC=size, - haschunks::HC=DA.Unchunked(), write=false -) where {S,T,N,Na,EC,HC} - FileArray{S,T,N,Na,EC,HC}(filename, size, name, eachchunk, haschunks, write) + filename, + size, + name::Na, + group::G=nothing, + eachchunk::EC=size, + haschunks::HC=DA.Unchunked(), + write=false +) where {S,T,N,Na,G,EC,HC} + FileArray{S,T,N,Na,G,EC,HC}(filename, size, name, group, eachchunk, haschunks, write) end -function FileArray{S,T,N}(filename::String, size::Tuple; - name=nothing, eachchunk=size, haschunks=DA.Unchunked(), write=false +function FileArray{S,T,N}(filename::String, size::Tuple; + name=nokw, group=nokw, eachchunk=size, haschunks=DA.Unchunked(), write=false ) where {S,T,N} - FileArray{S,T,N}(filename, size, name, eachchunk, haschunks, write) + name = name isa NoKW ? nothing : name + group = group isa NoKW ? nothing : group + FileArray{S,T,N}(filename, size, name, group, eachchunk, haschunks, write) end # FileArray has S, T and N parameters not recoverable from fields @@ -45,7 +53,7 @@ function DA.readblock!(A::FileArray, dst, r::AbstractUnitRange...) DA.readblock!(O, dst, r...) end end -function DA.writeblock!(A::FileArray, src, r::AbstractUnitRange...) +function DA.writeblock!(A::FileArray, src, r::AbstractUnitRange...) open(A; write=A.write) do O DA.writeblock!(O, src, r...) end @@ -55,7 +63,7 @@ end """ RasterDiskArray <: DiskArrays.AbstractDiskArray -A basic DiskArrays.jl wrapper for objects that don't have one defined yet. +A basic DiskArrays.jl wrapper for objects that don't have one defined yet. When we `open` a `FileArray` it is replaced with a `RasterDiskArray`. """ struct RasterDiskArray{S,T,N,V,EC,HC,A} <: DiskArrays.AbstractDiskArray{T,N} @@ -81,7 +89,7 @@ DA.readblock!(A::RasterDiskArray, aout, r::AbstractUnitRange...) = aout .= paren DA.writeblock!(A::RasterDiskArray, v, r::AbstractUnitRange...) = parent(A)[r...] .= v # Already open, doesn't use `name` -_open(f, ::Source, A::RasterDiskArray; name=nothing) = f(A) +_open(f, ::Source, A::RasterDiskArray; name=nokw, group=nokw) = f(A) struct MissingDiskArray{T,N,V} <: DiskArrays.AbstractDiskArray{T,N} var::V diff --git a/src/filestack.jl b/src/filestack.jl index ca3c69bb3..0179fcf82 100644 --- a/src/filestack.jl +++ b/src/filestack.jl @@ -9,18 +9,19 @@ typically netcdf or hdf5. `S` is a backend type like `NCDsource`, and `Na` is a tuple of `Symbol` keys. """ -struct FileStack{S,Na,F<:AbstractString,T,SZ,EC,HC} - filename::F +struct FileStack{S,Na,T,SZ,G<:Union{AbstractString,Symbol,Nothing},EC,HC} + filename::String types::T sizes::SZ + group::G eachchunk::EC haschunks::HC write::Bool end function FileStack{S,Na}( - filename::F, types::T, sizes::SZ, eachchunk::EC, haschunks::HC, write::Bool - ) where {S,Na,F,T,SZ,EC,HC} - FileStack{S,Na,F,T,SZ,EC,HC}(filename, types, sizes, eachchunk, haschunks, write) + filename, types::T, sizes::SZ, group::G, eachchunk::EC, haschunks::HC, write::Bool + ) where {S,Na,T,SZ,G,EC,HC} + FileStack{S,Na,T,SZ,G,EC,HC}(String(filename), types, sizes, group, eachchunk, haschunks, write) end # FileStack has `S` parameter that is not recoverable from fields. 
@@ -40,5 +41,5 @@ function Base.getindex(fs::FileStack{S,Na}, name::Symbol) where {S,Na} haschunks = fs.haschunks[i] T = fs.types[i] N = length(size) - A = FileArray{S,T,N}(filename(fs), size, name, eachchunk, haschunks, fs.write) + return FileArray{S,T,N}(filename(fs), size, name, fs.group, eachchunk, haschunks, fs.write) end diff --git a/src/methods/shared_docstrings.jl b/src/methods/shared_docstrings.jl index 0db255c1a..f9b6a0ea3 100644 --- a/src/methods/shared_docstrings.jl +++ b/src/methods/shared_docstrings.jl @@ -76,3 +76,22 @@ const FORCE_KEYWORD = """ const DROPBAND_KEYWORD = """ - `dropband`: drop single band dimensions when creating stacks from filenames. `true` by default. """ + +const CONSTRUCTOR_CRS_KEYWORD = """ +- `crs`: the coordinate reference system of the objects `XDim`/`YDim` dimensions. + Only set this if you know the detected crs is incrorrect, or it is not present in + the file. The `crs` is expected to be a GeoFormatTypes.jl `CRS` or `Mixed` mode `GeoFormat` object, + like `EPSG(4326)`. +""" +const CONSTRUCTOR_MAPPEDCRS_KEYWORD = """ +- `mappedcrs`: the mapped coordinate reference system of the objects `XDim`/`YDim` dimensions. + for `Mapped` lookups these are the actual values of the index. For `Projected` lookups + this can be used to index in eg. `EPSG(4326)` lat/lon values, having it converted automatically. + Only set this if the detected `mappedcrs` in incorrect, or the file does not have a `mappedcrs`, + e.g. a tiff. The `mappedcrs` is expected to be a GeoFormatTypes.jl `CRS` or `Mixed` mode `GeoFormat` type. +""" +const GROUP_KEYWORD = """ +- `group`: the group in the dataset where `name` can be found. Only needed for nested datasets. + A `String` or `Symbol` will select a single group. Pairs can also used to access groups + at any nested depth, i.e `group=:group1 => :group2 => :group3`. +""" diff --git a/src/series.jl b/src/series.jl index 9992b7e70..17003be0c 100644 --- a/src/series.jl +++ b/src/series.jl @@ -124,7 +124,12 @@ function RasterSeries(filenames::NamedTuple{K}, dims; kw...) where K RasterSeries(map((fns...) -> NamedTuple{K}(fns), values(filenames)...), dims; kw...) end function RasterSeries(filenames::AbstractArray{<:Union{AbstractString,NamedTuple}}, dims; - refdims=(), lazy=false, duplicate_first=false, child=nokw, resize=nokw, kw... + refdims=(), + lazy=false, + duplicate_first=false, + child=nokw, + resize=nokw, + kw... ) childtype = if child isa NoKW eltype(filenames) <: NamedTuple ? RasterStack : Raster @@ -152,7 +157,12 @@ function RasterSeries(filenames::AbstractArray{<:Union{AbstractString,NamedTuple end return RasterSeries(data, DD.format(dims, data); refdims) end -function RasterSeries(path::AbstractString, dims; refdims=(), ext=nothing, separator='_', kw...) +function RasterSeries(path::AbstractString, dims; + refdims=(), + ext=nothing, + separator='_', + kw... +) if isdir(path) dirpath = path filepaths = filter_ext(dirpath, ext) diff --git a/src/show.jl b/src/show.jl index 87926fb4a..20c366e71 100644 --- a/src/show.jl +++ b/src/show.jl @@ -9,7 +9,7 @@ function DD.show_after(io::IO, mime::MIME"text/plain", A::AbstractRaster; kw...) end end -function DD.show_after(io, mime, stack::AbstractRasterStack; kw...) +function DD.show_after(io::IO, mime, stack::AbstractRasterStack; kw...) 
blockwidth = get(io, :blockwidth, 0) print_geo(io, mime, stack; blockwidth) DD.print_block_close(io, blockwidth) @@ -55,14 +55,12 @@ function Base.summary(io::IO, ser::AbstractRasterSeries{T,N}) where {T,N} print(io, string(nameof(typeof(ser)), "{$(nameof(T)),$N}")) end -function Base.show(io::IO, mime::MIME"text/plain", A::AbstractRasterSeries{T,N}) where {T,N} - lines = 0 - summary(io, A) - DD.print_name(io, name(A)) - lines += Dimensions.print_dims(io, mime, dims(A)) - !(isempty(dims(A)) || isempty(refdims(A))) && println(io) - lines += Dimensions.print_refdims(io, mime, refdims(A)) - println(io) - ds = displaysize(io) - ioctx = IOContext(io, :displaysize => (ds[1] - lines, ds[2])) +function DD.show_after(io::IO, mime, series::AbstractRasterSeries; kw...) + if length(series) > 0 + blockwidth = get(io, :blockwidth, 0) + print_geo(io, mime, first(series); blockwidth) + DD.print_block_close(io, blockwidth) + end + ndims(series) > 0 && println(io) + DD.print_array(io, mime, series) end diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 79d0b28c2..d704d6ba3 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -101,27 +101,37 @@ end function FileStack{source}( ds::AbstractDataset, filename::AbstractString; - write::Bool=false, name::NTuple{N,Symbol}, vars + write::Bool=false, + group=nokw, + name::NTuple{N,Symbol}, + vars ) where {source<:CDMsource,N} layertypes = map(var -> Union{Missing,eltype(var)}, vars) layersizes = map(size, vars) eachchunk = map(_get_eachchunk, vars) haschunks = map(_get_haschunks, vars) - return FileStack{source,name}(filename, layertypes, layersizes, eachchunk, haschunks, write) + group = group isa NoKW ? nothing : group + return FileStack{source,name}(filename, layertypes, layersizes, group, eachchunk, haschunks, write) end function Base.open(f::Function, A::FileArray{source}; write=A.write, kw...) where source<:CDMsource - _open(source(), filename(A); name=name(A), write, kw...) do var + _open(source(), filename(A); name=name(A), group=A.group, write, kw...) do var f(var) end end -function _open(f, ::CDMsource, ds::AbstractDataset; name=nokw, kw...) - x = name isa NoKW ? ds : CFDiskArray(ds[_firstname(ds, name)]) +function _open(f, ::CDMsource, ds::AbstractDataset; name=nokw, group=nothing, kw...) + g = _getgroup(ds, group) + x = name isa NoKW ? g : CFDiskArray(g[_firstname(ds, name)]) cleanreturn(f(x)) end _open(f, ::CDMsource, var::CFDiskArray; kw...) 
= cleanreturn(f(var)) +# This allows arbitrary group nesting +_getgroup(ds, ::Union{Nothing,NoKW}) = ds +_getgroup(ds, group::Union{Symbol,AbstractString}) = ds.group[String(group)] +_getgroup(ds, group::Pair) = _getgroup(ds.group[String(group[1])], group[2]) + function create(filename, source::CDMsource, T::Type, dims::DimTuple; name=nokw, missingval=nokw, @@ -165,7 +175,7 @@ function _nondimnames(ds) return nondim end -function _layers(ds::AbstractDataset, ::NoKW=nokw) +function _layers(ds::AbstractDataset, ::NoKW=nokw, ::NoKW=nokw) nondim = _nondimnames(ds) grid_mapping = String[] vars = map(k -> ds[k], nondim) @@ -182,11 +192,14 @@ function _layers(ds::AbstractDataset, ::NoKW=nokw) attrs=attrs[bitinds], ) end -function _layers(ds::AbstractDataset, names) +function _layers(ds::AbstractDataset, names, ::NoKW) vars = map(k -> ds[k], names) attrs = map(CDM.attribs, vars) (; names, vars, attrs) end +function _layers(ds::AbstractDataset, names, group) + _layers(ds.group[group], names, nokw) +end function _dims(var::AbstractVariable{<:Any,N}, crs=nokw, mappedcrs=nokw) where N dimnames = CDM.dimnames(var) @@ -233,8 +246,14 @@ end # TODO dont load all keys here with _layers _firstname(ds::AbstractDataset, name) = Symbol(name) -_firstname(ds::AbstractDataset, name::NoKW=nokw) = - Symbol(first(_nondimnames(ds))) +function _firstname(ds::AbstractDataset, name::NoKW=nokw) + names = _nondimnames(ds) + if length(names) > 0 + Symbol(first(names)) + else + throw(ArgumentError("No non-dimension layers found in dataset with keys: $(keys(ds))")) + end +end function _cdmdim(ds, dimname::Key, crs=nokw, mappedcrs=nokw) if haskey(ds, dimname) @@ -310,18 +329,25 @@ function _cdmlookup( # http://cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries order = LA.orderof(index) var = ds[dimname] + # Detect lat/lon if haskey(CDM.attribs(var), "bounds") boundskey = var.attrib["bounds"] boundsmatrix = Array(ds[boundskey]) span, sampling = Explicit(boundsmatrix), Intervals(Center()) - return _cdmlookup(D, index, order, span, sampling, metadata, crs, mappedcrs) elseif eltype(index) <: Union{Missing,Dates.AbstractTime} span, sampling = _cdmperiod(index, metadata) - return _cdmlookup(D, index, order, span, sampling, metadata, crs, mappedcrs) else span, sampling = _cdmspan(index, order), Points() - return _cdmlookup(D, index, order, span, sampling, metadata, crs, mappedcrs) end + # We cant yet check CF standards crs, but we can at least check for units in lat/lon + if mappedcrs isa NoKW && get(metadata, "units", "") in ("degrees_north", "degrees_east") + mappedcrs = EPSG(4326) + end + # Additionally, crs and mappedcrs should be identical for Regular lookups + if crs isa NoKW span isa Regular + crs = mappedcrs + end + return _cdmlookup(D, index, order, span, sampling, metadata, crs, mappedcrs) end # For X and Y use a Mapped <: AbstractSampled lookup function _cdmlookup( @@ -392,13 +418,15 @@ function _parse_period(period_str::String) vals = map(x -> parse(Int, x), mtch.captures) if length(vals) == 6 y = Year(vals[1]) - m = Month(vals[2]) + mo = Month(vals[2]) d = Day(vals[3]) h = Hour(vals[4]) - m = Minute(vals[5]) + mi = Minute(vals[5]) s = Second(vals[6]) - compound = sum(y, m, d, h, m, s) - if length(compound.periods) == 1 + compound = sum((y, mo, d, h, mi, s)) + if length(compound.periods) == 0 + return nothing + elseif length(compound.periods) == 1 return compound.periods[1] else return compound diff --git a/src/sources/grd.jl b/src/sources/grd.jl index 83574b97d..04954b2af 100644 --- 
a/src/sources/grd.jl +++ b/src/sources/grd.jl @@ -38,8 +38,8 @@ end attrib(grd::GRDdataset) = grd.attrib filename(grd::GRDdataset) = grd.filename -filekey(grd::GRDdataset, key::NoKW) = get(attrib(grd), "layername", Symbol("")) -filekey(A::RasterDiskArray{GRDsource}, key) = filekey(A.attrib, key) +filekey(grd::GRDdataset, name::NoKW) = get(attrib(grd), "layername", Symbol("")) +filekey(A::RasterDiskArray{GRDsource}, name) = filekey(A.attrib, name) Base.eltype(::GRDdataset{T}) where T = T function Base.size(grd::GRDdataset) @@ -263,7 +263,7 @@ function Base.open(f::Function, A::FileArray{GRDsource}, args...; write=A.write) _mmapgrd(mm -> f(RasterDiskArray{GRDsource}(mm, A.eachchunk, A.haschunks)), A; write) end -function _open(f, ::GRDsource, filename::AbstractString; write=false, key=nothing) +function _open(f, ::GRDsource, filename::AbstractString; write=false, name=nokw, group=nokw) isfile(filename) || _filenotfound_error(filename) attr = GRDdataset(filename) _mmapgrd(attr; write) do mm diff --git a/src/sources/sources.jl b/src/sources/sources.jl index 5b0fe0295..5e52db2d3 100644 --- a/src/sources/sources.jl +++ b/src/sources/sources.jl @@ -3,7 +3,6 @@ abstract type Source end struct GRDsource <: Source end struct GDALsource <: Source end -struct SMAPsource <: Source end abstract type CDMsource <: Source end @@ -16,14 +15,12 @@ const NCDfile = NCDsource const GRIBfile = GRIBsource const GRDfile = GRDsource const GDALfile = GDALsource -const SMAPfile = SMAPsource const SYMBOL2SOURCE = Dict( :gdal => GDALsource(), :grd => GRDsource(), :netcdf => NCDsource(), :grib => GRIBsource(), - :smap => SMAPsource(), ) const SOURCE2SYMBOL = Dict(map(reverse, collect(pairs(SYMBOL2SOURCE)))) @@ -31,23 +28,21 @@ const SOURCE2SYMBOL = Dict(map(reverse, collect(pairs(SYMBOL2SOURCE)))) # File extensions. GDAL is the catch-all for everything else const SOURCE2EXT = Dict( GRDsource() => (".grd", ".gri"), - NCDsource() => (".nc",), + NCDsource() => (".nc", ".h5",), GRIBsource() => (".grib",), - SMAPsource() => (".h5",), ) const SOURCE2PACKAGENAME = Dict( GDALsource() => "ArchGDAL", NCDsource() => "NCDatasets", GRIBsource() => "GRIBDatasets", - SMAPsource() => "HDF5", ) const EXT2SOURCE = Dict( ".grd" => GRDsource(), ".gri" => GRDsource(), ".nc" => NCDsource(), + ".h5" => NCDsource(), ".grib" => GRIBsource(), - ".h5" => SMAPsource(), ) # exception to be raised when backend extension is not satisfied diff --git a/src/stack.jl b/src/stack.jl index a463ca2aa..64bf8047f 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -140,18 +140,12 @@ Load a file path or a `NamedTuple` of paths as a `RasterStack`, or convert argum - `name`: Used as stack layer names when a `Tuple`, `Vector` or splat of `Raster` is passed in. Has no effect when `NameTuple` is used - the `NamedTuple` keys are the layer names. +$GROUP_KEYWORD - `metadata`: A `Dict` or `DimensionalData.Metadata` object. - `missingval`: a single value for all layers or a `NamedTuple` of missingval for each layer. `nothing` specifies no missing value. -- `crs`: the coordinate reference system of the objects `XDim`/`YDim` dimensions. - Only set this if you know the detected crs is incrorrect, or it is not present in - the file. The `crs` is expected to be a GeoFormatTypes.jl `CRS` or `Mixed` mode `GeoFormat` object, - like `EPSG(4326)`. -- `mappedcrs`: the mapped coordinate reference system of the objects `XDim`/`YDim` dimensions. - for `Mapped` lookups these are the actual values of the index. For `Projected` lookups - this can be used to index in eg. 
`EPSG(4326)` lat/lon values, having it converted automatically. - Only set this if the detected `mappedcrs` in incorrect, or the file does not have a `mappedcrs`, - e.g. a tiff. The `mappedcrs` is expected to be a GeoFormatTypes.jl `CRS` or `Mixed` mode `GeoFormat` type. +$CONSTRUCTOR_CRS_KEYWORD +$CONSTRUCTOR_MAPPEDCRS_KEYWORD - `refdims`: `Tuple` of `Dimension` that the stack was sliced from. For when one or multiple filepaths are used: @@ -210,7 +204,7 @@ function RasterStack(data::Tuple{Vararg{<:AbstractArray}}, dims::Tuple; kw... ) isnothing(name) && throw(ArgumentError("pass a Tuple, Array or NamedTuple of names to the `name` keyword")) - return RasterStack(NamedTuple{cleankeys(name)}(data), dims; kw...) + return RasterStack(NamedTuple{cleankeys(name)}(data); dims, kw...) end # Multi Raster stack from NamedTuple of AbstractArray function RasterStack(data::NamedTuple{<:Any,<:Tuple{Vararg{<:AbstractArray}}}, dims::Tuple; kw...) @@ -273,7 +267,7 @@ function RasterStack(table, dims::Tuple; reshape(col, map(length, dims)) end |> NamedTuple{name} end - return RasterStack(layers, dims; kw...) + return RasterStack(layers; dims, kw...) end # Stack from a Raster function RasterStack(A::AbstractDimArray; @@ -333,26 +327,27 @@ function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}}; missingval=nokw, kw... ) where K - layers = map(keys(filenames), values(filenames)) do key, fn + layers = map(keys(filenames), values(filenames)) do name, fn source = _sourcetrait(fn, source) - _open(source, fn; key) do ds + _open(source, fn; name) do ds dims = _dims(ds, crs, mappedcrs) prod(map(length, dims)) data = if lazy - FileArray{typeof(source)}(ds, fn; key) + FileArray{typeof(source)}(ds, fn; name) else - _open(source, ds; key) do A + _open(source, ds; name) do A _checkmem(A) Array(A) end end md = _metadata(ds) missingval = missingval isa NoKW ? Rasters.missingval(ds) : missingval - raster = Raster(data, dims; name=key, metadata=md, missingval) + raster = Raster(data; dims, name, metadata=md, missingval) return dropband ? _drop_single_band(raster, lazy) : raster end end - RasterStack(NamedTuple{K}(layers); kw...) + # Try to keep the passed-in missingval as a single value + RasterStack(NamedTuple{K}(layers); missingval, kw...) end # Stack from a String function RasterStack(filename::AbstractString; @@ -360,6 +355,7 @@ function RasterStack(filename::AbstractString; dropband::Bool=true, source::Union{Symbol,Source,NoKW}=nokw, name=nokw, + group=nokw, kw... ) source = _sourcetrait(filename, source) @@ -380,7 +376,7 @@ function RasterStack(filename::AbstractString; # Load as a single file if haslayers(source) # With multiple named layers - l_st = _layer_stack(filename; source, name, lazy, kw...) + l_st = _layer_stack(filename; source, name, lazy, group, kw...) # Maybe split the stack into separate arrays to remove extra dims. if !(name isa NoKW) @@ -430,13 +426,14 @@ function Base.open(f::Function, st::AbstractRasterStack{<:NamedTuple}; kw...) 
end # Open all layers through nested closures, applying `f` to the rebuilt open stack -_open_layers(f, st) = _open_layers(f, st, DD.layers(f), NamedTuple()) +_open_layers(f, st) = _open_layers(f, st, DD.layers(st), NamedTuple()) function _open_layers(f, st, unopened::NamedTuple{K}, opened::NamedTuple) where K open(first(unopened)) do open_layer - _open_layers(f, st, Base.tail(unopened), merge(opened, NamedTuple{(first(K))}(open_layer))) + layer_nt = NamedTuple{(first(K),)}((open_layer,)) + _open_layers(f, st, Base.tail(unopened), merge(opened, layer_nt)) end end -function _open_layers(f, st, unopened::NamedTuple{()}, opened) +function _open_layers(f, st, unopened::NamedTuple{()}, opened::NamedTuple) f(rebuild(st; data=opened)) end @@ -445,6 +442,7 @@ function _layer_stack(filename; dims=nokw, refdims=(), name=nokw, + group=nokw, metadata=nokw, layerdims=nokw, layermetadata=nokw, @@ -455,7 +453,7 @@ function _layer_stack(filename; kw... ) data, field_kw = _open(filename; source) do ds - layers = _layers(ds, name) + layers = _layers(ds, name, group) # Create a Dict of dimkey => Dimension to use in `dim` and `layerdims` dimdict = _dimdict(ds, crs, mappedcrs) refdims = refdims == () || refdims isa Nothing ? () : refdims @@ -466,9 +464,14 @@ function _layer_stack(filename; missingval = missingval isa NoKW ? Rasters.missingval(ds) : missingval names = Tuple(map(Symbol, layers.names)) data = if lazy - FileStack{typeof(source)}(ds, filename; name=names, vars=Tuple(layers.vars)) + FileStack{typeof(source)}(ds, filename; name=names, group, vars=Tuple(layers.vars)) else - NamedTuple{names}(map(Array, layers.vars)) + arrays = map(layers.vars) do v + A = Array(v) + # Hack for NCDatasets.jl bug with zero dimensional arrays + A isa Array ? A : fill(A) + end + NamedTuple{names}(arrays) end data, (; dims, refdims, layerdims=NamedTuple{names}(layerdims), metadata, layermetadata=NamedTuple{names}(layermetadata), missingval) end diff --git a/test/runtests.jl b/test/runtests.jl index 9eeed5a57..548ada76b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -35,7 +35,6 @@ end # Only test SMAP locally for now, also RasterDataSources because CI dowloads keep breaking if !haskey(ENV, "CI") @time @safetestset "rasterdatasources" begin include("sources/rasterdatasources.jl") end - @time @safetestset "smap" begin include("sources/smap.jl") end end if !Sys.iswindows() diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index 03ff079dc..11fd3fa91 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -45,7 +45,7 @@ stackkeys = ( end @testset "Raster" begin - @time ncarray = Raster(ncsingle); + @time ncarray = Raster(ncsingle) @time lazyarray = Raster(ncsingle; lazy=true) @time eagerarray = Raster(ncsingle; lazy=false) @@ -82,12 +82,16 @@ end @test all(A .=== A3) end - @testset "ignore empty variables" begin + @testset "handle empty variables" begin st = RasterStack((empty=view(ncarray, 1, 1, 1), full=ncarray)) - write("emptyval_test.nc", st) - rast = Raster("emptyval_test.nc") - @test name(rast) == :full - rm("emptyval_test.nc") + empty_test = tempname() * ".nc" + write(empty_test, st) + rast = Raster(empty_test) + st = RasterStack(empty_test) + @test name(rast) == name(st[:empty]) == :empty + @test size(rast) == size(st[:empty]) == () + @test all(st[:full] .=== ncarray) + st = RasterStack(empty_test; lazy=true) end @testset "array properties" begin @@ -200,7 +204,7 @@ end @testset "indexing with reverse lat" begin if !haskey(ENV, "CI") # CI downloads fail. 
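As an aside on the `A isa Array ? A : fill(A)` branch in `_layer_stack` above (commented as a hack for an NCDatasets.jl bug with zero-dimensional arrays): collecting a zero-dimensional variable can hand back a bare scalar, and `fill` wraps it back into a 0-d `Array`. A small stand-alone illustration with a made-up value:

```julia
x = 1.5f0                      # stand-in for a collapsed 0-d variable
A = x isa Array ? x : fill(x)  # fill(x) builds a zero-dimensional Array
A isa Array{Float32,0}         # true
A[] == 1.5f0                   # true
size(A) == ()                  # true, matching the `size(rast) == ()` test above
```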
But run locally ncrevlat = maybedownload("ftp://ftp.cdc.noaa.gov/Datasets/noaa.ersst.v5/sst.mon.ltm.1981-2010.nc") - ncrevlatarray = Raster(ncrevlat; key=:sst) + ncrevlatarray = Raster(ncrevlat; name=:sst) @test order(dims(ncrevlatarray, Y)) == ReverseOrdered() @test ncrevlatarray[Y(At(40)), X(At(100)), Ti(1)] === missing @test ncrevlatarray[Y(At(-40)), X(At(100)), Ti(1)] === ncrevlatarray[51, 65, 1] == 14.5916605f0 @@ -225,15 +229,15 @@ end end @testset "conversion to Raster" begin - geoA = ncarray[X(1:50), Y(20:20), Ti(1)] - @test size(geoA) == (50, 1) - @test eltype(geoA) <: Union{Missing,Float32} - @test geoA isa Raster{Union{Missing,Float32},2} - @test dims(geoA) isa Tuple{<:X,<:Y} - @test refdims(geoA) isa Tuple{<:Ti} - @test metadata(geoA) == metadata(ncarray) - @test ismissing(missingval(geoA)) - @test name(geoA) == :tos + ncslice = ncarray[X(1:50), Y(20:20), Ti(1)] + @test size(ncslice) == (50, 1) + @test eltype(ncslice) <: Union{Missing,Float32} + @test ncslice isa Raster{Union{Missing,Float32},2} + @test dims(ncslice) isa Tuple{<:X,<:Y} + @test refdims(ncslice) isa Tuple{<:Ti} + @test metadata(ncslice) == metadata(ncarray) + @test ismissing(missingval(ncslice)) + @test name(ncslice) == :tos end @testset "write" begin @@ -247,25 +251,25 @@ end # TODO better units and standard name handling end saved = Raster(filename) - @test size(saved) == size(geoA) - @test refdims(saved) == refdims(geoA) - @test missingval(saved) === missingval(geoA) + @test size(saved) == size(ncarray) + @test refdims(saved) == refdims(ncarray) + @test missingval(saved) === missingval(ncarray) @test map(metadata.(dims(saved)), metadata.(dims(Raster))) do s, g all(s .== g) end |> all - @test metadata(saved) == metadata(geoA) - @test_broken all(metadata(dims(saved))[2] == metadata.(dims(geoA))[2]) - @test Rasters.name(saved) == Rasters.name(geoA) - @test all(lookup.(dims(saved)) .== lookup.(dims(geoA))) - @test all(order.(dims(saved)) .== order.(dims(geoA))) - @test all(typeof.(span.(dims(saved))) .== typeof.(span.(dims(geoA)))) - @test all(val.(span.(dims(saved))) .== val.(span.(dims(geoA)))) - @test all(sampling.(dims(saved)) .== sampling.(dims(geoA))) - @test typeof(dims(saved)) <: typeof(dims(geoA)) - @test index(saved, 3) == index(geoA, 3) - @test all(val.(dims(saved)) .== val.(dims(geoA))) - @test all(parent(saved) .=== parent(geoA)) - @test saved isa typeof(geoA) + @test metadata(saved) == metadata(ncarray) + @test_broken all(metadata(dims(saved))[2] == metadata.(dims(ncarray))[2]) + @test Rasters.name(saved) == Rasters.name(ncarray) + @test all(lookup.(dims(saved)) .== lookup.(dims(ncarray))) + @test all(order.(dims(saved)) .== order.(dims(ncarray))) + @test all(typeof.(span.(dims(saved))) .== typeof.(span.(dims(ncarray)))) + @test all(val.(span.(dims(saved))) .== val.(span.(dims(ncarray)))) + @test all(sampling.(dims(saved)) .== sampling.(dims(ncarray))) + @test typeof(dims(saved)) <: typeof(dims(ncarray)) + @test index(saved, 3) == index(ncarray, 3) + @test all(val.(dims(saved)) .== val.(dims(ncarray))) + @test all(parent(saved) .=== parent(ncarray)) + @test saved isa typeof(ncarray) # TODO test crs @testset "chunks" begin @@ -357,12 +361,12 @@ end end @testset "no missing value" begin - no_ext = tempname() * ".nc" - write(no_ext, boolmask(ncarray) .* 1) - nomissing = Raster("nomissing.nc") + filename = tempname() * ".nc" + B = rebuild(boolmask(ncarray) .* 1; missingval=nothing) + write(filename, B) + nomissing = Raster(filename) @test missingval(nomissing) == nothing - rm("nomissing.nc") - @test 
name(ncarray) == :tos + @test eltype(nomissing) == Int64 end @testset "show" begin @@ -528,11 +532,10 @@ end @test geoseries isa RasterSeries{<:RasterStack} @test parent(geoseries) isa Vector{<:RasterStack} end - geoA = Raster(ncsingle; name=:tos) - @test all(read(ncseries[Ti(1)][:tos]) .=== read(geoA)) - using ProfileView, Profile + rast = Raster(ncsingle; name=:tos) + @test all(read(ncseries[Ti(1)][:tos]) .=== read(rast)) - @profview write("test.nc", ncseries) + write("test.nc", ncseries) @test isfile("test_1.nc") @test isfile("test_2.nc") @test (@allocations write("test.nc", ncseries)) < 1e4 # writing a rasterseries/stack has no force keyword @@ -541,4 +544,14 @@ end rm("test_2.nc") end +# Groups +if !haskey(ENV, "CI") + path = joinpath(testdir, "data/SMAP_L4_SM_gph_20160101T223000_Vv4011_001.h5") + stack = RasterStack(path; group="Geophysical_Data") + lazy_stack = RasterStack(path; group="Geophysical_Data", lazy=true) + rast = Raster(path; name=:surface_temp, group="Geophysical_Data") + lazy_rast = Raster(path; name=:surface_temp, group="Geophysical_Data", lazy=true) + @test all(stack[:surface_temp] .=== read(lazy_stack[:surface_temp]) .=== rast .=== read(lazy_rast)) +end + nothing diff --git a/test/sources/rasterdatasources.jl b/test/sources/rasterdatasources.jl index 1f038a8c6..8e5d46b95 100644 --- a/test/sources/rasterdatasources.jl +++ b/test/sources/rasterdatasources.jl @@ -44,13 +44,13 @@ end # Allow forcing keywords st = RasterStack(CHELSA{BioClim}, (1, 2); lazy=true, - missingval=-9999, + missingval=-Int16(9999), metadata=Rasters.NoMetadata(), crs=nothing, mappedcrs=EPSG(4326), ) - @test missingval(st) == -9999 - @test missingval(st.bio1) == -9999 + @test missingval(st) === Int16(-9999) + @test missingval(st.bio1) == Int16(-9999) @test metadata(st) == Rasters.NoMetadata() end @@ -69,20 +69,20 @@ end A[Y(Between(-10, -45)), X(Between(110, 160))] @test A isa Raster st = RasterStack(EarthEnv{LandCover}, (:evergreen_broadleaf_trees, :deciduous_broadleaf_trees); mappedcrs=EPSG(4326)) - keys(st) = (:evergreen_broadleaf_trees, :deciduous_broadleaf_trees) + @test keys(st) == (:evergreen_broadleaf_trees, :deciduous_broadleaf_trees) @test st isa RasterStack end @testset "load ALWB" begin - A = Raster(ALWB{Deciles,Day}, :rain_day; date=DateTime(2019, 10, 19)) + A = Raster(ALWB{Deciles,Day}, :rain_day; date=DateTime(2019, 10, 19), lazy=true) @test crs(A) == EPSG(4326) - A = Raster(ALWB{Values,Day}, :ss_pct; date=DateTime(2019, 10, 19)) + A = Raster(ALWB{Values,Day}, :ss_pct; date=DateTime(2019, 10, 19), lazy=true) @test crs(A) == EPSG(4326) - st = RasterStack(ALWB{Values,Day}, (:s0_pct, :ss_pct); date=DateTime(2019, 10, 19)) + st = RasterStack(ALWB{Values,Day}, (:s0_pct, :ss_pct); date=DateTime(2019, 10, 19), lazy=true) @test crs(st) == EPSG(4326) @test crs(st[:s0_pct]) == EPSG(4326) dates = DateTime(2019, 10, 19), DateTime(2021, 11, 20) - s = RasterSeries(ALWB{Values,Day}, (:s0_pct, :ss_pct); date=dates) + s = RasterSeries(ALWB{Values,Day}, (:s0_pct, :ss_pct); date=dates, lazy=true) s[1] @test A isa Raster @test st isa RasterStack @@ -92,17 +92,17 @@ end # Obscure .Z format may not work on windows if Sys.islinux() @testset "load AWAP" begin - A = Raster(AWAP, :rainfall; date=DateTime(2019, 10, 19)) + A = Raster(AWAP, :rainfall; date=DateTime(2019, 10, 19), lazy=true) @test crs(A) == EPSG(4326) # ALWB :solar has a broken index - the size is different and the # points dont exactly match the other layers. 
# Need to work out how to best resolve this kind of problem so that we can # still use the layers in stacks. layers = (:rainfall, :vprpress09, :vprpress15, :tmin, :tmax) - st = RasterStack(AWAP, layers; date=DateTime(2019, 10, 19), resize=crop) + st = RasterStack(AWAP, layers; date=DateTime(2019, 10, 19), resize=crop, lazy=true) @test crs(st) == EPSG(4326) dates = DateTime(2019, 09, 19), DateTime(2019, 11, 19) - s = RasterSeries(AWAP, layers; date=dates, resize=crop) + s = RasterSeries(AWAP, layers; date=dates, resize=crop, lazy=true) # test date as an Array s2 = RasterSeries(AWAP, layers; date=[dates...], resize=crop) # s = RasterSeries(AWAP; date=dates, resize=resample, crs=EPSG(4326)) TODO: all the same diff --git a/test/sources/smap.jl b/test/sources/smap.jl deleted file mode 100644 index 1f63be69e..000000000 --- a/test/sources/smap.jl +++ /dev/null @@ -1,295 +0,0 @@ -using Rasters, Test, Statistics, Dates, Plots -using Rasters.Lookups, Rasters.Dimensions -import ArchGDAL, NCDatasets, HDF5, CFTime -using Rasters: layerkeys, SMAPsource, FileArray - -Ext = Base.get_extension(Rasters, :RastersHDF5Ext) - -testpath = joinpath(dirname(pathof(Rasters)), "../test/") -include(joinpath(testpath, "test_utils.jl")) - -# TODO example files without a login requirement -path1 = joinpath(testpath, "data/SMAP_L4_SM_gph_20160101T223000_Vv4011_001.h5") -path2 = joinpath(testpath, "data/SMAP_L4_SM_gph_20160102T223000_Vv4011_001.h5") - -smapkeys = ( - :baseflow_flux, :heat_flux_ground, :heat_flux_latent, :heat_flux_sensible, - :height_lowatmmodlay, :land_evapotranspiration_flux, :land_fraction_saturated, - :land_fraction_snow_covered, :land_fraction_unsaturated, :land_fraction_wilting, - :leaf_area_index, :net_downward_longwave_flux, :net_downward_shortwave_flux, - :overland_runoff_flux, :precipitation_total_surface_flux, :radiation_longwave_absorbed_flux, - :radiation_shortwave_downward_flux, :sm_profile, :sm_profile_pctl, :sm_profile_wetness, - :sm_rootzone, :sm_rootzone_pctl, :sm_rootzone_wetness, :sm_surface, :sm_surface_wetness, - :snow_depth, :snow_mass, :snow_melt_flux, :snowfall_surface_flux, :soil_temp_layer1, - :soil_temp_layer2, :soil_temp_layer3, :soil_temp_layer4, :soil_temp_layer5, :soil_temp_layer6, - :soil_water_infiltration_flux, :specific_humidity_lowatmmodlay, :surface_pressure, :surface_temp, - :temp_lowatmmodlay, :vegetation_greenness_fraction, :windspeed_lowatmmodlay -) - -if isfile(path1) && isfile(path2) - # We need to wrap HDF5 for SMAP, as h5 file may not be SMAP files - @testset "SMAPhdf5 wrapper" begin - HDF5.h5open(path1) do f - ds= Ext.SMAPhdf5(f) - @test keys(ds) == layerkeys(ds) == smapkeys - @test dims(ds) isa Tuple{<:X,<:Y} - end - end - - @testset "Raster" begin - @time smaparray = Raster(path1) - @test_throws ArgumentError Raster("notafile.h5") - - @testset "lazyness" begin - @time read(Raster(path1)); - @time lazyarray = Raster(path1; lazy=true); - @time eagerarray = Raster(path1; lazy=false); - # Eager is the default - @test parent(smaparray) isa Array - @test parent(lazyarray) isa FileArray - @test parent(eagerarray) isa Array - end - - @testset "open" begin - @test all(open(A -> A[Y=1], smaparray) .=== smaparray[:, 1]) - end - - @testset "read" begin - @time A = read(smaparray); - @test A isa Raster - @test parent(A) isa Array - A2 = zero(A) - read!(smaparray, A2) - A3 = zero(A) - @time read!(path1, A3); - @test A == A2 == A3 - end - - @testset "array properties" begin - @test smaparray isa Raster - end - - @testset "dimensions" begin - @test ndims(smaparray) == 2 - 
HDF5.h5open(path1) do ds - @test size(smaparray) == length.(dims(smaparray)) == size(ds["Geophysical_Data/baseflow_flux"]) - end - @test dims(smaparray) isa Tuple{<:X,<:Y} - @test refdims(smaparray) == () - @test sampling(smaparray) == (Intervals(Center()), Intervals(Center())) - @test span(smaparray) == (Irregular((-180.0f0, 180.0f0)), Irregular((-85.04456f0, 85.04456f0))) - @test bounds(smaparray) == ((-180.0f0, 180.0f0), (-85.04456f0, 85.04456f0)) - end - - @testset "other fields" begin @test missingval(smaparray) == -9999.0 - @test metadata(smaparray) isa Metadata{SMAPsource,Dict{String,Any}} - @test name(smaparray) == :baseflow_flux - end - - @testset "indexing" begin - @test smaparray[Ti(1)] isa Raster{<:Any,2} - @test smaparray[Y(1), Ti(1)] isa Raster{<:Any,1} - @test smaparray[X(1), Ti(1)] isa Raster{<:Any,1} - @test smaparray[X(1), Y(1)] == -9999.0 - @test smaparray[X(30), Y(30)] isa Float32 - # Russia - @test smaparray[X(50), Y(100)] == -9999.0 - # Alaska - @test smaparray[Y(Near(64.2008)), X(Near(149.4937))] == 0.0f0 - end - - @testset "selectors" begin - a = smaparray[X(Near(21.0)), Y(Between(50, 52))] - @test bounds(a) == ((50.08451f0, 51.977905f0),) - x = smaparray[X(Near(150)), Y(Near(30))] - @test x isa Float32 - dimz = X(Between(-180.0, 180)), Y(Between(-90, 90)) - @test size(smaparray[dimz...]) == (3856, 1624) - @test index(smaparray[dimz...]) == index(smaparray) - nca = smaparray[Y(Between(-80, -25)), X(Between(0, 180))] - end - - @testset "methods" begin - @test all(mean(smaparray; dims=Y) .=== mean(parent(smaparray); dims=2)) - @testset "trim, crop, extend" begin - a = read(smaparray) - a[X(1:20)] .= missingval(a) - trimmed = trim(a) - @test size(trimmed) == (3836, 1512) - cropped = crop(a; to=trimmed) - dims(trimmed, X) - dims(cropped, X) - @test size(cropped) == (3836, 1512) - @test all(collect(cropped .=== trimmed)) - dims(cropped) - extended = extend(cropped; to=a) - @test all(collect(extended .=== a)) - end - @testset "slice" begin - ser = Rasters.slice(smaparray, X) - @test ser isa RasterSeries - @test size(ser) == (3856,) - @test index(ser, X) == index(smaparray, X) - @test bounds(ser, X) == (-180.0f0, 180.0f0) - A = ser[1] - @test bounds(A, Y) == (-85.04456f0, 85.04456f0) - end - end - - @testset "save" begin - @testset "to grd" begin - # TODO save and load subset - geoA = read(smaparray) - @test size(geoA) == size(smaparray) - filename = tempname() * ".grd" - write(filename, geoA) - saved = read(Raster(filename; mappedcrs=EPSG(4326))[Band(1)]) - dims(saved) - @test size(saved) == size(geoA) - @test missingval(saved) === missingval(geoA) - @test map(metadata.(dims(saved)), metadata.(dims(Raster))) do s, g - all(s .== g) - end |> all - @test Rasters.name(saved) == Rasters.name(geoA) - @test all(lookup.(dims(saved)) .!= lookup.(dims(geoA))) - @test all( - mappedbounds(saved)[1] - .≈ - bounds(geoA)[1]) - @test all(mappedbounds(saved)[2] .≈ mappedbounds(geoA)[2]) - bounds(saved) - @test all(parent(saved) .=== parent(geoA)) - end - @testset "to gdal" begin - gdalfilename = tempname() * ".tif" - @time write(gdalfilename, read(smaparray)) - gdalarray = Raster(gdalfilename; mappedcrs=EPSG(4326)) - # These come out with slightly different format - # @test convert(ProjString, crs(gdalarray)) == crs(smaparray) - @test all(map((a, b) -> all(a .≈ b), mappedbounds(dims(gdalarray)) , bounds(smaparray))) - # Tiff locus = Start, SMAP locus = Center - @test mappedindex(DimensionalData.shiftlocus(Center(), dims(gdalarray, Y))) ≈ index(smaparray, Y) - @test 
mappedindex(DimensionalData.shiftlocus(Center(), dims(gdalarray, X))) ≈ index(smaparray, X) - @test all(gdalarray .== read(smaparray)) - end - @testset "to netcdf" begin - ncdfilename = tempname() * ".nc" - @time write(ncdfilename, smaparray) - reorder(smaparray, ForwardOrdered()) - saved = Raster(ncdfilename) - @test_broken bounds(saved) == bounds(smaparray) - @test index(saved, Y) == index(smaparray, Y) - @test index(saved, X) == index(smaparray, X) - end - end - - @testset "show" begin - sh = sprint(show, MIME("text/plain"), smaparray) - @test occursin("Raster", sh) - @test occursin("Y", sh) - @test occursin("X", sh) - end - - @testset "plot" begin - smaparray[Ti(1:3:12)] |> plot # FIXME plot is weird - smaparray[Ti(1)] |> plot - smaparray[Y(100), Ti(1)] |> plot - end - - end - - @testset "stack" begin - @time smapstack = RasterStack(path1) - - @testset "lazyness" begin - @time read(RasterStack(path1)); - @time lazystack = RasterStack(path1; lazy=true) - @time eagerstack = RasterStack(path1; lazy=false); - # Lazy is the default - @test parent(smapstack[:snow_mass]) isa Array - @test parent(lazystack[:snow_mass]) isa FileArray - @test parent(eagerstack[:snow_mass]) isa Array - end - - @testset "read" begin - @time st = read(smapstack); - @test st isa RasterStack - @test parent(st) isa NamedTuple - @test first(parent(st)) isa Array - st2 = map(a -> a .* 0, st) - @time read!(smapstack, st2); - st3 = map(a -> a .* 0, st) - @time st = read!(path1, st3); - @test all(map((a, b, c) -> all(a .== b .== c), st, st2, st3)) - end - - @testset "conversion to Raster" begin - smaparray = smapstack[:soil_temp_layer1]; - @test smaparray isa Raster{Float32,2} - @test dims(smaparray) isa Tuple{<:X{<:Mapped{Float32}}, <:Y{<:Mapped{Float32}}} - @test span(smaparray) isa Tuple{Irregular{Tuple{Float32,Float32}},Irregular{Tuple{Float32,Float32}}} - @test span(smaparray) == (Irregular((-180.0f0, 180.0f0)), Irregular((-85.04456f0, 85.04456f0))) - @test bounds(smaparray) == ((-180.0f0, 180.0f0), (-85.04456f0, 85.04456f0)) - @test index(smaparray) isa Tuple{Vector{Float32},Vector{Float32}} - @test missingval(smaparray) == -9999.0 - @test smaparray[1, 1] == -9999.0 - @test name(smaparray) == :soil_temp_layer1 - dt = DateTime(2016, 1, 1, 22, 30) - step_ = Hour(3) - # Currently empty - @test metadata(smaparray) isa Metadata{SMAPsource,Dict{String,Any}} - end - - @testset "conversion to regular RasterStack" begin - # Stack Constructors - geostack = RasterStack(smapstack) - @test parent(geostack) isa NamedTuple; - @test Symbol.(Tuple(keys(smapstack))) == keys(geostack) - geostack = RasterStack(smapstack; keys=(:baseflow_flux, :snow_mass, :soil_temp_layer1)) - @test keys(geostack) == (:baseflow_flux, :snow_mass, :soil_temp_layer1) - end - - if VERSION > v"1.1-" - @testset "copy" begin - geoA = zero(smapstack[:soil_temp_layer1]) - @test geoA isa Raster - @test geoA != smapstack[:soil_temp_layer1] - copy!(geoA, smapstack, :soil_temp_layer1) - @test geoA == smapstack[:soil_temp_layer1] - end - end - - @testset "show" begin - sh1 = sprint(show, MIME("text/plain"), smapstack[:soil_temp_layer1]) - @test occursin("Raster", sh1) - @test occursin("Y", sh1) - @test occursin("X", sh1) - end - - end - - @testset "series" begin - ser = smapseries([path1, path2]) - val.(dims(ser)) - @test ser[1] isa RasterStack - @test first(bounds(ser, Ti)) == DateTime(2016, 1, 1, 22, 30) - @test last(bounds(ser, Ti)) == DateTime(2016, 1, 3, 1, 30) - @time modified_series = modify(Array, ser) - stackkeys = keys(modified_series[1]) - @test 
typeof(modified_series) <: RasterSeries{<:RasterStack{<:NamedTuple{stackkeys,<:Tuple{<:Array{Float32,2,},Vararg}}}} - @testset "read" begin - # FIXME: uses too much memory - # Seems to be a HDF5.jl memory leak? - # @time geoseries = read(ser) - # @test geoseries isa RasterSeries{<:RasterStack} - # @test geoseries.data isa Vector{<:RasterStack} - # @test first(geoseries.data[1].data) isa Array - end - - @testset "show" begin - sh = sprint(show, MIME("text/plain"), ser) - # Test but don't lock this down too much - @test occursin("RasterSeries", sh) - end - end -end From 702d4755b5608507fd8c35e3bafc9b52f2320644 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Wed, 17 Apr 2024 01:12:15 +0200 Subject: [PATCH 11/13] reorganise nokw and bugfix --- src/Rasters.jl | 5 +-- src/array.jl | 10 ++--- src/filearray.jl | 4 +- src/lookup.jl | 4 +- src/nokw.jl | 7 ++++ src/sources/commondatamodel.jl | 10 ++--- src/stack.jl | 74 ++++++++++++++++++---------------- test/sources/gdal.jl | 12 ++++++ test/sources/ncdatasets.jl | 1 + 9 files changed, 75 insertions(+), 52 deletions(-) create mode 100644 src/nokw.jl diff --git a/src/Rasters.jl b/src/Rasters.jl index 2e3293c09..053a08a32 100644 --- a/src/Rasters.jl +++ b/src/Rasters.jl @@ -89,10 +89,7 @@ const EXPERIMENTAL = """ const DEFAULT_POINT_ORDER = (X(), Y()) const DEFAULT_TABLE_DIM_KEYS = (:X, :Y) -# To use when `nothing` is a valid keyword value -struct NoKW end -const nokw = NoKW() - +include("nokw.jl") include("methods/shared_docstrings.jl") include("lookup.jl") include("dimensions.jl") diff --git a/src/array.jl b/src/array.jl index fffaf3f37..a0e140282 100644 --- a/src/array.jl +++ b/src/array.jl @@ -253,8 +253,8 @@ function Raster(A::AbstractArray{T,N}, dims::Tuple; mappedcrs=nokw )::Raster{T,N} where {T,N} A = Raster(A, Dimensions.format(dims, A), refdims, name, metadata, missingval) - A = crs isa NoKW ? A : setcrs(A, crs) - A = mappedcrs isa NoKW ? A : setmappedcrs(A, mappedcrs) + A = isnokw(crs) ? A : setcrs(A, crs) + A = isnokw(mappedcrs) ? A : setmappedcrs(A, mappedcrs) return A end function Raster(A::AbstractArray{T,1}, dims::Tuple{<:Dimension,<:Dimension,Vararg}; @@ -267,7 +267,7 @@ function Raster(table, dims::Tuple; kw... ) Tables.istable(table) || throw(ArgumentError("First argument to `Raster` is not a table or other known object: $table")) - name = name isa NoKW ? first(_not_a_dimcol(table, dims)) : name + name = isnokw(name) ? first(_not_a_dimcol(table, dims)) : name cols = Tables.columns(table) A = reshape(cols[name], map(length, dims)) return Raster(A, dims; name, kw...) @@ -318,7 +318,7 @@ function Raster(ds, filename::AbstractString; name1 = filekey(ds, name) source = _sourcetrait(filename, source) data1, dims1, metadata1, missingval1 = _open(source, ds; name=name1, group) do var - metadata1 = metadata isa NoKW ? _metadata(var) : metadata + metadata1 = isnokw(metadata) ? _metadata(var) : metadata missingval1 = _check_missingval(var, missingval) replace_missing1 = replace_missing && !isnothing(missingval1) missingval2 = replace_missing1 ? missing : missingval1 @@ -329,7 +329,7 @@ function Raster(ds, filename::AbstractString; _checkmem(var) Array(replace_missing1 ? _replace_missing(var, missingval1) : var) end - dims1 = dims isa NoKW ? _dims(var, crs, mappedcrs) : format(dims, data) + dims1 = isnokw(dims) ? 
diff --git a/src/filearray.jl b/src/filearray.jl
index 157135464..500e2e151 100644
--- a/src/filearray.jl
+++ b/src/filearray.jl
@@ -29,8 +29,8 @@ end
 function FileArray{S,T,N}(filename::String, size::Tuple;
     name=nokw, group=nokw, eachchunk=size, haschunks=DA.Unchunked(), write=false
 ) where {S,T,N}
-    name = name isa NoKW ? nothing : name
-    group = group isa NoKW ? nothing : group
+    name = isnokw(name) ? nothing : name
+    group = isnokw(group) ? nothing : group
     FileArray{S,T,N}(filename, size, name, group, eachchunk, haschunks, write)
 end
 
diff --git a/src/lookup.jl b/src/lookup.jl
index b350bb801..820515f78 100644
--- a/src/lookup.jl
+++ b/src/lookup.jl
@@ -80,8 +80,8 @@ function Projected(l::Sampled;
     mappedcrs::Union{GeoFormat,NoKW,Nothing}=nokw,
     dim=AutoDim()
 )
-    crs = crs isa NoKW ? nothing : crs
-    mappedcrs = mappedcrs isa NoKW ? nothing : mappedcrs
+    crs = isnokw(crs) ? nothing : crs
+    mappedcrs = isnokw(mappedcrs) ? nothing : mappedcrs
     Projected(parent(l), order, span, sampling, metadata, crs, mappedcrs, dim)
 end
 
diff --git a/src/nokw.jl b/src/nokw.jl
new file mode 100644
index 000000000..7a6a4743f
--- /dev/null
+++ b/src/nokw.jl
@@ -0,0 +1,7 @@
+# Often we need to distinguish between `nothing` and no user input,
+# So that we can use automatic checks if the user didn't pass a keyword.
+# NoKW is entirely for that. It should never be used from outside of the package.
+struct NoKW end
+const nokw = NoKW()
+@inline isnokw(::NoKW) = true
+@inline isnokw(_) = false
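The new src/nokw.jl above is tiny but central to the rest of this patch series, so a usage illustration may help reviewers. This is only a sketch of the pattern, not package code; `pick_missingval` is an invented example function.

    struct NoKW end
    const nokw = NoKW()
    isnokw(::NoKW) = true
    isnokw(_) = false

    # `pick_missingval` is made up for this example: `nothing` is itself meaningful
    # ("no missing value"), so only the `nokw` sentinel can mean "keyword not passed".
    function pick_missingval(; missingval=nokw)
        isnokw(missingval) && return NaN   # automatic default when unspecified
        return missingval                  # `nothing` or any user value passes through
    end

    pick_missingval()                      # NaN
    pick_missingval(missingval=nothing)    # nothing
    pick_missingval(missingval=-9999.0)    # -9999.0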
diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl
index d704d6ba3..f82a3a3d5 100644
--- a/src/sources/commondatamodel.jl
+++ b/src/sources/commondatamodel.jl
@@ -110,7 +110,7 @@ function FileStack{source}(
     layersizes = map(size, vars)
     eachchunk = map(_get_eachchunk, vars)
     haschunks = map(_get_haschunks, vars)
-    group = group isa NoKW ? nothing : group
+    group = isnokw(group) ? nothing : group
     return FileStack{source,name}(filename, layertypes, layersizes, group, eachchunk, haschunks, write)
 end
 
@@ -122,7 +122,7 @@ end
 
 function _open(f, ::CDMsource, ds::AbstractDataset; name=nokw, group=nothing, kw...)
     g = _getgroup(ds, group)
-    x = name isa NoKW ? g : CFDiskArray(g[_firstname(ds, name)])
+    x = isnokw(name) ? g : CFDiskArray(g[_firstname(ds, name)])
     cleanreturn(f(x))
 end
 _open(f, ::CDMsource, var::CFDiskArray; kw...) = cleanreturn(f(var))
@@ -340,11 +340,11 @@ function _cdmlookup(
         span, sampling = _cdmspan(index, order), Points()
     end
     # We cant yet check CF standards crs, but we can at least check for units in lat/lon
-    if mappedcrs isa NoKW && get(metadata, "units", "") in ("degrees_north", "degrees_east")
+    if isnokw(mappedcrs) && get(metadata, "units", "") in ("degrees_north", "degrees_east")
         mappedcrs = EPSG(4326)
     end
     # Additionally, crs and mappedcrs should be identical for Regular lookups
-    if crs isa NoKW span isa Regular
+    if isnokw(crs) && span isa Regular
         crs = mappedcrs
     end
     return _cdmlookup(D, index, order, span, sampling, metadata, crs, mappedcrs)
@@ -355,7 +355,7 @@ function _cdmlookup(
 )
     # If the index is regularly spaced and there is no crs
     # then there is probably just one crs - the mappedcrs
-    crs = if crs isa NoKW && span isa Regular
+    crs = if isnokw(crs) && span isa Regular
         mappedcrs
     else
         crs
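The lookup hunks above keep two fallbacks: coordinate units of degrees north/east imply EPSG(4326) for `mappedcrs`, and a `Regular` span lets `crs` default to `mappedcrs`. A minimal stand-alone sketch of the units check, assuming a plain `Dict` in place of the CF attributes object the package actually reads:

    using GeoFormatTypes  # provides EPSG (re-exported by Rasters)

    # Stand-in for the CF attributes of a coordinate variable
    attribs = Dict("units" => "degrees_north")

    # Units in degrees north/east are enough to assume lon/lat, i.e. EPSG(4326),
    # when the file declares no CRS of its own.
    mappedcrs = get(attribs, "units", "") in ("degrees_north", "degrees_east") ?
        EPSG(4326) : nothing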
diff --git a/src/stack.jl b/src/stack.jl
index 64bf8047f..4e571fcb2 100644
--- a/src/stack.jl
+++ b/src/stack.jl
@@ -85,10 +85,10 @@ function DD.rebuild_from_arrays(
     missingval=map(missingval, das),
 )
     data = map(parent, das)
-    if dims isa Nothing
+    if isnothing(dims)
         # invokelatest avoids compiling this for other paths
-        Base.invokelatest() do
-            dims = DD.combinedims(collect(das))
+        dims = Base.invokelatest() do
+            DD.combinedims(collect(das))
         end
         rebuild(s; data, dims, refdims, layerdims, metadata, layermetadata, missingval)
     else
@@ -177,34 +177,27 @@ struct RasterStack{L<:Union{FileStack,OpenStack,NamedTuple},D<:Tuple,R<:Tuple,LD
 end
 # Rebuild from internals
 function RasterStack(
-    data::Union{FileStack,OpenStack,NamedTuple{<:Any,<:Tuple{Vararg{<:AbstractArray}}}};
+    data::Union{FileStack,OpenStack};
     dims::Tuple,
     refdims::Tuple=(),
     layerdims,
     metadata=NoMetadata(),
     layermetadata,
     missingval,
-    crs=nokw,
-    mappedcrs=nokw,
-    name=nokw,
-    resize=nokw,
+    kw...
 )
     st = RasterStack(
         data, dims, refdims, layerdims, metadata, layermetadata, missingval
     )
-    dims = format(dims, CartesianIndices(st))
-    dims = crs isa NoKW ? dims : setcrs(dims, crs)
-    dims = mappedcrs isa NoKW ? dims : setmappedcrs(dims, mappedcrs)
-    st = rebuild(st; dims)
-    return name isa NoKW ? st : st[Dimensions._astuple(name)]
+    return _postprocess_stack(st, dims; kw...)
 end
 # Convert Tuple/Array of array to NamedTuples using name/key
 function RasterStack(data::Tuple{Vararg{<:AbstractArray}}, dims::Tuple;
-    name::Union{Tuple,AbstractArray,NamedTuple,Nothing}=nothing,
+    name::Union{Tuple,AbstractArray,NamedTuple,Nothing}=nokw,
     kw...
 )
-    isnothing(name) && throw(ArgumentError("pass a Tuple, Array or NamedTuple of names to the `name` keyword"))
-    return RasterStack(NamedTuple{cleankeys(name)}(data); dims, kw...)
+    isnokw(name) && throw(ArgumentError("Pass a Tuple, Array or NamedTuple of names to the `name` keyword"))
+    return RasterStack(NamedTuple{cleankeys(name)}(data), dims; kw...)
 end
 # Multi Raster stack from NamedTuple of AbstractArray
 function RasterStack(data::NamedTuple{<:Any,<:Tuple{Vararg{<:AbstractArray}}}, dims::Tuple; kw...)
@@ -246,11 +239,12 @@ function RasterStack(layers::NamedTuple{K,<:Tuple{Vararg{<:AbstractDimArray}}};
     else
         throw(ArgumentError("$layermetadata is not a valid input for `layermetadata`. Try a `NamedTuple` of `Dict`, `MetaData` or `NoMetadata`"))
     end
-    missingval = missingval isa NoKW ? nothing : missingval
+    missingval = isnokw(missingval) ? nothing : missingval
     data = map(parent, _layers)
-    return RasterStack(
-        data; dims, refdims, layerdims, metadata, layermetadata, missingval, kw...
+    st = RasterStack(
+        data, dims, refdims, layerdims, metadata, layermetadata, missingval
     )
+    return _postprocess_stack(st, dims; kw...)
 end
 # Stack from table and dims args
 function RasterStack(table, dims::Tuple;
@@ -267,7 +261,7 @@ function RasterStack(table, dims::Tuple;
             reshape(col, map(length, dims))
         end |> NamedTuple{name}
     end
-    return RasterStack(layers; dims, kw...)
+    return RasterStack(layers, dims; kw...)
 end
 # Stack from a Raster
 function RasterStack(A::AbstractDimArray;
@@ -279,15 +273,15 @@ function RasterStack(A::AbstractDimArray;
     kw...
 )
     name = name isa Union{AbstractString,Symbol,Name} ? (name,) : name
-    layers = if layersfrom isa NoKW
-        name = if name isa NoKW
+    layers = if isnokw(layersfrom)
+        name = if isnokw(name)
             name = DD.name(A) in (NoName(), Symbol(""), Name(Symbol(""))) ? ("layer1",) : DD.name(A)
         else
             name
         end
         NamedTuple{cleankeys(name)}((A,))
     else
-        name = name isa NoKW ? _layerkeysfromdim(A, layersfrom) : name
+        name = isnokw(name) ? _layerkeysfromdim(A, layersfrom) : name
         slices = slice(A, layersfrom)
         NamedTuple{cleankeys(name)}(slices)
     end
@@ -297,7 +291,7 @@ end
 RasterStack(st::DD.AbstractDimStack, dims::Tuple; kw...) = RasterStack(st; dims, kw...)
 # RasterStack from another stack
 function RasterStack(s::DD.AbstractDimStack;
-    data::NamedTuple=parent(s),
+    data=parent(s),
     dims::Union{Tuple,NoKW}=dims(s),
     refdims::Tuple=refdims(s),
     layerdims=DD.layerdims(s),
@@ -307,7 +301,7 @@ function RasterStack(s::DD.AbstractDimStack;
     kw...
 )
     return RasterStack(
-        data; dims, refdims, layerdims, metadata, layermetadata, missingval, kw...
+        data, dims; refdims, layerdims, metadata, layermetadata, missingval, kw...
     )
 end
 # Multi-file stack from strings
@@ -341,7 +335,7 @@ function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}};
         end
     end
     md = _metadata(ds)
-    missingval = missingval isa NoKW ? Rasters.missingval(ds) : missingval
+    missingval = isnokw(missingval) ? Rasters.missingval(ds) : missingval
     raster = Raster(data; dims, name, metadata=md, missingval)
     return dropband ? _drop_single_band(raster, lazy) : raster
 end
@@ -364,7 +358,7 @@ function RasterStack(filename::AbstractString;
         filenames = readdir(filename)
         length(filenames) > 0 || throw(ArgumentError("No files in directory $filename"))
         # Detect keys from names
-        name = if name isa NoKW
+        name = if isnokw(name)
             all_shared = true
             stripped = lstrip.(x -> x in (" ", "_"), (x -> x[1:end]).(filenames))
             Symbol.(replace.(first.(splitext.(stripped)), Ref(" " => "_")))
@@ -379,7 +373,7 @@ function RasterStack(filename::AbstractString;
         l_st = _layer_stack(filename; source, name, lazy, group, kw...)
 
         # Maybe split the stack into separate arrays to remove extra dims.
-        if !(name isa NoKW)
+        if !isnokw(name)
             map(identity, l_st)
         else
             l_st
@@ -402,6 +396,18 @@ function RasterStack(filename::AbstractString;
     end
 end
 
+function _postprocess_stack(st, dims;
+    crs=nokw,
+    mappedcrs=nokw,
+    name=nokw,
+)
+    dims = format(dims, CartesianIndices(st))
+    dims = isnokw(crs) ? dims : setcrs(dims, crs)
+    dims = isnokw(mappedcrs) ? dims : setmappedcrs(dims, mappedcrs)
+    st = rebuild(st; dims)
+    return isnokw(name) ? st : st[Dimensions._astuple(name)]
+end
+
 function DD.modify(f, s::AbstractRasterStack{<:FileStack{<:Any,K}}) where K
     open(s) do o
         map(K) do k
@@ -456,12 +462,12 @@ function _layer_stack(filename;
     layers = _layers(ds, name, group)
     # Create a Dict of dimkey => Dimension to use in `dim` and `layerdims`
     dimdict = _dimdict(ds, crs, mappedcrs)
-    refdims = refdims == () || refdims isa Nothing ? () : refdims
-    metadata = metadata isa NoKW ? _metadata(ds) : metadata
-    layerdims = layerdims isa NoKW ? _layerdims(ds; layers, dimdict) : layerdims
-    dims = _sort_by_layerdims(dims isa NoKW ? _dims(ds, dimdict) : dims, layerdims)
-    layermetadata = layermetadata isa NoKW ? _layermetadata(ds; layers) : layermetadata
-    missingval = missingval isa NoKW ? Rasters.missingval(ds) : missingval
+    refdims = isnokw(refdims) || isnothing(refdims) ? () : refdims
+    metadata = isnokw(metadata) ? _metadata(ds) : metadata
+    layerdims = isnokw(layerdims) ? _layerdims(ds; layers, dimdict) : layerdims
+    dims = _sort_by_layerdims(isnokw(dims) ? _dims(ds, dimdict) : dims, layerdims)
+    layermetadata = isnokw(layermetadata) ? _layermetadata(ds; layers) : layermetadata
+    missingval = isnokw(missingval) ? Rasters.missingval(ds) : missingval
     names = Tuple(map(Symbol, layers.names))
     data = if lazy
         FileStack{typeof(source)}(ds, filename; name=names, group, vars=Tuple(layers.vars))
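Several `RasterStack` constructor signatures change above (positional `dims`, and `name` now required for tuples of arrays), so a hedged usage sketch may help. The layer names and sizes here are invented, and the behaviour is assumed from the hunks rather than tested against this exact commit.

    using Rasters

    x, y = X(1:100), Y(1:50)
    layers = (temp=rand(100, 50), rain=rand(100, 50))

    # NamedTuple of arrays plus a dims Tuple goes through the positional `dims` path
    st = RasterStack(layers, (x, y))

    # A plain Tuple of arrays now requires `name`, as the ArgumentError above enforces
    st2 = RasterStack((rand(100, 50), rand(100, 50)), (x, y); name=(:a, :b))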
diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl
index 5c82bffb3..49eedf233 100644
--- a/test/sources/gdal.jl
+++ b/test/sources/gdal.jl
@@ -697,6 +697,7 @@ end
     @testset "write multiple files" begin
         filename = tempname() * ".tif"
         write(filename, gdalstack)
+        write(filename, gdalstack; force=true)
         base, ext = splitext(filename)
         filename_b = string(base, "_b", ext)
         saved = read(Raster(filename_b))
@@ -706,6 +707,7 @@ end
     @testset "write multiple files with custom suffix" begin
         filename = tempname() * ".tif"
         write(filename, gdalstack; suffix=("_first", "_second"))
+        write(filename, gdalstack; suffix=("_first", "_second"))
         base, ext = splitext(filename)
         filename_b = string(base, "_second", ext)
         saved = read(Raster(filename_b))
@@ -715,11 +717,21 @@ end
     @testset "write netcdf" begin
         filename = tempname() * ".nc"
         write(filename, gdalstack);
+        # Test forcing
+        write(filename, gdalstack; force=true);
         saved = RasterStack(filename);
         @test all(read(saved[:a]) .== geoA)
         rm(filename)
     end
 
+    @testset "chunks" begin
+        filename = tempname() * ".tiff"
+        write(filename, gdalstack; chunks=(128, 128))
+        filenames = write(filename, gdalstack; force=true, chunks=(128, 128))
+        gdalstack2 = RasterStack(filenames; lazy=true)
+        @test DiskArrays.eachchunk(gdalstack2[:b])[1] == (1:128, 1:128)
+    end
+
 end
 
 @testset "show" begin
diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl
index 11fd3fa91..6bcd31e33 100644
--- a/test/sources/ncdatasets.jl
+++ b/test/sources/ncdatasets.jl
@@ -509,6 +509,7 @@ end
         @test (@allocations write(filename, st)) < 1e6 # writing a rasterseries/stack has no force keyword
     end
 
+
 @testset "show" begin
     ncstack = view(RasterStack(ncmulti), X(7:99), Y(3:90));
     sh = sprint(show, MIME("text/plain"), ncstack)
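The new "chunks" testset above doubles as a recipe for chunked GeoTIFF output. Below is a sketch of the same round trip outside the test suite; it assumes `gdalstack` is an existing `RasterStack` with a `:b` layer, as in the test, and the output path and 128×128 chunk size are arbitrary.

    using Rasters, ArchGDAL
    import DiskArrays

    # Write every layer of the stack as a tiled GeoTIFF, then reopen lazily
    filenames = write("chunked.tif", gdalstack; force=true, chunks=(128, 128))
    lazystack = RasterStack(filenames; lazy=true)

    # Each lazy layer is now backed by a chunked disk array
    DiskArrays.eachchunk(lazystack[:b])[1]   # (1:128, 1:128)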
From 833dc7b5364c7fdce8276d3c8eba2ef94696b33e Mon Sep 17 00:00:00 2001
From: rafaqz
Date: Thu, 25 Apr 2024 02:01:36 +0200
Subject: [PATCH 12/13] bugfixes and Regular not Explicit when possible

---
 src/methods/reproject.jl       | 13 +------------
 src/sources/commondatamodel.jl | 30 ++++++++++++++++++----------
 src/stack.jl                   | 18 +++++++++++++-----
 test/sources/gdal.jl           |  1 -
 test/sources/ncdatasets.jl     | 26 +++++++++++---------------
 5 files changed, 45 insertions(+), 43 deletions(-)

diff --git a/src/methods/reproject.jl b/src/methods/reproject.jl
index 81bcb422d..029966ff8 100644
--- a/src/methods/reproject.jl
+++ b/src/methods/reproject.jl
@@ -29,7 +29,7 @@ function reproject(target::GeoFormat, l::AbstractProjected)
     source = crs(l)
     newdata = reproject(source, target, l.dim, parent(l))
     newlookup = rebuild(l; data=newdata, crs=target)
-    if isregular(newdata)
+    if checkregular(newdata)
         return set(newlookup, Regular(stepof(newdata)))
     else
         newbounds = reproject(crs(l), target, l.dim, bounds(l))
@@ -79,14 +79,3 @@ end
 # Guess the step for arrays
 stepof(A::AbstractArray) = (last(A) - first(A)) / (length(A) - 1)
 stepof(A::AbstractRange) = step(A)
-
-isregular(A::AbstractRange) = true
-function isregular(A::AbstractArray)
-    step = stepof(A)
-    for i in eachindex(A)[2:end]
-        if !(A[i] - A[i-1] ≈ step)
-            return false
-        end
-    end
-    return true
-end
diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl
index f82a3a3d5..e6536851d 100644
--- a/src/sources/commondatamodel.jl
+++ b/src/sources/commondatamodel.jl
@@ -330,15 +330,25 @@ function _cdmlookup(
     order = LA.orderof(index)
     var = ds[dimname]
     # Detect lat/lon
-    if haskey(CDM.attribs(var), "bounds")
-        boundskey = var.attrib["bounds"]
-        boundsmatrix = Array(ds[boundskey])
-        span, sampling = Explicit(boundsmatrix), Intervals(Center())
-    elseif eltype(index) <: Union{Missing,Dates.AbstractTime}
-        span, sampling = _cdmperiod(index, metadata)
+    span, sampling = if eltype(index) <: Union{Missing,Dates.AbstractTime}
+        _cdmperiod(index, metadata)
     else
-        span, sampling = _cdmspan(index, order), Points()
+        _cdmspan(index, order)
+    end
+    # We only use Explicit if the span is not Regular
+    # This is important for things like rasterization and conversion
+    # to gdal to be easy, and selectors are faster.
+    # TODO are there any possible floating point errors from this?
+    if haskey(CDM.attribs(var), "bounds")
+        span, sampling = if isregular(span)
+            span, Intervals(Center())
+        else
+            boundskey = var.attrib["bounds"]
+            boundsmatrix = Array(ds[boundskey])
+            Explicit(boundsmatrix), Intervals(Center())
+        end
     end
+
     # We cant yet check CF standards crs, but we can at least check for units in lat/lon
     if isnokw(mappedcrs) && get(metadata, "units", "") in ("degrees_north", "degrees_east")
         mappedcrs = EPSG(4326)
@@ -374,7 +384,7 @@ end
 
 function _cdmspan(index, order)
     # Handle a length 1 index
-    length(index) == 1 && return Regular(zero(eltype(index)))
+    length(index) == 1 && return Regular(zero(eltype(index))), Points()
     step = index[2] - index[1]
     for i in 2:length(index)-1
         # If any step sizes don't match, its Irregular
@@ -390,11 +400,11 @@ function _cdmspan(index, order)
             else
                 index[1], index[1]
             end
-            return Irregular(bounds)
+            return Irregular(bounds), Points()
         end
     end
     # Otherwise regular
-    return Regular(step)
+    return Regular(step), Points()
 end
 
 # delta_t and ave_period are not CF standards, but CDC
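The preference for `Regular` spans over `Explicit` bounds above comes down to whether the cell centres share a single step. A small illustration with plain vectors; `isregular_example` is a stand-in written for this sketch, not the package's check.

    # Cell centres with one constant step can be stored as Regular(step);
    # anything else has to keep the Explicit bounds matrix from the file.
    isregular_example(A) = all(diff(A) .≈ A[2] - A[1])

    centres = collect(0.25:0.5:359.75)
    uneven  = [0.0, 1.0, 2.5, 4.5]

    isregular_example(centres)   # true  -> Regular(0.5), Intervals(Center())
    isregular_example(uneven)    # false -> Explicit(boundsmatrix), Intervals(Center())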
diff --git a/src/stack.jl b/src/stack.jl
index 4e571fcb2..a3fbbafcf 100644
--- a/src/stack.jl
+++ b/src/stack.jl
@@ -177,7 +177,7 @@ struct RasterStack{L<:Union{FileStack,OpenStack,NamedTuple},D<:Tuple,R<:Tuple,LD
 end
 # Rebuild from internals
 function RasterStack(
-    data::Union{FileStack,OpenStack};
+    data::Union{FileStack,OpenStack,NamedTuple};
     dims::Tuple,
     refdims::Tuple=(),
     layerdims,
@@ -200,14 +200,21 @@ function RasterStack(data::Tuple{Vararg{<:AbstractArray}}, dims::Tuple;
     return RasterStack(NamedTuple{cleankeys(name)}(data), dims; kw...)
 end
 # Multi Raster stack from NamedTuple of AbstractArray
-function RasterStack(data::NamedTuple{<:Any,<:Tuple{Vararg{<:AbstractArray}}}, dims::Tuple; kw...)
+function RasterStack(data::NamedTuple{<:Any,<:Tuple{Vararg{<:AbstractArray}}}, dims::Tuple;
+    layerdims=nokw,
+    kw...
+)
     # TODO: make this more sophisticated and match dimension length to axes?
     # We dont worry about Raster keywords because these rasters will be deconstructed
     # again later, and `kw` will define the RasterStack keywords
-    layers = map(data) do A
-        Raster(A, dims[1:ndims(A)])
+    if isnokw(layerdims)
+        layers = map(data) do A
+            Raster(A, dims[1:ndims(A)])
+        end
+        return RasterStack(layers; kw...)
+    else
+        return RasterStack(data; dims, layerdims, kw...)
     end
-    return RasterStack(layers; kw...)
 end
 # Multi Raster stack from AbstractDimArray splat
 RasterStack(layers::AbstractDimArray...; kw...) = RasterStack(layers; kw...)
@@ -400,6 +407,7 @@ function _postprocess_stack(st, dims;
     crs=nokw,
     mappedcrs=nokw,
     name=nokw,
+    resize=nokw,
 )
     dims = format(dims, CartesianIndices(st))
     dims = isnokw(crs) ? dims : setcrs(dims, crs)
diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl
index 49eedf233..0113974e2 100644
--- a/test/sources/gdal.jl
+++ b/test/sources/gdal.jl
@@ -707,7 +707,6 @@ end
     @testset "write multiple files with custom suffix" begin
         filename = tempname() * ".tif"
         write(filename, gdalstack; suffix=("_first", "_second"))
-        write(filename, gdalstack; suffix=("_first", "_second"))
         base, ext = splitext(filename)
         filename_b = string(base, "_second", ext)
         saved = read(Raster(filename_b))
diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl
index 6bcd31e33..f4434d722 100644
--- a/test/sources/ncdatasets.jl
+++ b/test/sources/ncdatasets.jl
@@ -59,12 +59,12 @@ end
         @time read(lazyarray);
     end
 
-    @testset "from url" begin
-        # TODO we need a permanent url here that doesn't end in .nc
-        url = "http://apdrc.soest.hawaii.edu:80/dods/public_data/Reanalysis_Data/NCEP/NCEP2/daily/surface/mslp"
-        r = Raster(url; name=:mslp, source=:netcdf, lazy=true)
-        @test sum(r[Ti(1)]) == 1.0615972f9
-    end
+    # @testset "from url" begin
+    #     # TODO we need a permanent url here that doesn't end in .nc
+    #     url = "http://apdrc.soest.hawaii.edu:80/dods/public_data/Reanalysis_Data/NCEP/NCEP2/daily/surface/mslp"
+    #     r = Raster(url; name=:mslp, source=:netcdf, lazy=true)
+    #     @test sum(r[Ti(1)]) == 1.0615972f9
+    # end
 
     @testset "open" begin
         @test all(open(A -> A[Y=1], ncarray) .=== ncarray[:, 1, :])
@@ -112,9 +112,7 @@ end
     @test length.(dims(ncarray)) == (180, 170, 24)
     @test dims(ncarray) isa Tuple{<:X,<:Y,<:Ti}
     @test refdims(ncarray) == ()
-    @test val.(span(ncarray)) ==
-        (vcat((0.0:2.0:358.0)', (2.0:2.0:360.0)'),
-         vcat((-80.0:89.0)', (-79.0:90.0)'),
+    @test val.(span(ncarray)) == (2.0, 1.0,
         vcat(permutedims(DateTime360Day(2001, 1, 1):Month(1):DateTime360Day(2002, 12, 1)),
              permutedims(DateTime360Day(2001, 2, 1):Month(1):DateTime360Day(2003, 1, 1)))
     )
@@ -288,7 +286,7 @@ end
 
     @testset "deflatelevel" begin
         write("tos.nc", ncarray; force=true) # default `deflatelevel = 0`
-        @time write("tos_small.nc", geoA; deflatelevel=2, force = true)
+        @time write("tos_small.nc", ncarray; deflatelevel=2, force = true)
         @test filesize("tos_small.nc") * 1.5 < filesize("tos.nc") # compress ratio >= 1.5
         @test (@allocations write("tos_small.nc", ncarray; deflatelevel=2, force=true)) < 1e4
         isfile("tos.nc") && rm("tos.nc")
@@ -322,7 +320,7 @@ end
     @testset "to gdal" begin
         gdalfilename = tempname() * ".tif"
         nccleaned = replace_missing(ncarray[Ti(1)], -9999.0)
-        write(gdalfilename, nccleaned; force = true)
+        write(gdalfilename, nccleaned; force=true)
         @test (@allocations write(gdalfilename, nccleaned; force = true)) < 1e4
         gdalarray = Raster(gdalfilename)
         # gdalarray WKT is missing one AUTHORITY
@@ -500,13 +498,12 @@ end
         @test first(parent(st)) isa Array
         length(dims(st[:aclcac]))
         filename = tempname() * ".nc"
-        write(filename, st);
+        @test (@allocations write(filename, st)) < 1e6 # writing a rasterseries/stack has no force keyword
         saved = RasterStack(RasterStack(filename))
        @test keys(saved) == keys(st)
         @test metadata(saved)["advection"] == "Lin & Rood"
         @test metadata(saved) == metadata(st) == metadata(ncstack)
         @test all(first(DimensionalData.layers(saved)) .== first(DimensionalData.layers(st)))
-        @test (@allocations write(filename, st)) < 1e6 # writing a rasterseries/stack has no force keyword
     end
 
 
@@ -536,10 +533,9 @@ end
         rast = Raster(ncsingle; name=:tos)
         @test all(read(ncseries[Ti(1)][:tos]) .=== read(rast))
-        write("test.nc", ncseries)
+        @test (@allocations write("test.nc", ncseries)) < 1e4 # writing a rasterseries/stack has no force keyword
         @test isfile("test_1.nc")
         @test isfile("test_2.nc")
-        @test (@allocations write("test.nc", ncseries)) < 1e4 # writing a rasterseries/stack has no force keyword
         RasterStack("test_1.nc")
         rm("test_1.nc")
         rm("test_2.nc")
 
From 49861ab82ee7b9a061105798d2e3980015825742 Mon Sep 17 00:00:00 2001
From: rafaqz
Date: Thu, 25 Apr 2024 22:25:24 +0200
Subject: [PATCH 13/13] fix reproject

---
 src/methods/reproject.jl |  2 +-
 src/utils.jl             | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/methods/reproject.jl b/src/methods/reproject.jl
index 029966ff8..c5b548d8a 100644
--- a/src/methods/reproject.jl
+++ b/src/methods/reproject.jl
@@ -29,7 +29,7 @@ function reproject(target::GeoFormat, l::AbstractProjected)
     source = crs(l)
     newdata = reproject(source, target, l.dim, parent(l))
     newlookup = rebuild(l; data=newdata, crs=target)
-    if checkregular(newdata)
+    if _checkregular(newdata)
         return set(newlookup, Regular(stepof(newdata)))
     else
         newbounds = reproject(crs(l), target, l.dim, bounds(l))
diff --git a/src/utils.jl b/src/utils.jl
index c640d2c68..5e52c1af6 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -262,3 +262,15 @@ end
     _chunks_to_tuple(template, dimorder, DD.kw2dims(chunks))
 @inline _chunks_to_tuple(template, dimorder, chunks::Nothing) = nothing
 @inline _chunks_to_tuple(template, dims, chunks::NoKW) = nothing
+
+
+_checkregular(A::AbstractRange) = true
+function _checkregular(A::AbstractArray)
+    step = stepof(A)
+    for i in eachindex(A)[2:end]
+        if !(A[i] - A[i-1] ≈ step)
+            return false
+        end
+    end
+    return true
+end
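As a closing illustration, the `_checkregular` and `stepof` helpers above can be exercised on their own to show how `reproject` decides between `Regular` and `Irregular` lookups; the coordinate vectors below are made up for this sketch.

    stepof(A::AbstractArray) = (last(A) - first(A)) / (length(A) - 1)
    stepof(A::AbstractRange) = step(A)

    _checkregular(A::AbstractRange) = true
    function _checkregular(A::AbstractArray)
        step = stepof(A)
        for i in eachindex(A)[2:end]
            if !(A[i] - A[i-1] ≈ step)
                return false
            end
        end
        return true
    end

    evenly = [0.0, 0.5, 1.0, 1.5]    # reprojected points that stayed evenly spaced
    warped = [0.0, 0.5, 1.1, 1.5]    # spacing drifted during reprojection

    _checkregular(evenly)   # true  -> set(newlookup, Regular(stepof(evenly)))
    _checkregular(warped)   # false -> fall back to Irregular with reprojected bounds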