Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix common data model chunk propagation, source type propagation, and other things #592

Merged
merged 8 commits into from
Mar 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
fail-fast: true
matrix:
version:
- '1.8'
- '1.9'
- '1'
os:
- ubuntu-latest
Expand Down
8 changes: 3 additions & 5 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"

[weakdeps]
Expand Down Expand Up @@ -53,8 +52,8 @@ CommonDataModel = "0.2.3"
ConstructionBase = "1"
CoordinateTransformations = "0.6.2"
DataFrames = "1"
DimensionalData = "0.25.1"
DiskArrays = "^0.3.3"
DimensionalData = "0.26"
DiskArrays = "0.3, 0.4"
Extents = "0.1"
FillArrays = "0.12, 0.13, 1"
Flatten = "0.4"
Expand All @@ -71,13 +70,12 @@ ProgressMeter = "1"
RasterDataSources = "0.5.7"
RecipesBase = "0.7, 0.8, 1.0"
Reexport = "0.2, 1.0"
Requires = "0.5, 1"
SafeTestsets = "0.1"
Setfield = "0.6, 0.7, 0.8, 1"
Shapefile = "0.10, 0.11"
Statistics = "1"
Test = "1"
julia = "1.8"
julia = "1.9"

[extras]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
Expand Down
4 changes: 2 additions & 2 deletions ext/RastersArchGDALExt/RastersArchGDALExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ using DimensionalData,
GeoFormatTypes,
GeoInterface

using Rasters.LookupArrays
using Rasters.Lookups
using Rasters.Dimensions
using Rasters: GDALsource, AbstractProjected, RasterStackOrArray, FileArray,
RES_KEYWORD, SIZE_KEYWORD, CRS_KEYWORD, FILENAME_KEYWORD, SUFFIX_KEYWORD, EXPERIMENTAL,
Expand All @@ -26,7 +26,7 @@ const RA = Rasters
const DD = DimensionalData
const DA = DiskArrays
const GI = GeoInterface
const LA = LookupArrays
const LA = Lookups

include("cellsize.jl")
include("gdal_source.jl")
Expand Down
4 changes: 2 additions & 2 deletions ext/RastersArchGDALExt/gdal_source.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ const GDAL_VIRTUAL_FILESYSTEMS = "/vsi" .* (

# Array ########################################################################

function RA.FileArray(raster::AG.RasterDataset{T}, filename; kw...) where {T}
function RA.FileArray{GDALsource}(raster::AG.RasterDataset{T}, filename; kw...) where {T}
eachchunk, haschunks = DA.eachchunk(raster), DA.haschunks(raster)
RA.FileArray{GDALsource,T,3}(filename, size(raster); eachchunk, haschunks, kw...)
end
Expand Down Expand Up @@ -242,7 +242,7 @@ function RA.Raster(ds::AG.RasterDataset;
filelist = AG.filelist(ds)
raster = if lazy && length(filelist) > 0
filename = first(filelist)
A = Raster(FileArray(ds, filename), args...)
A = Raster(FileArray{GDALsource}(ds, filename), args...)
else
Raster(Array(ds), args...)
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ else
end

using DimensionalData
using Rasters.LookupArrays
using Rasters.Lookups
using Rasters.Dimensions

import Rasters: AffineProjected, GDAL_EMPTY_TRANSFORM, GDAL_TOPLEFT_X,
GDAL_WE_RES, GDAL_ROT1, GDAL_TOPLEFT_Y, GDAL_ROT2, GDAL_NS_RES
const RA = Rasters
const DD = DimensionalData
const LA = LookupArrays
const LA = Lookups


include("affineprojected.jl")
Expand Down
2 changes: 1 addition & 1 deletion ext/RastersCoordinateTransformationsExt/affineprojected.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
function AffineProjected(f;
data=LA.AutoIndex(), metadata=DD.NoMetadata(), crs=nothing, mappedcrs=nothing, paired_lookup, dim=RA.AutoDim()
data=LA.AutoValues(), metadata=DD.NoMetadata(), crs=nothing, mappedcrs=nothing, paired_lookup, dim=RA.AutoDim()
)
AffineProjected(f, data, metadata, crs, mappedcrs, paired_lookup, dim)
end
Expand Down
4 changes: 2 additions & 2 deletions ext/RastersGRIBDatasetsExt/RastersGRIBDatasetsExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ using Dates,
DimensionalData,
GeoFormatTypes

using Rasters.LookupArrays
using Rasters.Lookups
using Rasters.Dimensions
using Rasters: GRIBsource

Expand All @@ -26,7 +26,7 @@ const RA = Rasters
const DD = DimensionalData
const DA = DiskArrays
const GI = GeoInterface
const LA = LookupArrays
const LA = Lookups

include("gribdatasets_source.jl")

Expand Down
6 changes: 4 additions & 2 deletions ext/RastersGRIBDatasetsExt/gribdatasets_source.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
const GDS = GRIBDatasets

RA.FileStack{GRIBsource}(ds::AbstractDataset, filename::AbstractString; write=false, keys) = RA.FileStack(GRIBsource, ds, filename; write, keys)

function RA.OpenStack(fs::RA.FileStack{GRIBsource,K}) where K
RA.OpenStack{GRIBsource,K}(GDS.GRIBDataset(RA.filename(fs)))
end
Expand All @@ -14,3 +12,7 @@ function RA._open(f, ::Type{GRIBsource}, filename::AbstractString; write=false,
ds = GRIBDatasets.GRIBDataset(filename)
RA._open(f, GRIBsource, ds; kw...)
end

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tcarion @Alexander-Barth this is the hack that is currently required to make DiskArrays chunking propagate out from the internal Variable or DataValues objects.

Note that with your current implementation this means anyone using CFVariable cannot do chunked reads of large datasets. The whole thing is read at once.

# Hack to get the inner DiskArrays chunks as they are not exposed at the top level
RA._get_eachchunk(var::GDS.Variable) = DiskArrays.eachchunk(var.values)
RA._get_haschunks(var::GDS.Variable) = DiskArrays.haschunks(var.values)
4 changes: 2 additions & 2 deletions ext/RastersHDF5Ext/RastersHDF5Ext.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ using Dates,
GeoInterface,
Rasters

using Rasters.LookupArrays
using Rasters.Lookups
using Rasters.Dimensions
using Rasters: SMAPsource

Expand All @@ -27,7 +27,7 @@ const RA = Rasters
const DD = DimensionalData
const DA = DiskArrays
const GI = GeoInterface
const LA = LookupArrays
const LA = Lookups

include("smap_source.jl")

Expand Down
6 changes: 3 additions & 3 deletions ext/RastersHDF5Ext/smap_source.jl
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,10 @@

# Raster ######################################################################

function RA.FileArray(ds::SMAPhdf5, filename::AbstractString; key, kw...)
RA.FileArray(ds[key], filename; key, kw...)
function RA.FileArray{SMAPsource}(ds::SMAPhdf5, filename::AbstractString; key, kw...)
RA.FileArray{SMAPsource}(ds[key], filename; key, kw...)

Check warning on line 91 in ext/RastersHDF5Ext/smap_source.jl

View check run for this annotation

Codecov / codecov/patch

ext/RastersHDF5Ext/smap_source.jl#L90-L91

Added lines #L90 - L91 were not covered by tests
end
function RA.FileArray(var::SMAPvar, filename::AbstractString; key, kw...)
function RA.FileArray{SMAPsource}(var::SMAPvar, filename::AbstractString; key, kw...)

Check warning on line 93 in ext/RastersHDF5Ext/smap_source.jl

View check run for this annotation

Codecov / codecov/patch

ext/RastersHDF5Ext/smap_source.jl#L93

Added line #L93 was not covered by tests
T = eltype(var)
N = ndims(var)
eachchunk = DA.eachchunk(var)
Expand Down
4 changes: 2 additions & 2 deletions ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ using Dates,
DimensionalData,
GeoFormatTypes

using Rasters.LookupArrays
using Rasters.Lookups
using Rasters.Dimensions
using Rasters: CDMsource, NCDsource

Expand All @@ -26,7 +26,7 @@ const RA = Rasters
const DD = DimensionalData
const DA = DiskArrays
const GI = GeoInterface
const LA = LookupArrays
const LA = Lookups

include("ncdatasets_source.jl")

Expand Down
28 changes: 14 additions & 14 deletions ext/RastersNCDatasetsExt/ncdatasets_source.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ function Base.write(filename::AbstractString, ::Type{<:CDMsource}, A::AbstractRa
mode = !isfile(filename) || !append ? "c" : "a";
ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(A)))
try
_ncdwritevar!(ds, A; kw...)
_writevar!(ds, A; kw...)
finally
close(ds)
end
Expand Down Expand Up @@ -65,15 +65,13 @@ function Base.write(filename::AbstractString, ::Type{<:CDMsource}, s::AbstractRa
mode = !isfile(filename) || !append ? "c" : "a";
ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(s)))
try
map(key -> _ncdwritevar!(ds, s[key]), keys(s); kw...)
map(key -> _writevar!(ds, s[key]), keys(s); kw...)
finally
close(ds)
end
return filename
end

RA.FileStack{NCDsource}(ds::AbstractDataset, filename::AbstractString; write=false, keys) = RA.FileStack(NCDsource, ds, filename; write, keys)

function RA.OpenStack(fs::RA.FileStack{NCDsource,K}) where K
RA.OpenStack{NCDsource,K}(NCD.Dataset(RA.filename(fs)))
end
Expand All @@ -88,7 +86,7 @@ function RA._open(f, ::Type{NCDsource}, filename::AbstractString; write=false, k
end

# Add a var array to a dataset before writing it.
function _ncdwritevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; kw...) where {T,N}
function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; kw...) where {T,N}
_def_dim_var!(ds, A)
attrib = RA._attribdict(metadata(A))
# Set _FillValue
Expand Down Expand Up @@ -117,9 +115,8 @@ function _ncdwritevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; kw...) where
dimnames = lowercase.(string.(map(RA.name, dims(A))))
var = NCD.defVar(ds, key, eltyp, dimnames; attrib=attrib, kw...)

# NCDatasets needs Colon indices to write without allocations
# TODO do this with DiskArrays broadcast ??
var[map(_ -> Colon(), axes(A))...] = parent(read(A))
# Write with a DiskArays.jl broadcast
var .= A

return nothing
end
Expand All @@ -132,13 +129,13 @@ function _def_dim_var!(ds::AbstractDataset, dim::Dimension)
lookup(dim) isa NoLookup && return nothing

# Shift index before conversion to Mapped
dim = RA._ncdshiftlocus(dim)
dim = RA._cdmshiftlocus(dim)
if dim isa Y || dim isa X
dim = convertlookup(Mapped, dim)
end
# Attributes
attrib = RA._attribdict(metadata(dim))
RA._ncd_set_axis_attrib!(attrib, dim)
RA._cdm_set_axis_attrib!(attrib, dim)
# Bounds variables
if sampling(dim) isa Intervals
bounds = Dimensions.dim2boundsmatrix(dim)
Expand All @@ -150,10 +147,13 @@ function _def_dim_var!(ds::AbstractDataset, dim::Dimension)
return nothing
end

const _NCDVar = NCDatasets.CFVariable{Union{Missing, Float32}, 3, NCDatasets.Variable{Float32, 3, NCDatasets.NCDataset}, NCDatasets.Attributes{NCDatasets.NCDataset{Nothing}}, NamedTuple{(:fillvalue, :scale_factor, :add_offset, :calendar, :time_origin, :time_factor), Tuple{Float32, Nothing, Nothing, Nothing, Nothing, Nothing}}}
# Hack to get the inner DiskArrays chunks as they are not exposed at the top level
RA._get_eachchunk(var::NCD.Variable) = DiskArrays.eachchunk(var)
RA._get_haschunks(var::NCD.Variable) = DiskArrays.haschunks(var)

# precompilation

# const _NCDVar = NCDatasets.CFVariable{Union{Missing, Float32}, 3, NCDatasets.Variable{Float32, 3, NCDatasets.NCDataset}, NCDatasets.Attributes{NCDatasets.NCDataset{Nothing}}, NamedTuple{(:fillvalue, :scale_factor, :add_offset, :calendar, :time_origin, :time_factor), Tuple{Float32, Nothing, Nothing, Nothing, Nothing, Nothing}}}

# function _precompile(::Type{NCDsource})
# ccall(:jl_generating_output, Cint, ()) == 1 || return nothing
Expand All @@ -165,9 +165,9 @@ const _NCDVar = NCDatasets.CFVariable{Union{Missing, Float32}, 3, NCDatasets.Var
# precompile(dims, (_NCDVar,Symbol,Nothing,EPSG))
# precompile(dims, (_NCDVar,Symbol,EPSG,EPSG))
# precompile(_firstkey, (NCDatasets.NCDataset{Nothing},))
# precompile(_ncddim, (NCDatasets.NCDataset{Nothing}, Symbol, Nothing, Nothing))
# precompile(_ncddim, (NCDatasets.NCDataset{Nothing}, Symbol, Nothing, EPSG))
# precompile(_ncddim, (NCDatasets.NCDataset{Nothing}, Symbol, EPSG, EPSG))
# precompile(_cdmdim, (NCDatasets.NCDataset{Nothing}, Symbol, Nothing, Nothing))
# precompile(_cdmdim, (NCDatasets.NCDataset{Nothing}, Symbol, Nothing, EPSG))
# precompile(_cdmdim, (NCDatasets.NCDataset{Nothing}, Symbol, EPSG, EPSG))
# precompile(Raster, (NCDatasets.NCDataset{Nothing}, String, Nothing))
# precompile(Raster, (NCDatasets.NCDataset{Nothing}, String, Symbol))
# precompile(Raster, (_NCDVar, String, Symbol))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ else
end

# using RasterDataSources: RasterDataSource
using Rasters.LookupArrays
using Rasters.Lookups
using Rasters.Dimensions

const RA = Rasters
Expand Down
13 changes: 4 additions & 9 deletions src/Rasters.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,16 @@ import Adapt,
Reexport,
Setfield

# This symbol is only defined on Julia versions that support extensions.
@static if !isdefined(Base, :get_extension)
using Requires
end

Reexport.@reexport using DimensionalData, GeoFormatTypes

using DimensionalData.Tables,
DimensionalData.LookupArrays,
DimensionalData.Lookups,
DimensionalData.Dimensions
DimensionalData.LookupArrays.IntervalSets
DimensionalData.Lookups.IntervalSets

using DimensionalData: Name, NoName
using .Dimensions: StandardIndices, DimTuple
using .LookupArrays: LookupArrayTuple
using .Lookups: LookupTuple

using RecipesBase: @recipe, @series
using Base: tail, @propagate_inbounds
Expand Down Expand Up @@ -74,7 +69,7 @@ export reproject, convertlookup
const DD = DimensionalData
const DA = DiskArrays
const GI = GeoInterface
const LA = LookupArrays
const LA = Lookups

# DimensionalData documentation urls
const DDdocs = "https://rafaqz.github.io/DimensionalData.jl/stable/api"
Expand Down
4 changes: 2 additions & 2 deletions src/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ function Raster(filename::AbstractString;
source = isnothing(source) ? _sourcetype(filename) : _sourcetype(source)
_open(filename; source) do ds
key = filekey(ds, key)
Raster(ds, filename, key; kw...)
Raster(ds, filename, key; source, kw...)
end
end
function Raster(ds, filename::AbstractString, key=nothing;
Expand All @@ -265,7 +265,7 @@ function Raster(ds, filename::AbstractString, key=nothing;
mappedcrs = defaultmappedcrs(source, mappedcrs)
dims = dims isa Nothing ? DD.dims(ds, crs, mappedcrs) : dims
data = if lazy
FileArray(ds, filename; key, write)
FileArray{source}(ds, filename; key, write)
else
_open(Array, source, ds; key)
end
Expand Down
2 changes: 1 addition & 1 deletion src/extensions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ Run `using ArchGDAL` to make this method available.
## Example

```julia
using Rasters, ArchGDAL, Rasters.LookupArrays
using Rasters, ArchGDAL, Rasters.Lookups
dimz = X(Projected(90.0:10.0:120; sampling=Intervals(Start()), order=ForwardOrdered(), span=Regular(10.0), crs=EPSG(4326))),
Y(Projected(0.0:10.0:50; sampling=Intervals(Start()), order=ForwardOrdered(), span=Regular(10.0), crs=EPSG(4326)))

Expand Down
Loading
Loading