Skip to content

Commit

Permalink
Better extract (#565)
Browse files Browse the repository at this point in the history
* add more options to extract

* fix extract

* test

* attribution

* attribution

Co-authored-by: Tiem van der Deure <[email protected]>

* simplify

* simplify

* tweaks

* handle feature collections

* note that mixed point/geom is bad

* test names subsetting

* f

* f

* Update src/methods/extract.jl

Co-authored-by: Tiem van der Deure <[email protected]>

* Update test/methods.jl

Co-authored-by: Tiem van der Deure <[email protected]>

* handle tables

* drop mac try windows

---------

Co-authored-by: Tiem van der Deure <[email protected]>
  • Loading branch information
rafaqz and tiemvanderdeure authored Dec 2, 2023
1 parent 14c51c6 commit 19a53cc
Show file tree
Hide file tree
Showing 3 changed files with 267 additions and 66 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ jobs:
- '1'
os:
- ubuntu-latest
- macOS-latest
# - windows-latest
# - macOS-latest
- windows-latest
arch:
- x64
steps:
Expand Down
171 changes: 125 additions & 46 deletions src/methods/extract.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ sliced arrays or stacks will be returned instead of single values.
# Keywords
- `geometry`: include a `:geometry` column with the corresponding points for each value, `true` by default.
- `index`: include an `:index` column of the `CartesianIndex` for each value, `false` by default.
- `names`: `Tuple` of `Symbol` corresponding to layers of a `RasterStack`. All layers by default.
- `skipmissing`: skip missing points automatically.
- `atol`: a tolorerance for floating point lookup values for when the `LookupArray`
contains `Points`. `atol` is ignored for `Intervals`.
Expand All @@ -32,12 +36,9 @@ st = RasterStack(WorldClim{BioClim}, (1, 3, 5, 7, 12)) |> replace_missing
# Download some occurrence data
obs = GBIF2.occurrence_search("Burramys parvus"; limit=5, year="2009")
# Convert observations to points
pnts = collect((o.decimalLongitude, o.decimalLatitude) for o in obs if !ismissing(o.decimalLongitude))
# use `extract` to get values for all layers at each observation point.
# We `collect` to get a `Vector` from the lazy iterator.
collect(extract(st, pnts))
extract(st, obs; skipmissing=true)
# output
5-element Vector{NamedTuple{(:geometry, :bio1, :bio3, :bio5, :bio7, :bio12)}}:
Expand All @@ -47,66 +48,144 @@ collect(extract(st, pnts))
(geometry = (0.52, 40.37), bio1 = missing, bio3 = missing, bio5 = missing, bio7 = missing, bio12 = missing)
(geometry = (0.32, 40.24), bio1 = 16.321388f0, bio3 = 41.659454f0, bio5 = 30.029825f0, bio7 = 25.544561f0, bio12 = 480.0f0)
```
Note: passing in arrays, geometry collections or feature collections
containing a mix of points and other geometries has undefined results.
"""
function extract end
function extract(x::RasterStackOrArray, data;
dims=DD.dims(x, DEFAULT_POINT_ORDER), kw...
dims=DD.dims(x, DEFAULT_POINT_ORDER), names=_names(x), geometry=true, index=false, kw...
)
_extract(x, data; dims, names=_names(x), kw...)
T = if geometry
keys = index ? (:geometry, :index, names...,) : (:geometry, names...,)
NamedTuple{keys}
else
keys = index ? (:index, names...,) : (names...,)
NamedTuple{keys}
end
names = NamedTuple{names}(names)
if Tables.istable(data)
geomcolnames = GI.geometrycolumns(data)
isnothing(geomcolnames) && throw(ArgumentError("No `:geometry` column and `GeoInterface.geometrycolums(::$(typeof(data)))` does not define alternate columns"))
geometries = Tables.getcolumn(Tables.columns(data), first(geomcolnames))
_extract(T, x, geometries; dims, names, kw...)
else
_extract(T, x, data; dims, names, kw...)
end
end
_extract(A::RasterStackOrArray, point::Missing; kw...) = missing
function _extract(A::RasterStackOrArray, geom; kw...)
_extract(A, GI.geomtrait(geom), geom; kw...)

function _extract(T, A::RasterStackOrArray, geom::Missing; names, kw...)
[_maybe_add_fields(T, A, map(_ -> missing, names), missing, missing)]
end
function _extract(A::RasterStackOrArray, ::Nothing, geoms; kw...)
geom1 = first(skipmissing(geoms))
if GI.isgeometry(geom1) || GI.isfeature(geom1) || GI.isfeaturecollection(geom1)
(_extract(A, g; kw...) for g in geoms)
_extract(T, A::RasterStackOrArray, geom; kw...) =
_extract(T, A, GI.geomtrait(geom), geom; kw...)
_extract(T, A::RasterStackOrArray, ::Nothing, geom; kw...) =
throw(ArgumentError("$geom is not a valid GeoInterface.jl geometry"))
function _extract(T, A::RasterStackOrArray, ::Nothing, geoms::AbstractArray; names, skipmissing=false, kw...)
# Handle empty / all missing cases
(length(geoms) > 0 && any(!ismissing, geoms)) || return T[]

# Handle cases with some invalid geometries
invalid_geom_idx = findfirst(g -> !ismissing(g) && GI.geomtrait(g) === nothing, geoms)
invalid_geom_idx === nothing || throw(ArgumentError("$(geoms[invalid_geom_idx]) is not a valid GeoInterface.jl geometry"))

geom1 = first(Base.skipmissing(geoms))
trait1 = GI.trait(geom1)
# We need to split out points from other geoms
# TODO this will fail with mixed point/geom vectors
if trait1 isa GI.PointTrait
rows = (_extract_point(T, A, g; names, kw...) for g in geoms)
return skipmissing ? collect(_skip_missing_rows(rows)) : collect(rows)
else
throw(ArgumentError("`data` does not contain geomety objects"))
# This will be a list of vectors, we need to flatten it into one
rows = Iterators.flatten(_extract(T, A, g; names, skipmissing, kw...) for g in geoms)
return skipmissing ? collect(_skip_missing_rows(rows)) : collect(rows)
end
end
function _extract(A::RasterStackOrArray, ::GI.AbstractFeatureTrait, feature; kw...)
_extract(A, GI.geometry(feature); kw...)
function _extract(T, A::RasterStackOrArray, ::GI.AbstractFeatureTrait, feature; kw...)
_extract(T, A, GI.geometry(feature); kw...)
end
function _extract(A::RasterStackOrArray, ::GI.AbstractMultiPointTrait, geom; kw...)
(_extract(A, p; kw...) for p in GI.getpoint(geom))
function _extract(T, A::RasterStackOrArray, ::GI.FeatureCollectionTrait, fc; kw...)
# Fall back to the Array/iterator method for feature collections
_extract(T, A, [GI.geometry(f) for f in GI.getfeature(fc)]; kw...)
end
function _extract(A::RasterStackOrArray, ::GI.AbstractGeometryTrait, geom; names, kw...)
B = boolmask(geom; to=dims(A, DEFAULT_POINT_ORDER), kw...)
pts = DimPoints(B)
dis = DimIndices(B)
((; geometry=_geom_nt(dims(B), pts[I]), _prop_nt(A, I, names)...) for I in CartesianIndices(B) if B[I])
function _extract(T, A::RasterStackOrArray, ::GI.AbstractMultiPointTrait, geom; skipmissing=false, kw...)
rows = (_extract_point(T, A, g; kw...) for g in GI.getpoint(geom))
return skipmissing ? collect(_skip_missing_rows(rows)) : collect(rows)
end
_geom_nt(dims::DimTuple, pts) = NamedTuple{map(dim2key, dims)}(pts)
_prop_nt(st::AbstractRasterStack, I, names::NamedTuple{K}) where K = NamedTuple{K}(values(st[I]))
_prop_nt(A::AbstractRaster, I, names::NamedTuple{K}) where K = NamedTuple{K}((A[I],))

function _extract(x::RasterStackOrArray, ::GI.PointTrait, point; dims, names, atol=nothing)
# Get the actual dimensions available in the object
coords = map(DD.commondims(x, dims)) do d
_dimcoord(d, point)
end
function _extract(T, A::RasterStackOrArray, ::GI.AbstractGeometryTrait, geom;
names, skipmissing=false, kw...
)
# Make a raster mask of the geometry
B = boolmask(geom; to=commondims(A, DEFAULT_POINT_ORDER), kw...)
# Add a row for each pixel that is `true` in the mask
rows = (_maybe_add_fields(T, A, _prop_nt(A, I, names), I) for I in CartesianIndices(B) if B[I])
# Maybe skip missing rows
return skipmissing ? collect(_skip_missing_rows(rows)) : collect(rows)
end
_extract(T, x::RasterStackOrArray, trait::GI.PointTrait, point; kw...) =
_extract_point(T, x, point; kw...)

# Extract the values
if any(map(ismissing, coords))
# TODO test this branch somehow
geometry = map(_ -> missing, coords)
@inline _skip_missing_rows(rows) = Iterators.filter(row -> !any(ismissing, row), rows)

@inline _prop_nt(st::AbstractRasterStack, I, names::NamedTuple{K}) where K = t[I][K]
@inline _prop_nt(A::AbstractRaster, I, names::NamedTuple{K}) where K = NamedTuple{K}((A[I],))

# Extract a single point
function _extract_point(T, x::RasterStackOrArray, point;
dims, names::NamedTuple{K}, atol=nothing, kw...
) where K
# The point itself might be missing, so return missing for every field
if ismissing(point)
layer_vals = map(_ -> missing, names)
geom = missing
I = missing
else
selectors = map(dims, coords) do d, c
_at_or_contains(d, c, atol)
# Get the actual dimensions available in the object
coords = map(DD.commondims(x, dims)) do d
_dimcoord(d, point)
end
layer_vals = if DD.hasselection(x, selectors)
x isa Raster ? (x[selectors...],) : x[selectors...]
# If any are coordinates missing, also return missing for everything
if any(map(ismissing, coords))
layer_vals = map(_ -> missing, names)
geom = missing
I = missing
else
map(_ -> missing, names)
selectors = map(dims, coords) do d, c
_at_or_contains(d, c, atol)
end
# Check the selector is in bounds / actually selectable
if DD.hasselection(x, selectors)
I = DD.dims2indices(x, selectors)
layer_vals = x isa Raster ? NamedTuple{K}((x[I...],)) : x[I...][K]
else
# Otherwise return `missing` for everything
I = missing
layer_vals = map(_ -> missing, names)
end
# Return the point for the geometry, either way
geom = point
end
geometry = point
end
properties = NamedTuple{keys(names)}(layer_vals)
return (; geometry, properties...)

return _maybe_add_fields(T, x, layer_vals, geom, I)
end
function _extract_point(T, A::RasterStackOrArray, point::Missing; names, kw...)
# Missing points return a single row
return _maybe_add_fields(T, A, map(_ -> missing, names), missing, missing)
end

# Maybe add optional fields
@inline function _maybe_add_fields(T, A, layer_vals::NamedTuple, I)
_maybe_add_fields(T, A, layer_vals, DimPoints(A)[I], I)
end
@inline function _maybe_add_fields(::Type{T}, A, layer_vals::NamedTuple, point, I)::T where {T<:NamedTuple{K}} where K
if :geometry in K
:index in K ? merge((; geometry=point, index=I), layer_vals) : merge((; geometry=point), layer_vals)
else
:index in K ? merge((; index=I), layer_vals) : layer_vals
end
end

_names(A::AbstractRaster) = NamedTuple{(Symbol(name(A)),)}((Symbol(name(A)),))
_names(A::AbstractRasterStack) = NamedTuple{keys(A)}(keys(A))
_names(A::AbstractRaster) = (Symbol(name(A)),)
_names(A::AbstractRasterStack) = keys(A)
158 changes: 140 additions & 18 deletions test/methods.jl
Original file line number Diff line number Diff line change
Expand Up @@ -256,35 +256,157 @@ end
# Polygon(::Val{:ArchGDAL}, values) = ArchGDAL.createpolygon(values)

createpoint(args...) = ArchGDAL.createpoint(args...)
createfeature(x::Tuple{<:Any,<:Any}) = NamedTuple{(:geometry,:test)}(x)
createfeature(x::Tuple{<:Any,<:Any,<:Any}) = NamedTuple{(:geometry,:test,:test2)}(x)
createfeature(::Missing) = missing

@testset "extract" begin
dimz = (X(9.0:1.0:10.0), Y(0.1:0.1:0.2))
ga = Raster([1 2; 3 4], dimz; name=:test, missingval=missing)
ga2 = Raster([5 6; 7 8], dimz; name=:test2, missingval=missing)
st = RasterStack(ga, ga2)
rast = Raster([1 2; 3 4], dimz; name=:test, missingval=missing)
rast2 = Raster([5 6; 7 8], dimz; name=:test2, missingval=missing)
rast_m = Raster([1 2; 3 missing], dimz; name=:test, missingval=missing)
table = (geometry=[missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)], foo=zeros(4))
st = RasterStack(rast, rast2)
@testset "from Raster" begin
# Tuple points
@test all(extract(ga, [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]) .===
createfeature.([missing, ((9.0, 0.1), 1), ((9.0, 0.2), 2), ((10.0, 0.3), missing)]))
ex = extract(rast, [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)])
@test eltype(ex) == NamedTuple{(:geometry,:test)}
@test all(ex .=== [
(geometry = missing, test = missing)
(geometry = (9.0, 0.1), test = 1)
(geometry = (9.0, 0.2), test = 2)
(geometry = (10.0, 0.3), test = missing)
])
ex = extract(rast_m, [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]; skipmissing=true)
@test eltype(ex) == NamedTuple{(:geometry,:test),Tuple{Tuple{Float64,Float64},Int}}
@test all(ex .=== [(geometry = (9.0, 0.1), test = 1), (geometry = (9.0, 0.2), test = 2)])
ex = extract(rast_m, [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false)
@test eltype(ex) == NamedTuple{(:test,),Tuple{Int}}
@test all(ex .=== [(test = 1,), (test = 2,)])
@test all(extract(rast_m, [missing, (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false, index=true) .=== [
(index = (1, 1), test = 1,)
(index = (1, 2), test = 2,)
])
# NamedTuple (reversed) points
@test all(extract(ga, [missing, (Y=0.1, X=9.0), (Y=0.2, X=10.0), (Y=0.3, X=10.0)]) |> collect .===
createfeature.([missing, ((Y=0.1, X=9.0), 1), ((Y=0.2, X=10.0), 4), ((Y=0.3, X=10.0), missing)]))
@test all(extract(rast, [missing, (Y=0.1, X=9.0), (Y=0.2, X=10.0), (Y=0.3, X=10.0)]) .=== [
(geometry = missing, test = missing)
(geometry = (Y = 0.1, X = 9.0), test = 1)
(geometry = (Y = 0.2, X = 10.0), test = 4)
(geometry = (Y = 0.3, X = 10.0), test = missing)
])
# Vector points
@test all(extract(ga, [[9.0, 0.1], [10.0, 0.2]]) .== createfeature.([([9.0, 0.1], 1), ([10.0, 0.2], 4)]))
# ArchGDAL equality is broken
# @test all(extract(ga, ArchGDAL.createmultipoint([[0.1, 9.0], [0.2, 10.0], [0.3, 10.0]])) .==
# createfeature.([(createpoint(0.1, 9.0), 1), (createpoint(0.2, 10.0), 4), (createpoint(0.3, 10.0), missing)]))
@test all(extract(rast, [[9.0, 0.1], [10.0, 0.2]]) .== [
(geometry = [9.0, 0.1], test = 1)
(geometry = [10.0, 0.2], test = 4)
])
# Extract a polygon
p = ArchGDAL.createpolygon([[[8.0, 0.0], [11.0, 0.0], [11.0, 0.4], [8.0, 0.0]]])
@test all(extract(ga, p) .===
createfeature.([((X=9.0, Y=0.1), 1), ((X=10.0, Y=0.1), 3), ((X=10.0, Y=0.2), 4)]))
@test all(extract(rast_m, p) .=== [
(geometry = (9.0, 0.1), test = 1)
(geometry = (10.0, 0.1), test = 3)
(geometry = (10.0, 0.2), test = missing)
])
# Extract a vector of polygons
ex = extract(rast_m, [p, p])
@test eltype(ex) == NamedTuple{(:geometry,:test)}
@test all(ex .=== [
(geometry = (9.0, 0.1), test = 1)
(geometry = (10.0, 0.1), test = 3)
(geometry = (10.0, 0.2), test = missing)
(geometry = (9.0, 0.1), test = 1)
(geometry = (10.0, 0.1), test = 3)
(geometry = (10.0, 0.2), test = missing)
])
# Test all the keyword combinations
@test all(extract(rast_m, p) .=== [
(geometry = (9.0, 0.1), test = 1)
(geometry = (10.0, 0.1), test = 3)
(geometry = (10.0, 0.2), test = missing)
])
@test all(extract(rast_m, p; geometry=false) .=== [
(test = 1,)
(test = 3,)
(test = missing,)
])
@test all(extract(rast_m, p; geometry=false, index=true) .=== [
(index = CartesianIndex(1, 1), test = 1)
(index = CartesianIndex(2, 1), test = 3)
(index = CartesianIndex(2, 2), test = missing)
])
@test all(extract(rast_m, p; index=true) .=== [
(geometry = (9.0, 0.1), index = CartesianIndex(1, 1), test = 1)
(geometry = (10.0, 0.1), index = CartesianIndex(2, 1), test = 3)
(geometry = (10.0, 0.2), index = CartesianIndex(2, 2), test = missing)
])
@test extract(rast_m, p; skipmissing=true) == [
(geometry = (9.0, 0.1), test = 1)
(geometry = (10.0, 0.1), test = 3)
]
@test extract(rast_m, p; skipmissing=true, geometry=false) == [
(test = 1,)
(test = 3,)
]
@test extract(rast_m, p; skipmissing=true, geometry=false, index=true) == [
(index = CartesianIndex(1, 1), test = 1)
(index = CartesianIndex(2, 1), test = 3)
]
@test extract(rast_m, p; skipmissing=true, index=true) == [
(geometry = (9.0, 0.1), index = CartesianIndex(1, 1), test = 1)
(geometry = (10.0, 0.1), index = CartesianIndex(2, 1), test = 3)
]
# Empty geoms
@test extract(rast, []) == NamedTuple{(:geometry, :test),Tuple{Missing,Missing}}[]
@test extract(rast, []; geometry=false) == NamedTuple{(:test,),Tuple{Missing}}[]
# Missing coord errors
@test_throws ArgumentError extract(rast, [(0.0, missing), (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)])
@test_throws ArgumentError extract(rast, [(9.0, 0.1), (0.0, missing), (9.0, 0.2), (10.0, 0.3)])
@test_throws ArgumentError extract(rast, [(X=0.0, Y=missing), (9.0, 0.1), (9.0, 0.2), (10.0, 0.3)])
end

@testset "with table" begin
@test all(extract(rast, table) .=== [
(geometry = missing, test = missing)
(geometry = (9.0, 0.1), test = 1)
(geometry = (9.0, 0.2), test = 2)
(geometry = (10.0, 0.3), test = missing)
])
@test extract(rast, table; skipmissing=true) == [
(geometry = (9.0, 0.1), test = 1)
(geometry = (9.0, 0.2), test = 2)
]
@test extract(rast, table; skipmissing=true, geometry=false) == [
(test = 1,)
(test = 2,)
]
@test extract(rast, table; skipmissing=true, geometry=false, index=true) == [
(index = (1, 1), test = 1,)
(index = (1, 2), test = 2,)
]
end

@testset "from stack" begin
@test all(extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]) |> collect .===
createfeature.([missing, ((9.0, 0.1), 1, 5), ((10.0, 0.2), 4, 8), ((10.0, 0.3), missing, missing)]))
@test all(extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]) .=== [
(geometry = missing, test = missing, test2 = missing)
(geometry = (9.0, 0.1), test = 1, test2 = 5)
(geometry = (10.0, 0.2), test = 4, test2 = 8)
(geometry = (10.0, 0.3), test = missing, test2 = missing)
])
@test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true) == [
(geometry = (9.0, 0.1), test = 1, test2 = 5)
(geometry = (10.0, 0.2), test = 4, test2 = 8)
]
@test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false) == [
(test = 1, test2 = 5)
(test = 4, test2 = 8)
]
@test extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; skipmissing=true, geometry=false, index=true) == [
(index = (1, 1), test = 1, test2 = 5)
(index = (2, 2), test = 4, test2 = 8)
]
# Subset with `names`
@test all(extract(st, [missing, (9.0, 0.1), (10.0, 0.2), (10.0, 0.3)]; names=(:test2,)) .=== [
(geometry = missing, test2 = missing)
(geometry = (9.0, 0.1), test2 = 5)
(geometry = (10.0, 0.2), test2 = 8)
(geometry = (10.0, 0.3), test2 = missing)
])
end
end

Expand Down

0 comments on commit 19a53cc

Please sign in to comment.