diff --git a/docs/Project.toml b/docs/Project.toml index bf65d138a..a8012bd65 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -11,6 +11,7 @@ DocumenterVitepress = "4710194d-e776-4893-9690-8d956a29c365" GBIF2 = "dedd4f52-e074-43bf-924d-d6bce14ad628" GLMakie = "e9467ef8-e4e7-5192-8a1a-b1aee30e663a" GeoInterface = "cf35fbd7-0cd7-5166-be24-54bfbe79505f" +GeoMakie = "db073c08-6b98-4ee5-b6a4-5efafb3259c6" HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" @@ -18,11 +19,15 @@ MLJGLMInterface = "caf8df21-4939-456d-ac9c-5fefbfb04c0c" Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" Maxnet = "81f79f80-22f2-4e41-ab86-00c11cf0f26f" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" +NaNStatistics = "b946abbf-3ea7-4610-9019-9858bfdeaf2d" +NaturalEarth = "436b0209-26ab-4e65-94a9-6526d86fea76" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +Proj = "c94c279d-25a6-4763-9509-64d165bea63e" RasterDataSources = "3cb90ccd-e1b6-4867-9617-4276c8b2ca36" Rasters = "a3a2b9e3-a471-40c9-b274-f788e487c689" Shapefile = "8e980c4a-a4fe-5da2-b3a7-4b4b0353a2f4" SpeciesDistributionModels = "3ef73bbf-0321-4d3b-9a2e-5fbebc8e35da" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [sources] SpeciesDistributionModels = {url = "https://github.com/tiemvanderdeure/SpeciesDistributionModels.jl/"} diff --git a/docs/src/.vitepress/config.mts b/docs/src/.vitepress/config.mts index f26f08ff1..84f3e9762 100644 --- a/docs/src/.vitepress/config.mts +++ b/docs/src/.vitepress/config.mts @@ -18,19 +18,46 @@ const navTemp = { { text: 'Get Started', link: '/get_started' }, { text: 'Manual', items: [ - { text: 'Methods', link: '/manual/methods' }, - { text: 'Array Operations', link: '/manual/array_operations' }, - { text: 'Data Sources', link: '/manual/data_sources' }, - { text: 'Plots', + { text: 'Data Sources', link: '/manual/data_sources' }, + { text: 'Methods', items: [ - { text: 'Plots.jl', link: '/manual/plotting' }, - { text: 'Makie.jl', link: 'manual/plot_makie' }, + { text: 'Overview', link: '/manual/methods' }, + { text: 'rasterize', link: '/api#Rasters.rasterize' }, + { text: 'extract', link: '/api#Rasters.extract' }, + { text: 'zonal', link: '/api#Rasters.zonal-Tuple{Any,%20Union{AbstractRaster,%20AbstractRasterStack}}'}, + { text: 'aggregate', link: '/api#Rasters.aggregate' }, + { text: 'disaggregate', link: '/api#Rasters.disaggregate' }, + { text: 'cellarea', link: '/manual/cellarea' }, + { text: 'mosaic', link: '/api#Rasters.mosaic-Tuple{Function,%20Union{AbstractRaster,%20AbstractRasterStack},%20Vararg{Union{AbstractRaster,%20AbstractRasterStack}}}' }, + { text: 'crop', link: '/api#Rasters.crop' }, + { text: 'extend', link: '/api#Rasters.extend' }, + { text: 'trim', link: '/api#Rasters.trim-Tuple{Union{AbstractRaster,%20AbstractRasterStack}}' }, + { text: 'resample', link: '/api#Rasters.resample-Tuple' }, + { text: 'warp', link: '/api#Rasters.warp-Tuple' }, + { text: 'classify', link: '/api#Rasters.classify' }, + { text: 'mask', link: '/api#Rasters.mask-Tuple{Any}' }, + { text: 'replace_missing', link: '/api#Rasters.replace_missing-Tuple{Any}' }, + { text: 'modify', link: '/api#DimensionalData.modify-Tuple{Any,%20AbstractRasterSeries}' }, + { text: 'read', link: '/api#Base.read-Tuple{Union{AbstractRaster,%20AbstractRasterSeries,%20AbstractRasterStack}}' }, + { text: 'read!', link: '/api#Base.read!-Tuple{AbstractRaster,%20AbstractArray}' }, + { text: 'open', link: '/api#Base.open-Tuple{Function,%20AbstractRaster}' }, + { text: 'write', link: '/api#Base.write-Tuple{AbstractString,%20AbstractRasterSeries}' }, ] - }, + }, ] }, { text: 'Tutorials', items: [ + { text: 'Plotting', + collapsed: true, + items: [ + { text: 'Plots.jl', link: '/tutorials/plotting' }, + { text: 'Makie.jl', link: '/tutorials/plot_makie' }, + ] + }, + { text: 'Array Operations', link: '/tutorials/array_operations' }, + { text: 'Spatial mean', link: '/tutorials/spatial_mean' }, + { text: 'Reprojection and resampling', link: '/tutorials/resample'}, { text: 'Species Distribution Modelling', link: '/tutorials/gbif_wflow' }, ] }, @@ -98,19 +125,47 @@ export default defineConfig({ { text: 'Get Started', link: '/get_started' }, { text: 'Manual', items: [ - { text: 'Methods', link: '/manual/methods' }, - { text: 'Array Operations', link: '/manual/array_operations' }, { text: 'Data Sources', link: '/manual/data_sources' }, - { text: 'Plots', + { text: 'Methods', + collapsed: true, items: [ - { text: 'Plots.jl', link: '/manual/plotting' }, - { text: 'Makie.jl', link: '/manual/plot_makie' }, + { text: 'Overview', link: '/manual/methods' }, + { text: 'rasterize', link: '/api#Rasters.rasterize' }, + { text: 'extract', link: '/api#Rasters.extract' }, + { text: 'zonal', link: '/api#Rasters.zonal-Tuple{Any,%20Union{AbstractRaster,%20AbstractRasterStack}}'}, + { text: 'aggregate', link: '/api#Rasters.aggregate' }, + { text: 'disaggregate', link: '/api#Rasters.disaggregate' }, + { text: 'cellarea', link: '/manual/cellarea' }, + { text: 'mosaic', link: '/api#Rasters.mosaic-Tuple{Function,%20Union{AbstractRaster,%20AbstractRasterStack},%20Vararg{Union{AbstractRaster,%20AbstractRasterStack}}}' }, + { text: 'crop', link: '/api#Rasters.crop' }, + { text: 'extend', link: '/api#Rasters.extend' }, + { text: 'trim', link: '/api#Rasters.trim-Tuple{Union{AbstractRaster,%20AbstractRasterStack}}' }, + { text: 'resample', link: '/api#Rasters.resample-Tuple' }, + { text: 'warp', link: '/api#Rasters.warp-Tuple' }, + { text: 'classify', link: '/api#Rasters.classify' }, + { text: 'mask', link: '/api#Rasters.mask-Tuple{Any}' }, + { text: 'replace_missing', link: '/api#Rasters.replace_missing-Tuple{Any}' }, + { text: 'modify', link: '/api#DimensionalData.modify-Tuple{Any,%20AbstractRasterSeries}' }, + { text: 'read', link: '/api#Base.read-Tuple{Union{AbstractRaster,%20AbstractRasterSeries,%20AbstractRasterStack}}' }, + { text: 'read!', link: '/api#Base.read!-Tuple{AbstractRaster,%20AbstractArray}' }, + { text: 'open', link: '/api#Base.open-Tuple{Function,%20AbstractRaster}' }, + { text: 'write', link: '/api#Base.write-Tuple{AbstractString,%20AbstractRasterSeries}' }, ] - }, + }, ] }, { text: 'Tutorials', items: [ + { text: 'Plotting', + collapsed: true, + items: [ + { text: 'Plots.jl', link: '/tutorials/plotting' }, + { text: 'Makie.jl', link: '/tutorials/plot_makie' }, + ] + }, + { text: 'Array Operations', link: '/tutorials/array_operations' }, + { text: 'Spatial mean', link: '/tutorials/spatial_mean' }, + { text: 'Reprojection and resampling', link: '/tutorials/resample'}, { text: 'Species Distribution Modelling', link: '/tutorials/gbif_wflow' }, ] }, diff --git a/docs/src/manual/cellarea.md b/docs/src/manual/cellarea.md new file mode 100644 index 000000000..a1a2475d0 --- /dev/null +++ b/docs/src/manual/cellarea.md @@ -0,0 +1,67 @@ +## `cellarea` + +```@meta +CollapsedDocStrings=true +``` + +```@docs; canonical=false +cellarea +``` + +Computing the area of each cell in a raster is useful for a number of reasons - if you have a variable like +population per cell, or elevation ([spatially extensive variables](https://r-spatial.org/book/05-Attributes.html#sec-extensiveintensive)), +you'll want to account for the fact that different cells have different areas. + +Let's construct a raster and see what this looks like! We'll keep it in memory. + +The spherical method relies on the [Proj.jl](https://github.com/JuliaGeo/Proj.jl) package to perform coordinate transformation, so that has to be loaded explicitly. + +````@example cellarea +using Rasters, Proj +```` + +To construct a raster, we'll need to specify the `x` and `y` dimensions. These are called `lookups` in `Rasters.jl.` + +````@example cellarea +using Rasters.Lookups +```` + +We can now construct the x and y lookups. Here we'll use a start-at-one, step-by-five grid. +Note that we're specifying that the "sampling", i.e., what the coordinates actually mean, +is `Intervals(Start())`, meaning that the coordinates are the starting point of each interval. + +This is in contrast to `Points()` sampling, where each index in the raster represents the value at a sampling point; +here, each index represents a grid cell, which is defined by the coordinate being at the start. + +````@example cellarea +x = X(1:5:30; sampling = Intervals(Start()), crs = EPSG(4326)) +y = Y(50:5:80; sampling = Intervals(Start()), crs = EPSG(4326)); +nothing # hide +```` + +I have chosen the y-range here specifically so we can show the difference between spherical and planar `cellarea`. + +````@ansi cellarea +ras = Raster(ones(x, y); crs = EPSG(4326)) +```` + +We can just call `cellarea` on this raster, which returns cell areas in meters, on Earth, assuming it's a sphere: + +````@ansi cellarea +cellarea(ras) +```` + +and if we plot it, you can see the difference in cell area as we go from the equator to the poles: + +````@example cellarea +using CairoMakie +heatmap(cellarea(ras); axis = (; aspect = DataAspect())) +```` + +We can also try this using the planar method, which simply computes the area of the rectangle using `area = x_side_length * y_side_length`: + +````@ansi cellarea +cellarea(Planar(), ras) +```` + +Note that this is of course wildly inaccurate for a geographic dataset - but if you're working in a projected coordinate system, like polar stereographic or Mercator, this can be very useful (and a _lot_ faster)! \ No newline at end of file diff --git a/docs/src/manual/data_sources.md b/docs/src/manual/data_sources.md index 2604d9ca0..93df91881 100644 --- a/docs/src/manual/data_sources.md +++ b/docs/src/manual/data_sources.md @@ -43,10 +43,6 @@ They are always 3 dimensional, and have `Y`, `X` and [`Band`](@ref) dimensions. using Rasters ```` -````@docs -smapseries -```` - ## Writing file formats to disk Files can be written to disk with ArchGDAL.jl and NCDatasets.jl backends using diff --git a/docs/src/manual/array_operations.md b/docs/src/tutorials/array_operations.md similarity index 99% rename from docs/src/manual/array_operations.md rename to docs/src/tutorials/array_operations.md index ded913c65..e639be9a8 100644 --- a/docs/src/manual/array_operations.md +++ b/docs/src/tutorials/array_operations.md @@ -1,3 +1,5 @@ +# Array operations + Most base methods work as for regular julia `Array`s, such as `reverse` and rotations like `rotl90`. Base, statistics and linear algebra methods like `mean` that take a `dims` argument can also use the dimension name. diff --git a/docs/src/tutorials/gbif_wflow.md b/docs/src/tutorials/gbif_wflow.md index 5bd6fcedc..1fe352007 100644 --- a/docs/src/tutorials/gbif_wflow.md +++ b/docs/src/tutorials/gbif_wflow.md @@ -2,7 +2,7 @@ This example shows a full Species distribution modelling workflow, from loading data, to cleaning it, to fitting an ensemble and generating predictions. -It uses GBIF and WorldClim data, which are common datasets in ecology. +It uses GBIF and WorldClim data, which are common datasets in ecology. We'll load occurrences for the Mountain Pygmy Possum species using [GBIF2.jl](https://github.com/rafaqz/GBIF2.jl), an interface to the [Global Biodiversity Information Facility](https://www.gbif.org/), and extract environmental variables using BioClim data from [RasterDataSources.jl](https://github.com/EcoJulia/RasterDataSources.jl). ## Load Rasters, ArchGDAL, RasterDataSources and GBIF The GBIF2 library is used to download occurrence data, RasterDataSources to conveniently access Bioclim data. ArchGDAL is necessary to load in the Bioclim data. diff --git a/docs/src/manual/plot_makie.md b/docs/src/tutorials/plot_makie.md similarity index 100% rename from docs/src/manual/plot_makie.md rename to docs/src/tutorials/plot_makie.md diff --git a/docs/src/manual/plotting.md b/docs/src/tutorials/plotting.md similarity index 100% rename from docs/src/manual/plotting.md rename to docs/src/tutorials/plotting.md diff --git a/docs/src/tutorials/resample.md b/docs/src/tutorials/resample.md new file mode 100644 index 000000000..a7964e80c --- /dev/null +++ b/docs/src/tutorials/resample.md @@ -0,0 +1,291 @@ +```@meta +CollapsedDocStrings=true +``` +# Reprojection and resampling + +### What is resampling? + +**[`resample`](@ref)** "re-samples" the +data by interpolation and can also aggregate or disaggregate, changing the resolution. +It always returns a `Regular` lookup (like a range), and is the most flexible of the +resampling methods. + +This uses GDAL's `gdalwarp` algorithm under the hood. You can call that via [`warp`](@ref) +if you need more control, but generally `resample` is sufficient. + +::: tip warp, contributions are welcome! + +- Show how to use `warp` to reproject a raster + +::: + +Rasters.jl has a few other methods to change the lookups of a raster. These are: +- [`reproject`](@ref), which directly reprojects the lookup axes + (but is **only usable for specific cases**, where the source and destination + coordinate systems are both cylindrical, like the long-lat, Mercator, or Web-Mercator projections.) + + This is a lossless operation and keeps the data exactly the same - only the axes are changed. + +- [`aggregate`](@ref) and [`disaggregate`](@ref), which change the resolution of + the raster by merging ([`aggregate`](@ref)) or splitting ([`disaggregate`](@ref)) cells. + + They can't change cells fractionally, and can't change the projection or coordinate system. + +Of all these methods, **`resample`** is the most flexible and powerful, and is what we will focus on here. +It is, however, also the slowest. So if another method is applicable to your problem, you should consider it. + +### How `resample` works + +`resample` uses GDAL's `gdalwarp` algorithm under the hood. This is a battle-tested algorithm +and is generally pretty robust. However, it has the following limitations: +- It always assumes cell-based sampling, instead of point-based sampling. This does mean that + point-based rasters are converted to cell-based sampling. +- It can only accept some primitive types for the input data, since that data is passed directly to a C library. + Things like `RGB` or user-defined types are not usually supported. + +`resample` allows you to specify several methods, see some of them in the next section. + +### `resolution`, `size` and `methods` + +Let's start by loading the necessary packages: + +````@example resample +using Rasters, RasterDataSources, ArchGDAL +using DimensionalData +using DimensionalData.Lookups +using NaNStatistics +using CairoMakie +```` + +````@example resample +ras = Raster(WorldClim{BioClim}, 5) +ras_m = replace_missing(ras, missingval=NaN) +```` + +resampling to a given `size` or `res ≡ resolution` providing a `method` is done with + +:::tabs + +== size + +````@ansi resample +ras_sample = resample(ras_m; size=(1440, 720), method="average") +```` + +== resolution + +````@ansi resample +ras_sample = resample(ras_m; res=1.0, method="average") +```` + +::: + +other available methods to try: `"mode"`, `"max"`, `"sum"`, `"bilinear"`, `"cubic"`, `"cubicspline"`, `"lanczos"`, `"min"`, `"med"`, `"q1"`, `"q3"` and `"near"`. + +Let's consider a few more examples, with the following options: + +````@example resample +methods = ["average", "mode", "max", "sum"] +sizes = [(2160, 1080), (1440, 720), (720, 360), (360, 180)] +resolutions = [0.16666666666666666, 0.25, 0.5, 1.0]; +nothing # hide +```` + +:::tabs + +== sizes and methods + +````@example resample +method_sizes = [resample(ras_m; size=size, method=method) for method in methods for size in sizes] +with_theme(Rasters.theme_rasters()) do + colorrange = (nanminimum(ras_m), nanmaximum(ras_m)) + hm=nothing + fig = Figure(; size = (1000, 600)) + axs = [Axis(fig[i,j], title="size=$(size), method=:$(method)", titlefont=:regular) + for (i, method) in enumerate(methods) for (j, size) in enumerate(sizes)] + for (i, ax) in enumerate(axs) + hm = heatmap!(ax, method_sizes[i]; colorrange) + end + Colorbar(fig[:,end+1], hm) + hidedecorations!.(axs; grid=false) + rowgap!(fig.layout, 5) + colgap!(fig.layout, 10) + fig +end +```` + +== resolutions and methods + +````@example resample +method_res = [resample(ras_m; res=res, method=method) for method in methods for res in resolutions] +with_theme(Rasters.theme_rasters()) do + colorrange = (nanminimum(ras_m), nanmaximum(ras_m)) + hm=nothing + fig = Figure(; size = (1000, 600)) + axs = [Axis(fig[i,j], title="res=$(round(res, digits=4)), method=:$(method)", titlefont=:regular) + for (i, method) in enumerate(methods) for (j, res) in enumerate(resolutions)] + for (i, ax) in enumerate(axs) + hm = heatmap!(ax, method_res[i]; colorrange) + end + Colorbar(fig[:,end+1], hm) + hidedecorations!.(axs; grid=false) + rowgap!(fig.layout, 5) + colgap!(fig.layout, 10) + fig +end +```` + +::: + + +## `reproject` with `resample` using a `ProjString` + +Geospatial datasets will come in different [projections](https://proj.org/en/9.4/operations/projections/index.html) or coordinate reference systems (CRS) for many reasons. Here, we will focus on `MODIS SINUSOIDAL` and `EPSG`, and transformations between them. + +Let's load our test raster + +````@example resample +ras = Raster(WorldClim{BioClim}, 5) +ras_m = replace_missing(ras, missingval=NaN); +nothing # hide +```` + +### Sinusoidal Projection (MODIS) + +````@example resample +SINUSOIDAL_CRS = ProjString("+proj=sinu +lon_0=0 +type=crs") +```` + +::: details Raw MODIS ProjString + +````julia +SINUSOIDAL_CRS = ProjString("+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +a=6371007.181 +b=6371007.181 +units=m +no_defs") +```` + +::: + + +and the `resample` is performed with + +````@example resample +ras_sin = resample(ras_m; size=(2160, 1080), crs=SINUSOIDAL_CRS, method="average") +```` + +::: tip + +`GDAL` always changes the locus to cell sampling, you can reset this by using `shiftlocus`. + +::: + +let's compare the total counts! + +````@example resample +nansum(ras_m), nansum(ras_sin) +```` + +and, how does this looks like? + +````@example resample +fig, ax, plt = heatmap(ras_sin) +Colorbar(fig[1,2], plt) +fig +```` + +now, let's go back to `latitude` and `longitude` and reduce the resolution + +````@example resample +ras_epsg = resample(ras_sin; size=(1440,720), crs=EPSG(4326), method="average") +```` + +and let's apply `shiftlocus` such that the lookups share the exact same grid, which might be needed when building bigger datasets: + +````@example resample +locus_resampled = DimensionalData.shiftlocus(Center(), ras_epsg) +```` + +::: info Things to keep in mind + + - You can in fact resample to another raster `resample(ras; to=ref_ras)`, if you want perfect alignment. Contributions are welcome for this use case! + - This doesn't work for irregularly sampled rasters. + +::: + + +````@example resample +fig, ax, plt = heatmap(ras_epsg) +Colorbar(fig[1,2], plt) +fig +```` + +### A `Raster` from scratch + +````@example resample +x_range = LinRange(-180, 179.75, 1440) +y_range = LinRange(89.75, -90, 720) +ras_data = ras_epsg.data +nothing # hide +```` + +create the raster + +````@ansi resample +using Rasters.Lookups + +ras_scratch = Raster(ras_data, (X(x_range; sampling=Intervals(Start())), + Y(y_range; sampling=Intervals(Start()))), crs=EPSG(4326), missingval=NaN) + +```` + +::: warning + +Note that you need to specify `sampling=Intervals(Start())` for `X` and `Y`. + +This requires that you run `using Rasters.Lookups`, where the `Intervals` and `Start` types are defined. + +::: + +and take a look + +````@example resample +fig, ax, plt = heatmap(ras_scratch) +Colorbar(fig[1,2], plt) +fig +```` + +and the corresponding resampled projection + +````@ansi resample +ras_sin_s = resample(ras_scratch; size=(1440,720), crs=SINUSOIDAL_CRS, method="average") +```` + +````@example resample +fig, ax, plt = heatmap(ras_sin_s) +Colorbar(fig[1,2], plt) +fig +```` + +and go back from `sin` to `epsg`: + +````@example resample +ras_epsg = resample(ras_sin_s; size=(1440,720), crs=EPSG(4326), method="average") +locus_resampled = DimensionalData.shiftlocus(Center(), ras_epsg) + +fig, ax, plt = heatmap(locus_resampled) +Colorbar(fig[1,2], plt) +fig +```` + +and compare the total counts again! + +````@example resample +nansum(ras_sin_s), nansum(locus_resampled) +```` + +::: danger + +Note that all counts are a little bit off. Could we mitigate this some more? + +::: + + + diff --git a/docs/src/tutorials/spatial_mean.md b/docs/src/tutorials/spatial_mean.md new file mode 100644 index 000000000..5c26d9a45 --- /dev/null +++ b/docs/src/tutorials/spatial_mean.md @@ -0,0 +1,143 @@ +# Computing spatial means + +```@meta +CollapsedDocStrings=true +``` + +It's very common to want to compute the mean of some value over some area of a raster. The initial approach is to simply average the values, but this will give you the arithmetic mean, not the spatial mean. + +The reason for this is that raster cells do not always have the same area, especially over a large region of the Earth where its curvature comes into play. + +To compute the spatial mean, you need to weight the values by the area of each cell. You can do this by multiplying the values by the cell area, then summing the values, and dividing that number by the total area. That was the motivation for this example. + +Let's get the rainfall over Chile, and compute the average rainfall across the country for the month of June. + +## Acquiring the data + +We'll get the precipitation data across the globe from [WorldClim](https://www.worldclim.org/data/index.html), via [RasterDataSources.jl](https://github.com/EcoJulia/RasterDataSources.jl), and use the `month` keyword argument to get the June data. + +Then, we can get the geometry of Chile from [NaturalEarth.jl](https://github.com/JuliaGeo/NaturalEarth.jl), and use `Rasters.mask` to get the data just for Chile. + +````@example cellarea +using Rasters +import Proj # to activate the spherical `cellarea` method + +using ArchGDAL, RasterDataSources, NaturalEarth # purely for data loading + +using CairoMakie # for plotting + +precip = Raster(WorldClim{Climate}, :prec; month = 6) +```` + +````@example cellarea +all_countries = naturalearth("admin_0_countries", 10) +chile = all_countries.geometry[findfirst(==("Chile"), all_countries.NAME)] +```` + +Let's plot the precipitation on the world map, and highlight Chile: + +````@example cellarea +f, a, p = heatmap(precip; colorrange = Makie.zscale(replace_missing(precip, NaN)), axis = (; aspect = DataAspect())) +p2 = poly!(a, chile; color = (:red, 0.3), strokecolor = :red, strokewidth = 0.5) +f +```` + +You can see Chile highlighted in red, in the bottom left quadrant. + +## Processing the data + +First, let's make sure that we only have the data that we care about, and crop and mask the raster so it only has values in Chile. +We can crop by the geometry, which really just generates a view into the raster that is bounded by the geometry's bounding box. + +````@example cellarea +cropped_precip = crop(precip; to = chile) +```` + +Now, we mask the data such that any data outside the geometry is set to `missing`. + +````@example cellarea +masked_precip = mask(cropped_precip; with = chile) +heatmap(masked_precip) +```` + +This is a lot of missing data, but that's mainly because the Chile geometry we have encompasses the Easter Islands as well, in the middle of the Pacific. + + +```@docs; canonical=false +cellarea +``` + +`cellarea` computes the area of each cell in a raster. +This is useful for a number of reasons - if you have a variable like +population per cell, or elevation ([spatially extensive variables](https://r-spatial.org/book/05-Attributes.html#sec-extensiveintensive)), +you'll want to account for the fact that different cells have different areas. + +You can specify whether you want to compute the area in the plane of your projection +(`Planar()`), or on a sphere of some radius (`Spherical(; radius=...)`). + +Now, let's compute the average precipitation per square meter across Chile. +First, we need to get the area of each cell in square meters. We'll use the spherical method, since we're working with a geographic coordinate system. This is the default. + +````@example cellarea +areas = cellarea(masked_precip) +masked_areas = mask(areas; with = chile) +heatmap(masked_areas; axis = (; title = "Cell area in square meters")) +```` + +You can see here that cells are largest towards the equator, and smallest away from it. This means that cells away from the equator should have a smaller contribution to the average than cells nearer the equator. + +## Computing the spatial mean + +Now we can compute the average precipitation per square meter. First, we compute total precipitation per grid cell: + +````@example cellarea +precip_per_area = masked_precip .* masked_areas +```` + +We can sum this to get the total precipitation per square meter across Chile: + +````@example cellarea +total_precip = sum(skipmissing(precip_per_area)) +```` + +We can also sum the areas to get the total area of Chile (in this raster, at least). + +````@example cellarea +total_area = sum(skipmissing(masked_areas)) +```` + +And we can convert that to an average by dividing by the total area: + +````@example cellarea +avg_precip = total_precip / total_area +```` + +According to the internet, Chile gets about 100mm of rain per square meter in June, so our statistic seems pretty close. + +Let's see what happens if we don't account for cell areas. An equivalent assumption would be that all cells have the same area. + +````@example cellarea +bad_total_precip = sum(skipmissing(masked_precip)) +bad_avg_precip = bad_total_precip / length(collect(skipmissing(masked_precip))) +```` + +This is misestimated! This is why it's important to account for cell areas when computing averages. + +!!! note + If you made it this far, congratulations! + + It's interesting to note that we've replicated the workflow of `zonal` here. + `zonal` is a more general function that can be used to compute any function over geometries, + and it has multithreading built in. + + But fundamentally, this is all that `zonal` is doing under the hood - + masking and cropping the raster to the geometry, and then computing the statistic. + +## Summary + +In this tutorial, we've seen how to compute the spatial mean of a raster, and how to account for the fact that raster cells do not always have the same area. + +We've also seen how to use the `cellarea` function to compute the area of each cell in a raster, and how to use the `mask` function to get the data within a geometry. + +We've seen that the spatial mean is not the same as the arithmetic mean, and that we need to account for the area of each cell when computing the average. + diff --git a/src/extensions.jl b/src/extensions.jl index de579bdec..30f31fdd1 100644 --- a/src/extensions.jl +++ b/src/extensions.jl @@ -162,14 +162,19 @@ warp(args...; kw...) = throw_extension_error(warp, "ArchGDAL", :RastersArchGDALE cellarea([method], x) Gives the approximate area of each gridcell of `x`. -By assuming the earth is a sphere, it approximates the true size to about 0.1%, depending on latitude. +By assuming the earth is a sphere, it approximates the true size to about 0.1%, depending on latitude. -Run `using ArchGDAL` to make this method fully available. +Run `using ArchGDAL` or `using Proj` to make this method fully available. + +- `method`: You can specify whether you want to compute the area in the plane of your projection `Planar()` or on a sphere of some radius `Spherical(; radius=...)`(the default). -`method` can be `Spherical(; radius)` (the default) or `Planar()`. - `Spherical` will compute cell area on the sphere, by transforming all points back to long-lat. You can specify the radius by the `radius` keyword argument here. By default, this is `6371008.8`, the mean radius of the Earth. + - `Planar` will compute cell area in the plane of the CRS you have chosen. Be warned that this will likely be incorrect for non-equal-area projections. +Returns a Raster with the same x and y dimensions as the input, +where each value in the raster encodes the area of the cell (in meters by default). + ## Example ```julia @@ -298,4 +303,4 @@ struct AffineProjected{T,F,A<:AbstractVector{T},M,C,MC,P,D} <: LA.Unaligned{T,1} mappedcrs::MC paired_lookup::P dim::D -end +end \ No newline at end of file diff --git a/src/methods/reproject.jl b/src/methods/reproject.jl index 7ce5aef46..e18910bb6 100644 --- a/src/methods/reproject.jl +++ b/src/methods/reproject.jl @@ -79,4 +79,4 @@ end # Guess the step for arrays stepof(A::AbstractArray) = (last(A) - first(A)) / (length(A) - 1) -stepof(A::AbstractRange) = step(A) +stepof(A::AbstractRange) = step(A) \ No newline at end of file