diff --git a/docs/src/PALEOmodelOutput.md b/docs/src/PALEOmodelOutput.md index 3bc395a..2176487 100644 --- a/docs/src/PALEOmodelOutput.md +++ b/docs/src/PALEOmodelOutput.md @@ -81,6 +81,8 @@ CurrentModule = PALEOmodel ```@docs FieldArray +FieldArray(fr::FieldRecord) +select(fa::FieldArray) get_array(fr::FieldRecord) ``` diff --git a/src/CoordsDims.jl b/src/CoordsDims.jl index f555f7e..d52a18f 100644 --- a/src/CoordsDims.jl +++ b/src/CoordsDims.jl @@ -1,14 +1,27 @@ - - - ################################################################## # Coordinate filtering and selection ################################################################ +# test whether c::FieldArray is a valid coordinate for dimension nd +# ie c.dims_coords contains dimension nd +function is_coordinate(c::FieldArray, nd::PB.NamedDimension) + c_has_nd = false + for (c_nd, _) in c.dims_coords + if c_nd == nd + c_has_nd = true + end + end + + return c_has_nd +end + +is_boundary_coordinate(c::FieldArray) = + length(c.dims_coords) > 1 && c.dims_coords[1][1] == PB.NamedDimension("bnds", 2) + "find indices of coord from first before range[1] to first after range[2]" -function find_indices(coord_values::AbstractVector, range) +function _find_indices(coord_values::AbstractVector, range) length(range) == 2 || - throw(ArgumentError("find_indices: length(range) != 2 $range")) + throw(ArgumentError("_find_indices: length(range) != 2 $range")) idxstart = findlast(t -> t<=range[1], coord_values) isnothing(idxstart) && (idxstart = 1) @@ -20,7 +33,7 @@ function find_indices(coord_values::AbstractVector, range) end "find indices of coord nearest val" -function find_indices(coord_values::AbstractVector, val::Real) +function _find_indices(coord_values::AbstractVector, val::Real) idx = 1 for i in 1:length(coord_values) if abs(coord_values[i] - val) < abs(coord_values[idx] - val) @@ -32,74 +45,85 @@ function find_indices(coord_values::AbstractVector, val::Real) end """ - dimscoord_subset(dim::PB.NamedDimension, coords::Vector{FieldArray}, select_dimvals::AbstractString, select_filter) - -> cidx, dim_subset, coords_subset, coords_used + _dim_subset(dim::PB.NamedDimension, coords::Vector{FieldArray}, select_keyname::AbstractString, select_filter) + -> indices_subset, dim_subset, coords_used -Filter dimension `dim` according to key `select_dimvals` and `select_filter` (typically a single value or a range) +Filter indices from dimension `dim` according to key `select_keyname` and `select_filter` (typically a single value or a range) Filtering may be applied either to dimension indices from `dim`, or to coordinates from `coords`: -- If `select_dimvals` is of form "_isel", use dimension indices and return `coords_used=nothing` +- If `select_keyname` is of form "_isel", use dimension indices and return `coords_used=nothing` - Otherwise use coordinate values and return actual coordinate values used in `coords_used` -`cidx` are the filtered indices to use: -- if `cidx` is a scalar (an Int), `dim_subset=nothing` and `coords_subset=nothing` indicating this dimension should be squeezed out -- otherwise `cidx` is a Vector and `dim_subset` and `coords_subset` are the filtered subset of `dim` and `coords` +`indices_subset` are the filtered indices to use: +- if `indices_subset` is a scalar (an Int), then `dim_subset=nothing` indicating this dimension should be squeezed out +- otherwise `indices_subset` is a Vector and `dim_subset::PB.NamedDimension` has `dim_subset.size` corresponding to `indices_subset` used from `dim` """ -function dimscoord_subset(dim::PB.NamedDimension, coords::Vector{FieldArray}, select_dimvals::AbstractString, select_filter) - if length(select_dimvals) > 5 && select_dimvals[end-4:end] == "_isel" - @assert select_dimvals[1:end-5] == dim.name - cidx = select_filter +function _dim_subset(dim::PB.NamedDimension, coords::Vector{FieldArray}, select_keyname::AbstractString, select_filter) + if length(select_keyname) > 5 && select_keyname[end-4:end] == "_isel" + @assert select_keyname[1:end-5] == dim.name + indices_subset = select_filter coords_used=nothing else - # find cidx corresponding to a coordinate + # find indices_subset corresponding to a coordinate # find coordinate to use in coords - ccidx = findfirst(c -> c.name == select_dimvals, coords) + ccidx = findfirst(c -> c.name == select_keyname, coords) @assert !isnothing(ccidx) cc = coords[ccidx] - cidx, cvalue = find_indices(cc.values, select_filter) + indices_subset, cvalue = _find_indices(cc.values, select_filter) # reset to the value actually used coords_used = cvalue end - if cidx isa AbstractVector - dim_subset = PB.NamedDimension(dim.name, length(cidx)) - coords_subset = FieldArray[] - for c in coords - is_coordinate(c, dim) || - error("dimension $dim invalid coordinate $c") - if is_boundary_coordinate(c) - # c has 2 dimensions, (bounds, dim.name) - @assert c.dims_coords[2][1] == dim - coords_subset_dims = [ - c.dims_coords[1], # bounds - dim_subset => FieldArray[] - ] - cs = FieldArray(c.name, c.values[:, cidx], coords_subset_dims, c.attributes) - else - # c has 1 dimension == dim - @assert c.dims_coords[1][1] == dim - cs = FieldArray(c.name, c.values[cidx], [dim_subset => FieldArray[]], c.attributes) - end - push!(coords_subset, cs) - end + if indices_subset isa AbstractVector + dim_subset = PB.NamedDimension(dim.name, length(indices_subset)) else # squeeze out dimensions dim_subset = nothing - coords_subset = nothing end - return cidx, dim_subset, coords_subset, coords_used + return indices_subset, dim_subset, coords_used end -# test whether c::FieldArray is a valid coordinate for dimension nd -function is_coordinate(c::FieldArray, nd::PB.NamedDimension) - if length(c.dims_coords) == 1 && c.dims_coords[1][1] == nd - return true - elseif is_boundary_coordinate(c) && c.dims_coords[2][1] == nd - return true +# Select region from coord::FieldArray, given selectdimindices, a Dict(dimname=>dimindices, ...) of +# indices to keep for each dimension that should be filtered. +# NB: +# - intended for use with a `coord::FieldArray` without attached coordinates, any attached coordinates are just dropped +# - any dimensions with single index in selectdimindices are squeezed out +# - any dimension not present in selectdimindices has no filter applied +function _select_dims_indices(coord::FieldArray, selectdimindices::Dict{String, Any}) + dims = PB.get_dimensions(coord) + + select_indices = [] + select_dims = PB.NamedDimension[] + for nd in dims + if haskey(selectdimindices, nd.name) + dimindices = selectdimindices[nd.name] + push!(select_indices, dimindices) + if length(dimindices) > 1 + # subset + push!(select_dims, PB.NamedDimension(nd.name, length(dimindices))) + else + # squeeze out + end + else + # retain full dimension + push!(select_indices, 1:nd.size) + push!(select_dims, nd) + end + end + + select_dims_nocoords = Pair{PB.NamedDimension, Vector{FieldArray}}[ + d => FieldArray[] for d in select_dims + ] + + avalues = Array{eltype(coord.values), length(select_dims)}(undef, [nd.size for nd in select_dims]...) + + if isempty(select_dims) # 0D array + avalues[] = coord.values[select_indices...] + else + avalues[fill(Colon(), length(select_dims))...] .= @view coord.values[select_indices...] end - return false + + return FieldArray(coord.name, avalues, select_dims_nocoords, copy(coord.attributes)) end -is_boundary_coordinate(c::FieldArray) = - length(c.dims_coords) == 2 && c.dims_coords[1][1] == PB.NamedDimension("bnds", 2) \ No newline at end of file diff --git a/src/FieldArray.jl b/src/FieldArray.jl index 8238b5b..b43de99 100644 --- a/src/FieldArray.jl +++ b/src/FieldArray.jl @@ -11,19 +11,32 @@ not for numerically-intensive calculations. # Fields $(TYPEDFIELDS) """ -struct FieldArray{T <: AbstractArray} +struct FieldArray "variable name" name::String "n-dimensional Array of values" - values::T + values::AbstractArray # an abstract type to minimise compilation latency "Names of dimensions with optional attached coordinates" dims_coords::Vector{Pair{PB.NamedDimension, Vector{FieldArray}}} "variable attributes" attributes::Union{Dict{Symbol, Any}, Nothing} end +Base.copy(fa::FieldArray) = FieldArray(fa.name, copy(fa.values), copy(fa.dims_coords), isnothing(fa.attributes) ? nothing : copy(fa.attributes)) -PB.get_dimensions(f::FieldArray) = PB.NamedDimension[first(dc) for dc in f.dims_coords] +PB.get_dimensions(fa::FieldArray) = PB.NamedDimension[first(dc) for dc in fa.dims_coords] + +function PB.get_dimension(fa::FieldArray, dimname::AbstractString) + dimidx = findfirst(dc -> dc[1].name == dimname, fa.dims_coords) + !isnothing(dimidx) || error("FieldArray $(fa.name) has no dimension $dimname") + return fa.dims_coords[dimidx][1] +end + +function PB.get_coordinates(fa::FieldArray, dimname::AbstractString) + dimidx = findfirst(dc -> dc[1].name == dimname, fa.dims_coords) + !isnothing(dimidx) || error("FieldArray $(fa.name) has no dimension $dimname") + return fa.dims_coords[dimidx][2] +end function Base.show(io::IO, fa::FieldArray) get(io, :typeinfo, nothing) === FieldArray || print(io, "FieldArray") @@ -58,9 +71,9 @@ end Base.:*(a::Real, fa_in::FieldArray) = fa_in*a # default name from attributes -default_varnamefull(attributes::Nothing) = "" +_fieldarray_default_varnamefull(attributes::Nothing) = "" -function default_varnamefull(attributes::Dict; include_selectargs=false) +function _fieldarray_default_varnamefull(attributes::Dict; include_selectargs=false) name = get(attributes, :domain_name, "") name *= isempty(name) ? "" : "." name *= get(attributes, :var_name, "") @@ -87,3 +100,269 @@ Get FieldArray from PALEO object `obj` """ function get_array end +################################################################ +# Select regions +############################################################## + +""" + select(fa::FieldArray [, allselectargs::NamedTuple] ; kwargs...) -> fa_out::FieldArray + +Select a region of a [`FieldArray`](@ref) defined by `allselectargs`. + +# Selecting records and regions +`allselectargs` is a `NamedTuple` of form: + + ( = , = , ... [, squeeze_all_single_dims=true]) + +where `` is of form: +- `_isel` to select by array indices: `` may then be a single `Int` to select a single index, or a range `first:last` + to select a range of indices. +- `` to select by coordinate values using the coordinates attached to each dimension: `` may then be a single number + to select a single index corresponding to the nearest value of the corresponding coordinate, or `(first::Float64, last::Float64)` + (a Tuple) to select a range starting at the index with the nearest value of `fr.coords_record` before `first` and ending at + the nearest index after `last`. + +Available dimensions and coordinates `` depend on the FieldArray dimensions (as returned by `get_dimensions`, which will be a subset +of grid spatial dimensions and Domain data dimensions) and corresponding attached coordinates (as returned by `get_coordinates`). + +Some synonyms are defined for commonly used ``, these may require optional `record_dim_name` and `mesh` to be supplied in `kwargs`: + +|synonyms | dimcoordname | comment | +|:------------| :---------------------- |:--------------------------------------------------------------------------------| +| records | <`record_dim_name`>_isel | requires `record_dim_name` to be suppliied, usually tmodel | +| cells, cell | cells_isel | substituting named cells requires `mesh` to be supplied | +| column= | cells_isel = first:last | requires `mesh` to be supplied, select range of cells corresponding to column n | + + +NB: Dimensions corresponding to a selection for a single index or coordinate value are always squeezed out from the returned [`FieldArray`](@ref). +Optional argument `squeeze_all_single_dims` (default `true`) controls whether *all* output dimensions that contain a single index are +squeezed out (including eg a selection for a range that results in a dimension with one index, or where the input `FieldArray` contains a dimension +with a single index). + +Selection arguments used are optionally returned as strings in `fa_out.attributes[:filter_records]` +and `fa_out.attributes[:filter_region]` for use in plot labelling etc. + +# Keyword arguments +- `record_dim_name=nothing`: optionally supply name of record dimension as a String, to substitute for `` `records` + and to define dimension to use to populate `filter_records` attribute. +- `mesh=nothing`: optionally supply a grid from `PALEOboxes.Grids`, to use for substituting cell names, looking up column indices. +- `add_attributes=true`: `true` to transfer attributes from input `fr::FieldRecord` to output `FieldArray` + adding `:filter_region` and `:filter_records`, `false` to omit. +- `update_name=true`: `true` to update output `FieldArray` name to add Domain name prefix, and a suffix generated from `allselectargs` + (NB: requires `add_attributes=true`), `false` to use name from input FieldRecord. + +# Limitations +- it is only possible to select either a single slice or a contiguous range for each dimension, not a set of slices for a Vector of index + or coordinate values. +""" +function select( + @nospecialize(fa::FieldArray), @nospecialize(allselectargs::NamedTuple)=NamedTuple(); # creates a method ambiguity with deprecated form above + mesh=PB.AbstractMeshOrNothing=nothing, # for substitution of cell names etc + add_attributes=true, + record_dim_name::Union{Nothing, AbstractString}=nothing, # for reporting selection filters used + update_name=false, + verbose::Bool=false, +) + faname = fa.name + fa_out = nothing + + try + verbose && println("select (begin): $faname, allselectargs $allselectargs") + + ########################################################################## + # preprocess allselectargs to strip non-selection arguments and fix quirks + ######################################################################## + allselectargs_sort = [String(k) => v for (k, v) in pairs(allselectargs)] + + # get squeeze_all_single_dims from allselectargs_sort + idx_squeeze_all_single_dims = findfirst(x-> x[1] == "squeeze_all_single_dims", allselectargs_sort) + if isnothing(idx_squeeze_all_single_dims) + squeeze_all_single_dims = true + else + _, squeeze_all_single_dims = popat!(allselectargs_sort, idx_squeeze_all_single_dims) + end + + # quirk: column must precede cell selection, so move to front if present + idx_column = findfirst(x-> x[1] == "column", allselectargs_sort) + if !isnothing(idx_column) + column_select = popat!(allselectargs_sort, idx_column) + pushfirst!(allselectargs_sort, column_select) + end + + selectargs_used = fill(false, length(allselectargs_sort)) + + # vector of filtered dim => coords corresponding to select_indices, nothing if dimension squeezed out + dims_coords = Any[dc for dc in fa.dims_coords] + dims = [nd for (nd, c) in fa.dims_coords] + # indices in full array to use. Start with everything, then apply selections from 'allselectargs' + select_indices = Any[1:(nd.size) for nd in dims] + + selectargs_records = OrderedCollections.OrderedDict() # keep track of selection used, to provide as attributes in FieldArray + selectargs_region = OrderedCollections.OrderedDict() # keep track of selection used, to provide as attributes in FieldArray + idx_record_dim = findfirst(nd->nd.name == record_dim_name, dims) + selectargs_applied = [i == idx_record_dim ? selectargs_records : selectargs_region for i in 1:length(dims)] + + _filter_dims_coords!( + select_indices, dims_coords, selectargs_applied, + allselectargs_sort, selectargs_used; + mesh, record_dim_name, + ) + + unused_selectargs = [a for (a, u) in zip(allselectargs_sort, selectargs_used) if !u] + isempty(unused_selectargs) || + error( + "allselectargs contains select filter(s) ", unused_selectargs, " that do not match any dimensions or coordinates !\n", + "allselectargs_sort: ", allselectargs_sort, "\n", + "selectargs_used: ", selectargs_used + ) + + ############################################################## + # filter coordinates + ################################################################ + + alldimindices = Dict{String, Any}(nd.name => indices for (nd, indices) in zip(dims, select_indices)) + for id in 1:length(dims_coords) + if !isnothing(dims_coords[id]) + nd, unfiltered_coords = dims_coords[id] + filtered_coords = FieldArray[_select_dims_indices(fc, alldimindices) for fc in unfiltered_coords] + dims_coords[id] = nd => filtered_coords + end + end + + ############################################# + # squeeze out dimensions and coordinates + ############################################# + + # selections that produce a dimension with a single index are already squeezed out, + # but not dimensions that started with a single index and haven't had a selection applied + if squeeze_all_single_dims + for i in eachindex(dims_coords, select_indices) + if !isnothing(dims_coords[i]) && dims_coords[i][1].size == 1 + verbose && println("get_array: $faname squeezing out dimension $(dims_coords[i][1].name)") + @assert !isa(select_indices[i], Number) + @assert length(select_indices[i]) == 1 + dims_coords[i] = nothing + select_indices[i] = first(select_indices[i]) + end + end + end + + dims_coords_sq = Pair{PB.NamedDimension, Vector{FieldArray}}[dc for dc in dims_coords if !isnothing(dc)] + dims_sq = [d for (d, c) in dims_coords_sq] + + # squeeze out single dimensions by filtering out scalars in select_indices + # NB: lhs (assignment to output array) is contiguous ! + select_indices_sq = [1:length(si) for si in select_indices if si isa AbstractVector] + # consistency check between dimensions and indices of output array + @assert length(dims_sq) == length(select_indices_sq) + for (d_sq, si_sq) in zip(dims_sq, select_indices_sq) + @assert d_sq.size == length(si_sq) + end + + ############################################### + # create output FieldArray + ############################################### + + # create values array + avalues = Array{eltype(fa.values), length(dims_sq)}(undef, [nd.size for nd in dims_sq]...) + + if isempty(select_indices_sq) # 0D array + avalues[] = fa.values[select_indices...] + else + # TODO will need to define similar() for fa.values to do anything other than @view + avalues[fill(Colon(), length(dims_sq))...] .= @view fa.values[select_indices...] + end + + if add_attributes + # add attributes for selection used + attributes = copy(fa.attributes) + if !isempty(selectargs_records) + attributes[:filter_records] = NamedTuple(Symbol(k)=>v for (k, v) in selectargs_records) + end + if !isempty(selectargs_region) + attributes[:filter_region] = NamedTuple(Symbol(k)=>v for (k, v) in selectargs_region) + end + if update_name # Generate name from attributes, including selection suffixes + name = _fieldarray_default_varnamefull(attributes; include_selectargs=true) + else + name = fa.name + end + else + attributes = nothing + name = fa.name + end + + verbose && println("select (end): $faname -> $name, allselectargs $allselectargs") + + fa_out = FieldArray( + name, + avalues, + dims_coords_sq, + attributes, + ) + catch + @error "select exception: $faname, allselectargs $allselectargs" + rethrow() + end + + return fa_out +end + + + +# Apply filters in allselectargs, setting selectargs_used true when filter used. +# Update dimension indices to use in select_indices, filtered dimension (but unfiltered coords) in dims_coords, +# and add corresponding filter to selectargs_applied. +function _filter_dims_coords!( + select_indices::Vector, dims_coords::Vector, selectargs_applied::Vector{<:AbstractDict}, + allselectargs_sort::Vector, selectargs_used::Vector{Bool}; + mesh, record_dim_name, +) + + for k in eachindex(allselectargs_sort, selectargs_used) + select_dimcoordname_orig, select_filter_orig = allselectargs_sort[k] + select_dimcoordname, select_filter = select_dimcoordname_orig, select_filter_orig + + # name substitutions + if select_dimcoordname == "records" && !isnothing(record_dim_name) + select_dimcoordname = record_dim_name*"_isel" + end + if select_dimcoordname in ("cell", "cells") + select_dimcoordname = "cells_isel" + end + if select_dimcoordname == "cells_isel" && !isnothing(mesh) + select_filter = PB.Grids.substitute_cell_names(mesh, select_filter) + end + if select_dimcoordname == "column" && !isnothing(mesh) + select_filter = PB.Grids.column_indices(mesh, select_filter) + select_dimcoordname = "cells_isel" + end + + # try each dimension in turn to see if select_dimcoordname applies to either the dimension or attached coordinates + for dimidx in eachindex(dims_coords, select_indices, selectargs_applied) + if !isnothing(dims_coords[dimidx]) # dimension may have been squeezed out + dim, coords = dims_coords[dimidx] + if select_dimcoordname == dim.name*"_isel" || !isnothing(findfirst(c->c.name==select_dimcoordname, coords)) + !selectargs_used[k] || + error("select $select_dimcoordname_orig => $select_filter_orig matches multiple dimensions or coordinates !") + sidx, dim_subset, coords_filter_used = _dim_subset(dim, coords, select_dimcoordname, select_filter) + select_indices[dimidx] = select_indices[dimidx][sidx] # sidx is relative to current select_indices[dimidx] + if isnothing(dim_subset) + # squeeze out this dimension + dims_coords[dimidx] = nothing + else + dims_coords[dimidx] = dim_subset => coords # no select on coords !! (this is applied later) + end + + # keep track of selection used, to provide as attributes in FieldArray + select_filter_used = isnothing(coords_filter_used) ? select_filter_orig : coords_filter_used + selectargs_applied[dimidx][select_dimcoordname_orig] = select_filter_used + + selectargs_used[k] = true + end + end + end + end + + return nothing +end diff --git a/src/FieldRecord.jl b/src/FieldRecord.jl index 1ce3512..6bcb35c 100644 --- a/src/FieldRecord.jl +++ b/src/FieldRecord.jl @@ -1,19 +1,30 @@ -import Infiltrator + """ - FieldRecord{FieldData <: AbstractData, Space <: AbstractSpace, V, N, Mesh, R} + FieldRecord{FieldData <: AbstractData, Space <: AbstractSpace, V, N, Mesh <: AbstractMeshOrNothing, R} FieldRecord(dataset, f::PB.Field, attributes; [sizehint=nothing]) -A series of `records::R` each containing the `values` from a `PALEOboxes.Field{FieldData, Space, N, V, Mesh}`. +A series of `records::R` each containing values from a `PALEOboxes.Field{FieldData, Space, N, V, Mesh}`. + +Access stored values: +- As a [`FieldArray`](@ref), using [`FieldArray(fr::FieldRecord)`](@ref) or [`get_array`](@ref). +- For scalar data only, as a Vector using `PALEOboxes.get_data`. # Implementation -Fields with array `values` are stored in `records` as a Vector of arrays. -Fields with single `values` (`field_single_element` true) are stored as a Vector of `eltype(Field.values)`. +Storage in `records::R` is an internal format that may have: +- An optimisation to store Fields with single `values` as a Vector of `eltype(Field.values)`, + cf Fields with array `values` are stored in `records` as a Vector of arrays. +- A spatial cartesian grid stored as a linear Vector (eg `mesh` isa `PALEOboxes.Grids.CartesianLinearGrid`) """ -struct FieldRecord{FieldData <: PB.AbstractData, Space <: PB.AbstractSpace, V, N, Mesh, R} +struct FieldRecord{FieldData <: PB.AbstractData, Space <: PB.AbstractSpace, V, N, Mesh <: PB.AbstractMeshOrNothing, R} + "parent dataset containing this FieldRecord" dataset + "series of records in internal format" records::Vector{R} + "data dimensions of stored records" data_dims::NTuple{N, PB.NamedDimension} + "grid defining spatial dimensions of stored records" mesh::Mesh + "variable attributes" attributes::Dict{Symbol, Any} end @@ -32,7 +43,7 @@ function FieldRecord( dataset, f::PB.Field{FieldData, Space, V, N, Mesh}, attributes; sizehint::Union{Nothing, Int}=nothing ) where {FieldData, Space, V, N, Mesh} - if field_single_element(f) + if _field_single_element(f) # if Field contains single elements, store as a Vector of elements records = Vector{eltype(f.values)}() else @@ -52,29 +63,27 @@ function FieldRecord( end -# create a new FieldRecord, containing supplied `existing_values::Vector` data arrays +# create a new FieldRecord, containing supplied `existing_records::Vector` data arrays function FieldRecord( dataset, - existing_values::Vector, + existing_records::Vector, FieldData::Type, data_dims::NTuple{N, PB.NamedDimension}, Space::Type{<:PB.AbstractSpace}, mesh::Mesh, attributes; ) where {N, Mesh} - # check_values( - # existing_values, FieldData, data_dims, data_type, Space, spatial_size(space, mesh), - # ) - if field_single_element(FieldData, N, Space, Mesh) - # assume existing_values is a Vector, with each element to be stored in Field values::V as a length 1 Vector - V = Vector{eltype(existing_values)} + + if _field_single_element(FieldData, N, Space, Mesh) + # assume existing_records is a Vector, with each element to be stored in Field values::V as a length 1 Vector + V = Vector{eltype(existing_records)} else - # assume existing_values is a Vector of Field values::V - V = eltype(existing_values) + # assume existing_records is a Vector of Field values::V + V = eltype(existing_records) end - return FieldRecord{FieldData, Space, V, N, typeof(mesh), eltype(existing_values)}( - dataset, existing_values, data_dims, mesh, attributes, + return FieldRecord{FieldData, Space, V, N, typeof(mesh), eltype(existing_records)}( + dataset, existing_records, data_dims, mesh, attributes, ) end @@ -159,21 +168,20 @@ function Base.show(io::IO, ::MIME"text/plain", @nospecialize(fr::FieldRecord)) end """ - field_single_element(fr::FieldRecord)::Bool - field_single_element(f::Field)::Bool - field_single_element(::Type{FieldRecord})::Bool - field_single_element(::Type{Field})::Bool - + _field_single_element(fr::FieldRecord)::Bool + _field_single_element(f::Field)::Bool + _field_single_element(FieldData, N, Space, Mesh) + Test whether FieldRecord contains Fields with single elements stored as a Vector instead of a Vector of records. -- `field_single_element == false`: Fields contain array `values`, these are stored in FieldRecord `records` as a Vector of arrays. -- `field_single_element == true` Fields contain a single value, stored in FieldRecord `records` as Vector of `eltype(Field.values)`. +- `_field_single_element == false`: Fields contain array `values`, these are stored in FieldRecord `records` as a Vector of arrays. +- `_field_single_element == true` Fields contain a single value, stored in FieldRecord `records` as Vector of `eltype(Field.values)`. NB: this works on Types, and will return false for a field with Space == CellSpace with ncells=1, even though this actually contains a single value. TODO might be clearer to directly use PB.internal_size(Space, mesh) == (1,) which would also handle the CellSpace with ncells=1 case, but this wouldn't work in the type domain (needs an instance of mesh::Mesh) """ -function field_single_element(::Type{FieldData}, N, ::Type{Space}, ::Type{Mesh}) where {FieldData <: PB.AbstractData, Space <: PB.AbstractSpace, Mesh} +function _field_single_element(::Type{FieldData}, N, ::Type{Space}, ::Type{Mesh}) where {FieldData <: PB.AbstractData, Space <: PB.AbstractSpace, Mesh} if PB.field_single_element(FieldData, N) && (Space == PB.ScalarSpace || (Space == PB.CellSpace && Mesh == Nothing)) return true else @@ -181,13 +189,14 @@ function field_single_element(::Type{FieldData}, N, ::Type{Space}, ::Type{Mesh}) end end -field_single_element(::Type{PB.Field{FieldData, Space, V, N, Mesh}}) where {FieldData, Space, V, N, Mesh} = field_single_element(FieldData, N, Space, Mesh) -field_single_element(::Type{FR}) where {FR <: FieldRecord} = field_single_element(eltype(FR)) -field_single_element(f::T) where {T} = field_single_element(T) +_field_single_element(f::PB.Field{FieldData, Space, V, N, Mesh}) where {FieldData, Space, V, N, Mesh} = + _field_single_element(FieldData, N, Space, Mesh) +_field_single_element(fr::FieldRecord{FieldData, Space, V, N, Mesh, R}) where {FieldData, Space, V, N, Mesh, R} = + _field_single_element(FieldData, N, Space, Mesh) function Base.push!(fr::FieldRecord{FieldData, Space, V, N, Mesh, R}, f::PB.Field{FieldData, Space, V, N, Mesh}) where {FieldData, Space, V, N, Mesh, R} - if field_single_element(fr) + if _field_single_element(fr) # if Field contains single elements, store as a Vector of elements push!(fr.records, f.values[]) else @@ -208,7 +217,7 @@ Base.eltype(::Type{FieldRecord{FieldData, Space, V, N, Mesh, R}}) where {FieldDa function Base.getindex(fr::FieldRecord{FieldData, Space, V, N, Mesh, R}, i::Int) where {FieldData, Space, V, N, Mesh, R} - if field_single_element(fr) + if _field_single_element(fr) # if Field contains single elements, FieldRecord stores as a Vector of elements return PB.Field([fr.records[i]], FieldData, fr.data_dims, missing, Space, fr.mesh) else @@ -239,19 +248,19 @@ an `Int` to select a single record, or a range to select multiple records. If `squeeze_all_single_dims=true` (the default), if each record represents a scalar (eg a PALEO Variable with Space PB.ScalarSpace, or a PB.CellSpace variable in a Domain with -a single cell), then data is returned as a Julia Vector. NB: if `records` is an Int, +a single cell), then data is returned as a Vector. NB: if `records` is an Int, the single record requested is returned as a length-1 Vector. Non-scalar data (eg a non-ScalarSpace variable from a Domain with > 1 cell) is returned in internal format as a Vector-of-Vectors. """ -function PB.get_data(fr::FieldRecord; records=nothing, squeeze_all_single_dims=true) +function PB.get_data(@nospecialize(fr::FieldRecord); records=nothing, squeeze_all_single_dims=true) # Optionally squeeze out single cell stored internally as a Vector-of-Vectors, length 1 # (eg a CellSpace Variable in a Domain with 1 cell) squeeze_vecvec = squeeze_all_single_dims && !isempty(fr.records) && length(first(fr.records)) == 1 - if field_single_element(fr) || squeeze_vecvec - if field_single_element(fr) + if _field_single_element(fr) || squeeze_vecvec + if _field_single_element(fr) # internal format already is a Vector records_vec = fr.records else @@ -286,61 +295,27 @@ function PB.get_data(fr::FieldRecord; records=nothing, squeeze_all_single_dims=t end """ - get_array(fr::FieldRecord [, allselectargs::NamedTuple] ; kwargs...) -> fa::FieldArray - [deprecated] get_array(fr::FieldRecord [; kwargs...] [; allselectargs...]) -> fa::FieldArray + get_array( + fr::FieldRecord [, allselectargs::NamedTuple]; + coords=nothing, + lookup_coords=true, + add_attributes=true, + update_name=true, + omit_recorddim_if_constant=false, + ) -> fa_out::FieldArray + + [deprecated] get_array(fr::FieldRecord [; kwargs...] [; allselectargs...]) -> fa_out::FieldArray Return a [`FieldArray`](@ref) containing an Array of values and any attached coordinates, for records and spatial region defined by `allselectargs`. -# Selecting records and regions -`allselectargs` is a `NamedTuple` of form: - - ( = , = , ... [,expand_cartesian=false] [, squeeze_all_single_dims=true]) - -where `` is of form: -- `_isel` to select by array indices: `` may then be a single `Int` to select a single index, or a range `first:last` - to select a range of indices. -- `` to select by coordinate values using the coordinates attached to each dimension: `` may then be a single number - to select a single index corresponding to the nearest value of the corresponding coordinate, or `(first::Float64, last::Float64)` - (a Tuple) to select a range starting at the index with the nearest value of `fr.coords_record` before `first` and ending at - the nearest index after `last`. - -Available dimensions and coordinates `` depend on the FieldRecord dimensions (as returned by `get_dimensions`, which will be a subset -of grid spatial dimensions and Domain data dimensions) and corresponding attached coordinates (as returned by `get_coordinates`). +Combines [`FieldArray(fr::FieldRecord)`](@ref) and [`select(fa::FieldArray)`](@ref) -Some synonyms are defined for commonly used ``: - -|synonyms | dimcoordname | comment | -|:------------| :---------------------- |:--------------------------------------------------| -| records | _isel | is usually tmodel | -| cells, cell | cells_isel | | -| column= | cells_isel = first:last | select range of cells corresponding to column n | - - -NB: Dimensions corresponding to a selection for a single index or coordinate value are always squeezed out from the returned [`FieldArray`](@ref). -Optional argument `squeeze_all_single_dims` (default `true`) controls whether *all* output dimensions that contain a single index are -squeezed out (including eg a selection for a range that results in a dimension with one index, or where the input `FieldRecord` contains a dimension -with a single index). - -Optional argument `expand_cartesian` (default `false`) is only needed for spatially resolved output using a `CartesianLinearGrid`, +Optional `allselectargs` field `expand_cartesian` (default `false`) is only needed for spatially resolved output using a `CartesianLinearGrid`, set to `true` to expand compressed internal data (with spatial dimension `cells`) to a cartesian array (with spatial dimensions eg `lon`, `lat`, `zt`) -Selection arguments used are returned as strings in `fa.attributes` `filter_records` and `filter_region` for use in plot labelling etc. - # Keyword arguments -- `coords=nothing`: replace the attached coordinates from the input `fr::FieldRecord` for one or more dimensions. - Format is a Vector of Pairs of `""=>("", "", ...)`, - eg to replace an nD column atmosphere model default pressure coordinate with a z coordinate: - - coords=["cells"=>("atm.zmid", "atm.zlower", "atm.zupper")] -- `lookup_coordinates=true`: `true` to include coordinates, `false` to omit coordinates (both as selection options and in output `FieldArray`). -- `add_attributes=true`: `true` to transfer attributes from input `fr::FieldRecord` to output `FieldArray`, `false` to omit. -- `update_name=true`: `true` to update output `FieldArray` name to add Domain name prefix and suffix generated from `allselectargs` (NB: requires `add_attributes=true`), - `false` to use name from input FieldRecord. -- `omit_recorddim_if_constant=false`: Specify whether to include multiple (identical) records and record dimension for constant variables - (with attribute `is_constant = true`). PALEO currently always stores these records in the input `fr::FieldRecord`; - set `false` include them in `FieldArray` output, `true` to omit duplicate records and record dimension from output. - +See [`FieldArray(fr::FieldRecord)`](@ref) and [`select(fa::FieldArray)`](@ref) # Examples - select a timeseries for single cell index 3 : @@ -375,15 +350,10 @@ Selection arguments used are returned as strings in `fa.attributes` `filter_reco # Limitations - it is only possible to select either a single slice or a contiguous range for each dimension, not a set of slices for a Vector of index or coordinate values. -- time-varying coordinates (eg a z coordinate in an atmosphere climate model) are not fully handled. If a single record is selected, the correct - values for the coordinate at that time will be returned, but if multiple records are selected, the coordinate will be incorrectly fixed to the - values at the time corresponding to the first record. """ function get_array( @nospecialize(fr::FieldRecord); coords=nothing, - expand_cartesian::Bool=false, # can be overridden in allselectargs - squeeze_all_single_dims::Bool=true, # can be overridden in allselectargs lookup_coords::Bool=true, add_attributes::Bool=true, update_name=true, @@ -399,279 +369,228 @@ function get_array( return get_array( fr, NamedTuple(allselectargs); - coords, expand_cartesian, squeeze_all_single_dims, lookup_coords, add_attributes, update_name, omit_recorddim_if_constant, verbose, + coords, lookup_coords, add_attributes, update_name, omit_recorddim_if_constant, verbose, ) end - function get_array( @nospecialize(fr::FieldRecord), @nospecialize(allselectargs::NamedTuple); # allselectargs::NamedTuple=NamedTuple() creates a method ambiguity with deprecated form above coords=nothing, - expand_cartesian::Bool=false, # can be overridden in allselectargs - squeeze_all_single_dims::Bool=true, # can be overridden in allselectargs lookup_coords::Bool=true, add_attributes::Bool=true, update_name=true, omit_recorddim_if_constant::Bool=false, verbose::Bool=false, ) - frname = default_varnamefull(fr.attributes; include_selectargs=true) - fa = nothing - # @Infiltrator.infiltrate + frname = _fieldarray_default_varnamefull(fr.attributes; include_selectargs=true) + fa_select = nothing + try verbose && println("get_array (begin): $frname, allselectargs $allselectargs lookup_coords $lookup_coords") - if !isnothing(coords) - check_coords_argument(coords) || - error("argument coords should be a Vector of Pairs of \"dim_name\"=>(\"var_name1\", \"var_name2\", ...), eg: [\"z\"=>(\"atm.zmid\", \"atm.zlower\", \"atm.zupper\"), ...]") - end - - ########################################################################## - # preprocess allselectargs to strip non-selection arguments and fix quirks - ######################################################################## - allselectargs_sort = [String(k) => v for (k, v) in pairs(allselectargs)] - # override expand_cartesian - idx_expand_cartesian = findfirst(x-> x[1] == "expand_cartesian", allselectargs_sort) - if !isnothing(idx_expand_cartesian) - _, expand_cartesian = popat!(allselectargs_sort, idx_expand_cartesian) - end - # override squeeze_all_single_dims - idx_squeeze_all_single_dims = findfirst(x-> x[1] == "squeeze_all_single_dims", allselectargs_sort) - if !isnothing(idx_squeeze_all_single_dims) - _, squeeze_all_single_dims = popat!(allselectargs_sort, idx_squeeze_all_single_dims) + if :expand_cartesian in keys(allselectargs) + expand_cartesian = allselectargs.expand_cartesian + # remove :expand_cartesian from allselectargs + allselectargs = Base.structdiff(allselectargs, NamedTuple{(:expand_cartesian,)}((nothing,))) + else + expand_cartesian = false end + + fa_full = FieldArray( + fr; + expand_cartesian, + omit_recorddim_if_constant, + lookup_coords, + coords, + ) - # quirk: column must precede cell selection, so move to front if present - idx_column = findfirst(x-> x[1] == "column", allselectargs_sort) - if !isnothing(idx_column) - column_select = popat!(allselectargs_sort, idx_column) - pushfirst!(allselectargs_sort, column_select) - end + fa_select = select( + fa_full, allselectargs; + record_dim_name=fr.dataset.record_dim.name, + mesh=fr.mesh, + update_name, + add_attributes, + verbose, + ) - selectargs_used = fill(false, length(allselectargs_sort)) - - ################################################################ - # read dimensions - # (TODO reproduce code from get_dimensions so we can also get dims_spatial) - ########################################################## - # order is spatial (from grid), data_dims, then record dimension - dims_spatial = PB.get_dimensions(fr.mesh, space(fr); expand_cartesian) - dims = [dims_spatial..., fr.data_dims..., PB.NamedDimension(fr.dataset.record_dim.name, length(fr))] - recorddimidx = length(dims) - - ################################################################################## - # Read coordinates and apply selection: record dimension first, followed by non-record dimensions - # so we can handle the case where a single record is selected and coordinates are not constant. - # TODO this doesn't handle cases where multiple records are used with non-constant coordinates - ########################################################################################## - - # vector of filtered dim => coords corresponding to select_indices, nothing if dimension squeezed out - dims_coords = Vector{Any}(undef, length(dims)) - # indices in full array to use. Start with everything, then apply selections from 'allselectargs' - select_indices = Any[1:(nd.size) for nd in dims] - - # Record dimension - selectargs_records = OrderedCollections.OrderedDict() # keep track of selection used, to provide as attributes in FieldArray - is_constant = omit_recorddim_if_constant ? variable_is_constant(fr) : false - if !is_constant - # read record coordinates and apply selection - dims_coords[recorddimidx] = dims[recorddimidx] => lookup_coords ? _read_coordinates( - fr, dims[recorddimidx], nothing, expand_cartesian; substitute_coords=coords - ) : FieldArray[] - - _filter_dims_coords( - select_indices, dims_coords, recorddimidx, - allselectargs_sort, selectargs_used, selectargs_records, - fr, - ) - else - # use first record and squeeze out record dimension - select_indices[recorddimidx] = 1 # use first records (all records will be identical) - dims_coords[recorddimidx] = nothing # dimension will be squeezed out - selectargs_records["is_constant"] = "true" - end + catch + @error "get_array exception: $frname, allselectargs $allselectargs" + rethrow() + end - # get record indices to use - ridx_to_use = select_indices[recorddimidx] - - # Non-record dimensions + return fa_select +end - # read non-record coordinates, from first record selected - for i in 1:(length(dims)-1) - dims_coords[i] = dims[i] => lookup_coords ? _read_coordinates( - fr, dims[i], first(ridx_to_use), expand_cartesian; substitute_coords=coords - ) : FieldArray[] - end - selectargs_region = OrderedCollections.OrderedDict() # keep track of selection used, to provide as attributes in FieldArray - - _filter_dims_coords( - select_indices, dims_coords, 1:length(dims)-1, - allselectargs_sort, selectargs_used, selectargs_region, - fr, - ) - - unused_selectargs = [a for (a, u) in zip(allselectargs_sort, selectargs_used) if !u] - isempty(unused_selectargs) || - error( - "allselectargs contains select filter(s) ", unused_selectargs, " that do not match any dimensions or coordinates !\n", - "allselectargs_sort: ", allselectargs_sort, "\n", - "selectargs_used: ", selectargs_used - ) - - ############################################# - # squeeze out dimensions and coordinates - ############################################# - - # selections that produce a dimension with a single index are already squeezed out, - # but not dimensions that started with a single index and haven't had a selection applied - if squeeze_all_single_dims - for i in eachindex(dims_coords, select_indices) - if !isnothing(dims_coords[i]) && dims_coords[i][1].size == 1 - verbose && println("get_array: $frname squeezing out dimension $(dims_coords[i][1].name)") - @assert !isa(select_indices[i], Number) - @assert length(select_indices[i]) == 1 - dims_coords[i] = nothing - select_indices[i] = first(select_indices[i]) - end - end - # record dimension may have just been squeezed out - ridx_to_use = select_indices[recorddimidx] - end - - have_recorddim = !isnothing(dims_coords[recorddimidx]) - - dims_coords_sq = Pair{PB.NamedDimension, Vector{FieldArray}}[dc for dc in dims_coords if !isnothing(dc)] - dims_sq = [d for (d, c) in dims_coords_sq] - - # get non-record dimensions indices to use - nonrecordindicies = select_indices[1:end-1] - # squeeze out single dimensions by filtering out scalars in nonrecordindicies - # NB: lhs (assignment to output array) is contiguous ! - nonrecordindicies_sq = [1:length(nri) for nri in nonrecordindicies if nri isa AbstractVector] - # consistency check between dimensions and indices of output array - @assert length(dims_sq) == length(nonrecordindicies_sq) + have_recorddim - for (d_sq, nri_sq) in zip(dims_sq, nonrecordindicies_sq) # will stop after shortest iter and so skip recorddim if present - @assert d_sq.size == length(nri_sq) - end +""" + FieldRecordValues <: AbstractArray - ############################################### - # create output FieldArray - ############################################### - - # create values array - if field_single_element(fr) - @assert length(dims_coords) <= 2 - if have_recorddim - if length(dims_coords) == 1 || isnothing(dims_coords[1]) - # no spatial dimension - avalues = fr.records[ridx_to_use] - else - # eg a CellSpace variable in a Domain with no grid still has a spatial dimension size 1 - avalues = reshape(fr.records[ridx_to_use], 1, :) - end - else - if length(dims_coords) == 1 || isnothing(dims_coords[1]) - # no spatial dimension - # represent a scalar as a 0D Array - avalues = Array{eltype(fr.records), 0}(undef) - avalues[] = fr.records[ridx_to_use] - else - # eg a CellSpace variable in a Domain with no grid still has a spatial dimension size 1 - # represent as a 1D Array - avalues = [fr.records[ridx_to_use]] - end - end - else - if expand_cartesian && PB.has_internal_cartesian(fr.mesh, space(fr)) # !isempty(dims_spatial) - expand_fn = x -> PB.Grids.internal_to_cartesian(fr.mesh, x) - aeltype = Union{Missing, eltype(first(fr.records))} - else - expand_fn = identity - aeltype = eltype(first(fr.records)) - end - avalues = Array{aeltype, length(dims_sq)}(undef, [nd.size for nd in dims_sq]...) - if have_recorddim - _fill_array_from_records(avalues, Tuple(nonrecordindicies_sq), fr.records, expand_fn, ridx_to_use, Tuple(nonrecordindicies)) - else - if isempty(nonrecordindicies_sq) - avalues[] = expand_fn(fr.records[ridx_to_use])[nonrecordindicies...] - else - avalues[nonrecordindicies_sq...] .= @view expand_fn(fr.records[ridx_to_use])[nonrecordindicies...] - end - end - end - - if add_attributes - # add attributes for selection used - attributes = copy(fr.attributes) - if !isempty(selectargs_records) - attributes[:filter_records] = NamedTuple(Symbol(k)=>v for (k, v) in selectargs_records) - end - if !isempty(selectargs_region) - attributes[:filter_region] = NamedTuple(Symbol(k)=>v for (k, v) in selectargs_region) - end - if update_name # Generate name from attributes, including selection suffixes - name = default_varnamefull(attributes; include_selectargs=true) - else - name = fr.name - end - else - attributes = nothing - name = fr.name - end +Internal use by FieldArray: provides an n-dimensional Array view on FieldArray records +""" +struct FieldRecordValues{E, R, LI, NumSpatialDims, NumDataDims, HasRecordDim, FieldSingleElement, N} <: AbstractArray{E, N} + records::Vector{R} + linear_index::LI # Nothing or Array{Union{Missing, Int32}, NumSpatialDims} +end - +Base.eltype(::Type{FieldRecordValues{E}}) where {E} = E + +# HasRecordDim true, FieldSingleElement false +Base.size(rv::FieldRecordValues{E, R, Nothing, NumSpatialDims, NumDataDims, true, false, N}) where { + E, R, NumSpatialDims, NumDataDims, N, +} = (size(first(rv.records))..., length(rv.records)) +Base.getindex(rv::FieldRecordValues{E, R, Nothing, 1, 0, true, false, N}, i, ridx) where {E, R, N} = rv.records[ridx][i] +Base.getindex(rv::FieldRecordValues{E, R, Nothing, 2, 0, true, false, N}, i, j, ridx) where {E, R, N} = rv.records[ridx][i, j] +Base.getindex(rv::FieldRecordValues{E, R, Nothing, 3, 0, true, false, N}, i, j, k, ridx) where {E, R, N} = rv.records[ridx][i, j, k] +Base.getindex(rv::FieldRecordValues{E, R, Nothing, 0, 1, true, false, N}, i, ridx) where {E, R, N} = rv.records[ridx][i] +Base.getindex(rv::FieldRecordValues{E, R, Nothing, 0, 2, true, false, N}, i, j, ridx) where {E, R, N} = rv.records[ridx][i, j] +Base.getindex(rv::FieldRecordValues{E, R, Nothing, 1, 1, true, false, N}, i, j, ridx) where {E, R, N} = rv.records[ridx][i, j] + +Base.size(rv::FieldRecordValues{E, R, LI, NumSpatialDims, NumDataDims, true, false, N}) where { + E, R, LI, NumSpatialDims, NumDataDims, N, +} = (size(rv.linear_index)..., size(first(rv.records))[2:end]..., length(rv.records)) +function Base.getindex(rv::FieldRecordValues{E, R, LI, 2, 0, true, false, N}, i, j, ridx) where {E, R, LI, N} + linear_idx = rv.linear_index[i, j] + return ismissing(linear_idx) ? missing : rv.records[ridx][linear_idx] +end +function Base.getindex(rv::FieldRecordValues{E, R, LI, 3, 0, true, false, N}, i, j, k, ridx) where {E, R, LI, N} + linear_idx = rv.linear_index[i, j, k] + return ismissing(linear_idx) ? missing : rv.records[ridx][linear_idx] +end - verbose && println("get_array (end): $frname -> $name, allselectargs $allselectargs") +# HasRecordDim true, FieldSingleElement true +Base.size(rv::FieldRecordValues{E, R, Nothing, 0, 0, true, true, N}) where {E, R, N} = (length(rv.records),) +Base.getindex(rv::FieldRecordValues{E, R, Nothing, 0, 0, true, true, N}, ridx) where {E, R, N } = rv.records[ridx] - fa = FieldArray( - name, - avalues, - dims_coords_sq, - attributes, - ) - catch - @error "get_array exception: $frname, allselectargs $allselectargs" - rethrow() - end +Base.size(rv::FieldRecordValues{E, R, Nothing, 1, 0, true, true, N}) where {E, R, N} = (1, length(rv.records),) +function Base.getindex(rv::FieldRecordValues{E, R, Nothing, 1, 0, true, true, N}, i, ridx) where {E, R, N} + i == 1 || throw(BoundsError(rv, i)) + return rv.records[ridx] +end - return fa +# HasRecordDim false, FieldSingleElement false +Base.size(rv::FieldRecordValues{E, R, Nothing, NumSpatialDims, NumDataDims, false, false, N}) where { + E, R, NumSpatialDims, NumDataDims, N, +} = size(first(rv.records)) +Base.getindex(rv::FieldRecordValues{E, R, Nothing, 1, 0, false, false, N}, i) where {E, R, N} = rv.records[1][i] +Base.getindex(rv::FieldRecordValues{E, R, Nothing, 2, 0, false, false, N}, i, j) where {E, R, N} = rv.records[1][i, j] +Base.getindex(rv::FieldRecordValues{E, R, Nothing, 3, 0, false, false, N}, i, j, k) where {E, R, N} = rv.records[1][i, j, k] +Base.getindex(rv::FieldRecordValues{E, R, Nothing, 0, 1, false, false, N}, i) where {E, R, N} = rv.records[1][i] +Base.getindex(rv::FieldRecordValues{E, R, Nothing, 0, 2, false, false, N}, i, j) where {E, R, N} = rv.records[1][i, j] +Base.getindex(rv::FieldRecordValues{E, R, Nothing, 1, 1, false, false, N}, i, j) where {E, R, N} = rv.records[1][i, j] + +Base.size(rv::FieldRecordValues{E, R, LI, NumSpatialDims, NumDataDims, false, false, N}) where { + E, R, LI, NumSpatialDims, NumDataDims, N, +} = (size(rv.linear_index)..., size(first(rv.records))[2:end]) +function Base.getindex(rv::FieldRecordValues{E, R, LI, 2, 0, false, false, N}, i, j) where {E, R, LI, N} + linear_idx = rv.linear_index[i, j] + return ismissing(linear_idx) ? missing : rv.records[1][linear_idx] +end +function Base.getindex(rv::FieldRecordValues{E, R, LI, 3, 0, false, false, N}, i, j, k) where {E, R, LI, N} + linear_idx = rv.linear_index[i, j, k] + return ismissing(linear_idx) ? missing : rv.records[1][linear_idx] end -# function barrier optimisation -function _fill_array_from_records(avalues, nonrecordindicies_sq, records, expand_fn, ridx_to_use, nonrecordindicies) - for (riselect, ri) in enumerate(ridx_to_use) - if isempty(nonrecordindicies_sq) - avalues[riselect] = expand_fn(records[ri])[nonrecordindicies...] - else - avalues[nonrecordindicies_sq..., riselect] .= @view expand_fn(records[ri])[nonrecordindicies...] - end - end - return nothing +# HasRecordDim false, FieldSingleElement true +Base.size(rv::FieldRecordValues{E, R, Nothing, 0, 0, false, true, N}) where {E, R, N} = () +Base.getindex(rv::FieldRecordValues{E, R, Nothing, 0, 0, false, true, N}) where {E, R, N} = rv.records[1] + +Base.size(rv::FieldRecordValues{E, R, Nothing, 1, 0, false, true, N}) where {E, R, N} = (1, ) +function Base.getindex(rv::FieldRecordValues{E, R, Nothing, 1, 0, false, true, N}, i) where {E, R, N} + i == 1 || throw(BoundsError(rv, i)) + return rv.records[1] end -# check 'coords' of form [] or ["z"=>[ ... ], ] or ["z"=>(...),] -check_coords_argument(coords) = - isa(coords, AbstractVector) && ( - isempty(coords) || ( - isa(coords, AbstractVector{<:Pair}) && - isa(first(first(coords)), AbstractString) && - isa(last(first(coords)), Union{AbstractVector, Tuple}) - ) +""" + FieldArray( + fr::FieldRecord; + expand_cartesian=true, + omit_recorddim_if_constant=true, + lookup_coords=true, + coords=nothing, + ) + +Construct [`FieldArray`](@ref) from all records in `fr::FieldRecord` + +Provides a view of internal storage format of `FieldRecord` as an n-dimensional Array. + +# Keyword arguments +- `expand_cartesian`: (spatially resolved output using a `CartesianLinearGrid` only), `true` to expand compressed internal data + (with spatial dimension `cells`) to a cartesian array (with spatial dimensions eg `lon`, `lat`, `zt`) +- `omit_recorddim_if_constant`: Specify whether to include multiple (identical) records and record dimension for constant variables + (with attribute `is_constant = true`). PALEO currently always stores these records in the input `fr::FieldRecord`; + set `false` include them in `FieldArray` output, `true` to omit duplicate records and record dimension from output. +- `lookup_coords`: `true` to include coordinates, `false` to omit coordinates. +- `coords`: replace the attached coordinates from the input `fr::FieldRecord` for one or more dimensions. + Format is a Vector of Pairs of `""=>("", "", ...)`, + eg to replace an nD column atmosphere model default pressure coordinate with a z coordinate: + + coords=["cells"=>("atm.zmid", "atm.zlower", "atm.zupper")] +""" +function FieldArray( + @nospecialize(fr::FieldRecord); + expand_cartesian::Bool=false, + omit_recorddim_if_constant::Bool=true, + lookup_coords::Bool=true, + coords=nothing, +) + + is_constant = omit_recorddim_if_constant ? _variable_is_constant(fr) : false + _field_single_element(fr) + + dims_spatial = PB.get_dimensions(fr.mesh, space(fr); expand_cartesian) + E = _field_single_element(fr) ? eltype(fr.records) : eltype(eltype(fr.records)) # eltype of values + if expand_cartesian && PB.has_internal_cartesian(fr.mesh, space(fr)) + linear_index = fr.mesh.linear_index + E = Union{Missing, E} + else + linear_index = nothing + end + + values = FieldRecordValues{ + E, + eltype(fr.records), + typeof(linear_index), + length(dims_spatial), + length(fr.data_dims), + !is_constant, + _field_single_element(fr), + length(dims_spatial) + length(fr.data_dims) + !is_constant, + }(fr.records, linear_index) + + dims = [dims_spatial..., fr.data_dims...] + if !is_constant + push!(dims, PB.NamedDimension(fr.dataset.record_dim.name, length(fr))) + end + + dims_coords = Vector{Pair{PB.NamedDimension, Vector{FieldArray}}}(undef, length(dims)) + for i in 1:(length(dims)) + dims_coords[i] = dims[i] => lookup_coords ? _read_coordinates( + fr, dims[i]; expand_cartesian, coords, + ) : FieldArray[] + end + + fa = FieldArray( + fr.name, + values, + dims_coords, + fr.attributes, ) -# Read coordinates attached to dimension 'dim' -# For record coordinate, ridx_to_use = nothing -# For other coordinates, ridx_to_use = record index to use for coordinate (ie limitation only one record in use, or coordinate is constant) + return fa +end + function _read_coordinates( - fr::FieldRecord, dim::PB.NamedDimension, ridx_to_use::Union{Int, Nothing}, expand_cartesian::Bool; - substitute_coords=nothing, + fr::FieldRecord, dim::PB.NamedDimension; + expand_cartesian=true, + coords=nothing, ) coord_names = nothing - if !isnothing(substitute_coords) - for (sdim_name, scoord_names) in substitute_coords + if !isnothing(coords) + _check_coordinates_argument(coords) || + error("argument coords should be a Vector of Pairs of \"dim_name\"=>(\"var_name1\", \"var_name2\", ...), eg: [\"z\"=>(\"atm.zmid\", \"atm.zlower\", \"atm.zupper\"), ...]") + + for (sdim_name, scoord_names) in coords if sdim_name == dim.name coord_names = scoord_names break @@ -683,82 +602,34 @@ function _read_coordinates( coord_names = PB.get_coordinates(fr, dim.name; expand_cartesian) end - coords = FieldArray[] + coord_values = FieldArray[] for cn in coord_names cfr = PB.get_field(fr.dataset, cn) - if isnothing(ridx_to_use) - coord = get_array(cfr; lookup_coords=false, update_name=false) - else - coord = get_array(cfr, (records=ridx_to_use, ); lookup_coords=false, update_name=false) - end - is_coordinate(coord, dim) || - error("dimension $dim coord name $cn read invalid coordinate $coord") - push!(coords, coord) + cv = FieldArray(cfr; lookup_coords=false, expand_cartesian, omit_recorddim_if_constant=true) + is_coordinate(cv, dim) || + error("dimension $dim coord name $cn read invalid coordinate $cv") + push!(coord_values, cv) end - return coords + return coord_values end -# Apply filters in allselectargs to dimension indices dim_indices_to_filter, -# returning indices to use in select_indices and filtered dimension and coords in dims_coords -function _filter_dims_coords( - select_indices::Vector, dims_coords::Vector, dims_indices_to_filter, - allselectargs_sort::Vector, selectargs_used::Vector, selectargs_applied::AbstractDict, - fr::FieldRecord, -) - - for k in eachindex(allselectargs_sort, selectargs_used) - select_dimcoordname_orig, select_filter_orig = allselectargs_sort[k] - select_dimcoordname, select_filter = select_dimcoordname_orig, select_filter_orig - - # name substitutions - if select_dimcoordname == "records" - select_dimcoordname = fr.dataset.record_dim.name*"_isel" - end - if select_dimcoordname in ("cell", "cells") - select_dimcoordname = "cells_isel" - end - if select_dimcoordname == "cells_isel" - select_filter = PB.Grids.substitute_cell_names(fr.mesh, select_filter) - end - if select_dimcoordname == "column" - select_filter = PB.Grids.column_indices(fr.mesh, select_filter) - select_dimcoordname = "cells_isel" - end +# check 'coords' of form [] or ["z"=>[ ... ], ] or ["z"=>(...),] +_check_coordinates_argument(coords) = + isa(coords, AbstractVector) && ( + isempty(coords) || ( + isa(coords, AbstractVector{<:Pair}) && + isa(first(first(coords)), AbstractString) && + isa(last(first(coords)), Union{AbstractVector, Tuple}) + ) + ) - # try each dimension in turn to see if select_dimcoordname applies to either the dimension or attached coordinates - for dimidx in dims_indices_to_filter - if !isnothing(dims_coords[dimidx]) # dimension may have been squeezed out - dim, coords = dims_coords[dimidx] - if select_dimcoordname == dim.name*"_isel" || !isnothing(findfirst(c->c.name==select_dimcoordname, coords)) - !selectargs_used[k] || - error("select $select_dimcoordname_orig => $select_filter_orig matches multiple dimensions or coordinates !") - sidx, dim_subset, coords_subset, coords_filter_used = dimscoord_subset(dim, coords, select_dimcoordname, select_filter) - select_indices[dimidx] = select_indices[dimidx][sidx] # sidx is relative to current select_indices[dimidx] - if isnothing(dim_subset) - # squeeze out this dimension - dims_coords[dimidx] = nothing - else - dims_coords[dimidx] = dim_subset => coords_subset - end - - # keep track of selection used, to provide as attributes in FieldArray - select_filter_used = isnothing(coords_filter_used) ? select_filter_orig : coords_filter_used - selectargs_applied[select_dimcoordname_orig] = select_filter_used - - selectargs_used[k] = true - end - end - end - end - return nothing -end # TODO time-independent variables are indicated by setting :is_constant attribute, # but this is only recently added and FieldRecord still stores a record for every timestep # This uses :is_constant to guess if a variable is constant, and then checks the values really are constant -function variable_is_constant(fr::FieldRecord) +function _variable_is_constant(fr::FieldRecord) is_constant = false # PALEO indicates time-independent variables by setting :is_constant attribute, @@ -793,13 +664,13 @@ Dimension names and size are cross-checked against supplied names in `avalues_di This is effectively the inverse of: - a = get_array( - fr::FieldRecord, (expand_cartesian, squeeze_all_single_dims=false); - lookup_coordinates=false, add_attributes=false, omit_recorddim_if_constant=true, + a = FieldArray( + fr::FieldRecord; + lookup_coords=false, omit_recorddim_if_constant=true, expand_cartesian=true, ) avalues = a.values avalues_dimnames = [nd.name for (nd, _) in a.dims_coords] - attributes = fr.attributes + attributes = a.attributes """ function FieldRecord( dataset, @@ -846,7 +717,7 @@ function FieldRecord( end end - if field_single_element(FieldData, length(data_dims), Space, typeof(dataset.grid)) + if _field_single_element(FieldData, length(data_dims), Space, typeof(dataset.grid)) if is_constant @assert length(avalues) == 1 # avalues is 0D Array if a scalar variable diff --git a/src/PlotRecipes.jl b/src/PlotRecipes.jl index 1e1d79f..bd94544 100644 --- a/src/PlotRecipes.jl +++ b/src/PlotRecipes.jl @@ -267,7 +267,7 @@ with the Vector index prepended to `labelprefix` to identify the plot series (un - `labelattribute=nothing`: FieldArray attribute to use as label """ RecipesBase.@recipe function f( - fa::FieldArray; + @nospecialize(fa::FieldArray); swap_xy=false, mult_y_coord=1.0, structfield=nothing,