From 69fa57efa327425eab6548f6c13bf4603389af74 Mon Sep 17 00:00:00 2001 From: Stuart Daines Date: Sun, 29 Dec 2024 20:15:20 +0000 Subject: [PATCH 1/4] Work-in-progress updating dimensions and coordinates Simplify and align with Common Data Model See corresponding PALEOboxes PR https://github.com/PALEOtoolkit/PALEOboxes.jl/pull/151 TODO: this commit still includes old and now unused code Changes: - Add FixedCoord from PALEOboxes - generalized to handle 'bounds' ie Array size(2, ncells) as a simpler way of specifying cell edges (cf CF conventions) - OutputMemory now based on FieldRecord, not DataFrame - save / load to jld2 removed - updates to netcdf output format - Simpler and more generic get_array -> FieldArray from FieldRecord - replaces old approach using get_region --- Project.toml | 8 +- src/CoordsDims.jl | 270 ++++++++++ src/FieldArray.jl | 133 +++-- src/FieldRecord.jl | 518 +++++++++++++++---- src/OutputWriters.jl | 973 +++++++++++++++++++---------------- src/PALEOmodel.jl | 5 + src/PlotRecipes.jl | 50 +- src/Regions.jl | 194 +++++++ test/runfieldtests.jl | 170 +++--- test/runoutputwritertests.jl | 26 +- 10 files changed, 1652 insertions(+), 695 deletions(-) create mode 100644 src/CoordsDims.jl create mode 100644 src/Regions.jl diff --git a/Project.toml b/Project.toml index 43a1551..fc3390a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PALEOmodel" uuid = "bf7b4fbe-ccb1-42c5-83c2-e6e9378b660c" authors = ["Stuart Daines "] -version = "0.15.49" +version = "0.16.0" [deps] DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" @@ -9,12 +9,12 @@ DiffRules = "b552c78f-8df3-52c6-915a-8e097449b14b" FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Infiltrator = "5903a43b-9cc3-4c30-8d17-598619ec4e9b" -JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" MultiFloats = "bdf0d083-296b-4888-a5b6-7498122e68a5" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56" +OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" PALEOboxes = "804b410e-d900-4b2a-9ecd-f5a06d4c1fd4" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" @@ -37,11 +37,11 @@ DiffRules = "1.0" FileIO = "1.0" ForwardDiff = "0.10" Infiltrator = "1.0" -JLD2 = "0.4, 0.5" MultiFloats = "1.0, 2.0" NCDatasets = "0.12, 0.14.2" NLsolve = "4.5" -PALEOboxes = "0.21.37" +OrderedCollections = "1.7.0" +PALEOboxes = "0.22" RecipesBase = "1.2" Requires = "1.0" Revise = "3.1" diff --git a/src/CoordsDims.jl b/src/CoordsDims.jl new file mode 100644 index 0000000..79a9e67 --- /dev/null +++ b/src/CoordsDims.jl @@ -0,0 +1,270 @@ + +################################ +# Coordinates +################################# + +""" + FixedCoord(name, values::Array{Float64, N}, attributes::Dict) + +A fixed (state independent) coordinate + +N = 1: a cell-centre coordinate, size(values) = (ncells, ) +N = 2: a boundary coordinate, size(values) = (2, ncells) +""" +mutable struct FixedCoord{N} + name::String + values::Array{Float64, N} + attributes::Dict{Symbol, Any} +end + +is_boundary_coordinate(fc::FixedCoord) = ndims(fc.values) == 2 && size(fc.values, 1) == 2 + +""" + append_units(name::AbstractString, attributes) -> "name (units)" + +Utility function to append variable units string to a variable name for display. +""" +function append_units(name::AbstractString, attributes::Dict{Symbol, Any}) + units = get(attributes, :units, "") + if isempty(units) + return name + else + return name*" ($units)" + end +end + +append_units(name::AbstractString, attributes::Nothing) = name + +""" + build_coords_edges(coords_vec::Vector{FixedCoord}) -> Vector{Float64} + +Build a vector of coordinate edges (length `n+1``) from `coords_vec`, assuming the PALEO +convention that `coords_vec` contains three elements with +cell midpoints, lower edges, upper edges each of length `n`, in that order. + +Falls back to just returning the first entry in `coords_vec` for other cases. +""" +function build_coords_edges(dim::PB.NamedDimension, coords_vec::Vector{FixedCoord}) + + if isempty(coords_vec) + # no coordinate - just return indices + co_values = collect(1:(dim.size+1)) + co_label = dim.name + elseif length(coords_vec) == 1 && is_boundary_coordinate(coords_vec[1]) + co = coords_vec[1] + co_values = vcat(co.values[1, :], co.values[2, end]) + co_label = append_units(co.name, co.attributes) + elseif length(coords_vec) == 1 || length(coords_vec) > 3 + # 1 coordinate or something we don't understand - take first + co = first(coords_vec) + co_values = co.values + co_label = append_units(co.name, co.attributes) + elseif length(coords_vec) == 2 && is_boundary_coordinate(coords_vec[2]) + # assume mid, boundary + co = coords_vec[2] + co_values = vcat(co.values[1, :], co.values[2, end]) + co_label = append_units(co.name, co.attributes) + elseif length(coords_vec) in (2, 3) + # 2 coordinates assume lower, upper edges + # 3 coordinates assume mid, lower, upper + co_lower = coords_vec[end-1] + co_upper = coords_vec[end] + co_label = append_units(co_lower.name*", "*co_upper.name, co_lower.attributes) + first(co_lower.values) < first(co_upper.values) || + @warn "build_coords_edges: $co_label co_lower is > co_upper - check model grid" + if co_lower.values[end] > co_lower.values[1] # ascending order + co_lower.values[2:end] == co_upper.values[1:end-1] || + @warn "build_coords_edges: $co_label lower and upper edges don't match" + co_values = [co_lower.values; co_upper.values[end]] + else # descending order + co_lower.values[1:end-1] == co_upper.values[2:end] || + @warn "build_coords_edges: $co_label lower and upper edges don't match" + co_values = [co_upper.values[1]; co_lower.values] + end + + end + + return co_values, co_label +end + +"guess coordinate edges from midpoints, assuming uniform spacing" +function guess_coords_edges(x_midpoints) + first_x = x_midpoints[1] - 0.5*(x_midpoints[2] - x_midpoints[1]) + last_x = x_midpoints[end] + 0.5*(x_midpoints[end] - x_midpoints[end-1]) + return [first_x; 0.5.*(x_midpoints[1:end-1] .+ x_midpoints[2:end]); last_x] +end + + +function get_region(fc::FixedCoord, indices::AbstractVector) + return FixedCoord(fc.name, fc.values[indices], fc.attributes) +end + +function get_region(fcv::Vector{FixedCoord}, indices::AbstractVector) + return [FixedCoord(fc.name, fc.values[indices], fc.attributes) for fc in fcv] +end + + +"find indices of coord from first before range[1] to first after range[2]" +function find_indices(coord::AbstractVector, range) + length(range) == 2 || + throw(ArgumentError("find_indices: length(range) != 2 $range")) + + idxstart = findlast(t -> t<=range[1], coord) + isnothing(idxstart) && (idxstart = 1) + + idxend = findfirst(t -> t>=range[2], coord) + isnothing(idxend) && (idxend = length(coord)) + + return idxstart:idxend, (coord[idxstart], coord[idxend]) +end + +"find indices of coord nearest val" +function find_indices(coord::AbstractVector, val::Real) + idx = 1 + for i in 1:length(coord) + if abs(coord[i] - val) < abs(coord[idx] - val) + idx = i + end + end + + return idx, coord[idx] +end + +""" + dimscoord_subset(dim::PB.NamedDimension, coords::Vector{FixedCoord}, select_dimvals::AbstractString, select_filter) + -> cidx, dim_subset, coords_subset, coords_used + +Filter dimension `dim` according to key `select_dimvals` and `select_filter` (typically a single value or a range) + +Filtering may be applied either to dimension indices from `dim`, or to coordinates from `coords`: +- If `select_dimvals` is of form "_isel", use dimension indices and return `coords_used=nothing` +- Otherwise use coordinate values and return actual coordinate values used in `coords_used` + +`cidx` are the filtered indices to use: +- if `cidx` is a scalar (an Int), `dim_subset=nothing` and `coords_subset=nothing` indicating this dimension should be squeezed out +- otherwise `cidx` is a Vector and `dim_subset` and `coords_subset` are the filtered subset of `dim` and `coords` +""" +function dimscoord_subset(dim::PB.NamedDimension, coords::Vector{FixedCoord}, select_dimvals::AbstractString, select_filter) + if length(select_dimvals) > 5 && select_dimvals[end-4:end] == "_isel" + @assert select_dimvals[1:end-5] == dim.name + cidx = select_filter + coords_used=nothing + else + # find cidx corresponding to a coordinate + # find coordinate to use in coords + ccidx = findfirst(c -> c.name == select_dimvals, coords) + @assert !isnothing(ccidx) + cc = coords[ccidx] + cidx, cvalue = find_indices(cc.values, select_filter) + # reset to the value actually used + coords_used = cvalue + end + + if cidx isa AbstractVector + dim_subset = PB.NamedDimension(dim.name, length(cidx)) + coords_subset = FixedCoord[] + for c in coords + if is_boundary_coordinate(c) + cs = FixedCoord(c.name, c.values[:, cidx], c.attributes) + else + cs = FixedCoord(c.name, c.values[cidx], c.attributes) + end + push!(coords_subset, cs) + end + else + # squeeze out dimensions + dim_subset = nothing + coords_subset = nothing + end + + return cidx, dim_subset, coords_subset, coords_used +end + + +#= +################################################# +# Dimensions +##################################################### + +""" + NamedDimension + +A named dimension, with optional attached fixed coordinates `coords` + +PALEO convention is that where possible `coords` contains three elements, for cell +midpoints, lower edges, upper edges, in that order. +""" +mutable struct NamedDimension + name::String + size::Int64 + coords::Vector{FixedCoord} # may be empty +end + +"create from size only (no coords)" +function NamedDimension(name, size::Integer) + return NamedDimension( + name, + size, + FixedCoord[], + ) +end + +"create from coord mid-points" +function NamedDimension(name, coord::AbstractVector) + return NamedDimension( + name, + length(coord), + [ + FixedCoord(name, coord, Dict{Symbol, Any}()), + ] + ) +end + +"create from coord mid-points and edges" +function NamedDimension(name, coord::AbstractVector, coord_edges::AbstractVector) + if coord[end] > coord[1] + # ascending order + coord_lower = coord_edges[1:end-1] + coord_upper = coord_edges[2:end] + else + # descending order + coord_lower = coord_edges[2:end] + coord_upper = coord_edges[1:end-1] + end + return NamedDimension( + name, + length(coord), + [ + FixedCoord(name, coord, Dict{Symbol, Any}()), + FixedCoord(name*"_lower", coord_lower, Dict{Symbol, Any}()), + FixedCoord(name*"_upper", coord_upper, Dict{Symbol, Any}()), + ] + ) +end + +function get_region(nd::NamedDimension, indices::AbstractVector) + return NamedDimension(nd.name, length(indices), get_region(nd.coords, indices)) +end + +""" + build_coords_edges(nd::NamedDimension) -> Vector{Float64} + +Call [`build_coords_edges`](@ref)(nd.coords), or fallback to just returning indices +if no coords present. +""" +function build_coords_edges(nd::NamedDimension) + if !isempty(nd.coords) + return build_coords_edges(nd.coords) + else + @warn "no coords for NamedDimension $(nd.name), returning indices" + return collect(1:nd.size), nd.name*" (indices)" + end +end + +function Base.show(io::IO, nd::NamedDimension) + print(io, "NamedDimension(name=", nd.name, ", size=", nd.size, ", coords=(") + join(io, [c.name for c in nd.coords], ", ") + print(io, "))") + return nothing +end + =# \ No newline at end of file diff --git a/src/FieldArray.jl b/src/FieldArray.jl index 1edfec0..6bdbea8 100644 --- a/src/FieldArray.jl +++ b/src/FieldArray.jl @@ -1,3 +1,4 @@ +import Infiltrator """ FieldArray @@ -8,29 +9,41 @@ Array with named dimensions and optional coordinates. NB: this aims to be simple and generic, not efficient !!! Intended for representing model output, not for numerically-intensive calculations. """ -struct FieldArray{T, N} +struct FieldArray{T} name::String values::T - dims::NTuple{N, PB.NamedDimension} + dims_coords::Vector{Pair{PB.NamedDimension, Vector{FixedCoord}}} attributes::Union{Dict, Nothing} end +PB.get_dimensions(f::FieldArray) = PB.NamedDimension[first(dc) for dc in f.dims_coords] function Base.show(io::IO, fa::FieldArray) print(io, - "FieldArray(name=\"", fa.name, "\", eltype=", eltype(fa.values),", size=", size(fa.values), ", dims=", fa.dims, ")" + "FieldArray(name=\"", fa.name, "\", eltype=", eltype(fa.values), ", size=", size(fa.values), ", dims_coords=", fa.dims_coords, ")" ) end function Base.show(io::IO, ::MIME"text/plain", fa::FieldArray) - println(io, "FieldArray(name=\"", fa.name, "\", eltype=", eltype(fa.values),")") - println(io, " dims: ", fa.dims) + println(io, "FieldArray(name=\"", fa.name, "\", eltype=", eltype(fa.values), ", size=", size(fa.values), ")") + # println(io, " dims_coords:") + # for (nd, coords) in fa.dims_coords + # println(io, " ", nd, " => ") + # for c in coords + # println(io, " ", c) + # end + # end + + println(io, " dims_coords:") + for (nd, coords) in fa.dims_coords + println(io, " ", nd, " => ", [c.name for c in coords]) + end println(io, " attributes: ", fa.attributes) end # basic arithmetic operations function Base.:*(fa_in::FieldArray, a::Real) - fa_out = FieldArray("$a*"*fa_in.name, a.*fa_in.values, fa_in.dims, copy(fa_in.attributes)) + fa_out = FieldArray("$a*"*fa_in.name, a.*fa_in.values, fa_in.dims_coords, copy(fa_in.attributes)) return fa_out end Base.:*(a::Real, fa_in::FieldArray) = fa_in*a @@ -64,52 +77,53 @@ Get FieldArray from PALEO object `obj` function get_array end """ - get_array(f::Field [, selectargs::NamedTuple]; [attributes=nothing]) -> FieldArray + get_array(dataset, recordidx, f::Field [, selectargs::NamedTuple]; [attributes=nothing]) -> FieldArray Return a [`FieldArray`](@ref) containing `f::Field` data values and any attached coordinates, for the spatial region defined by `selectargs`. Available `selectargs` depend on the grid `f.mesh`, and -are passed to `PB.Grids.get_region`. +are passed to `get_region`. `attributes` (if present) are added to `FieldArray` """ function get_array( - f::PB.Field{D, PB.ScalarSpace, V, N, M}, selectargs::NamedTuple=NamedTuple(); - attributes=nothing, -) where {D, V, N, M} + f::PB.Field{FieldData, PB.ScalarSpace, V, N, Mesh}, selectargs::NamedTuple=NamedTuple(); + attributes=nothing, coord_source, +) where {FieldData, V, N, Mesh} isempty(selectargs) || error("get_array on Field f defined on ScalarSpace with non-empty selectargs=$selectargs") - return FieldArray(default_fieldarray_name(attributes), f.values, f.data_dims, attributes) + data_dims_coordinates = get_data_dims_coords(coord_source, f.data_dims) + return FieldArray(default_fieldarray_name(attributes), f.values, data_dims_coordinates, attributes) end function get_array( - f::PB.Field{D, PB.CellSpace, V, 0, M}, selectargs::NamedTuple=NamedTuple(); - attributes=nothing, -) where {D, V, M} + f::PB.Field{FieldData, PB.CellSpace, V, 0, Mesh}, selectargs::NamedTuple=NamedTuple(); + attributes=nothing, coord_source=nothing, +) where {FieldData, V, Mesh} - values, dims = PB.Grids.get_region(f.mesh, f.values; selectargs...) + values, dims_coords = get_region(f.mesh, f.values; coord_source, selectargs...) if !isnothing(attributes) && !isempty(selectargs) attributes = copy(attributes) attributes[:filter_region] = selectargs end - return FieldArray(default_fieldarray_name(attributes), values, dims, attributes) + return FieldArray(default_fieldarray_name(attributes), values, dims_coords, attributes) end # single data dimension # TODO generalize this to arbitrary data dimensions function get_array( - f::PB.Field{D, PB.CellSpace, V, 1, M}, selectargs::NamedTuple=NamedTuple(); - attributes=nothing, -) where {D, V, M} + f::PB.Field{FieldData, PB.CellSpace, V, 1, Mesh}, selectargs::NamedTuple=NamedTuple(); + attributes=nothing, coord_source, +) where {FieldData, V, Mesh} f_space_dims_colons = ntuple(i->Colon(), ndims(f.values) - 1) f_size_datadim = size(f.values)[end] - dvalues, dims = PB.Grids.get_region(f.mesh, f.values[f_space_dims_colons..., 1]; selectargs...) + dvalues, dims_coords = get_region(f.mesh, f.values[f_space_dims_colons..., 1]; coord_source, selectargs...) d = (size(dvalues)..., f_size_datadim) values = Array{eltype(dvalues), length(d)}(undef, d...) @@ -117,12 +131,12 @@ function get_array( if length(d) == 1 # single cell - space dimension squeezed out for i in 1:f_size_datadim - values[i], dims = PB.Grids.get_region(f.mesh, f.values[f_space_dims_colons..., i]; selectargs...) + values[i], dims_coords = get_region(f.mesh, f.values[f_space_dims_colons..., i]; coord_source, selectargs...) end else dvalues_colons = ntuple(i->Colon(), ndims(dvalues)) for i in 1:f_size_datadim - dvalues, dims = PB.Grids.get_region(f.mesh, f.values[f_space_dims_colons..., i]; selectargs...) + dvalues, dims_coords = get_region(mesh, f.values[f_space_dims_colons..., i]; coord_source, selectargs...) values[dvalues_colons..., i] .= dvalues end end @@ -132,7 +146,9 @@ function get_array( attributes[:filter_region] = selectargs end - return FieldArray(default_fieldarray_name(attributes), values, (dims..., f.data_dims...), attributes) + data_dims_coords = get_data_dims_coords(coord_source, f.data_dims) + + return FieldArray(default_fieldarray_name(attributes), values, vcat(dims_coords, data_dims_coords), attributes) end @@ -141,10 +157,10 @@ end get_array(modeldata, varnamefull [, selectargs::NamedTuple] [; coords::AbstractVector]) -> FieldArray Get [`FieldArray`](@ref) by Variable name, for spatial region defined by `selectargs` -(which are passed to `PB.Grids.get_region`). +(which are passed to `get_region`). Optional argument `coords` can be used to supply plot coordinates from Variable in output. -Format is a Vector of Pairs of "coord_name"=>("var_name1", "var_name2", ...) +Format is a Vector of Pairs of "dim_name"=>("var_name1", "var_name2", ...) Example: to replace a 1D column default pressure coordinate with a z coordinate: @@ -157,25 +173,25 @@ function get_array( modeldata::PB.AbstractModelData, varnamefull::AbstractString, selectargs::NamedTuple=NamedTuple(); coords=nothing, ) - var = PB.get_variable(modeldata.model, varnamefull) - !isnothing(var) || - throw(ArgumentError("Variable $varnamefull not found")) - f = PB.get_field(var, modeldata) - attributes = copy(var.attributes) - attributes[:var_name] = var.name - attributes[:domain_name] = var.domain.name + varsplit = split(varnamefull, ".") + length(varsplit) == 2 || + throw(ArgumentError("varnamefull $varnamefull is not of form .")) + domainname, varname = varsplit + + coord_source = (modeldata, domainname) + f, attributes = _get_field_attributes(coord_source, varname) - varray = get_array(f, selectargs; attributes=attributes) + varray = get_array(f, selectargs; attributes, coord_source) if isnothing(coords) # keep original coords (if any) else check_coords_argument(coords) || - error("argument coords should be a Vector of Pairs of \"coord_name\"=>(\"var_name1\", \"var_name2\", ...), eg: [\"z\"=>(\"atm.zmid\", \"atm.zlower\", \"atm.zupper\"), ...]") + error("argument coords should be a Vector of Pairs of \"dim_name\"=>(\"var_name1\", \"var_name2\", ...), eg: [\"z\"=>(\"atm.zmid\", \"atm.zlower\", \"atm.zupper\"), ...]") vec_coords_arrays = [ - coord_name => Tuple(get_array(modeldata, cvn, selectargs) for cvn in coord_varnames) - for (coord_name, coord_varnames) in coords + dim_name => Tuple(get_array(modeldata, cvn, selectargs) for cvn in coord_varnames) + for (dim_name, coord_varnames) in coords ] varray = update_coordinates(varray, vec_coords_arrays) @@ -184,6 +200,22 @@ function get_array( return varray end +function get_data_dims_coords(coord_source, data_dims) + + data_dims_coordinates = Pair{PB.NamedDimension, Vector{PALEOmodel.FixedCoord}}[] + + for dd in data_dims + coordinates = PALEOmodel.FixedCoord[] + for coord_name in PB.get_coordinates(dd.name) + ddf, attributes = _get_field_attributes(coord_source, coord_name) + push!(coordinates, FixedCoord(coord_name, ddf.values, attributes)) + end + push!(data_dims_coordinates, dd => coordinates) + end + + return data_dims_coordinates +end + # check 'coords' of form [] or ["z"=>[ ... ], ] or ["z"=>(...),] check_coords_argument(coords) = isa(coords, AbstractVector) && ( @@ -199,7 +231,7 @@ check_coords_argument(coords) = Replace or add coordinates `vec_coord_arrays` to `varray`. -`new_coord_arrays` is a Vector of Pairs of "coord_name"=>(var1::FieldArray, var2::FieldArray, ...) +`new_coord_arrays` is a Vector of Pairs of "dim_name"=>(var1::FieldArray, var2::FieldArray, ...) Example: to replace a 1D column default pressure coordinate with a z coordinate: @@ -208,26 +240,25 @@ Example: to replace a 1D column default pressure coordinate with a z coordinate: function update_coordinates(varray::FieldArray, vec_coord_arrays::AbstractVector) check_coords_argument(vec_coord_arrays) || - error("argument vec_coords_arrays should be a Vector of Pairs of \"coord_name\"=>(var1::FieldArray, var2::FieldArray, ...), eg: [\"z\"=>(zmid::FieldArray, zlower::FieldArray, atm.zupper::FieldArray)]") + error("argument vec_coords_arrays should be a Vector of Pairs of \"dim_name\"=>(var1::FieldArray, var2::FieldArray, ...), eg: [\"z\"=>(zmid::FieldArray, zlower::FieldArray, atm.zupper::FieldArray)]") # generate Vector of NamedDimensions to use as new coordinates - named_dimensions = PB.NamedDimension[] - for (coord_name, coord_arrays) in vec_coord_arrays - fixed_coords = [] + new_dims_coords = Pair{PB.NamedDimension, Vector{FixedCoord}}[nd => copy(coords) for (nd, coords) in varray.dims_coords] + for (dim_name, coord_arrays) in vec_coord_arrays + dc_idx = findfirst(dc->first(dc).name == dim_name, new_dims_coords) + !isnothing(dc_idx) || error("FieldArray $varray has no dimension $dim_name") + nd, coords = new_dims_coords[dc_idx] + empty!(coords) for coord_array in coord_arrays - push!(fixed_coords, PB.FixedCoord(get(coord_array.attributes, :var_name, ""), coord_array.values, coord_array.attributes)) + coord_array_name = get(coord_array.attributes, :var_name, "") + nd.size == length(coord_array.values) || + error("FieldArray $varray dimension $dim_name size $(nd.size) != new coordinate $coord_array_name length(values) $(coord_array.values)") + push!(coords, PB.FixedCoord(coord_array_name, coord_array.values, coord_array.attributes)) end - push!( - named_dimensions, PB.NamedDimension( - coord_name, - length(first(fixed_coords).values), - fixed_coords, - ) - ) end # replace coordinates - varray_newcoords = FieldArray(varray.name, varray.values, Tuple(named_dimensions), varray.attributes) + varray_newcoords = FieldArray(varray.name, varray.values, new_dims_coords, varray.attributes) return varray_newcoords end \ No newline at end of file diff --git a/src/FieldRecord.jl b/src/FieldRecord.jl index 0a58e00..f039238 100644 --- a/src/FieldRecord.jl +++ b/src/FieldRecord.jl @@ -1,62 +1,27 @@ """ - FieldRecord{D <: AbstractData, S <: AbstractSpace, V, N, M, R} - FieldRecord( - f::PB.Field{D, S, V, N, M}, attributes; - coords_record, - sizehint::Union{Nothing, Int}=nothing - ) -> fr + FieldRecord{FieldData <: AbstractData, Space <: AbstractSpace, V, N, Mesh, R} + FieldRecord(dataset, f::PB.Field, attributes; [sizehint=nothing]) -A series of `records::R` each containing the `values` from a `PALEOboxes.Field{D, S, N, V, M}`. - -A `coords_record` may be attached to provide a coordinate (eg model time) corresponding to `records`. +A series of `records::R` each containing the `values` from a `PALEOboxes.Field{FieldData, Space, N, V, Mesh}`. # Implementation Fields with array `values` are stored in `records` as a Vector of arrays. Fields with single `values` (`field_single_element` true) are stored as a Vector of `eltype(Field.values)`. """ -struct FieldRecord{D <: PB.AbstractData, S <: PB.AbstractSpace, V, N, M, R} +struct FieldRecord{FieldData <: PB.AbstractData, Space <: PB.AbstractSpace, V, N, Mesh, R} + dataset records::Vector{R} data_dims::NTuple{N, PB.NamedDimension} - mesh::M + mesh::Mesh attributes::Dict{Symbol, Any} - coords_record::Vector{PB.FixedCoord} # coordinates attached to record dimension -end - -function Base.show(io::IO, fr::FieldRecord) - print(io, - "FieldRecord(eltype=", eltype(fr),", length=", length(fr), - ", attributes=", fr.attributes, ", coords_record=", fr.coords_record, ")" - ) + # coords_record::Vector{FixedCoord} # coordinates attached to record dimension end -function Base.show(io::IO, ::MIME"text/plain", fr::FieldRecord) - println(io, "FieldRecord(eltype=", eltype(fr),", length=", length(fr), ")") - println(io, " data_dims: ", fr.data_dims) - println(io, " mesh: ", fr.mesh) - println(io, " attributes: ", fr.attributes) - println(io, " coords_record: ", fr.coords_record) - return nothing -end - -"test whether Field contains single elements" -function field_single_element(::Type{field_data}, N, ::Type{S}, ::Type{M}) where {field_data <: PB.AbstractData, S <: PB.AbstractSpace, M} - if PB.field_single_element(field_data, N) && (S == PB.ScalarSpace || (S == PB.CellSpace && M == Nothing)) - return true - else - return false - end -end - -field_single_element(::Type{PB.Field{D, S, V, N, M}}) where {D, S, V, N, M} = field_single_element(D, N, S, M) -field_single_element(::Type{FR}) where {FR <: FieldRecord} = field_single_element(eltype(FR)) -field_single_element(f::T) where {T} = field_single_element(T) - - +# create empty FieldRecord function FieldRecord( - f::PB.Field{D, S, V, N, M}, attributes; - coords_record, + dataset, f::PB.Field{FieldData, Space, V, N, Mesh}, attributes; sizehint::Union{Nothing, Int}=nothing -) where {D, S, V, N, M} +) where {FieldData, Space, V, N, Mesh} if field_single_element(f) # if Field contains single elements, store as a Vector of elements records = Vector{eltype(f.values)}() @@ -67,24 +32,30 @@ function FieldRecord( if !isnothing(sizehint) sizehint!(records, sizehint) end - return FieldRecord{D, S, V, N, M, eltype(records)}(records, f.data_dims, f.mesh, attributes, coords_record) + return FieldRecord{FieldData, Space, V, N, Mesh, eltype(records)}( + dataset, + records, + f.data_dims, + f.mesh, + attributes, + ) end -"create a new FieldRecord, containing supplied `existing_values::Vector` data arrays" -function wrap_fieldrecord( + +# create a new FieldRecord, containing supplied `existing_values::Vector` data arrays +function FieldRecord( + dataset, existing_values::Vector, - field_data::Type, + FieldData::Type, data_dims::NTuple{N, PB.NamedDimension}, - data_type::Union{DataType, Missing}, - space::Type{<:PB.AbstractSpace}, - mesh::M, + Space::Type{<:PB.AbstractSpace}, + mesh::Mesh, attributes; - coords_record -) where {N, M} +) where {N, Mesh} # check_values( - # existing_values, field_data, data_dims, data_type, space, spatial_size(space, mesh), + # existing_values, FieldData, data_dims, data_type, Space, spatial_size(space, mesh), # ) - if field_single_element(field_data, N, space, M) + if field_single_element(FieldData, N, Space, Mesh) # assume existing_values is a Vector, with each element to be stored in Field values::V as a length 1 Vector V = Vector{eltype(existing_values)} else @@ -92,12 +63,86 @@ function wrap_fieldrecord( V = eltype(existing_values) end - return FieldRecord{field_data, space, V, N, typeof(mesh), eltype(existing_values)}( - existing_values, data_dims, mesh, attributes, coords_record + return FieldRecord{FieldData, Space, V, N, typeof(mesh), eltype(existing_values)}( + dataset, existing_values, data_dims, mesh, attributes, ) end -function Base.push!(fr::FieldRecord{D, S, V, N, M, R}, f::PB.Field{D, S, V, N, M}) where {D, S, V, N, M, R} +space(fr::FieldRecord{FieldData, Space, V, N, Mesh, R}) where {FieldData, Space, V, N, Mesh, R} = Space + +function PB.get_dimensions(fr::FieldRecord; expand_cartesian=false) + dims_spatial = PB.get_dimensions(fr.mesh, space(fr); expand_cartesian) + dims = [dims_spatial..., fr.data_dims..., PB.NamedDimension(fr.dataset.record_dim.name, length(fr))] + return dims +end + +function PB.get_coordinates(fr::FieldRecord, dimname::AbstractString; expand_cartesian=false) + if !isnothing(findfirst(nd -> nd.name == dimname, PB.get_dimensions(fr.mesh, space(fr); expand_cartesian))) + # no grid (fr.mesh == nothing) defines a "cell" dimension, but not get_coordinates + return isnothing(fr.mesh) ? String[] : PB.get_coordinates(fr.mesh, dimname) + elseif !isnothing(findfirst(nd -> nd.name == dimname, fr.data_dims)) + return hasproperty(fr.dataset, :data_dims_coordinates) ? get(fr.dataset.data_dims_coordinates, dimname, String[]) : String[] + elseif dimname in ("records", fr.dataset.record_dim.name) + return hasproperty(fr.dataset, :record_dim_coordinates) ? fr.dataset.record_dim_coordinates : String[] + end + + error("FieldRecord $fr has no dimension $dimname") +end + +function Base.show(io::IO, fr::FieldRecord) + print(io, + "FieldRecord(eltype=", eltype(fr),", length=", length(fr), + ", attributes=", fr.attributes, + ")" + ) +end + +function Base.show(io::IO, ::MIME"text/plain", fr::FieldRecord) + println(io, "FieldRecord(eltype=", eltype(fr),", length=", length(fr), ")") + # println(io, " records: ", fr.records) + println(io, " data_dims: ", fr.data_dims) + println(io, " mesh: ", fr.mesh) + println(io, " attributes: ", fr.attributes) + if PB.has_internal_cartesian(fr.mesh, space(fr)) + dimlist = [ " dimensions (internal): " => false, " dimensions (cartesian): " => true] + else + dimlist = [" dimensions: " => false] + end + for (str, expand_cartesian) in dimlist + println(io, str) + dims = PB.get_dimensions(fr; expand_cartesian) + for nd in dims + print(io, " ", nd) + coord_names = PB.get_coordinates(fr, nd.name; expand_cartesian) + if isempty(coord_names) + println() + else + println(" => ", coord_names) + end + end + end + return nothing +end + +"test whether Field contains single elements +TODO this will currently return false for CellSpace with ncells=1, which could be changed to true ? +TODO would be clearer to directly use PB.internal_size(Space, mesh) == (1,) which would also handle the CellSpace with ncells=1 case, +but this wouldn't work in the type domain (needs an instance of mesh::Mesh) +" +function field_single_element(::Type{FieldData}, N, ::Type{Space}, ::Type{Mesh}) where {FieldData <: PB.AbstractData, Space <: PB.AbstractSpace, Mesh} + if PB.field_single_element(FieldData, N) && (Space == PB.ScalarSpace || (Space == PB.CellSpace && Mesh == Nothing)) + return true + else + return false + end +end + +field_single_element(::Type{PB.Field{FieldData, Space, V, N, Mesh}}) where {FieldData, Space, V, N, Mesh} = field_single_element(FieldData, N, Space, Mesh) +field_single_element(::Type{FR}) where {FR <: FieldRecord} = field_single_element(eltype(FR)) +field_single_element(f::T) where {T} = field_single_element(T) + + +function Base.push!(fr::FieldRecord{FieldData, Space, V, N, Mesh, R}, f::PB.Field{FieldData, Space, V, N, Mesh}) where {FieldData, Space, V, N, Mesh, R} if field_single_element(fr) # if Field contains single elements, store as a Vector of elements push!(fr.records, f.values[]) @@ -108,25 +153,30 @@ function Base.push!(fr::FieldRecord{D, S, V, N, M, R}, f::PB.Field{D, S, V, N, M return fr end +function Base.push!(fr::FieldRecord{FieldData, Space, V, N, Mesh, R}, record::R) where {FieldData, Space, V, N, Mesh, R} + push!(fr.records, record) + return fr +end + Base.length(fr::FieldRecord) = length(fr.records) -Base.eltype(::Type{FieldRecord{D, S, V, N, M, R}}) where {D, S, V, N, M, R} = PB.Field{D, S, V, N, M} +Base.eltype(::Type{FieldRecord{FieldData, Space, V, N, Mesh, R}}) where {FieldData, Space, V, N, Mesh, R} = PB.Field{FieldData, Space, V, N, Mesh} -function Base.getindex(fr::FieldRecord{D, S, V, N, M, R}, i::Int) where {D, S, V, N, M, R} +function Base.getindex(fr::FieldRecord{FieldData, Space, V, N, Mesh, R}, i::Int) where {FieldData, Space, V, N, Mesh, R} if field_single_element(fr) # if Field contains single elements, FieldRecord stores as a Vector of elements - return PB.wrap_field([fr.records[i]], D, fr.data_dims, missing, S, fr.mesh) + return PB.Field([fr.records[i]], FieldData, fr.data_dims, missing, Space, fr.mesh) else # if Field contains something else, FieldRecord stores as a Vector of those things - return PB.wrap_field(fr.records[i], D, fr.data_dims, missing, S, fr.mesh) + return PB.Field(fr.records[i], FieldData, fr.data_dims, missing, Space, fr.mesh) end end Base.lastindex(fr::FieldRecord) = lastindex(fr.records) -function Base.copy(fr::FieldRecord{D, S, V, N, M, R}) where {D, S, V, N, M, R} - return FieldRecord{D, S, V, N, M, R}( +function Base.copy(fr::FieldRecord{FieldData, Space, V, N, Mesh, R}) where {FieldData, Space, V, N, Mesh, R} + return FieldRecord{FieldData, Space, V, N, Mesh, R}( deepcopy(fr.records), fr.data_dims, fr.mesh, @@ -151,11 +201,14 @@ any attached coordinates, for records and spatial region defined by `allselectar starting at the record with the nearest value of `fr.coords_record` before `first` and ending at the nearest record after `last`. -Available `selectargs` depend on the grid `fr.mesh`, and -are passed to `PB.Grids.get_region`. +Available `selectargs` depend on the grid `fr.mesh` and `fr.data_dims`, options include: +- grid spatial dimensions +[- TODO grid spatial coordinates (if defined)] +- `fr.data_dims` + Optional argument `coords` can be used to supply coordinates from additional `FieldRecords`, replacing any coordinates -attached to `fr`. Format is a Vector of Pairs of "coord_name"=>(cr1::FieldRecord, cr2::FieldRecord, ...) +attached to `fr`. Format is a Vector of Pairs of "dim_name"=>(cr1::FieldRecord, cr2::FieldRecord, ...) Example: to replace a 1D column default pressure coordinate with a z coordinate: @@ -178,7 +231,7 @@ function get_array( return get_array(fr, NamedTuple(allselectargs); coords=coords) end -function get_array( +function get_array_orig( fr::FieldRecord, allselectargs::NamedTuple; # allselectargs::NamedTuple=NamedTuple() creates a method ambiguity with deprecated form above coords::Union{Nothing, AbstractVector}=nothing ) @@ -212,14 +265,14 @@ function get_array( end function _get_array( - fr::FieldRecord{D, S, V, N, M, R}, allselectargs::NamedTuple, -) where {D, S, V, N, M, R} + fr::FieldRecord{FieldData, Space, V, N, Mesh, R}, allselectargs::NamedTuple, +) where {FieldData, Space, V, N, Mesh, R} # select records to use and create PB.NamedDimension ridx = 1:length(fr) selectargs_region = NamedTuple() selectargs_records = NamedTuple() - for (k, v) in zip(keys(allselectargs), allselectargs) + for (k, v) in zip(keys(allselectargs), values(allselectargs)) if k==:records if v isa Integer ridx = [v] @@ -227,19 +280,21 @@ function _get_array( ridx = v end selectargs_records = (records=v,) - elseif String(k) in getfield.(fr.coords_record, :name) + elseif String(k) in fr.dataset.record_dim_coordinates # find ridx corresponding to a coordinate - for cr in fr.coords_record - if String(k) == cr.name - ridx, cvalue = PB.find_indices(cr.values, v) - selectargs_records=NamedTuple((k=>cvalue,)) - end - end + rfr = PB.get_field(fr.dataset, String(k)) + ridx, cvalue = find_indices(rfr.records, v) + selectargs_records = NamedTuple((k=>cvalue,)) else selectargs_region = merge(selectargs_region, NamedTuple((k=>v,))) end end - records_dim = PB.NamedDimension("records", length(ridx), PB.get_region(fr.coords_record, ridx)) + records_coords = FixedCoord[] + for rcn in fr.dataset.record_dim_coordinates + rfr = PB.get_field(fr.dataset, rcn) + push!(records_coords, FixedCoord(rcn, rfr.records[ridx], rfr.attributes)) + end + records_dim_coords = PB.NamedDimension(fr.dataset.record_dim.name, length(ridx)) => records_coords # add attributes for selection used attributes = copy(fr.attributes) @@ -252,13 +307,15 @@ function _get_array( # Select selectargs_region if field_single_element(fr) # create FieldArray directly from FieldRecord + data_dims_coords = get_data_dims_coords((fr.dataset, ridx), fr.data_dims) + isempty(selectargs_region) || throw(ArgumentError("invalid index $selectargs_region")) if length(ridx) > 1 return FieldArray( name, fr.records[ridx], - (fr.data_dims..., records_dim), + vcat(data_dims_coords, records_dim_coords), attributes ) else @@ -266,7 +323,7 @@ function _get_array( return FieldArray( name, fr.records[first(ridx)], - fr.data_dims, + data_dims_coords, attributes ) end @@ -274,31 +331,31 @@ function _get_array( # pass through to Field # get FieldArray from first Field record and use this to figure out array shapes etc - far = get_array(fr[first(ridx)], selectargs_region) + far = get_array(fr[first(ridx)], selectargs_region; coord_source=(fr.dataset, first(ridx))) # TODO - Julia bug ? length(far.dims) returning wrong value, apparently triggered by this line # attributes = isnothing(attributes) ? Dict{Symbol, Any}() : copy(attributes) # in get_array if length(ridx) > 1 # add additional (last) dimension for records - favalues = Array{eltype(far.values), length(far.dims)+1}(undef, size(far.values)..., length(ridx)) + favalues = Array{eltype(far.values), length(far.dims_coords)+1}(undef, size(far.values)..., length(ridx)) fa = FieldArray( name, favalues, - (far.dims..., records_dim), + vcat(far.dims_coords, records_dim_coords), attributes, ) # copy values for first record - if isempty(far.dims) + if isempty(far.dims_coords) fa.values[1] = far.values else - fa.values[fill(Colon(), length(far.dims))..., 1] .= far.values + fa.values[fill(Colon(), length(far.dims_coords))..., 1] .= far.values end else # squeeze out record dimension so this is just fa with attributes added fa = FieldArray( name, far.values, - far.dims, + far.dims_coords, attributes ) end @@ -306,11 +363,11 @@ function _get_array( # fill with values from FieldArrays for Fields for remaining records if length(ridx) > 1 for (i, r) in enumerate(ridx[2:end]) - far = get_array(fr[r], selectargs_region) - if isempty(far.dims) + far = get_array(fr[r], selectargs_region; coord_source=(fr.dataset, r)) + if isempty(far.dims_coords) fa.values[i+1] = far.values else - fa.values[fill(Colon(), length(far.dims))..., i+1] .= far.values + fa.values[fill(Colon(), length(far.dims_coords))..., i+1] .= far.values end end end @@ -320,3 +377,282 @@ function _get_array( end + +function get_array( + fr::FieldRecord, allselectargs::NamedTuple; # allselectargs::NamedTuple=NamedTuple() creates a method ambiguity with deprecated form above + coords=nothing, + expand_cartesian=false, # can be overridden in allselectargs + squeeze_all_single_dims=true, # can be overridden in allselectargs + verbose=true, +) + frname = default_fieldarray_name(fr.attributes) + + @info "get_array (begin): $frname, allselectargs $allselectargs" + + if !isnothing(coords) + check_coords_argument(coords) || + error("argument coords should be a Vector of Pairs of \"dim_name\"=>(\"var_name1\", \"var_name2\", ...), eg: [\"z\"=>(\"atm.zmid\", \"atm.zlower\", \"atm.zupper\"), ...]") + end + + ########################################################################## + # preprocess allselectargs to strip non-selection arguments and fix quirks + ######################################################################## + allselectargs_sort = [String(k) => v for (k, v) in pairs(allselectargs)] + + # override expand_cartesian + idx_expand_cartesian = findfirst(x-> x[1] == "expand_cartesian", allselectargs_sort) + if !isnothing(idx_expand_cartesian) + _, expand_cartesian = popat!(allselectargs_sort, idx_expand_cartesian) + end + # override squeeze_all_single_dims + idx_squeeze_all_single_dims = findfirst(x-> x[1] == "squeeze_all_single_dims", allselectargs_sort) + if !isnothing(idx_squeeze_all_single_dims) + _, squeeze_all_single_dims = popat!(allselectargs_sort, idx_squeeze_all_single_dims) + end + + # quirk: column must precede cell selection, so move to front if present + idx_column = findfirst(x-> x[1] == "column", allselectargs_sort) + if !isnothing(idx_column) + column_select = popat!(allselectargs_sort, idx_column) + pushfirst!(allselectargs_sort, column_select) + end + + selectargs_used = fill(false, length(allselectargs_sort)) + + ################################################################ + # read dimensions + # (TODO reproduce code from get_dimensions so we can also get dims_spatial) + ########################################################## + # order is spatial (from grid), data_dims, then record dimension + dims_spatial = PB.get_dimensions(fr.mesh, space(fr); expand_cartesian) + dims = [dims_spatial..., fr.data_dims..., PB.NamedDimension(fr.dataset.record_dim.name, length(fr))] + recorddimidx = length(dims) + + ################################################################################## + # Read coordinates and apply selection: record dimension first, followed by non-record dimensions + # so we can handle the case where a single record is selected and coordinates are not constant. + # TODO this doesn't handle cases where multiple records are used with non-constant coordinates + ########################################################################################## + + # vector of filtered dim => coords corresponding to select_indices, nothing if dimension squeezed out + dims_coords = Vector{Any}(undef, length(dims)) + # indices in full array to use. Start with everything, then apply selections from 'allselectargs' + select_indices = Any[1:(nd.size) for nd in dims] + + # Record dimension + + # read record coordinates + dims_coords[recorddimidx] = dims[recorddimidx] => _read_coordinates( + fr, dims[recorddimidx], nothing, expand_cartesian; substitute_coords=coords + ) + + # keep track of selection used, to provide as attributes in FieldArray + selectargs_records = OrderedCollections.OrderedDict() + + _filter_dims_coords( + select_indices, dims_coords, recorddimidx, + allselectargs_sort, selectargs_used, selectargs_records, + fr, + ) + + # get record indices to use + ridx_to_use = select_indices[recorddimidx] + have_recorddim = !isnothing(dims_coords[recorddimidx]) + + # Non-record dimensions + + # read non-record coordinates, from first record selected + for i in 1:(length(dims)-1) + dims_coords[i] = dims[i] => _read_coordinates(fr, dims[i], first(ridx_to_use), expand_cartesian; substitute_coords=coords) + end + + selectargs_region = OrderedCollections.OrderedDict() + + _filter_dims_coords( + select_indices, dims_coords, 1:length(dims)-1, + allselectargs_sort, selectargs_used, selectargs_region, + fr, + ) + + unused_selectargs = [a for (a, u) in zip(allselectargs_sort, selectargs_used) if !u] + isempty(unused_selectargs) || + error("allselectargs contains select filter(s) $unused_selectargs that do not match any dimensions or coordinates !") + + ############################################# + # squeeze out dimensions and coordinates + ############################################# + + # selections that produce a dimension with a single index are already squeezed out, + # but not dimensions that started with a single index and haven't had a selection applied + if squeeze_all_single_dims + for i in eachindex(dims_coords, select_indices) + if !isnothing(dims_coords[i]) && dims_coords[i][1].size == 1 + @info "get_array: $frname squeezing out dimension $(dims_coords[i][1].name)" + @assert !isa(select_indices[i], Number) + @assert length(select_indices[i]) == 1 + dims_coords[i] = nothing + select_indices[i] = first(select_indices[i]) + end + end + end + + dims_coords_sq = Pair{PB.NamedDimension, Vector{PALEOmodel.FixedCoord}}[dc for dc in dims_coords if !isnothing(dc)] + dims_sq = [d for (d, c) in dims_coords_sq] + + # get non-record dimensions indices to use + nonrecordindicies = select_indices[1:end-1] + # squeeze out single dimensions by filtering out scalars in nonrecordindicies + # NB: lhs (assignment to output array) is contiguous ! + nonrecordindicies_sq = [1:length(nri) for nri in nonrecordindicies if nri isa AbstractVector] + # consistency check between dimensions and indices of output array + @assert length(dims_sq) == length(nonrecordindicies_sq) + have_recorddim + for (d_sq, nri_sq) in zip(dims_sq, nonrecordindicies_sq) # will stop after shortest iter and so skip recorddim if present + @assert d_sq.size == length(nri_sq) + end + + ############################################### + # create output FieldArray + ############################################### + + # create values array + if field_single_element(fr) + if have_recorddim + avalues = fr.records[ridx_to_use] + else + # represent a scalar as a 0D Array + avalues = Array{eltype(fr.records), 0}(undef) + avalues[] = fr.records[ridx_to_use] + end + else + if expand_cartesian && !isempty(dims_spatial) + expand_fn = x -> PB.Grids.internal_to_cartesian(fr.mesh, x) + aeltype = Union{Missing, eltype(first(fr.records))} + else + expand_fn = identity + aeltype = eltype(first(fr.records)) + end + avalues = Array{aeltype, length(dims_sq)}(undef, [nd.size for nd in dims_sq]...) + if have_recorddim + for (riselect, ri) in enumerate(ridx_to_use) + if isempty(nonrecordindicies_sq) + avalues[riselect] = expand_fn(fr.records[ri])[nonrecordindicies...] + else + avalues[nonrecordindicies_sq..., riselect] .= expand_fn(fr.records[ri])[nonrecordindicies...] + end + end + else + if isempty(nonrecordindicies_sq) + avalues[] = expand_fn(fr.records[ridx_to_use])[nonrecordindicies...] + else + avalues[nonrecordindicies_sq...] .= expand_fn(fr.records[ridx_to_use])[nonrecordindicies...] + end + end + end + + # add attributes for selection used + attributes = copy(fr.attributes) + if !isempty(selectargs_records) + attributes[:filter_records] = NamedTuple(Symbol(k)=>v for (k, v) in selectargs_records) + end + if !isempty(selectargs_region) + attributes[:filter_region] = NamedTuple(Symbol(k)=>v for (k, v) in selectargs_region) + end + + # Generate name from attributes, including selection suffixes + name = default_fieldarray_name(attributes) + + @info "get_array (end): $frname -> $name, allselectargs $allselectargs" + + return FieldArray( + name, + avalues, + dims_coords_sq, + attributes, + ) +end + +# Read coordinates attached to dimension 'dim' +# For record coordinate, ridx_to_use = nothing +# For other coordinates, ridx_to_use = record index to use for coordinate (ie limitation only one record in use, or coordinate is constant) +function _read_coordinates( + fr::FieldRecord, dim::PB.NamedDimension, ridx_to_use::Union{Int, Nothing}, expand_cartesian::Bool; + substitute_coords=nothing, +) + coord_names = nothing + if !isnothing(substitute_coords) + for (sdim_name, scoord_names) in substitute_coords + if sdim_name == dim.name + coord_names = scoord_names + break + end + end + end + + if isnothing(coord_names) + coord_names = PB.get_coordinates(fr, dim.name; expand_cartesian) + end + + coords = FixedCoord[] + for cn in coord_names + cfr = PB.get_field(fr.dataset, cn) + coord_values = isnothing(ridx_to_use) ? cfr.records : cfr.records[ridx_to_use] + push!(coords, FixedCoord(cn, coord_values, cfr.attributes)) + end + return coords +end + +# Apply filters in allselectargs to dimension indices dim_indices_to_filter, +# returning indices to use in select_indices and filtered dimension and coords in dims_coords +function _filter_dims_coords( + select_indices::Vector, dims_coords::Vector, dims_indices_to_filter, + allselectargs_sort::Vector, selectargs_used::Vector, selectargs_applied::AbstractDict, + fr::FieldRecord, +) + + for k in eachindex(allselectargs_sort, selectargs_used) + select_dimcoordname_orig, select_filter_orig = allselectargs_sort[k] + select_dimcoordname, select_filter = select_dimcoordname_orig, select_filter_orig + + # name substitutions + if select_dimcoordname == "records" + select_dimcoordname = fr.dataset.record_dim.name*"_isel" + end + if select_dimcoordname in ("cell", "cells") + select_dimcoordname = "cells_isel" + end + if select_dimcoordname == "cells_isel" + select_filter = PB.Grids.substitute_cell_names(fr.mesh, select_filter) + end + if select_dimcoordname == "column" + select_filter = PB.Grids.column_indices(fr.mesh, select_filter) + select_dimcoordname = "cells_isel" + end + + # try each dimension in turn to see if select_dimcoordname applies to either the dimension or attached coordinates + for dimidx in dims_indices_to_filter + if !isnothing(dims_coords[dimidx]) # dimension may have been squeezed out + dim, coords = dims_coords[dimidx] + if select_dimcoordname == dim.name*"_isel" || !isnothing(findfirst(c->c.name==select_dimcoordname, coords)) + !selectargs_used[k] || + error("select $select_dimcoordname_orig => $select_filter_orig matches multiple dimensions or coordinates !") + sidx, dim_subset, coords_subset, coords_filter_used = dimscoord_subset(dim, coords, select_dimcoordname, select_filter) + select_indices[dimidx] = select_indices[dimidx][sidx] # sidx is relative to current select_indices[dimidx] + if isnothing(dim_subset) + # squeeze out this dimension + dims_coords[dimidx] = nothing + else + dims_coords[dimidx] = dim_subset => coords_subset + end + + # keep track of selection used, to provide as attributes in FieldArray + select_filter_used = isnothing(coords_filter_used) ? select_filter_orig : coords_filter_used + selectargs_applied[select_dimcoordname_orig] = select_filter_used + + selectargs_used[k] = true + end + end + end + end + + return nothing +end \ No newline at end of file diff --git a/src/OutputWriters.jl b/src/OutputWriters.jl index 1b7bc6d..089118b 100644 --- a/src/OutputWriters.jl +++ b/src/OutputWriters.jl @@ -9,9 +9,9 @@ import PALEOboxes as PB import PALEOmodel +import OrderedCollections import DataFrames import FileIO -import JLD2 import NCDatasets import Infiltrator # Julia debugger @@ -52,12 +52,12 @@ PALEOmodel.AbstractOutputWriter """ initialize!( output::PALEOmodel.AbstractOutputWriter, model, modeldata, nrecords - [;coords_record=:tmodel] [coords_record_units="year"] + [;record_dim_name=:tmodel] [record_coord_units="yr"] ) Initialize from a PALEOboxes::Model, reserving memory for an assumed output dataset of `nrecords`. -The default for `coords_record` is `:tmodel`, for a sequence of records following the time evolution +The default for `record_dim_name` is `:tmodel`, for a sequence of records following the time evolution of the model. """ function initialize!( @@ -122,14 +122,11 @@ Return a [`PALEOmodel.FieldArray`](@ref) containing data values and any attached If `coords` is not supplied, equivalent to `PALEOmodel.get_array(PB.get_field(output, varname), allselectargs)`. Optional argument `coords` can be used to supply plot coordinates from Variable in output, to replace any default coordinates. -Format is a Vector of Pairs of "coord_name"=>("var_name1", "var_name2", ...) +Format is a Vector of Pairs of "dim_name"=>("var_name1", "var_name2", ...) Example: to replace a 1D column default pressure coordinate with a z coordinate: - coords=["z"=>("atm.zmid", "atm.zlower", "atm.zupper")] - -NB: the coordinates will be generated by applying `selectargs`, -so the supplied coordinate Variables must have the same dimensionality as `vars`. + coords=["cells"=>("atm.zmid", "atm.zlower", "atm.zupper")] """ function PALEOmodel.get_array( output::PALEOmodel.AbstractOutputWriter, varname::AbstractString; @@ -150,19 +147,21 @@ function PALEOmodel.get_array( ) fr = PB.get_field(output, varname) - if isnothing(coords) - coords_records=nothing - else - PALEOmodel.check_coords_argument(coords) || - error("argument coords should be a Vector of Pairs of \"coord_name\"=>(\"var_name1\", \"var_name2\", ...), eg: [\"z\"=>(\"atm.zmid\", \"atm.zlower\", \"atm.zupper\"), ...]") - - coords_records = [ - coord_name => Tuple(PB.get_field(output, cvn) for cvn in coord_varnames) - for (coord_name, coord_varnames) in coords - ] - end - - return PALEOmodel.get_array(fr, allselectargs; coords=coords_records) + # if isnothing(coords) + # coords_records=nothing + # else + # PALEOmodel.check_coords_argument(coords) || + # error("argument coords should be a Vector of Pairs of \"coord_name\"=>(\"var_name1\", \"var_name2\", ...), eg: [\"z\"=>(\"atm.zmid\", \"atm.zlower\", \"atm.zupper\"), ...]") + + # coords_records = [ + # coord_name => Tuple(PB.get_field(output, cvn) for cvn in coord_varnames) + # for (coord_name, coord_varnames) in coords + # ] + # end + # + # return PALEOmodel.get_array(fr, allselectargs; coords=coords_records) + + return PALEOmodel.get_array(fr, allselectargs; coords) end """ @@ -196,52 +195,123 @@ function PB.get_mesh(output::PALEOmodel.AbstractOutputWriter, domainname::Abstra In-memory model output, for one model Domain. -Includes an additional `coords_record` (usually `:tmodel`, when storing output vs time). - -# Implementation -`data::DataFrame` contains columns of same type as `FieldRecord.records` for each Variable. +Includes an additional record coordinate variable not present in Domain (usually `:tmodel`, when storing output vs time). """ mutable struct OutputMemoryDomain + parentdataset::Union{PALEOmodel.AbstractOutputWriter, Nothing} "Domain name" name::String "Model output for this Domain" - data::DataFrames.DataFrame - "record coordinate" - coords_record::Symbol + data::OrderedCollections.OrderedDict{Symbol, PALEOmodel.FieldRecord} + "record dimension" + record_dim::PB.NamedDimension + "record coordinates" + record_dim_coordinates::Vector{String} "Domain data_dims" data_dims::Vector{PB.NamedDimension} - "Variable metadata (attributes) (metadata[varname] -> attributes::Dict{Symbol, Any})" - metadata::Dict{String, Dict{Symbol, Any}} + "data_dims coordinates" + data_dims_coordinates::Dict{String, Vector{String}} "Domain Grid (if any)" grid::Union{PB.AbstractMesh, Nothing} # internal use only: all Variables in sorted order - _all_vars::Vector{PB.VariableDomain} - # current last record in preallocated data::DataFrame (may be less than length(data)) - _nrecs + _all_vars::Vector{Union{Nothing, PB.VariableDomain}} +end + +""" + OutputMemoryDomain( + name::AbstractString; + [record_dim_name::Symbol=:tmodel] + [grid=nothing] + [data_dims::Vector{PB.NamedDimension} = Vector{PB.NamedDimension}()] + ) + +Create empty OutputMemoryDomain. Add additional Fields with +`add_field!`. +""" +function OutputMemoryDomain( + name::AbstractString; + parentdataset=nothing, + data_dims::Vector{PB.NamedDimension} = Vector{PB.NamedDimension}(), + data_dims_coordinates::Dict{String, Vector{String}} = Dict{String, Vector{String}}(), + grid = nothing, + record_dim_name::Symbol=:tmodel, + record_coord_name::Symbol=record_dim_name, + coords_record::Union{Symbol, Nothing}=nothing, # deprecated +) + if !isnothing(coords_record) + @warn "coords_record argument is deprecated, use record_dim_name" + record_dim_name = coords_record + record_coord_name = coords_record + end + + return OutputMemoryDomain( + parentdataset, + name, + OrderedCollections.OrderedDict{Symbol, PALEOmodel.FieldRecord}(), + PB.NamedDimension(string(record_dim_name), 0), + [string(record_coord_name)], + deepcopy(data_dims), + deepcopy(data_dims_coordinates), + deepcopy(grid), + PB.VariableDomain[], + ) + end -Base.length(output::OutputMemoryDomain) = output._nrecs +Base.length(output::OutputMemoryDomain) = output.record_dim.size "create from a PALEOboxes::Domain" function OutputMemoryDomain( - dom::PB.Domain, modeldata::PB.ModelData, nrecords::Integer; - coords_record::Symbol=:tmodel, coords_record_units::AbstractString="year" + dom::PB.Domain, modeldata::PB.ModelData; + parentdataset=nothing, + record_dim_name::Symbol=:tmodel, + record_coord_name::Symbol=record_dim_name, + record_coord_units::AbstractString="yr", + coords_record::Union{Symbol, Nothing}=nothing, # deprecated + coords_record_units::Union{AbstractString, Nothing}=nothing, # deprecated ) - + if !isnothing(coords_record) + @warn "coords_record is deprecated, use record_dim_name" + record_dim_name = coords_record + record_coord_name = coords_record + end + if !isnothing(coords_record_units) + @warn "coords_record_units is deprecated, use record_coord_units" + record_coord_units = coords_record_units + end + odom = OutputMemoryDomain( - dom.name, - DataFrames.DataFrame(), - coords_record, - deepcopy(dom.data_dims), - Dict{String, Dict{Symbol, Any}}(), - deepcopy(dom.grid), - PB.VariableDomain[], - 0, + dom.name; + parentdataset, + record_dim_name, + record_coord_name, + data_dims = dom.data_dims, + data_dims_coordinates = dom.data_dims_coordinates, + grid = dom.grid, ) + # add record coodinate variable + odom.data[record_coord_name] = PALEOmodel.FieldRecord( + odom, + Float64[], + PB.ScalarData, + (), + PB.ScalarSpace, + nothing, + Dict{Symbol, Any}( + :var_name => string(record_coord_name), + :domain_name => dom.name, + :field_data => PB.ScalarData, + :space => PB.ScalarSpace, + :data_dims => (), + :units => record_coord_units, + ), + ) + # create list of variables sorted by host dependent type, then by name odom._all_vars = vcat( + nothing, # coords_record has no actual variable sort( PB.get_variables( dom, v->PB.get_attribute(v, :vfunction, PB.VF_Undefined) in (PB.VF_StateExplicit, PB.VF_Total, PB.VF_Constraint) @@ -262,43 +332,18 @@ function OutputMemoryDomain( by=var->var.name ), ) - - # add empty (ie undefined) columns to dataframe - - # add record coordinate column - DataFrames.insertcols!( - odom.data, - DataFrames.ncol(odom.data)+1, - coords_record => Vector{Float64}(undef, nrecords) - ) - odom.metadata[String(coords_record)] = Dict( - :var_name=>String(coords_record), :domain_name=>dom.name, - :vfunction=>PB.VF_Undefined, :description=>"output record coordinate", - :field_data=>PB.ScalarData, :space=>PB.ScalarSpace, :data_dims=>(), :units=>coords_record_units, - ) # add variables for var in odom._all_vars - # records storage type is that of FieldRecord.records - field = PB.get_field(var, modeldata) - if PALEOmodel.field_single_element(field) - # if Field contains single elements, store as a Vector of elements - records = Vector{eltype(field.values)}(undef, nrecords) - else - # if Field contains something else, store as a Vector of those things - records = Vector{typeof(field.values)}(undef, nrecords) + if !isnothing(var) + # records storage type is that of FieldRecord.records + field = PB.get_field(var, modeldata) + attrbs = deepcopy(var.attributes) + attrbs[:var_name] = var.name + attrbs[:domain_name] = dom.name + + odom.data[Symbol(var.name)] = PALEOmodel.FieldRecord(odom, field, attrbs) end - - DataFrames.insertcols!( - odom.data, - DataFrames.ncol(odom.data)+1, - Symbol(var.name) => records - ) - - attrbs = deepcopy(var.attributes) - attrbs[:var_name] = var.name - attrbs[:domain_name] = dom.name - odom.metadata[var.name] = attrbs end return odom @@ -307,9 +352,34 @@ end "create from a DataFrames DataFrame containing scalar data" function OutputMemoryDomain( name::AbstractString, data::DataFrames.DataFrame; - coords_record::Symbol=:tmodel, coords_record_units::AbstractString="year", - metadata::Dict{String, Dict{Symbol, Any}}=Dict(String(coords_record)=>Dict{Symbol, Any}(:units=>coords_record_units)), + record_dim_name::Symbol=:tmodel, + record_coord_name::Symbol=record_dim_name, + record_coord_units::AbstractString="yr", + metadata::Union{Dict{String, Dict{Symbol, Any}}, Nothing}=nothing, + coords_record::Union{Symbol, Nothing}=nothing, # deprecated + coords_record_units::Union{AbstractString, Nothing}=nothing, # deprecated ) + + if !isnothing(coords_record) + @warn "coords_record is deprecated, use record_dim_name" + record_dim_name = coords_record + record_coord_name = coords_record + end + if !isnothing(coords_record_units) + @warn "coords_record_units is deprecated, use record_coord_units" + record_coord_units = coords_record_units + end + + String(record_coord_name) in DataFrames.names(data) || + @warn "record_coord_name $record_coord_name is not present in supplied data DataFrame" + + if isnothing(metadata) + metadata = Dict(String(record_coord_name)=>Dict{Symbol, Any}(:units=>record_coord_units)) + end + + odom = OutputMemoryDomain(name; record_dim_name, record_coord_name) + odom.record_dim = PB.NamedDimension(string(record_dim_name), DataFrames.nrow(data)) + # create minimal metadata for scalar Variables for vname in DataFrames.names(data) vmeta = get!(metadata, vname, Dict{Symbol, Any}()) @@ -318,79 +388,44 @@ function OutputMemoryDomain( vmeta[:field_data] = PB.ScalarData vmeta[:space] = PB.ScalarSpace vmeta[:data_dims] = () - end - - return OutputMemoryDomain( - name, - data, - coords_record, - [], - metadata, - nothing, - [], - DataFrames.nrow(data) - ) - -end - -""" - OutputMemoryDomain(name::AbstractString, coords_record::PALEOmodel.FieldRecord) - -Create from a `coords_record` (eg defining `tmodel`). Add additional Fields with -`add_field!`. -""" -function OutputMemoryDomain( - name::AbstractString, coords_record::PALEOmodel.FieldRecord; - data_dims::Vector{PB.NamedDimension} = Vector{PB.NamedDimension}(), - grid = nothing, -) - data = DataFrames.DataFrame() - - haskey(coords_record.attributes, :var_name) || - throw(ArgumentError("FieldRecord has no :var_name attribute")) - varname = coords_record.attributes[:var_name] - - DataFrames.insertcols!(data, Symbol(varname)=>coords_record.records) - metadata = Dict(varname=>deepcopy(coords_record.attributes)) - # update domain name - metadata[varname][:domain_name] = name - return OutputMemoryDomain( - name, - data, - Symbol(varname), - data_dims, - metadata, - grid, - [], - DataFrames.nrow(data) - ) + odom.data[Symbol(vname)] = PALEOmodel.FieldRecord( + odom, + data[!, Symbol(vname)], + PB.ScalarData, + (), + PB.ScalarSpace, + nothing, + vmeta; + # coords_record + ) + end + return odom end function add_record!(odom::OutputMemoryDomain, modeldata, rec_coord) - odom._nrecs +=1 - df = odom.data - - df[!, odom.coords_record][odom._nrecs] = rec_coord - - for var in odom._all_vars - field = PB.get_field(var, modeldata) - values = PB.get_values_output(field) - - if PALEOmodel.field_single_element(field) - df[!, Symbol(var.name)][odom._nrecs] = values[] - else - # copy array(s) data - df[!, Symbol(var.name)][odom._nrecs] = copy(values) - end + odom.record_dim = PB.NamedDimension(odom.record_dim.name, odom.record_dim.size + 1) + + rec_dim_fr = odom.data[Symbol(odom.record_dim.name)] + push!(rec_dim_fr, rec_coord) + + for (fr, var) in PB.IteratorUtils.zipstrict(values(odom.data), odom._all_vars) + if !isnothing(var) + field = PB.get_field(var, modeldata) + push!(fr, field) + end end return nothing end +function Base.append!(output1::OutputMemoryDomain, output2::OutputMemoryDomain) + error("Not implemented") +end + function PB.add_field!(odom::OutputMemoryDomain, fr::PALEOmodel.FieldRecord) length(fr) == length(odom) || @@ -399,60 +434,49 @@ function PB.add_field!(odom::OutputMemoryDomain, fr::PALEOmodel.FieldRecord) haskey(fr.attributes, :var_name) || throw(ArgumentError("FieldRecord has no :var_name attribute")) varname = fr.attributes[:var_name] - !(Symbol(varname) in names(odom.data)) || + !(Symbol(varname) in keys(odom.data)) || throw(ArgumentError("Variable $varname already exists")) - DataFrames.insertcols!(odom.data, Symbol(varname)=>fr.records) - odom.metadata[varname] = deepcopy(fr.attributes) - # update domain name - odom.metadata[varname][:domain_name] = odom.name + odom.data[Symbol(varname)] = fr return nothing end -function PB.get_field(odom::OutputMemoryDomain, varname) - - df = odom.data - varname in DataFrames.names(df) || - error("Variable $varname not found in output (no column '$varname' in Dataframe output.domains[\"$(odom.name)\"].data)") - - use_all_records = DataFrames.nrow(df) == odom._nrecs - # vdata = df[!, Symbol(varname)] - vdata = use_all_records ? df[!, Symbol(varname)] : df[1:odom._nrecs, Symbol(varname)] - - attributes = get(odom.metadata, varname, nothing) +function PB.has_variable(odom::OutputMemoryDomain, varname::AbstractString) + return haskey(odom.data, Symbol(varname)) +end - !isnothing(attributes) || - @error "$(odom.name).$varname has no attributes" +function PB.get_field(odom::OutputMemoryDomain, varname_or_varnamefull::AbstractString) - grid = odom.grid + is_varnamefull = contains(varname_or_varnamefull, ".") - data_dims = [] - for dimname in attributes[:data_dims] - idx = findfirst(d -> d.name==dimname, odom.data_dims) - !isnothing(idx) || - error("Domain $(odom.name) has no dimension='$dimname' (available dimensions: $(odom.data_dims)") - push!(data_dims, odom.data_dims[idx]) + if is_varnamefull + fr = PB.get_field(odom.parentdataset, varname_or_varnamefull) + else + fr = get(odom.data, Symbol(varname_or_varnamefull), nothing) + !isnothing(fr) || + error("Variable $varname_or_varnamefull not found in output (no key '$varname_or_varnamefull' in OutputMemoryDomain output.domains[\"$(odom.name)\"].data)") end - field_data = attributes[:field_data] - space = attributes[:space] - - fr = PALEOmodel.wrap_fieldrecord( - vdata, field_data, Tuple(data_dims), missing, space, grid, attributes, - coords_record=[ - PB.FixedCoord( - String(odom.coords_record), - # df[!, odom.coords_record], - use_all_records ? df[!, odom.coords_record] : df[1:odom._nrecs, odom.coords_record], - odom.metadata[String(odom.coords_record)] - ), - ] - ) - return fr end +function PB.get_data(output::OutputMemoryDomain, varname::AbstractString; records=nothing) + + fr = PB.get_field(output, varname) + + if isnothing(records) + data_output = fr.records + else + data_output = fr.records[records] + # bodge - fix scalar data + if isa(records, Integer) && !isa(data_output, AbstractVector) + data_output =[data_output] + end + end + + return data_output +end function PB.show_variables( odom::OutputMemoryDomain; @@ -460,9 +484,9 @@ function PB.show_variables( filter = attrb->true, ) shownames = [] - for vn in names(odom.data) - if filter(odom.metadata[vn]) - push!(shownames, vn) + for (vnamesym, vfr) in odom.data + if filter(vfr.attributes) + push!(shownames, string(vnamesym)) end end sort!(shownames) @@ -470,12 +494,67 @@ function PB.show_variables( df = DataFrames.DataFrame() df.name = shownames for att in attributes - DataFrames.insertcols!(df, att=>[get(odom.metadata[vn], att, missing) for vn in shownames]) + DataFrames.insertcols!(df, att=>[get(odom.data[Symbol(vn)].attributes, att, missing) for vn in shownames]) end return df end + +function PB.get_table( + odom::OutputMemoryDomain, varnames::Vector{<:AbstractString} = AbstractString[], +) + df = DataFrames.DataFrame( + [k => v.records for (k, v) in odom.data if (isempty(varnames) || string(k) in varnames)] + ) + + return df +end + +function PB.get_dimensions(odom::OutputMemoryDomain) + spatial_dims = isnothing(odom.grid) ? PB.NamedDimension[] : PB.get_dimensions(odom.grid) + return vcat(spatial_dims, odom.data_dims, odom.record_dim) +end + +function PB.get_dimension(odom::OutputMemoryDomain, dimname::AbstractString) + # special case records to always return record_dim + if dimname == "records" + dimname = odom.record_dim.name + end + # + ad = PB.get_dimensions(odom) + idx = findfirst(d -> d.name==dimname, ad) + !isnothing(idx) || + error("OutputMemoryDomain $(odom.name) has no dimension='$dimname' (available dimensions: $ad") + return ad[idx] +end + +function PB.get_coordinates(odom::OutputMemoryDomain, dimname::AbstractString) + coord_names = String[] + if !isnothing(odom.grid) + coord_names = PB.get_coordinates(odom.grid, dimname) + end + if !isnothing(findfirst(dd -> dd.name == dimname, odom.data_dims)) + coord_names = get(odom.data_dims_coordinates, dimname, String[]) + end + if dimname in ("records", odom.record_dim.name) + coord_names = odom.record_dim_coordinates + end + + return coord_names +end + + +# coords = PALEOmodel.FixedCoord[] +# for cn in coord_names +# cfr = PB.get_field(odom, cn) +# @Infiltrator.infiltrate +# push!(coords, PALEOmodel.FixedCoord(cn, cfr.records, cfr.attributes)) +# end + +# return coords +# end + ######################################## # OutputMemory ########################################## @@ -529,34 +608,30 @@ function Base.length(output::OutputMemory) end -function PB.get_table(output::OutputMemory, domainname::AbstractString) +function PB.get_table(output::OutputMemory, domainname::AbstractString, varnames::Vector{<:AbstractString} = AbstractString[]) haskey(output.domains, domainname) || throw(ArgumentError("no Domain $domainname")) - return output.domains[domainname].data + return PB.get_table(output.domains[domainname], varnames) end -function PB.get_table(output::OutputMemory, varnames::Vector{<:AbstractString}) +function PB.get_table(output::OutputMemory, varnamefulls::Vector{<:AbstractString}) df = DataFrames.DataFrame() - for varname in varnames - if contains(varname, ".") - vdom, vname = split(varname, ".") - if haskey(output.domains, vdom) - dfdom = output.domains[vdom].data - if vname in names(dfdom) - vardata = dfdom[:, Symbol(vname)] - df = DataFrames.insertcols!(df, varname=>vardata) - else - @warn "no Variable found for $varname" - end + for varnamefull in varnamefulls + vdom, varname = domain_variable_name(varnamefull) + if haskey(output.domains, vdom) + if PB.has_variable(output.domains[vdom], varname) + vardata = PB.get_data(output.domains[vdom], varname) + df = DataFrames.insertcols!(df, varnamefull=>vardata) else - @warn "no Domain found for $varname" + @warn "no Variable found for $varnamefull" end else - @warn "Variable $varname is not of form ." + @warn "no Domain found for $varnamefull" end end + return df end @@ -599,30 +674,9 @@ Save to `filename` in JLD2 format (NB: filename must either have no extension or """ function save_jld2(output::OutputMemory, filename) - @warn """Files created by save_jld2 will not be readable with future versions of PALEO + @error """save_jld2 has been removed. Please use save_netcdf instead""" - filename = _check_filename_ext(filename, ".jld2") - - # create a temporary copy to omit _all_vars - output_novars = copy(output.domains) - for (k, omd) in output_novars - output_novars[k] = OutputMemoryDomain( - omd.name, - omd.data, - omd.coords_record, - omd.data_dims, - omd.metadata, - omd.grid, - [], # omit _allvars - omd._nrecs, - ) - end - - @info "saving to $filename ..." - FileIO.save(filename, output_novars) - @info "done" - return nothing end @@ -639,18 +693,10 @@ julia> output = PALEOmodel.OutputWriters.load_jld2!(PALEOmodel.OutputWriters.Out """ function load_jld2!(output::OutputMemory, filename) - filename = _check_filename_ext(filename, ".jld2") - - @info "loading from $filename ..." - - jld2data = FileIO.load(filename) # returns Dict - - empty!(output.domains) - for (domainname, odom) in jld2data - output.domains[domainname] = odom - end + @error """load_jld2! has been removed. + Please use save_netcdf, load_netcdf! instead""" - return output + return nothing end @@ -659,8 +705,7 @@ end function Base.append!(output1::OutputMemory, output2::OutputMemory) for domname in keys(output1.domains) o1dom, o2dom = output1.domains[domname], output2.domains[domaname] - append!(o1dom.data, o2dom.data) - o1dom.nrecs += o2dom.nrecs + append!(o1dom, o2dom) end return output1 @@ -669,17 +714,36 @@ end function initialize!( output::OutputMemory, model::PB.Model, modeldata::PB.ModelData, nrecords; - rec_coord::Symbol=:tmodel, # deprecated - coords_record::Symbol=rec_coord, coords_record_units::AbstractString="yr", + record_dim_name::Symbol=:tmodel, + record_coord_name::Symbol=record_dim_name, + record_coord_units::AbstractString="yr", + coords_record::Union{Symbol, Nothing}=nothing, # deprecated + coords_record_units::Union{AbstractString, Nothing}=nothing, # deprecated + rec_coord::Union{Symbol, Nothing}=nothing, # deprecated ) + if !isnothing(coords_record) + @warn "coords_record is deprecated, use record_dim_name" + record_dim_name = coords_record + record_coord_name = coords_record + end + if !isnothing(rec_coord) + @warn "rec_coord is deprecated, use record_dim_name" + record_dim_name = rec_coord + record_coord_name = rec_coord + end + if !isnothing(coords_record_units) + @warn "coords_record_units is deprecated, use record_coord_units" + record_coord_units = coords_record_units + end - # Create Dict of DataFrames with output empty!(output.domains) for dom in model.domains output.domains[dom.name] = OutputMemoryDomain( - dom, modeldata, nrecords, - coords_record=coords_record, - coords_record_units=coords_record_units, + dom, modeldata; # nrecords; + parentdataset=output, + record_dim_name, + record_coord_name, + record_coord_units, ) end @@ -701,7 +765,7 @@ function PB.has_variable(output::OutputMemory, varname::AbstractString) return ( haskey(output.domains, domainname) - && varname in DataFrames.names(output.domains[domainname].data) + && PB.has_variable(output.domains[domainname], varname) ) end @@ -734,21 +798,8 @@ function PB.get_data(output::OutputMemory, varnamefull::AbstractString; records= error("Variable $varnamefull not found in output: domain $(domainname) not present") odom = output.domains[domainname] - df = odom.data - varname in DataFrames.names(df) || - error("Variable $varname not found in output (no column '$varname' in Dataframe output.domains[\"$domainname\"].data)") - - if isnothing(records) - output = df[!, Symbol(varname)] - else - output = df[records, Symbol(varname)] - # bodge - fix scalar data - if isa(records, Integer) && !isa(output, AbstractVector) - output =[output] - end - end - return output + return PB.get_data(odom, varname; records) end @@ -756,20 +807,67 @@ end # Pretty printing ############################ -"compact form" +# compact form function Base.show(io::IO, odom::OutputMemoryDomain) print(io, "OutputMemoryDomain(name=", odom.name, + ", record_dim=", odom.record_dim, ", data_dims=", odom.data_dims, - ", length=", length(odom), ")") end -"compact form" +# multiline form +function Base.show(io::IO, ::MIME"text/plain", odom::OutputMemoryDomain) + println(io, "OutputMemoryDomain") + println(io, " name: $(odom.name)") + println(io, " record_dim: ", odom.record_dim) + if !isempty(odom.record_dim_coordinates) + println(io, " coordinates: ", odom.record_dim_coordinates) + end + println(io, " data_dims:") + for nd in odom.data_dims + println(io, " ", nd) + if haskey(odom.data_dims_coordinates, nd.name) + println(io, " coordinates: ", odom.data_dims_coordinates[nd.name]) + end + end + println(io, " grid:") + if !isnothing(odom.grid) + iogrid = IOBuffer() + show(iogrid, MIME"text/plain"(), odom.grid) + seekstart(iogrid) + for line in eachline(iogrid) + println(io, " ", line) + end + end + # TODO - show variables ? +end + + +# compact form function Base.show(io::IO, output::OutputMemory) print(io, "OutputMemory(domains=", keys(output.domains), ", user_data=", output.user_data, ")") end +# multiline form +function Base.show(io::IO, ::MIME"text/plain", output::OutputMemory) + println(io, "OutputMemory") + println(io, " user_data:") + for (k, v) in output.user_data + println(io, " ", k, " => ", v) + end + println(io, " domains:") + for (name, odom) in output.domains + iodom = IOBuffer() + show(iodom, MIME"text/plain"(), odom) + seekstart(iodom) + for line in eachline(iodom) + println(io, " ", line) + end + end + + return nothing +end ############################ @@ -810,12 +908,10 @@ Save to `filename` in netcdf4 format (NB: filename must either have no extension # Keyword arguments - `check_ext::Bool = true`: check that filename ends in ".nc" -- `add_coordinates::Bool = false`: true to attempt to add CF convention coords to variables (experimental, doesn't look that useful) """ function save_netcdf( output::OutputMemory, filename; check_ext::Bool=true, - add_coordinates::Bool=false, ) if check_ext filename = _check_filename_ext(filename, ".nc") @@ -831,7 +927,7 @@ function save_netcdf( @info "saving to $filename ..." NCDatasets.NCDataset(filename, "c") do nc_dataset - nc_dataset.attrib["PALEO_netcdf_version"] = "0.1.0" + nc_dataset.attrib["PALEO_netcdf_version"] = "0.2.0" nc_dataset.attrib["PALEO_domains"] = join([k for (k, v) in output.domains], " ") for (k, v) in output.user_data @@ -845,9 +941,10 @@ function save_netcdf( for (domname, dom) in output.domains dsg = NCDatasets.defGroup(nc_dataset, dom.name; attrib=[]) - coords_record = String(dom.coords_record) # record dimension (eg tmodel) - dsg.attrib["coords_record"] = coords_record - NCDatasets.defDim(dsg, coords_record, dom._nrecs) + record_dim_name = dom.record_dim.name # record dimension (eg tmodel) + dsg.attrib["record_dim_name"] = record_dim_name + NCDatasets.defDim(dsg, record_dim_name, dom.record_dim.size) + dsg.attrib["record_dim_coordinates"] = dom.record_dim_coordinates spatial_dimnames, spatial_unpackfn = grid_to_netcdf!(dsg, dom.grid) @@ -858,28 +955,17 @@ function save_netcdf( # As a workaround, we generate a unique name from dim_name * coord_name, and add the Variable data_dim_names = String[d.name for d in dom.data_dims] dsg.attrib["data_dims"] = data_dim_names - for data_dim in dom.data_dims - NCDatasets.defDim(dsg, data_dim.name, data_dim.size) - gen_unique_coord_names = String[data_dim.name*"_"*co.name for co in data_dim.coords] - dsg.attrib[data_dim.name*"_coords"] = gen_unique_coord_names - for (co, coname) in zip(data_dim.coords, gen_unique_coord_names) - # add variable - v = NCDatasets.defVar(dsg, coname, co.values, (data_dim.name,)) - attributes_to_netcdf!(v, co.attributes) - end - end + coordnames_to_netcdf(dsg, "data_dims_coords", dom.data_dims_coordinates) - colnames = sort(names(dom.data)) + varnames = sort(String.(keys(dom.data))) added_isotopelinear_dims = false nc_all_vdata = [] - for vname in colnames - haskey(dom.metadata, vname) || error("no metadata for Variable $(dom.name).$vname") - - attributes = dom.metadata[vname] - # see get_field method above - data_dims = attributes[:data_dims] - field_data = attributes[:field_data] - space = attributes[:space] + for vname in varnames + + varfr = PB.get_field(dom, vname) + # see get_field method above + field_data = varfr.attributes[:field_data] + space = varfr.attributes[:space] if field_data == PB.IsotopeLinear && !added_isotopelinear_dims NCDatasets.defDim(dsg, "isotopelinear", 2) @@ -888,22 +974,18 @@ function save_netcdf( added_isotopelinear_dims = true end - vdata = dom.data[1:dom._nrecs, vname] - @debug "writing Variable $(dom.name).$vname dframe eltype $(eltype(vdata)) space $space" + @debug "writing Variable $(dom.name).$vname records eltype $(eltype(varfr.records)) space $space" nc_v, nc_vdata = variable_to_netcdf!( dsg, - coords_record, + record_dim_name, vname, - vdata, + varfr, field_data, spatial_dimnames, spatial_unpackfn, - data_dims, space, - dom.grid, - attributes; - add_coordinates, + dom.grid; define_only=define_all_first, # just define the variable, don't write the data ) @@ -951,7 +1033,8 @@ function load_netcdf!(output::OutputMemory, filename; check_ext=true) paleo_netcdf_version = get(nc_dataset.attrib, "PALEO_netcdf_version", missing) !ismissing(paleo_netcdf_version) || error("not a PALEO netcdf output file ? (key PALEO_netcdf_version not present)") - paleo_netcdf_version == "0.1.0" || error("unsupported PALEO_netcdf_version $paleo_netcdf_version") + paleo_netcdf_version in ("0.1.0", "0.2.0") || + @warn "unsupported PALEO_netcdf_version $paleo_netcdf_version : supported versions 0.1, 0.2" for (k, v) in nc_dataset.attrib if k in ("PALEO_netcdf_version", "PALEO_domains") @@ -967,10 +1050,20 @@ function load_netcdf!(output::OutputMemory, filename; check_ext=true) end for (domainname, dsg) in nc_dataset.group - coords_record = dsg.attrib["coords_record"] - nrecs = dsg.dim[coords_record] - data = DataFrames.DataFrame() - metadata = Dict{String, Dict{Symbol, Any}}() + if haskey(dsg.attrib, "record_dim_name") + record_dim_name = dsg.attrib["record_dim_name"] + else + # PALEO_netcdf_version 0.1.0 backwards compatibility + record_dim_name = dsg.attrib["coords_record"] + end + nrecs = dsg.dim[record_dim_name] + + if haskey(dsg.attrib, "record_dim_coordinates") + record_dim_coordinates = ncattrib_as_vector(dsg, "record_dim_coordinates") + else + # PALEO_netcdf_version 0.1.0 backwards compatibility + record_dim_coordinates = String[coords_record] + end # reading out variables is slow, so do this once dsgvars = Dict(varname=>var for (varname, var) in dsg) @@ -979,45 +1072,37 @@ function load_netcdf!(output::OutputMemory, filename; check_ext=true) data_dim_names = ncattrib_as_vector(dsg, "data_dims") data_dim_sizes = [dsg.dim[ddn] for ddn in data_dim_names] - data_dims = PB.NamedDimension[] - for (ddn, dds) in zip(data_dim_names, data_dim_sizes) - dd_coords = PB.FixedCoord[] - for dd_coord_name in ncattrib_as_vector(dsg, ddn*"_coords") - var = dsgvars[dd_coord_name] - attributes = netcdf_to_attributes(var) - !(coords_record in NCDatasets.dimnames(var)) || error("data_dim coord variable $dd_coord_name is not constant! (has a $coords_record dimension)") - push!(dd_coords, PB.FixedCoord(dd_coord_name, Array(var), attributes)) - end - push!(data_dims, PB.NamedDimension(ddn, dds, dd_coords)) - end + data_dims = [PB.NamedDimension(ddn, dds) for (ddn, dds) in zip(data_dim_names, data_dim_sizes)] + data_dims_coordinates = netcdf_to_coordnames(dsg, "data_dims_coords") + + odom = OutputMemoryDomain( + output, + domainname, + OrderedCollections.OrderedDict{Symbol, PALEOmodel.FieldRecord}(), + PB.NamedDimension(record_dim_name, nrecs), + record_dim_coordinates, + data_dims, + data_dims_coordinates, + grid, + [], # omit _allvars + ) for (vnamenetcdf, var) in dsgvars if haskey(var.attrib, "var_name") # a PALEO variable, not eg a grid variable attributes = netcdf_to_attributes(var) vname = netcdf_to_name(vnamenetcdf, attributes) - metadata[vname] = attributes - vdata = netcdf_to_variable( + vfr = netcdf_to_variable( + odom, var, - coords_record, attributes, - grid, spatial_packfn, - nrecs, ) - data[!, vname] = vdata + + odom.data[Symbol(vname)] = vfr end end - output.domains[domainname] = OutputMemoryDomain( - domainname, - data, - Symbol(coords_record), - data_dims, - metadata, - grid, - [], # omit _allvars - nrecs, - ) + output.domains[domainname] = odom end end @@ -1026,22 +1111,19 @@ function load_netcdf!(output::OutputMemory, filename; check_ext=true) end -# write a variable to netcdf. cf get_field, wrap_fieldrecord +# write a variable to netcdf. cf get_field, FieldRecord function variable_to_netcdf!( ds, - coords_record::AbstractString, + record_dim_name::AbstractString, vname::AbstractString, - vdata::Vector, + vfr::PALEOmodel.FieldRecord, field_data::Type{<:PB.AbstractData}, grid_spatial_dims::NTuple{NS, String}, spatial_unpackfn, - data_dims::NTuple{ND, String}, space::Type{S}, - grid::Union{Nothing, PB.AbstractMesh}, - attributes; - add_coordinates=false, + grid::Union{Nothing, PB.AbstractMesh}; define_only=true, -) where {NS, ND, S <: PB.AbstractSpace} +) where {NS, S <: PB.AbstractSpace} if space === PB.ScalarSpace spatial_dims = () @@ -1050,20 +1132,23 @@ function variable_to_netcdf!( spatial_dims = grid_spatial_dims unpackfn = spatial_unpackfn end - coordinates = [spatial_dims...] - vname = name_to_netcdf(vname, attributes) + vname = name_to_netcdf(vname, vfr.attributes) + + data_dims = vfr.attributes[:data_dims] + ND = length(data_dims) - if variable_is_constant(vname, vdata, attributes) + if variable_is_constant(vname, vfr.records, vfr.attributes) v_records_dim = () - vdata = unpackfn(first(vdata)) + vdata = unpackfn(first(vfr.records)) else - v_records_dim = (coords_record,) + v_records_dim = (record_dim_name,) if !PALEOmodel.field_single_element(field_data, ND, space, typeof(grid)) # concatenate to array - vdata = vectors_to_array(vdata, unpackfn) + vdata = vectors_to_array(vfr.records, unpackfn) + else + vdata = vfr.records end - push!(coordinates, coords_record) end field_data_dims = field_data_netcdf_dimensions(field_data) @@ -1076,27 +1161,29 @@ function variable_to_netcdf!( vdt = define_only ? eltype(vdata) : vdata v = NCDatasets.defVar(ds, vname, vdt, (field_data_dims..., spatial_dims..., data_dims..., v_records_dim...)) - attributes_to_netcdf!(v, attributes) - if add_coordinates - v.attrib["coordinates"] = join(coordinates, " ") # TODO CF convention for coordinates ? - end + attributes_to_netcdf!(v, vfr.attributes) return (v, vdata) end # read a variable from netcdf function netcdf_to_variable( + odom, var, - coords_record::AbstractString, attributes, - grid, spatial_packfn, - nrecs, ) field_data = attributes[:field_data] space = attributes[:space] - data_dims = attributes[:data_dims] + data_dim_names = attributes[:data_dims] + data_dims_nd_vec = PB.NamedDimension[] + odom_dims = PB.get_dimensions(odom) + for dd_name in data_dim_names + dd_idx = findfirst(nd -> nd.name == dd_name, odom_dims) + push!(data_dims_nd_vec, odom_dims[dd_idx]) + end + data_dims = Tuple(data_dims_nd_vec) vdata = Array(var) # convert to Julia Array @@ -1104,16 +1191,53 @@ function netcdf_to_variable( packfn = space == PB.ScalarSpace ? identity : spatial_packfn - if coords_record in NCDatasets.dimnames(var) - if !PALEOmodel.field_single_element(field_data, length(data_dims), space, typeof(grid)) + if odom.record_dim.name in NCDatasets.dimnames(var) + if !PALEOmodel.field_single_element(field_data, length(data_dims), space, typeof(odom.grid)) vdata = array_to_vectors(vdata, packfn) end else # OutputMemory has no concept of a constant variable, so replicate - vdata = fill(packfn(vdata), nrecs) # scalar -> Vector, Array/Vector -> Vector of Arrays/Vectors + vdata = packfn(vdata) + if vdata isa AbstractArray && ndims(vdata) == 0 + # 0D Array -> scalar + vdata = vdata[] + end + vdata = fill(vdata, odom.record_dim.size) # scalar -> Vector, Array/Vector -> Vector of Arrays/Vectors + end + + + vfr = PALEOmodel.FieldRecord( + odom, + vdata, + field_data, + data_dims, + space, + odom.grid, + attributes, + ) + + return vfr +end + +function coordnames_to_netcdf(ds, rootname::AbstractString, coordinates::Dict{String, Vector{String}}) + for (dimname, coordnames) in coordinates + ds.attrib[rootname*"_"*dimname] = coordnames end - return vdata + return nothing +end + +function netcdf_to_coordnames(ds, rootname::AbstractString) + coordinates = Dict{String, Vector{String}}() + + for attname in keys(ds.attrib) + if (length(attname) > length(rootname) + 1) && attname[1:length(rootname)] == rootname + dimname = attname[(length(rootname)+2):end] + coordinates[dimname] = ncattrib_as_vector(ds, attname) + end + end + + return coordinates end # ScalarData no additional dimensions @@ -1187,7 +1311,7 @@ function netcdf_to_field_data(x, field_data::Type{PB.IsotopeLinear}) end # TODO PALEO has no real concept of time-independent variables -# time-independent variables are indicated by setting data_type attribute, +# time-independent variables are indicated by setting datatype attribute, # but this is also used for other purposes # This uses data_type to guess if a variable is constant, and then checks the values function variable_is_constant(vname::AbstractString, vdata::Vector, attributes) @@ -1330,6 +1454,8 @@ function grid_to_netcdf!(ds, grid::PB.Grids.UnstructuredVectorGrid) NCDatasets.defDim(ds, "cells", grid.ncells) + coordnames_to_netcdf(ds, "grid_coords", grid.coordinates) + # named cells cellnames = [String(k) for (k, v) in grid.cellnames] cellnames_indices = [v for (k, v) in grid.cellnames] @@ -1359,10 +1485,7 @@ function grid_to_netcdf!(ds, grid::PB.Grids.UnstructuredColumnGrid) v = NCDatasets.defVar(ds, "Icolumns", Icolumns .- 1, ("cells",)) # NB: zero-based v.attrib["comment"] = "zero-based indices of cells from top to bottom ordered by columns" - # optional z_coords - # we only store name, assuming the coord data will be a variable - z_coord_names = String[zc.name for zc in grid.z_coords] - ds.attrib["PALEO_z_coords"] = z_coord_names + coordnames_to_netcdf(ds, "grid_coords", grid.coordinates) # optional column labels if !isempty(grid.columnnames) @@ -1388,7 +1511,9 @@ function grid_to_netcdf!(ds, grid::PB.Grids.CartesianArrayGrid{N}) where {N} subdomain_to_netcdf!(ds, name, subdom) end - return (Tuple(grid.dimnames), identity) + coordnames_to_netcdf(ds, "grid_coords", grid.coordinates) + + return (Tuple(nd.name for nd in grid.dimensions), identity) end function grid_to_netcdf!(ds, grid::PB.Grids.CartesianLinearGrid{N}) where {N} @@ -1399,8 +1524,9 @@ function grid_to_netcdf!(ds, grid::PB.Grids.CartesianLinearGrid{N}) where {N} _cartesiandimscoords_to_netcdf(ds, grid) - nc_linear_index = NCDatasets.defVar(ds, "linear_index", grid.linear_index .- 1, grid.dimnames) # netcdf zero indexed - nc_linear_index.attrib["coordinates"] = join(grid.dimnames, " ") + dimnames = [nd.name for nd in grid.dimensions] + nc_linear_index = NCDatasets.defVar(ds, "linear_index", grid.linear_index .- 1, dimnames) # netcdf zero indexed + nc_linear_index.attrib["coordinates"] = join(dimnames, " ") # # CF conventions 'lossless compression by gathering' # poorly supported ? (doesn't work with xarray or iris) @@ -1427,9 +1553,11 @@ function grid_to_netcdf!(ds, grid::PB.Grids.CartesianLinearGrid{N}) where {N} subdomain_to_netcdf!(ds, name, subdom) end + coordnames_to_netcdf(ds, "grid_coords", grid.coordinates) + spatial_unpackfn = d -> PB.Grids.internal_to_cartesian(grid, d) # unpack linear representation into cartesian grid - return (Tuple(grid.dimnames), spatial_unpackfn) + return (Tuple(nd.name for nd in grid.dimensions), spatial_unpackfn) end function _cartesiandimscoords_to_netcdf( @@ -1440,46 +1568,22 @@ function _cartesiandimscoords_to_netcdf( # dimensions NCDatasets.defDim(ds, "cells", grid.ncells) - ds.attrib["PALEO_dimnames"] = grid.dimnames - for (dimname, dim) in zip(grid.dimnames, grid.dims) - NCDatasets.defDim(ds, dimname, dim) + ds.attrib["PALEO_dimnames"] = [nd.name for nd in grid.dimensions] + for nd in grid.dimensions + NCDatasets.defDim(ds, nd.name, nd.size) end - # coordinates + ds.attrib["PALEO_dimnames_extra"] = [nd.name for nd in grid.dimensions_extra] + for nd in grid.dimensions_extra + NCDatasets.defDim(ds, nd.name, nd.size) + end + + ds.attrib["PALEO_londim"] = grid.londim + ds.attrib["PALEO_latdim"] = grid.latdim + ds.attrib["PALEO_zdim"] = grid.zdim ds.attrib["PALEO_zidxsurface"] = grid.zidxsurface ds.attrib["PALEO_display_mult"] = grid.display_mult - have_coords = (length(grid.coords) == length(grid.dims)) - have_edges = (length(grid.coords_edges) == length(grid.dims)) - if have_edges - NCDatasets.defDim(ds, "bnds", 2) - end - if have_coords - for (i, dimname) in enumerate(grid.dimnames) - cv = NCDatasets.defVar(ds, dimname, grid.coords[i], (dimname,)) - if i == grid.londim - cv.attrib["axis"] = "X" - cv.attrib["units"] = "degrees_east" - cv.attrib["long_name"] = "longitude" - elseif i == grid.latdim - cv.attrib["axis"] = "Y" - cv.attrib["units"] = "degrees_north" - cv.attrib["long_name"] = "latitude" - elseif i == grid.zdim - cv.attrib["axis"] = "Z" - cv.attrib["units"] = "meters" - cv.attrib["positive"] = grid.display_mult[i] > 0 ? "up" : "down" - end - if have_edges - bndsname = dimname*"_bnds" - cv.attrib["bounds"] = bndsname - coord_edges = grid.coords_edges[i] - bv = NCDatasets.defVar(ds, bndsname, eltype(coord_edges), ("bnds", dimname,)) - bv[1, :] .= coord_edges[1:end-1] - bv[2, :] .= coord_edges[2:end] - end - end - end - + return nothing end @@ -1513,7 +1617,9 @@ function netcdf_to_grid(::Type{PB.Grids.UnstructuredVectorGrid}, ds::NCDatasets. vec_cellnames_indices = ncattrib_as_vector(ds, "PALEO_cellnames_indices") .+ 1 # netcdf is zero offset cellnames = Dict{Symbol, Int}(k=>v for (k, v) in zip(vec_cellnames, vec_cellnames_indices)) - return (PB.Grids.UnstructuredVectorGrid(ncells, cellnames, subdomains), identity) + coordinates = netcdf_to_coordnames(ds, "grid_coords") + + return (PB.Grids.UnstructuredVectorGrid(ncells, cellnames, subdomains, coordinates), identity) end function netcdf_to_grid(::Type{PB.Grids.UnstructuredColumnGrid}, ds::NCDatasets.Dataset, dsvars::Dict) @@ -1531,13 +1637,10 @@ function netcdf_to_grid(::Type{PB.Grids.UnstructuredColumnGrid}, ds::NCDatasets. colstart += cells_this_column end - # optional z_coords - z_coords = PB.FixedCoord[] - for z_coord_name in ncattrib_as_vector(ds, "PALEO_z_coords") - var = dsvars[z_coord_name] - attributes = netcdf_to_attributes(var) - !("records" in NCDatasets.dimnames(var)) || error("z_coord variable $z_coord_name is not constant! (has a records dimension)") - push!(z_coords, PB.FixedCoord(z_coord_name, Array(var), attributes)) + coordinates = netcdf_to_coordnames(ds, "grid_coords") + # backwards compatibility + if haskey(ds.attrib, "PALEO_z_coords") && !haskey(coordinates, "cells") + coordinates["cells"] = ncattrib_as_vector(ds, "PALEO_z_coords") end # optional column labels @@ -1547,23 +1650,25 @@ function netcdf_to_grid(::Type{PB.Grids.UnstructuredColumnGrid}, ds::NCDatasets. columnnames = Symbol[] end - return (PB.Grids.UnstructuredColumnGrid(ncells, Icolumns, z_coords, columnnames, subdomains), identity) + return (PB.Grids.UnstructuredColumnGrid(ncells, Icolumns, columnnames, subdomains, coordinates), identity) end function netcdf_to_grid(::Type{PB.Grids.CartesianArrayGrid}, ds::NCDatasets.Dataset, dsvars::Dict) subdomains = netcdf_to_subdomains(dsvars) - ncells, dimnames, dims, zidxsurface, display_mult, coords, coords_edges, londim, latdim, zdim = + ncells, dimensions, dimensions_extra, zidxsurface, display_mult, londim, latdim, zdim = _netcdf_to_cartesiandimscoords(PB.Grids.CartesianArrayGrid, ds, dsvars) + coordinates = netcdf_to_coordnames(ds, "grid_coords") + grid = PB.Grids.CartesianArrayGrid{length(dims)}( ncells, - dimnames, dims, - coords, coords_edges, + dimensions, dimensions_extra, londim, latdim, zdim, zidxsurface, display_mult, subdomains, + coordinates, ) return (grid, identity) @@ -1573,14 +1678,16 @@ function netcdf_to_grid(::Type{PB.Grids.CartesianLinearGrid}, ds::NCDatasets.Dat subdomains = netcdf_to_subdomains(dsvars) - ncells, dimnames, dims, zidxsurface, display_mult, coords, coords_edges, londim, latdim, zdim = + ncells, dimensions, dimensions_extra, zidxsurface, display_mult, londim, latdim, zdim = _netcdf_to_cartesiandimscoords(PB.Grids.CartesianLinearGrid, ds, dsvars) ncolumns = ds.attrib["PALEO_columns"] + coordinates = netcdf_to_coordnames(ds, "grid_coords") + # convert back to linear vectors linear_index = Array(dsvars["linear_index"]) .+ 1 # netcdf zero based # reconstruct cartesian index - cartesian_index = Vector{CartesianIndex{length(dims)}}() + cartesian_index = Vector{CartesianIndex{length(dimensions)}}() lin_cart_index = Int[] for ci in CartesianIndices(linear_index) i = linear_index[ci] @@ -1593,14 +1700,14 @@ function netcdf_to_grid(::Type{PB.Grids.CartesianLinearGrid}, ds::NCDatasets.Dat l = length(cartesian_index) l == ncells || error("cartesian <-> linear index invalid, length(cartesian_index) $l != ncells $ncells") - grid = PB.Grids.CartesianLinearGrid{length(dims)}( + grid = PB.Grids.CartesianLinearGrid{length(dimensions)}( ncells, ncolumns, - dimnames, dims, - coords, coords_edges, + dimensions, dimensions_extra, londim, latdim, zdim, zidxsurface, display_mult, subdomains, + coordinates, linear_index, cartesian_index, ) @@ -1618,39 +1725,18 @@ function _netcdf_to_cartesiandimscoords( ncells = ds.dim["cells"] - # dimensions and coordinates + # dimensions dimnames = ncattrib_as_vector(ds, "PALEO_dimnames") - dims = Int[] + dimensions = [PB.NamedDimension(dn, ds.dim[dn]) for dn in dimnames] + dimnames_extra = ncattrib_as_vector(ds, "PALEO_dimnames_extra") + dimensions_extra = [PB.NamedDimension(dn, ds.dim[dn]) for dn in dimnames_extra] + londim = ds.attrib["PALEO_londim"] + latdim = ds.attrib["PALEO_latdim"] + zdim = ds.attrib["PALEO_zdim"] zidxsurface = ds.attrib["PALEO_zidxsurface"] display_mult = ncattrib_as_vector(ds, "PALEO_display_mult") - - coords = Vector{Vector{Float64}}() - coords_edges = Vector{Vector{Float64}}() - londim, latdim, zdim = 0, 0, 0 - - for (i, dimname) in enumerate(dimnames) - push!(dims, ds.dim[dimname]) - if haskey(dsvars, dimname) - dimvar = dsvars[dimname] - push!(coords, Array(dimvar)) - axis = get(dimvar.attrib, "axis", nothing) - if axis == "X" - global londim = i - elseif axis == "Y" - global latdim = i - elseif axis == "Z" - global zdim = i - end - bndsname = dimname*"_bnds" - if haskey(dsvars, bndsname) - push!(coords_edges, vcat(ds[bndsname][1, :], ds[bndsname][2, end])) - end - end - end - isempty(coords) || (length(coords) == length(dimnames)) || error("spatial coordinates present but not on all dimensions") - isempty(coords_edges) || (length(coords_edges) == length(dimnames)) || error("spatial coordinates edges present but not on all dimensions") - return ncells, dimnames, dims, zidxsurface, display_mult, coords, coords_edges, londim, latdim, zdim + return ncells, dimensions, dimensions_extra, zidxsurface, display_mult, londim, latdim, zdim end function name_to_netcdf(vname, attributes) @@ -1738,6 +1824,7 @@ function netcdf_to_attributes(v) return attributes end + # NCDatasets will return a scalar even if the attribute was written as a vector with 1 element function ncattrib_as_vector(d, name) x = d.attrib[name] diff --git a/src/PALEOmodel.jl b/src/PALEOmodel.jl index 46ea507..24be4a1 100644 --- a/src/PALEOmodel.jl +++ b/src/PALEOmodel.jl @@ -21,6 +21,7 @@ import PALEOboxes as PB import ForwardDiff import ForwardDiff import SparsityTracing +import OrderedCollections import TimerOutputs: @timeit, @timeit_debug @@ -61,6 +62,10 @@ include("SteadyState.jl") include("SteadyStateKinsol.jl") +include("CoordsDims.jl") # TODO + +include("Regions.jl") # TODO + include("FieldArray.jl") include("FieldRecord.jl") diff --git a/src/PlotRecipes.jl b/src/PlotRecipes.jl index e3cc46c..8cc012e 100644 --- a/src/PlotRecipes.jl +++ b/src/PlotRecipes.jl @@ -166,16 +166,16 @@ RecipesBase.@recipe function f( vars = [vars] end - if isnothing(coords) - coords_records=nothing - else - check_coords_argument(coords) || - error("coords argument should be of form 'coords=[\"z\"=>(\"atm.zmid\", \"atm.zlower\", \"atm.zupper\"), ...]") - coords_records = [ - coord_name => Tuple(PB.get_field(output, cvn) for cvn in coord_varnames) - for (coord_name, coord_varnames) in coords - ] - end + # if isnothing(coords) + # coords_records=nothing + # else + # check_coords_argument(coords) || + # error("coords argument should be of form 'coords=[\"z\"=>(\"atm.zmid\", \"atm.zlower\", \"atm.zupper\"), ...]") + # coords_records = [ + # coord_name => Tuple(PB.get_field(output, cvn) for cvn in coord_varnames) + # for (coord_name, coord_varnames) in coords + # ] + # end delete!(plotattributes, :coords) for var in vars @@ -187,7 +187,7 @@ RecipesBase.@recipe function f( end # pass through to plot recipe for FieldRecord - PB.get_field(output, var), selectargs, coords_records + PB.get_field(output, var), selectargs, coords end end @@ -280,7 +280,7 @@ Example: to replace a 1D column default pressure coordinate with a z coordinate. NB: the coordinates will be generated by applying `selectargs`, so the supplied coordinate FieldRecords must have the same dimensionality as `fr`. """ -RecipesBase.@recipe function f(fr::FieldRecord, selectargs::NamedTuple, coords_records=nothing) +RecipesBase.@recipe function f(fr::FieldRecord, selectargs::NamedTuple, coords=nothing) # broadcast any Vector-valued argument in selectargs bcastargs = broadcast_dict([Dict{Symbol, Any}(pairs(selectargs))]) @@ -288,7 +288,7 @@ RecipesBase.@recipe function f(fr::FieldRecord, selectargs::NamedTuple, coords_r for sa in bcastargs sa_nt = NamedTuple(sa) RecipesBase.@series begin - get_array(fr, sa_nt; coords=coords_records) + get_array(fr, sa_nt; coords) end end end @@ -378,7 +378,7 @@ RecipesBase.@recipe function f( values = map_values.(values) delete!(plotattributes, :map_values) - if length(fa.dims) == 1 + if length(fa.dims_coords) == 1 if !isempty(labellist) label --> popfirst!(labellist) elseif !isnothing(labelattribute) @@ -386,20 +386,20 @@ RecipesBase.@recipe function f( else label --> labelprefix*name end - f_label = PB.append_units(get_attribute(fa, :var_name, ""), fa.attributes) - coords_vec = fa.dims[1].coords + f_label = append_units(get_attribute(fa, :var_name, ""), fa.attributes) + nameddim, coords_vec = fa.dims_coords[1] if length(coords_vec) == 1 || length(coords_vec) > 3 co = first(coords_vec) return swap_coords_values( co.values, values, - PB.append_units(co.name, co.attributes), f_label, + append_units(co.name, co.attributes), f_label, ) elseif length(coords_vec) in (2, 3) co_lower = coords_vec[end-1] co_upper = coords_vec[end] - c_label = PB.append_units(co_lower.name*", "*co_upper.name, co_lower.attributes) + c_label = append_units(co_lower.name*", "*co_upper.name, co_lower.attributes) return swap_coords_values( create_stepped(co_lower.values, co_upper.values, values)..., c_label, f_label, @@ -411,20 +411,20 @@ RecipesBase.@recipe function f( ) end - elseif length(fa.dims) == 2 - title --> PB.append_units(fa.name, fa.attributes) + elseif length(fa.dims_coords) == 2 + title --> append_units(fa.name, fa.attributes) - i_values, i_label = PB.build_coords_edges(fa.dims[1]) - j_values, j_label = PB.build_coords_edges(fa.dims[2]) + i_values, i_label = build_coords_edges(fa.dims_coords[1]...) + j_values, j_label = build_coords_edges(fa.dims_coords[2]...) # workaround for Plots.jl heatmap: needs x, y to either both be midpoints, or both be edges if length(i_values) == size(values, 1) && length(j_values) == size(values, 2)+1 @warn "$(fa.name) guessing '$i_label' coordinate edges from midpoints assuming uniform spacing" - i_values = PB.guess_coords_edges(i_values) + i_values = guess_coords_edges(i_values) end if length(i_values) == size(values, 1)+1 && length(j_values) == size(values, 2) @warn "$(fa.name) guessing '$j_label' coordinate edges from midpoints assuming uniform spacing" - j_values = PB.guess_coords_edges(j_values) + j_values = guess_coords_edges(j_values) end x_values, y_values, f_values = swap_coords_xy( @@ -451,7 +451,7 @@ RecipesBase.@recipe function f( return x_values, y_values, f_values else - throw(ArgumentError("unknown number of dimensions $(length(fa.dims)), can't plot $fa")) + throw(ArgumentError("unknown number of dimensions $(length(fa.dims_coords)), can't plot $fa")) end end diff --git a/src/Regions.jl b/src/Regions.jl new file mode 100644 index 0000000..924db7a --- /dev/null +++ b/src/Regions.jl @@ -0,0 +1,194 @@ +""" + get_region(grid::Union{PB.AbstractMesh, Nothing}, values; coord_source, selectargs...) + -> values_subset, dim_subset::Vector{Pair{PB.NamedDimension, Vector{PALEOmodel.FixedCoord}}} + +Return the subset of `values` given by `selectargs` (Grid-specific keywords eg cell=, column=, ...) +and corresponding dimensions (with attached coordinates). +""" +function get_region(grid::Union{PB.AbstractMesh, Nothing}, values) end + +""" + get_region(dataset, recordidx, grid::Nothing, values) -> values[], [] + +Fallback for Domain with no grid, assumed 1 cell +""" +function get_region(grid::Nothing, values; coord_source=nothing) + length(values) == 1 || + throw(ArgumentError("grid==Nothing and length(values) != 1")) + return values[], Pair{PB.NamedDimension, Vector{PALEOmodel.FixedCoord}}[] +end + + +""" + get_region(dataset, recordidx, grid::PB.Grids.UnstructuredVectorGrid, values; cell=Nothing) -> + values_subset, dim_subset + +# Keywords for region selection: +- `cell::Union{Nothing, Int, Symbol}`: an Int for cell number (first cell = 1), or a Symbol to look up in `cellnames` + `cell = Nothing` is also allowed for a single-cell Domain. +""" +function get_region( + grid::PB.Grids.UnstructuredVectorGrid, values; + cell::Union{Nothing, Int, Symbol}=nothing, + coord_source=nothing, +) + if isnothing(cell) + grid.ncells == 1 || throw(ArgumentError("'cell' argument (an Int or Symbol) is required for an UnstructuredVectorGrid with > 1 cell")) + idx = 1 + elseif cell isa Int + idx = cell + else + idx = get(grid.cellnames, cell, nothing) + !isnothing(idx) || + throw(ArgumentError("cell ':$cell' not present in grid.cellnames=$(grid.cellnames)")) + end + + return ( + values[idx], + Pair{PB.NamedDimension, Vector{PALEOmodel.FixedCoord}}[], # no dimensions (ie squeeze out a dimension length 1 for single cell) + ) +end + + + +""" + get_region(dataset, recordidx, grid::PB.Grids.UnstructuredColumnGrid, values; column, [cell=nothing]) -> + values_subset, (dim_subset::NamedDimension, ...) + +# Keywords for region selection: +- `column::Union{Int, Symbol}`: (may be an Int, or a Symbol to look up in `columnames`) +- `cell::Int`: optional cell index within `column`, highest cell is cell 1 +""" +function get_region( + grid::PB.Grids.UnstructuredColumnGrid, values; + column, + cell::Union{Nothing, Int}=nothing, + coord_source=nothing +) + + if column isa Int + column in 1:length(grid.Icolumns) || + throw(ArgumentError("column index $column out of range")) + colidx = column + else + colidx = findfirst(isequal(column), grid.columnnames) + !isnothing(colidx) || + throw(ArgumentError("columnname '$column' not present in grid.columnnames=$(grid.columnnames)")) + end + + if isnothing(cell) + indices = grid.Icolumns[colidx] + coordnames = PB.get_coordinates(grid, "cells") + coordinates = FixedCoord[] + for cn in coordnames + cf, attributes = _get_field_attributes(coord_source, cn) + push!(coordinates, FixedCoord(cn, cf.values[indices], attributes)) + end + return ( + values[indices], + [PB.NamedDimension("cells", length(indices)) => coordinates], + ) + else + # squeeze out z dimension + idx = grid.Icolumns[colidx][cell] + return ( + values[idx], + Pair{PB.NamedDimension, Vector{PALEOmodel.FixedCoord}}[], # no dimensions (ie squeeze out a dimension length 1 for single cell) + ) + end + +end + + +""" + get_region(dataset, recordidx, rid::Union{PB.Grids.CartesianLinearGrid{2}, PB.Grids.CartesianArrayGrid{2}} , internalvalues; [i=i_idx], [j=j_idx]) -> + arrayvalues_subset, (dim_subset::NamedDimension, ...) + +# Keywords for region selection: +- `i::Int`: optional, slice along first dimension +- `j::Int`: optional, slice along second dimension + +`internalvalues` are transformed if needed from internal Field representation as a Vector length `ncells`, to +an Array (2D if neither i, j arguments present, 1D if i or j present, 0D ie one cell if both present) +""" +function get_region( + grid::Union{PB.Grids.CartesianLinearGrid{2}, PB.Grids.CartesianArrayGrid{2}}, internalvalues; + i::Union{Integer, Colon}=Colon(), + j::Union{Integer, Colon}=Colon(), + coord_source, +) + return _get_region(grid, internalvalues, [i, j], coord_source) +end + +""" + get_region(grid::Union{PB.Grids.CartesianLinearGrid{3}, PB.Grids.CartesianArrayGrid{3}}, internalvalues; [i=i_idx], [j=j_idx]) -> + arrayvalues_subset, (dim_subset::NamedDimension, ...) + +# Keywords for region selection: +- `i::Int`: optional, slice along first dimension +- `j::Int`: optional, slice along second dimension +- `k::Int`: optional, slice along third dimension + +`internalvalues` are transformed if needed from internal Field representation as a Vector length `ncells`, to +an Array (3D if neither i, j, k arguments present, 2D if one of i, j or k present, 1D if two present, +0D ie one cell if i, j, k all specified). +""" +function get_region( + grid::Union{PB.Grids.CartesianLinearGrid{3}, PB.Grids.CartesianArrayGrid{3}}, internalvalues; + i::Union{Integer, Colon}=Colon(), + j::Union{Integer, Colon}=Colon(), + k::Union{Integer, Colon}=Colon(), + coord_source, +) + return _get_region(grid, internalvalues, [i, j, k], coord_source) +end + +function _get_region( + grid::Union{PB.Grids.CartesianLinearGrid, PB.Grids.CartesianArrayGrid}, internalvalues, indices, coord_source +) + if !isempty(grid.coords) && !isempty(grid.coords_edges) + dims = [ + PB.NamedDimension(grid.dimnames[idx], grid.coords[idx], grid.coords_edges[idx]) + for (idx, ind) in enumerate(indices) if isa(ind, Colon) + ] + elseif !isempty(grid.coords) + dims = [ + PB.NamedDimension(grid.dimnames[idx], grid.coords[idx]) + for (idx, ind) in enumerate(indices) if isa(ind, Colon) + ] + else + dims = [ + PB.NamedDimension(grid.dimnames[idx]) + for (idx, ind) in enumerate(indices) if isa(ind, Colon) + ] + end + + values = internal_to_cartesian(grid, internalvalues) + if !all(isequal(Colon()), indices) + values = values[indices...] + end + + return values, Tuple(dims) +end + +# field and attributes from a dataset that implements PB.get_field(dataset, name)::FieldRecord +function _get_field_attributes(coord_source::Tuple{Any, Int}, name) + dataset, record_idx = coord_source + fr = PB.get_field(dataset, name) + return fr[record_idx], fr.attributes +end + +function _get_field_attributes(coord_source::Tuple{PB.ModelData, AbstractString}, varname) + modeldata, domainname = coord_source + # PB.get_field(model, modeldata, varnamefull) doesn't provide attributes, so do this ourselves + varnamefull = domainname*"."*varname + var = PB.get_variable(modeldata.model, varnamefull) + !isnothing(var) || + throw(ArgumentError("Variable $varnamefull not found")) + f = PB.get_field(var, modeldata) + attributes = copy(var.attributes) + attributes[:var_name] = var.name + attributes[:domain_name] = var.domain.name + + return f, attributes +end \ No newline at end of file diff --git a/test/runfieldtests.jl b/test/runfieldtests.jl index dc37ded..34181c3 100644 --- a/test/runfieldtests.jl +++ b/test/runfieldtests.jl @@ -9,12 +9,17 @@ function test_scalar_field(f::PB.Field) @test isnan(f.values[]) f.values[] = 42.0 - fa = PALEOmodel.get_array(f) - @test fa.dims == () - @test fa.values[] == 42.0 + # fa = PALEOmodel.get_array(f) + # @test fa.dims == () + # @test fa.values[] == 42.0 - fr = PALEOmodel.FieldRecord(f, Dict{Symbol, Any}(), coords_record=[]) + fr = PALEOmodel.FieldRecord( + # (record_dim = PB.NamedDimension("records", -1), record_dim_coordinates=String[]), # mock dataset to supply record_dim + (record_dim = (name = "records",), ), # mock dataset to supply record_dim.name + f, + Dict{Symbol, Any}() + ) push!(fr, f) @test fr.records == [42.0] f.values[] = 43.0 @@ -22,23 +27,26 @@ function test_scalar_field(f::PB.Field) @test fr.records == [42.0, 43.0] @test fr[2].values == f.values - frw = PALEOmodel.wrap_fieldrecord( + fra = PALEOmodel.get_array(fr) + + @test fra.values == [42.0, 43.0] + fra_dimensions = PB.get_dimensions(fra) + @test length(fra_dimensions) == 1 + @test fra_dimensions[1].name == "records" + + frw = PALEOmodel.FieldRecord( + (record_dim = (name = "records",), ), # mock dataset to supply record_dim.name [42.0, 43.0], PB.ScalarData, (), - missing, PB.ScalarSpace, nothing, - Dict{Symbol, Any}(); - coords_record=[] + Dict{Symbol, Any}(), ) @test frw.records == [42.0, 43.0] @test frw[2].values == f.values - fra = PALEOmodel.get_array(fr) - @test fra.values == [42.0, 43.0] - @test length(fra.dims) == 1 - @test fra.dims[1].name == "records" + return nothing end @@ -48,7 +56,7 @@ end @testset "ScalarData, ScalarSpace" begin - f = PB.allocate_field(PB.ScalarData, (), Float64, PB.ScalarSpace, nothing; allocatenans=true) + f = PB.Field(PB.ScalarData, (), Float64, PB.ScalarSpace, nothing; allocatenans=true) test_scalar_field(f) end @@ -56,7 +64,7 @@ end @testset "ScalarData, CellSpace, no grid" begin # check that a CellSpace Field with no grid behaves as a ScalarSpace Field - f = PB.allocate_field(PB.ScalarData, (), Float64, PB.CellSpace, nothing; allocatenans=true) + f = PB.Field(PB.ScalarData, (), Float64, PB.CellSpace, nothing; allocatenans=true) test_scalar_field(f) end @@ -64,10 +72,14 @@ end @testset "ScalarData, CellSpace, UnstructuredColumnGrid" begin g = PB.Grids.UnstructuredColumnGrid(ncells=5, Icolumns=[collect(1:5)]) - f = PB.allocate_field(PB.ScalarData, (), Float64, PB.CellSpace, g; allocatenans=true) + f = PB.Field(PB.ScalarData, (), Float64, PB.CellSpace, g; allocatenans=true) f.values .= 42.0 - fr = PALEOmodel.FieldRecord(f, Dict{Symbol, Any}(), coords_record=[]) + fr = PALEOmodel.FieldRecord( + (record_dim = (name = "records",), ), # mock dataset to supply record_dim.name + f, + Dict{Symbol, Any}(), + ) push!(fr, f) f.values .= 43.0 push!(fr, f) @@ -76,30 +88,38 @@ end @test fr[1].values == fill(42.0, 5) - fa = PALEOmodel.get_array(fr, column=1) - @test length(fa.dims) == 2 - @test fa.dims[1].name == "z" - @test fa.dims[2].name == "records" + fa = PALEOmodel.get_array(fr, (column=1, )) + fa_dimensions = PB.get_dimensions(fa) + @test length(fa_dimensions) == 2 + @test fa_dimensions[1].name == "cells" + @test fa_dimensions[2].name == "records" @test size(fa.values) == (5, 2) @test fa.values[:, 1] == fill(42.0, 5) @test fa.values[:, 2] == fill(43.0, 5) + + fa_d = PALEOmodel.get_array(fr, column=1) # deprecated syntax + @test fa.values == fa_d.values end @testset "ArrayScalarData, ScalarSpace" begin - f = PB.allocate_field(PB.ArrayScalarData, (PB.NamedDimension("test", 2, []), ), Float64, PB.ScalarSpace, nothing; allocatenans=true) + f = PB.Field(PB.ArrayScalarData, (PB.NamedDimension("test", 2), ), Float64, PB.ScalarSpace, nothing; allocatenans=true) @test isnan.(f.values) == [true, true] f.values .= [42.0, 43.0] - fa = PALEOmodel.get_array(f) - @test length(fa.dims) == 1 - @test fa.dims[1].name == "test" - @test fa.dims[1].size == 2 + # fa = PALEOmodel.get_array(f) + # @test length(fa.dims) == 1 + # @test fa.dims[1].name == "test" + # @test fa.dims[1].size == 2 - @test fa.values == [42.0, 43.0] + # @test fa.values == [42.0, 43.0] - fr = PALEOmodel.FieldRecord(f, Dict{Symbol, Any}(), coords_record=[]) + fr = PALEOmodel.FieldRecord( + (record_dim = (name = "records",), ), # mock dataset to supply record_dim.name + f, + Dict{Symbol, Any}(), + ) push!(fr, f) @test fr.records == [[42.0, 43.0]] f.values .= [44.0, 45.0] @@ -109,30 +129,35 @@ end # FieldArray from Field - fa = PALEOmodel.get_array(f) - @test fa.values == [44.0, 45.0] - @test length(fa.dims) == 1 - @test fa.dims[1].name == "test" + # fa = PALEOmodel.get_array(f) + # @test fa.values == [44.0, 45.0] + # @test length(fa.dims) == 1 + # @test fa.dims[1].name == "test" # FieldArray from FieldRecord fra = PALEOmodel.get_array(fr) @test fra.values == [42.0 44.0; 43.0 45.0] - @test length(fra.dims) == 2 - @test fra.dims[1].name == "test" - @test fra.dims[2].name == "records" + fra_dimensions = PB.get_dimensions(fra) + @test length(fra_dimensions) == 2 + @test fra_dimensions[1].name == "test" + @test fra_dimensions[2].name == "records" end @testset "ArrayScalarData, CellSpace, no grid" begin # TODO this is possibly inconsistent with (ScalarData, CellSpace, no grid), # as Field.values here is a (1, 2) Array, not a (2,) Vector - f = PB.allocate_field(PB.ArrayScalarData, (PB.NamedDimension("test", 2, []), ), Float64, PB.CellSpace, nothing; allocatenans=true) + f = PB.Field(PB.ArrayScalarData, (PB.NamedDimension("test", 2), ), Float64, PB.CellSpace, nothing; allocatenans=true) @test_broken size(f.values) == (2, ) # TODO should be a Vector ? @test size(f.values) == (1, 2) @test isnan.(f.values) == [true true] # TODO 1x2 Array or Vector ? f.values .= [42.0 43.0] # TODO - fr = PALEOmodel.FieldRecord(f, Dict{Symbol, Any}(), coords_record=[]) + fr = PALEOmodel.FieldRecord( + (record_dim = (name = "records",), ), # mock dataset to supply record_dim.name + f, + Dict{Symbol, Any}(), + ) push!(fr, f) @test fr.records == [[42.0 43.0]] f.values .= [44.0 45.0] @@ -141,30 +166,37 @@ end @test fr[2].values == f.values # FieldArray from Field - fa = PALEOmodel.get_array(f) - @test fa.values == [44.0, 45.0] - @test length(fa.dims) == 1 - @test fa.dims[1].name == "test" - @test fa.dims[1].size == 2 - - # FieldArray from FieldRecord - fra = PALEOmodel.get_array(fr) - @test fra.values == [42.0 44.0; 43.0 45.0] - @test length(fra.dims) == 2 - @test fra.dims[1].name == "test" - @test fra.dims[2].name == "records" + # fa = PALEOmodel.get_array(f) + # @test fa.values == [44.0, 45.0] + # @test length(fa.dims) == 1 + # @test fa.dims[1].name == "test" + # @test fa.dims[1].size == 2 + + # FieldArray from FieldRecord + fra = PALEOmodel.get_array(fr) + @test fra.values == [42.0 44.0; 43.0 45.0] + fra_dimensions = PB.get_dimensions(fra) + @test length(fra_dimensions) == 2 + @test fra_dimensions[1].name == "test" + @test fra_dimensions[1].size == 2 + @test fra_dimensions[2].name == "records" + @test fra_dimensions[2].size == 2 end @testset "ArrayScalarData, CellSpace, UnstructuredColumnGrid" begin g = PB.Grids.UnstructuredColumnGrid(ncells=5, Icolumns=[collect(1:5)]) - f = PB.allocate_field(PB.ArrayScalarData, (PB.NamedDimension("test", 2, []), ), Float64, PB.CellSpace, g; allocatenans=true) + f = PB.Field(PB.ArrayScalarData, (PB.NamedDimension("test", 2), ), Float64, PB.CellSpace, g; allocatenans=true) @test size(f.values) == (5, 2) f.values .= 42.0 - fr = PALEOmodel.FieldRecord(f, Dict{Symbol, Any}(), coords_record=[]) + fr = PALEOmodel.FieldRecord( + (record_dim = (name = "records",), ), # mock dataset to supply record_dim.name + f, + Dict{Symbol, Any}(), + ) push!(fr, f) f.values .= 43.0 push!(fr, f) @@ -174,34 +206,36 @@ end @test fr[1].values == fill(42.0, 5, 2) # FieldArray from Field - fa = PALEOmodel.get_array(f, (column=1,)) - @test length(fa.dims) == 2 - @test fa.dims[1].name == "z" - @test fa.dims[2].name == "test" - @test size(fa.values) == (5, 2) - @test fa.values == fill(43.0, 5, 2) + # fa = PALEOmodel.get_array(f, (column=1,)) + # @test length(fa.dims) == 2 + # @test fa.dims[1].name == "z" + # @test fa.dims[2].name == "test" + # @test size(fa.values) == (5, 2) + # @test fa.values == fill(43.0, 5, 2) - fa = PALEOmodel.get_array(f, (column=1, cell=1)) - @test length(fa.dims) == 1 - @test fa.dims[1].name == "test" - @test size(fa.values) == (2, ) - @test fa.values == fill(43.0, 2) + # fa = PALEOmodel.get_array(f, (column=1, cell=1)) + # @test length(fa.dims) == 1 + # @test fa.dims[1].name == "test" + # @test size(fa.values) == (2, ) + # @test fa.values == fill(43.0, 2) # FieldArray from FieldRecord fra = PALEOmodel.get_array(fr, (column=1)) - @test length(fra.dims) == 3 - @test fra.dims[1].name == "z" - @test fra.dims[2].name == "test" - @test fra.dims[3].name == "records" + fra_dimensions = PB.get_dimensions(fra) + @test length(fra_dimensions) == 3 + @test fra_dimensions[1].name == "cells" + @test fra_dimensions[2].name == "test" + @test fra_dimensions[3].name == "records" @test size(fra.values) == (5, 2, 2) @test fra.values[:, :, 1] == fill(42.0, 5, 2) @test fra.values[:, :, 2] == fill(43.0, 5, 2) fra = PALEOmodel.get_array(fr, (column=1, cell=1)) - @test length(fra.dims) == 2 - @test fra.dims[1].name == "test" - @test fra.dims[2].name == "records" + fra_dimensions = PB.get_dimensions(fra) + @test length(fra_dimensions) == 2 + @test fra_dimensions[1].name == "test" + @test fra_dimensions[2].name == "records" @test size(fra.values) == (2, 2) @test fra.values[:, 1] == fill(42.0, 2) @test fra.values[:, 2] == fill(43.0, 2) diff --git a/test/runoutputwritertests.jl b/test/runoutputwritertests.jl index 3bc7886..c097ab4 100644 --- a/test/runoutputwritertests.jl +++ b/test/runoutputwritertests.jl @@ -60,25 +60,25 @@ end end -@testset "SaveLoad_jld2" begin +# @testset "SaveLoad_jld2" begin - model, modeldata, all_vars, output = create_test_model_output(2) - all_values = all_vars.values +# model, modeldata, all_vars, output = create_test_model_output(2) +# all_values = all_vars.values - all_values.global.O .= [2e19] - PALEOmodel.OutputWriters.add_record!(output, model, modeldata, 0.0) - all_values.global.O .= [4e19] - PALEOmodel.OutputWriters.add_record!(output, model, modeldata, 0.0) +# all_values.global.O .= [2e19] +# PALEOmodel.OutputWriters.add_record!(output, model, modeldata, 0.0) +# all_values.global.O .= [4e19] +# PALEOmodel.OutputWriters.add_record!(output, model, modeldata, 0.0) - tmpfile = tempname(; cleanup=true) - PALEOmodel.OutputWriters.save_jld2(output, tmpfile) +# tmpfile = tempname(; cleanup=true) +# PALEOmodel.OutputWriters.save_jld2(output, tmpfile) - load_output = PALEOmodel.OutputWriters.load_jld2!(PALEOmodel.OutputWriters.OutputMemory(), tmpfile) +# load_output = PALEOmodel.OutputWriters.load_jld2!(PALEOmodel.OutputWriters.OutputMemory(), tmpfile) - O_array = PALEOmodel.get_array(load_output, "global.O") - @test O_array.values == [2e19, 4e19] +# O_array = PALEOmodel.get_array(load_output, "global.O") +# @test O_array.values == [2e19, 4e19] -end +# end @testset "SaveLoad_netcdf" begin From 1b9daa5b9021fb11fe6b59488ed7ef3ba5a0ba34 Mon Sep 17 00:00:00 2001 From: Stuart Daines Date: Sun, 29 Dec 2024 22:42:36 +0000 Subject: [PATCH 2/4] Tidy up and remove old code - remove old unused code - fix doc build --- docs/src/PALEOmodelOutput.md | 7 +- src/CoordsDims.jl | 175 +------------------------------ src/FieldArray.jl | 187 --------------------------------- src/FieldRecord.jl | 181 ++++---------------------------- src/PALEOmodel.jl | 4 +- src/PlotRecipes.jl | 144 ++++++++++++++++---------- src/Regions.jl | 194 ----------------------------------- 7 files changed, 116 insertions(+), 776 deletions(-) delete mode 100644 src/Regions.jl diff --git a/docs/src/PALEOmodelOutput.md b/docs/src/PALEOmodelOutput.md index 3ca4c53..275a9f1 100644 --- a/docs/src/PALEOmodelOutput.md +++ b/docs/src/PALEOmodelOutput.md @@ -69,8 +69,6 @@ OutputMemoryDomain ```@docs save_netcdf load_netcdf! -save_jld2 -load_jld2! ``` ## Field Array @@ -78,11 +76,12 @@ load_jld2! ```@meta CurrentModule = PALEOmodel ``` -[`FieldArray`](@ref) provides a generic array type with named dimensions `PALEOboxes.NamedDimension` and optional coordinates `PALEOboxes.FixedCoord` for processing of model output. +[`FieldArray`](@ref) provides a generic array type with named dimensions `PALEOboxes.NamedDimension` and optional coordinates [`PALEOmodel.FixedCoord`](@ref) for processing of model output. ```@docs FieldArray -get_array(f::PB.Field{D, PB.ScalarSpace, V, N, M}; attributes=nothing) where {D, V, N, M} +get_array(fr::FieldRecord; coords=nothing) +FixedCoord ``` ## Plotting output diff --git a/src/CoordsDims.jl b/src/CoordsDims.jl index 79a9e67..f7d7def 100644 --- a/src/CoordsDims.jl +++ b/src/CoordsDims.jl @@ -8,6 +8,8 @@ A fixed (state independent) coordinate +These are generated from coordinate variables for use in output visualisation. + N = 1: a cell-centre coordinate, size(values) = (ncells, ) N = 2: a boundary coordinate, size(values) = (2, ncells) """ @@ -19,90 +21,11 @@ end is_boundary_coordinate(fc::FixedCoord) = ndims(fc.values) == 2 && size(fc.values, 1) == 2 -""" - append_units(name::AbstractString, attributes) -> "name (units)" - -Utility function to append variable units string to a variable name for display. -""" -function append_units(name::AbstractString, attributes::Dict{Symbol, Any}) - units = get(attributes, :units, "") - if isempty(units) - return name - else - return name*" ($units)" - end -end - -append_units(name::AbstractString, attributes::Nothing) = name - -""" - build_coords_edges(coords_vec::Vector{FixedCoord}) -> Vector{Float64} - -Build a vector of coordinate edges (length `n+1``) from `coords_vec`, assuming the PALEO -convention that `coords_vec` contains three elements with -cell midpoints, lower edges, upper edges each of length `n`, in that order. - -Falls back to just returning the first entry in `coords_vec` for other cases. -""" -function build_coords_edges(dim::PB.NamedDimension, coords_vec::Vector{FixedCoord}) - - if isempty(coords_vec) - # no coordinate - just return indices - co_values = collect(1:(dim.size+1)) - co_label = dim.name - elseif length(coords_vec) == 1 && is_boundary_coordinate(coords_vec[1]) - co = coords_vec[1] - co_values = vcat(co.values[1, :], co.values[2, end]) - co_label = append_units(co.name, co.attributes) - elseif length(coords_vec) == 1 || length(coords_vec) > 3 - # 1 coordinate or something we don't understand - take first - co = first(coords_vec) - co_values = co.values - co_label = append_units(co.name, co.attributes) - elseif length(coords_vec) == 2 && is_boundary_coordinate(coords_vec[2]) - # assume mid, boundary - co = coords_vec[2] - co_values = vcat(co.values[1, :], co.values[2, end]) - co_label = append_units(co.name, co.attributes) - elseif length(coords_vec) in (2, 3) - # 2 coordinates assume lower, upper edges - # 3 coordinates assume mid, lower, upper - co_lower = coords_vec[end-1] - co_upper = coords_vec[end] - co_label = append_units(co_lower.name*", "*co_upper.name, co_lower.attributes) - first(co_lower.values) < first(co_upper.values) || - @warn "build_coords_edges: $co_label co_lower is > co_upper - check model grid" - if co_lower.values[end] > co_lower.values[1] # ascending order - co_lower.values[2:end] == co_upper.values[1:end-1] || - @warn "build_coords_edges: $co_label lower and upper edges don't match" - co_values = [co_lower.values; co_upper.values[end]] - else # descending order - co_lower.values[1:end-1] == co_upper.values[2:end] || - @warn "build_coords_edges: $co_label lower and upper edges don't match" - co_values = [co_upper.values[1]; co_lower.values] - end - - end - return co_values, co_label -end - -"guess coordinate edges from midpoints, assuming uniform spacing" -function guess_coords_edges(x_midpoints) - first_x = x_midpoints[1] - 0.5*(x_midpoints[2] - x_midpoints[1]) - last_x = x_midpoints[end] + 0.5*(x_midpoints[end] - x_midpoints[end-1]) - return [first_x; 0.5.*(x_midpoints[1:end-1] .+ x_midpoints[2:end]); last_x] -end - - -function get_region(fc::FixedCoord, indices::AbstractVector) - return FixedCoord(fc.name, fc.values[indices], fc.attributes) -end - -function get_region(fcv::Vector{FixedCoord}, indices::AbstractVector) - return [FixedCoord(fc.name, fc.values[indices], fc.attributes) for fc in fcv] -end +################################################################## +# Coordinate filtering and selection +################################################################ "find indices of coord from first before range[1] to first after range[2]" function find_indices(coord::AbstractVector, range) @@ -180,91 +103,3 @@ function dimscoord_subset(dim::PB.NamedDimension, coords::Vector{FixedCoord}, se return cidx, dim_subset, coords_subset, coords_used end - -#= -################################################# -# Dimensions -##################################################### - -""" - NamedDimension - -A named dimension, with optional attached fixed coordinates `coords` - -PALEO convention is that where possible `coords` contains three elements, for cell -midpoints, lower edges, upper edges, in that order. -""" -mutable struct NamedDimension - name::String - size::Int64 - coords::Vector{FixedCoord} # may be empty -end - -"create from size only (no coords)" -function NamedDimension(name, size::Integer) - return NamedDimension( - name, - size, - FixedCoord[], - ) -end - -"create from coord mid-points" -function NamedDimension(name, coord::AbstractVector) - return NamedDimension( - name, - length(coord), - [ - FixedCoord(name, coord, Dict{Symbol, Any}()), - ] - ) -end - -"create from coord mid-points and edges" -function NamedDimension(name, coord::AbstractVector, coord_edges::AbstractVector) - if coord[end] > coord[1] - # ascending order - coord_lower = coord_edges[1:end-1] - coord_upper = coord_edges[2:end] - else - # descending order - coord_lower = coord_edges[2:end] - coord_upper = coord_edges[1:end-1] - end - return NamedDimension( - name, - length(coord), - [ - FixedCoord(name, coord, Dict{Symbol, Any}()), - FixedCoord(name*"_lower", coord_lower, Dict{Symbol, Any}()), - FixedCoord(name*"_upper", coord_upper, Dict{Symbol, Any}()), - ] - ) -end - -function get_region(nd::NamedDimension, indices::AbstractVector) - return NamedDimension(nd.name, length(indices), get_region(nd.coords, indices)) -end - -""" - build_coords_edges(nd::NamedDimension) -> Vector{Float64} - -Call [`build_coords_edges`](@ref)(nd.coords), or fallback to just returning indices -if no coords present. -""" -function build_coords_edges(nd::NamedDimension) - if !isempty(nd.coords) - return build_coords_edges(nd.coords) - else - @warn "no coords for NamedDimension $(nd.name), returning indices" - return collect(1:nd.size), nd.name*" (indices)" - end -end - -function Base.show(io::IO, nd::NamedDimension) - print(io, "NamedDimension(name=", nd.name, ", size=", nd.size, ", coords=(") - join(io, [c.name for c in nd.coords], ", ") - print(io, "))") - return nothing -end - =# \ No newline at end of file diff --git a/src/FieldArray.jl b/src/FieldArray.jl index 6bdbea8..7ff952c 100644 --- a/src/FieldArray.jl +++ b/src/FieldArray.jl @@ -1,4 +1,3 @@ -import Infiltrator """ FieldArray @@ -76,189 +75,3 @@ Get FieldArray from PALEO object `obj` """ function get_array end -""" - get_array(dataset, recordidx, f::Field [, selectargs::NamedTuple]; [attributes=nothing]) -> FieldArray - -Return a [`FieldArray`](@ref) containing `f::Field` data values and -any attached coordinates, for the spatial region defined by `selectargs`. - -Available `selectargs` depend on the grid `f.mesh`, and -are passed to `get_region`. - -`attributes` (if present) are added to `FieldArray` -""" -function get_array( - f::PB.Field{FieldData, PB.ScalarSpace, V, N, Mesh}, selectargs::NamedTuple=NamedTuple(); - attributes=nothing, coord_source, -) where {FieldData, V, N, Mesh} - isempty(selectargs) || - error("get_array on Field f defined on ScalarSpace with non-empty selectargs=$selectargs") - - data_dims_coordinates = get_data_dims_coords(coord_source, f.data_dims) - return FieldArray(default_fieldarray_name(attributes), f.values, data_dims_coordinates, attributes) -end - -function get_array( - f::PB.Field{FieldData, PB.CellSpace, V, 0, Mesh}, selectargs::NamedTuple=NamedTuple(); - attributes=nothing, coord_source=nothing, -) where {FieldData, V, Mesh} - - values, dims_coords = get_region(f.mesh, f.values; coord_source, selectargs...) - - if !isnothing(attributes) && !isempty(selectargs) - attributes = copy(attributes) - attributes[:filter_region] = selectargs - end - - return FieldArray(default_fieldarray_name(attributes), values, dims_coords, attributes) -end - -# single data dimension -# TODO generalize this to arbitrary data dimensions -function get_array( - f::PB.Field{FieldData, PB.CellSpace, V, 1, Mesh}, selectargs::NamedTuple=NamedTuple(); - attributes=nothing, coord_source, -) where {FieldData, V, Mesh} - - f_space_dims_colons = ntuple(i->Colon(), ndims(f.values) - 1) - f_size_datadim = size(f.values)[end] - - dvalues, dims_coords = get_region(f.mesh, f.values[f_space_dims_colons..., 1]; coord_source, selectargs...) - - d = (size(dvalues)..., f_size_datadim) - values = Array{eltype(dvalues), length(d)}(undef, d...) - - if length(d) == 1 - # single cell - space dimension squeezed out - for i in 1:f_size_datadim - values[i], dims_coords = get_region(f.mesh, f.values[f_space_dims_colons..., i]; coord_source, selectargs...) - end - else - dvalues_colons = ntuple(i->Colon(), ndims(dvalues)) - for i in 1:f_size_datadim - dvalues, dims_coords = get_region(mesh, f.values[f_space_dims_colons..., i]; coord_source, selectargs...) - values[dvalues_colons..., i] .= dvalues - end - end - - if !isnothing(attributes) && !isempty(selectargs) - attributes = copy(attributes) - attributes[:filter_region] = selectargs - end - - data_dims_coords = get_data_dims_coords(coord_source, f.data_dims) - - return FieldArray(default_fieldarray_name(attributes), values, vcat(dims_coords, data_dims_coords), attributes) -end - - - -""" - get_array(modeldata, varnamefull [, selectargs::NamedTuple] [; coords::AbstractVector]) -> FieldArray - -Get [`FieldArray`](@ref) by Variable name, for spatial region defined by `selectargs` -(which are passed to `get_region`). - -Optional argument `coords` can be used to supply plot coordinates from Variable in output. -Format is a Vector of Pairs of "dim_name"=>("var_name1", "var_name2", ...) - -Example: to replace a 1D column default pressure coordinate with a z coordinate: - - coords=["z"=>("atm.zmid", "atm.zlower", "atm.zupper")] - -NB: the coordinates will be generated by applying `selectargs`, -so the supplied coordinate Variables must have the same dimensionality as `vars`. -""" -function get_array( - modeldata::PB.AbstractModelData, varnamefull::AbstractString, selectargs::NamedTuple=NamedTuple(); - coords=nothing, -) - varsplit = split(varnamefull, ".") - length(varsplit) == 2 || - throw(ArgumentError("varnamefull $varnamefull is not of form .")) - domainname, varname = varsplit - - coord_source = (modeldata, domainname) - f, attributes = _get_field_attributes(coord_source, varname) - - varray = get_array(f, selectargs; attributes, coord_source) - - if isnothing(coords) - # keep original coords (if any) - else - check_coords_argument(coords) || - error("argument coords should be a Vector of Pairs of \"dim_name\"=>(\"var_name1\", \"var_name2\", ...), eg: [\"z\"=>(\"atm.zmid\", \"atm.zlower\", \"atm.zupper\"), ...]") - - vec_coords_arrays = [ - dim_name => Tuple(get_array(modeldata, cvn, selectargs) for cvn in coord_varnames) - for (dim_name, coord_varnames) in coords - ] - - varray = update_coordinates(varray, vec_coords_arrays) - end - - return varray -end - -function get_data_dims_coords(coord_source, data_dims) - - data_dims_coordinates = Pair{PB.NamedDimension, Vector{PALEOmodel.FixedCoord}}[] - - for dd in data_dims - coordinates = PALEOmodel.FixedCoord[] - for coord_name in PB.get_coordinates(dd.name) - ddf, attributes = _get_field_attributes(coord_source, coord_name) - push!(coordinates, FixedCoord(coord_name, ddf.values, attributes)) - end - push!(data_dims_coordinates, dd => coordinates) - end - - return data_dims_coordinates -end - -# check 'coords' of form [] or ["z"=>[ ... ], ] or ["z"=>(...),] -check_coords_argument(coords) = - isa(coords, AbstractVector) && ( - isempty(coords) || ( - isa(coords, AbstractVector{<:Pair}) && - isa(first(first(coords)), AbstractString) && - isa(last(first(coords)), Union{AbstractVector, Tuple}) - ) - ) - -""" - update_coordinates(varray::FieldArray, vec_coord_arrays::AbstractVector) -> FieldArray - -Replace or add coordinates `vec_coord_arrays` to `varray`. - -`new_coord_arrays` is a Vector of Pairs of "dim_name"=>(var1::FieldArray, var2::FieldArray, ...) - -Example: to replace a 1D column default pressure coordinate with a z coordinate: - - coords=["z"=>(zmid::FieldArray, zlower::FieldArray, atm.zupper::FieldArray)] -""" -function update_coordinates(varray::FieldArray, vec_coord_arrays::AbstractVector) - - check_coords_argument(vec_coord_arrays) || - error("argument vec_coords_arrays should be a Vector of Pairs of \"dim_name\"=>(var1::FieldArray, var2::FieldArray, ...), eg: [\"z\"=>(zmid::FieldArray, zlower::FieldArray, atm.zupper::FieldArray)]") - - # generate Vector of NamedDimensions to use as new coordinates - new_dims_coords = Pair{PB.NamedDimension, Vector{FixedCoord}}[nd => copy(coords) for (nd, coords) in varray.dims_coords] - for (dim_name, coord_arrays) in vec_coord_arrays - dc_idx = findfirst(dc->first(dc).name == dim_name, new_dims_coords) - !isnothing(dc_idx) || error("FieldArray $varray has no dimension $dim_name") - nd, coords = new_dims_coords[dc_idx] - empty!(coords) - for coord_array in coord_arrays - coord_array_name = get(coord_array.attributes, :var_name, "") - nd.size == length(coord_array.values) || - error("FieldArray $varray dimension $dim_name size $(nd.size) != new coordinate $coord_array_name length(values) $(coord_array.values)") - push!(coords, PB.FixedCoord(coord_array_name, coord_array.values, coord_array.attributes)) - end - end - - # replace coordinates - varray_newcoords = FieldArray(varray.name, varray.values, new_dims_coords, varray.attributes) - - return varray_newcoords -end \ No newline at end of file diff --git a/src/FieldRecord.jl b/src/FieldRecord.jl index f039238..843c1af 100644 --- a/src/FieldRecord.jl +++ b/src/FieldRecord.jl @@ -14,7 +14,6 @@ struct FieldRecord{FieldData <: PB.AbstractData, Space <: PB.AbstractSpace, V, N data_dims::NTuple{N, PB.NamedDimension} mesh::Mesh attributes::Dict{Symbol, Any} - # coords_record::Vector{FixedCoord} # coordinates attached to record dimension end # create empty FieldRecord @@ -201,21 +200,15 @@ any attached coordinates, for records and spatial region defined by `allselectar starting at the record with the nearest value of `fr.coords_record` before `first` and ending at the nearest record after `last`. -Available `selectargs` depend on the grid `fr.mesh` and `fr.data_dims`, options include: -- grid spatial dimensions -[- TODO grid spatial coordinates (if defined)] -- `fr.data_dims` +Available `selectargs` depend on the FieldRecord dimensions (as returned by `get_dimensions`, which will be a subset +of grid spatial dimensions and Domain data dimensions) and corresponding attached coordinates (as returned by `get_coordinates`. +Optional argument `coords` can be used to replace the attached coordinates for one or more dimensions. +Format is a Vector of Pairs of `""=>("", "", ...)`, +eg to replace a 1D column default pressure coordinate with a z coordinate: -Optional argument `coords` can be used to supply coordinates from additional `FieldRecords`, replacing any coordinates -attached to `fr`. Format is a Vector of Pairs of "dim_name"=>(cr1::FieldRecord, cr2::FieldRecord, ...) + coords=["cells"=>("atm.zmid", "atm.zlower", "atm.zupper")] -Example: to replace a 1D column default pressure coordinate with a z coordinate: - - coords=["z"=>(zmid::FieldRecord, zlower::FieldRecord, zupper::FieldRecord)] - -NB: the coordinates will be generated by applying `allselectargs`, -so the supplied coordinate FieldRecords must have the same dimensionality as `fr`. """ function get_array( fr::FieldRecord; @@ -231,163 +224,17 @@ function get_array( return get_array(fr, NamedTuple(allselectargs); coords=coords) end -function get_array_orig( - fr::FieldRecord, allselectargs::NamedTuple; # allselectargs::NamedTuple=NamedTuple() creates a method ambiguity with deprecated form above - coords::Union{Nothing, AbstractVector}=nothing -) - - varray = nothing - - try - # get data NB: with original coords or no coords - varray = _get_array(fr, allselectargs) - - if !isnothing(coords) - check_coords_argument(coords) || - error("coords argument should be of form 'coords=[\"z\"=>zmid::FieldRecord, zlower::FieldRecord, zupper::FieldRecord), ...]") - - # get arrays for replacement coordinates - vec_coord_arrays = [ - coord_name => Tuple(_get_array(coord_fr, allselectargs) for coord_fr in coord_fieldrecords) - for (coord_name, coord_fieldrecords) in coords - ] - - # replace coordinates - varray = update_coordinates(varray, vec_coord_arrays) - end - catch - frname = get(fr.attributes, :domain_name, "")*"."*get(fr.attributes, :var_name, "") - @warn "PALEOmodel.get_array(fr::FieldRecord) failed for variable $frname" - rethrow() - end - - return varray -end - -function _get_array( - fr::FieldRecord{FieldData, Space, V, N, Mesh, R}, allselectargs::NamedTuple, -) where {FieldData, Space, V, N, Mesh, R} - - # select records to use and create PB.NamedDimension - ridx = 1:length(fr) - selectargs_region = NamedTuple() - selectargs_records = NamedTuple() - for (k, v) in zip(keys(allselectargs), values(allselectargs)) - if k==:records - if v isa Integer - ridx = [v] - else - ridx = v - end - selectargs_records = (records=v,) - elseif String(k) in fr.dataset.record_dim_coordinates - # find ridx corresponding to a coordinate - rfr = PB.get_field(fr.dataset, String(k)) - ridx, cvalue = find_indices(rfr.records, v) - selectargs_records = NamedTuple((k=>cvalue,)) - else - selectargs_region = merge(selectargs_region, NamedTuple((k=>v,))) - end - end - records_coords = FixedCoord[] - for rcn in fr.dataset.record_dim_coordinates - rfr = PB.get_field(fr.dataset, rcn) - push!(records_coords, FixedCoord(rcn, rfr.records[ridx], rfr.attributes)) - end - records_dim_coords = PB.NamedDimension(fr.dataset.record_dim.name, length(ridx)) => records_coords - - # add attributes for selection used - attributes = copy(fr.attributes) - isempty(selectargs_records) || (attributes[:filter_records] = selectargs_records;) - isempty(selectargs_region) || (attributes[:filter_region] = selectargs_region;) - - # Generate name from attributes - name = default_fieldarray_name(attributes) - - # Select selectargs_region - if field_single_element(fr) - # create FieldArray directly from FieldRecord - data_dims_coords = get_data_dims_coords((fr.dataset, ridx), fr.data_dims) - - isempty(selectargs_region) || - throw(ArgumentError("invalid index $selectargs_region")) - if length(ridx) > 1 - return FieldArray( - name, - fr.records[ridx], - vcat(data_dims_coords, records_dim_coords), - attributes - ) - else - # squeeze out records dimension - return FieldArray( - name, - fr.records[first(ridx)], - data_dims_coords, - attributes - ) - end - else - # pass through to Field - - # get FieldArray from first Field record and use this to figure out array shapes etc - far = get_array(fr[first(ridx)], selectargs_region; coord_source=(fr.dataset, first(ridx))) - # TODO - Julia bug ? length(far.dims) returning wrong value, apparently triggered by this line - # attributes = isnothing(attributes) ? Dict{Symbol, Any}() : copy(attributes) - # in get_array - if length(ridx) > 1 - # add additional (last) dimension for records - favalues = Array{eltype(far.values), length(far.dims_coords)+1}(undef, size(far.values)..., length(ridx)) - fa = FieldArray( - name, - favalues, - vcat(far.dims_coords, records_dim_coords), - attributes, - ) - # copy values for first record - if isempty(far.dims_coords) - fa.values[1] = far.values - else - fa.values[fill(Colon(), length(far.dims_coords))..., 1] .= far.values - end - else - # squeeze out record dimension so this is just fa with attributes added - fa = FieldArray( - name, - far.values, - far.dims_coords, - attributes - ) - end - - # fill with values from FieldArrays for Fields for remaining records - if length(ridx) > 1 - for (i, r) in enumerate(ridx[2:end]) - far = get_array(fr[r], selectargs_region; coord_source=(fr.dataset, r)) - if isempty(far.dims_coords) - fa.values[i+1] = far.values - else - fa.values[fill(Colon(), length(far.dims_coords))..., i+1] .= far.values - end - end - end - - return fa - end - -end - function get_array( fr::FieldRecord, allselectargs::NamedTuple; # allselectargs::NamedTuple=NamedTuple() creates a method ambiguity with deprecated form above coords=nothing, expand_cartesian=false, # can be overridden in allselectargs squeeze_all_single_dims=true, # can be overridden in allselectargs - verbose=true, + verbose=false, ) frname = default_fieldarray_name(fr.attributes) - @info "get_array (begin): $frname, allselectargs $allselectargs" + verbose && @info "get_array (begin): $frname, allselectargs $allselectargs" if !isnothing(coords) check_coords_argument(coords) || @@ -561,7 +408,7 @@ function get_array( # Generate name from attributes, including selection suffixes name = default_fieldarray_name(attributes) - @info "get_array (end): $frname -> $name, allselectargs $allselectargs" + verbose && @info "get_array (end): $frname -> $name, allselectargs $allselectargs" return FieldArray( name, @@ -571,6 +418,16 @@ function get_array( ) end +# check 'coords' of form [] or ["z"=>[ ... ], ] or ["z"=>(...),] +check_coords_argument(coords) = + isa(coords, AbstractVector) && ( + isempty(coords) || ( + isa(coords, AbstractVector{<:Pair}) && + isa(first(first(coords)), AbstractString) && + isa(last(first(coords)), Union{AbstractVector, Tuple}) + ) + ) + # Read coordinates attached to dimension 'dim' # For record coordinate, ridx_to_use = nothing # For other coordinates, ridx_to_use = record index to use for coordinate (ie limitation only one record in use, or coordinate is constant) diff --git a/src/PALEOmodel.jl b/src/PALEOmodel.jl index 24be4a1..cfb5a91 100644 --- a/src/PALEOmodel.jl +++ b/src/PALEOmodel.jl @@ -62,9 +62,7 @@ include("SteadyState.jl") include("SteadyStateKinsol.jl") -include("CoordsDims.jl") # TODO - -include("Regions.jl") # TODO +include("CoordsDims.jl") include("FieldArray.jl") diff --git a/src/PlotRecipes.jl b/src/PlotRecipes.jl index 8cc012e..024f9e7 100644 --- a/src/PlotRecipes.jl +++ b/src/PlotRecipes.jl @@ -133,9 +133,6 @@ end heatmap(output::AbstractOutputWriter, var::AbstractString [, selectargs::NamedTuple] [; coords::AbstractVector]) plot(outputs::Vector{<:AbstractOutputWriter}, vars::Union{AbstractString, Vector{<:AbstractString}} [, selectargs::NamedTuple] [; coords::AbstractVector]) - plot(modeldata::AbstractModelData, vars::Union{AbstractString, Vector{<:AbstractString}} [, selectargs::NamedTuple] [; coords::AbstractVector]) - heatmap(modeldata::AbstractModelData, var::AbstractString [, selectargs::NamedTuple] [; coords::AbstractVector]) - Plot recipe that calls `PB.get_field(output, var)`, and passes on to `plot(fr::FieldRecord, selectargs)` (see [`RecipesBase.apply_recipe(::Dict{Symbol, Any}, fr::FieldRecord, selectargs::NamedTuple)`](@ref)) @@ -152,8 +149,6 @@ Example: to replace a 1D column default pressure coordinate with a z coordinate: coords=["z"=>("atm.zmid", "atm.zlower", "atm.zupper")] -NB: the coordinates will be generated by applying `selectargs`, -so the supplied coordinate Variables must have the same dimensionality as `vars`. """ RecipesBase.@recipe function f( output::AbstractOutputWriter, @@ -166,16 +161,6 @@ RecipesBase.@recipe function f( vars = [vars] end - # if isnothing(coords) - # coords_records=nothing - # else - # check_coords_argument(coords) || - # error("coords argument should be of form 'coords=[\"z\"=>(\"atm.zmid\", \"atm.zlower\", \"atm.zupper\"), ...]") - # coords_records = [ - # coord_name => Tuple(PB.get_field(output, cvn) for cvn in coord_varnames) - # for (coord_name, coord_varnames) in coords - # ] - # end delete!(plotattributes, :coords) for var in vars @@ -194,41 +179,6 @@ RecipesBase.@recipe function f( return nothing end -RecipesBase.@recipe function f( - modeldata::PB.AbstractModelData, - vars::Union{AbstractString, AbstractVector{<:AbstractString}}, - selectargs::NamedTuple=NamedTuple(); - coords=nothing, # NB: PlotRecipes doesn't support type annotation -) - delete!(plotattributes, :coords) - - if isa(vars, AbstractString) - vars = [vars] - end - - # broadcast any Vector-valued argument in selectargs - bcastargs = broadcast_dict([Dict{Symbol, Any}(pairs(selectargs))]) - - for var in vars - # if var is of form .., set :structfield to take a single field from struct-valued Var - var, var_structfield = _split_var_structfield(var) - if !isnothing(var_structfield) - structfield := var_structfield - end - - # broadcast selectargs - for sa in bcastargs - sa_nt = NamedTuple(sa) - RecipesBase.@series begin - # pass through to plot recipe for FieldArray - PALEOmodel.get_array(modeldata, var, sa_nt; coords=coords) - end - end - end - - return nothing -end - function _split_var_structfield(var) # if var is of form .., split off varsplit = split(var, ".") @@ -273,12 +223,13 @@ Calls [`get_array(fr, selectargs)`](@ref) and passes on to `plot(fa::FieldArray) Vector-valued fields in `selectargs` are "broadcasted" (generating a separate plot series for each combination) -Optional argument `coords_records` can be used to supply plot coordinates from `FieldRecords`. -Format is a Vector of Pairs of "coord_name"=>(cr1::FieldRecord, cr2::FieldRecord, ...) -Example: - coords_records=["z"=>(zmid::FieldRecord, zlower::FieldRecord, zupper::FieldRecord)] -to replace a 1D column default pressure coordinate with a z coordinate. NB: the coordinates will be generated by applying `selectargs`, -so the supplied coordinate FieldRecords must have the same dimensionality as `fr`. +Optional argument `coords` can be used to supply plot coordinates from Variable in output. +Format is a Vector of Pairs of "coord_name"=>("var_name1", "var_name2", ...) + +Example: to replace a 1D column default pressure coordinate with a z coordinate: + + coords=["z"=>("atm.zmid", "atm.zlower", "atm.zupper")] + """ RecipesBase.@recipe function f(fr::FieldRecord, selectargs::NamedTuple, coords=nothing) @@ -527,3 +478,84 @@ function broadcast_dict(dv::Vector{<:Dict}) return dvout end end + + +######################################################## +# Helper functions for output visualisation +######################################################### + +""" + build_coords_edges(coords_vec::Vector{FixedCoord}) -> Vector{Float64} + +Build a vector of coordinate edges (length `n+1``) from `coords_vec`, assuming the PALEO +convention that `coords_vec` contains three elements with +cell midpoints, lower edges, upper edges each of length `n`, in that order. + +Falls back to just returning the first entry in `coords_vec` for other cases. +""" +function build_coords_edges(dim::PB.NamedDimension, coords_vec::Vector{FixedCoord}) + + if isempty(coords_vec) + # no coordinate - just return indices + co_values = collect(1:(dim.size+1)) + co_label = dim.name + elseif length(coords_vec) == 1 && is_boundary_coordinate(coords_vec[1]) + co = coords_vec[1] + co_values = vcat(co.values[1, :], co.values[2, end]) + co_label = append_units(co.name, co.attributes) + elseif length(coords_vec) == 1 || length(coords_vec) > 3 + # 1 coordinate or something we don't understand - take first + co = first(coords_vec) + co_values = co.values + co_label = append_units(co.name, co.attributes) + elseif length(coords_vec) == 2 && is_boundary_coordinate(coords_vec[2]) + # assume mid, boundary + co = coords_vec[2] + co_values = vcat(co.values[1, :], co.values[2, end]) + co_label = append_units(co.name, co.attributes) + elseif length(coords_vec) in (2, 3) + # 2 coordinates assume lower, upper edges + # 3 coordinates assume mid, lower, upper + co_lower = coords_vec[end-1] + co_upper = coords_vec[end] + co_label = append_units(co_lower.name*", "*co_upper.name, co_lower.attributes) + first(co_lower.values) < first(co_upper.values) || + @warn "build_coords_edges: $co_label co_lower is > co_upper - check model grid" + if co_lower.values[end] > co_lower.values[1] # ascending order + co_lower.values[2:end] == co_upper.values[1:end-1] || + @warn "build_coords_edges: $co_label lower and upper edges don't match" + co_values = [co_lower.values; co_upper.values[end]] + else # descending order + co_lower.values[1:end-1] == co_upper.values[2:end] || + @warn "build_coords_edges: $co_label lower and upper edges don't match" + co_values = [co_upper.values[1]; co_lower.values] + end + + end + + return co_values, co_label +end + +"guess coordinate edges from midpoints, assuming uniform spacing" +function guess_coords_edges(x_midpoints) + first_x = x_midpoints[1] - 0.5*(x_midpoints[2] - x_midpoints[1]) + last_x = x_midpoints[end] + 0.5*(x_midpoints[end] - x_midpoints[end-1]) + return [first_x; 0.5.*(x_midpoints[1:end-1] .+ x_midpoints[2:end]); last_x] +end + + +""" + append_units(name::AbstractString, attributes) -> "name (units)" + +Utility function to append variable units string to a variable name for display. +""" +function append_units(name::AbstractString, attributes::Dict{Symbol, Any}) + units = get(attributes, :units, "") + if isempty(units) + return name + else + return name*" ($units)" + end +end + +append_units(name::AbstractString, attributes::Nothing) = name diff --git a/src/Regions.jl b/src/Regions.jl deleted file mode 100644 index 924db7a..0000000 --- a/src/Regions.jl +++ /dev/null @@ -1,194 +0,0 @@ -""" - get_region(grid::Union{PB.AbstractMesh, Nothing}, values; coord_source, selectargs...) - -> values_subset, dim_subset::Vector{Pair{PB.NamedDimension, Vector{PALEOmodel.FixedCoord}}} - -Return the subset of `values` given by `selectargs` (Grid-specific keywords eg cell=, column=, ...) -and corresponding dimensions (with attached coordinates). -""" -function get_region(grid::Union{PB.AbstractMesh, Nothing}, values) end - -""" - get_region(dataset, recordidx, grid::Nothing, values) -> values[], [] - -Fallback for Domain with no grid, assumed 1 cell -""" -function get_region(grid::Nothing, values; coord_source=nothing) - length(values) == 1 || - throw(ArgumentError("grid==Nothing and length(values) != 1")) - return values[], Pair{PB.NamedDimension, Vector{PALEOmodel.FixedCoord}}[] -end - - -""" - get_region(dataset, recordidx, grid::PB.Grids.UnstructuredVectorGrid, values; cell=Nothing) -> - values_subset, dim_subset - -# Keywords for region selection: -- `cell::Union{Nothing, Int, Symbol}`: an Int for cell number (first cell = 1), or a Symbol to look up in `cellnames` - `cell = Nothing` is also allowed for a single-cell Domain. -""" -function get_region( - grid::PB.Grids.UnstructuredVectorGrid, values; - cell::Union{Nothing, Int, Symbol}=nothing, - coord_source=nothing, -) - if isnothing(cell) - grid.ncells == 1 || throw(ArgumentError("'cell' argument (an Int or Symbol) is required for an UnstructuredVectorGrid with > 1 cell")) - idx = 1 - elseif cell isa Int - idx = cell - else - idx = get(grid.cellnames, cell, nothing) - !isnothing(idx) || - throw(ArgumentError("cell ':$cell' not present in grid.cellnames=$(grid.cellnames)")) - end - - return ( - values[idx], - Pair{PB.NamedDimension, Vector{PALEOmodel.FixedCoord}}[], # no dimensions (ie squeeze out a dimension length 1 for single cell) - ) -end - - - -""" - get_region(dataset, recordidx, grid::PB.Grids.UnstructuredColumnGrid, values; column, [cell=nothing]) -> - values_subset, (dim_subset::NamedDimension, ...) - -# Keywords for region selection: -- `column::Union{Int, Symbol}`: (may be an Int, or a Symbol to look up in `columnames`) -- `cell::Int`: optional cell index within `column`, highest cell is cell 1 -""" -function get_region( - grid::PB.Grids.UnstructuredColumnGrid, values; - column, - cell::Union{Nothing, Int}=nothing, - coord_source=nothing -) - - if column isa Int - column in 1:length(grid.Icolumns) || - throw(ArgumentError("column index $column out of range")) - colidx = column - else - colidx = findfirst(isequal(column), grid.columnnames) - !isnothing(colidx) || - throw(ArgumentError("columnname '$column' not present in grid.columnnames=$(grid.columnnames)")) - end - - if isnothing(cell) - indices = grid.Icolumns[colidx] - coordnames = PB.get_coordinates(grid, "cells") - coordinates = FixedCoord[] - for cn in coordnames - cf, attributes = _get_field_attributes(coord_source, cn) - push!(coordinates, FixedCoord(cn, cf.values[indices], attributes)) - end - return ( - values[indices], - [PB.NamedDimension("cells", length(indices)) => coordinates], - ) - else - # squeeze out z dimension - idx = grid.Icolumns[colidx][cell] - return ( - values[idx], - Pair{PB.NamedDimension, Vector{PALEOmodel.FixedCoord}}[], # no dimensions (ie squeeze out a dimension length 1 for single cell) - ) - end - -end - - -""" - get_region(dataset, recordidx, rid::Union{PB.Grids.CartesianLinearGrid{2}, PB.Grids.CartesianArrayGrid{2}} , internalvalues; [i=i_idx], [j=j_idx]) -> - arrayvalues_subset, (dim_subset::NamedDimension, ...) - -# Keywords for region selection: -- `i::Int`: optional, slice along first dimension -- `j::Int`: optional, slice along second dimension - -`internalvalues` are transformed if needed from internal Field representation as a Vector length `ncells`, to -an Array (2D if neither i, j arguments present, 1D if i or j present, 0D ie one cell if both present) -""" -function get_region( - grid::Union{PB.Grids.CartesianLinearGrid{2}, PB.Grids.CartesianArrayGrid{2}}, internalvalues; - i::Union{Integer, Colon}=Colon(), - j::Union{Integer, Colon}=Colon(), - coord_source, -) - return _get_region(grid, internalvalues, [i, j], coord_source) -end - -""" - get_region(grid::Union{PB.Grids.CartesianLinearGrid{3}, PB.Grids.CartesianArrayGrid{3}}, internalvalues; [i=i_idx], [j=j_idx]) -> - arrayvalues_subset, (dim_subset::NamedDimension, ...) - -# Keywords for region selection: -- `i::Int`: optional, slice along first dimension -- `j::Int`: optional, slice along second dimension -- `k::Int`: optional, slice along third dimension - -`internalvalues` are transformed if needed from internal Field representation as a Vector length `ncells`, to -an Array (3D if neither i, j, k arguments present, 2D if one of i, j or k present, 1D if two present, -0D ie one cell if i, j, k all specified). -""" -function get_region( - grid::Union{PB.Grids.CartesianLinearGrid{3}, PB.Grids.CartesianArrayGrid{3}}, internalvalues; - i::Union{Integer, Colon}=Colon(), - j::Union{Integer, Colon}=Colon(), - k::Union{Integer, Colon}=Colon(), - coord_source, -) - return _get_region(grid, internalvalues, [i, j, k], coord_source) -end - -function _get_region( - grid::Union{PB.Grids.CartesianLinearGrid, PB.Grids.CartesianArrayGrid}, internalvalues, indices, coord_source -) - if !isempty(grid.coords) && !isempty(grid.coords_edges) - dims = [ - PB.NamedDimension(grid.dimnames[idx], grid.coords[idx], grid.coords_edges[idx]) - for (idx, ind) in enumerate(indices) if isa(ind, Colon) - ] - elseif !isempty(grid.coords) - dims = [ - PB.NamedDimension(grid.dimnames[idx], grid.coords[idx]) - for (idx, ind) in enumerate(indices) if isa(ind, Colon) - ] - else - dims = [ - PB.NamedDimension(grid.dimnames[idx]) - for (idx, ind) in enumerate(indices) if isa(ind, Colon) - ] - end - - values = internal_to_cartesian(grid, internalvalues) - if !all(isequal(Colon()), indices) - values = values[indices...] - end - - return values, Tuple(dims) -end - -# field and attributes from a dataset that implements PB.get_field(dataset, name)::FieldRecord -function _get_field_attributes(coord_source::Tuple{Any, Int}, name) - dataset, record_idx = coord_source - fr = PB.get_field(dataset, name) - return fr[record_idx], fr.attributes -end - -function _get_field_attributes(coord_source::Tuple{PB.ModelData, AbstractString}, varname) - modeldata, domainname = coord_source - # PB.get_field(model, modeldata, varnamefull) doesn't provide attributes, so do this ourselves - varnamefull = domainname*"."*varname - var = PB.get_variable(modeldata.model, varnamefull) - !isnothing(var) || - throw(ArgumentError("Variable $varnamefull not found")) - f = PB.get_field(var, modeldata) - attributes = copy(var.attributes) - attributes[:var_name] = var.name - attributes[:domain_name] = var.domain.name - - return f, attributes -end \ No newline at end of file From 2d2d4e1b1e4679d1f3f0497b773e69cdfcef4526 Mon Sep 17 00:00:00 2001 From: Stuart Daines Date: Mon, 30 Dec 2024 20:59:45 +0000 Subject: [PATCH 3/4] Remove old code, update doc --- src/FieldArray.jl | 16 +- src/FieldRecord.jl | 385 +++++++++++++++++++---------------- src/ODEfixed.jl | 4 +- src/OutputWriters.jl | 73 ++++--- src/PlotRecipes.jl | 2 +- test/runfieldtests.jl | 29 +++ test/runoutputwritertests.jl | 25 +++ 7 files changed, 328 insertions(+), 206 deletions(-) diff --git a/src/FieldArray.jl b/src/FieldArray.jl index 7ff952c..5f33d90 100644 --- a/src/FieldArray.jl +++ b/src/FieldArray.jl @@ -48,19 +48,21 @@ end Base.:*(a::Real, fa_in::FieldArray) = fa_in*a # default name from attributes -default_fieldarray_name(attributes::Nothing) = "" +default_varnamefull(attributes::Nothing) = "" -function default_fieldarray_name(attributes::Dict) +function default_varnamefull(attributes::Dict; include_selectargs=false) name = get(attributes, :domain_name, "") name *= isempty(name) ? "" : "." name *= get(attributes, :var_name, "") - selectargs_records = get(attributes, :filter_records, NamedTuple()) - selectargs_region = get(attributes, :filter_region, NamedTuple()) - if !isempty(selectargs_region) || !isempty(selectargs_records) - name *= "(" * join(["$k=$v" for (k, v) in Dict(pairs(merge(selectargs_records, selectargs_region)))], ", ") * ")" + if include_selectargs + selectargs_records = get(attributes, :filter_records, NamedTuple()) + selectargs_region = get(attributes, :filter_region, NamedTuple()) + if !isempty(selectargs_region) || !isempty(selectargs_records) + name *= "(" * join(["$k=$v" for (k, v) in Dict(pairs(merge(selectargs_records, selectargs_region)))], ", ") * ")" + end end - + return name end diff --git a/src/FieldRecord.jl b/src/FieldRecord.jl index 843c1af..3fe42f2 100644 --- a/src/FieldRecord.jl +++ b/src/FieldRecord.jl @@ -16,6 +16,16 @@ struct FieldRecord{FieldData <: PB.AbstractData, Space <: PB.AbstractSpace, V, N attributes::Dict{Symbol, Any} end +function Base.getproperty(fr::FieldRecord, p::Symbol) + if p == :name + return get(fr.attributes, :var_name, "") + else + return getfield(fr, p) + end +end + +Base.propertynames(fr::FieldRecord) = (:name, fieldnames(typeof(fr))...) + # create empty FieldRecord function FieldRecord( dataset, f::PB.Field{FieldData, Space, V, N, Mesh}, attributes; @@ -67,6 +77,23 @@ function FieldRecord( ) end +# create a new copy of a FieldRecord, with new dataset, copying records and attributes and updating mesh, attributes[:domain] +function FieldRecord( + fr::FieldRecord{FieldData, Space, V, N, Mesh, R}, dataset; + mesh = dataset.grid, + domain::AbstractString = dataset.name, +) where {FieldData, Space, V, N, Mesh, R} + new_attributes = deepcopy(fr.attributes) + new_attributes[:domain_name] = domain + return FieldRecord{FieldData, Space, V, N, Mesh, R}( + dataset, + deepcopy(fr.records), + fr.data_dims, + mesh, + new_attributes, + ) +end + space(fr::FieldRecord{FieldData, Space, V, N, Mesh, R}) where {FieldData, Space, V, N, Mesh, R} = Space function PB.get_dimensions(fr::FieldRecord; expand_cartesian=false) @@ -75,6 +102,14 @@ function PB.get_dimensions(fr::FieldRecord; expand_cartesian=false) return dims end +function PB.get_dimension(fr::FieldRecord, dimname::AbstractString; expand_cartesian=false) + dims = PB.get_dimensions(fr; expand_cartesian) + idx = findfirst(d -> d.name==dimname, dims) + !isnothing(idx) || + error("no dimension='$dimname' (available dimensions: $dims") + return dims[idx] +end + function PB.get_coordinates(fr::FieldRecord, dimname::AbstractString; expand_cartesian=false) if !isnothing(findfirst(nd -> nd.name == dimname, PB.get_dimensions(fr.mesh, space(fr); expand_cartesian))) # no grid (fr.mesh == nothing) defines a "cell" dimension, but not get_coordinates @@ -89,15 +124,12 @@ function PB.get_coordinates(fr::FieldRecord, dimname::AbstractString; expand_car end function Base.show(io::IO, fr::FieldRecord) - print(io, - "FieldRecord(eltype=", eltype(fr),", length=", length(fr), - ", attributes=", fr.attributes, - ")" - ) + print(io, "FieldRecord(name='", fr.name, "', eltype=", eltype(fr),", length=", length(fr), ")",) end -function Base.show(io::IO, ::MIME"text/plain", fr::FieldRecord) - println(io, "FieldRecord(eltype=", eltype(fr),", length=", length(fr), ")") +function Base.show(io::IO, ::MIME"text/plain", @nospecialize(fr::FieldRecord)) + show(io, fr) + println() # println(io, " records: ", fr.records) println(io, " data_dims: ", fr.data_dims) println(io, " mesh: ", fr.mesh) @@ -176,11 +208,11 @@ Base.lastindex(fr::FieldRecord) = lastindex(fr.records) function Base.copy(fr::FieldRecord{FieldData, Space, V, N, Mesh, R}) where {FieldData, Space, V, N, Mesh, R} return FieldRecord{FieldData, Space, V, N, Mesh, R}( + fr.dataset, deepcopy(fr.records), fr.data_dims, fr.mesh, deepcopy(fr.attributes), - copy(fr.coords_record), ) end @@ -211,7 +243,7 @@ eg to replace a 1D column default pressure coordinate with a z coordinate: """ function get_array( - fr::FieldRecord; + @nospecialize(fr::FieldRecord); coords=nothing, allselectargs... ) @@ -226,196 +258,203 @@ end function get_array( - fr::FieldRecord, allselectargs::NamedTuple; # allselectargs::NamedTuple=NamedTuple() creates a method ambiguity with deprecated form above + @nospecialize(fr::FieldRecord), @nospecialize(allselectargs::NamedTuple); # allselectargs::NamedTuple=NamedTuple() creates a method ambiguity with deprecated form above coords=nothing, expand_cartesian=false, # can be overridden in allselectargs squeeze_all_single_dims=true, # can be overridden in allselectargs verbose=false, ) - frname = default_fieldarray_name(fr.attributes) - - verbose && @info "get_array (begin): $frname, allselectargs $allselectargs" - - if !isnothing(coords) - check_coords_argument(coords) || - error("argument coords should be a Vector of Pairs of \"dim_name\"=>(\"var_name1\", \"var_name2\", ...), eg: [\"z\"=>(\"atm.zmid\", \"atm.zlower\", \"atm.zupper\"), ...]") - end - - ########################################################################## - # preprocess allselectargs to strip non-selection arguments and fix quirks - ######################################################################## - allselectargs_sort = [String(k) => v for (k, v) in pairs(allselectargs)] + frname = default_varnamefull(fr.attributes; include_selectargs=true) + fa = nothing + try + verbose && @info "get_array (begin): $frname, allselectargs $allselectargs" - # override expand_cartesian - idx_expand_cartesian = findfirst(x-> x[1] == "expand_cartesian", allselectargs_sort) - if !isnothing(idx_expand_cartesian) - _, expand_cartesian = popat!(allselectargs_sort, idx_expand_cartesian) - end - # override squeeze_all_single_dims - idx_squeeze_all_single_dims = findfirst(x-> x[1] == "squeeze_all_single_dims", allselectargs_sort) - if !isnothing(idx_squeeze_all_single_dims) - _, squeeze_all_single_dims = popat!(allselectargs_sort, idx_squeeze_all_single_dims) - end - - # quirk: column must precede cell selection, so move to front if present - idx_column = findfirst(x-> x[1] == "column", allselectargs_sort) - if !isnothing(idx_column) - column_select = popat!(allselectargs_sort, idx_column) - pushfirst!(allselectargs_sort, column_select) - end - - selectargs_used = fill(false, length(allselectargs_sort)) - - ################################################################ - # read dimensions - # (TODO reproduce code from get_dimensions so we can also get dims_spatial) - ########################################################## - # order is spatial (from grid), data_dims, then record dimension - dims_spatial = PB.get_dimensions(fr.mesh, space(fr); expand_cartesian) - dims = [dims_spatial..., fr.data_dims..., PB.NamedDimension(fr.dataset.record_dim.name, length(fr))] - recorddimidx = length(dims) - - ################################################################################## - # Read coordinates and apply selection: record dimension first, followed by non-record dimensions - # so we can handle the case where a single record is selected and coordinates are not constant. - # TODO this doesn't handle cases where multiple records are used with non-constant coordinates - ########################################################################################## + if !isnothing(coords) + check_coords_argument(coords) || + error("argument coords should be a Vector of Pairs of \"dim_name\"=>(\"var_name1\", \"var_name2\", ...), eg: [\"z\"=>(\"atm.zmid\", \"atm.zlower\", \"atm.zupper\"), ...]") + end - # vector of filtered dim => coords corresponding to select_indices, nothing if dimension squeezed out - dims_coords = Vector{Any}(undef, length(dims)) - # indices in full array to use. Start with everything, then apply selections from 'allselectargs' - select_indices = Any[1:(nd.size) for nd in dims] + ########################################################################## + # preprocess allselectargs to strip non-selection arguments and fix quirks + ######################################################################## + allselectargs_sort = [String(k) => v for (k, v) in pairs(allselectargs)] + + # override expand_cartesian + idx_expand_cartesian = findfirst(x-> x[1] == "expand_cartesian", allselectargs_sort) + if !isnothing(idx_expand_cartesian) + _, expand_cartesian = popat!(allselectargs_sort, idx_expand_cartesian) + end + # override squeeze_all_single_dims + idx_squeeze_all_single_dims = findfirst(x-> x[1] == "squeeze_all_single_dims", allselectargs_sort) + if !isnothing(idx_squeeze_all_single_dims) + _, squeeze_all_single_dims = popat!(allselectargs_sort, idx_squeeze_all_single_dims) + end - # Record dimension + # quirk: column must precede cell selection, so move to front if present + idx_column = findfirst(x-> x[1] == "column", allselectargs_sort) + if !isnothing(idx_column) + column_select = popat!(allselectargs_sort, idx_column) + pushfirst!(allselectargs_sort, column_select) + end - # read record coordinates - dims_coords[recorddimidx] = dims[recorddimidx] => _read_coordinates( - fr, dims[recorddimidx], nothing, expand_cartesian; substitute_coords=coords - ) - - # keep track of selection used, to provide as attributes in FieldArray - selectargs_records = OrderedCollections.OrderedDict() - - _filter_dims_coords( - select_indices, dims_coords, recorddimidx, - allselectargs_sort, selectargs_used, selectargs_records, - fr, - ) + selectargs_used = fill(false, length(allselectargs_sort)) + + ################################################################ + # read dimensions + # (TODO reproduce code from get_dimensions so we can also get dims_spatial) + ########################################################## + # order is spatial (from grid), data_dims, then record dimension + dims_spatial = PB.get_dimensions(fr.mesh, space(fr); expand_cartesian) + dims = [dims_spatial..., fr.data_dims..., PB.NamedDimension(fr.dataset.record_dim.name, length(fr))] + recorddimidx = length(dims) + + ################################################################################## + # Read coordinates and apply selection: record dimension first, followed by non-record dimensions + # so we can handle the case where a single record is selected and coordinates are not constant. + # TODO this doesn't handle cases where multiple records are used with non-constant coordinates + ########################################################################################## + + # vector of filtered dim => coords corresponding to select_indices, nothing if dimension squeezed out + dims_coords = Vector{Any}(undef, length(dims)) + # indices in full array to use. Start with everything, then apply selections from 'allselectargs' + select_indices = Any[1:(nd.size) for nd in dims] + + # Record dimension + + # read record coordinates + dims_coords[recorddimidx] = dims[recorddimidx] => _read_coordinates( + fr, dims[recorddimidx], nothing, expand_cartesian; substitute_coords=coords + ) + + # keep track of selection used, to provide as attributes in FieldArray + selectargs_records = OrderedCollections.OrderedDict() + + _filter_dims_coords( + select_indices, dims_coords, recorddimidx, + allselectargs_sort, selectargs_used, selectargs_records, + fr, + ) - # get record indices to use - ridx_to_use = select_indices[recorddimidx] - have_recorddim = !isnothing(dims_coords[recorddimidx]) + # get record indices to use + ridx_to_use = select_indices[recorddimidx] + have_recorddim = !isnothing(dims_coords[recorddimidx]) - # Non-record dimensions + # Non-record dimensions - # read non-record coordinates, from first record selected - for i in 1:(length(dims)-1) - dims_coords[i] = dims[i] => _read_coordinates(fr, dims[i], first(ridx_to_use), expand_cartesian; substitute_coords=coords) - end + # read non-record coordinates, from first record selected + for i in 1:(length(dims)-1) + dims_coords[i] = dims[i] => _read_coordinates(fr, dims[i], first(ridx_to_use), expand_cartesian; substitute_coords=coords) + end - selectargs_region = OrderedCollections.OrderedDict() - - _filter_dims_coords( - select_indices, dims_coords, 1:length(dims)-1, - allselectargs_sort, selectargs_used, selectargs_region, - fr, - ) - - unused_selectargs = [a for (a, u) in zip(allselectargs_sort, selectargs_used) if !u] - isempty(unused_selectargs) || - error("allselectargs contains select filter(s) $unused_selectargs that do not match any dimensions or coordinates !") - - ############################################# - # squeeze out dimensions and coordinates - ############################################# - - # selections that produce a dimension with a single index are already squeezed out, - # but not dimensions that started with a single index and haven't had a selection applied - if squeeze_all_single_dims - for i in eachindex(dims_coords, select_indices) - if !isnothing(dims_coords[i]) && dims_coords[i][1].size == 1 - @info "get_array: $frname squeezing out dimension $(dims_coords[i][1].name)" - @assert !isa(select_indices[i], Number) - @assert length(select_indices[i]) == 1 - dims_coords[i] = nothing - select_indices[i] = first(select_indices[i]) + selectargs_region = OrderedCollections.OrderedDict() + + _filter_dims_coords( + select_indices, dims_coords, 1:length(dims)-1, + allselectargs_sort, selectargs_used, selectargs_region, + fr, + ) + + unused_selectargs = [a for (a, u) in zip(allselectargs_sort, selectargs_used) if !u] + isempty(unused_selectargs) || + error("allselectargs contains select filter(s) $unused_selectargs that do not match any dimensions or coordinates !") + + ############################################# + # squeeze out dimensions and coordinates + ############################################# + + # selections that produce a dimension with a single index are already squeezed out, + # but not dimensions that started with a single index and haven't had a selection applied + if squeeze_all_single_dims + for i in eachindex(dims_coords, select_indices) + if !isnothing(dims_coords[i]) && dims_coords[i][1].size == 1 + verbose && @info "get_array: $frname squeezing out dimension $(dims_coords[i][1].name)" + @assert !isa(select_indices[i], Number) + @assert length(select_indices[i]) == 1 + dims_coords[i] = nothing + select_indices[i] = first(select_indices[i]) + end end end - end - dims_coords_sq = Pair{PB.NamedDimension, Vector{PALEOmodel.FixedCoord}}[dc for dc in dims_coords if !isnothing(dc)] - dims_sq = [d for (d, c) in dims_coords_sq] - - # get non-record dimensions indices to use - nonrecordindicies = select_indices[1:end-1] - # squeeze out single dimensions by filtering out scalars in nonrecordindicies - # NB: lhs (assignment to output array) is contiguous ! - nonrecordindicies_sq = [1:length(nri) for nri in nonrecordindicies if nri isa AbstractVector] - # consistency check between dimensions and indices of output array - @assert length(dims_sq) == length(nonrecordindicies_sq) + have_recorddim - for (d_sq, nri_sq) in zip(dims_sq, nonrecordindicies_sq) # will stop after shortest iter and so skip recorddim if present - @assert d_sq.size == length(nri_sq) - end + dims_coords_sq = Pair{PB.NamedDimension, Vector{PALEOmodel.FixedCoord}}[dc for dc in dims_coords if !isnothing(dc)] + dims_sq = [d for (d, c) in dims_coords_sq] + + # get non-record dimensions indices to use + nonrecordindicies = select_indices[1:end-1] + # squeeze out single dimensions by filtering out scalars in nonrecordindicies + # NB: lhs (assignment to output array) is contiguous ! + nonrecordindicies_sq = [1:length(nri) for nri in nonrecordindicies if nri isa AbstractVector] + # consistency check between dimensions and indices of output array + @assert length(dims_sq) == length(nonrecordindicies_sq) + have_recorddim + for (d_sq, nri_sq) in zip(dims_sq, nonrecordindicies_sq) # will stop after shortest iter and so skip recorddim if present + @assert d_sq.size == length(nri_sq) + end - ############################################### - # create output FieldArray - ############################################### + ############################################### + # create output FieldArray + ############################################### - # create values array - if field_single_element(fr) - if have_recorddim - avalues = fr.records[ridx_to_use] - else - # represent a scalar as a 0D Array - avalues = Array{eltype(fr.records), 0}(undef) - avalues[] = fr.records[ridx_to_use] - end - else - if expand_cartesian && !isempty(dims_spatial) - expand_fn = x -> PB.Grids.internal_to_cartesian(fr.mesh, x) - aeltype = Union{Missing, eltype(first(fr.records))} - else - expand_fn = identity - aeltype = eltype(first(fr.records)) - end - avalues = Array{aeltype, length(dims_sq)}(undef, [nd.size for nd in dims_sq]...) - if have_recorddim - for (riselect, ri) in enumerate(ridx_to_use) + # create values array + if field_single_element(fr) + if have_recorddim + avalues = fr.records[ridx_to_use] + else + # represent a scalar as a 0D Array + avalues = Array{eltype(fr.records), 0}(undef) + avalues[] = fr.records[ridx_to_use] + end + else + if expand_cartesian && !isempty(dims_spatial) + expand_fn = x -> PB.Grids.internal_to_cartesian(fr.mesh, x) + aeltype = Union{Missing, eltype(first(fr.records))} + else + expand_fn = identity + aeltype = eltype(first(fr.records)) + end + avalues = Array{aeltype, length(dims_sq)}(undef, [nd.size for nd in dims_sq]...) + if have_recorddim + for (riselect, ri) in enumerate(ridx_to_use) + if isempty(nonrecordindicies_sq) + avalues[riselect] = expand_fn(fr.records[ri])[nonrecordindicies...] + else + avalues[nonrecordindicies_sq..., riselect] .= expand_fn(fr.records[ri])[nonrecordindicies...] + end + end + else if isempty(nonrecordindicies_sq) - avalues[riselect] = expand_fn(fr.records[ri])[nonrecordindicies...] + avalues[] = expand_fn(fr.records[ridx_to_use])[nonrecordindicies...] else - avalues[nonrecordindicies_sq..., riselect] .= expand_fn(fr.records[ri])[nonrecordindicies...] + avalues[nonrecordindicies_sq...] .= expand_fn(fr.records[ridx_to_use])[nonrecordindicies...] end end - else - if isempty(nonrecordindicies_sq) - avalues[] = expand_fn(fr.records[ridx_to_use])[nonrecordindicies...] - else - avalues[nonrecordindicies_sq...] .= expand_fn(fr.records[ridx_to_use])[nonrecordindicies...] - end end - end - - # add attributes for selection used - attributes = copy(fr.attributes) - if !isempty(selectargs_records) - attributes[:filter_records] = NamedTuple(Symbol(k)=>v for (k, v) in selectargs_records) - end - if !isempty(selectargs_region) - attributes[:filter_region] = NamedTuple(Symbol(k)=>v for (k, v) in selectargs_region) - end + + # add attributes for selection used + attributes = copy(fr.attributes) + if !isempty(selectargs_records) + attributes[:filter_records] = NamedTuple(Symbol(k)=>v for (k, v) in selectargs_records) + end + if !isempty(selectargs_region) + attributes[:filter_region] = NamedTuple(Symbol(k)=>v for (k, v) in selectargs_region) + end - # Generate name from attributes, including selection suffixes - name = default_fieldarray_name(attributes) + # Generate name from attributes, including selection suffixes + name = default_varnamefull(attributes; include_selectargs=true) - verbose && @info "get_array (end): $frname -> $name, allselectargs $allselectargs" + verbose && @info "get_array (end): $frname -> $name, allselectargs $allselectargs" - return FieldArray( - name, - avalues, - dims_coords_sq, - attributes, - ) + fa = FieldArray( + name, + avalues, + dims_coords_sq, + attributes, + ) + catch + @error "get_array exception: $frname, allselectargs $allselectargs" + rethrow() + end + + return fa end # check 'coords' of form [] or ["z"=>[ ... ], ] or ["z"=>(...),] diff --git a/src/ODEfixed.jl b/src/ODEfixed.jl index 2fb823e..abfa6d9 100644 --- a/src/ODEfixed.jl +++ b/src/ODEfixed.jl @@ -400,7 +400,7 @@ function integrateFixed( write_output_record(outputwriter::Nothing, model, modeldata, timesteppers, tmodel, Δt_outer) = nothing # set initial state - touter = tspan[1] + touter = Float64(tspan[1]) PALEOmodel.set_tforce!(modeldata.solver_view_all, touter) @@ -488,7 +488,7 @@ function integrateFixedthreads( write_output_record(outputwriter::Nothing, barrier, model, modeldata, timesteppers, tmodel, Δt_outer, tid) = nothing # write initial state - touter = tspan[1] + touter = Float64(tspan[1]) PALEOmodel.set_tforce!(modeldata.solver_view_all, touter) PALEOmodel.set_statevar!(modeldata.solver_view_all, initial_state) inextoutput = 2 diff --git a/src/OutputWriters.jl b/src/OutputWriters.jl index 089118b..9e1a7f8 100644 --- a/src/OutputWriters.jl +++ b/src/OutputWriters.jl @@ -51,11 +51,11 @@ PALEOmodel.AbstractOutputWriter """ initialize!( - output::PALEOmodel.AbstractOutputWriter, model, modeldata, nrecords + output::PALEOmodel.AbstractOutputWriter, model, modeldata, [nrecords] [;record_dim_name=:tmodel] [record_coord_units="yr"] ) -Initialize from a PALEOboxes::Model, reserving memory for an assumed output dataset of `nrecords`. +Initialize from a PALEOboxes::Model, optionally reserving memory for an assumed output dataset of `nrecords`. The default for `record_dim_name` is `:tmodel`, for a sequence of records following the time evolution of the model. @@ -221,7 +221,7 @@ end """ OutputMemoryDomain( name::AbstractString; - [record_dim_name::Symbol=:tmodel] + [record_dim_name::Symbol=:tmodel], [record_coord_name::Symbol=record_dim_name] [grid=nothing] [data_dims::Vector{PB.NamedDimension} = Vector{PB.NamedDimension}()] ) @@ -263,7 +263,7 @@ Base.length(output::OutputMemoryDomain) = output.record_dim.size "create from a PALEOboxes::Domain" function OutputMemoryDomain( - dom::PB.Domain, modeldata::PB.ModelData; + dom::PB.Domain, modeldata::PB.ModelData, nrecords=nothing; parentdataset=nothing, record_dim_name::Symbol=:tmodel, record_coord_name::Symbol=record_dim_name, @@ -307,7 +307,7 @@ function OutputMemoryDomain( :data_dims => (), :units => record_coord_units, ), - ) + ) # create list of variables sorted by host dependent type, then by name odom._all_vars = vcat( @@ -346,6 +346,35 @@ function OutputMemoryDomain( end end + if !isnothing(nrecords) + for (_, fr) in odom.data + sizehint!(fr.records, nrecords) + end + end + + return odom +end + +# create from a record coordinate FieldRecord +function OutputMemoryDomain( + name::AbstractString, record_coordinate::PALEOmodel.FieldRecord; + parentdataset=nothing, + data_dims::Vector{PB.NamedDimension} = Vector{PB.NamedDimension}(), + data_dims_coordinates::Dict{String, Vector{String}} = Dict{String, Vector{String}}(), + grid = nothing, +) + record_dim_name = Symbol(record_coordinate.attributes[:var_name]) + record_coord_name = record_dim_name + odom = OutputMemoryDomain( + name; + record_dim_name, + record_coord_name, + parentdataset, data_dims, data_dims_coordinates, grid + ) + + odom.record_dim = PB.NamedDimension(string(record_dim_name), length(record_coordinate)) + PB.add_field!(odom, record_coordinate) + return odom end @@ -404,7 +433,6 @@ function OutputMemoryDomain( return odom end - function add_record!(odom::OutputMemoryDomain, modeldata, rec_coord) odom.record_dim = PB.NamedDimension(odom.record_dim.name, odom.record_dim.size + 1) @@ -431,13 +459,12 @@ function PB.add_field!(odom::OutputMemoryDomain, fr::PALEOmodel.FieldRecord) length(fr) == length(odom) || throw(ArgumentError("FieldRecord length $(length(fr)) != OutputMemoryDomain length $(length(odom))")) - haskey(fr.attributes, :var_name) || - throw(ArgumentError("FieldRecord has no :var_name attribute")) - varname = fr.attributes[:var_name] - !(Symbol(varname) in keys(odom.data)) || - throw(ArgumentError("Variable $varname already exists")) + !isempty(fr.name) || + throw(ArgumentError("FieldRecord has empty name = \"\"")) + !(Symbol(fr.name) in keys(odom.data)) || + throw(ArgumentError("Variable $(fr.name) already exists")) - odom.data[Symbol(varname)] = fr + odom.data[Symbol(fr.name)] = PALEOmodel.FieldRecord(fr, odom) return nothing end @@ -713,7 +740,7 @@ end function initialize!( - output::OutputMemory, model::PB.Model, modeldata::PB.ModelData, nrecords; + output::OutputMemory, model::PB.Model, modeldata::PB.ModelData, nrecords=nothing; record_dim_name::Symbol=:tmodel, record_coord_name::Symbol=record_dim_name, record_coord_units::AbstractString="yr", @@ -739,7 +766,7 @@ function initialize!( empty!(output.domains) for dom in model.domains output.domains[dom.name] = OutputMemoryDomain( - dom, modeldata; # nrecords; + dom, modeldata, nrecords; parentdataset=output, record_dim_name, record_coord_name, @@ -1116,7 +1143,7 @@ function variable_to_netcdf!( ds, record_dim_name::AbstractString, vname::AbstractString, - vfr::PALEOmodel.FieldRecord, + @nospecialize(vfr::PALEOmodel.FieldRecord), field_data::Type{<:PB.AbstractData}, grid_spatial_dims::NTuple{NS, String}, spatial_unpackfn, @@ -1454,7 +1481,7 @@ function grid_to_netcdf!(ds, grid::PB.Grids.UnstructuredVectorGrid) NCDatasets.defDim(ds, "cells", grid.ncells) - coordnames_to_netcdf(ds, "grid_coords", grid.coordinates) + coordnames_to_netcdf(ds, "PALEO_grid_coords", grid.coordinates) # named cells cellnames = [String(k) for (k, v) in grid.cellnames] @@ -1485,7 +1512,7 @@ function grid_to_netcdf!(ds, grid::PB.Grids.UnstructuredColumnGrid) v = NCDatasets.defVar(ds, "Icolumns", Icolumns .- 1, ("cells",)) # NB: zero-based v.attrib["comment"] = "zero-based indices of cells from top to bottom ordered by columns" - coordnames_to_netcdf(ds, "grid_coords", grid.coordinates) + coordnames_to_netcdf(ds, "PALEO_grid_coords", grid.coordinates) # optional column labels if !isempty(grid.columnnames) @@ -1511,7 +1538,7 @@ function grid_to_netcdf!(ds, grid::PB.Grids.CartesianArrayGrid{N}) where {N} subdomain_to_netcdf!(ds, name, subdom) end - coordnames_to_netcdf(ds, "grid_coords", grid.coordinates) + coordnames_to_netcdf(ds, "PALEO_grid_coords", grid.coordinates) return (Tuple(nd.name for nd in grid.dimensions), identity) end @@ -1553,7 +1580,7 @@ function grid_to_netcdf!(ds, grid::PB.Grids.CartesianLinearGrid{N}) where {N} subdomain_to_netcdf!(ds, name, subdom) end - coordnames_to_netcdf(ds, "grid_coords", grid.coordinates) + coordnames_to_netcdf(ds, "PALEO_grid_coords", grid.coordinates) spatial_unpackfn = d -> PB.Grids.internal_to_cartesian(grid, d) # unpack linear representation into cartesian grid @@ -1617,7 +1644,7 @@ function netcdf_to_grid(::Type{PB.Grids.UnstructuredVectorGrid}, ds::NCDatasets. vec_cellnames_indices = ncattrib_as_vector(ds, "PALEO_cellnames_indices") .+ 1 # netcdf is zero offset cellnames = Dict{Symbol, Int}(k=>v for (k, v) in zip(vec_cellnames, vec_cellnames_indices)) - coordinates = netcdf_to_coordnames(ds, "grid_coords") + coordinates = netcdf_to_coordnames(ds, "PALEO_grid_coords") return (PB.Grids.UnstructuredVectorGrid(ncells, cellnames, subdomains, coordinates), identity) end @@ -1637,7 +1664,7 @@ function netcdf_to_grid(::Type{PB.Grids.UnstructuredColumnGrid}, ds::NCDatasets. colstart += cells_this_column end - coordinates = netcdf_to_coordnames(ds, "grid_coords") + coordinates = netcdf_to_coordnames(ds, "PALEO_grid_coords") # backwards compatibility if haskey(ds.attrib, "PALEO_z_coords") && !haskey(coordinates, "cells") coordinates["cells"] = ncattrib_as_vector(ds, "PALEO_z_coords") @@ -1660,7 +1687,7 @@ function netcdf_to_grid(::Type{PB.Grids.CartesianArrayGrid}, ds::NCDatasets.Data ncells, dimensions, dimensions_extra, zidxsurface, display_mult, londim, latdim, zdim = _netcdf_to_cartesiandimscoords(PB.Grids.CartesianArrayGrid, ds, dsvars) - coordinates = netcdf_to_coordnames(ds, "grid_coords") + coordinates = netcdf_to_coordnames(ds, "PALEO_grid_coords") grid = PB.Grids.CartesianArrayGrid{length(dims)}( ncells, @@ -1682,7 +1709,7 @@ function netcdf_to_grid(::Type{PB.Grids.CartesianLinearGrid}, ds::NCDatasets.Dat _netcdf_to_cartesiandimscoords(PB.Grids.CartesianLinearGrid, ds, dsvars) ncolumns = ds.attrib["PALEO_columns"] - coordinates = netcdf_to_coordnames(ds, "grid_coords") + coordinates = netcdf_to_coordnames(ds, "PALEO_grid_coords") # convert back to linear vectors linear_index = Array(dsvars["linear_index"]) .+ 1 # netcdf zero based diff --git a/src/PlotRecipes.jl b/src/PlotRecipes.jl index 024f9e7..86140f0 100644 --- a/src/PlotRecipes.jl +++ b/src/PlotRecipes.jl @@ -231,7 +231,7 @@ Example: to replace a 1D column default pressure coordinate with a z coordinate: coords=["z"=>("atm.zmid", "atm.zlower", "atm.zupper")] """ -RecipesBase.@recipe function f(fr::FieldRecord, selectargs::NamedTuple, coords=nothing) +RecipesBase.@recipe function f(@nospecialize(fr::FieldRecord), selectargs::NamedTuple, coords=nothing) # broadcast any Vector-valued argument in selectargs bcastargs = broadcast_dict([Dict{Symbol, Any}(pairs(selectargs))]) diff --git a/test/runfieldtests.jl b/test/runfieldtests.jl index 34181c3..68ac8bb 100644 --- a/test/runfieldtests.jl +++ b/test/runfieldtests.jl @@ -241,4 +241,33 @@ end @test fra.values[:, 2] == fill(43.0, 2) end + + @testset "copy FieldRecord" begin + g = PB.Grids.UnstructuredColumnGrid(ncells=5, Icolumns=[collect(1:5)]) + + f = PB.Field(PB.ScalarData, (), Float64, PB.CellSpace, g; allocatenans=true) + + f.values .= 42.0 + fr = PALEOmodel.FieldRecord( + (record_dim = (name = "records",), ), # mock dataset to supply record_dim.name + f, + Dict{Symbol, Any}(), + ) + push!(fr, f) + f.values .= 43.0 + push!(fr, f) + + @test fr.records == [fill(42.0, 5), fill(43.0, 5)] + + fr_copy = copy(fr) + @test fr_copy.records == [fill(42.0, 5), fill(43.0, 5)] + push!(fr_copy, f) + @test fr_copy.records == [fill(42.0, 5), fill(43.0, 5), fill(43.0, 5)] + @test length(fr_copy) == 3 + @test PB.get_dimension(fr_copy, "records") == PB.NamedDimension("records", 3) + + @test fr.records == [fill(42.0, 5), fill(43.0, 5)] + @test PB.get_dimension(fr, "records") == PB.NamedDimension("records", 2) + + end end diff --git a/test/runoutputwritertests.jl b/test/runoutputwritertests.jl index c097ab4..98efabe 100644 --- a/test/runoutputwritertests.jl +++ b/test/runoutputwritertests.jl @@ -139,6 +139,31 @@ end @test test_array.values == test end +@testset "FieldRecordCreateAdd" begin + # Test creation from a FieldRecord + + model, modeldata, all_vars, output = create_test_model_output(2) + all_values = all_vars.values + all_values.global.O .= [2e19] + PALEOmodel.OutputWriters.add_record!(output, model, modeldata, 0.0) + all_values.global.O .= [4e19] + PALEOmodel.OutputWriters.add_record!(output, model, modeldata, 1.0) + + + # create new output Domain "diag" with record coordinate tmodel + tmodel = PB.get_field(output, "global.tmodel") + diag = PALEOmodel.OutputWriters.OutputMemoryDomain("diag", copy(tmodel)) + output.domains["diag"] = diag + + # add a diagnostic + fr_O = PB.get_field(output, "global.O") + my_diag = copy(fr_O) + my_diag.records .= -42.0 + my_diag.attributes[:var_name] = "my_diag" + my_diag.attributes[:description] = "a model diagnostic calculated from global.O" + PB.add_field!(diag, my_diag) + +end end From d33218d16c65685cfec68eb0394c34263ddd7e49 Mon Sep 17 00:00:00 2001 From: Stuart Daines Date: Tue, 31 Dec 2024 12:14:38 +0000 Subject: [PATCH 4/4] update documentation --- Project.toml | 2 + docs/src/PALEOmodelOutput.md | 2 +- src/FieldArray.jl | 13 ++++-- src/FieldRecord.jl | 84 ++++++++++++++++++++++++++++-------- src/OutputWriters.jl | 61 ++++++-------------------- src/PALEOmodel.jl | 1 + 6 files changed, 95 insertions(+), 68 deletions(-) diff --git a/Project.toml b/Project.toml index fc3390a..0d26c9b 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,7 @@ version = "0.16.0" [deps] DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" DiffRules = "b552c78f-8df3-52c6-915a-8e097449b14b" +DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" Infiltrator = "5903a43b-9cc3-4c30-8d17-598619ec4e9b" @@ -34,6 +35,7 @@ TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" [compat] DataFrames = "1.1" DiffRules = "1.0" +DocStringExtensions = "0.8, 0.9" FileIO = "1.0" ForwardDiff = "0.10" Infiltrator = "1.0" diff --git a/docs/src/PALEOmodelOutput.md b/docs/src/PALEOmodelOutput.md index 275a9f1..7515509 100644 --- a/docs/src/PALEOmodelOutput.md +++ b/docs/src/PALEOmodelOutput.md @@ -80,7 +80,7 @@ CurrentModule = PALEOmodel ```@docs FieldArray -get_array(fr::FieldRecord; coords=nothing) +get_array(fr::FieldRecord) FixedCoord ``` diff --git a/src/FieldArray.jl b/src/FieldArray.jl index 5f33d90..929e691 100644 --- a/src/FieldArray.jl +++ b/src/FieldArray.jl @@ -3,16 +3,23 @@ A generic [xarray](https://xarray.pydata.org/en/stable/index.html)-like or [IRIS](https://scitools-iris.readthedocs.io/en/latest/)-like -Array with named dimensions and optional coordinates. +n-dimensional Array with named dimensions and optional coordinates. NB: this aims to be simple and generic, not efficient !!! Intended for representing model output, not for numerically-intensive calculations. + +# Fields +$(TYPEDFIELDS) """ -struct FieldArray{T} +struct FieldArray{T <: AbstractArray} + "variable name" name::String + "n-dimensional Array of values" values::T + "Names of dimensions with optional attached coordinates" dims_coords::Vector{Pair{PB.NamedDimension, Vector{FixedCoord}}} - attributes::Union{Dict, Nothing} + "variable attributes" + attributes::Union{Dict{Symbol, Any}, Nothing} end PB.get_dimensions(f::FieldArray) = PB.NamedDimension[first(dc) for dc in f.dims_coords] diff --git a/src/FieldRecord.jl b/src/FieldRecord.jl index 3fe42f2..f938e3e 100644 --- a/src/FieldRecord.jl +++ b/src/FieldRecord.jl @@ -155,11 +155,21 @@ function Base.show(io::IO, ::MIME"text/plain", @nospecialize(fr::FieldRecord)) return nothing end -"test whether Field contains single elements -TODO this will currently return false for CellSpace with ncells=1, which could be changed to true ? -TODO would be clearer to directly use PB.internal_size(Space, mesh) == (1,) which would also handle the CellSpace with ncells=1 case, +""" + field_single_element(fr::FieldRecord)::Bool + field_single_element(f::Field)::Bool + field_single_element(::Type{FieldRecord})::Bool + field_single_element(::Type{Field})::Bool + +Test whether FieldRecord contains Fields with single elements stored as a Vector instead of a Vector of records. + +- `field_single_element == false`: Fields contain array `values`, these are stored in FieldRecord `records` as a Vector of arrays. +- `field_single_element == true` Fields contain a single value, stored in FieldRecord `records` as Vector of `eltype(Field.values)`. + +NB: this works on Types, and will return false for a field with Space == CellSpace with ncells=1, even though this actually contains a single +value. TODO might be clearer to directly use PB.internal_size(Space, mesh) == (1,) which would also handle the CellSpace with ncells=1 case, but this wouldn't work in the type domain (needs an instance of mesh::Mesh) -" +""" function field_single_element(::Type{FieldData}, N, ::Type{Space}, ::Type{Mesh}) where {FieldData <: PB.AbstractData, Space <: PB.AbstractSpace, Mesh} if PB.field_single_element(FieldData, N) && (Space == PB.ScalarSpace || (Space == PB.CellSpace && Mesh == Nothing)) return true @@ -217,30 +227,70 @@ function Base.copy(fr::FieldRecord{FieldData, Space, V, N, Mesh, R}) where {Fiel end """ - get_array(fr::FieldRecord [, allselectargs::NamedTuple] [; coords::AbstractVector]) -> FieldArray - [deprecated] get_array(fr::FieldRecord [; coords::AbstractVector] [; allselectargs...]) -> FieldArray + get_array(fr::FieldRecord [, allselectargs::NamedTuple] [; coords::AbstractVector]) -> fa::FieldArray + [deprecated] get_array(fr::FieldRecord [; coords::AbstractVector] [; allselectargs...]) -> fa::FieldArray -Return a [`FieldArray`](@ref) containing `fr::FieldRecord` data values and +Return a [`FieldArray`](@ref) containing an Array of values and any attached coordinates, for records and spatial region defined by `allselectargs`. -`allselectarg` may include `recordarg` to define records, `selectargs` to define a spatial region. +# Selecting records and regions +`allselectargs` is a `NamedTuple` of form: -`recordarg` can be one of: -- `records=r::Int` to select a single record, or `records = first:last` to select a range. -- ``: (where eg `=tmodel`), `=t::Float64` to select a single record - with the nearest value of `fr.coords_record`, or `=(first::Float64, last::Float64)` (a Tuple) to select a range - starting at the record with the nearest value of `fr.coords_record` before `first` and ending at the nearest record after - `last`. + ( = , = , ... [,expand_cartesian=false] [, squeeze_all_single_dims=true]) -Available `selectargs` depend on the FieldRecord dimensions (as returned by `get_dimensions`, which will be a subset -of grid spatial dimensions and Domain data dimensions) and corresponding attached coordinates (as returned by `get_coordinates`. +where `` is of form: +- `_isel` to select by array indices: `` may then be a single `Int` to select a single index, or a range `first:last` + to select a range of indices. +- `` to select by coordinate values using the coordinates attached to each dimension: `` may then be a single number + to select a single index corresponding to the nearest value of the corresponding coordinate, or `(first::Float64, last::Float64)` + (a Tuple) to select a range starting at the index with the nearest value of `fr.coords_record` before `first` and ending at + the nearest index after `last`. + +Available dimensions and coordinates `` depend on the FieldRecord dimensions (as returned by `get_dimensions`, which will be a subset +of grid spatial dimensions and Domain data dimensions) and corresponding attached coordinates (as returned by `get_coordinates`). + +Some synonyms are defined for commonly used ``: + +|synonyms | dimcoordname | comment | +|:------------| :---------------------- |:--------------------------------------------------| +| records | _isel | is usually tmodel | +| cells, cell | cells_isel | | +| column= | cells_isel = first:last | select range of cells corresponding to column n | Optional argument `coords` can be used to replace the attached coordinates for one or more dimensions. Format is a Vector of Pairs of `""=>("", "", ...)`, -eg to replace a 1D column default pressure coordinate with a z coordinate: +eg to replace an nD column atmosphere model default pressure coordinate with a z coordinate: coords=["cells"=>("atm.zmid", "atm.zlower", "atm.zupper")] +Optional argument `expand_cartesian` is only needed for spatially resolved output using a `CartesianLinearGrid`, and should be +set to `true` to expand compressed internal data (with spatial dimension `cells`) to a cartesian array (with spatial dimensions eg `lon`, `lat`, `zt`), + +Dimensions corresponding to a selection for a single index or coordinate value are always squeezed out from the returned [`FieldArray`](@ref). +Optional argument `squeeze_all_single_dims` (default `true`) controls whether *all* output dimensions that contain a single index are +squeezed out (including eg a selection for a range that results in a dimension with one index, or where the input `FieldRecord` contains a dimension +with a single index). + +Selection arguments used are returned as strings in `fa.attributes` `filter_records` and `filter_region` for use in plot labelling etc. + + +# Examples +- select a timeseries for single cell index 3 + + get_array(fr, (cell=3, )) +- select first column at nearest available time to model time 10.0 + + get_array(fr, (column=1, tmodel=10.0)) +- set first column at nearest available time to model time 10.0, replacing atmosphere model pressure coordinate with z coordinate: + + get_array(fr, (column=1, tmodel=10.0); coords=["cells"=>("atm.zmid", "atm.zlower", "atm.zupper")]) +- select surface 2D array (dimension `zt`, index 1) from 3D output at nearest available time to model time 10.0, expanding `CartesianLinearGrid`. + + get_array(fr, (zt_isel=1, tmodel=10.0, expand_cartesian=true)) +- select section 2D array at nearest index to longitude 200 degrees from 3D output at nearest available time to model time 10.0, expanding `CartesianLinearGrid`. + + get_array(fr, (lon=200.0, tmodel=10.0, expand_cartesian=true)) + """ function get_array( @nospecialize(fr::FieldRecord); diff --git a/src/OutputWriters.jl b/src/OutputWriters.jl index 9e1a7f8..544d73e 100644 --- a/src/OutputWriters.jl +++ b/src/OutputWriters.jl @@ -116,17 +116,10 @@ function PB.show_variables(output::PALEOmodel.AbstractOutputWriter) end get_array(output::PALEOmodel.AbstractOutputWriter, varname::AbstractString [, allselectargs::NamedTuple] [; coords::AbstractVector]) -> FieldArray get_array(output::PALEOmodel.AbstractOutputWriter, varname::AbstractString; allselectargs...) -> FieldArray -Return a [`PALEOmodel.FieldArray`](@ref) containing data values and any attached coordinates, for the -[`PALEOmodel.FieldRecord`](@ref) for `varname`, and records and spatial region defined by `selectargs` - -If `coords` is not supplied, equivalent to `PALEOmodel.get_array(PB.get_field(output, varname), allselectargs)`. - -Optional argument `coords` can be used to supply plot coordinates from Variable in output, to replace any default coordinates. -Format is a Vector of Pairs of "dim_name"=>("var_name1", "var_name2", ...) +Return a [`PALEOmodel.FieldArray`](@ref) containing data values and any attached coordinates. -Example: to replace a 1D column default pressure coordinate with a z coordinate: - - coords=["cells"=>("atm.zmid", "atm.zlower", "atm.zupper")] +Equivalent to `PALEOmodel.get_array(PB.get_field(output, varname), allselectargs [; coords])`, +see [`PALEOmodel.get_array(fr::PALEOmodel.FieldRecord)`](@ref). """ function PALEOmodel.get_array( output::PALEOmodel.AbstractOutputWriter, varname::AbstractString; @@ -147,20 +140,6 @@ function PALEOmodel.get_array( ) fr = PB.get_field(output, varname) - # if isnothing(coords) - # coords_records=nothing - # else - # PALEOmodel.check_coords_argument(coords) || - # error("argument coords should be a Vector of Pairs of \"coord_name\"=>(\"var_name1\", \"var_name2\", ...), eg: [\"z\"=>(\"atm.zmid\", \"atm.zlower\", \"atm.zupper\"), ...]") - - # coords_records = [ - # coord_name => Tuple(PB.get_field(output, cvn) for cvn in coord_varnames) - # for (coord_name, coord_varnames) in coords - # ] - # end - # - # return PALEOmodel.get_array(fr, allselectargs; coords=coords_records) - return PALEOmodel.get_array(fr, allselectargs; coords) end @@ -259,8 +238,6 @@ function OutputMemoryDomain( end -Base.length(output::OutputMemoryDomain) = output.record_dim.size - "create from a PALEOboxes::Domain" function OutputMemoryDomain( dom::PB.Domain, modeldata::PB.ModelData, nrecords=nothing; @@ -433,6 +410,8 @@ function OutputMemoryDomain( return odom end +Base.length(output::OutputMemoryDomain) = output.record_dim.size + function add_record!(odom::OutputMemoryDomain, modeldata, rec_coord) odom.record_dim = PB.NamedDimension(odom.record_dim.name, odom.record_dim.size + 1) @@ -571,17 +550,6 @@ function PB.get_coordinates(odom::OutputMemoryDomain, dimname::AbstractString) return coord_names end - -# coords = PALEOmodel.FixedCoord[] -# for cn in coord_names -# cfr = PB.get_field(odom, cn) -# @Infiltrator.infiltrate -# push!(coords, PALEOmodel.FixedCoord(cn, cfr.records, cfr.attributes)) -# end - -# return coords -# end - ######################################## # OutputMemory ########################################## @@ -695,14 +663,14 @@ end """ save_jld2(output::OutputMemory, filename) -Deprecated - use [`save_netcdf`](@ref) +Removed in PALEOmodel v0.16 - use [`save_netcdf`](@ref) Save to `filename` in JLD2 format (NB: filename must either have no extension or have extension `.jld2`) """ function save_jld2(output::OutputMemory, filename) - @error """save_jld2 has been removed. - Please use save_netcdf instead""" + @error """save_jld2 has been removed in PALEOmodel v0.16 + Please use save_netcdf instead""" return nothing end @@ -710,18 +678,17 @@ end """ load_jld2!(output::OutputMemory, filename) +Removed in PALEOmodel v0.16 - use [`save_netcdf`](@ref), [`load_netcdf!`](@ref) for new output, +or use an earlier version of PALEOmodel to load jld2 output and save to netcdf. + Load from `filename` in JLD2 format, replacing any existing content in `output`. (NB: filename must either have no extension or have extension `.jld2`). - -# Example -```julia -julia> output = PALEOmodel.OutputWriters.load_jld2!(PALEOmodel.OutputWriters.OutputMemory(), "savedoutput.jld2") -``` """ function load_jld2!(output::OutputMemory, filename) - @error """load_jld2! has been removed. - Please use save_netcdf, load_netcdf! instead""" + @error """load_jld2! has been removed in PALEOmodel v0.16. + Please use save_netcdf, load_netcdf! for new output, + or use an earlier version of PALEOmodel to load jld2 output and save to netcdf.""" return nothing end diff --git a/src/PALEOmodel.jl b/src/PALEOmodel.jl index cfb5a91..2952b09 100644 --- a/src/PALEOmodel.jl +++ b/src/PALEOmodel.jl @@ -22,6 +22,7 @@ import ForwardDiff import ForwardDiff import SparsityTracing import OrderedCollections +using DocStringExtensions import TimerOutputs: @timeit, @timeit_debug