From 2d35e97c1346cbd2b2af3f0b89d41edca75098f2 Mon Sep 17 00:00:00 2001
From: Maarten Pronk
Date: Sun, 13 Aug 2023 14:28:51 +0200
Subject: [PATCH] Upgrade compat to Julia 1.8. Allows for generic GeoInterface write.

Raise the minimum supported Julia version to 1.8 (CI matrix and compat),
restrict the Parquet2 compat entry to 0.2 and bump the package to 0.1.4.

`GeoParquet.write` now checks that its input is a table and converts
geometry columns to WKB on the fly. The conversion is done on a
DataFrame built with `copycols=false` whose geometry columns are
replaced, so the caller's table is not modified. Columns that are
already WKB or raw byte vectors are written as they are.

The new test asserts with `@test` so that it can actually fail.

---
 .github/workflows/CI.yml |  2 +-
 Project.toml             |  8 ++++----
 README.md                |  2 --
 src/io.jl                | 12 ++++++++++--
 src/utils.jl             |  4 ++++
 test/runtests.jl         |  7 +++++++
 6 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 033d76e..7bc2c3c 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -18,7 +18,7 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.6'
+          - '1.8'
           - '1'
           - 'nightly'
         os:
diff --git a/Project.toml b/Project.toml
index f6265ed..d56d52d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "GeoParquet"
 uuid = "e99870d8-ce00-4fdd-aeee-e09192881159"
-authors = ["Maarten Pronk ", "Julia Computing and contributors"]
-version = "0.1.3"
+authors = ["Maarten Pronk and contributors."]
+version = "0.1.4"
 
 [deps]
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
@@ -20,11 +20,11 @@ Extents = "0.1"
 GeoFormatTypes = "0.4"
 GeoInterface = "1"
 JSON3 = "1.9"
-Parquet2 = "0.1, 0.2"
+Parquet2 = "0.2"
 StructTypes = "1.8"
 Tables = "1"
 WellKnownGeometry = "0.2"
-julia = "1.6"
+julia = "1.8"
 
 [extras]
 ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3"
diff --git a/README.md b/README.md
index e9606e0..02f7f85 100644
--- a/README.md
+++ b/README.md
@@ -41,5 +41,3 @@ test.parquet
 - [ ] Better access to metadata in `read`
 - [ ] Lazy read option
 - [ ] Easier support for ProjJSON
-
-We thank Julia Computing for supporting contributions to this package.
diff --git a/src/io.jl b/src/io.jl
index 8684b15..441ae96 100644
--- a/src/io.jl
+++ b/src/io.jl
@@ -2,18 +2,26 @@
     write(ofn, t, columns=(:geom), crs::Union{GFT.ProjJSON,Nothing}=nothing, bbox::Union{Nothing,Vector{Float64}}=nothing; kwargs...)
 
 Write a dataframe with a geometry column to a Parquet file. Keyword arguments are passed to Parquet2 writefile method.
-The geometry column should be a `Vector{GeoFormat.WellKnownBinary}`.
+The geometry column should be a `Vector{GeoFormat.WellKnownBinary}` or its elements should support GeoInterface.
 You can construct one with WellKnownGeometry for geometries that support GeoInterface.
 """
 function write(ofn::Union{AbstractString,Parquet2.FilePathsBase.AbstractPath}, df, geocolumns=(:geom,), crs::Union{GFT.ProjJSON,Nothing}=nothing, bbox::Union{Nothing,Vector{Float64}}=nothing; kwargs...)
 
+    Tables.istable(df) || throw(ArgumentError("`df` must be a table"))
+
     columns = Dict{String,Any}()
     tcols = Tables.columns(df)
 
+    # For on the fly conversion to WKB
+    ndf = DataFrame(df; copycols=false)
+
     for column in geocolumns
         column in Tables.columnnames(df) || error("Geometry column $column not found in table")
         data = Tables.getcolumn(tcols, column)
         GI.isgeometry(data[1]) || error("Geometry in $column must support the GeoInterface")
+        if !(data isa Vector{<:GFT.WellKnownBinary} || data isa Vector{Vector{UInt8}})
+            ndf[!, column] = _getwkb.(data)
+        end
         types = unique(typeof.(GI.geomtrait.(data)))
         gtypes = getindex.(Ref(geowkb), types)
         mc = MetaColumn(geometry_type=gtypes, bbox=bbox, crs=crs)
@@ -21,7 +29,7 @@ function write(ofn::Union{AbstractString,Parquet2.FilePathsBase.AbstractPath}, d
     end
 
     md = Dict("geo" => JSON3.write(GeoParquet.MetaRoot(columns=columns, primary_column=String(geocolumns[1]))))
-    Parquet2.writefile(ofn, df, metadata=md, compression_codec=:zstd, kwargs...)
+    Parquet2.writefile(ofn, ndf, metadata=md, compression_codec=:zstd, kwargs...)
     ofn
 end
 
diff --git a/src/utils.jl b/src/utils.jl
index 0e0085d..782c77c 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -23,3 +23,7 @@ function todict(obj::Dict{Symbol,Any})
     end
     return dict
 end
+
+_getwkb(x) = WellKnownGeometry.getwkb(x)
+_getwkb(x::GFT.WellKnownBinary) = x
+_getwkb(x::Vector{UInt8}) = GFT.WellKnownBinary(GFT.Geom(), x)
diff --git a/test/runtests.jl b/test/runtests.jl
index 35a2693..194d86d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -48,6 +48,13 @@ end
     df = GeoParquet.read(fn)
     df.test[1] == "test"
 
+    # Transparently convert columns to WKB
+    fn = "data/writec.parquet"
+    df = DataFrame(test="test", value=rand(2), geom=geom)
+    GeoParquet.write(fn, df)
+    ndf = GeoParquet.read(fn)
+    @test df.geom != ndf.geom  # original is not mutated
+
     fn = "data/example.parquet"
     df = GeoParquet.read(fn)
     GeoParquet.write("data/example_copy.parquet", df, (:geometry,))