From 201e95141d43e2f13817dab509ea00f554110773 Mon Sep 17 00:00:00 2001 From: Max Freudenberg Date: Fri, 21 Jun 2024 10:09:45 +0200 Subject: [PATCH] enable chunked writes --- Project.toml | 1 + src/GeoDataFrames.jl | 2 ++ src/io.jl | 57 ++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 55 insertions(+), 5 deletions(-) diff --git a/Project.toml b/Project.toml index 4eaa957..22b0ad2 100644 --- a/Project.toml +++ b/Project.toml @@ -7,6 +7,7 @@ version = "0.3.8" ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3" DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +GDAL = "add2ef01-049f-52c4-9ee2-e494f65e021a" GeoFormatTypes = "68eda718-8dee-11e9-39e7-89f7f65f511f" GeoInterface = "cf35fbd7-0cd7-5166-be24-54bfbe79505f" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" diff --git a/src/GeoDataFrames.jl b/src/GeoDataFrames.jl index 4da914b..3a8eeeb 100644 --- a/src/GeoDataFrames.jl +++ b/src/GeoDataFrames.jl @@ -6,6 +6,8 @@ using Tables import GeoFormatTypes as GFT import GeoInterface using DataAPI +using Base.Iterators: partition +using GDAL include("exports.jl") include("io.jl") diff --git a/src/io.jl b/src/io.jl index acbafed..7897c9d 100644 --- a/src/io.jl +++ b/src/io.jl @@ -96,9 +96,21 @@ end """ write(fn::AbstractString, table; layer_name="data", crs::Union{GFT.GeoFormat,Nothing}=crs(table), driver::Union{Nothing,AbstractString}=nothing, options::Vector{AbstractString}=[], geom_columns::Set{Symbol}=(:geometry)) -Write the provided `table` to `fn`. The `geom_column` is expected to hold ArchGDAL geometries. +Write the provided `table` to `fn`. The `geom_column` is expected to hold ArchGDAL geometries. + +Experimental: Fast chunked writes can be enabled by setting `use_gdal_copy=false` and `chunksize` to the desired number of features per transaction (default 20000). With the default `use_gdal_copy=true` the layer is written with a single `AG.copy` call and `chunksize` is ignored. 
""" -function write(fn::AbstractString, table; layer_name::AbstractString="data", crs::Union{GFT.GeoFormat,Nothing}=getcrs(table), driver::Union{Nothing,AbstractString}=nothing, options::Dict{String,String}=Dict{String,String}(), geom_columns=getgeometrycolumns(table), kwargs...) +function write( + fn::AbstractString, + table; + layer_name::AbstractString="data", + crs::Union{GFT.GeoFormat,Nothing}=getcrs(table), + driver::Union{Nothing,AbstractString}=nothing, + options::Dict{String,String}=Dict{String,String}(), + geom_columns=getgeometrycolumns(table), + use_gdal_copy = true, + chunksize = 20000, + kwargs...) rows = Tables.rows(table) sch = Tables.schema(rows) @@ -150,6 +162,7 @@ function write(fn::AbstractString, table; layer_name::AbstractString="data", crs ) do ds AG.newspatialref() do spatialref crs !== nothing && AG.importCRS!(spatialref, crs) + AG.createlayer( name=layer_name, geom=first(geom_types), # how to set the name though? @@ -181,9 +194,43 @@ function write(fn::AbstractString, table; layer_name::AbstractString="data", crs end end end - end - AG.copy(layer, dataset=ds, name=layer_name, options=stringlist(options)) - end + end # for + + if use_gdal_copy + AG.copy( + layer; + dataset = ds, + name = layer_name, + options = stringlist(options), + ) + else + AG.createlayer(; + name = layer_name, + dataset = ds, + geom = AG.getgeomtype(layer), + spatialref = AG.getspatialref(layer), + options = stringlist(options), + ) do targetlayer + # add field definitions + sourcelayerdef = AG.layerdefn(layer) + for fieldidx in 0:(AG.nfield(layer)-1) + AG.addfielddefn!( + targetlayer, + AG.getfielddefn(sourcelayerdef, fieldidx), + ) + end + + # iterate over features in chunks to get better speed than gdaldatasetcopylayer + for chunk in Iterators.partition(layer, chunksize) + GDAL.ogr_l_starttransaction(targetlayer) + for feature in chunk + AG.addfeature!(targetlayer, feature) + end + GDAL.ogr_l_committransaction(targetlayer) + end + end # createlayer + end # if 
use_gdal_copy + end # layer end end fn