Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor compressors to be in separate files #153

Merged
merged 7 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/src/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,5 @@ Pages = ["ZGroup.jl"]

```@autodocs
Modules = [Zarr]
Pages = ["Compressors.jl"]
Pages = ["Compressors/Compressors.jl", "Compressors/blosc.jl", "Compressors/zlib.jl"]
```
151 changes: 0 additions & 151 deletions src/Compressors.jl

This file was deleted.

106 changes: 106 additions & 0 deletions src/Compressors/Compressors.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import JSON # for JSON.lower

# Reinterpret the elements of `x` as values of type `T`.
# A zero-dimensional array is reshaped into a one-element vector before being reinterpreted.
function _reinterpret(::Type{T}, x::AbstractArray{S,0}) where {T,S}
    return reinterpret(T, reshape(x, 1))
end

function _reinterpret(::Type{T}, x::AbstractArray) where {T}
    return reinterpret(T, x)
end

"""
    abstract type Compressor

The abstract supertype for all Zarr compressors.

## Interface

All subtypes of `Compressor` SHALL implement the following methods:

- `zcompress(a, c::Compressor)`: compress the array `a` using the compressor `c`.
- `zuncompress(a, c::Compressor, T)`: uncompress the array `a` using the compressor `c`
  and return an array with element type `T`.
- `JSON.lower(c::Compressor)`: return a JSON representation of the compressor `c`, which
  follows the Zarr specification for that compressor.
- `getCompressor(::Type{<:Compressor}, d::Dict)`: return a compressor object from a given
  dictionary `d` which contains the compressor's parameters according to the Zarr spec.

Subtypes of `Compressor` MAY also implement the following methods:

- `zcompress!(compressed, data, c::Compressor)`: compress the array `data` using the
  compressor `c` and store the result in the array `compressed`.
- `zuncompress!(data, compressed, c::Compressor)`: uncompress the array `compressed`
  using the compressor `c` and store the result in the array `data`.

Finally, an entry MUST be added to the `compressortypes` dictionary for each compressor type.
The key is the name of the compressor, which must follow the Zarr specification's name for
that compressor, and the value is the compressor type (e.g. `BloscCompressor` or
`NoCompressor`).

For example, the Blosc compressor is named "blosc" in the Zarr spec, so the entry for
[`BloscCompressor`](@ref) must be added to `compressortypes` as
`compressortypes["blosc"] = BloscCompressor`.
"""
abstract type Compressor end

# Registry mapping a compressor's Zarr spec name (or `nothing` for "no compression") to the
# compressor type. Declared `const` so that functions reading it (e.g. `getCompressor`) do not
# go through an untyped global binding; entries are still added by mutating the dictionary.
const compressortypes = Dict{Union{String,Nothing}, Type{<:Compressor}}()
meggart marked this conversation as resolved.
Show resolved Hide resolved

# function getCompressor end
# function zcompress end
# function zuncompress end
# function zcompress! end
# function zuncompress! end
# JSON.lower is neither defined nor documented here, since that would be documentation piracy :yarr:

# Include the compressor implementations
include("blosc.jl")
include("zlib.jl")

# ## Fallback definitions for the compressor interface
# Define fallbacks and generic methods for the compressor interface
# Build a compressor from a metadata dictionary: the "id" entry selects the compressor type
# in `compressortypes`, and construction is delegated to that type's `getCompressor` method.
function getCompressor(compdict::Dict)
    comptype = compressortypes[compdict["id"]]
    return getCompressor(comptype, compdict)
end

# `nothing` in place of a dictionary yields a `NoCompressor`.
function getCompressor(::Nothing)
    return NoCompressor()
end

Check warning on line 55 in src/Compressors/Compressors.jl

View check run for this annotation

Codecov / codecov/patch

src/Compressors/Compressors.jl#L55

Added line #L55 was not covered by tests

# No filter given (`nothing` as the fourth argument): forward to the three-argument methods.
function zcompress!(compressed, data, c, ::Nothing)
    return zcompress!(compressed, data, c)
end

function zuncompress!(data, compressed, c, ::Nothing)
    return zuncompress!(data, compressed, c)
end

# Generic in-place fallbacks, built on the allocating `zcompress` / `zuncompress`.

# Replace the contents of `compressed` with the compressed form of `data`.
function zcompress!(compressed, data, c)
    empty!(compressed)
    return append!(compressed, zcompress(data, c))
end

# Uncompress `compressed` to the element type of `data` and copy the result into `data`.
function zuncompress!(data, compressed, c)
    decoded = zuncompress(compressed, c, eltype(data))
    return copyto!(data, decoded)
end


# Compression with a filter stack `f`: every filter is applied in order via `zencode`,
# then the encoded result is compressed into `compressed`.
function zcompress!(compressed, data, c, f)
    encoded = data
    for filt in f
        encoded = zencode(encoded, filt)
    end
    return zcompress!(compressed, encoded, c)
end

# Decompression with a filter stack `f`: the chunk is uncompressed to the destination type of
# the last filter, the filters are undone from last to first via `zdecode`, and the decoded
# result is copied into `data`.
function zuncompress!(data, compressed, c, f)
    decoded = zuncompress(compressed, c, desttype(last(f)))
    for filt in Iterators.reverse(f)
        decoded = zdecode(decoded, filt)
    end
    return copyto!(data, decoded)
end

# ## `NoCompressor`
# The default and most minimal implementation of a compressor follows here, which does
# no actual compression. This is a good reference implementation for other compressors.

"""
    NoCompressor()

Creates a compressor object that performs no compression. It can be passed to ZArray
constructors to store data uncompressed.
"""
struct NoCompressor <: Compressor end

# "Uncompressing" with `NoCompressor` does no decoding: `a` is reinterpreted as elements of
# type `T`.
function zuncompress(a, ::NoCompressor, T)
    return _reinterpret(T, a)
end

# "Compressing" with `NoCompressor` only reinterprets the elements of `a` as `UInt8`.
zcompress(a, ::NoCompressor) = _reinterpret(UInt8, a)

# A `NoCompressor` is lowered to `nothing` when serialised to JSON.
JSON.lower(::NoCompressor) = nothing

# Register `NoCompressor` under the `nothing` key of the compressor registry.
compressortypes[nothing] = NoCompressor
70 changes: 70 additions & 0 deletions src/Compressors/blosc.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#=
# Blosc compression

This file implements a Blosc compressor via Blosc.jl.
=#

import Blosc

# Parameters of a Blosc compressor. The field names match the keys read by `getCompressor`
# and written by `JSON.lower` for this type.
struct BloscCompressor <: Compressor
    # Stored in and read from the "blocksize" metadata entry.
    blocksize::Int
    # Compression level, passed to `Blosc.compress` as `level`.
    clevel::Int
    # Name of the Blosc compressor, passed to `Blosc.set_compressor`.
    cname::String
    # Shuffle mode; -1 selects automatic shuffling based on the element size in `zcompress`.
    shuffle::Int
end

"""
    BloscCompressor(;blocksize=0, clevel=5, cname="lz4", shuffle=1)

Returns a `BloscCompressor` struct that can serve as a Zarr array compressor. Keyword arguments are:

* `blocksize=0` the block size stored in the compressor's metadata
* `clevel=5` the compression level, number between 0 (no compression) and 9 (max compression)
* `cname="lz4"` compressor name, can be one of `"blosclz"`, `"lz4"`, and `"lz4hc"`
* `shuffle=1` Either NOSHUFFLE (0), SHUFFLE (1), BITSHUFFLE (2) or AUTOSHUFFLE (-1).
  If AUTOSHUFFLE, bit-shuffle will be used for buffers with itemsize 1, and byte-shuffle
  will be used otherwise. The default is SHUFFLE.
"""
function BloscCompressor(; blocksize=0, clevel=5, cname="lz4", shuffle=1)
    return BloscCompressor(blocksize, clevel, cname, shuffle)
end

# Construct a `BloscCompressor` from the "blocksize", "clevel", "cname" and "shuffle"
# entries of the metadata dictionary `d`.
function getCompressor(::Type{BloscCompressor}, d::Dict)
    blocksize, clevel, cname, shuffle = d["blocksize"], d["clevel"], d["cname"], d["shuffle"]
    return BloscCompressor(blocksize, clevel, cname, shuffle)
end

# Decompress the Blosc buffer `a` into elements of `T` with `Missing` stripped from the type.
function zuncompress(a, ::BloscCompressor, T)
    target = Base.nonmissingtype(T)
    return Blosc.decompress(target, a)
end

# In-place Blosc decompression for dense arrays: `compressed` is decompressed directly into
# `data`, which is passed to Blosc as a vector.
function zuncompress!(data::DenseArray, compressed, ::BloscCompressor)
    return Blosc.decompress!(vec(data), compressed)
end


# Compress `a` with Blosc using the compressor name, level and shuffle mode stored in `c`.
# Throws an `ArgumentError` if `c.shuffle` is not one of -1, 0, 1 or 2.
# Note: `c.blocksize` is not referenced by this method.
function zcompress(a, c::BloscCompressor)
    itemsize = sizeof(eltype(a))
    shuffle = c.shuffle
    # Weird auto shuffle logic from
    # https://github.com/zarr-developers/numcodecs/blob/7d8f9762b4f0f9b5e135688b2eeb3f783f90f208/numcodecs/blosc.pyx#L264-L272
    if shuffle == -1
        # Auto shuffle: bit-shuffle for single-byte elements, byte-shuffle otherwise.
        shuffle = itemsize == 1 ? Blosc.BITSHUFFLE : Blosc.SHUFFLE
    elseif shuffle ∉ (Blosc.NOSHUFFLE, Blosc.SHUFFLE, Blosc.BITSHUFFLE)
        throw(ArgumentError("invalid shuffle argument; expected -1, 0, 1 or 2, found $shuffle"))
    end
    # The compressor is selected through a call on the `Blosc` module before compressing.
    Blosc.set_compressor(c.cname)
    return Blosc.compress(a; level=c.clevel, shuffle=shuffle)
end

# Metadata dictionary describing a `BloscCompressor` when serialised to JSON.
function JSON.lower(c::BloscCompressor)
    return Dict(
        "id" => "blosc",
        "cname" => c.cname,
        "clevel" => c.clevel,
        "shuffle" => c.shuffle,
        "blocksize" => c.blocksize,
    )
end

# Register the Blosc compressor under the name "blosc".
Zarr.compressortypes["blosc"] = BloscCompressor
38 changes: 38 additions & 0 deletions src/Compressors/zlib.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#=
# Zlib compression

This file implements a Zlib compressor via CodecZlib.jl.

=#

import CodecZlib

"""
    ZlibCompressor(clevel)
    ZlibCompressor(;clevel=-1)

Returns a `ZlibCompressor` struct that can serve as a Zarr array compressor. Keyword arguments are:

* `clevel=-1` the compression level, either -1 (default) or a number between 0 (no compression)
  and 9 (max compression). The default of -1 is a compromise between speed and compression
  (currently equivalent to level 6).
"""
struct ZlibCompressor <: Compressor
    clevel::Int
end

# Keyword constructor; `clevel` defaults to -1.
function ZlibCompressor(; clevel=-1)
    return ZlibCompressor(clevel)
end

# Construct a `ZlibCompressor` from the "level" entry of the metadata dictionary `d`.
getCompressor(::Type{ZlibCompressor}, d::Dict) = ZlibCompressor(d["level"])

# Inflate the zlib buffer `a`, then reinterpret the resulting bytes as elements of `T` with
# `Missing` stripped from the type.
function zuncompress(a, ::ZlibCompressor, T)
    inflated = transcode(CodecZlib.ZlibDecompressor, a)
    return _reinterpret(Base.nonmissingtype(T), inflated)
end

# Deflate `a` with zlib: the elements are reinterpreted as bytes, copied into a fresh vector
# by the `[:]` indexing, and that copy is handed to `transcode`.
# NOTE(review): the compressor argument is unnamed, so its `clevel` field is not consulted
# here — confirm whether the configured level is meant to be applied.
function zcompress(a, ::ZlibCompressor)
    bytes = _reinterpret(UInt8, a)[:]
    return transcode(CodecZlib.ZlibCompressor, bytes)
end

# Metadata dictionary describing a `ZlibCompressor` when serialised to JSON.
function JSON.lower(z::ZlibCompressor)
    return Dict("id" => "zlib", "level" => z.clevel)
end

# Register the zlib compressor under the name "zlib".
Zarr.compressortypes["zlib"] = ZlibCompressor
2 changes: 1 addition & 1 deletion src/Zarr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import JSON
import Blosc

include("metadata.jl")
include("Compressors.jl")
include("Compressors/Compressors.jl")
include("Storage/Storage.jl")
include("Filters.jl")
include("ZArray.jl")
Expand Down
Loading