Skip to content
This repository has been archived by the owner on May 29, 2024. It is now read-only.

Commit

Permalink
refactor!: remove SmallDict
Browse files Browse the repository at this point in the history
With Julia 1.11's Memory, small dictionaries are much better than they
used to be. Even with a helper function to construct small dictionaries, a
Dict still takes up a bit more space than our SmallDict, but I think the
code elimination makes this compromise worth accepting.
  • Loading branch information
tecosaur committed May 23, 2024
1 parent c1ee306 commit 8f3a61f
Show file tree
Hide file tree
Showing 11 changed files with 111 additions and 218 deletions.
3 changes: 1 addition & 2 deletions src/DataToolkitBase.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ export loadcollection!, dataset

# For extension packages
export AbstractDataTransformer, DataStorage, DataLoader, DataWriter,
DataSet, DataCollection, QualifiedType, Identifier, FilePath, SmallDict,
DataSet, DataCollection, QualifiedType, Identifier, FilePath,
LintItem, LintReport
export load, storage, getstorage, putstorage, save, getlayer, resolve, refine,
parse_ident, supportedtypes, typeify, create, createpriority, lint
Expand All @@ -34,7 +34,6 @@ include("model/utils.jl")
include("model/advice.jl")
include("model/errors.jl")

include("model/smalldict.jl")
include("model/qualifiedtype.jl")
include("model/identification.jl")
include("model/parameters.jl")
Expand Down
11 changes: 5 additions & 6 deletions src/interaction/externals.jl
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,13 @@ end
Return the data set identified by `identstr`, optionally specifying the `collection`
the data set should be found in and any `parameters` that apply.
"""
dataset(identstr::AbstractString) = resolve(identstr; resolvetype=false)::DataSet
dataset(identstr::AbstractString, parameters::SmallDict{String, Any}) =
resolve(identstr, parameters; resolvetype=false)::DataSet
dataset(identstr::AbstractString, parameters::Dict{String, Any}) =
dataset(identstr, smallify(parameters))
dataset(identstr::AbstractString)::DataSet =
resolve(identstr; resolvetype=false)
dataset(identstr::AbstractString, parameters::Dict{String, Any})::DataSet =
resolve(identstr, parameters; resolvetype=false)

function dataset(identstr::AbstractString, kv::Pair{<:AbstractString, <:Any}, kvs::Pair{<:AbstractString, <:Any}...)
parameters = SmallDict{String, Any}()
parameters = newdict(String, Any, length(kvs) + 1)
parameters[String(first(kv))] = last(kv)
for (key, value) in kvs
parameters[String(key)] = value
Expand Down
8 changes: 4 additions & 4 deletions src/interaction/manipulation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ When no value is set, `nothing` is returned instead and if `quiet` is unset
function config_get(collection::DataCollection, propertypath::Vector{String}; quiet::Bool=false)
config = collection.parameters
for segment in propertypath
config isa AbstractDict || (config = SmallDict{String, Nothing}();)
config isa AbstractDict || (config = newdict(String, Nothing, 0);)
config = get(config, segment, nothing)
if isnothing(config)
quiet || printstyled(" unset\n", color=:light_black)
Expand Down Expand Up @@ -339,11 +339,11 @@ function config_set(collection::DataCollection, propertypath::Vector{String}, va
# however this way any plugin-processing of the configuration
# will be symmetric (i.e. applied at load and write).
snapshot = convert(Dict, collection)
config = get(snapshot, "config", SmallDict{String, Any}())
config = get(snapshot, "config", newdict(String, Any, 0))
window = config
for segment in propertypath[1:end-1]
if !haskey(window, segment)
window[segment] = SmallDict{String, Any}()
window[segment] = newdict(String, Any, 0)
end
window = window[segment]
end
Expand Down Expand Up @@ -385,7 +385,7 @@ function config_unset(collection::DataCollection, propertypath::Vector{String};
# however this way any plugin-processing of the configuration
# will be symmetric (i.e. applied at load and write).
snapshot = convert(Dict, collection)
config = get(snapshot, "config", SmallDict{String, Any}())
config = get(snapshot, "config", newdict(String, Any, 0))
window = config
for segment in propertypath[1:end-1]
if !haskey(window, segment)
Expand Down
10 changes: 5 additions & 5 deletions src/model/errors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ by the current version of $(@__MODULE__).
# Example occurrence
```julia-repl
julia> fromspec(DataCollection, SmallDict{String, Any}("data_config_version" => -1))
julia> fromspec(DataCollection, Dict{String, Any}("data_config_version" => -1))
ERROR: CollectionVersionMismatch: -1 (specified) ≠ $LATEST_DATA_CONFIG_VERSION (current)
The data collection specification uses the v-1 data collection format, however
the installed DataToolkitBase version expects the v$LATEST_DATA_CONFIG_VERSION version of the format.
Expand Down Expand Up @@ -334,7 +334,7 @@ Modification of `collection` is not viable, as it is read-only.
# Example Occurrence
```julia-repl
julia> lockedcollection = DataCollection(SmallDict{String, Any}("uuid" => Base.UUID(rand(UInt128)), "config" => SmallDict{String, Any}("locked" => true)))
julia> lockedcollection = DataCollection(Dict{String, Any}("uuid" => Base.UUID(rand(UInt128)), "config" => Dict{String, Any}("locked" => true)))
julia> write(lockedcollection)
ERROR: ReadonlyCollection: The data collection unnamed#298 is locked
Stacktrace: [...]
Expand All @@ -357,7 +357,7 @@ A catch-all for issues involving data transformers, with details given in `msg`.
# Example occurrence
```julia-repl
julia> emptydata = DataSet(DataCollection(), "empty", SmallDict{String, Any}("uuid" => Base.UUID(rand(UInt128))))
julia> emptydata = DataSet(DataCollection(), "empty", Dict{String, Any}("uuid" => Base.UUID(rand(UInt128))))
DataSet empty
julia> read(emptydata)
Expand All @@ -381,7 +381,7 @@ there is no transformer that satisfies this restriction.
# Example occurrence
```julia-repl
julia> emptydata = DataSet(DataCollection(), "empty", SmallDict{String, Any}("uuid" => Base.UUID(rand(UInt128))))
julia> emptydata = DataSet(DataCollection(), "empty", Dict{String, Any}("uuid" => Base.UUID(rand(UInt128))))
DataSet empty
julia> read(emptydata, String)
Expand Down Expand Up @@ -483,7 +483,7 @@ macro getparam(expr::Expr, default=nothing)
typename = if type isa Symbol type
elseif Meta.isexpr(type, :curly) first(type.args)
else :Any end
default = if typename ∈ (:Vector, :Dict, :SmallDict)
default = if typename ∈ (:Vector, :Dict)
:($type())
else :nothing end
end
Expand Down
10 changes: 5 additions & 5 deletions src/model/identification.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Identifier(ident::Identifier, params::SmallDict{String, Any}; replace::Bool=false) =
Identifier(ident::Identifier, params::Dict{String, Any}; replace::Bool=false) =
Identifier(ident.collection, ident.dataset, ident.type,
if replace || isempty(ident.parameters);
params
Expand All @@ -8,7 +8,7 @@ Identifier(ident::Identifier, params::SmallDict{String, Any}; replace::Bool=fals

Identifier(ident::Identifier, ::Nothing; replace::Bool=false) =
if replace
Identifier(ident, SmallDict{String, Any}(); replace)
Identifier(ident, newdict(String, Any, 0); replace)
else
ident
end
Expand Down Expand Up @@ -50,7 +50,7 @@ end

# Identifier(spec::AbstractString) = parse(Identifier, spec)

Identifier(spec::AbstractString, params::SmallDict{String, Any}) =
Identifier(spec::AbstractString, params::Dict{String, Any}) =
Identifier(parse(Identifier, spec), params)

Base.:(==)(a::Identifier, b::Identifier) =
Expand Down Expand Up @@ -176,13 +176,13 @@ resolve(ident::Identifier; resolvetype::Bool=true, stack::Vector{DataCollection}
end

"""
resolve(identstr::AbstractString, parameters::Union{SmallDict{String, Any}, Nothing}=nothing;
resolve(identstr::AbstractString, parameters::Union{Dict{String, Any}, Nothing}=nothing;
resolvetype::Bool=true, stack::Vector{DataCollection}=STACK)
Attempt to resolve the identifier given by `identstr` and `parameters` against
each layer of the data `stack` in turn.
"""
function resolve(identstr::AbstractString, parameters::Union{SmallDict{String, Any}, Nothing}=nothing;
function resolve(identstr::AbstractString, parameters::Union{Dict{String, Any}, Nothing}=nothing;
resolvetype::Bool=true, stack::Vector{DataCollection}=STACK)
isempty(stack) && throw(EmptyStackError())
if (cname = parse(Identifier, identstr).collection) |> !isnothing
Expand Down
13 changes: 7 additions & 6 deletions src/model/parameters.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@ Obtain a form (depending on `action`) of `value`, a property within `source`.
**`:encode`** Look for `Identifier`s in `value`, and turn them into DataSet references
(the inverse of `:extract`).
"""
function dataset_parameters(collection::DataCollection, action::Val, params::SmallDict{String,Any})
SmallDict{String, Any}(
keys(params) |> collect,
[dataset_parameters(collection, action, value)
for value in values(params)])
function dataset_parameters(collection::DataCollection, action::Val, params::Dict{String,Any})
d = newdict(String, Any, length(params))
for (key, value) in params
d[key] = dataset_parameters(collection, action, value)
end
d
end

function dataset_parameters(collection::DataCollection, action::Val, param::Vector)
Expand Down Expand Up @@ -80,7 +81,7 @@ end
add_dataset_refs!(acc::Vector{Identifier}, @nospecialize(adt::AbstractDataTransformer)) =
add_dataset_refs!(acc, adt.parameters)

add_dataset_refs!(acc::Vector{Identifier}, props::SmallDict) =
add_dataset_refs!(acc::Vector{Identifier}, props::Dict) =
for val in values(props)
add_dataset_refs!(acc, val)
end
Expand Down
16 changes: 8 additions & 8 deletions src/model/parser.jl
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ function parse_ident(spec::AbstractString)
parse(QualifiedType, spec[3:end])
end
Identifier(collection, something(tryparse(UUID, dataset), dataset),
dtype, SmallDict{String,Any}())
dtype, newdict(String, Any, 0))
end

# ---------------
Expand All @@ -108,10 +108,10 @@ In some cases, it makes sense for this to be explicitly defined for a particular
transformer. """
function supportedtypes end # See `interaction/externals.jl` for method definitions.

supportedtypes(ADT::Type{<:AbstractDataTransformer}, spec::SmallDict{String, Any}, _::DataSet) =
supportedtypes(ADT::Type{<:AbstractDataTransformer}, spec::Dict{String, Any}, _::DataSet) =
supportedtypes(ADT, spec)

supportedtypes(ADT::Type{<:AbstractDataTransformer}, _::SmallDict{String, Any}) =
supportedtypes(ADT::Type{<:AbstractDataTransformer}, _::Dict{String, Any}) =
supportedtypes(ADT)

(ADT::Type{<:AbstractDataTransformer})(dataset::DataSet, spec::Dict{String, Any}) =
Expand All @@ -129,7 +129,7 @@ Create an `ADT` of `dataset` according to `spec`.
from the `"driver"` key in `spec`.
"""
function fromspec(ADT::Type{<:AbstractDataTransformer}, dataset::DataSet, spec::Dict{String, Any})
parameters = smallify(spec)
parameters = shrinkdict(spec)
driver = if ADT isa DataType
first(ADT.parameters)
elseif haskey(parameters, "driver")
Expand Down Expand Up @@ -171,7 +171,7 @@ end

DataStorage{driver}(dataset::Union{DataSet, DataCollection},
type::Vector{<:QualifiedType}, priority::Int,
parameters::SmallDict{String, Any}) where {driver} =
parameters::Dict{String, Any}) where {driver} =
DataStorage{driver, typeof(dataset)}(dataset, type, priority, parameters)

# ---------------
Expand All @@ -180,7 +180,7 @@ DataStorage{driver}(dataset::Union{DataSet, DataCollection},

DataCollection(name::Union{String, Nothing}=nothing; path::Union{String, Nothing}=nothing) =
DataCollection(LATEST_DATA_CONFIG_VERSION, name, uuid4(), String[],
SmallDict{String, Any}(), DataSet[], path,
Dict{String, Any}(), DataSet[], path,
AdviceAmalgamation(String[]), Main)

function DataCollection(spec::Dict{String, Any}; path::Union{String, Nothing}=nothing, mod::Module=Base.Main)
Expand Down Expand Up @@ -218,7 +218,7 @@ function fromspec(::Type{DataCollection}, spec::Dict{String, Any};
uuid4()
end)
plugins::Vector{String} = get(spec, "plugins", String[])
parameters = get(spec, "config", Dict{String, Any}()) |> smallify
parameters = get(spec, "config", Dict{String, Any}()) |> shrinkdict
unavailable_plugins = setdiff(plugins, getproperty.(PLUGINS, :name))
if length(unavailable_plugins) > 0
@warn string("The ", join(unavailable_plugins, ", ", ", and "),
Expand Down Expand Up @@ -263,7 +263,7 @@ function fromspec(::Type{DataSet}, collection::DataCollection, name::String, spe
@info "Data set '$name' had no UUID, one has been generated."
uuid4()
end)
parameters = smallify(spec)
parameters = shrinkdict(spec)
for reservedname in DATA_CONFIG_RESERVED_ATTRIBUTES[:dataset]
delete!(parameters, reservedname)
end
Expand Down
92 changes: 0 additions & 92 deletions src/model/smalldict.jl

This file was deleted.

Loading

0 comments on commit 8f3a61f

Please sign in to comment.