From 1f4f022a488c580f26d586902f8dbc425ff2ce19 Mon Sep 17 00:00:00 2001
From: TEC
Date: Fri, 24 May 2024 02:11:36 +0800
Subject: [PATCH] WIP

---
 src/DataToolkitBase.jl            |   1 +
 src/interaction/externals.jl      | 174 +++------------
 src/interaction/typetransforms.jl | 326 ++++++++++++++++++++++++++++++
 3 files changed, 362 insertions(+), 139 deletions(-)
 create mode 100644 src/interaction/typetransforms.jl

diff --git a/src/DataToolkitBase.jl b/src/DataToolkitBase.jl
index 051fb5e..f5ec52a 100644
--- a/src/DataToolkitBase.jl
+++ b/src/DataToolkitBase.jl
@@ -43,6 +43,7 @@ include("model/writer.jl")
 include("model/usepkg.jl")
 include("model/dataplugin.jl")
 
+include("interaction/typetransforms.jl")
 include("interaction/externals.jl")
 include("interaction/display.jl")
 include("interaction/manipulation.jl")
diff --git a/src/interaction/externals.jl b/src/interaction/externals.jl
index 233644b..44ad47d 100644
--- a/src/interaction/externals.jl
+++ b/src/interaction/externals.jl
@@ -160,97 +160,32 @@ function Base.read(dataset::DataSet)
     @advise read1(dataset, as)
 end
 
-"""
-    issubtype(X::Type, T::Union{Type, TypeVar})
-    issubtype(x::X, T::Union{Type, TypeVar})
-
-Check if `X` is indeed a subtype of `T`.
-
-This is a tweaked version of `isa` that can (mostly) handle `TypeVar` instances.
-"""
-function issubtype(X::Type, T::Union{Type, TypeVar})
-    if T isa TypeVar
-        # We can't really handle complex `TypeVar` situations,
-        # but we'll give the very most basic a shot, and cross
-        # our fingers with the rest.
-        if T.lb isa Type && T.ub isa Type
-            T.lb <: X <: T.ub
-        else
-            false
-        end
-    else
-        @assert T isa Type
-        X <: T
-    end
-end
-
-issubtype(x, T::Union{Type, TypeVar}) =
-    issubtype(typeof(x), T::Union{Type, TypeVar})
-
-"""
-    isparamsubtype(X, T::Union{Type, TypeVar}, Tparam::Union{Type, TypeVar}, paramT::Type)
-
-Check that `arg` is of type `T`, where `T` may be parameterised by
-`Tparam` which itself takes on the type `paramT`.
-
-More specifically, when `Tparam == Type{T}`, this checks that
-`arg` is of type `paramT`, and returns `issubtype(arg, T)` otherwise.
-"""
-function isparamsubtype(X::Type, T::Union{Type, TypeVar}, Tparam::Union{Type, TypeVar}, paramT::Type)
-    if T isa TypeVar && Type{T} == Tparam
-        X <: paramT
-    else
-        issubtype(X, T)
-    end
-end
-
 """
     read1(dataset::DataSet, as::Type)
 
-The advisible implementation of `read(dataset::DataSet, as::Type)`
-This is essentially an excersise in useful indirection.
+The advisable implementation of `read(dataset::DataSet, as::Type)`, which see.
+
+This is essentially an exercise in useful indirection.
 """
 function read1(dataset::DataSet, as::Type)
-    all_load_fn_sigs = map(fn -> Base.unwrap_unionall(fn.sig),
-                           methods(load, Tuple{DataLoader, Any, Any}))
-    qtype = QualifiedType(as)
-    # Filter to loaders which are declared in `dataset` as supporting `as`.
-    # These will have already been ordered by priority during parsing.
-    potential_loaders =
-        filter(loader -> any(st -> ⊆(st, qtype, mod=dataset.collection.mod), loader.type),
-               dataset.loaders)
-    # If no matching loaders could be found, be a bit generous and /just try/
-    # filtering to the specified `as` type. If this works, it's probably what's
-    # wanted, and incompatibility should be caught by later stages.
-    if isempty(potential_loaders)
-        # Here I use `!isempty(methods(...))` which may seem strange, given
-        # `hasmethod` exists. While in theory it would be reasonable to expect
-        # that `hasmethod(f, Tuple{A, Union{}, B})` would return true if a method
-        # with the signature `Tuple{A, <:Any, B}` existed, this is unfortunately
-        # not the case in practice, and so we must resort to `methods`.
-        potential_loaders =
-            filter(loader -> !isempty(methods(load, Tuple{typeof(loader), <:Any, Type{as}})),
-                   dataset.loaders)
-    end
-    for loader in potential_loaders
-        load_fn_sigs = filter(fnsig -> issubtype(loader, fnsig.types[2]), all_load_fn_sigs)
+    for loader in dataset.loaders
+        l_steps = typesteps(loader, as)
+        isempty(l_steps) && continue
         # Find the highest priority load function that can be satisfied,
         # by going through each of the storage backends one at a time:
         # looking for the first that is (a) compatible with a load function,
         # and (b) available (checked via `!isnothing`).
         for storage in dataset.storage
-            for load_fn_sig in load_fn_sigs
-                supported_storage_types = Vector{Type}(
-                    filter(!isnothing, typeify.(storage.type)))
-                valid_storage_types =
-                    filter(stype -> isparamsubtype(stype, load_fn_sig.types[3], load_fn_sig.types[4], as),
-                           supported_storage_types)
-                for storage_type in valid_storage_types
-                    datahandle = open(dataset, storage_type; write = false)
+            for (Tloader_in, Tloader_out) in l_steps
+                s_steps = typesteps(storage, Tloader_in; write = false)
+                for (_, Tstorage_out) in s_steps
+                    datahandle = open(dataset, Tstorage_out; write = false)
                     if !isnothing(datahandle)
-                        result = @advise dataset load(loader, datahandle, as)
+                        result = @advise dataset load(loader, datahandle, Tloader_out)
                         if !isnothing(result)
                             return something(result)
+                        elseif datahandle isa IOStream
+                            close(datahandle)
                         end
                     end
                 end
@@ -259,29 +194,30 @@ function read1(dataset::DataSet, as::Type)
         # Check for a "null storage" option. This is to enable loaders
         # like DataToolkitCommon's `:julia` which can construct information
         # without an explicit storage backend.
-        for load_fn_sig in load_fn_sigs
-            if load_fn_sig.types[3] == Nothing
-                return @advise dataset load(loader, nothing, as)
+        for (Tloader_in, Tloader_out) in l_steps
+            if Tloader_in == Nothing
+                result = @advise dataset load(loader, nothing, as)
+                !isnothing(result) && return something(result)
             end
         end
     end
-    if length(potential_loaders) == 0
-        throw(UnsatisfyableTransformer(dataset, DataLoader, [qtype]))
+    throw(guess_read_failure_cause(dataset, as))
+end
+
+"""
+    guess_read_failure_cause(dataset::DataSet, as::Type)
+
+Construct (without throwing) the exception explaining why `dataset` could not
+be read as `as`: no loader offers a path to `as`, or no storage could supply
+an input type that those loader paths accept.
+"""
+function guess_read_failure_cause(dataset::DataSet, as::Type)
+    loader_steps = [typesteps(loader, as) for loader in dataset.loaders] |> Iterators.flatten |> collect
+    if isempty(loader_steps)
+        UnsatisfyableTransformer(dataset, DataLoader, [QualifiedType(as)])
     else
-        loadertypes = map(
-            f -> QualifiedType( # Repeat the logic from `valid_storage_types` / `isparamsubtype`
-                if f.types[3] isa TypeVar
-                    if f.types[4] == Type{f.types[3]}
-                        as
-                    else
-                        f.types[3].ub
-                    end
-                else
-                    f.types[3]
-                end),
-            filter(f -> any(l -> issubtype(l, f.types[2]), potential_loaders),
-                   all_load_fn_sigs)) |> unique
-        throw(UnsatisfyableTransformer(dataset, DataStorage, loadertypes))
+        loader_intypes = map(QualifiedType, map(first, loader_steps) |> unique)
+        UnsatisfyableTransformer(dataset, DataStorage, loader_intypes)
     end
 end
 
@@ -320,6 +256,8 @@ function load end
 load((loader, source, as)::Tuple{DataLoader, Any, Type}) =
     load(loader, source, as)
 
+# A selection of fallback methods for various forms of raw file content
+
 """
     open(dataset::DataSet, as::Type; write::Bool=false)
 
@@ -437,45 +375,3 @@ function save end
 
 save((writer, dest, info)::Tuple{DataWriter, Any, Any}) =
     save(writer, dest, info)
-
-# For use during parsing, see `fromspec` in `model/parser.jl`.
-
-function extracttypes(T::Type)
-    splitunions(T::Type) = if T isa Union Base.uniontypes(T) else (T,) end
-    if T == Type || T == Any
-        (Any,)
-    elseif T isa UnionAll
-        first(Base.unwrap_unionall(T).parameters).ub |> splitunions
-    elseif T isa Union
-        first.(getproperty.(Base.uniontypes(T), :parameters))
-    else
-        T1 = first(T.parameters)
-        if T1 isa TypeVar T1.ub else T1 end |> splitunions
-    end
-end
-
-const genericstore = first(methods(storage, Tuple{DataStorage{Any}, Any}))
-const genericstoreget = first(methods(getstorage, Tuple{DataStorage{Any}, Any}))
-const genericstoreput = first(methods(putstorage, Tuple{DataStorage{Any}, Any}))
-
-supportedtypes(L::Type{<:DataLoader}, T::Type=Any)::Vector{QualifiedType} =
-    map(fn -> extracttypes(Base.unwrap_unionall(fn.sig).types[4]),
-        sort(methods(load, Tuple{L, T, Any}), by=m->m.primary_world)) |>
-            Iterators.flatten .|> QualifiedType |> unique |> reverse
-
-supportedtypes(W::Type{<:DataWriter}, T::Type=Any)::Vector{QualifiedType} =
-    map(fn -> QualifiedType(Base.unwrap_unionall(fn.sig).types[4]),
-        sort(methods(save, Tuple{W, T, Any}), by=m->m.primary_world)) |>
-            unique |> reverse
-
-supportedtypes(S::Type{<:DataStorage})::Vector{QualifiedType} =
-    map(fn -> extracttypes(Base.unwrap_unionall(fn.sig).types[3]),
-        let ms = filter(m -> m != genericstore,
-                        sort(methods(storage, Tuple{S, Any}), by=m->m.primary_world))
-            if isempty(ms)
-                vcat(filter(m -> m != genericstoreget,
-                            sort(methods(getstorage, Tuple{S, Any}), by=m->m.primary_world)),
-                     filter(m -> m != genericstoreput,
-                            sort(methods(putstorage, Tuple{S, Any}), by=m->m.primary_world)))
-            else ms end
-        end) |> Iterators.flatten .|> QualifiedType |> unique |> reverse
diff --git a/src/interaction/typetransforms.jl b/src/interaction/typetransforms.jl
new file mode 100644
index 0000000..c152cbf
--- /dev/null
+++ b/src/interaction/typetransforms.jl
@@ -0,0 +1,326 @@
+## Type transformations
+
+# This is essentially the infrastructure for dynamic dispatch on steroids (`read1`).
+#
+# I'm aware that this use of `methods` and direct accessing of signatures looks
+# somewhat iffy. However, I'm not aware of any alternative approach that is able
+# to achieve the level of dynamism or ease of use that we're trying to produce
+# here — short of perhaps explicitly registering methods somehow, but to me that
+# approach seems to suffer from distinctly inferior ease of use.
+#
+# By determining all the possible type transformations the defined methods of a
+# data transformer might be able to perform, given a target output type, we can
+# then consider the possibilities of multiple stages of transformation together
+# in concert.
+
+"""
+    issubtype(X::Type, T::Union{Type, TypeVar})
+    issubtype(x::X, T::Union{Type, TypeVar})
+
+Check if `X` is indeed a subtype of `T`.
+
+This is a tweaked version of `<:`/`isa` that can (mostly) handle `TypeVar` instances.
+"""
+function issubtype(X::Type, T::Union{Type, TypeVar})
+    if T isa TypeVar
+        # We can't really handle complex `TypeVar` situations,
+        # but we'll give the very most basic a shot, and cross
+        # our fingers with the rest.
+        if T.lb isa Type && T.ub isa Type
+            T.lb <: X <: T.ub
+        else
+            false
+        end
+    else
+        X <: T
+    end
+end
+
+issubtype(x, T::Union{Type, TypeVar}) =
+    issubtype(typeof(x), T)
+
+"""
+    paramtypebound(T::Union{Type, TypeVar}, Tparam::Union{Type, TypeVar}, paramT::Type)
+
+Return the `Type` that bounds `T`.
+
+This is simply `T` when `T` isa `Type`. When `T` is a `TypeVar` with
+`Tparam == Type{T}`, the type it is parameterised by is taken to be `paramT`;
+any other `TypeVar` is bounded by its upper bound, `T.ub`.
+
+# Examples
+
+```julia-repl
+julia> paramtypebound(String, IO, IO)
+String
+
+julia> T = TypeVar(:T)
+T
+
+julia> paramtypebound(T, Type{T}, Float64)
+Float64
+```
+"""
+function paramtypebound(T::Union{Type, TypeVar}, Tparam::Union{Type, TypeVar}, paramT::Type)
+    if T isa TypeVar && Type{T} == Tparam
+        paramT
+    elseif T isa TypeVar
+        T.ub
+    else
+        T
+    end::Type
+end
+
+"""
+    targettypes(types::Vector{<:QualifiedType}, desired::Type) -> Vector{Type}
+    targettypes(transformer::AbstractDataTransformer, desired::Type) -> Vector{Type}
+
+Return all `Type`s that one might hope to produce from `types` or `transformer`.
+
+More specifically, this gives every declared type that resolves (via `typeify`)
+to a subtype of `desired`; `typesteps` appends `desired` itself when absent.
+
+Priority order is preserved.
+"""
+function targettypes end
+
+function targettypes(types::Vector{<:QualifiedType}, desired::Type; mod::Module = Main)
+    @nospecialize
+    targets = Type[]
+    for typ in types
+        Ttyp = typeify(typ; mod)
+        isnothing(Ttyp) && continue
+        if Ttyp <: desired
+            push!(targets, Ttyp)
+        end
+    end
+    targets
+end
+
+targettypes(@nospecialize(storage::DataStorage), @nospecialize(desired::Type)) =
+    targettypes(storage.type, desired; mod=storage.dataset.collection.mod)
+
+targettypes(@nospecialize(loader::DataLoader), @nospecialize(desired::Type)) =
+    targettypes(loader.type, desired; mod=loader.dataset.collection.mod)
+
+targettypes(@nospecialize(writer::DataWriter), @nospecialize(desired::Type)) =
+    targettypes(writer.type, desired; mod=writer.dataset.collection.mod)
+
+"""
+    ispreferredpath(a, b)
+
+Return whether "type path" `a` is strictly preferred to `b`: by lower index, then
+by more specific output type, and finally by more specific transformer type.
+
+Each "type path" is a tuple of the form:
+
+    (Tin::Type => Tout::Type, index::Int, transformer::Type{<:AbstractDataTransformer})
+"""
+function ispreferredpath(((a_in, a_out), a_ind, a_ldr)::Tuple{Pair{Type, Type}, Int, Type},
+                         ((b_in, b_out), b_ind, b_ldr)::Tuple{Pair{Type, Type}, Int, Type})
+    @nospecialize
+    a_ind != b_ind && return a_ind < b_ind
+    Base.morespecific(a_out, b_out) && return true
+    !Base.morespecific(b_out, a_out) && Base.morespecific(a_ldr, b_ldr)
+end
+
+"""
+    transformersigs(transformer::Type{<:AbstractDataTransformer}, desired::Type)
+
+Return processed signatures of the transformation methods implemented for
+`transformer` that could produce/provide a subtype of `desired`.
+
+- `DataStorage` produces tuples of `(Type{<:DataStorage}, Type{out})`
+- `DataLoader` produces tuples of `(Type{<:DataLoader}, Type{in}, Type{out})`
+- `DataWriter` produces tuples of `(Type{<:DataWriter}, Type{dest}, Type{data})`
+
+The `DataStorage` method takes `read::Bool` and `write::Bool` keyword arguments.
+"""
+function transformersigs end
+
+"""
+    typevariants(T::Union{Type, TypeVar}) -> Vector{Type}
+
+Split `T` into candidate types (`Union` members, `X` of `Type{X}`, `TypeVar` bound).
+"""
+function typevariants(T::Type)::Vector{Type}
+    if T == Type || T == Any
+        [Any]
+    elseif T isa UnionAll && T.var.ub isa Union
+        first(Base.unwrap_unionall(T).parameters).ub |> typevariants
+    elseif T isa UnionAll || (T isa DataType && T.name.name != :Type)
+        [T]
+    elseif T isa Union
+        Base.uniontypes(T)
+    elseif T isa Type{<:Any}
+        typevariants(first(T.parameters))
+    else
+        [T]
+    end
+end
+
+typevariants(T::TypeVar) = typevariants(T.ub)
+
+function transformersigs(S::Type{<:DataStorage}, desired::Type; read::Bool=true, write::Bool=true)
+    @nospecialize
+    ms = Vector{Method}(methods(storage, Tuple{DataStorage, <:Any}).ms)
+    read && append!(ms, methods(getstorage, Tuple{DataStorage, <:Any}).ms)
+    write && append!(ms, methods(putstorage, Tuple{DataStorage, <:Any}).ms)
+    sort!(ms, by = m -> m.primary_world)
+    sigs = [Base.unwrap_unionall(m.sig) for m in ms]
+    types = Tuple{Type, Union{Type, TypeVar}}[]
+    for sig in sigs
+        (_, Tstor::Union{Type, TypeVar}, Tout1::Type) = sig.types
+        Tstor == DataStorage && Tout1 in (Any, Type) && continue
+        issubtype(S, Tstor) || continue
+        if Tstor isa TypeVar
+            Tstor = Tstor.ub
+        end
+        if Tout1 == Type
+            push!(types, (Tstor, desired))
+        else
+            for Tout in typevariants(Tout1)
+                Tout <: desired || desired <: Tout || continue
+                push!(types, (Tstor, Tout))
+            end
+        end
+    end
+    types
+end
+
+function transformersigs(L::Type{<:DataLoader}, desired::Type)
+    @nospecialize
+    ms = methods(load, Tuple{DataLoader, <:Any, <:Any}).ms
+    sort!(ms, by = m -> m.primary_world)
+    sigs = [Base.unwrap_unionall(m.sig) for m in ms]
+    types = Tuple{Type, Union{Type, TypeVar}, Type}[]
+    for sig in sigs
+        (_, Tloader::Union{Type, TypeVar}, Tin::Union{Type, TypeVar}, Tout1::Type) = sig.types
+        issubtype(L, Tloader) || continue
+        if Tloader isa TypeVar
+            Tloader = Tloader.ub
+        end
+        if Tout1 == Type{Tin}
+            push!(types, (Tloader, desired, desired))
+        else
+            for Tout in typevariants(Tout1)
+                Tout <: desired || desired <: Tout || continue
+                push!(types, (Tloader, Tin, Tout))
+            end
+        end
+    end
+    types
+end
+
+function transformersigs(W::Type{<:DataWriter}, desired::Type)
+    @nospecialize
+    ms = methods(save, Tuple{DataWriter, <:Any, <:Any}).ms
+    sort!(ms, by = m -> m.primary_world)
+    sigs = [Base.unwrap_unionall(m.sig) for m in ms]
+    types = Tuple{Type, Union{Type, TypeVar}, Type}[]
+    for sig in sigs
+        (_, Twriter::Union{Type, TypeVar}, Tdest::Union{Type, TypeVar}, Tin1::Type) = sig.types
+        issubtype(W, Twriter) || continue
+        if Twriter isa TypeVar
+            Twriter = Twriter.ub
+        end
+        if Tin1 == Type
+            push!(types, (Twriter, Tdest, desired))
+        else
+            for Tin in typevariants(Tin1)
+                issubtype(Tin, desired) || continue
+                push!(types, (Twriter, Tdest, Tin))
+            end
+        end
+    end
+    types
+end
+
+supportedtypes(S::Type{<:DataStorage}, T::Type=Any)::Vector{QualifiedType} =
+    map(QualifiedType, map(last, transformersigs(S, T)) |> unique |> reverse)
+
+supportedtypes(L::Type{<:DataLoader}, T::Type=Any)::Vector{QualifiedType} =
+    map(QualifiedType, map(last, transformersigs(L, T)) |> unique |> reverse)
+
+supportedtypes(W::Type{<:DataWriter}, T::Type=Any)::Vector{QualifiedType} =
+    map(QualifiedType, map(s -> s[2], transformersigs(W, T)) |> unique |> reverse)
+
+"""
+    typesteps(loader::DataLoader, desired::Type) -> Vector{Pair{Type, Type}}
+
+Identify and order all uses of `loader` that may produce a subtype of `desired`.
+
+More specifically, this finds all `load` methods that can produce a subtype of
+`desired`, checks what input and output types they work with, and orders them
+according to the declared types of `loader` and the specificity of the output
+types (more specific is interpreted as better).
+
+The output vector gives the step-change in the type domain that each method performs.
+"""
+function typesteps end
+
+function typesteps(loader::DataLoader, desired::Type)
+    @nospecialize
+    target_types = targettypes(loader, desired)
+    desired in target_types || push!(target_types, desired)
+    path_infos = Tuple{Pair{Type, Type}, Int, Type}[]
+    for (Tloader, Tin, Tout) in transformersigs(typeof(loader), desired)
+        if Tout isa TypeVar || Tout == Any
+            for ttype in target_types
+                intype = paramtypebound(Tin, Tout, ttype)
+                target_ind = something(findfirst(qt -> qt <: ttype, target_types),
+                                       length(target_types) + 1)
+                push!(path_infos, ((intype => ttype), target_ind, Tloader))
+            end
+        else
+            intype = paramtypebound(Tin, Tout, desired)
+            target_ind = something(findfirst(qt -> qt <: Tout, target_types),
+                                   length(target_types) + 1)
+            push!(path_infos, ((intype => Tout), target_ind, Tloader))
+        end
+    end
+    sort!(path_infos, lt = ispreferredpath)
+    unique(map(first, path_infos))
+end
+
+function typesteps(store::DataStorage, desired::Type; write::Bool)
+    @nospecialize
+    target_types = targettypes(store, desired)
+    desired in target_types || push!(target_types, desired)
+    path_infos = Tuple{Pair{Type, Type}, Int, Type}[]
+    for (Tstor, Tout) in transformersigs(typeof(store), desired; read=!write, write)
+        outtypes = Tout isa TypeVar || Tout == Any ? target_types : Type[Tout]
+        for ttype in outtypes
+            target_ind = something(findfirst(qt -> qt <: ttype, target_types),
+                                   length(target_types) + 1)
+            push!(path_infos, ((Nothing => ttype), target_ind, Tstor))
+        end
+    end
+    sort!(path_infos, lt = ispreferredpath)
+    unique(map(first, path_infos))
+end
+
+function typesteps(writer::DataWriter, desired::Type)
+    @nospecialize
+    target_types = targettypes(writer, desired)
+    desired in target_types || push!(target_types, desired)
+    path_infos = Tuple{Pair{Type, Type}, Int, Type}[]
+    for (Twriter, Tdest, Tin) in transformersigs(typeof(writer), desired)
+        if Tin isa TypeVar || Tin == Any
+            for ttype in target_types
+                desttype = paramtypebound(Tdest, Tin, ttype)
+                target_ind = something(findfirst(qt -> qt <: ttype, target_types),
+                                       length(target_types) + 1)
+                push!(path_infos, ((ttype => desttype), target_ind, Twriter))
+            end
+        else
+            desttype = paramtypebound(Tdest, Tin, desired)
+            target_ind = something(findfirst(qt -> qt <: Tin, target_types),
+                                   length(target_types) + 1)
+            push!(path_infos, ((Tin => desttype), target_ind, Twriter))
+        end
+    end
+    sort!(path_infos, lt = ispreferredpath)
+    unique(map(first, path_infos))
+end
+