diff --git a/Manifest.toml b/Manifest.toml
index 3631e75..3c0d65e 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -72,9 +72,9 @@ uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
 
 [[Missings]]
 deps = ["Dates", "InteractiveUtils", "SparseArrays", "Test"]
-git-tree-sha1 = "adc26d2ee85a49c413464110d922cf21efc9d233"
+git-tree-sha1 = "d1d2585677f2bd93a97cfeb8faa7a0de0f982042"
 uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
-version = "0.3.1"
+version = "0.4.0"
 
 [[Mmap]]
 uuid = "a63ad114-7e13-5084-954f-fe012c677804"
@@ -113,6 +113,12 @@ uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
 deps = ["Serialization"]
 uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 
+[[Requires]]
+deps = ["Test"]
+git-tree-sha1 = "f6fbf4ba64d295e146e49e021207993b6b48c7d1"
+uuid = "ae029012-a4dd-5104-9daa-d747884805df"
+version = "0.5.2"
+
 [[Rmath]]
 deps = ["BinaryProvider", "Libdl", "Random", "Statistics", "Test"]
 git-tree-sha1 = "9a6c758cdf73036c3239b0afbea790def1dabff9"
diff --git a/Project.toml b/Project.toml
index 74d0673..3130a7e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -5,6 +5,7 @@ version = "0.2.0"
 
 [deps]
 Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
+Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/REQUIRE b/REQUIRE
index 1068a15..41122b2 100644
--- a/REQUIRE
+++ b/REQUIRE
@@ -1,2 +1,3 @@
 julia 0.7
 Distributions
+Requires
diff --git a/src/SimpleANOVA.jl b/src/SimpleANOVA.jl
index 7567599..fadd5e9 100644
--- a/src/SimpleANOVA.jl
+++ b/src/SimpleANOVA.jl
@@ -1,6 +1,6 @@
 module SimpleANOVA
 
-using Distributions
+using Distributions, Requires
 include("InvertedIndices.jl")
 include("AnovaEffect.jl")
 include("AnovaValue.jl")
@@ -14,9 +14,14 @@ const cellsname = "Cells"
 const errorname = "Error"
 const remaindername = "Remainder"
 
+function __init__()
+    @require DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" include("anova_dataframe.jl")
+end
+
 """
     anova(observations::Union{Array{Number}, Array{Vector{Number}}}, factortypes = FactorType[]; factornames = String[], hasreplicates = true)
-    anova(observations::Vector{Number}, factorassignments::Vector{Vector{Int}}, factortypes = FactorType[]; factornames = String[], hasreplicates = true)
+    anova(observations::Vector{Number}, factorassignments::Vector{Vector{Any}}, factortypes = FactorType[]; factornames = String[], hasreplicates = true)
+    anova(df::DataFrame, observationscolumn::Symbol, factorcolumns::Vector{Symbol}, factortypes = FactorType[]; factornames = String[])
 
 Performs an Analysis of Variance (ANOVA) computation.
 
@@ -107,7 +112,7 @@ function anova(observations::AbstractArray{T}, factortypes::Vector{FactorType} =
     anovakernel(observations, nreplicates, ncells, nnestedfactors, ncrossedfactors, nfactorlevels, crossedfactortypes, crossedfactornames, nestedfactornames)
 end
 
-function anova(observations::AbstractVector{T}, factorassignments::AbstractVector{<:AbstractVector{<:Int}}, factortypes::Vector{FactorType} = FactorType[], factornames::Vector{<:AbstractString} = String[]) where {T <: Number}
+function anova(observations::AbstractVector{T}, factorassignments::AbstractVector{<:AbstractVector}, factortypes::Vector{FactorType} = FactorType[], factornames::Vector{<:AbstractString} = String[]) where {T <: Number}
     length(observations) > 0 || return
     nfactors = length(factorassignments)
     N = length(observations)
@@ -122,7 +127,7 @@ function anova(observations::AbstractVector{T}, factorassignments::AbstractVecto
     all(nperfactorlevel .|> length .== 1) || error("Design is unbalanced.")
     nperfactorlevel = nperfactorlevel .|> first
 
-    if any(maximum.(factorlevels) .> nfactorlevels)
+    if !all(v -> eltype(v) <: Number, factorassignments) || any(maximum.(factorlevels) .> nfactorlevels)
         compressedfactorlevels = [1:i for i ∈ nfactorlevels]
         factorlevelremapping = [factorlevels[i] .=> compressedfactorlevels[i] for i ∈ 1:nfactors]
         factorassignments = [replace(factorassignments[i], factorlevelremapping[i]...) for i ∈ 1:nfactors]
@@ -134,7 +139,7 @@ function anova(observations::AbstractVector{T}, factorassignments::AbstractVecto
     sortorder = sortperm(repeat(1:nreplicates, Int(N / nreplicates)) .+ sum([factorassignments[i] .* prod(nlevels[1:i]) for i ∈ 1:nfactors]))
     observationsmatrix = reshape(observations[sortorder], nlevels...)
 
-    anova(observationsmatrix, factortypes, factornames = factornames)
+    anova(observationsmatrix, factortypes, factornames = factornames, hasreplicates = nreplicates > 1)
 end
 
 function validate(factortypes::Vector{FactorType}, factornames::Vector{<:AbstractString}, nfactors)
diff --git a/src/anova_dataframe.jl b/src/anova_dataframe.jl
new file mode 100644
index 0000000..899d501
--- /dev/null
+++ b/src/anova_dataframe.jl
@@ -0,0 +1,11 @@
+# DataFrame front end for `anova`; included from `__init__` through `@require DataFrames`.
+# NOTE(review): `DataFrames.DataFrame` relies on `@require` binding the `DataFrames` module name here - confirm for the pinned Requires version.
+function anova(df::DataFrames.DataFrame, observationscolumn::Symbol, factorcolumns::Vector{Symbol}, factortypes::Vector{FactorType} = FactorType[]; factornames::Vector{<:AbstractString} = String[])
+    observations = df[observationscolumn]
+    length(observations) > 0 || return
+    eltype(observations) <: Number || error("Observations must be numeric")
+    isempty(factornames) && (factornames = [String(col) for col ∈ factorcolumns])
+    # The vector method takes one assignment vector per factor and `factornames` as a positional argument.
+    factorassignments = [df[col] for col ∈ factorcolumns]
+    anova(observations, factorassignments, factortypes, factornames)
+end