Skip to content

Commit

Permalink
Contrasts (#12)
Browse files Browse the repository at this point in the history
  • Loading branch information
BioTurboNick authored Nov 2, 2019
1 parent e56925c commit 5e508e8
Show file tree
Hide file tree
Showing 8 changed files with 327 additions and 29 deletions.
25 changes: 13 additions & 12 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,16 @@ uuid = "9e28174c-4ba2-5203-b857-d8d62c4213ee"
version = "0.8.10"

[[BinaryProvider]]
deps = ["Libdl", "Logging", "SHA"]
git-tree-sha1 = "c7361ce8a2129f20b0e05a89f7070820cfed6648"
deps = ["Libdl", "SHA"]
git-tree-sha1 = "29995a7b317bbd06be147e1974a3541ce2502dca"
uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
version = "0.5.6"
version = "0.5.7"

[[Compat]]
deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
git-tree-sha1 = "84aa74986c5b9b898b0d1acaf3258741ee64754f"
git-tree-sha1 = "ed2c4abadf84c53d9e58510b5fc48912c2336fbb"
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
version = "2.1.0"
version = "2.2.0"

[[DataAPI]]
git-tree-sha1 = "674b67f344687a88310213ddfa8a2b3c76cc4252"
Expand All @@ -34,9 +34,9 @@ version = "1.1.0"

[[DataStructures]]
deps = ["InteractiveUtils", "OrderedCollections"]
git-tree-sha1 = "517ce30aa57cdfae1ab444a7c0aef8bb86345bc2"
git-tree-sha1 = "1fe8fad5fc84686dcbc674aa255bc867a64f8132"
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
version = "0.17.1"
version = "0.17.5"

[[Dates]]
deps = ["Printf"]
Expand All @@ -52,9 +52,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"

[[Distributions]]
deps = ["LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns"]
git-tree-sha1 = "b419fcf95ef9c8cf4d6610cd323890ad66d64240"
git-tree-sha1 = "058e5e39ceee0f92ccec70c5bf31c90ffb374669"
uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
version = "0.21.3"
version = "0.21.5"

[[InteractiveUtils]]
deps = ["Markdown"]
Expand Down Expand Up @@ -84,9 +84,10 @@ deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"

[[Missings]]
git-tree-sha1 = "29858ce6c8ae629cf2d733bffa329619a1c843d0"
deps = ["DataAPI"]
git-tree-sha1 = "de0a5ce9e5289f27df672ffabef4d1e5861247d5"
uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
version = "0.4.2"
version = "0.4.3"

[[Mmap]]
uuid = "a63ad114-7e13-5084-954f-fe012c677804"
Expand Down Expand Up @@ -183,7 +184,7 @@ uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
version = "0.8.0"

[[SuiteSparse]]
deps = ["Libdl", "LinearAlgebra", "SparseArrays"]
deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"]
uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9"

[[Test]]
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "SimpleANOVA"
uuid = "fff527a3-8410-504e-9ca3-60d5e79bb1e4"
authors = ["Nicholas Bauer <[email protected]>"]
version = "0.5.2"
version = "0.6.0"

[deps]
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
Expand Down
27 changes: 14 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,32 +16,36 @@ It uses multidimensional arrays to interpret the structure of the data. Replicat
Can also work with multiple vectors and DataFrames.

**New in v0.5**: ω² effect size (Disclaimer: effect size calculations for nested and 3-way mixed ANOVA is inferred and may not be correct.)
**New in v0.6**: Linear contrasts added for planned post-hoc tests between factor levels

See docstring for usage.
See docstrings for usage.

**Experimental, use at own risk!**

Examples
--------
```
data # N-dimensional matrix of observations
levene(data) # test data for homogeniety of variance
result = anova(data) # conduct the test
plot(result) # create pairwise factor plots
data # N-dimensional matrix of observations
levene(data) # test data for homogeniety of variance
result = anova(data) # conduct the test
plot(result) # create pairwise factor plots
differencecontrasts(result, 2) # perform orthogonal series of difference contrasts for the levels of factor 2
```
```
data # vector of observations
factors # vector of factor level assignment vectors
levene(data) # test data for homogeniety of variance
result = anova(data, factors) # conduct the test
plot(result) # create pairwise factor plots
data # vector of observations
factors # vector of factor level assignment vectors
levene(data) # test data for homogeniety of variance
result = anova(data, factors) # conduct the test
plot(result) # create pairwise factor plots
contrast(result, [1, 2, 2, 2]) # calculate the contrast between factor level 1 of factor 1 with remaining factor levels
```
```
df # DataFrame
factors # vector of symbols for factor assignment columns
levene(df, :observations, factors) # test data for homogeniety of variance
result = anova(df, :observations, factors) # conduct the test
plot(result) # create pairwise factor plots
simplecontrasts(result) # calculate the contrast between the first factor level (control) to each other level
```

Differences from SPSS
Expand All @@ -68,6 +72,3 @@ Choice of error terms for the F tests in mixed ANOVA follows Zar 1999, _Biostati
|----------------|-------------------|-------------------|-------------------|-------|-------|-------|-------|
| SPSS | A×C + A×B - A×B×C | A×B + B×C - A×B×C | A×C + B×C - A×B×C | A×B×C | A×B×C | A×B×C | Error |
| SimpleANOVA.jl | A×C + A×B - A×B×C | B×C | B×C | A×B×C | A×B×C | Error | Error |



6 changes: 4 additions & 2 deletions src/SimpleANOVA.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,11 @@ include("data/AnovaValue.jl")
include("data/AnovaFactor.jl")
include("data/AnovaResult.jl")
include("data/AnovaData.jl")
include("data/AnovaContrastResult.jl")
include("data/FactorType.jl")
include("anova.jl")
include("pretests.jl")
include("contrasts.jl")


function __init__()
Expand All @@ -38,8 +40,8 @@ function __init__()
end


export anova, ftest, plot, levene
export AnovaEffect, AnovaValue, AnovaFactor, AnovaResult, AnovaData
export anova, ftest, plot, levene, contrast, differencecontrasts, repeatedcontrasts, simplecontrasts
export AnovaEffect, AnovaValue, AnovaFactor, AnovaResult, AnovaData, AnovaContrastResult, AnovaContrastResults
export FactorType, fixed, random, nested

end
128 changes: 128 additions & 0 deletions src/contrasts.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@

pvalue(dist, x) = min(2 * min(cdf(dist, x), ccdf(dist, x)), 1.0)

"""
contrast(anovaresult::AnovaResult, groupassignment::Vector{Int})
Calculate a single-factor contrast against the groups specified in `groupassignment`.
Valid groups are "0", "1", and "2", where "0" excludes the group from the comparison.
Note: If you do nonorthogonal contrasts, use the Bonferroni or Šidák correction to adjust the
α level (the level at which you consider a result likely to be true):
Bonferroni: α′ = α/c for c contrasts
Šidák: α′ = 1 - (1 - α)^(1 / c) (slightly better)
where c = number of contrasts
Note: Effect size is calcluated using the error term associated with the factor. Other choices are possible,
including average of each group error; or the error associated with a control.
"""
function contrast(anovaresult::AnovaData, groupassignment::Vector{Int}, factorindex::Int = 1)
0 < factorindex anovaresult.ncrossedfactors || error("factor index must be a valid crossed factor")
all(0 .≤ groupassignment .≤ 2) || error("valid groups are 0, 1, and 2")
length(groupassignment) == anovaresult.ncrossedfactorlevels[factorindex] || error("each level must be assigned to a group")

lowerfactorlevels = prod(anovaresult.ncrossedfactorlevels[1:(factorindex-1)])
upperfactorlevels = prod(anovaresult.ncrossedfactorlevels[(factorindex+1):anovaresult.ncrossedfactors])

# factorindex is in the order the factors appear in the result
# but the crossedcellmeans dimensions are in reverse order (as input)
group1levels = repeat(groupassignment .== 1, outer = upperfactorlevels, inner = lowerfactorlevels)
group2levels = repeat(groupassignment .== 2, outer = upperfactorlevels, inner = lowerfactorlevels)
group1count = count(group1levels)
group2count = count(group2levels)

contrastcoefficients = zeros(anovaresult.ncrossedfactorlevels[factorindex] * lowerfactorlevels * upperfactorlevels)
contrastcoefficients[group1levels] .= 1 / group1count
contrastcoefficients[group2levels] .= -1 / group2count

errorfactor = anovaresult.crossedfactorsdenominators[factorindex]
ψ = sum(contrastcoefficients .* anovaresult.crossedcellmeans)
contrast = anovaresult.npercrossedcell * ψ ^ 2
error = sum(contrastcoefficients .^ 2) * errorfactor.ms

f = contrast / error
fdist = FDist(1, errorfactor.df)
p = ccdf(fdist, f)

effectsize = sqrt(f / (f + errorfactor.df)) # t^2 == f
#cohen's d: effectsize = ψ / sqrt(errorfactor.ms) ; preferred when group sizes very different

AnovaContrastResult(contrast, errorfactor.df, f, p, effectsize)
end

"""
differencecontrast(anovaresult::AnovaData, factorindex = 1, reverseorder = false)
Compute orthogonal contrasts on the factor levels in the original order. Forward direction also known as
a "Helmert" contrast; revere direction may also be called "Difference" (as in SPSS). See `contrast`
function for more.
"""
function differencecontrasts(anovaresult::AnovaData, reverseorder::Bool = false, factorindex::Int = 1)
0 < factorindex anovaresult.ncrossedfactors || error("factor index must be a valid crossed factor")

levels = anovaresult.ncrossedfactorlevels[1]
groupassignments = [[repeat([0], i - 1); 1; repeat([2], levels - i)] for i 1:(levels - 1)]
reverseorder && reverse!.(groupassignments)

AnovaContrastResults(contrast.(Ref(anovaresult), groupassignments, factorindex))
end

"""
repeatedcontrast(anovaresult::AnovaData, factorindex = 1)
Compute contrasts between neighboring levels. Non-orthogonal. See `contrast` function for more.
"""
function repeatedcontrasts(anovaresult::AnovaData, factorindex::Int = 1)
0 < factorindex anovaresult.ncrossedfactors || error("factor index must be a valid crossed factor")

levels = anovaresult.ncrossedfactorlevels[factorindex]
groupassignments = [[repeat([0], i - 1); 1; 2; repeat([0], levels - i - 1)] for i 1:(levels - 1)]

AnovaContrastResults(contrast.(Ref(anovaresult), groupassignments, factorindex))
end

"""
simplecontrast(anovaresult::AnovaData, controlindex = 1, factorindex = 1)
Compute contrasts of each level to a single level (control). Non-orthogonal. See `contrast` function for more.
"""
function simplecontrasts(anovaresult::AnovaData, controlindex::Int = 1, factorindex::Int = 1)
0 < factorindex anovaresult.ncrossedfactors || error("factor index must be a valid crossed factor")
0 < controlindex anovaresult.ncrossedfactorlevels[factorindex] || error("index must be for a valid factor level")

levels = anovaresult.ncrossedfactorlevels[factorindex]
groupassignments = [zeros(Int, levels) for i 1:(levels - 1)]
otherlevels = (1:levels)[Not(controlindex)]
for i 1:(levels - 1)
groupassignments[i][controlindex] = 1
groupassignments[i][otherlevels[i]] = 2
end

AnovaContrastResults(contrast.(Ref(anovaresult), groupassignments, factorindex))
end

#= To do this one, need to be able to code a factor level as in both groups
"""
deviationcontrasts(anovaresult::AnovaData, controlindex = 1, factorindex = 1)
Compute contrasts of each level except one (control) to all levels. Non-orthogonal.
See `contrast` function for more.
"""
function deviationcontrast(anovaresult::AnovaData, controlindex = 1, factorindex = 1)
0 < factorindex ≤ anovaresult.ncrossedfactors || error("factor index must be a valid crossed factor")
0 < controlindex ≤ anovaresult.ncrossedfactorlevels[1] || error("index must be for a valid factor level")
levels = anovaresult.ncrossedfactorlevels[1]
groupassignments = [zeros(Int, levels) for i ∈ 1:(levels - 1)]
otherlevels = (1:levels)[Not(controlindex)]
for i ∈ 1:(levels - 1)
groupassignments[i][controlindex] = 1
groupassignments[i][otherlevels[i]] = 2
end
[groupassignments[i][i] = 1 for i ∈ (1:levels)[Not(controlindex)]]
[groupassignments[i][i] = 1 for i ∈ (1:levels)[Not(controlindex)]]
contrast.(Ref(anovaresult), groupassignments)
end
=#
81 changes: 81 additions & 0 deletions src/data/AnovaContrastResult.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""
AnovaValue <: AnovaEffect
A set of values for an Anova item for which a mean square is not required.
`contrast` - the contrast value
`df` - degrees of freedom
`f` - the F statistic
`p` - the probability of a Type I error (incorrect rejection of null hypothesis)
`r` - the effect size
"""
struct AnovaContrastResult <: AnovaEffect
contrast::Float64
df::Float64
f::Float64
p::Float64
r::Float64
end

struct AnovaContrastResults
results::Vector{AnovaContrastResult}
end

import Base.isapprox
isapprox(x::AnovaContrastResults, y::AnovaContrastResults) = x.results .≈ y.results
isapprox(x::AnovaContrastResult, y::AnovaContrastResult) =
x.contrast y.contrast &&
x.df y.df &&
x.f y.f &&
x.p y.p &&
x.r y.r

import Base.show
show(io::IO, acr::AnovaContrastResult) = show(io, AnovaContrastResults([acr]))
function show(io::IO, acr::AnovaContrastResults)
colnames = ["Contrast", "DF", "F", "p", "r"]
nrows = length(acr.results)

compactshow(x) = sprint(show, x, context=:compact=>true)

contrast = [e.contrast |> compactshow for e acr.results]
df = [e.df |> Int |> compactshow for e acr.results]
f = [e.f |> compactshow for e acr.results]
p = [e.p |> compactshow for e acr.results]
r = [e.r |> compactshow for e acr.results]

ndec = [length.(last.(split.(values, "."))) for values [contrast, f, p, r]]
maxndec = maximum.(ndec)
rpadding = [maxndec[i] .- ndec[i] for i 1:4]

ss = [rpad(contrast[i], rpadding[1][i] + length(contrast[i])) for i 1:nrows]
f = [rpad(f[i], rpadding[2][i] + length(f[i])) for i 1:nrows]
p = [rpad(p[i], rpadding[3][i] + length(p[i])) for i 1:nrows]
r = [rpad(r[i], rpadding[4][i] + length(r[i])) for i 1:nrows]

colwidths = [length.(values) |> maximum for values [[colnames[1]; contrast],
[colnames[2]; df],
[colnames[3]; f],
[colnames[4]; p],
[colnames[5]; r]]]
colwidths[2:end] .+= 2

headerrow = join(lpad.(colnames, colwidths))
separator = repeat("-", sum(colwidths))
rows = [lpad(contrast[i], colwidths[1]) *
lpad(df[i], colwidths[2]) *
lpad(f[i], colwidths[3]) *
lpad(p[i], colwidths[4]) *
lpad(r[i], colwidths[5]) for i 1:nrows]

println(io)
println(io, "Analysis of Variance Contrast Results")
println(io)
println(io, headerrow)
println(io, separator)
foreach(i -> println(io, rows[i]), 1:length(rows))
end
Loading

0 comments on commit 5e508e8

Please sign in to comment.