Skip to content

Commit

Permalink
Added ω² effect size calculations (#9)
Browse files Browse the repository at this point in the history
* Implemented effect size calculations

3-factor random effect size calculations are just guessing.

* Fixes

* Fixes

* Adjusted omega squared

* Added partial tests and accompanying fixes

* Nested factors tests and fixes

* Finished going through tests and making fixes.
BioTurboNick authored Oct 3, 2019

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
1 parent c5601ea commit e19a408
Showing 6 changed files with 220 additions and 106 deletions.
115 changes: 104 additions & 11 deletions src/anova.jl
Original file line number Diff line number Diff line change
@@ -49,6 +49,7 @@ anova(observations, [nested, random]) # N-way fixed-effects ANOVA with 1 ra
- mean square (MS): SS / DF. Corrects for the larger variance expected if random values can be assigned to more bins. Also called "mean squared error" or "mean squared deviation."
- F-statistic: The division of MS values produce a result belonging to the "F distribution", the shape of which depends on the DF of the numerator and denominator. The location of this value on the distribution provides the p-value.
- p-value: The probability that, if all measurements had been drawn from the same population, you would obtain data at least as extreme as contained in your observations.
- effect size: The standardized difference in the measurement caused by the factor.
"""
function anova(observations::AbstractArray{T}, factortypes::Vector{FactorType} = FactorType[]; factornames::Vector{<:AbstractString} = String[], hasreplicates = true) where {T <: Union{Number, AbstractVector{<:Number}}}
length(observations) > 0 || return
@@ -143,22 +144,30 @@ function anovakernel(observations, nreplicates, ncells, nnestedfactors, ncrossed

# collapse replicate dimension
cellsums = eltype(observations) <: Number && nreplicates == 1 ? observations : sumfirstdim(observations)
C = sum(cellsums) ^ 2 / N
total = totalcalc(observations, N, C)
amongallnested, crossedcellsums, ncrossedfactorlevels, nnestedfactorlevels = amongnestedfactorscalc(cellsums, nfactorlevels, nnestedfactors, nreplicates, C)
cells = cellscalc(cellsums, nreplicates, ncells, C)
constant = sum(cellsums) ^ 2 / N
total = totalcalc(observations, N, constant)
amongallnested, crossedcellsums, ncrossedfactorlevels, nnestedfactorlevels = amongnestedfactorscalc(cellsums, nfactorlevels, nnestedfactors, nreplicates, constant)
cells = cellscalc(cellsums, nreplicates, ncells, constant)

crossedfactors = factorscalc(crossedcellsums, ncrossedfactors, ncrossedfactorlevels, N, C, crossedfactornames) # 3kb allocated here, possibly can't be avoided
interactions, interactionsmap = interactionscalc(cells, crossedcellsums, crossedfactors, ncrossedfactors, ncrossedfactorlevels, nnestedfactorlevels, nreplicates, C, crossedfactornames) # 9 kb allocated here!
crossedfactors = factorscalc(crossedcellsums, ncrossedfactors, ncrossedfactorlevels, N, constant, crossedfactornames) # 3kb allocated here, possibly can't be avoided
interactions, interactionsmap = interactionscalc(cells, crossedcellsums, crossedfactors, ncrossedfactors, ncrossedfactorlevels, nnestedfactorlevels, nreplicates, constant, crossedfactornames) # 9 kb allocated here!
nestedfactors = nestedfactorscalc(amongallnested, nnestedfactors, crossedfactors, interactions, nestedfactornames)
error = errorcalc(total, amongallnested, cells, [crossedfactors; interactions[1:end-1]], nnestedfactors, nreplicates)

reverse!(crossedfactors)

numerators = getnumerators(crossedfactors, ncrossedfactors, nnestedfactors, nestedfactors, interactions)
crossedbasedenominator = nnestedfactors > 0 ? nestedfactors[1] : error;
denominators = getdenominators(nnestedfactors, nestedfactors, nreplicates, crossedbasedenominator, error, total, crossedfactors, ncrossedfactors, crossedfactortypes, interactionsmap)# 2-3kb allocated, 50 allocations


numerators[1:ncrossedfactors] = reverse(numerators[1:ncrossedfactors])
if (ncrossedfactors > 2) # some hacky reverse stuff going on just to make it work. Really needs to be reworked to avoid.
denominators[1:ncrossedfactors] = reverse(denominators[1:ncrossedfactors])
end
reverse!(ncrossedfactorlevels)
if nnestedfactors > 1
reverse!(nnestedfactorlevels)
end

# drop least significant test if nreplicates == 1; either the lowest interaction level, or lowest nesting level if present
if nreplicates == 1
droppedfactor = pop!(numerators)
@@ -170,6 +179,9 @@ function anovakernel(observations, nreplicates, ncells, nnestedfactors, ncrossed

npercrossedcell = nreplicates * prod(nnestedfactorlevels)
crossedcellmeans = crossedcellsums ./ npercrossedcell

results = effectsizescalc(results, denominators, total, ncrossedfactors, npercrossedcell, ncrossedfactorlevels, crossedfactortypes, nnestedfactors, nnestedfactorlevels, nreplicates) # note: effect size doesn't account for nesting

data = AnovaData([total; results], total, ncrossedfactors, ncrossedfactorlevels, npercrossedcell, crossedfactors, denominators[1:ncrossedfactors], crossedcellmeans)
nnestedfactors > 0 && nreplicates == 1 && push!(data.effects, droppedfactor)
error.df > 0 && push!(data.effects, error)
@@ -340,13 +352,14 @@ function getdenominators(nnestedfactors, nestedfactors, nreplicates, crossedbase
crosseddenominators = Vector{AnovaFactor}(undef, ncrossedfactors)
for i 1:ncrossedfactors
otherfactors = (1:ncrossedfactors)[Not(i)]

j = otherfactors[1]
k = otherfactors[2]
crosseddenominators[i] = threewayinteraction(interactionsmap[(i,j)], interactionsmap[(i,k)], interactionsmap[(1,2,3)])
end
pairwiseinteractiondenominators = repeat([interactionsmap[(1,2,3)]], ncrossedfactors)
elseif count(f -> f == random, crossedfactortypes) == 1
i = findfirst(f -> f == random, crossedfactortypes)
i = findfirst(f -> f == random, reverse(crossedfactortypes))

crosseddenominators = Vector{AnovaFactor}(undef, ncrossedfactors)
crosseddenominators[i] = crossedbasedenominator
@@ -361,7 +374,7 @@ function getdenominators(nnestedfactors, nestedfactors, nreplicates, crossedbase
pairwiseinteractiondenominators[fixedinteractionindex] = interactionsmap[(1,2,3)]
pairwiseinteractiondenominators[Not(fixedinteractionindex)] .= crossedbasedenominator
elseif count(f -> f == random, crossedfactortypes) == 2
i = findfirst(f -> f == fixed, crossedfactortypes)
i = findfirst(f -> f == fixed, reverse(crossedfactortypes))
otherfactors = (1:ncrossedfactors)[Not(i)]
j = otherfactors[1]
k = otherfactors[2]
@@ -376,7 +389,7 @@ function getdenominators(nnestedfactors, nestedfactors, nreplicates, crossedbase
pairwiseinteractiondenominators[Not(randominteractionindex)] .= interactionsmap[(1,2,3)]
end

denominators = [crosseddenominators; pairwiseinteractiondenominators; crossedbasedenominator]
denominators = [crosseddenominators; reverse(pairwiseinteractiondenominators); crossedbasedenominator]
end

# determine correct denominators for nested factors
@@ -404,3 +417,83 @@ function ftest(x, y)
p = ccdf(fdist, f)
AnovaResult(x, f, p)
end

function effectsizescalc(results, denominators, total, ncrossedfactors, npercrossedcell, ncrossedfactorlevels, crossedfactortypes, nnestedfactors, nnestedfactorlevels, nreplicates)
differences = [results[i].ms - denominators[i].ms for i eachindex(results)]
crossedfactordfs = [r.df for r results[1:ncrossedfactors]]

if nreplicates == 1 && nnestedfactors > 0
nnestedfactors -= 1
nnestedfactorlevels = nnestedfactorlevels[1:(end-1)]
end

if ncrossedfactors == 1
if nnestedfactors == 0
ω² = [(results[1].ss - results[1].df * denominators[1].ms) / (total.ss + denominators[1].ms)]
else
effectdenominators = repeat([nreplicates], nnestedfactors + 1)
nfactorlevels = [ncrossedfactorlevels; nnestedfactorlevels]
effectdenominators[1] *= prod(nfactorlevels)
factors = ones(nnestedfactors + 1)
factors[1] = crossedfactordfs[1]
for i 2:nnestedfactors
effectdenominators[2:(end - i + 1)] .*= nfactorlevels[end - i + 2]
end
σ² = factors .* differences ./ effectdenominators
σ²total = sum(σ²) + denominators[end].ms
ω² = σ² ./ σ²total
end
else
if ncrossedfactors == 2
if npercrossedcell > 1
interactions = [[1,2]]
imax = 3
else
interactions = []
imax = 2
end
else
if npercrossedcell > 1
interactions = [[1,2], [1,3], [2,3], [1,2,3]]
imax = 7
else
interactions = [[1,2], [1,3], [2,3]]
imax = 6
end
end

icrossed = 1:ncrossedfactors
iother = (ncrossedfactors + 1):imax
factors = zeros(imax)
factors[icrossed] = [crossedfactortypes[i] == fixed ? crossedfactordfs[i] : 1 for i icrossed]
factors[iother] = [prod(factors[x]) for x interactions]
effectsdenominators = repeat([npercrossedcell], imax)

israndom = crossedfactortypes .== random
isfixed = crossedfactortypes .== fixed
crossedeffectsdenominators = effectsdenominators[icrossed]
crossedeffectsdenominators[isfixed] .*= prod(ncrossedfactorlevels)
crossedeffectsdenominators[israndom] .*= [prod(ncrossedfactorlevels[Not(i)]) for i icrossed[israndom]]
effectsdenominators[icrossed] = crossedeffectsdenominators
effectsdenominators[iother] .*= [prod(ncrossedfactorlevels[Not(icrossed[israndom] x)]) for x interactions]

σ² = factors .* differences[1:imax] ./ effectsdenominators

if nnestedfactors > 0
nestedrange = (length(results) .- nnestedfactors .+ 1):length(results)
nestedeffectdenominators = repeat([nreplicates], nnestedfactors)
for i 1:(nnestedfactors - 1)
nestedeffectdenominators[1:(end - i + 1)] .*= nnestedfactorlevels[end - i + 2]
end
σ²nested = differences[nestedrange] ./ nestedeffectdenominators
σ² = [σ²; σ²nested]
end

σ²total = sum(σ²) + denominators[end].ms
ω² = σ² ./ σ²total
end



return AnovaResult.(results, ω²)
end
14 changes: 9 additions & 5 deletions src/data/AnovaData.jl
Original file line number Diff line number Diff line change
@@ -16,7 +16,7 @@ end

import Base.show
function show(io::IO, ad::AnovaData)
colnames = ["Effect", "SS", "DF", "MS", "F", "p"]
colnames = ["Effect", "SS", "DF", "MS", "F", "p", "ω²"]
rownames = [e.name for e ad.effects]
nrows = length(ad.effects)

@@ -27,22 +27,25 @@ function show(io::IO, ad::AnovaData)
ms = [typeof(e) [AnovaFactor, AnovaResult] ? e.ms |> compactshow : "" for e ad.effects]
f = [e isa AnovaResult ? e.f |> compactshow : "" for e ad.effects]
p = [e isa AnovaResult ? e.p |> compactshow : "" for e ad.effects]
ω² = [e isa AnovaResult ? e.ω² |> compactshow : "" for e ad.effects]

ndec = [length.(last.(split.(values, "."))) for values [ss, ms, f, p]]
ndec = [length.(last.(split.(values, "."))) for values [ss, ms, f, p, ω²]]
maxndec = maximum.(ndec)
rpadding = [maxndec[i] .- ndec[i] for i 1:4]
rpadding = [maxndec[i] .- ndec[i] for i 1:5]

ss = [rpad(ss[i], rpadding[1][i] + length(ss[i])) for i 1:nrows]
ms = [rpad(ms[i], rpadding[2][i] + length(ms[i])) for i 1:nrows]
f = [rpad(f[i], rpadding[3][i] + length(f[i])) for i 1:nrows]
p = [rpad(p[i], rpadding[4][i] + length(p[i])) for i 1:nrows]
ω² = [rpad(ω²[i], rpadding[5][i] + length(ω²[i])) for i 1:nrows]

colwidths = [length.(values) |> maximum for values [[colnames[1]; rownames],
[colnames[2]; ss],
[colnames[3]; df],
[colnames[4]; ms],
[colnames[5]; f],
[colnames[6]; p]]]
[colnames[6]; p],
[colnames[7]; ω²]]]
colwidths[2:end] .+= 2

headerrow = join(lpad.(colnames, colwidths))
@@ -52,7 +55,8 @@ function show(io::IO, ad::AnovaData)
lpad(df[i], colwidths[3]) *
lpad(ms[i], colwidths[4]) *
lpad(f[i], colwidths[5]) *
lpad(p[i], colwidths[6]) for i 1:nrows]
lpad(p[i], colwidths[6]) *
lpad(ω²[i], colwidths[7]) for i 1:nrows]

println(io)
println(io, "Analysis of Variance Results")
12 changes: 9 additions & 3 deletions src/data/AnovaResult.jl
Original file line number Diff line number Diff line change
@@ -13,7 +13,9 @@ A set of values for an Anova factor which has been tested
`f` - the F statistic
`p` - the probability of a Type I error (imcorrect rejection of null hypothesis)
`p` - the probability of a Type I error (incorrect rejection of null hypothesis)
`ω²` - the effect size
"""
struct AnovaResult <: AnovaEffect
name::String
@@ -22,9 +24,12 @@ struct AnovaResult <: AnovaEffect
ms::Float64
f::Float64
p::Float64
ω²::Float64
end

AnovaResult(factor, f, p) = AnovaResult(factor.name, factor.ss, factor.df, factor.ms, f, p)
AnovaResult(factor, f, p) = AnovaResult(factor.name, factor.ss, factor.df, factor.ms, f, p, 0)

AnovaResult(result, ω²) = AnovaResult(result.name, result.ss, result.df, result.ms, result.f, result.p, ω²)

import Base.isapprox
isapprox(x::AnovaResult, y::AnovaResult) =
@@ -33,6 +38,7 @@ isapprox(x::AnovaResult, y::AnovaResult) =
x.df == y.df &&
x.ms y.ms &&
x.f y.f &&
x.p y.p
x.p y.p &&
x.ω² y.ω²

export AnovaResult
152 changes: 81 additions & 71 deletions test/test_anova.jl

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions test/test_pretests.jl
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@
@testset "Multidimensional Array" begin
@testset "1-way ANOVA" begin
expected = [AnovaValue( "Total", 48.24912, 19),
AnovaResult("Groups", 1.69792, 3, 0.56597333, 0.19452932, 0.89858),
AnovaResult("Groups", 1.69792, 3, 0.56597333, 0.19452932, 0.89858, 0),
AnovaFactor( "Error", 46.5512, 16, 2.90945)]

@testset "Replicate Vectors" begin
@@ -33,7 +33,7 @@

@testset "2-way ANOVA" begin
expected = [AnovaValue( "Total", 135.172, 19),
AnovaResult("Groups", 89.24992, 3, 29.749973, 10.365375, 0.00049480716),
AnovaResult("Groups", 89.24992, 3, 29.749973, 10.365375, 0.00049480716, 0),
AnovaFactor( "Error", 45.92208, 16, 2.87013)]

@testset "Replicate Vectors" begin
@@ -82,7 +82,7 @@

@testset "3-way ANOVA" begin
expected = [AnovaValue( "Total", 0.50277778, 71),
AnovaResult("Groups", 0.077777778, 17, 0.0045751634, 0.58131488, 0.8912173),
AnovaResult("Groups", 0.077777778, 17, 0.0045751634, 0.58131488, 0.8912173, 0),
AnovaFactor( "Error", 0.425, 54, 0.0078703704)]

@testset "Replicate Vectors" begin
@@ -142,7 +142,7 @@
repeat([repeat([1], 4); repeat([2], 4); repeat([3], 4)], 6)]

expected = [AnovaValue( "Total", 0.50277778, 71),
AnovaResult("Groups", 0.077777778, 17, 0.0045751634, 0.58131488, 0.8912173),
AnovaResult("Groups", 0.077777778, 17, 0.0045751634, 0.58131488, 0.8912173, 0),
AnovaFactor( "Error", 0.425, 54, 0.0078703704)]

@testset "3-way ANOVA ordered" begin
@@ -194,7 +194,7 @@
repeat([repeat([1], 4); repeat([2], 4); repeat([3], 4)], 6)]

expected = [AnovaValue( "Total", 0.50277778, 71),
AnovaResult("Groups", 0.077777778, 17, 0.0045751634, 0.58131488, 0.8912173),
AnovaResult("Groups", 0.077777778, 17, 0.0045751634, 0.58131488, 0.8912173, 0),
AnovaFactor( "Error", 0.425, 54, 0.0078703704)]

@testset "3-way ANOVA shuffled" begin
23 changes: 12 additions & 11 deletions test/test_show.jl
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
@testset "Display Tests" begin
@testset "AnovaData" begin
data = [AnovaValue( "Total", 1827.6975, 19),
AnovaResult( "A", 70.3125, 1, 70.3125, 3.0706495, 0.098856175),
AnovaResult( "B", 1386.1125, 1, 1386.1125, 60.533556, 7.9430782e-7),
AnovaResult("A × B", 4.9005, 1, 4.9005, 0.21401199, 0.64987001),
AnovaFactor( "C", 366.372, 16, 22.89825)]
AnovaResult( "A", 70.3125, 1, 70.3125, 3.0706495, 0.098856175, 0.025621074),
AnovaResult( "B", 1386.1125, 1, 1386.1125, 60.533556, 7.9430782e-7, 0.73663535),
AnovaResult("A × B", 4.9005, 1, 4.9005, 0.21401199, 0.64987001, -0.0097253817),
AnovaFactor(" C", 366.372, 16, 22.89825)]

result = AnovaData(data, data[1], 2, [2,2], 5, [AnovaFactor(r.name, r.ss, r.df, r.ms) for r data[2:3]], [data[5], data[5]], Float64[])

expectedlines = ["",
"Analysis of Variance Results",
"",
"Effect SS DF MS F p",
"-------------------------------------------------------",
" Total 1827.7 19 ",
" A 70.3125 1 70.3125 3.07065 0.0988562 ",
" B 1386.11 1 1386.11 60.5336 7.94308e-7",
" A × B 4.9005 1 4.9005 0.214012 0.64987 ",
" C 366.372 16 22.8983 ",
"Effect SS DF MS F p ω²",
"--------------------------------------------------------------------",
" Total 1827.7 19 ",
" A 70.3125 1 70.3125 3.07065 0.0988562 0.0256211 ",
" B 1386.11 1 1386.11 60.5336 7.94308e-7 0.736635 ",
" A × B 4.9005 1 4.9005 0.214012 0.64987 -0.00972538",
" C 366.372 16 22.8983 ",
""]

@test sprint(show, result) == join(expectedlines, "\n")

2 comments on commit e19a408

@BioTurboNick
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Error while trying to register: "Tag with name 0.4.0 already exists and points to a different commit"

Please sign in to comment.