-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathModeling_heterogeneity_variance.R
93 lines (61 loc) · 2.99 KB
/
Modeling_heterogeneity_variance.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# Modeling heterogeneity in residual variance.
# This is a brief tutorial demonstrating how to model heterogeneity in variance, first by writing out the support function, and then using the pre-built gls() in the nlme library
library(nlme)
require(bbmle)
# library(AED) # This has the example dataset for the first example (from the Zuur book).
Squid <- read.table("Squid.txt", h=T)
# http://highstat.com/Book2/AED_1.0.zip
# Install from local binary package
### Let us motivate this with an example
# This data set was to investigate factors that influenced squid sexual maturity
#data(Squid) # data is in the AED package
str(Squid) # DML is dorsal mantle length
Squid$fMONTH <- factor(Squid$MONTH)
M.1 <- lm(Testisweight ~ DML*fMONTH, data=Squid)
summary(M.1)
# full model
op <- par(mfrow = c(2,2),
mar = c(4, 4, 2, 2))
plot(M.1, which = c(1))
plot(Squid$fMONTH, resid(M.1),
xlab = "Month", ylab = "Residuals")
plot(Squid$DML, resid(M.1),
xlab = "Dorsal Mantle Length", ylab = "Residuals")
par(op)
summary(M.1)
# Clearly variance increases with DML, and the residuals show a great deal of heterogeneity.
### Explicit MLE approach (writing out our own Support function)
# Just as we have done for the "fixed" or deterministic part of a model using MLE, we can model the heterogeneity in the variance.
# In this case, since it looks like the heterogeneity in residual variation increases with increasing DML, we can start there.
design.M.1 <- model.matrix(~ DML, data = Squid) # Design matrix for this
NLL.M.1.HET.VAR <- function(b0, b1, sig){
het.sig <- sig*sqrt(design.M.1[,2]) # Here is what's new.
#Modeling the variance as the sqrt of DML.
#See Chapter 5.2 in Pinheiro and Bates (page 209) for why we use this "link" function for the variance.
det <- b0 + b1*design.M.1[,2] # We write out our deterministic part like before.
-sum(dnorm(Squid$Testisweight, mean=det, sd=het.sig, log=T))}
M.1.Het <- mle2(NLL.M.1.HET.VAR ,
start = list(b0 = 0, b1 = 0, sig = 0.01))
summary(M.1.Het)
logLik(M.1.Het)
# Remember that the variance is no longer a constant, so that "sig" represents the slope of the change in residual variance as DML changes
# There is a pre-built function in the nlme library, gls() : GENERALIZED LEAST SQUARES
# Compare this to gls
vf1Fixed <- varFixed(~DML)
M.1.gls <- gls(Testisweight ~ DML,
weights = vf1Fixed,
data=Squid, method = "ML")
# The default method is REML, which is generally better, but for comparison with our ML approach above, we need to use method="ML"
summary(M.1.gls)
logLik(M.1.gls)
M.1.gls$sigma # The parameter for the variance.
plot(M.1.gls)
M.1.lm <- lm(Testisweight ~ DML, data = Squid)
summary(M.1.lm)
# Now that you get the general idea, let's stick with the gls() so we can keep the script short!
vf1Fixed <- varFixed(~DML)
M.2.gls <- gls(Testisweight ~ DML*fMONTH,
weights = vf1Fixed,
data=Squid) # using default "REML" method/
summary(M.2.gls)
M.2.gls$sigma