# tenfold_cv_lmer.R
# Script to demonstrate 10-fold cross validation on a mixed effects model with lme4
# Procedure:
# 1. Split the data into ten "folds": randomly chosen subsets of the data, all of roughly the same size
# 2. Fit the mixed effects model 10 times. Each time, hold out one of the folds, so each model is fit with ~90% of the data.
# 3. For each of the 10 fits, predict the y values for the held-out fold (the ~10% of the data that was not used for that fit).
# 4. Now that you have a predicted and true value for all the data points, calculate the root mean squared error.
# This tells you how well the model did at predicting values from out-of-sample data (meaning data that were not used to fit the model)
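# As a concrete reference for step 4, RMSE is sqrt(mean((predicted - observed)^2)).
# A tiny helper along these lines computes it (the name "rmse" is just illustrative;
# the script below computes the same quantity inline with apply()):
rmse <- function(predicted, observed) sqrt(mean((predicted - observed)^2))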
#### Example with fake data
# Create some fake data -- with block as random effect, x1 and x2 as fixed effects, and y as outcome
set.seed(2)
n <- 1234
x1 <- 1:n + rnorm(n, mean=0, sd=5)
x2 <- 10 + rnorm(n, mean=0, sd=5)
block <- rep_len(letters[1:10], n)
y <- 2 * x1 + -1 * x2 + rnorm(n, mean=0, sd=2)
dat <- data.frame(y=y, x1=x1, x2=x2, block=block)
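# Optional quick look at the simulated data (1234 rows). In recent R versions block stays a
# character vector here; lmer() treats it as a grouping factor either way.
str(dat)
head(dat)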
### 1. Assign each data point to one of ten folds
fold_ID <- sample(rep_len(1:10, nrow(dat)))
table(fold_ID) # As you can see, the folds are as evenly sized as possible.
dat$fold <- fold_ID
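# Optional sanity check: every block should appear in (nearly) every fold, so that no
# training set is missing a block level. predict() on a merMod errors if the new data
# contain a grouping-factor level the model never saw, unless allow.new.levels = TRUE.
table(dat$block, dat$fold)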
### 2. Fit the mixed effects model 10 times, each time holding out one fold.
# We will do this with a for loop. Each time through the loop, we fit several candidate models so that we can compare them later.
model_fits <- list() # Initialize empty list
library(lme4)
for (i in 1:10) {
  dat_train <- subset(dat, fold_ID != i) # Training data: every fold except fold i (fold i is held out).
  # Fit models
  null_model <- lmer(y ~ (1 | block), data = dat_train) # Null model with no fixed effects
  model1 <- lmer(y ~ x1 + (1 | block), data = dat_train) # Only x1 as a fixed effect
  model2 <- lmer(y ~ x2 + (1 | block), data = dat_train) # Only x2 as a fixed effect
  model3 <- lmer(y ~ x1 + x2 + (1 | block), data = dat_train) # x1 and x2 as fixed effects
  model4 <- lmer(y ~ x1 + x2 + x1:x2 + (1 | block), data = dat_train) # x1, x2, and their interaction as fixed effects
  # Save the fitted models for this fold in the list
  model_fits[[i]] <- list(null_model = null_model, model1 = model1, model2 = model2, model3 = model3, model4 = model4)
}
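# (Optional) Inspect one of the saved fits, e.g. the two-predictor model trained without fold 1.
# This is just an illustrative check; the cross-validation itself only needs the predictions below.
fixef(model_fits[[1]]$model3)   # fixed-effect estimates
VarCorr(model_fits[[1]]$model3) # random-effect variance components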
### 3. Use each fitted model to predict the values in its held-out fold, and calculate the squared error for each data point.
squared_errors <- list() # Initialize empty list
for (i in 1:10) {
  dat_to_predict <- subset(dat, fold_ID == i) # Get the ~10% of data in fold i.
  # Predicted values for each data point for each of the models.
  predicted_values_i <- data.frame(null_model = predict(model_fits[[i]]$null_model, dat_to_predict, type = 'response'),
                                   model1 = predict(model_fits[[i]]$model1, dat_to_predict, type = 'response'),
                                   model2 = predict(model_fits[[i]]$model2, dat_to_predict, type = 'response'),
                                   model3 = predict(model_fits[[i]]$model3, dat_to_predict, type = 'response'),
                                   model4 = predict(model_fits[[i]]$model4, dat_to_predict, type = 'response'))
  # Calculate the difference between predicted and true value for each model.
  # The following line subtracts the column of observed values (y) from each column of the predicted values data frame.
  errors_i <- sweep(predicted_values_i, 1, dat_to_predict$y, `-`)
  # Square the errors and save them in the list
  squared_errors[[i]] <- errors_i^2
}
### 4. Combine the prediction error values from each fold into one data frame, then calculate root mean squared error (RMSE) for each model.
squared_errors_all <- do.call(rbind, squared_errors) # Combines the list of 10 squared error dataframes into one dataframe
# Calculate root mean squared error for each column.
apply(squared_errors_all, 2, function(x) sqrt(mean(x)))
# This result shows that the model with only x2 as a predictor does not improve on the null model:
# model2 has about the same RMSE as the null model. (I set up the fake data on purpose to show this.)
# However, x1 alone does much better (model1), and x1 and x2 together do even better (model3).
# Adding the interaction on top of the two separate predictors does not improve the RMSE further:
# model3 and model4 have about the same RMSE. (This, too, is by construction of the fake data!)
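# A per-fold breakdown can also be informative. One possible follow-up (a small sketch using
# only objects created above): compute the RMSE separately within each fold, giving a
# models-by-folds matrix so you can see how much the error varies from fold to fold.
rmse_by_fold <- sapply(squared_errors, function(se) sqrt(colMeans(se)))
round(rmse_by_fold, 2) # rows = models, columns = folds 1 through 10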