-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path003_LR_model.R
65 lines (58 loc) · 2.64 KB
/
003_LR_model.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
library(lme4)
library(tidyverse)
library(foreach)
library(doParallel)
library(AUC)
source("001_data_import.R")
# Import data for LR
df <- import_data_model(model = "LR")
eval_LR <- function(df, step_ahead = 1) {
# This function evaluates the Logistic Regression model of the Ahn et Al.(2017)
# With a forward chaining method
# Inputs
# df : dataframe computed by import_data_model(model = "LR") from "001_data_import.R"
# step-ahead : number of prediction steps at each step of the chain
# Ouputs :
# df containing for each size of training set, scores averaged over all filtered patients :
# the brier and AUROC scores
# for the log reg model and two benchmark models
# Maximum size of the training set
max_length = max(df$day) - step_ahead
# Make clusters for parallel computing
n_cores = 25
registerDoParallel(n_cores)
# Loop over all the training set sizes n_x
scores <- foreach (n_x = seq(10, max_length , 5), .combine = rbind) %dopar% {
# train test split
training_set <- filter(df, day < n_x)
test_set <- filter(df, (n_x <= day) & (day <= (n_x + step_ahead - 1)))[, c(1:6, 13:23)]
y = test_set$symptom
# Fit the glmm
glmm <- glmer(symptom~ temperature + PM10 + NO2 + DTR + RH + O3 + SCORAD + rainfall + factor(DOW) + factor(fever) + factor(sex) + (1|ID), data = training_set, binomial)
# Predict with glmm and two benchmark modesl
y_pred_glm = predict(glmm, newdata = test_set, type = "response")
y_pred_unif = rep(0.5, length(y_pred_glm)) # Uniform forecast
y_pred_hist <- unlist(lapply(1:length(y_pred_glm),
function(obs) {
ID_ <- test_set$ID[obs]
return(sum(filter(training_set, ID == ID_)$symptom) / length(filter(training_set, ID == ID_)$symptom))
}))
# Compute metrics
auroc_glm = auc(roc(y_pred_glm, factor(y)))
auroc_unif = auc(roc(y_pred_unif, factor(y)))
auroc_hist = auc(roc(y_pred_hist, factor(y)))
metrics = data.frame(training_size = rep(n_x, length(y)),
brier = (as.integer(y) - y_pred_glm)^2,
brier_unif = (as.integer(y) - y_pred_unif)^2,
brier_hist = (as.integer(y) - y_pred_hist)^2,
auroc = rep(auroc_glm, length(y)),
auroc_unif = rep(auroc_unif, length(y)),
auroc_hist = rep(auroc_hist, length(y))
)
return(metrics)
}
stopImplicitCluster()
return(as_tibble(scores))
}
# Run LR forward chaining evaluation
eval_LR_res <- eval_LR(df, 1)