From b8c2499b79f5bcf79460b6ddd0128985e65c56c7 Mon Sep 17 00:00:00 2001
From: Nima Hejazi <nh@nimahejazi.org>
Date: Mon, 15 May 2023 15:46:23 -0400
Subject: [PATCH] replace ::: with :: throughout

---
 05-origami.Rmd      |  5 ++-
 06-sl3.Rmd          | 74 ++++++++++++++++++++++-----------------------
 R_code/02-tlverse.R |  3 +-
 R_code/05-origami.R | 20 ++++++------
 R_code/06-sl3.R     | 53 +++++++++++++++++---------------
 5 files changed, 80 insertions(+), 75 deletions(-)

diff --git a/05-origami.Rmd b/05-origami.Rmd
index 9aa8ff8..4d92785 100644
--- a/05-origami.Rmd
+++ b/05-origami.Rmd
@@ -236,7 +236,6 @@ number.
 library(data.table)
 library(origami)
 library(knitr)
-library(kableExtra)
 
 # load data set and take a peek
 washb_data <- fread(
@@ -257,7 +256,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   head(washb_data) %>%
     kable() %>%
-    kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 ```
@@ -834,7 +833,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   head(washb_data) %>%
     kable() %>%
-    kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 ```
diff --git a/06-sl3.Rmd b/06-sl3.Rmd
index 7dbc347..7b6eb88 100644
--- a/06-sl3.Rmd
+++ b/06-sl3.Rmd
@@ -122,7 +122,6 @@ First, we need to load the data and relevant packages into the R session.
 
 ```{r setup-handbook-utils-noecho, echo = FALSE}
 library(knitr)
-library(kableExtra)
 library(data.table)
 ```
 
@@ -152,7 +151,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   head(washb_data) %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 ```
@@ -462,7 +461,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   head(df_plot) %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 ```
@@ -518,7 +517,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   head(cv_preds_option1) %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 ```
@@ -767,7 +766,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   cv_risk_table[,c(1:3)] %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 ```
@@ -903,7 +902,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   cv_sl_fit$cv_risk[,c(1:3)] %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 ```
@@ -924,20 +923,20 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   round(cv_sl_fit$coef, 3) %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 ```
 ### Revere-cross-validated predictive performance of Super Learner
 
-We can also use so-called "revere", to obtain a partial CV risk for the SL, 
-where the SL candidate learner fits are cross-validated but the meta-learner fit 
-is not. It takes essentially no extra time to calculate a revere-CV 
-performance/risk estimate of the SL, since we already have the CV fits of the 
-candidates. This isn't to say that revere-CV SL performance can replace that 
-obtained from actual CV SL. Revere can be used to very quickly examine an 
-approximate lower bound on the SL's CV risk *when the meta-learner is a simple model*,
-like NNLS. We can output the revere-based CV risk estimate by setting 
+We can also use so-called "revere", to obtain a partial CV risk for the SL,
+where the SL candidate learner fits are cross-validated but the meta-learner fit
+is not. It takes essentially no extra time to calculate a revere-CV
+performance/risk estimate of the SL, since we already have the CV fits of the
+candidates. This isn't to say that revere-CV SL performance can replace that
+obtained from actual CV SL. Revere can be used to very quickly examine an
+approximate lower bound on the SL's CV risk *when the meta-learner is a simple
+model*, like NNLS. We can output the revere-based CV risk estimate by setting
 `get_sl_revere_risk = TRUE` in `cv_risk`.
 
 ```{r sl-revere-risk}
@@ -957,15 +956,16 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   cv_risk_w_sl_revere[,c(1:3)] %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 ```
-##### Revere-cross-validated predictive performance of Super Learner by hand {-} 
-We show how to calculate the revere-CV predictive performance/risk of 
-the SL by hand below, as this might be helpful for understanding revere and 
-how it can be used to obtain a partial CV performance/risk estimate for the 
-SL. 
+
+##### Revere-cross-validated predictive performance of Super Learner by hand {-}
+
+We show how to calculate the revere-CV predictive performance/risk of the SL by
+hand below, as this might be helpful for understanding revere and how it can be
+used to obtain a partial CV performance/risk estimate for the SL.
 
 ```{r sl-revere-risk-byhand}
 ##### revere-based risk "by hand" #####
@@ -973,17 +973,17 @@ SL.
 sl_revere_risk_list <- lapply(seq_along(task$folds), function(i){
   # get validation dataset for fold i:
   v_data <- task$data[task$folds[[i]]$validation_set, ]
-  
+
   # get observed outcomes in fold i's validation dataset:
   v_outcomes <- v_data[["whz"]]
-  
-  # make task (for prediction) using fold i's validation dataset as data, 
+
+  # make task (for prediction) using fold i's validation dataset as data,
   # and keeping all else the same:
   v_task <- make_sl3_Task(
     covariates = task$nodes$covariates, data = v_data
   )
-  
-  # get predicted outcomes for fold i's validation dataset, using candidates 
+
+  # get predicted outcomes for fold i's validation dataset, using candidates
   # trained to fold i's training dataset
   v_preds <- sl_fit$fit_object$cv_fit$fit_object$fold_fits[[i]]$predict(v_task)
 
@@ -992,19 +992,19 @@ sl_revere_risk_list <- lapply(seq_along(task$folds), function(i){
     covariates = sl_fit$fit_object$cv_meta_task$nodes$covariates,
     data = v_preds
   )
-  
+
   # get predicted outcomes for fold i's metalevel dataset, using the fitted
-  # metalearner, cv_meta_fit 
+  # metalearner, cv_meta_fit
   sl_revere_v_preds <- sl_fit$fit_object$cv_meta_fit$predict(task=v_meta_task)
   # note: cv_meta_fit was trained on the metalevel dataset, which contains the
-  # candidates' cv predictions and validation dataset outcomes across ALL folds, 
+  # candidates' cv predictions and validation dataset outcomes across ALL folds,
   # so cv_meta_fit has already seen fold i's validation dataset outcomes.
-  
+
   # calculate predictive performance for fold i for the SL
   eval_function <- loss_squared_error # valid for estimation of conditional mean
-  # note: by evaluating the predictive performance of the SL using outcomes 
-  # that were already seen by the metalearner, this is not a cross-validated 
-  # measure of predictive performance for the SL. 
+  # note: by evaluating the predictive performance of the SL using outcomes
+  # that were already seen by the metalearner, this is not a cross-validated
+  # measure of predictive performance for the SL.
   sl_revere_v_loss <- eval_function(
     pred = sl_revere_v_preds, observed = v_outcomes
   )
@@ -1084,7 +1084,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   dSL_cv_risk_table[,c(1:3)] %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 ```
@@ -1396,7 +1396,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   head(task$X) %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 ```
@@ -1795,7 +1795,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   washb_varimp %>%
     kable(digits = 4) %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 ```
@@ -2037,7 +2037,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   head(chspred) %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 ```
diff --git a/R_code/02-tlverse.R b/R_code/02-tlverse.R
index c75e15d..89a2ea0 100644
--- a/R_code/02-tlverse.R
+++ b/R_code/02-tlverse.R
@@ -4,5 +4,6 @@
 
 
 ## ----renviron-example, results="asis", eval=FALSE-----------------------------
-## GITHUB_PAT <- yourPAT
+## GITHUB_PAT=yourPAT
+## 
 
diff --git a/R_code/05-origami.R b/R_code/05-origami.R
index 8e69d27..f131304 100644
--- a/R_code/05-origami.R
+++ b/R_code/05-origami.R
@@ -2,7 +2,6 @@
 library(data.table)
 library(origami)
 library(knitr)
-library(kableExtra)
 
 # load data set and take a peek
 washb_data <- fread(
@@ -23,13 +22,14 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   head(washb_data) %>%
     kable() %>%
-    kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 
 
 ## ----resubstitution-----------------------------------------------------------
-folds_resubstitution(nrow(washb_data))
+folds <- folds_resubstitution(nrow(washb_data))
+folds
 
 
 ## ----loo----------------------------------------------------------------------
@@ -78,7 +78,7 @@ knitr::include_graphics(path = "img/png/rolling_origin.png")
 
 ## ----rolling_origin-----------------------------------------------------------
 folds <- folds_rolling_origin(
-  t,
+  n = t,
   first_window = 50, validation_size = 10, gap = 5, batch = 20
 )
 folds[[1]]
@@ -91,7 +91,7 @@ knitr::include_graphics(path = "img/png/rolling_window.png")
 
 ## ----rolling_window-----------------------------------------------------------
 folds <- folds_rolling_window(
-  t,
+  n = t,
   window_size = 50, validation_size = 10, gap = 5, batch = 20
 )
 folds[[1]]
@@ -120,11 +120,12 @@ washb_data <- fread(
   stringsAsFactors = TRUE
 )
 
-# Remove missing data, then pick just the first 500 rows
+# remove missing data with drop_na(), then pick just the first 500 rows
 washb_data <- washb_data %>%
   drop_na() %>%
   slice(1:500)
 
+# specify the outcome and covariates as character vectors
 outcome <- "whz"
 covars <- colnames(washb_data)[-which(names(washb_data) == outcome)]
 
@@ -136,13 +137,12 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   head(washb_data) %>%
     kable() %>%
-    kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 
 
 ## ----covariates---------------------------------------------------------------
-outcome
 covars
 
 
@@ -151,7 +151,7 @@ lm_mod <- lm(whz ~ ., data = washb_data)
 summary(lm_mod)
 
 
-## ----get_naive_error----------------------------------------------------------
+## ----get_naive_mse------------------------------------------------------------
 (err <- mean(resid(lm_mod)^2))
 
 
@@ -227,7 +227,7 @@ cv_rf <- function(fold, data, reg_form) {
 # now, let's cross-validate...
 folds <- make_folds(washb_data)
 cvrf_results <- cross_validate(
-  cv_fun = cv_rf, folds = folds, 
+  cv_fun = cv_rf, folds = folds,
   data = washb_data, reg_form = "whz ~ .",
   use_future = FALSE
 )
diff --git a/R_code/06-sl3.R b/R_code/06-sl3.R
index 3231952..3ae0fcd 100644
--- a/R_code/06-sl3.R
+++ b/R_code/06-sl3.R
@@ -1,6 +1,5 @@
 ## ----setup-handbook-utils-noecho, echo = FALSE--------------------------------
 library(knitr)
-library(kableExtra)
 library(data.table)
 
 
@@ -25,7 +24,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   head(washb_data) %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 
@@ -144,9 +143,11 @@ df_plot <- data.table(
 )
 df_plot <- df_plot[order(df_plot$Obs), ] 
 
+
 ## ----predvobs-head, eval = FALSE----------------------------------------------
 ## head(df_plot)
 
+
 ## ----predvobs-head-handbook, echo = FALSE-------------------------------------
 if (knitr::is_latex_output()) {
   head(df_plot) %>%
@@ -154,10 +155,11 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   head(df_plot) %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 
+
 ## ----predobs-plot, fig.asp = .55, fig.cap = "Observed and predicted values for weight-for-height z-score (whz)"----
 # melt the table so we can plot observed and predicted values
 df_plot$id <- seq(1:nrow(df_plot))
@@ -191,6 +193,7 @@ identical(cv_preds_option1, cv_preds_option2)
 ## ----cv-predictions-head, eval = FALSE----------------------------------------
 ## head(cv_preds_option1)
 
+
 ## ----cv-predictions-head-handbook, echo = FALSE-------------------------------
 if (knitr::is_latex_output()) {
   head(cv_preds_option1) %>%
@@ -198,7 +201,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   head(cv_preds_option1) %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 
@@ -346,9 +349,11 @@ round(metalrnr_fit$coefficients, 3)
 ## ----sl-summary---------------------------------------------------------------
 cv_risk_table <- sl_fit$cv_risk(eval_fun = loss_squared_error)
 
+
 ## ----cv-risk-summary, eval = FALSE--------------------------------------------
 ## cv_risk_table[,c(1:3)]
 
+
 ## ----cv-risk-summary-handbook, echo = FALSE-----------------------------------
 if (knitr::is_latex_output()) {
   cv_risk_table[,c(1:3)] %>%
@@ -356,7 +361,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   cv_risk_table[,c(1:3)] %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 
@@ -453,7 +458,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   cv_sl_fit$cv_risk[,c(1:3)] %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 
@@ -468,7 +473,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   round(cv_sl_fit$coef, 3) %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 
@@ -490,7 +495,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   cv_risk_w_sl_revere[,c(1:3)] %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 
@@ -501,17 +506,17 @@ if (knitr::is_latex_output()) {
 sl_revere_risk_list <- lapply(seq_along(task$folds), function(i){
   # get validation dataset for fold i:
   v_data <- task$data[task$folds[[i]]$validation_set, ]
-  
+
   # get observed outcomes in fold i's validation dataset:
   v_outcomes <- v_data[["whz"]]
-  
-  # make task (for prediction) using fold i's validation dataset as data, 
+
+  # make task (for prediction) using fold i's validation dataset as data,
   # and keeping all else the same:
   v_task <- make_sl3_Task(
     covariates = task$nodes$covariates, data = v_data
   )
-  
-  # get predicted outcomes for fold i's validation dataset, using candidates 
+
+  # get predicted outcomes for fold i's validation dataset, using candidates
   # trained to fold i's training dataset
   v_preds <- sl_fit$fit_object$cv_fit$fit_object$fold_fits[[i]]$predict(v_task)
 
@@ -520,19 +525,19 @@ sl_revere_risk_list <- lapply(seq_along(task$folds), function(i){
     covariates = sl_fit$fit_object$cv_meta_task$nodes$covariates,
     data = v_preds
   )
-  
+
   # get predicted outcomes for fold i's metalevel dataset, using the fitted
-  # metalearner, cv_meta_fit 
+  # metalearner, cv_meta_fit
   sl_revere_v_preds <- sl_fit$fit_object$cv_meta_fit$predict(task=v_meta_task)
   # note: cv_meta_fit was trained on the metalevel dataset, which contains the
-  # candidates' cv predictions and validation dataset outcomes across ALL folds, 
+  # candidates' cv predictions and validation dataset outcomes across ALL folds,
   # so cv_meta_fit has already seen fold i's validation dataset outcomes.
-  
+
   # calculate predictive performance for fold i for the SL
   eval_function <- loss_squared_error # valid for estimation of conditional mean
-  # note: by evaluating the predictive performance of the SL using outcomes 
-  # that were already seen by the metalearner, this is not a cross-validated 
-  # measure of predictive performance for the SL. 
+  # note: by evaluating the predictive performance of the SL using outcomes
+  # that were already seen by the metalearner, this is not a cross-validated
+  # measure of predictive performance for the SL.
   sl_revere_v_loss <- eval_function(
     pred = sl_revere_v_preds, observed = v_outcomes
   )
@@ -575,7 +580,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   dSL_cv_risk_table[,c(1:3)] %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 
@@ -689,7 +694,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   head(task$X) %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 
@@ -819,7 +824,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   washb_varimp %>%
     kable(digits = 4) %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }
 
@@ -876,7 +881,7 @@ if (knitr::is_latex_output()) {
 } else if (knitr::is_html_output()) {
   head(chspred) %>%
     kable() %>%
-    kableExtra:::kable_styling(fixed_thead = TRUE) %>%
+    kableExtra::kable_styling(fixed_thead = TRUE) %>%
     scroll_box(width = "100%", height = "300px")
 }