From b8c2499b79f5bcf79460b6ddd0128985e65c56c7 Mon Sep 17 00:00:00 2001 From: Nima Hejazi Date: Mon, 15 May 2023 15:46:23 -0400 Subject: [PATCH] replace ::: with :: throughout --- 05-origami.Rmd | 5 ++- 06-sl3.Rmd | 74 ++++++++++++++++++++++----------------------- R_code/02-tlverse.R | 3 +- R_code/05-origami.R | 20 ++++++------ R_code/06-sl3.R | 53 +++++++++++++++++--------------- 5 files changed, 80 insertions(+), 75 deletions(-) diff --git a/05-origami.Rmd b/05-origami.Rmd index 9aa8ff8..4d92785 100644 --- a/05-origami.Rmd +++ b/05-origami.Rmd @@ -236,7 +236,6 @@ number. library(data.table) library(origami) library(knitr) -library(kableExtra) # load data set and take a peek washb_data <- fread( @@ -257,7 +256,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { head(washb_data) %>% kable() %>% - kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } ``` @@ -834,7 +833,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { head(washb_data) %>% kable() %>% - kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } ``` diff --git a/06-sl3.Rmd b/06-sl3.Rmd index 7dbc347..7b6eb88 100644 --- a/06-sl3.Rmd +++ b/06-sl3.Rmd @@ -122,7 +122,6 @@ First, we need to load the data and relevant packages into the R session. ```{r setup-handbook-utils-noecho, echo = FALSE} library(knitr) -library(kableExtra) library(data.table) ``` @@ -152,7 +151,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { head(washb_data) %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } ``` @@ -462,7 +461,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { head(df_plot) %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } ``` @@ -518,7 +517,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { head(cv_preds_option1) %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } ``` @@ -767,7 +766,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { cv_risk_table[,c(1:3)] %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } ``` @@ -903,7 +902,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { cv_sl_fit$cv_risk[,c(1:3)] %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } ``` @@ -924,20 +923,20 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { round(cv_sl_fit$coef, 3) %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } ``` ### Revere-cross-validated predictive performance of Super Learner -We can also use so-called "revere", to obtain a partial CV risk for the SL, -where the SL candidate learner fits are cross-validated but the meta-learner fit -is not. It takes essentially no extra time to calculate a revere-CV -performance/risk estimate of the SL, since we already have the CV fits of the -candidates. This isn't to say that revere-CV SL performance can replace that -obtained from actual CV SL. Revere can be used to very quickly examine an -approximate lower bound on the SL's CV risk *when the meta-learner is a simple model*, -like NNLS. We can output the revere-based CV risk estimate by setting +We can also use so-called "revere", to obtain a partial CV risk for the SL, +where the SL candidate learner fits are cross-validated but the meta-learner fit +is not. It takes essentially no extra time to calculate a revere-CV +performance/risk estimate of the SL, since we already have the CV fits of the +candidates. This isn't to say that revere-CV SL performance can replace that +obtained from actual CV SL. Revere can be used to very quickly examine an +approximate lower bound on the SL's CV risk *when the meta-learner is a simple +model*, like NNLS. We can output the revere-based CV risk estimate by setting `get_sl_revere_risk = TRUE` in `cv_risk`. ```{r sl-revere-risk} @@ -957,15 +956,16 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { cv_risk_w_sl_revere[,c(1:3)] %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } ``` -##### Revere-cross-validated predictive performance of Super Learner by hand {-} -We show how to calculate the revere-CV predictive performance/risk of -the SL by hand below, as this might be helpful for understanding revere and -how it can be used to obtain a partial CV performance/risk estimate for the -SL. + +##### Revere-cross-validated predictive performance of Super Learner by hand {-} + +We show how to calculate the revere-CV predictive performance/risk of the SL by +hand below, as this might be helpful for understanding revere and how it can be +used to obtain a partial CV performance/risk estimate for the SL. ```{r sl-revere-risk-byhand} ##### revere-based risk "by hand" ##### @@ -973,17 +973,17 @@ SL. sl_revere_risk_list <- lapply(seq_along(task$folds), function(i){ # get validation dataset for fold i: v_data <- task$data[task$folds[[i]]$validation_set, ] - + # get observed outcomes in fold i's validation dataset: v_outcomes <- v_data[["whz"]] - - # make task (for prediction) using fold i's validation dataset as data, + + # make task (for prediction) using fold i's validation dataset as data, # and keeping all else the same: v_task <- make_sl3_Task( covariates = task$nodes$covariates, data = v_data ) - - # get predicted outcomes for fold i's validation dataset, using candidates + + # get predicted outcomes for fold i's validation dataset, using candidates # trained to fold i's training dataset v_preds <- sl_fit$fit_object$cv_fit$fit_object$fold_fits[[i]]$predict(v_task) @@ -992,19 +992,19 @@ sl_revere_risk_list <- lapply(seq_along(task$folds), function(i){ covariates = sl_fit$fit_object$cv_meta_task$nodes$covariates, data = v_preds ) - + # get predicted outcomes for fold i's metalevel dataset, using the fitted - # metalearner, cv_meta_fit + # metalearner, cv_meta_fit sl_revere_v_preds <- sl_fit$fit_object$cv_meta_fit$predict(task=v_meta_task) # note: cv_meta_fit was trained on the metalevel dataset, which contains the - # candidates' cv predictions and validation dataset outcomes across ALL folds, + # candidates' cv predictions and validation dataset outcomes across ALL folds, # so cv_meta_fit has already seen fold i's validation dataset outcomes. - + # calculate predictive performance for fold i for the SL eval_function <- loss_squared_error # valid for estimation of conditional mean - # note: by evaluating the predictive performance of the SL using outcomes - # that were already seen by the metalearner, this is not a cross-validated - # measure of predictive performance for the SL. + # note: by evaluating the predictive performance of the SL using outcomes + # that were already seen by the metalearner, this is not a cross-validated + # measure of predictive performance for the SL. sl_revere_v_loss <- eval_function( pred = sl_revere_v_preds, observed = v_outcomes ) @@ -1084,7 +1084,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { dSL_cv_risk_table[,c(1:3)] %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } ``` @@ -1396,7 +1396,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { head(task$X) %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } ``` @@ -1795,7 +1795,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { washb_varimp %>% kable(digits = 4) %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } ``` @@ -2037,7 +2037,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { head(chspred) %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } ``` diff --git a/R_code/02-tlverse.R b/R_code/02-tlverse.R index c75e15d..89a2ea0 100644 --- a/R_code/02-tlverse.R +++ b/R_code/02-tlverse.R @@ -4,5 +4,6 @@ ## ----renviron-example, results="asis", eval=FALSE----------------------------- -## GITHUB_PAT <- yourPAT +## GITHUB_PAT=yourPAT +## diff --git a/R_code/05-origami.R b/R_code/05-origami.R index 8e69d27..f131304 100644 --- a/R_code/05-origami.R +++ b/R_code/05-origami.R @@ -2,7 +2,6 @@ library(data.table) library(origami) library(knitr) -library(kableExtra) # load data set and take a peek washb_data <- fread( @@ -23,13 +22,14 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { head(washb_data) %>% kable() %>% - kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } ## ----resubstitution----------------------------------------------------------- -folds_resubstitution(nrow(washb_data)) +folds <- folds_resubstitution(nrow(washb_data)) +folds ## ----loo---------------------------------------------------------------------- @@ -78,7 +78,7 @@ knitr::include_graphics(path = "img/png/rolling_origin.png") ## ----rolling_origin----------------------------------------------------------- folds <- folds_rolling_origin( - t, + n = t, first_window = 50, validation_size = 10, gap = 5, batch = 20 ) folds[[1]] @@ -91,7 +91,7 @@ knitr::include_graphics(path = "img/png/rolling_window.png") ## ----rolling_window----------------------------------------------------------- folds <- folds_rolling_window( - t, + n = t, window_size = 50, validation_size = 10, gap = 5, batch = 20 ) folds[[1]] @@ -120,11 +120,12 @@ washb_data <- fread( stringsAsFactors = TRUE ) -# Remove missing data, then pick just the first 500 rows +# remove missing data with drop_na(), then pick just the first 500 rows washb_data <- washb_data %>% drop_na() %>% slice(1:500) +# specify the outcome and covariates as character vectors outcome <- "whz" covars <- colnames(washb_data)[-which(names(washb_data) == outcome)] @@ -136,13 +137,12 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { head(washb_data) %>% kable() %>% - kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } ## ----covariates--------------------------------------------------------------- -outcome covars @@ -151,7 +151,7 @@ lm_mod <- lm(whz ~ ., data = washb_data) summary(lm_mod) -## ----get_naive_error---------------------------------------------------------- +## ----get_naive_mse------------------------------------------------------------ (err <- mean(resid(lm_mod)^2)) @@ -227,7 +227,7 @@ cv_rf <- function(fold, data, reg_form) { # now, let's cross-validate... folds <- make_folds(washb_data) cvrf_results <- cross_validate( - cv_fun = cv_rf, folds = folds, + cv_fun = cv_rf, folds = folds, data = washb_data, reg_form = "whz ~ .", use_future = FALSE ) diff --git a/R_code/06-sl3.R b/R_code/06-sl3.R index 3231952..3ae0fcd 100644 --- a/R_code/06-sl3.R +++ b/R_code/06-sl3.R @@ -1,6 +1,5 @@ ## ----setup-handbook-utils-noecho, echo = FALSE-------------------------------- library(knitr) -library(kableExtra) library(data.table) @@ -25,7 +24,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { head(washb_data) %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } @@ -144,9 +143,11 @@ df_plot <- data.table( ) df_plot <- df_plot[order(df_plot$Obs), ] + ## ----predvobs-head, eval = FALSE---------------------------------------------- ## head(df_plot) + ## ----predvobs-head-handbook, echo = FALSE------------------------------------- if (knitr::is_latex_output()) { head(df_plot) %>% @@ -154,10 +155,11 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { head(df_plot) %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } + ## ----predobs-plot, fig.asp = .55, fig.cap = "Observed and predicted values for weight-for-height z-score (whz)"---- # melt the table so we can plot observed and predicted values df_plot$id <- seq(1:nrow(df_plot)) @@ -191,6 +193,7 @@ identical(cv_preds_option1, cv_preds_option2) ## ----cv-predictions-head, eval = FALSE---------------------------------------- ## head(cv_preds_option1) + ## ----cv-predictions-head-handbook, echo = FALSE------------------------------- if (knitr::is_latex_output()) { head(cv_preds_option1) %>% @@ -198,7 +201,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { head(cv_preds_option1) %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } @@ -346,9 +349,11 @@ round(metalrnr_fit$coefficients, 3) ## ----sl-summary--------------------------------------------------------------- cv_risk_table <- sl_fit$cv_risk(eval_fun = loss_squared_error) + ## ----cv-risk-summary, eval = FALSE-------------------------------------------- ## cv_risk_table[,c(1:3)] + ## ----cv-risk-summary-handbook, echo = FALSE----------------------------------- if (knitr::is_latex_output()) { cv_risk_table[,c(1:3)] %>% @@ -356,7 +361,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { cv_risk_table[,c(1:3)] %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } @@ -453,7 +458,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { cv_sl_fit$cv_risk[,c(1:3)] %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } @@ -468,7 +473,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { round(cv_sl_fit$coef, 3) %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } @@ -490,7 +495,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { cv_risk_w_sl_revere[,c(1:3)] %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } @@ -501,17 +506,17 @@ if (knitr::is_latex_output()) { sl_revere_risk_list <- lapply(seq_along(task$folds), function(i){ # get validation dataset for fold i: v_data <- task$data[task$folds[[i]]$validation_set, ] - + # get observed outcomes in fold i's validation dataset: v_outcomes <- v_data[["whz"]] - - # make task (for prediction) using fold i's validation dataset as data, + + # make task (for prediction) using fold i's validation dataset as data, # and keeping all else the same: v_task <- make_sl3_Task( covariates = task$nodes$covariates, data = v_data ) - - # get predicted outcomes for fold i's validation dataset, using candidates + + # get predicted outcomes for fold i's validation dataset, using candidates # trained to fold i's training dataset v_preds <- sl_fit$fit_object$cv_fit$fit_object$fold_fits[[i]]$predict(v_task) @@ -520,19 +525,19 @@ sl_revere_risk_list <- lapply(seq_along(task$folds), function(i){ covariates = sl_fit$fit_object$cv_meta_task$nodes$covariates, data = v_preds ) - + # get predicted outcomes for fold i's metalevel dataset, using the fitted - # metalearner, cv_meta_fit + # metalearner, cv_meta_fit sl_revere_v_preds <- sl_fit$fit_object$cv_meta_fit$predict(task=v_meta_task) # note: cv_meta_fit was trained on the metalevel dataset, which contains the - # candidates' cv predictions and validation dataset outcomes across ALL folds, + # candidates' cv predictions and validation dataset outcomes across ALL folds, # so cv_meta_fit has already seen fold i's validation dataset outcomes. - + # calculate predictive performance for fold i for the SL eval_function <- loss_squared_error # valid for estimation of conditional mean - # note: by evaluating the predictive performance of the SL using outcomes - # that were already seen by the metalearner, this is not a cross-validated - # measure of predictive performance for the SL. + # note: by evaluating the predictive performance of the SL using outcomes + # that were already seen by the metalearner, this is not a cross-validated + # measure of predictive performance for the SL. sl_revere_v_loss <- eval_function( pred = sl_revere_v_preds, observed = v_outcomes ) @@ -575,7 +580,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { dSL_cv_risk_table[,c(1:3)] %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } @@ -689,7 +694,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { head(task$X) %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } @@ -819,7 +824,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { washb_varimp %>% kable(digits = 4) %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") } @@ -876,7 +881,7 @@ if (knitr::is_latex_output()) { } else if (knitr::is_html_output()) { head(chspred) %>% kable() %>% - kableExtra:::kable_styling(fixed_thead = TRUE) %>% + kableExtra::kable_styling(fixed_thead = TRUE) %>% scroll_box(width = "100%", height = "300px") }